File indexing completed on 2024-04-14 03:40:38

0001 /*
0002     This file is part of Kiten, a KDE Japanese Reference Tool
0003     SPDX-FileCopyrightText: 2006 Joseph Kerian <jkerian@gmail.com>
0004     SPDX-FileCopyrightText: 2006 Eric Kjeldergaard <kjelderg@gmail.com>
0005     SPDX-FileCopyrightText: 2011 Daniel E. Moctezuma <democtezuma@gmail.com>
0006 
0007     SPDX-License-Identifier: LGPL-2.0-or-later
0008 */
0009 
0010 #include "entryedict.h"
0011 
0012 #include "dictfileedict.h"
0013 #include "kitenmacros.h"
0014 
0015 #include <QDebug>
0016 
// Evaluates to a copy of the QStringList that x points to, or to an empty
// QStringList when x is null. The argument is parenthesised so that arbitrary
// pointer expressions can be passed safely.
#define QSTRINGLISTCHECK(x) ((x) == nullptr ? QStringList() : *(x))
0018 
0019 using namespace Qt::StringLiterals;
0020 
0021 EntryEdict::EntryEdict(const QString &dict)
0022     : Entry(dict)
0023 {
0024 }
0025 
0026 EntryEdict::EntryEdict(const QString &dict, const QString &entry)
0027     : Entry(dict)
0028 {
0029     loadEntry(entry);
0030 }
0031 
0032 Entry *EntryEdict::clone() const
0033 {
0034     return new EntryEdict(*this);
0035 }
0036 
0037 /**
0038  * Regenerate a QString like the one we got in loadEntry()
0039  */
0040 QString EntryEdict::dumpEntry() const
0041 {
0042     QString readings = QString(Readings.isEmpty() ? QStringLiteral(" ") : QStringLiteral(" [") + Readings.first() + QStringLiteral("] "));
0043 
0044     return QStringLiteral("%1%2/%3/").arg(Word).arg(readings).arg(Meanings.join(QLatin1Char('/')));
0045 }
0046 
0047 QString EntryEdict::getDictionaryType() const
0048 {
0049     return EDICT;
0050 }
0051 
0052 QString EntryEdict::getTypes() const
0053 {
0054     return m_types.join(outputListDelimiter);
0055 }
0056 
0057 QStringList EntryEdict::getTypesList() const
0058 {
0059     return m_types;
0060 }
0061 
0062 bool EntryEdict::isAdjective() const
0063 {
0064     for (const QString &type : EdictFormatting::Adjectives) {
0065         if (m_types.contains(type)) {
0066             return true;
0067         }
0068     }
0069 
0070     return false;
0071 }
0072 
0073 bool EntryEdict::isAdverb() const
0074 {
0075     for (const QString &type : EdictFormatting::Adverbs) {
0076         if (m_types.contains(type)) {
0077             return true;
0078         }
0079     }
0080 
0081     return false;
0082 }
0083 
0084 bool EntryEdict::isCommon() const
0085 {
0086     return getExtendedInfoItem(QStringLiteral("common")) == QLatin1Char('1');
0087 }
0088 
0089 bool EntryEdict::isExpression() const
0090 {
0091     for (const QString &type : EdictFormatting::Expressions) {
0092         if (m_types.contains(type)) {
0093             return true;
0094         }
0095     }
0096 
0097     return false;
0098 }
0099 
0100 bool EntryEdict::isFukisokuVerb() const
0101 {
0102     for (const QString &type : EdictFormatting::FukisokuVerbs) {
0103         if (m_types.contains(type)) {
0104             return true;
0105         }
0106     }
0107 
0108     return false;
0109 }
0110 
0111 bool EntryEdict::isGodanVerb() const
0112 {
0113     for (const QString &type : EdictFormatting::GodanVerbs) {
0114         if (m_types.contains(type)) {
0115             return true;
0116         }
0117     }
0118 
0119     return false;
0120 }
0121 
0122 bool EntryEdict::isIchidanVerb() const
0123 {
0124     for (const QString &type : EdictFormatting::IchidanVerbs) {
0125         if (m_types.contains(type)) {
0126             return true;
0127         }
0128     }
0129 
0130     return false;
0131 }
0132 
0133 bool EntryEdict::isNoun() const
0134 {
0135     for (const QString &type : EdictFormatting::Nouns) {
0136         if (m_types.contains(type)) {
0137             return true;
0138         }
0139     }
0140 
0141     return false;
0142 }
0143 
0144 bool EntryEdict::isParticle() const
0145 {
0146     return m_types.contains(EdictFormatting::Particle);
0147 }
0148 
0149 bool EntryEdict::isPrefix() const
0150 {
0151     for (const QString &type : EdictFormatting::Prefix) {
0152         if (m_types.contains(type)) {
0153             return true;
0154         }
0155     }
0156 
0157     return false;
0158 }
0159 
0160 bool EntryEdict::isSuffix() const
0161 {
0162     for (const QString &type : EdictFormatting::Suffix) {
0163         if (m_types.contains(type)) {
0164             return true;
0165         }
0166     }
0167 
0168     return false;
0169 }
0170 
0171 bool EntryEdict::isVerb() const
0172 {
0173     for (const QString &type : EdictFormatting::Verbs) {
0174         if (m_types.contains(type)) {
0175             return true;
0176         }
0177     }
0178 
0179     return false;
0180 }
0181 
0182 QString EntryEdict::HTMLWord() const
0183 {
0184     return QStringLiteral("<span class=\"Word\">%1</span>").arg(Word.isEmpty() ? kanjiLinkify(Meanings.first()) : kanjiLinkify(Word));
0185 }
0186 
0187 /**
0188  * Makes a link out of each kanji in @param inString
0189  */
0190 QString EntryEdict::kanjiLinkify(const QString &inString) const
0191 {
0192     QString outString;
0193 
0194     for (int i = 0; i < inString.length(); i++) {
0195         if (isKanji(inString.at(i))) {
0196             outString += makeLink(QString(inString.at(i)));
0197         } else {
0198             outString += inString.at(i);
0199         }
0200     }
0201 
0202     return outString;
0203 }
0204 
0205 /**
0206  * Take a QString and load it into the Entry as appropriate
0207  * The format is basically: KANJI [KANA] /(general information) gloss/gloss/.../
0208  * Note that they can rudely place more (general information) in gloss's that are
0209  * not the first one.
0210  */
0211 bool EntryEdict::loadEntry(const QString &entryLine)
0212 {
0213     /* Set tempQString to be the reading and word portion of the entryLine */
0214     int endOfKanjiAndKanaSection = entryLine.indexOf('/'_L1);
0215     if (endOfKanjiAndKanaSection == -1) {
0216         return false;
0217     }
0218     QString tempQString = entryLine.left(endOfKanjiAndKanaSection);
0219     /* The actual Word is the beginning of the line */
0220     int endOfKanji = tempQString.indexOf(' '_L1);
0221     if (endOfKanji == -1) {
0222         return false;
0223     }
0224     Word = tempQString.left(endOfKanji);
0225 
0226     /* The Reading is either Word or encased in '[' */
0227     Readings.clear();
0228     int startOfReading = tempQString.indexOf('['_L1);
0229     if (startOfReading != -1) // This field is optional for EDICT (and kiten)
0230     {
0231         Readings.append(tempQString.left(tempQString.lastIndexOf(']'_L1)).mid(startOfReading + 1));
0232     }
0233     /* TODO: use this code or not?
0234     * app does not handle only reading and no word entries
0235     * very well so far
0236     else
0237     {
0238       Readings.append(Word);
0239       Word.clear();
0240     }
0241     */
0242 
0243     /* set Meanings to be all of the meanings in the definition */
0244     QString remainingLine = entryLine.mid(endOfKanjiAndKanaSection);
0245     // Trim to last '/'
0246     remainingLine = remainingLine.left(remainingLine.lastIndexOf('/'_L1));
0247     Meanings = remainingLine.split('/'_L1, Qt::SkipEmptyParts);
0248 
0249     if (Meanings.empty()) {
0250         return false;
0251     }
0252 
0253     if (Meanings.last() == QLatin1String("(P)")) {
0254         ExtendedInfo[QStringLiteral("common")] = QStringLiteral("1");
0255         Meanings.removeLast();
0256     }
0257 
0258     QString firstWord = Meanings.first();
0259     QStringList stringTypes;
0260 
0261     // Pulls the various types out
0262     // TODO: Remove them from the original string
0263     for (int i = firstWord.indexOf(QLatin1Char('(')); i != -1; i = firstWord.indexOf(QLatin1Char('('), i + 1)) {
0264         QString parentheses = firstWord.mid(i + 1, firstWord.indexOf(QLatin1Char(')'), i) - i - 1);
0265         stringTypes += parentheses.split(','_L1);
0266     }
0267 
0268     for (const QString &str : stringTypes) {
0269         if (EdictFormatting::PartsOfSpeech.contains(str)) {
0270             m_types += str;
0271         } else if (EdictFormatting::FieldOfApplication.contains(str)) {
0272             ExtendedInfo[QStringLiteral("field")] = str;
0273         } else if (EdictFormatting::MiscMarkings.contains(str)) {
0274             m_miscMarkings += str;
0275         }
0276     }
0277 
0278     return true;
0279 }
0280 
0281 bool EntryEdict::matchesWordType(const DictQuery &query) const
0282 {
0283     if (!query.isEmpty()) {
0284         if (query.getMatchWordType() == DictQuery::Verb && isVerb()) {
0285             return true;
0286         }
0287         if (query.getMatchWordType() == DictQuery::Noun && isNoun()) {
0288             return true;
0289         }
0290         if (query.getMatchWordType() == DictQuery::Adjective && isAdjective()) {
0291             return true;
0292         }
0293         if (query.getMatchWordType() == DictQuery::Adverb && isAdverb()) {
0294             return true;
0295         }
0296         if (query.getMatchWordType() == DictQuery::Expression && isExpression()) {
0297             return true;
0298         }
0299         if (query.getMatchWordType() == DictQuery::Prefix && isPrefix()) {
0300             return true;
0301         }
0302         if (query.getMatchWordType() == DictQuery::Suffix && isSuffix()) {
0303             return true;
0304         }
0305         if (query.getMatchWordType() == DictQuery::Any) {
0306             return true;
0307         }
0308     }
0309 
0310     return false;
0311 }
0312 
0313 /**
0314  * Returns a HTML version of an Entry
0315  */
0316 QString EntryEdict::toHTML() const
0317 {
0318     QString result = QStringLiteral("<div class=\"%1\">").arg(EDICT.toUpper());
0319     if (isCommon()) {
0320         result += QLatin1String("<div class=\"Common\">");
0321     }
0322 
0323     for (const QString &field : QSTRINGLISTCHECK(DictFileEdict::displayFields)) {
0324         if (field == QLatin1String("--NewLine--"))
0325             result += QLatin1String("<br>");
0326         else if (field == QLatin1String("Word/Kanji"))
0327             result += HTMLWord() + ' '_L1;
0328         else if (field == QLatin1String("Meaning"))
0329             result += HTMLMeanings() + ' '_L1;
0330         else if (field == QLatin1String("Reading"))
0331             result += HTMLReadings() + ' '_L1;
0332         else
0333             qDebug() << "Unknown field: " << field;
0334     }
0335 
0336     if (isCommon()) {
0337         result += QLatin1String("</div>");
0338     }
0339 
0340     result += QLatin1String("</div>");
0341     return result;
0342 }
0343 
0344 #ifdef KITEN_EDICTFORMATTING
0345 
0346 /**
0347  * The basic idea of this is to provide a mapping from possible entry types to
0348  * possible things the user could enter. Then our code for the matching entry can simply
0349  * use this mapping to determine if a given entry could be understood to match the user's input.
0350  *
0351  * There are two basic approaches we could take:
0352  *   Convert the user's entry into a list of types, see if the Entry type matches any of
0353  *           the conversions from this list (the list comparisons will be MANY enums).
0354  *   Convert our Entry types to a list of acceptable string aliases. Then compare the
0355  *           user's input to this list (the list will be a relatively small list of strings).
0356  *
0357  * My gut instinct is that the first case (comparison of a largish list of ints) will be
0358  * faster, and so that's the one that's implemented here.
0359  *
0360  * The following are the minimum list of case-insensitive aliases that the user could enter:
0361  *   noun
0362  *   verb:
0363  *     ichidan
0364  *     godan
0365  *   adjective
0366  *   adverb
0367  *   particle
0368  *
0369  * Note that our File Parser will also expand to general cases, if not included already:
0370  * For Example: v5aru -> v5aru,v5 (so that a search for "godan" will find it)
0371  */
0372 namespace EdictFormatting
0373 {
0374 enum WordType {
0375     noun,
0376     verb,
0377     adjective,
0378     adverb,
0379     particle,
0380     ichidanVerb,
0381     godanVerb,
0382     fukisokuVerb,
0383     expression,
0384     idiomaticExpression,
0385     prefix,
0386     suffix,
0387     nounPrefix,
0388     nounSuffix
0389 };
0390 
0391 // Forward declarations of our functions to be used.
0392 QMultiHash<WordType, QString> createPartOfSpeechCategories();
0393 QSet<QString> createPartsOfSpeech();
0394 QSet<QString> createMiscMarkings();
0395 QSet<QString> createFieldOfApplication();
0396 QStringList createNounsList();
0397 QStringList createVerbsList();
0398 QStringList createExpressionsList();
0399 QStringList createPrefixesList();
0400 QStringList createSuffixesList();
0401 
0402 // Define our public variables.
0403 QMultiHash<WordType, QString> PartOfSpeechCategories = createPartOfSpeechCategories();
0404 QSet<QString> PartsOfSpeech = createPartsOfSpeech();
0405 QSet<QString> MiscMarkings = createMiscMarkings();
0406 QSet<QString> FieldOfApplication = createFieldOfApplication();
0407 
0408 // PartOfSpeechCategories needs to has some values before this line.
0409 QStringList Nouns = createNounsList();
0410 QStringList Adjectives = PartOfSpeechCategories.values(adjective);
0411 QStringList Adverbs = PartOfSpeechCategories.values(adverb);
0412 QStringList IchidanVerbs = PartOfSpeechCategories.values(ichidanVerb);
0413 QStringList GodanVerbs = PartOfSpeechCategories.values(godanVerb);
0414 QStringList FukisokuVerbs = PartOfSpeechCategories.values(fukisokuVerb);
0415 QStringList Verbs = createVerbsList();
0416 QStringList Expressions = createExpressionsList();
0417 QStringList Prefix = createPrefixesList();
0418 QStringList Suffix = createSuffixesList();
0419 QString Particle = PartOfSpeechCategories.value(particle);
0420 
0421 QStringList createNounsList()
0422 {
0423     QStringList list;
0424     list.append(PartOfSpeechCategories.values(noun));
0425     list.append(PartOfSpeechCategories.values(nounPrefix));
0426     list.append(PartOfSpeechCategories.values(nounSuffix));
0427     return list;
0428 }
0429 
0430 QStringList createVerbsList()
0431 {
0432     QStringList list;
0433     list.append(PartOfSpeechCategories.values(verb));
0434     list.append(IchidanVerbs);
0435     list.append(GodanVerbs);
0436     list.append(FukisokuVerbs);
0437     return list;
0438 }
0439 
0440 QStringList createExpressionsList()
0441 {
0442     QStringList list;
0443     list.append(PartOfSpeechCategories.values(expression));
0444     list.append(PartOfSpeechCategories.values(idiomaticExpression));
0445     return list;
0446 }
0447 
0448 QStringList createPrefixesList()
0449 {
0450     QStringList list;
0451     list.append(PartOfSpeechCategories.values(prefix));
0452     list.append(PartOfSpeechCategories.values(nounPrefix));
0453     return list;
0454 }
0455 
0456 QStringList createSuffixesList()
0457 {
0458     QStringList list;
0459     list.append(PartOfSpeechCategories.values(suffix));
0460     list.append(PartOfSpeechCategories.values(nounSuffix));
0461     return list;
0462 }
0463 
0464 QMultiHash<WordType, QString> createPartOfSpeechCategories()
0465 {
0466     QMultiHash<WordType, QString> categories;
0467 
0468     // Nouns
0469     categories.insert(noun, QStringLiteral("n"));
0470     categories.insert(noun, QStringLiteral("n-adv"));
0471     categories.insert(noun, QStringLiteral("n-t"));
0472     categories.insert(noun, QStringLiteral("adv-n"));
0473 
0474     // Noun (used as a prefix)
0475     categories.insert(nounPrefix, QStringLiteral("n-pref"));
0476 
0477     // Noun (used as a suffix)
0478     categories.insert(nounSuffix, QStringLiteral("n-suf"));
0479 
0480     // Ichidan Verbs
0481     categories.insert(ichidanVerb, QStringLiteral("v1"));
0482     categories.insert(ichidanVerb, QStringLiteral("vz"));
0483 
0484     // Godan Verbs
0485     categories.insert(godanVerb, QStringLiteral("v5"));
0486     categories.insert(godanVerb, QStringLiteral("v5aru"));
0487     categories.insert(godanVerb, QStringLiteral("v5b"));
0488     categories.insert(godanVerb, QStringLiteral("v5g"));
0489     categories.insert(godanVerb, QStringLiteral("v5k"));
0490     categories.insert(godanVerb, QStringLiteral("v5k-s"));
0491     categories.insert(godanVerb, QStringLiteral("v5m"));
0492     categories.insert(godanVerb, QStringLiteral("v5n"));
0493     categories.insert(godanVerb, QStringLiteral("v5r"));
0494     categories.insert(godanVerb, QStringLiteral("v5r-i"));
0495     categories.insert(godanVerb, QStringLiteral("v5s"));
0496     categories.insert(godanVerb, QStringLiteral("v5t"));
0497     categories.insert(godanVerb, QStringLiteral("v5u"));
0498     categories.insert(godanVerb, QStringLiteral("v5u-s"));
0499     categories.insert(godanVerb, QStringLiteral("v5uru"));
0500     categories.insert(godanVerb, QStringLiteral("v5z"));
0501 
0502     // Fukisoku verbs
0503     categories.insert(fukisokuVerb, QStringLiteral("iv"));
0504     categories.insert(fukisokuVerb, QStringLiteral("vk"));
0505     categories.insert(fukisokuVerb, QStringLiteral("vn"));
0506     categories.insert(fukisokuVerb, QStringLiteral("vs-i"));
0507     categories.insert(fukisokuVerb, QStringLiteral("vs-s"));
0508 
0509     // Other Verbs
0510     categories.insert(verb, QStringLiteral("vi"));
0511     categories.insert(verb, QStringLiteral("vs"));
0512     categories.insert(verb, QStringLiteral("vt"));
0513     categories.insert(verb, QStringLiteral("aux-v"));
0514 
0515     // Adjectives
0516     categories.insert(adjective, QStringLiteral("adj-i"));
0517     categories.insert(adjective, QStringLiteral("adj-na"));
0518     categories.insert(adjective, QStringLiteral("adj-no"));
0519     categories.insert(adjective, QStringLiteral("adj-pn"));
0520     categories.insert(adjective, QStringLiteral("adj-t"));
0521     categories.insert(adjective, QStringLiteral("adj-f"));
0522     categories.insert(adjective, QStringLiteral("adj"));
0523     categories.insert(adjective, QStringLiteral("aux-adj"));
0524 
0525     // Adverbs
0526     categories.insert(adverb, QStringLiteral("adv"));
0527     categories.insert(adverb, QStringLiteral("adv-n"));
0528     categories.insert(adverb, QStringLiteral("adv-to"));
0529 
0530     // Particle
0531     categories.insert(particle, QStringLiteral("prt"));
0532 
0533     // Expression
0534     categories.insert(expression, QStringLiteral("exp"));
0535 
0536     // Idiomatic expression
0537     categories.insert(idiomaticExpression, QStringLiteral("id"));
0538 
0539     // Prefix
0540     categories.insert(prefix, QStringLiteral("pref"));
0541 
0542     // Suffix
0543     categories.insert(suffix, QStringLiteral("suf"));
0544 
0545     return categories;
0546 }
0547 
0548 QSet<QString> createPartsOfSpeech()
0549 {
0550     QSet<QString> category;
0551 
0552     category << QStringLiteral("adj-i") << QStringLiteral("adj-na") << QStringLiteral("adj-no") << QStringLiteral("adj-pn") << QStringLiteral("adj-t")
0553              << QStringLiteral("adj-f") << QStringLiteral("adj") << QStringLiteral("adv") << QStringLiteral("adv-n") << QStringLiteral("adv-to")
0554              << QStringLiteral("aux") << QStringLiteral("aux-v") << QStringLiteral("aux-adj") << QStringLiteral("conj") << QStringLiteral("ctr")
0555              << QStringLiteral("exp") << QStringLiteral("id") << QStringLiteral("int") << QStringLiteral("iv") << QStringLiteral("n") << QStringLiteral("n-adv")
0556              << QStringLiteral("n-pref") << QStringLiteral("n-suf") << QStringLiteral("n-t") << QStringLiteral("num") << QStringLiteral("pn")
0557              << QStringLiteral("pref") << QStringLiteral("prt") << QStringLiteral("suf") << QStringLiteral("v1") << QStringLiteral("v5")
0558              << QStringLiteral("v5aru") << QStringLiteral("v5b") << QStringLiteral("v5g") << QStringLiteral("v5k") << QStringLiteral("v5k-s")
0559              << QStringLiteral("v5m") << QStringLiteral("v5n") << QStringLiteral("v5r") << QStringLiteral("v5r-i") << QStringLiteral("v5s")
0560              << QStringLiteral("v5t") << QStringLiteral("v5u") << QStringLiteral("v5u-s") << QStringLiteral("v5uru") << QStringLiteral("v5z")
0561              << QStringLiteral("vz") << QStringLiteral("vi") << QStringLiteral("vk") << QStringLiteral("vn") << QStringLiteral("vs") << QStringLiteral("vs-i")
0562              << QStringLiteral("vs-s") << QStringLiteral("vt");
0563 
0564     return category;
0565 }
0566 
0567 QSet<QString> createFieldOfApplication()
0568 {
0569     QSet<QString> category;
0570 
0571     // Field of Application terms
0572     category << QStringLiteral("Buddh") << QStringLiteral("MA") << QStringLiteral("comp") << QStringLiteral("food") << QStringLiteral("geom")
0573              << QStringLiteral("ling") << QStringLiteral("math") << QStringLiteral("mil") << QStringLiteral("physics");
0574 
0575     return category;
0576 }
0577 
0578 QSet<QString> createMiscMarkings()
0579 {
0580     QSet<QString> category;
0581 
0582     // Miscellaneous Markings (in EDICT terms)
0583     category << QStringLiteral("X") << QStringLiteral("abbr") << QStringLiteral("arch") << QStringLiteral("ateji") << QStringLiteral("chn")
0584              << QStringLiteral("col") << QStringLiteral("derog") << QStringLiteral("eK") << QStringLiteral("ek") << QStringLiteral("fam")
0585              << QStringLiteral("fem") << QStringLiteral("gikun") << QStringLiteral("hon") << QStringLiteral("hum") << QStringLiteral("iK")
0586              << QStringLiteral("id") << QStringLiteral("io") << QStringLiteral("m-sl") << QStringLiteral("male") << QStringLiteral("male-sl")
0587              << QStringLiteral("ng") << QStringLiteral("oK") << QStringLiteral("obs") << QStringLiteral("obsc") << QStringLiteral("ok")
0588              << QStringLiteral("poet") << QStringLiteral("pol") << QStringLiteral("rare") << QStringLiteral("sens") << QStringLiteral("sl")
0589              << QStringLiteral("uK") << QStringLiteral("uk") << QStringLiteral("vulg");
0590 
0591     return category;
0592 }
0593 }
0594 
0595 #endif