File indexing completed on 2024-04-14 03:40:38
0001 /* 0002 This file is part of Kiten, a KDE Japanese Reference Tool 0003 SPDX-FileCopyrightText: 2006 Joseph Kerian <jkerian@gmail.com> 0004 SPDX-FileCopyrightText: 2006 Eric Kjeldergaard <kjelderg@gmail.com> 0005 SPDX-FileCopyrightText: 2011 Daniel E. Moctezuma <democtezuma@gmail.com> 0006 0007 SPDX-License-Identifier: LGPL-2.0-or-later 0008 */ 0009 0010 #include "entryedict.h" 0011 0012 #include "dictfileedict.h" 0013 #include "kitenmacros.h" 0014 0015 #include <QDebug> 0016 0017 #define QSTRINGLISTCHECK(x) (x == NULL ? QStringList() : *x) 0018 0019 using namespace Qt::StringLiterals; 0020 0021 EntryEdict::EntryEdict(const QString &dict) 0022 : Entry(dict) 0023 { 0024 } 0025 0026 EntryEdict::EntryEdict(const QString &dict, const QString &entry) 0027 : Entry(dict) 0028 { 0029 loadEntry(entry); 0030 } 0031 0032 Entry *EntryEdict::clone() const 0033 { 0034 return new EntryEdict(*this); 0035 } 0036 0037 /** 0038 * Regenerate a QString like the one we got in loadEntry() 0039 */ 0040 QString EntryEdict::dumpEntry() const 0041 { 0042 QString readings = QString(Readings.isEmpty() ? QStringLiteral(" ") : QStringLiteral(" [") + Readings.first() + QStringLiteral("] ")); 0043 0044 return QStringLiteral("%1%2/%3/").arg(Word).arg(readings).arg(Meanings.join(QLatin1Char('/'))); 0045 } 0046 0047 QString EntryEdict::getDictionaryType() const 0048 { 0049 return EDICT; 0050 } 0051 0052 QString EntryEdict::getTypes() const 0053 { 0054 return m_types.join(outputListDelimiter); 0055 } 0056 0057 QStringList EntryEdict::getTypesList() const 0058 { 0059 return m_types; 0060 } 0061 0062 bool EntryEdict::isAdjective() const 0063 { 0064 for (const QString &type : EdictFormatting::Adjectives) { 0065 if (m_types.contains(type)) { 0066 return true; 0067 } 0068 } 0069 0070 return false; 0071 } 0072 0073 bool EntryEdict::isAdverb() const 0074 { 0075 for (const QString &type : EdictFormatting::Adverbs) { 0076 if (m_types.contains(type)) { 0077 return true; 0078 } 0079 } 0080 0081 return false; 0082 } 0083 0084 bool EntryEdict::isCommon() const 0085 { 0086 return getExtendedInfoItem(QStringLiteral("common")) == QLatin1Char('1'); 0087 } 0088 0089 bool EntryEdict::isExpression() const 0090 { 0091 for (const QString &type : EdictFormatting::Expressions) { 0092 if (m_types.contains(type)) { 0093 return true; 0094 } 0095 } 0096 0097 return false; 0098 } 0099 0100 bool EntryEdict::isFukisokuVerb() const 0101 { 0102 for (const QString &type : EdictFormatting::FukisokuVerbs) { 0103 if (m_types.contains(type)) { 0104 return true; 0105 } 0106 } 0107 0108 return false; 0109 } 0110 0111 bool EntryEdict::isGodanVerb() const 0112 { 0113 for (const QString &type : EdictFormatting::GodanVerbs) { 0114 if (m_types.contains(type)) { 0115 return true; 0116 } 0117 } 0118 0119 return false; 0120 } 0121 0122 bool EntryEdict::isIchidanVerb() const 0123 { 0124 for (const QString &type : EdictFormatting::IchidanVerbs) { 0125 if (m_types.contains(type)) { 0126 return true; 0127 } 0128 } 0129 0130 return false; 0131 } 0132 0133 bool EntryEdict::isNoun() const 0134 { 0135 for (const QString &type : EdictFormatting::Nouns) { 0136 if (m_types.contains(type)) { 0137 return true; 0138 } 0139 } 0140 0141 return false; 0142 } 0143 0144 bool EntryEdict::isParticle() const 0145 { 0146 return m_types.contains(EdictFormatting::Particle); 0147 } 0148 0149 bool EntryEdict::isPrefix() const 0150 { 0151 for (const QString &type : EdictFormatting::Prefix) { 0152 if (m_types.contains(type)) { 0153 return true; 0154 } 0155 } 0156 0157 return false; 0158 } 0159 0160 bool EntryEdict::isSuffix() const 0161 { 0162 for (const QString &type : EdictFormatting::Suffix) { 0163 if (m_types.contains(type)) { 0164 return true; 0165 } 0166 } 0167 0168 return false; 0169 } 0170 0171 bool EntryEdict::isVerb() const 0172 { 0173 for (const QString &type : EdictFormatting::Verbs) { 0174 if (m_types.contains(type)) { 0175 return true; 0176 } 0177 } 0178 0179 return false; 0180 } 0181 0182 QString EntryEdict::HTMLWord() const 0183 { 0184 return QStringLiteral("<span class=\"Word\">%1</span>").arg(Word.isEmpty() ? kanjiLinkify(Meanings.first()) : kanjiLinkify(Word)); 0185 } 0186 0187 /** 0188 * Makes a link out of each kanji in @param inString 0189 */ 0190 QString EntryEdict::kanjiLinkify(const QString &inString) const 0191 { 0192 QString outString; 0193 0194 for (int i = 0; i < inString.length(); i++) { 0195 if (isKanji(inString.at(i))) { 0196 outString += makeLink(QString(inString.at(i))); 0197 } else { 0198 outString += inString.at(i); 0199 } 0200 } 0201 0202 return outString; 0203 } 0204 0205 /** 0206 * Take a QString and load it into the Entry as appropriate 0207 * The format is basically: KANJI [KANA] /(general information) gloss/gloss/.../ 0208 * Note that they can rudely place more (general information) in gloss's that are 0209 * not the first one. 0210 */ 0211 bool EntryEdict::loadEntry(const QString &entryLine) 0212 { 0213 /* Set tempQString to be the reading and word portion of the entryLine */ 0214 int endOfKanjiAndKanaSection = entryLine.indexOf('/'_L1); 0215 if (endOfKanjiAndKanaSection == -1) { 0216 return false; 0217 } 0218 QString tempQString = entryLine.left(endOfKanjiAndKanaSection); 0219 /* The actual Word is the beginning of the line */ 0220 int endOfKanji = tempQString.indexOf(' '_L1); 0221 if (endOfKanji == -1) { 0222 return false; 0223 } 0224 Word = tempQString.left(endOfKanji); 0225 0226 /* The Reading is either Word or encased in '[' */ 0227 Readings.clear(); 0228 int startOfReading = tempQString.indexOf('['_L1); 0229 if (startOfReading != -1) // This field is optional for EDICT (and kiten) 0230 { 0231 Readings.append(tempQString.left(tempQString.lastIndexOf(']'_L1)).mid(startOfReading + 1)); 0232 } 0233 /* TODO: use this code or not? 0234 * app does not handle only reading and no word entries 0235 * very well so far 0236 else 0237 { 0238 Readings.append(Word); 0239 Word.clear(); 0240 } 0241 */ 0242 0243 /* set Meanings to be all of the meanings in the definition */ 0244 QString remainingLine = entryLine.mid(endOfKanjiAndKanaSection); 0245 // Trim to last '/' 0246 remainingLine = remainingLine.left(remainingLine.lastIndexOf('/'_L1)); 0247 Meanings = remainingLine.split('/'_L1, Qt::SkipEmptyParts); 0248 0249 if (Meanings.empty()) { 0250 return false; 0251 } 0252 0253 if (Meanings.last() == QLatin1String("(P)")) { 0254 ExtendedInfo[QStringLiteral("common")] = QStringLiteral("1"); 0255 Meanings.removeLast(); 0256 } 0257 0258 QString firstWord = Meanings.first(); 0259 QStringList stringTypes; 0260 0261 // Pulls the various types out 0262 // TODO: Remove them from the original string 0263 for (int i = firstWord.indexOf(QLatin1Char('(')); i != -1; i = firstWord.indexOf(QLatin1Char('('), i + 1)) { 0264 QString parentheses = firstWord.mid(i + 1, firstWord.indexOf(QLatin1Char(')'), i) - i - 1); 0265 stringTypes += parentheses.split(','_L1); 0266 } 0267 0268 for (const QString &str : stringTypes) { 0269 if (EdictFormatting::PartsOfSpeech.contains(str)) { 0270 m_types += str; 0271 } else if (EdictFormatting::FieldOfApplication.contains(str)) { 0272 ExtendedInfo[QStringLiteral("field")] = str; 0273 } else if (EdictFormatting::MiscMarkings.contains(str)) { 0274 m_miscMarkings += str; 0275 } 0276 } 0277 0278 return true; 0279 } 0280 0281 bool EntryEdict::matchesWordType(const DictQuery &query) const 0282 { 0283 if (!query.isEmpty()) { 0284 if (query.getMatchWordType() == DictQuery::Verb && isVerb()) { 0285 return true; 0286 } 0287 if (query.getMatchWordType() == DictQuery::Noun && isNoun()) { 0288 return true; 0289 } 0290 if (query.getMatchWordType() == DictQuery::Adjective && isAdjective()) { 0291 return true; 0292 } 0293 if (query.getMatchWordType() == DictQuery::Adverb && isAdverb()) { 0294 return true; 0295 } 0296 if (query.getMatchWordType() == DictQuery::Expression && isExpression()) { 0297 return true; 0298 } 0299 if (query.getMatchWordType() == DictQuery::Prefix && isPrefix()) { 0300 return true; 0301 } 0302 if (query.getMatchWordType() == DictQuery::Suffix && isSuffix()) { 0303 return true; 0304 } 0305 if (query.getMatchWordType() == DictQuery::Any) { 0306 return true; 0307 } 0308 } 0309 0310 return false; 0311 } 0312 0313 /** 0314 * Returns a HTML version of an Entry 0315 */ 0316 QString EntryEdict::toHTML() const 0317 { 0318 QString result = QStringLiteral("<div class=\"%1\">").arg(EDICT.toUpper()); 0319 if (isCommon()) { 0320 result += QLatin1String("<div class=\"Common\">"); 0321 } 0322 0323 for (const QString &field : QSTRINGLISTCHECK(DictFileEdict::displayFields)) { 0324 if (field == QLatin1String("--NewLine--")) 0325 result += QLatin1String("<br>"); 0326 else if (field == QLatin1String("Word/Kanji")) 0327 result += HTMLWord() + ' '_L1; 0328 else if (field == QLatin1String("Meaning")) 0329 result += HTMLMeanings() + ' '_L1; 0330 else if (field == QLatin1String("Reading")) 0331 result += HTMLReadings() + ' '_L1; 0332 else 0333 qDebug() << "Unknown field: " << field; 0334 } 0335 0336 if (isCommon()) { 0337 result += QLatin1String("</div>"); 0338 } 0339 0340 result += QLatin1String("</div>"); 0341 return result; 0342 } 0343 0344 #ifdef KITEN_EDICTFORMATTING 0345 0346 /** 0347 * The basic idea of this is to provide a mapping from possible entry types to 0348 * possible things the user could enter. Then our code for the matching entry can simply 0349 * use this mapping to determine if a given entry could be understood to match the user's input. 0350 * 0351 * There are two basic approaches we could take: 0352 * Convert the user's entry into a list of types, see if the Entry type matches any of 0353 * the conversions from this list (the list comparisons will be MANY enums). 0354 * Convert our Entry types to a list of acceptable string aliases. Then compare the 0355 * user's input to this list (the list will be a relatively small list of strings). 0356 * 0357 * My gut instinct is that the first case (comparison of a largish list of ints) will be 0358 * faster, and so that's the one that's implemented here. 0359 * 0360 * The following are the minimum list of case-insensitive aliases that the user could enter: 0361 * noun 0362 * verb: 0363 * ichidan 0364 * godan 0365 * adjective 0366 * adverb 0367 * particle 0368 * 0369 * Note that our File Parser will also expand to general cases, if not included already: 0370 * For Example: v5aru -> v5aru,v5 (so that a search for "godan" will find it) 0371 */ 0372 namespace EdictFormatting 0373 { 0374 enum WordType { 0375 noun, 0376 verb, 0377 adjective, 0378 adverb, 0379 particle, 0380 ichidanVerb, 0381 godanVerb, 0382 fukisokuVerb, 0383 expression, 0384 idiomaticExpression, 0385 prefix, 0386 suffix, 0387 nounPrefix, 0388 nounSuffix 0389 }; 0390 0391 // Forward declarations of our functions to be used. 0392 QMultiHash<WordType, QString> createPartOfSpeechCategories(); 0393 QSet<QString> createPartsOfSpeech(); 0394 QSet<QString> createMiscMarkings(); 0395 QSet<QString> createFieldOfApplication(); 0396 QStringList createNounsList(); 0397 QStringList createVerbsList(); 0398 QStringList createExpressionsList(); 0399 QStringList createPrefixesList(); 0400 QStringList createSuffixesList(); 0401 0402 // Define our public variables. 0403 QMultiHash<WordType, QString> PartOfSpeechCategories = createPartOfSpeechCategories(); 0404 QSet<QString> PartsOfSpeech = createPartsOfSpeech(); 0405 QSet<QString> MiscMarkings = createMiscMarkings(); 0406 QSet<QString> FieldOfApplication = createFieldOfApplication(); 0407 0408 // PartOfSpeechCategories needs to has some values before this line. 0409 QStringList Nouns = createNounsList(); 0410 QStringList Adjectives = PartOfSpeechCategories.values(adjective); 0411 QStringList Adverbs = PartOfSpeechCategories.values(adverb); 0412 QStringList IchidanVerbs = PartOfSpeechCategories.values(ichidanVerb); 0413 QStringList GodanVerbs = PartOfSpeechCategories.values(godanVerb); 0414 QStringList FukisokuVerbs = PartOfSpeechCategories.values(fukisokuVerb); 0415 QStringList Verbs = createVerbsList(); 0416 QStringList Expressions = createExpressionsList(); 0417 QStringList Prefix = createPrefixesList(); 0418 QStringList Suffix = createSuffixesList(); 0419 QString Particle = PartOfSpeechCategories.value(particle); 0420 0421 QStringList createNounsList() 0422 { 0423 QStringList list; 0424 list.append(PartOfSpeechCategories.values(noun)); 0425 list.append(PartOfSpeechCategories.values(nounPrefix)); 0426 list.append(PartOfSpeechCategories.values(nounSuffix)); 0427 return list; 0428 } 0429 0430 QStringList createVerbsList() 0431 { 0432 QStringList list; 0433 list.append(PartOfSpeechCategories.values(verb)); 0434 list.append(IchidanVerbs); 0435 list.append(GodanVerbs); 0436 list.append(FukisokuVerbs); 0437 return list; 0438 } 0439 0440 QStringList createExpressionsList() 0441 { 0442 QStringList list; 0443 list.append(PartOfSpeechCategories.values(expression)); 0444 list.append(PartOfSpeechCategories.values(idiomaticExpression)); 0445 return list; 0446 } 0447 0448 QStringList createPrefixesList() 0449 { 0450 QStringList list; 0451 list.append(PartOfSpeechCategories.values(prefix)); 0452 list.append(PartOfSpeechCategories.values(nounPrefix)); 0453 return list; 0454 } 0455 0456 QStringList createSuffixesList() 0457 { 0458 QStringList list; 0459 list.append(PartOfSpeechCategories.values(suffix)); 0460 list.append(PartOfSpeechCategories.values(nounSuffix)); 0461 return list; 0462 } 0463 0464 QMultiHash<WordType, QString> createPartOfSpeechCategories() 0465 { 0466 QMultiHash<WordType, QString> categories; 0467 0468 // Nouns 0469 categories.insert(noun, QStringLiteral("n")); 0470 categories.insert(noun, QStringLiteral("n-adv")); 0471 categories.insert(noun, QStringLiteral("n-t")); 0472 categories.insert(noun, QStringLiteral("adv-n")); 0473 0474 // Noun (used as a prefix) 0475 categories.insert(nounPrefix, QStringLiteral("n-pref")); 0476 0477 // Noun (used as a suffix) 0478 categories.insert(nounSuffix, QStringLiteral("n-suf")); 0479 0480 // Ichidan Verbs 0481 categories.insert(ichidanVerb, QStringLiteral("v1")); 0482 categories.insert(ichidanVerb, QStringLiteral("vz")); 0483 0484 // Godan Verbs 0485 categories.insert(godanVerb, QStringLiteral("v5")); 0486 categories.insert(godanVerb, QStringLiteral("v5aru")); 0487 categories.insert(godanVerb, QStringLiteral("v5b")); 0488 categories.insert(godanVerb, QStringLiteral("v5g")); 0489 categories.insert(godanVerb, QStringLiteral("v5k")); 0490 categories.insert(godanVerb, QStringLiteral("v5k-s")); 0491 categories.insert(godanVerb, QStringLiteral("v5m")); 0492 categories.insert(godanVerb, QStringLiteral("v5n")); 0493 categories.insert(godanVerb, QStringLiteral("v5r")); 0494 categories.insert(godanVerb, QStringLiteral("v5r-i")); 0495 categories.insert(godanVerb, QStringLiteral("v5s")); 0496 categories.insert(godanVerb, QStringLiteral("v5t")); 0497 categories.insert(godanVerb, QStringLiteral("v5u")); 0498 categories.insert(godanVerb, QStringLiteral("v5u-s")); 0499 categories.insert(godanVerb, QStringLiteral("v5uru")); 0500 categories.insert(godanVerb, QStringLiteral("v5z")); 0501 0502 // Fukisoku verbs 0503 categories.insert(fukisokuVerb, QStringLiteral("iv")); 0504 categories.insert(fukisokuVerb, QStringLiteral("vk")); 0505 categories.insert(fukisokuVerb, QStringLiteral("vn")); 0506 categories.insert(fukisokuVerb, QStringLiteral("vs-i")); 0507 categories.insert(fukisokuVerb, QStringLiteral("vs-s")); 0508 0509 // Other Verbs 0510 categories.insert(verb, QStringLiteral("vi")); 0511 categories.insert(verb, QStringLiteral("vs")); 0512 categories.insert(verb, QStringLiteral("vt")); 0513 categories.insert(verb, QStringLiteral("aux-v")); 0514 0515 // Adjectives 0516 categories.insert(adjective, QStringLiteral("adj-i")); 0517 categories.insert(adjective, QStringLiteral("adj-na")); 0518 categories.insert(adjective, QStringLiteral("adj-no")); 0519 categories.insert(adjective, QStringLiteral("adj-pn")); 0520 categories.insert(adjective, QStringLiteral("adj-t")); 0521 categories.insert(adjective, QStringLiteral("adj-f")); 0522 categories.insert(adjective, QStringLiteral("adj")); 0523 categories.insert(adjective, QStringLiteral("aux-adj")); 0524 0525 // Adverbs 0526 categories.insert(adverb, QStringLiteral("adv")); 0527 categories.insert(adverb, QStringLiteral("adv-n")); 0528 categories.insert(adverb, QStringLiteral("adv-to")); 0529 0530 // Particle 0531 categories.insert(particle, QStringLiteral("prt")); 0532 0533 // Expression 0534 categories.insert(expression, QStringLiteral("exp")); 0535 0536 // Idiomatic expression 0537 categories.insert(idiomaticExpression, QStringLiteral("id")); 0538 0539 // Prefix 0540 categories.insert(prefix, QStringLiteral("pref")); 0541 0542 // Suffix 0543 categories.insert(suffix, QStringLiteral("suf")); 0544 0545 return categories; 0546 } 0547 0548 QSet<QString> createPartsOfSpeech() 0549 { 0550 QSet<QString> category; 0551 0552 category << QStringLiteral("adj-i") << QStringLiteral("adj-na") << QStringLiteral("adj-no") << QStringLiteral("adj-pn") << QStringLiteral("adj-t") 0553 << QStringLiteral("adj-f") << QStringLiteral("adj") << QStringLiteral("adv") << QStringLiteral("adv-n") << QStringLiteral("adv-to") 0554 << QStringLiteral("aux") << QStringLiteral("aux-v") << QStringLiteral("aux-adj") << QStringLiteral("conj") << QStringLiteral("ctr") 0555 << QStringLiteral("exp") << QStringLiteral("id") << QStringLiteral("int") << QStringLiteral("iv") << QStringLiteral("n") << QStringLiteral("n-adv") 0556 << QStringLiteral("n-pref") << QStringLiteral("n-suf") << QStringLiteral("n-t") << QStringLiteral("num") << QStringLiteral("pn") 0557 << QStringLiteral("pref") << QStringLiteral("prt") << QStringLiteral("suf") << QStringLiteral("v1") << QStringLiteral("v5") 0558 << QStringLiteral("v5aru") << QStringLiteral("v5b") << QStringLiteral("v5g") << QStringLiteral("v5k") << QStringLiteral("v5k-s") 0559 << QStringLiteral("v5m") << QStringLiteral("v5n") << QStringLiteral("v5r") << QStringLiteral("v5r-i") << QStringLiteral("v5s") 0560 << QStringLiteral("v5t") << QStringLiteral("v5u") << QStringLiteral("v5u-s") << QStringLiteral("v5uru") << QStringLiteral("v5z") 0561 << QStringLiteral("vz") << QStringLiteral("vi") << QStringLiteral("vk") << QStringLiteral("vn") << QStringLiteral("vs") << QStringLiteral("vs-i") 0562 << QStringLiteral("vs-s") << QStringLiteral("vt"); 0563 0564 return category; 0565 } 0566 0567 QSet<QString> createFieldOfApplication() 0568 { 0569 QSet<QString> category; 0570 0571 // Field of Application terms 0572 category << QStringLiteral("Buddh") << QStringLiteral("MA") << QStringLiteral("comp") << QStringLiteral("food") << QStringLiteral("geom") 0573 << QStringLiteral("ling") << QStringLiteral("math") << QStringLiteral("mil") << QStringLiteral("physics"); 0574 0575 return category; 0576 } 0577 0578 QSet<QString> createMiscMarkings() 0579 { 0580 QSet<QString> category; 0581 0582 // Miscellaneous Markings (in EDICT terms) 0583 category << QStringLiteral("X") << QStringLiteral("abbr") << QStringLiteral("arch") << QStringLiteral("ateji") << QStringLiteral("chn") 0584 << QStringLiteral("col") << QStringLiteral("derog") << QStringLiteral("eK") << QStringLiteral("ek") << QStringLiteral("fam") 0585 << QStringLiteral("fem") << QStringLiteral("gikun") << QStringLiteral("hon") << QStringLiteral("hum") << QStringLiteral("iK") 0586 << QStringLiteral("id") << QStringLiteral("io") << QStringLiteral("m-sl") << QStringLiteral("male") << QStringLiteral("male-sl") 0587 << QStringLiteral("ng") << QStringLiteral("oK") << QStringLiteral("obs") << QStringLiteral("obsc") << QStringLiteral("ok") 0588 << QStringLiteral("poet") << QStringLiteral("pol") << QStringLiteral("rare") << QStringLiteral("sens") << QStringLiteral("sl") 0589 << QStringLiteral("uK") << QStringLiteral("uk") << QStringLiteral("vulg"); 0590 0591 return category; 0592 } 0593 } 0594 0595 #endif