File indexing completed on 2024-03-24 03:44:44
0001 /* 0002 This file is part of Kiten, a KDE Japanese Reference Tool 0003 SPDX-FileCopyrightText: 2001 Jason Katz-Brown <jason@katzbrown.com> 0004 SPDX-FileCopyrightText: 2006 Joseph Kerian <jkerian@gmail.com> 0005 SPDX-FileCopyrightText: 2006 Eric Kjeldergaard <kjelderg@gmail.com> 0006 SPDX-FileCopyrightText: 2011 Daniel E. Moctezuma <democtezuma@gmail.com> 0007 0008 SPDX-License-Identifier: LGPL-2.0-or-later 0009 */ 0010 0011 #include "dictfilekanjidic.h" 0012 0013 #include "dictquery.h" 0014 #include "entrykanjidic.h" 0015 #include "entrylist.h" 0016 #include "kitenmacros.h" 0017 0018 #include <KConfigSkeleton> 0019 0020 #include <QDebug> 0021 #include <QFile> 0022 #include <QRegularExpression> 0023 #include <QStringDecoder> 0024 0025 using namespace Qt::StringLiterals; 0026 0027 QStringList *DictFileKanjidic::displayFields = nullptr; 0028 0029 DictFileKanjidic::DictFileKanjidic() 0030 : DictFile(KANJIDIC) 0031 { 0032 m_dictionaryType = KANJIDIC; 0033 m_searchableAttributes.clear(); 0034 m_searchableAttributes.insert(QStringLiteral("bushu"), QStringLiteral("B")); 0035 m_searchableAttributes.insert(QStringLiteral("classical"), QStringLiteral("C")); 0036 m_searchableAttributes.insert(QStringLiteral("henshall"), QStringLiteral("E")); 0037 m_searchableAttributes.insert(QStringLiteral("frequency"), QStringLiteral("F")); 0038 m_searchableAttributes.insert(QStringLiteral("grade"), QStringLiteral("G")); 0039 m_searchableAttributes.insert(QStringLiteral("halpern"), QStringLiteral("H")); 0040 m_searchableAttributes.insert(QStringLiteral("spahn"), QStringLiteral("I")); 0041 m_searchableAttributes.insert(QStringLiteral("hadamitzky"), QStringLiteral("I")); 0042 m_searchableAttributes.insert(QStringLiteral("gakken"), QStringLiteral("K")); 0043 m_searchableAttributes.insert(QStringLiteral("heisig"), QStringLiteral("L")); 0044 m_searchableAttributes.insert(QStringLiteral("morohashi"), QStringLiteral("M")); 0045 m_searchableAttributes.insert(QStringLiteral("nelson"), QStringLiteral("N")); 0046 m_searchableAttributes.insert(QStringLiteral("oneill"), QStringLiteral("O")); 0047 m_searchableAttributes.insert(QStringLiteral("skip"), QStringLiteral("P")); 0048 m_searchableAttributes.insert(QStringLiteral("4cc"), QStringLiteral("Q")); 0049 m_searchableAttributes.insert(QStringLiteral("stroke"), QStringLiteral("S")); 0050 m_searchableAttributes.insert(QStringLiteral("strokes"), QStringLiteral("S")); 0051 m_searchableAttributes.insert(QStringLiteral("unicode"), QStringLiteral("U")); 0052 m_searchableAttributes.insert(QStringLiteral("haig"), QStringLiteral("V")); 0053 m_searchableAttributes.insert(QStringLiteral("korean"), QStringLiteral("W")); 0054 m_searchableAttributes.insert(QStringLiteral("pinyin"), QStringLiteral("Y")); 0055 m_searchableAttributes.insert(QStringLiteral("other"), QStringLiteral("D")); 0056 } 0057 0058 QMap<QString, QString> DictFileKanjidic::displayOptions() const 0059 { 0060 // Enumerate the fields in our dict.... there are a rather lot of them here 0061 // It will be useful for a few things to have the full list generated on it's own 0062 QMap<QString, QString> list; 0063 // TODO: Figure out how to internationalize these easily 0064 list.insert(QStringLiteral("Bushu Number(B)"), QStringLiteral("B")); 0065 list.insert(QStringLiteral("Classical Radical Number(C)"), QStringLiteral("C")); 0066 list.insert(QStringLiteral("Henshall's Index Number(E)"), QStringLiteral("E")); 0067 list.insert(QStringLiteral("Frequency Ranking(F)"), QStringLiteral("F")); 0068 list.insert(QStringLiteral("Grade Level(G)"), QStringLiteral("G")); 0069 list.insert(QStringLiteral("Halpern's New J-E Char Dictionary(H)"), QStringLiteral("H")); 0070 list.insert(QStringLiteral("Spahn & Hadamitzky Reference(I)"), QStringLiteral("I")); 0071 list.insert(QStringLiteral("Gakken Kanji Dictionary Index(K)"), QStringLiteral("K")); 0072 list.insert(QStringLiteral("Heisig's Index(L)"), QStringLiteral("L")); 0073 list.insert(QStringLiteral("Morohashi's Daikanwajiten(M)"), QStringLiteral("M")); 0074 list.insert(QStringLiteral("Nelsons Modern Reader's J-E Index(N)"), QStringLiteral("N")); 0075 list.insert(QStringLiteral("O'Neill's 'Japanese Names' Index(O)"), QStringLiteral("O")); 0076 list.insert(QStringLiteral("SKIP Code(P)"), QStringLiteral("P")); 0077 list.insert(QStringLiteral("Four Corner codes(Q)"), QStringLiteral("Q")); 0078 list.insert(QStringLiteral("Stroke Count(S)"), QStringLiteral("S")); 0079 list.insert(QStringLiteral("Unicode Value(U)"), QStringLiteral("U")); 0080 list.insert(QStringLiteral("Haig's New Nelson J-E Dict(V)"), QStringLiteral("V")); 0081 list.insert(QStringLiteral("Korean Reading(W)"), QStringLiteral("W")); 0082 list.insert(QStringLiteral("kanjidic field: X"), QStringLiteral("X")); 0083 list.insert(QStringLiteral("Pinyin Reading(Y)"), QStringLiteral("Y")); 0084 list.insert(QStringLiteral("Common SKIP Misclassifications(Z)"), QStringLiteral("Z")); 0085 list.insert(QStringLiteral("Misc Dictionary Codes (D)"), QStringLiteral("D")); 0086 return list; 0087 } 0088 0089 EntryList *DictFileKanjidic::doSearch(const DictQuery &query) 0090 { 0091 if (query.isEmpty() || !m_validKanjidic) { 0092 return new EntryList(); 0093 } 0094 0095 qDebug() << "Search from:" << getName(); 0096 QString searchQuery = query.getWord(); 0097 if (searchQuery.length() == 0) { 0098 searchQuery = query.getPronunciation(); 0099 if (searchQuery.length() == 0) { 0100 searchQuery = query.getMeaning().split(' '_L1).first().toLower(); 0101 if (searchQuery.length() == 0) { 0102 QList<QString> keys = query.listPropertyKeys(); 0103 if (keys.empty()) { 0104 return new EntryList(); 0105 } 0106 searchQuery = keys[0]; 0107 searchQuery = searchQuery + query.getProperty(searchQuery); 0108 } 0109 } 0110 } 0111 0112 auto results = new EntryList(); 0113 for (const QString &line : m_kanjidic) { 0114 if (line.contains(searchQuery)) { 0115 Entry *entry = makeEntry(line); 0116 if (entry->matchesQuery(query)) { 0117 results->append(entry); 0118 } else 0119 delete entry; 0120 } 0121 } 0122 0123 return results; 0124 } 0125 0126 QStringList DictFileKanjidic::dumpDictionary() 0127 { 0128 if (!m_validKanjidic) { 0129 return {}; 0130 } 0131 0132 return m_kanjidic; 0133 } 0134 0135 QStringList DictFileKanjidic::listDictDisplayOptions(QStringList list) const 0136 { 0137 list += displayOptions().keys(); 0138 return list; 0139 } 0140 0141 bool DictFileKanjidic::loadDictionary(const QString &file, const QString &name) 0142 { 0143 if (!m_kanjidic.isEmpty()) { 0144 return true; 0145 } 0146 0147 QFile dictionary(file); 0148 if (!dictionary.open(QIODevice::ReadOnly | QIODevice::Text)) { 0149 return false; 0150 } 0151 0152 qDebug() << "Loading kanjidic from:" << file; 0153 0154 QStringDecoder decoder("EUC-JP"); 0155 const QString decoded = decoder(dictionary.readAll()); 0156 0157 QTextStream fileStream(decoded.toUtf8()); 0158 0159 QString currentLine; 0160 while (!fileStream.atEnd()) { 0161 currentLine = fileStream.readLine(); 0162 if (currentLine[0] != '#'_L1) { 0163 m_kanjidic << currentLine; 0164 } 0165 } 0166 0167 dictionary.close(); 0168 0169 if (!validDictionaryFile(file)) { 0170 return false; 0171 } 0172 0173 m_dictionaryName = name; 0174 m_dictionaryFile = file; 0175 0176 return true; 0177 } 0178 0179 QMap<QString, QString> DictFileKanjidic::loadDisplayOptions() const 0180 { 0181 QMap<QString, QString> list = displayOptions(); 0182 list[QStringLiteral("Word/Kanji")] = QStringLiteral("Word/Kanji"); 0183 list[QStringLiteral("Reading")] = QStringLiteral("Reading"); 0184 list[QStringLiteral("Meaning")] = QStringLiteral("Meaning"); 0185 list[QStringLiteral("--Newline--")] = QStringLiteral("--Newline--"); 0186 0187 return list; 0188 } 0189 0190 QStringList *DictFileKanjidic::loadListType(KConfigSkeletonItem *item, QStringList *list, const QMap<QString, QString> &long2short) 0191 { 0192 QStringList listFromItem; 0193 0194 if (item != nullptr) { 0195 listFromItem = item->property().toStringList(); 0196 } 0197 0198 if (!listFromItem.isEmpty()) { 0199 delete list; 0200 0201 list = new QStringList(); 0202 for (const QString &it : listFromItem) { 0203 if (long2short.contains(it)) { 0204 list->append(long2short[it]); 0205 } 0206 } 0207 } 0208 0209 return list; 0210 } 0211 0212 void DictFileKanjidic::loadSettings() 0213 { 0214 this->displayFields = new QStringList(loadDisplayOptions().values()); 0215 } 0216 0217 void DictFileKanjidic::loadSettings(KConfigSkeleton *config) 0218 { 0219 KConfigSkeletonItem *item = config->findItem(getType() + "__displayFields"_L1); 0220 this->displayFields = loadListType(item, this->displayFields, loadDisplayOptions()); 0221 } 0222 0223 inline Entry *DictFileKanjidic::makeEntry(const QString &entry) 0224 { 0225 return new EntryKanjidic(getName(), entry); 0226 } 0227 0228 /** 0229 * Scan a potential file for the correct format, remembering to skip comment 0230 * characters. This is not a foolproof scan, but it should be checked before adding 0231 * a new dictionary. 0232 */ 0233 bool DictFileKanjidic::validDictionaryFile(const QString &filename) 0234 { 0235 QFile file(filename); 0236 if (!file.exists() || !file.open(QIODevice::ReadOnly)) { 0237 return false; 0238 } 0239 0240 QStringDecoder decoder("EUC-JP"); 0241 const QString decoded = decoder(file.readAll()); 0242 0243 QTextStream fileStream(decoded.toUtf8()); 0244 0245 QRegularExpression format(QStringLiteral("^\\S\\s+(\\S+\\s+)+(\\{(\\S+\\s?)+\\})+")); 0246 m_validKanjidic = true; 0247 while (!fileStream.atEnd()) { 0248 QString currentLine = fileStream.readLine(); 0249 0250 if (currentLine[0] == '#'_L1) { 0251 continue; 0252 } else if (currentLine.contains(format)) { 0253 continue; 0254 } 0255 0256 m_validKanjidic = false; 0257 break; 0258 } 0259 0260 file.close(); 0261 return m_validKanjidic; 0262 } 0263 0264 /** 0265 * Reject queries that specify anything we don't understand 0266 */ 0267 bool DictFileKanjidic::validQuery(const DictQuery &query) 0268 { 0269 // Multi kanji searches don't apply to this file 0270 if (query.getWord().length() > 1) { 0271 return false; 0272 } 0273 0274 // Now check if we have any properties specified that we don't understand 0275 QStringList propertiesWeHandle = m_searchableAttributes.values() + m_searchableAttributes.keys(); 0276 propertiesWeHandle += QStringLiteral("common"); // We map this to be (has a G value) 0277 0278 const QStringList properties = query.listPropertyKeys(); 0279 for (QStringList::const_iterator it = properties.constBegin(); it != properties.constEnd(); ++it) { 0280 if (!propertiesWeHandle.contains(*it)) { 0281 return false; 0282 } 0283 } 0284 0285 return true; 0286 }