// File indexing completed on 2024-04-21 03:41:59
0001 /* 0002 This file is part of Kiten, a KDE Japanese Reference Tool 0003 SPDX-FileCopyrightText: 2001 Jason Katz-Brown <jason@katzbrown.com> 0004 SPDX-FileCopyrightText: 2006 Joseph Kerian <jkerian@gmail.com> 0005 SPDX-FileCopyrightText: 2006 Eric Kjeldergaard <kjelderg@gmail.com> 0006 SPDX-FileCopyrightText: 2011 Daniel E. Moctezuma <democtezuma@gmail.com> 0007 0008 SPDX-License-Identifier: LGPL-2.0-or-later 0009 */ 0010 0011 #include "dictfileedict.h" 0012 0013 #include <KConfigSkeleton> 0014 0015 #include <QFile> 0016 #include <QRegularExpression> 0017 #include <QString> 0018 #include <QStringDecoder> 0019 #include <QTextStream> 0020 #include <QVector> 0021 0022 #include "deinflection.h" 0023 #include "dictfilefieldselector.h" 0024 #include "dictquery.h" 0025 #include "entryedict.h" 0026 #include "entrylist.h" 0027 #include "kitenmacros.h" 0028 0029 using namespace Qt::StringLiterals; 0030 0031 QString *DictFileEdict::deinflectionLabel = nullptr; 0032 QStringList *DictFileEdict::displayFields = nullptr; 0033 QString *DictFileEdict::wordType = nullptr; 0034 0035 /** 0036 * Per instructions in the super-class, this constructor basically sets the 0037 * dictionaryType member variable to identify this as an edict-type database handler. 0038 */ 0039 DictFileEdict::DictFileEdict() 0040 : DictFile(EDICT) 0041 , m_deinflection(nullptr) 0042 , m_hasDeinflection(false) 0043 { 0044 m_dictionaryType = EDICT; 0045 m_searchableAttributes.insert(QStringLiteral("common"), QStringLiteral("common")); 0046 } 0047 0048 /** 0049 * The destructor... ditch our memory maps and close our files here 0050 * (if they were open). 
0051 */ 0052 DictFileEdict::~DictFileEdict() 0053 { 0054 delete m_deinflection; 0055 m_deinflection = nullptr; 0056 } 0057 0058 QMap<QString, QString> DictFileEdict::displayOptions() const 0059 { 0060 QMap<QString, QString> list; 0061 list[QStringLiteral("Part of speech(type)")] = QStringLiteral("type"); 0062 return list; 0063 } 0064 0065 /** 0066 * Do a search, respond with a list of entries. 0067 * The general strategy will be to take the first word of the query, and do a 0068 * binary search on the dictionary for that item. Take all results and filter 0069 * them using the rest of the query with the validate method. 0070 */ 0071 EntryList *DictFileEdict::doSearch(const DictQuery &query) 0072 { 0073 if (query.isEmpty() || !m_edictFile.valid()) // No query or dict, no results. 0074 { 0075 return new EntryList(); 0076 } 0077 0078 qDebug() << "Search from : " << getName(); 0079 0080 QString firstChoice = query.getWord(); 0081 if (firstChoice.length() == 0) { 0082 firstChoice = query.getPronunciation(); 0083 if (firstChoice.length() == 0) { 0084 firstChoice = query.getMeaning().split(' '_L1).first().toLower(); 0085 if (firstChoice.length() == 0) { 0086 // The nastiest situation... we have to assemble a search string 0087 // from the first property 0088 QList<QString> keys = query.listPropertyKeys(); 0089 if (keys.empty()) // Shouldn't happen... but maybe in the future 0090 { 0091 return new EntryList(); 0092 } 0093 firstChoice = keys[0]; 0094 firstChoice = firstChoice + query.getProperty(firstChoice); 0095 // TODO: doSearch: some accommodation for searching for ranges and such of properties 0096 } 0097 } 0098 } else { 0099 // Only search for one kanji or the 0100 // binary lookup mechanism breaks 0101 firstChoice = firstChoice.at(0); 0102 } 0103 0104 QVector<QString> preliminaryResults = m_edictFile.findMatches(firstChoice); 0105 0106 if (preliminaryResults.empty()) // If there were no matches... 
return an empty list 0107 { 0108 return new EntryList(); 0109 } 0110 0111 auto results = new EntryList(); 0112 for (const QString &it : preliminaryResults) { 0113 // qDebug() << "result: " << it << endl; 0114 Entry *result = makeEntry(it); 0115 auto resultEdict = static_cast<EntryEdict *>(result); 0116 if (result->matchesQuery(query) && resultEdict->matchesWordType(query)) { 0117 results->append(result); 0118 } else { 0119 delete result; 0120 } 0121 } 0122 0123 // At this point we should have some preliminary results 0124 // and if there were no matches, it probably means the user 0125 // input was a verb or adjective, so we have to deinflect it. 0126 bool isAnyQuery = query.getMatchWordType() == DictQuery::Any; 0127 bool isVerbQuery = query.getMatchWordType() == DictQuery::Verb; 0128 bool isAdjectiveQuery = query.getMatchWordType() == DictQuery::Adjective; 0129 if (results->count() == 0 && (isAnyQuery || isVerbQuery || isAdjectiveQuery)) { 0130 delete results; 0131 results = m_deinflection->search(query, preliminaryResults); 0132 QString *label = m_deinflection->getDeinflectionLabel(); 0133 if (!label->isEmpty() && !m_hasDeinflection) { 0134 deinflectionLabel = label; 0135 m_hasDeinflection = true; 0136 wordType = m_deinflection->getWordType(); 0137 } 0138 } else { 0139 deinflectionLabel = nullptr; 0140 wordType = nullptr; 0141 m_hasDeinflection = false; 0142 } 0143 0144 if (results) { 0145 auto common = new EntryList(); 0146 auto uncommon = new EntryList(); 0147 EntryList::EntryIterator i(*results); 0148 while (i.hasNext()) { 0149 auto entry = static_cast<EntryEdict *>(i.next()); 0150 if (entry->isCommon()) { 0151 common->append(entry); 0152 } else { 0153 uncommon->append(entry); 0154 } 0155 } 0156 0157 delete results; 0158 results = new EntryList(); 0159 results->appendList(common); 0160 results->appendList(uncommon); 0161 delete common; 0162 delete uncommon; 0163 0164 auto exact = new EntryList(); 0165 auto beginning = new EntryList(); 0166 auto ending = new 
EntryList(); 0167 auto anywhere = new EntryList(); 0168 EntryList::EntryIterator it(*results); 0169 while (it.hasNext()) { 0170 Entry *entry = it.next(); 0171 0172 if (entry->getWord() == query.getWord()) { 0173 exact->append(entry); 0174 } else if (entry->getWord().startsWith(query.getWord())) { 0175 beginning->append(entry); 0176 } else if (entry->getWord().endsWith(query.getWord())) { 0177 ending->append(entry); 0178 } else { 0179 anywhere->append(entry); 0180 } 0181 } 0182 0183 delete results; 0184 results = new EntryList(); 0185 results->appendList(exact); 0186 results->appendList(beginning); 0187 results->appendList(ending); 0188 results->appendList(anywhere); 0189 delete exact; 0190 delete beginning; 0191 delete ending; 0192 delete anywhere; 0193 } 0194 0195 return results; 0196 } 0197 0198 /** 0199 * Make a list of all the extra fields in our db.. Entry uses this to decide 0200 * what goes in the interpretations it gives. 0201 */ 0202 QStringList DictFileEdict::listDictDisplayOptions(QStringList x) const 0203 { 0204 x += displayOptions().keys(); 0205 return x; 0206 } 0207 0208 /** 0209 * Load up the dictionary 0210 */ 0211 bool DictFileEdict::loadDictionary(const QString &fileName, const QString &dictName) 0212 { 0213 if (m_edictFile.valid()) { 0214 return false; // Already loaded 0215 } 0216 0217 if (m_edictFile.loadFile(fileName)) { 0218 m_dictionaryName = dictName; 0219 m_dictionaryFile = fileName; 0220 0221 m_deinflection = new Deinflection(m_dictionaryName); 0222 m_deinflection->load(); 0223 0224 return true; 0225 } 0226 0227 return false; 0228 } 0229 0230 QMap<QString, QString> DictFileEdict::loadDisplayOptions() const 0231 { 0232 QMap<QString, QString> list = displayOptions(); 0233 list[QStringLiteral("Word/Kanji")] = QStringLiteral("Word/Kanji"); 0234 list[QStringLiteral("Reading")] = QStringLiteral("Reading"); 0235 list[QStringLiteral("Meaning")] = QStringLiteral("Meaning"); 0236 list[QStringLiteral("--Newline--")] = QStringLiteral("--Newline--"); 
0237 0238 return list; 0239 } 0240 0241 QStringList *DictFileEdict::loadListType(KConfigSkeletonItem *item, QStringList *list, const QMap<QString, QString> &long2short) 0242 { 0243 QStringList listFromItem; 0244 0245 if (item != nullptr) { 0246 listFromItem = item->property().toStringList(); 0247 } 0248 0249 if (!listFromItem.isEmpty()) { 0250 delete list; 0251 0252 list = new QStringList(); 0253 for (const QString &it : listFromItem) { 0254 if (long2short.contains(it)) { 0255 list->append(long2short[it]); 0256 } 0257 } 0258 } 0259 0260 return list; 0261 } 0262 0263 void DictFileEdict::loadSettings() 0264 { 0265 this->displayFields = new QStringList(loadDisplayOptions().values()); 0266 } 0267 0268 void DictFileEdict::loadSettings(KConfigSkeleton *config) 0269 { 0270 QMap<QString, QString> long2short = displayOptions(); 0271 long2short[QStringLiteral("Word/Kanji")] = QStringLiteral("Word/Kanji"); 0272 long2short[QStringLiteral("Reading")] = QStringLiteral("Reading"); 0273 long2short[QStringLiteral("Meaning")] = QStringLiteral("Meaning"); 0274 long2short[QStringLiteral("--Newline--")] = QStringLiteral("--Newline--"); 0275 0276 KConfigSkeletonItem *item = config->findItem(getType() + "__displayFields"_L1); 0277 this->displayFields = loadListType(item, this->displayFields, long2short); 0278 } 0279 0280 inline Entry *DictFileEdict::makeEntry(const QString &entry) 0281 { 0282 return new EntryEdict(getName(), entry); 0283 } 0284 0285 DictionaryPreferenceDialog *DictFileEdict::preferencesWidget(KConfigSkeleton *config, QWidget *parent) 0286 { 0287 auto dialog = new DictFileFieldSelector(config, getType(), parent); 0288 dialog->addAvailable(listDictDisplayOptions(QStringList())); 0289 return dialog; 0290 } 0291 0292 /** 0293 * Scan a potential file for the correct format, remembering to skip comment 0294 * characters. This is not a foolproof scan, but it should be checked before adding 0295 * a new dictionary. 
0296 * Valid EDICT format is considered: 0297 * \<kanji or kana\>+ [\<kana\>] /latin characters & symbols/separated with slashes/ 0298 * Comment lines start with... something... not remembering now. 0299 */ 0300 bool DictFileEdict::validDictionaryFile(const QString &filename) 0301 { 0302 QFile file(filename); 0303 bool returnFlag = true; 0304 0305 if (!file.exists() || !file.open(QIODevice::ReadOnly)) { 0306 return false; 0307 } 0308 0309 // Now we can actually check the file 0310 QStringDecoder decoder("EUC-JP"); 0311 const QString decoded = decoder(file.readAll()); 0312 0313 QTextStream fileStream(decoded.toUtf8()); 0314 0315 QString commentMarker(QStringLiteral("????")); // Note: Don't touch this! vim seems to have 0316 // An odd text codec error here too :( 0317 QRegularExpression formattedLine(QStringLiteral("^\\S+\\s+(\\[\\S+\\]\\s+)?/.*/$")); 0318 while (!fileStream.atEnd()) { 0319 QString line = fileStream.readLine(); 0320 0321 if (line.left(4) == commentMarker) { 0322 continue; 0323 } 0324 if (line.contains(formattedLine)) // If it matches our regex 0325 { 0326 continue; 0327 } 0328 0329 returnFlag = false; 0330 break; 0331 } 0332 0333 file.close(); 0334 return returnFlag; 0335 } 0336 0337 /** 0338 * Reject queries that specify anything we don't understand 0339 */ 0340 // TODO: Actually write this method (validQuery) 0341 bool DictFileEdict::validQuery(const DictQuery &query) 0342 { 0343 Q_UNUSED(query) 0344 return true; 0345 }