File indexing completed on 2024-04-21 03:41:59

0001 /*
0002     This file is part of Kiten, a KDE Japanese Reference Tool
0003     SPDX-FileCopyrightText: 2001 Jason Katz-Brown <jason@katzbrown.com>
0004     SPDX-FileCopyrightText: 2006 Joseph Kerian <jkerian@gmail.com>
0005     SPDX-FileCopyrightText: 2006 Eric Kjeldergaard <kjelderg@gmail.com>
0006     SPDX-FileCopyrightText: 2011 Daniel E. Moctezuma <democtezuma@gmail.com>
0007 
0008     SPDX-License-Identifier: LGPL-2.0-or-later
0009 */
0010 
0011 #include "dictfileedict.h"
0012 
0013 #include <KConfigSkeleton>
0014 
0015 #include <QFile>
0016 #include <QRegularExpression>
0017 #include <QString>
0018 #include <QStringDecoder>
0019 #include <QTextStream>
0020 #include <QVector>
0021 
0022 #include "deinflection.h"
0023 #include "dictfilefieldselector.h"
0024 #include "dictquery.h"
0025 #include "entryedict.h"
0026 #include "entrylist.h"
0027 #include "kitenmacros.h"
0028 
0029 using namespace Qt::StringLiterals;
0030 
// Class-wide (static) state, shared by every DictFileEdict instance.

// Set by doSearch() from Deinflection::getDeinflectionLabel() when a
// deinflection search ran, and reset to nullptr otherwise.
QString *DictFileEdict::deinflectionLabel = nullptr;
// List of field names assigned by the loadSettings() overloads.
QStringList *DictFileEdict::displayFields = nullptr;
// Set by doSearch() from Deinflection::getWordType() together with
// deinflectionLabel, and reset to nullptr otherwise.
QString *DictFileEdict::wordType = nullptr;
0034 
0035 /**
0036  * Per instructions in the super-class, this constructor basically sets the
0037  * dictionaryType member variable to identify this as an edict-type database handler.
0038  */
0039 DictFileEdict::DictFileEdict()
0040     : DictFile(EDICT)
0041     , m_deinflection(nullptr)
0042     , m_hasDeinflection(false)
0043 {
0044     m_dictionaryType = EDICT;
0045     m_searchableAttributes.insert(QStringLiteral("common"), QStringLiteral("common"));
0046 }
0047 
0048 /**
0049  * The destructor... ditch our memory maps and close our files here
0050  * (if they were open).
0051  */
0052 DictFileEdict::~DictFileEdict()
0053 {
0054     delete m_deinflection;
0055     m_deinflection = nullptr;
0056 }
0057 
0058 QMap<QString, QString> DictFileEdict::displayOptions() const
0059 {
0060     QMap<QString, QString> list;
0061     list[QStringLiteral("Part of speech(type)")] = QStringLiteral("type");
0062     return list;
0063 }
0064 
0065 /**
0066  * Do a search, respond with a list of entries.
0067  * The general strategy will be to take the first word of the query, and do a
0068  * binary search on the dictionary for that item. Take all results and filter
0069  * them using the rest of the query with the validate method.
0070  */
0071 EntryList *DictFileEdict::doSearch(const DictQuery &query)
0072 {
0073     if (query.isEmpty() || !m_edictFile.valid()) // No query or dict, no results.
0074     {
0075         return new EntryList();
0076     }
0077 
0078     qDebug() << "Search from : " << getName();
0079 
0080     QString firstChoice = query.getWord();
0081     if (firstChoice.length() == 0) {
0082         firstChoice = query.getPronunciation();
0083         if (firstChoice.length() == 0) {
0084             firstChoice = query.getMeaning().split(' '_L1).first().toLower();
0085             if (firstChoice.length() == 0) {
0086                 // The nastiest situation... we have to assemble a search string
0087                 // from the first property
0088                 QList<QString> keys = query.listPropertyKeys();
0089                 if (keys.empty()) // Shouldn't happen... but maybe in the future
0090                 {
0091                     return new EntryList();
0092                 }
0093                 firstChoice = keys[0];
0094                 firstChoice = firstChoice + query.getProperty(firstChoice);
0095                 // TODO: doSearch: some accommodation for searching for ranges and such of properties
0096             }
0097         }
0098     } else {
0099         // Only search for one kanji or the
0100         // binary lookup mechanism breaks
0101         firstChoice = firstChoice.at(0);
0102     }
0103 
0104     QVector<QString> preliminaryResults = m_edictFile.findMatches(firstChoice);
0105 
0106     if (preliminaryResults.empty()) // If there were no matches... return an empty list
0107     {
0108         return new EntryList();
0109     }
0110 
0111     auto results = new EntryList();
0112     for (const QString &it : preliminaryResults) {
0113         //     qDebug() << "result: " << it << endl;
0114         Entry *result = makeEntry(it);
0115         auto resultEdict = static_cast<EntryEdict *>(result);
0116         if (result->matchesQuery(query) && resultEdict->matchesWordType(query)) {
0117             results->append(result);
0118         } else {
0119             delete result;
0120         }
0121     }
0122 
0123     // At this point we should have some preliminary results
0124     // and if there were no matches, it probably means the user
0125     // input was a verb or adjective, so we have to deinflect it.
0126     bool isAnyQuery = query.getMatchWordType() == DictQuery::Any;
0127     bool isVerbQuery = query.getMatchWordType() == DictQuery::Verb;
0128     bool isAdjectiveQuery = query.getMatchWordType() == DictQuery::Adjective;
0129     if (results->count() == 0 && (isAnyQuery || isVerbQuery || isAdjectiveQuery)) {
0130         delete results;
0131         results = m_deinflection->search(query, preliminaryResults);
0132         QString *label = m_deinflection->getDeinflectionLabel();
0133         if (!label->isEmpty() && !m_hasDeinflection) {
0134             deinflectionLabel = label;
0135             m_hasDeinflection = true;
0136             wordType = m_deinflection->getWordType();
0137         }
0138     } else {
0139         deinflectionLabel = nullptr;
0140         wordType = nullptr;
0141         m_hasDeinflection = false;
0142     }
0143 
0144     if (results) {
0145         auto common = new EntryList();
0146         auto uncommon = new EntryList();
0147         EntryList::EntryIterator i(*results);
0148         while (i.hasNext()) {
0149             auto entry = static_cast<EntryEdict *>(i.next());
0150             if (entry->isCommon()) {
0151                 common->append(entry);
0152             } else {
0153                 uncommon->append(entry);
0154             }
0155         }
0156 
0157         delete results;
0158         results = new EntryList();
0159         results->appendList(common);
0160         results->appendList(uncommon);
0161         delete common;
0162         delete uncommon;
0163 
0164         auto exact = new EntryList();
0165         auto beginning = new EntryList();
0166         auto ending = new EntryList();
0167         auto anywhere = new EntryList();
0168         EntryList::EntryIterator it(*results);
0169         while (it.hasNext()) {
0170             Entry *entry = it.next();
0171 
0172             if (entry->getWord() == query.getWord()) {
0173                 exact->append(entry);
0174             } else if (entry->getWord().startsWith(query.getWord())) {
0175                 beginning->append(entry);
0176             } else if (entry->getWord().endsWith(query.getWord())) {
0177                 ending->append(entry);
0178             } else {
0179                 anywhere->append(entry);
0180             }
0181         }
0182 
0183         delete results;
0184         results = new EntryList();
0185         results->appendList(exact);
0186         results->appendList(beginning);
0187         results->appendList(ending);
0188         results->appendList(anywhere);
0189         delete exact;
0190         delete beginning;
0191         delete ending;
0192         delete anywhere;
0193     }
0194 
0195     return results;
0196 }
0197 
0198 /**
0199  * Make a list of all the extra fields in our db.. Entry uses this to decide
0200  * what goes in the interpretations it gives.
0201  */
0202 QStringList DictFileEdict::listDictDisplayOptions(QStringList x) const
0203 {
0204     x += displayOptions().keys();
0205     return x;
0206 }
0207 
0208 /**
0209  * Load up the dictionary
0210  */
0211 bool DictFileEdict::loadDictionary(const QString &fileName, const QString &dictName)
0212 {
0213     if (m_edictFile.valid()) {
0214         return false; // Already loaded
0215     }
0216 
0217     if (m_edictFile.loadFile(fileName)) {
0218         m_dictionaryName = dictName;
0219         m_dictionaryFile = fileName;
0220 
0221         m_deinflection = new Deinflection(m_dictionaryName);
0222         m_deinflection->load();
0223 
0224         return true;
0225     }
0226 
0227     return false;
0228 }
0229 
0230 QMap<QString, QString> DictFileEdict::loadDisplayOptions() const
0231 {
0232     QMap<QString, QString> list = displayOptions();
0233     list[QStringLiteral("Word/Kanji")] = QStringLiteral("Word/Kanji");
0234     list[QStringLiteral("Reading")] = QStringLiteral("Reading");
0235     list[QStringLiteral("Meaning")] = QStringLiteral("Meaning");
0236     list[QStringLiteral("--Newline--")] = QStringLiteral("--Newline--");
0237 
0238     return list;
0239 }
0240 
0241 QStringList *DictFileEdict::loadListType(KConfigSkeletonItem *item, QStringList *list, const QMap<QString, QString> &long2short)
0242 {
0243     QStringList listFromItem;
0244 
0245     if (item != nullptr) {
0246         listFromItem = item->property().toStringList();
0247     }
0248 
0249     if (!listFromItem.isEmpty()) {
0250         delete list;
0251 
0252         list = new QStringList();
0253         for (const QString &it : listFromItem) {
0254             if (long2short.contains(it)) {
0255                 list->append(long2short[it]);
0256             }
0257         }
0258     }
0259 
0260     return list;
0261 }
0262 
0263 void DictFileEdict::loadSettings()
0264 {
0265     this->displayFields = new QStringList(loadDisplayOptions().values());
0266 }
0267 
0268 void DictFileEdict::loadSettings(KConfigSkeleton *config)
0269 {
0270     QMap<QString, QString> long2short = displayOptions();
0271     long2short[QStringLiteral("Word/Kanji")] = QStringLiteral("Word/Kanji");
0272     long2short[QStringLiteral("Reading")] = QStringLiteral("Reading");
0273     long2short[QStringLiteral("Meaning")] = QStringLiteral("Meaning");
0274     long2short[QStringLiteral("--Newline--")] = QStringLiteral("--Newline--");
0275 
0276     KConfigSkeletonItem *item = config->findItem(getType() + "__displayFields"_L1);
0277     this->displayFields = loadListType(item, this->displayFields, long2short);
0278 }
0279 
0280 inline Entry *DictFileEdict::makeEntry(const QString &entry)
0281 {
0282     return new EntryEdict(getName(), entry);
0283 }
0284 
0285 DictionaryPreferenceDialog *DictFileEdict::preferencesWidget(KConfigSkeleton *config, QWidget *parent)
0286 {
0287     auto dialog = new DictFileFieldSelector(config, getType(), parent);
0288     dialog->addAvailable(listDictDisplayOptions(QStringList()));
0289     return dialog;
0290 }
0291 
0292 /**
0293  * Scan a potential file for the correct format, remembering to skip comment
0294  * characters. This is not a foolproof scan, but it should be checked before adding
0295  * a new dictionary.
0296  * Valid EDICT format is considered:
0297  * \<kanji or kana\>+ [\<kana\>] /latin characters & symbols/separated with slashes/
0298  * Comment lines start with... something... not remembering now.
0299  */
0300 bool DictFileEdict::validDictionaryFile(const QString &filename)
0301 {
0302     QFile file(filename);
0303     bool returnFlag = true;
0304 
0305     if (!file.exists() || !file.open(QIODevice::ReadOnly)) {
0306         return false;
0307     }
0308 
0309     // Now we can actually check the file
0310     QStringDecoder decoder("EUC-JP");
0311     const QString decoded = decoder(file.readAll());
0312 
0313     QTextStream fileStream(decoded.toUtf8());
0314 
0315     QString commentMarker(QStringLiteral("????")); // Note: Don't touch this! vim seems to have
0316                                                        // An odd text codec error here too :(
0317     QRegularExpression formattedLine(QStringLiteral("^\\S+\\s+(\\[\\S+\\]\\s+)?/.*/$"));
0318     while (!fileStream.atEnd()) {
0319         QString line = fileStream.readLine();
0320 
0321         if (line.left(4) == commentMarker) {
0322             continue;
0323         }
0324         if (line.contains(formattedLine)) // If it matches our regex
0325         {
0326             continue;
0327         }
0328 
0329         returnFlag = false;
0330         break;
0331     }
0332 
0333     file.close();
0334     return returnFlag;
0335 }
0336 
0337 /**
0338  * Reject queries that specify anything we don't understand
0339  */
0340 // TODO: Actually write this method (validQuery)
0341 bool DictFileEdict::validQuery(const DictQuery &query)
0342 {
0343     Q_UNUSED(query)
0344     return true;
0345 }