File indexing completed on 2024-04-21 07:27:59

0001 /*
0002     This file is part of Kiten, a KDE Japanese Reference Tool
0003     SPDX-FileCopyrightText: 2001 Jason Katz-Brown <jason@katzbrown.com>
0004     SPDX-FileCopyrightText: 2006 Joseph Kerian <jkerian@gmail.com>
0005     SPDX-FileCopyrightText: 2006 Eric Kjeldergaard <kjelderg@gmail.com>
0006     SPDX-FileCopyrightText: 2011 Daniel E. Moctezuma <democtezuma@gmail.com>
0007 
0008     SPDX-License-Identifier: LGPL-2.0-or-later
0009 */
0010 
0011 #include "dictfilekanjidic.h"
0012 
0013 #include "dictquery.h"
0014 #include "entrykanjidic.h"
0015 #include "entrylist.h"
0016 #include "kitenmacros.h"
0017 
0018 #include <KConfigSkeleton>
0019 
0020 #include <QDebug>
0021 #include <QFile>
0022 #include <QRegularExpression>
0023 #include <QStringDecoder>
0024 
0025 using namespace Qt::StringLiterals;
0026 
0027 QStringList *DictFileKanjidic::displayFields = nullptr;
0028 
0029 DictFileKanjidic::DictFileKanjidic()
0030     : DictFile(KANJIDIC)
0031 {
0032     m_dictionaryType = KANJIDIC;
0033     m_searchableAttributes.clear();
0034     m_searchableAttributes.insert(QStringLiteral("bushu"), QStringLiteral("B"));
0035     m_searchableAttributes.insert(QStringLiteral("classical"), QStringLiteral("C"));
0036     m_searchableAttributes.insert(QStringLiteral("henshall"), QStringLiteral("E"));
0037     m_searchableAttributes.insert(QStringLiteral("frequency"), QStringLiteral("F"));
0038     m_searchableAttributes.insert(QStringLiteral("grade"), QStringLiteral("G"));
0039     m_searchableAttributes.insert(QStringLiteral("halpern"), QStringLiteral("H"));
0040     m_searchableAttributes.insert(QStringLiteral("spahn"), QStringLiteral("I"));
0041     m_searchableAttributes.insert(QStringLiteral("hadamitzky"), QStringLiteral("I"));
0042     m_searchableAttributes.insert(QStringLiteral("gakken"), QStringLiteral("K"));
0043     m_searchableAttributes.insert(QStringLiteral("heisig"), QStringLiteral("L"));
0044     m_searchableAttributes.insert(QStringLiteral("morohashi"), QStringLiteral("M"));
0045     m_searchableAttributes.insert(QStringLiteral("nelson"), QStringLiteral("N"));
0046     m_searchableAttributes.insert(QStringLiteral("oneill"), QStringLiteral("O"));
0047     m_searchableAttributes.insert(QStringLiteral("skip"), QStringLiteral("P"));
0048     m_searchableAttributes.insert(QStringLiteral("4cc"), QStringLiteral("Q"));
0049     m_searchableAttributes.insert(QStringLiteral("stroke"), QStringLiteral("S"));
0050     m_searchableAttributes.insert(QStringLiteral("strokes"), QStringLiteral("S"));
0051     m_searchableAttributes.insert(QStringLiteral("unicode"), QStringLiteral("U"));
0052     m_searchableAttributes.insert(QStringLiteral("haig"), QStringLiteral("V"));
0053     m_searchableAttributes.insert(QStringLiteral("korean"), QStringLiteral("W"));
0054     m_searchableAttributes.insert(QStringLiteral("pinyin"), QStringLiteral("Y"));
0055     m_searchableAttributes.insert(QStringLiteral("other"), QStringLiteral("D"));
0056 }
0057 
0058 QMap<QString, QString> DictFileKanjidic::displayOptions() const
0059 {
0060     // Enumerate the fields in our dict.... there are a rather lot of them here
0061     // It will be useful for a few things to have the full list generated on it's own
0062     QMap<QString, QString> list;
0063     // TODO: Figure out how to internationalize these easily
0064     list.insert(QStringLiteral("Bushu Number(B)"), QStringLiteral("B"));
0065     list.insert(QStringLiteral("Classical Radical Number(C)"), QStringLiteral("C"));
0066     list.insert(QStringLiteral("Henshall's Index Number(E)"), QStringLiteral("E"));
0067     list.insert(QStringLiteral("Frequency Ranking(F)"), QStringLiteral("F"));
0068     list.insert(QStringLiteral("Grade Level(G)"), QStringLiteral("G"));
0069     list.insert(QStringLiteral("Halpern's New J-E Char Dictionary(H)"), QStringLiteral("H"));
0070     list.insert(QStringLiteral("Spahn & Hadamitzky Reference(I)"), QStringLiteral("I"));
0071     list.insert(QStringLiteral("Gakken Kanji Dictionary Index(K)"), QStringLiteral("K"));
0072     list.insert(QStringLiteral("Heisig's Index(L)"), QStringLiteral("L"));
0073     list.insert(QStringLiteral("Morohashi's Daikanwajiten(M)"), QStringLiteral("M"));
0074     list.insert(QStringLiteral("Nelsons Modern Reader's J-E Index(N)"), QStringLiteral("N"));
0075     list.insert(QStringLiteral("O'Neill's 'Japanese Names' Index(O)"), QStringLiteral("O"));
0076     list.insert(QStringLiteral("SKIP Code(P)"), QStringLiteral("P"));
0077     list.insert(QStringLiteral("Four Corner codes(Q)"), QStringLiteral("Q"));
0078     list.insert(QStringLiteral("Stroke Count(S)"), QStringLiteral("S"));
0079     list.insert(QStringLiteral("Unicode Value(U)"), QStringLiteral("U"));
0080     list.insert(QStringLiteral("Haig's New Nelson J-E Dict(V)"), QStringLiteral("V"));
0081     list.insert(QStringLiteral("Korean Reading(W)"), QStringLiteral("W"));
0082     list.insert(QStringLiteral("kanjidic field: X"), QStringLiteral("X"));
0083     list.insert(QStringLiteral("Pinyin Reading(Y)"), QStringLiteral("Y"));
0084     list.insert(QStringLiteral("Common SKIP Misclassifications(Z)"), QStringLiteral("Z"));
0085     list.insert(QStringLiteral("Misc Dictionary Codes (D)"), QStringLiteral("D"));
0086     return list;
0087 }
0088 
0089 EntryList *DictFileKanjidic::doSearch(const DictQuery &query)
0090 {
0091     if (query.isEmpty() || !m_validKanjidic) {
0092         return new EntryList();
0093     }
0094 
0095     qDebug() << "Search from:" << getName();
0096     QString searchQuery = query.getWord();
0097     if (searchQuery.length() == 0) {
0098         searchQuery = query.getPronunciation();
0099         if (searchQuery.length() == 0) {
0100             searchQuery = query.getMeaning().split(' '_L1).first().toLower();
0101             if (searchQuery.length() == 0) {
0102                 QList<QString> keys = query.listPropertyKeys();
0103                 if (keys.empty()) {
0104                     return new EntryList();
0105                 }
0106                 searchQuery = keys[0];
0107                 searchQuery = searchQuery + query.getProperty(searchQuery);
0108             }
0109         }
0110     }
0111 
0112     auto results = new EntryList();
0113     for (const QString &line : m_kanjidic) {
0114         if (line.contains(searchQuery)) {
0115             Entry *entry = makeEntry(line);
0116             if (entry->matchesQuery(query)) {
0117                 results->append(entry);
0118             } else
0119                 delete entry;
0120         }
0121     }
0122 
0123     return results;
0124 }
0125 
0126 QStringList DictFileKanjidic::dumpDictionary()
0127 {
0128     if (!m_validKanjidic) {
0129         return {};
0130     }
0131 
0132     return m_kanjidic;
0133 }
0134 
0135 QStringList DictFileKanjidic::listDictDisplayOptions(QStringList list) const
0136 {
0137     list += displayOptions().keys();
0138     return list;
0139 }
0140 
0141 bool DictFileKanjidic::loadDictionary(const QString &file, const QString &name)
0142 {
0143     if (!m_kanjidic.isEmpty()) {
0144         return true;
0145     }
0146 
0147     QFile dictionary(file);
0148     if (!dictionary.open(QIODevice::ReadOnly | QIODevice::Text)) {
0149         return false;
0150     }
0151 
0152     qDebug() << "Loading kanjidic from:" << file;
0153 
0154     QStringDecoder decoder("EUC-JP");
0155     const QString decoded = decoder(dictionary.readAll());
0156 
0157     QTextStream fileStream(decoded.toUtf8());
0158 
0159     QString currentLine;
0160     while (!fileStream.atEnd()) {
0161         currentLine = fileStream.readLine();
0162         if (currentLine[0] != '#'_L1) {
0163             m_kanjidic << currentLine;
0164         }
0165     }
0166 
0167     dictionary.close();
0168 
0169     if (!validDictionaryFile(file)) {
0170         return false;
0171     }
0172 
0173     m_dictionaryName = name;
0174     m_dictionaryFile = file;
0175 
0176     return true;
0177 }
0178 
0179 QMap<QString, QString> DictFileKanjidic::loadDisplayOptions() const
0180 {
0181     QMap<QString, QString> list = displayOptions();
0182     list[QStringLiteral("Word/Kanji")] = QStringLiteral("Word/Kanji");
0183     list[QStringLiteral("Reading")] = QStringLiteral("Reading");
0184     list[QStringLiteral("Meaning")] = QStringLiteral("Meaning");
0185     list[QStringLiteral("--Newline--")] = QStringLiteral("--Newline--");
0186 
0187     return list;
0188 }
0189 
0190 QStringList *DictFileKanjidic::loadListType(KConfigSkeletonItem *item, QStringList *list, const QMap<QString, QString> &long2short)
0191 {
0192     QStringList listFromItem;
0193 
0194     if (item != nullptr) {
0195         listFromItem = item->property().toStringList();
0196     }
0197 
0198     if (!listFromItem.isEmpty()) {
0199         delete list;
0200 
0201         list = new QStringList();
0202         for (const QString &it : listFromItem) {
0203             if (long2short.contains(it)) {
0204                 list->append(long2short[it]);
0205             }
0206         }
0207     }
0208 
0209     return list;
0210 }
0211 
0212 void DictFileKanjidic::loadSettings()
0213 {
0214     this->displayFields = new QStringList(loadDisplayOptions().values());
0215 }
0216 
0217 void DictFileKanjidic::loadSettings(KConfigSkeleton *config)
0218 {
0219     KConfigSkeletonItem *item = config->findItem(getType() + "__displayFields"_L1);
0220     this->displayFields = loadListType(item, this->displayFields, loadDisplayOptions());
0221 }
0222 
0223 inline Entry *DictFileKanjidic::makeEntry(const QString &entry)
0224 {
0225     return new EntryKanjidic(getName(), entry);
0226 }
0227 
0228 /**
0229  * Scan a potential file for the correct format, remembering to skip comment
0230  * characters. This is not a foolproof scan, but it should be checked before adding
0231  * a new dictionary.
0232  */
0233 bool DictFileKanjidic::validDictionaryFile(const QString &filename)
0234 {
0235     QFile file(filename);
0236     if (!file.exists() || !file.open(QIODevice::ReadOnly)) {
0237         return false;
0238     }
0239 
0240     QStringDecoder decoder("EUC-JP");
0241     const QString decoded = decoder(file.readAll());
0242 
0243     QTextStream fileStream(decoded.toUtf8());
0244 
0245     QRegularExpression format(QStringLiteral("^\\S\\s+(\\S+\\s+)+(\\{(\\S+\\s?)+\\})+"));
0246     m_validKanjidic = true;
0247     while (!fileStream.atEnd()) {
0248         QString currentLine = fileStream.readLine();
0249 
0250         if (currentLine[0] == '#'_L1) {
0251             continue;
0252         } else if (currentLine.contains(format)) {
0253             continue;
0254         }
0255 
0256         m_validKanjidic = false;
0257         break;
0258     }
0259 
0260     file.close();
0261     return m_validKanjidic;
0262 }
0263 
0264 /**
0265  * Reject queries that specify anything we don't understand
0266  */
0267 bool DictFileKanjidic::validQuery(const DictQuery &query)
0268 {
0269     // Multi kanji searches don't apply to this file
0270     if (query.getWord().length() > 1) {
0271         return false;
0272     }
0273 
0274     // Now check if we have any properties specified that we don't understand
0275     QStringList propertiesWeHandle = m_searchableAttributes.values() + m_searchableAttributes.keys();
0276     propertiesWeHandle += QStringLiteral("common"); // We map this to be (has a G value)
0277 
0278     const QStringList properties = query.listPropertyKeys();
0279     for (QStringList::const_iterator it = properties.constBegin(); it != properties.constEnd(); ++it) {
0280         if (!propertiesWeHandle.contains(*it)) {
0281             return false;
0282         }
0283     }
0284 
0285     return true;
0286 }