File indexing completed on 2024-04-21 03:41:59

0001 /*
0002     This file is part of Kiten, a KDE Japanese Reference Tool
0003     SPDX-FileCopyrightText: 2001 Jason Katz-Brown <jason@katzbrown.com>
0004     SPDX-FileCopyrightText: 2006 Joseph Kerian <jkerian@gmail.com>
0005     SPDX-FileCopyrightText: 2006 Eric Kjeldergaard <kjelderg@gmail.com>
0006     SPDX-FileCopyrightText: 2011 Daniel E. Moctezuma <democtezuma@gmail.com>
0007 
0008     SPDX-License-Identifier: LGPL-2.0-or-later
0009 */
0010 
0011 #include "deinflection.h"
0012 
0013 #include "dictfileedict.h"
0014 #include "dictquery.h"
0015 #include "entryedict.h"
0016 #include "entrylist.h"
0017 
0018 #include <KLocalizedString>
0019 #include <KMessageBox>
0020 
0021 #include <QFile>
0022 #include <QHash>
0023 #include <QList>
0024 #include <QStandardPaths>
0025 #include <QString>
0026 #include <QStringDecoder>
0027 #include <QTextStream>
0028 
0029 using namespace Qt::StringLiterals;
0030 
0031 // This is a very primitive form of information hiding,
0032 // but C++ can get stupid with static Qt objects...
0033 // so this turns out to be much, much easier.
0034 // TODO: Fix this for thread safety/functionality (I'm presuming it's broken atm)
0035 
0036 // Declare our constants
0037 QList<Deinflection::Conjugation> *Deinflection::conjugationList = nullptr;
0038 
0039 Deinflection::Deinflection(const QString &name)
0040     : m_deinflectionLabel(QString())
0041     , m_wordType(QString())
0042     , m_dictionaryName(name)
0043 {
0044 }
0045 
0046 QString *Deinflection::getDeinflectionLabel()
0047 {
0048     return &m_deinflectionLabel;
0049 }
0050 
0051 QString *Deinflection::getWordType()
0052 {
0053     return &m_wordType;
0054 }
0055 
0056 EntryList *Deinflection::search(const DictQuery &query, const QVector<QString> &preliminaryResults)
0057 {
0058     if (conjugationList == nullptr) {
0059         return nullptr;
0060     }
0061 
0062     m_deinflectionLabel = QString();
0063     m_wordType = QString();
0064 
0065     auto entries = new EntryList();
0066 
0067     QStringList edictTypesList;
0068     edictTypesList.append(EdictFormatting::Adjectives);
0069     edictTypesList.append(EdictFormatting::Verbs);
0070 
0071     QString edictTypes = edictTypesList.join(QLatin1Char(','));
0072 
0073     for (const QString &item : preliminaryResults) {
0074         EntryEdict *entry = makeEntry(item);
0075         QStringListIterator it(entry->getTypesList());
0076         bool matched = false;
0077         while (it.hasNext() && !matched) {
0078             if (edictTypes.contains(it.next())) {
0079                 entries->append(entry);
0080                 matched = true;
0081             }
0082         }
0083         if (!matched)
0084             delete entry;
0085     }
0086 
0087     auto results = new EntryList();
0088     EntryList::EntryIterator it(*entries);
0089     while (it.hasNext()) {
0090         auto entry = static_cast<EntryEdict *>(it.next());
0091 
0092         QString text = query.getWord();
0093         if (text.isEmpty()) {
0094             text = query.getPronunciation();
0095 
0096             if (text.isEmpty()) {
0097                 entries->deleteAll();
0098                 delete entries;
0099                 delete results;
0100                 return nullptr;
0101             }
0102         }
0103 
0104         QString word = entry->getWord();
0105         for (const Deinflection::Conjugation &conj : *conjugationList) {
0106             if (text.endsWith(conj.ending) && word.endsWith(conj.replace) && text.startsWith(word.left(word.length() - conj.replace.length()))) {
0107                 QString replacement = text;
0108                 replacement.truncate(text.length() - conj.ending.length());
0109                 replacement += conj.replace;
0110 
0111                 if (word == replacement) {
0112                     if (m_deinflectionLabel.isEmpty()) {
0113                         m_deinflectionLabel = conj.label;
0114                     }
0115 
0116                     if (m_wordType.isEmpty()) {
0117                         if (entry->isVerb()) {
0118                             m_wordType = i18n("verb");
0119                         } else if (entry->isAdjective()) {
0120                             m_wordType = i18n("adjective");
0121                         }
0122                     }
0123 
0124                     results->append(entry);
0125                     break;
0126                 }
0127             }
0128         }
0129     }
0130     delete entries;
0131     return results;
0132 }
0133 
0134 bool Deinflection::load()
0135 {
0136     if (conjugationList != nullptr) {
0137         return true;
0138     }
0139 
0140     conjugationList = new QList<Conjugation>;
0141 
0142     QString vconj = QStandardPaths::locate(QStandardPaths::GenericDataLocation, QStringLiteral("kiten/vconj"));
0143 
0144     // Find the file
0145     if (vconj.isEmpty()) {
0146         KMessageBox::error(nullptr, i18n("Verb deinflection information not found, so verb deinflection cannot be used."));
0147         return false;
0148     }
0149 
0150     QHash<unsigned long, QString> names;
0151     // Open the file
0152     QFile f(vconj);
0153     if (!f.open(QIODevice::ReadOnly)) {
0154         KMessageBox::error(nullptr, i18n("Verb deinflection information could not be loaded, so verb deinflection cannot be used."));
0155         return false;
0156     }
0157 
0158     QStringDecoder decoder("EUC-JP");
0159     const QString decoded = decoder(f.readAll());
0160     QTextStream t(decoded.toUtf8());
0161 
0162     // The file starts out with a number -> name list of the conjugation types
0163     // In the format "#[#]  NAME\n"
0164     // The next section beginning is flagged with a $ at the beginning of the line
0165     for (QString text = t.readLine(); !t.atEnd() && text.at(0) != '$'_L1; text = t.readLine()) {
0166         if (text.at(0) != '#'_L1) {
0167             unsigned long number = text.left(2).trimmed().toULong();
0168             QString name = text.right(text.length() - 2).trimmed();
0169             names[number] = name;
0170         }
0171     }
0172 
0173     // Now for the actual conjugation data
0174     // Format is "NUMBER_FROM_LIST_ABOVE  ENDING_TO_REPLACE\n"
0175     QString replacement = QString();
0176     for (QString text = t.readLine(); !t.atEnd(); text = t.readLine()) {
0177         if (!text.isEmpty() && text.at(0) == '$'_L1) {
0178             replacement = text.right(1).trimmed();
0179         } else if (!text.trimmed().isEmpty() && text.at(0) != '#'_L1) {
0180             unsigned long labelIndex = text.section(' '_L1, 0, 1).trimmed().toULong();
0181 
0182             Conjugation conj;
0183             conj.label = names.value(labelIndex);
0184             conj.ending = text.section(' '_L1, 2).trimmed();
0185             conj.replace = replacement;
0186 
0187             conjugationList->append(conj);
0188         }
0189     }
0190 
0191     f.close();
0192 
0193     return true;
0194 }
0195 
0196 inline EntryEdict *Deinflection::makeEntry(const QString &entry)
0197 {
0198     return new EntryEdict(m_dictionaryName, entry);
0199 }