File indexing completed on 2024-09-15 03:28:21

0001 /*
0002     This file is part of Kiten, a KDE Japanese Reference Tool
0003     SPDX-FileCopyrightText: 2006 Joseph Kerian <jkerian@gmail.com>
0004 
0005     SPDX-License-Identifier: LGPL-2.0-or-later
0006 */
0007 
0008 #include "radicalfile.h"
0009 #include "DictKanjidic/dictfilekanjidic.h"
0010 #include "kitenmacros.h"
0011 
0012 #include <QFile>
0013 #include <QRegularExpression>
0014 #include <QString>
0015 #include <QStringDecoder>
0016 #include <QTextStream>
0017 
0018 using namespace Qt::StringLiterals;
0019 
0020 RadicalFile::RadicalFile(QString &radkfile, const QString &kanjidic)
0021 {
0022     loadRadicalFile(radkfile);
0023     if (!kanjidic.isEmpty()) {
0024         loadKanjidic(kanjidic);
0025     }
0026 }
0027 
0028 QSet<Kanji> RadicalFile::kanjiContainingRadicals(QSet<QString> &radicallist) const
0029 {
0030     QSet<QString> kanjiStringSet;
0031     QSet<Kanji> result;
0032     if (m_radicals.count() < 1 || radicallist.count() < 1) {
0033         return result;
0034     }
0035 
0036     // Start out with our first set
0037     kanjiStringSet = m_radicals[*radicallist.begin()].getKanji();
0038     // Make a set intersection of these m_kanji
0039     for (const QString &rad : radicallist) {
0040         kanjiStringSet &= m_radicals[rad].getKanji();
0041     }
0042 
0043     // Convert our set of QString to a set of Kanji
0044     for (const QString &kanji : kanjiStringSet) {
0045         result += m_kanji[kanji];
0046     }
0047 
0048     return result;
0049 }
0050 
0051 bool RadicalFile::loadRadicalFile(QString &radkfile)
0052 {
0053     QFile f(radkfile);
0054     if (!f.open(QIODevice::ReadOnly)) {
0055         return false;
0056     }
0057 
0058     // Read our radical file through a eucJP codec (helpfully builtin to Qt)
0059     QStringDecoder decoder("EUC-JP");
0060     const QString decoded = decoder(f.readAll());
0061     QTextStream t(decoded.toUtf8());
0062 
0063     Radical *newestRadical = nullptr;
0064     QHash<QString, QSet<QString>> krad;
0065 
0066     while (!t.atEnd()) {
0067         QString line = t.readLine();
0068         if (line.length() == 0 || line.at(0) == '#'_L1) {
0069             // Skip comment characters
0070             continue;
0071         } else if (line.at(0) == '$'_L1) {
0072             // Start of a new radical
0073             if (newestRadical != nullptr) {
0074                 m_radicals.insert(newestRadical->toString(), *newestRadical);
0075             }
0076             delete newestRadical;
0077             QStringList lineElements = line.split(QRegularExpression(QStringLiteral("\\s+")));
0078             newestRadical = new Radical(lineElements.at(1), lineElements.at(2).toUInt(), m_radicals.size());
0079         } else if (newestRadical != nullptr) {
0080             // List of m_kanji, potentially
0081             const QList<QString> m_kanjiList = line.trimmed().split(QLatin1String(""), Qt::SkipEmptyParts);
0082             const QSet<QString> kanjiSet = QSet<QString>(m_kanjiList.begin(), m_kanjiList.end());
0083             newestRadical->addKanji(kanjiSet);
0084             for (const QString &kanji : m_kanjiList) {
0085                 krad[kanji] += newestRadical->toString();
0086             }
0087         }
0088     }
0089     if (newestRadical != nullptr) {
0090         m_radicals[newestRadical->toString()] = *newestRadical;
0091         delete newestRadical;
0092     }
0093 
0094     // Move contents of our krad QHash into our hash of m_kanji
0095     QHash<QString, QSet<QString>>::iterator it;
0096     for (it = krad.begin(); it != krad.end(); ++it) {
0097         m_kanji.insert(it.key(), Kanji(it.key(), it.value()));
0098     }
0099     f.close();
0100     return true;
0101 }
0102 
0103 // Mostly copied from KanjiBrowser::loadKanji()
0104 bool RadicalFile::loadKanjidic(const QString &kanjidic)
0105 {
0106     DictFileKanjidic dictFileKanjidic;
0107     dictFileKanjidic.loadSettings();
0108     dictFileKanjidic.loadDictionary(kanjidic, KANJIDIC);
0109 
0110     QRegularExpression strokeMatch(QStringLiteral("^S\\d+"));
0111     for (const QString &line : dictFileKanjidic.dumpDictionary()) {
0112         const QString kanji = line[0];
0113 
0114         QStringList strokesSection = line.split(" "_L1, Qt::SkipEmptyParts).filter(strokeMatch);
0115 
0116         unsigned int strokes = strokesSection.first().remove(0, 1).toInt();
0117 
0118         if (m_kanji.contains(kanji)) {
0119             m_kanji[kanji].setStrokes(strokes);
0120         }
0121     }
0122 
0123     return true;
0124 }
0125 
0126 QMultiMap<int, Radical> *RadicalFile::mapRadicalsByStrokes(int max_strokes) const
0127 {
0128     auto result = new QMultiMap<int, Radical>();
0129     for (const Radical &rad : m_radicals) {
0130         int strokes = rad.strokes();
0131         if ((max_strokes > 0) && (strokes > max_strokes)) {
0132             strokes = max_strokes;
0133         }
0134         result->insert(strokes, rad);
0135     }
0136     return result;
0137 }
0138 
0139 QSet<QString> RadicalFile::radicalsInKanji(QSet<Kanji> &kanjilist) const
0140 {
0141     QSet<QString> possibleRadicals;
0142     for (const QString &kanji : kanjilist) {
0143         possibleRadicals |= m_kanji[kanji].getRadicals();
0144     }
0145 
0146     return possibleRadicals;
0147 }