File indexing completed on 2024-04-28 03:40:32
0001 /*************************************************************************** 0002 * Copyright (C) 2002 by Gunnar Schmi Dt <kmouth@schmi-dt.de * 0003 * (C) 2015 by Jeremy Whiting <jpwhiting@kde.org> * 0004 * * 0005 * This program is free software; you can redistribute it and/or modify * 0006 * it under the terms of the GNU General Public License as published by * 0007 * the Free Software Foundation; either version 2 of the License, or * 0008 * (at your option) any later version. * 0009 * * 0010 * This program is distributed in the hope that it will be useful, * 0011 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 0012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 0013 * GNU General Public License for more details. * 0014 * * 0015 * You should have received a copy of the GNU General Public License * 0016 * along with this program; if not, write to the * 0017 * Free Software Foundation, Inc., * 0018 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * 0019 ***************************************************************************/ 0020 0021 #include "wordlist.h" 0022 0023 #include <QApplication> 0024 #include <QDir> 0025 #include <QList> 0026 #include <QProgressDialog> 0027 #include <QRegularExpression> 0028 #include <QStandardPaths> 0029 #include <QTextStream> 0030 0031 #include <KLocalizedString> 0032 0033 namespace WordList 0034 { 0035 void addWords(WordMap &map, const QString &line); 0036 0037 XMLReader::XMLReader() 0038 { 0039 } 0040 0041 XMLReader::~XMLReader() 0042 { 0043 } 0044 0045 bool XMLReader::read(QIODevice *device) 0046 { 0047 xml.setDevice(device); 0048 0049 list.clear(); 0050 0051 while (xml.readNextStartElement()) { 0052 const QString word = xml.name().toString(); 0053 addWords(list, word); 0054 } 0055 0056 return !xml.error(); 0057 } 0058 0059 QString XMLReader::errorString() const 0060 { 0061 return QLatin1String("%1\nLine %2, column %3").arg(xml.errorString()).arg(xml.lineNumber()).arg(xml.columnNumber()); 0062 } 0063 0064 WordMap XMLReader::getList() 0065 { 0066 return list; 0067 } 0068 0069 /***************************************************************************/ 0070 0071 QProgressDialog *progressDialog() 0072 { 0073 QProgressDialog *pdlg = new QProgressDialog(i18n("Creating Word List"), i18n("Parsing the KDE documentation..."), 0, 100); 0074 pdlg->setCancelButton(nullptr); 0075 pdlg->setAutoReset(false); 0076 pdlg->setAutoClose(false); 0077 return pdlg; 0078 } 0079 0080 bool saveWordList(const WordMap &map, const QString &filename) 0081 { 0082 QFile file(filename); 0083 if (!file.open(QIODevice::WriteOnly)) 0084 return false; 0085 0086 QTextStream stream(&file); 0087 stream.setEncoding(QStringConverter::Utf8); 0088 stream << "WPDictFile\n"; 0089 WordMap::ConstIterator it; 0090 for (it = map.constBegin(); it != map.constEnd(); ++it) 0091 stream << it.key() << "\t" << it.value() << "\t2\n"; 0092 file.close(); 0093 return true; 0094 } 0095 0096 /***************************************************************************/ 0097 0098 void addWords(WordMap &map, const QString &line) 0099 { 0100 const QStringList words = line.split(QRegularExpression(QStringLiteral("\\W"))); 0101 0102 QStringList::ConstIterator it; 0103 for (it = words.constBegin(); it != words.constEnd(); ++it) { 0104 if (!(*it).contains(QRegularExpression(QStringLiteral("\\d|_")))) { 0105 QString key = (*it).toLower(); 0106 if (map.contains(key)) 0107 map[key] += 1; 0108 else 0109 map[key] = 1; 0110 } 0111 } 0112 } 0113 0114 void addWords(WordMap &map, const WordMap &add) 0115 { 0116 WordList::WordMap::ConstIterator it; 0117 for (it = add.constBegin(); it != add.constEnd(); ++it) 0118 if (map.contains(it.key())) 0119 map[it.key()] += it.value(); 0120 else 0121 map[it.key()] = it.value(); 0122 } 0123 0124 void addWordsFromFile(WordMap &map, const QString &filename, QStringConverter::Encoding encoding) 0125 { 0126 QFile xmlfile(filename); 0127 XMLReader reader; 0128 0129 if (reader.read(&xmlfile)) // try to load the file as an xml-file 0130 addWords(map, reader.getList()); 0131 else { 0132 QFile wpdfile(filename); 0133 if (wpdfile.open(QIODevice::ReadOnly)) { 0134 QTextStream stream(&wpdfile); 0135 stream.setEncoding(QStringConverter::Utf8); 0136 0137 if (!stream.atEnd()) { 0138 QString line = stream.readLine(); 0139 if (line == QLatin1String("WPDictFile")) { // Contains the file a weighted word list? 0140 // We can assume that weighted word lists are always UTF8 coded. 0141 while (!stream.atEnd()) { 0142 QString s = stream.readLine(); 0143 if (!(s.isNull() || s.isEmpty())) { 0144 QStringList list = s.split(QLatin1Char('\t')); 0145 bool ok; 0146 int weight = list[1].toInt(&ok); 0147 if (ok && (weight > 0)) { 0148 if (map.contains(list[0])) 0149 map[list[0]] += weight; 0150 else 0151 map[list[0]] = weight; 0152 } 0153 } 0154 } 0155 } else { // Count the words in an ordinary text file 0156 QFile file(filename); 0157 if (file.open(QIODevice::ReadOnly)) { 0158 QTextStream plainstream(&file); 0159 plainstream.setEncoding(encoding); 0160 while (!plainstream.atEnd()) 0161 addWords(map, plainstream.readLine()); 0162 } 0163 } 0164 } 0165 } 0166 } 0167 } 0168 0169 WordMap parseFiles(const QStringList &files, QStringConverter::Encoding encoding, QProgressDialog *pdlg) 0170 { 0171 int progress = 0; 0172 int steps = files.count(); 0173 int percent = 0; 0174 0175 WordMap map; 0176 QStringList::ConstIterator it; 0177 for (progress = 1, it = files.constBegin(); it != files.constEnd(); ++progress, ++it) { 0178 addWordsFromFile(map, *it, encoding); 0179 0180 if (steps != 0 && progress * 100 / steps > percent) { 0181 percent = progress * 100 / steps; 0182 pdlg->setValue(percent); 0183 qApp->processEvents(QEventLoop::AllEvents, 20); 0184 } 0185 } 0186 return map; 0187 } 0188 0189 WordMap mergeFiles(const QMap<QString, int> &files, QProgressDialog *pdlg) 0190 { 0191 pdlg->setLabelText(i18n("Merging dictionaries...")); 0192 pdlg->show(); 0193 qApp->processEvents(QEventLoop::AllEvents, 20); 0194 0195 int progress = 0; 0196 int steps = files.count(); 0197 int percent = 0; 0198 float totalWeight = 0; 0199 long long maxWeight = 0; 0200 0201 QMap<QString, float> map; 0202 QMap<QString, int>::ConstIterator it; 0203 for (progress = 1, it = files.constBegin(); it != files.constEnd(); ++progress, ++it) { 0204 WordMap fileMap; 0205 addWordsFromFile(fileMap, it.key(), QStringConverter::Utf8); 0206 0207 long long weight = 0; 0208 WordMap::ConstIterator iter; 0209 for (iter = fileMap.constBegin(); iter != fileMap.constEnd(); ++iter) 0210 weight += iter.value(); 0211 float factor = 1.0 * it.value() / weight; 0212 totalWeight += it.value(); 0213 if (weight > maxWeight) 0214 maxWeight = weight; 0215 0216 for (iter = fileMap.constBegin(); iter != fileMap.constEnd(); ++iter) 0217 if (map.contains(iter.key())) 0218 map[iter.key()] += iter.value() * factor; 0219 else 0220 map[iter.key()] = iter.value() * factor; 0221 0222 if (steps != 0 && progress * 100 / steps > percent) { 0223 percent = progress * 100 / steps; 0224 pdlg->setValue(percent); 0225 qApp->processEvents(QEventLoop::AllEvents, 20); 0226 } 0227 } 0228 0229 float factor; 0230 if (1.0 * maxWeight * totalWeight > 1000000000) 0231 factor = 1000000000 / totalWeight; 0232 else 0233 factor = 1.0 * maxWeight; 0234 0235 WordMap resultMap; 0236 QMap<QString, float>::ConstIterator iter; 0237 for (iter = map.constBegin(); iter != map.constEnd(); ++iter) 0238 resultMap[iter.key()] = (int)(factor * iter.value() + 0.5); 0239 0240 return resultMap; 0241 } 0242 0243 WordMap parseKDEDoc(QString language, QProgressDialog *pdlg) 0244 { 0245 pdlg->setLabelText(i18n("Parsing the KDE documentation...")); 0246 pdlg->show(); 0247 qApp->processEvents(QEventLoop::AllEvents, 20); 0248 0249 QStringList files = QStandardPaths::locateAll(QStandardPaths::GenericDataLocation, QStringLiteral("html/") + language + QStringLiteral("/*.docbook")); 0250 if ((files.count() == 0) && (language.length() == 5)) { 0251 language = language.left(2); 0252 files = QStandardPaths::locateAll(QStandardPaths::GenericDataLocation, QStringLiteral("html/") + language + QStringLiteral("/*.docbook")); 0253 } 0254 0255 return parseFiles(files, QStringConverter::Utf8, pdlg); 0256 } 0257 0258 WordMap parseFile(const QString &filename, QStringConverter::Encoding encoding, QProgressDialog *pdlg) 0259 { 0260 pdlg->setLabelText(i18n("Parsing file...")); 0261 pdlg->show(); 0262 qApp->processEvents(QEventLoop::AllEvents, 20); 0263 0264 QStringList files; 0265 files.append(filename); 0266 0267 return parseFiles(files, encoding, pdlg); 0268 } 0269 0270 WordMap parseDir(const QString &directory, QStringConverter::Encoding encoding, QProgressDialog *pdlg) 0271 { 0272 pdlg->setLabelText(i18n("Parsing directory...")); 0273 pdlg->show(); 0274 qApp->processEvents(QEventLoop::AllEvents, 20); 0275 0276 QStringList directories; 0277 directories += directory; 0278 QStringList files; 0279 int dirNdx = 0; 0280 while (dirNdx < directories.count()) { 0281 QDir dir(directories.at(dirNdx)); 0282 const QFileInfoList entries = dir.entryInfoList(QDir::Dirs | QDir::Files | QDir::NoSymLinks | QDir::Readable); 0283 0284 for (int i = 0; i < entries.size(); ++i) { 0285 QFileInfo fileInfo = entries.at(i); 0286 0287 QString name = fileInfo.fileName(); 0288 if (name != QLatin1String(".") && name != QLatin1String("..")) { 0289 if (fileInfo.isDir()) 0290 directories += fileInfo.filePath(); 0291 else 0292 files += fileInfo.filePath(); 0293 } 0294 } 0295 directories.removeAt(dirNdx); 0296 } 0297 0298 return parseFiles(files, encoding, pdlg); 0299 } 0300 0301 /***************************************************************************/ 0302 0303 /* Structures used for storing *.aff files (part of OpenOffice.org dictionaries) 0304 */ 0305 struct AffEntry { 0306 bool cross; 0307 int charsToRemove; 0308 QString add; 0309 QStringList condition; 0310 }; 0311 0312 typedef QList<AffEntry> AffList; 0313 typedef QMap<QChar, AffList> AffMap; 0314 0315 /** Loads an *.aff file (part of OpenOffice.org dictionaries) 0316 */ 0317 void loadAffFile(const QString &filename, AffMap &prefixes, AffMap &suffixes) 0318 { 0319 QFile afile(filename); 0320 if (afile.open(QIODevice::ReadOnly)) { 0321 QTextStream stream(&afile); 0322 while (!stream.atEnd()) { 0323 QString s = stream.readLine(); 0324 QStringList fields = s.split(QRegularExpression(QStringLiteral("\\s"))); 0325 0326 bool cross = false; 0327 if (fields.count() == 4) { 0328 cross = (fields[2] == QLatin1String("Y")); 0329 } else { 0330 if (fields.count() >= 5) { 0331 AffEntry e; 0332 e.cross = cross; 0333 if (fields[2] == QLatin1String("0")) 0334 e.charsToRemove = 0; 0335 else 0336 e.charsToRemove = fields[2].length(); 0337 e.add = fields[3]; 0338 0339 if (fields[4] != QLatin1String(".")) { 0340 QString condition = fields[4]; 0341 for (int idx = 0; idx < condition.length(); ++idx) { 0342 if (condition[idx] == QLatin1Char('[')) { 0343 QString code; 0344 for (++idx; (idx < condition.length()) && condition[idx] != QLatin1Char(']'); ++idx) 0345 code += condition[idx]; 0346 e.condition << code; 0347 } else 0348 e.condition << QString(condition[idx]); 0349 } 0350 } 0351 0352 if (s.startsWith(QLatin1String("PFX"))) { 0353 AffList list; 0354 if (prefixes.contains(fields[1][0])) 0355 list = prefixes[fields[1][0]]; 0356 list << e; 0357 prefixes[fields[1][0]] = list; 0358 } else if (s.startsWith(QLatin1String("SFX"))) { 0359 AffList list; 0360 if (suffixes.contains(fields[1][0])) 0361 list = suffixes[fields[1][0]]; 0362 list << e; 0363 suffixes[fields[1][0]] = list; 0364 } 0365 } 0366 } 0367 } 0368 } 0369 } 0370 0371 /** Checks if the given word matches the given condition. Each entry of the 0372 * QStringList "condition" describes one character of the word. (If the word 0373 * has more characters than condition entries only the last characters are 0374 * compared). 0375 * Each entry contains either all valid characters (if it does _not_ start 0376 * with "^") or all invalid characters (if it starts with "^"). 0377 */ 0378 inline bool checkCondition(const QString &word, const QStringList &condition) 0379 { 0380 if (condition.count() == 0) 0381 return true; 0382 0383 if (word.length() < condition.count()) 0384 return false; 0385 0386 QStringList::ConstIterator it; 0387 int idx; 0388 for (it = condition.constBegin(), idx = word.length() - condition.count(); it != condition.constEnd(); ++it, ++idx) { 0389 if ((*it).contains(word[idx]) == ((*it)[0] == QLatin1Char('^'))) 0390 return false; 0391 } 0392 return true; 0393 } 0394 0395 /** Constructs words by adding suffixes to the given word, and copies the 0396 * resulting words from map to checkedMap. 0397 * @param modifiers discribes which suffixes are valid 0398 * @param cross true if the word has a prefix 0399 */ 0400 inline void checkWord(const QString &word, const QString &modifiers, bool cross, const WordMap &map, WordMap &checkedMap, const AffMap &suffixes) 0401 { 0402 for (int i = 0; i < modifiers.length(); i++) { 0403 if (suffixes.contains(modifiers[i])) { 0404 AffList sList = suffixes[modifiers[i]]; 0405 0406 AffList::ConstIterator sIt; 0407 for (sIt = sList.constBegin(); sIt != sList.constEnd(); ++sIt) { 0408 if (((*sIt).cross || !cross) && (checkCondition(word, (*sIt).condition))) { 0409 QString sWord = word.left(word.length() - (*sIt).charsToRemove) + (*sIt).add; 0410 if (map.contains(sWord)) 0411 checkedMap[sWord] = map[sWord]; 0412 } 0413 } 0414 } 0415 } 0416 } 0417 0418 /** Constructs words by adding pre- and suffixes to the given word, and 0419 * copies the resulting words from map to checkedMap. 0420 * @param modifiers discribes which pre- and suffixes are valid 0421 */ 0422 void checkWord(const QString &word, const QString &modifiers, const WordMap &map, WordMap &checkedMap, const AffMap &prefixes, const AffMap &suffixes) 0423 { 0424 if (map.contains(word)) 0425 checkedMap[word] = map[word]; 0426 0427 checkWord(word, modifiers, true, map, checkedMap, suffixes); 0428 0429 for (int i = 0; i < modifiers.length(); i++) { 0430 if (prefixes.contains(modifiers[i])) { 0431 AffList pList = prefixes[modifiers[i]]; 0432 0433 AffList::ConstIterator pIt; 0434 for (pIt = pList.constBegin(); pIt != pList.constEnd(); ++pIt) { 0435 QString pWord = (*pIt).add + word; 0436 if (map.contains(pWord)) 0437 checkedMap[pWord] = map[pWord]; 0438 0439 checkWord(pWord, modifiers, false, map, checkedMap, suffixes); 0440 } 0441 } 0442 } 0443 } 0444 0445 WordMap spellCheck(WordMap map, const QString &dictionary, QProgressDialog *pdlg) 0446 { 0447 if (dictionary.endsWith(QLatin1String(".dic"))) { 0448 AffMap prefixes; 0449 AffMap suffixes; 0450 WordMap checkedMap; 0451 loadAffFile(dictionary.left(dictionary.length() - 4) + QStringLiteral(".aff"), prefixes, suffixes); 0452 0453 pdlg->reset(); 0454 // pdlg->setAllowCancel (false); 0455 // pdlg->showCancelButton (false); 0456 pdlg->setAutoReset(false); 0457 pdlg->setAutoClose(false); 0458 pdlg->setLabelText(i18n("Performing spell check...")); 0459 pdlg->setMaximum(100); 0460 pdlg->setValue(0); 0461 qApp->processEvents(QEventLoop::AllEvents, 20); 0462 0463 QFile dfile(dictionary); 0464 if (dfile.open(QIODevice::ReadOnly)) { 0465 QTextStream stream(&dfile); 0466 int progress = 0; 0467 int steps = 0; 0468 int percent = 0; 0469 0470 if (!stream.atEnd()) { 0471 QString s = stream.readLine(); // Number of words 0472 steps = s.toInt(); 0473 } 0474 0475 while (!stream.atEnd()) { 0476 QString s = stream.readLine(); 0477 if (s.contains(QLatin1Char('/'))) { 0478 QString word = s.left(s.indexOf(QLatin1Char('/'))).toLower(); 0479 QString modifiers = s.right(s.length() - s.indexOf(QLatin1Char('/'))); 0480 0481 checkWord(word, modifiers, map, checkedMap, prefixes, suffixes); 0482 } else { 0483 if (!s.isEmpty() && !s.isNull() && map.contains(s.toLower())) 0484 checkedMap[s.toLower()] = map[s.toLower()]; 0485 } 0486 0487 progress++; 0488 if (steps != 0 && progress * 100 / steps > percent) { 0489 percent = progress * 100 / steps; 0490 pdlg->setValue(percent); 0491 qApp->processEvents(QEventLoop::AllEvents, 20); 0492 } 0493 } 0494 } 0495 0496 return checkedMap; 0497 } else 0498 return map; 0499 } 0500 0501 }