File indexing completed on 2025-04-27 10:16:11
0001 /** 0002 * parsetrigrams.cpp 0003 * 0004 * Parse a set of trigram files into a QMap, and serialize to stdout. 0005 * Note: we allow this data to be read into QHash. We use QMap here 0006 * to get deterministic output from run to run. 0007 * 0008 * SPDX-FileCopyrightText: 2006 Jacob Rideout <kde@jacobrideout.net> 0009 * 0010 * SPDX-License-Identifier: LGPL-2.1-or-later 0011 */ 0012 0013 #include <QDataStream> 0014 #include <QDir> 0015 #include <QFile> 0016 #include <QMap> 0017 #include <QRegularExpression> 0018 #include <QString> 0019 #include <QTextStream> 0020 0021 int main(int argc, char **argv) 0022 { 0023 if (argc < 2) { 0024 return 1; 0025 } 0026 0027 QFile sout; 0028 sout.open(stdout, QIODevice::WriteOnly); 0029 QDataStream out(&sout); 0030 0031 QString path = QLatin1String(argv[1]); 0032 QDir td(path); 0033 0034 /* 0035 * We use QMap (instead of QHash) here to get deterministic output 0036 * from run to run. 0037 */ 0038 QMap<QString, QMap<QString, int>> models; 0039 0040 const QRegularExpression rx(QStringLiteral("(?:.{3})\\s+(.*)")); 0041 const QStringList files = td.entryList(QDir::Files); 0042 for (const QString &fname : files) { 0043 QFile fin(td.filePath(fname)); 0044 fin.open(QFile::ReadOnly | QFile::Text); 0045 QTextStream stream(&fin); 0046 0047 // Not needed with Qt6, UTF-8 is the default 0048 #if QT_VERSION < QT_VERSION_CHECK(6, 0, 0) 0049 stream.setCodec("UTF-8"); 0050 #endif 0051 while (!stream.atEnd()) { 0052 QString line = stream.readLine(); 0053 const QRegularExpressionMatch match = rx.match(line); 0054 if (match.hasMatch()) { 0055 #if QT_VERSION >= QT_VERSION_CHECK(6, 0, 0) 0056 models[fname][line.left(3)] = match.capturedView(1).toInt(); 0057 #else 0058 models[fname][line.left(3)] = match.capturedRef(1).toInt(); 0059 #endif 0060 } 0061 } 0062 } 0063 0064 out << models; 0065 }