// File indexing completed on 2025-03-16 12:49:36
0001 /* 0002 Gettext translation file analyzer 0003 0004 SPDX-FileCopyrightText: 2007 Montel Laurent <montel@kde.org> 0005 SPDX-FileCopyrightText: 2009 Jos van den Oever <jos@vandenoever.info> 0006 SPDX-FileCopyrightText: 2014 Nick Shaforostoff <shaforostoff@gmail.com> 0007 0008 SPDX-License-Identifier: LGPL-2.1-or-later 0009 */ 0010 0011 0012 #include "poextractor.h" 0013 #include <QFile> 0014 #include <fstream> 0015 0016 using namespace KFileMetaData; 0017 0018 POExtractor::POExtractor(QObject* parent) 0019 : ExtractorPlugin(parent) 0020 { 0021 0022 } 0023 0024 const QStringList supportedMimeTypes = { 0025 QStringLiteral("text/x-gettext-translation"), 0026 }; 0027 0028 QStringList POExtractor::mimetypes() const 0029 { 0030 return supportedMimeTypes; 0031 } 0032 0033 void POExtractor::endMessage() 0034 { 0035 messages++; 0036 fuzzy+=isFuzzy; 0037 untranslated+=(!isTranslated); 0038 0039 isFuzzy = false; 0040 isTranslated = false; 0041 state = WHITESPACE; 0042 } 0043 0044 void POExtractor::handleComment(const char* data, quint32 length) 0045 { 0046 state = COMMENT; 0047 if (length >= 8 && strncmp(data, "#, fuzzy", 8) == 0) { // could be better 0048 isFuzzy = true; 0049 } 0050 } 0051 0052 void POExtractor::handleLine(const char* data, quint32 length) 0053 { 0054 if (state == ERROR) { 0055 return; 0056 } 0057 if (state == WHITESPACE) { 0058 if (length == 0) { 0059 return; 0060 } 0061 if (data[0] != '#') { 0062 state = COMMENT; //this allows PO files w/o comments 0063 } else { 0064 handleComment(data, length); 0065 return; 0066 } 0067 } 0068 if (state == COMMENT) { 0069 if (length == 0) { 0070 state = WHITESPACE; 0071 } else if (data[0] == '#') { 0072 handleComment(data, length); 0073 } else if (length > 7 && strncmp("msgctxt", data, 7) == 0) { 0074 state = MSGCTXT; 0075 } else if (length > 7 && strncmp("msgid \"", data, 7) == 0) { 0076 state = MSGID; 0077 } else { 0078 state = ERROR; 0079 } 0080 return; 0081 } else if (length > 1 && data[0] == '"' && data[length-1] 
== '"' 0082 && (state == MSGCTXT || state == MSGID || state == MSGSTR 0083 || state == MSGID_PLURAL)) { 0084 // continued text field 0085 isTranslated = state == MSGSTR && length > 2; 0086 } else if (state == MSGCTXT 0087 && length > 7 && strncmp("msgid \"", data, 7) == 0) { 0088 state = MSGID; 0089 } else if (state == MSGID 0090 && length > 14 && strncmp("msgid_plural \"", data, 14) == 0) { 0091 state = MSGID_PLURAL; 0092 } else if ((state == MSGID || state == MSGID_PLURAL || state == MSGSTR) 0093 && length > 8 && strncmp("msgstr", data, 6) == 0) { 0094 state = MSGSTR; 0095 isTranslated = strncmp(data+length-3, " \"\"", 3) != 0; 0096 } else if (state == MSGSTR) { 0097 if (length == 0) { 0098 endMessage(); 0099 } else if (data[0]=='#' || data[0]=='m') { //allow PO without empty line between entries 0100 endMessage(); 0101 state = COMMENT; 0102 handleLine(data, length); 0103 } else { 0104 state = ERROR; 0105 } 0106 } else { 0107 state = ERROR; 0108 } 0109 #if 0 0110 if (messages > 1 || state != MSGSTR) return; 0111 0112 // handle special values in the first message 0113 // assumption is that value takes up only one line 0114 if (strncmp("\"POT-Creation-Date: ", data, 20) == 0) { 0115 result->add(Property::TranslationTemplateDate, QByteArray(data + 20, length - 21)); 0116 } else if (strncmp("\"PO-Revision-Date: ", data, 19) == 0) { 0117 result->add(Property::TranslationLastUpDate, QByteArray(data + 19, length - 20)); 0118 } else if (strncmp("\"Last-Translator: ", data, 18) == 0) { 0119 result->add(Property::TranslationLastAuthor, QByteArray(data + 18, length - 19)); 0120 } 0121 #endif 0122 } 0123 0124 void POExtractor::extract(ExtractionResult* result) 0125 { 0126 std::ifstream fstream(QFile::encodeName(result->inputUrl()).constData()); 0127 if (!fstream.is_open()) { 0128 return; 0129 } 0130 0131 result->addType(Type::Text); 0132 if (!(result->inputFlags() & ExtractionResult::ExtractPlainText)) { 0133 return; 0134 } 0135 0136 state = WHITESPACE; 0137 messages = 0; 
0138 untranslated = 0; 0139 fuzzy = 0; 0140 isFuzzy = false; 0141 isTranslated = false; 0142 0143 std::string line; 0144 int lines = 0; 0145 while (std::getline(fstream, line)) { 0146 //TODO add a parsed text of translation units 0147 //QByteArray arr = QByteArray::fromRawData(line.c_str(), line.size()); 0148 //result->append(QString::fromUtf8(arr)); 0149 0150 handleLine(line.c_str(), line.size()); 0151 lines++; 0152 0153 0154 if (messages <= 1 && state == MSGSTR) 0155 { 0156 // handle special values in the first message 0157 // assumption is that value takes up only one line 0158 if (strncmp("\"POT-Creation-Date: ", line.c_str(), 20) == 0) { 0159 result->add(Property::TranslationTemplateDate, QByteArray(line.c_str() + 20, line.size() - 21)); 0160 } else if (strncmp("\"PO-Revision-Date: ", line.c_str(), 19) == 0) { 0161 result->add(Property::TranslationLastUpDate, QByteArray(line.c_str() + 19, line.size() - 20)); 0162 } else if (strncmp("\"Last-Translator: ", line.c_str(), 18) == 0) { 0163 result->add(Property::TranslationLastAuthor, QString::fromUtf8(QByteArray::fromRawData(line.c_str() + 18, line.size() - 19))); 0164 } 0165 } 0166 } 0167 handleLine("", 0); //for files with non-empty last line 0168 messages--;//cause header does not count 0169 0170 result->add(Property::TranslationUnitsTotal, messages); 0171 result->add(Property::TranslationUnitsWithTranslation, messages-untranslated); 0172 result->add(Property::TranslationUnitsWithDraftTranslation, fuzzy); 0173 result->add(Property::LineCount, lines); 0174 //TODO WordCount 0175 } 0176 0177 #include "moc_poextractor.cpp"