File indexing completed on 2024-05-12 15:37:06

0001 /*
0002     Gettext translation file analyzer
0003 
0004     SPDX-FileCopyrightText: 2007 Montel Laurent <montel@kde.org>
0005     SPDX-FileCopyrightText: 2009 Jos van den Oever <jos@vandenoever.info>
0006     SPDX-FileCopyrightText: 2014 Nick Shaforostoff <shaforostoff@gmail.com>
0007 
0008     SPDX-License-Identifier: LGPL-2.1-or-later
0009 */
0010 
0011 
0012 #include "poextractor.h"
0013 #include <QFile>
0014 #include <fstream>
0015 
0016 using namespace KFileMetaData;
0017 
0018 POExtractor::POExtractor(QObject* parent)
0019     : ExtractorPlugin(parent)
0020 {
0021 
0022 }
0023 
0024 const QStringList supportedMimeTypes = {
0025     QStringLiteral("text/x-gettext-translation"),
0026 };
0027 
0028 QStringList POExtractor::mimetypes() const
0029 {
0030     return supportedMimeTypes;
0031 }
0032 
0033 void POExtractor::endMessage()
0034 {
0035     messages++;
0036     fuzzy+=isFuzzy;
0037     untranslated+=(!isTranslated);
0038 
0039     isFuzzy = false;
0040     isTranslated = false;
0041     state = WHITESPACE;
0042 }
0043 
0044 void POExtractor::handleComment(const char* data, quint32 length)
0045 {
0046     state = COMMENT;
0047     if (length >= 8 && strncmp(data, "#, fuzzy", 8) == 0) { // could be better
0048         isFuzzy = true;
0049     }
0050 }
0051 
0052 void POExtractor::handleLine(const char* data, quint32 length)
0053 {
0054     if (state == ERROR) {
0055         return;
0056     }
0057     if (state == WHITESPACE) {
0058         if (length == 0) {
0059             return;
0060         }
0061         if (data[0] != '#') {
0062             state = COMMENT; //this allows PO files w/o comments
0063         } else {
0064             handleComment(data, length);
0065             return;
0066         }
0067     }
0068     if (state == COMMENT) {
0069         if (length == 0) {
0070             state = WHITESPACE;
0071         } else if (data[0] == '#') {
0072             handleComment(data, length);
0073         } else if (length > 7 && strncmp("msgctxt", data, 7) == 0) {
0074             state = MSGCTXT;
0075         } else if (length > 7 && strncmp("msgid \"", data, 7) == 0) {
0076             state = MSGID;
0077         } else {
0078             state = ERROR;
0079         }
0080         return;
0081     } else if (length > 1 && data[0] == '"' && data[length-1] == '"'
0082             && (state == MSGCTXT || state == MSGID || state == MSGSTR
0083                 || state == MSGID_PLURAL)) {
0084         // continued text field
0085         isTranslated = state == MSGSTR && length > 2;
0086     } else if (state == MSGCTXT
0087             && length > 7 && strncmp("msgid \"", data, 7) == 0) {
0088         state = MSGID;
0089     } else if (state == MSGID
0090             && length > 14 && strncmp("msgid_plural \"", data, 14) == 0) {
0091         state = MSGID_PLURAL;
0092     } else if ((state == MSGID || state == MSGID_PLURAL || state == MSGSTR)
0093             && length > 8 && strncmp("msgstr", data, 6) == 0) {
0094         state = MSGSTR;
0095         isTranslated = strncmp(data+length-3, " \"\"", 3) != 0;
0096     } else if (state == MSGSTR) {
0097         if (length == 0) {
0098             endMessage();
0099         } else if (data[0]=='#' || data[0]=='m') { //allow PO without empty line between entries
0100             endMessage();
0101             state = COMMENT;
0102             handleLine(data, length);
0103         } else {
0104             state = ERROR;
0105         }
0106     } else {
0107         state = ERROR;
0108     }
0109 #if 0
0110     if (messages > 1 || state != MSGSTR) return;
0111 
0112     // handle special values in the first message
0113     // assumption is that value takes up only one line
0114     if (strncmp("\"POT-Creation-Date: ", data, 20) == 0) {
0115         result->add(Property::TranslationTemplateDate, QByteArray(data + 20, length - 21));
0116     } else if (strncmp("\"PO-Revision-Date: ", data, 19) == 0) {
0117         result->add(Property::TranslationLastUpDate, QByteArray(data + 19, length - 20));
0118     } else if (strncmp("\"Last-Translator: ", data, 18) == 0) {
0119         result->add(Property::TranslationLastAuthor, QByteArray(data + 18, length - 19));
0120     }
0121 #endif
0122 }
0123 
0124 void POExtractor::extract(ExtractionResult* result)
0125 {
0126     std::ifstream fstream(QFile::encodeName(result->inputUrl()).constData());
0127     if (!fstream.is_open()) {
0128         return;
0129     }
0130 
0131     result->addType(Type::Text);
0132     if (!(result->inputFlags() & ExtractionResult::ExtractPlainText)) {
0133         return;
0134     }
0135 
0136     state = WHITESPACE;
0137     messages = 0;
0138     untranslated = 0;
0139     fuzzy = 0;
0140     isFuzzy = false;
0141     isTranslated = false;
0142 
0143     std::string line;
0144     int lines = 0;
0145     while (std::getline(fstream, line)) {
0146         //TODO add a parsed text of translation units
0147         //QByteArray arr = QByteArray::fromRawData(line.c_str(), line.size());
0148         //result->append(QString::fromUtf8(arr));
0149 
0150         handleLine(line.c_str(), line.size());
0151         lines++;
0152         
0153         
0154         if (messages <= 1 && state == MSGSTR)
0155         {
0156             // handle special values in the first message
0157             // assumption is that value takes up only one line
0158             if (strncmp("\"POT-Creation-Date: ", line.c_str(), 20) == 0) {
0159                 result->add(Property::TranslationTemplateDate, QByteArray(line.c_str() + 20, line.size() - 21));
0160             } else if (strncmp("\"PO-Revision-Date: ", line.c_str(), 19) == 0) {
0161                 result->add(Property::TranslationLastUpDate, QByteArray(line.c_str() + 19, line.size() - 20));
0162             } else if (strncmp("\"Last-Translator: ", line.c_str(), 18) == 0) {
0163                 result->add(Property::TranslationLastAuthor, QString::fromUtf8(QByteArray::fromRawData(line.c_str() + 18, line.size() - 19)));
0164             }
0165         }
0166     }
0167     handleLine("", 0); //for files with non-empty last line
0168     messages--;//cause header does not count
0169 
0170     result->add(Property::TranslationUnitsTotal, messages);
0171     result->add(Property::TranslationUnitsWithTranslation, messages-untranslated);
0172     result->add(Property::TranslationUnitsWithDraftTranslation, fuzzy);
0173     result->add(Property::LineCount, lines);
0174     //TODO WordCount
0175 }
0176 
0177 #include "moc_poextractor.cpp"