File indexing completed on 2024-05-19 03:56:45

0001 /*
0002     SPDX-FileCopyrightText: 2012 Vishesh Handa <me@vhanda.in>
0003 
0004     SPDX-License-Identifier: LGPL-2.1-or-later
0005 */
0006 
0007 
0008 #include "plaintextextractor.h"
0009 
0010 #include <QDebug>
0011 #include <QStringDecoder>
0012 #include <QFile>
0013 
0014 #include <fstream>
0015 
0016 #if defined(Q_OS_LINUX) || defined(__GLIBC__)
0017     #include <sys/types.h>
0018     #include <sys/stat.h>
0019     #include <fcntl.h>
0020     #include <unistd.h>
0021 #endif
0022 
0023 using namespace KFileMetaData;
0024 
0025 PlainTextExtractor::PlainTextExtractor(QObject* parent)
0026     : ExtractorPlugin(parent)
0027 {
0028 
0029 }
0030 
0031 const QStringList supportedMimeTypes = {
0032     QStringLiteral("text/plain"),
0033 };
0034 
0035 QStringList PlainTextExtractor::mimetypes() const
0036 {
0037     return supportedMimeTypes;
0038 }
0039 
0040 void PlainTextExtractor::extract(ExtractionResult* result)
0041 {
0042 #if defined(Q_OS_LINUX) || defined(__GLIBC__)
0043     QByteArray filePath = QFile::encodeName(result->inputUrl());
0044 
0045 #ifdef O_NOATIME
0046     int fd = open(filePath.constData(), O_RDONLY | O_NOATIME);
0047     if (fd < 0)
0048 #else
0049     int fd;
0050 #endif
0051     {
0052         fd = open(filePath.constData(), O_RDONLY);
0053     }
0054 
0055     if (fd < 0) {
0056         return;
0057     }
0058 
0059     result->addType(Type::Text);
0060     if (!(result->inputFlags() & ExtractionResult::ExtractPlainText)) {
0061         close(fd);
0062         return;
0063     }
0064 
0065     QStringDecoder codec(QStringConverter::System);
0066 
0067     char* line = nullptr;
0068     size_t len = 0;
0069     int lines = 0;
0070     int r = 0;
0071 
0072     FILE* fp = fdopen(fd, "r");
0073 
0074     while ( (r = getline(&line, &len, fp)) != -1) {
0075         QString text = codec.decode(QByteArrayView(line, r - 1));
0076 
0077         if (codec.hasError()) {
0078             qDebug() << "Invalid encoding. Ignoring" << result->inputUrl();
0079             free(line);
0080             close(fd);
0081             return;
0082         }
0083         result->append(text);
0084 
0085         lines += 1;
0086     }
0087     if (result->inputFlags() & ExtractionResult::ExtractMetaData) {
0088         result->add(Property::LineCount, lines);
0089     }
0090 
0091     free(line);
0092     close(fd);
0093 
0094 #else
0095     std::string line;
0096     int lines = 0;
0097 
0098     std::ifstream fstream(QFile::encodeName(result->inputUrl()).constData());
0099     if (!fstream.is_open()) {
0100         return;
0101     }
0102 
0103     result->addType(Type::Text);
0104     if (!(result->inputFlags() & ExtractionResult::ExtractPlainText)) {
0105         return;
0106     }
0107 
0108     QStringDecoder codec(QStringConverter::System);
0109     while (std::getline(fstream, line)) {
0110         QByteArray arr = QByteArray::fromRawData(line.c_str(), line.size());
0111 
0112         QString text = codec.decode(arr);
0113 
0114         if (codec.hasError()) {
0115             qDebug() << "Invalid encoding. Ignoring" << result->inputUrl();
0116             return;
0117         }
0118         result->append(text);
0119 
0120         lines += 1;
0121     }
0122 
0123     result->add(Property::LineCount, lines);
0124 #endif
0125 }
0126 
0127 #include "moc_plaintextextractor.cpp"