File indexing completed on 2025-04-27 03:41:26
0001 /* 0002 SPDX-FileCopyrightText: 2012 Vishesh Handa <me@vhanda.in> 0003 0004 SPDX-License-Identifier: LGPL-2.1-or-later 0005 */ 0006 0007 0008 #include "plaintextextractor.h" 0009 0010 #include <QDebug> 0011 #include <QStringDecoder> 0012 #include <QFile> 0013 0014 #include <fstream> 0015 0016 #if defined(Q_OS_LINUX) || defined(__GLIBC__) 0017 #include <sys/types.h> 0018 #include <sys/stat.h> 0019 #include <fcntl.h> 0020 #include <unistd.h> 0021 #endif 0022 0023 using namespace KFileMetaData; 0024 0025 PlainTextExtractor::PlainTextExtractor(QObject* parent) 0026 : ExtractorPlugin(parent) 0027 { 0028 0029 } 0030 0031 const QStringList supportedMimeTypes = { 0032 QStringLiteral("text/plain"), 0033 }; 0034 0035 QStringList PlainTextExtractor::mimetypes() const 0036 { 0037 return supportedMimeTypes; 0038 } 0039 0040 void PlainTextExtractor::extract(ExtractionResult* result) 0041 { 0042 #if defined(Q_OS_LINUX) || defined(__GLIBC__) 0043 QByteArray filePath = QFile::encodeName(result->inputUrl()); 0044 0045 #ifdef O_NOATIME 0046 int fd = open(filePath.constData(), O_RDONLY | O_NOATIME); 0047 if (fd < 0) 0048 #else 0049 int fd; 0050 #endif 0051 { 0052 fd = open(filePath.constData(), O_RDONLY); 0053 } 0054 0055 if (fd < 0) { 0056 return; 0057 } 0058 0059 result->addType(Type::Text); 0060 if (!(result->inputFlags() & ExtractionResult::ExtractPlainText)) { 0061 close(fd); 0062 return; 0063 } 0064 0065 QStringDecoder codec(QStringConverter::System); 0066 0067 char* line = nullptr; 0068 size_t len = 0; 0069 int lines = 0; 0070 int r = 0; 0071 0072 FILE* fp = fdopen(fd, "r"); 0073 0074 while ( (r = getline(&line, &len, fp)) != -1) { 0075 QString text = codec.decode(QByteArrayView(line, r - 1)); 0076 0077 if (codec.hasError()) { 0078 qDebug() << "Invalid encoding. Ignoring" << result->inputUrl(); 0079 free(line); 0080 close(fd); 0081 return; 0082 } 0083 result->append(text); 0084 0085 lines += 1; 0086 } 0087 if (result->inputFlags() & ExtractionResult::ExtractMetaData) { 0088 result->add(Property::LineCount, lines); 0089 } 0090 0091 free(line); 0092 close(fd); 0093 0094 #else 0095 std::string line; 0096 int lines = 0; 0097 0098 std::ifstream fstream(QFile::encodeName(result->inputUrl()).constData()); 0099 if (!fstream.is_open()) { 0100 return; 0101 } 0102 0103 result->addType(Type::Text); 0104 if (!(result->inputFlags() & ExtractionResult::ExtractPlainText)) { 0105 return; 0106 } 0107 0108 QStringDecoder codec(QStringConverter::System); 0109 while (std::getline(fstream, line)) { 0110 QByteArray arr = QByteArray::fromRawData(line.c_str(), line.size()); 0111 0112 QString text = codec.decode(arr); 0113 0114 if (codec.hasError()) { 0115 qDebug() << "Invalid encoding. Ignoring" << result->inputUrl(); 0116 return; 0117 } 0118 result->append(text); 0119 0120 lines += 1; 0121 } 0122 0123 result->add(Property::LineCount, lines); 0124 #endif 0125 } 0126 0127 #include "moc_plaintextextractor.cpp"