File indexing completed on 2025-03-16 12:49:36
0001 /* 0002 SPDX-FileCopyrightText: 2012 Vishesh Handa <me@vhanda.in> 0003 0004 SPDX-License-Identifier: LGPL-2.1-or-later 0005 */ 0006 0007 0008 #include "plaintextextractor.h" 0009 0010 #include <QFile> 0011 #include <QTextCodec> 0012 #include <QDebug> 0013 0014 #include <fstream> 0015 0016 #if defined(Q_OS_LINUX) || defined(__GLIBC__) 0017 #include <sys/types.h> 0018 #include <sys/stat.h> 0019 #include <fcntl.h> 0020 #include <unistd.h> 0021 #endif 0022 0023 using namespace KFileMetaData; 0024 0025 PlainTextExtractor::PlainTextExtractor(QObject* parent) 0026 : ExtractorPlugin(parent) 0027 { 0028 0029 } 0030 0031 const QStringList supportedMimeTypes = { 0032 QStringLiteral("text/plain"), 0033 }; 0034 0035 QStringList PlainTextExtractor::mimetypes() const 0036 { 0037 return supportedMimeTypes; 0038 } 0039 0040 void PlainTextExtractor::extract(ExtractionResult* result) 0041 { 0042 #if defined(Q_OS_LINUX) || defined(__GLIBC__) 0043 QByteArray filePath = QFile::encodeName(result->inputUrl()); 0044 0045 #ifdef O_NOATIME 0046 int fd = open(filePath.constData(), O_RDONLY | O_NOATIME); 0047 if (fd < 0) 0048 #else 0049 int fd; 0050 #endif 0051 { 0052 fd = open(filePath.constData(), O_RDONLY); 0053 } 0054 0055 if (fd < 0) { 0056 return; 0057 } 0058 0059 result->addType(Type::Text); 0060 if (!(result->inputFlags() & ExtractionResult::ExtractPlainText)) { 0061 close(fd); 0062 return; 0063 } 0064 0065 QTextCodec* codec = QTextCodec::codecForLocale(); 0066 0067 char* line = nullptr; 0068 size_t len = 0; 0069 int lines = 0; 0070 int r = 0; 0071 0072 FILE* fp = fdopen(fd, "r"); 0073 0074 while ( (r = getline(&line, &len, fp)) != -1) { 0075 QTextCodec::ConverterState state; 0076 QString text = codec->toUnicode(line, r - 1, &state); 0077 0078 if (state.invalidChars > 0) { 0079 qDebug() << "Invalid encoding. Ignoring" << result->inputUrl(); 0080 free(line); 0081 close(fd); 0082 return; 0083 } 0084 result->append(text); 0085 0086 lines += 1; 0087 } 0088 if (result->inputFlags() & ExtractionResult::ExtractMetaData) { 0089 result->add(Property::LineCount, lines); 0090 } 0091 0092 free(line); 0093 close(fd); 0094 0095 #else 0096 std::string line; 0097 int lines = 0; 0098 0099 std::ifstream fstream(QFile::encodeName(result->inputUrl()).constData()); 0100 if (!fstream.is_open()) { 0101 return; 0102 } 0103 0104 result->addType(Type::Text); 0105 if (!(result->inputFlags() & ExtractionResult::ExtractPlainText)) { 0106 return; 0107 } 0108 0109 QTextCodec* codec = QTextCodec::codecForLocale(); 0110 while (std::getline(fstream, line)) { 0111 QByteArray arr = QByteArray::fromRawData(line.c_str(), line.size()); 0112 0113 QTextCodec::ConverterState state; 0114 QString text = codec->toUnicode(arr.constData(), arr.size(), &state); 0115 0116 if (state.invalidChars > 0) { 0117 qDebug() << "Invalid encoding. Ignoring" << result->inputUrl(); 0118 return; 0119 } 0120 result->append(text); 0121 0122 lines += 1; 0123 } 0124 0125 result->add(Property::LineCount, lines); 0126 #endif 0127 } 0128 0129 #include "moc_plaintextextractor.cpp"