File indexing completed on 2024-05-05 16:08:26

0001 /* This file is part of the KDE libraries
0002 
0003    Copyright (c) 2001,2002 Carsten Pfeiffer <pfeiffer@kde.org>
0004                  2007 Jos van den Oever <jos@vandenoever.info>
0005                  2010 Sebastian Trueg <trueg@kde.org>
0006 
0007    This library is free software; you can redistribute it and/or
0008    modify it under the terms of the GNU Library General Public
0009    License (LGPL) as published by the Free Software Foundation; either
0010    version 2 of the License, or (at your option) any later version.
0011 
0012    This library is distributed in the hope that it will be useful,
0013    but WITHOUT ANY WARRANTY; without even the implied warranty of
0014    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0015    Library General Public License for more details.
0016 
0017    You should have received a copy of the GNU Library General Public License
0018    along with this library; see the file COPYING.LIB.  If not, write to
0019    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
0020    Boston, MA 02110-1301, USA.
0021 */
0022 
0023 #include "kfilemetainfo.h"
0024 #include "kfilemetainfoitem_p.h"
0025 #include "kfilewriteplugin.h"
0026 #include "kfilewriteplugin_p.h"
0027 
0028 #include "config-kdelibs4support.h"
0029 
0030 #if ! KIO_NO_STRIGI
0031 #include <strigi/bufferedstream.h>
0032 #include <strigi/analyzerconfiguration.h>
0033 #include <strigi/indexwriter.h>
0034 #include <strigi/analysisresult.h>
0035 #include <strigi/fieldtypes.h>
0036 #endif
0037 
0038 #include <QFileInfo>
0039 #include <QDateTime>
0040 
0041 class KFileMetaInfoGroupPrivate : public QSharedData
0042 {
0043 public:
0044     QString name;
0045 };
0046 
0047 KFileMetaInfoGroup::~KFileMetaInfoGroup()
0048 {
0049 }
0050 
0051 KFileMetaInfoGroup::KFileMetaInfoGroup(KFileMetaInfoGroup const &g)
0052 {
0053     d = g.d;
0054 }
0055 
0056 QDataStream &operator >> (QDataStream &s, KFileMetaInfo &)
0057 {
0058     return s;
0059 }
0060 
0061 QDataStream &operator << (QDataStream &s, const KFileMetaInfo &)
0062 {
0063     return s;
0064 }
0065 #if ! KIO_NO_STRIGI
0066 /**
0067  * @brief Wrap a QIODevice in a Strigi stream.
0068  **/
0069 class QIODeviceInputStream : public Strigi::BufferedInputStream
0070 {
0071 private:
0072     QIODevice &in;
0073     const qint64 m_maxRead;
0074     qint64 m_read;
0075     int32_t fillBuffer(char *start, int32_t space);
0076 public:
0077     QIODeviceInputStream(QIODevice &i, qint64 max);
0078 };
0079 
0080 int32_t
0081 QIODeviceInputStream::fillBuffer(char *start, int32_t space)
0082 {
0083     if (!in.isOpen() || !in.isReadable()) {
0084         return -1;
0085     }
0086 
0087     // we force a max stream read length according to the config since some Strigi
0088     // plugins simply ignore the value which will lead to frozen client apps
0089     qint64 max = m_maxRead;
0090     if (max < 0) {
0091         max = space;
0092     } else {
0093         max = qMin(qint64(space), qMax(max - m_read, qint64(0)));
0094     }
0095 
0096     // read into the buffer
0097     int32_t nwritten = in.read(start, max);
0098 
0099     // check the file stream status
0100     if (nwritten < 0) {
0101         m_error = "Could not read from QIODevice.";
0102         in.close();
0103         return -1;
0104     }
0105     if (nwritten == 0 || in.atEnd()) {
0106         in.close();
0107     }
0108     m_read += nwritten;
0109     return nwritten;
0110 }
0111 
0112 QIODeviceInputStream::QIODeviceInputStream(QIODevice &i, qint64 max)
0113     : in(i),
0114       m_maxRead(max),
0115       m_read(0)
0116 {
0117     // determine if we have a character device, which will likely never eof and thereby
0118     // potentially cause an infinite loop.
0119     if (i.isSequential()) {
0120         in.close(); // cause fillBuffer to return -1
0121     }
0122 }
0123 
0124 /**
0125  * @brief KMetaInfoWriter handles the data returned by the Strigi analyzers and
0126  * store it in a KFileMetaInfo.
0127  **/
0128 class KMetaInfoWriter : public Strigi::IndexWriter
0129 {
0130 public:
0131     // irrelevant for KFileMetaInfo
0132     void startAnalysis(const Strigi::AnalysisResult *)
0133     {
0134     }
0135 
0136     // irrelevant for KFileMetaInfo
0137     // we do not store text as metainfo
0138     void addText(const Strigi::AnalysisResult *, const char * /*s*/, int32_t /*n*/)
0139     {
0140     }
0141     void addValue(const Strigi::AnalysisResult *idx, const Strigi::RegisteredField *field,
0142                   const std::string &value)
0143     {
0144         if (idx->writerData()) {
0145             QString val = QString::fromUtf8(value.c_str(), value.size());
0146             if (!val.startsWith(':')) {
0147                 addValue(idx, field, val);
0148             }
0149         }
0150     }
0151     void addValue(const Strigi::AnalysisResult *idx, const Strigi::RegisteredField *field,
0152                   const unsigned char *data, uint32_t size)
0153     {
0154         if (idx->writerData()) {
0155             QByteArray d((const char *)data, size);
0156             addValue(idx, field, QVariant(d));
0157         }
0158     }
0159     void addValue(const Strigi::AnalysisResult *idx, const Strigi::RegisteredField *field,
0160                   uint32_t value)
0161     {
0162         if (idx->writerData()) {
0163             addValue(idx, field, QVariant((quint32)value));
0164         }
0165     }
0166     void addValue(const Strigi::AnalysisResult *idx, const Strigi::RegisteredField *field,
0167                   int32_t value)
0168     {
0169         if (idx->writerData()) {
0170             addValue(idx, field, QVariant((qint32)value));
0171         }
0172     }
0173     void addValue(const Strigi::AnalysisResult *idx, const Strigi::RegisteredField *field,
0174                   double value)
0175     {
0176         if (idx->writerData()) {
0177             addValue(idx, field, QVariant(value));
0178         }
0179     }
0180     void addValue(const Strigi::AnalysisResult *idx,
0181                   const Strigi::RegisteredField *field, const QVariant &value)
0182     {
0183         QHash<QString, KFileMetaInfoItem> *info
0184             = static_cast<QHash<QString, KFileMetaInfoItem>*>(
0185                   idx->writerData());
0186         if (info) {
0187             std::string name(field->key());
0188             QString key = QString::fromUtf8(name.c_str(), name.size());
0189             QHash<QString, KFileMetaInfoItem>::iterator i = info->find(key);
0190             if (i == info->end()) {
0191                 info->insert(key, KFileMetaInfoItem(key, value, 0, true));
0192             } else {
0193                 i.value().addValue(value);
0194             }
0195         }
0196     }
0197     void addValue(const Strigi::AnalysisResult *ar,
0198                   const Strigi::RegisteredField *field, const std::string &name,
0199                   const std::string &value)
0200     {
0201         if (ar->writerData()) {
0202             QVariantMap m;
0203             m.insert(name.c_str(), value.c_str());
0204             addValue(ar, field, m);
0205         }
0206     }
0207 
0208     /* irrelevant for KFileMetaInfo: These triples does not convey information
0209      * about this file, so we ignore it
0210      */
0211     void addTriplet(const std::string & /*subject*/,
0212                     const std::string & /*predicate*/, const std::string & /*object*/)
0213     {
0214     }
0215 
0216     // irrelevant for KFileMetaInfo
0217     void finishAnalysis(const Strigi::AnalysisResult *) {}
0218     // irrelevant for KFileMetaInfo
0219     void deleteEntries(const std::vector<std::string> &) {}
0220     // irrelevant for KFileMetaInfo
0221     void deleteAllEntries() {}
0222 };
0223 
0224 class KFileMetaInfoPrivate : public QSharedData
0225 {
0226 public:
0227     QHash<QString, KFileMetaInfoItem> items;
0228     QUrl m_url;
0229 
0230     void init(QIODevice &stream, const QUrl &url, const QDateTime &mtime, KFileMetaInfo::WhatFlags w = KFileMetaInfo::Everything);
0231     void initWriters(const QUrl & /*file*/);
0232     void operator= (const KFileMetaInfoPrivate &k)
0233     {
0234         items = k.items;
0235     }
0236 };
0237 static const KFileMetaInfoItem nullitem;
0238 
0239 class KFileMetaInfoAnalysisConfiguration : public Strigi::AnalyzerConfiguration
0240 {
0241 public:
0242     KFileMetaInfoAnalysisConfiguration(KFileMetaInfo::WhatFlags indexDetail)
0243         : m_indexDetail(indexDetail)
0244     {
0245     }
0246 
0247     int64_t maximalStreamReadLength(const Strigi::AnalysisResult &ar)
0248     {
0249         if (ar.depth() > 0) {
0250             return 0;    // ignore all data that has a depth > 0, i.e. files in archives
0251         } else if (m_indexDetail == KFileMetaInfo::Everything) {
0252             return -1;
0253         } else {
0254             return 65536;    // do not read the whole file - this is used for on-the-fly analysis
0255         }
0256     }
0257 
0258 private:
0259     KFileMetaInfo::WhatFlags m_indexDetail;
0260 };
0261 
0262 void KFileMetaInfoPrivate::init(QIODevice &stream, const QUrl &url, const QDateTime &mtime, KFileMetaInfo::WhatFlags w)
0263 {
0264     m_url = url;
0265 
0266     // get data from Strigi
0267     KFileMetaInfoAnalysisConfiguration c(w);
0268     Strigi::StreamAnalyzer indexer(c);
0269     KMetaInfoWriter writer;
0270     //qDebug() << url;
0271     Strigi::AnalysisResult idx(url.toLocalFile().toUtf8().constData(), mtime.toTime_t(), writer, indexer);
0272     idx.setWriterData(&items);
0273 
0274     QIODeviceInputStream strigiStream(stream, c.maximalStreamReadLength(idx));
0275     indexer.analyze(idx, &strigiStream);
0276 
0277     // TODO: get data from Nepomuk
0278 }
0279 
0280 void KFileMetaInfoPrivate::initWriters(const QUrl &file)
0281 {
0282     QStringList mimetypes;
0283     QHash<QString, KFileMetaInfoItem>::iterator i;
0284     for (i = items.begin(); i != items.end(); ++i) {
0285         KFileWritePlugin *w =
0286             KFileWriterProvider::self()->loadPlugin(i.key());
0287         if (w && w->canWrite(file, i.key())) {
0288             i.value().d->writer = w;
0289         }
0290     }
0291 }
0292 
0293 KFileMetaInfo::KFileMetaInfo(const QString &path, const QString & /*mimetype*/,
0294                              KFileMetaInfo::WhatFlags w)
0295     : d(new KFileMetaInfoPrivate())
0296 {
0297     QFileInfo fileinfo(path);
0298     QFile file(path);
0299     // only open the file if it is a filetype Qt understands
0300     // if e.g. the path points to a pipe, it is not opened
0301     if ((fileinfo.isFile() || fileinfo.isDir() || fileinfo.isSymLink())
0302             && file.open(QIODevice::ReadOnly)) {
0303         const QUrl u = QUrl::fromLocalFile(path);
0304         d->init(file, u, fileinfo.lastModified(), w);
0305         if (fileinfo.isWritable()) {
0306             d->initWriters(u);
0307         }
0308     }
0309 }
0310 
0311 KFileMetaInfo::KFileMetaInfo(const QUrl &url)
0312     : d(new KFileMetaInfoPrivate())
0313 {
0314     QFile file(url.toLocalFile());
0315     if (file.open(QIODevice::ReadOnly)) {
0316         QFileInfo fileinfo(url.toLocalFile());
0317         d->init(file, url, fileinfo.lastModified());
0318         if (fileinfo.isWritable()) {
0319             d->initWriters(url);
0320         }
0321     }
0322 }
0323 
0324 KFileMetaInfo::KFileMetaInfo() : d(new KFileMetaInfoPrivate())
0325 {
0326 }
0327 
0328 KFileMetaInfo::KFileMetaInfo(const KFileMetaInfo &k) : d(k.d)
0329 {
0330 }
0331 
0332 KFileMetaInfo &KFileMetaInfo::operator= (KFileMetaInfo const &kfmi)
0333 {
0334     d = kfmi.d;
0335     return *this;
0336 }
0337 
0338 KFileMetaInfo::~KFileMetaInfo()
0339 {
0340 }
0341 
0342 bool KFileMetaInfo::applyChanges()
0343 {
0344     // go through all editable fields and group them by writer
0345     QHash<KFileWritePlugin *, QVariantMap> data;
0346     QHash<QString, KFileMetaInfoItem>::const_iterator i;
0347     for (i = d->items.constBegin(); i != d->items.constEnd(); ++i) {
0348         if (i.value().isModified() && i.value().d->writer) {
0349             data[i.value().d->writer][i.key() ] = i.value().value();
0350         }
0351     }
0352 
0353     // call the writers on the data they can write
0354     bool ok = true;
0355     QHash<KFileWritePlugin *, QVariantMap>::const_iterator j;
0356     for (j = data.constBegin(); j != data.constEnd(); ++j) {
0357         ok &= j.key()->write(d->m_url, j.value());
0358     }
0359     return ok;
0360 }
0361 
0362 QUrl KFileMetaInfo::url() const
0363 {
0364     return d->m_url;
0365 }
0366 
0367 const QHash<QString, KFileMetaInfoItem> &KFileMetaInfo::items() const
0368 {
0369     return d->items;
0370 }
0371 
0372 const KFileMetaInfoItem &KFileMetaInfo::item(const QString &key) const
0373 {
0374     QHash<QString, KFileMetaInfoItem>::const_iterator i = d->items.constFind(key);
0375     return (i == d->items.constEnd()) ? nullitem : i.value();
0376 }
0377 
0378 QStringList KFileMetaInfo::keys() const
0379 {
0380     return d->items.keys();
0381 }
0382 
0383 KFileMetaInfoItem &KFileMetaInfo::item(const QString &key)
0384 {
0385     return d->items[key];
0386 }
0387 
0388 bool KFileMetaInfo::isValid() const
0389 {
0390     return !d->m_url.isEmpty();
0391 }
0392 
0393 QStringList KFileMetaInfo::preferredKeys() const
0394 {
0395     return QStringList();
0396 }
0397 
0398 QStringList KFileMetaInfo::supportedKeys() const
0399 {
0400     return QStringList();
0401 }
0402 
0403 #ifndef KDELIBS4SUPPORT_NO_DEPRECATED
0404 KFileMetaInfoGroupList KFileMetaInfo::preferredGroups() const
0405 {
0406     return KFileMetaInfoGroupList();
0407 }
0408 #endif
0409 
0410 #ifndef KDELIBS4SUPPORT_NO_DEPRECATED
0411 KFileMetaInfoGroupList KFileMetaInfo::supportedGroups() const
0412 {
0413     return KFileMetaInfoGroupList();
0414 }
0415 #endif
0416 #else //KIO_NO_STRIGI
0417 
0418 class KFileMetaInfoPrivate : public QSharedData
0419 {
0420 public:
0421 };
0422 
0423 KFileMetaInfo::KFileMetaInfo(const QString &path, const QString & /*mimetype*/,
0424                              KFileMetaInfo::WhatFlags w)
0425 {
0426 }
0427 
0428 KFileMetaInfo::KFileMetaInfo(const QUrl &url)
0429 {
0430 }
0431 
0432 KFileMetaInfo::KFileMetaInfo()
0433 {
0434 }
0435 
0436 KFileMetaInfo::KFileMetaInfo(const KFileMetaInfo &k)
0437 {
0438 }
0439 
0440 KFileMetaInfo &KFileMetaInfo::operator= (KFileMetaInfo const &kfmi)
0441 {
0442     d = kfmi.d;
0443     return *this;
0444 }
0445 
0446 KFileMetaInfo::~KFileMetaInfo()
0447 {
0448 }
0449 
0450 bool KFileMetaInfo::applyChanges()
0451 {
0452     return false;
0453 }
0454 
0455 QUrl KFileMetaInfo::url() const
0456 {
0457     return QUrl();
0458 }
0459 
0460 const QHash<QString, KFileMetaInfoItem> &KFileMetaInfo::items() const
0461 {
0462     static const QHash<QString, KFileMetaInfoItem> items;
0463     return items;
0464 }
0465 
0466 const KFileMetaInfoItem &KFileMetaInfo::item(const QString &key) const
0467 {
0468     static const KFileMetaInfoItem item;
0469     return item;
0470 }
0471 
0472 QStringList KFileMetaInfo::keys() const
0473 {
0474     return QStringList();
0475 }
0476 
0477 KFileMetaInfoItem &KFileMetaInfo::item(const QString &key)
0478 {
0479     static KFileMetaInfoItem item;
0480     return item;
0481 }
0482 
0483 bool KFileMetaInfo::isValid() const
0484 {
0485     return false;
0486 }
0487 
0488 QStringList KFileMetaInfo::preferredKeys() const
0489 {
0490     return QStringList();
0491 }
0492 
0493 QStringList KFileMetaInfo::supportedKeys() const
0494 {
0495     return QStringList();
0496 }
0497 #endif //KIO_NO_STRIGI
0498 
0499 KFileMetaInfoItemList KFileMetaInfoGroup::items() const
0500 {
0501     return KFileMetaInfoItemList();
0502 }
0503 
0504 const QString &KFileMetaInfoGroup::name() const
0505 {
0506     return d->name;
0507 }