File indexing completed on 2024-12-22 04:57:39
0001 /* 0002 * SPDX-FileCopyrightText: 2010 Casey Link <unnamedrambler@gmail.com> 0003 * SPDX-FileCopyrightText: 2009-2010 Klaralvdalens Datakonsult AB, a KDAB Group company <info@kdab.net> 0004 0005 * This file includes code from old files from previous KDE versions: 0006 * SPDX-FileCopyrightText: 2003 Andreas Gungl <a.gungl@gmx.de> 0007 * SPDX-FileCopyrightText: 1996-1998 Stefan Taferner <taferner@kde.org> 0008 * 0009 * SPDX-License-Identifier: GPL-2.0-or-later 0010 * 0011 */ 0012 0013 #include "kmindexreader.h" 0014 #include "kmindexreader-config.h" 0015 0016 #include "../mixedmaildir_debug.h" 0017 #include "../mixedmaildirresource_debug.h" 0018 0019 using Akonadi::MessageStatus; 0020 0021 #include <qplatformdefs.h> 0022 0023 // BEGIN: Magic definitions from old kmail code 0024 #if HAVE_BYTESWAP_H 0025 #include <byteswap.h> 0026 #endif 0027 0028 static const int INDEX_VERSION = 1506; 0029 const size_t readCount = 1; 0030 #ifndef MAX_LINE 0031 static const int MAX_LINE = 4096; 0032 #endif 0033 0034 // We define functions as kmail_swap_NN so that we don't get compile errors 0035 // on platforms where bswap_NN happens to be a function instead of a define. 0036 0037 /* Swap bytes in 16 bit value. */ 0038 #ifdef bswap_16 0039 #define kmail_swap_16(x) bswap_16(x) 0040 #else 0041 #define kmail_swap_16(x) ((((x) >> 8) & 0xff) | (((x)&0xff) << 8)) 0042 #endif 0043 0044 /* Swap bytes in 32 bit value. */ 0045 #ifdef bswap_32 0046 #define kmail_swap_32(x) bswap_32(x) 0047 #else 0048 #define kmail_swap_32(x) ((((x)&0xff000000) >> 24) | (((x)&0x00ff0000) >> 8) | (((x)&0x0000ff00) << 8) | (((x)&0x000000ff) << 24)) 0049 #endif 0050 0051 /* Swap bytes in 64 bit value. */ 0052 #ifdef bswap_64 0053 #define kmail_swap_64(x) bswap_64(x) 0054 #else 0055 #define kmail_swap_64(x) \ 0056 ((((x)&0xff00000000000000ull) >> 56) | (((x)&0x00ff000000000000ull) >> 40) | (((x)&0x0000ff0000000000ull) >> 24) | (((x)&0x000000ff00000000ull) >> 8) \ 0057 | (((x)&0x00000000ff000000ull) << 8) | (((x)&0x0000000000ff0000ull) << 24) | (((x)&0x000000000000ff00ull) << 40) | (((x)&0x00000000000000ffull) << 56)) 0058 #endif 0059 0060 /** The old status format, only one at a time possible. Needed 0061 for upgrade path purposes. */ 0062 using KMLegacyMsgStatus = enum { 0063 KMLegacyMsgStatusUnknown = ' ', 0064 KMLegacyMsgStatusNew = 'N', 0065 KMLegacyMsgStatusUnread = 'U', 0066 KMLegacyMsgStatusRead = 'R', 0067 KMLegacyMsgStatusOld = 'O', 0068 KMLegacyMsgStatusDeleted = 'D', 0069 KMLegacyMsgStatusReplied = 'A', 0070 KMLegacyMsgStatusForwarded = 'F', 0071 KMLegacyMsgStatusQueued = 'Q', 0072 KMLegacyMsgStatusSent = 'S', 0073 KMLegacyMsgStatusFlag = 'G' 0074 }; 0075 0076 // END: Magic definitions from old kmail code 0077 0078 // BEGIN: KMIndexMsg methods 0079 0080 KMIndexData::KMIndexData() 0081 : mPartsCacheBuilt(false) 0082 { 0083 const uint count = sizeof(mCachedLongParts) / sizeof(unsigned long); 0084 for (uint i = 0; i < count; ++i) { 0085 mCachedLongParts[i] = 0; 0086 } 0087 } 0088 0089 MessageStatus &KMIndexData::status() 0090 { 0091 if (mStatus.isOfUnknownStatus()) { 0092 mStatus.fromQInt32(mCachedLongParts[KMIndexReader::MsgStatusPart]); 0093 if (mStatus.isOfUnknownStatus()) { 0094 // We are opening an old index for the first time, get the legacy 0095 // status and merge it in. 0096 // This is kept to provide an upgrade path from the old single 0097 // status to the new multiple status scheme. 0098 auto legacyMsgStatus = (KMLegacyMsgStatus)mCachedLongParts[KMIndexReader::MsgLegacyStatusPart]; 0099 mStatus.setRead(); 0100 switch (legacyMsgStatus) { 0101 case KMLegacyMsgStatusUnknown: 0102 mStatus.clear(); 0103 break; 0104 case KMLegacyMsgStatusUnread: 0105 mStatus.setRead(false); 0106 break; 0107 case KMLegacyMsgStatusRead: 0108 mStatus.setRead(); 0109 break; 0110 case KMLegacyMsgStatusDeleted: 0111 mStatus.setDeleted(); 0112 break; 0113 case KMLegacyMsgStatusReplied: 0114 mStatus.setReplied(); 0115 break; 0116 case KMLegacyMsgStatusForwarded: 0117 mStatus.setForwarded(); 0118 break; 0119 case KMLegacyMsgStatusQueued: 0120 mStatus.setQueued(); 0121 break; 0122 case KMLegacyMsgStatusSent: 0123 mStatus.setSent(); 0124 break; 0125 case KMLegacyMsgStatusFlag: 0126 mStatus.setImportant(); 0127 break; 0128 default: 0129 break; 0130 } 0131 } 0132 } 0133 return mStatus; 0134 } 0135 0136 QStringList KMIndexData::tagList() const 0137 { 0138 return mCachedStringParts[KMIndexReader::MsgTagPart].split(QLatin1Char(','), Qt::SkipEmptyParts); 0139 } 0140 0141 quint64 KMIndexData::uid() const 0142 { 0143 return mCachedLongParts[KMIndexReader::MsgUIDPart]; 0144 } 0145 0146 bool KMIndexData::isEmpty() const 0147 { 0148 return !mPartsCacheBuilt; 0149 } 0150 0151 // END: KMIndexMsg methods 0152 0153 KMIndexReader::KMIndexReader(const QString &indexFile) 0154 : mIndexFileName(indexFile) 0155 , mIndexFile(indexFile) 0156 , mConvertToUtf8(false) 0157 , mIndexSwapByteOrder(false) 0158 , mHeaderOffset(0) 0159 , mError(false) 0160 { 0161 if (!mIndexFile.exists()) { 0162 qCDebug(MIXEDMAILDIR_LOG) << "file doesn't exist"; 0163 mError = true; 0164 } 0165 0166 if (!mIndexFile.open(QIODevice::ReadOnly)) { 0167 qCDebug(MIXEDMAILDIR_LOG) << "file can't be open"; 0168 mError = true; 0169 } 0170 0171 mFp = fdopen(mIndexFile.handle(), "r"); 0172 } 0173 0174 KMIndexReader::~KMIndexReader() 0175 { 0176 if (mFp) { 0177 fclose(mFp); 0178 } 0179 } 0180 0181 bool KMIndexReader::error() const 0182 { 0183 return mError; 0184 } 0185 0186 KMIndexDataPtr KMIndexReader::dataByOffset(quint64 offset) const 0187 { 0188 QHash<quint64, KMIndexDataPtr>::const_iterator it = mMsgByOffset.constFind(offset); 0189 if (it == mMsgByOffset.constEnd()) { 0190 return {}; 0191 } 0192 0193 return it.value(); 0194 } 0195 0196 KMIndexDataPtr KMIndexReader::dataByFileName(const QString &fileName) const 0197 { 0198 QHash<QString, KMIndexDataPtr>::const_iterator it = mMsgByFileName.constFind(fileName); 0199 if (it == mMsgByFileName.constEnd()) { 0200 return {}; 0201 } 0202 0203 return it.value(); 0204 } 0205 0206 bool KMIndexReader::readHeader(int *version) 0207 { 0208 int indexVersion; 0209 Q_ASSERT(mFp != nullptr); 0210 mIndexSwapByteOrder = false; 0211 mIndexSizeOfLong = sizeof(long); 0212 0213 int ret = fscanf(mFp, "# KMail-Index V%d\n", &indexVersion); 0214 if (ret == EOF || ret == 0) { 0215 return false; // index file has invalid header 0216 } 0217 if (version) { 0218 *version = indexVersion; 0219 } 0220 if (indexVersion < 1505) { 0221 if (indexVersion == 1503) { 0222 qCWarning(MIXEDMAILDIRRESOURCE_LOG) << "Need to convert old index file" << mIndexFileName << "to utf-8"; 0223 mConvertToUtf8 = true; 0224 } 0225 return true; 0226 } else if (indexVersion == 1505) { 0227 } else if (indexVersion < INDEX_VERSION) { 0228 qCCritical(MIXEDMAILDIR_LOG) << "Index file" << mIndexFileName << "is out of date. What to do?"; 0229 // createIndexFromContents(); 0230 return false; 0231 } else if (indexVersion > INDEX_VERSION) { 0232 qFatal("index file of newer version"); 0233 return false; 0234 } else { 0235 // Header 0236 quint32 byteOrder = 0; 0237 quint32 sizeOfLong = sizeof(long); // default 0238 0239 quint32 header_length = 0; 0240 QT_FSEEK(mFp, sizeof(char), SEEK_CUR); 0241 if (fread(&header_length, sizeof(header_length), readCount, mFp) != readCount) { 0242 qCWarning(MIXEDMAILDIRRESOURCE_LOG) << "Failed to read header_length"; 0243 return false; 0244 } 0245 if (header_length > 0xFFFF) { 0246 header_length = kmail_swap_32(header_length); 0247 } 0248 0249 off_t endOfHeader = QT_FTELL(mFp) + header_length; 0250 0251 bool needs_update = true; 0252 // Process available header parts 0253 if (header_length >= sizeof(byteOrder)) { 0254 if (fread(&byteOrder, sizeof(byteOrder), readCount, mFp) != readCount) { 0255 qCWarning(MIXEDMAILDIRRESOURCE_LOG) << "Failed to read byteOrder"; 0256 return false; 0257 } 0258 mIndexSwapByteOrder = (byteOrder == 0x78563412); 0259 header_length -= sizeof(byteOrder); 0260 0261 if (header_length >= sizeof(sizeOfLong)) { 0262 if (fread(&sizeOfLong, sizeof(sizeOfLong), readCount, mFp) != readCount) { 0263 qCWarning(MIXEDMAILDIRRESOURCE_LOG) << "Failed to read sizeOfLong"; 0264 return false; 0265 } 0266 if (mIndexSwapByteOrder) { 0267 sizeOfLong = kmail_swap_32(sizeOfLong); 0268 } 0269 mIndexSizeOfLong = sizeOfLong; 0270 header_length -= sizeof(sizeOfLong); 0271 needs_update = false; 0272 } 0273 } 0274 if (needs_update || mIndexSwapByteOrder || (mIndexSizeOfLong != sizeof(long))) { 0275 qCDebug(MIXEDMAILDIR_LOG) << "DIRTY!"; 0276 // setDirty( true ); 0277 } 0278 // Seek to end of header 0279 QT_FSEEK(mFp, endOfHeader, SEEK_SET); 0280 0281 if (mIndexSwapByteOrder) { 0282 qCDebug(MIXEDMAILDIR_LOG) << "Index File has byte order swapped!"; 0283 } 0284 if (mIndexSizeOfLong != sizeof(long)) { 0285 qCDebug(MIXEDMAILDIR_LOG) << "Index File sizeOfLong is" << mIndexSizeOfLong << "while sizeof(long) is" << sizeof(long) << "!"; 0286 } 0287 } 0288 return true; 0289 } 0290 0291 bool KMIndexReader::readIndex() 0292 { 0293 qint32 len; 0294 KMIndexData *msg = nullptr; 0295 0296 Q_ASSERT(mFp != nullptr); 0297 rewind(mFp); 0298 0299 mMsgList.clear(); 0300 mMsgByFileName.clear(); 0301 mMsgByOffset.clear(); 0302 0303 int version; 0304 0305 if (!readHeader(&version)) { 0306 return false; 0307 } 0308 0309 mHeaderOffset = QT_FTELL(mFp); 0310 0311 // loop through the entire index 0312 while (!feof(mFp)) { 0313 // qCDebug(MIXEDMAILDIR_LOG) << "NEW MSG!"; 0314 msg = nullptr; 0315 // check version (parsed by readHeader) 0316 // because different versions must be 0317 // parsed differently 0318 // qCDebug(MIXEDMAILDIR_LOG) << "parsing version" << version; 0319 if (version >= 1505) { 0320 // parse versions >= 1505 0321 if (!fread(&len, sizeof(len), 1, mFp)) { 0322 break; 0323 } 0324 0325 // swap bytes if needed 0326 if (mIndexSwapByteOrder) { 0327 len = kmail_swap_32(len); 0328 } 0329 0330 off_t offs = QT_FTELL(mFp); 0331 if (QT_FSEEK(mFp, len, SEEK_CUR)) { 0332 break; 0333 } 0334 msg = new KMIndexData(); 0335 fillPartsCache(msg, offs, len); 0336 } else { 0337 ////////////////////// 0338 // BEGIN UNTESTED CODE 0339 ////////////////////// 0340 // parse versions < 1505 0341 QByteArray line(MAX_LINE, '\0'); 0342 if (fgets(line.data(), MAX_LINE, mFp) == nullptr) { 0343 break; 0344 } 0345 if (feof(mFp)) { 0346 break; 0347 } 0348 if (*line.data() == '\0') { 0349 // really, i have no idea when or how this would occur 0350 // but we probably want to know if it does - Casey 0351 qCWarning(MIXEDMAILDIRRESOURCE_LOG) << "Unknowable bad occurred"; 0352 qCDebug(MIXEDMAILDIR_LOG) << "fclose(mFp = " << mFp << ")"; 0353 fclose(mFp); 0354 mFp = nullptr; 0355 mMsgList.clear(); 0356 mMsgByFileName.clear(); 0357 mMsgByOffset.clear(); 0358 return false; 0359 } 0360 off_t offs = QT_FTELL(mFp); 0361 if (QT_FSEEK(mFp, len, SEEK_CUR)) { 0362 break; 0363 } 0364 msg = new KMIndexData; 0365 fromOldIndexString(msg, line, mConvertToUtf8); 0366 0367 fillPartsCache(msg, offs, len); 0368 ////////////////////// 0369 // END UNTESTED CODE 0370 ////////////////////// 0371 } 0372 if (!msg) { 0373 break; 0374 } 0375 0376 if (msg->status().isDeleted()) { 0377 delete msg; // skip messages that are marked as deleted 0378 continue; 0379 } 0380 #ifdef OBSOLETE 0381 // else if (mi->isNew()) 0382 // { 0383 // mi->setStatus(KMMsgStatusUnread); 0384 // mi->setDirty(false); 0385 // } 0386 #endif 0387 KMIndexDataPtr msgPtr(msg); 0388 mMsgList.append(msgPtr); 0389 const QString fileName = msg->mCachedStringParts[MsgFilePart]; 0390 if (!fileName.isEmpty()) { 0391 mMsgByFileName.insert(fileName, msgPtr); 0392 } 0393 0394 const quint64 offset = msg->mCachedLongParts[MsgOffsetPart]; 0395 if (offset > 0) { 0396 mMsgByOffset.insert(offset, msgPtr); 0397 } 0398 } // end while 0399 0400 return true; 0401 } 0402 0403 //--- For compatibility with old index files 0404 bool KMIndexReader::fromOldIndexString(KMIndexData *msg, const QByteArray &str, bool toUtf8) 0405 { 0406 Q_UNUSED(toUtf8) 0407 // const char *start, *offset; 0408 // msg->modifiers = KMMsgInfoPrivate::ALL_SET; 0409 // msg->xmark = str.mid(33, 3).trimmed(); 0410 // msg->folderOffset = str.mid(2,9).toULong(); 0411 // msg->msgSize = str.mid(12,9).toULong(); 0412 // msg->date = (time_t)str.mid(22,10).toULong(); 0413 // mStatus.setStatusFromStr( str ); 0414 // if (toUtf8) { 0415 // msg->subject = str.mid(37, 100).trimmed(); 0416 // msg->from = str.mid(138, 50).trimmed(); 0417 // msg->to = str.mid(189, 50).trimmed(); 0418 // } else { 0419 // start = offset = str.data() + 37; 0420 // while (*start == ' ' && start - offset < 100) start++; 0421 // msg->subject = QString::fromUtf8(str.mid(start - str.data(), 0422 // 100 - (start - offset)), 100 - (start - offset)); 0423 // start = offset = str.data() + 138; 0424 // while (*start == ' ' && start - offset < 50) start++; 0425 // msg->from = QString::fromUtf8(str.mid(start - str.data(), 0426 // 50 - (start - offset)), 50 - (start - offset)); 0427 // start = offset = str.data() + 189; 0428 // while (*start == ' ' && start - offset < 50) start++; 0429 // msg->to = QString::fromUtf8(str.mid(start - str.data(), 0430 // 50 - (start - offset)), 50 - (start - offset)); 0431 // } 0432 // msg->replyToIdMD5 = str.mid(240, 22).trimmed(); 0433 // msg->msgIdMD5 = str.mid(263, 22).trimmed(); 0434 msg->mStatus.setStatusFromStr(QString::fromUtf8(str)); 0435 return true; 0436 } 0437 0438 //----------------------------------------------------------------------------- 0439 0440 static void swapEndian(QString &str) 0441 { 0442 QChar *data = str.data(); 0443 while (!data->isNull()) { 0444 *data = QChar(kmail_swap_16(data->unicode())); 0445 data++; 0446 } 0447 } 0448 0449 static int g_chunk_length = 0, g_chunk_offset = 0; 0450 static uchar *g_chunk = nullptr; 0451 0452 namespace 0453 { 0454 template<typename T> 0455 void copy_from_stream(T &x) 0456 { 0457 if (g_chunk_offset + int(sizeof(T)) > g_chunk_length) { 0458 g_chunk_offset = g_chunk_length; 0459 qCWarning(MIXEDMAILDIRRESOURCE_LOG) << "This should never happen.."; 0460 x = 0; 0461 } else { 0462 // the memcpy is optimized out by the compiler for the values 0463 // of sizeof(T) that is called with 0464 memcpy(&x, g_chunk + g_chunk_offset, sizeof(T)); 0465 g_chunk_offset += sizeof(T); 0466 } 0467 } 0468 } 0469 0470 bool KMIndexReader::fillPartsCache(KMIndexData *msg, off_t indexOff, short int indexLen) 0471 { 0472 if (!msg) { 0473 return false; 0474 } 0475 // qCDebug(MIXEDMAILDIR_LOG); 0476 if (g_chunk_length < indexLen) { 0477 g_chunk = (uchar *)realloc(g_chunk, g_chunk_length = indexLen); 0478 } 0479 0480 off_t first_off = QT_FTELL(mFp); 0481 QT_FSEEK(mFp, indexOff, SEEK_SET); 0482 if (fread(g_chunk, indexLen, readCount, mFp) != readCount) { 0483 qCWarning(MIXEDMAILDIRRESOURCE_LOG) << "Failed to read index"; 0484 return false; 0485 } 0486 QT_FSEEK(mFp, first_off, SEEK_SET); 0487 0488 MsgPartType type; 0489 quint16 len; 0490 off_t ret = 0; 0491 for (g_chunk_offset = 0; g_chunk_offset < indexLen; g_chunk_offset += len) { 0492 quint32 tmp; 0493 copy_from_stream(tmp); 0494 copy_from_stream(len); 0495 if (mIndexSwapByteOrder) { 0496 tmp = kmail_swap_32(tmp); 0497 len = kmail_swap_16(len); 0498 } 0499 type = (MsgPartType)tmp; 0500 if (g_chunk_offset + len > indexLen) { 0501 qCWarning(MIXEDMAILDIRRESOURCE_LOG) << "g_chunk_offset + len > indexLen" 0502 << "This should never happen.."; 0503 return false; 0504 } 0505 // Only try to create strings if the part is really a string part, see declaration of 0506 // MsgPartType 0507 if (len && ((type >= 1 && type <= 6) || type == 11 || type == 14 || type == 15 || type == 19)) { 0508 // This works because the QString constructor does a memcpy. 0509 // Otherwise we would need to be concerned about the alignment. 0510 QString str((QChar *)(g_chunk + g_chunk_offset), len / 2); 0511 msg->mCachedStringParts[type] = str; 0512 0513 // Normally we need to swap the byte order because the QStrings are written 0514 // in the style of Qt2 (MSB -> network ordered). 0515 // QStrings in Qt3 expect host ordering. 0516 // On e.g. Intel host ordering is LSB, on e.g. Sparc it is MSB. 0517 0518 #if Q_BYTE_ORDER == Q_LITTLE_ENDIAN 0519 // Byte order is little endian (swap is true) 0520 swapEndian(msg->mCachedStringParts[type]); 0521 #else 0522 // Byte order is big endian (swap is false) 0523 #endif 0524 } else if ((type >= 7 && type <= 10) || type == 12 || type == 13 || (type >= 16 && type <= 18)) { 0525 Q_ASSERT(mIndexSizeOfLong == len); 0526 if (mIndexSizeOfLong == sizeof(ret)) { 0527 // qCDebug(MIXEDMAILDIR_LOG) << "mIndexSizeOfLong == sizeof(ret)"; 0528 // this memcpy replaces the original call to copy_from_stream 0529 // so that g_chunk_offset is not changed 0530 memcpy(&ret, g_chunk + g_chunk_offset, sizeof(ret)); 0531 if (mIndexSwapByteOrder) { 0532 if (sizeof(ret) == 4) { 0533 ret = kmail_swap_32(ret); 0534 } else { 0535 ret = kmail_swap_64(ret); 0536 } 0537 } 0538 } 0539 ////////////////////// 0540 // BEGIN UNTESTED CODE 0541 ////////////////////// 0542 else if (mIndexSizeOfLong == 4) { 0543 // Long is stored as 4 bytes in index file, sizeof(long) = 8 0544 quint32 ret_32; 0545 // this memcpy replaces the original call to copy_from_stream 0546 // so that g_chunk_offset is not changed 0547 memcpy(&ret_32, g_chunk + g_chunk_offset, sizeof(quint32)); 0548 if (mIndexSwapByteOrder) { 0549 ret_32 = kmail_swap_32(ret_32); 0550 } 0551 ret = ret_32; 0552 } else if (mIndexSizeOfLong == 8) { 0553 // Long is stored as 8 bytes in index file, sizeof(long) = 4 0554 quint32 ret_1; 0555 quint32 ret_2; 0556 // these memcpys replace the original calls to copy_from_stream 0557 // so that g_chunk_offset is not changed 0558 memcpy(&ret_1, g_chunk + g_chunk_offset, sizeof(quint32)); 0559 memcpy(&ret_2, g_chunk + g_chunk_offset, sizeof(quint32)); 0560 if (!mIndexSwapByteOrder) { 0561 // Index file order is the same as the order of this CPU. 0562 #if Q_BYTE_ORDER == Q_LITTLE_ENDIAN 0563 // Index file order is little endian 0564 ret = ret_1; // We drop the 4 most significant bytes 0565 #else 0566 // Index file order is big endian 0567 ret = ret_2; // We drop the 4 most significant bytes 0568 #endif 0569 } else { 0570 // Index file order is different from this CPU. 0571 #if Q_BYTE_ORDER == Q_LITTLE_ENDIAN 0572 // Index file order is big endian 0573 ret = ret_2; // We drop the 4 most significant bytes 0574 #else 0575 // Index file order is little endian 0576 ret = ret_1; // We drop the 4 most significant bytes 0577 #endif 0578 // We swap the result to host order. 0579 ret = kmail_swap_32(ret); 0580 } 0581 } 0582 ////////////////////// 0583 // END UNTESTED CODE 0584 ////////////////////// 0585 msg->mCachedLongParts[type] = ret; 0586 } 0587 } // for loop 0588 msg->mPartsCacheBuilt = true; 0589 return true; 0590 } 0591 0592 //----------------------------------------------------------------------------- 0593 0594 QList<KMIndexDataPtr> KMIndexReader::messages() 0595 { 0596 return mMsgList; 0597 }