File indexing completed on 2025-05-11 11:33:07
0001 /* POLE - Portable C++ library to access OLE Storage 0002 SPDX-FileCopyrightText: 2002-2005 Ariya Hidayat <ariya@kde.org> 0003 SPDX-FileCopyrightText: 2011-2012 Matus Uzak <matus.uzak@ixonos.com> 0004 0005 Redistribution and use in source and binary forms, with or without 0006 modification, are permitted provided that the following conditions 0007 are met: 0008 * Redistributions of source code must retain the above copyright notice, 0009 this list of conditions and the following disclaimer. 0010 * Redistributions in binary form must reproduce the above copyright notice, 0011 this list of conditions and the following disclaimer in the documentation 0012 and/or other materials provided with the distribution. 0013 * Neither the name of the authors nor the names of its contributors may be 0014 used to endorse or promote products derived from this software without 0015 specific prior written permission. 0016 0017 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 0018 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 0019 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 0020 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 0021 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 0022 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 0023 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 0024 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 0025 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 0026 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 0027 THE POSSIBILITY OF SUCH DAMAGE. 
0028 */ 0029 0030 #include "pole.h" 0031 0032 #include <fstream> 0033 #include <iostream> 0034 #include <list> 0035 #include <string> 0036 #include <vector> 0037 #include <string.h> 0038 #include <ios> // for std::hex 0039 0040 #include <QList> 0041 #include <QString> 0042 #include <QDebug> 0043 0044 //Enable to activate debugging output. 0045 //#define POLE_DEBUG 0046 0047 //Disabled because of too many false positives, both streams and unknown 0048 //objects MAY be invalid and still have a size set. 0049 //#define POLE_FAIL_ON_NEMPTY_NVALID_OBJS 0050 0051 //Validate stream object against [MS-CFB] — v20110318. Disabled because of too 0052 //many false positives. 0053 //#define POLE_FAIL_ON_NVALID_STREAM_OBJS 0054 0055 //Validate storage object against [MS-CFB] — v20110318. Disabled because of 0056 //too many false positives on Word8 documents. 0057 //#define POLE_CHECK_STORAGE_OBJS 0058 0059 //Validate sibling names against positions in the red-black tree. Disabled 0060 //because of too many false positives on Word8 files with embedded documents. 
0061 //#define POLE_CHECK_SIBLINGS 0062 0063 #define OLE_HEADER_SIZE 0x200 0064 0065 namespace POLE 0066 { 0067 0068 class Header 0069 { 0070 public: 0071 unsigned char id[8]; // signature, or magic identifier 0072 unsigned b_shift; // bbat->blockSize = 1 << b_shift 0073 unsigned s_shift; // sbat->blockSize = 1 << s_shift 0074 unsigned num_bat; // blocks allocated for big bat 0075 unsigned dirent_start; // starting block for directory info 0076 unsigned threshold; // switch from small to big file (usually 4K) 0077 unsigned sbat_start; // starting block index to store small bat 0078 unsigned num_sbat; // blocks allocated for small bat 0079 unsigned mbat_start; // starting block to store meta bat 0080 unsigned num_mbat; // blocks allocated for meta bat 0081 unsigned long bb_blocks[109]; 0082 0083 Header(); 0084 bool valid(const unsigned max_sbat_block, const unsigned max_bbat_block) const; 0085 void load(const unsigned char* buffer); 0086 void save(unsigned char* buffer); 0087 void debug(); 0088 }; 0089 0090 class AllocTable 0091 { 0092 public: 0093 static const unsigned Eof; 0094 static const unsigned Avail; 0095 static const unsigned Bat; 0096 static const unsigned MetaBat; 0097 unsigned blockSize; 0098 AllocTable(); 0099 bool valid(const unsigned long filesize, const unsigned shift, const bool isFat = true) const; 0100 void clear(); 0101 unsigned long count(); 0102 void resize(unsigned long newsize); 0103 void preserve(unsigned long n); 0104 void set(unsigned long index, unsigned long val); 0105 unsigned unused(); 0106 void setChain(std::vector<unsigned long>); 0107 std::vector<unsigned long> follow(unsigned long start, bool& fail); 0108 unsigned long operator[](unsigned long index); 0109 void load(const unsigned char* buffer, unsigned len); 0110 void save(unsigned char* buffer); 0111 unsigned size(); 0112 void debug(); 0113 private: 0114 std::vector<unsigned long> data; 0115 AllocTable(const AllocTable&); 0116 AllocTable& operator=(const AllocTable&); 0117 }; 
0118 0119 class DirEntry 0120 { 0121 public: 0122 bool valid; // false if invalid (should be skipped) 0123 std::string name; // the name, not in unicode anymore 0124 bool dir; // true if directory 0125 unsigned long size; // size (not valid if directory) 0126 unsigned long start; // starting block 0127 unsigned prev; // previous sibling 0128 unsigned next; // next sibling 0129 unsigned child; // first child 0130 }; 0131 0132 class DirTree 0133 { 0134 public: 0135 static const unsigned End; 0136 DirTree(); 0137 bool valid(void) const; 0138 void clear(); 0139 unsigned entryCount(); 0140 DirEntry* entry(unsigned index); 0141 DirEntry* entry(const std::string& name, bool create = false); 0142 int indexOf(DirEntry* e); 0143 int parent(unsigned index); 0144 std::string fullName(unsigned index); 0145 std::vector<unsigned> children(unsigned index); 0146 void load(unsigned char* buffer, unsigned len, const unsigned threshold, const unsigned max_sbat, const unsigned max_bbat); 0147 void save(unsigned char* buffer); 0148 unsigned size(); 0149 void debug(); 0150 private: 0151 std::vector<DirEntry> entries; 0152 DirTree(const DirTree&); 0153 DirTree& operator=(const DirTree&); 0154 }; 0155 0156 class StorageIO 0157 { 0158 public: 0159 Storage* storage; // owner 0160 std::string filename; // filename 0161 std::fstream file; // associated with above name 0162 int result; // result of operation 0163 bool opened; // true if file is opened 0164 unsigned long filesize; // size of the file 0165 0166 Header* header; // storage header 0167 DirTree* dirtree; // directory tree 0168 AllocTable* bbat; // allocation table for big blocks 0169 AllocTable* sbat; // allocation table for small blocks 0170 0171 std::vector<unsigned long> sb_blocks; // blocks for "small" files 0172 0173 std::list<Stream*> streams; 0174 0175 StorageIO(Storage* storage, const char* filename); 0176 ~StorageIO(); 0177 0178 bool open(); 0179 void close(); 0180 void flush(); 0181 void load(); 0182 void create(); 0183 
0184 unsigned long loadBigBlocks(const std::vector<unsigned long>& blocks, unsigned char* buffer, unsigned long maxlen); 0185 unsigned long loadBigBlocks(const unsigned long* blocks, unsigned blockCount, unsigned char* buffer, unsigned long maxlen); 0186 0187 unsigned long loadBigBlock(unsigned long block, unsigned char* buffer, unsigned long maxlen); 0188 0189 unsigned long loadSmallBlocks(const std::vector<unsigned long>& blocks, unsigned char* buffer, unsigned long maxlen); 0190 unsigned long loadSmallBlocks(const unsigned long* blocks, unsigned blockCount, unsigned char* buffer, unsigned long maxlen); 0191 0192 unsigned long loadSmallBlock(unsigned long block, unsigned char* buffer, unsigned long maxlen); 0193 0194 StreamIO* streamIO(const std::string& name); 0195 0196 private: 0197 // no copy or assign 0198 StorageIO(const StorageIO&); 0199 StorageIO& operator=(const StorageIO&); 0200 0201 }; 0202 0203 class StreamIO 0204 { 0205 public: 0206 StorageIO* io; 0207 DirEntry* entry; 0208 std::string fullName; 0209 bool eof; 0210 bool fail; 0211 0212 StreamIO(StorageIO* io, DirEntry* entry); 0213 ~StreamIO(); 0214 unsigned long size(); 0215 void seek(unsigned long pos); 0216 unsigned long tell(); 0217 int getch(); 0218 unsigned long read(unsigned char* data, unsigned long maxlen); 0219 0220 private: 0221 unsigned long readInternal(unsigned char* data, unsigned long maxlen); 0222 unsigned long readInternal(unsigned long pos, unsigned char* data, unsigned long maxlen); 0223 0224 std::vector<unsigned long> blocks; 0225 0226 // no copy or assign 0227 StreamIO(const StreamIO&); 0228 StreamIO& operator=(const StreamIO&); 0229 0230 // pointer for read 0231 unsigned long m_pos; 0232 0233 // simple cache system to speed-up getch() 0234 unsigned char* cache_data; 0235 unsigned long base_cache_size; 0236 unsigned long cache_size; 0237 unsigned long cache_pos; 0238 void updateCache(); 0239 }; 0240 0241 } // namespace POLE 0242 0243 using namespace POLE; 0244 0245 static inline 
unsigned long readU16(const unsigned char* ptr) 0246 { 0247 return ptr[0] + (ptr[1] << 8); 0248 } 0249 0250 static inline unsigned long readU32(const unsigned char* ptr) 0251 { 0252 return unsigned(ptr[0]) + (unsigned(ptr[1]) << 8 ) + (unsigned(ptr[2]) << 16) + (unsigned(ptr[3]) << 24); 0253 } 0254 0255 static inline void writeU16(unsigned char* ptr, unsigned long data) 0256 { 0257 ptr[0] = (unsigned char)(data & 0xff); 0258 ptr[1] = (unsigned char)((data >> 8) & 0xff); 0259 } 0260 0261 static inline void writeU32(unsigned char* ptr, unsigned long data) 0262 { 0263 ptr[0] = (unsigned char)(data & 0xff); 0264 ptr[1] = (unsigned char)((data >> 8) & 0xff); 0265 ptr[2] = (unsigned char)((data >> 16) & 0xff); 0266 ptr[3] = (unsigned char)((data >> 24) & 0xff); 0267 } 0268 0269 static const unsigned char pole_magic[] = { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 }; 0270 0271 // =========== Header ========== 0272 0273 Header::Header() 0274 { 0275 b_shift = 9; 0276 s_shift = 6; 0277 num_bat = 0; 0278 dirent_start = 0; 0279 threshold = 4096; 0280 sbat_start = 0; 0281 num_sbat = 0; 0282 mbat_start = 0; 0283 num_mbat = 0; 0284 0285 for (unsigned i = 0; i < 8; i++) 0286 id[i] = pole_magic[i]; 0287 for (unsigned i = 0; i < 109; i++) 0288 bb_blocks[i] = AllocTable::Avail; 0289 } 0290 0291 bool Header::valid(const unsigned max_sbat_block, const unsigned max_bbat_block) const 0292 { 0293 if (threshold != 4096) return false; 0294 if (num_bat == 0) return false; 0295 if ((num_bat > 109) && (num_bat > (num_mbat * 127) + 109)) return false; 0296 if ((num_bat < 109) && (num_mbat != 0)) return false; 0297 if (s_shift > b_shift) return false; 0298 if (b_shift <= 6) return false; 0299 if (b_shift > 12) return false; 0300 0301 // additional heuristics to check the header 0302 if (num_sbat > max_sbat_block) return false; 0303 if (num_bat > max_bbat_block) return false; 0304 0305 #ifdef POLE_DEBUG 0306 const unsigned ENDOFCHAIN = 0xfffffffe; 0307 const unsigned FREESECT = 0xffffffff; 
0308 0309 if (num_sbat == 0 && 0310 sbat_start != ENDOFCHAIN && 0311 sbat_start != FREESECT) 0312 { 0313 qDebug() << Q_FUNC_INFO << 0314 "There aren't any minifat sectors, but there are links to some!"; 0315 } 0316 #endif 0317 0318 return true; 0319 } 0320 0321 void Header::load(const unsigned char* buffer) 0322 { 0323 b_shift = readU16(buffer + 0x1e); // sector shift 0324 s_shift = readU16(buffer + 0x20); // mini sector shift 0325 num_bat = readU32(buffer + 0x2c); // number of fat sectors 0326 dirent_start = readU32(buffer + 0x30); // first directory sector location 0327 threshold = readU32(buffer + 0x38); // transaction signature number 0328 sbat_start = readU32(buffer + 0x3c); // first mini fat sector location 0329 num_sbat = readU32(buffer + 0x40); // mini stream cutoff size 0330 mbat_start = readU32(buffer + 0x44); // first mini difat sector location 0331 num_mbat = readU32(buffer + 0x48); // number of difat sectors 0332 0333 for (unsigned i = 0; i < 8; i++) 0334 id[i] = buffer[i]; 0335 for (unsigned i = 0; i < 109; i++) 0336 bb_blocks[i] = readU32(buffer + 0x4C + i * 4); 0337 } 0338 0339 void Header::save(unsigned char* buffer) 0340 { 0341 memset(buffer, 0, 0x4c); 0342 memcpy(buffer, pole_magic, 8); // ole signature 0343 writeU32(buffer + 8, 0); // unknown 0344 writeU32(buffer + 12, 0); // unknown 0345 writeU32(buffer + 16, 0); // unknown 0346 writeU16(buffer + 24, 0x003e); // revision ? 0347 writeU16(buffer + 26, 3); // version ? 
0348 writeU16(buffer + 28, 0xfffe); // unknown 0349 writeU16(buffer + 0x1e, b_shift); 0350 writeU16(buffer + 0x20, s_shift); 0351 writeU32(buffer + 0x2c, num_bat); 0352 writeU32(buffer + 0x30, dirent_start); 0353 writeU32(buffer + 0x38, threshold); 0354 writeU32(buffer + 0x3c, sbat_start); 0355 writeU32(buffer + 0x40, num_sbat); 0356 writeU32(buffer + 0x44, mbat_start); 0357 writeU32(buffer + 0x48, num_mbat); 0358 0359 for (unsigned i = 0; i < 109; i++) 0360 writeU32(buffer + 0x4C + i*4, bb_blocks[i]); 0361 } 0362 0363 void Header::debug() 0364 { 0365 qDebug() << Q_FUNC_INFO; 0366 qDebug() << "b_shift:" << b_shift; 0367 qDebug() << "s_shift:" << s_shift; 0368 qDebug() << "num_bat:" << num_bat; 0369 qDebug() << "dirent_start: 0x" << hex << dirent_start; 0370 qDebug() << "threshold:" << dec << threshold; 0371 qDebug() << "sbat_start: 0x" << hex << sbat_start; 0372 qDebug() << "num_sbat:" << dec << num_sbat; 0373 qDebug() << "mbat_start: 0x" << hex << mbat_start; 0374 qDebug() << "num_mbat:" << dec << num_mbat; 0375 0376 unsigned s = (num_bat <= 109) ? 
num_bat : 109; 0377 std::cout << "bat blocks:"; 0378 for (unsigned i = 0; i < s; i++) { 0379 std::cout << "0x" << std::hex << bb_blocks[i] << " "; 0380 } 0381 std::cout << std::dec << std::endl; 0382 } 0383 0384 // =========== AllocTable ========== 0385 0386 const unsigned AllocTable::Avail = 0xffffffff; 0387 const unsigned AllocTable::Eof = 0xfffffffe; 0388 const unsigned AllocTable::Bat = 0xfffffffd; 0389 const unsigned AllocTable::MetaBat = 0xfffffffc; 0390 0391 AllocTable::AllocTable() 0392 { 0393 blockSize = 4096; 0394 // initial size 0395 resize(128); 0396 } 0397 0398 bool AllocTable::valid(const unsigned long filesize, const unsigned shift, const bool isFat) const 0399 { 0400 unsigned long offset = 0; 0401 for (unsigned long i = 0; i < data.size(); i++) { 0402 switch (data[i]) { 0403 case AllocTable::Avail: 0404 case AllocTable::Eof: 0405 case AllocTable::Bat: 0406 case AllocTable::MetaBat: 0407 break; 0408 default: 0409 offset = data[i] << shift; 0410 if (isFat) { 0411 offset += OLE_HEADER_SIZE; 0412 } 0413 if (offset > filesize) { 0414 #ifdef POLE_DEBUG 0415 qDebug() << "Invalid location of sector in the stream!" 
<< 0416 "offset:" << offset << " | filesize:" << filesize; 0417 #endif 0418 return false; 0419 } 0420 } 0421 } 0422 return true; 0423 } 0424 0425 unsigned long AllocTable::count() 0426 { 0427 return data.size(); 0428 } 0429 0430 void AllocTable::resize(unsigned long newsize) 0431 { 0432 unsigned oldsize = data.size(); 0433 data.resize(newsize); 0434 if (newsize > oldsize) 0435 for (unsigned i = oldsize; i < newsize; i++) 0436 data[i] = Avail; 0437 } 0438 0439 // make sure there're still free blocks 0440 void AllocTable::preserve(unsigned long n) 0441 { 0442 std::vector<unsigned long> pre; 0443 for (unsigned i = 0; i < n; i++) 0444 pre.push_back(unused()); 0445 } 0446 0447 unsigned long AllocTable::operator[](unsigned long index) 0448 { 0449 unsigned long result; 0450 result = data[index]; 0451 return result; 0452 } 0453 0454 void AllocTable::set(unsigned long index, unsigned long value) 0455 { 0456 if (index >= count()) resize(index + 1); 0457 data[ index ] = value; 0458 } 0459 0460 void AllocTable::setChain(std::vector<unsigned long> chain) 0461 { 0462 if (chain.size()) { 0463 for (unsigned i = 0; i < chain.size() - 1; i++) 0464 set(chain[i], chain[i+1]); 0465 set(chain[ chain.size()-1 ], AllocTable::Eof); 0466 } 0467 } 0468 0469 // follow 0470 std::vector<unsigned long> AllocTable::follow(unsigned long start, bool& fail) 0471 { 0472 std::vector<unsigned long> chain; 0473 0474 if (start >= count()) { 0475 #ifdef POLE_DEBUG 0476 qDebug() << Q_FUNC_INFO << "start >= count()!"; 0477 #endif 0478 fail = true; 0479 return chain; 0480 } 0481 0482 unsigned long p = start; 0483 while (p < count()) { 0484 if (p == (unsigned long)Eof) { 0485 #ifdef POLE_DEBUG 0486 qDebug() << Q_FUNC_INFO << "Eof detected!"; 0487 #endif 0488 break; 0489 } 0490 if (p == (unsigned long)Bat) { 0491 #ifdef POLE_DEBUG 0492 qDebug() << Q_FUNC_INFO << "Bat detected!"; 0493 #endif 0494 break; 0495 } 0496 if (p == (unsigned long)MetaBat) { 0497 #ifdef POLE_DEBUG 0498 qDebug() << Q_FUNC_INFO << 
"MetaBat detected!"; 0499 #endif 0500 break; 0501 } 0502 if (p >= count()) { 0503 #ifdef POLE_DEBUG 0504 qDebug() << Q_FUNC_INFO << "Invalid index detected!"; 0505 #endif 0506 fail = true; 0507 break; 0508 } 0509 chain.push_back(p); 0510 0511 // break if the chain is longer than the total sector count 0512 if (chain.size() > count()) { 0513 #ifdef POLE_DEBUG 0514 qDebug() << Q_FUNC_INFO << "Probably a loop detected!"; 0515 #endif 0516 fail = true; 0517 break; 0518 } 0519 p = data[ p ]; 0520 } 0521 if (p != (unsigned long)AllocTable::Eof) { 0522 #ifdef POLE_DEBUG 0523 qDebug() << Q_FUNC_INFO << "Last chain entry MUST be 0x" << hex << AllocTable::Eof << 0524 ", detected: 0x" << hex << p; 0525 #endif 0526 fail = true; 0527 } 0528 0529 return chain; 0530 } 0531 0532 unsigned AllocTable::unused() 0533 { 0534 // find first available block 0535 for (unsigned i = 0; i < data.size(); i++) 0536 if (data[i] == Avail) 0537 return i; 0538 0539 // completely full, so enlarge the table 0540 unsigned block = data.size(); 0541 resize(data.size() + 10); 0542 return block; 0543 } 0544 0545 void AllocTable::load(const unsigned char* buffer, unsigned len) 0546 { 0547 resize(len / 4); 0548 for (unsigned i = 0; i < count(); i++) 0549 set(i, readU32(buffer + i*4)); 0550 } 0551 0552 // return space required to save this dirtree 0553 unsigned AllocTable::size() 0554 { 0555 return count() * 4; 0556 } 0557 0558 void AllocTable::save(unsigned char* buffer) 0559 { 0560 for (unsigned i = 0; i < count(); i++) 0561 writeU32(buffer + i*4, data[i]); 0562 } 0563 0564 void AllocTable::debug() 0565 { 0566 qDebug() << "block size " << data.size(); 0567 for (unsigned i = 0; i < data.size(); i++) { 0568 if (data[i] == Avail) continue; 0569 std::cout << i << ": "; 0570 if (data[i] == Eof) std::cout << "[eof]"; 0571 else if (data[i] == Bat) std::cout << "[bat]"; 0572 else if (data[i] == MetaBat) std::cout << "[metabat]"; 0573 else std::cout << data[i]; 0574 std::cout << std::endl; 0575 } 0576 } 0577 0578 // 
=========== DirTree ========== 0579 0580 const unsigned DirTree::End = 0xffffffff; 0581 0582 /* 0583 * Compare DirEntry names according to the spec. 0584 */ 0585 int ename_cmp(QString& str1, QString& str2) 0586 { 0587 str1 = str1.toUpper(); 0588 str2 = str2.toUpper(); 0589 if (str1.size() < str2.size()) return -1; 0590 else if (str1.size() > str2.size()) return 1; 0591 else return str1.compare(str2); 0592 } 0593 0594 /* 0595 * Check if DirEntry elements at this level have unique names. 0596 */ 0597 bool valid_enames(DirTree* dirtree, unsigned index) 0598 { 0599 std::vector<unsigned> chi = dirtree->children(index); 0600 QList<std::string> names; 0601 DirEntry* e = 0; 0602 0603 #ifdef POLE_DEBUG 0604 if (chi.size()) { 0605 qDebug() << "---------------------"; 0606 qDebug() << Q_FUNC_INFO; 0607 qDebug() << "[KIDS]:"; 0608 } 0609 for (unsigned i = 0; i < chi.size(); i++) { 0610 e = dirtree->entry(chi[i]); 0611 if (!e->valid) std::cout << "[INVALID] "; 0612 printf("DirEntry: name=%s prev=%i next=%i child=%i start=%lu size=%lu dir=%i\n", 0613 e->name.c_str(), e->prev, e->next, e->child, e->start, e->size, e->dir); 0614 } 0615 if (chi.size()) { 0616 qDebug() << "---------------------"; 0617 } 0618 #endif 0619 0620 for (unsigned i = 0; i < chi.size(); i++) { 0621 e = dirtree->entry(chi[i]); 0622 if (e->valid) { 0623 if (names.contains(e->name)) { 0624 return false; 0625 } else { 0626 names.append(e->name); 0627 } 0628 } 0629 } 0630 return true; 0631 } 0632 0633 bool DirTree::valid() const 0634 { 0635 const DirEntry* e; 0636 #ifdef POLE_CHECK_SIBLINGS 0637 QString str1, str2; 0638 #endif 0639 0640 #ifdef POLE_DEBUG 0641 qDebug() << Q_FUNC_INFO; 0642 #endif 0643 for (unsigned i = 0; i < entries.size(); i++) { 0644 e = &entries[i]; 0645 0646 #ifdef POLE_DEBUG 0647 if (!e->valid) std::cout << "[INVALID] "; 0648 printf("DirEntry: name=%s prev=%i next=%i child=%i start=%lu size=%lu dir=%i\n", 0649 e->name.c_str(), e->prev, e->next, e->child, e->start, e->size, e->dir); 0650 
#endif 0651 #ifdef POLE_FAIL_ON_NEMPTY_NVALID_OBJS 0652 if (!e->valid && e->size) { 0653 #ifdef POLE_DEBUG 0654 qDebug() << "Invalid DirEntry detected!"; 0655 #endif 0656 return false; 0657 } 0658 #endif 0659 //Looking for invalid stream objects. 0660 #ifdef POLE_FAIL_ON_NVALID_STREAM_OBJS 0661 if (!e->valid && !e->dir) { 0662 #ifdef POLE_DEBUG 0663 qDebug() << "Invalid DirEntry (stream object) detected!"; 0664 #endif 0665 return false; 0666 } 0667 #endif 0668 //Looking for invalid storage objects. 0669 if (!e->valid && e->dir) { 0670 #ifdef POLE_DEBUG 0671 qDebug() << "Invalid DirEntry (storage object) detected!"; 0672 #endif 0673 return false; 0674 } 0675 0676 //A root storage: size = size of the mini stream, start = first sector 0677 //of the mini stream, if the mini stream exists 0678 // 0679 //A storage object MAY have e->child set - [MS-CFB]. 0680 #ifdef POLE_CHECK_STORAGE_OBJS 0681 if ((e->valid && e->dir) && (i > 0) && 0682 ((e->start != 0) || (e->size != 0))) 0683 { 0684 #ifdef POLE_DEBUG 0685 qDebug() << "Invalid DirEntry (storage object) detected!"; 0686 #endif 0687 return false; 0688 } 0689 #endif 0690 //Looking for duplicate DirEntries in the storage object. 0691 if (e->valid && e->dir) { 0692 if (!valid_enames(const_cast<DirTree*>(this), i)) { 0693 #ifdef POLE_DEBUG 0694 qDebug() << "Invalid DirEntry (storage object) detected!"; 0695 #endif 0696 return false; 0697 } 0698 } 0699 0700 //Check the name of the left/right DirEntry. 
0701 #ifdef POLE_CHECK_SIBLINGS 0702 if (e->prev != End) { 0703 str1 = QString(entries[e->prev].name.data()); 0704 } 0705 if (e->next != End) { 0706 str2 = QString(entries[e->next].name.data()); 0707 } 0708 if (!str1.isEmpty() && !str2.isEmpty()) { 0709 if (ename_cmp(str1, str2) > 0) { 0710 #ifdef POLE_DEBUG 0711 qDebug() << "DirEntry: [name, position] mismatch!"; 0712 #endif 0713 return false; 0714 } 0715 } 0716 #endif 0717 } 0718 return true; 0719 } 0720 0721 DirTree::DirTree() 0722 { 0723 clear(); 0724 } 0725 0726 void DirTree::clear() 0727 { 0728 // leave only root entry 0729 entries.resize(1); 0730 entries[0].valid = true; 0731 entries[0].name = "Root Entry"; 0732 entries[0].dir = true; 0733 entries[0].size = 0; 0734 entries[0].start = End; 0735 entries[0].prev = End; 0736 entries[0].next = End; 0737 entries[0].child = End; 0738 } 0739 0740 unsigned DirTree::entryCount() 0741 { 0742 return entries.size(); 0743 } 0744 0745 DirEntry* DirTree::entry(unsigned index) 0746 { 0747 if (index >= entryCount()) return (DirEntry*) 0; 0748 return &entries[ index ]; 0749 } 0750 0751 int DirTree::indexOf(DirEntry* e) 0752 { 0753 for (unsigned i = 0; i < entryCount(); i++) 0754 if (entry(i) == e) return i; 0755 0756 return -1; 0757 } 0758 0759 int DirTree::parent(unsigned index) 0760 { 0761 // brute-force, basically we iterate for each entries, find its children 0762 // and check if one of the children is 'index' 0763 for (unsigned j = 0; j < entryCount(); j++) { 0764 std::vector<unsigned> chi = children(j); 0765 for (unsigned i = 0; i < chi.size(); i++) 0766 if (chi[i] == index) 0767 return j; 0768 } 0769 0770 return -1; 0771 } 0772 0773 std::string DirTree::fullName(unsigned index) 0774 { 0775 // don't use root name ("Root Entry"), just give "/" 0776 if (index == 0) return "/"; 0777 0778 std::string result = entry(index)->name; 0779 result.insert(0, "/"); 0780 int p = parent(index); 0781 DirEntry * _entry = 0; 0782 while (p > 0) { 0783 _entry = entry(p); 0784 if 
(_entry->dir && _entry->valid) { 0785 result.insert(0, _entry->name); 0786 result.insert(0, "/"); 0787 } 0788 --p; 0789 index = p; 0790 if (index <= 0) break; 0791 } 0792 return result; 0793 } 0794 0795 // given a fullname (e.g "/ObjectPool/_1020961869"), find the entry 0796 // if not found and create is false, return 0 0797 // if create is true, a new entry is returned 0798 DirEntry* DirTree::entry(const std::string& name, bool create) 0799 { 0800 if (!name.length()) return (DirEntry*)0; 0801 0802 // quick check for "/" (that's root) 0803 if (name == "/") return entry(0); 0804 0805 // split the names, e.g "/ObjectPool/_1020961869" will become: 0806 // "ObjectPool" and "_1020961869" 0807 std::list<std::string> names; 0808 std::string::size_type start = 0, end = 0; 0809 if (name[0] == '/') start++; 0810 while (start < name.length()) { 0811 end = name.find_first_of('/', start); 0812 if (end == std::string::npos) end = name.length(); 0813 names.push_back(name.substr(start, end - start)); 0814 start = end + 1; 0815 } 0816 0817 // start from root 0818 int index = 0 ; 0819 0820 // trace one by one 0821 std::list<std::string>::iterator it; 0822 0823 for (it = names.begin(); it != names.end(); ++it) { 0824 // find among the children of index 0825 std::vector<unsigned> chi = children(index); 0826 unsigned child = 0; 0827 for (unsigned i = 0; i < chi.size(); i++) { 0828 DirEntry* ce = entry(chi[i]); 0829 if (ce) 0830 if (ce->valid && (ce->name.length() > 1)) 0831 if (ce->name == *it) 0832 child = chi[i]; 0833 } 0834 0835 // traverse to the child 0836 if (child > 0) index = child; 0837 else { 0838 // not found among children 0839 if (!create) return (DirEntry*)0; 0840 0841 // create a new entry 0842 unsigned parent = index; 0843 entries.push_back(DirEntry()); 0844 index = entryCount() - 1; 0845 DirEntry* e = entry(index); 0846 e->valid = true; 0847 e->name = *it; 0848 e->dir = false; 0849 e->size = 0; 0850 e->start = 0; 0851 e->child = End; 0852 e->prev = End; 0853 e->next = 
entry(parent)->child; 0854 entry(parent)->child = index; 0855 } 0856 } 0857 0858 return entry(index); 0859 } 0860 0861 // helper function: recursively find siblings of index 0862 void dirtree_find_siblings(DirTree* dirtree, std::vector<unsigned>& result, 0863 unsigned index) 0864 { 0865 DirEntry* e = dirtree->entry(index); 0866 if (!e) return; 0867 // if (!e->valid) return; 0868 0869 // prevent infinite loop 0870 for (unsigned i = 0; i < result.size(); i++) { 0871 if (result[i] == index) return; 0872 } 0873 // add myself 0874 result.push_back(index); 0875 0876 // visit previous sibling, don't go infinitely 0877 unsigned prev = e->prev; 0878 if ((prev > 0) && (prev < dirtree->entryCount())) { 0879 for (unsigned i = 0; i < result.size(); i++) 0880 if (result[i] == prev) prev = 0; 0881 if (prev) dirtree_find_siblings(dirtree, result, prev); 0882 } 0883 0884 // visit next sibling, don't go infinitely 0885 unsigned next = e->next; 0886 if ((next > 0) && (next < dirtree->entryCount())) { 0887 for (unsigned i = 0; i < result.size(); i++) 0888 if (result[i] == next) next = 0; 0889 if (next) dirtree_find_siblings(dirtree, result, next); 0890 } 0891 } 0892 0893 std::vector<unsigned> DirTree::children(unsigned index) 0894 { 0895 std::vector<unsigned> result; 0896 0897 DirEntry* e = entry(index); 0898 if (e) { 0899 if (e->valid && e->dir) { 0900 dirtree_find_siblings(this, result, e->child); 0901 } 0902 } 0903 return result; 0904 } 0905 0906 void DirTree::load(unsigned char* buffer, unsigned size, const unsigned threshold, 0907 const unsigned max_sbat, const unsigned max_bbat) 0908 { 0909 #ifdef POLE_DEBUG 0910 qDebug() << "-------------------------------"; 0911 qDebug() << Q_FUNC_INFO; 0912 #endif 0913 0914 entries.clear(); 0915 unsigned n = (size / 128); //num. 
of directory entries 0916 0917 for (unsigned i = 0; i < (size / 128); i++) { 0918 unsigned p = i * 128; 0919 0920 0921 // parse name of this entry, which stored as Unicode 16-bit 0922 int name_len = readU16(buffer + 0x40 + p); 0923 if (name_len > 64) { 0924 name_len = 64; 0925 #ifdef POLE_DEBUG 0926 qDebug() << "DirEntry: Invalid length of name!"; 0927 #endif 0928 } 0929 std::string name; 0930 for (int j = 0; (buffer[j+p]) && (j < name_len); j += 2) { 0931 name.append(1, buffer[j+p]); 0932 } 0933 0934 // first char isn't printable ? remove it... 0935 if (buffer[p] < 32) { 0936 name.erase(0, 1); 0937 } 0938 0939 // [MS-CFB] — v20110318 0940 // 0x00 = Unknown or unallocated, 0x01 = directory (Storage Object), 0941 // 0x02 = file (Stream Object), 0x05 = Root Storage Object 0942 unsigned type = buffer[ 0x42 + p]; 0943 0944 DirEntry e; 0945 e.valid = true; 0946 e.name = name; 0947 e.start = readU32(buffer + 0x74 + p); 0948 e.size = readU32(buffer + 0x78 + p); 0949 e.prev = readU32(buffer + 0x44 + p); 0950 e.next = readU32(buffer + 0x48 + p); 0951 e.child = readU32(buffer + 0x4C + p); 0952 e.dir = false; 0953 0954 if ((type == 1) || (type == 5)) { 0955 e.dir = true; 0956 } 0957 0958 // sanity checks 0959 if ((type != 0) && (type != 1) && (type != 2) && (type != 5)) { 0960 e.valid = false; 0961 #ifdef POLE_DEBUG 0962 qDebug() << "DirEntry: invalid type!"; 0963 #endif 0964 } 0965 if ((type != 0) && (name_len < 1)) { 0966 e.valid = false; 0967 #ifdef POLE_DEBUG 0968 qDebug() << "DirEntry: invalid (type,name) pair!"; 0969 #endif 0970 } 0971 // unknown object 0972 if (type == 0) { 0973 if ((e.child != End) || (e.prev != End) || (e.next != End)) { 0974 e.valid = false; 0975 #ifdef POLE_DEBUG 0976 qDebug() << "DirEntry: reference to prev/next/child != NOSTREAM"; 0977 #endif 0978 } 0979 if ((e.start != 0) || (e.size != 0)) { 0980 e.valid = false; 0981 #ifdef POLE_DEBUG 0982 qDebug() << "DirEntry: start/size != ZERO"; 0983 #endif 0984 } 0985 } 0986 // storage objects 0987 if 
(type == 1) { 0988 if (((e.prev != End) && (e.prev >= n)) || 0989 ((e.next != End) && (e.next >= n)) || 0990 ((e.child != End) && (e.child >= n))) { 0991 e.valid = false; 0992 #ifdef POLE_DEBUG 0993 qDebug() << "DirEntry: reference to prev/next/child > object num. (" << n << ")"; 0994 #endif 0995 } 0996 } 0997 // stream object 0998 if (type == 2) { 0999 //check stream position 1000 if ((e.size >= threshold) && (e.start >= max_bbat)) { 1001 e.valid = false; 1002 #ifdef POLE_DEBUG 1003 qDebug() << "DirEntry: (e.start >= max_bbat)"; 1004 #endif 1005 } 1006 else if (e.start >= max_sbat) { 1007 e.valid = false; 1008 #ifdef POLE_DEBUG 1009 qDebug() << "DirEntry: (e.start >= max_sbat)"; 1010 #endif 1011 } 1012 //check stream object 1013 if (e.child != End) { 1014 e.valid = false; 1015 #ifdef POLE_DEBUG 1016 qDebug() << "DirEntry: (e.child != End)"; 1017 #endif 1018 } 1019 //NOTE: Disabled because of too many false positives. 1020 // if ((e->prev != End) || (e->next != End)) { 1021 // e.valid = false; 1022 // } 1023 } 1024 1025 // CLSID contains an object class GUID (globally unique identifier) if 1026 // this entry is a storage or root storage. In a stream object, this 1027 // field MUST be set to all zeroes. 
1028 #ifdef POLE_DEBUG 1029 if (!e.valid) { 1030 std::cout << "[INVALID] "; 1031 } 1032 printf("DirEntry: name=%s type=%i prev=%i next=%i child=%i start=%lu size=%lu clsid=%lu.%lu.%lu.%lu\n", 1033 name.c_str(), type, e.prev, e.next, e.child, e.start, e.size, readU32(buffer + 0x50 + p), 1034 readU32(buffer + 0x54 + p), readU32(buffer + 0x58 + p), readU32(buffer + 0x5C + p)); 1035 #endif 1036 entries.push_back(e); 1037 } 1038 #ifdef POLE_DEBUG 1039 qDebug() << "-------------------------------"; 1040 #endif 1041 } 1042 1043 // return space required to save this dirtree 1044 unsigned DirTree::size() 1045 { 1046 return entryCount() * 128; 1047 } 1048 1049 void DirTree::save(unsigned char* buffer) 1050 { 1051 memset(buffer, 0, size()); 1052 1053 // root is fixed as "Root Entry" 1054 DirEntry* root = entry(0); 1055 std::string name = "Root Entry"; 1056 for (unsigned j = 0; j < name.length(); j++) 1057 buffer[ j*2 ] = name[j]; 1058 writeU16(buffer + 0x40, name.length()*2 + 2); 1059 writeU32(buffer + 0x74, 0xffffffff); 1060 writeU32(buffer + 0x78, 0); 1061 writeU32(buffer + 0x44, 0xffffffff); 1062 writeU32(buffer + 0x48, 0xffffffff); 1063 writeU32(buffer + 0x4c, root->child); 1064 buffer[ 0x42 ] = 5; 1065 buffer[ 0x43 ] = 1; 1066 1067 for (unsigned i = 1; i < entryCount(); i++) { 1068 DirEntry* e = entry(i); 1069 if (!e) continue; 1070 if (e->dir) { 1071 e->start = 0xffffffff; 1072 e->size = 0; 1073 } 1074 1075 // max length for name is 32 chars 1076 std::string name = e->name; 1077 if (name.length() > 32) 1078 name.erase(32, name.length()); 1079 1080 // write name as Unicode 16-bit 1081 for (unsigned j = 0; j < name.length(); j++) 1082 buffer[ i*128 + j*2 ] = name[j]; 1083 1084 writeU16(buffer + i*128 + 0x40, name.length()*2 + 2); 1085 writeU32(buffer + i*128 + 0x74, e->start); 1086 writeU32(buffer + i*128 + 0x78, e->size); 1087 writeU32(buffer + i*128 + 0x44, e->prev); 1088 writeU32(buffer + i*128 + 0x48, e->next); 1089 writeU32(buffer + i*128 + 0x4c, e->child); 1090 
buffer[ i*128 + 0x42 ] = e->dir ? 1 : 2; 1091 buffer[ i*128 + 0x43 ] = 1; // always black 1092 } 1093 } 1094 1095 void DirTree::debug() 1096 { 1097 for (unsigned i = 0; i < entryCount(); i++) { 1098 DirEntry* e = entry(i); 1099 if (!e) continue; 1100 std::cout << i << ": "; 1101 if (!e->valid) std::cout << "INVALID "; 1102 std::cout << e->name << " "; 1103 if (e->dir) std::cout << "(Dir) "; 1104 else std::cout << "(File) "; 1105 std::cout << e->size << " "; 1106 std::cout << "s:" << e->start << " "; 1107 std::cout << "("; 1108 if (e->child == End) std::cout << "-"; else std::cout << e->child; 1109 std::cout << " "; 1110 if (e->prev == End) std::cout << "-"; else std::cout << e->prev; 1111 std::cout << ":"; 1112 if (e->next == End) std::cout << "-"; else std::cout << e->next; 1113 std::cout << ")"; 1114 std::cout << std::endl; 1115 } 1116 } 1117 1118 // =========== StorageIO ========== 1119 1120 StorageIO::StorageIO(Storage* st, const char* fname) 1121 { 1122 storage = st; 1123 filename = fname; 1124 result = Storage::Ok; 1125 opened = false; 1126 1127 header = new Header(); 1128 dirtree = new DirTree(); 1129 bbat = new AllocTable(); 1130 sbat = new AllocTable(); 1131 1132 filesize = 0; 1133 bbat->blockSize = 1 << header->b_shift; 1134 sbat->blockSize = 1 << header->s_shift; 1135 } 1136 1137 StorageIO::~StorageIO() 1138 { 1139 if (opened) close(); 1140 delete sbat; 1141 delete bbat; 1142 delete dirtree; 1143 delete header; 1144 } 1145 1146 bool StorageIO::open() 1147 { 1148 // already opened ? 
close first 1149 if (opened) close(); 1150 1151 load(); 1152 1153 return result == Storage::Ok; 1154 } 1155 1156 void StorageIO::load() 1157 { 1158 unsigned char* buffer = 0; 1159 unsigned long buflen = 0; 1160 std::vector<unsigned long> blocks; 1161 1162 // open the file, check for error 1163 result = Storage::OpenFailed; 1164 file.open(filename.c_str(), std::ios::binary | std::ios::in); 1165 if (!file.good()) return; 1166 1167 // find size of input file 1168 file.seekg(0, std::ios::end); 1169 filesize = file.tellg(); 1170 1171 // load header 1172 buffer = new unsigned char[OLE_HEADER_SIZE]; 1173 file.seekg(0); 1174 file.read((char*)buffer, OLE_HEADER_SIZE); 1175 if (!file.good()) { 1176 delete[] buffer; 1177 return; 1178 } 1179 header->load(buffer); 1180 delete[] buffer; 1181 1182 // check OLE magic id 1183 result = Storage::NotOLE; 1184 for (unsigned i = 0; i < 8; i++) 1185 if (header->id[i] != pole_magic[i]) 1186 return; 1187 1188 // important block size 1189 bbat->blockSize = 1 << header->b_shift; 1190 sbat->blockSize = 1 << header->s_shift; 1191 const unsigned max_bbat_block = (filesize - OLE_HEADER_SIZE) / bbat->blockSize; 1192 const unsigned max_sbat_block = (filesize - OLE_HEADER_SIZE) / sbat->blockSize; 1193 1194 // sanity checks 1195 result = Storage::BadOLE; 1196 if (!header->valid(max_sbat_block, max_bbat_block)) { 1197 return; 1198 } 1199 1200 // find blocks allocated to store big bat 1201 // the first 109 blocks are in header, the rest in meta bat 1202 blocks.clear(); 1203 blocks.resize(header->num_bat); 1204 for (unsigned i = 0; i < 109; i++) { 1205 if (i >= header->num_bat) break; 1206 else blocks[i] = header->bb_blocks[i]; 1207 } 1208 if ((header->num_bat > 109) && (header->num_mbat > 0)) { 1209 unsigned char* buffer2 = new unsigned char[ bbat->blockSize ]; 1210 unsigned k = 109; 1211 unsigned mblock = header->mbat_start; 1212 for (unsigned r = 0; r < header->num_mbat; r++) { 1213 unsigned long rr = loadBigBlock(mblock, buffer2, bbat->blockSize); 
1214 if (rr != bbat->blockSize) { 1215 delete[] buffer2; 1216 return; 1217 } 1218 for (unsigned s = 0; s < bbat->blockSize - 4; s += 4) { 1219 if (k >= header->num_bat) break; 1220 else blocks[k++] = readU32(buffer2 + s); 1221 } 1222 mblock = readU32(buffer2 + bbat->blockSize - 4); 1223 } 1224 delete[] buffer2; 1225 } 1226 1227 // load big bat 1228 buflen = blocks.size() * bbat->blockSize; 1229 if (buflen > 0) { 1230 buffer = new unsigned char[ buflen ]; 1231 unsigned long r = loadBigBlocks(blocks, buffer, buflen); 1232 if (r != buflen) { 1233 qCritical() << Q_FUNC_INFO << "SAT construction failed!"; 1234 delete[] buffer; 1235 return; 1236 } 1237 bbat->load(buffer, buflen); 1238 delete[] buffer; 1239 1240 if (!bbat->valid(filesize, header->b_shift, true)) { 1241 return; 1242 } 1243 } 1244 //TODO: make fail affect the result value 1245 bool fail = false; 1246 1247 // load small bat 1248 blocks.clear(); 1249 blocks = bbat->follow(header->sbat_start, fail); 1250 buflen = blocks.size() * bbat->blockSize; 1251 if (buflen > 0) { 1252 buffer = new unsigned char[ buflen ]; 1253 unsigned long r = loadBigBlocks(blocks, buffer, buflen); 1254 if (r != buflen) { 1255 qCritical() << Q_FUNC_INFO << "SSAT construction failed!"; 1256 delete[] buffer; 1257 return; 1258 } 1259 sbat->load(buffer, buflen); 1260 delete[] buffer; 1261 1262 if (!sbat->valid(filesize, header->s_shift, false)) { 1263 return; 1264 } 1265 } 1266 1267 // load directory tree 1268 blocks.clear(); 1269 blocks = bbat->follow(header->dirent_start, fail); 1270 buflen = blocks.size() * bbat->blockSize; 1271 buffer = new unsigned char[ buflen ]; 1272 unsigned long r = loadBigBlocks(blocks, buffer, buflen); 1273 if (r != buflen) { 1274 qCritical() << Q_FUNC_INFO << "DirTree construction failed!"; 1275 delete[] buffer; 1276 return; 1277 } 1278 dirtree->load(buffer, buflen, header->threshold, max_sbat_block, max_bbat_block); 1279 unsigned sb_start = readU32(buffer + 0x74); 1280 delete[] buffer; 1281 if 
(!dirtree->valid()) { 1282 qCritical() << Q_FUNC_INFO << "Invalid DirTree!"; 1283 return; 1284 } 1285 1286 // fetch block chain as data for small-files 1287 sb_blocks = bbat->follow(sb_start, fail); 1288 1289 // for troubleshooting, just enable this block 1290 #ifdef POLE_DEBUG 1291 header->debug(); 1292 sbat->debug(); 1293 bbat->debug(); 1294 dirtree->debug(); 1295 #endif 1296 1297 // so far so good 1298 result = Storage::Ok; 1299 opened = true; 1300 } 1301 1302 void StorageIO::create() 1303 { 1304 // std::cout << "Creating " << filename << std::endl; 1305 1306 file.open(filename.c_str(), std::ios::out | std::ios::binary); 1307 if (!file.good()) { 1308 qCritical() << Q_FUNC_INFO << "Can't create file:" << filename.c_str(); 1309 result = Storage::OpenFailed; 1310 return; 1311 } 1312 1313 // so far so good 1314 opened = true; 1315 result = Storage::Ok; 1316 } 1317 1318 void StorageIO::flush() 1319 { 1320 /* Note on Microsoft implementation: 1321 - directory entries are stored in the last block(s) 1322 - BATs are as second to the last 1323 - Meta BATs are third to the last 1324 */ 1325 } 1326 1327 void StorageIO::close() 1328 { 1329 if (!opened) return; 1330 1331 file.close(); 1332 opened = false; 1333 1334 std::list<Stream*>::iterator it; 1335 for (it = streams.begin(); it != streams.end(); ++it) 1336 delete *it; 1337 } 1338 1339 StreamIO* StorageIO::streamIO(const std::string& name) 1340 { 1341 #ifdef POLE_DEBUG 1342 qDebug() << Q_FUNC_INFO << "preparing stream:" << name.c_str(); 1343 #endif 1344 // sanity check 1345 if (!name.length()) return (StreamIO*)0; 1346 1347 // search in the entries 1348 DirEntry* entry = dirtree->entry(name); 1349 //if( entry) std::cout << "FOUND\n"; 1350 if (!entry) return (StreamIO*)0; 1351 //if( !entry->dir ) std::cout << " NOT DIR\n"; 1352 if (entry->dir) return (StreamIO*)0; 1353 1354 StreamIO* result = new StreamIO(this, entry); 1355 result->fullName = name; 1356 1357 return result; 1358 } 1359 1360 unsigned long 
StorageIO::loadBigBlocks(const std::vector<unsigned long>& blocks, 1361 unsigned char* data, unsigned long maxlen) 1362 { 1363 return loadBigBlocks(&blocks[0], blocks.size(), data, maxlen); 1364 } 1365 1366 unsigned long StorageIO::loadBigBlocks(const unsigned long *blocks, unsigned blockCount, 1367 unsigned char *data, unsigned long maxlen) 1368 { 1369 // sentinel 1370 if (!data) return 0; 1371 if (!file.good()) return 0; 1372 if (!blocks) return 0; 1373 if (blockCount < 1) return 0; 1374 if (maxlen == 0) return 0; 1375 1376 // read block one by one, seems fast enough 1377 unsigned long bytes = 0; 1378 for (unsigned long i = 0; (i < blockCount) && (bytes < maxlen); i++) { 1379 unsigned long block = blocks[i]; 1380 unsigned long pos = bbat->blockSize * (block + 1); 1381 unsigned long p = (bbat->blockSize < maxlen - bytes) ? bbat->blockSize : maxlen - bytes; 1382 if (pos + p > filesize) p = filesize - pos; 1383 file.seekg(pos); 1384 file.read((char*)data + bytes, p); 1385 if (!file.good()) return 0; 1386 bytes += p; 1387 } 1388 1389 return bytes; 1390 } 1391 1392 unsigned long StorageIO::loadBigBlock(unsigned long block, 1393 unsigned char* data, unsigned long maxlen) 1394 { 1395 // sentinel 1396 if (!data) return 0; 1397 if (!file.good()) return 0; 1398 1399 return loadBigBlocks(&block, 1, data, maxlen); 1400 } 1401 1402 // return number of bytes which has been read 1403 unsigned long StorageIO::loadSmallBlocks(const std::vector<unsigned long>& blocks, 1404 unsigned char* data, unsigned long maxlen) 1405 { 1406 return loadSmallBlocks(&blocks[0], blocks.size(), data, maxlen); 1407 } 1408 1409 unsigned long StorageIO::loadSmallBlocks(const unsigned long *blocks, unsigned blockCount, 1410 unsigned char *data, unsigned long maxlen) 1411 { 1412 // sentinel 1413 if (!data) return 0; 1414 if (!file.good()) return 0; 1415 if (!blocks) return 0; 1416 if (blockCount < 1) return 0; 1417 if (maxlen == 0) return 0; 1418 1419 // our own local buffer 1420 unsigned char* buf = new 
unsigned char[ bbat->blockSize ]; 1421 1422 // read small block one by one 1423 unsigned long bytes = 0; 1424 for (unsigned long i = 0; (i < blockCount) && (bytes < maxlen); i++) { 1425 unsigned long block = blocks[i]; 1426 1427 // find where the small-block exactly is 1428 unsigned long pos = block * sbat->blockSize; 1429 unsigned long bbindex = pos / bbat->blockSize; 1430 if (bbindex >= sb_blocks.size()) break; 1431 1432 unsigned long r = loadBigBlock(sb_blocks[ bbindex ], buf, bbat->blockSize); 1433 if (r != bbat->blockSize) { 1434 delete[] buf; 1435 return 0; 1436 } 1437 1438 // copy the data 1439 unsigned offset = pos % bbat->blockSize; 1440 unsigned long p = (maxlen - bytes < bbat->blockSize - offset) ? maxlen - bytes : bbat->blockSize - offset; 1441 p = (sbat->blockSize < p) ? sbat->blockSize : p; 1442 memcpy(data + bytes, buf + offset, p); 1443 bytes += p; 1444 } 1445 1446 delete[] buf; 1447 1448 return bytes; 1449 } 1450 1451 unsigned long StorageIO::loadSmallBlock(unsigned long block, 1452 unsigned char* data, unsigned long maxlen) 1453 { 1454 // sentinel 1455 if (!data) return 0; 1456 if (!file.good()) return 0; 1457 1458 return loadSmallBlocks(&block, 1, data, maxlen); 1459 } 1460 1461 // =========== StreamIO ========== 1462 1463 StreamIO::StreamIO(StorageIO* s, DirEntry* e) 1464 { 1465 io = s; 1466 entry = e; 1467 eof = false; 1468 fail = false; 1469 1470 m_pos = 0; 1471 1472 if (entry->size >= io->header->threshold) { 1473 blocks = io->bbat->follow(entry->start, fail); 1474 } else { 1475 blocks = io->sbat->follow(entry->start, fail); 1476 } 1477 1478 // prepare cache 1479 cache_pos = 0; 1480 base_cache_size = cache_size = 4096; // optimal ? 
1481 cache_data = new unsigned char[base_cache_size]; 1482 updateCache(); 1483 } 1484 1485 // FIXME tell parent we're gone 1486 StreamIO::~StreamIO() 1487 { 1488 delete[] cache_data; 1489 } 1490 1491 void StreamIO::seek(unsigned long pos) 1492 { 1493 m_pos = pos; 1494 } 1495 1496 unsigned long StreamIO::tell() 1497 { 1498 return m_pos; 1499 } 1500 1501 int StreamIO::getch() 1502 { 1503 // past end-of-file ? 1504 if (m_pos > entry->size) return -1; 1505 1506 // need to update cache ? 1507 if (!cache_size || (m_pos < cache_pos) || 1508 (m_pos >= cache_pos + cache_size)) 1509 updateCache(); 1510 1511 // something bad if we don't get good cache 1512 if (!cache_size) return -1; 1513 1514 int data = cache_data[m_pos - cache_pos]; 1515 m_pos++; 1516 1517 return data; 1518 } 1519 1520 unsigned long StreamIO::read(unsigned char *data, unsigned long maxlen) 1521 { 1522 // sanity checks 1523 if (!data) return 0; 1524 if (maxlen == 0) return 0; 1525 1526 unsigned long totalbytes = 0; 1527 1528 while (totalbytes < maxlen) { 1529 // need to update cache ? 
1530 if (!cache_size || (m_pos < cache_pos) || 1531 (m_pos >= cache_pos + cache_size)) 1532 updateCache(); 1533 if (!cache_size) break; 1534 1535 const unsigned long remaining = cache_size - (m_pos - cache_pos); 1536 const unsigned long count = std::min(remaining, maxlen - totalbytes); 1537 memcpy(data + totalbytes, &cache_data[m_pos - cache_pos], count); 1538 totalbytes += count; 1539 m_pos += count; 1540 } 1541 return totalbytes; 1542 } 1543 1544 unsigned long StreamIO::readInternal(unsigned long pos, unsigned char* data, unsigned long maxlen) 1545 { 1546 // sanity checks 1547 if (!data) return 0; 1548 if (maxlen == 0) return 0; 1549 1550 unsigned long totalbytes = 0; 1551 1552 if (entry->size < io->header->threshold) { 1553 // small file 1554 unsigned long index = pos / io->sbat->blockSize; 1555 1556 if (index >= blocks.size()) return 0; 1557 1558 unsigned char buf[4096]; 1559 unsigned long offset = pos % io->sbat->blockSize; 1560 while (totalbytes < maxlen) { 1561 if (index >= blocks.size()) break; 1562 io->loadSmallBlock(blocks[index], &buf[0], io->bbat->blockSize); 1563 unsigned long count = io->sbat->blockSize - offset; 1564 if (count > maxlen - totalbytes) count = maxlen - totalbytes; 1565 memcpy(data + totalbytes, &buf[0] + offset, count); 1566 totalbytes += count; 1567 offset = 0; 1568 index++; 1569 } 1570 1571 } else { 1572 // big file 1573 unsigned long index = pos / io->bbat->blockSize; 1574 1575 if (index >= blocks.size()) return 0; 1576 1577 unsigned char buf[4096]; 1578 unsigned long offset = pos % io->bbat->blockSize; 1579 while (totalbytes < maxlen) { 1580 if (index >= blocks.size()) break; 1581 unsigned long r = io->loadBigBlock(blocks[index], &buf[0], io->bbat->blockSize); 1582 if (r != io->bbat->blockSize) { 1583 return 0; 1584 } 1585 unsigned long count = io->bbat->blockSize - offset; 1586 if (count > maxlen - totalbytes) count = maxlen - totalbytes; 1587 memcpy(data + totalbytes, &buf[0] + offset, count); 1588 totalbytes += count; 1589 
index++; 1590 offset = 0; 1591 } 1592 1593 } 1594 1595 return totalbytes; 1596 } 1597 1598 unsigned long StreamIO::readInternal(unsigned char* data, unsigned long maxlen) 1599 { 1600 unsigned long bytes = readInternal(tell(), data, maxlen); 1601 m_pos += bytes; 1602 return bytes; 1603 } 1604 1605 void StreamIO::updateCache() 1606 { 1607 // sanity check 1608 if (!cache_data) return; 1609 1610 cache_pos = m_pos - (m_pos % base_cache_size); 1611 unsigned long bytes = base_cache_size; 1612 if (cache_pos + bytes > entry->size) bytes = entry->size - cache_pos; 1613 if (cache_pos + bytes <= m_pos) { 1614 cache_size = 0; 1615 } else { 1616 cache_size = readInternal(cache_pos, cache_data, bytes); 1617 } 1618 } 1619 1620 1621 // =========== Storage ========== 1622 1623 Storage::Storage(const char* filename) 1624 { 1625 io = new StorageIO(this, filename); 1626 } 1627 1628 Storage::~Storage() 1629 { 1630 delete io; 1631 } 1632 1633 int Storage::result() 1634 { 1635 return io->result; 1636 } 1637 1638 bool Storage::open() 1639 { 1640 return io->open(); 1641 } 1642 1643 void Storage::close() 1644 { 1645 io->close(); 1646 } 1647 1648 std::list<std::string> Storage::entries(const std::string& path) 1649 { 1650 std::list<std::string> result; 1651 DirTree* dt = io->dirtree; 1652 DirEntry* e = dt->entry(path, false); 1653 if (e) { 1654 if (e->dir) { 1655 unsigned parent = dt->indexOf(e); 1656 std::vector<unsigned> children = dt->children(parent); 1657 for (unsigned i = 0; i < children.size(); i++) 1658 result.push_back(dt->entry(children[i])->name); 1659 } 1660 } 1661 return result; 1662 } 1663 1664 bool Storage::isDirectory(const std::string& name) 1665 { 1666 DirEntry* e = io->dirtree->entry(name, false); 1667 return e ? 
e->dir : false; 1668 } 1669 1670 // =========== Stream ========== 1671 1672 Stream::Stream(Storage* storage, const std::string& name) 1673 { 1674 io = storage->io->streamIO(name); 1675 } 1676 1677 // FIXME tell parent we're gone 1678 Stream::~Stream() 1679 { 1680 delete io; 1681 } 1682 1683 std::string Stream::fullName() 1684 { 1685 return io ? io->fullName : std::string(); 1686 } 1687 1688 unsigned long Stream::tell() 1689 { 1690 return io ? io->tell() : 0; 1691 } 1692 1693 void Stream::seek(unsigned long newpos) 1694 { 1695 if (io) io->seek(newpos); 1696 } 1697 1698 unsigned long Stream::size() 1699 { 1700 return io ? io->entry->size : 0; 1701 } 1702 1703 int Stream::getch() 1704 { 1705 return io ? io->getch() : 0; 1706 } 1707 1708 unsigned long Stream::read(unsigned char* data, unsigned long maxlen) 1709 { 1710 return io ? io->read(data, maxlen) : 0; 1711 } 1712 1713 bool Stream::eof() 1714 { 1715 return io ? io->eof : false; 1716 } 1717 1718 bool Stream::fail() 1719 { 1720 return io ? io->fail : true; 1721 }