File indexing completed on 2024-12-08 12:55:56
0001 /* POLE - Portable C++ library to access OLE Storage 0002 Copyright (C) 2002-2005 Ariya Hidayat <ariya@kde.org> 0003 Copyright (C) 2011-2012 Matus Uzak <matus.uzak@ixonos.com> 0004 0005 Redistribution and use in source and binary forms, with or without 0006 modification, are permitted provided that the following conditions 0007 are met: 0008 * Redistributions of source code must retain the above copyright notice, 0009 this list of conditions and the following disclaimer. 0010 * Redistributions in binary form must reproduce the above copyright notice, 0011 this list of conditions and the following disclaimer in the documentation 0012 and/or other materials provided with the distribution. 0013 * Neither the name of the authors nor the names of its contributors may be 0014 used to endorse or promote products derived from this software without 0015 specific prior written permission. 0016 0017 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 0018 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 0019 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 0020 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 0021 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 0022 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 0023 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 0024 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 0025 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 0026 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 0027 THE POSSIBILITY OF SUCH DAMAGE. 0028 */ 0029 0030 #include "pole.h" 0031 0032 #include <fstream> 0033 #include <iostream> 0034 #include <list> 0035 #include <string> 0036 #include <vector> 0037 #include <string.h> 0038 #include <ios> // for std::hex 0039 0040 #include <QList> 0041 #include <QString> 0042 #include <QDebug> 0043 0044 //Enable to activate debugging output. 0045 //#define POLE_DEBUG 0046 0047 //Disabled because of too many false positives, both streams and unknown 0048 //objects MAY be invalid and still have a size set. 0049 //#define POLE_FAIL_ON_NEMPTY_NVALID_OBJS 0050 0051 //Validate stream object against [MS-CFB] — v20110318. Disabled because of too 0052 //many false positives. 0053 //#define POLE_FAIL_ON_NVALID_STREAM_OBJS 0054 0055 //Validate storage object against [MS-CFB] — v20110318. Disabled because of 0056 //too many false positives on Word8 documents. 0057 //#define POLE_CHECK_STORAGE_OBJS 0058 0059 //Validate sibling names against positions in the black red tree. Disabled 0060 //because of too many false positives on Word8 files with embedded documents. 0061 //#define POLE_CHECK_SIBLINGS 0062 0063 #define OLE_HEADER_SIZE 0x200 0064 0065 namespace POLE 0066 { 0067 0068 class Header 0069 { 0070 public: 0071 unsigned char id[8]; // signature, or magic identifier 0072 unsigned b_shift; // bbat->blockSize = 1 << b_shift 0073 unsigned s_shift; // sbat->blockSize = 1 << s_shift 0074 unsigned num_bat; // blocks allocated for big bat 0075 unsigned dirent_start; // starting block for directory info 0076 unsigned threshold; // switch from small to big file (usually 4K) 0077 unsigned sbat_start; // starting block index to store small bat 0078 unsigned num_sbat; // blocks allocated for small bat 0079 unsigned mbat_start; // starting block to store meta bat 0080 unsigned num_mbat; // blocks allocated for meta bat 0081 unsigned long bb_blocks[109]; 0082 0083 Header(); 0084 bool valid(const unsigned max_sbat_block, const unsigned max_bbat_block) const; 0085 void load(const unsigned char* buffer); 0086 void save(unsigned char* buffer); 0087 void debug(); 0088 }; 0089 0090 class AllocTable 0091 { 0092 public: 0093 static const unsigned Eof; 0094 static const unsigned Avail; 0095 static const unsigned Bat; 0096 static const unsigned MetaBat; 0097 unsigned blockSize; 0098 AllocTable(); 0099 bool valid(const unsigned long filesize, const unsigned shift, const bool isFat = true) const; 0100 void clear(); 0101 unsigned long count(); 0102 void resize(unsigned long newsize); 0103 void preserve(unsigned long n); 0104 void set(unsigned long index, unsigned long val); 0105 unsigned unused(); 0106 void setChain(std::vector<unsigned long>); 0107 std::vector<unsigned long> follow(unsigned long start, bool& fail); 0108 unsigned long operator[](unsigned long index); 0109 void load(const unsigned char* buffer, unsigned len); 0110 void save(unsigned char* buffer); 0111 unsigned size(); 0112 void debug(); 0113 private: 0114 std::vector<unsigned long> data; 0115 AllocTable(const AllocTable&); 0116 AllocTable& operator=(const AllocTable&); 0117 }; 0118 0119 class DirEntry 0120 { 0121 public: 0122 bool valid; // false if invalid (should be skipped) 0123 std::string name; // the name, not in unicode anymore 0124 bool dir; // true if directory 0125 unsigned long size; // size (not valid if directory) 0126 unsigned long start; // starting block 0127 unsigned prev; // previous sibling 0128 unsigned next; // next sibling 0129 unsigned child; // first child 0130 }; 0131 0132 class DirTree 0133 { 0134 public: 0135 static const unsigned End; 0136 DirTree(); 0137 bool valid(void) const; 0138 void clear(); 0139 unsigned entryCount(); 0140 DirEntry* entry(unsigned index); 0141 DirEntry* entry(const std::string& name, bool create = false); 0142 int indexOf(DirEntry* e); 0143 int parent(unsigned index); 0144 std::string fullName(unsigned index); 0145 std::vector<unsigned> children(unsigned index); 0146 void load(unsigned char* buffer, unsigned len, const unsigned threshold, const unsigned max_sbat, const unsigned max_bbat); 0147 void save(unsigned char* buffer); 0148 unsigned size(); 0149 void debug(); 0150 private: 0151 std::vector<DirEntry> entries; 0152 DirTree(const DirTree&); 0153 DirTree& operator=(const DirTree&); 0154 }; 0155 0156 class StorageIO 0157 { 0158 public: 0159 Storage* storage; // owner 0160 std::string filename; // filename 0161 std::fstream file; // associated with above name 0162 int result; // result of operation 0163 bool opened; // true if file is opened 0164 unsigned long filesize; // size of the file 0165 0166 Header* header; // storage header 0167 DirTree* dirtree; // directory tree 0168 AllocTable* bbat; // allocation table for big blocks 0169 AllocTable* sbat; // allocation table for small blocks 0170 0171 std::vector<unsigned long> sb_blocks; // blocks for "small" files 0172 0173 std::list<Stream*> streams; 0174 0175 StorageIO(Storage* storage, const char* filename); 0176 ~StorageIO(); 0177 0178 bool open(); 0179 void close(); 0180 void flush(); 0181 void load(); 0182 void create(); 0183 0184 unsigned long loadBigBlocks(const std::vector<unsigned long>& blocks, unsigned char* buffer, unsigned long maxlen); 0185 unsigned long loadBigBlocks(const unsigned long* blocks, unsigned blockCount, unsigned char* buffer, unsigned long maxlen); 0186 0187 unsigned long loadBigBlock(unsigned long block, unsigned char* buffer, unsigned long maxlen); 0188 0189 unsigned long loadSmallBlocks(const std::vector<unsigned long>& blocks, unsigned char* buffer, unsigned long maxlen); 0190 unsigned long loadSmallBlocks(const unsigned long* blocks, unsigned blockCount, unsigned char* buffer, unsigned long maxlen); 0191 0192 unsigned long loadSmallBlock(unsigned long block, unsigned char* buffer, unsigned long maxlen); 0193 0194 StreamIO* streamIO(const std::string& name); 0195 0196 private: 0197 // no copy or assign 0198 StorageIO(const StorageIO&); 0199 StorageIO& operator=(const StorageIO&); 0200 0201 }; 0202 0203 class StreamIO 0204 { 0205 public: 0206 StorageIO* io; 0207 DirEntry* entry; 0208 std::string fullName; 0209 bool eof; 0210 bool fail; 0211 0212 StreamIO(StorageIO* io, DirEntry* entry); 0213 ~StreamIO(); 0214 unsigned long size(); 0215 void seek(unsigned long pos); 0216 unsigned long tell(); 0217 int getch(); 0218 unsigned long read(unsigned char* data, unsigned long maxlen); 0219 0220 private: 0221 unsigned long readInternal(unsigned char* data, unsigned long maxlen); 0222 unsigned long readInternal(unsigned long pos, unsigned char* data, unsigned long maxlen); 0223 0224 std::vector<unsigned long> blocks; 0225 0226 // no copy or assign 0227 StreamIO(const StreamIO&); 0228 StreamIO& operator=(const StreamIO&); 0229 0230 // pointer for read 0231 unsigned long m_pos; 0232 0233 // simple cache system to speed-up getch() 0234 unsigned char* cache_data; 0235 unsigned long base_cache_size; 0236 unsigned long cache_size; 0237 unsigned long cache_pos; 0238 void updateCache(); 0239 }; 0240 0241 } // namespace POLE 0242 0243 using namespace POLE; 0244 0245 static inline unsigned long readU16(const unsigned char* ptr) 0246 { 0247 return ptr[0] + (ptr[1] << 8); 0248 } 0249 0250 static inline unsigned long readU32(const unsigned char* ptr) 0251 { 0252 return unsigned(ptr[0]) + (unsigned(ptr[1]) << 8 ) + (unsigned(ptr[2]) << 16) + (unsigned(ptr[3]) << 24); 0253 } 0254 0255 static inline void writeU16(unsigned char* ptr, unsigned long data) 0256 { 0257 ptr[0] = (unsigned char)(data & 0xff); 0258 ptr[1] = (unsigned char)((data >> 8) & 0xff); 0259 } 0260 0261 static inline void writeU32(unsigned char* ptr, unsigned long data) 0262 { 0263 ptr[0] = (unsigned char)(data & 0xff); 0264 ptr[1] = (unsigned char)((data >> 8) & 0xff); 0265 ptr[2] = (unsigned char)((data >> 16) & 0xff); 0266 ptr[3] = (unsigned char)((data >> 24) & 0xff); 0267 } 0268 0269 static const unsigned char pole_magic[] = { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 }; 0270 0271 // =========== Header ========== 0272 0273 Header::Header() 0274 { 0275 b_shift = 9; 0276 s_shift = 6; 0277 num_bat = 0; 0278 dirent_start = 0; 0279 threshold = 4096; 0280 sbat_start = 0; 0281 num_sbat = 0; 0282 mbat_start = 0; 0283 num_mbat = 0; 0284 0285 for (unsigned i = 0; i < 8; i++) 0286 id[i] = pole_magic[i]; 0287 for (unsigned i = 0; i < 109; i++) 0288 bb_blocks[i] = AllocTable::Avail; 0289 } 0290 0291 bool Header::valid(const unsigned max_sbat_block, const unsigned max_bbat_block) const 0292 { 0293 if (threshold != 4096) return false; 0294 if (num_bat == 0) return false; 0295 if ((num_bat > 109) && (num_bat > (num_mbat * 127) + 109)) return false; 0296 if ((num_bat < 109) && (num_mbat != 0)) return false; 0297 if (s_shift > b_shift) return false; 0298 if (b_shift <= 6) return false; 0299 if (b_shift > 12) return false; 0300 0301 // additional heuristics to check the header 0302 if (num_sbat > max_sbat_block) return false; 0303 if (num_bat > max_bbat_block) return false; 0304 0305 #ifdef POLE_DEBUG 0306 const unsigned ENDOFCHAIN = 0xfffffffe; 0307 const unsigned FREESECT = 0xffffffff; 0308 0309 if (num_sbat == 0 && 0310 sbat_start != ENDOFCHAIN && 0311 sbat_start != FREESECT) 0312 { 0313 qDebug() << Q_FUNC_INFO << 0314 "There aren't any minifat sectors, but there are links to some!"; 0315 } 0316 #endif 0317 0318 return true; 0319 } 0320 0321 void Header::load(const unsigned char* buffer) 0322 { 0323 b_shift = readU16(buffer + 0x1e); // sector shift 0324 s_shift = readU16(buffer + 0x20); // mini sector shift 0325 num_bat = readU32(buffer + 0x2c); // number of fat sectors 0326 dirent_start = readU32(buffer + 0x30); // first directory sector location 0327 threshold = readU32(buffer + 0x38); // transaction signature number 0328 sbat_start = readU32(buffer + 0x3c); // first mini fat sector location 0329 num_sbat = readU32(buffer + 0x40); // mini stream cutoff size 0330 mbat_start = readU32(buffer + 0x44); // first mini difat sector location 0331 num_mbat = readU32(buffer + 0x48); // number of difat sectors 0332 0333 for (unsigned i = 0; i < 8; i++) 0334 id[i] = buffer[i]; 0335 for (unsigned i = 0; i < 109; i++) 0336 bb_blocks[i] = readU32(buffer + 0x4C + i * 4); 0337 } 0338 0339 void Header::save(unsigned char* buffer) 0340 { 0341 memset(buffer, 0, 0x4c); 0342 memcpy(buffer, pole_magic, 8); // ole signature 0343 writeU32(buffer + 8, 0); // unknown 0344 writeU32(buffer + 12, 0); // unknown 0345 writeU32(buffer + 16, 0); // unknown 0346 writeU16(buffer + 24, 0x003e); // revision ? 0347 writeU16(buffer + 26, 3); // version ? 0348 writeU16(buffer + 28, 0xfffe); // unknown 0349 writeU16(buffer + 0x1e, b_shift); 0350 writeU16(buffer + 0x20, s_shift); 0351 writeU32(buffer + 0x2c, num_bat); 0352 writeU32(buffer + 0x30, dirent_start); 0353 writeU32(buffer + 0x38, threshold); 0354 writeU32(buffer + 0x3c, sbat_start); 0355 writeU32(buffer + 0x40, num_sbat); 0356 writeU32(buffer + 0x44, mbat_start); 0357 writeU32(buffer + 0x48, num_mbat); 0358 0359 for (unsigned i = 0; i < 109; i++) 0360 writeU32(buffer + 0x4C + i*4, bb_blocks[i]); 0361 } 0362 0363 void Header::debug() 0364 { 0365 qDebug() << Q_FUNC_INFO; 0366 qDebug() << "b_shift:" << b_shift; 0367 qDebug() << "s_shift:" << s_shift; 0368 qDebug() << "num_bat:" << num_bat; 0369 qDebug() << "dirent_start: 0x" << hex << dirent_start; 0370 qDebug() << "threshold:" << dec << threshold; 0371 qDebug() << "sbat_start: 0x" << hex << sbat_start; 0372 qDebug() << "num_sbat:" << dec << num_sbat; 0373 qDebug() << "mbat_start: 0x" << hex << mbat_start; 0374 qDebug() << "num_mbat:" << dec << num_mbat; 0375 0376 unsigned s = (num_bat <= 109) ? num_bat : 109; 0377 std::cout << "bat blocks:"; 0378 for (unsigned i = 0; i < s; i++) { 0379 std::cout << "0x" << std::hex << bb_blocks[i] << " "; 0380 } 0381 std::cout << std::dec << std::endl; 0382 } 0383 0384 // =========== AllocTable ========== 0385 0386 const unsigned AllocTable::Avail = 0xffffffff; 0387 const unsigned AllocTable::Eof = 0xfffffffe; 0388 const unsigned AllocTable::Bat = 0xfffffffd; 0389 const unsigned AllocTable::MetaBat = 0xfffffffc; 0390 0391 AllocTable::AllocTable() 0392 { 0393 blockSize = 4096; 0394 // initial size 0395 resize(128); 0396 } 0397 0398 bool AllocTable::valid(const unsigned long filesize, const unsigned shift, const bool isFat) const 0399 { 0400 unsigned long offset = 0; 0401 for (unsigned long i = 0; i < data.size(); i++) { 0402 switch (data[i]) { 0403 case AllocTable::Avail: 0404 case AllocTable::Eof: 0405 case AllocTable::Bat: 0406 case AllocTable::MetaBat: 0407 break; 0408 default: 0409 offset = data[i] << shift; 0410 if (isFat) { 0411 offset += OLE_HEADER_SIZE; 0412 } 0413 if (offset > filesize) { 0414 #ifdef POLE_DEBUG 0415 qDebug() << "Invalid location of sector in the stream!" << 0416 "offset:" << offset << " | filesize:" << filesize; 0417 #endif 0418 return false; 0419 } 0420 } 0421 } 0422 return true; 0423 } 0424 0425 unsigned long AllocTable::count() 0426 { 0427 return data.size(); 0428 } 0429 0430 void AllocTable::resize(unsigned long newsize) 0431 { 0432 unsigned oldsize = data.size(); 0433 data.resize(newsize); 0434 if (newsize > oldsize) 0435 for (unsigned i = oldsize; i < newsize; i++) 0436 data[i] = Avail; 0437 } 0438 0439 // make sure there're still free blocks 0440 void AllocTable::preserve(unsigned long n) 0441 { 0442 std::vector<unsigned long> pre; 0443 for (unsigned i = 0; i < n; i++) 0444 pre.push_back(unused()); 0445 } 0446 0447 unsigned long AllocTable::operator[](unsigned long index) 0448 { 0449 unsigned long result; 0450 result = data[index]; 0451 return result; 0452 } 0453 0454 void AllocTable::set(unsigned long index, unsigned long value) 0455 { 0456 if (index >= count()) resize(index + 1); 0457 data[ index ] = value; 0458 } 0459 0460 void AllocTable::setChain(std::vector<unsigned long> chain) 0461 { 0462 if (chain.size()) { 0463 for (unsigned i = 0; i < chain.size() - 1; i++) 0464 set(chain[i], chain[i+1]); 0465 set(chain[ chain.size()-1 ], AllocTable::Eof); 0466 } 0467 } 0468 0469 // follow 0470 std::vector<unsigned long> AllocTable::follow(unsigned long start, bool& fail) 0471 { 0472 std::vector<unsigned long> chain; 0473 0474 if (start >= count()) { 0475 #ifdef POLE_DEBUG 0476 qDebug() << Q_FUNC_INFO << "start >= count()!"; 0477 #endif 0478 fail = true; 0479 return chain; 0480 } 0481 0482 unsigned long p = start; 0483 while (p < count()) { 0484 if (p == (unsigned long)Eof) { 0485 #ifdef POLE_DEBUG 0486 qDebug() << Q_FUNC_INFO << "Eof detected!"; 0487 #endif 0488 break; 0489 } 0490 if (p == (unsigned long)Bat) { 0491 #ifdef POLE_DEBUG 0492 qDebug() << Q_FUNC_INFO << "Bat detected!"; 0493 #endif 0494 break; 0495 } 0496 if (p == (unsigned long)MetaBat) { 0497 #ifdef POLE_DEBUG 0498 qDebug() << Q_FUNC_INFO << "MetaBat detected!"; 0499 #endif 0500 break; 0501 } 0502 if (p >= count()) { 0503 #ifdef POLE_DEBUG 0504 qDebug() << Q_FUNC_INFO << "Invalid index detected!"; 0505 #endif 0506 fail = true; 0507 break; 0508 } 0509 chain.push_back(p); 0510 0511 // break if the chain is longer than the total sector count 0512 if (chain.size() > count()) { 0513 #ifdef POLE_DEBUG 0514 qDebug() << Q_FUNC_INFO << "Probably a loop detected!"; 0515 #endif 0516 fail = true; 0517 break; 0518 } 0519 p = data[ p ]; 0520 } 0521 if (p != (unsigned long)AllocTable::Eof) { 0522 #ifdef POLE_DEBUG 0523 qDebug() << Q_FUNC_INFO << "Last chain entry MUST be 0x" << hex << AllocTable::Eof << 0524 ", detected: 0x" << hex << p; 0525 #endif 0526 fail = true; 0527 } 0528 0529 return chain; 0530 } 0531 0532 unsigned AllocTable::unused() 0533 { 0534 // find first available block 0535 for (unsigned i = 0; i < data.size(); i++) 0536 if (data[i] == Avail) 0537 return i; 0538 0539 // completely full, so enlarge the table 0540 unsigned block = data.size(); 0541 resize(data.size() + 10); 0542 return block; 0543 } 0544 0545 void AllocTable::load(const unsigned char* buffer, unsigned len) 0546 { 0547 resize(len / 4); 0548 for (unsigned i = 0; i < count(); i++) 0549 set(i, readU32(buffer + i*4)); 0550 } 0551 0552 // return space required to save this dirtree 0553 unsigned AllocTable::size() 0554 { 0555 return count() * 4; 0556 } 0557 0558 void AllocTable::save(unsigned char* buffer) 0559 { 0560 for (unsigned i = 0; i < count(); i++) 0561 writeU32(buffer + i*4, data[i]); 0562 } 0563 0564 void AllocTable::debug() 0565 { 0566 qDebug() << "block size " << data.size(); 0567 for (unsigned i = 0; i < data.size(); i++) { 0568 if (data[i] == Avail) continue; 0569 std::cout << i << ": "; 0570 if (data[i] == Eof) std::cout << "[eof]"; 0571 else if (data[i] == Bat) std::cout << "[bat]"; 0572 else if (data[i] == MetaBat) std::cout << "[metabat]"; 0573 else std::cout << data[i]; 0574 std::cout << std::endl; 0575 } 0576 } 0577 0578 // =========== DirTree ========== 0579 0580 const unsigned DirTree::End = 0xffffffff; 0581 0582 /* 0583 * Compare DirEntry names according to the spec. 0584 */ 0585 int ename_cmp(QString& str1, QString& str2) 0586 { 0587 str1 = str1.toUpper(); 0588 str2 = str2.toUpper(); 0589 if (str1.size() < str2.size()) return -1; 0590 else if (str1.size() > str2.size()) return 1; 0591 else return str1.compare(str2); 0592 } 0593 0594 /* 0595 * Check if DirEntry elements at this level have unique names. 0596 */ 0597 bool valid_enames(DirTree* dirtree, unsigned index) 0598 { 0599 std::vector<unsigned> chi = dirtree->children(index); 0600 QList<std::string> names; 0601 DirEntry* e = 0; 0602 0603 #ifdef POLE_DEBUG 0604 if (chi.size()) { 0605 qDebug() << "---------------------"; 0606 qDebug() << Q_FUNC_INFO; 0607 qDebug() << "[KIDS]:"; 0608 } 0609 for (unsigned i = 0; i < chi.size(); i++) { 0610 e = dirtree->entry(chi[i]); 0611 if (!e->valid) std::cout << "[INVALID] "; 0612 printf("DirEntry: name=%s prev=%i next=%i child=%i start=%lu size=%lu dir=%i\n", 0613 e->name.c_str(), e->prev, e->next, e->child, e->start, e->size, e->dir); 0614 } 0615 if (chi.size()) { 0616 qDebug() << "---------------------"; 0617 } 0618 #endif 0619 0620 for (unsigned i = 0; i < chi.size(); i++) { 0621 e = dirtree->entry(chi[i]); 0622 if (e->valid) { 0623 if (names.contains(e->name)) { 0624 return false; 0625 } else { 0626 names.append(e->name); 0627 } 0628 } 0629 } 0630 return true; 0631 } 0632 0633 bool DirTree::valid() const 0634 { 0635 const DirEntry* e; 0636 QString str1, str2; 0637 0638 #ifdef POLE_DEBUG 0639 qDebug() << Q_FUNC_INFO; 0640 #endif 0641 for (unsigned i = 0; i < entries.size(); i++) { 0642 e = &entries[i]; 0643 0644 #ifdef POLE_DEBUG 0645 if (!e->valid) std::cout << "[INVALID] "; 0646 printf("DirEntry: name=%s prev=%i next=%i child=%i start=%lu size=%lu dir=%i\n", 0647 e->name.c_str(), e->prev, e->next, e->child, e->start, e->size, e->dir); 0648 #endif 0649 #ifdef POLE_FAIL_ON_NEMPTY_NVALID_OBJS 0650 if (!e->valid && e->size) { 0651 #ifdef POLE_DEBUG 0652 qDebug() << "Invalid DirEntry detected!"; 0653 #endif 0654 return false; 0655 } 0656 #endif 0657 //Looking for invalid stream objects. 0658 #ifdef POLE_FAIL_ON_NVALID_STREAM_OBJS 0659 if (!e->valid && !e->dir) { 0660 #ifdef POLE_DEBUG 0661 qDebug() << "Invalid DirEntry (stream object) detected!"; 0662 #endif 0663 return false; 0664 } 0665 #endif 0666 //Looking for invalid storage objects. 0667 if (!e->valid && e->dir) { 0668 #ifdef POLE_DEBUG 0669 qDebug() << "Invalid DirEntry (storage object) detected!"; 0670 #endif 0671 return false; 0672 } 0673 0674 //A root storage: size = size of the mini stream, start = first sector 0675 //of the mini stream, if the mini stream exists 0676 // 0677 //A storage object MAY have e->child set - [MS-CFB]. 0678 #ifdef POLE_CHECK_STORAGE_OBJS 0679 if ((e->valid && e->dir) && (i > 0) && 0680 ((e->start != 0) || (e->size != 0))) 0681 { 0682 #ifdef POLE_DEBUG 0683 qDebug() << "Invalid DirEntry (storage object) detected!"; 0684 #endif 0685 return false; 0686 } 0687 #endif 0688 //Looking for duplicate DirEntries in the storage object. 0689 if (e->valid && e->dir) { 0690 if (!valid_enames(const_cast<DirTree*>(this), i)) { 0691 #ifdef POLE_DEBUG 0692 qDebug() << "Invalid DirEntry (storage object) detected!"; 0693 #endif 0694 return false; 0695 } 0696 } 0697 0698 //Check the name of the left/right DirEntry. 0699 #ifdef POLE_CHECK_SIBLINGS 0700 if (e->prev != End) { 0701 str1 = QString(entries[e->prev].name.data()); 0702 } 0703 if (e->next != End) { 0704 str2 = QString(entries[e->next].name.data()); 0705 } 0706 if (!str1.isEmpty() && !str2.isEmpty()) { 0707 if (ename_cmp(str1, str2) > 0) { 0708 #ifdef POLE_DEBUG 0709 qDebug() << "DirEntry: [name, position] mismatch!"; 0710 #endif 0711 return false; 0712 } 0713 } 0714 #endif 0715 } 0716 return true; 0717 } 0718 0719 DirTree::DirTree() 0720 { 0721 clear(); 0722 } 0723 0724 void DirTree::clear() 0725 { 0726 // leave only root entry 0727 entries.resize(1); 0728 entries[0].valid = true; 0729 entries[0].name = "Root Entry"; 0730 entries[0].dir = true; 0731 entries[0].size = 0; 0732 entries[0].start = End; 0733 entries[0].prev = End; 0734 entries[0].next = End; 0735 entries[0].child = End; 0736 } 0737 0738 unsigned DirTree::entryCount() 0739 { 0740 return entries.size(); 0741 } 0742 0743 DirEntry* DirTree::entry(unsigned index) 0744 { 0745 if (index >= entryCount()) return (DirEntry*) 0; 0746 return &entries[ index ]; 0747 } 0748 0749 int DirTree::indexOf(DirEntry* e) 0750 { 0751 for (unsigned i = 0; i < entryCount(); i++) 0752 if (entry(i) == e) return i; 0753 0754 return -1; 0755 } 0756 0757 int DirTree::parent(unsigned index) 0758 { 0759 // brute-force, basically we iterate for each entries, find its children 0760 // and check if one of the children is 'index' 0761 for (unsigned j = 0; j < entryCount(); j++) { 0762 std::vector<unsigned> chi = children(j); 0763 for (unsigned i = 0; i < chi.size(); i++) 0764 if (chi[i] == index) 0765 return j; 0766 } 0767 0768 return -1; 0769 } 0770 0771 std::string DirTree::fullName(unsigned index) 0772 { 0773 // don't use root name ("Root Entry"), just give "/" 0774 if (index == 0) return "/"; 0775 0776 std::string result = entry(index)->name; 0777 result.insert(0, "/"); 0778 int p = parent(index); 0779 DirEntry * _entry = 0; 0780 while (p > 0) { 0781 _entry = entry(p); 0782 if (_entry->dir && _entry->valid) { 0783 result.insert(0, _entry->name); 0784 result.insert(0, "/"); 0785 } 0786 --p; 0787 index = p; 0788 if (index <= 0) break; 0789 } 0790 return result; 0791 } 0792 0793 // given a fullname (e.g "/ObjectPool/_1020961869"), find the entry 0794 // if not found and create is false, return 0 0795 // if create is true, a new entry is returned 0796 DirEntry* DirTree::entry(const std::string& name, bool create) 0797 { 0798 if (!name.length()) return (DirEntry*)0; 0799 0800 // quick check for "/" (that's root) 0801 if (name == "/") return entry(0); 0802 0803 // split the names, e.g "/ObjectPool/_1020961869" will become: 0804 // "ObjectPool" and "_1020961869" 0805 std::list<std::string> names; 0806 std::string::size_type start = 0, end = 0; 0807 if (name[0] == '/') start++; 0808 while (start < name.length()) { 0809 end = name.find_first_of('/', start); 0810 if (end == std::string::npos) end = name.length(); 0811 names.push_back(name.substr(start, end - start)); 0812 start = end + 1; 0813 } 0814 0815 // start from root 0816 int index = 0 ; 0817 0818 // trace one by one 0819 std::list<std::string>::iterator it; 0820 0821 for (it = names.begin(); it != names.end(); ++it) { 0822 // find among the children of index 0823 std::vector<unsigned> chi = children(index); 0824 unsigned child = 0; 0825 for (unsigned i = 0; i < chi.size(); i++) { 0826 DirEntry* ce = entry(chi[i]); 0827 if (ce) 0828 if (ce->valid && (ce->name.length() > 1)) 0829 if (ce->name == *it) 0830 child = chi[i]; 0831 } 0832 0833 // traverse to the child 0834 if (child > 0) index = child; 0835 else { 0836 // not found among children 0837 if (!create) return (DirEntry*)0; 0838 0839 // create a new entry 0840 unsigned parent = index; 0841 entries.push_back(DirEntry()); 0842 index = entryCount() - 1; 0843 DirEntry* e = entry(index); 0844 e->valid = true; 0845 e->name = *it; 0846 e->dir = false; 0847 e->size = 0; 0848 e->start = 0; 0849 e->child = End; 0850 e->prev = End; 0851 e->next = entry(parent)->child; 0852 entry(parent)->child = index; 0853 } 0854 } 0855 0856 return entry(index); 0857 } 0858 0859 // helper function: recursively find siblings of index 0860 void dirtree_find_siblings(DirTree* dirtree, std::vector<unsigned>& result, 0861 unsigned index) 0862 { 0863 DirEntry* e = dirtree->entry(index); 0864 if (!e) return; 0865 // if (!e->valid) return; 0866 0867 // prevent infinite loop 0868 for (unsigned i = 0; i < result.size(); i++) { 0869 if (result[i] == index) return; 0870 } 0871 // add myself 0872 result.push_back(index); 0873 0874 // visit previous sibling, don't go infinitely 0875 unsigned prev = e->prev; 0876 if ((prev > 0) && (prev < dirtree->entryCount())) { 0877 for (unsigned i = 0; i < result.size(); i++) 0878 if (result[i] == prev) prev = 0; 0879 if (prev) dirtree_find_siblings(dirtree, result, prev); 0880 } 0881 0882 // visit next sibling, don't go infinitely 0883 unsigned next = e->next; 0884 if ((next > 0) && (next < dirtree->entryCount())) { 0885 for (unsigned i = 0; i < result.size(); i++) 0886 if (result[i] == next) next = 0; 0887 if (next) dirtree_find_siblings(dirtree, result, next); 0888 } 0889 } 0890 0891 std::vector<unsigned> DirTree::children(unsigned index) 0892 { 0893 std::vector<unsigned> result; 0894 0895 DirEntry* e = entry(index); 0896 if (e) { 0897 if (e->valid && e->dir) { 0898 dirtree_find_siblings(this, result, e->child); 0899 } 0900 } 0901 return result; 0902 } 0903 0904 void DirTree::load(unsigned char* buffer, unsigned size, const unsigned threshold, 0905 const unsigned max_sbat, const unsigned max_bbat) 0906 { 0907 #ifdef POLE_DEBUG 0908 qDebug() << "-------------------------------"; 0909 qDebug() << Q_FUNC_INFO; 0910 #endif 0911 0912 entries.clear(); 0913 unsigned n = (size / 128); //num. of directory entries 0914 0915 for (unsigned i = 0; i < (size / 128); i++) { 0916 unsigned p = i * 128; 0917 0918 0919 // parse name of this entry, which stored as Unicode 16-bit 0920 int name_len = readU16(buffer + 0x40 + p); 0921 if (name_len > 64) { 0922 name_len = 64; 0923 #ifdef POLE_DEBUG 0924 qDebug() << "DirEntry: Invalid length of name!"; 0925 #endif 0926 } 0927 std::string name; 0928 for (int j = 0; (buffer[j+p]) && (j < name_len); j += 2) { 0929 name.append(1, buffer[j+p]); 0930 } 0931 0932 // first char isn't printable ? remove it... 0933 if (buffer[p] < 32) { 0934 name.erase(0, 1); 0935 } 0936 0937 // [MS-CFB] — v20110318 0938 // 0x00 = Unknown or unallocated, 0x01 = directory (Storage Object), 0939 // 0x02 = file (Stream Object), 0x05 = Root Storage Object 0940 unsigned type = buffer[ 0x42 + p]; 0941 0942 DirEntry e; 0943 e.valid = true; 0944 e.name = name; 0945 e.start = readU32(buffer + 0x74 + p); 0946 e.size = readU32(buffer + 0x78 + p); 0947 e.prev = readU32(buffer + 0x44 + p); 0948 e.next = readU32(buffer + 0x48 + p); 0949 e.child = readU32(buffer + 0x4C + p); 0950 e.dir = false; 0951 0952 if ((type == 1) || (type == 5)) { 0953 e.dir = true; 0954 } 0955 0956 // sanity checks 0957 if ((type != 0) && (type != 1) && (type != 2) && (type != 5)) { 0958 e.valid = false; 0959 #ifdef POLE_DEBUG 0960 qDebug() << "DirEntry: invalid type!"; 0961 #endif 0962 } 0963 if ((type != 0) && (name_len < 1)) { 0964 e.valid = false; 0965 #ifdef POLE_DEBUG 0966 qDebug() << "DirEntry: invalid (type,name) pair!"; 0967 #endif 0968 } 0969 // unknown object 0970 if (type == 0) { 0971 if ((e.child != End) || (e.prev != End) || (e.next != End)) { 0972 e.valid = false; 0973 #ifdef POLE_DEBUG 0974 qDebug() << "DirEntry: reference to prev/next/child != NOSTREAM"; 0975 #endif 0976 } 0977 if ((e.start != 0) || (e.size != 0)) { 0978 e.valid = false; 0979 #ifdef POLE_DEBUG 0980 qDebug() << "DirEntry: start/size != ZERO"; 0981 #endif 0982 } 0983 } 0984 // storage objects 0985 if (type == 1) { 0986 if (((e.prev != End) && (e.prev >= n)) || 0987 ((e.next != End) && (e.next >= n)) || 0988 ((e.child != End) && (e.child >= n))) { 0989 e.valid = false; 0990 #ifdef POLE_DEBUG 0991 qDebug() << "DirEntry: reference to prev/next/child > object num. (" << n << ")"; 0992 #endif 0993 } 0994 } 0995 // stream object 0996 if (type == 2) { 0997 //check stream position 0998 if ((e.size >= threshold) && (e.start >= max_bbat)) { 0999 e.valid = false; 1000 #ifdef POLE_DEBUG 1001 qDebug() << "DirEntry: (e.start >= max_bbat)"; 1002 #endif 1003 } 1004 else if (e.start >= max_sbat) { 1005 e.valid = false; 1006 #ifdef POLE_DEBUG 1007 qDebug() << "DirEntry: (e.start >= max_sbat)"; 1008 #endif 1009 } 1010 //check stream object 1011 if (e.child != End) { 1012 e.valid = false; 1013 #ifdef POLE_DEBUG 1014 qDebug() << "DirEntry: (e.child != End)"; 1015 #endif 1016 } 1017 //NOTE: Disabled because of too many false positives. 1018 // if ((e->prev != End) || (e->next != End)) { 1019 // e.valid = false; 1020 // } 1021 } 1022 1023 // CLSID contains an object class GUID (globally unique identifier) if 1024 // this entry is a storage or root storage. In a stream object, this 1025 // field MUST be set to all zeroes. 1026 #ifdef POLE_DEBUG 1027 if (!e.valid) { 1028 std::cout << "[INVALID] "; 1029 } 1030 printf("DirEntry: name=%s type=%i prev=%i next=%i child=%i start=%lu size=%lu clsid=%lu.%lu.%lu.%lu\n", 1031 name.c_str(), type, e.prev, e.next, e.child, e.start, e.size, readU32(buffer + 0x50 + p), 1032 readU32(buffer + 0x54 + p), readU32(buffer + 0x58 + p), readU32(buffer + 0x5C + p)); 1033 #endif 1034 entries.push_back(e); 1035 } 1036 #ifdef POLE_DEBUG 1037 qDebug() << "-------------------------------"; 1038 #endif 1039 } 1040 1041 // return space required to save this dirtree 1042 unsigned DirTree::size() 1043 { 1044 return entryCount() * 128; 1045 } 1046 1047 void DirTree::save(unsigned char* buffer) 1048 { 1049 memset(buffer, 0, size()); 1050 1051 // root is fixed as "Root Entry" 1052 DirEntry* root = entry(0); 1053 std::string name = "Root Entry"; 1054 for (unsigned j = 0; j < name.length(); j++) 1055 buffer[ j*2 ] = name[j]; 1056 writeU16(buffer + 0x40, name.length()*2 + 2); 1057 writeU32(buffer + 0x74, 0xffffffff); 1058 writeU32(buffer + 0x78, 0); 1059 writeU32(buffer + 0x44, 0xffffffff); 1060 writeU32(buffer + 0x48, 0xffffffff); 1061 writeU32(buffer + 0x4c, root->child); 1062 buffer[ 0x42 ] = 5; 1063 buffer[ 0x43 ] = 1; 1064 1065 for (unsigned i = 1; i < entryCount(); i++) { 1066 DirEntry* e = entry(i); 1067 if (!e) continue; 1068 if (e->dir) { 1069 e->start = 0xffffffff; 1070 e->size = 0; 1071 } 1072 1073 // max length for name is 32 chars 1074 std::string name = e->name; 1075 if (name.length() > 32) 1076 name.erase(32, name.length()); 1077 1078 // write name as Unicode 16-bit 1079 for (unsigned j = 0; j < name.length(); j++) 1080 buffer[ i*128 + j*2 ] = name[j]; 1081 1082 writeU16(buffer + i*128 + 0x40, name.length()*2 + 2); 1083 writeU32(buffer + i*128 + 0x74, e->start); 1084 writeU32(buffer + i*128 + 0x78, e->size); 1085 writeU32(buffer + i*128 + 0x44, e->prev); 1086 writeU32(buffer + i*128 + 0x48, e->next); 1087 writeU32(buffer + i*128 + 0x4c, e->child); 1088 buffer[ i*128 + 0x42 ] = e->dir ? 1 : 2; 1089 buffer[ i*128 + 0x43 ] = 1; // always black 1090 } 1091 } 1092 1093 void DirTree::debug() 1094 { 1095 for (unsigned i = 0; i < entryCount(); i++) { 1096 DirEntry* e = entry(i); 1097 if (!e) continue; 1098 std::cout << i << ": "; 1099 if (!e->valid) std::cout << "INVALID "; 1100 std::cout << e->name << " "; 1101 if (e->dir) std::cout << "(Dir) "; 1102 else std::cout << "(File) "; 1103 std::cout << e->size << " "; 1104 std::cout << "s:" << e->start << " "; 1105 std::cout << "("; 1106 if (e->child == End) std::cout << "-"; else std::cout << e->child; 1107 std::cout << " "; 1108 if (e->prev == End) std::cout << "-"; else std::cout << e->prev; 1109 std::cout << ":"; 1110 if (e->next == End) std::cout << "-"; else std::cout << e->next; 1111 std::cout << ")"; 1112 std::cout << std::endl; 1113 } 1114 } 1115 1116 // =========== StorageIO ========== 1117 1118 StorageIO::StorageIO(Storage* st, const char* fname) 1119 { 1120 storage = st; 1121 filename = fname; 1122 result = Storage::Ok; 1123 opened = false; 1124 1125 header = new Header(); 1126 dirtree = new DirTree(); 1127 bbat = new AllocTable(); 1128 sbat = new AllocTable(); 1129 1130 filesize = 0; 1131 bbat->blockSize = 1 << header->b_shift; 1132 sbat->blockSize = 1 << header->s_shift; 1133 } 1134 1135 StorageIO::~StorageIO() 1136 { 1137 if (opened) close(); 1138 delete sbat; 1139 delete bbat; 1140 delete dirtree; 1141 delete header; 1142 } 1143 1144 bool StorageIO::open() 1145 { 1146 // already opened ? close first 1147 if (opened) close(); 1148 1149 load(); 1150 1151 return result == Storage::Ok; 1152 } 1153 1154 void StorageIO::load() 1155 { 1156 unsigned char* buffer = 0; 1157 unsigned long buflen = 0; 1158 std::vector<unsigned long> blocks; 1159 1160 // open the file, check for error 1161 result = Storage::OpenFailed; 1162 file.open(filename.c_str(), std::ios::binary | std::ios::in); 1163 if (!file.good()) return; 1164 1165 // find size of input file 1166 file.seekg(0, std::ios::end); 1167 filesize = file.tellg(); 1168 1169 // load header 1170 buffer = new unsigned char[OLE_HEADER_SIZE]; 1171 file.seekg(0); 1172 file.read((char*)buffer, OLE_HEADER_SIZE); 1173 if (!file.good()) { 1174 delete[] buffer; 1175 return; 1176 } 1177 header->load(buffer); 1178 delete[] buffer; 1179 1180 // check OLE magic id 1181 result = Storage::NotOLE; 1182 for (unsigned i = 0; i < 8; i++) 1183 if (header->id[i] != pole_magic[i]) 1184 return; 1185 1186 // important block size 1187 bbat->blockSize = 1 << header->b_shift; 1188 sbat->blockSize = 1 << header->s_shift; 1189 const unsigned max_bbat_block = (filesize - OLE_HEADER_SIZE) / bbat->blockSize; 1190 const unsigned max_sbat_block = (filesize - OLE_HEADER_SIZE) / sbat->blockSize; 1191 1192 // sanity checks 1193 result = Storage::BadOLE; 1194 if (!header->valid(max_sbat_block, max_bbat_block)) { 1195 return; 1196 } 1197 1198 // find blocks allocated to store big bat 1199 // the first 109 blocks are in header, the rest in meta bat 1200 blocks.clear(); 1201 blocks.resize(header->num_bat); 1202 for (unsigned i = 0; i < 109; i++) { 1203 if (i >= header->num_bat) break; 1204 else blocks[i] = header->bb_blocks[i]; 1205 } 1206 if ((header->num_bat > 109) && (header->num_mbat > 0)) { 1207 unsigned char* buffer2 = new unsigned char[ bbat->blockSize ]; 1208 unsigned k = 109; 1209 unsigned mblock = header->mbat_start; 1210 for (unsigned r = 0; r < header->num_mbat; r++) { 1211 unsigned long rr = loadBigBlock(mblock, buffer2, bbat->blockSize); 1212 if (rr != bbat->blockSize) { 1213 delete[] buffer2; 1214 return; 1215 } 1216 for (unsigned s = 0; s < bbat->blockSize - 4; s += 4) { 1217 if (k >= header->num_bat) break; 1218 else blocks[k++] = readU32(buffer2 + s); 1219 } 1220 mblock = readU32(buffer2 + bbat->blockSize - 4); 1221 } 1222 delete[] buffer2; 1223 } 1224 1225 // load big bat 1226 buflen = blocks.size() * bbat->blockSize; 1227 if (buflen > 0) { 1228 buffer = new unsigned char[ buflen ]; 1229 unsigned long r = loadBigBlocks(blocks, buffer, buflen); 1230 if (r != buflen) { 1231 qCritical() << Q_FUNC_INFO << "SAT construction failed!"; 1232 delete[] buffer; 1233 return; 1234 } 1235 bbat->load(buffer, buflen); 1236 delete[] buffer; 1237 1238 if (!bbat->valid(filesize, header->b_shift, true)) { 1239 return; 1240 } 1241 } 1242 //TODO: make fail affect the result value 1243 bool fail = false; 1244 1245 // load small bat 1246 blocks.clear(); 1247 blocks = bbat->follow(header->sbat_start, fail); 1248 buflen = blocks.size() * bbat->blockSize; 1249 if (buflen > 0) { 1250 buffer = new unsigned char[ buflen ]; 1251 unsigned long r = loadBigBlocks(blocks, buffer, buflen); 1252 if (r != buflen) { 1253 qCritical() << Q_FUNC_INFO << "SSAT construction failed!"; 1254 delete[] buffer; 1255 return; 1256 } 1257 sbat->load(buffer, buflen); 1258 delete[] buffer; 1259 1260 if (!sbat->valid(filesize, header->s_shift, false)) { 1261 return; 1262 } 1263 } 1264 1265 // load directory tree 1266 blocks.clear(); 1267 blocks = bbat->follow(header->dirent_start, fail); 1268 buflen = blocks.size() * bbat->blockSize; 1269 buffer = new unsigned char[ buflen ]; 1270 unsigned long r = loadBigBlocks(blocks, buffer, buflen); 1271 if (r != buflen) { 1272 qCritical() << Q_FUNC_INFO << "DirTree construction failed!"; 1273 delete[] buffer; 1274 return; 1275 } 1276 dirtree->load(buffer, buflen, header->threshold, max_sbat_block, max_bbat_block); 1277 unsigned sb_start = readU32(buffer + 0x74); 1278 delete[] buffer; 1279 if (!dirtree->valid()) { 1280 qCritical() << Q_FUNC_INFO << "Invalid DirTree!"; 1281 return; 1282 } 1283 1284 // fetch block chain as data for small-files 1285 sb_blocks = bbat->follow(sb_start, fail); 1286 1287 // for troubleshooting, just enable this block 1288 #ifdef POLE_DEBUG 1289 header->debug(); 1290 sbat->debug(); 1291 bbat->debug(); 1292 dirtree->debug(); 1293 #endif 1294 1295 // so far so good 1296 result = Storage::Ok; 1297 opened = true; 1298 } 1299 1300 void StorageIO::create() 1301 { 1302 // std::cout << "Creating " << filename << std::endl; 1303 1304 file.open(filename.c_str(), std::ios::out | std::ios::binary); 1305 if (!file.good()) { 1306 qCritical() << Q_FUNC_INFO << "Can't create file:" << filename.c_str(); 1307 result = Storage::OpenFailed; 1308 return; 1309 } 1310 1311 // so far so good 1312 opened = true; 1313 result = Storage::Ok; 1314 } 1315 1316 void StorageIO::flush() 1317 { 1318 /* Note on Microsoft implementation: 1319 - directory entries are stored in the last block(s) 1320 - BATs are as second to the last 1321 - Meta BATs are third to the last 1322 */ 1323 } 1324 1325 void StorageIO::close() 1326 { 1327 if (!opened) return; 1328 1329 file.close(); 1330 opened = false; 1331 1332 std::list<Stream*>::iterator it; 1333 for (it = streams.begin(); it != streams.end(); ++it) 1334 delete *it; 1335 } 1336 1337 StreamIO* StorageIO::streamIO(const std::string& name) 1338 { 1339 #ifdef POLE_DEBUG 1340 qDebug() << Q_FUNC_INFO << "preparing stream:" << name.c_str(); 1341 #endif 1342 // sanity check 1343 if (!name.length()) return (StreamIO*)0; 1344 1345 // search in the entries 1346 DirEntry* entry = dirtree->entry(name); 1347 //if( entry) std::cout << "FOUND\n"; 1348 if (!entry) return (StreamIO*)0; 1349 //if( !entry->dir ) std::cout << " NOT DIR\n"; 1350 if (entry->dir) return (StreamIO*)0; 1351 1352 StreamIO* result = new StreamIO(this, entry); 1353 result->fullName = name; 1354 1355 return result; 1356 } 1357 1358 unsigned long StorageIO::loadBigBlocks(const std::vector<unsigned long>& blocks, 1359 unsigned char* data, unsigned long maxlen) 1360 { 1361 return loadBigBlocks(&blocks[0], blocks.size(), data, maxlen); 1362 } 1363 1364 unsigned long StorageIO::loadBigBlocks(const unsigned long *blocks, unsigned blockCount, 1365 unsigned char *data, unsigned long maxlen) 1366 { 1367 // sentinel 1368 if (!data) return 0; 1369 if (!file.good()) return 0; 1370 if (!blocks) return 0; 1371 if (blockCount < 1) return 0; 1372 if (maxlen == 0) return 0; 1373 1374 // read block one by one, seems fast enough 1375 unsigned long bytes = 0; 1376 for (unsigned long i = 0; (i < blockCount) && (bytes < maxlen); i++) { 1377 unsigned long block = blocks[i]; 1378 unsigned long pos = bbat->blockSize * (block + 1); 1379 unsigned long p = (bbat->blockSize < maxlen - bytes) ? bbat->blockSize : maxlen - bytes; 1380 if (pos + p > filesize) p = filesize - pos; 1381 file.seekg(pos); 1382 file.read((char*)data + bytes, p); 1383 if (!file.good()) return 0; 1384 bytes += p; 1385 } 1386 1387 return bytes; 1388 } 1389 1390 unsigned long StorageIO::loadBigBlock(unsigned long block, 1391 unsigned char* data, unsigned long maxlen) 1392 { 1393 // sentinel 1394 if (!data) return 0; 1395 if (!file.good()) return 0; 1396 1397 return loadBigBlocks(&block, 1, data, maxlen); 1398 } 1399 1400 // return number of bytes which has been read 1401 unsigned long StorageIO::loadSmallBlocks(const std::vector<unsigned long>& blocks, 1402 unsigned char* data, unsigned long maxlen) 1403 { 1404 return loadSmallBlocks(&blocks[0], blocks.size(), data, maxlen); 1405 } 1406 1407 unsigned long StorageIO::loadSmallBlocks(const unsigned long *blocks, unsigned blockCount, 1408 unsigned char *data, unsigned long maxlen) 1409 { 1410 // sentinel 1411 if (!data) return 0; 1412 if (!file.good()) return 0; 1413 if (!blocks) return 0; 1414 if (blockCount < 1) return 0; 1415 if (maxlen == 0) return 0; 1416 1417 // our own local buffer 1418 unsigned char* buf = new unsigned char[ bbat->blockSize ]; 1419 1420 // read small block one by one 1421 unsigned long bytes = 0; 1422 for (unsigned long i = 0; (i < blockCount) && (bytes < maxlen); i++) { 1423 unsigned long block = blocks[i]; 1424 1425 // find where the small-block exactly is 1426 unsigned long pos = block * sbat->blockSize; 1427 unsigned long bbindex = pos / bbat->blockSize; 1428 if (bbindex >= sb_blocks.size()) break; 1429 1430 unsigned long r = loadBigBlock(sb_blocks[ bbindex ], buf, bbat->blockSize); 1431 if (r != bbat->blockSize) { 1432 delete[] buf; 1433 return 0; 1434 } 1435 1436 // copy the data 1437 unsigned offset = pos % bbat->blockSize; 1438 unsigned long p = (maxlen - bytes < bbat->blockSize - offset) ? maxlen - bytes : bbat->blockSize - offset; 1439 p = (sbat->blockSize < p) ? sbat->blockSize : p; 1440 memcpy(data + bytes, buf + offset, p); 1441 bytes += p; 1442 } 1443 1444 delete[] buf; 1445 1446 return bytes; 1447 } 1448 1449 unsigned long StorageIO::loadSmallBlock(unsigned long block, 1450 unsigned char* data, unsigned long maxlen) 1451 { 1452 // sentinel 1453 if (!data) return 0; 1454 if (!file.good()) return 0; 1455 1456 return loadSmallBlocks(&block, 1, data, maxlen); 1457 } 1458 1459 // =========== StreamIO ========== 1460 1461 StreamIO::StreamIO(StorageIO* s, DirEntry* e) 1462 { 1463 io = s; 1464 entry = e; 1465 eof = false; 1466 fail = false; 1467 1468 m_pos = 0; 1469 1470 if (entry->size >= io->header->threshold) { 1471 blocks = io->bbat->follow(entry->start, fail); 1472 } else { 1473 blocks = io->sbat->follow(entry->start, fail); 1474 } 1475 1476 // prepare cache 1477 cache_pos = 0; 1478 base_cache_size = cache_size = 4096; // optimal ? 1479 cache_data = new unsigned char[base_cache_size]; 1480 updateCache(); 1481 } 1482 1483 // FIXME tell parent we're gone 1484 StreamIO::~StreamIO() 1485 { 1486 delete[] cache_data; 1487 } 1488 1489 void StreamIO::seek(unsigned long pos) 1490 { 1491 m_pos = pos; 1492 } 1493 1494 unsigned long StreamIO::tell() 1495 { 1496 return m_pos; 1497 } 1498 1499 int StreamIO::getch() 1500 { 1501 // past end-of-file ? 1502 if (m_pos > entry->size) return -1; 1503 1504 // need to update cache ? 1505 if (!cache_size || (m_pos < cache_pos) || 1506 (m_pos >= cache_pos + cache_size)) 1507 updateCache(); 1508 1509 // something bad if we don't get good cache 1510 if (!cache_size) return -1; 1511 1512 int data = cache_data[m_pos - cache_pos]; 1513 m_pos++; 1514 1515 return data; 1516 } 1517 1518 unsigned long StreamIO::read(unsigned char *data, unsigned long maxlen) 1519 { 1520 // sanity checks 1521 if (!data) return 0; 1522 if (maxlen == 0) return 0; 1523 1524 unsigned long totalbytes = 0; 1525 1526 while (totalbytes < maxlen) { 1527 // need to update cache ? 1528 if (!cache_size || (m_pos < cache_pos) || 1529 (m_pos >= cache_pos + cache_size)) 1530 updateCache(); 1531 if (!cache_size) break; 1532 1533 const unsigned long remaining = cache_size - (m_pos - cache_pos); 1534 const unsigned long count = std::min(remaining, maxlen - totalbytes); 1535 memcpy(data + totalbytes, &cache_data[m_pos - cache_pos], count); 1536 totalbytes += count; 1537 m_pos += count; 1538 } 1539 return totalbytes; 1540 } 1541 1542 unsigned long StreamIO::readInternal(unsigned long pos, unsigned char* data, unsigned long maxlen) 1543 { 1544 // sanity checks 1545 if (!data) return 0; 1546 if (maxlen == 0) return 0; 1547 1548 unsigned long totalbytes = 0; 1549 1550 if (entry->size < io->header->threshold) { 1551 // small file 1552 unsigned long index = pos / io->sbat->blockSize; 1553 1554 if (index >= blocks.size()) return 0; 1555 1556 unsigned char buf[4096]; 1557 unsigned long offset = pos % io->sbat->blockSize; 1558 while (totalbytes < maxlen) { 1559 if (index >= blocks.size()) break; 1560 io->loadSmallBlock(blocks[index], &buf[0], io->bbat->blockSize); 1561 unsigned long count = io->sbat->blockSize - offset; 1562 if (count > maxlen - totalbytes) count = maxlen - totalbytes; 1563 memcpy(data + totalbytes, &buf[0] + offset, count); 1564 totalbytes += count; 1565 offset = 0; 1566 index++; 1567 } 1568 1569 } else { 1570 // big file 1571 unsigned long index = pos / io->bbat->blockSize; 1572 1573 if (index >= blocks.size()) return 0; 1574 1575 unsigned char buf[4096]; 1576 unsigned long offset = pos % io->bbat->blockSize; 1577 while (totalbytes < maxlen) { 1578 if (index >= blocks.size()) break; 1579 unsigned long r = io->loadBigBlock(blocks[index], &buf[0], io->bbat->blockSize); 1580 if (r != io->bbat->blockSize) { 1581 return 0; 1582 } 1583 unsigned long count = io->bbat->blockSize - offset; 1584 if (count > maxlen - totalbytes) count = maxlen - totalbytes; 1585 memcpy(data + totalbytes, &buf[0] + offset, count); 1586 totalbytes += count; 1587 index++; 1588 offset = 0; 1589 } 1590 1591 } 1592 1593 return totalbytes; 1594 } 1595 1596 unsigned long StreamIO::readInternal(unsigned char* data, unsigned long maxlen) 1597 { 1598 unsigned long bytes = readInternal(tell(), data, maxlen); 1599 m_pos += bytes; 1600 return bytes; 1601 } 1602 1603 void StreamIO::updateCache() 1604 { 1605 // sanity check 1606 if (!cache_data) return; 1607 1608 cache_pos = m_pos - (m_pos % base_cache_size); 1609 unsigned long bytes = base_cache_size; 1610 if (cache_pos + bytes > entry->size) bytes = entry->size - cache_pos; 1611 if (cache_pos + bytes <= m_pos) { 1612 cache_size = 0; 1613 } else { 1614 cache_size = readInternal(cache_pos, cache_data, bytes); 1615 } 1616 } 1617 1618 1619 // =========== Storage ========== 1620 1621 Storage::Storage(const char* filename) 1622 { 1623 io = new StorageIO(this, filename); 1624 } 1625 1626 Storage::~Storage() 1627 { 1628 delete io; 1629 } 1630 1631 int Storage::result() 1632 { 1633 return io->result; 1634 } 1635 1636 bool Storage::open() 1637 { 1638 return io->open(); 1639 } 1640 1641 void Storage::close() 1642 { 1643 io->close(); 1644 } 1645 1646 std::list<std::string> Storage::entries(const std::string& path) 1647 { 1648 std::list<std::string> result; 1649 DirTree* dt = io->dirtree; 1650 DirEntry* e = dt->entry(path, false); 1651 if (e) { 1652 if (e->dir) { 1653 unsigned parent = dt->indexOf(e); 1654 std::vector<unsigned> children = dt->children(parent); 1655 for (unsigned i = 0; i < children.size(); i++) 1656 result.push_back(dt->entry(children[i])->name); 1657 } 1658 } 1659 return result; 1660 } 1661 1662 bool Storage::isDirectory(const std::string& name) 1663 { 1664 DirEntry* e = io->dirtree->entry(name, false); 1665 return e ? e->dir : false; 1666 } 1667 1668 // =========== Stream ========== 1669 1670 Stream::Stream(Storage* storage, const std::string& name) 1671 { 1672 io = storage->io->streamIO(name); 1673 } 1674 1675 // FIXME tell parent we're gone 1676 Stream::~Stream() 1677 { 1678 delete io; 1679 } 1680 1681 std::string Stream::fullName() 1682 { 1683 return io ? io->fullName : std::string(); 1684 } 1685 1686 unsigned long Stream::tell() 1687 { 1688 return io ? io->tell() : 0; 1689 } 1690 1691 void Stream::seek(unsigned long newpos) 1692 { 1693 if (io) io->seek(newpos); 1694 } 1695 1696 unsigned long Stream::size() 1697 { 1698 return io ? io->entry->size : 0; 1699 } 1700 1701 int Stream::getch() 1702 { 1703 return io ? io->getch() : 0; 1704 } 1705 1706 unsigned long Stream::read(unsigned char* data, unsigned long maxlen) 1707 { 1708 return io ? io->read(data, maxlen) : 0; 1709 } 1710 1711 bool Stream::eof() 1712 { 1713 return io ? io->eof : false; 1714 } 1715 1716 bool Stream::fail() 1717 { 1718 return io ? io->fail : true; 1719 }