File indexing completed on 2024-05-12 16:28:58

0001 /* POLE - Portable C++ library to access OLE Storage
0002    Copyright (C) 2002-2005 Ariya Hidayat <ariya@kde.org>
0003    Copyright (C) 2011-2012 Matus Uzak <matus.uzak@ixonos.com>
0004 
0005    Redistribution and use in source and binary forms, with or without
0006    modification, are permitted provided that the following conditions
0007    are met:
0008    * Redistributions of source code must retain the above copyright notice,
0009      this list of conditions and the following disclaimer.
0010    * Redistributions in binary form must reproduce the above copyright notice,
0011      this list of conditions and the following disclaimer in the documentation
0012      and/or other materials provided with the distribution.
0013    * Neither the name of the authors nor the names of its contributors may be
0014      used to endorse or promote products derived from this software without
0015      specific prior written permission.
0016 
0017    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
0018    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
0019    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
0020    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
0021    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
0022    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
0023    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
0024    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
0025    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
0026    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
0027    THE POSSIBILITY OF SUCH DAMAGE.
0028 */
0029 
0030 #include "pole.h"
0031 
0032 #include <fstream>
0033 #include <iostream>
0034 #include <list>
0035 #include <string>
0036 #include <vector>
0037 #include <string.h>
0038 #include <ios>       // for std::hex
0039 
0040 #include <QList>
0041 #include <QString>
0042 #include <QDebug>
0043 
0044 //Enable to activate debugging output.
0045 //#define POLE_DEBUG
0046 
0047 //Disabled because of too many false positives, both streams and unknown
0048 //objects MAY be invalid and still have a size set.
0049 //#define POLE_FAIL_ON_NEMPTY_NVALID_OBJS
0050 
0051 //Validate stream object against [MS-CFB] — v20110318.  Disabled because of too
0052 //many false positives.
0053 //#define POLE_FAIL_ON_NVALID_STREAM_OBJS
0054 
0055 //Validate storage object against [MS-CFB] — v20110318.  Disabled because of
0056 //too many false positives on Word8 documents.
0057 //#define POLE_CHECK_STORAGE_OBJS
0058 
0059 //Validate sibling names against positions in the black red tree.  Disabled
0060 //because of too many false positives on Word8 files with embedded documents.
0061 //#define POLE_CHECK_SIBLINGS
0062 
0063 #define OLE_HEADER_SIZE 0x200
0064 
0065 namespace POLE
0066 {
0067 
0068 class Header
0069 {
0070 public:
0071     unsigned char id[8];       // signature, or magic identifier
0072     unsigned b_shift;          // bbat->blockSize = 1 << b_shift
0073     unsigned s_shift;          // sbat->blockSize = 1 << s_shift
0074     unsigned num_bat;          // blocks allocated for big bat
0075     unsigned dirent_start;     // starting block for directory info
0076     unsigned threshold;        // switch from small to big file (usually 4K)
0077     unsigned sbat_start;       // starting block index to store small bat
0078     unsigned num_sbat;         // blocks allocated for small bat
0079     unsigned mbat_start;       // starting block to store meta bat
0080     unsigned num_mbat;         // blocks allocated for meta bat
0081     unsigned long bb_blocks[109];
0082 
0083     Header();
0084     bool valid(const unsigned max_sbat_block, const unsigned max_bbat_block) const;
0085     void load(const unsigned char* buffer);
0086     void save(unsigned char* buffer);
0087     void debug();
0088 };
0089 
0090 class AllocTable
0091 {
0092 public:
0093     static const unsigned Eof;
0094     static const unsigned Avail;
0095     static const unsigned Bat;
0096     static const unsigned MetaBat;
0097     unsigned blockSize;
0098     AllocTable();
0099     bool valid(const unsigned long filesize, const unsigned shift, const bool isFat = true) const;
0100     void clear();
0101     unsigned long count();
0102     void resize(unsigned long newsize);
0103     void preserve(unsigned long n);
0104     void set(unsigned long index, unsigned long val);
0105     unsigned unused();
0106     void setChain(std::vector<unsigned long>);
0107     std::vector<unsigned long> follow(unsigned long start, bool& fail);
0108     unsigned long operator[](unsigned long index);
0109     void load(const unsigned char* buffer, unsigned len);
0110     void save(unsigned char* buffer);
0111     unsigned size();
0112     void debug();
0113 private:
0114     std::vector<unsigned long> data;
0115     AllocTable(const AllocTable&);
0116     AllocTable& operator=(const AllocTable&);
0117 };
0118 
0119 class DirEntry
0120 {
0121 public:
0122     bool valid;            // false if invalid (should be skipped)
0123     std::string name;      // the name, not in unicode anymore
0124     bool dir;              // true if directory
0125     unsigned long size;    // size (not valid if directory)
0126     unsigned long start;   // starting block
0127     unsigned prev;         // previous sibling
0128     unsigned next;         // next sibling
0129     unsigned child;        // first child
0130 };
0131 
0132 class DirTree
0133 {
0134 public:
0135     static const unsigned End;
0136     DirTree();
0137     bool valid(void) const;
0138     void clear();
0139     unsigned entryCount();
0140     DirEntry* entry(unsigned index);
0141     DirEntry* entry(const std::string& name, bool create = false);
0142     int indexOf(DirEntry* e);
0143     int parent(unsigned index);
0144     std::string fullName(unsigned index);
0145     std::vector<unsigned> children(unsigned index);
0146     void load(unsigned char* buffer, unsigned len, const unsigned threshold, const unsigned max_sbat, const unsigned max_bbat);
0147     void save(unsigned char* buffer);
0148     unsigned size();
0149     void debug();
0150 private:
0151     std::vector<DirEntry> entries;
0152     DirTree(const DirTree&);
0153     DirTree& operator=(const DirTree&);
0154 };
0155 
0156 class StorageIO
0157 {
0158 public:
0159     Storage* storage;         // owner
0160     std::string filename;     // filename
0161     std::fstream file;        // associated with above name
0162     int result;               // result of operation
0163     bool opened;              // true if file is opened
0164     unsigned long filesize;   // size of the file
0165 
0166     Header* header;           // storage header
0167     DirTree* dirtree;         // directory tree
0168     AllocTable* bbat;         // allocation table for big blocks
0169     AllocTable* sbat;         // allocation table for small blocks
0170 
0171     std::vector<unsigned long> sb_blocks; // blocks for "small" files
0172 
0173     std::list<Stream*> streams;
0174 
0175     StorageIO(Storage* storage, const char* filename);
0176     ~StorageIO();
0177 
0178     bool open();
0179     void close();
0180     void flush();
0181     void load();
0182     void create();
0183 
0184     unsigned long loadBigBlocks(const std::vector<unsigned long>& blocks, unsigned char* buffer, unsigned long maxlen);
0185     unsigned long loadBigBlocks(const unsigned long* blocks, unsigned blockCount, unsigned char* buffer, unsigned long maxlen);
0186 
0187     unsigned long loadBigBlock(unsigned long block, unsigned char* buffer, unsigned long maxlen);
0188 
0189     unsigned long loadSmallBlocks(const std::vector<unsigned long>& blocks, unsigned char* buffer, unsigned long maxlen);
0190     unsigned long loadSmallBlocks(const unsigned long* blocks, unsigned blockCount, unsigned char* buffer, unsigned long maxlen);
0191 
0192     unsigned long loadSmallBlock(unsigned long block, unsigned char* buffer, unsigned long maxlen);
0193 
0194     StreamIO* streamIO(const std::string& name);
0195 
0196 private:
0197     // no copy or assign
0198     StorageIO(const StorageIO&);
0199     StorageIO& operator=(const StorageIO&);
0200 
0201 };
0202 
0203 class StreamIO
0204 {
0205 public:
0206     StorageIO* io;
0207     DirEntry* entry;
0208     std::string fullName;
0209     bool eof;
0210     bool fail;
0211 
0212     StreamIO(StorageIO* io, DirEntry* entry);
0213     ~StreamIO();
0214     unsigned long size();
0215     void seek(unsigned long pos);
0216     unsigned long tell();
0217     int getch();
0218     unsigned long read(unsigned char* data, unsigned long maxlen);
0219 
0220 private:
0221     unsigned long readInternal(unsigned char* data, unsigned long maxlen);
0222     unsigned long readInternal(unsigned long pos, unsigned char* data, unsigned long maxlen);
0223 
0224     std::vector<unsigned long> blocks;
0225 
0226     // no copy or assign
0227     StreamIO(const StreamIO&);
0228     StreamIO& operator=(const StreamIO&);
0229 
0230     // pointer for read
0231     unsigned long m_pos;
0232 
0233     // simple cache system to speed-up getch()
0234     unsigned char* cache_data;
0235     unsigned long base_cache_size;
0236     unsigned long cache_size;
0237     unsigned long cache_pos;
0238     void updateCache();
0239 };
0240 
0241 } // namespace POLE
0242 
0243 using namespace POLE;
0244 
0245 static inline unsigned long readU16(const unsigned char* ptr)
0246 {
0247     return ptr[0] + (ptr[1] << 8);
0248 }
0249 
0250 static inline unsigned long readU32(const unsigned char* ptr)
0251 {
0252     return unsigned(ptr[0]) + (unsigned(ptr[1]) << 8 ) + (unsigned(ptr[2]) << 16) + (unsigned(ptr[3]) << 24);
0253 }
0254 
0255 static inline void writeU16(unsigned char* ptr, unsigned long data)
0256 {
0257     ptr[0] = (unsigned char)(data & 0xff);
0258     ptr[1] = (unsigned char)((data >> 8) & 0xff);
0259 }
0260 
0261 static inline void writeU32(unsigned char* ptr, unsigned long data)
0262 {
0263     ptr[0] = (unsigned char)(data & 0xff);
0264     ptr[1] = (unsigned char)((data >> 8) & 0xff);
0265     ptr[2] = (unsigned char)((data >> 16) & 0xff);
0266     ptr[3] = (unsigned char)((data >> 24) & 0xff);
0267 }
0268 
0269 static const unsigned char pole_magic[] = { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 };
0270 
0271 // =========== Header ==========
0272 
0273 Header::Header()
0274 {
0275     b_shift = 9;
0276     s_shift = 6;
0277     num_bat = 0;
0278     dirent_start = 0;
0279     threshold = 4096;
0280     sbat_start = 0;
0281     num_sbat = 0;
0282     mbat_start = 0;
0283     num_mbat = 0;
0284 
0285     for (unsigned i = 0; i < 8; i++)
0286         id[i] = pole_magic[i];
0287     for (unsigned i = 0; i < 109; i++)
0288         bb_blocks[i] = AllocTable::Avail;
0289 }
0290 
0291 bool Header::valid(const unsigned max_sbat_block, const unsigned max_bbat_block) const
0292 {
0293     if (threshold != 4096) return false;
0294     if (num_bat == 0) return false;
0295     if ((num_bat > 109) && (num_bat > (num_mbat * 127) + 109)) return false;
0296     if ((num_bat < 109) && (num_mbat != 0)) return false;
0297     if (s_shift > b_shift) return false;
0298     if (b_shift <= 6) return false;
0299     if (b_shift > 12) return false;
0300 
0301     // additional heuristics to check the header
0302     if (num_sbat > max_sbat_block) return false;
0303     if (num_bat > max_bbat_block) return false;
0304 
0305 #ifdef POLE_DEBUG
0306     const unsigned ENDOFCHAIN = 0xfffffffe;
0307     const unsigned FREESECT = 0xffffffff;
0308 
0309     if (num_sbat == 0 &&
0310         sbat_start != ENDOFCHAIN &&
0311         sbat_start != FREESECT)
0312     {
0313         qDebug() << Q_FUNC_INFO <<
0314             "There aren't any minifat sectors, but there are links to some!";
0315     }
0316 #endif
0317 
0318     return true;
0319 }
0320 
0321 void Header::load(const unsigned char* buffer)
0322 {
0323     b_shift      = readU16(buffer + 0x1e); // sector shift
0324     s_shift      = readU16(buffer + 0x20); // mini sector shift
0325     num_bat      = readU32(buffer + 0x2c); // number of fat sectors
0326     dirent_start = readU32(buffer + 0x30); // first directory sector location
0327     threshold    = readU32(buffer + 0x38); // transaction signature number
0328     sbat_start   = readU32(buffer + 0x3c); // first mini fat sector location
0329     num_sbat     = readU32(buffer + 0x40); // mini stream cutoff size
0330     mbat_start   = readU32(buffer + 0x44); // first mini difat sector location
0331     num_mbat     = readU32(buffer + 0x48); // number of difat sectors
0332 
0333     for (unsigned i = 0; i < 8; i++)
0334         id[i] = buffer[i];
0335     for (unsigned i = 0; i < 109; i++)
0336         bb_blocks[i] = readU32(buffer + 0x4C + i * 4);
0337 }
0338 
0339 void Header::save(unsigned char* buffer)
0340 {
0341     memset(buffer, 0, 0x4c);
0342     memcpy(buffer, pole_magic, 8);          // ole signature
0343     writeU32(buffer + 8, 0);                // unknown
0344     writeU32(buffer + 12, 0);               // unknown
0345     writeU32(buffer + 16, 0);               // unknown
0346     writeU16(buffer + 24, 0x003e);          // revision ?
0347     writeU16(buffer + 26, 3);               // version ?
0348     writeU16(buffer + 28, 0xfffe);          // unknown
0349     writeU16(buffer + 0x1e, b_shift);
0350     writeU16(buffer + 0x20, s_shift);
0351     writeU32(buffer + 0x2c, num_bat);
0352     writeU32(buffer + 0x30, dirent_start);
0353     writeU32(buffer + 0x38, threshold);
0354     writeU32(buffer + 0x3c, sbat_start);
0355     writeU32(buffer + 0x40, num_sbat);
0356     writeU32(buffer + 0x44, mbat_start);
0357     writeU32(buffer + 0x48, num_mbat);
0358 
0359     for (unsigned i = 0; i < 109; i++)
0360         writeU32(buffer + 0x4C + i*4, bb_blocks[i]);
0361 }
0362 
0363 void Header::debug()
0364 {
0365     qDebug() << Q_FUNC_INFO;
0366     qDebug() << "b_shift:" << b_shift;
0367     qDebug() << "s_shift:" << s_shift;
0368     qDebug() << "num_bat:" << num_bat;
0369     qDebug() << "dirent_start: 0x" << hex << dirent_start;
0370     qDebug() << "threshold:" << dec << threshold;
0371     qDebug() << "sbat_start: 0x" << hex << sbat_start;
0372     qDebug() << "num_sbat:" << dec << num_sbat;
0373     qDebug() << "mbat_start: 0x" << hex << mbat_start;
0374     qDebug() << "num_mbat:" << dec << num_mbat;
0375 
0376     unsigned s = (num_bat <= 109) ? num_bat : 109;
0377     std::cout << "bat blocks:";
0378     for (unsigned i = 0; i < s; i++) {
0379         std::cout << "0x" << std::hex << bb_blocks[i] << " ";
0380     }
0381     std::cout << std::dec << std::endl;
0382 }
0383 
0384 // =========== AllocTable ==========
0385 
0386 const unsigned AllocTable::Avail = 0xffffffff;
0387 const unsigned AllocTable::Eof = 0xfffffffe;
0388 const unsigned AllocTable::Bat = 0xfffffffd;
0389 const unsigned AllocTable::MetaBat = 0xfffffffc;
0390 
0391 AllocTable::AllocTable()
0392 {
0393     blockSize = 4096;
0394     // initial size
0395     resize(128);
0396 }
0397 
0398 bool AllocTable::valid(const unsigned long filesize, const unsigned shift, const bool isFat) const
0399 {
0400     unsigned long offset = 0;
0401     for (unsigned long i = 0; i < data.size(); i++) {
0402         switch (data[i]) {
0403         case AllocTable::Avail:
0404         case AllocTable::Eof:
0405         case AllocTable::Bat:
0406         case AllocTable::MetaBat:
0407             break;
0408         default:
0409             offset = data[i] << shift;
0410             if (isFat) {
0411                 offset += OLE_HEADER_SIZE;
0412             }
0413             if (offset > filesize) {
0414 #ifdef POLE_DEBUG
0415                 qDebug() << "Invalid location of sector in the stream!" <<
0416                     "offset:" << offset << " | filesize:" << filesize;
0417 #endif
0418                 return false;
0419             }
0420         }
0421     }
0422     return true;
0423 }
0424 
0425 unsigned long AllocTable::count()
0426 {
0427     return data.size();
0428 }
0429 
0430 void AllocTable::resize(unsigned long newsize)
0431 {
0432     unsigned oldsize = data.size();
0433     data.resize(newsize);
0434     if (newsize > oldsize)
0435         for (unsigned i = oldsize; i < newsize; i++)
0436             data[i] = Avail;
0437 }
0438 
0439 // make sure there're still free blocks
0440 void AllocTable::preserve(unsigned long n)
0441 {
0442     std::vector<unsigned long> pre;
0443     for (unsigned i = 0; i < n; i++)
0444         pre.push_back(unused());
0445 }
0446 
0447 unsigned long AllocTable::operator[](unsigned long index)
0448 {
0449     unsigned long result;
0450     result = data[index];
0451     return result;
0452 }
0453 
0454 void AllocTable::set(unsigned long index, unsigned long value)
0455 {
0456     if (index >= count()) resize(index + 1);
0457     data[ index ] = value;
0458 }
0459 
0460 void AllocTable::setChain(std::vector<unsigned long> chain)
0461 {
0462     if (chain.size()) {
0463         for (unsigned i = 0; i < chain.size() - 1; i++)
0464             set(chain[i], chain[i+1]);
0465         set(chain[ chain.size()-1 ], AllocTable::Eof);
0466     }
0467 }
0468 
0469 // follow
0470 std::vector<unsigned long> AllocTable::follow(unsigned long start, bool& fail)
0471 {
0472     std::vector<unsigned long> chain;
0473 
0474     if (start >= count()) {
0475 #ifdef POLE_DEBUG
0476         qDebug() << Q_FUNC_INFO << "start >= count()!";
0477 #endif
0478         fail = true;
0479         return chain;
0480     }
0481 
0482     unsigned long p = start;
0483     while (p < count()) {
0484         if (p == (unsigned long)Eof) {
0485 #ifdef POLE_DEBUG
0486             qDebug() << Q_FUNC_INFO << "Eof detected!";
0487 #endif
0488             break;
0489         }
0490         if (p == (unsigned long)Bat) {
0491 #ifdef POLE_DEBUG
0492             qDebug() << Q_FUNC_INFO << "Bat detected!";
0493 #endif
0494             break;
0495         }
0496         if (p == (unsigned long)MetaBat) {
0497 #ifdef POLE_DEBUG
0498             qDebug() << Q_FUNC_INFO << "MetaBat detected!";
0499 #endif
0500             break;
0501         }
0502         if (p >= count()) {
0503 #ifdef POLE_DEBUG
0504             qDebug() << Q_FUNC_INFO << "Invalid index detected!";
0505 #endif
0506             fail = true;
0507             break;
0508         }
0509         chain.push_back(p);
0510 
0511         // break if the chain is longer than the total sector count
0512         if (chain.size() > count()) {
0513 #ifdef POLE_DEBUG
0514             qDebug() << Q_FUNC_INFO << "Probably a loop detected!";
0515 #endif
0516             fail = true;
0517             break;
0518         }
0519         p = data[ p ];
0520     }
0521     if (p != (unsigned long)AllocTable::Eof) {
0522 #ifdef POLE_DEBUG
0523         qDebug() << Q_FUNC_INFO << "Last chain entry MUST be 0x" << hex << AllocTable::Eof <<
0524             ", detected: 0x" << hex << p;
0525 #endif
0526         fail = true;
0527     }
0528 
0529     return chain;
0530 }
0531 
0532 unsigned AllocTable::unused()
0533 {
0534     // find first available block
0535     for (unsigned i = 0; i < data.size(); i++)
0536         if (data[i] == Avail)
0537             return i;
0538 
0539     // completely full, so enlarge the table
0540     unsigned block = data.size();
0541     resize(data.size() + 10);
0542     return block;
0543 }
0544 
0545 void AllocTable::load(const unsigned char* buffer, unsigned len)
0546 {
0547     resize(len / 4);
0548     for (unsigned i = 0; i < count(); i++)
0549         set(i, readU32(buffer + i*4));
0550 }
0551 
0552 // return space required to save this dirtree
0553 unsigned AllocTable::size()
0554 {
0555     return count() * 4;
0556 }
0557 
0558 void AllocTable::save(unsigned char* buffer)
0559 {
0560     for (unsigned i = 0; i < count(); i++)
0561         writeU32(buffer + i*4, data[i]);
0562 }
0563 
0564 void AllocTable::debug()
0565 {
0566     qDebug() << "block size " << data.size();
0567     for (unsigned i = 0; i < data.size(); i++) {
0568         if (data[i] == Avail) continue;
0569         std::cout << i << ": ";
0570         if (data[i] == Eof) std::cout << "[eof]";
0571         else if (data[i] == Bat) std::cout << "[bat]";
0572         else if (data[i] == MetaBat) std::cout << "[metabat]";
0573         else std::cout << data[i];
0574         std::cout << std::endl;
0575     }
0576 }
0577 
0578 // =========== DirTree ==========
0579 
0580 const unsigned DirTree::End = 0xffffffff;
0581 
0582 /*
0583  * Compare DirEntry names according to the spec.
0584  */
0585 int ename_cmp(QString& str1, QString& str2)
0586 {
0587     str1 = str1.toUpper();
0588     str2 = str2.toUpper();
0589     if (str1.size() < str2.size()) return -1;
0590     else if (str1.size() > str2.size()) return 1;
0591     else return str1.compare(str2);
0592 }
0593 
0594 /*
0595  * Check if DirEntry elements at this level have unique names.
0596  */
0597 bool valid_enames(DirTree* dirtree, unsigned index)
0598 {
0599     std::vector<unsigned> chi = dirtree->children(index);
0600     QList<std::string> names;
0601     DirEntry* e = 0;
0602 
0603 #ifdef POLE_DEBUG
0604     if (chi.size()) {
0605         qDebug() << "---------------------";
0606         qDebug() << Q_FUNC_INFO;
0607         qDebug() << "[KIDS]:";
0608     }
0609     for (unsigned i = 0; i < chi.size(); i++) {
0610         e = dirtree->entry(chi[i]);
0611         if (!e->valid) std::cout << "[INVALID] ";
0612         printf("DirEntry: name=%s prev=%i next=%i child=%i start=%lu size=%lu dir=%i\n",
0613                e->name.c_str(), e->prev, e->next, e->child, e->start, e->size, e->dir);
0614     }
0615     if (chi.size()) {
0616         qDebug() << "---------------------";
0617     }
0618 #endif
0619 
0620     for (unsigned i = 0; i < chi.size(); i++) {
0621         e = dirtree->entry(chi[i]);
0622         if (e->valid) {
0623             if (names.contains(e->name)) {
0624                 return false;
0625             } else {
0626                 names.append(e->name);
0627             }
0628         }
0629     }
0630     return true;
0631 }
0632 
0633 bool DirTree::valid() const
0634 {
0635     const DirEntry* e;
0636     QString str1, str2;
0637 
0638 #ifdef POLE_DEBUG
0639         qDebug() << Q_FUNC_INFO;
0640 #endif
0641     for (unsigned i = 0; i < entries.size(); i++) {
0642         e = &entries[i];
0643 
0644 #ifdef POLE_DEBUG
0645         if (!e->valid) std::cout << "[INVALID] ";
0646         printf("DirEntry: name=%s prev=%i next=%i child=%i start=%lu size=%lu dir=%i\n",
0647                e->name.c_str(), e->prev, e->next, e->child, e->start, e->size, e->dir);
0648 #endif
0649 #ifdef POLE_FAIL_ON_NEMPTY_NVALID_OBJS
0650         if (!e->valid && e->size) {
0651 #ifdef POLE_DEBUG
0652             qDebug() << "Invalid DirEntry detected!";
0653 #endif
0654             return false;
0655         }
0656 #endif
0657         //Looking for invalid stream objects.
0658 #ifdef POLE_FAIL_ON_NVALID_STREAM_OBJS
0659         if (!e->valid && !e->dir) {
0660 #ifdef POLE_DEBUG
0661             qDebug() << "Invalid DirEntry (stream object) detected!";
0662 #endif
0663             return false;
0664         }
0665 #endif
0666         //Looking for invalid storage objects.
0667         if (!e->valid && e->dir) {
0668 #ifdef POLE_DEBUG
0669             qDebug() << "Invalid DirEntry (storage object) detected!";
0670 #endif
0671             return false;
0672         }
0673 
0674     //A root storage: size = size of the mini stream, start = first sector
0675     //of the mini stream, if the mini stream exists
0676     //
0677     //A storage object MAY have e->child set - [MS-CFB].
0678 #ifdef POLE_CHECK_STORAGE_OBJS
0679         if ((e->valid && e->dir) && (i > 0) &&
0680             ((e->start != 0) || (e->size != 0)))
0681         {
0682 #ifdef POLE_DEBUG
0683             qDebug() << "Invalid DirEntry (storage object) detected!";
0684 #endif
0685             return false;
0686         }
0687 #endif
0688         //Looking for duplicate DirEntries in the storage object.
0689         if (e->valid && e->dir) {
0690             if (!valid_enames(const_cast<DirTree*>(this), i)) {
0691 #ifdef POLE_DEBUG
0692                 qDebug() << "Invalid DirEntry (storage object) detected!";
0693 #endif
0694                 return false;
0695             }
0696         }
0697 
0698         //Check the name of the left/right DirEntry.
0699 #ifdef POLE_CHECK_SIBLINGS
0700         if (e->prev != End) {
0701             str1 = QString(entries[e->prev].name.data());
0702         }
0703         if (e->next != End) {
0704             str2 = QString(entries[e->next].name.data());
0705         }
0706         if (!str1.isEmpty() && !str2.isEmpty()) {
0707             if (ename_cmp(str1, str2) > 0) {
0708 #ifdef POLE_DEBUG
0709                 qDebug() << "DirEntry: [name, position] mismatch!";
0710 #endif
0711                 return false;
0712             }
0713         }
0714 #endif
0715     }
0716     return true;
0717 }
0718 
0719 DirTree::DirTree()
0720 {
0721     clear();
0722 }
0723 
0724 void DirTree::clear()
0725 {
0726     // leave only root entry
0727     entries.resize(1);
0728     entries[0].valid = true;
0729     entries[0].name = "Root Entry";
0730     entries[0].dir = true;
0731     entries[0].size = 0;
0732     entries[0].start = End;
0733     entries[0].prev = End;
0734     entries[0].next = End;
0735     entries[0].child = End;
0736 }
0737 
0738 unsigned DirTree::entryCount()
0739 {
0740     return entries.size();
0741 }
0742 
0743 DirEntry* DirTree::entry(unsigned index)
0744 {
0745     if (index >= entryCount()) return (DirEntry*) 0;
0746     return &entries[ index ];
0747 }
0748 
0749 int DirTree::indexOf(DirEntry* e)
0750 {
0751     for (unsigned i = 0; i < entryCount(); i++)
0752         if (entry(i) == e) return i;
0753 
0754     return -1;
0755 }
0756 
0757 int DirTree::parent(unsigned index)
0758 {
0759     // brute-force, basically we iterate for each entries, find its children
0760     // and check if one of the children is 'index'
0761     for (unsigned j = 0; j < entryCount(); j++) {
0762         std::vector<unsigned> chi = children(j);
0763         for (unsigned i = 0; i < chi.size(); i++)
0764             if (chi[i] == index)
0765                 return j;
0766     }
0767 
0768     return -1;
0769 }
0770 
0771 std::string DirTree::fullName(unsigned index)
0772 {
0773     // don't use root name ("Root Entry"), just give "/"
0774     if (index == 0) return "/";
0775 
0776     std::string result = entry(index)->name;
0777     result.insert(0,  "/");
0778     int p = parent(index);
0779     DirEntry * _entry = 0;
0780     while (p > 0) {
0781         _entry = entry(p);
0782         if (_entry->dir && _entry->valid) {
0783             result.insert(0,  _entry->name);
0784             result.insert(0,  "/");
0785         }
0786         --p;
0787         index = p;
0788         if (index <= 0) break;
0789     }
0790     return result;
0791 }
0792 
0793 // given a fullname (e.g "/ObjectPool/_1020961869"), find the entry
0794 // if not found and create is false, return 0
0795 // if create is true, a new entry is returned
0796 DirEntry* DirTree::entry(const std::string& name, bool create)
0797 {
0798     if (!name.length()) return (DirEntry*)0;
0799 
0800     // quick check for "/" (that's root)
0801     if (name == "/") return entry(0);
0802 
0803     // split the names, e.g  "/ObjectPool/_1020961869" will become:
0804     // "ObjectPool" and "_1020961869"
0805     std::list<std::string> names;
0806     std::string::size_type start = 0, end = 0;
0807     if (name[0] == '/') start++;
0808     while (start < name.length()) {
0809         end = name.find_first_of('/', start);
0810         if (end == std::string::npos) end = name.length();
0811         names.push_back(name.substr(start, end - start));
0812         start = end + 1;
0813     }
0814 
0815     // start from root
0816     int index = 0 ;
0817 
0818     // trace one by one
0819     std::list<std::string>::iterator it;
0820 
0821     for (it = names.begin(); it != names.end(); ++it) {
0822         // find among the children of index
0823         std::vector<unsigned> chi = children(index);
0824         unsigned child = 0;
0825         for (unsigned i = 0; i < chi.size(); i++) {
0826             DirEntry* ce = entry(chi[i]);
0827             if (ce)
0828                 if (ce->valid && (ce->name.length() > 1))
0829                     if (ce->name == *it)
0830                         child = chi[i];
0831         }
0832 
0833         // traverse to the child
0834         if (child > 0) index = child;
0835         else {
0836             // not found among children
0837             if (!create) return (DirEntry*)0;
0838 
0839             // create a new entry
0840             unsigned parent = index;
0841             entries.push_back(DirEntry());
0842             index = entryCount() - 1;
0843             DirEntry* e = entry(index);
0844             e->valid = true;
0845             e->name = *it;
0846             e->dir = false;
0847             e->size = 0;
0848             e->start = 0;
0849             e->child = End;
0850             e->prev = End;
0851             e->next = entry(parent)->child;
0852             entry(parent)->child = index;
0853         }
0854     }
0855 
0856     return entry(index);
0857 }
0858 
0859 // helper function: recursively find siblings of index
0860 void dirtree_find_siblings(DirTree* dirtree, std::vector<unsigned>& result,
0861                            unsigned index)
0862 {
0863     DirEntry* e = dirtree->entry(index);
0864     if (!e) return;
0865 //     if (!e->valid) return;
0866 
0867     // prevent infinite loop
0868     for (unsigned i = 0; i < result.size(); i++) {
0869         if (result[i] == index) return;
0870     }
0871     // add myself
0872     result.push_back(index);
0873 
0874     // visit previous sibling, don't go infinitely
0875     unsigned prev = e->prev;
0876     if ((prev > 0) && (prev < dirtree->entryCount())) {
0877         for (unsigned i = 0; i < result.size(); i++)
0878             if (result[i] == prev) prev = 0;
0879         if (prev) dirtree_find_siblings(dirtree, result, prev);
0880     }
0881 
0882     // visit next sibling, don't go infinitely
0883     unsigned next = e->next;
0884     if ((next > 0) && (next < dirtree->entryCount())) {
0885         for (unsigned i = 0; i < result.size(); i++)
0886             if (result[i] == next) next = 0;
0887         if (next) dirtree_find_siblings(dirtree, result, next);
0888     }
0889 }
0890 
0891 std::vector<unsigned> DirTree::children(unsigned index)
0892 {
0893     std::vector<unsigned> result;
0894 
0895     DirEntry* e = entry(index);
0896     if (e) {
0897         if (e->valid && e->dir) {
0898             dirtree_find_siblings(this, result, e->child);
0899         }
0900     }
0901     return result;
0902 }
0903 
0904 void DirTree::load(unsigned char* buffer, unsigned size, const unsigned threshold,
0905                    const unsigned max_sbat, const unsigned max_bbat)
0906 {
0907 #ifdef POLE_DEBUG
0908     qDebug() << "-------------------------------";
0909     qDebug() << Q_FUNC_INFO;
0910 #endif
0911 
0912     entries.clear();
0913     unsigned n = (size / 128); //num. of directory entries
0914 
0915     for (unsigned i = 0; i < (size / 128); i++) {
0916         unsigned p = i * 128;
0917 
0918 
0919         // parse name of this entry, which stored as Unicode 16-bit
0920         int name_len = readU16(buffer + 0x40 + p);
0921         if (name_len > 64) {
0922             name_len = 64;
0923 #ifdef POLE_DEBUG
0924             qDebug() << "DirEntry: Invalid length of name!";
0925 #endif
0926         }
0927         std::string name;
0928         for (int j = 0; (buffer[j+p]) && (j < name_len); j += 2) {
0929             name.append(1, buffer[j+p]);
0930         }
0931 
0932         // first char isn't printable ? remove it...
0933         if (buffer[p] < 32) {
0934             name.erase(0, 1);
0935         }
0936 
0937         // [MS-CFB] — v20110318
0938         // 0x00 = Unknown or unallocated, 0x01 = directory (Storage Object),
0939         // 0x02 = file (Stream Object), 0x05 = Root Storage Object
0940         unsigned type = buffer[ 0x42 + p];
0941 
0942         DirEntry e;
0943         e.valid = true;
0944         e.name = name;
0945         e.start = readU32(buffer + 0x74 + p);
0946         e.size = readU32(buffer + 0x78 + p);
0947         e.prev = readU32(buffer + 0x44 + p);
0948         e.next = readU32(buffer + 0x48 + p);
0949         e.child = readU32(buffer + 0x4C + p);
0950         e.dir = false;
0951 
0952         if ((type == 1) || (type == 5)) {
0953             e.dir = true;
0954         }
0955 
0956         // sanity checks
0957         if ((type != 0) && (type != 1) && (type != 2) && (type != 5)) {
0958             e.valid = false;
0959 #ifdef POLE_DEBUG
0960             qDebug() << "DirEntry: invalid type!";
0961 #endif
0962         }
0963         if ((type != 0) && (name_len < 1)) {
0964             e.valid = false;
0965 #ifdef POLE_DEBUG
0966             qDebug() << "DirEntry: invalid (type,name) pair!";
0967 #endif
0968         }
0969         // unknown object
0970         if (type == 0) {
0971             if ((e.child != End) || (e.prev != End) || (e.next != End)) {
0972                 e.valid = false;
0973 #ifdef POLE_DEBUG
0974                 qDebug() << "DirEntry: reference to prev/next/child != NOSTREAM";
0975 #endif
0976             }
0977             if ((e.start != 0) || (e.size != 0)) {
0978                 e.valid = false;
0979 #ifdef POLE_DEBUG
0980                 qDebug() << "DirEntry: start/size != ZERO";
0981 #endif
0982             }
0983         }
0984         // storage objects
0985         if (type == 1) {
0986             if (((e.prev != End) && (e.prev >= n)) ||
0987                 ((e.next != End) && (e.next >= n)) ||
0988                 ((e.child != End) && (e.child >= n))) {
0989                 e.valid = false;
0990 #ifdef POLE_DEBUG
0991                 qDebug() << "DirEntry: reference to prev/next/child > object num. (" << n << ")";
0992 #endif
0993             }
0994         }
0995         // stream object
0996         if (type == 2) {
0997             //check stream position
0998             if ((e.size >= threshold) && (e.start >= max_bbat)) {
0999                 e.valid = false;
1000 #ifdef POLE_DEBUG
1001                 qDebug() << "DirEntry: (e.start >= max_bbat)";
1002 #endif
1003             }
1004             else if (e.start >= max_sbat) {
1005                 e.valid = false;
1006 #ifdef POLE_DEBUG
1007                 qDebug() << "DirEntry: (e.start >= max_sbat)";
1008 #endif
1009             }
1010             //check stream object
1011             if (e.child != End) {
1012                 e.valid = false;
1013 #ifdef POLE_DEBUG
1014                 qDebug() << "DirEntry: (e.child != End)";
1015 #endif
1016             }
1017             //NOTE: Disabled because of too many false positives.
1018 //             if ((e->prev != End) || (e->next != End)) {
1019 //                 e.valid = false;
1020 //             }
1021         }
1022 
1023         // CLSID contains an object class GUID (globally unique identifier) if
1024         // this entry is a storage or root storage.  In a stream object, this
1025         // field MUST be set to all zeroes.
1026 #ifdef POLE_DEBUG
1027         if (!e.valid) {
1028             std::cout << "[INVALID] ";
1029         }
1030         printf("DirEntry: name=%s type=%i prev=%i next=%i child=%i start=%lu size=%lu clsid=%lu.%lu.%lu.%lu\n",
1031                name.c_str(), type, e.prev, e.next, e.child, e.start, e.size, readU32(buffer + 0x50 + p),
1032                readU32(buffer + 0x54 + p), readU32(buffer + 0x58 + p), readU32(buffer + 0x5C + p));
1033 #endif
1034         entries.push_back(e);
1035     }
1036 #ifdef POLE_DEBUG
1037     qDebug() << "-------------------------------";
1038 #endif
1039 }
1040 
1041 // return space required to save this dirtree
1042 unsigned DirTree::size()
1043 {
1044     return entryCount() * 128;
1045 }
1046 
1047 void DirTree::save(unsigned char* buffer)
1048 {
1049     memset(buffer, 0, size());
1050 
1051     // root is fixed as "Root Entry"
1052     DirEntry* root = entry(0);
1053     std::string name = "Root Entry";
1054     for (unsigned j = 0; j < name.length(); j++)
1055         buffer[ j*2 ] = name[j];
1056     writeU16(buffer + 0x40, name.length()*2 + 2);
1057     writeU32(buffer + 0x74, 0xffffffff);
1058     writeU32(buffer + 0x78, 0);
1059     writeU32(buffer + 0x44, 0xffffffff);
1060     writeU32(buffer + 0x48, 0xffffffff);
1061     writeU32(buffer + 0x4c, root->child);
1062     buffer[ 0x42 ] = 5;
1063     buffer[ 0x43 ] = 1;
1064 
1065     for (unsigned i = 1; i < entryCount(); i++) {
1066         DirEntry* e = entry(i);
1067         if (!e) continue;
1068         if (e->dir) {
1069             e->start = 0xffffffff;
1070             e->size = 0;
1071         }
1072 
1073         // max length for name is 32 chars
1074         std::string name = e->name;
1075         if (name.length() > 32)
1076             name.erase(32, name.length());
1077 
1078         // write name as Unicode 16-bit
1079         for (unsigned j = 0; j < name.length(); j++)
1080             buffer[ i*128 + j*2 ] = name[j];
1081 
1082         writeU16(buffer + i*128 + 0x40, name.length()*2 + 2);
1083         writeU32(buffer + i*128 + 0x74, e->start);
1084         writeU32(buffer + i*128 + 0x78, e->size);
1085         writeU32(buffer + i*128 + 0x44, e->prev);
1086         writeU32(buffer + i*128 + 0x48, e->next);
1087         writeU32(buffer + i*128 + 0x4c, e->child);
1088         buffer[ i*128 + 0x42 ] = e->dir ? 1 : 2;
1089         buffer[ i*128 + 0x43 ] = 1; // always black
1090     }
1091 }
1092 
1093 void DirTree::debug()
1094 {
1095     for (unsigned i = 0; i < entryCount(); i++) {
1096         DirEntry* e = entry(i);
1097         if (!e) continue;
1098         std::cout << i << ": ";
1099         if (!e->valid) std::cout << "INVALID ";
1100         std::cout << e->name << " ";
1101         if (e->dir) std::cout << "(Dir) ";
1102         else std::cout << "(File) ";
1103         std::cout << e->size << " ";
1104         std::cout << "s:" << e->start << " ";
1105         std::cout << "(";
1106         if (e->child == End) std::cout << "-"; else std::cout << e->child;
1107         std::cout << " ";
1108         if (e->prev == End) std::cout << "-"; else std::cout << e->prev;
1109         std::cout << ":";
1110         if (e->next == End) std::cout << "-"; else std::cout << e->next;
1111         std::cout << ")";
1112         std::cout << std::endl;
1113     }
1114 }
1115 
1116 // =========== StorageIO ==========
1117 
1118 StorageIO::StorageIO(Storage* st, const char* fname)
1119 {
1120     storage = st;
1121     filename = fname;
1122     result = Storage::Ok;
1123     opened = false;
1124 
1125     header = new Header();
1126     dirtree = new DirTree();
1127     bbat = new AllocTable();
1128     sbat = new AllocTable();
1129 
1130     filesize = 0;
1131     bbat->blockSize = 1 << header->b_shift;
1132     sbat->blockSize = 1 << header->s_shift;
1133 }
1134 
1135 StorageIO::~StorageIO()
1136 {
1137     if (opened) close();
1138     delete sbat;
1139     delete bbat;
1140     delete dirtree;
1141     delete header;
1142 }
1143 
1144 bool StorageIO::open()
1145 {
1146     // already opened ? close first
1147     if (opened) close();
1148 
1149     load();
1150 
1151     return result == Storage::Ok;
1152 }
1153 
1154 void StorageIO::load()
1155 {
1156     unsigned char* buffer = 0;
1157     unsigned long buflen = 0;
1158     std::vector<unsigned long> blocks;
1159 
1160     // open the file, check for error
1161     result = Storage::OpenFailed;
1162     file.open(filename.c_str(), std::ios::binary | std::ios::in);
1163     if (!file.good()) return;
1164 
1165     // find size of input file
1166     file.seekg(0, std::ios::end);
1167     filesize = file.tellg();
1168 
1169     // load header
1170     buffer = new unsigned char[OLE_HEADER_SIZE];
1171     file.seekg(0);
1172     file.read((char*)buffer, OLE_HEADER_SIZE);
1173     if (!file.good()) {
1174         delete[] buffer;
1175         return;
1176     }
1177     header->load(buffer);
1178     delete[] buffer;
1179 
1180     // check OLE magic id
1181     result = Storage::NotOLE;
1182     for (unsigned i = 0; i < 8; i++)
1183         if (header->id[i] != pole_magic[i])
1184             return;
1185 
1186     // important block size
1187     bbat->blockSize = 1 << header->b_shift;
1188     sbat->blockSize = 1 << header->s_shift;
1189     const unsigned max_bbat_block = (filesize - OLE_HEADER_SIZE) / bbat->blockSize;
1190     const unsigned max_sbat_block = (filesize - OLE_HEADER_SIZE) / sbat->blockSize;
1191 
1192     // sanity checks
1193     result = Storage::BadOLE;
1194     if (!header->valid(max_sbat_block, max_bbat_block)) {
1195         return;
1196     }
1197 
1198     // find blocks allocated to store big bat
1199     // the first 109 blocks are in header, the rest in meta bat
1200     blocks.clear();
1201     blocks.resize(header->num_bat);
1202     for (unsigned i = 0; i < 109; i++) {
1203         if (i >= header->num_bat) break;
1204         else blocks[i] = header->bb_blocks[i];
1205     }
1206     if ((header->num_bat > 109) && (header->num_mbat > 0)) {
1207         unsigned char* buffer2 = new unsigned char[ bbat->blockSize ];
1208         unsigned k = 109;
1209         unsigned mblock = header->mbat_start;
1210         for (unsigned r = 0; r < header->num_mbat; r++) {
1211             unsigned long rr = loadBigBlock(mblock, buffer2, bbat->blockSize);
1212             if (rr != bbat->blockSize) {
1213                 delete[] buffer2;
1214                 return;
1215             }
1216             for (unsigned s = 0; s < bbat->blockSize - 4; s += 4) {
1217                 if (k >= header->num_bat) break;
1218                 else  blocks[k++] = readU32(buffer2 + s);
1219             }
1220             mblock = readU32(buffer2 + bbat->blockSize - 4);
1221         }
1222         delete[] buffer2;
1223     }
1224 
1225     // load big bat
1226     buflen = blocks.size() * bbat->blockSize;
1227     if (buflen > 0) {
1228         buffer = new unsigned char[ buflen ];
1229         unsigned long r = loadBigBlocks(blocks, buffer, buflen);
1230         if (r != buflen) {
1231             qCritical() << Q_FUNC_INFO << "SAT construction failed!";
1232             delete[] buffer;
1233             return;
1234         }
1235         bbat->load(buffer, buflen);
1236         delete[] buffer;
1237 
1238         if (!bbat->valid(filesize, header->b_shift, true)) {
1239             return;
1240         }
1241     }
1242     //TODO: make fail affect the result value
1243     bool fail = false;
1244 
1245     // load small bat
1246     blocks.clear();
1247     blocks = bbat->follow(header->sbat_start, fail);
1248     buflen = blocks.size() * bbat->blockSize;
1249     if (buflen > 0) {
1250         buffer = new unsigned char[ buflen ];
1251         unsigned long r = loadBigBlocks(blocks, buffer, buflen);
1252         if (r != buflen) {
1253             qCritical() << Q_FUNC_INFO << "SSAT construction failed!";
1254             delete[] buffer;
1255             return;
1256         }
1257         sbat->load(buffer, buflen);
1258         delete[] buffer;
1259 
1260         if (!sbat->valid(filesize, header->s_shift, false)) {
1261             return;
1262         }
1263     }
1264 
1265     // load directory tree
1266     blocks.clear();
1267     blocks = bbat->follow(header->dirent_start, fail);
1268     buflen = blocks.size() * bbat->blockSize;
1269     buffer = new unsigned char[ buflen ];
1270     unsigned long r = loadBigBlocks(blocks, buffer, buflen);
1271     if (r != buflen) {
1272         qCritical() << Q_FUNC_INFO << "DirTree construction failed!";
1273         delete[] buffer;
1274         return;
1275     }
1276     dirtree->load(buffer, buflen, header->threshold, max_sbat_block, max_bbat_block);
1277     unsigned sb_start = readU32(buffer + 0x74);
1278     delete[] buffer;
1279     if (!dirtree->valid()) {
1280         qCritical() << Q_FUNC_INFO << "Invalid DirTree!";
1281         return;
1282     }
1283 
1284     // fetch block chain as data for small-files
1285     sb_blocks = bbat->follow(sb_start, fail);
1286 
1287     // for troubleshooting, just enable this block
1288 #ifdef POLE_DEBUG
1289     header->debug();
1290     sbat->debug();
1291     bbat->debug();
1292     dirtree->debug();
1293 #endif
1294 
1295     // so far so good
1296     result = Storage::Ok;
1297     opened = true;
1298 }
1299 
1300 void StorageIO::create()
1301 {
1302     // std::cout << "Creating " << filename << std::endl;
1303 
1304     file.open(filename.c_str(), std::ios::out | std::ios::binary);
1305     if (!file.good()) {
1306         qCritical() << Q_FUNC_INFO << "Can't create file:" << filename.c_str();
1307         result = Storage::OpenFailed;
1308         return;
1309     }
1310 
1311     // so far so good
1312     opened = true;
1313     result = Storage::Ok;
1314 }
1315 
1316 void StorageIO::flush()
1317 {
1318     /* Note on Microsoft implementation:
1319        - directory entries are stored in the last block(s)
1320        - BATs are as second to the last
1321        - Meta BATs are third to the last
1322     */
1323 }
1324 
1325 void StorageIO::close()
1326 {
1327     if (!opened) return;
1328 
1329     file.close();
1330     opened = false;
1331 
1332     std::list<Stream*>::iterator it;
1333     for (it = streams.begin(); it != streams.end(); ++it)
1334         delete *it;
1335 }
1336 
1337 StreamIO* StorageIO::streamIO(const std::string& name)
1338 {
1339 #ifdef POLE_DEBUG
1340     qDebug() << Q_FUNC_INFO << "preparing stream:" << name.c_str();
1341 #endif
1342     // sanity check
1343     if (!name.length()) return (StreamIO*)0;
1344 
1345     // search in the entries
1346     DirEntry* entry = dirtree->entry(name);
1347     //if( entry) std::cout << "FOUND\n";
1348     if (!entry) return (StreamIO*)0;
1349     //if( !entry->dir ) std::cout << "  NOT DIR\n";
1350     if (entry->dir) return (StreamIO*)0;
1351 
1352     StreamIO* result = new StreamIO(this, entry);
1353     result->fullName = name;
1354 
1355     return result;
1356 }
1357 
1358 unsigned long StorageIO::loadBigBlocks(const std::vector<unsigned long>& blocks,
1359                                        unsigned char* data, unsigned long maxlen)
1360 {
1361     return loadBigBlocks(&blocks[0], blocks.size(), data, maxlen);
1362 }
1363 
1364 unsigned long StorageIO::loadBigBlocks(const unsigned long *blocks, unsigned blockCount,
1365                                        unsigned char *data, unsigned long maxlen)
1366 {
1367     // sentinel
1368     if (!data) return 0;
1369     if (!file.good()) return 0;
1370     if (!blocks) return 0;
1371     if (blockCount < 1) return 0;
1372     if (maxlen == 0) return 0;
1373 
1374     // read block one by one, seems fast enough
1375     unsigned long bytes = 0;
1376     for (unsigned long i = 0; (i < blockCount) && (bytes < maxlen); i++) {
1377         unsigned long block = blocks[i];
1378         unsigned long pos =  bbat->blockSize * (block + 1);
1379         unsigned long p = (bbat->blockSize < maxlen - bytes) ? bbat->blockSize : maxlen - bytes;
1380         if (pos + p > filesize) p = filesize - pos;
1381         file.seekg(pos);
1382         file.read((char*)data + bytes, p);
1383         if (!file.good()) return 0;
1384         bytes += p;
1385     }
1386 
1387     return bytes;
1388 }
1389 
1390 unsigned long StorageIO::loadBigBlock(unsigned long block,
1391                                       unsigned char* data, unsigned long maxlen)
1392 {
1393     // sentinel
1394     if (!data) return 0;
1395     if (!file.good()) return 0;
1396 
1397     return loadBigBlocks(&block, 1, data, maxlen);
1398 }
1399 
1400 // return number of bytes which has been read
1401 unsigned long StorageIO::loadSmallBlocks(const std::vector<unsigned long>& blocks,
1402         unsigned char* data, unsigned long maxlen)
1403 {
1404     return loadSmallBlocks(&blocks[0], blocks.size(), data, maxlen);
1405 }
1406 
1407 unsigned long StorageIO::loadSmallBlocks(const unsigned long *blocks, unsigned blockCount,
1408                                          unsigned char *data, unsigned long maxlen)
1409 {
1410     // sentinel
1411     if (!data) return 0;
1412     if (!file.good()) return 0;
1413     if (!blocks) return 0;
1414     if (blockCount < 1) return 0;
1415     if (maxlen == 0) return 0;
1416 
1417     // our own local buffer
1418     unsigned char* buf = new unsigned char[ bbat->blockSize ];
1419 
1420     // read small block one by one
1421     unsigned long bytes = 0;
1422     for (unsigned long i = 0; (i < blockCount) && (bytes < maxlen); i++) {
1423         unsigned long block = blocks[i];
1424 
1425         // find where the small-block exactly is
1426         unsigned long pos = block * sbat->blockSize;
1427         unsigned long bbindex = pos / bbat->blockSize;
1428         if (bbindex >= sb_blocks.size()) break;
1429 
1430         unsigned long r = loadBigBlock(sb_blocks[ bbindex ], buf, bbat->blockSize);
1431         if (r != bbat->blockSize) {
1432             delete[] buf;
1433             return 0;
1434         }
1435 
1436         // copy the data
1437         unsigned offset = pos % bbat->blockSize;
1438         unsigned long p = (maxlen - bytes < bbat->blockSize - offset) ? maxlen - bytes :  bbat->blockSize - offset;
1439         p = (sbat->blockSize < p) ? sbat->blockSize : p;
1440         memcpy(data + bytes, buf + offset, p);
1441         bytes += p;
1442     }
1443 
1444     delete[] buf;
1445 
1446     return bytes;
1447 }
1448 
1449 unsigned long StorageIO::loadSmallBlock(unsigned long block,
1450                                         unsigned char* data, unsigned long maxlen)
1451 {
1452     // sentinel
1453     if (!data) return 0;
1454     if (!file.good()) return 0;
1455 
1456     return loadSmallBlocks(&block, 1, data, maxlen);
1457 }
1458 
1459 // =========== StreamIO ==========
1460 
1461 StreamIO::StreamIO(StorageIO* s, DirEntry* e)
1462 {
1463     io = s;
1464     entry = e;
1465     eof = false;
1466     fail = false;
1467 
1468     m_pos = 0;
1469 
1470     if (entry->size >= io->header->threshold) {
1471         blocks = io->bbat->follow(entry->start, fail);
1472     } else {
1473         blocks = io->sbat->follow(entry->start, fail);
1474     }
1475 
1476     // prepare cache
1477     cache_pos = 0;
1478     base_cache_size = cache_size = 4096; // optimal ?
1479     cache_data = new unsigned char[base_cache_size];
1480     updateCache();
1481 }
1482 
1483 // FIXME tell parent we're gone
1484 StreamIO::~StreamIO()
1485 {
1486     delete[] cache_data;
1487 }
1488 
1489 void StreamIO::seek(unsigned long pos)
1490 {
1491     m_pos = pos;
1492 }
1493 
1494 unsigned long StreamIO::tell()
1495 {
1496     return m_pos;
1497 }
1498 
1499 int StreamIO::getch()
1500 {
1501     // past end-of-file ?
1502     if (m_pos > entry->size) return -1;
1503 
1504     // need to update cache ?
1505     if (!cache_size || (m_pos < cache_pos) ||
1506             (m_pos >= cache_pos + cache_size))
1507         updateCache();
1508 
1509     // something bad if we don't get good cache
1510     if (!cache_size) return -1;
1511 
1512     int data = cache_data[m_pos - cache_pos];
1513     m_pos++;
1514 
1515     return data;
1516 }
1517 
1518 unsigned long StreamIO::read(unsigned char *data, unsigned long maxlen)
1519 {
1520     // sanity checks
1521     if (!data) return 0;
1522     if (maxlen == 0) return 0;
1523 
1524     unsigned long totalbytes = 0;
1525 
1526     while (totalbytes < maxlen) {
1527         // need to update cache ?
1528         if (!cache_size || (m_pos < cache_pos) ||
1529                 (m_pos >= cache_pos + cache_size))
1530             updateCache();
1531         if (!cache_size) break;
1532 
1533         const unsigned long remaining = cache_size - (m_pos - cache_pos);
1534         const unsigned long count = std::min(remaining, maxlen - totalbytes);
1535         memcpy(data + totalbytes, &cache_data[m_pos - cache_pos], count);
1536         totalbytes += count;
1537         m_pos += count;
1538     }
1539     return totalbytes;
1540 }
1541 
1542 unsigned long StreamIO::readInternal(unsigned long pos, unsigned char* data, unsigned long maxlen)
1543 {
1544     // sanity checks
1545     if (!data) return 0;
1546     if (maxlen == 0) return 0;
1547 
1548     unsigned long totalbytes = 0;
1549 
1550     if (entry->size < io->header->threshold) {
1551         // small file
1552         unsigned long index = pos / io->sbat->blockSize;
1553 
1554         if (index >= blocks.size()) return 0;
1555 
1556         unsigned char buf[4096];
1557         unsigned long offset = pos % io->sbat->blockSize;
1558         while (totalbytes < maxlen) {
1559             if (index >= blocks.size()) break;
1560             io->loadSmallBlock(blocks[index], &buf[0], io->bbat->blockSize);
1561             unsigned long count = io->sbat->blockSize - offset;
1562             if (count > maxlen - totalbytes) count = maxlen - totalbytes;
1563             memcpy(data + totalbytes, &buf[0] + offset, count);
1564             totalbytes += count;
1565             offset = 0;
1566             index++;
1567         }
1568 
1569     } else {
1570         // big file
1571         unsigned long index = pos / io->bbat->blockSize;
1572 
1573         if (index >= blocks.size()) return 0;
1574 
1575         unsigned char buf[4096];
1576         unsigned long offset = pos % io->bbat->blockSize;
1577         while (totalbytes < maxlen) {
1578             if (index >= blocks.size()) break;
1579             unsigned long r = io->loadBigBlock(blocks[index], &buf[0], io->bbat->blockSize);
1580             if (r != io->bbat->blockSize) {
1581                 return 0;
1582             }
1583             unsigned long count = io->bbat->blockSize - offset;
1584             if (count > maxlen - totalbytes) count = maxlen - totalbytes;
1585             memcpy(data + totalbytes, &buf[0] + offset, count);
1586             totalbytes += count;
1587             index++;
1588             offset = 0;
1589         }
1590 
1591     }
1592 
1593     return totalbytes;
1594 }
1595 
1596 unsigned long StreamIO::readInternal(unsigned char* data, unsigned long maxlen)
1597 {
1598     unsigned long bytes = readInternal(tell(), data, maxlen);
1599     m_pos += bytes;
1600     return bytes;
1601 }
1602 
1603 void StreamIO::updateCache()
1604 {
1605     // sanity check
1606     if (!cache_data) return;
1607 
1608     cache_pos = m_pos - (m_pos % base_cache_size);
1609     unsigned long bytes = base_cache_size;
1610     if (cache_pos + bytes > entry->size) bytes = entry->size - cache_pos;
1611     if (cache_pos + bytes <= m_pos) {
1612         cache_size = 0;
1613     } else {
1614         cache_size = readInternal(cache_pos, cache_data, bytes);
1615     }
1616 }
1617 
1618 
1619 // =========== Storage ==========
1620 
1621 Storage::Storage(const char* filename)
1622 {
1623     io = new StorageIO(this, filename);
1624 }
1625 
1626 Storage::~Storage()
1627 {
1628     delete io;
1629 }
1630 
1631 int Storage::result()
1632 {
1633     return io->result;
1634 }
1635 
1636 bool Storage::open()
1637 {
1638     return io->open();
1639 }
1640 
1641 void Storage::close()
1642 {
1643     io->close();
1644 }
1645 
1646 std::list<std::string> Storage::entries(const std::string& path)
1647 {
1648     std::list<std::string> result;
1649     DirTree* dt = io->dirtree;
1650     DirEntry* e = dt->entry(path, false);
1651     if (e) {
1652         if (e->dir) {
1653             unsigned parent = dt->indexOf(e);
1654             std::vector<unsigned> children = dt->children(parent);
1655             for (unsigned i = 0; i < children.size(); i++)
1656                 result.push_back(dt->entry(children[i])->name);
1657         }
1658     }
1659     return result;
1660 }
1661 
1662 bool Storage::isDirectory(const std::string& name)
1663 {
1664     DirEntry* e = io->dirtree->entry(name, false);
1665     return e ? e->dir : false;
1666 }
1667 
1668 // =========== Stream ==========
1669 
1670 Stream::Stream(Storage* storage, const std::string& name)
1671 {
1672     io = storage->io->streamIO(name);
1673 }
1674 
1675 // FIXME tell parent we're gone
1676 Stream::~Stream()
1677 {
1678     delete io;
1679 }
1680 
1681 std::string Stream::fullName()
1682 {
1683     return io ? io->fullName : std::string();
1684 }
1685 
1686 unsigned long Stream::tell()
1687 {
1688     return io ? io->tell() : 0;
1689 }
1690 
1691 void Stream::seek(unsigned long newpos)
1692 {
1693     if (io) io->seek(newpos);
1694 }
1695 
1696 unsigned long Stream::size()
1697 {
1698     return io ? io->entry->size : 0;
1699 }
1700 
1701 int Stream::getch()
1702 {
1703     return io ? io->getch() : 0;
1704 }
1705 
1706 unsigned long Stream::read(unsigned char* data, unsigned long maxlen)
1707 {
1708     return io ? io->read(data, maxlen) : 0;
1709 }
1710 
1711 bool Stream::eof()
1712 {
1713     return io ? io->eof : false;
1714 }
1715 
1716 bool Stream::fail()
1717 {
1718     return io ? io->fail : true;
1719 }