File indexing completed on 2025-05-11 11:33:07

0001 /* POLE - Portable C++ library to access OLE Storage
0002    SPDX-FileCopyrightText: 2002-2005 Ariya Hidayat <ariya@kde.org>
0003    SPDX-FileCopyrightText: 2011-2012 Matus Uzak <matus.uzak@ixonos.com>
0004 
0005    Redistribution and use in source and binary forms, with or without
0006    modification, are permitted provided that the following conditions
0007    are met:
0008    * Redistributions of source code must retain the above copyright notice,
0009      this list of conditions and the following disclaimer.
0010    * Redistributions in binary form must reproduce the above copyright notice,
0011      this list of conditions and the following disclaimer in the documentation
0012      and/or other materials provided with the distribution.
0013    * Neither the name of the authors nor the names of its contributors may be
0014      used to endorse or promote products derived from this software without
0015      specific prior written permission.
0016 
0017    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
0018    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
0019    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
0020    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
0021    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
0022    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
0023    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
0024    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
0025    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
0026    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
0027    THE POSSIBILITY OF SUCH DAMAGE.
0028 */
0029 
0030 #include "pole.h"
0031 
0032 #include <fstream>
0033 #include <iostream>
0034 #include <list>
0035 #include <string>
0036 #include <vector>
0037 #include <string.h>
0038 #include <ios>       // for std::hex
0039 
0040 #include <QList>
0041 #include <QString>
0042 #include <QDebug>
0043 
0044 //Enable to activate debugging output.
0045 //#define POLE_DEBUG
0046 
0047 //Disabled because of too many false positives, both streams and unknown
0048 //objects MAY be invalid and still have a size set.
0049 //#define POLE_FAIL_ON_NEMPTY_NVALID_OBJS
0050 
0051 //Validate stream object against [MS-CFB] — v20110318.  Disabled because of too
0052 //many false positives.
0053 //#define POLE_FAIL_ON_NVALID_STREAM_OBJS
0054 
0055 //Validate storage object against [MS-CFB] — v20110318.  Disabled because of
0056 //too many false positives on Word8 documents.
0057 //#define POLE_CHECK_STORAGE_OBJS
0058 
//Validate sibling names against their positions in the red-black tree.
//Disabled because of too many false positives on Word8 files with embedded
//documents.
0061 //#define POLE_CHECK_SIBLINGS
0062 
// Byte size of the compound-file header (0x200 = 512).  AllocTable::valid()
// adds it to a sector offset when the table being checked is the FAT.
#define OLE_HEADER_SIZE 0x200
0064 
0065 namespace POLE
0066 {
0067 
// Parsed form of the compound-file header fields that POLE works with.
// Header::load() fills the members from a raw header buffer and
// Header::save() writes them back.
class Header
{
public:
    // ---- data (declaration order is unchanged) ----

    // signature, or magic identifier
    unsigned char id[8];
    // bbat->blockSize = 1 << b_shift
    unsigned b_shift;
    // sbat->blockSize = 1 << s_shift
    unsigned s_shift;
    // blocks allocated for big bat
    unsigned num_bat;
    // starting block for directory info
    unsigned dirent_start;
    // switch from small to big file (usually 4K)
    unsigned threshold;
    // starting block index to store small bat
    unsigned sbat_start;
    // blocks allocated for small bat
    unsigned num_sbat;
    // starting block to store meta bat
    unsigned mbat_start;
    // blocks allocated for meta bat
    unsigned num_mbat;
    // the 109 big-bat block indices kept at header offset 0x4C
    unsigned long bb_blocks[109];

    // ---- operations ----

    // Sets the default shifts/threshold, the magic signature and marks every
    // bb_blocks slot as available.
    Header();

    // Heuristic consistency check; num_sbat and num_bat must not exceed the
    // two limits passed in.
    bool valid(const unsigned max_sbat_block, const unsigned max_bbat_block) const;

    // Reads all fields from a raw header buffer.
    void load(const unsigned char* buffer);

    // Writes all fields into a raw header buffer.
    void save(unsigned char* buffer);

    // Prints the fields for debugging.
    void debug();
};
0089 
// Block allocation table: entry i holds the index of the block that follows
// block i in its chain, or one of the sentinel values below.
class AllocTable
{
public:
    // Sentinel entry values (defined out of class).
    static const unsigned Eof;
    static const unsigned Avail;
    static const unsigned Bat;
    static const unsigned MetaBat;

    unsigned blockSize;

    AllocTable();

    // ---- inspection ----

    // Checks that every non-sentinel entry, shifted by `shift` (plus the
    // header size when isFat), does not point beyond `filesize`.
    bool valid(const unsigned long filesize, const unsigned shift, const bool isFat = true) const;
    // Number of entries.
    unsigned long count();
    // Entry at `index` (no bounds check).
    unsigned long operator[](unsigned long index);
    // Walks the chain beginning at `start`; `fail` is set to true on error.
    std::vector<unsigned long> follow(unsigned long start, bool& fail);
    // Bytes needed by save().
    unsigned size();
    void debug();

    // ---- modification ----

    void clear();
    void resize(unsigned long newsize);
    void preserve(unsigned long n);
    void set(unsigned long index, unsigned long val);
    // Index of the first available entry; enlarges the table when full.
    unsigned unused();
    void setChain(std::vector<unsigned long>);

    // ---- (de)serialization ----

    void load(const unsigned char* buffer, unsigned len);
    void save(unsigned char* buffer);

private:
    std::vector<unsigned long> data;

    // no copy or assign
    AllocTable(const AllocTable&);
    AllocTable& operator=(const AllocTable&);
};
0118 
// One entry of the directory tree (a storage, a stream or an unused slot).
class DirEntry
{
public:
    bool valid;            // false if invalid (should be skipped)
    std::string name;      // the name, not in unicode anymore
    bool dir;              // true if directory
    unsigned long size;    // size (not valid if directory)
    unsigned long start;   // starting block
    unsigned prev;         // previous sibling
    unsigned next;         // next sibling
    unsigned child;        // first child

    // Zero-initializes every scalar member so that a plain `DirEntry e;`
    // never carries indeterminate values.  Zero is chosen (rather than
    // DirTree::End for the links) because it is exactly what the previously
    // implicit `DirEntry()` value-initialization produced, so existing
    // callers observe no change.
    DirEntry()
        : valid(false)
        , name()
        , dir(false)
        , size(0)
        , start(0)
        , prev(0)
        , next(0)
        , child(0)
    {
    }
};
0131 
0132 class DirTree
0133 {
0134 public:
0135     static const unsigned End;
0136     DirTree();
0137     bool valid(void) const;
0138     void clear();
0139     unsigned entryCount();
0140     DirEntry* entry(unsigned index);
0141     DirEntry* entry(const std::string& name, bool create = false);
0142     int indexOf(DirEntry* e);
0143     int parent(unsigned index);
0144     std::string fullName(unsigned index);
0145     std::vector<unsigned> children(unsigned index);
0146     void load(unsigned char* buffer, unsigned len, const unsigned threshold, const unsigned max_sbat, const unsigned max_bbat);
0147     void save(unsigned char* buffer);
0148     unsigned size();
0149     void debug();
0150 private:
0151     std::vector<DirEntry> entries;
0152     DirTree(const DirTree&);
0153     DirTree& operator=(const DirTree&);
0154 };
0155 
0156 class StorageIO
0157 {
0158 public:
0159     Storage* storage;         // owner
0160     std::string filename;     // filename
0161     std::fstream file;        // associated with above name
0162     int result;               // result of operation
0163     bool opened;              // true if file is opened
0164     unsigned long filesize;   // size of the file
0165 
0166     Header* header;           // storage header
0167     DirTree* dirtree;         // directory tree
0168     AllocTable* bbat;         // allocation table for big blocks
0169     AllocTable* sbat;         // allocation table for small blocks
0170 
0171     std::vector<unsigned long> sb_blocks; // blocks for "small" files
0172 
0173     std::list<Stream*> streams;
0174 
0175     StorageIO(Storage* storage, const char* filename);
0176     ~StorageIO();
0177 
0178     bool open();
0179     void close();
0180     void flush();
0181     void load();
0182     void create();
0183 
0184     unsigned long loadBigBlocks(const std::vector<unsigned long>& blocks, unsigned char* buffer, unsigned long maxlen);
0185     unsigned long loadBigBlocks(const unsigned long* blocks, unsigned blockCount, unsigned char* buffer, unsigned long maxlen);
0186 
0187     unsigned long loadBigBlock(unsigned long block, unsigned char* buffer, unsigned long maxlen);
0188 
0189     unsigned long loadSmallBlocks(const std::vector<unsigned long>& blocks, unsigned char* buffer, unsigned long maxlen);
0190     unsigned long loadSmallBlocks(const unsigned long* blocks, unsigned blockCount, unsigned char* buffer, unsigned long maxlen);
0191 
0192     unsigned long loadSmallBlock(unsigned long block, unsigned char* buffer, unsigned long maxlen);
0193 
0194     StreamIO* streamIO(const std::string& name);
0195 
0196 private:
0197     // no copy or assign
0198     StorageIO(const StorageIO&);
0199     StorageIO& operator=(const StorageIO&);
0200 
0201 };
0202 
0203 class StreamIO
0204 {
0205 public:
0206     StorageIO* io;
0207     DirEntry* entry;
0208     std::string fullName;
0209     bool eof;
0210     bool fail;
0211 
0212     StreamIO(StorageIO* io, DirEntry* entry);
0213     ~StreamIO();
0214     unsigned long size();
0215     void seek(unsigned long pos);
0216     unsigned long tell();
0217     int getch();
0218     unsigned long read(unsigned char* data, unsigned long maxlen);
0219 
0220 private:
0221     unsigned long readInternal(unsigned char* data, unsigned long maxlen);
0222     unsigned long readInternal(unsigned long pos, unsigned char* data, unsigned long maxlen);
0223 
0224     std::vector<unsigned long> blocks;
0225 
0226     // no copy or assign
0227     StreamIO(const StreamIO&);
0228     StreamIO& operator=(const StreamIO&);
0229 
0230     // pointer for read
0231     unsigned long m_pos;
0232 
0233     // simple cache system to speed-up getch()
0234     unsigned char* cache_data;
0235     unsigned long base_cache_size;
0236     unsigned long cache_size;
0237     unsigned long cache_pos;
0238     void updateCache();
0239 };
0240 
0241 } // namespace POLE
0242 
0243 using namespace POLE;
0244 
// Decode a little-endian 16-bit value from the two bytes at ptr.
static inline unsigned long readU16(const unsigned char* ptr)
{
    const int lowByte = ptr[0];
    const int highByte = ptr[1] << 8;
    return lowByte + highByte;
}
0249 
// Decode a little-endian 32-bit value from the four bytes at ptr.
static inline unsigned long readU32(const unsigned char* ptr)
{
    unsigned value = 0;
    // fold from the most significant byte (ptr[3]) down to ptr[0]
    for (int i = 3; i >= 0; --i) {
        value = (value << 8) | unsigned(ptr[i]);
    }
    return value;
}
0254 
// Store the low 16 bits of data at ptr in little-endian order.
static inline void writeU16(unsigned char* ptr, unsigned long data)
{
    for (unsigned i = 0; i < 2; ++i) {
        ptr[i] = (unsigned char)((data >> (8 * i)) & 0xff);
    }
}
0260 
// Store the low 32 bits of data at ptr in little-endian order.
static inline void writeU32(unsigned char* ptr, unsigned long data)
{
    for (unsigned i = 0; i < 4; ++i) {
        ptr[i] = (unsigned char)((data >> (8 * i)) & 0xff);
    }
}
0268 
// The 8-byte signature stored at the very start of the header; Header's
// constructor and Header::save() copy it from here.
static const unsigned char pole_magic[] = {
    0xd0, 0xcf, 0x11, 0xe0,
    0xa1, 0xb1, 0x1a, 0xe1
};
0270 
0271 // =========== Header ==========
0272 
0273 Header::Header()
0274 {
0275     b_shift = 9;
0276     s_shift = 6;
0277     num_bat = 0;
0278     dirent_start = 0;
0279     threshold = 4096;
0280     sbat_start = 0;
0281     num_sbat = 0;
0282     mbat_start = 0;
0283     num_mbat = 0;
0284 
0285     for (unsigned i = 0; i < 8; i++)
0286         id[i] = pole_magic[i];
0287     for (unsigned i = 0; i < 109; i++)
0288         bb_blocks[i] = AllocTable::Avail;
0289 }
0290 
0291 bool Header::valid(const unsigned max_sbat_block, const unsigned max_bbat_block) const
0292 {
0293     if (threshold != 4096) return false;
0294     if (num_bat == 0) return false;
0295     if ((num_bat > 109) && (num_bat > (num_mbat * 127) + 109)) return false;
0296     if ((num_bat < 109) && (num_mbat != 0)) return false;
0297     if (s_shift > b_shift) return false;
0298     if (b_shift <= 6) return false;
0299     if (b_shift > 12) return false;
0300 
0301     // additional heuristics to check the header
0302     if (num_sbat > max_sbat_block) return false;
0303     if (num_bat > max_bbat_block) return false;
0304 
0305 #ifdef POLE_DEBUG
0306     const unsigned ENDOFCHAIN = 0xfffffffe;
0307     const unsigned FREESECT = 0xffffffff;
0308 
0309     if (num_sbat == 0 &&
0310         sbat_start != ENDOFCHAIN &&
0311         sbat_start != FREESECT)
0312     {
0313         qDebug() << Q_FUNC_INFO <<
0314             "There aren't any minifat sectors, but there are links to some!";
0315     }
0316 #endif
0317 
0318     return true;
0319 }
0320 
0321 void Header::load(const unsigned char* buffer)
0322 {
0323     b_shift      = readU16(buffer + 0x1e); // sector shift
0324     s_shift      = readU16(buffer + 0x20); // mini sector shift
0325     num_bat      = readU32(buffer + 0x2c); // number of fat sectors
0326     dirent_start = readU32(buffer + 0x30); // first directory sector location
0327     threshold    = readU32(buffer + 0x38); // transaction signature number
0328     sbat_start   = readU32(buffer + 0x3c); // first mini fat sector location
0329     num_sbat     = readU32(buffer + 0x40); // mini stream cutoff size
0330     mbat_start   = readU32(buffer + 0x44); // first mini difat sector location
0331     num_mbat     = readU32(buffer + 0x48); // number of difat sectors
0332 
0333     for (unsigned i = 0; i < 8; i++)
0334         id[i] = buffer[i];
0335     for (unsigned i = 0; i < 109; i++)
0336         bb_blocks[i] = readU32(buffer + 0x4C + i * 4);
0337 }
0338 
0339 void Header::save(unsigned char* buffer)
0340 {
0341     memset(buffer, 0, 0x4c);
0342     memcpy(buffer, pole_magic, 8);          // ole signature
0343     writeU32(buffer + 8, 0);                // unknown
0344     writeU32(buffer + 12, 0);               // unknown
0345     writeU32(buffer + 16, 0);               // unknown
0346     writeU16(buffer + 24, 0x003e);          // revision ?
0347     writeU16(buffer + 26, 3);               // version ?
0348     writeU16(buffer + 28, 0xfffe);          // unknown
0349     writeU16(buffer + 0x1e, b_shift);
0350     writeU16(buffer + 0x20, s_shift);
0351     writeU32(buffer + 0x2c, num_bat);
0352     writeU32(buffer + 0x30, dirent_start);
0353     writeU32(buffer + 0x38, threshold);
0354     writeU32(buffer + 0x3c, sbat_start);
0355     writeU32(buffer + 0x40, num_sbat);
0356     writeU32(buffer + 0x44, mbat_start);
0357     writeU32(buffer + 0x48, num_mbat);
0358 
0359     for (unsigned i = 0; i < 109; i++)
0360         writeU32(buffer + 0x4C + i*4, bb_blocks[i]);
0361 }
0362 
0363 void Header::debug()
0364 {
0365     qDebug() << Q_FUNC_INFO;
0366     qDebug() << "b_shift:" << b_shift;
0367     qDebug() << "s_shift:" << s_shift;
0368     qDebug() << "num_bat:" << num_bat;
0369     qDebug() << "dirent_start: 0x" << hex << dirent_start;
0370     qDebug() << "threshold:" << dec << threshold;
0371     qDebug() << "sbat_start: 0x" << hex << sbat_start;
0372     qDebug() << "num_sbat:" << dec << num_sbat;
0373     qDebug() << "mbat_start: 0x" << hex << mbat_start;
0374     qDebug() << "num_mbat:" << dec << num_mbat;
0375 
0376     unsigned s = (num_bat <= 109) ? num_bat : 109;
0377     std::cout << "bat blocks:";
0378     for (unsigned i = 0; i < s; i++) {
0379         std::cout << "0x" << std::hex << bb_blocks[i] << " ";
0380     }
0381     std::cout << std::dec << std::endl;
0382 }
0383 
0384 // =========== AllocTable ==========
0385 
0386 const unsigned AllocTable::Avail = 0xffffffff;
0387 const unsigned AllocTable::Eof = 0xfffffffe;
0388 const unsigned AllocTable::Bat = 0xfffffffd;
0389 const unsigned AllocTable::MetaBat = 0xfffffffc;
0390 
0391 AllocTable::AllocTable()
0392 {
0393     blockSize = 4096;
0394     // initial size
0395     resize(128);
0396 }
0397 
0398 bool AllocTable::valid(const unsigned long filesize, const unsigned shift, const bool isFat) const
0399 {
0400     unsigned long offset = 0;
0401     for (unsigned long i = 0; i < data.size(); i++) {
0402         switch (data[i]) {
0403         case AllocTable::Avail:
0404         case AllocTable::Eof:
0405         case AllocTable::Bat:
0406         case AllocTable::MetaBat:
0407             break;
0408         default:
0409             offset = data[i] << shift;
0410             if (isFat) {
0411                 offset += OLE_HEADER_SIZE;
0412             }
0413             if (offset > filesize) {
0414 #ifdef POLE_DEBUG
0415                 qDebug() << "Invalid location of sector in the stream!" <<
0416                     "offset:" << offset << " | filesize:" << filesize;
0417 #endif
0418                 return false;
0419             }
0420         }
0421     }
0422     return true;
0423 }
0424 
0425 unsigned long AllocTable::count()
0426 {
0427     return data.size();
0428 }
0429 
0430 void AllocTable::resize(unsigned long newsize)
0431 {
0432     unsigned oldsize = data.size();
0433     data.resize(newsize);
0434     if (newsize > oldsize)
0435         for (unsigned i = oldsize; i < newsize; i++)
0436             data[i] = Avail;
0437 }
0438 
0439 // make sure there're still free blocks
0440 void AllocTable::preserve(unsigned long n)
0441 {
0442     std::vector<unsigned long> pre;
0443     for (unsigned i = 0; i < n; i++)
0444         pre.push_back(unused());
0445 }
0446 
0447 unsigned long AllocTable::operator[](unsigned long index)
0448 {
0449     unsigned long result;
0450     result = data[index];
0451     return result;
0452 }
0453 
0454 void AllocTable::set(unsigned long index, unsigned long value)
0455 {
0456     if (index >= count()) resize(index + 1);
0457     data[ index ] = value;
0458 }
0459 
0460 void AllocTable::setChain(std::vector<unsigned long> chain)
0461 {
0462     if (chain.size()) {
0463         for (unsigned i = 0; i < chain.size() - 1; i++)
0464             set(chain[i], chain[i+1]);
0465         set(chain[ chain.size()-1 ], AllocTable::Eof);
0466     }
0467 }
0468 
0469 // follow
0470 std::vector<unsigned long> AllocTable::follow(unsigned long start, bool& fail)
0471 {
0472     std::vector<unsigned long> chain;
0473 
0474     if (start >= count()) {
0475 #ifdef POLE_DEBUG
0476         qDebug() << Q_FUNC_INFO << "start >= count()!";
0477 #endif
0478         fail = true;
0479         return chain;
0480     }
0481 
0482     unsigned long p = start;
0483     while (p < count()) {
0484         if (p == (unsigned long)Eof) {
0485 #ifdef POLE_DEBUG
0486             qDebug() << Q_FUNC_INFO << "Eof detected!";
0487 #endif
0488             break;
0489         }
0490         if (p == (unsigned long)Bat) {
0491 #ifdef POLE_DEBUG
0492             qDebug() << Q_FUNC_INFO << "Bat detected!";
0493 #endif
0494             break;
0495         }
0496         if (p == (unsigned long)MetaBat) {
0497 #ifdef POLE_DEBUG
0498             qDebug() << Q_FUNC_INFO << "MetaBat detected!";
0499 #endif
0500             break;
0501         }
0502         if (p >= count()) {
0503 #ifdef POLE_DEBUG
0504             qDebug() << Q_FUNC_INFO << "Invalid index detected!";
0505 #endif
0506             fail = true;
0507             break;
0508         }
0509         chain.push_back(p);
0510 
0511         // break if the chain is longer than the total sector count
0512         if (chain.size() > count()) {
0513 #ifdef POLE_DEBUG
0514             qDebug() << Q_FUNC_INFO << "Probably a loop detected!";
0515 #endif
0516             fail = true;
0517             break;
0518         }
0519         p = data[ p ];
0520     }
0521     if (p != (unsigned long)AllocTable::Eof) {
0522 #ifdef POLE_DEBUG
0523         qDebug() << Q_FUNC_INFO << "Last chain entry MUST be 0x" << hex << AllocTable::Eof <<
0524             ", detected: 0x" << hex << p;
0525 #endif
0526         fail = true;
0527     }
0528 
0529     return chain;
0530 }
0531 
0532 unsigned AllocTable::unused()
0533 {
0534     // find first available block
0535     for (unsigned i = 0; i < data.size(); i++)
0536         if (data[i] == Avail)
0537             return i;
0538 
0539     // completely full, so enlarge the table
0540     unsigned block = data.size();
0541     resize(data.size() + 10);
0542     return block;
0543 }
0544 
0545 void AllocTable::load(const unsigned char* buffer, unsigned len)
0546 {
0547     resize(len / 4);
0548     for (unsigned i = 0; i < count(); i++)
0549         set(i, readU32(buffer + i*4));
0550 }
0551 
0552 // return space required to save this dirtree
0553 unsigned AllocTable::size()
0554 {
0555     return count() * 4;
0556 }
0557 
0558 void AllocTable::save(unsigned char* buffer)
0559 {
0560     for (unsigned i = 0; i < count(); i++)
0561         writeU32(buffer + i*4, data[i]);
0562 }
0563 
0564 void AllocTable::debug()
0565 {
0566     qDebug() << "block size " << data.size();
0567     for (unsigned i = 0; i < data.size(); i++) {
0568         if (data[i] == Avail) continue;
0569         std::cout << i << ": ";
0570         if (data[i] == Eof) std::cout << "[eof]";
0571         else if (data[i] == Bat) std::cout << "[bat]";
0572         else if (data[i] == MetaBat) std::cout << "[metabat]";
0573         else std::cout << data[i];
0574         std::cout << std::endl;
0575     }
0576 }
0577 
0578 // =========== DirTree ==========
0579 
0580 const unsigned DirTree::End = 0xffffffff;
0581 
0582 /*
0583  * Compare DirEntry names according to the spec.
0584  */
0585 int ename_cmp(QString& str1, QString& str2)
0586 {
0587     str1 = str1.toUpper();
0588     str2 = str2.toUpper();
0589     if (str1.size() < str2.size()) return -1;
0590     else if (str1.size() > str2.size()) return 1;
0591     else return str1.compare(str2);
0592 }
0593 
0594 /*
0595  * Check if DirEntry elements at this level have unique names.
0596  */
0597 bool valid_enames(DirTree* dirtree, unsigned index)
0598 {
0599     std::vector<unsigned> chi = dirtree->children(index);
0600     QList<std::string> names;
0601     DirEntry* e = 0;
0602 
0603 #ifdef POLE_DEBUG
0604     if (chi.size()) {
0605         qDebug() << "---------------------";
0606         qDebug() << Q_FUNC_INFO;
0607         qDebug() << "[KIDS]:";
0608     }
0609     for (unsigned i = 0; i < chi.size(); i++) {
0610         e = dirtree->entry(chi[i]);
0611         if (!e->valid) std::cout << "[INVALID] ";
0612         printf("DirEntry: name=%s prev=%i next=%i child=%i start=%lu size=%lu dir=%i\n",
0613                e->name.c_str(), e->prev, e->next, e->child, e->start, e->size, e->dir);
0614     }
0615     if (chi.size()) {
0616         qDebug() << "---------------------";
0617     }
0618 #endif
0619 
0620     for (unsigned i = 0; i < chi.size(); i++) {
0621         e = dirtree->entry(chi[i]);
0622         if (e->valid) {
0623             if (names.contains(e->name)) {
0624                 return false;
0625             } else {
0626                 names.append(e->name);
0627             }
0628         }
0629     }
0630     return true;
0631 }
0632 
0633 bool DirTree::valid() const
0634 {
0635     const DirEntry* e;
0636 #ifdef POLE_CHECK_SIBLINGS
0637     QString str1, str2;
0638 #endif
0639 
0640 #ifdef POLE_DEBUG
0641         qDebug() << Q_FUNC_INFO;
0642 #endif
0643     for (unsigned i = 0; i < entries.size(); i++) {
0644         e = &entries[i];
0645 
0646 #ifdef POLE_DEBUG
0647         if (!e->valid) std::cout << "[INVALID] ";
0648         printf("DirEntry: name=%s prev=%i next=%i child=%i start=%lu size=%lu dir=%i\n",
0649                e->name.c_str(), e->prev, e->next, e->child, e->start, e->size, e->dir);
0650 #endif
0651 #ifdef POLE_FAIL_ON_NEMPTY_NVALID_OBJS
0652         if (!e->valid && e->size) {
0653 #ifdef POLE_DEBUG
0654             qDebug() << "Invalid DirEntry detected!";
0655 #endif
0656             return false;
0657         }
0658 #endif
0659         //Looking for invalid stream objects.
0660 #ifdef POLE_FAIL_ON_NVALID_STREAM_OBJS
0661         if (!e->valid && !e->dir) {
0662 #ifdef POLE_DEBUG
0663             qDebug() << "Invalid DirEntry (stream object) detected!";
0664 #endif
0665             return false;
0666         }
0667 #endif
0668         //Looking for invalid storage objects.
0669         if (!e->valid && e->dir) {
0670 #ifdef POLE_DEBUG
0671             qDebug() << "Invalid DirEntry (storage object) detected!";
0672 #endif
0673             return false;
0674         }
0675 
0676     //A root storage: size = size of the mini stream, start = first sector
0677     //of the mini stream, if the mini stream exists
0678     //
0679     //A storage object MAY have e->child set - [MS-CFB].
0680 #ifdef POLE_CHECK_STORAGE_OBJS
0681         if ((e->valid && e->dir) && (i > 0) &&
0682             ((e->start != 0) || (e->size != 0)))
0683         {
0684 #ifdef POLE_DEBUG
0685             qDebug() << "Invalid DirEntry (storage object) detected!";
0686 #endif
0687             return false;
0688         }
0689 #endif
0690         //Looking for duplicate DirEntries in the storage object.
0691         if (e->valid && e->dir) {
0692             if (!valid_enames(const_cast<DirTree*>(this), i)) {
0693 #ifdef POLE_DEBUG
0694                 qDebug() << "Invalid DirEntry (storage object) detected!";
0695 #endif
0696                 return false;
0697             }
0698         }
0699 
0700         //Check the name of the left/right DirEntry.
0701 #ifdef POLE_CHECK_SIBLINGS
0702         if (e->prev != End) {
0703             str1 = QString(entries[e->prev].name.data());
0704         }
0705         if (e->next != End) {
0706             str2 = QString(entries[e->next].name.data());
0707         }
0708         if (!str1.isEmpty() && !str2.isEmpty()) {
0709             if (ename_cmp(str1, str2) > 0) {
0710 #ifdef POLE_DEBUG
0711                 qDebug() << "DirEntry: [name, position] mismatch!";
0712 #endif
0713                 return false;
0714             }
0715         }
0716 #endif
0717     }
0718     return true;
0719 }
0720 
0721 DirTree::DirTree()
0722 {
0723     clear();
0724 }
0725 
0726 void DirTree::clear()
0727 {
0728     // leave only root entry
0729     entries.resize(1);
0730     entries[0].valid = true;
0731     entries[0].name = "Root Entry";
0732     entries[0].dir = true;
0733     entries[0].size = 0;
0734     entries[0].start = End;
0735     entries[0].prev = End;
0736     entries[0].next = End;
0737     entries[0].child = End;
0738 }
0739 
0740 unsigned DirTree::entryCount()
0741 {
0742     return entries.size();
0743 }
0744 
0745 DirEntry* DirTree::entry(unsigned index)
0746 {
0747     if (index >= entryCount()) return (DirEntry*) 0;
0748     return &entries[ index ];
0749 }
0750 
0751 int DirTree::indexOf(DirEntry* e)
0752 {
0753     for (unsigned i = 0; i < entryCount(); i++)
0754         if (entry(i) == e) return i;
0755 
0756     return -1;
0757 }
0758 
0759 int DirTree::parent(unsigned index)
0760 {
0761     // brute-force, basically we iterate for each entries, find its children
0762     // and check if one of the children is 'index'
0763     for (unsigned j = 0; j < entryCount(); j++) {
0764         std::vector<unsigned> chi = children(j);
0765         for (unsigned i = 0; i < chi.size(); i++)
0766             if (chi[i] == index)
0767                 return j;
0768     }
0769 
0770     return -1;
0771 }
0772 
0773 std::string DirTree::fullName(unsigned index)
0774 {
0775     // don't use root name ("Root Entry"), just give "/"
0776     if (index == 0) return "/";
0777 
0778     std::string result = entry(index)->name;
0779     result.insert(0,  "/");
0780     int p = parent(index);
0781     DirEntry * _entry = 0;
0782     while (p > 0) {
0783         _entry = entry(p);
0784         if (_entry->dir && _entry->valid) {
0785             result.insert(0,  _entry->name);
0786             result.insert(0,  "/");
0787         }
0788         --p;
0789         index = p;
0790         if (index <= 0) break;
0791     }
0792     return result;
0793 }
0794 
0795 // given a fullname (e.g "/ObjectPool/_1020961869"), find the entry
0796 // if not found and create is false, return 0
0797 // if create is true, a new entry is returned
0798 DirEntry* DirTree::entry(const std::string& name, bool create)
0799 {
0800     if (!name.length()) return (DirEntry*)0;
0801 
0802     // quick check for "/" (that's root)
0803     if (name == "/") return entry(0);
0804 
0805     // split the names, e.g  "/ObjectPool/_1020961869" will become:
0806     // "ObjectPool" and "_1020961869"
0807     std::list<std::string> names;
0808     std::string::size_type start = 0, end = 0;
0809     if (name[0] == '/') start++;
0810     while (start < name.length()) {
0811         end = name.find_first_of('/', start);
0812         if (end == std::string::npos) end = name.length();
0813         names.push_back(name.substr(start, end - start));
0814         start = end + 1;
0815     }
0816 
0817     // start from root
0818     int index = 0 ;
0819 
0820     // trace one by one
0821     std::list<std::string>::iterator it;
0822 
0823     for (it = names.begin(); it != names.end(); ++it) {
0824         // find among the children of index
0825         std::vector<unsigned> chi = children(index);
0826         unsigned child = 0;
0827         for (unsigned i = 0; i < chi.size(); i++) {
0828             DirEntry* ce = entry(chi[i]);
0829             if (ce)
0830                 if (ce->valid && (ce->name.length() > 1))
0831                     if (ce->name == *it)
0832                         child = chi[i];
0833         }
0834 
0835         // traverse to the child
0836         if (child > 0) index = child;
0837         else {
0838             // not found among children
0839             if (!create) return (DirEntry*)0;
0840 
0841             // create a new entry
0842             unsigned parent = index;
0843             entries.push_back(DirEntry());
0844             index = entryCount() - 1;
0845             DirEntry* e = entry(index);
0846             e->valid = true;
0847             e->name = *it;
0848             e->dir = false;
0849             e->size = 0;
0850             e->start = 0;
0851             e->child = End;
0852             e->prev = End;
0853             e->next = entry(parent)->child;
0854             entry(parent)->child = index;
0855         }
0856     }
0857 
0858     return entry(index);
0859 }
0860 
0861 // helper function: recursively find siblings of index
0862 void dirtree_find_siblings(DirTree* dirtree, std::vector<unsigned>& result,
0863                            unsigned index)
0864 {
0865     DirEntry* e = dirtree->entry(index);
0866     if (!e) return;
0867 //     if (!e->valid) return;
0868 
0869     // prevent infinite loop
0870     for (unsigned i = 0; i < result.size(); i++) {
0871         if (result[i] == index) return;
0872     }
0873     // add myself
0874     result.push_back(index);
0875 
0876     // visit previous sibling, don't go infinitely
0877     unsigned prev = e->prev;
0878     if ((prev > 0) && (prev < dirtree->entryCount())) {
0879         for (unsigned i = 0; i < result.size(); i++)
0880             if (result[i] == prev) prev = 0;
0881         if (prev) dirtree_find_siblings(dirtree, result, prev);
0882     }
0883 
0884     // visit next sibling, don't go infinitely
0885     unsigned next = e->next;
0886     if ((next > 0) && (next < dirtree->entryCount())) {
0887         for (unsigned i = 0; i < result.size(); i++)
0888             if (result[i] == next) next = 0;
0889         if (next) dirtree_find_siblings(dirtree, result, next);
0890     }
0891 }
0892 
0893 std::vector<unsigned> DirTree::children(unsigned index)
0894 {
0895     std::vector<unsigned> result;
0896 
0897     DirEntry* e = entry(index);
0898     if (e) {
0899         if (e->valid && e->dir) {
0900             dirtree_find_siblings(this, result, e->child);
0901         }
0902     }
0903     return result;
0904 }
0905 
0906 void DirTree::load(unsigned char* buffer, unsigned size, const unsigned threshold,
0907                    const unsigned max_sbat, const unsigned max_bbat)
0908 {
0909 #ifdef POLE_DEBUG
0910     qDebug() << "-------------------------------";
0911     qDebug() << Q_FUNC_INFO;
0912 #endif
0913 
0914     entries.clear();
0915     unsigned n = (size / 128); //num. of directory entries
0916 
0917     for (unsigned i = 0; i < (size / 128); i++) {
0918         unsigned p = i * 128;
0919 
0920 
0921         // parse name of this entry, which stored as Unicode 16-bit
0922         int name_len = readU16(buffer + 0x40 + p);
0923         if (name_len > 64) {
0924             name_len = 64;
0925 #ifdef POLE_DEBUG
0926             qDebug() << "DirEntry: Invalid length of name!";
0927 #endif
0928         }
0929         std::string name;
0930         for (int j = 0; (buffer[j+p]) && (j < name_len); j += 2) {
0931             name.append(1, buffer[j+p]);
0932         }
0933 
0934         // first char isn't printable ? remove it...
0935         if (buffer[p] < 32) {
0936             name.erase(0, 1);
0937         }
0938 
0939         // [MS-CFB] — v20110318
0940         // 0x00 = Unknown or unallocated, 0x01 = directory (Storage Object),
0941         // 0x02 = file (Stream Object), 0x05 = Root Storage Object
0942         unsigned type = buffer[ 0x42 + p];
0943 
0944         DirEntry e;
0945         e.valid = true;
0946         e.name = name;
0947         e.start = readU32(buffer + 0x74 + p);
0948         e.size = readU32(buffer + 0x78 + p);
0949         e.prev = readU32(buffer + 0x44 + p);
0950         e.next = readU32(buffer + 0x48 + p);
0951         e.child = readU32(buffer + 0x4C + p);
0952         e.dir = false;
0953 
0954         if ((type == 1) || (type == 5)) {
0955             e.dir = true;
0956         }
0957 
0958         // sanity checks
0959         if ((type != 0) && (type != 1) && (type != 2) && (type != 5)) {
0960             e.valid = false;
0961 #ifdef POLE_DEBUG
0962             qDebug() << "DirEntry: invalid type!";
0963 #endif
0964         }
0965         if ((type != 0) && (name_len < 1)) {
0966             e.valid = false;
0967 #ifdef POLE_DEBUG
0968             qDebug() << "DirEntry: invalid (type,name) pair!";
0969 #endif
0970         }
0971         // unknown object
0972         if (type == 0) {
0973             if ((e.child != End) || (e.prev != End) || (e.next != End)) {
0974                 e.valid = false;
0975 #ifdef POLE_DEBUG
0976                 qDebug() << "DirEntry: reference to prev/next/child != NOSTREAM";
0977 #endif
0978             }
0979             if ((e.start != 0) || (e.size != 0)) {
0980                 e.valid = false;
0981 #ifdef POLE_DEBUG
0982                 qDebug() << "DirEntry: start/size != ZERO";
0983 #endif
0984             }
0985         }
0986         // storage objects
0987         if (type == 1) {
0988             if (((e.prev != End) && (e.prev >= n)) ||
0989                 ((e.next != End) && (e.next >= n)) ||
0990                 ((e.child != End) && (e.child >= n))) {
0991                 e.valid = false;
0992 #ifdef POLE_DEBUG
0993                 qDebug() << "DirEntry: reference to prev/next/child > object num. (" << n << ")";
0994 #endif
0995             }
0996         }
0997         // stream object
0998         if (type == 2) {
0999             //check stream position
1000             if ((e.size >= threshold) && (e.start >= max_bbat)) {
1001                 e.valid = false;
1002 #ifdef POLE_DEBUG
1003                 qDebug() << "DirEntry: (e.start >= max_bbat)";
1004 #endif
1005             }
1006             else if (e.start >= max_sbat) {
1007                 e.valid = false;
1008 #ifdef POLE_DEBUG
1009                 qDebug() << "DirEntry: (e.start >= max_sbat)";
1010 #endif
1011             }
1012             //check stream object
1013             if (e.child != End) {
1014                 e.valid = false;
1015 #ifdef POLE_DEBUG
1016                 qDebug() << "DirEntry: (e.child != End)";
1017 #endif
1018             }
1019             //NOTE: Disabled because of too many false positives.
1020 //             if ((e->prev != End) || (e->next != End)) {
1021 //                 e.valid = false;
1022 //             }
1023         }
1024 
1025         // CLSID contains an object class GUID (globally unique identifier) if
1026         // this entry is a storage or root storage.  In a stream object, this
1027         // field MUST be set to all zeroes.
1028 #ifdef POLE_DEBUG
1029         if (!e.valid) {
1030             std::cout << "[INVALID] ";
1031         }
1032         printf("DirEntry: name=%s type=%i prev=%i next=%i child=%i start=%lu size=%lu clsid=%lu.%lu.%lu.%lu\n",
1033                name.c_str(), type, e.prev, e.next, e.child, e.start, e.size, readU32(buffer + 0x50 + p),
1034                readU32(buffer + 0x54 + p), readU32(buffer + 0x58 + p), readU32(buffer + 0x5C + p));
1035 #endif
1036         entries.push_back(e);
1037     }
1038 #ifdef POLE_DEBUG
1039     qDebug() << "-------------------------------";
1040 #endif
1041 }
1042 
1043 // return space required to save this dirtree
1044 unsigned DirTree::size()
1045 {
1046     return entryCount() * 128;
1047 }
1048 
1049 void DirTree::save(unsigned char* buffer)
1050 {
1051     memset(buffer, 0, size());
1052 
1053     // root is fixed as "Root Entry"
1054     DirEntry* root = entry(0);
1055     std::string name = "Root Entry";
1056     for (unsigned j = 0; j < name.length(); j++)
1057         buffer[ j*2 ] = name[j];
1058     writeU16(buffer + 0x40, name.length()*2 + 2);
1059     writeU32(buffer + 0x74, 0xffffffff);
1060     writeU32(buffer + 0x78, 0);
1061     writeU32(buffer + 0x44, 0xffffffff);
1062     writeU32(buffer + 0x48, 0xffffffff);
1063     writeU32(buffer + 0x4c, root->child);
1064     buffer[ 0x42 ] = 5;
1065     buffer[ 0x43 ] = 1;
1066 
1067     for (unsigned i = 1; i < entryCount(); i++) {
1068         DirEntry* e = entry(i);
1069         if (!e) continue;
1070         if (e->dir) {
1071             e->start = 0xffffffff;
1072             e->size = 0;
1073         }
1074 
1075         // max length for name is 32 chars
1076         std::string name = e->name;
1077         if (name.length() > 32)
1078             name.erase(32, name.length());
1079 
1080         // write name as Unicode 16-bit
1081         for (unsigned j = 0; j < name.length(); j++)
1082             buffer[ i*128 + j*2 ] = name[j];
1083 
1084         writeU16(buffer + i*128 + 0x40, name.length()*2 + 2);
1085         writeU32(buffer + i*128 + 0x74, e->start);
1086         writeU32(buffer + i*128 + 0x78, e->size);
1087         writeU32(buffer + i*128 + 0x44, e->prev);
1088         writeU32(buffer + i*128 + 0x48, e->next);
1089         writeU32(buffer + i*128 + 0x4c, e->child);
1090         buffer[ i*128 + 0x42 ] = e->dir ? 1 : 2;
1091         buffer[ i*128 + 0x43 ] = 1; // always black
1092     }
1093 }
1094 
1095 void DirTree::debug()
1096 {
1097     for (unsigned i = 0; i < entryCount(); i++) {
1098         DirEntry* e = entry(i);
1099         if (!e) continue;
1100         std::cout << i << ": ";
1101         if (!e->valid) std::cout << "INVALID ";
1102         std::cout << e->name << " ";
1103         if (e->dir) std::cout << "(Dir) ";
1104         else std::cout << "(File) ";
1105         std::cout << e->size << " ";
1106         std::cout << "s:" << e->start << " ";
1107         std::cout << "(";
1108         if (e->child == End) std::cout << "-"; else std::cout << e->child;
1109         std::cout << " ";
1110         if (e->prev == End) std::cout << "-"; else std::cout << e->prev;
1111         std::cout << ":";
1112         if (e->next == End) std::cout << "-"; else std::cout << e->next;
1113         std::cout << ")";
1114         std::cout << std::endl;
1115     }
1116 }
1117 
1118 // =========== StorageIO ==========
1119 
1120 StorageIO::StorageIO(Storage* st, const char* fname)
1121 {
1122     storage = st;
1123     filename = fname;
1124     result = Storage::Ok;
1125     opened = false;
1126 
1127     header = new Header();
1128     dirtree = new DirTree();
1129     bbat = new AllocTable();
1130     sbat = new AllocTable();
1131 
1132     filesize = 0;
1133     bbat->blockSize = 1 << header->b_shift;
1134     sbat->blockSize = 1 << header->s_shift;
1135 }
1136 
1137 StorageIO::~StorageIO()
1138 {
1139     if (opened) close();
1140     delete sbat;
1141     delete bbat;
1142     delete dirtree;
1143     delete header;
1144 }
1145 
1146 bool StorageIO::open()
1147 {
1148     // already opened ? close first
1149     if (opened) close();
1150 
1151     load();
1152 
1153     return result == Storage::Ok;
1154 }
1155 
1156 void StorageIO::load()
1157 {
1158     unsigned char* buffer = 0;
1159     unsigned long buflen = 0;
1160     std::vector<unsigned long> blocks;
1161 
1162     // open the file, check for error
1163     result = Storage::OpenFailed;
1164     file.open(filename.c_str(), std::ios::binary | std::ios::in);
1165     if (!file.good()) return;
1166 
1167     // find size of input file
1168     file.seekg(0, std::ios::end);
1169     filesize = file.tellg();
1170 
1171     // load header
1172     buffer = new unsigned char[OLE_HEADER_SIZE];
1173     file.seekg(0);
1174     file.read((char*)buffer, OLE_HEADER_SIZE);
1175     if (!file.good()) {
1176         delete[] buffer;
1177         return;
1178     }
1179     header->load(buffer);
1180     delete[] buffer;
1181 
1182     // check OLE magic id
1183     result = Storage::NotOLE;
1184     for (unsigned i = 0; i < 8; i++)
1185         if (header->id[i] != pole_magic[i])
1186             return;
1187 
1188     // important block size
1189     bbat->blockSize = 1 << header->b_shift;
1190     sbat->blockSize = 1 << header->s_shift;
1191     const unsigned max_bbat_block = (filesize - OLE_HEADER_SIZE) / bbat->blockSize;
1192     const unsigned max_sbat_block = (filesize - OLE_HEADER_SIZE) / sbat->blockSize;
1193 
1194     // sanity checks
1195     result = Storage::BadOLE;
1196     if (!header->valid(max_sbat_block, max_bbat_block)) {
1197         return;
1198     }
1199 
1200     // find blocks allocated to store big bat
1201     // the first 109 blocks are in header, the rest in meta bat
1202     blocks.clear();
1203     blocks.resize(header->num_bat);
1204     for (unsigned i = 0; i < 109; i++) {
1205         if (i >= header->num_bat) break;
1206         else blocks[i] = header->bb_blocks[i];
1207     }
1208     if ((header->num_bat > 109) && (header->num_mbat > 0)) {
1209         unsigned char* buffer2 = new unsigned char[ bbat->blockSize ];
1210         unsigned k = 109;
1211         unsigned mblock = header->mbat_start;
1212         for (unsigned r = 0; r < header->num_mbat; r++) {
1213             unsigned long rr = loadBigBlock(mblock, buffer2, bbat->blockSize);
1214             if (rr != bbat->blockSize) {
1215                 delete[] buffer2;
1216                 return;
1217             }
1218             for (unsigned s = 0; s < bbat->blockSize - 4; s += 4) {
1219                 if (k >= header->num_bat) break;
1220                 else  blocks[k++] = readU32(buffer2 + s);
1221             }
1222             mblock = readU32(buffer2 + bbat->blockSize - 4);
1223         }
1224         delete[] buffer2;
1225     }
1226 
1227     // load big bat
1228     buflen = blocks.size() * bbat->blockSize;
1229     if (buflen > 0) {
1230         buffer = new unsigned char[ buflen ];
1231         unsigned long r = loadBigBlocks(blocks, buffer, buflen);
1232         if (r != buflen) {
1233             qCritical() << Q_FUNC_INFO << "SAT construction failed!";
1234             delete[] buffer;
1235             return;
1236         }
1237         bbat->load(buffer, buflen);
1238         delete[] buffer;
1239 
1240         if (!bbat->valid(filesize, header->b_shift, true)) {
1241             return;
1242         }
1243     }
1244     //TODO: make fail affect the result value
1245     bool fail = false;
1246 
1247     // load small bat
1248     blocks.clear();
1249     blocks = bbat->follow(header->sbat_start, fail);
1250     buflen = blocks.size() * bbat->blockSize;
1251     if (buflen > 0) {
1252         buffer = new unsigned char[ buflen ];
1253         unsigned long r = loadBigBlocks(blocks, buffer, buflen);
1254         if (r != buflen) {
1255             qCritical() << Q_FUNC_INFO << "SSAT construction failed!";
1256             delete[] buffer;
1257             return;
1258         }
1259         sbat->load(buffer, buflen);
1260         delete[] buffer;
1261 
1262         if (!sbat->valid(filesize, header->s_shift, false)) {
1263             return;
1264         }
1265     }
1266 
1267     // load directory tree
1268     blocks.clear();
1269     blocks = bbat->follow(header->dirent_start, fail);
1270     buflen = blocks.size() * bbat->blockSize;
1271     buffer = new unsigned char[ buflen ];
1272     unsigned long r = loadBigBlocks(blocks, buffer, buflen);
1273     if (r != buflen) {
1274         qCritical() << Q_FUNC_INFO << "DirTree construction failed!";
1275         delete[] buffer;
1276         return;
1277     }
1278     dirtree->load(buffer, buflen, header->threshold, max_sbat_block, max_bbat_block);
1279     unsigned sb_start = readU32(buffer + 0x74);
1280     delete[] buffer;
1281     if (!dirtree->valid()) {
1282         qCritical() << Q_FUNC_INFO << "Invalid DirTree!";
1283         return;
1284     }
1285 
1286     // fetch block chain as data for small-files
1287     sb_blocks = bbat->follow(sb_start, fail);
1288 
1289     // for troubleshooting, just enable this block
1290 #ifdef POLE_DEBUG
1291     header->debug();
1292     sbat->debug();
1293     bbat->debug();
1294     dirtree->debug();
1295 #endif
1296 
1297     // so far so good
1298     result = Storage::Ok;
1299     opened = true;
1300 }
1301 
1302 void StorageIO::create()
1303 {
1304     // std::cout << "Creating " << filename << std::endl;
1305 
1306     file.open(filename.c_str(), std::ios::out | std::ios::binary);
1307     if (!file.good()) {
1308         qCritical() << Q_FUNC_INFO << "Can't create file:" << filename.c_str();
1309         result = Storage::OpenFailed;
1310         return;
1311     }
1312 
1313     // so far so good
1314     opened = true;
1315     result = Storage::Ok;
1316 }
1317 
1318 void StorageIO::flush()
1319 {
1320     /* Note on Microsoft implementation:
1321        - directory entries are stored in the last block(s)
1322        - BATs are as second to the last
1323        - Meta BATs are third to the last
1324     */
1325 }
1326 
1327 void StorageIO::close()
1328 {
1329     if (!opened) return;
1330 
1331     file.close();
1332     opened = false;
1333 
1334     std::list<Stream*>::iterator it;
1335     for (it = streams.begin(); it != streams.end(); ++it)
1336         delete *it;
1337 }
1338 
1339 StreamIO* StorageIO::streamIO(const std::string& name)
1340 {
1341 #ifdef POLE_DEBUG
1342     qDebug() << Q_FUNC_INFO << "preparing stream:" << name.c_str();
1343 #endif
1344     // sanity check
1345     if (!name.length()) return (StreamIO*)0;
1346 
1347     // search in the entries
1348     DirEntry* entry = dirtree->entry(name);
1349     //if( entry) std::cout << "FOUND\n";
1350     if (!entry) return (StreamIO*)0;
1351     //if( !entry->dir ) std::cout << "  NOT DIR\n";
1352     if (entry->dir) return (StreamIO*)0;
1353 
1354     StreamIO* result = new StreamIO(this, entry);
1355     result->fullName = name;
1356 
1357     return result;
1358 }
1359 
1360 unsigned long StorageIO::loadBigBlocks(const std::vector<unsigned long>& blocks,
1361                                        unsigned char* data, unsigned long maxlen)
1362 {
1363     return loadBigBlocks(&blocks[0], blocks.size(), data, maxlen);
1364 }
1365 
1366 unsigned long StorageIO::loadBigBlocks(const unsigned long *blocks, unsigned blockCount,
1367                                        unsigned char *data, unsigned long maxlen)
1368 {
1369     // sentinel
1370     if (!data) return 0;
1371     if (!file.good()) return 0;
1372     if (!blocks) return 0;
1373     if (blockCount < 1) return 0;
1374     if (maxlen == 0) return 0;
1375 
1376     // read block one by one, seems fast enough
1377     unsigned long bytes = 0;
1378     for (unsigned long i = 0; (i < blockCount) && (bytes < maxlen); i++) {
1379         unsigned long block = blocks[i];
1380         unsigned long pos =  bbat->blockSize * (block + 1);
1381         unsigned long p = (bbat->blockSize < maxlen - bytes) ? bbat->blockSize : maxlen - bytes;
1382         if (pos + p > filesize) p = filesize - pos;
1383         file.seekg(pos);
1384         file.read((char*)data + bytes, p);
1385         if (!file.good()) return 0;
1386         bytes += p;
1387     }
1388 
1389     return bytes;
1390 }
1391 
1392 unsigned long StorageIO::loadBigBlock(unsigned long block,
1393                                       unsigned char* data, unsigned long maxlen)
1394 {
1395     // sentinel
1396     if (!data) return 0;
1397     if (!file.good()) return 0;
1398 
1399     return loadBigBlocks(&block, 1, data, maxlen);
1400 }
1401 
1402 // return number of bytes which has been read
1403 unsigned long StorageIO::loadSmallBlocks(const std::vector<unsigned long>& blocks,
1404         unsigned char* data, unsigned long maxlen)
1405 {
1406     return loadSmallBlocks(&blocks[0], blocks.size(), data, maxlen);
1407 }
1408 
1409 unsigned long StorageIO::loadSmallBlocks(const unsigned long *blocks, unsigned blockCount,
1410                                          unsigned char *data, unsigned long maxlen)
1411 {
1412     // sentinel
1413     if (!data) return 0;
1414     if (!file.good()) return 0;
1415     if (!blocks) return 0;
1416     if (blockCount < 1) return 0;
1417     if (maxlen == 0) return 0;
1418 
1419     // our own local buffer
1420     unsigned char* buf = new unsigned char[ bbat->blockSize ];
1421 
1422     // read small block one by one
1423     unsigned long bytes = 0;
1424     for (unsigned long i = 0; (i < blockCount) && (bytes < maxlen); i++) {
1425         unsigned long block = blocks[i];
1426 
1427         // find where the small-block exactly is
1428         unsigned long pos = block * sbat->blockSize;
1429         unsigned long bbindex = pos / bbat->blockSize;
1430         if (bbindex >= sb_blocks.size()) break;
1431 
1432         unsigned long r = loadBigBlock(sb_blocks[ bbindex ], buf, bbat->blockSize);
1433         if (r != bbat->blockSize) {
1434             delete[] buf;
1435             return 0;
1436         }
1437 
1438         // copy the data
1439         unsigned offset = pos % bbat->blockSize;
1440         unsigned long p = (maxlen - bytes < bbat->blockSize - offset) ? maxlen - bytes :  bbat->blockSize - offset;
1441         p = (sbat->blockSize < p) ? sbat->blockSize : p;
1442         memcpy(data + bytes, buf + offset, p);
1443         bytes += p;
1444     }
1445 
1446     delete[] buf;
1447 
1448     return bytes;
1449 }
1450 
1451 unsigned long StorageIO::loadSmallBlock(unsigned long block,
1452                                         unsigned char* data, unsigned long maxlen)
1453 {
1454     // sentinel
1455     if (!data) return 0;
1456     if (!file.good()) return 0;
1457 
1458     return loadSmallBlocks(&block, 1, data, maxlen);
1459 }
1460 
1461 // =========== StreamIO ==========
1462 
1463 StreamIO::StreamIO(StorageIO* s, DirEntry* e)
1464 {
1465     io = s;
1466     entry = e;
1467     eof = false;
1468     fail = false;
1469 
1470     m_pos = 0;
1471 
1472     if (entry->size >= io->header->threshold) {
1473         blocks = io->bbat->follow(entry->start, fail);
1474     } else {
1475         blocks = io->sbat->follow(entry->start, fail);
1476     }
1477 
1478     // prepare cache
1479     cache_pos = 0;
1480     base_cache_size = cache_size = 4096; // optimal ?
1481     cache_data = new unsigned char[base_cache_size];
1482     updateCache();
1483 }
1484 
1485 // FIXME tell parent we're gone
1486 StreamIO::~StreamIO()
1487 {
1488     delete[] cache_data;
1489 }
1490 
1491 void StreamIO::seek(unsigned long pos)
1492 {
1493     m_pos = pos;
1494 }
1495 
1496 unsigned long StreamIO::tell()
1497 {
1498     return m_pos;
1499 }
1500 
1501 int StreamIO::getch()
1502 {
1503     // past end-of-file ?
1504     if (m_pos > entry->size) return -1;
1505 
1506     // need to update cache ?
1507     if (!cache_size || (m_pos < cache_pos) ||
1508             (m_pos >= cache_pos + cache_size))
1509         updateCache();
1510 
1511     // something bad if we don't get good cache
1512     if (!cache_size) return -1;
1513 
1514     int data = cache_data[m_pos - cache_pos];
1515     m_pos++;
1516 
1517     return data;
1518 }
1519 
1520 unsigned long StreamIO::read(unsigned char *data, unsigned long maxlen)
1521 {
1522     // sanity checks
1523     if (!data) return 0;
1524     if (maxlen == 0) return 0;
1525 
1526     unsigned long totalbytes = 0;
1527 
1528     while (totalbytes < maxlen) {
1529         // need to update cache ?
1530         if (!cache_size || (m_pos < cache_pos) ||
1531                 (m_pos >= cache_pos + cache_size))
1532             updateCache();
1533         if (!cache_size) break;
1534 
1535         const unsigned long remaining = cache_size - (m_pos - cache_pos);
1536         const unsigned long count = std::min(remaining, maxlen - totalbytes);
1537         memcpy(data + totalbytes, &cache_data[m_pos - cache_pos], count);
1538         totalbytes += count;
1539         m_pos += count;
1540     }
1541     return totalbytes;
1542 }
1543 
1544 unsigned long StreamIO::readInternal(unsigned long pos, unsigned char* data, unsigned long maxlen)
1545 {
1546     // sanity checks
1547     if (!data) return 0;
1548     if (maxlen == 0) return 0;
1549 
1550     unsigned long totalbytes = 0;
1551 
1552     if (entry->size < io->header->threshold) {
1553         // small file
1554         unsigned long index = pos / io->sbat->blockSize;
1555 
1556         if (index >= blocks.size()) return 0;
1557 
1558         unsigned char buf[4096];
1559         unsigned long offset = pos % io->sbat->blockSize;
1560         while (totalbytes < maxlen) {
1561             if (index >= blocks.size()) break;
1562             io->loadSmallBlock(blocks[index], &buf[0], io->bbat->blockSize);
1563             unsigned long count = io->sbat->blockSize - offset;
1564             if (count > maxlen - totalbytes) count = maxlen - totalbytes;
1565             memcpy(data + totalbytes, &buf[0] + offset, count);
1566             totalbytes += count;
1567             offset = 0;
1568             index++;
1569         }
1570 
1571     } else {
1572         // big file
1573         unsigned long index = pos / io->bbat->blockSize;
1574 
1575         if (index >= blocks.size()) return 0;
1576 
1577         unsigned char buf[4096];
1578         unsigned long offset = pos % io->bbat->blockSize;
1579         while (totalbytes < maxlen) {
1580             if (index >= blocks.size()) break;
1581             unsigned long r = io->loadBigBlock(blocks[index], &buf[0], io->bbat->blockSize);
1582             if (r != io->bbat->blockSize) {
1583                 return 0;
1584             }
1585             unsigned long count = io->bbat->blockSize - offset;
1586             if (count > maxlen - totalbytes) count = maxlen - totalbytes;
1587             memcpy(data + totalbytes, &buf[0] + offset, count);
1588             totalbytes += count;
1589             index++;
1590             offset = 0;
1591         }
1592 
1593     }
1594 
1595     return totalbytes;
1596 }
1597 
1598 unsigned long StreamIO::readInternal(unsigned char* data, unsigned long maxlen)
1599 {
1600     unsigned long bytes = readInternal(tell(), data, maxlen);
1601     m_pos += bytes;
1602     return bytes;
1603 }
1604 
1605 void StreamIO::updateCache()
1606 {
1607     // sanity check
1608     if (!cache_data) return;
1609 
1610     cache_pos = m_pos - (m_pos % base_cache_size);
1611     unsigned long bytes = base_cache_size;
1612     if (cache_pos + bytes > entry->size) bytes = entry->size - cache_pos;
1613     if (cache_pos + bytes <= m_pos) {
1614         cache_size = 0;
1615     } else {
1616         cache_size = readInternal(cache_pos, cache_data, bytes);
1617     }
1618 }
1619 
1620 
1621 // =========== Storage ==========
1622 
1623 Storage::Storage(const char* filename)
1624 {
1625     io = new StorageIO(this, filename);
1626 }
1627 
1628 Storage::~Storage()
1629 {
1630     delete io;
1631 }
1632 
1633 int Storage::result()
1634 {
1635     return io->result;
1636 }
1637 
1638 bool Storage::open()
1639 {
1640     return io->open();
1641 }
1642 
1643 void Storage::close()
1644 {
1645     io->close();
1646 }
1647 
1648 std::list<std::string> Storage::entries(const std::string& path)
1649 {
1650     std::list<std::string> result;
1651     DirTree* dt = io->dirtree;
1652     DirEntry* e = dt->entry(path, false);
1653     if (e) {
1654         if (e->dir) {
1655             unsigned parent = dt->indexOf(e);
1656             std::vector<unsigned> children = dt->children(parent);
1657             for (unsigned i = 0; i < children.size(); i++)
1658                 result.push_back(dt->entry(children[i])->name);
1659         }
1660     }
1661     return result;
1662 }
1663 
1664 bool Storage::isDirectory(const std::string& name)
1665 {
1666     DirEntry* e = io->dirtree->entry(name, false);
1667     return e ? e->dir : false;
1668 }
1669 
1670 // =========== Stream ==========
1671 
1672 Stream::Stream(Storage* storage, const std::string& name)
1673 {
1674     io = storage->io->streamIO(name);
1675 }
1676 
1677 // FIXME tell parent we're gone
1678 Stream::~Stream()
1679 {
1680     delete io;
1681 }
1682 
1683 std::string Stream::fullName()
1684 {
1685     return io ? io->fullName : std::string();
1686 }
1687 
1688 unsigned long Stream::tell()
1689 {
1690     return io ? io->tell() : 0;
1691 }
1692 
1693 void Stream::seek(unsigned long newpos)
1694 {
1695     if (io) io->seek(newpos);
1696 }
1697 
1698 unsigned long Stream::size()
1699 {
1700     return io ? io->entry->size : 0;
1701 }
1702 
1703 int Stream::getch()
1704 {
1705     return io ? io->getch() : 0;
1706 }
1707 
1708 unsigned long Stream::read(unsigned char* data, unsigned long maxlen)
1709 {
1710     return io ? io->read(data, maxlen) : 0;
1711 }
1712 
1713 bool Stream::eof()
1714 {
1715     return io ? io->eof : false;
1716 }
1717 
1718 bool Stream::fail()
1719 {
1720     return io ? io->fail : true;
1721 }