File indexing completed on 2024-04-28 04:40:02

0001 /* POLE - Portable C++ library to access OLE Storage
0002    Copyright (C) 2002-2005 Ariya Hidayat <ariya@kde.org>
0003 
0004    Redistribution and use in source and binary forms, with or without
0005    modification, are permitted provided that the following conditions
0006    are met:
0007    * Redistributions of source code must retain the above copyright notice,
0008      this list of conditions and the following disclaimer.
0009    * Redistributions in binary form must reproduce the above copyright notice,
0010      this list of conditions and the following disclaimer in the documentation
0011      and/or other materials provided with the distribution.
0012    * Neither the name of the authors nor the names of its contributors may be
0013      used to endorse or promote products derived from this software without
0014      specific prior written permission.
0015 
0016    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
0017    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
0018    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
0019    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
0020    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
0021    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
0022    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
0023    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
0024    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
0025    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
0026    THE POSSIBILITY OF SUCH DAMAGE.
0027 */
0028 
0029 #include <fstream>
0030 #include <iostream>
0031 #include <list>
0032 #include <string>
0033 #include <vector>
0034 
0035 #include "pole.h"
0036 
0037 #include <string.h>
0038 
0039 // enable to activate debugging output
0040 // #define POLE_DEBUG
0041 
0042 namespace POLE
0043 {
0044 
// In-memory copy of the storage header fields used by this file.
// load() and save() convert between this object and the raw header bytes.
class Header
{
public:
    unsigned char id[8];       // signature, or magic identifier
    unsigned b_shift;          // bbat->blockSize = 1 << b_shift
    unsigned s_shift;          // sbat->blockSize = 1 << s_shift
    unsigned num_bat;          // blocks allocated for big bat
    unsigned dirent_start;     // starting block for directory info
    unsigned threshold;        // switch from small to big file (usually 4K)
    unsigned sbat_start;       // starting block index to store small bat
    unsigned num_sbat;         // blocks allocated for small bat
    unsigned mbat_start;       // starting block to store meta bat
    unsigned num_mbat;         // blocks allocated for meta bat
    unsigned long bb_blocks[109]; // big bat block indices kept in the header itself (read from offset 0x4C)

    Header();                               // sets defaults: b_shift 9, s_shift 6, threshold 4096, no blocks
    bool valid();                           // sanity-checks the field values; does not look at id
    void load(const unsigned char* buffer); // decode the fields from raw header bytes
    void save(unsigned char* buffer);       // encode the fields into raw header bytes
    void debug();                           // print the fields to std::cout
};
0066 
// Block allocation table: data[i] holds the index of the block that follows
// block i in its chain, or one of the special marker values below.
class AllocTable
{
public:
    static const unsigned Eof;      // marker: end of a chain
    static const unsigned Avail;    // marker: entry is unused
    static const unsigned Bat;      // marker value; follow() stops on it
    static const unsigned MetaBat;  // marker value; follow() stops on it
    unsigned blockSize;             // size in bytes of the blocks this table describes
    AllocTable();
    void clear();                   // NOTE(review): no definition is visible in this part of the file
    unsigned long count();          // number of entries in the table
    void resize(unsigned long newsize);  // grow/shrink; new entries are set to Avail
    void preserve(unsigned long n);
    void set(unsigned long index, unsigned long val);  // store an entry, growing the table if needed
    unsigned unused();              // index of the first Avail entry (grows the table if there is none)
    void setChain(std::vector<unsigned long>);  // link the given blocks in order and terminate with Eof
    std::vector<unsigned long> follow(unsigned long start);  // list the blocks of the chain starting at 'start'
    unsigned long operator[](unsigned long index);  // unchecked read of one entry
    void load(const unsigned char* buffer, unsigned len);  // decode len/4 little-endian 32-bit entries
    void save(unsigned char* buffer);  // encode every entry as a little-endian 32-bit value
    unsigned size();                // bytes needed by save(): 4 per entry
    void debug();                   // print the non-Avail entries to std::cout
private:
    std::vector<unsigned long> data;  // the table itself
    // no copy or assign
    AllocTable(const AllocTable&);
    AllocTable& operator=(const AllocTable&);
};
0094 
// One directory record as kept in memory by DirTree. prev/next/child are
// indices of other entries in the same DirTree (DirTree::End when absent).
class DirEntry
{
public:
    bool valid;            // false if invalid (should be skipped)
    std::string name;      // the name, not in unicode anymore
    bool dir;              // true if directory
    unsigned long size;    // size (not valid if directory)
    unsigned long start;   // starting block
    unsigned prev;         // previous sibling
    unsigned next;         // next sibling
    unsigned child;        // first child
};
0107 
// The directory of a storage: a flat array of DirEntry records that refer to
// each other by index. Entry 0 is the root.
class DirTree
{
public:
    static const unsigned End;      // index value meaning "no entry"
    DirTree();                      // starts with just the root entry
    void clear();                   // drop everything except a fresh root entry
    unsigned entryCount();          // number of entries, including invalid ones
    DirEntry* entry(unsigned index);  // entry by index, or 0 if out of range
    DirEntry* entry(const std::string& name, bool create = false);  // entry by path, e.g. "/ObjectPool/_1020961869"
    int indexOf(DirEntry* e);       // index of an entry pointer, or -1
    int parent(unsigned index);     // index of the entry that lists 'index' as a child, or -1
    std::string fullName(unsigned index);  // path string for an entry ("/" for the root)
    std::vector<unsigned> children(unsigned index);  // indices of the entries directly below 'index'
    void load(unsigned char* buffer, unsigned len);  // decode 128-byte records from raw bytes
    void save(unsigned char* buffer);  // encode every entry as a 128-byte record
    unsigned size();                // bytes needed by save(): 128 per entry
    void debug();                   // print every entry to std::cout
private:
    std::vector<DirEntry> entries;  // all entries, addressed by index
    // no copy or assign
    DirTree(const DirTree&);
    DirTree& operator=(const DirTree&);
};
0130 
// File-level state behind a Storage: the open file plus the header,
// allocation tables and directory tree decoded from it.
class StorageIO
{
public:
    Storage* storage;         // owner
    std::string filename;     // filename
    std::fstream file;        // associated with above name
    int result;               // result of operation
    bool opened;              // true if file is opened
    unsigned long filesize;   // size of the file

    Header* header;           // storage header
    DirTree* dirtree;         // directory tree
    AllocTable* bbat;         // allocation table for big blocks
    AllocTable* sbat;         // allocation table for small blocks

    std::vector<unsigned long> sb_blocks; // blocks for "small" files

    std::list<Stream*> streams;  // Stream objects deleted by close()

    // Allocates header, dirtree, bbat and sbat; does not touch the file yet.
    StorageIO(Storage* storage, const char* filename);
    // Closes the file if it is open and frees the objects allocated above.
    ~StorageIO();

    bool open();    // (re)open for reading via load(); true when result is Storage::Ok
    void close();   // close the file and delete the registered streams
    void flush();   // currently has an empty body
    void load();    // open the file and decode header, tables and directory; sets result/opened
    void create();  // open the file for binary output; sets result/opened

    // Block readers; their definitions are not part of this section of the file.
    unsigned long loadBigBlocks(std::vector<unsigned long> blocks, unsigned char* buffer, unsigned long maxlen);

    unsigned long loadBigBlock(unsigned long block, unsigned char* buffer, unsigned long maxlen);

    unsigned long loadSmallBlocks(std::vector<unsigned long> blocks, unsigned char* buffer, unsigned long maxlen);

    unsigned long loadSmallBlock(unsigned long block, unsigned char* buffer, unsigned long maxlen);

    // New StreamIO for the named non-directory entry, or 0 if there is none.
    StreamIO* streamIO(const std::string& name);

private:
    // no copy or assign
    StorageIO(const StorageIO&);
    StorageIO& operator=(const StorageIO&);

};
0175 
// Read access to one stream (a non-directory entry) of a StorageIO.
// NOTE(review): the member definitions are outside this section of the file,
// so the per-member notes below restate only what the declarations show.
class StreamIO
{
public:
    StorageIO* io;         // storage this stream belongs to
    DirEntry* entry;       // directory entry of the stream
    std::string fullName;  // set by StorageIO::streamIO() to the requested name
    bool eof;
    bool fail;

    StreamIO(StorageIO* io, DirEntry* entry);
    ~StreamIO();
    unsigned long size();
    void seek(unsigned long pos);
    unsigned long tell();
    int getch();
    unsigned long read(unsigned char* data, unsigned long maxlen);
    unsigned long read(unsigned long pos, unsigned char* data, unsigned long maxlen);


private:
    std::vector<unsigned long> blocks;

    // no copy or assign
    StreamIO(const StreamIO&);
    StreamIO& operator=(const StreamIO&);

    // pointer for read
    unsigned long m_pos;

    // simple cache system to speed-up getch()
    unsigned char* cache_data;
    unsigned long cache_size;
    unsigned long cache_pos;
    void updateCache();
};
0211 
0212 } // namespace POLE
0213 
0214 using namespace POLE;
0215 
// Decode the 16-bit little-endian value stored in ptr[0..1].
static inline unsigned long readU16(const unsigned char* ptr)
{
    const int lowByte = ptr[0];
    const int highByte = ptr[1] << 8;
    return lowByte + highByte;
}
0220 
// Decode the 32-bit little-endian value stored in ptr[0..3].
// Each byte is widened to unsigned long before shifting: shifting the promoted
// int by 24 would produce a negative value for bytes >= 0x80, which then
// sign-extends when unsigned long is 64 bits wide (0xFFFFFFFE would be
// returned as 0xFFFFFFFFFFFFFFFE and no longer match AllocTable::Eof).
static inline unsigned long readU32(const unsigned char* ptr)
{
    return  static_cast<unsigned long>(ptr[0])
         | (static_cast<unsigned long>(ptr[1]) << 8)
         | (static_cast<unsigned long>(ptr[2]) << 16)
         | (static_cast<unsigned long>(ptr[3]) << 24);
}
0225 
// Store the low 16 bits of data at ptr[0..1], least significant byte first.
static inline void writeU16(unsigned char* ptr, unsigned long data)
{
    for (int byte = 0; byte < 2; ++byte)
        ptr[byte] = (unsigned char)((data >> (8 * byte)) & 0xff);
}
0231 
// Store the low 32 bits of data at ptr[0..3], least significant byte first.
static inline void writeU32(unsigned char* ptr, unsigned long data)
{
    for (int byte = 0; byte < 4; ++byte)
        ptr[byte] = (unsigned char)((data >> (8 * byte)) & 0xff);
}
0239 
// The 8-byte signature expected at the start of a storage file: Header's
// constructor and Header::save() emit it, StorageIO::load() compares against it.
static const unsigned char pole_magic[] = { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 };
0241 
0242 // =========== Header ==========
0243 
0244 Header::Header()
0245 {
0246     b_shift = 9;
0247     s_shift = 6;
0248     num_bat = 0;
0249     dirent_start = 0;
0250     threshold = 4096;
0251     sbat_start = 0;
0252     num_sbat = 0;
0253     mbat_start = 0;
0254     num_mbat = 0;
0255 
0256     for (unsigned i = 0; i < 8; i++)
0257         id[i] = pole_magic[i];
0258     for (unsigned i = 0; i < 109; i++)
0259         bb_blocks[i] = AllocTable::Avail;
0260 }
0261 
0262 bool Header::valid()
0263 {
0264     if (threshold != 4096) return false;
0265     if (num_bat == 0) return false;
0266     if ((num_bat > 109) && (num_bat > (num_mbat * 127) + 109)) return false;
0267     if ((num_bat < 109) && (num_mbat != 0)) return false;
0268     if (s_shift > b_shift) return false;
0269     if (b_shift <= 6) return false;
0270     if (b_shift >= 31) return false;
0271 
0272     return true;
0273 }
0274 
0275 void Header::load(const unsigned char* buffer)
0276 {
0277     b_shift      = readU16(buffer + 0x1e); // sector shift
0278     s_shift      = readU16(buffer + 0x20); // mini sector shift
0279     num_bat      = readU32(buffer + 0x2c); // number of fat sectors
0280     dirent_start = readU32(buffer + 0x30); // first directory sector location
0281     threshold    = readU32(buffer + 0x38); // transaction signature number
0282     sbat_start   = readU32(buffer + 0x3c); // mini stream cutoff size
0283     num_sbat     = readU32(buffer + 0x40); // first mini fat sector location
0284     mbat_start   = readU32(buffer + 0x44); // first mini difat sector location
0285     num_mbat     = readU32(buffer + 0x48); // number of difat sectors
0286 
0287     for (unsigned i = 0; i < 8; i++)
0288         id[i] = buffer[i];
0289     for (unsigned i = 0; i < 109; i++)
0290         bb_blocks[i] = readU32(buffer + 0x4C + i * 4);
0291 }
0292 
0293 void Header::save(unsigned char* buffer)
0294 {
0295     memset(buffer, 0, 0x4c);
0296     memcpy(buffer, pole_magic, 8);          // ole signature
0297     writeU32(buffer + 8, 0);                // unknown
0298     writeU32(buffer + 12, 0);               // unknown
0299     writeU32(buffer + 16, 0);               // unknown
0300     writeU16(buffer + 24, 0x003e);          // revision ?
0301     writeU16(buffer + 26, 3);               // version ?
0302     writeU16(buffer + 28, 0xfffe);          // unknown
0303     writeU16(buffer + 0x1e, b_shift);
0304     writeU16(buffer + 0x20, s_shift);
0305     writeU32(buffer + 0x2c, num_bat);
0306     writeU32(buffer + 0x30, dirent_start);
0307     writeU32(buffer + 0x38, threshold);
0308     writeU32(buffer + 0x3c, sbat_start);
0309     writeU32(buffer + 0x40, num_sbat);
0310     writeU32(buffer + 0x44, mbat_start);
0311     writeU32(buffer + 0x48, num_mbat);
0312 
0313     for (unsigned i = 0; i < 109; i++)
0314         writeU32(buffer + 0x4C + i*4, bb_blocks[i]);
0315 }
0316 
0317 void Header::debug()
0318 {
0319     std::cout << std::endl;
0320     std::cout << "b_shift " << b_shift << std::endl;
0321     std::cout << "s_shift " << s_shift << std::endl;
0322     std::cout << "num_bat " << num_bat << std::endl;
0323     std::cout << "dirent_start " << dirent_start << std::endl;
0324     std::cout << "threshold " << threshold << std::endl;
0325     std::cout << "sbat_start " << sbat_start << std::endl;
0326     std::cout << "num_sbat " << num_sbat << std::endl;
0327     std::cout << "mbat_start " << mbat_start << std::endl;
0328     std::cout << "num_mbat " << num_mbat << std::endl;
0329 
0330     unsigned s = (num_bat <= 109) ? num_bat : 109;
0331     std::cout << "bat blocks: ";
0332     for (unsigned i = 0; i < s; i++)
0333         std::cout << bb_blocks[i] << " ";
0334     std::cout << std::endl;
0335 }
0336 
0337 // =========== AllocTable ==========
0338 
// Special table values. Avail marks an unused entry (see resize()/unused()),
// Eof terminates a chain (see setChain()); follow() stops on Eof, Bat and MetaBat.
const unsigned AllocTable::Avail = 0xffffffff;
const unsigned AllocTable::Eof = 0xfffffffe;
const unsigned AllocTable::Bat = 0xfffffffd;
const unsigned AllocTable::MetaBat = 0xfffffffc;
0343 
0344 AllocTable::AllocTable()
0345 {
0346     blockSize = 4096;
0347     // initial size
0348     resize(128);
0349 }
0350 
0351 unsigned long AllocTable::count()
0352 {
0353     return data.size();
0354 }
0355 
0356 void AllocTable::resize(unsigned long newsize)
0357 {
0358     unsigned oldsize = data.size();
0359     data.resize(newsize);
0360     if (newsize > oldsize)
0361         for (unsigned i = oldsize; i < newsize; i++)
0362             data[i] = Avail;
0363 }
0364 
0365 // make sure there're still free blocks
0366 void AllocTable::preserve(unsigned long n)
0367 {
0368     std::vector<unsigned long> pre;
0369     for (unsigned i = 0; i < n; i++)
0370         pre.push_back(unused());
0371 }
0372 
0373 unsigned long AllocTable::operator[](unsigned long index)
0374 {
0375     unsigned long result;
0376     result = data[index];
0377     return result;
0378 }
0379 
0380 void AllocTable::set(unsigned long index, unsigned long value)
0381 {
0382     if (index >= count()) resize(index + 1);
0383     data[ index ] = value;
0384 }
0385 
0386 void AllocTable::setChain(std::vector<unsigned long> chain)
0387 {
0388     if (chain.size()) {
0389         for (unsigned i = 0; i < chain.size() - 1; i++)
0390             set(chain[i], chain[i+1]);
0391         set(chain[ chain.size()-1 ], AllocTable::Eof);
0392     }
0393 }
0394 
0395 // follow
0396 std::vector<unsigned long> AllocTable::follow(unsigned long start)
0397 {
0398     std::vector<unsigned long> chain;
0399 
0400     if (start >= count()) return chain;
0401 
0402     unsigned long p = start;
0403     while (p < count()) {
0404         if (p == (unsigned long)Eof) break;
0405         if (p == (unsigned long)Bat) break;
0406         if (p == (unsigned long)MetaBat) break;
0407         if (p >= count()) break;
0408         chain.push_back(p);
0409         if (data[p] >= count()) break;
0410         p = data[ p ];
0411     }
0412 
0413     return chain;
0414 }
0415 
0416 unsigned AllocTable::unused()
0417 {
0418     // find first available block
0419     for (unsigned i = 0; i < data.size(); i++)
0420         if (data[i] == Avail)
0421             return i;
0422 
0423     // completely full, so enlarge the table
0424     unsigned block = data.size();
0425     resize(data.size() + 10);
0426     return block;
0427 }
0428 
0429 void AllocTable::load(const unsigned char* buffer, unsigned len)
0430 {
0431     resize(len / 4);
0432     for (unsigned i = 0; i < count(); i++)
0433         set(i, readU32(buffer + i*4));
0434 }
0435 
0436 // return space required to save this dirtree
0437 unsigned AllocTable::size()
0438 {
0439     return count() * 4;
0440 }
0441 
0442 void AllocTable::save(unsigned char* buffer)
0443 {
0444     for (unsigned i = 0; i < count(); i++)
0445         writeU32(buffer + i*4, data[i]);
0446 }
0447 
0448 void AllocTable::debug()
0449 {
0450     std::cout << "block size " << data.size() << std::endl;
0451     for (unsigned i = 0; i < data.size(); i++) {
0452         if (data[i] == Avail) continue;
0453         std::cout << i << ": ";
0454         if (data[i] == Eof) std::cout << "[eof]";
0455         else if (data[i] == Bat) std::cout << "[bat]";
0456         else if (data[i] == MetaBat) std::cout << "[metabat]";
0457         else std::cout << data[i];
0458         std::cout << std::endl;
0459     }
0460 }
0461 
0462 // =========== DirTree ==========
0463 
// Index value used in prev/next/child links to say "no entry".
const unsigned DirTree::End = 0xffffffff;
0465 
0466 DirTree::DirTree()
0467 {
0468     clear();
0469 }
0470 
0471 void DirTree::clear()
0472 {
0473     // leave only root entry
0474     entries.resize(1);
0475     entries[0].valid = true;
0476     entries[0].name = "Root Entry";
0477     entries[0].dir = true;
0478     entries[0].size = 0;
0479     entries[0].start = End;
0480     entries[0].prev = End;
0481     entries[0].next = End;
0482     entries[0].child = End;
0483 }
0484 
0485 unsigned DirTree::entryCount()
0486 {
0487     return entries.size();
0488 }
0489 
0490 DirEntry* DirTree::entry(unsigned index)
0491 {
0492     if (index >= entryCount()) return (DirEntry*) 0;
0493     return &entries[ index ];
0494 }
0495 
0496 int DirTree::indexOf(DirEntry* e)
0497 {
0498     for (unsigned i = 0; i < entryCount(); i++)
0499         if (entry(i) == e) return i;
0500 
0501     return -1;
0502 }
0503 
0504 int DirTree::parent(unsigned index)
0505 {
0506     // brute-force, basically we iterate for each entries, find its children
0507     // and check if one of the children is 'index'
0508     for (unsigned j = 0; j < entryCount(); j++) {
0509         std::vector<unsigned> chi = children(j);
0510         for (unsigned i = 0; i < chi.size();i++)
0511             if (chi[i] == index)
0512                 return j;
0513     }
0514 
0515     return -1;
0516 }
0517 
0518 std::string DirTree::fullName(unsigned index)
0519 {
0520     // don't use root name ("Root Entry"), just give "/"
0521     if (index == 0) return "/";
0522 
0523     std::string result = entry(index)->name;
0524     result.insert(0,  "/");
0525     int p = parent(index);
0526     DirEntry * _entry = 0;
0527     while (p > 0) {
0528         _entry = entry(p);
0529         if (_entry->dir && _entry->valid) {
0530             result.insert(0,  _entry->name);
0531             result.insert(0,  "/");
0532         }
0533         --p;
0534         index = p;
0535         if (index <= 0) break;
0536     }
0537     return result;
0538 }
0539 
0540 // given a fullname (e.g "/ObjectPool/_1020961869"), find the entry
0541 // if not found and create is false, return 0
0542 // if create is true, a new entry is returned
0543 DirEntry* DirTree::entry(const std::string& name, bool create)
0544 {
0545     if (!name.length()) return (DirEntry*)0;
0546 
0547     // quick check for "/" (that's root)
0548     if (name == "/") return entry(0);
0549 
0550     // split the names, e.g  "/ObjectPool/_1020961869" will become:
0551     // "ObjectPool" and "_1020961869"
0552     std::list<std::string> names;
0553     std::string::size_type start = 0, end = 0;
0554     if (name[0] == '/') start++;
0555     while (start < name.length()) {
0556         end = name.find_first_of('/', start);
0557         if (end == std::string::npos) end = name.length();
0558         names.push_back(name.substr(start, end - start));
0559         start = end + 1;
0560     }
0561 
0562     // start from root
0563     int index = 0 ;
0564 
0565     // trace one by one
0566     std::list<std::string>::iterator it;
0567 
0568     for (it = names.begin(); it != names.end(); ++it) {
0569         // find among the children of index
0570         std::vector<unsigned> chi = children(index);
0571         unsigned child = 0;
0572         for (unsigned i = 0; i < chi.size(); i++) {
0573             DirEntry* ce = entry(chi[i]);
0574             if (ce)
0575                 if (ce->valid && (ce->name.length() > 1))
0576                     if (ce->name == *it)
0577                         child = chi[i];
0578         }
0579 
0580         // traverse to the child
0581         if (child > 0) index = child;
0582         else {
0583             // not found among children
0584             if (!create) return (DirEntry*)0;
0585 
0586             // create a new entry
0587             unsigned parent = index;
0588             entries.push_back(DirEntry());
0589             index = entryCount() - 1;
0590             DirEntry* e = entry(index);
0591             e->valid = true;
0592             e->name = *it;
0593             e->dir = false;
0594             e->size = 0;
0595             e->start = 0;
0596             e->child = End;
0597             e->prev = End;
0598             e->next = entry(parent)->child;
0599             entry(parent)->child = index;
0600         }
0601     }
0602 
0603     return entry(index);
0604 }
0605 
0606 // helper function: recursively find siblings of index
0607 void dirtree_find_siblings(DirTree* dirtree, std::vector<unsigned>& result,
0608                            unsigned index)
0609 {
0610     DirEntry* e = dirtree->entry(index);
0611     if (!e) return;
0612     if (!e->valid) return;
0613 
0614     // prevent infinite loop
0615     for (unsigned i = 0; i < result.size(); i++)
0616         if (result[i] == index) return;
0617 
0618     // add myself
0619     result.push_back(index);
0620 
0621     // visit previous sibling, don't go infinitely
0622     unsigned prev = e->prev;
0623     if ((prev > 0) && (prev < dirtree->entryCount())) {
0624         for (unsigned i = 0; i < result.size(); i++)
0625             if (result[i] == prev) prev = 0;
0626         if (prev) dirtree_find_siblings(dirtree, result, prev);
0627     }
0628 
0629     // visit next sibling, don't go infinitely
0630     unsigned next = e->next;
0631     if ((next > 0) && (next < dirtree->entryCount())) {
0632         for (unsigned i = 0; i < result.size(); i++)
0633             if (result[i] == next) next = 0;
0634         if (next) dirtree_find_siblings(dirtree, result, next);
0635     }
0636 }
0637 
0638 std::vector<unsigned> DirTree::children(unsigned index)
0639 {
0640     std::vector<unsigned> result;
0641 
0642     DirEntry* e = entry(index);
0643     if (e) if (e->valid && e->child < entryCount())
0644             dirtree_find_siblings(this, result, e->child);
0645 
0646     return result;
0647 }
0648 
0649 void DirTree::load(unsigned char* buffer, unsigned size)
0650 {
0651     entries.clear();
0652 
0653     for (unsigned i = 0; i < size / 128; i++) {
0654         unsigned p = i * 128;
0655 
0656         // would be < 32 if first char in the name isn't printable
0657         unsigned prefix = 32;
0658 
0659         // parse name of this entry, which stored as Unicode 16-bit
0660         std::string name;
0661         int name_len = readU16(buffer + 0x40 + p);
0662         if (name_len > 64) name_len = 64;
0663         for (int j = 0; (buffer[j+p]) && (j < name_len); j += 2)
0664             name.append(1, buffer[j+p]);
0665 
0666         // first char isn't printable ? remove it...
0667         if (buffer[p] < 32) {
0668             prefix = buffer[0];
0669             name.erase(0, 1);
0670         }
0671 
0672         // 2 = file (aka stream), 1 = directory (aka storage), 5 = root
0673         unsigned type = buffer[ 0x42 + p];
0674 
0675         DirEntry e;
0676         e.valid = true;
0677         e.name = name;
0678         e.start = readU32(buffer + 0x74 + p);
0679         e.size = readU32(buffer + 0x78 + p);
0680         e.prev = readU32(buffer + 0x44 + p);
0681         e.next = readU32(buffer + 0x48 + p);
0682         e.child = readU32(buffer + 0x4C + p);
0683         e.dir = (type != 2);
0684         
0685         // sanity checks
0686         if ((type != 2) && (type != 1) && (type != 5)) e.valid = false;
0687         if (name_len < 1) e.valid = false;
0688 
0689         // CLSID, contains a object class GUI if this entry is a storage or root
0690         // storage or all zero if not.
0691 #ifdef POLE_DEBUG
0692         printf("DirTree::load name=%s type=%i prev=%i next=%i child=%i start=%i size=%i clsid=%i.%i.%i.%i\n",
0693                name.c_str(),type,e.prev,e.next,e.child,e.start,e.size,readU32(buffer+0x50+p),readU32(buffer+0x54+p),readU32(buffer+0x58+p),readU32(buffer+0x5C+p));
0694 #endif
0695         entries.push_back(e);
0696     }
0697 }
0698 
0699 // return space required to save this dirtree
0700 unsigned DirTree::size()
0701 {
0702     return entryCount() * 128;
0703 }
0704 
0705 void DirTree::save(unsigned char* buffer)
0706 {
0707     memset(buffer, 0, size());
0708 
0709     // root is fixed as "Root Entry"
0710     DirEntry* root = entry(0);
0711     std::string name = "Root Entry";
0712     for (unsigned j = 0; j < name.length(); j++)
0713         buffer[ j*2 ] = name[j];
0714     writeU16(buffer + 0x40, name.length()*2 + 2);
0715     writeU32(buffer + 0x74, 0xffffffff);
0716     writeU32(buffer + 0x78, 0);
0717     writeU32(buffer + 0x44, 0xffffffff);
0718     writeU32(buffer + 0x48, 0xffffffff);
0719     writeU32(buffer + 0x4c, root->child);
0720     buffer[ 0x42 ] = 5;
0721     buffer[ 0x43 ] = 1;
0722 
0723     for (unsigned i = 1; i < entryCount(); i++) {
0724         DirEntry* e = entry(i);
0725         if (!e) continue;
0726         if (e->dir) {
0727             e->start = 0xffffffff;
0728             e->size = 0;
0729         }
0730 
0731         // max length for name is 32 chars
0732         std::string name = e->name;
0733         if (name.length() > 32)
0734             name.erase(32, name.length());
0735 
0736         // write name as Unicode 16-bit
0737         for (unsigned j = 0; j < name.length(); j++)
0738             buffer[ i*128 + j*2 ] = name[j];
0739 
0740         writeU16(buffer + i*128 + 0x40, name.length()*2 + 2);
0741         writeU32(buffer + i*128 + 0x74, e->start);
0742         writeU32(buffer + i*128 + 0x78, e->size);
0743         writeU32(buffer + i*128 + 0x44, e->prev);
0744         writeU32(buffer + i*128 + 0x48, e->next);
0745         writeU32(buffer + i*128 + 0x4c, e->child);
0746         buffer[ i*128 + 0x42 ] = e->dir ? 1 : 2;
0747         buffer[ i*128 + 0x43 ] = 1; // always black
0748     }
0749 }
0750 
0751 void DirTree::debug()
0752 {
0753     for (unsigned i = 0; i < entryCount(); i++) {
0754         DirEntry* e = entry(i);
0755         if (!e) continue;
0756         std::cout << i << ": ";
0757         if (!e->valid) std::cout << "INVALID ";
0758         std::cout << e->name << " ";
0759         if (e->dir) std::cout << "(Dir) ";
0760         else std::cout << "(File) ";
0761         std::cout << e->size << " ";
0762         std::cout << "s:" << e->start << " ";
0763         std::cout << "(";
0764         if (e->child == End) std::cout << "-"; else std::cout << e->child;
0765         std::cout << " ";
0766         if (e->prev == End) std::cout << "-"; else std::cout << e->prev;
0767         std::cout << ":";
0768         if (e->next == End) std::cout << "-"; else std::cout << e->next;
0769         std::cout << ")";
0770         std::cout << std::endl;
0771     }
0772 }
0773 
0774 // =========== StorageIO ==========
0775 
0776 StorageIO::StorageIO(Storage* st, const char* fname)
0777 {
0778     storage = st;
0779     filename = fname;
0780     result = Storage::Ok;
0781     opened = false;
0782 
0783     header = new Header();
0784     dirtree = new DirTree();
0785     bbat = new AllocTable();
0786     sbat = new AllocTable();
0787 
0788     filesize = 0;
0789     bbat->blockSize = 1 << header->b_shift;
0790     sbat->blockSize = 1 << header->s_shift;
0791 }
0792 
0793 StorageIO::~StorageIO()
0794 {
0795     if (opened) close();
0796     delete sbat;
0797     delete bbat;
0798     delete dirtree;
0799     delete header;
0800 }
0801 
0802 bool StorageIO::open()
0803 {
0804     // already opened ? close first
0805     if (opened) close();
0806 
0807     load();
0808 
0809     return result == Storage::Ok;
0810 }
0811 
0812 void StorageIO::load()
0813 {
0814     unsigned char* buffer = 0;
0815     unsigned long buflen = 0;
0816     std::vector<unsigned long> blocks;
0817 
0818     // open the file, check for error
0819     result = Storage::OpenFailed;
0820     file.open(filename.c_str(), std::ios::binary | std::ios::in);
0821     if (!file.good()) return;
0822 
0823     // find size of input file
0824     file.seekg(0, std::ios::end);
0825     filesize = file.tellg();
0826 
0827     // load header
0828     buffer = new unsigned char[512];
0829     file.seekg(0);
0830     file.read((char*)buffer, 512);
0831     if (!file.good()) {
0832         delete[] buffer;
0833         return;
0834     }
0835     header->load(buffer);
0836     delete[] buffer;
0837 
0838     // check OLE magic id
0839     result = Storage::NotOLE;
0840     for (unsigned i = 0; i < 8; i++)
0841         if (header->id[i] != pole_magic[i])
0842             return;
0843 
0844     // sanity checks
0845     result = Storage::BadOLE;
0846     if (!header->valid()) return;
0847     if (header->threshold != 4096) return;
0848 
0849     // important block size
0850     bbat->blockSize = 1 << header->b_shift;
0851     sbat->blockSize = 1 << header->s_shift;
0852 
0853     // find blocks allocated to store big bat
0854     // the first 109 blocks are in header, the rest in meta bat
0855     blocks.clear();
0856     blocks.resize(header->num_bat);
0857     for (unsigned i = 0; i < 109; i++)
0858         if (i >= header->num_bat) break;
0859         else blocks[i] = header->bb_blocks[i];
0860     if ((header->num_bat > 109) && (header->num_mbat > 0)) {
0861         unsigned char* buffer2 = new unsigned char[ bbat->blockSize ];
0862         unsigned k = 109;
0863         unsigned mblock = header->mbat_start;
0864         for (unsigned r = 0; r < header->num_mbat; r++) {
0865             unsigned long rr = loadBigBlock(mblock, buffer2, bbat->blockSize);
0866             if (rr != bbat->blockSize) {
0867                 delete[] buffer2;
0868                 return;
0869             }
0870             for (unsigned s = 0; s < bbat->blockSize - 4; s += 4) {
0871                 if (k >= header->num_bat) break;
0872                 else  blocks[k++] = readU32(buffer2 + s);
0873             }
0874             mblock = readU32(buffer2 + bbat->blockSize - 4);
0875         }
0876         delete[] buffer2;
0877     }
0878 
0879     // load big bat
0880     buflen = blocks.size() * bbat->blockSize;
0881     if (buflen > 0) {
0882         buffer = new unsigned char[ buflen ];
0883         unsigned long r = loadBigBlocks(blocks, buffer, buflen);
0884         if (r != buflen) {
0885             delete[] buffer;
0886             return;
0887         }
0888         bbat->load(buffer, buflen);
0889         delete[] buffer;
0890     }
0891 
0892     // load small bat
0893     blocks.clear();
0894     blocks = bbat->follow(header->sbat_start);
0895     buflen = blocks.size() * bbat->blockSize;
0896     if (buflen > 0) {
0897         buffer = new unsigned char[ buflen ];
0898         unsigned long r = loadBigBlocks(blocks, buffer, buflen);
0899         if (r != buflen) {
0900             delete[] buffer;
0901             return;
0902         }
0903         sbat->load(buffer, buflen);
0904         delete[] buffer;
0905     }
0906 
0907     // load directory tree
0908     blocks.clear();
0909     blocks = bbat->follow(header->dirent_start);
0910     buflen = blocks.size() * bbat->blockSize;
0911     buffer = new unsigned char[ buflen ];
0912     unsigned long r = loadBigBlocks(blocks, buffer, buflen);
0913     if (r != buflen) {
0914         delete[] buffer;
0915         return;
0916     }
0917     dirtree->load(buffer, buflen);
0918     unsigned sb_start = readU32(buffer + 0x74);
0919     delete[] buffer;
0920 
0921     // fetch block chain as data for small-files
0922     sb_blocks = bbat->follow(sb_start);   // small files
0923 
0924     // for troubleshooting, just enable this block
0925 #ifdef POLE_DEBUG
0926     header->debug();
0927     sbat->debug();
0928     bbat->debug();
0929     dirtree->debug();
0930 #endif
0931 
0932     // so far so good
0933     result = Storage::Ok;
0934     opened = true;
0935 }
0936 
0937 void StorageIO::create()
0938 {
0939     // std::cout << "Creating " << filename << std::endl;
0940 
0941     file.open(filename.c_str(), std::ios::out | std::ios::binary);
0942     if (!file.good()) {
0943         std::cerr << "Can't create " << filename << std::endl;
0944         result = Storage::OpenFailed;
0945         return;
0946     }
0947 
0948     // so far so good
0949     opened = true;
0950     result = Storage::Ok;
0951 }
0952 
// Placeholder: the body contains no statements, so calling it has no effect.
// The note below records the on-disk layout used by Microsoft's implementation.
void StorageIO::flush()
{
    /* Note on Microsoft implementation:
       - directory entries are stored in the last block(s)
       - BATs are as second to the last
       - Meta BATs are third to the last
    */
}
0961 
0962 void StorageIO::close()
0963 {
0964     if (!opened) return;
0965 
0966     file.close();
0967     opened = false;
0968 
0969     std::list<Stream*>::iterator it;
0970     for (it = streams.begin(); it != streams.end(); ++it)
0971         delete *it;
0972 }
0973 
0974 StreamIO* StorageIO::streamIO(const std::string& name)
0975 {
0976     // sanity check
0977     if (!name.length()) return (StreamIO*)0;
0978 
0979     // search in the entries
0980     DirEntry* entry = dirtree->entry(name);
0981     //if( entry) std::cout << "FOUND\n";
0982     if (!entry) return (StreamIO*)0;
0983     //if( !entry->dir ) std::cout << "  NOT DIR\n";
0984     if (entry->dir) return (StreamIO*)0;
0985 
0986     StreamIO* result = new StreamIO(this, entry);
0987     result->fullName = name;
0988 
0989     return result;
0990 }
0991 
0992 unsigned long StorageIO::loadBigBlocks(std::vector<unsigned long> blocks,
0993                                        unsigned char* data, unsigned long maxlen)
0994 {
0995     // sentinel
0996     if (!data) return 0;
0997     if (!file.good()) return 0;
0998     if (blocks.size() < 1) return 0;
0999     if (maxlen == 0) return 0;
1000 
1001     // read block one by one, seems fast enough
1002     unsigned long bytes = 0;
1003     for (unsigned long i = 0; (i < blocks.size()) && (bytes < maxlen); i++) {
1004         unsigned long block = blocks[i];
1005         unsigned long pos =  bbat->blockSize * (block + 1);
1006         unsigned long p = (bbat->blockSize < maxlen - bytes) ? bbat->blockSize : maxlen - bytes;
1007         if (pos + p > filesize) p = filesize - pos;
1008         file.seekg(pos);
1009         file.read((char*)data + bytes, p);
1010         if (!file.good()) return 0;
1011         bytes += p;
1012     }
1013 
1014     return bytes;
1015 }
1016 
1017 unsigned long StorageIO::loadBigBlock(unsigned long block,
1018                                       unsigned char* data, unsigned long maxlen)
1019 {
1020     // sentinel
1021     if (!data) return 0;
1022     if (!file.good()) return 0;
1023 
1024     // wraps call for loadBigBlocks
1025     std::vector<unsigned long> blocks;
1026     blocks.resize(1);
1027     blocks[ 0 ] = block;
1028 
1029     return loadBigBlocks(blocks, data, maxlen);
1030 }
1031 
1032 // return number of bytes which has been read
1033 unsigned long StorageIO::loadSmallBlocks(std::vector<unsigned long> blocks,
1034         unsigned char* data, unsigned long maxlen)
1035 {
1036     // sentinel
1037     if (!data) return 0;
1038     if (!file.good()) return 0;
1039     if (blocks.size() < 1) return 0;
1040     if (maxlen == 0) return 0;
1041 
1042     // our own local buffer
1043     unsigned char* buf = new unsigned char[ bbat->blockSize ];
1044 
1045     // read small block one by one
1046     unsigned long bytes = 0;
1047     for (unsigned long i = 0; (i < blocks.size()) && (bytes < maxlen); i++) {
1048         unsigned long block = blocks[i];
1049 
1050         // find where the small-block exactly is
1051         unsigned long pos = block * sbat->blockSize;
1052         unsigned long bbindex = pos / bbat->blockSize;
1053         if (bbindex >= sb_blocks.size()) break;
1054 
1055         unsigned long r = loadBigBlock(sb_blocks[ bbindex ], buf, bbat->blockSize);
1056         if (r != bbat->blockSize) {
1057             delete[] buf;
1058             return 0;
1059         }
1060 
1061         // copy the data
1062         unsigned offset = pos % bbat->blockSize;
1063         unsigned long p = (maxlen - bytes < bbat->blockSize - offset) ? maxlen - bytes :  bbat->blockSize - offset;
1064         p = (sbat->blockSize < p) ? sbat->blockSize : p;
1065         memcpy(data + bytes, buf + offset, p);
1066         bytes += p;
1067     }
1068 
1069     delete[] buf;
1070 
1071     return bytes;
1072 }
1073 
1074 unsigned long StorageIO::loadSmallBlock(unsigned long block,
1075                                         unsigned char* data, unsigned long maxlen)
1076 {
1077     // sentinel
1078     if (!data) return 0;
1079     if (!file.good()) return 0;
1080 
1081     // wraps call for loadSmallBlocks
1082     std::vector<unsigned long> blocks;
1083     blocks.resize(1);
1084     blocks.assign(1, block);
1085 
1086     return loadSmallBlocks(blocks, data, maxlen);
1087 }
1088 
1089 // =========== StreamIO ==========
1090 
1091 StreamIO::StreamIO(StorageIO* s, DirEntry* e)
1092 {
1093     io = s;
1094     entry = e;
1095     eof = false;
1096     fail = false;
1097 
1098     m_pos = 0;
1099 
1100     if (entry->size >= io->header->threshold)
1101         blocks = io->bbat->follow(entry->start);
1102     else
1103         blocks = io->sbat->follow(entry->start);
1104 
1105     // prepare cache
1106     cache_pos = 0;
1107     cache_size = 4096; // optimal ?
1108     cache_data = new unsigned char[cache_size];
1109     updateCache();
1110 }
1111 
1112 // FIXME tell parent we're gone
// Releases the cache buffer allocated by the constructor.
StreamIO::~StreamIO()
{
    delete[] cache_data;
}
1117 
// Sets the current position to `pos`. The value is stored as given;
// no range check against the stream size is performed here.
void StreamIO::seek(unsigned long pos)
{
    m_pos = pos;
}
1122 
// Returns the current position within the stream.
unsigned long StreamIO::tell()
{
    return m_pos;
}
1127 
1128 int StreamIO::getch()
1129 {
1130     // past end-of-file ?
1131     if (m_pos > entry->size) return -1;
1132 
1133     // need to update cache ?
1134     if (!cache_size || (m_pos < cache_pos) ||
1135             (m_pos >= cache_pos + cache_size))
1136         updateCache();
1137 
1138     // something bad if we don't get good cache
1139     if (!cache_size) return -1;
1140 
1141     int data = cache_data[m_pos - cache_pos];
1142     m_pos++;
1143 
1144     return data;
1145 }
1146 
1147 unsigned long StreamIO::read(unsigned long pos, unsigned char* data, unsigned long maxlen)
1148 {
1149     // sanity checks
1150     if (!data) return 0;
1151     if (maxlen == 0) return 0;
1152 
1153     unsigned long totalbytes = 0;
1154 
1155     if (entry->size < io->header->threshold) {
1156         // small file
1157         unsigned long index = pos / io->sbat->blockSize;
1158 
1159         if (index >= blocks.size()) return 0;
1160 
1161         unsigned char* buf = new unsigned char[ io->sbat->blockSize ];
1162         unsigned long offset = pos % io->sbat->blockSize;
1163         while (totalbytes < maxlen) {
1164             if (index >= blocks.size()) break;
1165             io->loadSmallBlock(blocks[index], buf, io->bbat->blockSize);
1166             unsigned long count = io->sbat->blockSize - offset;
1167             if (count > maxlen - totalbytes) count = maxlen - totalbytes;
1168             memcpy(data + totalbytes, buf + offset, count);
1169             totalbytes += count;
1170             offset = 0;
1171             index++;
1172         }
1173         delete[] buf;
1174 
1175     } else {
1176         // big file
1177         unsigned long index = pos / io->bbat->blockSize;
1178 
1179         if (index >= blocks.size()) return 0;
1180 
1181         unsigned char* buf = new unsigned char[ io->bbat->blockSize ];
1182         unsigned long offset = pos % io->bbat->blockSize;
1183         while (totalbytes < maxlen) {
1184             if (index >= blocks.size()) break;
1185             unsigned long r = io->loadBigBlock(blocks[index], buf, io->bbat->blockSize);
1186             if (r != io->bbat->blockSize) {
1187                 delete [] buf;
1188                 return 0;
1189             }
1190             unsigned long count = io->bbat->blockSize - offset;
1191             if (count > maxlen - totalbytes) count = maxlen - totalbytes;
1192             memcpy(data + totalbytes, buf + offset, count);
1193             totalbytes += count;
1194             index++;
1195             offset = 0;
1196         }
1197         delete [] buf;
1198 
1199     }
1200 
1201     return totalbytes;
1202 }
1203 
1204 unsigned long StreamIO::read(unsigned char* data, unsigned long maxlen)
1205 {
1206     unsigned long bytes = read(tell(), data, maxlen);
1207     m_pos += bytes;
1208     return bytes;
1209 }
1210 
1211 void StreamIO::updateCache()
1212 {
1213     // sanity check
1214     if (!cache_data) return;
1215 
1216     cache_pos = m_pos - (m_pos % cache_size);
1217     unsigned long bytes = cache_size;
1218     if (cache_pos + bytes > entry->size) bytes = entry->size - cache_pos;
1219     cache_size = read(cache_pos, cache_data, bytes);
1220 }
1221 
1222 
1223 // =========== Storage ==========
1224 
1225 Storage::Storage(const char* filename)
1226 {
1227     io = new StorageIO(this, filename);
1228 }
1229 
// Destroys the StorageIO backend created by the constructor.
Storage::~Storage()
{
    delete io;
}
1234 
// Returns the backend's current result code (e.g. Storage::Ok or
// Storage::OpenFailed, as set by the StorageIO operations).
int Storage::result()
{
    return io->result;
}
1239 
// Forwards to StorageIO::open() and returns its result.
bool Storage::open()
{
    return io->open();
}
1244 
// Forwards to StorageIO::close().
void Storage::close()
{
    io->close();
}
1249 
1250 std::list<std::string> Storage::entries(const std::string& path)
1251 {
1252     std::list<std::string> result;
1253     DirTree* dt = io->dirtree;
1254     DirEntry* e = dt->entry(path, false);
1255     if (e) {
1256         if (e->dir) {
1257             unsigned parent = dt->indexOf(e);
1258             std::vector<unsigned> children = dt->children(parent);
1259             for (unsigned i = 0; i < children.size(); i++)
1260                 result.push_back(dt->entry(children[i])->name);
1261         }
1262     }
1263     return result;
1264 }
1265 
1266 bool Storage::isDirectory(const std::string& name)
1267 {
1268     DirEntry* e = io->dirtree->entry(name, false);
1269     return e ? e->dir : false;
1270 }
1271 
1272 // =========== Stream ==========
1273 
1274 Stream::Stream(Storage* storage, const std::string& name)
1275 {
1276     io = storage->io->streamIO(name);
1277 }
1278 
1279 // FIXME tell parent we're gone
// Destroys the StreamIO backend (deleting a null pointer is a no-op).
Stream::~Stream()
{
    delete io;
}
1284 
1285 std::string Stream::fullName()
1286 {
1287     return io ? io->fullName : std::string();
1288 }
1289 
1290 unsigned long Stream::tell()
1291 {
1292     return io ? io->tell() : 0;
1293 }
1294 
1295 void Stream::seek(unsigned long newpos)
1296 {
1297     if (io) io->seek(newpos);
1298 }
1299 
1300 unsigned long Stream::size()
1301 {
1302     return io ? io->entry->size : 0;
1303 }
1304 
1305 int Stream::getch()
1306 {
1307     return io ? io->getch() : 0;
1308 }
1309 
1310 unsigned long Stream::read(unsigned char* data, unsigned long maxlen)
1311 {
1312     return io ? io->read(data, maxlen) : 0;
1313 }
1314 
1315 bool Stream::eof()
1316 {
1317     return io ? io->eof : false;
1318 }
1319 
1320 bool Stream::fail()
1321 {
1322     return io ? io->fail : true;
1323 }