File indexing completed on 2024-05-12 16:29:15

0001 /* POLE - Portable C++ library to access OLE Storage
0002    Copyright (C) 2002-2005 Ariya Hidayat <ariya@kde.org>
0003 
0004    Redistribution and use in source and binary forms, with or without
0005    modification, are permitted provided that the following conditions
0006    are met:
0007    * Redistributions of source code must retain the above copyright notice,
0008      this list of conditions and the following disclaimer.
0009    * Redistributions in binary form must reproduce the above copyright notice,
0010      this list of conditions and the following disclaimer in the documentation
0011      and/or other materials provided with the distribution.
0012    * Neither the name of the authors nor the names of its contributors may be
0013      used to endorse or promote products derived from this software without
0014      specific prior written permission.
0015 
0016    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
0017    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
0018    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
0019    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
0020    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
0021    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
0022    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
0023    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
0024    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
0025    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
0026    THE POSSIBILITY OF SUCH DAMAGE.
0027 */
0028 
0029 #include "ooxml_pole.h"
0030 
0031 #include <fstream>
0032 #include <iostream>
0033 #include <list>
0034 #include <string>
0035 #include <vector>
0036 
0037 #include <QByteArray>
0038 #include <QIODevice>
0039 
0040 #include <string.h>
0041 
0042 // enable to activate debugging output
0043 // #define POLE_DEBUG
0044 
0045 namespace OOXML_POLE
0046 {
0047 
// In-memory copy of the fixed header at the start of an OLE compound
// document. load() decodes the fields from raw little-endian bytes and
// save() encodes them back; valid() applies basic sanity checks.
class Header
{
public:
    unsigned char id[8];       // signature, or magic identifier
    unsigned b_shift;          // bbat->blockSize = 1 << b_shift
    unsigned s_shift;          // sbat->blockSize = 1 << s_shift
    unsigned num_bat;          // blocks allocated for big bat
    unsigned dirent_start;     // starting block for directory info
    unsigned threshold;        // switch from small to big file (usually 4K)
    unsigned sbat_start;       // starting block index to store small bat
    unsigned num_sbat;         // blocks allocated for small bat
    unsigned mbat_start;       // starting block to store meta bat
    unsigned num_mbat;         // blocks allocated for meta bat
    // the first 109 big-bat block indices, stored inline in the header
    // (read from / written to offset 0x4C, four bytes each)
    unsigned long bb_blocks[109];

    // initialise every field with the defaults of an empty document
    Header();
    // true when the loaded field values are mutually consistent
    bool valid();
    // decode the fields from a raw header buffer
    void load(const unsigned char* buffer);
    // encode the fields into a raw header buffer
    void save(unsigned char* buffer);
    // print the fields to std::cout
    void debug();
};
0069 
// Block allocation table: entry i holds the index of the block that follows
// block i in its chain, or one of the special marker values below.
class AllocTable
{
public:
    // special marker values stored in table entries
    static const unsigned Eof;      // end of a chain
    static const unsigned Avail;    // block is free
    static const unsigned Bat;      // block is used by the allocation table itself
    static const unsigned MetaBat;  // block is used by the meta allocation table
    unsigned blockSize;             // size in bytes of the blocks this table describes
    AllocTable();
    // NOTE(review): no definition of clear() is visible in this part of the file
    void clear();
    // number of entries in the table
    unsigned long count();
    // grow or shrink the table; new entries are marked Avail
    void resize(unsigned long newsize);
    void preserve(unsigned long n);
    // store val at index, enlarging the table when index is out of range
    void set(unsigned long index, unsigned long val);
    // index of the first Avail entry; the table is enlarged when it is full
    unsigned unused();
    // link the given blocks into one chain terminated by Eof
    void setChain(std::vector<unsigned long>);
    // list the blocks of the chain that begins at start
    std::vector<unsigned long> follow(unsigned long start);
    // unchecked read access to entry index
    unsigned long operator[](unsigned long index);
    // replace the table with len / 4 little-endian 32-bit entries from buffer
    void load(const unsigned char* buffer, unsigned len);
    // write all entries to buffer as little-endian 32-bit values
    void save(unsigned char* buffer);
    // number of bytes save() writes
    unsigned size();
    // print all non-Avail entries to std::cout
    void debug();
private:
    std::vector<unsigned long> data;
    // copy operations are declared private so the table cannot be copied
    AllocTable(const AllocTable&);
    AllocTable& operator=(const AllocTable&);
};
0097 
// One node of the directory tree, decoded from a 128-byte on-disk record.
// prev / next / child hold indices into the DirTree entry list; DirTree::End
// marks "no such entry".
class DirEntry
{
public:
    bool valid;            // false if invalid (should be skipped)
    std::string name;      // the name, not in unicode anymore
    bool dir;              // true if directory
    unsigned long size;    // size (not valid if directory)
    unsigned long start;   // starting block
    unsigned prev;         // previous sibling
    unsigned next;         // next sibling
    unsigned child;        // first child
};
0110 
// Directory of a compound document: a flat list of DirEntry records that
// reference each other by index. Entry 0 is the root.
class DirTree
{
public:
    static const unsigned End;   // index value meaning "no entry"
    DirTree();
    // drop everything except a freshly initialised root entry
    void clear();
    // number of entries, including invalid ones
    unsigned entryCount();
    // entry at index, or 0 when index is out of range
    DirEntry* entry(unsigned index);
    // entry for a path such as "/ObjectPool/_1020961869"; returns 0 when it
    // does not exist unless create is true, in which case it is added
    DirEntry* entry(const std::string& name, bool create = false);
    // index of e inside this tree, or -1 when e does not belong to it
    int indexOf(DirEntry* e);
    // index of the entry whose children include index, or -1
    int parent(unsigned index);
    // path of the entry at index; "/" for the root
    std::string fullName(unsigned index);
    // indices of all direct children of the entry at index
    std::vector<unsigned> children(unsigned index);
    // rebuild the tree from len bytes of raw 128-byte directory records
    void load(unsigned char* buffer, unsigned len);
    // write the tree to buffer as raw 128-byte directory records
    void save(unsigned char* buffer);
    // number of bytes save() writes
    unsigned size();
    // print all entries to std::cout
    void debug();
private:
    std::vector<DirEntry> entries;
    // copy operations are declared private so the tree cannot be copied
    DirTree(const DirTree&);
    DirTree& operator=(const DirTree&);
};
0133 
// Implementation object behind Storage: owns the parsed header, the
// directory tree and both allocation tables of one compound document.
class StorageIO
{
public:
    Storage* storage;         // owner
    QIODevice* filename;      // input device (the name is historical; it is not a file name)
    QByteArray file;          // complete content of the device, filled by load()
    int result;               // result of operation
    bool opened;              // true if file is opened
    unsigned long filesize;   // size of the file

    Header* header;           // storage header
    DirTree* dirtree;         // directory tree
    AllocTable* bbat;         // allocation table for big blocks
    AllocTable* sbat;         // allocation table for small blocks

    std::vector<unsigned long> sb_blocks; // blocks for "small" files

    std::list<Stream*> streams;   // streams deleted by close()

    StorageIO(Storage* storage, QIODevice* filename);
    ~StorageIO();

    // (re)load the document; true when result ends up as Storage::Ok
    bool open();
    // mark the storage as closed and delete the streams in 'streams'
    void close();
    // currently does nothing
    void flush();
    // read the device and parse header, allocation tables and directory
    void load();
    // mark the storage as opened if the device is readable
    void create();

    // NOTE(review): the four loaders below are defined outside the visible
    // part of the file; load() relies on them returning the number of bytes
    // copied into buffer.
    unsigned long loadBigBlocks(std::vector<unsigned long> blocks, unsigned char* buffer, unsigned long maxlen);

    unsigned long loadBigBlock(unsigned long block, unsigned char* buffer, unsigned long maxlen);

    unsigned long loadSmallBlocks(std::vector<unsigned long> blocks, unsigned char* buffer, unsigned long maxlen);

    unsigned long loadSmallBlock(unsigned long block, unsigned char* buffer, unsigned long maxlen);

    // look up a stream by its full name in the directory tree
    StreamIO* streamIO(const std::string& name);

private:
    // no copy or assign
    StorageIO(const StorageIO&);
    StorageIO& operator=(const StorageIO&);

};
0178 
// Read access to a single stream of a storage.
// NOTE(review): the member functions are defined outside the visible part
// of the file; the notes below are derived from the declarations only.
class StreamIO
{
public:
    StorageIO* io;          // storage this stream belongs to
    DirEntry* entry;        // directory entry describing the stream
    std::string fullName;   // presumably the full path of the entry - confirm in the constructor
    bool eof;
    bool fail;

    StreamIO(StorageIO* io, DirEntry* entry);
    ~StreamIO();
    unsigned long size();
    void seek(unsigned long pos);
    unsigned long tell();
    int getch();
    // read from the current position
    unsigned long read(unsigned char* data, unsigned long maxlen);
    // read from an explicit position
    unsigned long read(unsigned long pos, unsigned char* data, unsigned long maxlen);


private:
    std::vector<unsigned long> blocks;

    // no copy or assign
    StreamIO(const StreamIO&);
    StreamIO& operator=(const StreamIO&);

    // pointer for read
    unsigned long m_pos;

    // simple cache system to speed-up getch()
    unsigned char* cache_data;
    unsigned long cache_size;
    unsigned long cache_pos;
    void updateCache();
};
0214 
0215 } // namespace OOXML_POLE
0216 
0217 using namespace OOXML_POLE;
0218 
// Decode the 16-bit little-endian value stored in the two bytes at ptr.
static inline unsigned long readU16(const unsigned char* ptr)
{
    const int low_byte = ptr[0];
    const int high_byte = ptr[1];
    return low_byte + (high_byte << 8);
}
0223 
// Decode the 32-bit little-endian value stored in the four bytes at ptr.
//
// Each byte is widened to unsigned long before it is shifted. Shifting the
// promoted (signed int) byte by 24 overflows for values >= 0x80, and the
// negative result is sign-extended when unsigned long is 64 bits wide, so
// markers such as 0xfffffffe would no longer compare equal to
// AllocTable::Eof.
static inline unsigned long readU32(const unsigned char* ptr)
{
    return  static_cast<unsigned long>(ptr[0])
         | (static_cast<unsigned long>(ptr[1]) << 8)
         | (static_cast<unsigned long>(ptr[2]) << 16)
         | (static_cast<unsigned long>(ptr[3]) << 24);
}
0228 
// Store the low 16 bits of data at ptr in little-endian byte order.
static inline void writeU16(unsigned char* ptr, unsigned long data)
{
    for (int byte_no = 0; byte_no < 2; ++byte_no)
        ptr[byte_no] = (unsigned char)((data >> (8 * byte_no)) & 0xff);
}
0234 
// Store the low 32 bits of data at ptr in little-endian byte order.
static inline void writeU32(unsigned char* ptr, unsigned long data)
{
    for (int byte_no = 0; byte_no < 4; ++byte_no)
        ptr[byte_no] = (unsigned char)((data >> (8 * byte_no)) & 0xff);
}
0242 
// The eight signature bytes expected at the very start of a compound
// document; Header's constructor and save() write them, StorageIO::load()
// compares the loaded header against them.
static const unsigned char pole_magic[] = { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 };
0244 
0245 // =========== Header ==========
0246 
0247 Header::Header()
0248 {
0249     b_shift = 9;
0250     s_shift = 6;
0251     num_bat = 0;
0252     dirent_start = 0;
0253     threshold = 4096;
0254     sbat_start = 0;
0255     num_sbat = 0;
0256     mbat_start = 0;
0257     num_mbat = 0;
0258 
0259     for (unsigned i = 0; i < 8; i++)
0260         id[i] = pole_magic[i];
0261     for (unsigned i = 0; i < 109; i++)
0262         bb_blocks[i] = AllocTable::Avail;
0263 }
0264 
0265 bool Header::valid()
0266 {
0267     if (threshold != 4096) return false;
0268     if (num_bat == 0) return false;
0269     if ((num_bat > 109) && (num_bat > (num_mbat * 127) + 109)) return false;
0270     if ((num_bat < 109) && (num_mbat != 0)) return false;
0271     if (s_shift > b_shift) return false;
0272     if (b_shift <= 6) return false;
0273     if (b_shift > 12) return false;
0274 
0275     return true;
0276 }
0277 
0278 void Header::load(const unsigned char* buffer)
0279 {
0280     b_shift      = readU16(buffer + 0x1e); // sector shift
0281     s_shift      = readU16(buffer + 0x20); // mini sector shift
0282     num_bat      = readU32(buffer + 0x2c); // number of fat sectors
0283     dirent_start = readU32(buffer + 0x30); // first directory sector location
0284     threshold    = readU32(buffer + 0x38); // transaction signature number
0285     sbat_start   = readU32(buffer + 0x3c); // mini stream cutoff size
0286     num_sbat     = readU32(buffer + 0x40); // first mini fat sector location
0287     mbat_start   = readU32(buffer + 0x44); // first mini difat sector location
0288     num_mbat     = readU32(buffer + 0x48); // number of difat sectors
0289 
0290     for (unsigned i = 0; i < 8; i++)
0291         id[i] = buffer[i];
0292     for (unsigned i = 0; i < 109; i++)
0293         bb_blocks[i] = readU32(buffer + 0x4C + i * 4);
0294 }
0295 
0296 void Header::save(unsigned char* buffer)
0297 {
0298     memset(buffer, 0, 0x4c);
0299     memcpy(buffer, pole_magic, 8);          // ole signature
0300     writeU32(buffer + 8, 0);                // unknown
0301     writeU32(buffer + 12, 0);               // unknown
0302     writeU32(buffer + 16, 0);               // unknown
0303     writeU16(buffer + 24, 0x003e);          // revision ?
0304     writeU16(buffer + 26, 3);               // version ?
0305     writeU16(buffer + 28, 0xfffe);          // unknown
0306     writeU16(buffer + 0x1e, b_shift);
0307     writeU16(buffer + 0x20, s_shift);
0308     writeU32(buffer + 0x2c, num_bat);
0309     writeU32(buffer + 0x30, dirent_start);
0310     writeU32(buffer + 0x38, threshold);
0311     writeU32(buffer + 0x3c, sbat_start);
0312     writeU32(buffer + 0x40, num_sbat);
0313     writeU32(buffer + 0x44, mbat_start);
0314     writeU32(buffer + 0x48, num_mbat);
0315 
0316     for (unsigned i = 0; i < 109; i++)
0317         writeU32(buffer + 0x4C + i*4, bb_blocks[i]);
0318 }
0319 
0320 void Header::debug()
0321 {
0322     std::cout << std::endl;
0323     std::cout << "b_shift " << b_shift << std::endl;
0324     std::cout << "s_shift " << s_shift << std::endl;
0325     std::cout << "num_bat " << num_bat << std::endl;
0326     std::cout << "dirent_start " << dirent_start << std::endl;
0327     std::cout << "threshold " << threshold << std::endl;
0328     std::cout << "sbat_start " << sbat_start << std::endl;
0329     std::cout << "num_sbat " << num_sbat << std::endl;
0330     std::cout << "mbat_start " << mbat_start << std::endl;
0331     std::cout << "num_mbat " << num_mbat << std::endl;
0332 
0333     unsigned s = (num_bat <= 109) ? num_bat : 109;
0334     std::cout << "bat blocks: ";
0335     for (unsigned i = 0; i < s; i++)
0336         std::cout << bb_blocks[i] << " ";
0337     std::cout << std::endl;
0338 }
0339 
0340 // =========== AllocTable ==========
0341 
// Special values that can appear in an allocation table entry instead of
// the index of the next block.
const unsigned AllocTable::Avail = 0xffffffff;    // block is free
const unsigned AllocTable::Eof = 0xfffffffe;      // last block of a chain
const unsigned AllocTable::Bat = 0xfffffffd;      // block holds allocation table data
const unsigned AllocTable::MetaBat = 0xfffffffc;  // block holds meta allocation table data
0346 
0347 AllocTable::AllocTable()
0348 {
0349     blockSize = 4096;
0350     // initial size
0351     resize(128);
0352 }
0353 
0354 unsigned long AllocTable::count()
0355 {
0356     return data.size();
0357 }
0358 
0359 void AllocTable::resize(unsigned long newsize)
0360 {
0361     unsigned oldsize = data.size();
0362     data.resize(newsize);
0363     if (newsize > oldsize)
0364         for (unsigned i = oldsize; i < newsize; i++)
0365             data[i] = Avail;
0366 }
0367 
0368 // make sure there're still free blocks
0369 void AllocTable::preserve(unsigned long n)
0370 {
0371     std::vector<unsigned long> pre;
0372     for (unsigned i = 0; i < n; i++)
0373         pre.push_back(unused());
0374 }
0375 
0376 unsigned long AllocTable::operator[](unsigned long index)
0377 {
0378     unsigned long result;
0379     result = data[index];
0380     return result;
0381 }
0382 
0383 void AllocTable::set(unsigned long index, unsigned long value)
0384 {
0385     if (index >= count()) resize(index + 1);
0386     data[ index ] = value;
0387 }
0388 
0389 void AllocTable::setChain(std::vector<unsigned long> chain)
0390 {
0391     if (chain.size()) {
0392         for (unsigned i = 0; i < chain.size() - 1; i++)
0393             set(chain[i], chain[i+1]);
0394         set(chain[ chain.size()-1 ], AllocTable::Eof);
0395     }
0396 }
0397 
// TODO: optimize this with better search
// Linear search: true when item occurs anywhere in chain.
static bool already_exist(const std::vector<unsigned long>& chain, unsigned long item)
{
    std::vector<unsigned long>::const_iterator pos = chain.begin();
    while (pos != chain.end() && *pos != item)
        ++pos;
    return pos != chain.end();
}
0405 
0406 // follow
0407 std::vector<unsigned long> AllocTable::follow(unsigned long start)
0408 {
0409     std::vector<unsigned long> chain;
0410 
0411     if (start >= count()) return chain;
0412 
0413     unsigned long p = start;
0414     while (p < count()) {
0415         if (p == (unsigned long)Eof) break;
0416         if (p == (unsigned long)Bat) break;
0417         if (p == (unsigned long)MetaBat) break;
0418         if (already_exist(chain, p)) break;
0419         chain.push_back(p);
0420         if (data[p] >= count()) break;
0421         p = data[ p ];
0422     }
0423 
0424     return chain;
0425 }
0426 
0427 unsigned AllocTable::unused()
0428 {
0429     // find first available block
0430     for (unsigned i = 0; i < data.size(); i++)
0431         if (data[i] == Avail)
0432             return i;
0433 
0434     // completely full, so enlarge the table
0435     unsigned block = data.size();
0436     resize(data.size() + 10);
0437     return block;
0438 }
0439 
0440 void AllocTable::load(const unsigned char* buffer, unsigned len)
0441 {
0442     resize(len / 4);
0443     for (unsigned i = 0; i < count(); i++)
0444         set(i, readU32(buffer + i*4));
0445 }
0446 
0447 // return space required to save this dirtree
0448 unsigned AllocTable::size()
0449 {
0450     return count() * 4;
0451 }
0452 
0453 void AllocTable::save(unsigned char* buffer)
0454 {
0455     for (unsigned i = 0; i < count(); i++)
0456         writeU32(buffer + i*4, data[i]);
0457 }
0458 
0459 void AllocTable::debug()
0460 {
0461     std::cout << "block size " << data.size() << std::endl;
0462     for (unsigned i = 0; i < data.size(); i++) {
0463         if (data[i] == Avail) continue;
0464         std::cout << i << ": ";
0465         if (data[i] == Eof) std::cout << "[eof]";
0466         else if (data[i] == Bat) std::cout << "[bat]";
0467         else if (data[i] == MetaBat) std::cout << "[metabat]";
0468         else std::cout << data[i];
0469         std::cout << std::endl;
0470     }
0471 }
0472 
0473 // =========== DirTree ==========
0474 
// Index value used in prev / next / child (and the root's start) to say
// "no entry".
const unsigned DirTree::End = 0xffffffff;
0476 
0477 DirTree::DirTree()
0478 {
0479     clear();
0480 }
0481 
0482 void DirTree::clear()
0483 {
0484     // leave only root entry
0485     entries.resize(1);
0486     entries[0].valid = true;
0487     entries[0].name = "Root Entry";
0488     entries[0].dir = true;
0489     entries[0].size = 0;
0490     entries[0].start = End;
0491     entries[0].prev = End;
0492     entries[0].next = End;
0493     entries[0].child = End;
0494 }
0495 
0496 unsigned DirTree::entryCount()
0497 {
0498     return entries.size();
0499 }
0500 
0501 DirEntry* DirTree::entry(unsigned index)
0502 {
0503     if (index >= entryCount()) return (DirEntry*) 0;
0504     return &entries[ index ];
0505 }
0506 
0507 int DirTree::indexOf(DirEntry* e)
0508 {
0509     for (unsigned i = 0; i < entryCount(); i++)
0510         if (entry(i) == e) return i;
0511 
0512     return -1;
0513 }
0514 
0515 int DirTree::parent(unsigned index)
0516 {
0517     // brute-force, basically we iterate for each entries, find its children
0518     // and check if one of the children is 'index'
0519     for (unsigned j = 0; j < entryCount(); j++) {
0520         std::vector<unsigned> chi = children(j);
0521         for (unsigned i = 0; i < chi.size();i++)
0522             if (chi[i] == index)
0523                 return j;
0524     }
0525 
0526     return -1;
0527 }
0528 
0529 std::string DirTree::fullName(unsigned index)
0530 {
0531     // don't use root name ("Root Entry"), just give "/"
0532     if (index == 0) return "/";
0533 
0534     std::string result = entry(index)->name;
0535     result.insert(0,  "/");
0536     int p = parent(index);
0537     DirEntry * _entry = 0;
0538     while (p > 0) {
0539         _entry = entry(p);
0540         if (_entry->dir && _entry->valid) {
0541             result.insert(0,  _entry->name);
0542             result.insert(0,  "/");
0543         }
0544         --p;
0545         index = p;
0546         if (index <= 0) break;
0547     }
0548     return result;
0549 }
0550 
0551 // given a fullname (e.g "/ObjectPool/_1020961869"), find the entry
0552 // if not found and create is false, return 0
0553 // if create is true, a new entry is returned
0554 DirEntry* DirTree::entry(const std::string& name, bool create)
0555 {
0556     if (!name.length()) return (DirEntry*)0;
0557 
0558     // quick check for "/" (that's root)
0559     if (name == "/") return entry(0);
0560 
0561     // split the names, e.g  "/ObjectPool/_1020961869" will become:
0562     // "ObjectPool" and "_1020961869"
0563     std::list<std::string> names;
0564     std::string::size_type start = 0, end = 0;
0565     if (name[0] == '/') start++;
0566     while (start < name.length()) {
0567         end = name.find_first_of('/', start);
0568         if (end == std::string::npos) end = name.length();
0569         names.push_back(name.substr(start, end - start));
0570         start = end + 1;
0571     }
0572 
0573     // start from root
0574     int index = 0 ;
0575 
0576     // trace one by one
0577     std::list<std::string>::iterator it;
0578 
0579     for (it = names.begin(); it != names.end(); ++it) {
0580         // find among the children of index
0581         std::vector<unsigned> chi = children(index);
0582         unsigned child = 0;
0583         for (unsigned i = 0; i < chi.size(); i++) {
0584             DirEntry* ce = entry(chi[i]);
0585             if (ce)
0586                 if (ce->valid && (ce->name.length() > 1))
0587                     if (ce->name == *it)
0588                         child = chi[i];
0589         }
0590 
0591         // traverse to the child
0592         if (child > 0) index = child;
0593         else {
0594             // not found among children
0595             if (!create) return (DirEntry*)0;
0596 
0597             // create a new entry
0598             unsigned parent = index;
0599             entries.push_back(DirEntry());
0600             index = entryCount() - 1;
0601             DirEntry* e = entry(index);
0602             e->valid = true;
0603             e->name = *it;
0604             e->dir = false;
0605             e->size = 0;
0606             e->start = 0;
0607             e->child = End;
0608             e->prev = End;
0609             e->next = entry(parent)->child;
0610             entry(parent)->child = index;
0611         }
0612     }
0613 
0614     return entry(index);
0615 }
0616 
0617 // helper function: recursively find siblings of index
0618 void dirtree_find_siblings(DirTree* dirtree, std::vector<unsigned>& result,
0619                            unsigned index)
0620 {
0621     DirEntry* e = dirtree->entry(index);
0622     if (!e) return;
0623     if (!e->valid) return;
0624 
0625     // prevent infinite loop
0626     for (unsigned i = 0; i < result.size(); i++)
0627         if (result[i] == index) return;
0628 
0629     // add myself
0630     result.push_back(index);
0631 
0632     // visit previous sibling, don't go infinitely
0633     unsigned prev = e->prev;
0634     if ((prev > 0) && (prev < dirtree->entryCount())) {
0635         for (unsigned i = 0; i < result.size(); i++)
0636             if (result[i] == prev) prev = 0;
0637         if (prev) dirtree_find_siblings(dirtree, result, prev);
0638     }
0639 
0640     // visit next sibling, don't go infinitely
0641     unsigned next = e->next;
0642     if ((next > 0) && (next < dirtree->entryCount())) {
0643         for (unsigned i = 0; i < result.size(); i++)
0644             if (result[i] == next) next = 0;
0645         if (next) dirtree_find_siblings(dirtree, result, next);
0646     }
0647 }
0648 
0649 std::vector<unsigned> DirTree::children(unsigned index)
0650 {
0651     std::vector<unsigned> result;
0652 
0653     DirEntry* e = entry(index);
0654     if (e) if (e->valid && e->child < entryCount())
0655             dirtree_find_siblings(this, result, e->child);
0656 
0657     return result;
0658 }
0659 
0660 void DirTree::load(unsigned char* buffer, unsigned size)
0661 {
0662     entries.clear();
0663 
0664     for (unsigned i = 0; i < size / 128; i++) {
0665         unsigned p = i * 128;
0666 
0667         // would be < 32 if first char in the name isn't printable
0668 
0669         // parse name of this entry, which stored as Unicode 16-bit
0670         std::string name;
0671         int name_len = readU16(buffer + 0x40 + p);
0672         if (name_len > 64) name_len = 64;
0673         for (int j = 0; (buffer[j+p]) && (j < name_len); j += 2)
0674             name.append(1, buffer[j+p]);
0675 
0676         // first char isn't printable ? remove it...
0677         if (buffer[p] < 32) {
0678             name.erase(0, 1);
0679         }
0680 
0681         // 2 = file (aka stream), 1 = directory (aka storage), 5 = root
0682         unsigned type = buffer[ 0x42 + p];
0683 
0684         DirEntry e;
0685         e.valid = true;
0686         e.name = name;
0687         e.start = readU32(buffer + 0x74 + p);
0688         e.size = readU32(buffer + 0x78 + p);
0689         e.prev = readU32(buffer + 0x44 + p);
0690         e.next = readU32(buffer + 0x48 + p);
0691         e.child = readU32(buffer + 0x4C + p);
0692         e.dir = (type != 2);
0693 
0694         // sanity checks
0695         if ((type != 2) && (type != 1) && (type != 5)) e.valid = false;
0696         if (name_len < 1) e.valid = false;
0697 
0698         // CLSID, contains a object class GUI if this entry is a storage or root
0699         // storage or all zero if not.
0700 #ifdef POLE_DEBUG
0701         printf("DirTree::load name=%s type=%i prev=%i next=%i child=%i start=%i size=%i clsid=%i.%i.%i.%i\n",
0702                name.c_str(),type,e.prev,e.next,e.child,e.start,e.size,readU32(buffer+0x50+p),readU32(buffer+0x54+p),readU32(buffer+0x58+p),readU32(buffer+0x5C+p));
0703 #endif
0704         entries.push_back(e);
0705     }
0706 }
0707 
0708 // return space required to save this dirtree
0709 unsigned DirTree::size()
0710 {
0711     return entryCount() * 128;
0712 }
0713 
0714 void DirTree::save(unsigned char* buffer)
0715 {
0716     memset(buffer, 0, size());
0717 
0718     // root is fixed as "Root Entry"
0719     DirEntry* root = entry(0);
0720     std::string name = "Root Entry";
0721     for (unsigned j = 0; j < name.length(); j++)
0722         buffer[ j*2 ] = name[j];
0723     writeU16(buffer + 0x40, name.length()*2 + 2);
0724     writeU32(buffer + 0x74, 0xffffffff);
0725     writeU32(buffer + 0x78, 0);
0726     writeU32(buffer + 0x44, 0xffffffff);
0727     writeU32(buffer + 0x48, 0xffffffff);
0728     writeU32(buffer + 0x4c, root->child);
0729     buffer[ 0x42 ] = 5;
0730     buffer[ 0x43 ] = 1;
0731 
0732     for (unsigned i = 1; i < entryCount(); i++) {
0733         DirEntry* e = entry(i);
0734         if (!e) continue;
0735         if (e->dir) {
0736             e->start = 0xffffffff;
0737             e->size = 0;
0738         }
0739 
0740         // max length for name is 32 chars
0741         std::string name = e->name;
0742         if (name.length() > 32)
0743             name.erase(32, name.length());
0744 
0745         // write name as Unicode 16-bit
0746         for (unsigned j = 0; j < name.length(); j++)
0747             buffer[ i*128 + j*2 ] = name[j];
0748 
0749         writeU16(buffer + i*128 + 0x40, name.length()*2 + 2);
0750         writeU32(buffer + i*128 + 0x74, e->start);
0751         writeU32(buffer + i*128 + 0x78, e->size);
0752         writeU32(buffer + i*128 + 0x44, e->prev);
0753         writeU32(buffer + i*128 + 0x48, e->next);
0754         writeU32(buffer + i*128 + 0x4c, e->child);
0755         buffer[ i*128 + 0x42 ] = e->dir ? 1 : 2;
0756         buffer[ i*128 + 0x43 ] = 1; // always black
0757     }
0758 }
0759 
0760 void DirTree::debug()
0761 {
0762     for (unsigned i = 0; i < entryCount(); i++) {
0763         DirEntry* e = entry(i);
0764         if (!e) continue;
0765         std::cout << i << ": ";
0766         if (!e->valid) std::cout << "INVALID ";
0767         std::cout << e->name << " ";
0768         if (e->dir) std::cout << "(Dir) ";
0769         else std::cout << "(File) ";
0770         std::cout << e->size << " ";
0771         std::cout << "s:" << e->start << " ";
0772         std::cout << "(";
0773         if (e->child == End) std::cout << "-"; else std::cout << e->child;
0774         std::cout << " ";
0775         if (e->prev == End) std::cout << "-"; else std::cout << e->prev;
0776         std::cout << ":";
0777         if (e->next == End) std::cout << "-"; else std::cout << e->next;
0778         std::cout << ")";
0779         std::cout << std::endl;
0780     }
0781 }
0782 
0783 // =========== StorageIO ==========
0784 
0785 StorageIO::StorageIO(Storage* st, QIODevice* fname)
0786 {
0787     storage = st;
0788     filename = fname;
0789     result = Storage::Ok;
0790     opened = false;
0791 
0792     header = new Header();
0793     dirtree = new DirTree();
0794     bbat = new AllocTable();
0795     sbat = new AllocTable();
0796 
0797     filesize = 0;
0798     bbat->blockSize = 1 << header->b_shift;
0799     sbat->blockSize = 1 << header->s_shift;
0800 }
0801 
0802 StorageIO::~StorageIO()
0803 {
0804     if (opened) close();
0805     delete sbat;
0806     delete bbat;
0807     delete dirtree;
0808     delete header;
0809 }
0810 
0811 bool StorageIO::open()
0812 {
0813     // already opened ? close first
0814     if (opened) close();
0815 
0816     load();
0817 
0818     return result == Storage::Ok;
0819 }
0820 
0821 void StorageIO::load()
0822 {
0823     unsigned char* buffer = 0;
0824     unsigned long buflen = 0;
0825     std::vector<unsigned long> blocks;
0826 
0827     // open the file, check for error
0828     result = Storage::OpenFailed;
0829     //file.open(filename.c_str(), std::ios::binary | std::ios::in);
0830     if (!filename->isReadable()) {
0831         return;
0832     }
0833 
0834     // find size of input file
0835     //file.seekg(0, std::ios::end);
0836     //filesize = file.tellg();
0837 
0838     // The reason we read QIODevice completely and put it to QByteArray is
0839     // that QIODevice return by KZipEntry does not seem to work very well,
0840     // that is, bytesAvailable, seek and read behave oddly
0841     // This also means that the current solution is not optimal for large files
0842 
0843     file = filename->readAll();
0844     filesize = file.length();
0845 
0846     // load header
0847     buffer = new unsigned char[512];
0848     //file->seek(0);
0849     //file->read((char*)buffer, 512);
0850     memcpy(buffer, file.mid(0, 512).data(), 512);
0851     if (!filename->isReadable()) {
0852         delete[] buffer;
0853         return;
0854     }
0855     header->load(buffer);
0856     delete[] buffer;
0857 
0858     // check OLE magic id
0859     result = Storage::NotOLE;
0860     for (unsigned i = 0; i < 8; i++)
0861         if (header->id[i] != pole_magic[i])
0862             return;
0863 
0864     // sanity checks
0865     result = Storage::BadOLE;
0866     if (!header->valid()) return;
0867     if (header->threshold != 4096) return;
0868 
0869     // important block size
0870     bbat->blockSize = 1 << header->b_shift;
0871     sbat->blockSize = 1 << header->s_shift;
0872 
0873     // find blocks allocated to store big bat
0874     // the first 109 blocks are in header, the rest in meta bat
0875     blocks.clear();
0876     blocks.resize(header->num_bat);
0877     for (unsigned i = 0; i < 109; i++)
0878         if (i >= header->num_bat) break;
0879         else blocks[i] = header->bb_blocks[i];
0880     if ((header->num_bat > 109) && (header->num_mbat > 0)) {
0881         unsigned char* buffer2 = new unsigned char[ bbat->blockSize ];
0882         unsigned k = 109;
0883         unsigned mblock = header->mbat_start;
0884         for (unsigned r = 0; r < header->num_mbat; r++) {
0885             unsigned long rr = loadBigBlock(mblock, buffer2, bbat->blockSize);
0886             if (rr != bbat->blockSize) {
0887                 delete[] buffer2;
0888                 return;
0889             }
0890             for (unsigned s = 0; s < bbat->blockSize - 4; s += 4) {
0891                 if (k >= header->num_bat) break;
0892                 else  blocks[k++] = readU32(buffer2 + s);
0893             }
0894             mblock = readU32(buffer2 + bbat->blockSize - 4);
0895         }
0896         delete[] buffer2;
0897     }
0898 
0899     // load big bat
0900     buflen = blocks.size() * bbat->blockSize;
0901     if (buflen > 0) {
0902         buffer = new unsigned char[ buflen ];
0903         unsigned long r = loadBigBlocks(blocks, buffer, buflen);
0904         if (r != buflen) {
0905             delete[] buffer;
0906             return;
0907         }
0908         bbat->load(buffer, buflen);
0909         delete[] buffer;
0910     }
0911 
0912     // load small bat
0913     blocks.clear();
0914     blocks = bbat->follow(header->sbat_start);
0915     buflen = blocks.size() * bbat->blockSize;
0916     if (buflen > 0) {
0917         buffer = new unsigned char[ buflen ];
0918         unsigned long r = loadBigBlocks(blocks, buffer, buflen);
0919         if (r != buflen) {
0920             delete[] buffer;
0921             return;
0922         }
0923         sbat->load(buffer, buflen);
0924         delete[] buffer;
0925     }
0926 
0927     // load directory tree
0928     blocks.clear();
0929     blocks = bbat->follow(header->dirent_start);
0930     buflen = blocks.size() * bbat->blockSize;
0931     buffer = new unsigned char[ buflen ];
0932     unsigned long r = loadBigBlocks(blocks, buffer, buflen);
0933     if (r != buflen) {
0934         delete[] buffer;
0935         return;
0936     }
0937     dirtree->load(buffer, buflen);
0938     unsigned sb_start = readU32(buffer + 0x74);
0939     delete[] buffer;
0940 
0941     // fetch block chain as data for small-files
0942     sb_blocks = bbat->follow(sb_start);   // small files
0943 
0944     // for troubleshooting, just enable this block
0945 #ifdef POLE_DEBUG
0946     header->debug();
0947     sbat->debug();
0948     bbat->debug();
0949     dirtree->debug();
0950 #endif
0951 
0952     // so far so good
0953     result = Storage::Ok;
0954     opened = true;
0955 }
0956 
0957 void StorageIO::create()
0958 {
0959     // std::cout << "Creating " << filename << std::endl;
0960 
0961     //file.open(filename.c_str(), std::ios::out | std::ios::binary);
0962     if (!filename->isReadable()) {
0963         //std::cerr << "Can't create " << filename << std::endl;
0964         result = Storage::OpenFailed;
0965         return;
0966     }
0967 
0968     // so far so good
0969     opened = true;
0970     result = Storage::Ok;
0971 }
0972 
0973 void StorageIO::flush()
0974 {
0975     /* Note on Microsoft implementation:
0976        - directory entries are stored in the last block(s)
0977        - BATs are as second to the last
0978        - Meta BATs are third to the last
0979     */
0980 }
0981 
0982 void StorageIO::close()
0983 {
0984     if (!opened) return;
0985 
0986     //file->close();
0987     opened = false;
0988 
0989     std::list<Stream*>::iterator it;
0990     for (it = streams.begin(); it != streams.end(); ++it)
0991         delete *it;
0992 }
0993 
0994 StreamIO* StorageIO::streamIO(const std::string& name)
0995 {
0996     // sanity check
0997     if (!name.length()) return (StreamIO*)0;
0998 
0999     // search in the entries
1000     DirEntry* entry = dirtree->entry(name);
1001     //if( entry) std::cout << "FOUND\n";
1002     if (!entry) return (StreamIO*)0;
1003     //if( !entry->dir ) std::cout << "  NOT DIR\n";
1004     if (entry->dir) return (StreamIO*)0;
1005 
1006     StreamIO* result = new StreamIO(this, entry);
1007     result->fullName = name;
1008 
1009     return result;
1010 }
1011 
1012 unsigned long StorageIO::loadBigBlocks(std::vector<unsigned long> blocks,
1013                                        unsigned char* data, unsigned long maxlen)
1014 {
1015     // sentinel
1016     if (!data) return 0;
1017     if (!filename->isReadable()) return 0;
1018     if (blocks.size() < 1) return 0;
1019     if (maxlen == 0) return 0;
1020     // read block one by one, seems fast enough
1021     unsigned long bytes = 0;
1022     for (unsigned long i = 0; (i < blocks.size()) && (bytes < maxlen); i++) {
1023         unsigned long block = blocks[i];
1024         unsigned long pos =  bbat->blockSize * (block + 1);
1025         unsigned long p = (bbat->blockSize < maxlen - bytes) ? bbat->blockSize : maxlen - bytes;
1026         if (pos + p > filesize) p = filesize - pos;
1027         //file->seek(pos);
1028         //file->read((char*)data + bytes, p);
1029         memcpy((char*)data + bytes, file.mid(pos, p), p);
1030         if (!filename->isReadable()) return 0;
1031         bytes += p;
1032     }
1033 
1034     return bytes;
1035 }
1036 
1037 unsigned long StorageIO::loadBigBlock(unsigned long block,
1038                                       unsigned char* data, unsigned long maxlen)
1039 {
1040     // sentinel
1041     if (!data) return 0;
1042     if (!filename->isReadable()) return 0;
1043 
1044     // wraps call for loadBigBlocks
1045     std::vector<unsigned long> blocks;
1046     blocks.resize(1);
1047     blocks[ 0 ] = block;
1048 
1049     return loadBigBlocks(blocks, data, maxlen);
1050 }
1051 
1052 // return number of bytes which has been read
1053 unsigned long StorageIO::loadSmallBlocks(std::vector<unsigned long> blocks,
1054         unsigned char* data, unsigned long maxlen)
1055 {
1056     // sentinel
1057     if (!data) return 0;
1058     if (!filename->isReadable()) return 0;
1059     if (blocks.size() < 1) return 0;
1060     if (maxlen == 0) return 0;
1061 
1062     // our own local buffer
1063     unsigned char* buf = new unsigned char[ bbat->blockSize ];
1064 
1065     // read small block one by one
1066     unsigned long bytes = 0;
1067     for (unsigned long i = 0; (i < blocks.size()) && (bytes < maxlen); i++) {
1068         unsigned long block = blocks[i];
1069 
1070         // find where the small-block exactly is
1071         unsigned long pos = block * sbat->blockSize;
1072         unsigned long bbindex = pos / bbat->blockSize;
1073         if (bbindex >= sb_blocks.size()) break;
1074 
1075         unsigned long r = loadBigBlock(sb_blocks[ bbindex ], buf, bbat->blockSize);
1076         if (r != bbat->blockSize) {
1077             delete[] buf;
1078             return 0;
1079         }
1080 
1081         // copy the data
1082         unsigned offset = pos % bbat->blockSize;
1083         unsigned long p = (maxlen - bytes < bbat->blockSize - offset) ? maxlen - bytes :  bbat->blockSize - offset;
1084         p = (sbat->blockSize < p) ? sbat->blockSize : p;
1085         memcpy(data + bytes, buf + offset, p);
1086         bytes += p;
1087     }
1088 
1089     delete[] buf;
1090 
1091     return bytes;
1092 }
1093 
1094 unsigned long StorageIO::loadSmallBlock(unsigned long block,
1095                                         unsigned char* data, unsigned long maxlen)
1096 {
1097     // sentinel
1098     if (!data) return 0;
1099     if (!filename->isReadable()) return 0;
1100 
1101     // wraps call for loadSmallBlocks
1102     std::vector<unsigned long> blocks;
1103     blocks.resize(1);
1104     blocks.assign(1, block);
1105 
1106     return loadSmallBlocks(blocks, data, maxlen);
1107 }
1108 
1109 // =========== StreamIO ==========
1110 
1111 StreamIO::StreamIO(StorageIO* s, DirEntry* e)
1112 {
1113     io = s;
1114     entry = e;
1115     eof = false;
1116     fail = false;
1117 
1118     m_pos = 0;
1119 
1120     if (entry->size >= io->header->threshold)
1121         blocks = io->bbat->follow(entry->start);
1122     else
1123         blocks = io->sbat->follow(entry->start);
1124 
1125     // prepare cache
1126     cache_pos = 0;
1127     cache_size = 4096; // optimal ?
1128     cache_data = new unsigned char[cache_size];
1129     updateCache();
1130 }
1131 
1132 // FIXME tell parent we're gone
1133 StreamIO::~StreamIO()
1134 {
1135     delete[] cache_data;
1136 }
1137 
1138 void StreamIO::seek(unsigned long pos)
1139 {
1140     m_pos = pos;
1141 }
1142 
1143 unsigned long StreamIO::tell()
1144 {
1145     return m_pos;
1146 }
1147 
1148 int StreamIO::getch()
1149 {
1150     // past end-of-file ?
1151     if (m_pos > entry->size) return -1;
1152 
1153     // need to update cache ?
1154     if (!cache_size || (m_pos < cache_pos) ||
1155             (m_pos >= cache_pos + cache_size))
1156         updateCache();
1157 
1158     // something bad if we don't get good cache
1159     if (!cache_size) return -1;
1160 
1161     int data = cache_data[m_pos - cache_pos];
1162     m_pos++;
1163 
1164     return data;
1165 }
1166 
1167 unsigned long StreamIO::read(unsigned long pos, unsigned char* data, unsigned long maxlen)
1168 {
1169     // sanity checks
1170     if (!data) return 0;
1171     if (maxlen == 0) return 0;
1172 
1173     unsigned long totalbytes = 0;
1174 
1175     if (entry->size < io->header->threshold) {
1176         // small file
1177         unsigned long index = pos / io->sbat->blockSize;
1178 
1179         if (index >= blocks.size()) return 0;
1180 
1181         unsigned char buf[4096];
1182         unsigned long offset = pos % io->sbat->blockSize;
1183         while (totalbytes < maxlen) {
1184             if (index >= blocks.size()) break;
1185             io->loadSmallBlock(blocks[index], &buf[0], io->bbat->blockSize);
1186             unsigned long count = io->sbat->blockSize - offset;
1187             if (count > maxlen - totalbytes) count = maxlen - totalbytes;
1188             memcpy(data + totalbytes, &buf[0] + offset, count);
1189             totalbytes += count;
1190             offset = 0;
1191             index++;
1192         }
1193 
1194     } else {
1195         // big file
1196         unsigned long index = pos / io->bbat->blockSize;
1197 
1198         if (index >= blocks.size()) return 0;
1199 
1200         unsigned char buf[4096];
1201         unsigned long offset = pos % io->bbat->blockSize;
1202         while (totalbytes < maxlen) {
1203             if (index >= blocks.size()) break;
1204             unsigned long r = io->loadBigBlock(blocks[index], &buf[0], io->bbat->blockSize);
1205             if (r != io->bbat->blockSize) {
1206                 return 0;
1207             }
1208             unsigned long count = io->bbat->blockSize - offset;
1209             if (count > maxlen - totalbytes) count = maxlen - totalbytes;
1210             memcpy(data + totalbytes, &buf[0] + offset, count);
1211             totalbytes += count;
1212             index++;
1213             offset = 0;
1214         }
1215 
1216     }
1217 
1218     return totalbytes;
1219 }
1220 
1221 unsigned long StreamIO::read(unsigned char* data, unsigned long maxlen)
1222 {
1223     unsigned long bytes = read(tell(), data, maxlen);
1224     m_pos += bytes;
1225     return bytes;
1226 }
1227 
1228 void StreamIO::updateCache()
1229 {
1230     // sanity check
1231     if (!cache_data) return;
1232 
1233     cache_pos = m_pos - (m_pos % cache_size);
1234     unsigned long bytes = cache_size;
1235     if (cache_pos + bytes > entry->size) bytes = entry->size - cache_pos;
1236     cache_size = read(cache_pos, cache_data, bytes);
1237 }
1238 
1239 
1240 // =========== Storage ==========
1241 
1242 Storage::Storage(QIODevice* file)
1243 {
1244     io = new StorageIO(this, file);
1245 }
1246 
1247 Storage::~Storage()
1248 {
1249     delete io;
1250 }
1251 
1252 int Storage::result()
1253 {
1254     return io->result;
1255 }
1256 
1257 bool Storage::open()
1258 {
1259     return io->open();
1260 }
1261 
1262 void Storage::close()
1263 {
1264     io->close();
1265 }
1266 
1267 std::list<std::string> Storage::entries(const std::string& path)
1268 {
1269     std::list<std::string> result;
1270     DirTree* dt = io->dirtree;
1271     DirEntry* e = dt->entry(path, false);
1272     if (e) {
1273         if (e->dir) {
1274             unsigned parent = dt->indexOf(e);
1275             std::vector<unsigned> children = dt->children(parent);
1276             for (unsigned i = 0; i < children.size(); i++)
1277                 result.push_back(dt->entry(children[i])->name);
1278         }
1279     }
1280     return result;
1281 }
1282 
1283 bool Storage::isDirectory(const std::string& name)
1284 {
1285     DirEntry* e = io->dirtree->entry(name, false);
1286     return e ? e->dir : false;
1287 }
1288 
1289 // =========== Stream ==========
1290 
1291 Stream::Stream(Storage* storage, const std::string& name)
1292 {
1293     io = storage->io->streamIO(name);
1294 }
1295 
1296 // FIXME tell parent we're gone
1297 Stream::~Stream()
1298 {
1299     delete io;
1300 }
1301 
1302 std::string Stream::fullName()
1303 {
1304     return io ? io->fullName : std::string();
1305 }
1306 
1307 unsigned long Stream::tell()
1308 {
1309     return io ? io->tell() : 0;
1310 }
1311 
1312 void Stream::seek(unsigned long newpos)
1313 {
1314     if (io) io->seek(newpos);
1315 }
1316 
1317 unsigned long Stream::size()
1318 {
1319     return io ? io->entry->size : 0;
1320 }
1321 
1322 int Stream::getch()
1323 {
1324     return io ? io->getch() : 0;
1325 }
1326 
1327 unsigned long Stream::read(unsigned char* data, unsigned long maxlen)
1328 {
1329     return io ? io->read(data, maxlen) : 0;
1330 }
1331 
1332 bool Stream::eof()
1333 {
1334     return io ? io->eof : false;
1335 }
1336 
1337 bool Stream::fail()
1338 {
1339     return io ? io->fail : true;
1340 }