Warning, file /office/calligra/filters/libmsooxml/ooxml_pole.cpp was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
0001 /* POLE - Portable C++ library to access OLE Storage 0002 Copyright (C) 2002-2005 Ariya Hidayat <ariya@kde.org> 0003 0004 Redistribution and use in source and binary forms, with or without 0005 modification, are permitted provided that the following conditions 0006 are met: 0007 * Redistributions of source code must retain the above copyright notice, 0008 this list of conditions and the following disclaimer. 0009 * Redistributions in binary form must reproduce the above copyright notice, 0010 this list of conditions and the following disclaimer in the documentation 0011 and/or other materials provided with the distribution. 0012 * Neither the name of the authors nor the names of its contributors may be 0013 used to endorse or promote products derived from this software without 0014 specific prior written permission. 0015 0016 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 0017 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 0018 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 0019 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 0020 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 0021 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 0022 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 0023 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 0024 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 0025 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 0026 THE POSSIBILITY OF SUCH DAMAGE. 0027 */ 0028 0029 #include "ooxml_pole.h" 0030 0031 #include <fstream> 0032 #include <iostream> 0033 #include <list> 0034 #include <string> 0035 #include <vector> 0036 0037 #include <QByteArray> 0038 #include <QIODevice> 0039 0040 #include <string.h> 0041 0042 // enable to activate debugging output 0043 // #define POLE_DEBUG 0044 0045 namespace OOXML_POLE 0046 { 0047 0048 class Header 0049 { 0050 public: 0051 unsigned char id[8]; // signature, or magic identifier 0052 unsigned b_shift; // bbat->blockSize = 1 << b_shift 0053 unsigned s_shift; // sbat->blockSize = 1 << s_shift 0054 unsigned num_bat; // blocks allocated for big bat 0055 unsigned dirent_start; // starting block for directory info 0056 unsigned threshold; // switch from small to big file (usually 4K) 0057 unsigned sbat_start; // starting block index to store small bat 0058 unsigned num_sbat; // blocks allocated for small bat 0059 unsigned mbat_start; // starting block to store meta bat 0060 unsigned num_mbat; // blocks allocated for meta bat 0061 unsigned long bb_blocks[109]; 0062 0063 Header(); 0064 bool valid(); 0065 void load(const unsigned char* buffer); 0066 void save(unsigned char* buffer); 0067 void debug(); 0068 }; 0069 0070 class AllocTable 0071 { 0072 public: 0073 static const unsigned Eof; 0074 static const unsigned Avail; 0075 static const unsigned Bat; 0076 static const unsigned MetaBat; 0077 unsigned blockSize; 0078 AllocTable(); 0079 void clear(); 0080 unsigned long count(); 0081 void resize(unsigned long newsize); 0082 void preserve(unsigned long n); 0083 void set(unsigned long index, unsigned long val); 0084 unsigned unused(); 0085 void setChain(std::vector<unsigned long>); 0086 std::vector<unsigned long> follow(unsigned long start); 0087 unsigned long operator[](unsigned long index); 0088 void load(const unsigned char* buffer, unsigned len); 0089 void save(unsigned char* buffer); 0090 unsigned size(); 0091 void debug(); 0092 private: 0093 std::vector<unsigned long> data; 0094 AllocTable(const AllocTable&); 0095 AllocTable& operator=(const AllocTable&); 0096 }; 0097 0098 class DirEntry 0099 { 0100 public: 0101 bool valid; // false if invalid (should be skipped) 0102 std::string name; // the name, not in unicode anymore 0103 bool dir; // true if directory 0104 unsigned long size; // size (not valid if directory) 0105 unsigned long start; // starting block 0106 unsigned prev; // previous sibling 0107 unsigned next; // next sibling 0108 unsigned child; // first child 0109 }; 0110 0111 class DirTree 0112 { 0113 public: 0114 static const unsigned End; 0115 DirTree(); 0116 void clear(); 0117 unsigned entryCount(); 0118 DirEntry* entry(unsigned index); 0119 DirEntry* entry(const std::string& name, bool create = false); 0120 int indexOf(DirEntry* e); 0121 int parent(unsigned index); 0122 std::string fullName(unsigned index); 0123 std::vector<unsigned> children(unsigned index); 0124 void load(unsigned char* buffer, unsigned len); 0125 void save(unsigned char* buffer); 0126 unsigned size(); 0127 void debug(); 0128 private: 0129 std::vector<DirEntry> entries; 0130 DirTree(const DirTree&); 0131 DirTree& operator=(const DirTree&); 0132 }; 0133 0134 class StorageIO 0135 { 0136 public: 0137 Storage* storage; // owner 0138 QIODevice* filename; // filename 0139 QByteArray file; 0140 int result; // result of operation 0141 bool opened; // true if file is opened 0142 unsigned long filesize; // size of the file 0143 0144 Header* header; // storage header 0145 DirTree* dirtree; // directory tree 0146 AllocTable* bbat; // allocation table for big blocks 0147 AllocTable* sbat; // allocation table for small blocks 0148 0149 std::vector<unsigned long> sb_blocks; // blocks for "small" files 0150 0151 std::list<Stream*> streams; 0152 0153 StorageIO(Storage* storage, QIODevice* filename); 0154 ~StorageIO(); 0155 0156 bool open(); 0157 void close(); 0158 void flush(); 0159 void load(); 0160 void create(); 0161 0162 unsigned long loadBigBlocks(std::vector<unsigned long> blocks, unsigned char* buffer, unsigned long maxlen); 0163 0164 unsigned long loadBigBlock(unsigned long block, unsigned char* buffer, unsigned long maxlen); 0165 0166 unsigned long loadSmallBlocks(std::vector<unsigned long> blocks, unsigned char* buffer, unsigned long maxlen); 0167 0168 unsigned long loadSmallBlock(unsigned long block, unsigned char* buffer, unsigned long maxlen); 0169 0170 StreamIO* streamIO(const std::string& name); 0171 0172 private: 0173 // no copy or assign 0174 StorageIO(const StorageIO&); 0175 StorageIO& operator=(const StorageIO&); 0176 0177 }; 0178 0179 class StreamIO 0180 { 0181 public: 0182 StorageIO* io; 0183 DirEntry* entry; 0184 std::string fullName; 0185 bool eof; 0186 bool fail; 0187 0188 StreamIO(StorageIO* io, DirEntry* entry); 0189 ~StreamIO(); 0190 unsigned long size(); 0191 void seek(unsigned long pos); 0192 unsigned long tell(); 0193 int getch(); 0194 unsigned long read(unsigned char* data, unsigned long maxlen); 0195 unsigned long read(unsigned long pos, unsigned char* data, unsigned long maxlen); 0196 0197 0198 private: 0199 std::vector<unsigned long> blocks; 0200 0201 // no copy or assign 0202 StreamIO(const StreamIO&); 0203 StreamIO& operator=(const StreamIO&); 0204 0205 // pointer for read 0206 unsigned long m_pos; 0207 0208 // simple cache system to speed-up getch() 0209 unsigned char* cache_data; 0210 unsigned long cache_size; 0211 unsigned long cache_pos; 0212 void updateCache(); 0213 }; 0214 0215 } // namespace OOXML_POLE 0216 0217 using namespace OOXML_POLE; 0218 0219 static inline unsigned long readU16(const unsigned char* ptr) 0220 { 0221 return ptr[0] + (ptr[1] << 8); 0222 } 0223 0224 static inline unsigned long readU32(const unsigned char* ptr) 0225 { 0226 return ptr[0] + (ptr[1] << 8) + (ptr[2] << 16) + (ptr[3] << 24); 0227 } 0228 0229 static inline void writeU16(unsigned char* ptr, unsigned long data) 0230 { 0231 ptr[0] = (unsigned char)(data & 0xff); 0232 ptr[1] = (unsigned char)((data >> 8) & 0xff); 0233 } 0234 0235 static inline void writeU32(unsigned char* ptr, unsigned long data) 0236 { 0237 ptr[0] = (unsigned char)(data & 0xff); 0238 ptr[1] = (unsigned char)((data >> 8) & 0xff); 0239 ptr[2] = (unsigned char)((data >> 16) & 0xff); 0240 ptr[3] = (unsigned char)((data >> 24) & 0xff); 0241 } 0242 0243 static const unsigned char pole_magic[] = { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 }; 0244 0245 // =========== Header ========== 0246 0247 Header::Header() 0248 { 0249 b_shift = 9; 0250 s_shift = 6; 0251 num_bat = 0; 0252 dirent_start = 0; 0253 threshold = 4096; 0254 sbat_start = 0; 0255 num_sbat = 0; 0256 mbat_start = 0; 0257 num_mbat = 0; 0258 0259 for (unsigned i = 0; i < 8; i++) 0260 id[i] = pole_magic[i]; 0261 for (unsigned i = 0; i < 109; i++) 0262 bb_blocks[i] = AllocTable::Avail; 0263 } 0264 0265 bool Header::valid() 0266 { 0267 if (threshold != 4096) return false; 0268 if (num_bat == 0) return false; 0269 if ((num_bat > 109) && (num_bat > (num_mbat * 127) + 109)) return false; 0270 if ((num_bat < 109) && (num_mbat != 0)) return false; 0271 if (s_shift > b_shift) return false; 0272 if (b_shift <= 6) return false; 0273 if (b_shift > 12) return false; 0274 0275 return true; 0276 } 0277 0278 void Header::load(const unsigned char* buffer) 0279 { 0280 b_shift = readU16(buffer + 0x1e); // sector shift 0281 s_shift = readU16(buffer + 0x20); // mini sector shift 0282 num_bat = readU32(buffer + 0x2c); // number of fat sectors 0283 dirent_start = readU32(buffer + 0x30); // first directory sector location 0284 threshold = readU32(buffer + 0x38); // transaction signature number 0285 sbat_start = readU32(buffer + 0x3c); // mini stream cutoff size 0286 num_sbat = readU32(buffer + 0x40); // first mini fat sector location 0287 mbat_start = readU32(buffer + 0x44); // first mini difat sector location 0288 num_mbat = readU32(buffer + 0x48); // number of difat sectors 0289 0290 for (unsigned i = 0; i < 8; i++) 0291 id[i] = buffer[i]; 0292 for (unsigned i = 0; i < 109; i++) 0293 bb_blocks[i] = readU32(buffer + 0x4C + i * 4); 0294 } 0295 0296 void Header::save(unsigned char* buffer) 0297 { 0298 memset(buffer, 0, 0x4c); 0299 memcpy(buffer, pole_magic, 8); // ole signature 0300 writeU32(buffer + 8, 0); // unknown 0301 writeU32(buffer + 12, 0); // unknown 0302 writeU32(buffer + 16, 0); // unknown 0303 writeU16(buffer + 24, 0x003e); // revision ? 0304 writeU16(buffer + 26, 3); // version ? 0305 writeU16(buffer + 28, 0xfffe); // unknown 0306 writeU16(buffer + 0x1e, b_shift); 0307 writeU16(buffer + 0x20, s_shift); 0308 writeU32(buffer + 0x2c, num_bat); 0309 writeU32(buffer + 0x30, dirent_start); 0310 writeU32(buffer + 0x38, threshold); 0311 writeU32(buffer + 0x3c, sbat_start); 0312 writeU32(buffer + 0x40, num_sbat); 0313 writeU32(buffer + 0x44, mbat_start); 0314 writeU32(buffer + 0x48, num_mbat); 0315 0316 for (unsigned i = 0; i < 109; i++) 0317 writeU32(buffer + 0x4C + i*4, bb_blocks[i]); 0318 } 0319 0320 void Header::debug() 0321 { 0322 std::cout << std::endl; 0323 std::cout << "b_shift " << b_shift << std::endl; 0324 std::cout << "s_shift " << s_shift << std::endl; 0325 std::cout << "num_bat " << num_bat << std::endl; 0326 std::cout << "dirent_start " << dirent_start << std::endl; 0327 std::cout << "threshold " << threshold << std::endl; 0328 std::cout << "sbat_start " << sbat_start << std::endl; 0329 std::cout << "num_sbat " << num_sbat << std::endl; 0330 std::cout << "mbat_start " << mbat_start << std::endl; 0331 std::cout << "num_mbat " << num_mbat << std::endl; 0332 0333 unsigned s = (num_bat <= 109) ? num_bat : 109; 0334 std::cout << "bat blocks: "; 0335 for (unsigned i = 0; i < s; i++) 0336 std::cout << bb_blocks[i] << " "; 0337 std::cout << std::endl; 0338 } 0339 0340 // =========== AllocTable ========== 0341 0342 const unsigned AllocTable::Avail = 0xffffffff; 0343 const unsigned AllocTable::Eof = 0xfffffffe; 0344 const unsigned AllocTable::Bat = 0xfffffffd; 0345 const unsigned AllocTable::MetaBat = 0xfffffffc; 0346 0347 AllocTable::AllocTable() 0348 { 0349 blockSize = 4096; 0350 // initial size 0351 resize(128); 0352 } 0353 0354 unsigned long AllocTable::count() 0355 { 0356 return data.size(); 0357 } 0358 0359 void AllocTable::resize(unsigned long newsize) 0360 { 0361 unsigned oldsize = data.size(); 0362 data.resize(newsize); 0363 if (newsize > oldsize) 0364 for (unsigned i = oldsize; i < newsize; i++) 0365 data[i] = Avail; 0366 } 0367 0368 // make sure there're still free blocks 0369 void AllocTable::preserve(unsigned long n) 0370 { 0371 std::vector<unsigned long> pre; 0372 for (unsigned i = 0; i < n; i++) 0373 pre.push_back(unused()); 0374 } 0375 0376 unsigned long AllocTable::operator[](unsigned long index) 0377 { 0378 unsigned long result; 0379 result = data[index]; 0380 return result; 0381 } 0382 0383 void AllocTable::set(unsigned long index, unsigned long value) 0384 { 0385 if (index >= count()) resize(index + 1); 0386 data[ index ] = value; 0387 } 0388 0389 void AllocTable::setChain(std::vector<unsigned long> chain) 0390 { 0391 if (chain.size()) { 0392 for (unsigned i = 0; i < chain.size() - 1; i++) 0393 set(chain[i], chain[i+1]); 0394 set(chain[ chain.size()-1 ], AllocTable::Eof); 0395 } 0396 } 0397 0398 // TODO: optimize this with better search 0399 static bool already_exist(const std::vector<unsigned long>& chain, unsigned long item) 0400 { 0401 for(unsigned i = 0; i < chain.size(); i++) 0402 if(chain[i] == item) return true; 0403 return false; 0404 } 0405 0406 // follow 0407 std::vector<unsigned long> AllocTable::follow(unsigned long start) 0408 { 0409 std::vector<unsigned long> chain; 0410 0411 if (start >= count()) return chain; 0412 0413 unsigned long p = start; 0414 while (p < count()) { 0415 if (p == (unsigned long)Eof) break; 0416 if (p == (unsigned long)Bat) break; 0417 if (p == (unsigned long)MetaBat) break; 0418 if (already_exist(chain, p)) break; 0419 chain.push_back(p); 0420 if (data[p] >= count()) break; 0421 p = data[ p ]; 0422 } 0423 0424 return chain; 0425 } 0426 0427 unsigned AllocTable::unused() 0428 { 0429 // find first available block 0430 for (unsigned i = 0; i < data.size(); i++) 0431 if (data[i] == Avail) 0432 return i; 0433 0434 // completely full, so enlarge the table 0435 unsigned block = data.size(); 0436 resize(data.size() + 10); 0437 return block; 0438 } 0439 0440 void AllocTable::load(const unsigned char* buffer, unsigned len) 0441 { 0442 resize(len / 4); 0443 for (unsigned i = 0; i < count(); i++) 0444 set(i, readU32(buffer + i*4)); 0445 } 0446 0447 // return space required to save this dirtree 0448 unsigned AllocTable::size() 0449 { 0450 return count() * 4; 0451 } 0452 0453 void AllocTable::save(unsigned char* buffer) 0454 { 0455 for (unsigned i = 0; i < count(); i++) 0456 writeU32(buffer + i*4, data[i]); 0457 } 0458 0459 void AllocTable::debug() 0460 { 0461 std::cout << "block size " << data.size() << std::endl; 0462 for (unsigned i = 0; i < data.size(); i++) { 0463 if (data[i] == Avail) continue; 0464 std::cout << i << ": "; 0465 if (data[i] == Eof) std::cout << "[eof]"; 0466 else if (data[i] == Bat) std::cout << "[bat]"; 0467 else if (data[i] == MetaBat) std::cout << "[metabat]"; 0468 else std::cout << data[i]; 0469 std::cout << std::endl; 0470 } 0471 } 0472 0473 // =========== DirTree ========== 0474 0475 const unsigned DirTree::End = 0xffffffff; 0476 0477 DirTree::DirTree() 0478 { 0479 clear(); 0480 } 0481 0482 void DirTree::clear() 0483 { 0484 // leave only root entry 0485 entries.resize(1); 0486 entries[0].valid = true; 0487 entries[0].name = "Root Entry"; 0488 entries[0].dir = true; 0489 entries[0].size = 0; 0490 entries[0].start = End; 0491 entries[0].prev = End; 0492 entries[0].next = End; 0493 entries[0].child = End; 0494 } 0495 0496 unsigned DirTree::entryCount() 0497 { 0498 return entries.size(); 0499 } 0500 0501 DirEntry* DirTree::entry(unsigned index) 0502 { 0503 if (index >= entryCount()) return (DirEntry*) 0; 0504 return &entries[ index ]; 0505 } 0506 0507 int DirTree::indexOf(DirEntry* e) 0508 { 0509 for (unsigned i = 0; i < entryCount(); i++) 0510 if (entry(i) == e) return i; 0511 0512 return -1; 0513 } 0514 0515 int DirTree::parent(unsigned index) 0516 { 0517 // brute-force, basically we iterate for each entries, find its children 0518 // and check if one of the children is 'index' 0519 for (unsigned j = 0; j < entryCount(); j++) { 0520 std::vector<unsigned> chi = children(j); 0521 for (unsigned i = 0; i < chi.size();i++) 0522 if (chi[i] == index) 0523 return j; 0524 } 0525 0526 return -1; 0527 } 0528 0529 std::string DirTree::fullName(unsigned index) 0530 { 0531 // don't use root name ("Root Entry"), just give "/" 0532 if (index == 0) return "/"; 0533 0534 std::string result = entry(index)->name; 0535 result.insert(0, "/"); 0536 int p = parent(index); 0537 DirEntry * _entry = 0; 0538 while (p > 0) { 0539 _entry = entry(p); 0540 if (_entry->dir && _entry->valid) { 0541 result.insert(0, _entry->name); 0542 result.insert(0, "/"); 0543 } 0544 --p; 0545 index = p; 0546 if (index <= 0) break; 0547 } 0548 return result; 0549 } 0550 0551 // given a fullname (e.g "/ObjectPool/_1020961869"), find the entry 0552 // if not found and create is false, return 0 0553 // if create is true, a new entry is returned 0554 DirEntry* DirTree::entry(const std::string& name, bool create) 0555 { 0556 if (!name.length()) return (DirEntry*)0; 0557 0558 // quick check for "/" (that's root) 0559 if (name == "/") return entry(0); 0560 0561 // split the names, e.g "/ObjectPool/_1020961869" will become: 0562 // "ObjectPool" and "_1020961869" 0563 std::list<std::string> names; 0564 std::string::size_type start = 0, end = 0; 0565 if (name[0] == '/') start++; 0566 while (start < name.length()) { 0567 end = name.find_first_of('/', start); 0568 if (end == std::string::npos) end = name.length(); 0569 names.push_back(name.substr(start, end - start)); 0570 start = end + 1; 0571 } 0572 0573 // start from root 0574 int index = 0 ; 0575 0576 // trace one by one 0577 std::list<std::string>::iterator it; 0578 0579 for (it = names.begin(); it != names.end(); ++it) { 0580 // find among the children of index 0581 std::vector<unsigned> chi = children(index); 0582 unsigned child = 0; 0583 for (unsigned i = 0; i < chi.size(); i++) { 0584 DirEntry* ce = entry(chi[i]); 0585 if (ce) 0586 if (ce->valid && (ce->name.length() > 1)) 0587 if (ce->name == *it) 0588 child = chi[i]; 0589 } 0590 0591 // traverse to the child 0592 if (child > 0) index = child; 0593 else { 0594 // not found among children 0595 if (!create) return (DirEntry*)0; 0596 0597 // create a new entry 0598 unsigned parent = index; 0599 entries.push_back(DirEntry()); 0600 index = entryCount() - 1; 0601 DirEntry* e = entry(index); 0602 e->valid = true; 0603 e->name = *it; 0604 e->dir = false; 0605 e->size = 0; 0606 e->start = 0; 0607 e->child = End; 0608 e->prev = End; 0609 e->next = entry(parent)->child; 0610 entry(parent)->child = index; 0611 } 0612 } 0613 0614 return entry(index); 0615 } 0616 0617 // helper function: recursively find siblings of index 0618 void dirtree_find_siblings(DirTree* dirtree, std::vector<unsigned>& result, 0619 unsigned index) 0620 { 0621 DirEntry* e = dirtree->entry(index); 0622 if (!e) return; 0623 if (!e->valid) return; 0624 0625 // prevent infinite loop 0626 for (unsigned i = 0; i < result.size(); i++) 0627 if (result[i] == index) return; 0628 0629 // add myself 0630 result.push_back(index); 0631 0632 // visit previous sibling, don't go infinitely 0633 unsigned prev = e->prev; 0634 if ((prev > 0) && (prev < dirtree->entryCount())) { 0635 for (unsigned i = 0; i < result.size(); i++) 0636 if (result[i] == prev) prev = 0; 0637 if (prev) dirtree_find_siblings(dirtree, result, prev); 0638 } 0639 0640 // visit next sibling, don't go infinitely 0641 unsigned next = e->next; 0642 if ((next > 0) && (next < dirtree->entryCount())) { 0643 for (unsigned i = 0; i < result.size(); i++) 0644 if (result[i] == next) next = 0; 0645 if (next) dirtree_find_siblings(dirtree, result, next); 0646 } 0647 } 0648 0649 std::vector<unsigned> DirTree::children(unsigned index) 0650 { 0651 std::vector<unsigned> result; 0652 0653 DirEntry* e = entry(index); 0654 if (e) if (e->valid && e->child < entryCount()) 0655 dirtree_find_siblings(this, result, e->child); 0656 0657 return result; 0658 } 0659 0660 void DirTree::load(unsigned char* buffer, unsigned size) 0661 { 0662 entries.clear(); 0663 0664 for (unsigned i = 0; i < size / 128; i++) { 0665 unsigned p = i * 128; 0666 0667 // would be < 32 if first char in the name isn't printable 0668 0669 // parse name of this entry, which stored as Unicode 16-bit 0670 std::string name; 0671 int name_len = readU16(buffer + 0x40 + p); 0672 if (name_len > 64) name_len = 64; 0673 for (int j = 0; (buffer[j+p]) && (j < name_len); j += 2) 0674 name.append(1, buffer[j+p]); 0675 0676 // first char isn't printable ? remove it... 0677 if (buffer[p] < 32) { 0678 name.erase(0, 1); 0679 } 0680 0681 // 2 = file (aka stream), 1 = directory (aka storage), 5 = root 0682 unsigned type = buffer[ 0x42 + p]; 0683 0684 DirEntry e; 0685 e.valid = true; 0686 e.name = name; 0687 e.start = readU32(buffer + 0x74 + p); 0688 e.size = readU32(buffer + 0x78 + p); 0689 e.prev = readU32(buffer + 0x44 + p); 0690 e.next = readU32(buffer + 0x48 + p); 0691 e.child = readU32(buffer + 0x4C + p); 0692 e.dir = (type != 2); 0693 0694 // sanity checks 0695 if ((type != 2) && (type != 1) && (type != 5)) e.valid = false; 0696 if (name_len < 1) e.valid = false; 0697 0698 // CLSID, contains a object class GUI if this entry is a storage or root 0699 // storage or all zero if not. 0700 #ifdef POLE_DEBUG 0701 printf("DirTree::load name=%s type=%i prev=%i next=%i child=%i start=%i size=%i clsid=%i.%i.%i.%i\n", 0702 name.c_str(),type,e.prev,e.next,e.child,e.start,e.size,readU32(buffer+0x50+p),readU32(buffer+0x54+p),readU32(buffer+0x58+p),readU32(buffer+0x5C+p)); 0703 #endif 0704 entries.push_back(e); 0705 } 0706 } 0707 0708 // return space required to save this dirtree 0709 unsigned DirTree::size() 0710 { 0711 return entryCount() * 128; 0712 } 0713 0714 void DirTree::save(unsigned char* buffer) 0715 { 0716 memset(buffer, 0, size()); 0717 0718 // root is fixed as "Root Entry" 0719 DirEntry* root = entry(0); 0720 std::string name = "Root Entry"; 0721 for (unsigned j = 0; j < name.length(); j++) 0722 buffer[ j*2 ] = name[j]; 0723 writeU16(buffer + 0x40, name.length()*2 + 2); 0724 writeU32(buffer + 0x74, 0xffffffff); 0725 writeU32(buffer + 0x78, 0); 0726 writeU32(buffer + 0x44, 0xffffffff); 0727 writeU32(buffer + 0x48, 0xffffffff); 0728 writeU32(buffer + 0x4c, root->child); 0729 buffer[ 0x42 ] = 5; 0730 buffer[ 0x43 ] = 1; 0731 0732 for (unsigned i = 1; i < entryCount(); i++) { 0733 DirEntry* e = entry(i); 0734 if (!e) continue; 0735 if (e->dir) { 0736 e->start = 0xffffffff; 0737 e->size = 0; 0738 } 0739 0740 // max length for name is 32 chars 0741 std::string name = e->name; 0742 if (name.length() > 32) 0743 name.erase(32, name.length()); 0744 0745 // write name as Unicode 16-bit 0746 for (unsigned j = 0; j < name.length(); j++) 0747 buffer[ i*128 + j*2 ] = name[j]; 0748 0749 writeU16(buffer + i*128 + 0x40, name.length()*2 + 2); 0750 writeU32(buffer + i*128 + 0x74, e->start); 0751 writeU32(buffer + i*128 + 0x78, e->size); 0752 writeU32(buffer + i*128 + 0x44, e->prev); 0753 writeU32(buffer + i*128 + 0x48, e->next); 0754 writeU32(buffer + i*128 + 0x4c, e->child); 0755 buffer[ i*128 + 0x42 ] = e->dir ? 1 : 2; 0756 buffer[ i*128 + 0x43 ] = 1; // always black 0757 } 0758 } 0759 0760 void DirTree::debug() 0761 { 0762 for (unsigned i = 0; i < entryCount(); i++) { 0763 DirEntry* e = entry(i); 0764 if (!e) continue; 0765 std::cout << i << ": "; 0766 if (!e->valid) std::cout << "INVALID "; 0767 std::cout << e->name << " "; 0768 if (e->dir) std::cout << "(Dir) "; 0769 else std::cout << "(File) "; 0770 std::cout << e->size << " "; 0771 std::cout << "s:" << e->start << " "; 0772 std::cout << "("; 0773 if (e->child == End) std::cout << "-"; else std::cout << e->child; 0774 std::cout << " "; 0775 if (e->prev == End) std::cout << "-"; else std::cout << e->prev; 0776 std::cout << ":"; 0777 if (e->next == End) std::cout << "-"; else std::cout << e->next; 0778 std::cout << ")"; 0779 std::cout << std::endl; 0780 } 0781 } 0782 0783 // =========== StorageIO ========== 0784 0785 StorageIO::StorageIO(Storage* st, QIODevice* fname) 0786 { 0787 storage = st; 0788 filename = fname; 0789 result = Storage::Ok; 0790 opened = false; 0791 0792 header = new Header(); 0793 dirtree = new DirTree(); 0794 bbat = new AllocTable(); 0795 sbat = new AllocTable(); 0796 0797 filesize = 0; 0798 bbat->blockSize = 1 << header->b_shift; 0799 sbat->blockSize = 1 << header->s_shift; 0800 } 0801 0802 StorageIO::~StorageIO() 0803 { 0804 if (opened) close(); 0805 delete sbat; 0806 delete bbat; 0807 delete dirtree; 0808 delete header; 0809 } 0810 0811 bool StorageIO::open() 0812 { 0813 // already opened ? close first 0814 if (opened) close(); 0815 0816 load(); 0817 0818 return result == Storage::Ok; 0819 } 0820 0821 void StorageIO::load() 0822 { 0823 unsigned char* buffer = 0; 0824 unsigned long buflen = 0; 0825 std::vector<unsigned long> blocks; 0826 0827 // open the file, check for error 0828 result = Storage::OpenFailed; 0829 //file.open(filename.c_str(), std::ios::binary | std::ios::in); 0830 if (!filename->isReadable()) { 0831 return; 0832 } 0833 0834 // find size of input file 0835 //file.seekg(0, std::ios::end); 0836 //filesize = file.tellg(); 0837 0838 // The reason we read QIODevice completely and put it to QByteArray is 0839 // that QIODevice return by KZipEntry does not seem to work very well, 0840 // that is, bytesAvailable, seek and read behave oddly 0841 // This also means that the current solution is not optimal for large files 0842 0843 file = filename->readAll(); 0844 filesize = file.length(); 0845 0846 // load header 0847 buffer = new unsigned char[512]; 0848 //file->seek(0); 0849 //file->read((char*)buffer, 512); 0850 memcpy(buffer, file.mid(0, 512).data(), 512); 0851 if (!filename->isReadable()) { 0852 delete[] buffer; 0853 return; 0854 } 0855 header->load(buffer); 0856 delete[] buffer; 0857 0858 // check OLE magic id 0859 result = Storage::NotOLE; 0860 for (unsigned i = 0; i < 8; i++) 0861 if (header->id[i] != pole_magic[i]) 0862 return; 0863 0864 // sanity checks 0865 result = Storage::BadOLE; 0866 if (!header->valid()) return; 0867 if (header->threshold != 4096) return; 0868 0869 // important block size 0870 bbat->blockSize = 1 << header->b_shift; 0871 sbat->blockSize = 1 << header->s_shift; 0872 0873 // find blocks allocated to store big bat 0874 // the first 109 blocks are in header, the rest in meta bat 0875 blocks.clear(); 0876 blocks.resize(header->num_bat); 0877 for (unsigned i = 0; i < 109; i++) 0878 if (i >= header->num_bat) break; 0879 else blocks[i] = header->bb_blocks[i]; 0880 if ((header->num_bat > 109) && (header->num_mbat > 0)) { 0881 unsigned char* buffer2 = new unsigned char[ bbat->blockSize ]; 0882 unsigned k = 109; 0883 unsigned mblock = header->mbat_start; 0884 for (unsigned r = 0; r < header->num_mbat; r++) { 0885 unsigned long rr = loadBigBlock(mblock, buffer2, bbat->blockSize); 0886 if (rr != bbat->blockSize) { 0887 delete[] buffer2; 0888 return; 0889 } 0890 for (unsigned s = 0; s < bbat->blockSize - 4; s += 4) { 0891 if (k >= header->num_bat) break; 0892 else blocks[k++] = readU32(buffer2 + s); 0893 } 0894 mblock = readU32(buffer2 + bbat->blockSize - 4); 0895 } 0896 delete[] buffer2; 0897 } 0898 0899 // load big bat 0900 buflen = blocks.size() * bbat->blockSize; 0901 if (buflen > 0) { 0902 buffer = new unsigned char[ buflen ]; 0903 unsigned long r = loadBigBlocks(blocks, buffer, buflen); 0904 if (r != buflen) { 0905 delete[] buffer; 0906 return; 0907 } 0908 bbat->load(buffer, buflen); 0909 delete[] buffer; 0910 } 0911 0912 // load small bat 0913 blocks.clear(); 0914 blocks = bbat->follow(header->sbat_start); 0915 buflen = blocks.size() * bbat->blockSize; 0916 if (buflen > 0) { 0917 buffer = new unsigned char[ buflen ]; 0918 unsigned long r = loadBigBlocks(blocks, buffer, buflen); 0919 if (r != buflen) { 0920 delete[] buffer; 0921 return; 0922 } 0923 sbat->load(buffer, buflen); 0924 delete[] buffer; 0925 } 0926 0927 // load directory tree 0928 blocks.clear(); 0929 blocks = bbat->follow(header->dirent_start); 0930 buflen = blocks.size() * bbat->blockSize; 0931 buffer = new unsigned char[ buflen ]; 0932 unsigned long r = loadBigBlocks(blocks, buffer, buflen); 0933 if (r != buflen) { 0934 delete[] buffer; 0935 return; 0936 } 0937 dirtree->load(buffer, buflen); 0938 unsigned sb_start = readU32(buffer + 0x74); 0939 delete[] buffer; 0940 0941 // fetch block chain as data for small-files 0942 sb_blocks = bbat->follow(sb_start); // small files 0943 0944 // for troubleshooting, just enable this block 0945 #ifdef POLE_DEBUG 0946 header->debug(); 0947 sbat->debug(); 0948 bbat->debug(); 0949 dirtree->debug(); 0950 #endif 0951 0952 // so far so good 0953 result = Storage::Ok; 0954 opened = true; 0955 } 0956 0957 void StorageIO::create() 0958 { 0959 // std::cout << "Creating " << filename << std::endl; 0960 0961 //file.open(filename.c_str(), std::ios::out | std::ios::binary); 0962 if (!filename->isReadable()) { 0963 //std::cerr << "Can't create " << filename << std::endl; 0964 result = Storage::OpenFailed; 0965 return; 0966 } 0967 0968 // so far so good 0969 opened = true; 0970 result = Storage::Ok; 0971 } 0972 0973 void StorageIO::flush() 0974 { 0975 /* Note on Microsoft implementation: 0976 - directory entries are stored in the last block(s) 0977 - BATs are as second to the last 0978 - Meta BATs are third to the last 0979 */ 0980 } 0981 0982 void StorageIO::close() 0983 { 0984 if (!opened) return; 0985 0986 //file->close(); 0987 opened = false; 0988 0989 std::list<Stream*>::iterator it; 0990 for (it = streams.begin(); it != streams.end(); ++it) 0991 delete *it; 0992 } 0993 0994 StreamIO* StorageIO::streamIO(const std::string& name) 0995 { 0996 // sanity check 0997 if (!name.length()) return (StreamIO*)0; 0998 0999 // search in the entries 1000 DirEntry* entry = dirtree->entry(name); 1001 //if( entry) std::cout << "FOUND\n"; 1002 if (!entry) return (StreamIO*)0; 1003 //if( !entry->dir ) std::cout << " NOT DIR\n"; 1004 if (entry->dir) return (StreamIO*)0; 1005 1006 StreamIO* result = new StreamIO(this, entry); 1007 result->fullName = name; 1008 1009 return result; 1010 } 1011 1012 unsigned long StorageIO::loadBigBlocks(std::vector<unsigned long> blocks, 1013 unsigned char* data, unsigned long maxlen) 1014 { 1015 // sentinel 1016 if (!data) return 0; 1017 if (!filename->isReadable()) return 0; 1018 if (blocks.size() < 1) return 0; 1019 if (maxlen == 0) return 0; 1020 // read block one by one, seems fast enough 1021 unsigned long bytes = 0; 1022 for (unsigned long i = 0; (i < blocks.size()) && (bytes < maxlen); i++) { 1023 unsigned long block = blocks[i]; 1024 unsigned long pos = bbat->blockSize * (block + 1); 1025 unsigned long p = (bbat->blockSize < maxlen - bytes) ? bbat->blockSize : maxlen - bytes; 1026 if (pos + p > filesize) p = filesize - pos; 1027 //file->seek(pos); 1028 //file->read((char*)data + bytes, p); 1029 memcpy((char*)data + bytes, file.mid(pos, p), p); 1030 if (!filename->isReadable()) return 0; 1031 bytes += p; 1032 } 1033 1034 return bytes; 1035 } 1036 1037 unsigned long StorageIO::loadBigBlock(unsigned long block, 1038 unsigned char* data, unsigned long maxlen) 1039 { 1040 // sentinel 1041 if (!data) return 0; 1042 if (!filename->isReadable()) return 0; 1043 1044 // wraps call for loadBigBlocks 1045 std::vector<unsigned long> blocks; 1046 blocks.resize(1); 1047 blocks[ 0 ] = block; 1048 1049 return loadBigBlocks(blocks, data, maxlen); 1050 } 1051 1052 // return number of bytes which has been read 1053 unsigned long StorageIO::loadSmallBlocks(std::vector<unsigned long> blocks, 1054 unsigned char* data, unsigned long maxlen) 1055 { 1056 // sentinel 1057 if (!data) return 0; 1058 if (!filename->isReadable()) return 0; 1059 if (blocks.size() < 1) return 0; 1060 if (maxlen == 0) return 0; 1061 1062 // our own local buffer 1063 unsigned char* buf = new unsigned char[ bbat->blockSize ]; 1064 1065 // read small block one by one 1066 unsigned long bytes = 0; 1067 for (unsigned long i = 0; (i < blocks.size()) && (bytes < maxlen); i++) { 1068 unsigned long block = blocks[i]; 1069 1070 // find where the small-block exactly is 1071 unsigned long pos = block * sbat->blockSize; 1072 unsigned long bbindex = pos / bbat->blockSize; 1073 if (bbindex >= sb_blocks.size()) break; 1074 1075 unsigned long r = loadBigBlock(sb_blocks[ bbindex ], buf, bbat->blockSize); 1076 if (r != bbat->blockSize) { 1077 delete[] buf; 1078 return 0; 1079 } 1080 1081 // copy the data 1082 unsigned offset = pos % bbat->blockSize; 1083 unsigned long p = (maxlen - bytes < bbat->blockSize - offset) ? maxlen - bytes : bbat->blockSize - offset; 1084 p = (sbat->blockSize < p) ? sbat->blockSize : p; 1085 memcpy(data + bytes, buf + offset, p); 1086 bytes += p; 1087 } 1088 1089 delete[] buf; 1090 1091 return bytes; 1092 } 1093 1094 unsigned long StorageIO::loadSmallBlock(unsigned long block, 1095 unsigned char* data, unsigned long maxlen) 1096 { 1097 // sentinel 1098 if (!data) return 0; 1099 if (!filename->isReadable()) return 0; 1100 1101 // wraps call for loadSmallBlocks 1102 std::vector<unsigned long> blocks; 1103 blocks.resize(1); 1104 blocks.assign(1, block); 1105 1106 return loadSmallBlocks(blocks, data, maxlen); 1107 } 1108 1109 // =========== StreamIO ========== 1110 1111 StreamIO::StreamIO(StorageIO* s, DirEntry* e) 1112 { 1113 io = s; 1114 entry = e; 1115 eof = false; 1116 fail = false; 1117 1118 m_pos = 0; 1119 1120 if (entry->size >= io->header->threshold) 1121 blocks = io->bbat->follow(entry->start); 1122 else 1123 blocks = io->sbat->follow(entry->start); 1124 1125 // prepare cache 1126 cache_pos = 0; 1127 cache_size = 4096; // optimal ? 1128 cache_data = new unsigned char[cache_size]; 1129 updateCache(); 1130 } 1131 1132 // FIXME tell parent we're gone 1133 StreamIO::~StreamIO() 1134 { 1135 delete[] cache_data; 1136 } 1137 1138 void StreamIO::seek(unsigned long pos) 1139 { 1140 m_pos = pos; 1141 } 1142 1143 unsigned long StreamIO::tell() 1144 { 1145 return m_pos; 1146 } 1147 1148 int StreamIO::getch() 1149 { 1150 // past end-of-file ? 1151 if (m_pos > entry->size) return -1; 1152 1153 // need to update cache ? 1154 if (!cache_size || (m_pos < cache_pos) || 1155 (m_pos >= cache_pos + cache_size)) 1156 updateCache(); 1157 1158 // something bad if we don't get good cache 1159 if (!cache_size) return -1; 1160 1161 int data = cache_data[m_pos - cache_pos]; 1162 m_pos++; 1163 1164 return data; 1165 } 1166 1167 unsigned long StreamIO::read(unsigned long pos, unsigned char* data, unsigned long maxlen) 1168 { 1169 // sanity checks 1170 if (!data) return 0; 1171 if (maxlen == 0) return 0; 1172 1173 unsigned long totalbytes = 0; 1174 1175 if (entry->size < io->header->threshold) { 1176 // small file 1177 unsigned long index = pos / io->sbat->blockSize; 1178 1179 if (index >= blocks.size()) return 0; 1180 1181 unsigned char buf[4096]; 1182 unsigned long offset = pos % io->sbat->blockSize; 1183 while (totalbytes < maxlen) { 1184 if (index >= blocks.size()) break; 1185 io->loadSmallBlock(blocks[index], &buf[0], io->bbat->blockSize); 1186 unsigned long count = io->sbat->blockSize - offset; 1187 if (count > maxlen - totalbytes) count = maxlen - totalbytes; 1188 memcpy(data + totalbytes, &buf[0] + offset, count); 1189 totalbytes += count; 1190 offset = 0; 1191 index++; 1192 } 1193 1194 } else { 1195 // big file 1196 unsigned long index = pos / io->bbat->blockSize; 1197 1198 if (index >= blocks.size()) return 0; 1199 1200 unsigned char buf[4096]; 1201 unsigned long offset = pos % io->bbat->blockSize; 1202 while (totalbytes < maxlen) { 1203 if (index >= blocks.size()) break; 1204 unsigned long r = io->loadBigBlock(blocks[index], &buf[0], io->bbat->blockSize); 1205 if (r != io->bbat->blockSize) { 1206 return 0; 1207 } 1208 unsigned long count = io->bbat->blockSize - offset; 1209 if (count > maxlen - totalbytes) count = maxlen - totalbytes; 1210 memcpy(data + totalbytes, &buf[0] + offset, count); 1211 totalbytes += count; 1212 index++; 1213 offset = 0; 1214 } 1215 1216 } 1217 1218 return totalbytes; 1219 } 1220 1221 unsigned long StreamIO::read(unsigned char* data, unsigned long maxlen) 1222 { 1223 unsigned long bytes = read(tell(), data, maxlen); 1224 m_pos += bytes; 1225 return bytes; 1226 } 1227 1228 void StreamIO::updateCache() 1229 { 1230 // sanity check 1231 if (!cache_data) return; 1232 1233 cache_pos = m_pos - (m_pos % cache_size); 1234 unsigned long bytes = cache_size; 1235 if (cache_pos + bytes > entry->size) bytes = entry->size - cache_pos; 1236 cache_size = read(cache_pos, cache_data, bytes); 1237 } 1238 1239 1240 // =========== Storage ========== 1241 1242 Storage::Storage(QIODevice* file) 1243 { 1244 io = new StorageIO(this, file); 1245 } 1246 1247 Storage::~Storage() 1248 { 1249 delete io; 1250 } 1251 1252 int Storage::result() 1253 { 1254 return io->result; 1255 } 1256 1257 bool Storage::open() 1258 { 1259 return io->open(); 1260 } 1261 1262 void Storage::close() 1263 { 1264 io->close(); 1265 } 1266 1267 std::list<std::string> Storage::entries(const std::string& path) 1268 { 1269 std::list<std::string> result; 1270 DirTree* dt = io->dirtree; 1271 DirEntry* e = dt->entry(path, false); 1272 if (e) { 1273 if (e->dir) { 1274 unsigned parent = dt->indexOf(e); 1275 std::vector<unsigned> children = dt->children(parent); 1276 for (unsigned i = 0; i < children.size(); i++) 1277 result.push_back(dt->entry(children[i])->name); 1278 } 1279 } 1280 return result; 1281 } 1282 1283 bool Storage::isDirectory(const std::string& name) 1284 { 1285 DirEntry* e = io->dirtree->entry(name, false); 1286 return e ? e->dir : false; 1287 } 1288 1289 // =========== Stream ========== 1290 1291 Stream::Stream(Storage* storage, const std::string& name) 1292 { 1293 io = storage->io->streamIO(name); 1294 } 1295 1296 // FIXME tell parent we're gone 1297 Stream::~Stream() 1298 { 1299 delete io; 1300 } 1301 1302 std::string Stream::fullName() 1303 { 1304 return io ? io->fullName : std::string(); 1305 } 1306 1307 unsigned long Stream::tell() 1308 { 1309 return io ? io->tell() : 0; 1310 } 1311 1312 void Stream::seek(unsigned long newpos) 1313 { 1314 if (io) io->seek(newpos); 1315 } 1316 1317 unsigned long Stream::size() 1318 { 1319 return io ? io->entry->size : 0; 1320 } 1321 1322 int Stream::getch() 1323 { 1324 return io ? io->getch() : 0; 1325 } 1326 1327 unsigned long Stream::read(unsigned char* data, unsigned long maxlen) 1328 { 1329 return io ? io->read(data, maxlen) : 0; 1330 } 1331 1332 bool Stream::eof() 1333 { 1334 return io ? io->eof : false; 1335 } 1336 1337 bool Stream::fail() 1338 { 1339 return io ? io->fail : true; 1340 }