// File indexing completed on 2024-04-28 15:38:19
0001 /*************************************************************************** 0002 * Copyright (C) 2008 by Jakub Stachowski <qbast@go2.pl> * 0003 * * 0004 * RLE decompressor based on FBReader * 0005 * Copyright (C) 2004-2008 Geometer Plus <contact@geometerplus.com> * 0006 * * 0007 * Huffdic decompressor based on Python code by Igor Skochinsky * 0008 * * 0009 * This program is free software; you can redistribute it and/or modify * 0010 * it under the terms of the GNU General Public License as published by * 0011 * the Free Software Foundation; either version 2 of the License, or * 0012 * (at your option) any later version. * 0013 ***************************************************************************/ 0014 0015 #include "decompressor.h" 0016 #include "mobipocket.h" 0017 0018 #include <QList> 0019 0020 static const unsigned char TOKEN_CODE[256] = { 0021 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0022 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0023 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0024 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0025 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0026 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0027 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0028 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0029 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0030 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0031 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0032 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0033 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0034 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0035 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0036 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0037 }; 0038 0039 namespace Mobipocket { 0040 0041 class NOOPDecompressor : public Decompressor 0042 { 0043 public: 0044 NOOPDecompressor(const PDB& p) : Decompressor(p) {} 0045 QByteArray decompress(const QByteArray& data) override { return data; } 0046 }; 0047 0048 0049 class 
RLEDecompressor : public Decompressor 0050 { 0051 public: 0052 RLEDecompressor(const PDB& p) : Decompressor(p) {} 0053 QByteArray decompress(const QByteArray& data) override; 0054 }; 0055 0056 class BitReader 0057 { 0058 public: 0059 BitReader(const QByteArray& d) : pos(0), data(d) 0060 { 0061 data.append("\000\000\000\000"); //krazy:exclude=strings 0062 len=data.size()*8; 0063 } 0064 0065 quint32 read() { 0066 quint32 g=0; 0067 quint64 r=0; 0068 while (g<32) { 0069 r=(r << 8) | (quint8)data[(pos+g)>>3]; 0070 g=g+8 - ((pos+g) & 7); 0071 } 0072 return (r >> (g-32)); 0073 } 0074 bool eat(int n) { 0075 pos+=n; 0076 return pos <= len; 0077 } 0078 0079 int left() { 0080 return len - pos; 0081 } 0082 0083 private: 0084 int pos; 0085 int len; 0086 QByteArray data; 0087 }; 0088 0089 class HuffdicDecompressor : public Decompressor 0090 { 0091 public: 0092 HuffdicDecompressor(const PDB& p); 0093 QByteArray decompress(const QByteArray& data) override; 0094 private: 0095 void unpack(BitReader reader, int depth = 0); 0096 QList<QByteArray> dicts; 0097 quint32 entry_bits; 0098 quint32 dict1[256]; 0099 quint32 dict2[64]; 0100 0101 QByteArray buf; 0102 }; 0103 0104 0105 0106 QByteArray RLEDecompressor::decompress(const QByteArray& data) 0107 { 0108 QByteArray ret; 0109 ret.reserve(8192); 0110 0111 unsigned char token; 0112 unsigned short copyLength, N, shift; 0113 unsigned short shifted; 0114 int i=0; 0115 int maxIndex=data.size()-1; 0116 0117 while (i<data.size()) { 0118 token = data.at(i++); 0119 switch (TOKEN_CODE[token]) { 0120 case 0: 0121 ret.append(token); 0122 break; 0123 case 1: 0124 if ((i + token > maxIndex) ) { 0125 goto endOfLoop; 0126 } 0127 ret.append(data.mid(i,token)); 0128 i+=token; 0129 break; 0130 case 2: 0131 ret.append(' '); 0132 ret.append(token ^ 0x80); 0133 break; 0134 case 3: 0135 if (i + 1 > maxIndex) { 0136 goto endOfLoop; 0137 } 0138 N = token; 0139 N<<=8; 0140 N+=(unsigned char)data.at(i++); 0141 copyLength = (N & 7) + 3; 0142 shift = (N & 0x3fff) / 
8; 0143 shifted = ret.size()-shift; 0144 if (shifted>(ret.size()-1)) goto endOfLoop; 0145 for (int i=0;i<copyLength;i++) ret.append(ret.at(shifted+i)); 0146 break; 0147 } 0148 } 0149 endOfLoop: 0150 return ret; 0151 0152 } 0153 0154 quint32 readBELong(const QByteArray& data, int offset) 0155 { 0156 quint32 ret=0; 0157 for (int i=0;i<4;i++) { ret<<=8; ret+=(unsigned char)data[offset+i]; } 0158 return ret; 0159 } 0160 0161 HuffdicDecompressor::HuffdicDecompressor(const PDB& p) : Decompressor(p) 0162 { 0163 QByteArray header=p.getRecord(0); 0164 quint32 huff_ofs=readBELong(header,0x70); 0165 quint32 huff_num=readBELong(header,0x74); 0166 quint32 off1,off2; 0167 0168 QByteArray huff1=p.getRecord(huff_ofs); 0169 if (huff1.isNull()) goto fail; 0170 for (unsigned int i=1;i<huff_num;i++) { 0171 QByteArray h=p.getRecord(huff_ofs+i); 0172 if (h.isNull()) goto fail; 0173 dicts.append(h); 0174 } 0175 0176 off1=readBELong(huff1,16); 0177 off2=readBELong(huff1,20); 0178 0179 if (!huff1.startsWith("HUFF")) goto fail; //krazy:exclude=strings 0180 if (!dicts[0].startsWith("CDIC")) goto fail; //krazy:exclude=strings 0181 0182 entry_bits=readBELong(dicts[0],12); 0183 0184 memcpy(dict1,huff1.data()+off1, 256*4); 0185 memcpy(dict2,huff1.data()+off2, 64*4); 0186 return; 0187 fail: 0188 valid=false; 0189 } 0190 0191 QByteArray HuffdicDecompressor::decompress(const QByteArray& data) 0192 { 0193 buf.clear(); 0194 unpack(BitReader(data)); 0195 return buf; 0196 } 0197 0198 void HuffdicDecompressor::unpack(BitReader reader,int depth) 0199 { 0200 if (depth>32) goto fail; 0201 while (reader.left()) { 0202 quint32 dw=reader.read(); 0203 quint32 v=dict1[dw>>24]; 0204 quint8 codelen = v & 0x1F; 0205 if (!codelen) goto fail; 0206 quint32 code = dw >> (32 - codelen); 0207 quint32 r=(v >> 8); 0208 if (!( v & 0x80)) { 0209 while (code < dict2[(codelen-1)*2]) { 0210 codelen++; 0211 code = dw >> (32 - codelen); 0212 } 0213 r = dict2[(codelen-1)*2+1]; 0214 } 0215 r-=code; 0216 if (!codelen) goto fail; 
0217 if (!reader.eat(codelen)) return; 0218 quint32 dict_no = r >> entry_bits; 0219 quint32 off1 = 16 + (r - (dict_no << entry_bits))*2; 0220 QByteArray dict=dicts[dict_no]; 0221 quint32 off2 = 16 + (unsigned char)dict[off1]*256 + (unsigned char)dict[off1+1]; 0222 quint32 blen = (unsigned char)dict[off2]*256 + (unsigned char)dict[off2+1]; 0223 QByteArray slice=dict.mid(off2+2,(blen & 0x7fff)); 0224 if (blen & 0x8000) buf+=slice; 0225 else unpack(BitReader(slice),depth+1); 0226 } 0227 return; 0228 fail: 0229 valid=false; 0230 } 0231 0232 Decompressor* Decompressor::create(quint8 type, const PDB& pdb) 0233 { 0234 switch (type) { 0235 case 1 : return new NOOPDecompressor(pdb); 0236 case 2 : return new RLEDecompressor(pdb); 0237 case 'H' : return new HuffdicDecompressor(pdb); 0238 default : return nullptr; 0239 } 0240 0241 } 0242 }