File indexing completed on 2024-04-28 15:38:19

0001 /***************************************************************************
0002  *   Copyright (C) 2008 by Jakub Stachowski <qbast@go2.pl>                 *
0003  *                                                                         *
0004  *   RLE decompressor based on FBReader                                    *
0005  *   Copyright (C) 2004-2008 Geometer Plus <contact@geometerplus.com>      *
0006  *                                                                         *
0007  *   Huffdic decompressor based on Python code by Igor Skochinsky          *
0008  *                                                                         *
0009  *   This program is free software; you can redistribute it and/or modify  *
0010  *   it under the terms of the GNU General Public License as published by  *
0011  *   the Free Software Foundation; either version 2 of the License, or     *
0012  *   (at your option) any later version.                                   *
0013  ***************************************************************************/
0014 
0015 #include "decompressor.h"
0016 #include "mobipocket.h"
0017 
0018 #include <QList>
0019 
// Classification of every possible input byte for RLEDecompressor::decompress():
//   0 - the byte is appended to the output unchanged
//   1 - the byte is a count; that many following input bytes are copied
//   2 - a space is emitted, followed by the byte XOR 0x80
//   3 - first byte of a two-byte back-reference into the output so far
static const unsigned char TOKEN_CODE[256] = {
    /* 0x00 - 0x0f */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 - 0x1f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 - 0x2f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x30 - 0x3f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x40 - 0x4f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x50 - 0x5f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x60 - 0x6f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x70 - 0x7f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x80 - 0x8f */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0x90 - 0x9f */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0xa0 - 0xaf */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0xb0 - 0xbf */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0xc0 - 0xcf */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xd0 - 0xdf */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xe0 - 0xef */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xf0 - 0xff */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
};
0038 
0039 namespace Mobipocket {
0040 
0041 class NOOPDecompressor : public Decompressor
0042 {
0043 public:
0044     NOOPDecompressor(const PDB& p) : Decompressor(p) {}
0045     QByteArray decompress(const QByteArray& data) override { return data; }
0046 };
0047 
0048 
0049 class RLEDecompressor : public Decompressor
0050 {
0051 public:
0052     RLEDecompressor(const PDB& p) : Decompressor(p) {}
0053     QByteArray decompress(const QByteArray& data) override;
0054 };
0055 
0056 class BitReader
0057 {
0058 public:
0059     BitReader(const QByteArray& d) : pos(0), data(d)
0060     {
0061         data.append("\000\000\000\000");    //krazy:exclude=strings
0062         len=data.size()*8;
0063     }
0064     
0065     quint32 read() {
0066         quint32 g=0;
0067         quint64 r=0;
0068         while (g<32) {
0069             r=(r << 8) | (quint8)data[(pos+g)>>3];
0070             g=g+8 - ((pos+g) & 7);
0071         }
0072         return (r >> (g-32));
0073     }
0074     bool eat(int n) {
0075         pos+=n;
0076         return pos <= len;
0077     }
0078     
0079     int left() {
0080         return len - pos;
0081     }
0082     
0083 private:
0084     int pos;
0085     int len;
0086     QByteArray data;
0087 };
0088 
0089 class HuffdicDecompressor : public Decompressor
0090 {
0091 public:
0092     HuffdicDecompressor(const PDB& p);
0093     QByteArray decompress(const QByteArray& data) override;
0094 private:
0095     void unpack(BitReader reader, int depth = 0);
0096     QList<QByteArray> dicts;
0097     quint32 entry_bits;
0098     quint32 dict1[256];
0099     quint32 dict2[64];
0100     
0101     QByteArray buf;
0102 };
0103 
0104 
0105 
0106 QByteArray RLEDecompressor::decompress(const QByteArray& data)
0107 {
0108         QByteArray ret;
0109         ret.reserve(8192);
0110 
0111         unsigned char token;
0112         unsigned short copyLength, N, shift;
0113         unsigned short shifted;
0114         int i=0;
0115         int maxIndex=data.size()-1;
0116 
0117         while (i<data.size()) {
0118             token = data.at(i++);
0119             switch (TOKEN_CODE[token]) {
0120                 case 0:
0121                         ret.append(token);
0122                     break;
0123                 case 1:
0124                     if ((i + token > maxIndex) ) {
0125                         goto endOfLoop;
0126                     }
0127                     ret.append(data.mid(i,token));
0128                     i+=token;
0129                     break;
0130                 case 2:
0131                         ret.append(' ');
0132                         ret.append(token ^ 0x80);
0133                     break;
0134                 case 3:
0135                     if (i + 1 > maxIndex) {
0136                         goto endOfLoop;
0137                     }
0138                                         N = token;
0139                                         N<<=8;
0140                                         N+=(unsigned char)data.at(i++);
0141                     copyLength = (N & 7) + 3;
0142                     shift = (N & 0x3fff) / 8;
0143                     shifted = ret.size()-shift;
0144                     if (shifted>(ret.size()-1)) goto endOfLoop;
0145                     for (int i=0;i<copyLength;i++) ret.append(ret.at(shifted+i));
0146                     break;
0147             }
0148         }
0149 endOfLoop:
0150     return ret;
0151 
0152 }
0153 
0154 quint32 readBELong(const QByteArray& data, int offset)
0155 {
0156     quint32 ret=0;
0157     for (int i=0;i<4;i++) { ret<<=8; ret+=(unsigned char)data[offset+i]; }
0158     return ret;
0159 }
0160 
0161 HuffdicDecompressor::HuffdicDecompressor(const PDB& p) : Decompressor(p)
0162 {
0163     QByteArray header=p.getRecord(0);
0164     quint32 huff_ofs=readBELong(header,0x70);
0165     quint32 huff_num=readBELong(header,0x74);
0166     quint32 off1,off2;
0167 
0168     QByteArray huff1=p.getRecord(huff_ofs);
0169     if (huff1.isNull()) goto fail;
0170     for (unsigned int i=1;i<huff_num;i++) {
0171         QByteArray h=p.getRecord(huff_ofs+i);
0172         if (h.isNull()) goto fail;
0173         dicts.append(h);
0174     }
0175 
0176     off1=readBELong(huff1,16);
0177     off2=readBELong(huff1,20);
0178 
0179     if (!huff1.startsWith("HUFF")) goto fail;           //krazy:exclude=strings
0180     if (!dicts[0].startsWith("CDIC")) goto fail;        //krazy:exclude=strings
0181 
0182     entry_bits=readBELong(dicts[0],12);
0183 
0184     memcpy(dict1,huff1.data()+off1, 256*4);
0185     memcpy(dict2,huff1.data()+off2, 64*4);    
0186     return;
0187 fail:
0188     valid=false;
0189 }
0190 
0191 QByteArray HuffdicDecompressor::decompress(const QByteArray& data)
0192 {
0193     buf.clear();
0194     unpack(BitReader(data));
0195     return buf;
0196 }
0197 
0198 void HuffdicDecompressor::unpack(BitReader reader,int depth) 
0199 {
0200     if (depth>32) goto fail;
0201     while (reader.left()) {
0202         quint32 dw=reader.read();
0203         quint32 v=dict1[dw>>24];
0204         quint8 codelen = v & 0x1F;
0205         if (!codelen) goto fail;
0206         quint32 code = dw >> (32 - codelen);
0207         quint32 r=(v >> 8);
0208         if (!( v & 0x80))  {
0209             while (code < dict2[(codelen-1)*2]) {
0210                 codelen++;
0211                 code = dw >> (32 - codelen);
0212             }
0213             r = dict2[(codelen-1)*2+1];
0214         }
0215         r-=code;
0216         if (!codelen) goto fail;
0217         if (!reader.eat(codelen)) return;
0218         quint32 dict_no = r >> entry_bits;
0219         quint32 off1 = 16 + (r - (dict_no << entry_bits))*2;
0220         QByteArray dict=dicts[dict_no];
0221         quint32 off2 = 16 + (unsigned char)dict[off1]*256 + (unsigned char)dict[off1+1];
0222         quint32 blen = (unsigned char)dict[off2]*256 + (unsigned char)dict[off2+1];
0223         QByteArray slice=dict.mid(off2+2,(blen & 0x7fff));
0224         if (blen & 0x8000) buf+=slice;
0225         else unpack(BitReader(slice),depth+1);
0226     }
0227     return;
0228 fail:
0229     valid=false;
0230 }
0231 
0232 Decompressor* Decompressor::create(quint8 type, const PDB& pdb) 
0233 {
0234         switch (type) {
0235             case 1 : return new NOOPDecompressor(pdb); 
0236             case 2 : return new RLEDecompressor(pdb); 
0237             case 'H' : return  new HuffdicDecompressor(pdb);
0238             default : return nullptr;
0239         }
0240 
0241 }
0242 }