File indexing completed on 2024-04-28 15:38:20

0001 /***************************************************************************
0002  *   Copyright (C) 2008 by Jakub Stachowski <qbast@go2.pl>                 *
0003  *                                                                         *
0004  *   This program is free software; you can redistribute it and/or modify  *
0005  *   it under the terms of the GNU General Public License as published by  *
0006  *   the Free Software Foundation; either version 2 of the License, or     *
0007  *   (at your option) any later version.                                   *
0008  ***************************************************************************/
0009 
0010 #include "mobipocket.h"
0011 #include "decompressor.h"
0012 
0013 #include <QIODevice>
0014 #include <QtEndian>
0015 #include <QBuffer>
0016 #include <QTextCodec>
0017 #include <QImageReader>
0018 #include <QRegExp>
0019 
0020 namespace Mobipocket {
0021 
0022 QByteArray Stream::read(int len)
0023 {
0024     QByteArray ret;
0025     ret.resize(len);
0026     len=read(ret.data(),len);
0027     if (len<0) len=0;
0028     ret.resize(len);
0029     return ret;
0030 }
0031 
0032 QByteArray Stream::readAll() 
0033 {
0034     QByteArray ret, bit;
0035     while (!(bit=read(4096)).isEmpty()) ret+=bit;
0036     return ret;
0037 }
0038 
0039 
0040 
0041 struct PDBPrivate {
0042     QList<quint32> recordOffsets;
0043     Stream* device;
0044     QString fileType;
0045     quint16 nrecords;
0046     bool valid;
0047     
0048     void init();
0049 };
0050 
0051 void PDBPrivate::init() 
0052 {
0053         valid=true;
0054         quint16 word;
0055         quint32 dword;
0056         if (!device->seek(0x3c)) goto fail;
0057         fileType=QString::fromLatin1(device->read(8));
0058         
0059         if (!device->seek(0x4c)) goto fail;
0060         device->read((char*)&word,2);
0061         nrecords=qFromBigEndian(word);
0062         
0063         for (int i=0;i<nrecords;i++) {
0064             device->read((char*)&dword,4);
0065             recordOffsets.append(qFromBigEndian(dword)); 
0066             device->read((char*)&dword,4);
0067         }
0068         return;
0069     fail:
0070         valid=false;
0071 }
0072 
0073 PDB::PDB(Stream* dev) : d(new PDBPrivate)
0074 {
0075     d->device=dev;
0076     d->init();
0077 }
0078 
0079 PDB::~PDB()
0080 {
0081     delete d;
0082 }
0083 
0084 QByteArray PDB::getRecord(int i) const
0085 {
0086     if (i>=d->nrecords) return QByteArray();
0087     quint32 offset=d->recordOffsets[i];
0088     bool last=(i==(d->nrecords-1));
0089     if (!d->device->seek(offset)) return QByteArray();
0090     if (last) return d->device->readAll();
0091     return d->device->read(d->recordOffsets[i+1]-offset);
0092 }
0093 
0094 bool PDB::isValid() const
0095 {
0096     return d->valid;
0097 }
0098 
0099 int PDB::recordCount() const
0100 {
0101     return d->nrecords;
0102 }
0103 
0104 ////////////////////////////////////////////
0105 struct DocumentPrivate 
0106 {
0107     DocumentPrivate(Stream* d) : pdb(d), valid(true), firstImageRecord(0), 
0108         drm(false), thumbnailIndex(0) {}
0109     PDB pdb;
0110     Decompressor* dec;
0111     quint16 ntextrecords;
0112     quint16 maxRecordSize;
0113     bool valid;
0114     
0115     // number of first record holding image. Usually it is directly after end of text, but not always
0116     quint16 firstImageRecord;
0117     QMap<Document::MetaKey, QString> metadata;
0118     QTextCodec* codec;
0119     bool drm;
0120     
0121     // index of thumbnail in image list. May be specified in EXTH. 
0122     // If not then just use first image and hope for the best
0123     int thumbnailIndex;
0124     
0125     void init();
0126     void findFirstImage();
0127     void parseEXTH(const QByteArray& data);
0128     void parseHtmlHead(const QString& data);
0129     QString readEXTHRecord(const QByteArray& data, quint32& offset);
0130     QImage getImageFromRecord(int recnum);
0131 }; 
0132 
0133 
0134 void DocumentPrivate::parseHtmlHead(const QString& data)
0135 {
0136     static QRegExp title(QLatin1String("<dc:title.*>(.*)</dc:title>"), Qt::CaseInsensitive);
0137     static QRegExp author(QLatin1String("<dc:creator.*>(.*)</dc:creator>"), Qt::CaseInsensitive);
0138     static QRegExp copyright(QLatin1String("<dc:rights.*>(.*)</dc:rights>"), Qt::CaseInsensitive);
0139     static QRegExp subject(QLatin1String("<dc:subject.*>(.*)</dc:subject>"), Qt::CaseInsensitive);
0140     static QRegExp description(QLatin1String("<dc:description.*>(.*)</dc:description>"), Qt::CaseInsensitive);
0141     title.setMinimal(true);
0142     author.setMinimal(true);
0143     copyright.setMinimal(true);
0144     subject.setMinimal(true);
0145     description.setMinimal(true);
0146     
0147     // title could have been already taken from MOBI record
0148     if (!metadata.contains(Document::Title) && title.indexIn(data)!=-1) metadata[Document::Title]=title.capturedTexts()[1];
0149     if (author.indexIn(data)!=-1) metadata[Document::Author]=author.capturedTexts()[1];
0150     if (copyright.indexIn(data)!=-1) metadata[Document::Copyright]=copyright.capturedTexts()[1];
0151     if (subject.indexIn(data)!=-1) metadata[Document::Subject]=subject.capturedTexts()[1];
0152     if (description.indexIn(data)!=-1) metadata[Document::Description]=description.capturedTexts()[1];
0153     
0154 }
0155 
0156 void DocumentPrivate::init()
0157 {
0158     quint32 encoding=0;
0159 
0160     valid=pdb.isValid();
0161     if (!valid) return;
0162     QByteArray mhead=pdb.getRecord(0);
0163     if (mhead.isNull() || mhead.size() <14 ) goto fail;
0164     dec = Decompressor::create(mhead[1], pdb);
0165     if ((int)mhead[12]!=0 || (int)mhead[13]!=0) drm=true;
0166     if (!dec) goto fail;
0167 
0168     ntextrecords=(unsigned char)mhead[8];
0169     ntextrecords<<=8;
0170     ntextrecords+=(unsigned char)mhead[9];
0171     maxRecordSize=(unsigned char)mhead[10];
0172     maxRecordSize<<=8;
0173     maxRecordSize+=(unsigned char)mhead[11];
0174     if (mhead.size() > 31 ) encoding=readBELong(mhead, 28);
0175     if (encoding==65001) codec=QTextCodec::codecForName("UTF-8");
0176     else codec=QTextCodec::codecForName("CP1252");
0177     if (mhead.size()>176) parseEXTH(mhead);
0178     
0179     // try getting metadata from HTML if nothing or only title was recovered from MOBI and EXTH records
0180     if (metadata.size()<2 && !drm) parseHtmlHead(codec->toUnicode(dec->decompress(pdb.getRecord(1))));
0181     return;
0182 fail:
0183     valid=false;
0184 }
0185 
0186 void DocumentPrivate::findFirstImage() {
0187     firstImageRecord=ntextrecords+1;
0188     while (firstImageRecord<pdb.recordCount()) {
0189         QByteArray rec=pdb.getRecord(firstImageRecord);
0190         if (rec.isNull()) return;
0191         QBuffer buf(&rec);
0192         buf.open(QIODevice::ReadOnly);
0193         QImageReader r(&buf);
0194         if (r.canRead()) return;
0195         firstImageRecord++;
0196     }
0197 }
0198 
0199 QString DocumentPrivate::readEXTHRecord(const QByteArray& data, quint32& offset)
0200 {
0201     quint32 len=readBELong(data,offset);
0202     offset+=4;
0203     len-=8;
0204     QString ret=codec->toUnicode(data.mid(offset,len));
0205     offset+=len;
0206     return ret;
0207 }
0208 
0209 QImage DocumentPrivate::getImageFromRecord(int i) 
0210 {
0211     QByteArray rec=pdb.getRecord(i);
0212     return (rec.isNull()) ? QImage() : QImage::fromData(rec);
0213 }
0214 
0215 
0216 void DocumentPrivate::parseEXTH(const QByteArray& data) 
0217 {
0218     // try to get name 
0219     if (data.size()>=92) {
0220         qint32 nameoffset=readBELong(data,84);
0221         qint32 namelen=readBELong(data,88);
0222         if ( (nameoffset + namelen) < data.size() ) {
0223             metadata[Document::Title]=codec->toUnicode(data.mid(nameoffset, namelen));
0224         }
0225     }
0226 
0227     quint32 exthoffs=readBELong(data,20)+16;
0228 
0229     if (data.mid(exthoffs,4)!="EXTH") return;
0230     quint32 records=readBELong(data,exthoffs+8);
0231     quint32 offset=exthoffs+12;
0232     for (unsigned int i=0;i<records;i++) {
0233         if (offset+4 > quint32(data.size())) break;
0234         quint32 type=readBELong(data,offset);
0235         offset+=4;
0236         switch (type) {
0237             case 100: metadata[Document::Author]=readEXTHRecord(data,offset); break;
0238             case 103: metadata[Document::Description]=readEXTHRecord(data,offset); break;
0239             case 105: metadata[Document::Subject]=readEXTHRecord(data,offset); break;
0240             case 109: metadata[Document::Copyright]=readEXTHRecord(data,offset); break;
0241             case 202: offset += 4; thumbnailIndex = readBELong(data,offset); offset+=4; break;
0242             default: readEXTHRecord(data,offset);
0243         }
0244     }
0245             
0246     
0247 }
0248 
0249 Document::Document(Stream* dev) : d(new DocumentPrivate(dev))
0250 {
0251     d->init();
0252 }
0253 
0254 Document::~Document()
0255 {
0256     delete d;
0257 }
0258 
0259 
0260 QString Document::text(int size) const 
0261 {
0262     QByteArray whole;
0263     for (int i=1;i<d->ntextrecords+1;i++) { 
0264         QByteArray decompressedRecord = d->dec->decompress(d->pdb.getRecord(i));
0265         if (decompressedRecord.size() > d->maxRecordSize)
0266             decompressedRecord.resize(d->maxRecordSize);
0267         whole+=decompressedRecord;
0268         if (!d->dec->isValid()) {
0269             d->valid=false;
0270             return QString();
0271         }
0272         if (size!=-1 && whole.size()>size) break;
0273     }
0274     return d->codec->toUnicode(whole);
0275 }
0276 
0277 int Document::imageCount() const 
0278 {
0279     //FIXME: don't count FLIS and FCIS records
0280     return d->pdb.recordCount()-d->ntextrecords;
0281 }
0282 
0283 bool Document::isValid() const
0284 {
0285     return d->valid;
0286 }
0287 
0288 QImage Document::getImage(int i) const 
0289 {
0290     if (!d->firstImageRecord) d->findFirstImage();
0291     return d->getImageFromRecord(d->firstImageRecord+i);
0292 }
0293 
0294 QMap<Document::MetaKey,QString> Document::metadata() const
0295 {
0296     return d->metadata;
0297 }
0298 
0299 bool Document::hasDRM() const
0300 {
0301     return d->drm;
0302 }
0303 
0304 QImage Document::thumbnail() const 
0305 {
0306     if (!d->firstImageRecord) d->findFirstImage();
0307     QImage img=d->getImageFromRecord(d->thumbnailIndex+d->firstImageRecord);
0308     // does not work, try first image
0309     if (img.isNull() && d->thumbnailIndex) {
0310         d->thumbnailIndex=0;
0311         img=d->getImageFromRecord(d->firstImageRecord);
0312     }
0313     return img;
0314 }
0315 
0316 }