File indexing completed on 2024-04-28 15:38:20
0001 /*************************************************************************** 0002 * Copyright (C) 2008 by Jakub Stachowski <qbast@go2.pl> * 0003 * * 0004 * This program is free software; you can redistribute it and/or modify * 0005 * it under the terms of the GNU General Public License as published by * 0006 * the Free Software Foundation; either version 2 of the License, or * 0007 * (at your option) any later version. * 0008 ***************************************************************************/ 0009 0010 #include "mobipocket.h" 0011 #include "decompressor.h" 0012 0013 #include <QIODevice> 0014 #include <QtEndian> 0015 #include <QBuffer> 0016 #include <QTextCodec> 0017 #include <QImageReader> 0018 #include <QRegExp> 0019 0020 namespace Mobipocket { 0021 0022 QByteArray Stream::read(int len) 0023 { 0024 QByteArray ret; 0025 ret.resize(len); 0026 len=read(ret.data(),len); 0027 if (len<0) len=0; 0028 ret.resize(len); 0029 return ret; 0030 } 0031 0032 QByteArray Stream::readAll() 0033 { 0034 QByteArray ret, bit; 0035 while (!(bit=read(4096)).isEmpty()) ret+=bit; 0036 return ret; 0037 } 0038 0039 0040 0041 struct PDBPrivate { 0042 QList<quint32> recordOffsets; 0043 Stream* device; 0044 QString fileType; 0045 quint16 nrecords; 0046 bool valid; 0047 0048 void init(); 0049 }; 0050 0051 void PDBPrivate::init() 0052 { 0053 valid=true; 0054 quint16 word; 0055 quint32 dword; 0056 if (!device->seek(0x3c)) goto fail; 0057 fileType=QString::fromLatin1(device->read(8)); 0058 0059 if (!device->seek(0x4c)) goto fail; 0060 device->read((char*)&word,2); 0061 nrecords=qFromBigEndian(word); 0062 0063 for (int i=0;i<nrecords;i++) { 0064 device->read((char*)&dword,4); 0065 recordOffsets.append(qFromBigEndian(dword)); 0066 device->read((char*)&dword,4); 0067 } 0068 return; 0069 fail: 0070 valid=false; 0071 } 0072 0073 PDB::PDB(Stream* dev) : d(new PDBPrivate) 0074 { 0075 d->device=dev; 0076 d->init(); 0077 } 0078 0079 PDB::~PDB() 0080 { 0081 delete d; 0082 } 0083 0084 QByteArray PDB::getRecord(int i) const 0085 { 0086 if (i>=d->nrecords) return QByteArray(); 0087 quint32 offset=d->recordOffsets[i]; 0088 bool last=(i==(d->nrecords-1)); 0089 if (!d->device->seek(offset)) return QByteArray(); 0090 if (last) return d->device->readAll(); 0091 return d->device->read(d->recordOffsets[i+1]-offset); 0092 } 0093 0094 bool PDB::isValid() const 0095 { 0096 return d->valid; 0097 } 0098 0099 int PDB::recordCount() const 0100 { 0101 return d->nrecords; 0102 } 0103 0104 //////////////////////////////////////////// 0105 struct DocumentPrivate 0106 { 0107 DocumentPrivate(Stream* d) : pdb(d), valid(true), firstImageRecord(0), 0108 drm(false), thumbnailIndex(0) {} 0109 PDB pdb; 0110 Decompressor* dec; 0111 quint16 ntextrecords; 0112 quint16 maxRecordSize; 0113 bool valid; 0114 0115 // number of first record holding image. Usually it is directly after end of text, but not always 0116 quint16 firstImageRecord; 0117 QMap<Document::MetaKey, QString> metadata; 0118 QTextCodec* codec; 0119 bool drm; 0120 0121 // index of thumbnail in image list. May be specified in EXTH. 0122 // If not then just use first image and hope for the best 0123 int thumbnailIndex; 0124 0125 void init(); 0126 void findFirstImage(); 0127 void parseEXTH(const QByteArray& data); 0128 void parseHtmlHead(const QString& data); 0129 QString readEXTHRecord(const QByteArray& data, quint32& offset); 0130 QImage getImageFromRecord(int recnum); 0131 }; 0132 0133 0134 void DocumentPrivate::parseHtmlHead(const QString& data) 0135 { 0136 static QRegExp title(QLatin1String("<dc:title.*>(.*)</dc:title>"), Qt::CaseInsensitive); 0137 static QRegExp author(QLatin1String("<dc:creator.*>(.*)</dc:creator>"), Qt::CaseInsensitive); 0138 static QRegExp copyright(QLatin1String("<dc:rights.*>(.*)</dc:rights>"), Qt::CaseInsensitive); 0139 static QRegExp subject(QLatin1String("<dc:subject.*>(.*)</dc:subject>"), Qt::CaseInsensitive); 0140 static QRegExp description(QLatin1String("<dc:description.*>(.*)</dc:description>"), Qt::CaseInsensitive); 0141 title.setMinimal(true); 0142 author.setMinimal(true); 0143 copyright.setMinimal(true); 0144 subject.setMinimal(true); 0145 description.setMinimal(true); 0146 0147 // title could have been already taken from MOBI record 0148 if (!metadata.contains(Document::Title) && title.indexIn(data)!=-1) metadata[Document::Title]=title.capturedTexts()[1]; 0149 if (author.indexIn(data)!=-1) metadata[Document::Author]=author.capturedTexts()[1]; 0150 if (copyright.indexIn(data)!=-1) metadata[Document::Copyright]=copyright.capturedTexts()[1]; 0151 if (subject.indexIn(data)!=-1) metadata[Document::Subject]=subject.capturedTexts()[1]; 0152 if (description.indexIn(data)!=-1) metadata[Document::Description]=description.capturedTexts()[1]; 0153 0154 } 0155 0156 void DocumentPrivate::init() 0157 { 0158 quint32 encoding=0; 0159 0160 valid=pdb.isValid(); 0161 if (!valid) return; 0162 QByteArray mhead=pdb.getRecord(0); 0163 if (mhead.isNull() || mhead.size() <14 ) goto fail; 0164 dec = Decompressor::create(mhead[1], pdb); 0165 if ((int)mhead[12]!=0 || (int)mhead[13]!=0) drm=true; 0166 if (!dec) goto fail; 0167 0168 ntextrecords=(unsigned char)mhead[8]; 0169 ntextrecords<<=8; 0170 ntextrecords+=(unsigned char)mhead[9]; 0171 maxRecordSize=(unsigned char)mhead[10]; 0172 maxRecordSize<<=8; 0173 maxRecordSize+=(unsigned char)mhead[11]; 0174 if (mhead.size() > 31 ) encoding=readBELong(mhead, 28); 0175 if (encoding==65001) codec=QTextCodec::codecForName("UTF-8"); 0176 else codec=QTextCodec::codecForName("CP1252"); 0177 if (mhead.size()>176) parseEXTH(mhead); 0178 0179 // try getting metadata from HTML if nothing or only title was recovered from MOBI and EXTH records 0180 if (metadata.size()<2 && !drm) parseHtmlHead(codec->toUnicode(dec->decompress(pdb.getRecord(1)))); 0181 return; 0182 fail: 0183 valid=false; 0184 } 0185 0186 void DocumentPrivate::findFirstImage() { 0187 firstImageRecord=ntextrecords+1; 0188 while (firstImageRecord<pdb.recordCount()) { 0189 QByteArray rec=pdb.getRecord(firstImageRecord); 0190 if (rec.isNull()) return; 0191 QBuffer buf(&rec); 0192 buf.open(QIODevice::ReadOnly); 0193 QImageReader r(&buf); 0194 if (r.canRead()) return; 0195 firstImageRecord++; 0196 } 0197 } 0198 0199 QString DocumentPrivate::readEXTHRecord(const QByteArray& data, quint32& offset) 0200 { 0201 quint32 len=readBELong(data,offset); 0202 offset+=4; 0203 len-=8; 0204 QString ret=codec->toUnicode(data.mid(offset,len)); 0205 offset+=len; 0206 return ret; 0207 } 0208 0209 QImage DocumentPrivate::getImageFromRecord(int i) 0210 { 0211 QByteArray rec=pdb.getRecord(i); 0212 return (rec.isNull()) ? QImage() : QImage::fromData(rec); 0213 } 0214 0215 0216 void DocumentPrivate::parseEXTH(const QByteArray& data) 0217 { 0218 // try to get name 0219 if (data.size()>=92) { 0220 qint32 nameoffset=readBELong(data,84); 0221 qint32 namelen=readBELong(data,88); 0222 if ( (nameoffset + namelen) < data.size() ) { 0223 metadata[Document::Title]=codec->toUnicode(data.mid(nameoffset, namelen)); 0224 } 0225 } 0226 0227 quint32 exthoffs=readBELong(data,20)+16; 0228 0229 if (data.mid(exthoffs,4)!="EXTH") return; 0230 quint32 records=readBELong(data,exthoffs+8); 0231 quint32 offset=exthoffs+12; 0232 for (unsigned int i=0;i<records;i++) { 0233 if (offset+4 > quint32(data.size())) break; 0234 quint32 type=readBELong(data,offset); 0235 offset+=4; 0236 switch (type) { 0237 case 100: metadata[Document::Author]=readEXTHRecord(data,offset); break; 0238 case 103: metadata[Document::Description]=readEXTHRecord(data,offset); break; 0239 case 105: metadata[Document::Subject]=readEXTHRecord(data,offset); break; 0240 case 109: metadata[Document::Copyright]=readEXTHRecord(data,offset); break; 0241 case 202: offset += 4; thumbnailIndex = readBELong(data,offset); offset+=4; break; 0242 default: readEXTHRecord(data,offset); 0243 } 0244 } 0245 0246 0247 } 0248 0249 Document::Document(Stream* dev) : d(new DocumentPrivate(dev)) 0250 { 0251 d->init(); 0252 } 0253 0254 Document::~Document() 0255 { 0256 delete d; 0257 } 0258 0259 0260 QString Document::text(int size) const 0261 { 0262 QByteArray whole; 0263 for (int i=1;i<d->ntextrecords+1;i++) { 0264 QByteArray decompressedRecord = d->dec->decompress(d->pdb.getRecord(i)); 0265 if (decompressedRecord.size() > d->maxRecordSize) 0266 decompressedRecord.resize(d->maxRecordSize); 0267 whole+=decompressedRecord; 0268 if (!d->dec->isValid()) { 0269 d->valid=false; 0270 return QString(); 0271 } 0272 if (size!=-1 && whole.size()>size) break; 0273 } 0274 return d->codec->toUnicode(whole); 0275 } 0276 0277 int Document::imageCount() const 0278 { 0279 //FIXME: don't count FLIS and FCIS records 0280 return d->pdb.recordCount()-d->ntextrecords; 0281 } 0282 0283 bool Document::isValid() const 0284 { 0285 return d->valid; 0286 } 0287 0288 QImage Document::getImage(int i) const 0289 { 0290 if (!d->firstImageRecord) d->findFirstImage(); 0291 return d->getImageFromRecord(d->firstImageRecord+i); 0292 } 0293 0294 QMap<Document::MetaKey,QString> Document::metadata() const 0295 { 0296 return d->metadata; 0297 } 0298 0299 bool Document::hasDRM() const 0300 { 0301 return d->drm; 0302 } 0303 0304 QImage Document::thumbnail() const 0305 { 0306 if (!d->firstImageRecord) d->findFirstImage(); 0307 QImage img=d->getImageFromRecord(d->thumbnailIndex+d->firstImageRecord); 0308 // does not work, try first image 0309 if (img.isNull() && d->thumbnailIndex) { 0310 d->thumbnailIndex=0; 0311 img=d->getImageFromRecord(d->firstImageRecord); 0312 } 0313 return img; 0314 } 0315 0316 }