File indexing completed on 2024-12-15 04:13:43

0001 /*
0002     Kchmviewer - a CHM and EPUB file viewer with broad language support
0003     SPDX-FileCopyrightText: 2004-2014 George Yunaev gyunaev@ulduzsoft.com
0004 
0005     SPDX-License-Identifier: GPL-3.0-or-later
0006 */
0007 
0008 #include <QDebug>
0009 #include <QFile>
0010 #include <QVector>
0011 
0012 #include "ebook_chm.h"
0013 #include "ebook_chm_encoding.h"
0014 
0015 #include "bitfiddle.h"
0016 
// Big-enough buffer size for use with various routines.
// In this file it is both the size of the on-stack scratch buffers and the
// read length passed to RetrieveObject() when parsing /#WINDOWS, /#STRINGS and /#SYSTEM.
#define BUF_SIZE 4096
// NOTE(review): purpose is not evident from this part of the file - confirm it is still needed.
#define COMMON_BUF_LEN 1025

// Stride, in bytes, used when walking the /#TOPICS table in fillTopicsUrlMap().
#define TOPICS_ENTRY_LEN 16
// NOTE(review): presumably the size of one /#URLTBL record - confirm against the CHM format notes.
#define URLTBL_ENTRY_LEN 12

// Parser tracing hook. As defined, DEBUGPARSER((...)) expands to nothing; enable the
// commented-out definition (and disable the empty one) to forward the arguments to qDebug.
//#define DEBUGPARSER(A)    qDebug A
#define DEBUGPARSER(A)

// URL scheme that isSupportedUrl() compares against.
#define URL_SCHEME_CHM QStringLiteral("ms-its")
0028 
0029 EBook_CHM::EBook_CHM()
0030     : EBook()
0031     , m_chmFile(nullptr)
0032     , m_detectedLCID(0)
0033     , m_textCodec(nullptr)
0034     , m_textCodecForSpecialFiles(nullptr)
0035     , m_currentEncoding(QStringLiteral("UTF-8"))
0036     , m_envOptions(QString::fromLatin1(qgetenv("KCHMVIEWEROPTS")))
0037 {
0038 }
0039 
0040 EBook_CHM::~EBook_CHM()
0041 {
0042     close();
0043 }
0044 
0045 void EBook_CHM::close()
0046 {
0047     if (m_chmFile == nullptr) {
0048         return;
0049     }
0050 
0051     chm_close(m_chmFile);
0052 
0053     m_chmFile = nullptr;
0054     m_filename = m_font = QString();
0055 
0056     m_home.clear();
0057     m_topicsFile.clear();
0058     m_indexFile.clear();
0059 
0060     m_textCodec = nullptr;
0061     m_textCodecForSpecialFiles = nullptr;
0062     m_detectedLCID = 0;
0063     m_currentEncoding = QStringLiteral("UTF-8");
0064 }
0065 
0066 QString EBook_CHM::title() const
0067 {
0068     return encodeWithCurrentCodec(m_title);
0069 }
0070 
0071 QUrl EBook_CHM::homeUrl() const
0072 {
0073     return pathToUrl(QString::fromUtf8(m_home));
0074 }
0075 
0076 bool EBook_CHM::hasFeature(EBook::Feature code) const
0077 {
0078     switch (code) {
0079     case FEATURE_TOC:
0080         return m_tocAvailable;
0081 
0082     case FEATURE_INDEX:
0083         return m_indexAvailable;
0084 
0085     case FEATURE_ENCODING:
0086         return true;
0087     }
0088 
0089     return false;
0090 }
0091 
0092 bool EBook_CHM::getTableOfContents(QList<EBookTocEntry> &toc) const
0093 {
0094     if (parseBinaryTOC(toc)) {
0095         return true;
0096     }
0097 
0098     // Parse the plain text TOC
0099     QList<ParsedEntry> parsed;
0100 
0101     if (!parseFileAndFillArray(QString::fromUtf8(m_topicsFile), parsed, false)) {
0102         return false;
0103     }
0104 
0105     // Find out the root offset, and reduce the indent level to it
0106     // so the toc starts from zero offset.
0107     int root_offset = -1;
0108 
0109     // Fill up the real toc
0110     toc.reserve(parsed.size());
0111     for (const ParsedEntry &e : std::as_const(parsed)) {
0112         if (root_offset == -1) {
0113             root_offset = e.indent;
0114         }
0115 
0116         EBookTocEntry entry;
0117         entry.iconid = (EBookTocEntry::Icon)e.iconid;
0118         entry.indent = e.indent - root_offset;
0119         entry.name = e.name;
0120 
0121         if (!e.urls.empty()) {
0122             entry.url = e.urls[0];
0123         }
0124 
0125         toc.append(entry);
0126     }
0127 
0128     return true;
0129 }
0130 
0131 bool EBook_CHM::getIndex(QList<EBookIndexEntry> &index) const
0132 {
0133     // Parse the plain text index
0134     QList<ParsedEntry> parsed;
0135 
0136     if (!parseFileAndFillArray(QString::fromUtf8(m_indexFile), parsed, true)) {
0137         return false;
0138     }
0139 
0140     // Find out the root offset, and reduce the indent level to it
0141     // so the index starts from zero offset.
0142     int root_offset = 0;
0143 
0144     // Fill up the real index
0145     index.reserve(parsed.size());
0146 
0147     // Find the index root offset
0148     const QList<ParsedEntry> &parsedList = parsed;
0149     for (const ParsedEntry &e : parsedList) {
0150         if (e.urls.empty()) {
0151             continue;
0152         }
0153 
0154         root_offset = qMin(root_offset, e.indent);
0155     }
0156 
0157     // And apply the index
0158     for (const ParsedEntry &e : parsedList) {
0159         if (e.urls.empty()) {
0160             continue;
0161         }
0162 
0163         EBookIndexEntry entry;
0164         entry.name = e.name;
0165         entry.urls = e.urls;
0166         entry.seealso = e.seealso;
0167 
0168         // If the index array is empty, make sure the first entry is on root offset
0169         if (index.isEmpty()) {
0170             entry.indent = root_offset;
0171         } else {
0172             entry.indent = e.indent - root_offset;
0173         }
0174 
0175         index.append(entry);
0176         printf("%d: %s\n", entry.indent, qPrintable(entry.name));
0177     }
0178 
0179     return true;
0180 }
0181 
0182 bool EBook_CHM::getFileContentAsString(QString &str, const QUrl &url) const
0183 {
0184     return getTextContent(str, urlToPath(url));
0185 }
0186 
0187 bool EBook_CHM::getFileContentAsBinary(QByteArray &data, const QUrl &url) const
0188 {
0189     return getBinaryContent(data, urlToPath(url));
0190 }
0191 
0192 bool EBook_CHM::getBinaryContent(QByteArray &data, const QString &url) const
0193 {
0194     chmUnitInfo ui;
0195 
0196     if (!ResolveObject(url, &ui)) {
0197         return false;
0198     }
0199 
0200     data.resize(ui.length);
0201 
0202     if (RetrieveObject(&ui, (unsigned char *)data.data(), 0, ui.length)) {
0203         return true;
0204     }
0205 
0206     return false;
0207 }
0208 
0209 bool EBook_CHM::getTextContent(QString &str, const QString &url, bool internal_encoding) const
0210 {
0211     QByteArray buf;
0212 
0213     if (getBinaryContent(buf, url)) {
0214         unsigned int length = buf.size();
0215 
0216         if (length > 0) {
0217             buf.resize(length + 1);
0218             buf[length] = '\0';
0219 
0220             str = internal_encoding ? QString::fromUtf8(buf.constData()) : encodeWithCurrentCodec(buf.constData());
0221             return true;
0222         }
0223     }
0224 
0225     return false;
0226 }
0227 
0228 int EBook_CHM::getContentSize(const QString &url)
0229 {
0230     chmUnitInfo ui;
0231 
0232     if (!ResolveObject(url, &ui)) {
0233         return -1;
0234     }
0235 
0236     return ui.length;
0237 }
0238 
0239 bool EBook_CHM::load(const QString &archiveName)
0240 {
0241     QString filename;
0242 
0243     // If the file has a file:// prefix, remove it
0244     if (archiveName.startsWith(QLatin1String("file://"))) {
0245         filename = archiveName.mid(7); // strip it
0246     } else {
0247         filename = archiveName;
0248     }
0249 
0250     if (m_chmFile) {
0251         close();
0252     }
0253 
0254 #if defined(WIN32)
0255     m_chmFile = chm_open((BSTR)QFile::encodeName(filename).constData());
0256 #else
0257     m_chmFile = chm_open(QFile::encodeName(filename).constData());
0258 #endif
0259 
0260     if (m_chmFile == nullptr) {
0261         return false;
0262     }
0263 
0264     m_filename = filename;
0265 
0266     // Reset encoding
0267     m_textCodec = nullptr;
0268     m_textCodecForSpecialFiles = nullptr;
0269     m_currentEncoding = QStringLiteral("UTF-8");
0270 
0271     // Get information from /#WINDOWS and /#SYSTEM files (encoding, title, context file and so)
0272     // and guess the encoding
0273     getInfoFromWindows();
0274     getInfoFromSystem();
0275     guessTextEncoding();
0276 
0277     // Check whether the search tables are present
0278     if (ResolveObject(QStringLiteral("/#TOPICS"), &m_chmTOPICS) && ResolveObject(QStringLiteral("/#STRINGS"), &m_chmSTRINGS) && ResolveObject(QStringLiteral("/#URLTBL"), &m_chmURLTBL) &&
0279         ResolveObject(QStringLiteral("/#URLSTR"), &m_chmURLSTR)) {
0280         m_lookupTablesValid = true;
0281         fillTopicsUrlMap();
0282     } else {
0283         m_lookupTablesValid = false;
0284     }
0285 
0286     // Some CHM files have toc and index files, but do not set the name properly.
0287     // Some heuristics here.
0288     if (m_topicsFile.isEmpty() && hasFile(QStringLiteral("/toc.hhc"))) {
0289         m_topicsFile = "/toc.hhc";
0290     }
0291 
0292     if (m_indexFile.isEmpty() && hasFile(QStringLiteral("/index.hhk"))) {
0293         m_indexFile = "/index.hhk";
0294     }
0295 
0296     if (!m_topicsFile.isEmpty() || (m_lookupTablesValid && hasFile(QStringLiteral("/#TOCIDX")))) {
0297         m_tocAvailable = true;
0298     } else {
0299         m_tocAvailable = false;
0300     }
0301 
0302     if (!m_indexFile.isEmpty() || (m_lookupTablesValid && hasFile(QStringLiteral("/$WWKeywordLinks/BTree")))) {
0303         m_indexAvailable = true;
0304     } else {
0305         m_indexAvailable = false;
0306     }
0307 
0308     return true;
0309 }
0310 
0311 int EBook_CHM::findStringInQuotes(const QString &tag, int offset, QString &value, bool firstquote, bool decodeentities) const
0312 {
0313     int qbegin = tag.indexOf(QLatin1Char('"'), offset);
0314 
0315     if (qbegin == -1) {
0316         qFatal("EBook_CHMImpl::findStringInQuotes: cannot find first quote in <param> tag: '%s'", qPrintable(tag));
0317     }
0318 
0319     int qend = firstquote ? tag.indexOf(QLatin1Char('"'), qbegin + 1) : tag.lastIndexOf(QLatin1Char('"'));
0320 
0321     if (qend == -1 || qend <= qbegin) {
0322         qFatal("EBook_CHMImpl::findStringInQuotes: cannot find last quote in <param> tag: '%s'", qPrintable(tag));
0323     }
0324 
0325     // If we do not need to decode HTML entities, just return.
0326     if (decodeentities) {
0327         QString htmlentity = QString();
0328         bool fill_entity = false;
0329 
0330         value.reserve(qend - qbegin); // to avoid multiple memory allocations
0331 
0332         for (int i = qbegin + 1; i < qend; i++) {
0333             if (!fill_entity) {
0334                 if (tag[i] == QLatin1Char('&')) { // HTML entity starts
0335                     fill_entity = true;
0336                 } else {
0337                     value.append(tag[i]);
0338                 }
0339             } else {
0340                 if (tag[i] == QLatin1Char(';')) // HTML entity ends
0341                 {
0342                     // If entity is an ASCII code, just decode it
0343                     QString decode = m_htmlEntityDecoder.decode(htmlentity);
0344 
0345                     if (decode.isNull()) {
0346                         break;
0347                     }
0348 
0349                     value.append(decode);
0350                     htmlentity = QString();
0351                     fill_entity = false;
0352                 } else {
0353                     htmlentity.append(tag[i]);
0354                 }
0355             }
0356         }
0357     } else {
0358         value = tag.mid(qbegin + 1, qend - qbegin - 1);
0359     }
0360 
0361     return qend + 1;
0362 }
0363 
0364 bool EBook_CHM::parseFileAndFillArray(const QString &file, QList<ParsedEntry> &data, bool asIndex) const
0365 {
0366     QString src;
0367     const int MAX_NEST_DEPTH = 256;
0368 
0369     if (!getTextContent(src, file) || src.isEmpty()) {
0370         return false;
0371     }
0372 
0373     /*
0374         // Save the index for debugging purposes
0375         QFile outfile( "parsed.htm" );
0376 
0377         if ( outfile.open( QIODevice::WriteOnly ) )
0378         {
0379             QTextStream textstream( &outfile );
0380             textstream << src;
0381             outfile.close();
0382         }
0383     */
0384 
0385     EBookTocEntry::Icon defaultimagenum = EBookTocEntry::IMAGE_AUTO;
0386     int pos = 0, indent = 0, root_indent_offset = 0;
0387     bool in_object = false, root_indent_offset_set = false;
0388 
0389     ParsedEntry entry;
0390     entry.iconid = defaultimagenum;
0391 
0392     // Split the HHC file by HTML tags
0393     int stringlen = src.length();
0394 
0395     while (pos < stringlen && (pos = src.indexOf(QLatin1Char('<'), pos)) != -1) {
0396         int i, word_end = 0;
0397 
0398         for (i = ++pos; i < stringlen; i++) {
0399             // If a " or ' is found, skip to the next one.
0400             if ((src[i] == QLatin1Char('"') || src[i] == QLatin1Char('\''))) {
0401                 // find where quote ends, either by another quote, or by '>' symbol (some people don't know HTML)
0402                 int nextpos = src.indexOf(src[i], i + 1);
0403                 if (nextpos == -1 && (nextpos = src.indexOf(QLatin1Char('>'), i + 1)) == -1) {
0404                     qWarning("EBook_CHMImpl::ParseHhcAndFillTree: corrupted TOC: %s", qPrintable(src.mid(i)));
0405                     return false;
0406                 }
0407 
0408                 i = nextpos;
0409             } else if (src[i] == QLatin1Char('>')) {
0410                 break;
0411             } else if (!src[i].isLetterOrNumber() && src[i] != QLatin1Char('/') && !word_end) {
0412                 word_end = i;
0413             }
0414         }
0415 
0416         QString tagword, tag = src.mid(pos, i - pos);
0417 
0418         if (word_end) {
0419             tagword = src.mid(pos, word_end - pos).toLower();
0420         } else {
0421             tagword = tag.toLower();
0422         }
0423 
0424         // DEBUGPARSER(("tag: '%s', tagword: '%s'\n", qPrintable( tag ), qPrintable( tagword ) ));
0425 
0426         // <OBJECT type="text/sitemap"> - a topic entry
0427         if (tagword == QLatin1String("object") && tag.indexOf(QLatin1String("text/sitemap"), 0, Qt::CaseInsensitive) != -1) {
0428             in_object = true;
0429         } else if (tagword == QLatin1String("/object") && in_object) {
0430             // a topic entry closed. Add a tree item
0431             if (entry.name.isEmpty() && entry.urls.isEmpty()) {
0432                 qWarning("EBook_CHMImpl::ParseAndFillTopicsTree: <object> tag is parsed, but both name and url are empty.");
0433             } else {
0434                 // If the name is empty, use the URL as name
0435                 if (entry.name.isEmpty()) {
0436                     entry.name = entry.urls[0].toString();
0437                 }
0438 
0439                 if (!root_indent_offset_set) {
0440                     root_indent_offset_set = true;
0441                     root_indent_offset = indent;
0442 
0443                     if (root_indent_offset > 1) {
0444                         qWarning("CHM has improper index; root indent offset is %d", root_indent_offset);
0445                     }
0446                 }
0447 
0448                 // Trim the entry name
0449                 entry.name = entry.name.trimmed();
0450 
0451                 int real_indent = indent - root_indent_offset;
0452 
0453                 entry.indent = real_indent;
0454                 data.push_back(entry);
0455             }
0456 
0457             entry.name = QString();
0458             entry.urls.clear();
0459             entry.iconid = defaultimagenum;
0460             entry.seealso.clear();
0461             in_object = false;
0462         } else if (tagword == QLatin1String("param") && in_object) {
0463             // <param name="Name" value="First Page">
0464             int offset; // strlen("param ")
0465             const QString name_pattern = QStringLiteral("name="), value_pattern = QStringLiteral("value=");
0466             QString pname, pvalue;
0467 
0468             if ((offset = tag.indexOf(name_pattern, 0, Qt::CaseInsensitive)) == -1) {
0469                 qFatal("EBook_CHMImpl::ParseAndFillTopicsTree: bad <param> tag '%s': no name=\n", qPrintable(tag));
0470             }
0471 
0472             // offset+5 skips 'name='
0473             offset = findStringInQuotes(tag, offset + name_pattern.length(), pname, true, false);
0474             pname = pname.toLower();
0475 
0476             if ((offset = tag.indexOf(value_pattern, offset, Qt::CaseInsensitive)) == -1) {
0477                 qFatal("EBook_CHMImpl::ParseAndFillTopicsTree: bad <param> tag '%s': no value=\n", qPrintable(tag));
0478             }
0479 
0480             // offset+6 skips 'value='
0481             findStringInQuotes(tag, offset + value_pattern.length(), pvalue, false, true);
0482 
0483             // DEBUGPARSER(("<param>: name '%s', value '%s'", qPrintable( pname ), qPrintable( pvalue )));
0484 
0485             if (pname == QLatin1String("name") || pname == QLatin1String("keyword")) {
0486                 // Some help files contain duplicate names, where the second name is empty. Work it around by keeping the first one
0487                 if (!pvalue.isEmpty()) {
0488                     entry.name = pvalue;
0489                 }
0490             } else if (pname == QLatin1String("merge")) {
0491                 // MERGE implementation is experimental
0492                 QUrl mergeurl = pathToUrl(pvalue);
0493                 QString mergecontent;
0494 
0495                 if (getFileContentAsString(mergecontent, mergeurl) && !mergecontent.isEmpty()) {
0496                     qWarning("MERGE is used in index; the implementation is experimental. Please let me know if it works");
0497 
0498                     // Merge the read value into the current parsed file.
0499                     // To save memory it is done in a kinda hacky way:
0500                     src = mergecontent + src.mid(i);
0501                     pos = 0;
0502                     stringlen = src.length();
0503                 } else {
0504                     qWarning("MERGE is used in index but file %s was not found in CHM archive", qPrintable(pvalue));
0505                 }
0506             } else if (pname == QLatin1String("local")) {
0507                 // Check for URL duplication
0508                 QUrl url = pathToUrl(pvalue);
0509 
0510                 if (!entry.urls.contains(url)) {
0511                     entry.urls.push_back(url);
0512                 }
0513             } else if (pname == QLatin1String("see also") && asIndex && entry.name != pvalue) {
0514                 entry.urls.push_back(QUrl(QStringLiteral("seealso")));
0515                 entry.seealso = pvalue;
0516             } else if (pname == QLatin1String("imagenumber")) {
0517                 bool bok;
0518                 int imgnum = pvalue.toInt(&bok);
0519 
0520                 if (bok && imgnum >= 0 && imgnum < EBookTocEntry::MAX_BUILTIN_ICONS) {
0521                     entry.iconid = (EBookTocEntry::Icon)imgnum;
0522                 }
0523             }
0524         } else if (tagword == QLatin1String("ul")) // increase indent level
0525         {
0526             // Fix for buggy help files
0527             if (++indent >= MAX_NEST_DEPTH) {
0528                 qFatal("EBook_CHMImpl::ParseAndFillTopicsTree: max nest depth (%d) is reached, error in help file", MAX_NEST_DEPTH);
0529             }
0530 
0531             DEBUGPARSER(("<ul>: new intent is %d\n", indent - root_indent_offset));
0532         } else if (tagword == QLatin1String("/ul")) // decrease indent level
0533         {
0534             if (--indent < root_indent_offset) {
0535                 indent = root_indent_offset;
0536             }
0537 
0538             DEBUGPARSER(("</ul>: new intent is %d\n", indent - root_indent_offset));
0539         }
0540 
0541         pos = i;
0542     }
0543 
0544     // Dump our array
0545     //    for ( int i = 0; i < data.size(); i++ )
0546     //        qDebug() << data[i].indent << data[i].name << data[i].urls;
0547 
0548     return true;
0549 }
0550 
0551 bool EBook_CHM::ResolveObject(const QString &fileName, chmUnitInfo *ui) const
0552 {
0553     return m_chmFile != nullptr && ::chm_resolve_object(m_chmFile, qPrintable(fileName), ui) == CHM_RESOLVE_SUCCESS;
0554 }
0555 
0556 bool EBook_CHM::hasFile(const QString &fileName) const
0557 {
0558     chmUnitInfo ui;
0559 
0560     return m_chmFile != nullptr && ::chm_resolve_object(m_chmFile, qPrintable(fileName), &ui) == CHM_RESOLVE_SUCCESS;
0561 }
0562 
0563 size_t EBook_CHM::RetrieveObject(const chmUnitInfo *ui, unsigned char *buffer, LONGUINT64 fileOffset, LONGINT64 bufferSize) const
0564 {
0565     return ::chm_retrieve_object(m_chmFile, const_cast<chmUnitInfo *>(ui), buffer, fileOffset, bufferSize);
0566 }
0567 
0568 bool EBook_CHM::getInfoFromWindows()
0569 {
0570 #define WIN_HEADER_LEN 0x08
0571     unsigned char buffer[BUF_SIZE];
0572     unsigned int factor;
0573     chmUnitInfo ui;
0574     long size = 0;
0575 
0576     if (ResolveObject(QStringLiteral("/#WINDOWS"), &ui)) {
0577         if (!RetrieveObject(&ui, buffer, 0, WIN_HEADER_LEN)) {
0578             return false;
0579         }
0580 
0581         unsigned int entries = get_int32_le(reinterpret_cast<unsigned int *>(buffer));
0582         unsigned int entry_size = get_int32_le(reinterpret_cast<unsigned int *>(buffer + 0x04));
0583 
0584         QVector<unsigned char> uptr(entries * entry_size);
0585         unsigned char *raw = (unsigned char *)uptr.data();
0586 
0587         if (!RetrieveObject(&ui, raw, 8, entries * entry_size)) {
0588             return false;
0589         }
0590 
0591         if (!ResolveObject(QStringLiteral("/#STRINGS"), &ui)) {
0592             return false;
0593         }
0594 
0595         for (unsigned int i = 0; i < entries; ++i) {
0596             unsigned int offset = i * entry_size;
0597 
0598             unsigned int off_title = get_int32_le(reinterpret_cast<unsigned int *>(raw + offset + 0x14));
0599             unsigned int off_home = get_int32_le(reinterpret_cast<unsigned int *>(raw + offset + 0x68));
0600             unsigned int off_hhc = get_int32_le(reinterpret_cast<unsigned int *>(raw + offset + 0x60));
0601             unsigned int off_hhk = get_int32_le(reinterpret_cast<unsigned int *>(raw + offset + 0x64));
0602 
0603             factor = off_title / 4096;
0604 
0605             if (size == 0) {
0606                 size = RetrieveObject(&ui, buffer, factor * 4096, BUF_SIZE);
0607             }
0608 
0609             if (size && off_title) {
0610                 m_title = QByteArray((const char *)(buffer + off_title % 4096));
0611             }
0612 
0613             if (factor != off_home / 4096) {
0614                 factor = off_home / 4096;
0615                 size = RetrieveObject(&ui, buffer, factor * 4096, BUF_SIZE);
0616             }
0617 
0618             if (size && off_home) {
0619                 m_home = QByteArray("/") + QByteArray((const char *)buffer + off_home % 4096);
0620             }
0621 
0622             if (factor != off_hhc / 4096) {
0623                 factor = off_hhc / 4096;
0624                 size = RetrieveObject(&ui, buffer, factor * 4096, BUF_SIZE);
0625             }
0626 
0627             if (size && off_hhc) {
0628                 m_topicsFile = QByteArray("/") + QByteArray((const char *)buffer + off_hhc % 4096);
0629             }
0630 
0631             if (factor != off_hhk / 4096) {
0632                 factor = off_hhk / 4096;
0633                 size = RetrieveObject(&ui, buffer, factor * 4096, BUF_SIZE);
0634             }
0635 
0636             if (size && off_hhk) {
0637                 m_indexFile = QByteArray("/") + QByteArray((const char *)buffer + off_hhk % 4096);
0638             }
0639         }
0640     }
0641     return true;
0642 }
0643 
0644 bool EBook_CHM::getInfoFromSystem()
0645 {
0646     unsigned char buffer[BUF_SIZE];
0647     chmUnitInfo ui;
0648 
0649     int index = 0;
0650     unsigned char *cursor = nullptr, *p;
0651     unsigned short value = 0;
0652     long size = 0;
0653 
0654     // Run the first loop to detect the encoding. We need this, because title could be
0655     // already encoded in user encoding. Same for file names
0656     if (!ResolveObject(QStringLiteral("/#SYSTEM"), &ui)) {
0657         return false;
0658     }
0659 
0660     // Can we pull BUFF_SIZE bytes of the #SYSTEM file?
0661     if ((size = RetrieveObject(&ui, buffer, 4, BUF_SIZE)) == 0) {
0662         return false;
0663     }
0664 
0665     buffer[size - 1] = 0;
0666 
0667     // First loop to detect the encoding
0668     for (index = 0; index < (size - 1 - (long)sizeof(unsigned short));) {
0669         cursor = buffer + index;
0670         value = UINT16ARRAY(cursor);
0671 
0672         switch (value) {
0673         case 0:
0674             index += 2;
0675             cursor = buffer + index;
0676 
0677             if (m_topicsFile.isEmpty()) {
0678                 m_topicsFile = QByteArray("/") + QByteArray((const char *)buffer + index + 2);
0679             }
0680 
0681             break;
0682 
0683         case 1:
0684             index += 2;
0685             cursor = buffer + index;
0686 
0687             if (m_indexFile.isEmpty()) {
0688                 m_indexFile = QByteArray("/") + QByteArray((const char *)buffer + index + 2);
0689             }
0690             break;
0691 
0692         case 2:
0693             index += 2;
0694             cursor = buffer + index;
0695 
0696             if (m_home.isEmpty() || m_home == "/") {
0697                 m_home = QByteArray("/") + QByteArray((const char *)buffer + index + 2);
0698             }
0699             break;
0700 
0701         case 3:
0702             index += 2;
0703             cursor = buffer + index;
0704             m_title = QByteArray((const char *)(buffer + index + 2));
0705             break;
0706 
0707         case 4:
0708             index += 2;
0709             cursor = buffer + index;
0710 
0711             p = buffer + index + 2;
0712             m_detectedLCID = (short)(p[0] | (p[1] << 8));
0713 
0714             break;
0715 
0716         case 6:
0717             index += 2;
0718             cursor = buffer + index;
0719 
0720             if (m_topicsFile.isEmpty()) {
0721                 QString topicAttempt = QStringLiteral("/");
0722                 topicAttempt += QString(QString::fromUtf8((const char *)buffer + index + 2));
0723 
0724                 QString tmp = topicAttempt + QStringLiteral(".hhc");
0725 
0726                 if (ResolveObject(tmp, &ui)) {
0727                     m_topicsFile = qPrintable(tmp);
0728                 }
0729 
0730                 tmp = topicAttempt + QStringLiteral(".hhk");
0731 
0732                 if (ResolveObject(tmp, &ui)) {
0733                     m_indexFile = qPrintable(tmp);
0734                 }
0735             }
0736             break;
0737 
0738         case 16:
0739             index += 2;
0740             cursor = buffer + index;
0741 
0742             m_font = QString(QString::fromUtf8((const char *)buffer + index + 2));
0743             break;
0744 
0745         default:
0746             index += 2;
0747             cursor = buffer + index;
0748         }
0749 
0750         value = UINT16ARRAY(cursor);
0751         index += value + 2;
0752     }
0753 
0754     return true;
0755 }
0756 
0757 QString EBook_CHM::getTopicByUrl(const QUrl &url)
0758 {
0759     QMap<QUrl, QString>::const_iterator it = m_url2topics.constFind(url);
0760 
0761     if (it == m_url2topics.constEnd()) {
0762         return QString();
0763     }
0764 
0765     return it.value();
0766 }
0767 
0768 static int chm_enumerator_callback(struct chmFile *, struct chmUnitInfo *ui, void *context)
0769 {
0770     EBook_CHM tmp;
0771     ((QList<QUrl> *)context)->push_back(tmp.pathToUrl(QString::fromUtf8(ui->path)));
0772     return CHM_ENUMERATOR_CONTINUE;
0773 }
0774 
0775 bool EBook_CHM::enumerateFiles(QList<QUrl> &files)
0776 {
0777     files.clear();
0778     return chm_enumerate(m_chmFile, CHM_ENUMERATE_ALL, chm_enumerator_callback, &files);
0779 }
0780 
0781 QString EBook_CHM::currentEncoding() const
0782 {
0783     return m_currentEncoding;
0784 }
0785 
0786 bool EBook_CHM::setCurrentEncoding(const char *encoding)
0787 {
0788     m_currentEncoding = QString::fromUtf8(encoding);
0789     return changeFileEncoding(m_currentEncoding);
0790 }
0791 
0792 bool EBook_CHM::isSupportedUrl(const QUrl &url)
0793 {
0794     return url.scheme() == URL_SCHEME_CHM;
0795 }
0796 
0797 bool EBook_CHM::guessTextEncoding()
0798 {
0799     if (!m_detectedLCID) {
0800         qWarning("Could not detect LCID");
0801         return false;
0802     }
0803 
0804     QString enc = Ebook_CHM_Encoding::guessByLCID(m_detectedLCID);
0805 
0806     if (changeFileEncoding(enc)) {
0807         m_currentEncoding = enc;
0808         return true;
0809     }
0810 
0811     return false;
0812 }
0813 
0814 bool EBook_CHM::changeFileEncoding(const QString &qtencoding)
0815 {
0816     // Encoding could be either simple Qt codepage, or set like CP1251/KOI8, which allows to
0817     // set up encodings separately for text (first) and internal files (second)
0818     int p = qtencoding.indexOf(QLatin1Char('/'));
0819 
0820     if (p != -1) {
0821         QString global = qtencoding.left(p);
0822         QString special = qtencoding.mid(p + 1);
0823 
0824         m_textCodec = QTextCodec::codecForName(global.toUtf8());
0825 
0826         if (!m_textCodec) {
0827             qWarning("Could not set up Text Codec for encoding '%s'", qPrintable(global));
0828             return false;
0829         }
0830 
0831         m_textCodecForSpecialFiles = QTextCodec::codecForName(special.toUtf8());
0832 
0833         if (!m_textCodecForSpecialFiles) {
0834             qWarning("Could not set up Text Codec for encoding '%s'", qPrintable(special));
0835             return false;
0836         }
0837     } else {
0838         m_textCodecForSpecialFiles = m_textCodec = QTextCodec::codecForName(qtencoding.toUtf8());
0839 
0840         if (!m_textCodec) {
0841             qWarning("Could not set up Text Codec for encoding '%s'", qPrintable(qtencoding));
0842             return false;
0843         }
0844     }
0845 
0846     m_htmlEntityDecoder.changeEncoding(m_textCodec);
0847     return true;
0848 }
0849 
0850 void EBook_CHM::fillTopicsUrlMap()
0851 {
0852     if (!m_lookupTablesValid) {
0853         return;
0854     }
0855 
0856     // Read those tables
0857     QVector<unsigned char> topics(m_chmTOPICS.length), urltbl(m_chmURLTBL.length), urlstr(m_chmURLSTR.length), strings(m_chmSTRINGS.length);
0858 
0859     if (!RetrieveObject(&m_chmTOPICS, (unsigned char *)topics.data(), 0, m_chmTOPICS.length) || !RetrieveObject(&m_chmURLTBL, (unsigned char *)urltbl.data(), 0, m_chmURLTBL.length) ||
0860         !RetrieveObject(&m_chmURLSTR, (unsigned char *)urlstr.data(), 0, m_chmURLSTR.length) || !RetrieveObject(&m_chmSTRINGS, (unsigned char *)strings.data(), 0, m_chmSTRINGS.length)) {
0861         return;
0862     }
0863 
0864     for (LONGUINT64 i = 0; i < m_chmTOPICS.length; i += TOPICS_ENTRY_LEN) {
0865         unsigned int off_title = get_int32_le(reinterpret_cast<unsigned int *>(topics.data() + i + 4));
0866         unsigned int off_url = get_int32_le(reinterpret_cast<unsigned int *>(topics.data() + i + 8));
0867         off_url = get_int32_le(reinterpret_cast<unsigned int *>(urltbl.data() + off_url + 8)) + 8;
0868 
0869         QUrl url = pathToUrl(QString::fromUtf8((const char *)urlstr.data() + off_url));
0870 
0871         if (off_title < (unsigned int)strings.size()) {
0872             m_url2topics[url] = encodeWithCurrentCodec((const char *)strings.data() + off_title);
0873         } else {
0874             m_url2topics[url] = QStringLiteral("Untitled");
0875         }
0876     }
0877 }
0878 
0879 bool EBook_CHM::parseBinaryTOC(QList<EBookTocEntry> &toc) const
0880 {
0881     if (!m_lookupTablesValid) {
0882         return false;
0883     }
0884 
0885     QByteArray tocidx, topics, urltbl, urlstr, strings;
0886 
0887     // Read the index tables
0888     if (!getBinaryContent(tocidx, QStringLiteral("/#TOCIDX")) || !getBinaryContent(topics, QStringLiteral("/#TOPICS")) || !getBinaryContent(urltbl, QStringLiteral("/#URLTBL")) || !getBinaryContent(urlstr, QStringLiteral("/#URLSTR")) ||
0889         !getBinaryContent(strings, QStringLiteral("/#STRINGS"))) {
0890         return false;
0891     }
0892 
0893     // Shamelessly stolen from xchm
0894     if (!RecurseLoadBTOC(tocidx, topics, urltbl, urlstr, strings, UINT32ARRAY(tocidx.data()), toc, 0)) {
0895         qWarning("Failed to parse binary TOC, fallback to text-based TOC");
0896         toc.clear();
0897         return false;
0898     }
0899 
0900     return true;
0901 }
0902 
0903 //
0904 // This piece of code was based on the one in xchm written by  Razvan Cojocaru <razvanco@gmx.net>
0905 //
0906 bool EBook_CHM::RecurseLoadBTOC(const QByteArray &tocidx, const QByteArray &topics, const QByteArray &urltbl, const QByteArray &urlstr, const QByteArray &strings, int offset, QList<EBookTocEntry> &entries, int level) const
0907 {
0908     while (offset) {
0909         // If this is end of TOCIDX, return.
0910         if (tocidx.size() < offset + 20) {
0911             return true;
0912         }
0913 
0914         unsigned int flags = UINT32ARRAY(tocidx.data() + offset + 4);
0915         int index = UINT32ARRAY(tocidx.data() + offset + 8);
0916 
0917         if ((flags & 0x04) || (flags & 0x08)) {
0918             QString name, value;
0919 
0920             if ((flags & 0x08) == 0) {
0921                 if (strings.size() < index + 1) {
0922                     qWarning("EBook_CHM::RecurseLoadBTOC: invalid name index (%d) for book TOC entry!", index);
0923                     return false;
0924                 }
0925 
0926                 name = encodeWithCurrentCodec(strings.data() + index);
0927             } else {
0928                 if (topics.size() < (index * 16) + 12) {
0929                     qWarning("EBook_CHM::RecurseLoadBTOC: invalid name index (%d) for local TOC entry!", index);
0930                     return false;
0931                 }
0932 
0933                 int tocoffset = (int)UINT32ARRAY(topics.data() + (index * 16) + 4);
0934 
0935                 if (strings.size() < tocoffset + 1) {
0936                     qWarning("EBook_CHM::RecurseLoadBTOC: invalid name tocoffset (%d) for TOC entry!", tocoffset);
0937                     return false;
0938                 }
0939 
0940                 if (tocoffset < 0) {
0941                     name.clear();
0942                 } else {
0943                     name = encodeWithCurrentCodec(strings.data() + tocoffset);
0944                 }
0945 
0946                 // #URLTBL index
0947                 tocoffset = (int)UINT32ARRAY(topics.data() + (index * 16) + 8);
0948 
0949                 if (tocoffset < 0 || urltbl.size() < tocoffset + 12) {
0950                     qWarning("EBook_CHM::RecurseLoadBTOC: invalid url index (%d) for TOC entry!", tocoffset);
0951                     return false;
0952                 }
0953 
0954                 tocoffset = (int)UINT32ARRAY(urltbl.data() + tocoffset + 8);
0955 
0956                 if (tocoffset < 0 || urlstr.size() < tocoffset) {
0957                     qWarning("EBook_CHM::RecurseLoadBTOC: invalid url offset (%d) for TOC entry!", tocoffset);
0958                     return false;
0959                 }
0960 
0961                 value = encodeWithCurrentCodec(urlstr.data() + tocoffset + 8);
0962             }
0963 
0964             EBookTocEntry entry;
0965             entry.name = name.trimmed();
0966 
0967             if (!entry.name.isEmpty()) {
0968                 if (!value.isEmpty()) {
0969                     entry.url = pathToUrl(value);
0970                 }
0971 
0972                 entry.iconid = EBookTocEntry::IMAGE_AUTO;
0973                 entry.indent = level;
0974                 entries.push_back(entry);
0975             }
0976         }
0977 
0978         if (flags & 0x04) {
0979             // book
0980             if (tocidx.size() < offset + 24) {
0981                 qWarning("EBook_CHM::RecurseLoadBTOC: invalid child entry offset (%d)", offset);
0982                 return false;
0983             }
0984 
0985             unsigned int childoffset = UINT32ARRAY(tocidx.data() + offset + 20);
0986 
0987             if (childoffset) {
0988                 if (!RecurseLoadBTOC(tocidx, topics, urltbl, urlstr, strings, childoffset, entries, level + 1)) {
0989                     return false;
0990                 }
0991             }
0992         }
0993 
0994         offset = UINT32ARRAY(tocidx.data() + offset + 0x10);
0995     }
0996 
0997     return true;
0998 }
0999 
1000 bool EBook_CHM::hasOption(const QString &name) const
1001 {
1002     if (!m_envOptions.isEmpty() && m_envOptions.contains(name)) {
1003         return true;
1004     }
1005 
1006     return false;
1007 }
1008 
1009 QUrl EBook_CHM::pathToUrl(const QString &link) const
1010 {
1011     if (link.startsWith(QLatin1String("http://")) || link.startsWith(QLatin1String("https://"))) {
1012         return QUrl(link);
1013     }
1014 
1015     QUrl url;
1016     url.setScheme(URL_SCHEME_CHM);
1017     url.setHost(URL_SCHEME_CHM);
1018 
1019     // Does the link contain the fragment as well?
1020     int off = link.indexOf(QLatin1Char('#'));
1021     QString path;
1022 
1023     if (off != -1) {
1024         path = link.left(off);
1025         url.setFragment(link.mid(off + 1));
1026     } else {
1027         path = link;
1028     }
1029 
1030     if (!path.startsWith(QLatin1Char('/'))) {
1031         path.prepend(QLatin1Char('/'));
1032     }
1033 
1034     url.setPath(QUrl::fromPercentEncoding(path.toUtf8()));
1035     return url;
1036 }
1037 
1038 QString EBook_CHM::urlToPath(const QUrl &link) const
1039 {
1040     if (link.scheme() == URL_SCHEME_CHM) {
1041         if (link.path() == QLatin1String("/") || link.path().isEmpty()) {
1042             return QString::fromUtf8(m_home);
1043         }
1044 
1045         return link.path();
1046     }
1047 
1048     return QLatin1String("");
1049 }
1050 
1051 EBook_CHM::ParsedEntry::ParsedEntry()
1052 {
1053     iconid = 0;
1054     indent = 0;
1055 }