File indexing completed on 2024-05-19 15:15:51

0001 /*
0002     This file is part of the KDE libraries
0003     SPDX-FileCopyrightText: 2002 Waldo Bastian <bastian@kde.org>
0004     SPDX-FileCopyrightText: 2009 David Faure <faure@kde.org>
0005 
0006     SPDX-License-Identifier: LGPL-2.0-only
0007 */
0008 
0009 #include "httpfilter.h"
0010 #include <KCompressionDevice>
0011 #include <KFilterBase>
0012 #include <KLocalizedString>
0013 #include <QDebug>
0014 
0015 #include <stdio.h>
0016 
// Defines the KIO_HTTP_FILTER logging category (category name
// "kf.kio.workers.http.filter"); in this file it is used by the qCDebug()
// call in HTTPFilterGZip::slotInput() when decompression fails.
0017 Q_LOGGING_CATEGORY(KIO_HTTP_FILTER, "kf.kio.workers.http.filter")
0018 
0019 /*
0020 Testcases:
0021  - http://david.fullrecall.com/browser-http-compression-test?compression=deflate-http (bug 160289)
0022  - http://demo.serv-u.com/?user=demo-WC&password=demo-WC  (Content-Encoding: deflate)  (bug 188935)
0023  - http://web.davidfaure.fr/cgi-bin/deflate_test (Content-Encoding: deflate) (bug 114830)
0024  - http://www.zlib.net/zlib_faq.html#faq39 (Content-Encoding: gzip)
0025  - wikipedia (Content-Encoding: gzip)
0026  - cnn.com (Content-Encoding: gzip)
0027  - http://arstechnica.com/ (Content-Encoding: gzip)
0028  - mailman admin interface on mail.kde.org (see r266769, but can't confirm these days)
0029 */
0030 
0031 HTTPFilterBase::HTTPFilterBase()
0032     : last(nullptr)
0033 {
0034 }
0035 
0036 HTTPFilterBase::~HTTPFilterBase()
0037 {
0038     delete last;
0039 }
0040 
0041 void HTTPFilterBase::chain(HTTPFilterBase *previous)
0042 {
0043     last = previous;
0044     connect(last, &HTTPFilterBase::output, this, &HTTPFilterBase::slotInput);
0045 }
0046 
0047 HTTPFilterChain::HTTPFilterChain()
0048     : first(nullptr)
0049 {
0050 }
0051 
0052 void HTTPFilterChain::addFilter(HTTPFilterBase *filter)
0053 {
0054     if (!last) {
0055         first = filter;
0056     } else {
0057         disconnect(last, &HTTPFilterBase::output, nullptr, nullptr);
0058         filter->chain(last);
0059     }
0060     last = filter;
0061     connect(filter, &HTTPFilterBase::output, this, &HTTPFilterBase::output);
0062     connect(filter, &HTTPFilterBase::error, this, &HTTPFilterBase::error);
0063 }
0064 
0065 void HTTPFilterChain::slotInput(const QByteArray &d)
0066 {
0067     if (first) {
0068         first->slotInput(d);
0069     } else {
0070         Q_EMIT output(d);
0071     }
0072 }
0073 
0074 HTTPFilterMD5::HTTPFilterMD5()
0075     : context(QCryptographicHash::Md5)
0076 {
0077 }
0078 
0079 QString HTTPFilterMD5::md5()
0080 {
0081     return QString::fromLatin1(context.result().toBase64().constData());
0082 }
0083 
0084 void HTTPFilterMD5::slotInput(const QByteArray &d)
0085 {
0086     context.addData(d);
0087     Q_EMIT output(d);
0088 }
0089 
0090 HTTPFilterGZip::HTTPFilterGZip(bool deflate)
0091     : m_deflateMode(deflate)
0092     , m_firstData(true)
0093     , m_finished(false)
0094 {
0095     // We can't use KFilterDev because it assumes it can read as much data as necessary
0096     // from the underlying device. It's a pull strategy, while we have to do
0097     // a push strategy.
0098     m_gzipFilter = KCompressionDevice::filterForCompressionType(KCompressionDevice::GZip);
0099 }
0100 
0101 HTTPFilterGZip::~HTTPFilterGZip()
0102 {
0103     m_gzipFilter->terminate();
0104     delete m_gzipFilter;
0105 }
0106 
0107 /*
0108   The data format used by the zlib library is described by RFCs (Request for
0109   Comments) 1950 to 1952 in the files ftp://ds.internic.net/rfc/rfc1950.txt
0110   (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format).
0111 
0112   Use /usr/include/zlib.h as the primary source of documentation though.
0113 */
0114 
0115 void HTTPFilterGZip::slotInput(const QByteArray &d)
0116 {
0117     if (d.isEmpty()) {
0118         return;
0119     }
0120 
0121     // qDebug() << "Got" << d.size() << "bytes as input";
0122     if (m_firstData) {
0123         if (m_deflateMode) {
0124             bool zlibHeader = true;
0125             // Autodetect broken webservers (thanks Microsoft) who send raw-deflate
0126             // instead of zlib-headers-deflate when saying Content-Encoding: deflate.
0127             const unsigned char firstChar = d[0];
0128             if ((firstChar & 0x0f) != 8) {
0129                 // In a zlib header, CM should be 8 (cf RFC 1950)
0130                 zlibHeader = false;
0131             } else if (d.size() > 1) {
0132                 const unsigned char flg = d[1];
0133                 if ((firstChar * 256 + flg) % 31 != 0) { // Not a multiple of 31? invalid zlib header then
0134                     zlibHeader = false;
0135                 }
0136             }
0137             // if (!zlibHeader)
0138             //    qDebug() << "Bad webserver, uses raw-deflate instead of zlib-deflate...";
0139             if (zlibHeader) {
0140                 m_gzipFilter->setFilterFlags(KFilterBase::ZlibHeaders);
0141             } else {
0142                 m_gzipFilter->setFilterFlags(KFilterBase::NoHeaders);
0143             }
0144             m_gzipFilter->init(QIODevice::ReadOnly);
0145         } else {
0146             m_gzipFilter->setFilterFlags(KFilterBase::WithHeaders);
0147             m_gzipFilter->init(QIODevice::ReadOnly);
0148         }
0149         m_firstData = false;
0150     }
0151 
0152     m_gzipFilter->setInBuffer(d.constData(), d.size());
0153 
0154     while (!m_gzipFilter->inBufferEmpty() && !m_finished) {
0155         char buf[8192];
0156         m_gzipFilter->setOutBuffer(buf, sizeof(buf));
0157         KFilterBase::Result result = m_gzipFilter->uncompress();
0158         // qDebug() << "uncompress returned" << result;
0159         switch (result) {
0160         case KFilterBase::Ok:
0161         case KFilterBase::End: {
0162             const int bytesOut = sizeof(buf) - m_gzipFilter->outBufferAvailable();
0163             if (bytesOut) {
0164                 Q_EMIT output(QByteArray(buf, bytesOut));
0165             }
0166             if (result == KFilterBase::End) {
0167                 // qDebug() << "done, bHasFinished=true";
0168                 Q_EMIT output(QByteArray());
0169                 m_finished = true;
0170             }
0171             break;
0172         }
0173         case KFilterBase::Error:
0174             qCDebug(KIO_HTTP_FILTER) << "Error from KGZipFilter";
0175             Q_EMIT error(i18n("Receiving corrupt data."));
0176             m_finished = true; // exit this while loop
0177             break;
0178         }
0179     }
0180 }
0181 
0182 HTTPFilterDeflate::HTTPFilterDeflate()
0183     : HTTPFilterGZip(true)
0184 {
0185 }
0186 
0187 #include "moc_httpfilter.cpp"