File indexing completed on 2025-03-23 09:55:25
0001 /* 0002 This file is part of the KDE libraries 0003 SPDX-FileCopyrightText: 2002 Waldo Bastian <bastian@kde.org> 0004 SPDX-FileCopyrightText: 2009 David Faure <faure@kde.org> 0005 0006 SPDX-License-Identifier: LGPL-2.0-only 0007 */ 0008 0009 #include "httpfilter.h" 0010 #include <KCompressionDevice> 0011 #include <KFilterBase> 0012 #include <KLocalizedString> 0013 #include <QDebug> 0014 0015 #include <stdio.h> 0016 0017 Q_LOGGING_CATEGORY(KIO_HTTP_FILTER, "kf.kio.workers.http.filter") 0018 0019 /* 0020 Testcases: 0021 - http://david.fullrecall.com/browser-http-compression-test?compression=deflate-http (bug 160289) 0022 - http://demo.serv-u.com/?user=demo-WC&password=demo-WC (Content-Encoding: deflate) (bug 188935) 0023 - http://web.davidfaure.fr/cgi-bin/deflate_test (Content-Encoding: deflate) (bug 114830) 0024 - http://www.zlib.net/zlib_faq.html#faq39 (Content-Encoding: gzip) 0025 - wikipedia (Content-Encoding: gzip) 0026 - cnn.com (Content-Encoding: gzip) 0027 - http://arstechnica.com/ (Content-Encoding: gzip) 0028 - mailman admin interface on mail.kde.org (see r266769, but can't confirm these days) 0029 */ 0030 0031 HTTPFilterBase::HTTPFilterBase() 0032 : last(nullptr) 0033 { 0034 } 0035 0036 HTTPFilterBase::~HTTPFilterBase() 0037 { 0038 delete last; 0039 } 0040 0041 void HTTPFilterBase::chain(HTTPFilterBase *previous) 0042 { 0043 last = previous; 0044 connect(last, &HTTPFilterBase::output, this, &HTTPFilterBase::slotInput); 0045 } 0046 0047 HTTPFilterChain::HTTPFilterChain() 0048 : first(nullptr) 0049 { 0050 } 0051 0052 void HTTPFilterChain::addFilter(HTTPFilterBase *filter) 0053 { 0054 if (!last) { 0055 first = filter; 0056 } else { 0057 disconnect(last, &HTTPFilterBase::output, nullptr, nullptr); 0058 filter->chain(last); 0059 } 0060 last = filter; 0061 connect(filter, &HTTPFilterBase::output, this, &HTTPFilterBase::output); 0062 connect(filter, &HTTPFilterBase::error, this, &HTTPFilterBase::error); 0063 } 0064 0065 void HTTPFilterChain::slotInput(const QByteArray &d) 0066 { 0067 if (first) { 0068 first->slotInput(d); 0069 } else { 0070 Q_EMIT output(d); 0071 } 0072 } 0073 0074 HTTPFilterMD5::HTTPFilterMD5() 0075 : context(QCryptographicHash::Md5) 0076 { 0077 } 0078 0079 QString HTTPFilterMD5::md5() 0080 { 0081 return QString::fromLatin1(context.result().toBase64().constData()); 0082 } 0083 0084 void HTTPFilterMD5::slotInput(const QByteArray &d) 0085 { 0086 context.addData(d); 0087 Q_EMIT output(d); 0088 } 0089 0090 HTTPFilterGZip::HTTPFilterGZip(bool deflate) 0091 : m_deflateMode(deflate) 0092 , m_firstData(true) 0093 , m_finished(false) 0094 { 0095 // We can't use KFilterDev because it assumes it can read as much data as necessary 0096 // from the underlying device. It's a pull strategy, while we have to do 0097 // a push strategy. 0098 m_gzipFilter = KCompressionDevice::filterForCompressionType(KCompressionDevice::GZip); 0099 } 0100 0101 HTTPFilterGZip::~HTTPFilterGZip() 0102 { 0103 m_gzipFilter->terminate(); 0104 delete m_gzipFilter; 0105 } 0106 0107 /* 0108 The data format used by the zlib library is described by RFCs (Request for 0109 Comments) 1950 to 1952 in the files ftp://ds.internic.net/rfc/rfc1950.txt 0110 (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format). 0111 0112 Use /usr/include/zlib.h as the primary source of documentation though. 0113 */ 0114 0115 void HTTPFilterGZip::slotInput(const QByteArray &d) 0116 { 0117 if (d.isEmpty()) { 0118 return; 0119 } 0120 0121 // qDebug() << "Got" << d.size() << "bytes as input"; 0122 if (m_firstData) { 0123 if (m_deflateMode) { 0124 bool zlibHeader = true; 0125 // Autodetect broken webservers (thanks Microsoft) who send raw-deflate 0126 // instead of zlib-headers-deflate when saying Content-Encoding: deflate. 0127 const unsigned char firstChar = d[0]; 0128 if ((firstChar & 0x0f) != 8) { 0129 // In a zlib header, CM should be 8 (cf RFC 1950) 0130 zlibHeader = false; 0131 } else if (d.size() > 1) { 0132 const unsigned char flg = d[1]; 0133 if ((firstChar * 256 + flg) % 31 != 0) { // Not a multiple of 31? invalid zlib header then 0134 zlibHeader = false; 0135 } 0136 } 0137 // if (!zlibHeader) 0138 // qDebug() << "Bad webserver, uses raw-deflate instead of zlib-deflate..."; 0139 if (zlibHeader) { 0140 m_gzipFilter->setFilterFlags(KFilterBase::ZlibHeaders); 0141 } else { 0142 m_gzipFilter->setFilterFlags(KFilterBase::NoHeaders); 0143 } 0144 m_gzipFilter->init(QIODevice::ReadOnly); 0145 } else { 0146 m_gzipFilter->setFilterFlags(KFilterBase::WithHeaders); 0147 m_gzipFilter->init(QIODevice::ReadOnly); 0148 } 0149 m_firstData = false; 0150 } 0151 0152 m_gzipFilter->setInBuffer(d.constData(), d.size()); 0153 0154 while (!m_gzipFilter->inBufferEmpty() && !m_finished) { 0155 char buf[8192]; 0156 m_gzipFilter->setOutBuffer(buf, sizeof(buf)); 0157 KFilterBase::Result result = m_gzipFilter->uncompress(); 0158 // qDebug() << "uncompress returned" << result; 0159 switch (result) { 0160 case KFilterBase::Ok: 0161 case KFilterBase::End: { 0162 const int bytesOut = sizeof(buf) - m_gzipFilter->outBufferAvailable(); 0163 if (bytesOut) { 0164 Q_EMIT output(QByteArray(buf, bytesOut)); 0165 } 0166 if (result == KFilterBase::End) { 0167 // qDebug() << "done, bHasFinished=true"; 0168 Q_EMIT output(QByteArray()); 0169 m_finished = true; 0170 } 0171 break; 0172 } 0173 case KFilterBase::Error: 0174 qCDebug(KIO_HTTP_FILTER) << "Error from KGZipFilter"; 0175 Q_EMIT error(i18n("Receiving corrupt data.")); 0176 m_finished = true; // exit this while loop 0177 break; 0178 } 0179 } 0180 } 0181 0182 HTTPFilterDeflate::HTTPFilterDeflate() 0183 : HTTPFilterGZip(true) 0184 { 0185 } 0186 0187 #include "moc_httpfilter.cpp"