File indexing completed on 2024-05-12 05:46:38

0001 //  dataprotocol.cpp
0002 // ==================
0003 //
0004 // Implementation of the data protocol (rfc 2397)
0005 //
0006 // Author: Leo Savernik
0007 // Email: l.savernik@aon.at
0008 // Copyright (C) 2002, 2003 by Leo Savernik <l.savernik@aon.at>
0009 // Created: Sam Dez 28 14:11:18 CET 2002
0010 
0011 /***************************************************************************
0012  *                                                                         *
0013  *   This program is free software; you can redistribute it and/or modify  *
0014  *   it under the terms of the GNU Lesser General Public License as        *
0015  *   published by the Free Software Foundation; version 2.                 *
0016  *                                                                         *
0017  ***************************************************************************/
0018 
0019 #include "dataprotocol_p.h"
0020 
0021 #include "global.h"
0022 
0023 #include <QByteArray>
0024 #include <QCharRef>
0025 #include <QTextCodec>
0026 
0027 #ifdef DATAKIOSLAVE
0028 #  include <kinstance.h>
0029 #  include <stdlib.h>
0030 #endif
0031 
// DISPATCH(f) expands to the plain call f when DATAKIOSLAVE is defined,
// and to dispatch_f (the name prefixed with "dispatch_") otherwise.
#ifdef DATAKIOSLAVE
#  define DISPATCH(f) f
#else
#  define DISPATCH(f) dispatch_##f
#endif
0037 
0038 using namespace KIO;
0039 #ifdef DATAKIOSLAVE
0040 extern "C" {
0041 
0042     int kdemain(int argc, char **argv)
0043     {
0044         //qDebug() << "*** Starting kio_data ";
0045 
0046         if (argc != 4) {
0047             //qDebug() << "Usage: kio_data  protocol domain-socket1 domain-socket2";
0048             exit(-1);
0049         }
0050 
0051         DataProtocol slave(argv[2], argv[3]);
0052         slave.dispatchLoop();
0053 
0054         //qDebug() << "*** kio_data Done";
0055         return 0;
0056     }
0057 }
0058 #endif
0059 
0060 /** structure containing header information */
0061 struct DataHeader {
0062     QString mime_type;        // mime type of content (lowercase)
0063     MetaData attributes;      // attribute/value pairs (attribute lowercase,
0064     //  value unchanged)
0065     bool is_base64;       // true if data is base64 encoded
0066     QByteArray url;       // reference to decoded url
0067     int data_offset;      // zero-indexed position within url
0068     // where the real data begins. May point beyond
0069     // the end to indicate that there is no data
0070 };
0071 
0072 /** returns the position of the first occurrence of any of the given
0073   * characters @p c1 or comma (',') or semicolon (';') or buf.length()
0074   * if none is contained.
0075   *
0076   * @param buf buffer where to look for c
0077   * @param begin zero-indexed starting position
0078   * @param c1 character to find or '\0' to ignore
0079   */
0080 static int find(const QByteArray &buf, int begin, const char c1)
0081 {
0082     static const char comma = ',';
0083     static const char semicolon = ';';
0084     int pos = begin;
0085     int size = buf.length();
0086     while (pos < size) {
0087         const char ch = buf[pos];
0088         if (ch == comma || ch == semicolon || (c1 != '\0' && ch == c1)) {
0089             break;
0090         }
0091         pos++;
0092     }/*wend*/
0093     return pos;
0094 }
0095 
0096 /** extracts the string between the current position @p pos and the first
0097  * occurrence of either @p c1 or comma (',') or semicolon (';') exclusively
0098  * and updates @p pos to point at the found delimiter or at the end of the
0099  * buffer if neither character occurred.
0100  * @param buf buffer where to look for
0101  * @param pos zero-indexed position within buffer
0102  * @param c1 character to find or '\0' to ignore
0103  */
0104 static inline QString extract(const QByteArray &buf, int &pos,
0105                               const char c1 = '\0')
0106 {
0107     int oldpos = pos;
0108     pos = find(buf, oldpos, c1);
0109     return QString::fromLatin1(buf.mid(oldpos, pos - oldpos));
0110 }
0111 
0112 /** ignores all whitespaces
0113  * @param buf buffer to operate on
0114  * @param pos position to shift to first non-whitespace character
0115  *  Upon return @p pos will either point to the first non-whitespace
0116  *  character or to the end of the buffer.
0117  */
0118 static inline void ignoreWS(const QByteArray &buf, int &pos)
0119 {
0120     int size = buf.length();
0121     while (pos < size && (buf[pos] == ' ' || buf[pos] == '\t')) {
0122         ++pos;
0123     }
0124 }
0125 
0126 /** parses a quoted string as per rfc 822.
0127  *
0128  * If trailing quote is missing, the whole rest of the buffer is returned.
0129  * @param buf buffer to operate on
0130  * @param pos position pointing to the leading quote
0131  * @return the extracted string. @p pos will be updated to point to the
0132  *  character following the trailing quote.
0133  */
0134 static QString parseQuotedString(const QByteArray &buf, int &pos)
0135 {
0136     int size = buf.length();
0137     QString res;
0138     res.reserve(size);    // can't be larger than buf
0139     pos++;        // jump over leading quote
0140     bool escaped = false; // if true means next character is literal
0141     bool parsing = true;  // true as long as end quote not found
0142     while (parsing && pos < size) {
0143         const QChar ch = QLatin1Char(buf[pos++]);
0144         if (escaped) {
0145             res += ch;
0146             escaped = false;
0147         } else {
0148             switch (ch.unicode()) {
0149             case '"': parsing = false; break;
0150             case '\\': escaped = true; break;
0151             default: res += ch; break;
0152             }/*end switch*/
0153         }/*end if*/
0154     }/*wend*/
0155     res.squeeze();
0156     return res;
0157 }
0158 
0159 /** parses the header of a data url
0160  * @param url the data url
0161  * @param mimeOnly if the only interesting information is the mime type
0162  * @return DataHeader structure with the header information
0163  */
0164 static DataHeader parseDataHeader(const QUrl &url, const bool mimeOnly)
0165 {
0166     DataHeader header_info;
0167 
0168     // initialize header info members
0169     header_info.mime_type = QStringLiteral("text/plain");
0170     header_info.attributes.insert(QStringLiteral("charset"), QStringLiteral("us-ascii"));
0171     header_info.is_base64 = false;
0172 
0173     // decode url and save it
0174     const QByteArray &raw_url = header_info.url = QByteArray::fromPercentEncoding(url.path(QUrl::FullyEncoded).toLatin1());
0175     const int raw_url_len = raw_url.length();
0176 
0177     header_info.data_offset = 0;
0178 
0179     // read mime type
0180     if (raw_url_len == 0) {
0181         return header_info;
0182     }
0183     const QString mime_type = extract(raw_url, header_info.data_offset).trimmed();
0184     if (!mime_type.isEmpty()) {
0185         header_info.mime_type = mime_type;
0186     }
0187     if (mimeOnly) {
0188         return header_info;
0189     }
0190 
0191     if (header_info.data_offset >= raw_url_len) {
0192         return header_info;
0193     }
0194     // jump over delimiter token and return if data reached
0195     if (raw_url[header_info.data_offset++] == ',') {
0196         return header_info;
0197     }
0198 
0199     // read all attributes and store them
0200     bool data_begin_reached = false;
0201     while (!data_begin_reached && header_info.data_offset < raw_url_len) {
0202         // read attribute
0203         const QString attribute = extract(raw_url, header_info.data_offset, '=').trimmed();
0204         if (header_info.data_offset >= raw_url_len
0205                 || raw_url[header_info.data_offset] != '=') {
0206             // no assignment, must be base64 option
0207             if (attribute == QLatin1String("base64")) {
0208                 header_info.is_base64 = true;
0209             }
0210         } else {
0211             header_info.data_offset++; // jump over '=' token
0212 
0213             // read value
0214             ignoreWS(raw_url, header_info.data_offset);
0215             if (header_info.data_offset >= raw_url_len) {
0216                 return header_info;
0217             }
0218 
0219             QString value;
0220             if (raw_url[header_info.data_offset] == '"') {
0221                 value = parseQuotedString(raw_url, header_info.data_offset);
0222                 ignoreWS(raw_url, header_info.data_offset);
0223             } else {
0224                 value = extract(raw_url, header_info.data_offset).trimmed();
0225             }
0226 
0227             // add attribute to map
0228             header_info.attributes[attribute.toLower()] = value;
0229 
0230         }/*end if*/
0231         if (header_info.data_offset < raw_url_len
0232                 && raw_url[header_info.data_offset] == ',') {
0233             data_begin_reached = true;
0234         }
0235         header_info.data_offset++; // jump over separator token
0236     }/*wend*/
0237 
0238     return header_info;
0239 }
0240 
0241 #ifdef DATAKIOSLAVE
0242 DataProtocol::DataProtocol(const QByteArray &pool_socket, const QByteArray &app_socket)
0243     : SlaveBase("kio_data", pool_socket, app_socket)
0244 {
0245 #else
0246 DataProtocol::DataProtocol()
0247 {
0248 #endif
0249     //qDebug();
0250 }
0251 
0252 /* --------------------------------------------------------------------- */
0253 
0254 DataProtocol::~DataProtocol()
0255 {
0256     //qDebug();
0257 }
0258 
0259 /* --------------------------------------------------------------------- */
0260 
0261 void DataProtocol::get(const QUrl &url)
0262 {
0263     ref();
0264     //qDebug() << this;
0265 
0266     const DataHeader hdr = parseDataHeader(url, false);
0267 
0268     const int size = hdr.url.length();
0269     const int data_ofs = qMin(hdr.data_offset, size);
0270     // FIXME: string is copied, would be nice if we could have a reference only
0271     const QByteArray url_data = hdr.url.mid(data_ofs);
0272     QByteArray outData;
0273 
0274     if (hdr.is_base64) {
0275         // base64 stuff is expected to contain the correct charset, so we just
0276         // decode it and pass it to the receiver
0277         outData = QByteArray::fromBase64(url_data);
0278     } else {
0279         QTextCodec *codec = QTextCodec::codecForName(hdr.attributes[QStringLiteral("charset")].toLatin1());
0280         if (codec != nullptr) {
0281             outData = codec->toUnicode(url_data).toUtf8();
0282         } else {
0283             outData = url_data;
0284         }/*end if*/
0285     }/*end if*/
0286 
0287     //qDebug() << "emit mimeType@"<<this;
0288     emit mimeType(hdr.mime_type);
0289     //qDebug() << "emit totalSize@"<<this;
0290     emit totalSize(outData.size());
0291 
0292     //qDebug() << "emit setMetaData@"<<this;
0293 #if defined(DATAKIOSLAVE)
0294     MetaData::ConstIterator it;
0295     for (it = hdr.attributes.constBegin(); it != hdr.attributes.constEnd(); ++it) {
0296         setMetaData(it.key(), it.value());
0297     }/*next it*/
0298 #else
0299     setAllMetaData(hdr.attributes);
0300 #endif
0301 
0302     //qDebug() << "emit sendMetaData@"<<this;
0303     sendMetaData();
0304 //qDebug() << "(1) queue size " << dispatchQueue.size();
0305     // empiric studies have shown that this shouldn't be queued & dispatched
0306     emit data(outData);
0307 //qDebug() << "(2) queue size " << dispatchQueue.size();
0308     DISPATCH(data(QByteArray()));
0309 //qDebug() << "(3) queue size " << dispatchQueue.size();
0310     DISPATCH(finished());
0311 //qDebug() << "(4) queue size " << dispatchQueue.size();
0312     deref();
0313 }
0314 
0315 /* --------------------------------------------------------------------- */
0316 
0317 void DataProtocol::mimetype(const QUrl &url)
0318 {
0319     ref();
0320     emit mimeType(parseDataHeader(url, true).mime_type);
0321     emit finished();
0322     deref();
0323 }
0324 
0325 /* --------------------------------------------------------------------- */