File indexing completed on 2024-04-14 03:52:50
0001 /* 0002 Implementation of the data protocol (rfc 2397) 0003 0004 SPDX-FileCopyrightText: 2002, 2003 Leo Savernik <l.savernik@aon.at> 0005 0006 SPDX-License-Identifier: LGPL-2.0-only 0007 */ 0008 0009 #include "dataprotocol_p.h" 0010 0011 #include "global.h" 0012 #include "metadata.h" 0013 0014 #include <QByteArray> 0015 #include <QStringDecoder> 0016 0017 using namespace KIO; 0018 0019 /** structure containing header information */ 0020 struct DataHeader { 0021 QString mime_type; // MIME type of content (lowercase) 0022 MetaData attributes; // attribute/value pairs (attribute lowercase, 0023 // value unchanged) 0024 bool is_base64; // true if data is base64 encoded 0025 QByteArray url; // reference to decoded url 0026 int data_offset; // zero-indexed position within url 0027 // where the real data begins. May point beyond 0028 // the end to indicate that there is no data 0029 }; 0030 0031 /** returns the position of the first occurrence of any of the given 0032 * characters @p c1 or comma (',') or semicolon (';') or buf.length() 0033 * if none is contained. 0034 * 0035 * @param buf buffer where to look for c 0036 * @param begin zero-indexed starting position 0037 * @param c1 character to find or '\0' to ignore 0038 */ 0039 static int find(const QByteArray &buf, int begin, const char c1) 0040 { 0041 static const char comma = ','; 0042 static const char semicolon = ';'; 0043 int pos = begin; 0044 int size = buf.length(); 0045 while (pos < size) { 0046 const char ch = buf[pos]; 0047 if (ch == comma || ch == semicolon || (c1 != '\0' && ch == c1)) { 0048 break; 0049 } 0050 pos++; 0051 } /*wend*/ 0052 return pos; 0053 } 0054 0055 /** extracts the string between the current position @p pos and the first 0056 * occurrence of either @p c1 or comma (',') or semicolon (';') exclusively 0057 * and updates @p pos to point at the found delimiter or at the end of the 0058 * buffer if neither character occurred. 0059 * @param buf buffer where to look for 0060 * @param pos zero-indexed position within buffer 0061 * @param c1 character to find or '\0' to ignore 0062 */ 0063 static inline QString extract(const QByteArray &buf, int &pos, const char c1 = '\0') 0064 { 0065 int oldpos = pos; 0066 pos = find(buf, oldpos, c1); 0067 return QString::fromLatin1(buf.mid(oldpos, pos - oldpos)); 0068 } 0069 0070 /** ignores all whitespaces 0071 * @param buf buffer to operate on 0072 * @param pos position to shift to first non-whitespace character 0073 * Upon return @p pos will either point to the first non-whitespace 0074 * character or to the end of the buffer. 0075 */ 0076 static inline void ignoreWS(const QByteArray &buf, int &pos) 0077 { 0078 int size = buf.length(); 0079 while (pos < size && (buf[pos] == ' ' || buf[pos] == '\t')) { 0080 ++pos; 0081 } 0082 } 0083 0084 /** parses a quoted string as per rfc 822. 0085 * 0086 * If trailing quote is missing, the whole rest of the buffer is returned. 0087 * @param buf buffer to operate on 0088 * @param pos position pointing to the leading quote 0089 * @return the extracted string. @p pos will be updated to point to the 0090 * character following the trailing quote. 0091 */ 0092 static QString parseQuotedString(const QByteArray &buf, int &pos) 0093 { 0094 int size = buf.length(); 0095 QString res; 0096 res.reserve(size); // can't be larger than buf 0097 pos++; // jump over leading quote 0098 bool escaped = false; // if true means next character is literal 0099 bool parsing = true; // true as long as end quote not found 0100 while (parsing && pos < size) { 0101 const QChar ch = QLatin1Char(buf[pos++]); 0102 if (escaped) { 0103 res += ch; 0104 escaped = false; 0105 } else { 0106 switch (ch.unicode()) { 0107 case '"': 0108 parsing = false; 0109 break; 0110 case '\\': 0111 escaped = true; 0112 break; 0113 default: 0114 res += ch; 0115 break; 0116 } /*end switch*/ 0117 } /*end if*/ 0118 } /*wend*/ 0119 res.squeeze(); 0120 return res; 0121 } 0122 0123 /** parses the header of a data url 0124 * @param url the data url 0125 * @param mimeOnly if the only interesting information is the MIME type 0126 * @return DataHeader structure with the header information 0127 */ 0128 static DataHeader parseDataHeader(const QUrl &url, const bool mimeOnly) 0129 { 0130 DataHeader header_info; 0131 0132 // initialize header info members 0133 header_info.mime_type = QStringLiteral("text/plain"); 0134 header_info.attributes.insert(QStringLiteral("charset"), QStringLiteral("us-ascii")); 0135 header_info.is_base64 = false; 0136 0137 // decode url and save it 0138 const QByteArray &raw_url = header_info.url = QByteArray::fromPercentEncoding(url.path(QUrl::FullyEncoded).toLatin1()); 0139 const int raw_url_len = raw_url.length(); 0140 0141 header_info.data_offset = 0; 0142 0143 // read MIME type 0144 if (raw_url_len == 0) { 0145 return header_info; 0146 } 0147 const QString mime_type = extract(raw_url, header_info.data_offset).trimmed(); 0148 if (!mime_type.isEmpty()) { 0149 header_info.mime_type = mime_type; 0150 } 0151 if (mimeOnly) { 0152 return header_info; 0153 } 0154 0155 if (header_info.data_offset >= raw_url_len) { 0156 return header_info; 0157 } 0158 // jump over delimiter token and return if data reached 0159 if (raw_url[header_info.data_offset++] == ',') { 0160 return header_info; 0161 } 0162 0163 // read all attributes and store them 0164 bool data_begin_reached = false; 0165 while (!data_begin_reached && header_info.data_offset < raw_url_len) { 0166 // read attribute 0167 const QString attribute = extract(raw_url, header_info.data_offset, '=').trimmed(); 0168 if (header_info.data_offset >= raw_url_len || raw_url[header_info.data_offset] != '=') { 0169 // no assignment, must be base64 option 0170 if (attribute == QLatin1String("base64")) { 0171 header_info.is_base64 = true; 0172 } 0173 } else { 0174 header_info.data_offset++; // jump over '=' token 0175 0176 // read value 0177 ignoreWS(raw_url, header_info.data_offset); 0178 if (header_info.data_offset >= raw_url_len) { 0179 return header_info; 0180 } 0181 0182 QString value; 0183 if (raw_url[header_info.data_offset] == '"') { 0184 value = parseQuotedString(raw_url, header_info.data_offset); 0185 ignoreWS(raw_url, header_info.data_offset); 0186 } else { 0187 value = extract(raw_url, header_info.data_offset).trimmed(); 0188 } 0189 0190 // add attribute to map 0191 header_info.attributes[attribute.toLower()] = value; 0192 0193 } /*end if*/ 0194 if (header_info.data_offset < raw_url_len && raw_url[header_info.data_offset] == ',') { 0195 data_begin_reached = true; 0196 } 0197 header_info.data_offset++; // jump over separator token 0198 } /*wend*/ 0199 0200 return header_info; 0201 } 0202 0203 DataProtocol::DataProtocol() 0204 { 0205 } 0206 0207 DataProtocol::~DataProtocol() = default; 0208 0209 void DataProtocol::get(const QUrl &url) 0210 { 0211 ref(); 0212 // qDebug() << this; 0213 0214 const DataHeader hdr = parseDataHeader(url, false); 0215 0216 const int size = hdr.url.length(); 0217 const int data_ofs = qMin(hdr.data_offset, size); 0218 // FIXME: string is copied, would be nice if we could have a reference only 0219 const QByteArray url_data = hdr.url.mid(data_ofs); 0220 QByteArray outData; 0221 0222 if (hdr.is_base64) { 0223 // base64 stuff is expected to contain the correct charset, so we just 0224 // decode it and pass it to the receiver 0225 outData = QByteArray::fromBase64(url_data); 0226 } else { 0227 QStringDecoder codec(hdr.attributes[QStringLiteral("charset")].toLatin1().constData()); 0228 if (codec.isValid()) { 0229 outData = QString(codec.decode(url_data)).toUtf8(); 0230 } else { 0231 outData = url_data; 0232 } /*end if*/ 0233 } /*end if*/ 0234 0235 // qDebug() << "emit mimeType@"<<this; 0236 Q_EMIT mimeType(hdr.mime_type); 0237 // qDebug() << "emit totalSize@"<<this; 0238 Q_EMIT totalSize(outData.size()); 0239 0240 // qDebug() << "emit setMetaData@"<<this; 0241 setAllMetaData(hdr.attributes); 0242 0243 // qDebug() << "emit sendMetaData@"<<this; 0244 sendMetaData(); 0245 // qDebug() << "(1) queue size " << dispatchQueue.size(); 0246 // empiric studies have shown that this shouldn't be queued & dispatched 0247 Q_EMIT data(outData); 0248 // qDebug() << "(2) queue size " << dispatchQueue.size(); 0249 dispatch_data(QByteArray{}); 0250 // qDebug() << "(3) queue size " << dispatchQueue.size(); 0251 dispatch_finished(); 0252 // qDebug() << "(4) queue size " << dispatchQueue.size(); 0253 deref(); 0254 } 0255 0256 /* --------------------------------------------------------------------- */ 0257 0258 void DataProtocol::mimetype(const QUrl &url) 0259 { 0260 ref(); 0261 Q_EMIT mimeType(parseDataHeader(url, true).mime_type); 0262 Q_EMIT finished(); 0263 deref(); 0264 } 0265 0266 /* --------------------------------------------------------------------- */ 0267 0268 #if !defined(TESTKIO) 0269 #include "moc_dataprotocol_p.cpp" 0270 #endif