File indexing completed on 2024-12-01 06:50:04
/*
    This file is part of the syndication library
    SPDX-FileCopyrightText: 2006 Frank Osterfeld <osterfeld@kde.org>

    SPDX-License-Identifier: LGPL-2.0-or-later
*/

#ifndef SYNDICATION_TOOLS_H
#define SYNDICATION_TOOLS_H

#include "person.h"
#include "syndication_export.h"

#include <QString>

class QByteArray;
class QString;

// Free helper functions of the syndication library: date parsing,
// entity/HTML/plain-text conversion, content normalization, person parsing
// and internal hashing utilities.
namespace Syndication
{
/** Date formats supported by the date parsers. */
enum DateFormat {
    ISODate, /**< ISO 8601 extended format.
              * (date: "2003-12-13", datetime: "2003-12-13T18:30:02.25",
              * datetime with timezone: "2003-12-13T18:30:02.25+01:00")
              */
    RFCDate, /**< RFC 822. (e.g. "Sat, 07 Sep 2002 00:00:01 GMT") */
};

/**
 * Parses a date string in ISO 8601 extended format.
 * (date: "2003-12-13", datetime: "2003-12-13T18:30:02.25",
 * datetime with timezone: "2003-12-13T18:30:02.25+01:00")
 *
 * @param str a string in ISO 8601 format
 * @return parsed date in seconds since epoch, 0 if no date could
 * be parsed from the string.
 */
SYNDICATION_EXPORT
uint parseISODate(const QString &str);

/**
 * Parses a date string as defined in RFC 822.
 * (e.g. "Sat, 07 Sep 2002 00:00:01 GMT")
 *
 * @param str a string in RFC 822 format
 * @return parsed date in seconds since epoch, 0 if no date could
 * be parsed from the string.
 */
SYNDICATION_EXPORT
uint parseRFCDate(const QString &str);

/**
 * Parses a date string in ISO (see parseISODate()) or RFC 822 (see
 * parseRFCDate()) format.
 * It tries both parsers and returns the first valid parsing result found
 * (or 0 otherwise).
 * To speed up parsing, you can give a hint which format you expect.
 * The corresponding parser is then tried first.
 *
 * @param str a date string
 * @param hint the expected format (RFCDate if omitted)
 * @return parsed date in seconds since epoch, 0 if no date could
 * be parsed from the string.
 */
SYNDICATION_EXPORT
uint parseDate(const QString &str, DateFormat hint = RFCDate);

/**
 * @internal
 * Returns a string representation of a datetime.
 * This is used internally to create debugging output.
 *
 * @param date the date to convert
 * (NOTE(review): presumably seconds since epoch, as returned by the
 * parse functions in this header -- confirm against the implementation)
 * @return string representation of the date, or a null string if
 * @c date is 0
 */
SYNDICATION_EXPORT
QString dateTimeToString(uint date);

/**
 * Resolves entities to their respective unicode characters.
 *
 * @param str a string
 * @return the string with entities resolved
 */
SYNDICATION_EXPORT
QString resolveEntities(const QString &str);

/**
 * Replaces the characters <, >, &, ", '
 * with &lt;, &gt;, &amp;, &quot;, &apos;.
 *
 * @param str the string to escape
 * @return the escaped string
 */
SYNDICATION_EXPORT
QString escapeSpecialCharacters(const QString &str);

/**
 * Replaces newlines ("\n") by <br/>.
 *
 * @param str string to convert
 * @return the converted string
 */
SYNDICATION_EXPORT
QString convertNewlines(const QString &str);

/**
 * Converts a plain text string to HTML.
 *
 * @param plainText a string in plain text.
 * @return the text as HTML
 */
SYNDICATION_EXPORT
QString plainTextToHtml(const QString &plainText);

/**
 * Converts an HTML string to plain text.
 *
 * @param html string in HTML format
 * @return stripped text
 */
SYNDICATION_EXPORT
QString htmlToPlainText(const QString &html);

/**
 * Guesses whether a string contains plain text or HTML.
 *
 * @param str the string in unknown format
 * @return @c true if the heuristic thinks it's HTML, @c false
 * if it thinks it is plain text
 */
SYNDICATION_EXPORT
bool isHtml(const QString &str);

/**
 * Guesses whether a string contains (HTML) markup or not. This is
 * not an exact check for valid HTML markup, but a
 * simple (and relatively fast) heuristic.
 *
 * @param str the string that might or might not contain markup
 * @return @c true if the heuristic thinks it contains markup, @c false
 * if it thinks it is markup-free plain text
 */
SYNDICATION_EXPORT
bool stringContainsMarkup(const QString &str);

/**
 * Ensures HTML formatting for a string.
 * Guesses via isHtml() if @c str contains HTML or plain text, and returns
 * plainTextToHtml(str) if it thinks it is plain text, or the unmodified
 * @c str otherwise.
 *
 * @param str a string with unknown content
 * @return string as HTML (as long as the heuristics work)
 */
SYNDICATION_EXPORT
QString normalize(const QString &str);

/**
 * Normalizes a string based on feed-wide properties of tag content.
 * It is based on the assumption that all items in a feed encode their
 * title/description content in the same way (CDATA or not, plain text
 * vs. HTML). isCDATA and containsMarkup are determined once by the feed,
 * and then passed to this method.
 *
 * The returned string contains HTML, with special characters <, >,
 * &, ", and ' escaped, and all other entities resolved.
 * Whitespace is collapsed, relevant whitespace is replaced by respective
 * HTML tags (<br/>).
 *
 * @param str a string
 * @param isCDATA whether the feed uses CDATA for the tag @c str was read from
 * @param containsMarkup whether the feed uses HTML markup in the
 *        tag @c str was read from.
 * @return string as HTML (as long as the heuristics work)
 */
SYNDICATION_EXPORT
QString normalize(const QString &str, bool isCDATA, bool containsMarkup);

/**
 * Parses a person object from a string by identifying name and email address
 * in the string. Currently detected variants are:
 * "foo@bar.com", "Foo", "Foo <foo@bar.com>", "foo@bar.com (Foo)".
 *
 * @param str the string to parse the person from.
 * @return a Person object containing the parsed information.
 */
SYNDICATION_EXPORT
PersonPtr personFromString(const QString &str);

// Note: unlike the functions above, the following helpers are declared
// without SYNDICATION_EXPORT.

/**
 * @internal
 * Calculates a hash value for a string.
 *
 * @param str the string to hash
 * @return the hash value
 */
unsigned int calcHash(const QString &str);

/**
 * @internal
 * Calculates a hash value for a byte array.
 *
 * @param array the byte array to hash
 * @return the hash value
 */
unsigned int calcHash(const QByteArray &array);

/**
 * @internal
 * Calculates an MD5 checksum for a string.
 *
 * @param str the string to calculate the checksum for
 * @return the checksum as a string
 */
QString calcMD5Sum(const QString &str);

//@cond PRIVATE
/**
 * @internal
 * Used internally to represent element types.
 *
 * NOTE(review): judging by the member names, an element type is presumably
 * identified by an XML namespace plus a local name -- confirm against the
 * implementation.
 */
struct ElementType {
    // Deliberately not marked explicit (see the "implicit" marker), so a
    // QString converts to an ElementType; the second argument defaults to
    // a null QString.
    ElementType(const QString &localnamep,
                const QString &nsp = QString()); // implicit

    // Equality comparison; presumably considers both members -- TODO confirm.
    bool operator==(const ElementType &other) const;

    QString ns; // presumably the namespace of the element -- TODO confirm
    QString localname; // presumably the local (unqualified) element name -- TODO confirm
};
//@endcond

} // namespace Syndication

#endif // SYNDICATION_TOOLS_H