File indexing completed on 2024-05-12 05:01:53

0001 /*
0002     SPDX-FileCopyrightText: 2005 Ingo Kloecker <kloecker@kde.org>
0003     SPDX-FileCopyrightText: 2007 Allen Winter <winter@kde.org>
0004 
0005     SPDX-License-Identifier: LGPL-2.0-only
0006 */
0007 
0008 #include "ruqolaktexttohtmltest.h"
0009 
0010 #include "ktexttohtmlfork/ruqolaktexttohtml.h"
0011 #include "ktexttohtmlfork/ruqolaktexttohtml_p.h"
0012 
0013 #include <QDebug>
0014 #include <QTest>
0015 #include <QUrl>
0016 
// Provides the main() entry point of this test executable for KTextToHTMLTest.
QTEST_MAIN(KTextToHTMLTest)

// Make the option flags known to the meta-type system; they are used as the
// type of the "flags" data column in testHtmlConvert_data().
Q_DECLARE_METATYPE(RuqolaKTextToHTML::Options)
0020 
0021 #ifndef Q_OS_WIN
/**
 * Forces the LC_ALL environment variable to "en_US.utf-8", replacing any value
 * that is already set. Only compiled on non-Windows platforms.
 */
void initLocale()
{
    // Non-zero third argument: overwrite an existing LC_ALL entry.
    constexpr int overwriteExisting = 1;
    ::setenv("LC_ALL", "en_US.utf-8", overwriteExisting);
}
0026 Q_CONSTRUCTOR_FUNCTION(initLocale)
0027 #endif
0028 
0029 void KTextToHTMLTest::testGetEmailAddress()
0030 {
0031     // empty input
0032     const QString emptyQString;
0033     KTextToHTMLHelper ll1(emptyQString, 0);
0034     QVERIFY(ll1.getEmailAddress().isEmpty());
0035 
0036     // no '@' at scan position
0037     KTextToHTMLHelper ll2(QStringLiteral("foo@bar.baz"), 0);
0038     QVERIFY(ll2.getEmailAddress().isEmpty());
0039 
0040     // '@' in local part
0041     KTextToHTMLHelper ll3(QStringLiteral("foo@bar@bar.baz"), 7);
0042     QVERIFY(ll3.getEmailAddress().isEmpty());
0043 
0044     // empty local part
0045     KTextToHTMLHelper ll4(QStringLiteral("@bar.baz"), 0);
0046     QVERIFY(ll4.getEmailAddress().isEmpty());
0047     KTextToHTMLHelper ll5(QStringLiteral(".@bar.baz"), 1);
0048     QVERIFY(ll5.getEmailAddress().isEmpty());
0049     KTextToHTMLHelper ll6(QStringLiteral(" @bar.baz"), 1);
0050     QVERIFY(ll6.getEmailAddress().isEmpty());
0051     KTextToHTMLHelper ll7(QStringLiteral(".!#$%&'*+-/=?^_`{|}~@bar.baz"), qstrlen(".!#$%&'*+-/=?^_`{|}~"));
0052     QVERIFY(ll7.getEmailAddress().isEmpty());
0053 
0054     // allowed special chars in local part of address
0055     KTextToHTMLHelper ll8(QStringLiteral("a.!#$%&'*+-/=?^_`{|}~@bar.baz"), qstrlen("a.!#$%&'*+-/=?^_`{|}~"));
0056     QCOMPARE(ll8.getEmailAddress(), QStringLiteral("a.!#$%&'*+-/=?^_`{|}~@bar.baz"));
0057 
0058     // '@' in domain part
0059     KTextToHTMLHelper ll9(QStringLiteral("foo@bar@bar.baz"), 3);
0060     QVERIFY(ll9.getEmailAddress().isEmpty());
0061 
0062     // domain part without dot
0063     KTextToHTMLHelper lla(QStringLiteral("foo@bar"), 3);
0064     QVERIFY(lla.getEmailAddress().isEmpty());
0065     KTextToHTMLHelper llb(QStringLiteral("foo@bar."), 3);
0066     QVERIFY(llb.getEmailAddress().isEmpty());
0067     KTextToHTMLHelper llc(QStringLiteral(".foo@bar"), 4);
0068     QVERIFY(llc.getEmailAddress().isEmpty());
0069     KTextToHTMLHelper lld(QStringLiteral("foo@bar "), 3);
0070     QVERIFY(lld.getEmailAddress().isEmpty());
0071     KTextToHTMLHelper lle(QStringLiteral(" foo@bar"), 4);
0072     QVERIFY(lle.getEmailAddress().isEmpty());
0073     KTextToHTMLHelper llf(QStringLiteral("foo@bar-bar"), 3);
0074     QVERIFY(llf.getEmailAddress().isEmpty());
0075 
0076     // empty domain part
0077     KTextToHTMLHelper llg(QStringLiteral("foo@"), 3);
0078     QVERIFY(llg.getEmailAddress().isEmpty());
0079     KTextToHTMLHelper llh(QStringLiteral("foo@."), 3);
0080     QVERIFY(llh.getEmailAddress().isEmpty());
0081     KTextToHTMLHelper lli(QStringLiteral("foo@-"), 3);
0082     QVERIFY(lli.getEmailAddress().isEmpty());
0083 
0084     // simple address
0085     KTextToHTMLHelper llj(QStringLiteral("foo@bar.baz"), 3);
0086     QCOMPARE(llj.getEmailAddress(), QStringLiteral("foo@bar.baz"));
0087     KTextToHTMLHelper llk(QStringLiteral("foo@bar.baz."), 3);
0088     QCOMPARE(llk.getEmailAddress(), QStringLiteral("foo@bar.baz"));
0089     KTextToHTMLHelper lll(QStringLiteral(".foo@bar.baz"), 4);
0090     QCOMPARE(lll.getEmailAddress(), QStringLiteral("foo@bar.baz"));
0091     KTextToHTMLHelper llm(QStringLiteral("foo@bar.baz-"), 3);
0092     QCOMPARE(llm.getEmailAddress(), QStringLiteral("foo@bar.baz"));
0093     KTextToHTMLHelper lln(QStringLiteral("-foo@bar.baz"), 4);
0094     QCOMPARE(lln.getEmailAddress(), QStringLiteral("foo@bar.baz"));
0095     KTextToHTMLHelper llo(QStringLiteral("foo@bar.baz "), 3);
0096     QCOMPARE(llo.getEmailAddress(), QStringLiteral("foo@bar.baz"));
0097     KTextToHTMLHelper llp(QStringLiteral(" foo@bar.baz"), 4);
0098     QCOMPARE(llp.getEmailAddress(), QStringLiteral("foo@bar.baz"));
0099     KTextToHTMLHelper llq(QStringLiteral("foo@bar-bar.baz"), 3);
0100     QCOMPARE(llq.getEmailAddress(), QStringLiteral("foo@bar-bar.baz"));
0101 }
0102 
0103 void KTextToHTMLTest::testGetUrl()
0104 {
0105     QStringList brackets;
0106     brackets << QString() << QString(); // no brackets
0107     brackets << QStringLiteral("<") << QStringLiteral(">");
0108     brackets << QStringLiteral("[") << QStringLiteral("]");
0109     brackets << QStringLiteral("\"") << QStringLiteral("\"");
0110     brackets << QStringLiteral("<link>") << QStringLiteral("</link>");
0111 
0112     for (int i = 0; i < brackets.count(); i += 2) {
0113         testGetUrl2(brackets[i], brackets[i + 1]);
0114     }
0115 }
0116 
0117 void KTextToHTMLTest::testGetUrl2(const QString &left, const QString &right)
0118 {
0119     QStringList schemas;
0120     schemas << QStringLiteral("http://");
0121     schemas << QStringLiteral("https://");
0122     schemas << QStringLiteral("vnc://");
0123     schemas << QStringLiteral("fish://");
0124     schemas << QStringLiteral("ftp://");
0125     schemas << QStringLiteral("ftps://");
0126     schemas << QStringLiteral("sftp://");
0127     schemas << QStringLiteral("smb://");
0128     schemas << QStringLiteral("file://");
0129     schemas << QStringLiteral("irc://");
0130     schemas << QStringLiteral("ircs://");
0131 
0132     QStringList urls;
0133     urls << QStringLiteral("en.wikipedia.org/wiki/%C3%98_(disambiguation)");
0134     urls << QStringLiteral("www.kde.org");
0135     urls << QStringLiteral("user@www.kde.org");
0136     urls << QStringLiteral("user:pass@www.kde.org");
0137     urls << QStringLiteral("user:pass@www.kde.org:1234");
0138     urls << QStringLiteral("user:pass@www.kde.org:1234/sub/path");
0139     urls << QStringLiteral("user:pass@www.kde.org:1234/sub/path?a=1");
0140     urls << QStringLiteral("user:pass@www.kde.org:1234/sub/path?a=1#anchor");
0141     urls << QStringLiteral("user:pass@www.kde.org:1234/sub/\npath  \n /long/  path \t  ?a=1#anchor");
0142     urls << QStringLiteral("user:pass@www.kde.org:1234/sub/path/special(123)?a=1#anchor");
0143     urls << QStringLiteral("user:pass@www.kde.org:1234/sub/path:with:colon/special(123)?a=1#anchor");
0144     urls << QStringLiteral("user:pass@www.kde.org:1234/sub/path:with:colon/special(123)?a=1#anchor[bla");
0145     urls << QStringLiteral("user:pass@www.kde.org:1234/sub/path:with:colon/special(123)?a=1#anchor[bla]");
0146     urls << QStringLiteral("user:pass@www.kde.org:1234/\nsub/path:with:colon/\nspecial(123)?\na=1#anchor[bla]");
0147     urls << QStringLiteral("user:pass@www.kde.org:1234/  \n  sub/path:with:colon/  \n\t   \t   special(123)?") + QStringLiteral("\n\t  \n\t   a=1#anchor[bla]");
0148 
0149     for (const QString &schema : std::as_const(schemas)) {
0150         for (QString url : std::as_const(urls)) {
0151             // by definition: if the URL is enclosed in brackets, the URL itself is not allowed
0152             // to contain the closing bracket, as this would be detected as the end of the URL
0153             if ((left.length() == 1) && (url.contains(right[0]))) {
0154                 continue;
0155             }
0156 
0157             // if the url contains a whitespace, it must be enclosed with brackets
0158             if ((url.contains(QLatin1Char('\n')) || url.contains(QLatin1Char('\t')) || url.contains(QLatin1Char(' '))) && left.isEmpty()) {
0159                 continue;
0160             }
0161 
0162             QString test(left + schema + url + right);
0163             KTextToHTMLHelper ll(test, left.length());
0164             QString gotUrl = ll.getUrl();
0165 
0166             // we want to have the url without whitespace
0167             url.remove(QLatin1Char(' '));
0168             url.remove(QLatin1Char('\n'));
0169             url.remove(QLatin1Char('\t'));
0170 
0171             bool ok = (gotUrl == (schema + url));
0172             if (!ok) {
0173                 qDebug() << "got:" << gotUrl;
0174             }
0175             QVERIFY2(ok, qPrintable(test));
0176         }
0177     }
0178 
0179     QStringList urlsWithoutSchema;
0180     urlsWithoutSchema << QStringLiteral(".kde.org");
0181     urlsWithoutSchema << QStringLiteral(".kde.org:1234/sub/path");
0182     urlsWithoutSchema << QStringLiteral(".kde.org:1234/sub/path?a=1");
0183     urlsWithoutSchema << QStringLiteral(".kde.org:1234/sub/path?a=1#anchor");
0184     urlsWithoutSchema << QStringLiteral(".kde.org:1234/sub/path/special(123)?a=1#anchor");
0185     urlsWithoutSchema << QStringLiteral(".kde.org:1234/sub/path:with:colon/special(123)?a=1#anchor");
0186     urlsWithoutSchema << QStringLiteral(".kde.org:1234/sub/path:with:colon/special(123)?a=1#anchor[bla");
0187     urlsWithoutSchema << QStringLiteral(".kde.org:1234/sub/path:with:colon/special(123)?a=1#anchor[bla]");
0188     urlsWithoutSchema << QStringLiteral(".kde.org:1234/\nsub/path:with:colon/\nspecial(123)?\na=1#anchor[bla]");
0189     urlsWithoutSchema << QStringLiteral(".kde.org:1234/  \n  sub/path:with:colon/  \n\t   \t   special(123)?") + QStringLiteral("\n\t  \n\t   a=1#anchor[bla]");
0190 
0191     QStringList starts;
0192     starts << QStringLiteral("www") << QStringLiteral("ftp") << QStringLiteral("news:www");
0193 
0194     for (const QString &start : std::as_const(starts)) {
0195         for (QString url : std::as_const(urlsWithoutSchema)) {
0196             // by definition: if the URL is enclosed in brackets, the URL itself is not allowed
0197             // to contain the closing bracket, as this would be detected as the end of the URL
0198             if ((left.length() == 1) && (url.contains(right[0]))) {
0199                 continue;
0200             }
0201 
0202             // if the url contains a whitespace, it must be enclosed with brackets
0203             if ((url.contains(QLatin1Char('\n')) || url.contains(QLatin1Char('\t')) || url.contains(QLatin1Char(' '))) && left.isEmpty()) {
0204                 continue;
0205             }
0206 
0207             QString test(left + start + url + right);
0208             KTextToHTMLHelper ll(test, left.length());
0209             QString gotUrl = ll.getUrl();
0210 
0211             // we want to have the url without whitespace
0212             url.remove(QLatin1Char(' '));
0213             url.remove(QLatin1Char('\n'));
0214             url.remove(QLatin1Char('\t'));
0215 
0216             bool ok = (gotUrl == (start + url));
0217             if (!ok) {
0218                 qDebug() << "got:" << gotUrl;
0219             }
0220             QVERIFY2(ok, qPrintable(gotUrl));
0221         }
0222     }
0223 
0224     // test max url length
0225     QString url = QStringLiteral("https://www.kde.org/this/is/a_very_loooooong_url/test/test/test");
0226     {
0227         KTextToHTMLHelper ll(url, 0, 10);
0228         QVERIFY(ll.getUrl().isEmpty()); // url too long
0229     }
0230     {
0231         KTextToHTMLHelper ll(url, 0, url.length() - 1);
0232         QVERIFY(ll.getUrl().isEmpty()); // url too long
0233     }
0234     {
0235         KTextToHTMLHelper ll(url, 0, url.length());
0236         QCOMPARE(ll.getUrl(), url);
0237     }
0238     {
0239         KTextToHTMLHelper ll(url, 0, url.length() + 1);
0240         QCOMPARE(ll.getUrl(), url);
0241     }
0242 
0243     // mailto
0244     {
0245         QString addr = QStringLiteral("mailto:test@kde.org");
0246         QString test(left + addr + right);
0247         KTextToHTMLHelper ll(test, left.length());
0248 
0249         QString gotUrl = ll.getUrl();
0250 
0251         bool ok = (gotUrl == addr);
0252         if (!ok) {
0253             qDebug() << "got:" << gotUrl;
0254         }
0255         QVERIFY2(ok, qPrintable(gotUrl));
0256     }
0257 }
0258 
0259 void KTextToHTMLTest::testHtmlConvert_data()
0260 {
0261     QTest::addColumn<QString>("plainText");
0262     QTest::addColumn<RuqolaKTextToHTML::Options>("flags");
0263     QTest::addColumn<QString>("htmlText");
0264     // Linker error when using PreserveSpaces, therefore the hardcoded 0x01 or 0x09
0265 
0266     // Test preserving whitespace correctly
0267     QTest::newRow("") << " foo" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces) << "&nbsp;foo";
0268     QTest::newRow("") << "  foo" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces) << "&nbsp;&nbsp;foo";
0269     QTest::newRow("") << "  foo  " << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces) << "&nbsp;&nbsp;foo&nbsp;&nbsp;";
0270     QTest::newRow("") << "  foo " << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces) << "&nbsp;&nbsp;foo&nbsp;";
0271     QTest::newRow("") << "bla bla bla bla bla" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces) << "bla bla bla bla bla";
0272     QTest::newRow("") << "bla bla bla \n  bla bla bla " << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces)
0273                       << "bla bla bla&nbsp;<br />\n&nbsp;&nbsp;bla bla bla&nbsp;";
0274     QTest::newRow("") << "bla bla  bla" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces) << "bla bla&nbsp;&nbsp;bla";
0275     QTest::newRow("") << " bla bla \n bla bla a\n  bla bla " << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces)
0276                       << "&nbsp;bla bla&nbsp;<br />\n&nbsp;bla bla a<br />\n"
0277                          "&nbsp;&nbsp;bla bla&nbsp;";
0278 
0279     // Test highlighting with *, / and _
0280     QTest::newRow("") << "Ce paragraphe _contient_ des mots ou des _groupes de mots_ à mettre en"
0281                          " forme…"
0282                       << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces | RuqolaKTextToHTML::HighlightText)
0283                       << "Ce paragraphe <i>contient</i> des mots ou des"
0284                          " <i>groupes de mots</i> à mettre en forme…";
0285     QTest::newRow("punctation-bug") << "Ce texte *a l'air* de _fonctionner_, à condition"
0286                                        " d’utiliser le guillemet ASCII."
0287                                     << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces | RuqolaKTextToHTML::HighlightText)
0288                                     << "Ce texte <b>*a l'air*</b> de <u>_fonctionner_</u>, à"
0289                                        " condition d’utiliser le guillemet ASCII.";
0290     QTest::newRow("punctation-bug") << "Un répertoire /est/ un *dossier* où on peut mettre des"
0291                                        " *fichiers*."
0292                                     << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces | RuqolaKTextToHTML::HighlightText)
0293                                     << "Un répertoire <i>/est/</i> un"
0294                                        " <b>*dossier*</b> où on peut mettre des <b>*fichiers*</b>.";
0295     QTest::newRow("punctation-bug") << "*BLA BLA BLA BLA*." << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces | RuqolaKTextToHTML::HighlightText)
0296                                     << "<b>BLA BLA BLA BLA</b>.";
0297     QTest::newRow("") << "Je vais tenter de repérer des faux positif*"
0298                       << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces | RuqolaKTextToHTML::HighlightText)
0299                       << "Je vais tenter de repérer des faux positif*";
0300     QTest::newRow("") << "*Ouais !* *Yes!*" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces | RuqolaKTextToHTML::HighlightText)
0301                       << "<b>Ouais !</b> <b>Yes!</b>";
0302 
0303     QTest::newRow("multispace") << "*Ouais     foo*" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces | RuqolaKTextToHTML::HighlightText)
0304                                 << "<b>Ouais     foo</b>";
0305 
0306     QTest::newRow("multispace3") << "*Ouais:     foo*" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces | RuqolaKTextToHTML::HighlightText)
0307                                  << "<b>Ouais:     foo</b>";
0308 
0309     QTest::newRow("multi-") << "** Ouais:  foo **" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces | RuqolaKTextToHTML::HighlightText)
0310                             << "** Ouais:&nbsp;&nbsp;foo **";
0311 
0312     QTest::newRow("multi-") << "*** Ouais:  foo ***" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces | RuqolaKTextToHTML::HighlightText)
0313                             << "*** Ouais:&nbsp;&nbsp;foo ***";
0314 
0315     QTest::newRow("nohtmlversion") << "* Ouais:     foo *" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces | RuqolaKTextToHTML::HighlightText)
0316                                    << "<b>Ouais:     foo</b>";
0317 
0318     QTest::newRow("nohtmlversion2") << "*Ouais:     foo *" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces | RuqolaKTextToHTML::HighlightText)
0319                                     << "<b>Ouais:     foo</b>";
0320 
0321     QTest::newRow("nohtmlversion3") << "* Ouais:     foo*" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces | RuqolaKTextToHTML::HighlightText)
0322                                     << "<b>Ouais:     foo</b>";
0323 
0324     QTest::newRow("nohtmlversion4") << "* Ouais: *ff sfsdf* foo *"
0325                                     << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces | RuqolaKTextToHTML::HighlightText)
0326                                     << "* Ouais: <b>ff sfsdf</b> foo *";
0327 
0328     QTest::newRow("") << "the /etc/{rsyslog.d,syslog-ng.d}/package.rpmnew file"
0329                       << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces | RuqolaKTextToHTML::HighlightText)
0330                       << "the /etc/{rsyslog.d,syslog-ng.d}/package.rpmnew file";
0331 
0332     // This test has problems with the encoding, apparently.
0333     // QTest::newRow( "" ) << "*Ça fait plaisir de pouvoir utiliser des lettres accentuées dans du"
0334     //                       " texte mis en forme*." << 0x09 << "<b>Ça fait plaisir de pouvoir"
0335     //                       " utiliser des lettres accentuées dans du texte mis en forme</b>.";
0336 
0337     // Bug reported by dfaure, the <hostname> would get lost
0338     QTest::newRow("") << "QUrl url(\"http://strange<hostname>/\");"
0339                       << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::ReplaceSmileys | RuqolaKTextToHTML::HighlightText)
0340                       << "QUrl url(&quot;<a href=\"http://strange<hostname>/\">"
0341                          "http://strange&lt;hostname&gt;/</a>&quot;);";
0342 
0343     // Bug: 211128 - plain text emails should not replace ampersand & with &amp;
0344     QTest::newRow("bug211128") << "https://green-site/?Ticket=85&Page=next" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces)
0345                                << "<a href=\"https://green-site/?Ticket=85&Page=next\">"
0346                                   "https://green-site/?Ticket=85&amp;Page=next</a>";
0347 
0348     QTest::newRow("dotBeforeEnd") << "Look at this file: www.example.com/example.h" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces)
0349                                   << "Look at this file: <a href=\"http://www.example.com/example.h\">"
0350                                      "www.example.com/example.h</a>";
0351     QTest::newRow("dotInMiddle") << "Look at this file: www.example.com/.bashrc" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces)
0352                                  << "Look at this file: <a href=\"http://www.example.com/.bashrc\">"
0353                                     "www.example.com/.bashrc</a>";
0354 
0355     // A dot at the end of an URL is explicitly ignored
0356     QTest::newRow("dotAtEnd") << "Look at this file: www.example.com/test.cpp." << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces)
0357                               << "Look at this file: <a href=\"http://www.example.com/test.cpp\">"
0358                                  "www.example.com/test.cpp</a>.";
0359 
0360     // Bug 313719 - URL in parenthesis
0361     QTest::newRow("url-in-parenthesis-1") << "KDE (website https://www.kde.org)" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces)
0362                                           << "KDE (website <a href=\"https://www.kde.org\">https://www.kde.org</a>)";
0363     QTest::newRow("url-in-parenthesis-2") << "KDE website (https://www.kde.org)" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces)
0364                                           << "KDE website (<a href=\"https://www.kde.org\">https://www.kde.org</a>)";
0365     QTest::newRow("url-in-parenthesis-3") << "bla (https://www.kde.org - section 5.2)" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces)
0366                                           << "bla (<a href=\"https://www.kde.org\">https://www.kde.org</a> - section 5.2)";
0367 
0368     // Fix url as foo <<url> <url>> when we concatened them.
0369     QTest::newRow("url-with-url")
0370         << "foo <https://www.kde.org/ <https://www.kde.org/>>" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces)
0371         << "foo &lt;<a href=\"https://www.kde.org/ \">https://www.kde.org/ </a>&lt;<a href=\"https://www.kde.org/\">https://www.kde.org/</a>&gt;&gt;";
0372 
0373     // Fix url exploit
0374     QTest::newRow("url-exec-html") << "https://\"><!--" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces) << "https://&quot;&gt;&lt;!--";
0375 
0376     QTest::newRow("url-exec-html-2") << "https://192.168.1.1:\"><!--" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces)
0377                                      << "https://192.168.1.1:&quot;&gt;&lt;!--";
0378 
0379     QTest::newRow("url-exec-html-3") << "https://<IP>:\"><!--" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces)
0380                                      << "https://&lt;IP&gt;:&quot;&gt;&lt;!--";
0381 
0382     QTest::newRow("url-exec-html-4") << "https://<IP>:/\"><!--" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces)
0383                                      << "https://&lt;IP&gt;:/&quot;&gt;&lt;!--";
0384 
0385     QTest::newRow("url-exec-html-5") << "https://<IP>:/\"><script>alert(1);</script><!--" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces)
0386                                      << "https://&lt;IP&gt;:/&quot;&gt;&lt;script&gt;alert(1);&lt;/script&gt;&lt;!--";
0387 
0388     QTest::newRow("url-exec-html-6") << "https://<IP>:/\"><script>alert(1);</script><!--\nTest2"
0389                                      << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces)
0390                                      << "https://&lt;IP&gt;:/&quot;&gt;&lt;script&gt;alert(1);&lt;/script&gt;&lt;!--\nTest2";
0391 
0392     QTest::newRow("url-with-ref-in-[") << "https://www.kde.org[1]" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces)
0393                                        << "<a href=\"https://www.kde.org\">https://www.kde.org</a>[1]";
0394 
0395     QTest::newRow("url-with-ref-in-[2") << "[http://www.example.org/][whatever]" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces)
0396                                         << "[<a href=\"http://www.example.org/\">http://www.example.org/</a>][whatever]";
0397     // Bug 346132
0398     QTest::newRow("url-with-ref-in-<") << "http://www.foo.bar<http://foo.bar/>" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces)
0399                                        << "<a href=\"http://www.foo.bar\">http://www.foo.bar</a>&lt;<a href=\"http://foo.bar/\">http://foo.bar/</a>&gt;";
0400 
0401     QTest::newRow("url-with-ref-in-]") << "[Please visit our booth 24-25 http://example.com/]" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces)
0402                                        << "[Please visit our booth 24-25 <a href=\"http://example.com/\">http://example.com/</a>]";
0403 
0404     QTest::newRow("two url with space") << "http://www.kde.org/standards/kcfg/1.0 http://www.kde.org/"
0405                                         << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces)
0406                                         << "<a href=\"http://www.kde.org/standards/kcfg/1.0\">http://www.kde.org/standards/kcfg/1.0</a> <a "
0407                                            "href=\"http://www.kde.org/\">http://www.kde.org/</a>";
0408 
0409     // Bug kmail
0410     QTest::newRow("two url with space-2")
0411         << "@@ -55,6 +55,10 @@ xsi:schemaLocation=\"http://www.kde.org/standards/kcfg/1.0 http://www.kde.org/"
0412         << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces)
0413         << "@@ -55,6 +55,10 @@ xsi:schemaLocation=&quot;<a href=\"http://www.kde.org/standards/kcfg/1.0\">http://www.kde.org/standards/kcfg/1.0</a> <a "
0414            "href=\"http://www.kde.org/\">http://www.kde.org/</a>";
0415 
0416     const auto opt = RuqolaKTextToHTML::PreserveSpaces | RuqolaKTextToHTML::ConvertPhoneNumbers;
0417     // tel: urls
0418     QTest::newRow("tel url compact") << "bla bla <tel:+491234567890> bla bla" << opt
0419                                      << "bla bla &lt;<a href=\"tel:+491234567890\">tel:+491234567890</a>&gt; bla bla";
0420     QTest::newRow("tel url fancy") << "bla bla tel:+49-321-123456 bla bla" << opt << "bla bla <a href=\"tel:+49-321-123456\">tel:+49-321-123456</a> bla bla";
0421 
0422     // negative tel: url tests
0423     QTest::newRow("empty tel url") << "bla tel: blub" << opt << "bla tel: blub";
0424 
0425     // phone numbers
0426     QTest::newRow("tel compact international") << "call +49123456789, then hang up" << opt
0427                                                << "call <a href=\"tel:+49123456789\">+49123456789</a>, then hang up";
0428     QTest::newRow("tel parenthesis/spaces international")
0429         << "phone:+33 (01) 12 34 56 78 blub" << opt << "phone:<a href=\"tel:+330112345678\">+33 (01) 12 34 56 78</a> blub";
0430     QTest::newRow("tel dashes international") << "bla +44-321-1-234-567" << opt << "bla <a href=\"tel:+443211234567\">+44-321-1-234-567</a>";
0431     QTest::newRow("tel dashes/spaces international") << "+1 123-456-7000 blub" << opt << "<a href=\"tel:+11234567000\">+1 123-456-7000</a> blub";
0432     QTest::newRow("tel spaces international") << "bla +32 1 234 5678 blub" << opt << "bla <a href=\"tel:+3212345678\">+32 1 234 5678</a> blub";
0433     QTest::newRow("tel slash domestic") << "bla 030/12345678 blub" << opt << "bla <a href=\"tel:03012345678\">030/12345678</a> blub";
0434     QTest::newRow("tel slash/space domestic") << "Tel.: 089 / 12 34 56 78" << opt << "Tel.: <a href=\"tel:08912345678\">089 / 12 34 56 78</a>";
0435     QTest::newRow("tel follow by parenthesis") << "Telefon: 0 18 05 / 12 23 46 (14 Cent/Min.*)" << opt
0436                                                << "Telefon: <a href=\"tel:01805122346\">0 18 05 / 12 23 46</a> (14 Cent/Min.*)";
0437     QTest::newRow("tel space single digit at end") << "0123/123 456 7" << opt << "<a href=\"tel:01231234567\">0123/123 456 7</a>";
0438     QTest::newRow("tel space around dash") << "bla +49 (0) 12 23 - 45 6000 blub" << opt
0439                                            << "bla <a href=\"tel:+4901223456000\">+49 (0) 12 23 - 45 6000</a> blub";
0440     QTest::newRow("tel two numbers speparated by dash")
0441         << "bla +49 (0) 12 23 46 78 - +49 0123/123 456 78 blub" << opt
0442         << "bla <a href=\"tel:+49012234678\">+49 (0) 12 23 46 78</a> - <a href=\"tel:+49012312345678\">+49 0123/123 456 78</a> blub";
0443 
0444     // negative tests for phone numbers
0445     QTest::newRow("non-tel number") << "please send 1200 cakes" << opt << "please send 1200 cakes";
0446     QTest::newRow("non-tel alpha-numeric") << "bla 1-123-456-ABCD blub" << opt << "bla 1-123-456-ABCD blub";
0447     QTest::newRow("non-tel alpha prefix") << "ABCD0123-456-789" << opt << "ABCD0123-456-789";
0448     QTest::newRow("non-tel date") << "bla 02/03/2019 blub" << opt << "bla 02/03/2019 blub";
0449     QTest::newRow("non-tel too long") << "bla +012-4567890123456 blub" << opt << "bla +012-4567890123456 blub";
0450     QTest::newRow("non-tel unbalanced") << "bla +012-456789(01 blub" << opt << "bla +012-456789(01 blub";
0451     QTest::newRow("non-tel nested") << "bla +012-4(56(78)90)1 blub" << opt << "bla +012-4(56(78)90)1 blub";
0452     QTest::newRow("tel extraction disabled") << "call +49123456789 now" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces)
0453                                              << "call +49123456789 now";
0454 
0455     QTest::newRow("bug-414360")
0456         << "https://www.openstreetmap.org/directions?engine=graphhopper_foot&route=44.85765%2C-0.55931%3B44.85713%2C-0.56117#map=18/44.85756/-0.56094"
0457         << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces)
0458         << "<a "
0459            "href=\"https://www.openstreetmap.org/directions?engine=graphhopper_foot&route=44.85765%2C-0.55931%3B44.85713%2C-0.56117#map=18/44.85756/"
0460            "-0.56094\">https://www.openstreetmap.org/directions?engine=graphhopper_foot&amp;route=44.85765%2C-0.55931%3B44.85713%2C-0.56117#map=18/44.85756/"
0461            "-0.56094</a>";
0462 
0463     // xmpp bug 422291
0464     QTest::newRow("xmpp1") << "xmpp:username@server.tld" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces)
0465                            << "<a href=\"xmpp:username@server.tld\">xmpp:username@server.tld</a>";
0466     QTest::newRow("xmpp2") << "xmpp:conversations@conference.siacs.eu" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces)
0467                            << "<a href=\"xmpp:conversations@conference.siacs.eu\">xmpp:conversations@conference.siacs.eu</a>";
0468     QTest::newRow("xmpp3") << "xmpp:conversations@conference.siacs.eu?join" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces)
0469                            << "<a href=\"xmpp:conversations@conference.siacs.eu?join\">xmpp:conversations@conference.siacs.eu?join</a>";
0470 
0471     // Test news: only
0472     QTest::newRow("news") << "news: " << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces) << "news:&nbsp;";
0473 
0474     QTest::newRow("ftp") << "ftp: " << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces) << "ftp:&nbsp;";
0475     QTest::newRow("mailto") << "mailto: " << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces) << "mailto:&nbsp;";
0476     QTest::newRow("empty") << "" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces) << "";
0477 
0478     QTest::newRow("spaceafter") << "_test1: test2 _" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces | RuqolaKTextToHTML::HighlightText)
0479                                 << "<i>test1: test2</i>";
0480     QTest::newRow("spacebefore") << "_ test1: bla_" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces | RuqolaKTextToHTML::HighlightText)
0481                                  << "<i>test1: bla</i>";
0482     QTest::newRow("spacebeforeandafter") << "_ test1: blo _" << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces | RuqolaKTextToHTML::HighlightText)
0483                                          << "<i>test1: blo</i>";
0484     QTest::newRow("spacebeforeandafter2") << "__ test1: blo _"
0485                                           << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces | RuqolaKTextToHTML::HighlightText)
0486                                           << "<i>test1: blo</i>";
0487     QTest::newRow("spacebeforeandafter3") << "__ test1: blo __"
0488                                           << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces | RuqolaKTextToHTML::HighlightText)
0489                                           << "<i>test1: blo</i>";
0490     QTest::newRow("spacebeforeandafter4") << "___ test1: blo _"
0491                                           << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces | RuqolaKTextToHTML::HighlightText)
0492                                           << "<i>test1: blo</i>";
0493     QTest::newRow("spacebeforeandafter5") << "_ test1: blo __"
0494                                           << RuqolaKTextToHTML::Options(RuqolaKTextToHTML::PreserveSpaces | RuqolaKTextToHTML::HighlightText)
0495                                           << "<i>test1: blo</i>";
0496 }
0497 
0498 void KTextToHTMLTest::testHtmlConvert()
0499 {
0500     QFETCH(QString, plainText);
0501     QFETCH(RuqolaKTextToHTML::Options, flags);
0502     QFETCH(QString, htmlText);
0503 
0504     QEXPECT_FAIL("punctation-bug", "Linklocator does not properly detect punctation as boundaries", Continue);
0505     QEXPECT_FAIL("spacebeforeandafter3", "multi _/*/~ at end is not supported yet because we use InvertedGreedinessOption", Continue);
0506     QEXPECT_FAIL("spacebeforeandafter5", "multi _/*/~ at end is not supported yet because we use InvertedGreedinessOption", Continue);
0507 
0508     const QString actualHtml = RuqolaKTextToHTML::convertToHtml(plainText, flags);
0509     QCOMPARE(actualHtml, htmlText);
0510 }
0511 
// Short alias for QStringLiteral, used by the data tables that follow.
#define s(text) QStringLiteral(text)
0513 
0514 void KTextToHTMLTest::testEmoticons_data()
0515 {
0516     QTest::addColumn<QString>("input");
0517     QTest::addColumn<QString>("output");
0518     QTest::newRow("empty") << QString() << QString();
0519     QTest::newRow("trailing") << s("Hello :-)") << s("Hello :-)");
0520     QTest::newRow("embedded") << s("Hello :-) How are you?") << s("Hello :-) How are you?");
0521     QTest::newRow("leading") << s(":-( Bye") << s(":-( Bye");
0522     QTest::newRow("embedded-html") << s("<b>:(</b>") << s("&lt;b&gt;:(&lt;/b&gt;");
0523     QTest::newRow("html-attribute") << s("<img src=\"...\" title=\":-)\" />") << s("&lt;img src=&quot;...&quot; title=&quot;:-)&quot; /&gt;");
0524     QTest::newRow("broken-1") << s(":))") << s(":))");
0525     QTest::newRow("broken-4") << s(":D and :-D are not the same as :d and :-d") << s(":D and :-D are not the same as :d and :-d");
0526     QTest::newRow("broken-5") << s("4d:D>:)F:/&gt;:-(:Pu:d9") << s("4d:D&gt;:)F:/&amp;gt;:-(:Pu:d9");
0527     QTest::newRow("broken-6") << s("&lt;::pvar:: test=1&gt;") << s("&amp;lt;::pvar:: test=1&amp;gt;");
0528     QTest::newRow("working-5") << s("(&amp;)") << s("(&amp;amp;)");
0529     QTest::newRow("working-6") << s("Bla (&nbsp;)") << s("Bla (&amp;nbsp;)");
0530     QTest::newRow("working-7") << s("a non-breaking space (&nbsp;) character") << s("a non-breaking space (&amp;nbsp;) character");
0531 
0532     QTest::newRow("angle-bracket-1") << s(">:)") << s("&gt;:)");
0533     QTest::newRow("angle-bracket-2") << s("<b>:)") << s("&lt;b&gt;:)");
0534 }
0535 
0536 void KTextToHTMLTest::testEmoticons()
0537 {
0538     QFETCH(QString, input);
0539     QFETCH(QString, output);
0540     QCOMPARE(RuqolaKTextToHTML::convertToHtml(input, RuqolaKTextToHTML::ReplaceSmileys | RuqolaKTextToHTML::IgnoreUrls), output);
0541 }
0542 
0543 void KTextToHTMLTest::testEmoticonsNoReplace_data()
0544 {
0545     QTest::addColumn<QString>("input");
0546     QTest::newRow("empty") << QString();
0547     QTest::newRow("no-space-spearator") << s("Very happy! :-):-)");
0548     QTest::newRow("broken-2") << s("In a sentence:practical example");
0549     QTest::newRow("broken-8") << s("-+-[-:-(-:-)-:-]-+-");
0550     QTest::newRow("broken-9") << s("::shrugs::");
0551     QTest::newRow("broken-10") << s(":Ptesting:P");
0552     QTest::newRow("working-1") << s(":):)");
0553     QTest::newRow("working-4") << s("http://www.kde.org");
0554     QTest::newRow("working-3") << s("End of sentence:p");
0555     QTest::newRow("xmpp-1") << s("an xmpp emoticon (%)");
0556 }
0557 
0558 void KTextToHTMLTest::testEmoticonsNoReplace()
0559 {
0560     QFETCH(QString, input);
0561     QCOMPARE(RuqolaKTextToHTML::convertToHtml(input, RuqolaKTextToHTML::ReplaceSmileys | RuqolaKTextToHTML::IgnoreUrls), input);
0562 }
0563 
0564 #include "moc_ruqolaktexttohtmltest.cpp"