File indexing completed on 2023-11-26 04:55:47

0001 /*
0002  * Text Parser common class
0003  * Copyright (C) 2004 Peter Simonsson <psn@linux.se>
0004  * Copyright (C) 2006-2008 Eike Hein <hein@kde.org>
0005  * Copyright (C) 2011 Przemek Czekaj <xcojack@gmail.com>
0006  *
0007  * This library is free software; you can redistribute it and/or
0008  * modify it under the terms of the GNU Lesser General Public
0009  * License as published by the Free Software Foundation; either
0010  * version 2.1 of the License, or (at your option) any later version.
0011  *
0012  * This library is distributed in the hope that it will be useful,
0013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
0014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0015  * Lesser General Public License for more details.
0016  *
0017  * You should have received a copy of the GNU Lesser General Public
0018  * License along with this library; if not, write to the Free Software
0019  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
0020  */
0021 
0022 #include "text-parser.h"
0023 
0024 #include <QtCore/QLatin1String>
0025 
0026 namespace KTp
0027 {
0028 
// Storage for the shared TextParser object. It starts out null and is
// assigned by TextParser::instance() the first time that function is called.
TextParser* TextParser::s_instance = nullptr;
0030 
/**
 * Regular expression used to detect URLs and e-mail addresses in text.
 *
 * A URL match may start in one of three ways: with an explicit scheme (a
 * letter followed by word characters, dots or dashes, then a colon and one
 * to three slashes), with "www" optionally followed by up to three digits
 * and a dot, or with a bare host name that has a two to four letter suffix
 * followed by a slash. The remainder may contain parenthesised parts nested
 * one level deep. A second top-level alternative matches e-mail style
 * addresses of the form local@domain.suffix.
 *
 * Capture group 1 spans the whole match. Capture group 2 holds the explicit
 * scheme prefix and is empty when the matched text carried none;
 * TextParser::extractUrlData() reads both groups.
 *
 * The %1..%6 place markers are replaced by the typographic quote characters
 * U+00AB, U+00BB, U+201C, U+201D, U+2018 and U+2019. They end up inside the
 * negated character classes for the final character, so a match never ends
 * on one of those quotes.
 */
static QRegExp s_urlPattern(QString::fromLatin1("\\b((?:(?:([a-z][\\w\\.-]+:/{1,3})|www\\d{0,3}[.]|[a-z0-9.\\-]+[.][a-z]{2,4}/)(?:[^\\s()<>]+|\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\))+(?:\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\)|\\}\\]|[^\\s`!()\\[\\]{};:'\".,<>?%1%2%3%4%5%6])|[a-z0-9.\\-+_]+@[a-z0-9.\\-]+[.][a-z]{1,5}[^\\s/`!()\\[\\]{};:'\".,<>?%1%2%3%4%5%6]))")
                            .arg(QChar(0x00AB)).arg(QChar(0x00BB)).arg(QChar(0x201C)).arg(QChar(0x201D)).arg(QChar(0x2018)).arg(QChar(0x2019)));
0036 
0037 TextParser::TextParser(QObject* parent)
0038     : QObject(parent)
0039 {
0040 }
0041 
0042 TextParser* TextParser::instance()
0043 {
0044     if (!s_instance) {
0045         s_instance = new TextParser(nullptr);
0046     }
0047 
0048     return s_instance;
0049 }
0050 
0051 TextUrlData TextParser::extractUrlData(const QString& text, bool doUrlFixup)
0052 {
0053     TextUrlData data;
0054     QString htmlText(text);
0055     s_urlPattern.setCaseSensitivity(Qt::CaseInsensitive);
0056 
0057     int pos = 0;
0058     int urlLen = 0;
0059 
0060     QString protocol;
0061     QString href;
0062 
0063     while ((pos = s_urlPattern.indexIn(htmlText, pos)) >= 0) {
0064         urlLen = s_urlPattern.matchedLength();
0065         href = htmlText.mid(pos, urlLen);
0066 
0067         data.urlRanges << QPair<int, int>(pos, href.length());
0068         pos += href.length();
0069 
0070         if (doUrlFixup) {
0071             protocol.clear();
0072             if (s_urlPattern.cap(2).isEmpty()) {
0073                 QString urlPatternCap1(s_urlPattern.cap(1));
0074                 if (urlPatternCap1.contains(QLatin1Char('@'))) {
0075                     protocol = QLatin1String("mailto:");
0076                 } else if (urlPatternCap1.startsWith(QLatin1String("ftp."), Qt::CaseInsensitive)) {
0077                     protocol = QLatin1String("ftp://");
0078                 } else {
0079                     protocol = QLatin1String("http://");
0080                 }
0081             }
0082 
0083             href = protocol + href;
0084             data.fixedUrls.append(href);
0085         }
0086     }
0087     return data;
0088 }
0089 
0090 TextParser::~TextParser()
0091 {
0092 }
0093 
0094 }