File indexing completed on 2023-11-26 04:55:47
0001 /* 0002 * Text Parser common class 0003 * Copyright (C) 2004 Peter Simonsson <psn@linux.se> 0004 * Copyright (C) 2006-2008 Eike Hein <hein@kde.org> 0005 * Copyright (C) 2011 Przemek Czekaj <xcojack@gmail.com> 0006 * 0007 * This library is free software; you can redistribute it and/or 0008 * modify it under the terms of the GNU Lesser General Public 0009 * License as published by the Free Software Foundation; either 0010 * version 2.1 of the License, or (at your option) any later version. 0011 * 0012 * This library is distributed in the hope that it will be useful, 0013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 0014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 0015 * Lesser General Public License for more details. 0016 * 0017 * You should have received a copy of the GNU Lesser General Public 0018 * License along with this library; if not, write to the Free Software 0019 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 0020 */ 0021 0022 #include "text-parser.h" 0023 0024 #include <QtCore/QLatin1String> 0025 0026 namespace KTp 0027 { 0028 0029 TextParser* TextParser::s_instance = nullptr; 0030 0031 /** 0032 * RegExp for url detection 0033 */ 0034 static QRegExp s_urlPattern(QString::fromLatin1("\\b((?:(?:([a-z][\\w\\.-]+:/{1,3})|www\\d{0,3}[.]|[a-z0-9.\\-]+[.][a-z]{2,4}/)(?:[^\\s()<>]+|\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\))+(?:\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\)|\\}\\]|[^\\s`!()\\[\\]{};:'\".,<>?%1%2%3%4%5%6])|[a-z0-9.\\-+_]+@[a-z0-9.\\-]+[.][a-z]{1,5}[^\\s/`!()\\[\\]{};:'\".,<>?%1%2%3%4%5%6]))") 0035 .arg(QChar(0x00AB)).arg(QChar(0x00BB)).arg(QChar(0x201C)).arg(QChar(0x201D)).arg(QChar(0x2018)).arg(QChar(0x2019))); 0036 0037 TextParser::TextParser(QObject* parent) 0038 : QObject(parent) 0039 { 0040 } 0041 0042 TextParser* TextParser::instance() 0043 { 0044 if (!s_instance) { 0045 s_instance = new TextParser(nullptr); 0046 } 0047 0048 return s_instance; 0049 } 0050 0051 TextUrlData TextParser::extractUrlData(const QString& text, bool doUrlFixup) 0052 { 0053 TextUrlData data; 0054 QString htmlText(text); 0055 s_urlPattern.setCaseSensitivity(Qt::CaseInsensitive); 0056 0057 int pos = 0; 0058 int urlLen = 0; 0059 0060 QString protocol; 0061 QString href; 0062 0063 while ((pos = s_urlPattern.indexIn(htmlText, pos)) >= 0) { 0064 urlLen = s_urlPattern.matchedLength(); 0065 href = htmlText.mid(pos, urlLen); 0066 0067 data.urlRanges << QPair<int, int>(pos, href.length()); 0068 pos += href.length(); 0069 0070 if (doUrlFixup) { 0071 protocol.clear(); 0072 if (s_urlPattern.cap(2).isEmpty()) { 0073 QString urlPatternCap1(s_urlPattern.cap(1)); 0074 if (urlPatternCap1.contains(QLatin1Char('@'))) { 0075 protocol = QLatin1String("mailto:"); 0076 } else if (urlPatternCap1.startsWith(QLatin1String("ftp."), Qt::CaseInsensitive)) { 0077 protocol = QLatin1String("ftp://"); 0078 } else { 0079 protocol = QLatin1String("http://"); 0080 } 0081 } 0082 0083 href = protocol + href; 0084 data.fixedUrls.append(href); 0085 } 0086 } 0087 return data; 0088 } 0089 0090 TextParser::~TextParser() 0091 { 0092 } 0093 0094 }