File indexing completed on 2024-10-06 04:31:46
0001 /* This file is part of the KDE project 0002 0003 Copyright (C) 2008 Javier Goday <jgoday @ gmail.com> 0004 First Url regular expression taken from urlview tool by Michael Elkins <me@cs.hmc.edu>. 0005 Regular expression improved by FiNex. 0006 Improvements to regular expression and slotReadFile by Frantisek Ziacik 0007 0008 This program is free software; you can redistribute it and/or 0009 modify it under the terms of the GNU General Public 0010 License as published by the Free Software Foundation; either 0011 version 2 of the License, or (at your option) any later version. 0012 */ 0013 #include "linkimporter.h" 0014 0015 #include <QDebug> 0016 #include <QDir> 0017 #include <QFile> 0018 #include <QIODevice> 0019 #include <QMap> 0020 #include <QRegularExpression> 0021 #include <QTextStream> 0022 0023 #include <KIO/CopyJob> 0024 #include <KLocalizedString> 0025 #include <qregularexpression.h> 0026 0027 // static QString REGULAR_EXPRESSION = "(((https?|ftp|gopher)://|(mailto|file|news):)[^’ <>\"]+|(www|web|w3).[-a-z0-9.]+)[^’ .,;<>\":]"; 0028 // static QString REGULAR_EXPRESSION = "((http|https|ftp|ftps)+([\\:\\w\\d:#@%/;$()~_?\\+-=\\\\.&])*)"; 0029 static QString REGULAR_EXPRESSION = 0030 "(\\w+[:]//" 0031 ")?(((([\\w-]+[.]){1,}(ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|" 0032 "cl|cm|cn|co|com|cr|cs|cu|cv|cx|cy|cz|de|dj|dk|dm|do|dz|ec|edu|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gd|ge|gf|gg|gh|gi|gl|gm|gn|gov|gp|gq|gr|gs|gt|gu|" 0033 "gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|int|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|mg|mh|" 0034 "mil|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|net|nf|ng|ni|nl|no|np|nr|nt|nu|nz|om|org|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|" 0035 "ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|sv|st|sy|sz|tc|td|tf|tg|th|tj|tk|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu|" 0036 "wf|ws|ye|yt|yu|za|zm|zw|aero|biz|coop|info|museum|name|pro|travel))|([0-9]+[.][0-9]+[.][0-9]+[.][0-9]+)))([:][0-9]*)?([?/][\\w~#\\-;%?@&=/.+]*)?(?!\\w)"; 0037 0038 LinkImporter::LinkImporter(const QUrl &url, QObject *parent) 0039 : QThread(parent) 0040 , m_url(url) 0041 , m_transfers() 0042 , m_tempFile() 0043 { 0044 } 0045 0046 LinkImporter::LinkImporter(QObject *parent) 0047 : QThread(parent) 0048 , m_url() 0049 , m_transfers() 0050 , m_tempFile() 0051 { 0052 } 0053 0054 LinkImporter::~LinkImporter() 0055 { 0056 } 0057 0058 void LinkImporter::checkClipboard(const QString &clipboardContent) 0059 { 0060 static QRegularExpression rx(REGULAR_EXPRESSION); 0061 0062 int regexPos = 1; 0063 0064 QRegularExpressionMatch match = rx.match(clipboardContent); 0065 0066 auto link = match.captured(regexPos); 0067 while (!link.isEmpty()) { 0068 addTransfer(link); 0069 regexPos++; 0070 link = match.captured(regexPos); 0071 } 0072 } 0073 0074 void LinkImporter::run() 0075 { 0076 if (!m_url.isLocalFile() && !m_tempFile.isEmpty()) { 0077 slotReadFile(QUrl(m_tempFile)); 0078 } else { 0079 slotReadFile(m_url); 0080 } 0081 0082 quit(); 0083 } 0084 0085 void LinkImporter::copyRemoteFile() 0086 { 0087 m_tempFile = QString("%1/%2.tmp").arg(QDir::tempPath()).arg("importer_aux"); 0088 0089 QUrl aux(m_tempFile); 0090 KIO::CopyJob *job = KIO::copy(m_url, aux, KIO::HideProgressInfo); 0091 0092 if (!job->exec()) { 0093 Q_EMIT error(ki18n("Error trying to get %1").subs(m_url.url())); 0094 } 0095 } 0096 0097 void LinkImporter::slotReadFile(const QUrl &url) 0098 { 0099 static QRegularExpression rx(REGULAR_EXPRESSION); 0100 QFile file(url.toLocalFile()); 0101 0102 if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) 0103 return; 0104 0105 QTextStream in(&file); 0106 quint64 size = file.size(); 0107 quint64 position = 0; 0108 0109 while (!in.atEnd()) { 0110 QString line = in.readLine(); 0111 int regexPos = 0; 0112 quint64 lastPosition = position; 0113 0114 auto match = rx.match(line); 0115 0116 while (match.hasMatch()) { 0117 QString link = match.captured(0); 0118 0119 addTransfer(link); 0120 0121 regexPos += match.capturedLength(0); 0122 position = lastPosition + regexPos; 0123 0124 Q_EMIT progress(position * 100 / size); 0125 0126 match = rx.match(line, regexPos); 0127 } 0128 0129 position += line.size(); 0130 0131 Q_EMIT progress(position * 100 / size); 0132 } 0133 0134 if (!m_url.isLocalFile()) { 0135 file.remove(); 0136 } 0137 } 0138 0139 void LinkImporter::addTransfer(QString &link) 0140 { 0141 QUrl auxUrl; 0142 0143 if (link.contains("://")) { 0144 auxUrl = QUrl(link); 0145 } else { 0146 auxUrl = QUrl(QString("http://") + link); 0147 } 0148 0149 if (!link.isEmpty() && auxUrl.isValid() && m_transfers.indexOf(link) < 0 && !auxUrl.scheme().isEmpty() && !auxUrl.host().isEmpty()) { 0150 m_transfers << link; 0151 } 0152 } 0153 0154 #include "moc_linkimporter.cpp"