File indexing completed on 2024-10-13 07:27:33

0001 /* This file is part of the KDE project
0002 
0003    Copyright (C) 2008 Javier Goday <jgoday @ gmail.com>
0004    First Url regular expression taken from urlview tool by Michael Elkins <me@cs.hmc.edu>.
0005    Regular expression improved by FiNex.
0006    Improvements to regular expression and slotReadFile by Frantisek Ziacik
0007 
0008    This program is free software; you can redistribute it and/or
0009    modify it under the terms of the GNU General Public
0010    License as published by the Free Software Foundation; either
0011    version 2 of the License, or (at your option) any later version.
0012 */
0013 #include "linkimporter.h"
0014 
0015 #include <QDebug>
0016 #include <QDir>
0017 #include <QFile>
0018 #include <QIODevice>
0019 #include <QMap>
0020 #include <QRegularExpression>
0021 #include <QTextStream>
0022 
0023 #include <KIO/CopyJob>
0024 #include <KLocalizedString>
0025 #include <qregularexpression.h>
0026 
0027 // static QString REGULAR_EXPRESSION = "(((https?|ftp|gopher)://|(mailto|file|news):)[^’ <>\"]+|(www|web|w3).[-a-z0-9.]+)[^’ .,;<>\":]";
0028 //  static QString REGULAR_EXPRESSION = "((http|https|ftp|ftps)+([\\:\\w\\d:#@%/;$()~_?\\+-=\\\\.&])*)";
0029 static QString REGULAR_EXPRESSION =
0030     "(\\w+[:]//"
0031     ")?(((([\\w-]+[.]){1,}(ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|"
0032     "cl|cm|cn|co|com|cr|cs|cu|cv|cx|cy|cz|de|dj|dk|dm|do|dz|ec|edu|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gd|ge|gf|gg|gh|gi|gl|gm|gn|gov|gp|gq|gr|gs|gt|gu|"
0033     "gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|int|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|mg|mh|"
0034     "mil|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|net|nf|ng|ni|nl|no|np|nr|nt|nu|nz|om|org|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|"
0035     "ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|sv|st|sy|sz|tc|td|tf|tg|th|tj|tk|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu|"
0036     "wf|ws|ye|yt|yu|za|zm|zw|aero|biz|coop|info|museum|name|pro|travel))|([0-9]+[.][0-9]+[.][0-9]+[.][0-9]+)))([:][0-9]*)?([?/][\\w~#\\-;%?@&=/.+]*)?(?!\\w)";
0037 
0038 LinkImporter::LinkImporter(const QUrl &url, QObject *parent)
0039     : QThread(parent)
0040     , m_url(url)
0041     , m_transfers()
0042     , m_tempFile()
0043 {
0044 }
0045 
0046 LinkImporter::LinkImporter(QObject *parent)
0047     : QThread(parent)
0048     , m_url()
0049     , m_transfers()
0050     , m_tempFile()
0051 {
0052 }
0053 
0054 LinkImporter::~LinkImporter()
0055 {
0056 }
0057 
0058 void LinkImporter::checkClipboard(const QString &clipboardContent)
0059 {
0060     static QRegularExpression rx(REGULAR_EXPRESSION);
0061 
0062     int regexPos = 1;
0063 
0064     QRegularExpressionMatch match = rx.match(clipboardContent);
0065 
0066     auto link = match.captured(regexPos);
0067     while (!link.isEmpty()) {
0068         addTransfer(link);
0069         regexPos++;
0070         link = match.captured(regexPos);
0071     }
0072 }
0073 
0074 void LinkImporter::run()
0075 {
0076     if (!m_url.isLocalFile() && !m_tempFile.isEmpty()) {
0077         slotReadFile(QUrl(m_tempFile));
0078     } else {
0079         slotReadFile(m_url);
0080     }
0081 
0082     quit();
0083 }
0084 
0085 void LinkImporter::copyRemoteFile()
0086 {
0087     m_tempFile = QString("%1/%2.tmp").arg(QDir::tempPath()).arg("importer_aux");
0088 
0089     QUrl aux(m_tempFile);
0090     KIO::CopyJob *job = KIO::copy(m_url, aux, KIO::HideProgressInfo);
0091 
0092     if (!job->exec()) {
0093         Q_EMIT error(ki18n("Error trying to get %1").subs(m_url.url()));
0094     }
0095 }
0096 
0097 void LinkImporter::slotReadFile(const QUrl &url)
0098 {
0099     static QRegularExpression rx(REGULAR_EXPRESSION);
0100     QFile file(url.toLocalFile());
0101 
0102     if (!file.open(QIODevice::ReadOnly | QIODevice::Text))
0103         return;
0104 
0105     QTextStream in(&file);
0106     quint64 size = file.size();
0107     quint64 position = 0;
0108 
0109     while (!in.atEnd()) {
0110         QString line = in.readLine();
0111         int regexPos = 0;
0112         quint64 lastPosition = position;
0113 
0114         auto match = rx.match(line);
0115 
0116         while (match.hasMatch()) {
0117             QString link = match.captured(0);
0118 
0119             addTransfer(link);
0120 
0121             regexPos += match.capturedLength(0);
0122             position = lastPosition + regexPos;
0123 
0124             Q_EMIT progress(position * 100 / size);
0125 
0126             match = rx.match(line, regexPos);
0127         }
0128 
0129         position += line.size();
0130 
0131         Q_EMIT progress(position * 100 / size);
0132     }
0133 
0134     if (!m_url.isLocalFile()) {
0135         file.remove();
0136     }
0137 }
0138 
0139 void LinkImporter::addTransfer(QString &link)
0140 {
0141     QUrl auxUrl;
0142 
0143     if (link.contains("://")) {
0144         auxUrl = QUrl(link);
0145     } else {
0146         auxUrl = QUrl(QString("http://") + link);
0147     }
0148 
0149     if (!link.isEmpty() && auxUrl.isValid() && m_transfers.indexOf(link) < 0 && !auxUrl.scheme().isEmpty() && !auxUrl.host().isEmpty()) {
0150         m_transfers << link;
0151     }
0152 }
0153 
0154 #include "moc_linkimporter.cpp"