File indexing completed on 2024-12-22 04:40:58

0001 /* ============================================================
0002 * Falkon - Qt web browser
0003 * Copyright (C) 2010-2017 David Rosca <nowrep@gmail.com>
0004 *
0005 * This program is free software: you can redistribute it and/or modify
0006 * it under the terms of the GNU General Public License as published by
0007 * the Free Software Foundation, either version 3 of the License, or
0008 * (at your option) any later version.
0009 *
0010 * This program is distributed in the hope that it will be useful,
0011 * but WITHOUT ANY WARRANTY; without even the implied warranty of
0012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0013 * GNU General Public License for more details.
0014 *
0015 * You should have received a copy of the GNU General Public License
0016 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
0017 * ============================================================ */
0018 #include "htmlimporter.h"
0019 #include "bookmarkitem.h"
0020 
0021 #include <QUrl>
0022 #include <QFileDialog>
0023 #include <QRegularExpression>
0024 
0025 HtmlImporter::HtmlImporter(QObject* parent)
0026     : BookmarksImporter(parent)
0027 {
0028 }
0029 
0030 QString HtmlImporter::description() const
0031 {
0032     return BookmarksImporter::tr("You can import bookmarks from any browser that supports HTML exporting. "
0033                                  "This file has usually these suffixes");
0034 }
0035 
0036 QString HtmlImporter::standardPath() const
0037 {
0038     return QStringLiteral(".htm, .html");
0039 }
0040 
0041 QString HtmlImporter::getPath(QWidget* parent)
0042 {
0043     const QString filter = BookmarksImporter::tr("HTML Bookmarks") + QLatin1String(" (*.htm *.html)");
0044     m_path = QFileDialog::getOpenFileName(parent, BookmarksImporter::tr("Choose file..."), QDir::homePath(), filter);
0045     return m_path;
0046 }
0047 
0048 bool HtmlImporter::prepareImport()
0049 {
0050     m_file.setFileName(m_path);
0051 
0052     if (!m_file.open(QFile::ReadOnly)) {
0053         setError(BookmarksImporter::tr("Unable to open file."));
0054         return false;
0055     }
0056 
0057     return true;
0058 }
0059 
// Minimum of two search positions where any negative value means "not found".
//
// - both arguments non-negative: the smaller one is returned
// - only `a` non-negative:       `a` is returned
// - `a` negative:                `b` is returned as-is (even if it is negative too)
static int qzMin(int a, int b)
{
    if (a < 0) {
        return b;
    }

    if (b < 0) {
        return a;
    }

    return a < b ? a : b;
}
0073 
0074 BookmarkItem* HtmlImporter::importBookmarks()
0075 {
0076     QString bookmarks = QString::fromUtf8(m_file.readAll());
0077     m_file.close();
0078 
0079     // Converting tags to lower case -,-
0080     // For some reason Qt::CaseInsensitive is not every time insensitive :-D
0081 
0082     bookmarks.replace(QLatin1String("<DL"), QLatin1String("<dl"));
0083     bookmarks.replace(QLatin1String("</DL"), QLatin1String("</dl"));
0084     bookmarks.replace(QLatin1String("<DT"), QLatin1String("<dt"));
0085     bookmarks.replace(QLatin1String("</DT"), QLatin1String("</dt"));
0086     bookmarks.replace(QLatin1String("<P"), QLatin1String("<p"));
0087     bookmarks.replace(QLatin1String("</P"), QLatin1String("</p"));
0088     bookmarks.replace(QLatin1String("<A"), QLatin1String("<a"));
0089     bookmarks.replace(QLatin1String("</A"), QLatin1String("</a"));
0090     bookmarks.replace(QLatin1String("HREF="), QLatin1String("href="));
0091     bookmarks.replace(QLatin1String("<H3"), QLatin1String("<h3"));
0092     bookmarks.replace(QLatin1String("</H3"), QLatin1String("</h3"));
0093 
0094     bookmarks = bookmarks.left(bookmarks.lastIndexOf(QLatin1String("</dl><p>")));
0095     int start = bookmarks.indexOf(QLatin1String("<dl><p>"));
0096 
0097     auto* root = new BookmarkItem(BookmarkItem::Folder);
0098     root->setTitle(QStringLiteral("HTML Import"));
0099 
0100     QList<BookmarkItem*> folders;
0101     folders.append(root);
0102 
0103     while (start > 0) {
0104         QString string = bookmarks.mid(start);
0105 
0106         int posOfFolder = string.indexOf(QLatin1String("<dt><h3"));
0107         int posOfEndFolder = string.indexOf(QLatin1String("</dl><p>"));
0108         int posOfLink = string.indexOf(QLatin1String("<dt><a"));
0109 
0110         int nearest = qzMin(posOfLink, qzMin(posOfFolder, posOfEndFolder));
0111         if (nearest == -1) {
0112             break;
0113         }
0114 
0115         if (nearest == posOfFolder) {
0116             // Next is folder
0117             QRegularExpression rx(QSL("<dt><h3(.*)>(.*)</h3>"), QRegularExpression::InvertedGreedinessOption | QRegularExpression::DotMatchesEverythingOption);
0118             QRegularExpressionMatch match = rx.match(string);
0119             QString folderName = match.captured(2).trimmed();
0120 
0121             auto* folder = new BookmarkItem(BookmarkItem::Folder, folders.isEmpty() ? root : folders.last());
0122             folder->setTitle(folderName);
0123             folders.append(folder);
0124 
0125             start += posOfFolder + match.captured(0).size();
0126         }
0127         else if (nearest == posOfEndFolder) {
0128             // Next is end of folder
0129             if (!folders.isEmpty()) {
0130                 folders.removeLast();
0131             }
0132 
0133             start += posOfEndFolder + 8;
0134         }
0135         else {
0136             // Next is link
0137             QRegularExpression rx(QSL("<dt><a(.*)>(.*)</a>"), QRegularExpression::InvertedGreedinessOption | QRegularExpression::DotMatchesEverythingOption);
0138             QRegularExpressionMatch match = rx.match(string);
0139 
0140             QString arguments = match.captured(1);
0141             QString linkName = match.captured(2).trimmed();
0142 
0143             QRegularExpression rx2(QSL("href=\"(.*)\""), QRegularExpression::InvertedGreedinessOption | QRegularExpression::DotMatchesEverythingOption);
0144             QRegularExpressionMatch match2 = rx2.match(arguments);
0145 
0146             QUrl url = QUrl::fromEncoded(match2.captured(1).trimmed().toUtf8());
0147 
0148             start += posOfLink + match.captured(0).size();
0149 
0150             if (url.isEmpty() || url.scheme() == QL1S("place") || url.scheme() == QL1S("about"))
0151                 continue;
0152 
0153             auto* b = new BookmarkItem(BookmarkItem::Url, folders.isEmpty() ? root : folders.last());
0154             b->setTitle(linkName.isEmpty() ? url.toString() : linkName);
0155             b->setUrl(url);
0156         }
0157     }
0158 
0159     return root;
0160 }