// File indexing completed on 2024-12-22 04:40:58
0001 /* ============================================================ 0002 * Falkon - Qt web browser 0003 * Copyright (C) 2010-2017 David Rosca <nowrep@gmail.com> 0004 * 0005 * This program is free software: you can redistribute it and/or modify 0006 * it under the terms of the GNU General Public License as published by 0007 * the Free Software Foundation, either version 3 of the License, or 0008 * (at your option) any later version. 0009 * 0010 * This program is distributed in the hope that it will be useful, 0011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 0012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 0013 * GNU General Public License for more details. 0014 * 0015 * You should have received a copy of the GNU General Public License 0016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 0017 * ============================================================ */ 0018 #include "htmlimporter.h" 0019 #include "bookmarkitem.h" 0020 0021 #include <QUrl> 0022 #include <QFileDialog> 0023 #include <QRegularExpression> 0024 0025 HtmlImporter::HtmlImporter(QObject* parent) 0026 : BookmarksImporter(parent) 0027 { 0028 } 0029 0030 QString HtmlImporter::description() const 0031 { 0032 return BookmarksImporter::tr("You can import bookmarks from any browser that supports HTML exporting. 
" 0033 "This file has usually these suffixes"); 0034 } 0035 0036 QString HtmlImporter::standardPath() const 0037 { 0038 return QStringLiteral(".htm, .html"); 0039 } 0040 0041 QString HtmlImporter::getPath(QWidget* parent) 0042 { 0043 const QString filter = BookmarksImporter::tr("HTML Bookmarks") + QLatin1String(" (*.htm *.html)"); 0044 m_path = QFileDialog::getOpenFileName(parent, BookmarksImporter::tr("Choose file..."), QDir::homePath(), filter); 0045 return m_path; 0046 } 0047 0048 bool HtmlImporter::prepareImport() 0049 { 0050 m_file.setFileName(m_path); 0051 0052 if (!m_file.open(QFile::ReadOnly)) { 0053 setError(BookmarksImporter::tr("Unable to open file.")); 0054 return false; 0055 } 0056 0057 return true; 0058 } 0059 0060 static int qzMin(int a, int b) 0061 { 0062 if (a > -1 && b > -1) { 0063 return qMin(a, b); 0064 } 0065 0066 if (a > -1) { 0067 return a; 0068 } 0069 else { 0070 return b; 0071 } 0072 } 0073 0074 BookmarkItem* HtmlImporter::importBookmarks() 0075 { 0076 QString bookmarks = QString::fromUtf8(m_file.readAll()); 0077 m_file.close(); 0078 0079 // Converting tags to lower case -,- 0080 // For some reason Qt::CaseInsensitive is not every time insensitive :-D 0081 0082 bookmarks.replace(QLatin1String("<DL"), QLatin1String("<dl")); 0083 bookmarks.replace(QLatin1String("</DL"), QLatin1String("</dl")); 0084 bookmarks.replace(QLatin1String("<DT"), QLatin1String("<dt")); 0085 bookmarks.replace(QLatin1String("</DT"), QLatin1String("</dt")); 0086 bookmarks.replace(QLatin1String("<P"), QLatin1String("<p")); 0087 bookmarks.replace(QLatin1String("</P"), QLatin1String("</p")); 0088 bookmarks.replace(QLatin1String("<A"), QLatin1String("<a")); 0089 bookmarks.replace(QLatin1String("</A"), QLatin1String("</a")); 0090 bookmarks.replace(QLatin1String("HREF="), QLatin1String("href=")); 0091 bookmarks.replace(QLatin1String("<H3"), QLatin1String("<h3")); 0092 bookmarks.replace(QLatin1String("</H3"), QLatin1String("</h3")); 0093 0094 bookmarks = 
bookmarks.left(bookmarks.lastIndexOf(QLatin1String("</dl><p>"))); 0095 int start = bookmarks.indexOf(QLatin1String("<dl><p>")); 0096 0097 auto* root = new BookmarkItem(BookmarkItem::Folder); 0098 root->setTitle(QStringLiteral("HTML Import")); 0099 0100 QList<BookmarkItem*> folders; 0101 folders.append(root); 0102 0103 while (start > 0) { 0104 QString string = bookmarks.mid(start); 0105 0106 int posOfFolder = string.indexOf(QLatin1String("<dt><h3")); 0107 int posOfEndFolder = string.indexOf(QLatin1String("</dl><p>")); 0108 int posOfLink = string.indexOf(QLatin1String("<dt><a")); 0109 0110 int nearest = qzMin(posOfLink, qzMin(posOfFolder, posOfEndFolder)); 0111 if (nearest == -1) { 0112 break; 0113 } 0114 0115 if (nearest == posOfFolder) { 0116 // Next is folder 0117 QRegularExpression rx(QSL("<dt><h3(.*)>(.*)</h3>"), QRegularExpression::InvertedGreedinessOption | QRegularExpression::DotMatchesEverythingOption); 0118 QRegularExpressionMatch match = rx.match(string); 0119 QString folderName = match.captured(2).trimmed(); 0120 0121 auto* folder = new BookmarkItem(BookmarkItem::Folder, folders.isEmpty() ? 
root : folders.last()); 0122 folder->setTitle(folderName); 0123 folders.append(folder); 0124 0125 start += posOfFolder + match.captured(0).size(); 0126 } 0127 else if (nearest == posOfEndFolder) { 0128 // Next is end of folder 0129 if (!folders.isEmpty()) { 0130 folders.removeLast(); 0131 } 0132 0133 start += posOfEndFolder + 8; 0134 } 0135 else { 0136 // Next is link 0137 QRegularExpression rx(QSL("<dt><a(.*)>(.*)</a>"), QRegularExpression::InvertedGreedinessOption | QRegularExpression::DotMatchesEverythingOption); 0138 QRegularExpressionMatch match = rx.match(string); 0139 0140 QString arguments = match.captured(1); 0141 QString linkName = match.captured(2).trimmed(); 0142 0143 QRegularExpression rx2(QSL("href=\"(.*)\""), QRegularExpression::InvertedGreedinessOption | QRegularExpression::DotMatchesEverythingOption); 0144 QRegularExpressionMatch match2 = rx2.match(arguments); 0145 0146 QUrl url = QUrl::fromEncoded(match2.captured(1).trimmed().toUtf8()); 0147 0148 start += posOfLink + match.captured(0).size(); 0149 0150 if (url.isEmpty() || url.scheme() == QL1S("place") || url.scheme() == QL1S("about")) 0151 continue; 0152 0153 auto* b = new BookmarkItem(BookmarkItem::Url, folders.isEmpty() ? root : folders.last()); 0154 b->setTitle(linkName.isEmpty() ? url.toString() : linkName); 0155 b->setUrl(url); 0156 } 0157 } 0158 0159 return root; 0160 }