// Warning, file /utilities/telly-skout/src/tvspielfilmfetcher.cpp was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
0001 // SPDX-FileCopyrightText: 2022 Plata Hill <plata.hill@kdemail.net> 0002 // SPDX-License-Identifier: LGPL-2.1-or-later 0003 0004 #include "tvspielfilmfetcher.h" 0005 0006 #include "TellySkoutSettings.h" 0007 #include "database.h" 0008 0009 #include <KLocalizedString> 0010 0011 #include <QDate> 0012 #include <QDateTime> 0013 #include <QDebug> 0014 #include <QRegularExpression> 0015 #include <QString> 0016 #include <QTimeZone> 0017 0018 TvSpielfilmFetcher::TvSpielfilmFetcher(QNetworkAccessManager *nam) 0019 : NetworkFetcher(nam) 0020 { 0021 } 0022 0023 void TvSpielfilmFetcher::fetchGroups(std::function<void(const QVector<GroupData> &)> callback, std::function<void(const Error &)> errorCallback) 0024 { 0025 Q_UNUSED(errorCallback); 0026 0027 QVector<GroupData> groups; 0028 GroupData data; 0029 data.m_id = GroupId("tvspielfilm.germany"); 0030 data.m_name = i18n("Germany"); 0031 data.m_url = "https://www.tvspielfilm.de/tv-programm/sendungen"; 0032 0033 groups.push_back(data); 0034 0035 if (callback) { 0036 callback(groups); 0037 } 0038 } 0039 0040 void TvSpielfilmFetcher::fetchGroup(const QString &url, 0041 const GroupId &groupId, 0042 std::function<void(const QList<ChannelData> &)> callback, 0043 std::function<void(const Error &)> errorCallback) 0044 { 0045 qDebug() << "Starting to fetch group (" << groupId.value() << ", " << url << ")"; 0046 0047 m_provider.get( 0048 QUrl(url), 0049 [this, callback](QByteArray data) { 0050 static QRegularExpression reChannelList("<select name=\\\"channel\\\">.*</select>"); 0051 reChannelList.setPatternOptions(QRegularExpression::DotMatchesEverythingOption); 0052 QRegularExpressionMatch matchChannelList = reChannelList.match(data); 0053 if (matchChannelList.hasMatch()) { 0054 QMap<ChannelId, ChannelData> channels; 0055 const QString channelList = matchChannelList.captured(0); 0056 0057 static QRegularExpression reChannel("<option.*?value=\\\"(.*?)\\\"> (.*?)</option>"); 0058 
reChannel.setPatternOptions(QRegularExpression::DotMatchesEverythingOption); 0059 QRegularExpressionMatchIterator it = reChannel.globalMatch(channelList); 0060 while (it.hasNext()) { 0061 QRegularExpressionMatch channelMatch = it.next(); 0062 const ChannelId id = ChannelId(channelMatch.captured(1)); 0063 0064 // exclude groups (e.g. "alle Sender" or "g:1") 0065 if (id.value().length() > 0 && !id.value().contains("g:")) { 0066 const QString name = channelMatch.captured(2); 0067 fetchChannel(id, name, channels); 0068 } 0069 } 0070 0071 if (callback) { 0072 callback(channels.values()); 0073 } 0074 } 0075 }, 0076 [groupId, errorCallback](const Error &error) { 0077 qWarning() << "Error fetching group" << groupId.value(); 0078 qWarning() << error.m_message; 0079 0080 if (errorCallback) { 0081 errorCallback(error); 0082 } 0083 }); 0084 } 0085 0086 void TvSpielfilmFetcher::fetchChannel(const ChannelId &channelId, const QString &name, QMap<ChannelId, ChannelData> &channels) 0087 { 0088 if (!channels.contains(channelId)) { 0089 ChannelData data; 0090 data.m_id = channelId; 0091 data.m_name = name; 0092 0093 // https://www.tvspielfilm.de/tv-programm/sendungen/das-erste,ARD.html 0094 data.m_url = "https://www.tvspielfilm.de/tv-programm/sendungen/" + name.toLower().replace(' ', '-') + "," + channelId.value() + ".html"; 0095 0096 data.m_image = "https://a2.tvspielfilm.de/images/tv/sender/mini/" + channelId.value().toLower() + ".png"; 0097 0098 channels.insert(channelId, data); 0099 } 0100 } 0101 0102 void TvSpielfilmFetcher::fetchProgramDescription(const ChannelId &channelId, 0103 const ProgramId &programId, 0104 const QString &url, 0105 std::function<void(const QString &)> callback, 0106 std::function<void(const Error &)> errorCallback) 0107 { 0108 qDebug() << "Starting to fetch description for" << programId.value() << "(" << url << ")"; 0109 0110 m_provider.get( 0111 QUrl(url), 0112 [this, channelId, programId, url, callback](const QByteArray &data) { 0113 if (callback) { 0114 
callback(processDescription(data, url)); 0115 } 0116 }, 0117 [channelId, programId, url, errorCallback](const Error &error) { 0118 qWarning() << "Error fetching program description for" << channelId.value() << "," << programId.value() << "(" << url << "):"; 0119 qWarning() << error.m_message; 0120 0121 if (errorCallback) { 0122 errorCallback(error); 0123 } 0124 }); 0125 } 0126 0127 void TvSpielfilmFetcher::fetchProgram(const ChannelId &channelId, 0128 std::function<void(const QVector<ProgramData> &)> callback, 0129 std::function<void(const Error &)> errorCallback) 0130 { 0131 // backwards such that we can stop early (see below) 0132 const QDate lastDate = QDate::currentDate().addDays(TellySkoutSettings::tvSpielfilmPrefetch()); 0133 0134 QVector<ProgramData> programs; 0135 if (programExists(channelId, lastDate)) { 0136 // assume that programs from previous days are available 0137 if (callback) { 0138 callback(programs); 0139 } 0140 } else { 0141 fetchProgram(channelId, lastDate, 1, programs, callback, errorCallback); 0142 } 0143 } 0144 0145 void TvSpielfilmFetcher::fetchProgram(const ChannelId &channelId, 0146 const QDate &date, 0147 unsigned int page, 0148 QVector<ProgramData> &programs, 0149 std::function<void(const QVector<ProgramData> &)> callback, 0150 std::function<void(const Error &)> errorCallback) 0151 { 0152 // https://www.tvspielfilm.de/tv-programm/sendungen/?time=day&channel=ARD&date=2021-11-09&page=1 0153 const QString url = "https://www.tvspielfilm.de/tv-programm/sendungen/?time=day&channel=" + channelId.value() + "&date=" + date.toString("yyyy-MM-dd") 0154 + "&page=" + QString::number(page); 0155 0156 qDebug() << "Starting to fetch program for " << channelId.value() << "(" << url << ")"; 0157 0158 m_provider.get( 0159 QUrl(url), 0160 [this, channelId, date, page, programs, callback, errorCallback, url](QByteArray data) { 0161 QVector<ProgramData> allPrograms(programs); 0162 allPrograms.append(processChannel(data, url, channelId)); 0163 0164 // fetch 
next page 0165 static QRegularExpression reNextPage( 0166 "<ul class=\\\"pagination__items\\\">.*</ul>\\s*<a href=\\\"(.*?)\\\".*class=\\\"js-track-link pagination__link pagination__link--next\\\""); 0167 reNextPage.setPatternOptions(QRegularExpression::DotMatchesEverythingOption); 0168 QRegularExpressionMatch matchNextPage = reNextPage.match(data); 0169 if (matchNextPage.hasMatch()) { 0170 fetchProgram(channelId, date, page + 1, allPrograms, callback, errorCallback); 0171 } else { 0172 // all pages for this day processed, continue with previous day (stop yesterday) 0173 const QDate previousDay = date.addDays(-1); 0174 if (QDate::currentDate().addDays(-1) <= previousDay && !programExists(channelId, previousDay)) { 0175 fetchProgram(channelId, previousDay, 1, allPrograms, callback, errorCallback); 0176 } else { 0177 // all pages for all days processed 0178 if (callback) { 0179 callback(allPrograms); 0180 } 0181 } 0182 } 0183 }, 0184 [channelId, errorCallback](const Error &error) { 0185 qWarning() << "Error fetching channel" << channelId.value(); 0186 qWarning() << error.m_message; 0187 0188 if (errorCallback) { 0189 errorCallback(error); 0190 } 0191 }); 0192 } 0193 0194 QVector<ProgramData> TvSpielfilmFetcher::processChannel(const QString &infoTable, const QString &url, const ChannelId &channelId) 0195 { 0196 QVector<ProgramData> programs; 0197 0198 // column with title + description URL + start/stop time 0199 const QString reDescriptionUrl("<a href=\\\"(https://www.tvspielfilm.de/tv-programm/sendung/.*?\\.html)\\\""); 0200 const QString reTitle("<strong>(.*?)</strong>"); 0201 const QString reDateTime("class=\\\"progressbar-info\\\".*?data-rel-start=\\\"(\\d+)\\\".*?data-rel-end=\\\"(\\d+)\\\""); 0202 const QString reMainCol("<td class=\\\"col-3\\\">.*?" + reDescriptionUrl + ".*?" + reTitle + ".*?" 
+ reDateTime + ".*?</td>"); 0203 0204 // column with category 0205 const QString reCategory("<span>(.*?)</span>"); 0206 const QString reCategoryCol("<td class=\\\"col-4\\\">.*?" + reCategory + ".*?</td>"); 0207 0208 QRegularExpression reProgram("<tr class=\\\"hover\\\">.*?" + reMainCol + ".*?" + reCategoryCol + ".*?</tr>"); 0209 reProgram.setPatternOptions(QRegularExpression::DotMatchesEverythingOption); 0210 QRegularExpressionMatchIterator it = reProgram.globalMatch(infoTable); 0211 while (it.hasNext()) { 0212 QRegularExpressionMatch match = it.next(); 0213 const ProgramData programData = processProgram(match, url, channelId, !it.hasNext()); 0214 if (!programs.empty()) { 0215 // sometimes, there can be multiple programs for the same time (e.g. different news per region of a local channel) 0216 // show this as alternative in the title 0217 ProgramData &previousProgamData = programs.last(); 0218 if (programData.m_startTime < previousProgamData.m_stopTime) { 0219 previousProgamData.m_title += " / " + programData.m_title; 0220 } else { 0221 programs.push_back(programData); 0222 } 0223 } else { 0224 programs.push_back(programData); 0225 } 0226 } 0227 0228 return programs; 0229 } 0230 0231 ProgramData TvSpielfilmFetcher::processProgram(const QRegularExpressionMatch &programMatch, const QString &url, const ChannelId &channelId, bool isLast) 0232 { 0233 Q_UNUSED(isLast) 0234 0235 ProgramData programData; 0236 0237 if (programMatch.hasMatch()) { 0238 const QString descriptionUrl = programMatch.captured(1); 0239 const QString title = programMatch.captured(2); 0240 0241 const QDateTime startTime = QDateTime::fromSecsSinceEpoch(programMatch.captured(3).toInt()); 0242 const QDateTime stopTime = QDateTime::fromSecsSinceEpoch(programMatch.captured(4).toInt()); 0243 0244 const QString category = programMatch.captured(5); 0245 0246 // channel + start time can be used as ID 0247 const ProgramId programId = ProgramId(channelId.value() + "_" + 
QString::number(startTime.toSecsSinceEpoch())); 0248 0249 programData.m_id = programId; 0250 programData.m_url = descriptionUrl; 0251 programData.m_channelId = channelId; 0252 programData.m_startTime = startTime.toLocalTime(); 0253 programData.m_stopTime = stopTime.toLocalTime(); 0254 programData.m_title = title; 0255 programData.m_subtitle = ""; 0256 programData.m_description = ""; 0257 programData.m_descriptionFetched = false; 0258 programData.m_categories.push_back(category); 0259 } else { 0260 qWarning() << "Failed to parse program " << url; 0261 } 0262 0263 return programData; 0264 } 0265 0266 QString TvSpielfilmFetcher::processDescription(const QString &descriptionPage, const QString &url) 0267 { 0268 static QRegularExpression reDescription("<section class=\\\"broadcast-detail__description\\\">.*?<p>(.*?)</p>"); 0269 reDescription.setPatternOptions(QRegularExpression::DotMatchesEverythingOption); 0270 QRegularExpressionMatch match = reDescription.match(descriptionPage); 0271 if (match.hasMatch()) { 0272 return match.captured(1); 0273 } else { 0274 qWarning() << "Failed to parse program description from" << url; 0275 } 0276 return ""; 0277 } 0278 0279 bool TvSpielfilmFetcher::programExists(const ChannelId &channelId, const QDate &date) 0280 { 0281 const QDateTime lastTime(date, QTime(23, 59, 59)); 0282 0283 return Database::instance().programExists(channelId, lastTime); 0284 }