File indexing completed on 2024-05-19 05:57:23

0001 // SPDX-FileCopyrightText: 2022 Plata Hill <plata.hill@kdemail.net>
0002 // SPDX-License-Identifier: LGPL-2.1-or-later
0003 
0004 #include "tvspielfilmfetcher.h"
0005 
0006 #include "TellySkoutSettings.h"
0007 #include "database.h"
0008 
0009 #include <KLocalizedString>
0010 
0011 #include <QDate>
0012 #include <QDateTime>
0013 #include <QDebug>
0014 #include <QRegularExpression>
0015 #include <QString>
0016 #include <QTimeZone>
0017 
0018 TvSpielfilmFetcher::TvSpielfilmFetcher(QNetworkAccessManager *nam)
0019     : NetworkFetcher(nam)
0020 {
0021 }
0022 
0023 void TvSpielfilmFetcher::fetchGroups(std::function<void(const QVector<GroupData> &)> callback, std::function<void(const Error &)> errorCallback)
0024 {
0025     Q_UNUSED(errorCallback);
0026 
0027     QVector<GroupData> groups;
0028     GroupData data;
0029     data.m_id = GroupId("tvspielfilm.germany");
0030     data.m_name = i18n("Germany");
0031     data.m_url = "https://www.tvspielfilm.de/tv-programm/sendungen";
0032 
0033     groups.push_back(data);
0034 
0035     if (callback) {
0036         callback(groups);
0037     }
0038 }
0039 
0040 void TvSpielfilmFetcher::fetchGroup(const QString &url,
0041                                     const GroupId &groupId,
0042                                     std::function<void(const QList<ChannelData> &)> callback,
0043                                     std::function<void(const Error &)> errorCallback)
0044 {
0045     qDebug() << "Starting to fetch group (" << groupId.value() << ", " << url << ")";
0046 
0047     m_provider.get(
0048         QUrl(url),
0049         [this, callback](QByteArray data) {
0050             static QRegularExpression reChannelList("<select name=\\\"channel\\\">.*</select>");
0051             reChannelList.setPatternOptions(QRegularExpression::DotMatchesEverythingOption);
0052             QRegularExpressionMatch matchChannelList = reChannelList.match(data);
0053             if (matchChannelList.hasMatch()) {
0054                 QMap<ChannelId, ChannelData> channels;
0055                 const QString channelList = matchChannelList.captured(0);
0056 
0057                 static QRegularExpression reChannel("<option.*?value=\\\"(.*?)\\\">&nbsp;&nbsp;(.*?)</option>");
0058                 reChannel.setPatternOptions(QRegularExpression::DotMatchesEverythingOption);
0059                 QRegularExpressionMatchIterator it = reChannel.globalMatch(channelList);
0060                 while (it.hasNext()) {
0061                     QRegularExpressionMatch channelMatch = it.next();
0062                     const ChannelId id = ChannelId(channelMatch.captured(1));
0063 
0064                     // exclude groups (e.g. "alle Sender" or "g:1")
0065                     if (id.value().length() > 0 && !id.value().contains("g:")) {
0066                         const QString name = channelMatch.captured(2);
0067                         fetchChannel(id, name, channels);
0068                     }
0069                 }
0070 
0071                 if (callback) {
0072                     callback(channels.values());
0073                 }
0074             }
0075         },
0076         [groupId, errorCallback](const Error &error) {
0077             qWarning() << "Error fetching group" << groupId.value();
0078             qWarning() << error.m_message;
0079 
0080             if (errorCallback) {
0081                 errorCallback(error);
0082             }
0083         });
0084 }
0085 
0086 void TvSpielfilmFetcher::fetchChannel(const ChannelId &channelId, const QString &name, QMap<ChannelId, ChannelData> &channels)
0087 {
0088     if (!channels.contains(channelId)) {
0089         ChannelData data;
0090         data.m_id = channelId;
0091         data.m_name = name;
0092 
0093         // https://www.tvspielfilm.de/tv-programm/sendungen/das-erste,ARD.html
0094         data.m_url = "https://www.tvspielfilm.de/tv-programm/sendungen/" + name.toLower().replace(' ', '-') + "," + channelId.value() + ".html";
0095 
0096         data.m_image = "https://a2.tvspielfilm.de/images/tv/sender/mini/" + channelId.value().toLower() + ".png";
0097 
0098         channels.insert(channelId, data);
0099     }
0100 }
0101 
0102 void TvSpielfilmFetcher::fetchProgramDescription(const ChannelId &channelId,
0103                                                  const ProgramId &programId,
0104                                                  const QString &url,
0105                                                  std::function<void(const QString &)> callback,
0106                                                  std::function<void(const Error &)> errorCallback)
0107 {
0108     qDebug() << "Starting to fetch description for" << programId.value() << "(" << url << ")";
0109 
0110     m_provider.get(
0111         QUrl(url),
0112         [this, channelId, programId, url, callback](const QByteArray &data) {
0113             if (callback) {
0114                 callback(processDescription(data, url));
0115             }
0116         },
0117         [channelId, programId, url, errorCallback](const Error &error) {
0118             qWarning() << "Error fetching program description for" << channelId.value() << "," << programId.value() << "(" << url << "):";
0119             qWarning() << error.m_message;
0120 
0121             if (errorCallback) {
0122                 errorCallback(error);
0123             }
0124         });
0125 }
0126 
0127 void TvSpielfilmFetcher::fetchProgram(const ChannelId &channelId,
0128                                       std::function<void(const QVector<ProgramData> &)> callback,
0129                                       std::function<void(const Error &)> errorCallback)
0130 {
0131     // backwards such that we can stop early (see below)
0132     const QDate lastDate = QDate::currentDate().addDays(TellySkoutSettings::tvSpielfilmPrefetch());
0133 
0134     QVector<ProgramData> programs;
0135     if (programExists(channelId, lastDate)) {
0136         // assume that programs from previous days are available
0137         if (callback) {
0138             callback(programs);
0139         }
0140     } else {
0141         fetchProgram(channelId, lastDate, 1, programs, callback, errorCallback);
0142     }
0143 }
0144 
0145 void TvSpielfilmFetcher::fetchProgram(const ChannelId &channelId,
0146                                       const QDate &date,
0147                                       unsigned int page,
0148                                       QVector<ProgramData> &programs,
0149                                       std::function<void(const QVector<ProgramData> &)> callback,
0150                                       std::function<void(const Error &)> errorCallback)
0151 {
0152     // https://www.tvspielfilm.de/tv-programm/sendungen/?time=day&channel=ARD&date=2021-11-09&page=1
0153     const QString url = "https://www.tvspielfilm.de/tv-programm/sendungen/?time=day&channel=" + channelId.value() + "&date=" + date.toString("yyyy-MM-dd")
0154         + "&page=" + QString::number(page);
0155 
0156     qDebug() << "Starting to fetch program for " << channelId.value() << "(" << url << ")";
0157 
0158     m_provider.get(
0159         QUrl(url),
0160         [this, channelId, date, page, programs, callback, errorCallback, url](QByteArray data) {
0161             QVector<ProgramData> allPrograms(programs);
0162             allPrograms.append(processChannel(data, url, channelId));
0163 
0164             // fetch next page
0165             static QRegularExpression reNextPage(
0166                 "<ul class=\\\"pagination__items\\\">.*</ul>\\s*<a href=\\\"(.*?)\\\".*class=\\\"js-track-link pagination__link pagination__link--next\\\"");
0167             reNextPage.setPatternOptions(QRegularExpression::DotMatchesEverythingOption);
0168             QRegularExpressionMatch matchNextPage = reNextPage.match(data);
0169             if (matchNextPage.hasMatch()) {
0170                 fetchProgram(channelId, date, page + 1, allPrograms, callback, errorCallback);
0171             } else {
0172                 // all pages for this day processed, continue with previous day (stop yesterday)
0173                 const QDate previousDay = date.addDays(-1);
0174                 if (QDate::currentDate().addDays(-1) <= previousDay && !programExists(channelId, previousDay)) {
0175                     fetchProgram(channelId, previousDay, 1, allPrograms, callback, errorCallback);
0176                 } else {
0177                     // all pages for all days processed
0178                     if (callback) {
0179                         callback(allPrograms);
0180                     }
0181                 }
0182             }
0183         },
0184         [channelId, errorCallback](const Error &error) {
0185             qWarning() << "Error fetching channel" << channelId.value();
0186             qWarning() << error.m_message;
0187 
0188             if (errorCallback) {
0189                 errorCallback(error);
0190             }
0191         });
0192 }
0193 
0194 QVector<ProgramData> TvSpielfilmFetcher::processChannel(const QString &infoTable, const QString &url, const ChannelId &channelId)
0195 {
0196     QVector<ProgramData> programs;
0197 
0198     // column with title + description URL + start/stop time
0199     const QString reDescriptionUrl("<a href=\\\"(https://www.tvspielfilm.de/tv-programm/sendung/.*?\\.html)\\\"");
0200     const QString reTitle("<strong>(.*?)</strong>");
0201     const QString reDateTime("class=\\\"progressbar-info\\\".*?data-rel-start=\\\"(\\d+)\\\".*?data-rel-end=\\\"(\\d+)\\\"");
0202     const QString reMainCol("<td class=\\\"col-3\\\">.*?" + reDescriptionUrl + ".*?" + reTitle + ".*?" + reDateTime + ".*?</td>");
0203 
0204     // column with category
0205     const QString reCategory("<span>(.*?)</span>");
0206     const QString reCategoryCol("<td class=\\\"col-4\\\">.*?" + reCategory + ".*?</td>");
0207 
0208     QRegularExpression reProgram("<tr class=\\\"hover\\\">.*?" + reMainCol + ".*?" + reCategoryCol + ".*?</tr>");
0209     reProgram.setPatternOptions(QRegularExpression::DotMatchesEverythingOption);
0210     QRegularExpressionMatchIterator it = reProgram.globalMatch(infoTable);
0211     while (it.hasNext()) {
0212         QRegularExpressionMatch match = it.next();
0213         const ProgramData programData = processProgram(match, url, channelId, !it.hasNext());
0214         if (!programs.empty()) {
0215             // sometimes, there can be multiple programs for the same time (e.g. different news per region of a local channel)
0216             // show this as alternative in the title
0217             ProgramData &previousProgamData = programs.last();
0218             if (programData.m_startTime < previousProgamData.m_stopTime) {
0219                 previousProgamData.m_title += " / " + programData.m_title;
0220             } else {
0221                 programs.push_back(programData);
0222             }
0223         } else {
0224             programs.push_back(programData);
0225         }
0226     }
0227 
0228     return programs;
0229 }
0230 
0231 ProgramData TvSpielfilmFetcher::processProgram(const QRegularExpressionMatch &programMatch, const QString &url, const ChannelId &channelId, bool isLast)
0232 {
0233     Q_UNUSED(isLast)
0234 
0235     ProgramData programData;
0236 
0237     if (programMatch.hasMatch()) {
0238         const QString descriptionUrl = programMatch.captured(1);
0239         const QString title = programMatch.captured(2);
0240 
0241         const QDateTime startTime = QDateTime::fromSecsSinceEpoch(programMatch.captured(3).toInt());
0242         const QDateTime stopTime = QDateTime::fromSecsSinceEpoch(programMatch.captured(4).toInt());
0243 
0244         const QString category = programMatch.captured(5);
0245 
0246         // channel + start time can be used as ID
0247         const ProgramId programId = ProgramId(channelId.value() + "_" + QString::number(startTime.toSecsSinceEpoch()));
0248 
0249         programData.m_id = programId;
0250         programData.m_url = descriptionUrl;
0251         programData.m_channelId = channelId;
0252         programData.m_startTime = startTime.toLocalTime();
0253         programData.m_stopTime = stopTime.toLocalTime();
0254         programData.m_title = title;
0255         programData.m_subtitle = "";
0256         programData.m_description = "";
0257         programData.m_descriptionFetched = false;
0258         programData.m_categories.push_back(category);
0259     } else {
0260         qWarning() << "Failed to parse program " << url;
0261     }
0262 
0263     return programData;
0264 }
0265 
0266 QString TvSpielfilmFetcher::processDescription(const QString &descriptionPage, const QString &url)
0267 {
0268     static QRegularExpression reDescription("<section class=\\\"broadcast-detail__description\\\">.*?<p>(.*?)</p>");
0269     reDescription.setPatternOptions(QRegularExpression::DotMatchesEverythingOption);
0270     QRegularExpressionMatch match = reDescription.match(descriptionPage);
0271     if (match.hasMatch()) {
0272         return match.captured(1);
0273     } else {
0274         qWarning() << "Failed to parse program description from" << url;
0275     }
0276     return "";
0277 }
0278 
0279 bool TvSpielfilmFetcher::programExists(const ChannelId &channelId, const QDate &date)
0280 {
0281     const QDateTime lastTime(date, QTime(23, 59, 59));
0282 
0283     return Database::instance().programExists(channelId, lastTime);
0284 }