File indexing completed on 2024-04-28 04:41:45

0001 
0002 /*
0003     SPDX-FileCopyrightText: 2020 Volker Krause <vkrause@kde.org>
0004 
0005     SPDX-License-Identifier: LGPL-2.0-or-later
0006 */
0007 
0008 #include <gbfs/gbfsjob.h>
0009 #include <gbfs/gbfsservice.h>
0010 
0011 #include <QCoreApplication>
0012 #include <QDebug>
0013 #include <QDirIterator>
0014 #include <QJsonArray>
0015 #include <QJsonDocument>
0016 #include <QJsonObject>
0017 #include <QNetworkAccessManager>
0018 #include <QNetworkReply>
0019 #include <QNetworkRequest>
0020 #include <QRectF>
0021 #include <QTimer>
0022 
0023 #include <iostream>
0024 #include <random>
0025 
0026 using namespace KPublicTransport;
0027 
0028 /** Determine bounding boxes of the initial set of GBFS feeds we ship. */
0029 class GBFSProbe : public QObject
0030 {
0031     Q_OBJECT
0032 public:
0033     explicit GBFSProbe(QObject *parent = nullptr);
0034 
0035     void start();
0036     void getFeedList();
0037     void discoverNextFeed();
0038     void checkDuplicateSystemIds();
0039     void writeFeeds();
0040 
0041     QNetworkAccessManager m_nam;
0042     QStringList m_gbfsFeeds;
0043     int m_currentFeedIdx = -1;
0044     int m_throttleTime = 0;
0045     QStringList m_throttledFeeds;
0046     bool m_syntheticSystemId = false;
0047 
0048     std::vector<GBFSService> m_services;
0049     QString m_outputFileName;
0050 };
0051 
0052 GBFSProbe::GBFSProbe(QObject *parent)
0053     : QObject(parent)
0054 {
0055     m_nam.setRedirectPolicy(QNetworkRequest::NoLessSafeRedirectPolicy);
0056 }
0057 
0058 void GBFSProbe::start()
0059 {
0060     getFeedList();
0061 }
0062 
0063 void GBFSProbe::getFeedList()
0064 {
0065     auto reply = m_nam.get(QNetworkRequest(QUrl(QStringLiteral("https://raw.githubusercontent.com/NABSA/gbfs/master/systems.csv"))));
0066     connect(reply, &QNetworkReply::finished, this, [this, reply]() {
0067         reply->deleteLater();
0068         if (reply->error() != QNetworkReply::NoError) {
0069             qCritical() << reply->errorString();
0070             QCoreApplication::exit(1);
0071             return;
0072         }
0073 
0074         reply->readLine(); // skip header line
0075         while (!reply->atEnd()) {
0076             auto line = reply->readLine();
0077             line.replace("http:", "https:");
0078             const auto idx = line.lastIndexOf("https");
0079             if (idx < 0) {
0080                 continue;
0081             }
0082             line = line.mid(idx).trimmed();
0083             if (line.endsWith('"')) {
0084                 line.chop(1);
0085             }
0086             m_gbfsFeeds.push_back(QString::fromUtf8(line));
0087         }
0088 
0089         std::sort(m_gbfsFeeds.begin(), m_gbfsFeeds.end());
0090         m_gbfsFeeds.erase(std::unique(m_gbfsFeeds.begin(), m_gbfsFeeds.end()), m_gbfsFeeds.end());
0091 
0092         QFile extraFeeds(QStringLiteral("gbfs-feeds.txt"));
0093         if (!extraFeeds.open(QFile::ReadOnly)) {
0094             qCritical() << extraFeeds.errorString();
0095             QCoreApplication::exit(1);
0096             return;
0097         }
0098         const auto lines = QString::fromUtf8(extraFeeds.readAll()).split(QLatin1Char('\n'));
0099         for (const auto &line : lines) {
0100             const auto extraFeed = line.trimmed();
0101             if (extraFeed.isEmpty()) {
0102                 continue;
0103             }
0104 
0105             const auto it = std::lower_bound(m_gbfsFeeds.begin(), m_gbfsFeeds.end(), extraFeed);
0106             if (it != m_gbfsFeeds.end() && (*it) == extraFeed) {
0107                 qDebug() << "Extra feed already in NABSA systems.csv:" << extraFeed;
0108                 continue;
0109             }
0110             m_gbfsFeeds.insert(it, extraFeed);
0111         }
0112 
0113         qDebug() << "Found" << m_gbfsFeeds.size() << "possible feeds - running discovery on them...";
0114         std::shuffle(m_gbfsFeeds.begin(), m_gbfsFeeds.end(), std::default_random_engine()); // reduce risk of spamming the same service with too many requests
0115         discoverNextFeed();
0116     });
0117 }
0118 
0119 void GBFSProbe::discoverNextFeed()
0120 {
0121     ++m_currentFeedIdx;
0122     if (m_currentFeedIdx >= m_gbfsFeeds.size()) {
0123         if (!m_throttledFeeds.isEmpty()) {
0124             qDebug() << "Retrying for" << m_throttledFeeds.size() << "throttled feeds";
0125             m_gbfsFeeds = std::move(m_throttledFeeds);
0126             std::shuffle(m_gbfsFeeds.begin(), m_gbfsFeeds.end(), std::default_random_engine());
0127             m_throttledFeeds.clear();
0128             m_currentFeedIdx = 0;
0129             m_throttleTime = m_throttleTime == 0 ? 500 : (2 * m_throttleTime);
0130         } else {
0131             checkDuplicateSystemIds();
0132             return;
0133         }
0134     }
0135 
0136     auto job = new GBFSJob(&m_nam);
0137     job->setRequestedData({GBFS::StationInformation, GBFS::FreeBikeStatus, GBFS::GeofencingZones}); // everything we can use for the bounding box
0138     QObject::connect(job, &GBFSJob::finished, this, [job, this]() {
0139         job->deleteLater();
0140         if (job->error() == GBFSJob::TooManyRequestsError) {
0141             m_throttledFeeds.push_back(m_gbfsFeeds[m_currentFeedIdx]);
0142             qWarning() << "Scheduling for later:" << m_gbfsFeeds[m_currentFeedIdx] << job->errorMessage();
0143         } else if (job->error() != GBFSJob::NoError) {
0144             qWarning() << m_gbfsFeeds[m_currentFeedIdx] << job->error() << job->errorMessage() << "- skipping";
0145         } else if (job->service().boundingBox.isEmpty()) {
0146             qWarning() << m_gbfsFeeds[m_currentFeedIdx] << "has an empty bounding box - skipping";
0147         } else {
0148             m_services.push_back(job->service());
0149         }
0150         QTimer::singleShot(m_throttleTime, this, &GBFSProbe::discoverNextFeed);
0151     });
0152 
0153     GBFSService service;
0154     service.discoveryUrl = QUrl(m_gbfsFeeds[m_currentFeedIdx]);
0155     if (m_syntheticSystemId) {
0156         service.generateSystemId();
0157     }
0158     job->discoverAndUpdate(service);
0159 }
0160 
0161 static bool sortBySystemId(const GBFSService &lhs, const GBFSService &rhs)
0162 {
0163     return lhs.systemId < rhs.systemId;
0164 }
0165 
0166 void GBFSProbe::checkDuplicateSystemIds()
0167 {
0168     m_gbfsFeeds.clear();
0169 
0170     std::sort(m_services.begin(), m_services.end(), sortBySystemId);
0171     for (auto it = m_services.begin(); it != m_services.end();) {
0172         const auto range = std::equal_range(it, m_services.end(), (*it), sortBySystemId);
0173         assert(range.first == it);
0174         if (std::next(it) == range.second) {
0175             it = range.second;
0176         } else {
0177             for (auto it2 = range.first; it2 != range.second; ++it2) {
0178                 m_gbfsFeeds.push_back((*it2).discoveryUrl.toString());
0179             }
0180             it = m_services.erase(range.first, range.second);
0181         }
0182     }
0183 
0184     qWarning() << "Feeds with colliding system ids:" << m_gbfsFeeds.size() << m_gbfsFeeds;
0185     if (!m_gbfsFeeds.isEmpty() && !m_syntheticSystemId) {
0186         m_syntheticSystemId = true;
0187         m_currentFeedIdx = -1;
0188         m_throttleTime = 0;
0189         std::shuffle(m_gbfsFeeds.begin(), m_gbfsFeeds.end(), std::default_random_engine());
0190         discoverNextFeed();
0191     } else {
0192         writeFeeds();
0193     }
0194 }
0195 
0196 void GBFSProbe::writeFeeds()
0197 {
0198     QJsonArray array;
0199     for (const auto &service : m_services) {
0200         array.push_back(GBFSService::toJson(service));
0201     };
0202 
0203     auto b = QJsonDocument(array).toJson(QJsonDocument::Compact);
0204     b.replace("},{", "},\n {");
0205     b.replace("[{", "[\n {");
0206     b.replace("}]", "}\n]\n");
0207 
0208     QFile f(m_outputFileName);
0209     if (!f.open(QFile::WriteOnly)) {
0210         qCritical() << f.errorString();
0211         QCoreApplication::exit(1);
0212         return;
0213     }
0214     f.write(b);
0215     f.close();
0216 
0217     qDebug() << m_services.size() << "valid feeds discovered";
0218     QCoreApplication::quit();
0219 }
0220 
0221 int main(int argc, char **argv)
0222 {
0223     QCoreApplication app(argc, argv);
0224     if (app.arguments().size() <= 1) {
0225         std::cerr << "Usage: " << argv[0] << " [path to GBFS services file]" << std::endl;
0226         return 1;
0227     }
0228 
0229     GBFSProbe probe;
0230     probe.m_outputFileName = app.arguments().at(1);
0231     QMetaObject::invokeMethod(&probe, &GBFSProbe::start, Qt::QueuedConnection);
0232 
0233     return app.exec();
0234 }
0235 
0236 #include "gbfsprobe.moc"