File indexing completed on 2024-12-01 04:20:06
0001 0002 /* 0003 SPDX-FileCopyrightText: 2020 Volker Krause <vkrause@kde.org> 0004 0005 SPDX-License-Identifier: LGPL-2.0-or-later 0006 */ 0007 0008 #include <gbfs/gbfsjob.h> 0009 #include <gbfs/gbfsservice.h> 0010 0011 #include <QCoreApplication> 0012 #include <QDebug> 0013 #include <QDirIterator> 0014 #include <QJsonArray> 0015 #include <QJsonDocument> 0016 #include <QJsonObject> 0017 #include <QNetworkAccessManager> 0018 #include <QNetworkReply> 0019 #include <QNetworkRequest> 0020 #include <QRectF> 0021 #include <QTimer> 0022 0023 #include <iostream> 0024 #include <random> 0025 0026 using namespace KPublicTransport; 0027 0028 /** Determine bounding boxes of the initial set of GBFS feeds we ship. */ 0029 class GBFSProbe : public QObject 0030 { 0031 Q_OBJECT 0032 public: 0033 explicit GBFSProbe(QObject *parent = nullptr); 0034 0035 void start(); 0036 void getFeedList(); 0037 void discoverNextFeed(); 0038 void checkDuplicateSystemIds(); 0039 void writeFeeds(); 0040 0041 QNetworkAccessManager m_nam; 0042 QStringList m_gbfsFeeds; 0043 int m_currentFeedIdx = -1; 0044 int m_throttleTime = 0; 0045 QStringList m_throttledFeeds; 0046 bool m_syntheticSystemId = false; 0047 0048 std::vector<GBFSService> m_services; 0049 QString m_outputFileName; 0050 }; 0051 0052 GBFSProbe::GBFSProbe(QObject *parent) 0053 : QObject(parent) 0054 { 0055 m_nam.setRedirectPolicy(QNetworkRequest::NoLessSafeRedirectPolicy); 0056 } 0057 0058 void GBFSProbe::start() 0059 { 0060 getFeedList(); 0061 } 0062 0063 void GBFSProbe::getFeedList() 0064 { 0065 auto reply = m_nam.get(QNetworkRequest(QUrl(QStringLiteral("https://raw.githubusercontent.com/NABSA/gbfs/master/systems.csv")))); 0066 connect(reply, &QNetworkReply::finished, this, [this, reply]() { 0067 reply->deleteLater(); 0068 if (reply->error() != QNetworkReply::NoError) { 0069 qCritical() << reply->errorString(); 0070 QCoreApplication::exit(1); 0071 return; 0072 } 0073 0074 reply->readLine(); // skip header line 0075 while (!reply->atEnd()) { 0076 auto line = reply->readLine(); 0077 line.replace("http:", "https:"); 0078 const auto idx = line.lastIndexOf("https"); 0079 if (idx < 0) { 0080 continue; 0081 } 0082 line = line.mid(idx).trimmed(); 0083 if (line.endsWith('"')) { 0084 line.chop(1); 0085 } 0086 m_gbfsFeeds.push_back(QString::fromUtf8(line)); 0087 } 0088 0089 std::sort(m_gbfsFeeds.begin(), m_gbfsFeeds.end()); 0090 m_gbfsFeeds.erase(std::unique(m_gbfsFeeds.begin(), m_gbfsFeeds.end()), m_gbfsFeeds.end()); 0091 0092 QFile extraFeeds(QStringLiteral("gbfs-feeds.txt")); 0093 if (!extraFeeds.open(QFile::ReadOnly)) { 0094 qCritical() << extraFeeds.errorString(); 0095 QCoreApplication::exit(1); 0096 return; 0097 } 0098 const auto lines = QString::fromUtf8(extraFeeds.readAll()).split(QLatin1Char('\n')); 0099 for (const auto &line : lines) { 0100 const auto extraFeed = line.trimmed(); 0101 if (extraFeed.isEmpty()) { 0102 continue; 0103 } 0104 0105 const auto it = std::lower_bound(m_gbfsFeeds.begin(), m_gbfsFeeds.end(), extraFeed); 0106 if (it != m_gbfsFeeds.end() && (*it) == extraFeed) { 0107 qDebug() << "Extra feed already in NABSA systems.csv:" << extraFeed; 0108 continue; 0109 } 0110 m_gbfsFeeds.insert(it, extraFeed); 0111 } 0112 0113 qDebug() << "Found" << m_gbfsFeeds.size() << "possible feeds - running discovery on them..."; 0114 std::shuffle(m_gbfsFeeds.begin(), m_gbfsFeeds.end(), std::default_random_engine()); // reduce risk of spamming the same service with too many requests 0115 discoverNextFeed(); 0116 }); 0117 } 0118 0119 void GBFSProbe::discoverNextFeed() 0120 { 0121 ++m_currentFeedIdx; 0122 if (m_currentFeedIdx >= m_gbfsFeeds.size()) { 0123 if (!m_throttledFeeds.isEmpty()) { 0124 qDebug() << "Retrying for" << m_throttledFeeds.size() << "throttled feeds"; 0125 m_gbfsFeeds = std::move(m_throttledFeeds); 0126 std::shuffle(m_gbfsFeeds.begin(), m_gbfsFeeds.end(), std::default_random_engine()); 0127 m_throttledFeeds.clear(); 0128 m_currentFeedIdx = 0; 0129 m_throttleTime = m_throttleTime == 0 ? 500 : (2 * m_throttleTime); 0130 } else { 0131 checkDuplicateSystemIds(); 0132 return; 0133 } 0134 } 0135 0136 auto job = new GBFSJob(&m_nam); 0137 job->setRequestedData({GBFS::StationInformation, GBFS::FreeBikeStatus, GBFS::GeofencingZones}); // everything we can use for the bounding box 0138 QObject::connect(job, &GBFSJob::finished, this, [job, this]() { 0139 job->deleteLater(); 0140 if (job->error() == GBFSJob::TooManyRequestsError) { 0141 m_throttledFeeds.push_back(m_gbfsFeeds[m_currentFeedIdx]); 0142 qWarning() << "Scheduling for later:" << m_gbfsFeeds[m_currentFeedIdx] << job->errorMessage(); 0143 } else if (job->error() != GBFSJob::NoError) { 0144 qWarning() << m_gbfsFeeds[m_currentFeedIdx] << job->error() << job->errorMessage() << "- skipping"; 0145 } else if (job->service().boundingBox.isEmpty()) { 0146 qWarning() << m_gbfsFeeds[m_currentFeedIdx] << "has an empty bounding box - skipping"; 0147 } else { 0148 m_services.push_back(job->service()); 0149 } 0150 QTimer::singleShot(m_throttleTime, this, &GBFSProbe::discoverNextFeed); 0151 }); 0152 0153 GBFSService service; 0154 service.discoveryUrl = QUrl(m_gbfsFeeds[m_currentFeedIdx]); 0155 if (m_syntheticSystemId) { 0156 service.generateSystemId(); 0157 } 0158 job->discoverAndUpdate(service); 0159 } 0160 0161 static bool sortBySystemId(const GBFSService &lhs, const GBFSService &rhs) 0162 { 0163 return lhs.systemId < rhs.systemId; 0164 } 0165 0166 void GBFSProbe::checkDuplicateSystemIds() 0167 { 0168 m_gbfsFeeds.clear(); 0169 0170 std::sort(m_services.begin(), m_services.end(), sortBySystemId); 0171 for (auto it = m_services.begin(); it != m_services.end();) { 0172 const auto range = std::equal_range(it, m_services.end(), (*it), sortBySystemId); 0173 assert(range.first == it); 0174 if (std::next(it) == range.second) { 0175 it = range.second; 0176 } else { 0177 for (auto it2 = range.first; it2 != range.second; ++it2) { 0178 m_gbfsFeeds.push_back((*it2).discoveryUrl.toString()); 0179 } 0180 it = m_services.erase(range.first, range.second); 0181 } 0182 } 0183 0184 qWarning() << "Feeds with colliding system ids:" << m_gbfsFeeds.size() << m_gbfsFeeds; 0185 if (!m_gbfsFeeds.isEmpty() && !m_syntheticSystemId) { 0186 m_syntheticSystemId = true; 0187 m_currentFeedIdx = -1; 0188 m_throttleTime = 0; 0189 std::shuffle(m_gbfsFeeds.begin(), m_gbfsFeeds.end(), std::default_random_engine()); 0190 discoverNextFeed(); 0191 } else { 0192 writeFeeds(); 0193 } 0194 } 0195 0196 void GBFSProbe::writeFeeds() 0197 { 0198 QJsonArray array; 0199 for (const auto &service : m_services) { 0200 array.push_back(GBFSService::toJson(service)); 0201 }; 0202 0203 auto b = QJsonDocument(array).toJson(QJsonDocument::Compact); 0204 b.replace("},{", "},\n {"); 0205 b.replace("[{", "[\n {"); 0206 b.replace("}]", "}\n]\n"); 0207 0208 QFile f(m_outputFileName); 0209 if (!f.open(QFile::WriteOnly)) { 0210 qCritical() << f.errorString(); 0211 QCoreApplication::exit(1); 0212 return; 0213 } 0214 f.write(b); 0215 f.close(); 0216 0217 qDebug() << m_services.size() << "valid feeds discovered"; 0218 QCoreApplication::quit(); 0219 } 0220 0221 int main(int argc, char **argv) 0222 { 0223 QCoreApplication app(argc, argv); 0224 if (app.arguments().size() <= 1) { 0225 std::cerr << "Usage: " << argv[0] << " [path to GBFS services file]" << std::endl; 0226 return 1; 0227 } 0228 0229 GBFSProbe probe; 0230 probe.m_outputFileName = app.arguments().at(1); 0231 QMetaObject::invokeMethod(&probe, &GBFSProbe::start, Qt::QueuedConnection); 0232 0233 return app.exec(); 0234 } 0235 0236 #include "gbfsprobe.moc"