File indexing completed on 2024-05-12 04:42:46
0001 /* 0002 SPDX-FileCopyrightText: 2020 Volker Krause <vkrause@kde.org> 0003 0004 SPDX-License-Identifier: LGPL-2.0-or-later 0005 */ 0006 0007 #include "gbfsjob.h" 0008 #include "gbfsreader.h" 0009 #include "geo/geojson_p.h" 0010 0011 #include <KPublicTransport/Location> 0012 0013 #include <QJsonObject> 0014 #include <QNetworkAccessManager> 0015 #include <QNetworkReply> 0016 #include <QNetworkReply> 0017 #include <QPolygonF> 0018 #include <QVersionNumber> 0019 0020 #include <cassert> 0021 #include <cmath> 0022 0023 using namespace KPublicTransport; 0024 0025 GBFSJob::GBFSJob(QNetworkAccessManager *nam, QObject *parent) 0026 : QObject(parent) 0027 , m_nam(nam) 0028 { 0029 assert(nam); 0030 } 0031 0032 GBFSJob::~GBFSJob() = default; 0033 0034 GBFSJob::Error GBFSJob::error() const 0035 { 0036 return m_error; 0037 } 0038 0039 QString GBFSJob::errorMessage() const 0040 { 0041 return m_errorMsg; 0042 } 0043 0044 GBFSService GBFSJob::service() const 0045 { 0046 return m_service; 0047 } 0048 0049 void GBFSJob::setRequestedData(std::vector<GBFS::FileType> &&fileTypes) 0050 { 0051 m_fileTypes = std::move(fileTypes); 0052 } 0053 0054 void GBFSJob::discoverAndUpdate(const GBFSService &service) 0055 { 0056 m_service = service; 0057 if (!m_service.systemId.isEmpty()) { 0058 m_store = GBFSStore(m_service.systemId); 0059 0060 if (m_store.hasCurrentData(GBFS::Discovery)) { 0061 qDebug() << "reusing cached discovery data" << m_service.systemId; 0062 m_discoverDoc = m_store.loadData(GBFS::Discovery); 0063 parseDiscoverData(); 0064 return; 0065 } 0066 } 0067 0068 qDebug() << "fetching discovery data" << m_service.discoveryUrl; 0069 auto reply = m_nam->get(QNetworkRequest(m_service.discoveryUrl)); 0070 connect(reply, &QNetworkReply::finished, this, [this, reply]() { discoverFinished(reply); }); 0071 } 0072 0073 void GBFSJob::discoverFinished(QNetworkReply *reply) 0074 { 0075 reply->deleteLater(); 0076 if (reply->error() != QNetworkReply::NoError) { 0077 if (m_previousDiscoveryUrl.isValid()) { 0078 qDebug() << "new version discovery failed, falling back to old one" << reply->errorString(); 0079 m_service.discoveryUrl = m_previousDiscoveryUrl; 0080 } else { 0081 handleNetworkError(reply); 0082 return; 0083 } 0084 } else { 0085 m_discoverDoc = QJsonDocument::fromJson(reply->readAll()); 0086 } 0087 parseDiscoverData(); 0088 } 0089 0090 void GBFSJob::parseDiscoverData() 0091 { 0092 const auto top = m_discoverDoc.object(); 0093 //qDebug() << QJsonDocument(top).toJson(); 0094 0095 const auto data = top.value(QLatin1String("data")).toObject(); 0096 // pick the feeds with the best language for our current locale 0097 if (data.size() == 1) { 0098 // only one set of feeds 0099 m_feeds = data.begin().value().toObject().value(QLatin1String("feeds")).toArray(); 0100 if (m_feeds.isEmpty()) { // invalid format for single feeds 0101 m_feeds = data.value(QLatin1String("feeds")).toArray(); 0102 } 0103 } else if (!data.empty()) { 0104 const auto localeLangs = QLocale().uiLanguages(); 0105 for (const auto &l : localeLangs) { 0106 m_feeds = data.value(l).toObject().value(QLatin1String("feeds")).toArray(); 0107 if (m_feeds.isEmpty()) { 0108 m_feeds = data.value(l.toLower()).toObject().value(QLatin1String("feeds")).toArray(); 0109 } 0110 if (m_feeds.empty() && l.size() > 2 && l[2] == QLatin1Char('-')) { 0111 m_feeds = data.value(l.left(2)).toObject().value(QLatin1String("feeds")).toArray(); 0112 } 0113 if (!m_feeds.empty()) { 0114 break; 0115 } 0116 } 0117 // take the first one if we haven't found a better match 0118 if (m_feeds.empty()) { 0119 qDebug() << "picking first language, as none matches" << localeLangs; 0120 m_feeds = data.begin().value().toObject().value(QLatin1String("feeds")).toArray(); 0121 } 0122 } 0123 if (m_feeds.empty()) { 0124 m_error = DataError; 0125 m_errorMsg = QStringLiteral("no feed found in discovery response!"); 0126 Q_EMIT finished(); 0127 return; 0128 } 0129 0130 m_state = m_state == State::Discover ? State::Version : State::SystemInformation; 0131 processFeeds(); 0132 } 0133 0134 void GBFSJob::processFeeds() 0135 { 0136 bool proccedAtLeastOneFeed = false; 0137 const auto state = m_state; // can change as result of processing 0138 for (const auto &feedVal : std::as_const(m_feeds)) { 0139 const auto feed = feedVal.toObject(); 0140 const auto name = feed.value(QLatin1String("name")).toString(); 0141 const auto type = GBFS::typeForKeyName(name); 0142 const auto url = QUrl(feed.value(QLatin1String("url")).toString()); 0143 0144 switch (type) { 0145 case GBFS::SystemInformation: 0146 if (state != State::SystemInformation) { 0147 continue; 0148 } 0149 break; 0150 case GBFS::Versions: 0151 if (state != State::Version) { 0152 continue; 0153 } 0154 break; 0155 case GBFS::StationInformation: 0156 case GBFS::StationStatus: 0157 case GBFS::FreeBikeStatus: 0158 case GBFS::VehicleTypes: 0159 case GBFS::GeofencingZones: 0160 if (state != State::Data || !shouldFetchFile(type)) { 0161 continue; 0162 } 0163 break; 0164 case GBFS::Discovery: 0165 case GBFS::SystemHours: 0166 case GBFS::SystemCalendar: 0167 case GBFS::SystemRegions: 0168 case GBFS::SystemPricingPlans: 0169 case GBFS::SystemAlerts: 0170 continue; 0171 default: 0172 qDebug() << "Unhandled feed:" << name << url; 0173 continue; 0174 } 0175 0176 if (!m_store.isValid() || !m_store.hasCurrentData(type)) { 0177 qDebug() << "fetching" << name; 0178 auto reply = m_nam->get(QNetworkRequest(url)); 0179 connect(reply, &QNetworkReply::finished, this, [this, reply, type]() { fetchFinished(reply, type); }); 0180 ++m_pendingJobs; 0181 } else { 0182 parseData(m_store.loadData(type), type); 0183 } 0184 proccedAtLeastOneFeed = true; 0185 } 0186 0187 if (!proccedAtLeastOneFeed) { 0188 switch (m_state) { 0189 case State::Version: 0190 m_state = State::SystemInformation; 0191 break; 0192 case State::SystemInformation: 0193 case State::Data: 0194 m_error = DataError; 0195 m_errorMsg = m_state == State::SystemInformation ? QStringLiteral("no system information") : QStringLiteral("no data"); 0196 Q_EMIT finished(); 0197 return; 0198 default: 0199 Q_UNREACHABLE(); 0200 } 0201 QMetaObject::invokeMethod(this, &GBFSJob::processFeeds, Qt::QueuedConnection); 0202 } else if (m_pendingJobs == 0 && state == State::Data) { 0203 finalize(); 0204 } 0205 } 0206 0207 void GBFSJob::fetchFinished(QNetworkReply *reply, GBFS::FileType type) 0208 { 0209 reply->deleteLater(); 0210 --m_pendingJobs; 0211 const auto state = m_state; // can change as part of processing 0212 0213 if (reply->error() != QNetworkReply::NoError) { 0214 // don't consider geofencing_zones failure fatal 0215 if (type != GBFS::GeofencingZones) { 0216 handleNetworkError(reply); 0217 return; 0218 } else { 0219 qDebug() << reply->url() << reply->errorString(); 0220 } 0221 } else { 0222 const auto doc = QJsonDocument::fromJson(reply->readAll()); 0223 if (m_store.isValid()) { 0224 m_store.storeData(type, doc); 0225 } 0226 parseData(doc, type); 0227 } 0228 0229 if (m_pendingJobs == 0 && state == State::Data) { 0230 finalize(); 0231 } 0232 } 0233 0234 void GBFSJob::handleNetworkError(QNetworkReply *reply) 0235 { 0236 m_error = reply->attribute(QNetworkRequest::HttpStatusCodeAttribute).toInt() == 429 ? TooManyRequestsError : NetworkError; 0237 m_errorMsg = reply->errorString(); 0238 if (m_pendingJobs == 0) { // wait for the rest to finish otherwise, to avoid double finished() emission 0239 Q_EMIT finished(); 0240 } 0241 } 0242 0243 void GBFSJob::parseData(const QJsonDocument &doc, GBFS::FileType type) 0244 { 0245 switch (type) { 0246 case GBFS::SystemInformation: 0247 parseSystemInformation(doc); 0248 break; 0249 case GBFS::StationInformation: 0250 parseStationInformation(doc); 0251 break; 0252 case GBFS::FreeBikeStatus: 0253 parseFreeBikeStatus(doc); 0254 break; 0255 case GBFS::Versions: 0256 parseVersionData(doc); 0257 break; 0258 case GBFS::GeofencingZones: 0259 parseGeofencingZones(doc); 0260 default: 0261 break; 0262 } 0263 } 0264 0265 void GBFSJob::parseSystemInformation(const QJsonDocument &doc) 0266 { 0267 const auto systemId = GBFSReader::dataValue(doc, QLatin1String("system_id")).toString(); 0268 if (systemId.isEmpty()) { 0269 m_error = DataError; 0270 m_errorMsg = QStringLiteral("unable to determine system_id!"); 0271 Q_EMIT finished(); 0272 return; 0273 } 0274 if (m_service.systemId.isEmpty()) { 0275 m_service.systemId = systemId; 0276 } 0277 m_store = GBFSStore(m_service.systemId); 0278 m_store.storeData(GBFS::Discovery, m_discoverDoc); 0279 m_store.storeData(GBFS::SystemInformation, doc); 0280 if (!m_versionDoc.isEmpty()) { 0281 m_store.storeData(GBFS::Versions, m_versionDoc); 0282 } 0283 0284 m_state = State::Data; 0285 QMetaObject::invokeMethod(this, &GBFSJob::processFeeds, Qt::QueuedConnection); 0286 } 0287 0288 void GBFSJob::parseStationInformation(const QJsonDocument &doc) 0289 { 0290 const auto stations = GBFSReader::dataValue(doc, QLatin1String("stations")).toArray(); 0291 collectCoordinates(stations); 0292 qDebug() << stations.size() << "stations/docks"; 0293 } 0294 0295 void GBFSJob::parseFreeBikeStatus(const QJsonDocument &doc) 0296 { 0297 const auto bikes = GBFSReader::dataValue(doc, QLatin1String("bikes")).toArray(); 0298 collectCoordinates(bikes); 0299 qDebug() << bikes.size() << "free floating vehicles"; 0300 } 0301 0302 static void filterOutliers(const std::vector<double> &values, double &minVal, double &maxVal, const std::function<double(double, double)> &distFunc) 0303 { 0304 // first step: primitive distance-based trimming at the extremes 0305 auto beginIt = values.begin(); 0306 while(std::next(beginIt) != values.end()) { 0307 if (distFunc(*beginIt, *std::next(beginIt)) > 50'000) { 0308 ++beginIt; 0309 } else { 0310 break; 0311 } 0312 } 0313 auto endIt = std::prev(values.end()); 0314 while(endIt != beginIt && std::prev(endIt) != beginIt) { 0315 if (distFunc(*endIt, *std::prev(endIt)) > 50'000) { 0316 --endIt; 0317 } else { 0318 break; 0319 } 0320 } 0321 ++endIt; 0322 0323 // second step: standard deviation 0324 const auto n = std::distance(beginIt, endIt); 0325 const auto mean = std::accumulate(beginIt, endIt, 0.0, [n](auto a, auto b) { return a + b / n; }); 0326 auto sigma = std::accumulate(beginIt, endIt, 0.0, [n](auto a, auto b) { 0327 return a + (std::pow(b, 2.0) / n); 0328 }); 0329 sigma = std::sqrt(sigma - std::pow(mean, 2.0)) * 3.0; 0330 0331 auto lowerBound = mean - sigma; 0332 auto it = std::lower_bound(values.begin(), values.end(), lowerBound); 0333 if (it != values.end()) { 0334 lowerBound = (*it); 0335 } 0336 auto upperBound = mean + sigma; 0337 it = std::lower_bound(values.begin(), values.end(), upperBound); 0338 if (it != values.begin()) { 0339 upperBound = *(std::prev(it)); 0340 } 0341 0342 minVal = std::min(minVal, std::max(lowerBound, values.front())); // clamp by 3 sigma, but don't exceed the input range when not needed 0343 maxVal = std::max(maxVal, std::min(upperBound, values.back())); 0344 } 0345 0346 void GBFSJob::collectCoordinates(const QJsonArray &array) 0347 { 0348 m_latitudes.reserve(m_latitudes.size() + array.size()); 0349 m_longitudes.reserve(m_longitudes.size() + array.size()); 0350 0351 for (const auto &statVal : array) { 0352 const auto station = statVal.toObject(); 0353 const auto lat = GBFSReader::readLatitude(station); 0354 if (!std::isnan(lat) && lat >= -90.0 && lat <= 90.0 && std::abs(lat) > 0.001) { 0355 m_latitudes.push_back(lat); 0356 } 0357 const auto lon = GBFSReader::readLongitude(station); 0358 if (!std::isnan(lon) && lon >= -180.0 && lon <= 180.0 && std::abs(lon) > 0.001) { 0359 m_longitudes.push_back(lon); 0360 } 0361 } 0362 } 0363 0364 void GBFSJob::parseVersionData(const QJsonDocument &doc) 0365 { 0366 m_versionDoc = doc; 0367 const auto versions = GBFSReader::dataValue(doc, QLatin1String("versions")).toArray(); 0368 QJsonObject bestVersion; 0369 for (const auto &verVal : versions) { 0370 const auto version = verVal.toObject(); 0371 if (bestVersion.isEmpty()) { 0372 bestVersion = version; 0373 } 0374 if (QVersionNumber::fromString(bestVersion.value(QLatin1String("version")).toString()) < QVersionNumber::fromString(version.value(QLatin1String("version")).toString())) { 0375 bestVersion = version; 0376 } 0377 } 0378 0379 const auto url = QUrl(bestVersion.value(QLatin1String("url")).toString()); 0380 if (!url.isEmpty() && m_service.discoveryUrl != url) { 0381 qDebug() << "found newer version:" << url << m_service.discoveryUrl; 0382 m_previousDiscoveryUrl = m_service.discoveryUrl; 0383 m_service.discoveryUrl = url; 0384 m_state = State::DiscoverRestart; 0385 discoverAndUpdate(m_service); 0386 } else { 0387 m_state = State::SystemInformation; 0388 QMetaObject::invokeMethod(this, &GBFSJob::processFeeds, Qt::QueuedConnection); 0389 } 0390 } 0391 0392 void GBFSJob::parseGeofencingZones(const QJsonDocument &doc) 0393 { 0394 const auto features = GBFSReader::dataValue(doc, QLatin1String("geofencing_zones")).toObject() 0395 .value(QLatin1String("features")).toArray(); 0396 for (const auto &featureVal : features) { 0397 const auto geo = featureVal.toObject().value(QLatin1String("geometry")).toObject(); 0398 const auto rect = GeoJson::readOuterPolygon(geo).boundingRect(); 0399 if (rect.isNull() || rect.left() < -180.0 || rect.right() > 180.0 || rect.top() < -90.0 || rect.bottom() > 90.0) { 0400 qDebug() << "invalid geofence box:" << rect; 0401 continue; 0402 } 0403 // we need to run this through outlier filtering as well, we got random nonsense elements in a few cities as well 0404 m_latitudes.push_back(rect.top()); 0405 m_latitudes.push_back(rect.bottom()); 0406 m_longitudes.push_back(rect.left()); 0407 m_longitudes.push_back(rect.right()); 0408 } 0409 } 0410 0411 void GBFSJob::finalize() 0412 { 0413 // add a 500m radius for single points 0414 if (m_latitudes.size() == 1) { 0415 const auto d = 250.0 / Location::distance(m_latitudes.front(), 0.0, m_latitudes.front() + 1.0, 0.0); 0416 m_latitudes.push_back(m_latitudes.front() - d); 0417 m_latitudes.push_back(m_latitudes.front() + d); 0418 } 0419 if (m_longitudes.size() == 1 && !m_latitudes.empty()) { 0420 const auto d = 250.0 / Location::distance(m_latitudes.front(), m_longitudes.front(), m_latitudes.front(), m_longitudes.front() + 1.0); 0421 m_longitudes.push_back(m_longitudes.front() - d); 0422 m_longitudes.push_back(m_longitudes.front() + d); 0423 } 0424 0425 double minLat = 90.0, maxLat = -90.0, minLon = 180.0, maxLon = -180.0; 0426 if (!m_latitudes.empty() && !m_longitudes.empty()) { 0427 std::sort(m_latitudes.begin(), m_latitudes.end()); 0428 std::sort(m_longitudes.begin(), m_longitudes.end()); 0429 0430 // covered area is reasonable, take as-is 0431 if (Location::distance(m_latitudes.front(), m_longitudes.front(), m_latitudes.back(), m_longitudes.back()) <= 50'000) { 0432 minLat = m_latitudes.front(); 0433 minLon = m_longitudes.front(); 0434 maxLat = m_latitudes.back(); 0435 maxLon = m_longitudes.back(); 0436 } else { 0437 // try to filter out outliers 0438 filterOutliers(m_latitudes, minLat, maxLat, [](auto lat1, auto lat2) { return Location::distance(lat1, 0.0, lat2, 0.0); }); 0439 filterOutliers(m_longitudes, minLon, maxLon, [&](auto lon1, auto lon2) { 0440 const auto lat = (maxLat - minLat) / 2.0; 0441 return Location::distance(lat, lon1, lat, lon2); 0442 }); 0443 } 0444 } 0445 0446 if (maxLat > minLat && maxLon > minLon) { 0447 m_service.boundingBox = QRectF(QPointF(minLon, minLat), QPointF(maxLon, maxLat)); 0448 } 0449 0450 // round bounding box coordinates to stabilize the diff gbfs-feeds.json a bit 0451 constexpr const auto COORD_RESOLUTION = 100.0; 0452 m_service.boundingBox.setLeft(std::floor(m_service.boundingBox.left() * COORD_RESOLUTION) / COORD_RESOLUTION); 0453 m_service.boundingBox.setTop(std::floor(m_service.boundingBox.top() * COORD_RESOLUTION) / COORD_RESOLUTION); 0454 m_service.boundingBox.setRight(std::ceil(m_service.boundingBox.right() * COORD_RESOLUTION) / COORD_RESOLUTION); 0455 m_service.boundingBox.setBottom(std::ceil(m_service.boundingBox.bottom() * COORD_RESOLUTION) / COORD_RESOLUTION); 0456 0457 qDebug() << "bounding box:" << m_service.boundingBox; 0458 GBFSServiceRepository::store(m_service); 0459 Q_EMIT finished(); 0460 } 0461 0462 bool GBFSJob::shouldFetchFile(GBFS::FileType fileType) const 0463 { 0464 return m_fileTypes.empty() || std::find(m_fileTypes.begin(), m_fileTypes.end(), fileType) != m_fileTypes.end(); 0465 }