File indexing completed on 2024-05-12 04:42:46

0001 /*
0002     SPDX-FileCopyrightText: 2020 Volker Krause <vkrause@kde.org>
0003 
0004     SPDX-License-Identifier: LGPL-2.0-or-later
0005 */
0006 
#include "gbfsjob.h"
#include "gbfsreader.h"
#include "geo/geojson_p.h"

#include <KPublicTransport/Location>

#include <QJsonObject>
#include <QNetworkAccessManager>
#include <QNetworkReply>
#include <QNetworkReply>
#include <QPolygonF>
#include <QVersionNumber>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <functional>
#include <iterator>
#include <numeric>
0022 
0023 using namespace KPublicTransport;
0024 
0025 GBFSJob::GBFSJob(QNetworkAccessManager *nam, QObject *parent)
0026     : QObject(parent)
0027     , m_nam(nam)
0028 {
0029     assert(nam);
0030 }
0031 
0032 GBFSJob::~GBFSJob() = default;
0033 
0034 GBFSJob::Error GBFSJob::error() const
0035 {
0036     return m_error;
0037 }
0038 
0039 QString GBFSJob::errorMessage() const
0040 {
0041     return m_errorMsg;
0042 }
0043 
0044 GBFSService GBFSJob::service() const
0045 {
0046     return m_service;
0047 }
0048 
0049 void GBFSJob::setRequestedData(std::vector<GBFS::FileType> &&fileTypes)
0050 {
0051     m_fileTypes = std::move(fileTypes);
0052 }
0053 
0054 void GBFSJob::discoverAndUpdate(const GBFSService &service)
0055 {
0056     m_service = service;
0057     if (!m_service.systemId.isEmpty()) {
0058         m_store = GBFSStore(m_service.systemId);
0059 
0060         if (m_store.hasCurrentData(GBFS::Discovery)) {
0061             qDebug() << "reusing cached discovery data" << m_service.systemId;
0062             m_discoverDoc = m_store.loadData(GBFS::Discovery);
0063             parseDiscoverData();
0064             return;
0065         }
0066     }
0067 
0068     qDebug() << "fetching discovery data" << m_service.discoveryUrl;
0069     auto reply = m_nam->get(QNetworkRequest(m_service.discoveryUrl));
0070     connect(reply, &QNetworkReply::finished, this, [this, reply]() { discoverFinished(reply); });
0071 }
0072 
0073 void GBFSJob::discoverFinished(QNetworkReply *reply)
0074 {
0075     reply->deleteLater();
0076     if (reply->error() != QNetworkReply::NoError) {
0077         if (m_previousDiscoveryUrl.isValid()) {
0078             qDebug() << "new version discovery failed, falling back to old one" <<  reply->errorString();
0079             m_service.discoveryUrl = m_previousDiscoveryUrl;
0080         } else {
0081             handleNetworkError(reply);
0082             return;
0083         }
0084     } else {
0085         m_discoverDoc = QJsonDocument::fromJson(reply->readAll());
0086     }
0087     parseDiscoverData();
0088 }
0089 
0090 void GBFSJob::parseDiscoverData()
0091 {
0092     const auto top = m_discoverDoc.object();
0093     //qDebug() << QJsonDocument(top).toJson();
0094 
0095     const auto data = top.value(QLatin1String("data")).toObject();
0096     // pick the feeds with the best language for our current locale
0097     if (data.size() == 1) {
0098         // only one set of feeds
0099         m_feeds = data.begin().value().toObject().value(QLatin1String("feeds")).toArray();
0100         if (m_feeds.isEmpty()) { // invalid format for single feeds
0101             m_feeds = data.value(QLatin1String("feeds")).toArray();
0102         }
0103     } else if (!data.empty()) {
0104         const auto localeLangs = QLocale().uiLanguages();
0105         for (const auto &l : localeLangs) {
0106             m_feeds = data.value(l).toObject().value(QLatin1String("feeds")).toArray();
0107             if (m_feeds.isEmpty()) {
0108                 m_feeds = data.value(l.toLower()).toObject().value(QLatin1String("feeds")).toArray();
0109             }
0110             if (m_feeds.empty() && l.size() > 2 && l[2] == QLatin1Char('-')) {
0111                 m_feeds = data.value(l.left(2)).toObject().value(QLatin1String("feeds")).toArray();
0112             }
0113             if (!m_feeds.empty()) {
0114                 break;
0115             }
0116         }
0117         // take the first one if we haven't found a better match
0118         if (m_feeds.empty()) {
0119             qDebug() << "picking first language, as none matches" << localeLangs;
0120             m_feeds = data.begin().value().toObject().value(QLatin1String("feeds")).toArray();
0121         }
0122     }
0123     if (m_feeds.empty()) {
0124         m_error = DataError;
0125         m_errorMsg = QStringLiteral("no feed found in discovery response!");
0126         Q_EMIT finished();
0127         return;
0128     }
0129 
0130     m_state = m_state == State::Discover ? State::Version : State::SystemInformation;
0131     processFeeds();
0132 }
0133 
0134 void GBFSJob::processFeeds()
0135 {
0136     bool proccedAtLeastOneFeed = false;
0137     const auto state = m_state; // can change as result of processing
0138     for (const auto &feedVal : std::as_const(m_feeds)) {
0139         const auto feed = feedVal.toObject();
0140         const auto name = feed.value(QLatin1String("name")).toString();
0141         const auto type = GBFS::typeForKeyName(name);
0142         const auto url = QUrl(feed.value(QLatin1String("url")).toString());
0143 
0144         switch (type) {
0145             case GBFS::SystemInformation:
0146                 if (state != State::SystemInformation) {
0147                     continue;
0148                 }
0149                 break;
0150             case GBFS::Versions:
0151                 if (state != State::Version) {
0152                     continue;
0153                 }
0154                 break;
0155             case GBFS::StationInformation:
0156             case GBFS::StationStatus:
0157             case GBFS::FreeBikeStatus:
0158             case GBFS::VehicleTypes:
0159             case GBFS::GeofencingZones:
0160                 if (state != State::Data || !shouldFetchFile(type)) {
0161                     continue;
0162                 }
0163                 break;
0164             case GBFS::Discovery:
0165             case GBFS::SystemHours:
0166             case GBFS::SystemCalendar:
0167             case GBFS::SystemRegions:
0168             case GBFS::SystemPricingPlans:
0169             case GBFS::SystemAlerts:
0170                 continue;
0171             default:
0172                 qDebug() << "Unhandled feed:" << name << url;
0173                 continue;
0174         }
0175 
0176         if (!m_store.isValid() || !m_store.hasCurrentData(type)) {
0177             qDebug() << "fetching" << name;
0178             auto reply = m_nam->get(QNetworkRequest(url));
0179             connect(reply, &QNetworkReply::finished, this, [this, reply, type]() { fetchFinished(reply, type); });
0180             ++m_pendingJobs;
0181         } else {
0182             parseData(m_store.loadData(type), type);
0183         }
0184         proccedAtLeastOneFeed = true;
0185     }
0186 
0187     if (!proccedAtLeastOneFeed) {
0188         switch (m_state) {
0189             case State::Version:
0190                 m_state = State::SystemInformation;
0191                 break;
0192             case State::SystemInformation:
0193             case State::Data:
0194                 m_error = DataError;
0195                 m_errorMsg = m_state == State::SystemInformation ? QStringLiteral("no system information") : QStringLiteral("no data");
0196                 Q_EMIT finished();
0197                 return;
0198             default:
0199                 Q_UNREACHABLE();
0200         }
0201         QMetaObject::invokeMethod(this, &GBFSJob::processFeeds, Qt::QueuedConnection);
0202     } else if (m_pendingJobs == 0 && state == State::Data) {
0203         finalize();
0204     }
0205 }
0206 
0207 void GBFSJob::fetchFinished(QNetworkReply *reply, GBFS::FileType type)
0208 {
0209     reply->deleteLater();
0210     --m_pendingJobs;
0211     const auto state = m_state; // can change as part of processing
0212 
0213     if (reply->error() != QNetworkReply::NoError) {
0214         // don't consider geofencing_zones failure fatal
0215         if (type != GBFS::GeofencingZones) {
0216             handleNetworkError(reply);
0217             return;
0218         } else {
0219             qDebug() << reply->url() << reply->errorString();
0220         }
0221     } else {
0222         const auto doc = QJsonDocument::fromJson(reply->readAll());
0223         if (m_store.isValid()) {
0224             m_store.storeData(type, doc);
0225         }
0226         parseData(doc, type);
0227     }
0228 
0229     if (m_pendingJobs == 0 && state == State::Data) {
0230         finalize();
0231     }
0232 }
0233 
0234 void GBFSJob::handleNetworkError(QNetworkReply *reply)
0235 {
0236     m_error = reply->attribute(QNetworkRequest::HttpStatusCodeAttribute).toInt() == 429 ? TooManyRequestsError : NetworkError;
0237     m_errorMsg = reply->errorString();
0238     if (m_pendingJobs == 0) { // wait for the rest to finish otherwise, to avoid double finished() emission
0239         Q_EMIT finished();
0240     }
0241 }
0242 
0243 void GBFSJob::parseData(const QJsonDocument &doc, GBFS::FileType type)
0244 {
0245     switch (type) {
0246         case GBFS::SystemInformation:
0247             parseSystemInformation(doc);
0248             break;
0249         case GBFS::StationInformation:
0250             parseStationInformation(doc);
0251             break;
0252         case GBFS::FreeBikeStatus:
0253             parseFreeBikeStatus(doc);
0254             break;
0255         case GBFS::Versions:
0256             parseVersionData(doc);
0257             break;
0258         case GBFS::GeofencingZones:
0259             parseGeofencingZones(doc);
0260         default:
0261             break;
0262     }
0263 }
0264 
0265 void GBFSJob::parseSystemInformation(const QJsonDocument &doc)
0266 {
0267     const auto systemId = GBFSReader::dataValue(doc, QLatin1String("system_id")).toString();
0268     if (systemId.isEmpty()) {
0269         m_error = DataError;
0270         m_errorMsg = QStringLiteral("unable to determine system_id!");
0271         Q_EMIT finished();
0272         return;
0273     }
0274     if (m_service.systemId.isEmpty()) {
0275         m_service.systemId = systemId;
0276     }
0277     m_store = GBFSStore(m_service.systemId);
0278     m_store.storeData(GBFS::Discovery, m_discoverDoc);
0279     m_store.storeData(GBFS::SystemInformation, doc);
0280     if (!m_versionDoc.isEmpty()) {
0281         m_store.storeData(GBFS::Versions, m_versionDoc);
0282     }
0283 
0284     m_state = State::Data;
0285     QMetaObject::invokeMethod(this, &GBFSJob::processFeeds, Qt::QueuedConnection);
0286 }
0287 
0288 void GBFSJob::parseStationInformation(const QJsonDocument &doc)
0289 {
0290     const auto stations = GBFSReader::dataValue(doc, QLatin1String("stations")).toArray();
0291     collectCoordinates(stations);
0292     qDebug() << stations.size() << "stations/docks";
0293 }
0294 
0295 void GBFSJob::parseFreeBikeStatus(const QJsonDocument &doc)
0296 {
0297     const auto bikes = GBFSReader::dataValue(doc, QLatin1String("bikes")).toArray();
0298     collectCoordinates(bikes);
0299     qDebug() << bikes.size() << "free floating vehicles";
0300 }
0301 
// Extends [minVal, maxVal] to cover the values of one coordinate axis while
// leaving out outliers.
//
// @param values   coordinates of one axis, sorted in ascending order
// @param minVal   in/out: lowered to the smallest accepted value, if that is smaller
// @param maxVal   in/out: raised to the largest accepted value, if that is larger
// @param distFunc distance between two coordinates of this axis, in the unit
//                 the 50'000 threshold below is expressed in
//
// Works in two steps: values at either end that are more than the threshold
// away from their neighbour are trimmed, then the mean and standard deviation
// of the remaining values define a 3 sigma window. The reported bounds are
// always actual input values inside that window. Empty input leaves
// minVal/maxVal untouched.
static void filterOutliers(const std::vector<double> &values, double &minVal, double &maxVal, const std::function<double(double, double)> &distFunc)
{
    if (values.empty()) {
        return; // nothing to contribute, and the iterator arithmetic below needs at least one element
    }

    // first step: primitive distance-based trimming at the extremes
    auto beginIt = values.begin();
    while (std::next(beginIt) != values.end() && distFunc(*beginIt, *std::next(beginIt)) > 50'000) {
        ++beginIt;
    }
    auto endIt = std::prev(values.end());
    while (endIt != beginIt && std::prev(endIt) != beginIt && distFunc(*endIt, *std::prev(endIt)) > 50'000) {
        --endIt;
    }
    ++endIt;

    // second step: standard deviation of the trimmed range
    const auto n = static_cast<double>(std::distance(beginIt, endIt));
    auto mean = std::accumulate(beginIt, endIt, 0.0, [n](double acc, double v) { return acc + v / n; });
    // rounding must not push the mean outside the values it was computed from
    mean = std::clamp(mean, *beginIt, *std::prev(endIt));
    const auto meanOfSquares = std::accumulate(beginIt, endIt, 0.0, [n](double acc, double v) { return acc + (v * v) / n; });
    // E[x^2] - mean^2 can end up marginally negative for (nearly) identical values, which would turn sigma into NaN
    const auto variance = std::max(0.0, meanOfSquares - mean * mean);
    const auto sigma = std::sqrt(variance) * 3.0;

    // smallest input value inside the 3 sigma window
    auto lowerBound = mean - sigma;
    const auto lowerIt = std::lower_bound(values.begin(), values.end(), lowerBound);
    if (lowerIt != values.end()) {
        lowerBound = (*lowerIt);
    }
    // largest input value inside the 3 sigma window; upper_bound so that values
    // exactly on the boundary count as inside (matters when sigma is 0)
    auto upperBound = mean + sigma;
    const auto upperIt = std::upper_bound(values.begin(), values.end(), upperBound);
    if (upperIt != values.begin()) {
        upperBound = *(std::prev(upperIt));
    }

    minVal = std::min(minVal, std::max(lowerBound, values.front())); // clamp by 3 sigma, but don't exceed the input range when not needed
    maxVal = std::max(maxVal, std::min(upperBound, values.back()));
}
0345 
0346 void GBFSJob::collectCoordinates(const QJsonArray &array)
0347 {
0348     m_latitudes.reserve(m_latitudes.size() + array.size());
0349     m_longitudes.reserve(m_longitudes.size() + array.size());
0350 
0351     for (const auto &statVal : array) {
0352         const auto station = statVal.toObject();
0353         const auto lat = GBFSReader::readLatitude(station);
0354         if (!std::isnan(lat) && lat >= -90.0 && lat <= 90.0 && std::abs(lat) > 0.001) {
0355             m_latitudes.push_back(lat);
0356         }
0357         const auto lon = GBFSReader::readLongitude(station);
0358         if (!std::isnan(lon) && lon >= -180.0 && lon <= 180.0 && std::abs(lon) > 0.001) {
0359             m_longitudes.push_back(lon);
0360         }
0361     }
0362 }
0363 
0364 void GBFSJob::parseVersionData(const QJsonDocument &doc)
0365 {
0366     m_versionDoc = doc;
0367     const auto versions = GBFSReader::dataValue(doc, QLatin1String("versions")).toArray();
0368     QJsonObject bestVersion;
0369     for (const auto &verVal : versions) {
0370         const auto version = verVal.toObject();
0371         if (bestVersion.isEmpty()) {
0372             bestVersion = version;
0373         }
0374         if (QVersionNumber::fromString(bestVersion.value(QLatin1String("version")).toString()) < QVersionNumber::fromString(version.value(QLatin1String("version")).toString())) {
0375             bestVersion = version;
0376         }
0377     }
0378 
0379     const auto url = QUrl(bestVersion.value(QLatin1String("url")).toString());
0380     if (!url.isEmpty() && m_service.discoveryUrl != url) {
0381         qDebug() << "found newer version:" << url << m_service.discoveryUrl;
0382         m_previousDiscoveryUrl = m_service.discoveryUrl;
0383         m_service.discoveryUrl = url;
0384         m_state = State::DiscoverRestart;
0385         discoverAndUpdate(m_service);
0386     } else {
0387         m_state = State::SystemInformation;
0388         QMetaObject::invokeMethod(this, &GBFSJob::processFeeds, Qt::QueuedConnection);
0389     }
0390 }
0391 
0392 void GBFSJob::parseGeofencingZones(const QJsonDocument &doc)
0393 {
0394     const auto features = GBFSReader::dataValue(doc, QLatin1String("geofencing_zones")).toObject()
0395         .value(QLatin1String("features")).toArray();
0396     for (const auto &featureVal : features) {
0397         const auto geo = featureVal.toObject().value(QLatin1String("geometry")).toObject();
0398         const auto rect = GeoJson::readOuterPolygon(geo).boundingRect();
0399         if (rect.isNull() || rect.left() < -180.0 || rect.right() > 180.0 || rect.top() < -90.0 || rect.bottom() > 90.0) {
0400             qDebug() << "invalid geofence box:" << rect;
0401             continue;
0402         }
0403         // we need to run this through outlier filtering as well, we got random nonsense elements in a few cities as well
0404         m_latitudes.push_back(rect.top());
0405         m_latitudes.push_back(rect.bottom());
0406         m_longitudes.push_back(rect.left());
0407         m_longitudes.push_back(rect.right());
0408     }
0409 }
0410 
0411 void GBFSJob::finalize()
0412 {
0413     // add a 500m radius for single points
0414     if (m_latitudes.size() == 1) {
0415         const auto d = 250.0 / Location::distance(m_latitudes.front(), 0.0, m_latitudes.front() + 1.0, 0.0);
0416         m_latitudes.push_back(m_latitudes.front() - d);
0417         m_latitudes.push_back(m_latitudes.front() + d);
0418     }
0419     if (m_longitudes.size() == 1 && !m_latitudes.empty()) {
0420         const auto d = 250.0 / Location::distance(m_latitudes.front(), m_longitudes.front(), m_latitudes.front(), m_longitudes.front() + 1.0);
0421         m_longitudes.push_back(m_longitudes.front() - d);
0422         m_longitudes.push_back(m_longitudes.front() + d);
0423     }
0424 
0425     double minLat = 90.0, maxLat = -90.0, minLon = 180.0, maxLon = -180.0;
0426     if (!m_latitudes.empty() && !m_longitudes.empty()) {
0427         std::sort(m_latitudes.begin(), m_latitudes.end());
0428         std::sort(m_longitudes.begin(), m_longitudes.end());
0429 
0430         // covered area is reasonable, take as-is
0431         if (Location::distance(m_latitudes.front(), m_longitudes.front(), m_latitudes.back(), m_longitudes.back()) <= 50'000) {
0432             minLat = m_latitudes.front();
0433             minLon = m_longitudes.front();
0434             maxLat = m_latitudes.back();
0435             maxLon = m_longitudes.back();
0436         } else {
0437             // try to filter out outliers
0438             filterOutliers(m_latitudes, minLat, maxLat, [](auto lat1, auto lat2) { return Location::distance(lat1, 0.0, lat2, 0.0); });
0439             filterOutliers(m_longitudes, minLon, maxLon, [&](auto lon1, auto lon2) {
0440                 const auto lat = (maxLat - minLat) / 2.0;
0441                 return Location::distance(lat, lon1, lat, lon2);
0442             });
0443         }
0444     }
0445 
0446     if (maxLat > minLat && maxLon > minLon) {
0447         m_service.boundingBox = QRectF(QPointF(minLon, minLat), QPointF(maxLon, maxLat));
0448     }
0449 
0450     // round bounding box coordinates to stabilize the diff gbfs-feeds.json a bit
0451     constexpr const auto COORD_RESOLUTION = 100.0;
0452     m_service.boundingBox.setLeft(std::floor(m_service.boundingBox.left() * COORD_RESOLUTION) / COORD_RESOLUTION);
0453     m_service.boundingBox.setTop(std::floor(m_service.boundingBox.top() * COORD_RESOLUTION) / COORD_RESOLUTION);
0454     m_service.boundingBox.setRight(std::ceil(m_service.boundingBox.right() * COORD_RESOLUTION) / COORD_RESOLUTION);
0455     m_service.boundingBox.setBottom(std::ceil(m_service.boundingBox.bottom() * COORD_RESOLUTION) / COORD_RESOLUTION);
0456 
0457     qDebug() << "bounding box:" << m_service.boundingBox;
0458     GBFSServiceRepository::store(m_service);
0459     Q_EMIT finished();
0460 }
0461 
0462 bool GBFSJob::shouldFetchFile(GBFS::FileType fileType) const
0463 {
0464     return m_fileTypes.empty() || std::find(m_fileTypes.begin(), m_fileTypes.end(), fileType) != m_fileTypes.end();
0465 }