File indexing completed on 2024-11-24 04:45:05

0001 /*
0002     SPDX-FileCopyrightText: 2020 Volker Krause <vkrause@kde.org>
0003 
0004     SPDX-License-Identifier: LGPL-2.0-or-later
0005 */
0006 
0007 #include "osmairportdb.h"
0008 
0009 #include "osm/geomath.h"
0010 #include "osm/xmlparser.h"
0011 
0012 #include <QDebug>
0013 #include <QFile>
0014 
// Distance thresholds used when matching stations to airports, in meter.
enum {
    // maximum diagonal of the bounding box around several stations for them to count as one cluster
    StationClusterDistance = 100,
    // maximum distance between a station and a terminal outline for the station to count as belonging to it
    StationToTerminalDistance = 75,
};

// Terminals smaller than this fraction (0.1 == 10%) of the largest terminal's size are dropped.
constexpr float TerminalSizeThreshold = 0.1f;
0021 
0022 void OSMAirportDb::load(const QString &path)
0023 {
0024     QFile f(path);
0025     if (!f.open(QFile::ReadOnly)) {
0026         qCritical() << "Failed to open OSM input file!" << f.errorString() << f.fileName();
0027         return;
0028     }
0029     OSM::XmlParser p(&m_dataset);
0030     p.parse(&f);
0031 
0032     qDebug() << "nodes:" << m_dataset.nodes.size();
0033     qDebug() << "ways:" << m_dataset.ways.size();
0034     qDebug() << "relations:" << m_dataset.relations.size();
0035 
0036     // find all airports
0037     // those can be present in multiple forms
0038     // as a single node: we don't care, this doesn't improve coordinate information for our use-case
0039     // as a single way for the outer shape
0040     // as a relation representing a multi-polygon outer shape
0041     OSM::for_each(m_dataset, [this](auto elem) { loadAirport(elem); }, OSM::IncludeRelations | OSM::IncludeWays);
0042 
0043     // find all terminal buildings, and add them to their airports
0044     OSM::for_each(m_dataset, [this](auto elem) { loadTerminal(elem); });
0045 
0046     // load railway stations
0047     OSM::for_each(m_dataset, [this](auto elem) { loadStation(elem); });
0048 
0049     // once we have all elements grouped by airport, filter out elements we don't want to consider
0050     for (auto &a : m_iataMap) {
0051         filterTerminals(a.second);
0052         filterStations(a.second);
0053     }
0054 
0055     qDebug() << "airports:" << m_iataMap.size();
0056     qDebug() << "  with a single terminal:" << std::count_if(m_iataMap.begin(), m_iataMap.end(), [](const auto &a) { return a.second.terminals.size() == 1; } );
0057     qDebug() << "  with multiple terminals:" << std::count_if(m_iataMap.begin(), m_iataMap.end(), [](const auto &a) { return a.second.terminals.size() > 1; } );
0058     qDebug() << "  with a single entrance:" << std::count_if(m_iataMap.begin(), m_iataMap.end(), [](const auto &a) { return a.second.terminalEntrances.size() == 1; } );
0059     qDebug() << "  with multiple entrances:" << std::count_if(m_iataMap.begin(), m_iataMap.end(), [](const auto &a) { return a.second.terminalEntrances.size() > 1; } );
0060     qDebug() << "  with a single station:" <<  std::count_if(m_iataMap.begin(), m_iataMap.end(), [](const auto &a) { return a.second.stations.size() == 1; } );
0061     qDebug() << "  with multiple stations:" <<  std::count_if(m_iataMap.begin(), m_iataMap.end(), [](const auto &a) { return a.second.stations.size() > 1; } );
0062     qDebug() << "  with at least one singular feature:" <<  std::count_if(m_iataMap.begin(), m_iataMap.end(), [](const auto &a) {
0063         return a.second.stations.size() == 1 || a.second.terminals.size() == 1 || a.second.terminalEntrances.size() == 1;
0064     });
0065     qDebug() << "  with conflicting features:" <<  std::count_if(m_iataMap.begin(), m_iataMap.end(), [](const auto &a) {
0066         return a.second.stations.size() != 1 && a.second.terminals.size() != 1 && a.second.terminalEntrances.size() != 1 &&
0067             !(a.second.stations.empty() && a.second.terminals.empty() && a.second.terminalEntrances.empty());
0068     });
0069 }
0070 
0071 void OSMAirportDb::loadAirport(OSM::Element elem)
0072 {
0073     const auto aeroway = elem.tagValue("aeroway");
0074     if (aeroway != QLatin1StringView("aerodrome")) {
0075       return;
0076     }
0077 
0078     // filter out airports we aren't interested in
0079     // not strictly needed here, but it reduces the diagnostic noise
0080     const auto disused = elem.tagValue("disused");
0081     const auto militayLanduse =
0082         elem.tagValue("landuse") == QLatin1StringView("military");
0083     if (!disused.isEmpty() || militayLanduse) {
0084         return;
0085     }
0086 
0087     const auto iata = elem.tagValue("iata");
0088     if (iata.isEmpty()) {
0089         return;
0090     }
0091 
0092     // osmconvert gives us wrong values e.g. for FRA, so we need to do this ourselves...
0093     elem.recomputeBoundingBox(m_dataset);
0094 
0095     // semicolon list split
0096     if (iata.contains(QLatin1Char(';'))) {
0097         const auto iatas = iata.split(QLatin1Char(';'), Qt::SkipEmptyParts);
0098         for (const auto &iata : iatas) {
0099             loadAirport(elem, iata);
0100         }
0101     } else {
0102         loadAirport(elem, iata);
0103     }
0104 }
0105 
0106 static QPolygonF polygonFromOuterPath(const std::vector<const OSM::Node*> &path)
0107 {
0108     if (path.empty()) {
0109         return {};
0110     }
0111 
0112     QPolygonF subPoly, result;
0113     subPoly.push_back(QPointF(path[0]->coordinate.latF(), path[0]->coordinate.lonF()));
0114     OSM::Id firstNode = path[0]->id;
0115     for (auto it = std::next(path.begin()); it != path.end(); ++it) {
0116         if (firstNode == 0) { // starting a new loop
0117             firstNode = (*it)->id;
0118             subPoly.push_back(QPointF((*it)->coordinate.latF(), (*it)->coordinate.lonF()));
0119         } else if ((*it)->id == firstNode) { // just closed a loop, so this is not a line on the path
0120             subPoly.push_back(QPointF((*it)->coordinate.latF(), (*it)->coordinate.lonF()));
0121             firstNode = 0;
0122             result = result.united(subPoly);
0123             subPoly.clear();
0124         } else {
0125             subPoly.push_back(QPointF((*it)->coordinate.latF(), (*it)->coordinate.lonF()));
0126         }
0127     }
0128     if (!subPoly.empty()) {
0129         result = result.united(subPoly);
0130     }
0131     return result;
0132 }
0133 
0134 void OSMAirportDb::loadAirport(OSM::Element elem, const QString &iataCode)
0135 {
0136     if (iataCode.size() != 3 || !std::all_of(iataCode.begin(), iataCode.end(), [](const auto c) { return c.isUpper(); })) {
0137         qWarning() << "IATA code format violation:" << iataCode << elem.url();
0138         return;
0139     }
0140 
0141     const auto it = m_iataMap.find(iataCode);
0142     if (it != m_iataMap.end() && !OSM::intersects((*it).second.bbox, elem.boundingBox())) {
0143         qWarning() << "Duplicate IATA code:" << iataCode << (*it).second.source << elem.url();
0144         return;
0145     }
0146 
0147     const auto poly = polygonFromOuterPath(elem.outerPath(m_dataset));
0148     if (it != m_iataMap.end()) {
0149         (*it).second.bbox = OSM::unite(elem.boundingBox(), (*it).second.bbox);
0150         (*it).second.airportPolygon = (*it).second.airportPolygon.united(poly);
0151     } else {
0152         OSMAirportData airport;
0153         airport.source = elem.url();
0154         airport.bbox = elem.boundingBox();
0155         airport.airportPolygon = poly;
0156         m_iataMap[iataCode] = std::move(airport);
0157     }
0158 }
0159 
0160 void OSMAirportDb::loadTerminal(OSM::Element elem)
0161 {
0162     const auto aeroway = elem.tagValue("aeroway");
0163     if (aeroway != QLatin1StringView("terminal")) {
0164       return;
0165     }
0166 
0167     // filter out freight terminals
0168     const auto usage = elem.tagValue("usage");
0169     const auto traffic_mode = elem.tagValue("traffic_mode");
0170     const auto building = elem.tagValue("building");
0171     const auto industrial = elem.tagValue("industrial");
0172     if (usage == QLatin1StringView("freight") ||
0173         traffic_mode == QLatin1StringView("freigt") ||
0174         building == QLatin1StringView("industrial") || !industrial.isEmpty()) {
0175       return;
0176     }
0177 
0178     // find matching airport
0179     for (auto it = m_iataMap.begin(); it != m_iataMap.end(); ++it) {
0180         if (!OSM::intersects((*it).second.bbox, elem.boundingBox())) {
0181             continue;
0182         }
0183         // check against the exact airport boundary, not just the bounding box,
0184         // this excludes terminal buildings from adjacent sites we don't care about
0185         // example: the Airbus delivery buildings next to TLS
0186         if (!(*it).second.airportPolygon.intersects(QRectF(QPointF(elem.boundingBox().min.latF(), elem.boundingBox().min.lonF()), QPointF(elem.boundingBox().max.latF(), elem.boundingBox().max.lonF())))) {
0187             continue;
0188         }
0189         //qDebug() << "found terminal for airport:" << elem.url() << (*it).first << (*it).second.source;
0190         (*it).second.terminals.push_back(elem);
0191 
0192         // look for entrances to terminals
0193         for (auto node : elem.outerPath(m_dataset)) {
0194 
0195             // filter out inaccessible entrances, or gates
0196             const auto access =
0197                 OSM::tagValue(*node, QLatin1StringView("access"));
0198             const auto aeroway =
0199                 OSM::tagValue(*node, QLatin1StringView("gate"));
0200             if (access == QLatin1StringView("private") ||
0201                 access == QLatin1String("no") ||
0202                 aeroway == QLatin1String("gate")) {
0203               continue;
0204             }
0205 
0206             const auto entrance =
0207                 OSM::tagValue(*node, QLatin1StringView("entrance"));
0208             if (entrance == QLatin1StringView("yes") ||
0209                 entrance == QLatin1String("main")) {
0210               // qDebug() << "  found entrance for terminal:" << (*nodeIt).url()
0211               // << entrance << access;
0212               (*it).second.terminalEntrances.push_back(node->coordinate);
0213             }
0214         }
0215     }
0216 }
0217 
0218 void OSMAirportDb::filterTerminals(OSMAirportData &airport)
0219 {
0220     if (airport.terminals.empty()) {
0221         return;
0222     }
0223 
0224     // sort by size, and drop micro terminals (which are usually data artifacts)
0225     std::sort(airport.terminals.begin(), airport.terminals.end(), [](auto lhs, auto rhs) {
0226         return OSM::distance(lhs.boundingBox().min, lhs.boundingBox().max) > OSM::distance(rhs.boundingBox().min, rhs.boundingBox().max);
0227     });
0228     const auto sizeThreshold = OSM::distance(airport.terminals[0].boundingBox().min, airport.terminals[0].boundingBox().max) * TerminalSizeThreshold;
0229     airport.terminals.erase(std::partition(airport.terminals.begin(), airport.terminals.end(), [sizeThreshold](auto t) {
0230         return OSM::distance(t.boundingBox().min, t.boundingBox().max) > sizeThreshold;
0231     }), airport.terminals.end());
0232 }
0233 
0234 void OSMAirportDb::loadStation(OSM::Element elem)
0235 {
0236     const auto railway = elem.tagValue("railway");
0237     if (railway != QLatin1StringView("station") &&
0238         railway != QLatin1String("halt") &&
0239         railway != QLatin1String("tram_stop")) {
0240       return;
0241     }
0242 
0243     // try to filter out airport-interal transport systems, those are typically airside and thus not what we want
0244     const auto station = elem.tagValue("station");
0245     if (station == QLatin1StringView("monorail")) {
0246       return;
0247     }
0248 
0249     for (auto it = m_iataMap.begin(); it != m_iataMap.end(); ++it) {
0250         const auto &airport = (*it).second;
0251 
0252         // we need the exact path here, the bounding box can contain a lot more stuff
0253         // the bounding box check is just for speed
0254         // as we also look for stations in close proximity to terminals, we technically need a slightly larger bounding box though
0255         // in most cases this just works, SHA being one of the counter examples due to perfect alignment with its bounding box
0256         const auto adjustedBbox = OSM::BoundingBox(OSM::Coordinate(airport.bbox.min.latitude - 100'000, airport.bbox.min.longitude - 100'000),
0257                                                    OSM::Coordinate(airport.bbox.max.latitude + 100'000, airport.bbox.max.longitude + 100'000));
0258         if (!OSM::contains(adjustedBbox, elem.center())) {
0259             continue;
0260         }
0261 
0262         const auto onPremises = airport.airportPolygon.containsPoint(QPointF(elem.center().latF(), elem.center().lonF()), Qt::WindingFill);
0263         // one would assume that terminals are always within the airport bounds, but that's not the case
0264         // they sometimes expand beyond them. A station inside a terminal is however most likely something relevant for us
0265         const auto inTerminal = std::any_of(airport.terminals.begin(), airport.terminals.end(), [&elem](const auto &terminal) {
0266             return OSM::contains(terminal.boundingBox(), elem.center());
0267         });
0268 
0269         // distance of the station to the terminal outer polygon
0270         uint32_t distanceToTerminal = std::numeric_limits<uint32_t>::max();
0271         for (auto terminal : airport.terminals) {
0272             const auto outerPath = terminal.outerPath(m_dataset);
0273             distanceToTerminal = std::min(distanceToTerminal, OSM::distance(outerPath, elem.center()));
0274         }
0275 
0276         if (onPremises || inTerminal || distanceToTerminal < StationToTerminalDistance) {
0277             qDebug() << "found station for airport:" << elem.url() << (*it).first << (*it).second.source << onPremises << inTerminal << distanceToTerminal;
0278             (*it).second.stations.push_back(elem);
0279         }
0280     }
0281 }
0282 
0283 void OSMAirportDb::filterStations(OSMAirportData &airport)
0284 {
0285     // if we have a full station, drop halts
0286     auto it = std::partition(
0287         airport.stations.begin(), airport.stations.end(), [](auto station) {
0288           return station.tagValue("railway") == QLatin1StringView("station");
0289         });
0290     if (it != airport.stations.begin() && it != airport.stations.end()) {
0291         airport.stations.erase(it, airport.stations.end());
0292     }
0293 
0294     // drop light_rail in favor of "real" rail, as that's often used for on-premises transport lines
0295     it = std::partition(
0296         airport.stations.begin(), airport.stations.end(), [](auto station) {
0297           return station.tagValue("station") != QLatin1StringView("light_rail");
0298         });
0299     if (it != airport.stations.begin() && it != airport.stations.end()) {
0300         airport.stations.erase(it, airport.stations.end());
0301     }
0302 
0303     // "creative" way of separating "real" and on-premises stations: only real ones tend to have Wikidata tags
0304     it = std::partition(airport.stations.begin(), airport.stations.end(), [](auto station) {
0305         return !station.tagValue("wikidata").isEmpty();
0306     });
0307     if (it != airport.stations.begin() && it != airport.stations.end()) {
0308         airport.stations.erase(it, airport.stations.end());
0309     }
0310 
0311     // prioritize by number of platforms, if we have that information for all stations
0312     if (airport.stations.size() > 1 && std::all_of(airport.stations.begin(), airport.stations.end(), [](auto s) { return !s.tagValue("platforms").isEmpty(); })) {
0313         std::sort(airport.stations.begin(), airport.stations.end(), [](auto lhs, auto rhs) {
0314             return lhs.tagValue("platforms").toInt() > rhs.tagValue("platforms").toInt();
0315         });
0316         if (airport.stations[0].tagValue("platforms") != airport.stations[1].tagValue("platforms")) {
0317             airport.stations.erase(std::next(airport.stations.begin()), airport.stations.end());
0318         }
0319     }
0320 }
0321 
0322 OSM::Coordinate OSMAirportDb::lookup(const QString &iata, float lat, float lon)
0323 {
0324     const auto it = m_iataMap.find(iata);
0325     if (it == m_iataMap.end()) {
0326         //qDebug() << "No airport with IATA code:" << iata;
0327         return {};
0328     }
0329 
0330     const OSM::Coordinate wdCoord(lat, lon);
0331     const auto &airport = (*it).second;
0332     if (!OSM::contains(airport.bbox, wdCoord)) {
0333         qDebug() << "Airport" << iata << "is not where we expect it to be!?" << airport.source << airport.bbox << lat << lon;
0334         return {};
0335     }
0336     if (airport.terminals.empty() && airport.terminalEntrances.empty() && airport.stations.empty()) {
0337         // no details available for this airport
0338         return {};
0339     }
0340 
0341     qDebug() << "Optimizing" << iata << airport.source << lat << lon << airport.bbox;
0342     qDebug() << "  entrances:" << airport.terminalEntrances.size() << "terminals:" << airport.terminals.size() << "stations:" << airport.stations.size();
0343 
0344     // single station
0345     if (airport.stations.size() == 1) {
0346         qDebug() << "  by station:" << airport.stations[0].url();
0347         return airport.stations[0].center();
0348     }
0349 
0350     // multiple stations, but close together
0351     if (airport.stations.size() > 1) {
0352         auto stationBbox = std::accumulate(airport.stations.begin(), airport.stations.end(), OSM::BoundingBox(), [](OSM::BoundingBox lhs, OSM::Element rhs) {
0353             return OSM::unite(lhs, OSM::BoundingBox(rhs.boundingBox().center(), rhs.boundingBox().center()));
0354         });
0355         if (OSM::distance(stationBbox.min, stationBbox.max) < StationClusterDistance) {
0356             qDebug() << "  by clustered station:" << stationBbox;
0357             return stationBbox.center();
0358         }
0359     }
0360 
0361     // single entrance
0362     if (airport.terminalEntrances.size() == 1) { // ### this works for small airports, but for larger ones this is often due to missing data
0363         qDebug() << "  by entrance:" << airport.terminalEntrances[0];
0364         return airport.terminalEntrances[0];
0365     }
0366 
0367     // single terminal
0368     if (airport.terminals.size() == 1) {
0369         qDebug() << "  by terminal:" << airport.terminals[0].url() << airport.terminals[0].center();
0370         return airport.terminals[0].center();
0371     }
0372 
0373     // multiple terminals: take the center of the sum of all bounding boxes, and TODO check the result isn't ridiculously large
0374     if (airport.terminals.size() > 1) {
0375         const auto terminalBbox = std::accumulate(airport.terminals.begin(), airport.terminals.end(), OSM::BoundingBox(), [](const auto &bbox, auto terminal) {
0376             return OSM::unite(bbox, terminal.boundingBox());
0377         });
0378         // if the original coordinate is outside the terminal bounding box, this is highly likely an improvement,
0379         // otherwise we cannot be sure (see MUC, where the Wikidata coordinate is ideal).
0380         //qDebug() << "    considering terminal bbox:" << terminalBbox;
0381         if (!OSM::contains(terminalBbox, wdCoord)) {
0382             qDebug() << "  by terminal bbox center:" << terminalBbox.center();
0383             return terminalBbox.center();
0384         }
0385     }
0386 
0387     return {};
0388 }