File indexing completed on 2024-11-24 04:45:05
0001 /* 0002 SPDX-FileCopyrightText: 2020 Volker Krause <vkrause@kde.org> 0003 0004 SPDX-License-Identifier: LGPL-2.0-or-later 0005 */ 0006 0007 #include "osmairportdb.h" 0008 0009 #include "osm/geomath.h" 0010 #include "osm/xmlparser.h" 0011 0012 #include <QDebug> 0013 #include <QFile> 0014 0015 enum { 0016 StationClusterDistance = 100, // in meter 0017 StationToTerminalDistance = 75, // in meter 0018 }; 0019 0020 constexpr float TerminalSizeThreshold = 0.1; // percent of largest terminal size 0021 0022 void OSMAirportDb::load(const QString &path) 0023 { 0024 QFile f(path); 0025 if (!f.open(QFile::ReadOnly)) { 0026 qCritical() << "Failed to open OSM input file!" << f.errorString() << f.fileName(); 0027 return; 0028 } 0029 OSM::XmlParser p(&m_dataset); 0030 p.parse(&f); 0031 0032 qDebug() << "nodes:" << m_dataset.nodes.size(); 0033 qDebug() << "ways:" << m_dataset.ways.size(); 0034 qDebug() << "relations:" << m_dataset.relations.size(); 0035 0036 // find all airports 0037 // those can be present in multiple forms 0038 // as a single node: we don't care, this doesn't improve coordinate information for our use-case 0039 // as a single way for the outer shape 0040 // as a relation representing a multi-polygon outer shape 0041 OSM::for_each(m_dataset, [this](auto elem) { loadAirport(elem); }, OSM::IncludeRelations | OSM::IncludeWays); 0042 0043 // find all terminal buildings, and add them to their airports 0044 OSM::for_each(m_dataset, [this](auto elem) { loadTerminal(elem); }); 0045 0046 // load railway stations 0047 OSM::for_each(m_dataset, [this](auto elem) { loadStation(elem); }); 0048 0049 // once we have all elements grouped by airport, filter out elements we don't want to consider 0050 for (auto &a : m_iataMap) { 0051 filterTerminals(a.second); 0052 filterStations(a.second); 0053 } 0054 0055 qDebug() << "airports:" << m_iataMap.size(); 0056 qDebug() << " with a single terminal:" << std::count_if(m_iataMap.begin(), m_iataMap.end(), [](const auto &a) { return a.second.terminals.size() == 1; } ); 0057 qDebug() << " with multiple terminals:" << std::count_if(m_iataMap.begin(), m_iataMap.end(), [](const auto &a) { return a.second.terminals.size() > 1; } ); 0058 qDebug() << " with a single entrance:" << std::count_if(m_iataMap.begin(), m_iataMap.end(), [](const auto &a) { return a.second.terminalEntrances.size() == 1; } ); 0059 qDebug() << " with multiple entrances:" << std::count_if(m_iataMap.begin(), m_iataMap.end(), [](const auto &a) { return a.second.terminalEntrances.size() > 1; } ); 0060 qDebug() << " with a single station:" << std::count_if(m_iataMap.begin(), m_iataMap.end(), [](const auto &a) { return a.second.stations.size() == 1; } ); 0061 qDebug() << " with multiple stations:" << std::count_if(m_iataMap.begin(), m_iataMap.end(), [](const auto &a) { return a.second.stations.size() > 1; } ); 0062 qDebug() << " with at least one singular feature:" << std::count_if(m_iataMap.begin(), m_iataMap.end(), [](const auto &a) { 0063 return a.second.stations.size() == 1 || a.second.terminals.size() == 1 || a.second.terminalEntrances.size() == 1; 0064 }); 0065 qDebug() << " with conflicting features:" << std::count_if(m_iataMap.begin(), m_iataMap.end(), [](const auto &a) { 0066 return a.second.stations.size() != 1 && a.second.terminals.size() != 1 && a.second.terminalEntrances.size() != 1 && 0067 !(a.second.stations.empty() && a.second.terminals.empty() && a.second.terminalEntrances.empty()); 0068 }); 0069 } 0070 0071 void OSMAirportDb::loadAirport(OSM::Element elem) 0072 { 0073 const auto aeroway = elem.tagValue("aeroway"); 0074 if (aeroway != QLatin1StringView("aerodrome")) { 0075 return; 0076 } 0077 0078 // filter out airports we aren't interested in 0079 // not strictly needed here, but it reduces the diagnostic noise 0080 const auto disused = elem.tagValue("disused"); 0081 const auto militayLanduse = 0082 elem.tagValue("landuse") == QLatin1StringView("military"); 0083 if (!disused.isEmpty() || militayLanduse) { 0084 return; 0085 } 0086 0087 const auto iata = elem.tagValue("iata"); 0088 if (iata.isEmpty()) { 0089 return; 0090 } 0091 0092 // osmconvert gives us wrong values e.g. for FRA, so we need to do this ourselves... 0093 elem.recomputeBoundingBox(m_dataset); 0094 0095 // semicolon list split 0096 if (iata.contains(QLatin1Char(';'))) { 0097 const auto iatas = iata.split(QLatin1Char(';'), Qt::SkipEmptyParts); 0098 for (const auto &iata : iatas) { 0099 loadAirport(elem, iata); 0100 } 0101 } else { 0102 loadAirport(elem, iata); 0103 } 0104 } 0105 0106 static QPolygonF polygonFromOuterPath(const std::vector<const OSM::Node*> &path) 0107 { 0108 if (path.empty()) { 0109 return {}; 0110 } 0111 0112 QPolygonF subPoly, result; 0113 subPoly.push_back(QPointF(path[0]->coordinate.latF(), path[0]->coordinate.lonF())); 0114 OSM::Id firstNode = path[0]->id; 0115 for (auto it = std::next(path.begin()); it != path.end(); ++it) { 0116 if (firstNode == 0) { // starting a new loop 0117 firstNode = (*it)->id; 0118 subPoly.push_back(QPointF((*it)->coordinate.latF(), (*it)->coordinate.lonF())); 0119 } else if ((*it)->id == firstNode) { // just closed a loop, so this is not a line on the path 0120 subPoly.push_back(QPointF((*it)->coordinate.latF(), (*it)->coordinate.lonF())); 0121 firstNode = 0; 0122 result = result.united(subPoly); 0123 subPoly.clear(); 0124 } else { 0125 subPoly.push_back(QPointF((*it)->coordinate.latF(), (*it)->coordinate.lonF())); 0126 } 0127 } 0128 if (!subPoly.empty()) { 0129 result = result.united(subPoly); 0130 } 0131 return result; 0132 } 0133 0134 void OSMAirportDb::loadAirport(OSM::Element elem, const QString &iataCode) 0135 { 0136 if (iataCode.size() != 3 || !std::all_of(iataCode.begin(), iataCode.end(), [](const auto c) { return c.isUpper(); })) { 0137 qWarning() << "IATA code format violation:" << iataCode << elem.url(); 0138 return; 0139 } 0140 0141 const auto it = m_iataMap.find(iataCode); 0142 if (it != m_iataMap.end() && !OSM::intersects((*it).second.bbox, elem.boundingBox())) { 0143 qWarning() << "Duplicate IATA code:" << iataCode << (*it).second.source << elem.url(); 0144 return; 0145 } 0146 0147 const auto poly = polygonFromOuterPath(elem.outerPath(m_dataset)); 0148 if (it != m_iataMap.end()) { 0149 (*it).second.bbox = OSM::unite(elem.boundingBox(), (*it).second.bbox); 0150 (*it).second.airportPolygon = (*it).second.airportPolygon.united(poly); 0151 } else { 0152 OSMAirportData airport; 0153 airport.source = elem.url(); 0154 airport.bbox = elem.boundingBox(); 0155 airport.airportPolygon = poly; 0156 m_iataMap[iataCode] = std::move(airport); 0157 } 0158 } 0159 0160 void OSMAirportDb::loadTerminal(OSM::Element elem) 0161 { 0162 const auto aeroway = elem.tagValue("aeroway"); 0163 if (aeroway != QLatin1StringView("terminal")) { 0164 return; 0165 } 0166 0167 // filter out freight terminals 0168 const auto usage = elem.tagValue("usage"); 0169 const auto traffic_mode = elem.tagValue("traffic_mode"); 0170 const auto building = elem.tagValue("building"); 0171 const auto industrial = elem.tagValue("industrial"); 0172 if (usage == QLatin1StringView("freight") || 0173 traffic_mode == QLatin1StringView("freigt") || 0174 building == QLatin1StringView("industrial") || !industrial.isEmpty()) { 0175 return; 0176 } 0177 0178 // find matching airport 0179 for (auto it = m_iataMap.begin(); it != m_iataMap.end(); ++it) { 0180 if (!OSM::intersects((*it).second.bbox, elem.boundingBox())) { 0181 continue; 0182 } 0183 // check against the exact airport boundary, not just the bounding box, 0184 // this excludes terminal buildings from adjacent sites we don't care about 0185 // example: the Airbus delivery buildings next to TLS 0186 if (!(*it).second.airportPolygon.intersects(QRectF(QPointF(elem.boundingBox().min.latF(), elem.boundingBox().min.lonF()), QPointF(elem.boundingBox().max.latF(), elem.boundingBox().max.lonF())))) { 0187 continue; 0188 } 0189 //qDebug() << "found terminal for airport:" << elem.url() << (*it).first << (*it).second.source; 0190 (*it).second.terminals.push_back(elem); 0191 0192 // look for entrances to terminals 0193 for (auto node : elem.outerPath(m_dataset)) { 0194 0195 // filter out inaccessible entrances, or gates 0196 const auto access = 0197 OSM::tagValue(*node, QLatin1StringView("access")); 0198 const auto aeroway = 0199 OSM::tagValue(*node, QLatin1StringView("gate")); 0200 if (access == QLatin1StringView("private") || 0201 access == QLatin1String("no") || 0202 aeroway == QLatin1String("gate")) { 0203 continue; 0204 } 0205 0206 const auto entrance = 0207 OSM::tagValue(*node, QLatin1StringView("entrance")); 0208 if (entrance == QLatin1StringView("yes") || 0209 entrance == QLatin1String("main")) { 0210 // qDebug() << " found entrance for terminal:" << (*nodeIt).url() 0211 // << entrance << access; 0212 (*it).second.terminalEntrances.push_back(node->coordinate); 0213 } 0214 } 0215 } 0216 } 0217 0218 void OSMAirportDb::filterTerminals(OSMAirportData &airport) 0219 { 0220 if (airport.terminals.empty()) { 0221 return; 0222 } 0223 0224 // sort by size, and drop micro terminals (which are usually data artifacts) 0225 std::sort(airport.terminals.begin(), airport.terminals.end(), [](auto lhs, auto rhs) { 0226 return OSM::distance(lhs.boundingBox().min, lhs.boundingBox().max) > OSM::distance(rhs.boundingBox().min, rhs.boundingBox().max); 0227 }); 0228 const auto sizeThreshold = OSM::distance(airport.terminals[0].boundingBox().min, airport.terminals[0].boundingBox().max) * TerminalSizeThreshold; 0229 airport.terminals.erase(std::partition(airport.terminals.begin(), airport.terminals.end(), [sizeThreshold](auto t) { 0230 return OSM::distance(t.boundingBox().min, t.boundingBox().max) > sizeThreshold; 0231 }), airport.terminals.end()); 0232 } 0233 0234 void OSMAirportDb::loadStation(OSM::Element elem) 0235 { 0236 const auto railway = elem.tagValue("railway"); 0237 if (railway != QLatin1StringView("station") && 0238 railway != QLatin1String("halt") && 0239 railway != QLatin1String("tram_stop")) { 0240 return; 0241 } 0242 0243 // try to filter out airport-interal transport systems, those are typically airside and thus not what we want 0244 const auto station = elem.tagValue("station"); 0245 if (station == QLatin1StringView("monorail")) { 0246 return; 0247 } 0248 0249 for (auto it = m_iataMap.begin(); it != m_iataMap.end(); ++it) { 0250 const auto &airport = (*it).second; 0251 0252 // we need the exact path here, the bounding box can contain a lot more stuff 0253 // the bounding box check is just for speed 0254 // as we also look for stations in close proximity to terminals, we technically need a slightly larger bounding box though 0255 // in most cases this just works, SHA being one of the counter examples due to perfect alignment with its bounding box 0256 const auto adjustedBbox = OSM::BoundingBox(OSM::Coordinate(airport.bbox.min.latitude - 100'000, airport.bbox.min.longitude - 100'000), 0257 OSM::Coordinate(airport.bbox.max.latitude + 100'000, airport.bbox.max.longitude + 100'000)); 0258 if (!OSM::contains(adjustedBbox, elem.center())) { 0259 continue; 0260 } 0261 0262 const auto onPremises = airport.airportPolygon.containsPoint(QPointF(elem.center().latF(), elem.center().lonF()), Qt::WindingFill); 0263 // one would assume that terminals are always within the airport bounds, but that's not the case 0264 // they sometimes expand beyond them. A station inside a terminal is however most likely something relevant for us 0265 const auto inTerminal = std::any_of(airport.terminals.begin(), airport.terminals.end(), [&elem](const auto &terminal) { 0266 return OSM::contains(terminal.boundingBox(), elem.center()); 0267 }); 0268 0269 // distance of the station to the terminal outer polygon 0270 uint32_t distanceToTerminal = std::numeric_limits<uint32_t>::max(); 0271 for (auto terminal : airport.terminals) { 0272 const auto outerPath = terminal.outerPath(m_dataset); 0273 distanceToTerminal = std::min(distanceToTerminal, OSM::distance(outerPath, elem.center())); 0274 } 0275 0276 if (onPremises || inTerminal || distanceToTerminal < StationToTerminalDistance) { 0277 qDebug() << "found station for airport:" << elem.url() << (*it).first << (*it).second.source << onPremises << inTerminal << distanceToTerminal; 0278 (*it).second.stations.push_back(elem); 0279 } 0280 } 0281 } 0282 0283 void OSMAirportDb::filterStations(OSMAirportData &airport) 0284 { 0285 // if we have a full station, drop halts 0286 auto it = std::partition( 0287 airport.stations.begin(), airport.stations.end(), [](auto station) { 0288 return station.tagValue("railway") == QLatin1StringView("station"); 0289 }); 0290 if (it != airport.stations.begin() && it != airport.stations.end()) { 0291 airport.stations.erase(it, airport.stations.end()); 0292 } 0293 0294 // drop light_rail in favor of "real" rail, as that's often used for on-premises transport lines 0295 it = std::partition( 0296 airport.stations.begin(), airport.stations.end(), [](auto station) { 0297 return station.tagValue("station") != QLatin1StringView("light_rail"); 0298 }); 0299 if (it != airport.stations.begin() && it != airport.stations.end()) { 0300 airport.stations.erase(it, airport.stations.end()); 0301 } 0302 0303 // "creative" way of separating "real" and on-premises stations: only real ones tend to have Wikidata tags 0304 it = std::partition(airport.stations.begin(), airport.stations.end(), [](auto station) { 0305 return !station.tagValue("wikidata").isEmpty(); 0306 }); 0307 if (it != airport.stations.begin() && it != airport.stations.end()) { 0308 airport.stations.erase(it, airport.stations.end()); 0309 } 0310 0311 // prioritize by number of platforms, if we have that information for all stations 0312 if (airport.stations.size() > 1 && std::all_of(airport.stations.begin(), airport.stations.end(), [](auto s) { return !s.tagValue("platforms").isEmpty(); })) { 0313 std::sort(airport.stations.begin(), airport.stations.end(), [](auto lhs, auto rhs) { 0314 return lhs.tagValue("platforms").toInt() > rhs.tagValue("platforms").toInt(); 0315 }); 0316 if (airport.stations[0].tagValue("platforms") != airport.stations[1].tagValue("platforms")) { 0317 airport.stations.erase(std::next(airport.stations.begin()), airport.stations.end()); 0318 } 0319 } 0320 } 0321 0322 OSM::Coordinate OSMAirportDb::lookup(const QString &iata, float lat, float lon) 0323 { 0324 const auto it = m_iataMap.find(iata); 0325 if (it == m_iataMap.end()) { 0326 //qDebug() << "No airport with IATA code:" << iata; 0327 return {}; 0328 } 0329 0330 const OSM::Coordinate wdCoord(lat, lon); 0331 const auto &airport = (*it).second; 0332 if (!OSM::contains(airport.bbox, wdCoord)) { 0333 qDebug() << "Airport" << iata << "is not where we expect it to be!?" << airport.source << airport.bbox << lat << lon; 0334 return {}; 0335 } 0336 if (airport.terminals.empty() && airport.terminalEntrances.empty() && airport.stations.empty()) { 0337 // no details available for this airport 0338 return {}; 0339 } 0340 0341 qDebug() << "Optimizing" << iata << airport.source << lat << lon << airport.bbox; 0342 qDebug() << " entrances:" << airport.terminalEntrances.size() << "terminals:" << airport.terminals.size() << "stations:" << airport.stations.size(); 0343 0344 // single station 0345 if (airport.stations.size() == 1) { 0346 qDebug() << " by station:" << airport.stations[0].url(); 0347 return airport.stations[0].center(); 0348 } 0349 0350 // multiple stations, but close together 0351 if (airport.stations.size() > 1) { 0352 auto stationBbox = std::accumulate(airport.stations.begin(), airport.stations.end(), OSM::BoundingBox(), [](OSM::BoundingBox lhs, OSM::Element rhs) { 0353 return OSM::unite(lhs, OSM::BoundingBox(rhs.boundingBox().center(), rhs.boundingBox().center())); 0354 }); 0355 if (OSM::distance(stationBbox.min, stationBbox.max) < StationClusterDistance) { 0356 qDebug() << " by clustered station:" << stationBbox; 0357 return stationBbox.center(); 0358 } 0359 } 0360 0361 // single entrance 0362 if (airport.terminalEntrances.size() == 1) { // ### this works for small airports, but for larger ones this is often due to missing data 0363 qDebug() << " by entrance:" << airport.terminalEntrances[0]; 0364 return airport.terminalEntrances[0]; 0365 } 0366 0367 // single terminal 0368 if (airport.terminals.size() == 1) { 0369 qDebug() << " by terminal:" << airport.terminals[0].url() << airport.terminals[0].center(); 0370 return airport.terminals[0].center(); 0371 } 0372 0373 // multiple terminals: take the center of the sum of all bounding boxes, and TODO check the result isn't ridiculously large 0374 if (airport.terminals.size() > 1) { 0375 const auto terminalBbox = std::accumulate(airport.terminals.begin(), airport.terminals.end(), OSM::BoundingBox(), [](const auto &bbox, auto terminal) { 0376 return OSM::unite(bbox, terminal.boundingBox()); 0377 }); 0378 // if the original coordinate is outside the terminal bounding box, this is highly likely an improvement, 0379 // otherwise we cannot be sure (see MUC, where the Wikidata coordinate is ideal). 0380 //qDebug() << " considering terminal bbox:" << terminalBbox; 0381 if (!OSM::contains(terminalBbox, wdCoord)) { 0382 qDebug() << " by terminal bbox center:" << terminalBbox.center(); 0383 return terminalBbox.center(); 0384 } 0385 } 0386 0387 return {}; 0388 }