File indexing completed on 2024-12-08 04:17:52
0001 /* 0002 SPDX-FileCopyrightText: 2020 Volker Krause <vkrause@kde.org> 0003 0004 SPDX-License-Identifier: LGPL-2.0-or-later 0005 */ 0006 0007 #include "indexeddatatable.h" 0008 #include "lineinfo.h" 0009 0010 #include "../lib/datatypes/linecompare_p.h" 0011 0012 #include <osm/io.h> 0013 #include <osm/ztile.h> 0014 0015 #include <wikidataquery.h> 0016 #include <wikidataquerymanager.h> 0017 0018 #include <QColor> 0019 #include <QCommandLineParser> 0020 #include <QCoreApplication> 0021 #include <QDebug> 0022 #include <QFile> 0023 0024 #include <set> 0025 0026 namespace wd = Wikidata; 0027 0028 enum { 0029 MaxLogoFileSize = 25000, // bytes 0030 QuadTreeDepthLimit = 16, // we don't want to use more than 32 bit 0031 }; 0032 0033 static constexpr const auto MaxLogoAspectRatio = 2.75; 0034 static constexpr const auto MinLogoAspectRatio = 0.45; // Shanghai Metro is the extreme still valid case with 0.5 0035 0036 static constexpr const auto MinTileCoverage = 0.1; 0037 0038 static bool isCompatibleMode(LineInfo::Mode lhs, LineInfo::Mode rhs) 0039 { 0040 if (lhs == rhs || lhs == LineInfo::Unknown || rhs == LineInfo::Unknown) { 0041 return true; 0042 } 0043 if ((lhs == LineInfo::LocalTrain && rhs == LineInfo::RapidTransit) || (lhs == LineInfo::RapidTransit && rhs == LineInfo::LocalTrain)) { 0044 return true; 0045 } 0046 if (lhs == LineInfo::Train) { 0047 return rhs == LineInfo::LocalTrain 0048 || rhs == LineInfo::LongDistance 0049 || rhs == LineInfo::RapidTransit; 0050 } 0051 if (rhs == LineInfo::Train) { 0052 return lhs == LineInfo::LocalTrain 0053 || lhs == LineInfo::LongDistance 0054 || lhs == LineInfo::RapidTransit; 0055 } 0056 0057 return false; 0058 } 0059 0060 static bool isSameLine(const LineInfo &lhs, const LineInfo &rhs) 0061 { 0062 return isCompatibleMode(lhs.mode, rhs.mode) 0063 && KPublicTransport::Internal::isSameLineName(lhs.name, rhs.name, KPublicTransport::Internal::StrictCompare); 0064 } 0065 0066 class Generator { 0067 public: 0068 void processOSMData(OSM::DataSet &&dataSet); 0069 void augmentFromWikidata(); 0070 void applyWikidataResults(std::vector<wd::Item> &&entities); 0071 void augmentProductsFromWikidata(); 0072 void applyWikidataProductResults(std::vector<wd::Item> &&entities); 0073 void applyWikidataProductResults(); 0074 void verifyImages(); 0075 void verifyImageMetaData(std::vector<wd::Image> &&images); 0076 void verifyImageMetaData(QStringList &logos); 0077 void verifyImageMetaData(); 0078 0079 void generateQuadTree(); 0080 bool resolveOneBottomUpConflict(); 0081 void writeCode(); 0082 0083 void insertToBucket(OSM::ZTile tile, std::size_t lineIdx); 0084 0085 QIODevice *out = nullptr; 0086 std::vector<LineInfo> lines; 0087 WikidataQueryManager *m_wdMgr = new WikidataQueryManager(QCoreApplication::instance()); 0088 std::set<wd::Q> wdPartOfIds; 0089 std::map<wd::Q, wd::Item> wdItems; 0090 std::map<QString, wd::Image> wdImages; 0091 0092 std::map<OSM::ZTile, std::vector<size_t>> zQuadTree; 0093 }; 0094 0095 void Generator::insertToBucket(OSM::ZTile tile, std::size_t lineIdx) 0096 { 0097 auto &bucket = zQuadTree[tile]; 0098 if (std::find(bucket.begin(), bucket.end(), lineIdx) == bucket.end()) { 0099 bucket.push_back(lineIdx); 0100 } else { 0101 // all tree transforms below should not create duplicates 0102 assert(false); 0103 } 0104 } 0105 0106 void Generator::processOSMData(OSM::DataSet &&dataSet) 0107 { 0108 qDebug() << "got" << dataSet.relations.size() << "relations from OSM"; 0109 // expand multi-line relations 0110 for (auto it = dataSet.relations.begin(); it != dataSet.relations.end();) { 0111 auto ref = OSM::tagValue(*it, "ref"); 0112 ref.replace(',', ';'); // wrong ref separator syntax, occurs rarely 0113 if (!ref.contains(';')) { 0114 ++it; 0115 continue; 0116 } 0117 0118 const auto refs = ref.split(';'); 0119 for (const auto &ref : refs) { 0120 if (ref.isEmpty()) { 0121 continue; 0122 } 0123 auto rel = *it; 0124 const auto tagKey = dataSet.makeTagKey("ref", OSM::DataSet::StringIsPersistent); 0125 OSM::setTagValue(rel, tagKey, ref); 0126 dataSet.relations.push_back(rel); 0127 } 0128 it = dataSet.relations.erase(it); 0129 } 0130 qDebug() << "OSM relations after ref split:" << dataSet.relations.size(); 0131 0132 // split relations into route_master elements and route elements 0133 auto splitIt = std::partition(dataSet.relations.begin(), dataSet.relations.end(), [](const auto &rel) { 0134 return OSM::tagValue(rel, "type") == "route_master"; 0135 }); 0136 sort(splitIt, dataSet.relations.end()); 0137 const auto routeMasterCount = std::distance(dataSet.relations.begin(), splitIt); 0138 qDebug() << " containing" << routeMasterCount << "route masters and" << (dataSet.relations.size() - routeMasterCount) << "routes"; 0139 0140 // resolve route masters first, and then consider the remaining routes that have no route_master 0141 for (auto i = 0; i < routeMasterCount; ++i) { 0142 auto &rel = dataSet.relations[i]; 0143 auto info = LineInfo::fromRelation(rel); 0144 if (info.name.isEmpty()) { 0145 continue; 0146 } 0147 0148 const auto members = std::move(rel.members); // reference will become invalid once we start to modify below 0149 for (const auto &mem : members) { 0150 auto memIt = std::lower_bound(dataSet.relations.begin() + routeMasterCount, dataSet.relations.end(), mem.id); 0151 if (memIt == dataSet.relations.end() || (*memIt).id != mem.id) { 0152 continue; 0153 } 0154 LineInfo::merge(info, LineInfo::fromRelation(*memIt)); 0155 dataSet.relations.erase(memIt); 0156 } 0157 lines.push_back(std::move(info)); 0158 } 0159 0160 // deal with routes without a route master 0161 qDebug() << " " << (dataSet.relations.size() - routeMasterCount) << "dangling routes remaining"; 0162 for (auto it = dataSet.relations.begin() + routeMasterCount; it != dataSet.relations.end(); ++it) { 0163 auto info = LineInfo::fromRelation(*it); 0164 if (info.name.isEmpty()) { 0165 continue; 0166 } 0167 lines.push_back(std::move(info)); 0168 } 0169 qDebug() << "merged lines:" << lines.size(); 0170 0171 // filter useless lines - ie. those that don't contain useful information and have no wikidata id to fill the missing gaps 0172 lines.erase(std::remove_if(lines.begin(), lines.end(), [](const auto &info) { 0173 if (!LineInfo::isUseful(info) && !info.wdId.isValid() && info.mode != LineInfo::LongDistance) { 0174 qDebug() << "dropping" << info; 0175 } 0176 return !LineInfo::isUseful(info) && !info.wdId.isValid(); 0177 }), lines.end()); 0178 qDebug() << "lines after filtering OSM data:" << lines.size(); 0179 0180 // check for uniqueness of (bbox, name) - would break indexing and can happen for lines without a route_master relation 0181 // we assume those to belong together as well and thus merge them to a single line 0182 for (auto lit = lines.begin(); lit != lines.end(); ++lit) { 0183 // a merge pass can increase the bbox to include more elements that we previously missed 0184 // so do this as long as we find things 0185 while (true) { 0186 auto dupIt = std::partition(lit + 1, lines.end(), [lit](const auto &rhs) { 0187 return !isSameLine(*lit, rhs) || !OSM::intersects((*lit).bbox, rhs.bbox); 0188 }); 0189 if (dupIt == lines.end()) { 0190 break; 0191 } 0192 0193 for (auto it = dupIt; it != lines.end(); ++it) { 0194 qDebug() << " merging:" << *lit << "with" << *it; 0195 LineInfo::merge(*lit, *it); 0196 } 0197 lines.erase(dupIt, lines.end()); 0198 } 0199 } 0200 qDebug() << "lines after bbox merge:" << lines.size(); 0201 0202 // check for uniqueness of Wikidata reference, even with non-intersecting bboxes 0203 // can help to fix broken/split routes 0204 for (auto lit = lines.begin(); lit != lines.end(); ++lit) { 0205 if (!(*lit).wdId.isValid()) { 0206 continue; 0207 } 0208 auto dupIt = std::partition(std::next(lit), lines.end(), [lit](const auto &rhs) { 0209 return !isSameLine(*lit, rhs) || (*lit).wdId != rhs.wdId; 0210 }); 0211 if (dupIt == lines.end()) { 0212 continue; 0213 } 0214 0215 for (auto it = dupIt; it != lines.end(); ++it) { 0216 qDebug() << " merging:" << *lit << "with" << *it; 0217 LineInfo::merge(*lit, *it); 0218 } 0219 lines.erase(dupIt, lines.end()); 0220 } 0221 qDebug() << "lines after WD reference merge:" << lines.size(); 0222 0223 augmentFromWikidata(); 0224 } 0225 0226 void Generator::augmentFromWikidata() 0227 { 0228 // sort to maximize cache hits of the batches wikidata queries 0229 std::sort(lines.begin(), lines.end(), [](const auto &lhs, const auto &rhs) { 0230 return lhs.wdId < rhs.wdId; 0231 }); 0232 std::vector<wd::Q> wdIds; 0233 for (const auto &r: lines) { 0234 if (r.wdId.isValid()) { 0235 wdIds.push_back(r.wdId); 0236 } 0237 } 0238 wdIds.erase(std::unique(wdIds.begin(), wdIds.end()), wdIds.end()); 0239 qDebug() << "Retrieving" << wdIds.size() << "items from Wikidata"; 0240 0241 auto query = new WikidataEntitiesQuery(m_wdMgr); 0242 query->setItems(std::move(wdIds)); 0243 QObject::connect(query, &WikidataEntitiesQuery::partialResult, [this](auto query) { applyWikidataResults(std::move(query->takeResult())); }); 0244 QObject::connect(query, &WikidataQuery::finished, [this, query]() mutable { 0245 if (query->error() != WikidataQuery::NoError) { 0246 qCritical() << "Wikidata query failed"; 0247 QCoreApplication::exit(1); 0248 return; 0249 } 0250 0251 augmentProductsFromWikidata(); 0252 }); 0253 m_wdMgr->execute(query); 0254 } 0255 0256 static struct { 0257 wd::Q id; 0258 LineInfo::Mode mode; 0259 } const wd_type_to_mode_map[] = { 0260 { wd::Q(1412403), LineInfo::RapidTransit }, // commuter rail 0261 { wd::Q(1192191), LineInfo::Train }, // airport rail link 0262 { wd::Q(50331459), LineInfo::RapidTransit }, // S-Bahn line 0263 { wd::Q(95723), LineInfo::RapidTransit }, // S-Bahn 0264 { wd::Q(129172), LineInfo::LongDistance }, // ICE 0265 { wd::Q(15145593), LineInfo::Tram }, // tram line 0266 { wd::Q(2572054), LineInfo::RapidTransit }, // Transilien (multiple variants) 0267 { wd::Q(732778), LineInfo::RapidTransit }, // Transilien 0268 { wd::Q(2571458), LineInfo::RapidTransit }, // Transilien 0269 { wd::Q(2571977), LineInfo::RapidTransit }, // Transilien 0270 { wd::Q(373725), LineInfo::RapidTransit }, // Transilien 0271 { wd::Q(858485), LineInfo::LongDistance }, // high speed railway line 0272 { wd::Q(2138247), LineInfo::LocalTrain }, // Regional-Express 0273 { wd::Q(515449), LineInfo::LocalTrain }, // Regionalbahn 0274 }; 0275 0276 static LineInfo::Mode modeFromWikidataType(wd::Q type) 0277 { 0278 LineInfo::Mode mode = LineInfo::Unknown; 0279 for (const auto &modeMap : wd_type_to_mode_map) { 0280 if (type == modeMap.id) { 0281 mode = std::max(modeMap.mode, mode); 0282 } 0283 } 0284 return mode; 0285 } 0286 0287 static const wd::Q wd_excluded_types[] { 0288 wd::Q(43229), // organization 0289 wd::Q(740752), // transport company 0290 wd::Q(928830), // metro station 0291 wd::Q(4830453), // business 0292 wd::Q(7835189), // transit district 0293 wd::Q(249556), // railway company 0294 wd::Q(17377208), // train operating company 0295 wd::Q(138825), // children's railway (yes, really...) 0296 wd::Q(1144661), // amusement ride 0297 wd::Q(420962), // heritage railway 0298 }; 0299 0300 static bool isExcludedWikidataType(wd::Q type) 0301 { 0302 return std::find(std::begin(wd_excluded_types), std::end(wd_excluded_types), type) != std::end(wd_excluded_types); 0303 } 0304 0305 static const wd::Q wd_product_types[] { 0306 wd::Q(5503), // rapid transit 0307 wd::Q(95723), // S-Bahn 0308 wd::Q(15640053), // tram system 0309 wd::Q(1412403), // commuter rail 0310 wd::Q(2140646), // Stadtbahn 0311 wd::Q(1268865), // light rail 0312 wd::Q(113455515), // automated rapid transit 0313 }; 0314 0315 static bool isWikidataProductType(wd::Q type) 0316 { 0317 return std::find(std::begin(wd_product_types), std::end(wd_product_types), type) != std::end(wd_product_types); 0318 } 0319 0320 static const wd::Q wd_recursive_product_types[] = { 0321 wd::Q(95723), // S-Bahn 0322 wd::Q(1054782), // CercanÃas 0323 }; 0324 0325 void Generator::applyWikidataResults(std::vector<wd::Item> &&items) 0326 { 0327 for (auto &item : items) { 0328 auto rit = std::lower_bound(lines.begin(), lines.end(), item.id(), [](const LineInfo &lhs, wd::Q rhs) { 0329 return lhs.wdId < rhs; 0330 }); 0331 // wikidata line elements shouldn't occur for multiple lines, but in some cases 0332 // OSM points to product or network elements instead, and those can occur once per line 0333 for (; rit != lines.end() && (*rit).wdId == item.id(); ++rit) { 0334 // check if this is a plausible type 0335 const auto instancesOf = item.values<wd::Q>(wd::P::instanceOf); 0336 bool found = false; 0337 LineInfo::Mode mode = LineInfo::Unknown; 0338 for (const auto &instanceOf : instancesOf) { 0339 if (isExcludedWikidataType(instanceOf)) { 0340 qWarning() << "Suspicious WD types:" << (*rit) << instancesOf; 0341 found = true; 0342 } 0343 mode = std::max(mode, modeFromWikidataType(instanceOf)); 0344 0345 if (isWikidataProductType(instanceOf)) { // wikidata link in OSM is pointing to the product, not the line it seems 0346 wdPartOfIds.insert(item.id()); 0347 (*rit).wdProducts.push_back(item.id()); 0348 } 0349 } 0350 if (found) { 0351 break; 0352 } 0353 0354 // collect possible product types 0355 for (const wd::P p : {wd::P(wd::P::partOf), wd::P(16)}) { 0356 for (const auto &id : item.values<wd::Q>(p)) { 0357 wdPartOfIds.insert(id); 0358 (*rit).wdProducts.push_back(id); 0359 0360 mode = std::max(mode, modeFromWikidataType(id)); 0361 } 0362 } 0363 0364 // merge information 0365 const auto color = item.value<QColor>(wd::P(465)); 0366 if ((*rit).color.isValid() && color.isValid() && (*rit).color != color) { 0367 //qWarning() << "OSM/WD color conflict:" << (*rit) << color.name(); 0368 } else if (color.isValid()) { 0369 (*rit).color = color; 0370 } 0371 const auto logos = item.values<QString>(wd::P::logoImage); 0372 for (const auto &logo : logos) { 0373 if (!logo.isEmpty() && !(*rit).lineLogos.contains(logo)) { 0374 (*rit).lineLogos.push_back(logo); 0375 } 0376 } 0377 if (!isCompatibleMode((*rit).mode, mode)) { 0378 qWarning() << "OSM/WD mode conflict:" << (*rit) << mode; 0379 } 0380 (*rit).mode = std::max((*rit).mode, mode); 0381 } 0382 0383 wdItems[item.id()] = std::move(item); 0384 } 0385 } 0386 0387 void Generator::augmentProductsFromWikidata() 0388 { 0389 for (const auto &t : wd_recursive_product_types) { // should all be already in there, but let's make sure so we don't have to check again below 0390 wdPartOfIds.insert(t); 0391 } 0392 std::vector<wd::Q> wdIds; 0393 for (const auto &id: wdPartOfIds) { 0394 if (id.isValid() && wdItems.find(id) == wdItems.end()) { 0395 wdIds.push_back(id); 0396 } 0397 } 0398 qDebug() << "Retrieving" << wdIds.size() << "product items from Wikidata"; 0399 0400 auto query = new WikidataEntitiesQuery(m_wdMgr); 0401 query->setItems(std::move(wdIds)); 0402 QObject::connect(query, &WikidataEntitiesQuery::partialResult, [this](auto query) { applyWikidataProductResults(std::move(query->takeResult())); }); 0403 QObject::connect(query, &WikidataQuery::finished, [this, query]() mutable { 0404 if (query->error() != WikidataQuery::NoError) { 0405 qCritical() << "Wikidata product query failed"; 0406 QCoreApplication::exit(1); 0407 return; 0408 } 0409 0410 applyWikidataProductResults(); 0411 verifyImages(); 0412 }); 0413 m_wdMgr->execute(query); 0414 } 0415 0416 void Generator::applyWikidataProductResults(std::vector<wd::Item> &&items) 0417 { 0418 for (auto &item : items) { 0419 wdItems[item.id()] = std::move(item); 0420 } 0421 } 0422 0423 void Generator::applyWikidataProductResults() 0424 { 0425 for (const auto &id : wdPartOfIds) { 0426 const auto item = wdItems[id]; 0427 // check if this is a plausible type 0428 const auto instancesOf = item.values<wd::Q>(wd::P::instanceOf); 0429 LineInfo::Mode mode = LineInfo::Unknown; 0430 bool found = false; 0431 for (const auto &instanceOf : instancesOf) { 0432 if (isWikidataProductType(instanceOf)) { 0433 found = true; 0434 } 0435 mode = std::max(mode, modeFromWikidataType(instanceOf)); 0436 } 0437 if (!found && !std::all_of(instancesOf.begin(), instancesOf.end(), isExcludedWikidataType)) { 0438 qDebug() << "Product" << item.id() << "is of unknown type " << instancesOf; 0439 } 0440 if (!found) { 0441 continue; 0442 } 0443 0444 // retrieve logo and find the lines this is for 0445 auto logoNames = item.values<QString>(wd::P::logoImage); 0446 // check if this is a type we should recurse on for more logo candidates 0447 for (const auto &instanceOf : instancesOf) { 0448 if (std::find(std::begin(wd_recursive_product_types), std::end(wd_recursive_product_types), instanceOf) == std::end(wd_recursive_product_types)) { 0449 continue; 0450 } 0451 const auto logos = wdItems[instanceOf].values<QString>(wd::P::logoImage); 0452 std::copy(logos.begin(), logos.end(), std::back_inserter(logoNames)); 0453 } 0454 0455 if (logoNames.empty() && mode == LineInfo::Unknown) { 0456 continue; 0457 } 0458 for (auto &line : lines) { 0459 if (std::find(line.wdProducts.begin(), line.wdProducts.end(), item.id()) == line.wdProducts.end()) { 0460 continue; 0461 } 0462 0463 if (line.mode == LineInfo::Unknown) { 0464 line.mode = mode; 0465 } 0466 0467 std::copy(logoNames.begin(), logoNames.end(), std::back_inserter(line.productLogos)); 0468 } 0469 } 0470 } 0471 0472 void Generator::verifyImages() 0473 { 0474 std::vector<QString> imageIds; 0475 for (const auto &r: lines) { 0476 if (!LineInfo::isUseful(r)) { 0477 continue; 0478 } 0479 std::copy(r.lineLogos.begin(), r.lineLogos.end(), std::back_inserter(imageIds)); 0480 std::copy(r.productLogos.begin(), r.productLogos.end(), std::back_inserter(imageIds)); 0481 } 0482 std::sort(imageIds.begin(), imageIds.end()); 0483 imageIds.erase(std::unique(imageIds.begin(), imageIds.end()), imageIds.end()); 0484 imageIds.erase(std::remove(imageIds.begin(), imageIds.end(), QString()), imageIds.end()); 0485 qDebug() << "Verifying" << imageIds.size() << "images" << imageIds; 0486 0487 auto query = new WikidataImageMetadataQuery(m_wdMgr); 0488 query->setImages(std::move(imageIds)); 0489 QObject::connect(query, &WikidataImageMetadataQuery::partialResult, [this](auto query) { verifyImageMetaData(std::move(query->takeResult())); }); 0490 QObject::connect(query, &WikidataQuery::finished, [this, query]() mutable { 0491 if (query->error() != WikidataQuery::NoError) { 0492 qCritical() << "Wikidata image metadata query failed"; 0493 QCoreApplication::exit(1); 0494 return; 0495 } 0496 verifyImageMetaData(); 0497 0498 // filter lines still missing data 0499 lines.erase(std::remove_if(lines.begin(), lines.end(), [](const auto &info) { 0500 if (!LineInfo::isUseful(info)) { 0501 qDebug() << "dropping" << info; 0502 } 0503 return !LineInfo::isUseful(info); 0504 }), lines.end()); 0505 qDebug() << "lines after Wikidata filtering:" << lines.size(); 0506 0507 generateQuadTree(); 0508 }); 0509 m_wdMgr->execute(query); 0510 } 0511 0512 void Generator::verifyImageMetaData(std::vector<wd::Image> &&images) 0513 { 0514 const QStringList valid_licenses({ 0515 QStringLiteral("cc0"), 0516 QStringLiteral("copyrighted free use"), 0517 QStringLiteral("public domain"), 0518 QStringLiteral("cc by 3.0"), 0519 QStringLiteral("cc by 3.0 it"), 0520 QStringLiteral("cc by-sa 3.0"), 0521 QStringLiteral("cc by-sa 3.0 de"), 0522 QStringLiteral("cc-by-sa-3.0"), 0523 QStringLiteral("cc by-sa 4.0"), 0524 }); 0525 0526 // things that for whatever reason managed to bypass all generic checks but that we don't want still 0527 const QStringList excluded_names({ 0528 QStringLiteral("S-Bahn München Logo ohne DB (2022).svg"), // cheats around the aspect ratio check, fallback gives us the standard S-Bahn logo 0529 }); 0530 0531 for (auto &image : images) { 0532 const auto name = image.name(); 0533 const auto fileSize = image.fileSize(); 0534 if (fileSize > MaxLogoFileSize) { 0535 qWarning() << "not using logo" << name << "due to file size:" << fileSize; 0536 continue; 0537 } 0538 0539 const auto aspectRatio = (double)image.width() / (double)image.height(); 0540 if (aspectRatio > MaxLogoAspectRatio || aspectRatio < MinLogoAspectRatio) { 0541 qWarning() << "not using logo" << name << "due to aspect ratio:" << aspectRatio; 0542 continue; 0543 } 0544 0545 const auto mt = image.mimeType(); 0546 if (mt != QLatin1String("image/svg+xml") && mt != QLatin1String("image/png")) { 0547 qWarning() << "not using logo" << name << "due to mimetype:" << mt; 0548 continue; 0549 } 0550 0551 const auto lic = image.license(); 0552 if (!valid_licenses.contains(lic, Qt::CaseInsensitive)) { 0553 qWarning() << "not using logo" << name << "due to license:" << lic; 0554 continue; 0555 } 0556 0557 if (excluded_names.contains(name)) { 0558 qWarning() << "dropping logo" << name; 0559 continue; 0560 } 0561 0562 wdImages[name] = std::move(image); 0563 } 0564 images.clear(); 0565 } 0566 0567 void Generator::verifyImageMetaData(QStringList &logos) 0568 { 0569 logos.erase(std::remove_if(logos.begin(), logos.end(), [this](const auto &logo) { 0570 return wdImages.find(logo) == wdImages.end(); 0571 }), logos.end()); 0572 0573 if (logos.size() <= 1) { 0574 return; 0575 } 0576 0577 // TODO sort by best aspect ratio/size 0578 qDebug() << "multiple logos available:" << logos; 0579 } 0580 0581 void Generator::verifyImageMetaData() 0582 { 0583 for (auto &line : lines) { 0584 verifyImageMetaData(line.lineLogos); 0585 if (!line.lineLogos.empty() && line.productLogos.size() > 1) { 0586 line.productLogos.erase(std::remove(line.productLogos.begin(), line.productLogos.end(), line.lineLogos[0]), line.productLogos.end()); 0587 } 0588 verifyImageMetaData(line.productLogos); 0589 } 0590 } 0591 0592 void Generator::generateQuadTree() 0593 { 0594 // order lines by OSM id, to increase output stability 0595 std::sort(lines.begin(), lines.end(), [](const auto &lhs, const auto &rhs) { return lhs.relId < rhs.relId; }); 0596 0597 // initialize quad tree by smallest single tile containing the entire line bbox 0598 for (auto lineIt = lines.begin(); lineIt != lines.end(); ++lineIt) { 0599 auto z = OSM::ztileFromBoundingBox((*lineIt).bbox); 0600 while (z.depth < QuadTreeDepthLimit) { // don't go deeper than what we can represent in 32bit 0601 z = z.parent(); 0602 } 0603 insertToBucket(z, std::distance(lines.begin(), lineIt)); 0604 } 0605 qDebug() << "initial tiles:" << zQuadTree.size(); 0606 0607 // collision resolution 0608 // this is done in two steps: bottom-up and top-down 0609 // bottom-up means that for each tile/line we check if there is a parent tile with a conflicting line, and if so, we 0610 // propagate that line down to the same level 0611 // this is done in a rather crude brute-force way, but it gets the job done 0612 while(resolveOneBottomUpConflict()) {} 0613 qDebug() << "tiles after bottom-up conflict resolution:" << zQuadTree.size(); 0614 0615 // top-down means we look at conflicts inside a given tile, and propagate the conflicting lines down 0616 for (auto tileIt = zQuadTree.begin(); tileIt != zQuadTree.end(); ++tileIt) { 0617 if ((*tileIt).second.size() <= 1) { 0618 continue; 0619 } 0620 // check for name collisions 0621 for (auto lit = (*tileIt).second.end(); lit != (*tileIt).second.begin();) { 0622 const auto lend = lit; 0623 const auto firstIdx = (*tileIt).second.front(); 0624 lit = std::partition((*tileIt).second.begin(), lend, [this, firstIdx](const auto &lineIdx) { 0625 return !isSameLine(lines[firstIdx], lines[lineIdx]); 0626 }); 0627 0628 // must not happen if isSameLineName(x, x) == true holds 0629 assert(lit != lend); 0630 0631 if (lit + 1 == lend) { // only a single line with that name 0632 continue; 0633 } 0634 0635 // insert subtiles, if they actually contain the line bbox 0636 //qDebug() << "subdividing" << lines[*lit].name << lines[*lit].bbox << lines[*lit].relId << std::distance(lit, lend) << (*lit) << (*tileIt).first.depth << (*tileIt).first.z; 0637 for (auto it = lit; it != lend; ++it) { 0638 //qDebug() << " " << lines[*it]; 0639 for (auto subtile : (*tileIt).first.quadSplit()) { 0640 if (subtile.intersects(lines[*it].bbox)) { 0641 insertToBucket(subtile, *it); 0642 } 0643 } 0644 } 0645 0646 // remove current entries 0647 lit = (*tileIt).second.erase(lit, lend); 0648 } 0649 } 0650 qDebug() << "tiles after top-down conflict resolution:" << zQuadTree.size(); 0651 0652 // split oversized tiles for what they contain, depsite the name being unique in the dataset we have here 0653 for (auto tileIt = zQuadTree.begin(); tileIt != zQuadTree.end(); ++tileIt) { 0654 if ((*tileIt).first.depth == QuadTreeDepthLimit) { // below that we need more than 32 bit for z in the quad tree storage 0655 break; 0656 } 0657 for (auto lineIt = (*tileIt).second.begin(); lineIt != (*tileIt).second.end();) { 0658 // push down elements that actually only occupy a sub-tile 0659 // this can happen as part of conflict resolution above as well as the splitting below 0660 int coveredSubTiles = 0; 0661 for (auto subtile : (*tileIt).first.quadSplit()) { 0662 if (subtile.intersects(lines[*lineIt].bbox)) { 0663 ++coveredSubTiles; 0664 } 0665 } 0666 0667 // check if the line is too small for the current tile, and if so, split it 0668 const auto coverage = std::max((double)lines[*lineIt].bbox.width() / (double)(*tileIt).first.boundingBox().width(), 0669 (double)lines[*lineIt].bbox.height() / (double)(*tileIt).first.boundingBox().height()); 0670 0671 if (coverage < MinTileCoverage || coveredSubTiles == 1) { 0672 //qDebug() << "splitting due to small coverage:" << lines[*lineIt] << (*tileIt).first.depth << coverage << coveredSubTiles; 0673 for (auto subtile : (*tileIt).first.quadSplit()) { 0674 if (subtile.intersects(lines[*lineIt].bbox)) { 0675 insertToBucket(subtile, *lineIt); 0676 } 0677 } 0678 lineIt = (*tileIt).second.erase(lineIt); 0679 continue; 0680 } 0681 0682 ++lineIt; 0683 } 0684 } 0685 qDebug() << "tiles after coverage cleanup:" << zQuadTree.size(); 0686 0687 // remove empty buckets, and sort the non-empty ones for output stability 0688 for (auto tileIt = zQuadTree.begin(); tileIt != zQuadTree.end();) { 0689 if ((*tileIt).second.empty()) { 0690 tileIt = zQuadTree.erase(tileIt); 0691 } else if ((*tileIt).first.depth < QuadTreeDepthLimit) { 0692 qWarning() << " found tile below depth limit!"; 0693 tileIt = zQuadTree.erase(tileIt); 0694 } else { 0695 std::sort((*tileIt).second.begin(), (*tileIt).second.end(), [this](const auto lhs, const auto rhs) { 0696 return lines[lhs].name < lines[rhs].name; 0697 }); 0698 0699 ++tileIt; 0700 } 0701 } 0702 qDebug() << "tiles remaining after cleanup:" << zQuadTree.size(); 0703 0704 writeCode(); 0705 } 0706 0707 bool Generator::resolveOneBottomUpConflict() 0708 { 0709 for (auto tileIt = zQuadTree.rbegin(); tileIt != zQuadTree.rend(); ++tileIt) { 0710 for (auto lineIt = (*tileIt).second.begin(); lineIt != (*tileIt).second.end(); ++lineIt) { 0711 auto parentTile = (*tileIt).first.parent(); 0712 while (parentTile.depth < 32) { 0713 const auto parentTileIt = zQuadTree.find(parentTile); 0714 if (parentTileIt != zQuadTree.end()) { 0715 auto conflictIt = std::find_if((*parentTileIt).second.begin(), (*parentTileIt).second.end(), [this, lineIt](const auto lhs) { 0716 return isSameLine(lines[lhs], lines[*lineIt]); 0717 }); 0718 if (conflictIt != (*parentTileIt).second.end()) { 0719 //qDebug() << "propagating down:" << lines[*conflictIt].name << lines[*conflictIt].relId << parentTile.z << parentTile.depth << *conflictIt; 0720 auto splitTile = parentTile; 0721 while (splitTile.depth > (*tileIt).first.depth) { 0722 for (auto subtile : splitTile.quadSplit()) { 0723 if (subtile.intersects((*tileIt).first)) { 0724 splitTile = subtile; 0725 } else if (subtile.intersects(lines[*conflictIt].bbox)) { 0726 insertToBucket(subtile, *conflictIt); 0727 } 0728 } 0729 } 0730 if (splitTile.intersects(lines[*conflictIt].bbox)) { 0731 insertToBucket(splitTile, *conflictIt); 0732 } 0733 (*parentTileIt).second.erase(conflictIt); 0734 return true; 0735 } 0736 } 0737 parentTile = parentTile.parent(); 0738 } 0739 } 0740 } 0741 return false; 0742 } 0743 0744 static QByteArray modeToString(LineInfo::Mode mode) 0745 { 0746 switch (mode) { 0747 case LineInfo::Tram: 0748 return "LineMetaDataContent::Tramway"; 0749 case LineInfo::Subway: 0750 return "LineMetaDataContent::Subway"; 0751 case LineInfo::RapidTransit: 0752 return "LineMetaDataContent::RapidTransit"; 0753 case LineInfo::Train: 0754 case LineInfo::LocalTrain: 0755 return "LineMetaDataContent::LocalTrain"; 0756 case LineInfo::LongDistance: 0757 return "LineMetaDataContent::LongDistanceTrain"; 0758 default: 0759 assert(false); 0760 } 0761 0762 return {}; 0763 } 0764 0765 void Generator::writeCode() 0766 { 0767 // write header 0768 out->write(R"(/* 0769 SPDX-FileCopyrightText: OpenStreetMap contributors 0770 SPDX-License-Identifier: ODbL-1.0 0771 0772 Generated code based on data from OpenStreetMap (ODbL) and Wikidata (CC0), do not edit! 0773 */ 0774 0775 #include "linemetadata_p.h" 0776 0777 namespace KPublicTransport { 0778 0779 )"); 0780 0781 // create and write string tables 0782 // name and logo are separated to achieve smaller index values (and thus need less bits for storage) 0783 // this can be done as due to file extensions on the logos we will never be able to apply suffix compression here 0784 StringTable nameStrTab; 0785 StringTable logoStrTab; 0786 for (const auto &line : lines) { 0787 nameStrTab.addString(line.name); 0788 logoStrTab.addString(line.lineLogos.value(0)); 0789 logoStrTab.addString(line.productLogos.value(0)); 0790 } 0791 nameStrTab.writeCode("line_name_stringtab", out); 0792 logoStrTab.writeCode("line_logo_stringtab", out); 0793 // create a symbolic name for the common no logo case (reduces diff on changes) 0794 out->write("static const constexpr uint16_t NoLogo = "); 0795 out->write(QByteArray::number((int)logoStrTab.stringOffset(QString()))); 0796 out->write(";\n\n"); 0797 0798 // write line table 0799 out->write("static const constexpr LineMetaDataContent line_data[] = {\n"); 0800 for (const auto &line : lines) { 0801 out->write(" { "); 0802 out->write(QByteArray::number((int)nameStrTab.stringOffset(line.name))); 0803 out->write(", "); 0804 if (!line.lineLogos.isEmpty()) { 0805 out->write(QByteArray::number((int)logoStrTab.stringOffset(line.lineLogos.at(0)))); 0806 } else { 0807 out->write("NoLogo"); 0808 } 0809 out->write(", "); 0810 if (!line.productLogos.isEmpty()) { 0811 out->write(QByteArray::number((int)logoStrTab.stringOffset(line.productLogos.at(0)))); 0812 } else { 0813 out->write("NoLogo"); 0814 } 0815 out->write(", "); 0816 out->write(modeToString(line.mode)); 0817 out->write(", Color{"); 0818 if (line.color.isValid()) { 0819 out->write("0x"); 0820 const auto col = QByteArray::number(line.color.rgb() & 0xffffff, 16); 0821 out->write(QByteArray(6 - col.size(), '0')); 0822 out->write(col); 0823 } 0824 out->write("} }, // "); 0825 out->write(line.name.toUtf8()), 0826 out->write(" OSM: "); 0827 out->write(QByteArray::number((long long)line.relId)); 0828 if (line.wdId.isValid()) { 0829 out->write(" WD: "); 0830 out->write(line.wdId.toString().toUtf8()); 0831 } 0832 out->write(" "); 0833 out->write(QByteArray::number(line.bbox.min.latF(), 'g', 4)); 0834 out->write(", "); 0835 out->write(QByteArray::number(line.bbox.min.lonF(), 'g', 4)); 0836 out->write(" x "); 0837 out->write(QByteArray::number(line.bbox.max.latF(), 'g', 4)); 0838 out->write(", "); 0839 out->write(QByteArray::number(line.bbox.max.lonF(), 'g', 4)); 0840 out->write("\n"); 0841 } 0842 out->write(R"(}; 0843 0844 static constexpr const auto line_data_count = sizeof(line_data) / sizeof(LineMetaDataContent); 0845 0846 static inline constexpr uint16_t Bucket(uint16_t index) { return line_data_count + index; } 0847 0848 )"); 0849 0850 // write bucket table 0851 IndexedDataTable<std::vector<std::size_t>> bucketTable; 0852 for (const auto &zi : zQuadTree) { 0853 if (zi.second.size() > 1) { 0854 bucketTable.addEntry(zi.second); 0855 } 0856 } 0857 bucketTable.writeCode("int16_t", "line_data_bucketTable", out, [](const std::vector<std::size_t> &bucket, QIODevice *out) { 0858 for (const auto i : bucket) { 0859 out->write(QByteArray::number((int)i)); 0860 out->write(", "); 0861 } 0862 out->write("-1,"); 0863 }); 0864 0865 // write quad tree depth offsets 0866 out->write("static const constexpr LineMetaDataQuadTreeDepthIndex line_data_depthOffsets[] = {\n"); 0867 int offset = -1; 0868 uint8_t lastDepth = 0; 0869 for (const auto &zi : zQuadTree) { 0870 ++offset; 0871 if (lastDepth == zi.first.depth) { 0872 continue; 0873 } 0874 lastDepth = zi.first.depth; 0875 out->write(" { "); 0876 out->write(QByteArray::number(lastDepth)); 0877 out->write(", "); 0878 out->write(QByteArray::number((qulonglong)offset)); 0879 out->write(" },\n"); 0880 } 0881 out->write("};\n\n"); 0882 0883 // write quad tree 0884 out->write("static const constexpr LineMetaDataZIndex line_data_zquadtree[] = {\n"); 0885 for (const auto &zi : zQuadTree) { 0886 out->write(" { "); 0887 out->write(QByteArray::number((qulonglong)zi.first.z)); 0888 out->write(", "); 0889 if (zi.second.size() == 1) { 0890 out->write(QByteArray::number((qulonglong)zi.second[0])); 0891 } else { 0892 out->write("Bucket("); 0893 out->write(QByteArray::number((qulonglong)bucketTable.entryOffset(zi.second))); 0894 out->write(")"); 0895 } 0896 out->write(" }, // "); 0897 out->write(QByteArray::number(zi.first.boundingBox().min.latF(), 'g', 4)); 0898 out->write(", "); 0899 out->write(QByteArray::number(zi.first.boundingBox().min.lonF(), 'g', 4)); 0900 out->write(" x "); 0901 out->write(QByteArray::number(zi.first.boundingBox().max.latF(), 'g', 4)); 0902 out->write(", "); 0903 out->write(QByteArray::number(zi.first.boundingBox().max.lonF(), 'g', 4)); 0904 out->write("\n"); 0905 } 0906 out->write("};\n}\n"); 0907 0908 QCoreApplication::quit(); 0909 } 0910 0911 int main(int argc, char **argv) 0912 { 0913 QCoreApplication app(argc, argv); 0914 QCommandLineParser parser; 0915 parser.addHelpOption(); 0916 QCommandLineOption outFileOption({ QStringLiteral("o") }, QStringLiteral("Output file name"), QStringLiteral("out")); 0917 parser.addOption(outFileOption); 0918 QCommandLineOption osmInputOption({ QStringLiteral("i") }, QStringLiteral("O5M input file"), QStringLiteral("o5m-in")); 0919 parser.addOption(osmInputOption); 0920 parser.process(app); 0921 0922 QFile out(parser.value(outFileOption)); 0923 if (!out.open(QFile::WriteOnly)) { 0924 qCritical() << "Failed to open output file:" << out.errorString(); 0925 return 1; 0926 } 0927 0928 QFile f(parser.value(osmInputOption)); 0929 if (!f.open(QFile::ReadOnly)) { 0930 qCritical() << "Failed to open OSM input file!" << f.errorString() << f.fileName(); 0931 QCoreApplication::exit(1); 0932 } 0933 OSM::DataSet dataSet; 0934 auto reader = OSM::IO::readerForMimeType(u"application/vnd.openstreetmap.data+o5m", &dataSet); 0935 assert(reader); 0936 reader->read(f.map(0, f.size()), f.size()); 0937 0938 Generator generator; 0939 generator.out = &out; 0940 generator.processOSMData(std::move(dataSet)); 0941 0942 return QCoreApplication::exec(); 0943 }