File indexing completed on 2024-04-28 04:41:14

0001 /*
0002     SPDX-FileCopyrightText: 2020 Volker Krause <vkrause@kde.org>
0003 
0004     SPDX-License-Identifier: LGPL-2.0-or-later
0005 */
0006 
0007 #include "indexeddatatable.h"
0008 #include "lineinfo.h"
0009 
0010 #include "../lib/datatypes/linecompare_p.h"
0011 
0012 #include <osm/io.h>
0013 #include <osm/ztile.h>
0014 
0015 #include <wikidataquery.h>
0016 #include <wikidataquerymanager.h>
0017 
0018 #include <QColor>
0019 #include <QCommandLineParser>
0020 #include <QCoreApplication>
0021 #include <QDebug>
0022 #include <QFile>
0023 
0024 #include <set>
0025 
0026 namespace wd = Wikidata;
0027 
0028 enum {
0029     MaxLogoFileSize = 25000, // bytes
0030     QuadTreeDepthLimit = 16, // we don't want to use more than 32 bit
0031 };
0032 
0033 static constexpr const auto MaxLogoAspectRatio = 2.75;
0034 static constexpr const auto MinLogoAspectRatio = 0.45; // Shanghai Metro is the extreme still valid case with 0.5
0035 
0036 static constexpr const auto MinTileCoverage = 0.1;
0037 
0038 static bool isCompatibleMode(LineInfo::Mode lhs, LineInfo::Mode rhs)
0039 {
0040     if (lhs == rhs || lhs == LineInfo::Unknown || rhs == LineInfo::Unknown) {
0041         return true;
0042     }
0043     if ((lhs == LineInfo::LocalTrain && rhs == LineInfo::RapidTransit) || (lhs == LineInfo::RapidTransit && rhs == LineInfo::LocalTrain)) {
0044         return true;
0045     }
0046     if (lhs == LineInfo::Train) {
0047         return rhs == LineInfo::LocalTrain
0048             || rhs == LineInfo::LongDistance
0049             || rhs == LineInfo::RapidTransit;
0050     }
0051     if (rhs == LineInfo::Train) {
0052         return lhs == LineInfo::LocalTrain
0053             || lhs == LineInfo::LongDistance
0054             || lhs == LineInfo::RapidTransit;
0055     }
0056 
0057     return false;
0058 }
0059 
0060 static bool isSameLine(const LineInfo &lhs, const LineInfo &rhs)
0061 {
0062     return isCompatibleMode(lhs.mode, rhs.mode)
0063         && KPublicTransport::Internal::isSameLineName(lhs.name, rhs.name, KPublicTransport::Internal::StrictCompare);
0064 }
0065 
0066 class Generator {
0067 public:
0068     void processOSMData(OSM::DataSet &&dataSet);
0069     void augmentFromWikidata();
0070     void applyWikidataResults(std::vector<wd::Item> &&entities);
0071     void augmentProductsFromWikidata();
0072     void applyWikidataProductResults(std::vector<wd::Item> &&entities);
0073     void applyWikidataProductResults();
0074     void verifyImages();
0075     void verifyImageMetaData(std::vector<wd::Image> &&images);
0076     void verifyImageMetaData(QStringList &logos);
0077     void verifyImageMetaData();
0078 
0079     void generateQuadTree();
0080     bool resolveOneBottomUpConflict();
0081     void writeCode();
0082 
0083     void insertToBucket(OSM::ZTile tile, std::size_t lineIdx);
0084 
0085     QIODevice *out = nullptr;
0086     std::vector<LineInfo> lines;
0087     WikidataQueryManager *m_wdMgr = new WikidataQueryManager(QCoreApplication::instance());
0088     std::set<wd::Q> wdPartOfIds;
0089     std::map<wd::Q, wd::Item> wdItems;
0090     std::map<QString, wd::Image> wdImages;
0091 
0092     std::map<OSM::ZTile, std::vector<size_t>> zQuadTree;
0093 };
0094 
0095 void Generator::insertToBucket(OSM::ZTile tile, std::size_t lineIdx)
0096 {
0097     auto &bucket = zQuadTree[tile];
0098     if (std::find(bucket.begin(), bucket.end(), lineIdx) == bucket.end()) {
0099         bucket.push_back(lineIdx);
0100     } else {
0101         // all tree transforms below should not create duplicates
0102         assert(false);
0103     }
0104 }
0105 
0106 void Generator::processOSMData(OSM::DataSet &&dataSet)
0107 {
0108     qDebug() << "got" << dataSet.relations.size() << "relations from OSM";
0109     // expand multi-line relations
0110     for (auto it = dataSet.relations.begin(); it != dataSet.relations.end();) {
0111         auto ref = OSM::tagValue(*it, "ref");
0112         ref.replace(',', ';'); // wrong ref separator syntax, occurs rarely
0113         if (!ref.contains(';')) {
0114             ++it;
0115             continue;
0116         }
0117 
0118         const auto refs = ref.split(';');
0119         for (const auto &ref : refs) {
0120             if (ref.isEmpty()) {
0121                 continue;
0122             }
0123             auto rel = *it;
0124             const auto tagKey = dataSet.makeTagKey("ref", OSM::DataSet::StringIsPersistent);
0125             OSM::setTagValue(rel, tagKey, ref);
0126             dataSet.relations.push_back(rel);
0127         }
0128         it = dataSet.relations.erase(it);
0129     }
0130     qDebug() << "OSM relations after ref split:" << dataSet.relations.size();
0131 
0132     // split relations into route_master elements and route elements
0133     auto splitIt = std::partition(dataSet.relations.begin(), dataSet.relations.end(), [](const auto &rel) {
0134         return OSM::tagValue(rel, "type") == "route_master";
0135     });
0136     sort(splitIt, dataSet.relations.end());
0137     const auto routeMasterCount = std::distance(dataSet.relations.begin(), splitIt);
0138     qDebug() << "  containing" << routeMasterCount << "route masters and" << (dataSet.relations.size() - routeMasterCount) << "routes";
0139 
0140     // resolve route masters first, and then consider the remaining routes that have no route_master
0141     for (auto i = 0; i < routeMasterCount; ++i) {
0142         auto &rel = dataSet.relations[i];
0143         auto info = LineInfo::fromRelation(rel);
0144         if (info.name.isEmpty()) {
0145             continue;
0146         }
0147 
0148         const auto members = std::move(rel.members); // reference will become invalid once we start to modify below
0149         for (const auto &mem : members) {
0150             auto memIt = std::lower_bound(dataSet.relations.begin() + routeMasterCount, dataSet.relations.end(), mem.id);
0151             if (memIt == dataSet.relations.end() || (*memIt).id != mem.id) {
0152                 continue;
0153             }
0154             LineInfo::merge(info, LineInfo::fromRelation(*memIt));
0155             dataSet.relations.erase(memIt);
0156         }
0157         lines.push_back(std::move(info));
0158     }
0159 
0160     // deal with routes without a route master
0161     qDebug() << "  " << (dataSet.relations.size() - routeMasterCount) << "dangling routes remaining";
0162     for (auto it = dataSet.relations.begin() + routeMasterCount; it != dataSet.relations.end(); ++it) {
0163         auto info = LineInfo::fromRelation(*it);
0164         if (info.name.isEmpty()) {
0165             continue;
0166         }
0167         lines.push_back(std::move(info));
0168     }
0169     qDebug() << "merged lines:" << lines.size();
0170 
0171     // filter useless lines - ie. those that don't contain useful information and have no wikidata id to fill the missing gaps
0172     lines.erase(std::remove_if(lines.begin(), lines.end(), [](const auto &info) {
0173         if (!LineInfo::isUseful(info) && !info.wdId.isValid() && info.mode != LineInfo::LongDistance) {
0174             qDebug() << "dropping" << info;
0175         }
0176         return !LineInfo::isUseful(info) && !info.wdId.isValid();
0177     }), lines.end());
0178     qDebug() << "lines after filtering OSM data:" << lines.size();
0179 
0180     // check for uniqueness of (bbox, name) - would break indexing and can happen for lines without a route_master relation
0181     // we assume those to belong together as well and thus merge them to a single line
0182     for (auto lit = lines.begin(); lit != lines.end(); ++lit) {
0183         // a merge pass can increase the bbox to include more elements that we previously missed
0184         // so do this as long as we find things
0185         while (true) {
0186             auto dupIt = std::partition(lit + 1, lines.end(), [lit](const auto &rhs) {
0187                 return !isSameLine(*lit, rhs) || !OSM::intersects((*lit).bbox, rhs.bbox);
0188             });
0189             if (dupIt == lines.end()) {
0190                 break;
0191             }
0192 
0193             for (auto it = dupIt; it != lines.end(); ++it) {
0194                 qDebug() << "  merging:" << *lit << "with" << *it;
0195                 LineInfo::merge(*lit, *it);
0196             }
0197             lines.erase(dupIt, lines.end());
0198         }
0199     }
0200     qDebug() << "lines after bbox merge:" << lines.size();
0201 
0202     // check for uniqueness of Wikidata reference, even with non-intersecting bboxes
0203     // can help to fix broken/split routes
0204     for (auto lit = lines.begin(); lit != lines.end(); ++lit) {
0205         if (!(*lit).wdId.isValid()) {
0206             continue;
0207         }
0208         auto dupIt = std::partition(std::next(lit), lines.end(), [lit](const auto &rhs) {
0209             return !isSameLine(*lit, rhs) || (*lit).wdId != rhs.wdId;
0210         });
0211         if (dupIt == lines.end()) {
0212             continue;
0213         }
0214 
0215         for (auto it = dupIt; it != lines.end(); ++it) {
0216             qDebug() << "  merging:" << *lit << "with" << *it;
0217             LineInfo::merge(*lit, *it);
0218         }
0219         lines.erase(dupIt, lines.end());
0220     }
0221     qDebug() << "lines after WD reference merge:" << lines.size();
0222 
0223     augmentFromWikidata();
0224 }
0225 
0226 void Generator::augmentFromWikidata()
0227 {
0228     // sort to maximize cache hits of the batches wikidata queries
0229     std::sort(lines.begin(), lines.end(), [](const auto &lhs, const auto &rhs) {
0230         return lhs.wdId < rhs.wdId;
0231     });
0232     std::vector<wd::Q> wdIds;
0233     for (const auto &r: lines) {
0234         if (r.wdId.isValid()) {
0235             wdIds.push_back(r.wdId);
0236         }
0237     }
0238     wdIds.erase(std::unique(wdIds.begin(), wdIds.end()), wdIds.end());
0239     qDebug() << "Retrieving" << wdIds.size() << "items from Wikidata";
0240 
0241     auto query = new WikidataEntitiesQuery(m_wdMgr);
0242     query->setItems(std::move(wdIds));
0243     QObject::connect(query, &WikidataEntitiesQuery::partialResult, [this](auto query) { applyWikidataResults(std::move(query->takeResult())); });
0244     QObject::connect(query, &WikidataQuery::finished, [this, query]() mutable {
0245         if (query->error() != WikidataQuery::NoError) {
0246             qCritical() << "Wikidata query failed";
0247             QCoreApplication::exit(1);
0248             return;
0249         }
0250 
0251         augmentProductsFromWikidata();
0252     });
0253     m_wdMgr->execute(query);
0254 }
0255 
0256 static struct {
0257     wd::Q id;
0258     LineInfo::Mode mode;
0259 } const wd_type_to_mode_map[] = {
0260     { wd::Q(1412403), LineInfo::RapidTransit }, // commuter rail
0261     { wd::Q(1192191), LineInfo::Train }, // airport rail link
0262     { wd::Q(50331459), LineInfo::RapidTransit }, // S-Bahn line
0263     { wd::Q(95723), LineInfo::RapidTransit }, // S-Bahn
0264     { wd::Q(129172), LineInfo::LongDistance }, // ICE
0265     { wd::Q(15145593), LineInfo::Tram }, // tram line
0266     { wd::Q(2572054), LineInfo::RapidTransit }, // Transilien (multiple variants)
0267     { wd::Q(732778), LineInfo::RapidTransit }, // Transilien
0268     { wd::Q(2571458), LineInfo::RapidTransit }, // Transilien
0269     { wd::Q(2571977), LineInfo::RapidTransit }, // Transilien
0270     { wd::Q(373725), LineInfo::RapidTransit }, // Transilien
0271     { wd::Q(858485), LineInfo::LongDistance }, // high speed railway line
0272     { wd::Q(2138247), LineInfo::LocalTrain }, // Regional-Express
0273     { wd::Q(515449), LineInfo::LocalTrain }, // Regionalbahn
0274 };
0275 
0276 static LineInfo::Mode modeFromWikidataType(wd::Q type)
0277 {
0278     LineInfo::Mode mode = LineInfo::Unknown;
0279     for (const auto &modeMap : wd_type_to_mode_map) {
0280         if (type == modeMap.id) {
0281             mode = std::max(modeMap.mode, mode);
0282         }
0283     }
0284     return mode;
0285 }
0286 
0287 static const wd::Q wd_excluded_types[] {
0288     wd::Q(43229), // organization
0289     wd::Q(740752), // transport company
0290     wd::Q(928830), // metro station
0291     wd::Q(4830453), // business
0292     wd::Q(7835189), // transit district
0293     wd::Q(249556), // railway company
0294     wd::Q(17377208), // train operating company
0295     wd::Q(138825), // children's railway (yes, really...)
0296     wd::Q(1144661), // amusement ride
0297     wd::Q(420962), // heritage railway
0298 };
0299 
0300 static bool isExcludedWikidataType(wd::Q type)
0301 {
0302     return std::find(std::begin(wd_excluded_types), std::end(wd_excluded_types), type) != std::end(wd_excluded_types);
0303 }
0304 
0305 static const wd::Q wd_product_types[] {
0306     wd::Q(5503), // rapid transit
0307     wd::Q(95723), // S-Bahn
0308     wd::Q(15640053), // tram system
0309     wd::Q(1412403), // commuter rail
0310     wd::Q(2140646), // Stadtbahn
0311     wd::Q(1268865), // light rail
0312     wd::Q(113455515), // automated rapid transit
0313 };
0314 
0315 static bool isWikidataProductType(wd::Q type)
0316 {
0317     return std::find(std::begin(wd_product_types), std::end(wd_product_types), type) != std::end(wd_product_types);
0318 }
0319 
0320 static const wd::Q wd_recursive_product_types[] = {
0321     wd::Q(95723), // S-Bahn
0322     wd::Q(1054782), // Cercanías
0323 };
0324 
0325 void Generator::applyWikidataResults(std::vector<wd::Item> &&items)
0326 {
0327     for (auto &item : items) {
0328         auto rit = std::lower_bound(lines.begin(), lines.end(), item.id(), [](const LineInfo &lhs, wd::Q rhs) {
0329             return lhs.wdId < rhs;
0330         });
0331         // wikidata line elements shouldn't occur for multiple lines, but in some cases
0332         // OSM points to product or network elements instead, and those can occur once per line
0333         for (; rit != lines.end() && (*rit).wdId == item.id(); ++rit) {
0334             // check if this is a plausible type
0335             const auto instancesOf = item.values<wd::Q>(wd::P::instanceOf);
0336             bool found = false;
0337             LineInfo::Mode mode = LineInfo::Unknown;
0338             for (const auto &instanceOf : instancesOf) {
0339                 if (isExcludedWikidataType(instanceOf)) {
0340                     qWarning() << "Suspicious WD types:" << (*rit) << instancesOf;
0341                     found = true;
0342                 }
0343                 mode = std::max(mode, modeFromWikidataType(instanceOf));
0344 
0345                 if (isWikidataProductType(instanceOf)) { // wikidata link in OSM is pointing to the product, not the line it seems
0346                     wdPartOfIds.insert(item.id());
0347                     (*rit).wdProducts.push_back(item.id());
0348                 }
0349             }
0350             if (found) {
0351                 break;
0352             }
0353 
0354             // collect possible product types
0355             for (const wd::P p : {wd::P(wd::P::partOf), wd::P(16)}) {
0356                 for (const auto &id : item.values<wd::Q>(p)) {
0357                     wdPartOfIds.insert(id);
0358                     (*rit).wdProducts.push_back(id);
0359 
0360                     mode = std::max(mode, modeFromWikidataType(id));
0361                 }
0362             }
0363 
0364             // merge information
0365             const auto color = item.value<QColor>(wd::P(465));
0366             if ((*rit).color.isValid() && color.isValid() && (*rit).color != color) {
0367                 //qWarning() << "OSM/WD color conflict:" << (*rit) << color.name();
0368             } else if (color.isValid()) {
0369                 (*rit).color = color;
0370             }
0371             const auto logos = item.values<QString>(wd::P::logoImage);
0372             for (const auto &logo : logos) {
0373                 if (!logo.isEmpty() && !(*rit).lineLogos.contains(logo)) {
0374                     (*rit).lineLogos.push_back(logo);
0375                 }
0376             }
0377             if (!isCompatibleMode((*rit).mode, mode)) {
0378                 qWarning() << "OSM/WD mode conflict:" << (*rit) << mode;
0379             }
0380             (*rit).mode = std::max((*rit).mode, mode);
0381         }
0382 
0383         wdItems[item.id()] = std::move(item);
0384     }
0385 }
0386 
0387 void Generator::augmentProductsFromWikidata()
0388 {
0389     for (const auto &t : wd_recursive_product_types) { // should all be already in there, but let's make sure so we don't have to check again below
0390         wdPartOfIds.insert(t);
0391     }
0392     std::vector<wd::Q> wdIds;
0393     for (const auto &id: wdPartOfIds) {
0394         if (id.isValid() && wdItems.find(id) == wdItems.end()) {
0395             wdIds.push_back(id);
0396         }
0397     }
0398     qDebug() << "Retrieving" << wdIds.size() << "product items from Wikidata";
0399 
0400     auto query = new WikidataEntitiesQuery(m_wdMgr);
0401     query->setItems(std::move(wdIds));
0402     QObject::connect(query, &WikidataEntitiesQuery::partialResult, [this](auto query) { applyWikidataProductResults(std::move(query->takeResult())); });
0403     QObject::connect(query, &WikidataQuery::finished, [this, query]() mutable {
0404         if (query->error() != WikidataQuery::NoError) {
0405             qCritical() << "Wikidata product query failed";
0406             QCoreApplication::exit(1);
0407             return;
0408         }
0409 
0410         applyWikidataProductResults();
0411         verifyImages();
0412     });
0413     m_wdMgr->execute(query);
0414 }
0415 
0416 void Generator::applyWikidataProductResults(std::vector<wd::Item> &&items)
0417 {
0418     for (auto &item : items) {
0419         wdItems[item.id()] = std::move(item);
0420     }
0421 }
0422 
0423 void Generator::applyWikidataProductResults()
0424 {
0425     for (const auto &id : wdPartOfIds) {
0426         const auto item = wdItems[id];
0427         // check if this is a plausible type
0428         const auto instancesOf = item.values<wd::Q>(wd::P::instanceOf);
0429         LineInfo::Mode mode = LineInfo::Unknown;
0430         bool found = false;
0431         for (const auto &instanceOf : instancesOf) {
0432             if (isWikidataProductType(instanceOf)) {
0433                 found = true;
0434             }
0435             mode = std::max(mode, modeFromWikidataType(instanceOf));
0436         }
0437         if (!found && !std::all_of(instancesOf.begin(), instancesOf.end(), isExcludedWikidataType)) {
0438             qDebug() << "Product" << item.id() << "is of unknown type " << instancesOf;
0439         }
0440         if (!found) {
0441             continue;
0442         }
0443 
0444         // retrieve logo and find the lines this is for
0445         auto logoNames = item.values<QString>(wd::P::logoImage);
0446         // check if this is a type we should recurse on for more logo candidates
0447         for (const auto &instanceOf : instancesOf) {
0448             if (std::find(std::begin(wd_recursive_product_types), std::end(wd_recursive_product_types), instanceOf) == std::end(wd_recursive_product_types)) {
0449                 continue;
0450             }
0451             const auto logos = wdItems[instanceOf].values<QString>(wd::P::logoImage);
0452             std::copy(logos.begin(), logos.end(), std::back_inserter(logoNames));
0453         }
0454 
0455         if (logoNames.empty() && mode == LineInfo::Unknown) {
0456             continue;
0457         }
0458         for (auto &line : lines) {
0459             if (std::find(line.wdProducts.begin(), line.wdProducts.end(), item.id()) == line.wdProducts.end()) {
0460                 continue;
0461             }
0462 
0463             if (line.mode == LineInfo::Unknown) {
0464                 line.mode = mode;
0465             }
0466 
0467             std::copy(logoNames.begin(), logoNames.end(), std::back_inserter(line.productLogos));
0468         }
0469     }
0470 }
0471 
0472 void Generator::verifyImages()
0473 {
0474     std::vector<QString> imageIds;
0475     for (const auto &r: lines) {
0476         if (!LineInfo::isUseful(r)) {
0477             continue;
0478         }
0479         std::copy(r.lineLogos.begin(), r.lineLogos.end(), std::back_inserter(imageIds));
0480         std::copy(r.productLogos.begin(), r.productLogos.end(), std::back_inserter(imageIds));
0481     }
0482     std::sort(imageIds.begin(), imageIds.end());
0483     imageIds.erase(std::unique(imageIds.begin(), imageIds.end()), imageIds.end());
0484     imageIds.erase(std::remove(imageIds.begin(), imageIds.end(), QString()), imageIds.end());
0485     qDebug() << "Verifying" << imageIds.size() << "images" << imageIds;
0486 
0487     auto query = new WikidataImageMetadataQuery(m_wdMgr);
0488     query->setImages(std::move(imageIds));
0489     QObject::connect(query, &WikidataImageMetadataQuery::partialResult, [this](auto query) { verifyImageMetaData(std::move(query->takeResult())); });
0490     QObject::connect(query, &WikidataQuery::finished, [this, query]() mutable {
0491         if (query->error() != WikidataQuery::NoError) {
0492             qCritical() << "Wikidata image metadata query failed";
0493             QCoreApplication::exit(1);
0494             return;
0495         }
0496         verifyImageMetaData();
0497 
0498         // filter lines still missing data
0499         lines.erase(std::remove_if(lines.begin(), lines.end(), [](const auto &info) {
0500             if (!LineInfo::isUseful(info)) {
0501                 qDebug() << "dropping" << info;
0502             }
0503             return !LineInfo::isUseful(info);
0504         }), lines.end());
0505         qDebug() << "lines after Wikidata filtering:" << lines.size();
0506 
0507         generateQuadTree();
0508     });
0509     m_wdMgr->execute(query);
0510 }
0511 
0512 void Generator::verifyImageMetaData(std::vector<wd::Image> &&images)
0513 {
0514     const QStringList valid_licenses({
0515         QStringLiteral("cc0"),
0516         QStringLiteral("copyrighted free use"),
0517         QStringLiteral("public domain"),
0518         QStringLiteral("cc by 3.0"),
0519         QStringLiteral("cc by 3.0 it"),
0520         QStringLiteral("cc by-sa 3.0"),
0521         QStringLiteral("cc by-sa 3.0 de"),
0522         QStringLiteral("cc-by-sa-3.0"),
0523         QStringLiteral("cc by-sa 4.0"),
0524     });
0525 
0526     // things that for whatever reason managed to bypass all generic checks but that we don't want still
0527     const QStringList excluded_names({
0528         QStringLiteral("S-Bahn München Logo ohne DB (2022).svg"), // cheats around the aspect ratio check, fallback gives us the standard S-Bahn logo
0529     });
0530 
0531     for (auto &image : images) {
0532         const auto name = image.name();
0533         const auto fileSize = image.fileSize();
0534         if (fileSize > MaxLogoFileSize) {
0535             qWarning() << "not using logo" << name << "due to file size:" << fileSize;
0536             continue;
0537         }
0538 
0539         const auto aspectRatio = (double)image.width() / (double)image.height();
0540         if (aspectRatio > MaxLogoAspectRatio || aspectRatio < MinLogoAspectRatio) {
0541             qWarning() << "not using logo" << name << "due to aspect ratio:" << aspectRatio;
0542             continue;
0543         }
0544 
0545         const auto mt = image.mimeType();
0546         if (mt != QLatin1String("image/svg+xml") && mt != QLatin1String("image/png")) {
0547             qWarning() << "not using logo" << name << "due to mimetype:" << mt;
0548             continue;
0549         }
0550 
0551         const auto lic = image.license();
0552         if (!valid_licenses.contains(lic, Qt::CaseInsensitive)) {
0553             qWarning() << "not using logo" << name << "due to license:" << lic;
0554             continue;
0555         }
0556 
0557         if (excluded_names.contains(name)) {
0558             qWarning() << "dropping logo" << name;
0559             continue;
0560         }
0561 
0562         wdImages[name] = std::move(image);
0563    }
0564    images.clear();
0565 }
0566 
0567 void Generator::verifyImageMetaData(QStringList &logos)
0568 {
0569     logos.erase(std::remove_if(logos.begin(), logos.end(), [this](const auto &logo) {
0570         return wdImages.find(logo) == wdImages.end();
0571     }), logos.end());
0572 
0573     if (logos.size() <= 1) {
0574         return;
0575     }
0576 
0577     // TODO sort by best aspect ratio/size
0578     qDebug() << "multiple logos available:" << logos;
0579 }
0580 
0581 void Generator::verifyImageMetaData()
0582 {
0583     for (auto &line : lines) {
0584         verifyImageMetaData(line.lineLogos);
0585         if (!line.lineLogos.empty() && line.productLogos.size() > 1) {
0586             line.productLogos.erase(std::remove(line.productLogos.begin(), line.productLogos.end(), line.lineLogos[0]), line.productLogos.end());
0587         }
0588         verifyImageMetaData(line.productLogos);
0589     }
0590 }
0591 
0592 void Generator::generateQuadTree()
0593 {
0594     // order lines by OSM id, to increase output stability
0595     std::sort(lines.begin(), lines.end(), [](const auto &lhs, const auto &rhs) { return lhs.relId < rhs.relId; });
0596 
0597     // initialize quad tree by smallest single tile containing the entire line bbox
0598     for (auto lineIt = lines.begin(); lineIt != lines.end(); ++lineIt) {
0599         auto z = OSM::ztileFromBoundingBox((*lineIt).bbox);
0600         while (z.depth < QuadTreeDepthLimit) { // don't go deeper than what we can represent in 32bit
0601             z = z.parent();
0602         }
0603         insertToBucket(z, std::distance(lines.begin(), lineIt));
0604     }
0605     qDebug() << "initial tiles:" << zQuadTree.size();
0606 
0607     // collision resolution
0608     // this is done in two steps: bottom-up and top-down
0609     // bottom-up means that for each tile/line we check if there is a parent tile with a conflicting line, and if so, we
0610     // propagate that line down to the same level
0611     // this is done in a rather crude brute-force way, but it gets the job done
0612     while(resolveOneBottomUpConflict()) {}
0613     qDebug() << "tiles after bottom-up conflict resolution:" << zQuadTree.size();
0614 
0615     // top-down means we look at conflicts inside a given tile, and propagate the conflicting lines down
0616     for (auto tileIt = zQuadTree.begin(); tileIt != zQuadTree.end(); ++tileIt) {
0617         if ((*tileIt).second.size() <= 1) {
0618             continue;
0619         }
0620         // check for name collisions
0621         for (auto lit = (*tileIt).second.end(); lit != (*tileIt).second.begin();) {
0622             const auto lend = lit;
0623             const auto firstIdx = (*tileIt).second.front();
0624             lit = std::partition((*tileIt).second.begin(), lend, [this, firstIdx](const auto &lineIdx) {
0625                 return !isSameLine(lines[firstIdx], lines[lineIdx]);
0626             });
0627 
0628             // must not happen if isSameLineName(x, x) == true holds
0629             assert(lit != lend);
0630 
0631             if (lit + 1 == lend) { // only a single line with that name
0632                 continue;
0633             }
0634 
0635             // insert subtiles, if they actually contain the line bbox
0636             //qDebug() << "subdividing" << lines[*lit].name << lines[*lit].bbox << lines[*lit].relId << std::distance(lit, lend) << (*lit) << (*tileIt).first.depth << (*tileIt).first.z;
0637             for (auto it = lit; it != lend; ++it) {
0638                 //qDebug() << "  " << lines[*it];
0639                 for (auto subtile : (*tileIt).first.quadSplit()) {
0640                     if (subtile.intersects(lines[*it].bbox)) {
0641                         insertToBucket(subtile, *it);
0642                     }
0643                 }
0644             }
0645 
0646             // remove current entries
0647             lit = (*tileIt).second.erase(lit, lend);
0648         }
0649     }
0650     qDebug() << "tiles after top-down conflict resolution:" << zQuadTree.size();
0651 
0652     // split oversized tiles for what they contain, depsite the name being unique in the dataset we have here
0653     for (auto tileIt = zQuadTree.begin(); tileIt != zQuadTree.end(); ++tileIt) {
0654         if ((*tileIt).first.depth == QuadTreeDepthLimit) { // below that we need more than 32 bit for z in the quad tree storage
0655             break;
0656         }
0657         for (auto lineIt = (*tileIt).second.begin(); lineIt != (*tileIt).second.end();) {
0658             // push down elements that actually only occupy a sub-tile
0659             // this can happen as part of conflict resolution above as well as the splitting below
0660             int coveredSubTiles = 0;
0661             for (auto subtile : (*tileIt).first.quadSplit()) {
0662                 if (subtile.intersects(lines[*lineIt].bbox)) {
0663                     ++coveredSubTiles;
0664                 }
0665             }
0666 
0667             // check if the line is too small for the current tile, and if so, split it
0668             const auto coverage = std::max((double)lines[*lineIt].bbox.width() / (double)(*tileIt).first.boundingBox().width(),
0669                                            (double)lines[*lineIt].bbox.height() / (double)(*tileIt).first.boundingBox().height());
0670 
0671             if (coverage < MinTileCoverage || coveredSubTiles == 1) {
0672                 //qDebug() << "splitting due to small coverage:" << lines[*lineIt] << (*tileIt).first.depth << coverage << coveredSubTiles;
0673                 for (auto subtile : (*tileIt).first.quadSplit()) {
0674                     if (subtile.intersects(lines[*lineIt].bbox)) {
0675                         insertToBucket(subtile, *lineIt);
0676                     }
0677                 }
0678                 lineIt = (*tileIt).second.erase(lineIt);
0679                 continue;
0680             }
0681 
0682             ++lineIt;
0683         }
0684     }
0685     qDebug() << "tiles after coverage cleanup:" << zQuadTree.size();
0686 
0687     // remove empty buckets, and sort the non-empty ones for output stability
0688     for (auto tileIt = zQuadTree.begin(); tileIt != zQuadTree.end();) {
0689         if ((*tileIt).second.empty()) {
0690             tileIt = zQuadTree.erase(tileIt);
0691         } else if ((*tileIt).first.depth < QuadTreeDepthLimit) {
0692             qWarning() << "  found tile below depth limit!";
0693             tileIt = zQuadTree.erase(tileIt);
0694         } else {
0695             std::sort((*tileIt).second.begin(), (*tileIt).second.end(), [this](const auto lhs, const auto rhs) {
0696                 return lines[lhs].name < lines[rhs].name;
0697             });
0698 
0699             ++tileIt;
0700         }
0701     }
0702     qDebug() << "tiles remaining after cleanup:" << zQuadTree.size();
0703 
0704     writeCode();
0705 }
0706 
0707 bool Generator::resolveOneBottomUpConflict()
0708 {
0709     for (auto tileIt = zQuadTree.rbegin(); tileIt != zQuadTree.rend(); ++tileIt) {
0710         for (auto lineIt = (*tileIt).second.begin(); lineIt != (*tileIt).second.end(); ++lineIt) {
0711             auto parentTile = (*tileIt).first.parent();
0712             while (parentTile.depth < 32) {
0713                 const auto parentTileIt = zQuadTree.find(parentTile);
0714                 if (parentTileIt != zQuadTree.end()) {
0715                     auto conflictIt = std::find_if((*parentTileIt).second.begin(), (*parentTileIt).second.end(), [this, lineIt](const auto lhs) {
0716                         return isSameLine(lines[lhs], lines[*lineIt]);
0717                     });
0718                     if (conflictIt != (*parentTileIt).second.end()) {
0719                         //qDebug() << "propagating down:" << lines[*conflictIt].name << lines[*conflictIt].relId << parentTile.z << parentTile.depth << *conflictIt;
0720                         auto splitTile = parentTile;
0721                         while (splitTile.depth > (*tileIt).first.depth) {
0722                             for (auto subtile : splitTile.quadSplit()) {
0723                                 if (subtile.intersects((*tileIt).first)) {
0724                                     splitTile = subtile;
0725                                 } else if (subtile.intersects(lines[*conflictIt].bbox)) {
0726                                     insertToBucket(subtile, *conflictIt);
0727                                 }
0728                             }
0729                         }
0730                         if (splitTile.intersects(lines[*conflictIt].bbox)) {
0731                             insertToBucket(splitTile, *conflictIt);
0732                         }
0733                         (*parentTileIt).second.erase(conflictIt);
0734                         return true;
0735                     }
0736                 }
0737                 parentTile = parentTile.parent();
0738             }
0739         }
0740     }
0741     return false;
0742 }
0743 
0744 static QByteArray modeToString(LineInfo::Mode mode)
0745 {
0746     switch (mode) {
0747         case LineInfo::Tram:
0748             return "LineMetaDataContent::Tramway";
0749         case LineInfo::Subway:
0750             return "LineMetaDataContent::Subway";
0751         case LineInfo::RapidTransit:
0752             return "LineMetaDataContent::RapidTransit";
0753         case LineInfo::Train:
0754         case LineInfo::LocalTrain:
0755             return "LineMetaDataContent::LocalTrain";
0756         case LineInfo::LongDistance:
0757             return "LineMetaDataContent::LongDistanceTrain";
0758         default:
0759             assert(false);
0760     }
0761 
0762     return {};
0763 }
0764 
0765 void Generator::writeCode()
0766 {
0767     // write header
0768     out->write(R"(/*
0769     SPDX-FileCopyrightText: OpenStreetMap contributors
0770     SPDX-License-Identifier: ODbL-1.0
0771 
0772     Generated code based on data from OpenStreetMap (ODbL) and Wikidata (CC0), do not edit!
0773 */
0774 
0775 #include "linemetadata_p.h"
0776 
0777 namespace KPublicTransport {
0778 
0779 )");
0780 
0781     // create and write string tables
0782     // name and logo are separated to achieve smaller index values (and thus need less bits for storage)
0783     // this can be done as due to file extensions on the logos we will never be able to apply suffix compression here
0784     StringTable nameStrTab;
0785     StringTable logoStrTab;
0786     for (const auto &line : lines) {
0787         nameStrTab.addString(line.name);
0788         logoStrTab.addString(line.lineLogos.value(0));
0789         logoStrTab.addString(line.productLogos.value(0));
0790     }
0791     nameStrTab.writeCode("line_name_stringtab", out);
0792     logoStrTab.writeCode("line_logo_stringtab", out);
0793     // create a symbolic name for the common no logo case (reduces diff on changes)
0794     out->write("static const constexpr uint16_t NoLogo = ");
0795     out->write(QByteArray::number((int)logoStrTab.stringOffset(QString())));
0796     out->write(";\n\n");
0797 
0798     // write line table
0799     out->write("static const constexpr LineMetaDataContent line_data[] = {\n");
0800     for (const auto &line : lines) {
0801         out->write("    { ");
0802         out->write(QByteArray::number((int)nameStrTab.stringOffset(line.name)));
0803         out->write(", ");
0804         if (!line.lineLogos.isEmpty()) {
0805             out->write(QByteArray::number((int)logoStrTab.stringOffset(line.lineLogos.at(0))));
0806         } else {
0807             out->write("NoLogo");
0808         }
0809         out->write(", ");
0810         if (!line.productLogos.isEmpty()) {
0811             out->write(QByteArray::number((int)logoStrTab.stringOffset(line.productLogos.at(0))));
0812         } else {
0813             out->write("NoLogo");
0814         }
0815         out->write(", ");
0816         out->write(modeToString(line.mode));
0817         out->write(", Color{");
0818         if (line.color.isValid()) {
0819             out->write("0x");
0820             const auto col = QByteArray::number(line.color.rgb() & 0xffffff, 16);
0821             out->write(QByteArray(6 - col.size(), '0'));
0822             out->write(col);
0823         }
0824         out->write("} }, // ");
0825         out->write(line.name.toUtf8()),
0826         out->write(" OSM: ");
0827         out->write(QByteArray::number((long long)line.relId));
0828         if (line.wdId.isValid()) {
0829             out->write(" WD: ");
0830             out->write(line.wdId.toString().toUtf8());
0831         }
0832         out->write(" ");
0833         out->write(QByteArray::number(line.bbox.min.latF(), 'g', 4));
0834         out->write(", ");
0835         out->write(QByteArray::number(line.bbox.min.lonF(), 'g', 4));
0836         out->write(" x ");
0837         out->write(QByteArray::number(line.bbox.max.latF(), 'g', 4));
0838         out->write(", ");
0839         out->write(QByteArray::number(line.bbox.max.lonF(), 'g', 4));
0840         out->write("\n");
0841     }
0842     out->write(R"(};
0843 
0844 static constexpr const auto line_data_count = sizeof(line_data) / sizeof(LineMetaDataContent);
0845 
0846 static inline constexpr uint16_t Bucket(uint16_t index) { return line_data_count + index; }
0847 
0848 )");
0849 
0850     // write bucket table
0851     IndexedDataTable<std::vector<std::size_t>> bucketTable;
0852     for (const auto &zi : zQuadTree) {
0853         if (zi.second.size() > 1) {
0854             bucketTable.addEntry(zi.second);
0855         }
0856     }
0857     bucketTable.writeCode("int16_t", "line_data_bucketTable", out, [](const std::vector<std::size_t> &bucket, QIODevice *out) {
0858         for (const auto i : bucket) {
0859             out->write(QByteArray::number((int)i));
0860             out->write(", ");
0861         }
0862         out->write("-1,");
0863     });
0864 
0865     // write quad tree depth offsets
0866     out->write("static const constexpr LineMetaDataQuadTreeDepthIndex line_data_depthOffsets[] = {\n");
0867     int offset = -1;
0868     uint8_t lastDepth = 0;
0869     for (const auto &zi : zQuadTree) {
0870         ++offset;
0871         if (lastDepth == zi.first.depth) {
0872             continue;
0873         }
0874         lastDepth = zi.first.depth;
0875         out->write("    { ");
0876         out->write(QByteArray::number(lastDepth));
0877         out->write(", ");
0878         out->write(QByteArray::number((qulonglong)offset));
0879         out->write(" },\n");
0880     }
0881     out->write("};\n\n");
0882 
0883     // write quad tree
0884     out->write("static const constexpr LineMetaDataZIndex line_data_zquadtree[] = {\n");
0885     for (const auto &zi : zQuadTree) {
0886         out->write("    { ");
0887         out->write(QByteArray::number((qulonglong)zi.first.z));
0888         out->write(", ");
0889         if (zi.second.size() == 1) {
0890             out->write(QByteArray::number((qulonglong)zi.second[0]));
0891         } else {
0892             out->write("Bucket(");
0893             out->write(QByteArray::number((qulonglong)bucketTable.entryOffset(zi.second)));
0894             out->write(")");
0895         }
0896         out->write(" }, // ");
0897         out->write(QByteArray::number(zi.first.boundingBox().min.latF(), 'g', 4));
0898         out->write(", ");
0899         out->write(QByteArray::number(zi.first.boundingBox().min.lonF(), 'g', 4));
0900         out->write(" x ");
0901         out->write(QByteArray::number(zi.first.boundingBox().max.latF(), 'g', 4));
0902         out->write(", ");
0903         out->write(QByteArray::number(zi.first.boundingBox().max.lonF(), 'g', 4));
0904         out->write("\n");
0905     }
0906     out->write("};\n}\n");
0907 
0908     QCoreApplication::quit();
0909 }
0910 
0911 int main(int argc, char **argv)
0912 {
0913     QCoreApplication app(argc, argv);
0914     QCommandLineParser parser;
0915     parser.addHelpOption();
0916     QCommandLineOption outFileOption({ QStringLiteral("o") }, QStringLiteral("Output file name"), QStringLiteral("out"));
0917     parser.addOption(outFileOption);
0918     QCommandLineOption osmInputOption({ QStringLiteral("i") }, QStringLiteral("O5M input file"),  QStringLiteral("o5m-in"));
0919     parser.addOption(osmInputOption);
0920     parser.process(app);
0921 
0922     QFile out(parser.value(outFileOption));
0923     if (!out.open(QFile::WriteOnly)) {
0924         qCritical() << "Failed to open output file:" << out.errorString();
0925         return 1;
0926     }
0927 
0928     QFile f(parser.value(osmInputOption));
0929     if (!f.open(QFile::ReadOnly)) {
0930         qCritical() << "Failed to open OSM input file!" << f.errorString() << f.fileName();
0931         QCoreApplication::exit(1);
0932     }
0933     OSM::DataSet dataSet;
0934     auto reader = OSM::IO::readerForMimeType(u"application/vnd.openstreetmap.data+o5m", &dataSet);
0935     assert(reader);
0936     reader->read(f.map(0, f.size()), f.size());
0937 
0938     Generator generator;
0939     generator.out = &out;
0940     generator.processOSMData(std::move(dataSet));
0941 
0942     return QCoreApplication::exec();
0943 }