File indexing completed on 2024-05-12 04:42:44
0001 /* 0002 SPDX-FileCopyrightText: 2018 Volker Krause <vkrause@kde.org> 0003 0004 SPDX-License-Identifier: LGPL-2.0-or-later 0005 */ 0006 0007 #include "location.h" 0008 0009 #include "datatypes_p.h" 0010 #include "equipment.h" 0011 #include "equipmentutil.h" 0012 #include "json_p.h" 0013 #include "mergeutil_p.h" 0014 #include "rentalvehicle.h" 0015 #include "rentalvehicleutil_p.h" 0016 #include "ifopt/ifoptutil.h" 0017 0018 #include <QDebug> 0019 #include <QHash> 0020 #include <QJsonArray> 0021 #include <QJsonObject> 0022 #include <QRegularExpression> 0023 #include <QTimeZone> 0024 0025 #include <cmath> 0026 0027 using namespace KPublicTransport; 0028 0029 namespace KPublicTransport { 0030 0031 class LocationPrivate : public QSharedData 0032 { 0033 public: 0034 Location::Type type = Location::Place; 0035 QString name; 0036 float latitude = NAN; 0037 float longitude = NAN; 0038 QTimeZone timeZone; 0039 QHash<QString, QString> ids; 0040 0041 QString streetAddress; 0042 QString postalCode; 0043 QString locality; 0044 QString region; 0045 QString country; 0046 0047 QVariant data; 0048 }; 0049 0050 } 0051 0052 KPUBLICTRANSPORT_MAKE_GADGET(Location) 0053 KPUBLICTRANSPORT_MAKE_PROPERTY(Location, Location::Type, type, setType) 0054 KPUBLICTRANSPORT_MAKE_PROPERTY(Location, QString, name, setName) 0055 KPUBLICTRANSPORT_MAKE_PROPERTY(Location, float, latitude, setLatitude) 0056 KPUBLICTRANSPORT_MAKE_PROPERTY(Location, float, longitude, setLongitude) 0057 KPUBLICTRANSPORT_MAKE_PROPERTY(Location, QString, streetAddress, setStreetAddress) 0058 KPUBLICTRANSPORT_MAKE_PROPERTY(Location, QString, postalCode, setPostalCode) 0059 KPUBLICTRANSPORT_MAKE_PROPERTY(Location, QString, locality, setLocality) 0060 KPUBLICTRANSPORT_MAKE_PROPERTY(Location, QString, region, setRegion) 0061 KPUBLICTRANSPORT_MAKE_PROPERTY(Location, QString, country, setCountry) 0062 KPUBLICTRANSPORT_MAKE_PROPERTY(Location, QVariant, data, setData) 0063 0064 void Location::setCoordinate(float latitude, float longitude) 0065 { 0066 d.detach(); 0067 d->latitude = latitude; 0068 d->longitude = longitude; 0069 } 0070 0071 bool Location::hasCoordinate() const 0072 { 0073 return !std::isnan(d->latitude) && !std::isnan(d->longitude) && std::abs(d->latitude) <= 90.0 && std::abs(d->longitude) <= 180.0; 0074 } 0075 0076 bool Location::isEmpty() const 0077 { 0078 return !hasCoordinate() && d->name.isEmpty() && d->ids.isEmpty() && d->streetAddress.isEmpty(); 0079 } 0080 0081 QTimeZone Location::timeZone() const 0082 { 0083 return d->timeZone; 0084 } 0085 0086 void Location::setTimeZone(const QTimeZone &tz) 0087 { 0088 d.detach(); 0089 d->timeZone = tz; 0090 } 0091 0092 QString Location::identifier(const QString &identifierType) const 0093 { 0094 return d->ids.value(identifierType); 0095 } 0096 0097 void Location::setIdentifier(const QString &identifierType, const QString &id) 0098 { 0099 d.detach(); 0100 d->ids.insert(identifierType, id); 0101 } 0102 0103 RentalVehicleStation Location::rentalVehicleStation() const 0104 { 0105 return d->data.value<RentalVehicleStation>(); 0106 } 0107 0108 void Location::setRentalVehicleStation(const RentalVehicleStation &dock) 0109 { 0110 d.detach(); 0111 d->data = QVariant::fromValue(dock); 0112 } 0113 0114 RentalVehicle Location::rentalVehicle() const 0115 { 0116 return d->data.value<RentalVehicle>(); 0117 } 0118 0119 KPublicTransport::Equipment Location::equipment() const 0120 { 0121 return d->data.value<KPublicTransport::Equipment>(); 0122 } 0123 0124 QHash<QString, QString> Location::identifiers() const 0125 { 0126 return d->ids; 0127 } 0128 0129 // keep this sorted by key 0130 struct { 0131 const char *key; 0132 const char *value; 0133 } static const name_normalization_map[] = { 0134 { "bahnhof", nullptr }, 0135 { "bhf", nullptr }, 0136 { "centraal", "central" }, 0137 { "cs", "central" }, 0138 { "de", nullptr }, 0139 { "flughafen", "airport" }, 0140 { "gare", nullptr }, 0141 { "hbf", "hauptbahnhof" }, 0142 { "rer", nullptr }, 0143 { "st", "saint" }, 0144 { "str", "strasse" }, 0145 }; 0146 0147 static QStringList splitAndNormalizeName(const QString &name) 0148 { 0149 static const QRegularExpression splitRegExp(QStringLiteral(R"([, \(\)-/\.\[\]])")); 0150 auto l = name.split(splitRegExp, Qt::SkipEmptyParts); 0151 0152 for (auto it = l.begin(); it != l.end();) { 0153 // ignore single-letter fragments, with the exception of the 'H' used in Denmark 0154 // this seem to be mostly transport mode abbreviations (such as 'S' and 'U' in Germany) 0155 if ((*it).size() == 1) { 0156 it = l.erase(it); 0157 continue; 0158 } 0159 0160 *it = (*it).toCaseFolded(); 0161 const auto b = (*it).toUtf8(); 0162 const auto entry = std::lower_bound(std::begin(name_normalization_map), std::end(name_normalization_map), b.constData(), [](const auto &lhs, const auto rhs) { 0163 return strcmp(lhs.key, rhs) < 0; 0164 }); 0165 if (entry != std::end(name_normalization_map) && strcmp((*entry).key, b.constData()) == 0) { 0166 if (!(*entry).value) { 0167 it = l.erase(it); 0168 continue; 0169 } 0170 *it = QString::fromUtf8((*entry).value); 0171 } 0172 ++it; 0173 } 0174 0175 l.removeDuplicates(); 0176 std::sort(l.begin(), l.end()); 0177 return l; 0178 } 0179 0180 static QString stripDiacritics(const QString &s) 0181 { 0182 QString res; 0183 res.reserve(s.size()); 0184 0185 // if the character has a canonical decomposition use that and skip the combining diacritic markers following it 0186 // see https://en.wikipedia.org/wiki/Unicode_equivalence 0187 // see https://en.wikipedia.org/wiki/Combining_character 0188 for (const auto &c : s) { 0189 if (c.decompositionTag() == QChar::Canonical) { 0190 res.push_back(c.decomposition().at(0)); 0191 } else { 0192 res.push_back(c); 0193 } 0194 } 0195 0196 return res; 0197 } 0198 0199 // keep this ordered (see https://en.wikipedia.org/wiki/List_of_Unicode_characters) 0200 struct { 0201 ushort key; 0202 const char* replacement; 0203 } static const transliteration_map[] = { 0204 { 0x00DF, "ss" }, // ß 0205 { 0x00E4, "ae" }, // ä 0206 { 0x00F6, "oe" }, // ö 0207 { 0x00F8, "oe" }, // ø 0208 { 0x00FC, "ue" }, // ü 0209 }; 0210 0211 static QString applyTransliterations(const QString &s) 0212 { 0213 QString res; 0214 res.reserve(s.size()); 0215 0216 for (const auto c : s) { 0217 const auto it = std::lower_bound(std::begin(transliteration_map), std::end(transliteration_map), c, [](const auto &lhs, const auto rhs) { 0218 return QChar(lhs.key) < rhs; 0219 }); 0220 if (it != std::end(transliteration_map) && QChar((*it).key) == c) { 0221 res += QString::fromUtf8((*it).replacement); 0222 continue; 0223 } 0224 0225 if (c.decompositionTag() == QChar::Canonical) { // see above 0226 res += c.decomposition().at(0); 0227 } else { 0228 res += c; 0229 } 0230 } 0231 0232 return res; 0233 } 0234 0235 static bool isCompatibleLocationType(Location::Type lhs, Location::Type rhs) 0236 { 0237 return lhs == rhs 0238 || (lhs == Location::Place && rhs == Location::Stop) 0239 || (rhs == Location::Place && lhs == Location::Stop); 0240 } 0241 0242 static int isSameDistanceThreshold(Location::Type type) 0243 { 0244 switch (type) { 0245 case Location::Place: 0246 case Location::Stop: 0247 case Location::CarpoolPickupDropoff: 0248 return 10; // meter 0249 case Location::RentedVehicleStation: 0250 return 5; 0251 case Location::Equipment: 0252 case Location::RentedVehicle: 0253 return 3; 0254 } 0255 Q_UNREACHABLE(); 0256 } 0257 0258 bool Location::isSame(const Location &lhs, const Location &rhs) 0259 { 0260 const auto dist = Location::distance(lhs.latitude(), lhs.longitude(), rhs.latitude(), rhs.longitude()); 0261 // further than 1km apart is certainly not the same 0262 if (lhs.hasCoordinate() && rhs.hasCoordinate() && dist > 1000) { 0263 return false; 0264 } 0265 // incompatible types are also unmergable 0266 if (!isCompatibleLocationType(lhs.type(), rhs.type())) { 0267 return false; 0268 } 0269 0270 // ids - IFOPT takes priority here due to its special hierarchical handling, but only for stations 0271 const auto lhsIfopt = lhs.identifier(IfoptUtil::identifierType()); 0272 const auto rhsIfopt = rhs.identifier(IfoptUtil::identifierType()); 0273 if (!lhsIfopt.isEmpty() && !rhsIfopt.isEmpty() && (lhs.type() == Location::Stop || rhs.type() == Location::Stop)) { 0274 return IfoptUtil::isSameStopPlace(lhsIfopt, rhsIfopt); 0275 } 0276 0277 const auto lhsIds = lhs.identifiers(); 0278 bool foundEqualId = false; 0279 for (auto it = lhsIds.constBegin(); it != lhsIds.constEnd(); ++it) { 0280 const auto rhsId = rhs.identifier(it.key()); 0281 if (it.value().isEmpty() || rhsId.isEmpty()) { 0282 continue; 0283 } 0284 if (it.value() != rhsId) { 0285 return false; 0286 } else if (it.value() == rhsId) { 0287 foundEqualId = true; 0288 } 0289 } 0290 if (foundEqualId) { 0291 return true; 0292 } 0293 0294 if (lhs.rentalVehicleStation().isValid() && rhs.rentalVehicleStation().isValid() 0295 && !RentalVehicleStation::isSame(lhs.rentalVehicleStation(), rhs.rentalVehicleStation())) { 0296 return false; 0297 } 0298 if (lhs.type() == Location::Equipment && lhs.equipment().type() != rhs.equipment().type()) { 0299 return false; 0300 } 0301 0302 // name 0303 if (isSameName(lhs.name(), rhs.name())) { 0304 return true; 0305 } 0306 0307 // TODO consider the address properties here? 0308 0309 // anything sufficiently close together is assumed to be the same 0310 if (lhs.hasCoordinate() && rhs.hasCoordinate() && dist < std::min(isSameDistanceThreshold(lhs.type()), isSameDistanceThreshold(rhs.type()))) { 0311 return true; 0312 } 0313 0314 return false; 0315 } 0316 0317 bool Location::isSameName(const QString &lhs, const QString &rhs) 0318 { 0319 // simple prefix test, before we do the expensive fragment-based comparison below 0320 if (lhs.startsWith(rhs, Qt::CaseInsensitive) || rhs.startsWith(lhs, Qt::CaseSensitive)) { 0321 return true; 0322 } 0323 0324 const auto lhsNameFragments = splitAndNormalizeName(lhs); 0325 const auto rhsNameFragments = splitAndNormalizeName(rhs); 0326 0327 // first try with stripping diacritics 0328 std::vector<QString> lhsNormalized; 0329 lhsNormalized.reserve(lhsNameFragments.size()); 0330 std::transform(lhsNameFragments.begin(), lhsNameFragments.end(), std::back_inserter(lhsNormalized), stripDiacritics); 0331 std::sort(lhsNormalized.begin(), lhsNormalized.end()); 0332 lhsNormalized.erase(std::unique(lhsNormalized.begin(), lhsNormalized.end()), lhsNormalized.end()); 0333 0334 std::vector<QString> rhsNormalized; 0335 rhsNormalized.reserve(rhsNameFragments.size()); 0336 std::transform(rhsNameFragments.begin(), rhsNameFragments.end(), std::back_inserter(rhsNormalized), stripDiacritics); 0337 std::sort(rhsNormalized.begin(), rhsNormalized.end()); 0338 rhsNormalized.erase(std::unique(rhsNormalized.begin(), rhsNormalized.end()), rhsNormalized.end()); 0339 0340 if (lhsNormalized == rhsNormalized) { 0341 return true; 0342 } 0343 0344 // if that didn't help, try to apply alternative transliterations of diacritics 0345 lhsNormalized.clear(); 0346 std::transform(lhsNameFragments.begin(), lhsNameFragments.end(), std::back_inserter(lhsNormalized), applyTransliterations); 0347 rhsNormalized.clear(); 0348 std::transform(rhsNameFragments.begin(), rhsNameFragments.end(), std::back_inserter(rhsNormalized), applyTransliterations); 0349 return lhsNormalized == rhsNormalized; 0350 } 0351 0352 static float mergeCoordinate(float lhs, float rhs) 0353 { 0354 if (std::isnan(lhs)) { 0355 return rhs; 0356 } 0357 if (std::isnan(rhs)) { 0358 return lhs; 0359 } 0360 0361 return (lhs + rhs) / 2.0; 0362 } 0363 0364 Location Location::merge(const Location &lhs, const Location &rhs) 0365 { 0366 Location l(lhs); 0367 l.setType(std::max(lhs.type(), rhs.type())); 0368 0369 // merge identifiers 0370 const auto rhsIds = rhs.identifiers(); 0371 for (auto it = rhsIds.constBegin(); it != rhsIds.constEnd(); ++it) { 0372 if (it.key() == IfoptUtil::identifierType()) { 0373 l.setIdentifier(IfoptUtil::identifierType(), IfoptUtil::merge(l.identifier(IfoptUtil::identifierType()), it.value()).toString()); 0374 continue; 0375 } 0376 if (lhs.identifier(it.key()).isEmpty()) { 0377 l.setIdentifier(it.key(), it.value()); 0378 } 0379 } 0380 0381 if (!lhs.hasCoordinate()) { 0382 l.setCoordinate(rhs.latitude(), rhs.longitude()); 0383 } 0384 0385 l.setName(MergeUtil::mergeString(lhs.name(), rhs.name())); 0386 0387 if (!lhs.timeZone().isValid()) { 0388 l.setTimeZone(rhs.timeZone()); 0389 } 0390 0391 l.setLatitude(mergeCoordinate(lhs.latitude(), rhs.latitude())); 0392 l.setLongitude(mergeCoordinate(lhs.longitude(), rhs.longitude())); 0393 0394 l.setStreetAddress(MergeUtil::mergeString(lhs.streetAddress(), rhs.streetAddress())); 0395 l.setPostalCode(MergeUtil::mergeString(lhs.postalCode(), rhs.postalCode())); 0396 l.setLocality(MergeUtil::mergeString(lhs.locality(), rhs.locality())); 0397 l.setRegion(MergeUtil::mergeString(lhs.region(), rhs.region())); 0398 l.setCountry(MergeUtil::mergeString(lhs.country(), rhs.country())); 0399 0400 switch (l.type()) { 0401 case Place: 0402 case CarpoolPickupDropoff: 0403 case Stop: 0404 break; 0405 case RentedVehicleStation: 0406 l.setData(RentalVehicleUtil::merge(lhs.rentalVehicleStation(), rhs.rentalVehicleStation())); 0407 break; 0408 case RentedVehicle: 0409 l.setData(RentalVehicleUtil::merge(lhs.rentalVehicle(), rhs.rentalVehicle())); 0410 break; 0411 case Equipment: 0412 l.setData(EquipmentUtil::merge(lhs.equipment(), rhs.equipment())); 0413 break; 0414 } 0415 0416 return l; 0417 } 0418 0419 // see https://en.wikipedia.org/wiki/Haversine_formula 0420 float Location::distance(float lat1, float lon1, float lat2, float lon2) 0421 { 0422 const auto degToRad = M_PI / 180.0; 0423 const auto earthRadius = 6371000.0; // in meters 0424 0425 const auto d_lat = (lat1 - lat2) * degToRad; 0426 const auto d_lon = (lon1 - lon2) * degToRad; 0427 0428 const auto a = pow(sin(d_lat / 2.0), 2) + cos(lat1 * degToRad) * cos(lat2 * degToRad) * pow(sin(d_lon / 2.0), 2); 0429 return 2.0 * earthRadius * atan2(sqrt(a), sqrt(1.0 - a)); 0430 } 0431 0432 float Location::distance(const Location &lhs, const Location &rhs) 0433 { 0434 if (!lhs.hasCoordinate() || !rhs.hasCoordinate()) { 0435 return std::numeric_limits<float>::max(); 0436 } 0437 return Location::distance(lhs.latitude(), lhs.longitude(), rhs.latitude(), rhs.longitude()); 0438 } 0439 0440 QJsonObject Location::toJson(const Location &loc) 0441 { 0442 auto obj = Json::toJson(loc); 0443 if (loc.timeZone().isValid()) { 0444 obj.insert(QLatin1String("timezone"), QString::fromUtf8(loc.timeZone().id())); 0445 } 0446 0447 if (!loc.d->ids.isEmpty()) { 0448 QJsonObject ids; 0449 for (auto it = loc.d->ids.constBegin(); it != loc.d->ids.constEnd(); ++it) { 0450 ids.insert(it.key(), it.value()); 0451 } 0452 obj.insert(QLatin1String("identifier"), ids); 0453 } 0454 0455 switch (loc.type()) { 0456 case Place: 0457 obj.remove(QLatin1String("type")); 0458 [[fallthrough]]; 0459 case Stop: 0460 case CarpoolPickupDropoff: 0461 break; 0462 case RentedVehicleStation: 0463 obj.insert(QLatin1String("rentalVehicleStation"), RentalVehicleStation::toJson(loc.rentalVehicleStation())); 0464 break; 0465 case RentedVehicle: 0466 obj.insert(QLatin1String("rentalVehicle"), RentalVehicle::toJson(loc.rentalVehicle())); 0467 break; 0468 case Equipment: 0469 obj.insert(QLatin1String("equipment"), Equipment::toJson(loc.equipment())); 0470 break; 0471 } 0472 0473 return obj; 0474 } 0475 0476 QJsonArray Location::toJson(const std::vector<Location> &locs) 0477 { 0478 return Json::toJson(locs); 0479 } 0480 0481 Location Location::fromJson(const QJsonObject &obj) 0482 { 0483 auto loc = Json::fromJson<Location>(obj); 0484 const auto tz = obj.value(QLatin1String("timezone")).toString(); 0485 if (!tz.isEmpty()) { 0486 loc.setTimeZone(QTimeZone(tz.toUtf8())); 0487 } 0488 0489 const auto ids = obj.value(QLatin1String("identifier")).toObject(); 0490 for (auto it = ids.begin(); it != ids.end(); ++it) { 0491 loc.setIdentifier(it.key(), it.value().toString()); 0492 } 0493 0494 switch (loc.type()) { 0495 case Place: 0496 case Stop: 0497 case CarpoolPickupDropoff: 0498 break; 0499 case RentedVehicleStation: 0500 loc.setData(RentalVehicleStation::fromJson(obj.value(QLatin1String("rentalVehicleStation")).toObject())); 0501 break; 0502 case RentedVehicle: 0503 loc.setData(RentalVehicle::fromJson(obj.value(QLatin1String("rentalVehicle")).toObject())); 0504 break; 0505 case Equipment: 0506 loc.setData(Equipment::fromJson(obj.value(QLatin1String("equipment")).toObject())); 0507 break; 0508 } 0509 0510 return loc; 0511 } 0512 0513 std::vector<Location> Location::fromJson(const QJsonArray &a) 0514 { 0515 return Json::fromJson<Location>(a); 0516 } 0517 0518 #include "moc_location.cpp"