// File indexing completed on 2024-12-22 04:59:44
0001 /* 0002 SPDX-FileCopyrightText: 2017 Volker Krause <vkrause@kde.org> 0003 0004 SPDX-License-Identifier: LGPL-2.0-or-later 0005 */ 0006 0007 #include "config-kitinerary.h" 0008 #include "extractorpostprocessor.h" 0009 #include "extractorpostprocessor_p.h" 0010 #include "extractorvalidator.h" 0011 #include "flightpostprocessor_p.h" 0012 #include "stringutil.h" 0013 0014 #include "iata/iatabcbpparser.h" 0015 #include "jsonlddocument.h" 0016 #include "logging.h" 0017 #include "mergeutil.h" 0018 #include "sortutil.h" 0019 #include "text/addressparser_p.h" 0020 0021 #include "knowledgedb/airportdb.h" 0022 #include "knowledgedb/timezonedb_p.h" 0023 #include "knowledgedb/trainstationdb.h" 0024 0025 #include <KItinerary/Action> 0026 #include <KItinerary/BoatTrip> 0027 #include <KItinerary/BusTrip> 0028 #include <KItinerary/Event> 0029 #include <KItinerary/Flight> 0030 #include <KItinerary/Organization> 0031 #include <KItinerary/Person> 0032 #include <KItinerary/Place> 0033 #include <KItinerary/ProgramMembership> 0034 #include <KItinerary/RentalCar> 0035 #include <KItinerary/Reservation> 0036 #include <KItinerary/Taxi> 0037 #include <KItinerary/Ticket> 0038 #include <KItinerary/TrainTrip> 0039 #include <KItinerary/Visit> 0040 0041 #include <KCountry> 0042 0043 #include <QDebug> 0044 #include <QJsonArray> 0045 #include <QJsonDocument> 0046 #include <QTimeZone> 0047 #include <QUrl> 0048 0049 #if HAVE_PHONENUMBER 0050 #include <phonenumbers/phonenumberutil.h> 0051 #endif 0052 0053 #include <algorithm> 0054 #include <cstring> 0055 0056 using namespace KItinerary; 0057 0058 ExtractorPostprocessor::ExtractorPostprocessor() 0059 : d(new ExtractorPostprocessorPrivate) 0060 { 0061 } 0062 0063 ExtractorPostprocessor::ExtractorPostprocessor(ExtractorPostprocessor &&) noexcept = default; 0064 ExtractorPostprocessor::~ExtractorPostprocessor() = default; 0065 0066 void ExtractorPostprocessor::process(const QList<QVariant> &data) { 0067 d->m_resultFinalized = false; 0068 
d->m_data.reserve(d->m_data.size() + data.size()); 0069 for (auto elem : data) { 0070 // reservation types 0071 if (JsonLd::isA<FlightReservation>(elem)) { 0072 elem = d->processFlightReservation(elem.value<FlightReservation>()); 0073 } else if (JsonLd::isA<TrainReservation>(elem)) { 0074 elem = d->processTrainReservation(elem.value<TrainReservation>()); 0075 } else if (JsonLd::isA<LodgingReservation>(elem)) { 0076 elem = d->processLodgingReservation(elem.value<LodgingReservation>()); 0077 } else if (JsonLd::isA<FoodEstablishmentReservation>(elem)) { 0078 elem = d->processFoodEstablishmentReservation(elem.value<FoodEstablishmentReservation>()); 0079 } else if (JsonLd::isA<TouristAttractionVisit>(elem)) { 0080 elem = d->processTouristAttractionVisit(elem.value<TouristAttractionVisit>()); 0081 } else if (JsonLd::isA<BusReservation>(elem)) { 0082 elem = d->processBusReservation(elem.value<BusReservation>()); 0083 } else if (JsonLd::isA<BoatReservation>(elem)) { 0084 elem = d->processBoatReservation(elem.value<BoatReservation>()); 0085 } else if (JsonLd::isA<EventReservation>(elem)) { 0086 elem = d->processEventReservation(elem.value<EventReservation>()); 0087 } else if (JsonLd::isA<RentalCarReservation>(elem)) { 0088 elem = d->processRentalCarReservation(elem.value<RentalCarReservation>()); 0089 } else if (JsonLd::isA<TaxiReservation>(elem)) { 0090 elem = d->processTaxiReservation(elem.value<TaxiReservation>()); 0091 } 0092 0093 // "reservationFor" types 0094 else if (JsonLd::isA<LodgingBusiness>(elem)) { 0095 elem = d->processPlace(elem.value<LodgingBusiness>()); 0096 } else if (JsonLd::isA<FoodEstablishment>(elem)) { 0097 elem = d->processPlace(elem.value<FoodEstablishment>()); 0098 } else if (JsonLd::isA<Event>(elem)) { 0099 elem = d->processEvent(elem.value<Event>()); 0100 } else if (JsonLd::isA<LocalBusiness>(elem)) { 0101 elem = d->processPlace(elem.value<LocalBusiness>()); 0102 } 0103 0104 // non-reservation types 0105 else if 
(JsonLd::isA<ProgramMembership>(elem)) { 0106 elem = d->processProgramMembership(elem.value<ProgramMembership>()); 0107 } else if (JsonLd::isA<Ticket>(elem)) { 0108 elem = d->processTicket(elem.value<Ticket>()); 0109 } 0110 0111 d->mergeOrAppend(elem); 0112 } 0113 } 0114 0115 QList<QVariant> ExtractorPostprocessor::result() const { 0116 if (!d->m_resultFinalized) { 0117 // fold elements we have reservations for into those reservations 0118 for (auto it = d->m_data.begin(); it != d->m_data.end();) { 0119 if (JsonLd::isA<Reservation>(*it)) { 0120 ++it; 0121 continue; 0122 } 0123 0124 bool merged = false; 0125 for (auto it2 = d->m_data.begin(); it2 != d->m_data.end(); ++it2) { 0126 const auto resFor = JsonLdDocument::readProperty(*it2, "reservationFor"); 0127 if (MergeUtil::isSame(resFor, *it)) { 0128 JsonLdDocument::writeProperty(*it2, "reservationFor", MergeUtil::merge(resFor, *it)); 0129 merged = true; 0130 } 0131 } 0132 0133 if (merged) { 0134 it = d->m_data.erase(it); 0135 } else { 0136 ++it; 0137 } 0138 } 0139 0140 // search for "triangular" patterns, ie. a location change element that has a matching departure 0141 // and matching arrival to two different other location change elements (A->C vs A->B + B->C). 
0142 // we remove those, as the fine-granular results are better 0143 if (d->m_data.size() >= 3) { 0144 for (auto it = d->m_data.begin(); it != d->m_data.end();) { 0145 auto depIt = it; 0146 auto arrIt = it; 0147 for (auto it2 = d->m_data.begin(); it2 != d->m_data.end(); ++it2) { 0148 if (it == it2) { 0149 continue; 0150 } 0151 if (MergeUtil::hasSameDeparture(*it, *it2)) { 0152 depIt = it2; 0153 } 0154 if (MergeUtil::hasSameArrival(*it, *it2)) { 0155 arrIt = it2; 0156 } 0157 } 0158 0159 if (depIt != it && arrIt != it && depIt != arrIt) { 0160 it = d->m_data.erase(it); 0161 } else { 0162 ++it; 0163 } 0164 } 0165 } 0166 0167 d->m_resultFinalized = true; 0168 } 0169 0170 std::stable_sort(d->m_data.begin(), d->m_data.end(), SortUtil::isBefore); 0171 return d->m_data; 0172 } 0173 0174 void ExtractorPostprocessor::setContextDate(const QDateTime& dt) 0175 { 0176 d->m_contextDate = dt; 0177 } 0178 0179 void ExtractorPostprocessor::setValidationEnabled([[maybe_unused]] bool validate) 0180 { 0181 } 0182 0183 void ExtractorPostprocessorPrivate::mergeOrAppend(const QVariant &elem) 0184 { 0185 const auto it = std::find_if(m_data.begin(), m_data.end(), [elem](const QVariant &other) { 0186 return MergeUtil::isSame(elem, other); 0187 }); 0188 0189 if (it == m_data.end()) { 0190 m_data.push_back(elem); 0191 } else { 0192 *it = MergeUtil::merge(*it, elem); 0193 } 0194 } 0195 0196 QVariant ExtractorPostprocessorPrivate::processFlightReservation(FlightReservation res) const 0197 { 0198 // expand ticketToken for IATA BCBP data 0199 const auto bcbp = res.reservedTicket().value<Ticket>().ticketTokenData().toString(); 0200 if (!bcbp.isEmpty()) { 0201 const auto bcbpData = IataBcbpParser::parse(bcbp, m_contextDate); 0202 if (bcbpData.size() == 1) { 0203 res = JsonLdDocument::apply(bcbpData.at(0), res).value<FlightReservation>(); 0204 // standardize on the BCBP booking reference, not some secondary one we might have in structured data for example 0205 
res.setReservationNumber(bcbpData.at(0).value<FlightReservation>().reservationNumber()); 0206 } else { 0207 for (const auto &data : bcbpData) { 0208 if (MergeUtil::isSame(res, data)) { 0209 res = JsonLdDocument::apply(data, res).value<FlightReservation>(); 0210 break; 0211 } 0212 } 0213 } 0214 } 0215 0216 if (res.reservationFor().isValid()) { 0217 FlightPostProcessor p; 0218 res.setReservationFor(p.processFlight(res.reservationFor().value<Flight>())); 0219 } 0220 return processReservation(res); 0221 } 0222 0223 TrainReservation ExtractorPostprocessorPrivate::processTrainReservation(TrainReservation res) const 0224 { 0225 if (res.reservationFor().isValid()) { 0226 res.setReservationFor(processTrainTrip(res.reservationFor().value<TrainTrip>())); 0227 } 0228 return processReservation(res); 0229 } 0230 0231 TrainTrip ExtractorPostprocessorPrivate::processTrainTrip(TrainTrip trip) const 0232 { 0233 trip.setArrivalPlatform(trip.arrivalPlatform().trimmed()); 0234 trip.setDeparturePlatform(trip.departurePlatform().trimmed()); 0235 trip.setDepartureStation(processTrainStation(trip.departureStation())); 0236 trip.setArrivalStation(processTrainStation(trip.arrivalStation())); 0237 trip.setDepartureTime(processTrainTripTime(trip.departureTime(), trip.departureDay(), trip.departureStation())); 0238 trip.setArrivalTime(processTrainTripTime(trip.arrivalTime(), trip.departureDay(), trip.arrivalStation())); 0239 trip.setTrainNumber(trip.trainNumber().simplified()); 0240 trip.setTrainName(trip.trainName().simplified()); 0241 0242 // arrival less than a day before departure is an indication of the extractor failing to detect day rollover 0243 const auto duration = trip.departureTime().secsTo(trip.arrivalTime()); 0244 if (duration < 0 && duration > -3600*24 && trip.departureTime().timeSpec() == trip.arrivalTime().timeSpec()) { 0245 trip.setArrivalTime(trip.arrivalTime().addDays(1)); 0246 } 0247 0248 return trip; 0249 } 0250 0251 static void applyStationData(const 
KnowledgeDb::TrainStation &record, TrainStation &station) 0252 { 0253 if (!station.geo().isValid() && record.coordinate.isValid()) { 0254 GeoCoordinates geo; 0255 geo.setLatitude(record.coordinate.latitude); 0256 geo.setLongitude(record.coordinate.longitude); 0257 station.setGeo(geo); 0258 } 0259 auto addr = station.address(); 0260 if (addr.addressCountry().isEmpty() && record.country.isValid()) { 0261 addr.setAddressCountry(record.country.toString()); 0262 station.setAddress(addr); 0263 } 0264 } 0265 0266 static void applyStationCountry(const QString &isoCode, TrainStation &station) 0267 { 0268 auto addr = station.address(); 0269 if (addr.addressCountry().isEmpty()) { 0270 addr.setAddressCountry(isoCode.toUpper()); 0271 station.setAddress(addr); 0272 } 0273 } 0274 0275 TrainStation ExtractorPostprocessorPrivate::processTrainStation(TrainStation station) const 0276 { 0277 const auto id = station.identifier(); 0278 if (id.isEmpty()) { // empty -> null cleanup, to have more compact json-ld output 0279 station.setIdentifier(QString()); 0280 } else if (id.startsWith(QLatin1StringView("sncf:")) && id.size() == 10) { 0281 const auto record = KnowledgeDb::stationForSncfStationId( 0282 KnowledgeDb::SncfStationId{id.mid(5)}); 0283 applyStationData(record, station); 0284 applyStationCountry(id.mid(5, 2).toUpper(), station); 0285 } else if (id.startsWith(QLatin1StringView("ibnr:")) && id.size() == 12) { 0286 const auto record = 0287 KnowledgeDb::stationForIbnr(KnowledgeDb::IBNR{id.mid(5).toUInt()}); 0288 applyStationData(record, station); 0289 const auto country = 0290 KnowledgeDb::countryIdForUicCode(QStringView(id).mid(5, 2).toUShort()) 0291 .toString(); 0292 applyStationCountry(country, station); 0293 } else if (id.startsWith(QLatin1StringView("uic:")) && id.size() == 11) { 0294 const auto record = KnowledgeDb::stationForUic( 0295 KnowledgeDb::UICStation{id.mid(4).toUInt()}); 0296 applyStationData(record, station); 0297 const auto country = 0298 
KnowledgeDb::countryIdForUicCode(QStringView(id).mid(4, 2).toUShort()) 0299 .toString(); 0300 applyStationCountry(country, station); 0301 } else if (id.startsWith(QLatin1StringView("ir:")) && id.size() > 4) { 0302 const auto record = 0303 KnowledgeDb::stationForIndianRailwaysStationCode(id.mid(3)); 0304 applyStationData(record, station); 0305 } else if (id.startsWith(QLatin1StringView("benerail:")) && 0306 id.size() == 14) { 0307 const auto record = KnowledgeDb::stationForBenerailId( 0308 KnowledgeDb::BenerailStationId(id.mid(9))); 0309 applyStationData(record, station); 0310 applyStationCountry(id.mid(9, 2).toUpper(), station); 0311 } else if (id.startsWith(QLatin1StringView("vrfi:")) && id.size() >= 7 && 0312 id.size() <= 9) { 0313 const auto record = KnowledgeDb::stationForVRStationCode( 0314 KnowledgeDb::VRStationCode(id.mid(5))); 0315 applyStationData(record, station); 0316 } else if (id.startsWith(QLatin1StringView("iata:")) && id.size() == 8) { 0317 const auto iataCode = KnowledgeDb::IataCode(QStringView(id).mid(5)); 0318 const auto record = KnowledgeDb::stationForIataCode(iataCode); 0319 applyStationData(record, station); 0320 // fall back to the airport with the matching IATA code for the country 0321 // information we cannot use the coordinate though, as that points to the 0322 // actual airport, not the station 0323 applyStationCountry(KnowledgeDb::countryForAirport(iataCode).toString(), 0324 station); 0325 } else if (id.startsWith(QLatin1StringView("amtrak:")) && id.size() == 10) { 0326 const auto record = KnowledgeDb::stationForAmtrakStationCode( 0327 KnowledgeDb::AmtrakStationCode(QStringView(id).mid(7))); 0328 applyStationData(record, station); 0329 } else if (id.startsWith(QLatin1StringView("via:")) && id.size() == 8) { 0330 const auto record = KnowledgeDb::stationForViaRailStationCode( 0331 KnowledgeDb::ViaRailStationCode(QStringView(id).mid(4))); 0332 applyStationData(record, station); 0333 } else if (id.startsWith(QLatin1StringView("uk:")) && 
id.size() == 6) { 0334 const auto record = KnowledgeDb::stationForUkRailwayStationCode( 0335 KnowledgeDb::UKRailwayStationCode(QStringView(id).mid(3))); 0336 applyStationData(record, station); 0337 } 0338 0339 return processPlace(station); 0340 } 0341 0342 QDateTime ExtractorPostprocessorPrivate::processTrainTripTime(QDateTime dt, QDate departureDay, const TrainStation& station) const 0343 { 0344 if (!dt.isValid()) { 0345 return dt; 0346 } 0347 0348 if (dt.date().year() <= 1970 && departureDay.isValid()) { // we just have the time, but not the day 0349 dt.setDate(departureDay); 0350 } 0351 return processTimeForLocation(dt, station); 0352 } 0353 0354 BusReservation ExtractorPostprocessorPrivate::processBusReservation(BusReservation res) const 0355 { 0356 if (res.reservationFor().isValid()) { 0357 res.setReservationFor(processBusTrip(res.reservationFor().value<BusTrip>())); 0358 } 0359 return processReservation(res); 0360 } 0361 0362 BusTrip ExtractorPostprocessorPrivate::processBusTrip(BusTrip trip) const 0363 { 0364 trip.setDepartureBusStop(processPlace(trip.departureBusStop())); 0365 trip.setArrivalBusStop(processPlace(trip.arrivalBusStop())); 0366 trip.setDepartureTime(processTimeForLocation(trip.departureTime(), trip.departureBusStop())); 0367 trip.setArrivalTime(processTimeForLocation(trip.arrivalTime(), trip.arrivalBusStop())); 0368 trip.setBusNumber(trip.busNumber().simplified()); 0369 trip.setBusName(trip.busName().simplified()); 0370 return trip; 0371 } 0372 0373 BoatReservation ExtractorPostprocessorPrivate::processBoatReservation(BoatReservation res) const 0374 { 0375 if (res.reservationFor().isValid()) { 0376 res.setReservationFor(processBoatTrip(res.reservationFor().value<BoatTrip>())); 0377 } 0378 return processReservation(res); 0379 } 0380 0381 BoatTrip ExtractorPostprocessorPrivate::processBoatTrip(BoatTrip trip) const 0382 { 0383 trip.setDepartureBoatTerminal(processPlace(trip.departureBoatTerminal())); 0384 
trip.setArrivalBoatTerminal(processPlace(trip.arrivalBoatTerminal())); 0385 trip.setDepartureTime(processTimeForLocation(trip.departureTime(), trip.departureBoatTerminal())); 0386 trip.setArrivalTime(processTimeForLocation(trip.arrivalTime(), trip.arrivalBoatTerminal())); 0387 0388 // arrival less than a day before departure is an indication of the extractor failing to detect day rollover 0389 const auto duration = trip.departureTime().secsTo(trip.arrivalTime()); 0390 if (duration < 0 && duration > -3600*24) { 0391 trip.setArrivalTime(trip.arrivalTime().addDays(1)); 0392 } 0393 0394 return trip; 0395 } 0396 0397 LodgingReservation ExtractorPostprocessorPrivate::processLodgingReservation(LodgingReservation res) const 0398 { 0399 if (res.reservationFor().isValid()) { 0400 res.setReservationFor(processPlace(res.reservationFor().value<LodgingBusiness>())); 0401 res.setCheckinTime(processTimeForLocation(res.checkinTime(), res.reservationFor().value<LodgingBusiness>())); 0402 res.setCheckoutTime(processTimeForLocation(res.checkoutTime(), res.reservationFor().value<LodgingBusiness>())); 0403 } 0404 return processReservation(res); 0405 } 0406 0407 TaxiReservation ExtractorPostprocessorPrivate::processTaxiReservation(TaxiReservation res) const 0408 { 0409 res.setPickupLocation(processPlace(res.pickupLocation())); 0410 res.setPickupTime(processTimeForLocation(res.pickupTime(), res.pickupLocation())); 0411 return processReservation(res); 0412 } 0413 0414 RentalCarReservation ExtractorPostprocessorPrivate::processRentalCarReservation(RentalCarReservation res) const 0415 { 0416 if (res.reservationFor().isValid()) { 0417 res.setReservationFor(processRentalCar(res.reservationFor().value<RentalCar>())); 0418 } 0419 res.setPickupLocation(processPlace(res.pickupLocation())); 0420 res.setDropoffLocation(processPlace(res.dropoffLocation())); 0421 res.setPickupTime(processTimeForLocation(res.pickupTime(), res.pickupLocation())); 0422 
res.setDropoffTime(processTimeForLocation(res.dropoffTime(), res.dropoffLocation())); 0423 return processReservation(res); 0424 } 0425 0426 RentalCar ExtractorPostprocessorPrivate::processRentalCar(RentalCar car) const 0427 { 0428 car.setName(car.name().trimmed()); 0429 return car; 0430 } 0431 0432 FoodEstablishmentReservation ExtractorPostprocessorPrivate::processFoodEstablishmentReservation(FoodEstablishmentReservation res) const 0433 { 0434 if (res.reservationFor().isValid()) { 0435 res.setReservationFor(processPlace(res.reservationFor().value<FoodEstablishment>())); 0436 res.setStartTime(processTimeForLocation(res.startTime(), res.reservationFor().value<FoodEstablishment>())); 0437 res.setEndTime(processTimeForLocation(res.endTime(), res.reservationFor().value<FoodEstablishment>())); 0438 } 0439 return processReservation(res); 0440 } 0441 0442 TouristAttractionVisit ExtractorPostprocessorPrivate::processTouristAttractionVisit(TouristAttractionVisit visit) const 0443 { 0444 visit.setTouristAttraction(processPlace(visit.touristAttraction())); 0445 visit.setArrivalTime(processTimeForLocation(visit.arrivalTime(), visit.touristAttraction())); 0446 visit.setDepartureTime(processTimeForLocation(visit.departureTime(), visit.touristAttraction())); 0447 return visit; 0448 } 0449 0450 EventReservation ExtractorPostprocessorPrivate::processEventReservation(EventReservation res) const 0451 { 0452 if (res.reservationFor().isValid()) { 0453 res.setReservationFor(processEvent(res.reservationFor().value<Event>())); 0454 } 0455 return processReservation(res); 0456 } 0457 0458 KItinerary::Event ExtractorPostprocessorPrivate::processEvent(KItinerary::Event event) const 0459 { 0460 event.setName(StringUtil::clean(event.name())); 0461 0462 // normalize location to be a Place 0463 if (JsonLd::isA<PostalAddress>(event.location())) { 0464 Place place; 0465 place.setAddress(event.location().value<PostalAddress>()); 0466 event.setLocation(place); 0467 } 0468 0469 if 
(JsonLd::isA<Place>(event.location())) { 0470 event.setLocation(processPlace(event.location().value<Place>())); 0471 0472 // try to obtain timezones if we have a location 0473 event.setStartDate(processTimeForLocation(event.startDate(), event.location().value<Place>())); 0474 event.setEndDate(processTimeForLocation(event.endDate(), event.location().value<Place>())); 0475 event.setDoorTime(processTimeForLocation(event.doorTime(), event.location().value<Place>())); 0476 } 0477 0478 return event; 0479 } 0480 0481 static QString processCurrency(const QString ¤cy) 0482 { 0483 if (currency.size() != 3 || !std::all_of(currency.begin(), currency.end(), [](QChar c) { return c.isUpper(); })) { 0484 return {}; 0485 } 0486 return currency; 0487 } 0488 0489 Ticket ExtractorPostprocessorPrivate::processTicket(Ticket ticket) const 0490 { 0491 ticket.setName(StringUtil::clean(ticket.name())); 0492 ticket.setTicketNumber(ticket.ticketNumber().simplified()); 0493 ticket.setUnderName(processPerson(ticket.underName())); 0494 ticket.setTicketedSeat(processSeat(ticket.ticketedSeat())); 0495 ticket.setPriceCurrency(processCurrency(ticket.priceCurrency())); 0496 return ticket; 0497 } 0498 0499 ProgramMembership ExtractorPostprocessorPrivate::processProgramMembership(ProgramMembership program) const 0500 { 0501 // remove empty dummy entries found eg. 
in ERA FCB data 0502 if (const auto name = program.programName(); std::none_of(name.begin(), name.end(), [](QChar c) { return c.isLetter(); })) { 0503 program.setProgramName(QString()); 0504 } 0505 0506 program.setProgramName(program.programName().simplified()); 0507 // avoid emitting spurious empty ProgramMembership objects caused by empty elements in JSON-LD/Microdata input 0508 if (program.programName().isEmpty() && !program.programName().isNull()) { 0509 program.setProgramName(QString()); 0510 } 0511 program.setMember(processPerson(program.member())); 0512 return program; 0513 } 0514 0515 Seat ExtractorPostprocessorPrivate::processSeat(Seat seat) const 0516 { 0517 seat.setSeatSection(seat.seatSection().simplified()); 0518 seat.setSeatRow(seat.seatRow().simplified()); 0519 seat.setSeatNumber(seat.seatNumber().simplified()); 0520 seat.setSeatingType(seat.seatingType().simplified()); 0521 return seat; 0522 } 0523 0524 template <typename T> 0525 T ExtractorPostprocessorPrivate::processReservation(T res) const 0526 { 0527 res.setUnderName(processPerson(res.underName().template value<Person>())); 0528 res.setPotentialAction(processActions(res.potentialAction())); 0529 res.setReservationNumber(res.reservationNumber().trimmed()); 0530 res.setProgramMembershipUsed(processProgramMembership(res.programMembershipUsed())); 0531 res.setPriceCurrency(processCurrency(res.priceCurrency())); 0532 0533 if (JsonLd::isA<Ticket>(res.reservedTicket())) { 0534 res.setReservedTicket(processTicket(res.reservedTicket().template value<Ticket>())); 0535 } 0536 return res; 0537 } 0538 0539 static constexpr const char* name_prefixes[] = { 0540 "DR", "MR", "MRS", "MS" 0541 }; 0542 0543 static bool isSeparator(QChar c) 0544 { 0545 return c == QLatin1Char(' ') || c == QLatin1Char('/'); 0546 } 0547 0548 static QString simplifyNamePart(QString n) 0549 { 0550 n = n.simplified(); 0551 0552 for (auto prefix : name_prefixes) { 0553 const int prefixLen = std::strlen(prefix); 0554 if (n.size() > 
prefixLen + 2 && 0555 n.startsWith(QLatin1StringView(prefix, prefixLen), 0556 Qt::CaseInsensitive) && 0557 isSeparator(n[prefixLen])) { 0558 return n.mid(prefixLen + 1); 0559 } 0560 if (n.size() > prefixLen + 2 && 0561 n.endsWith(QLatin1StringView(prefix, prefixLen), 0562 Qt::CaseInsensitive) && 0563 isSeparator(n[n.size() - prefixLen - 1])) { 0564 return n.left(n.size() - prefixLen - 1); 0565 } 0566 } 0567 0568 return n; 0569 } 0570 0571 KItinerary::Person ExtractorPostprocessorPrivate::processPerson(KItinerary::Person person) const 0572 { 0573 person.setName(simplifyNamePart(person.name())); 0574 person.setFamilyName(simplifyNamePart(person.familyName())); 0575 person.setGivenName(simplifyNamePart(person.givenName())); 0576 0577 // fill name with name parts, if it's empty 0578 if ((person.name().isEmpty() || person.name() == person.familyName() || person.name() == person.givenName()) 0579 && !person.familyName().isEmpty() && !person.givenName().isEmpty()) 0580 { 0581 person.setName(person.givenName() + QLatin1Char(' ') + person.familyName()); 0582 } 0583 0584 return person; 0585 } 0586 0587 PostalAddress ExtractorPostprocessorPrivate::processAddress(PostalAddress addr, const QString &phoneNumber, const GeoCoordinates &geo) 0588 { 0589 // convert to ISO 3166-1 alpha-2 country codes 0590 if (addr.addressCountry().size() > 2) { 0591 QString alpha2Code; 0592 0593 // try ISO 3166-1 alpha-3, we get that e.g. 
from Flixbus 0594 if (addr.addressCountry().size() == 3) { 0595 alpha2Code = KCountry::fromAlpha3(addr.addressCountry()).alpha2(); 0596 } 0597 if (alpha2Code.isEmpty()) { 0598 alpha2Code = KCountry::fromName(addr.addressCountry()).alpha2(); 0599 } 0600 if (!alpha2Code.isEmpty()) { 0601 addr.setAddressCountry(alpha2Code); 0602 } 0603 } 0604 0605 // upper case country codes 0606 if (addr.addressCountry().size() == 2) { 0607 addr.setAddressCountry(addr.addressCountry().toUpper()); 0608 } 0609 0610 // normalize strings 0611 addr.setStreetAddress(addr.streetAddress().simplified()); 0612 addr.setPostalCode(addr.postalCode().simplified()); 0613 addr.setAddressLocality(addr.addressLocality().simplified()); 0614 addr.setAddressRegion(addr.addressRegion().simplified()); 0615 0616 #if HAVE_PHONENUMBER 0617 // recover country from phone number, if we have that 0618 if (!phoneNumber.isEmpty() && addr.addressCountry().size() != 2) { 0619 const auto phoneStr = phoneNumber.toStdString(); 0620 const auto util = i18n::phonenumbers::PhoneNumberUtil::GetInstance(); 0621 i18n::phonenumbers::PhoneNumber number; 0622 if (util->ParseAndKeepRawInput(phoneStr, "ZZ", &number) == i18n::phonenumbers::PhoneNumberUtil::NO_PARSING_ERROR) { 0623 std::string isoCode; 0624 util->GetRegionCodeForNumber(number, &isoCode); 0625 if (!isoCode.empty() && isoCode != "ZZ") { 0626 addr.setAddressCountry(QString::fromStdString(isoCode)); 0627 } 0628 } 0629 } 0630 #endif 0631 0632 if (geo.isValid() && addr.addressCountry().isEmpty()) { 0633 addr.setAddressCountry(KCountry::fromLocation(geo.latitude(), geo.longitude()).alpha2()); 0634 } 0635 0636 AddressParser addrParser; 0637 addrParser.setFallbackCountry(KCountry::fromQLocale(QLocale().territory()).alpha2()); 0638 addrParser.parse(addr); 0639 addr = addrParser.result(); 0640 return addr; 0641 } 0642 0643 QString ExtractorPostprocessorPrivate::processPhoneNumber(const QString &phoneNumber, const PostalAddress &addr) 0644 { 0645 #if HAVE_PHONENUMBER 0646 // or 
complete the phone number if we know the country 0647 if (!phoneNumber.isEmpty() && addr.addressCountry().size() == 2) { 0648 auto phoneStr = phoneNumber.toStdString(); 0649 const auto isoCode = addr.addressCountry().toStdString(); 0650 const auto util = i18n::phonenumbers::PhoneNumberUtil::GetInstance(); 0651 i18n::phonenumbers::PhoneNumber number; 0652 if (util->ParseAndKeepRawInput(phoneStr, isoCode, &number) == i18n::phonenumbers::PhoneNumberUtil::NO_PARSING_ERROR) { 0653 if (number.country_code_source() == i18n::phonenumbers::PhoneNumber_CountryCodeSource_FROM_DEFAULT_COUNTRY) { 0654 util->Format(number, i18n::phonenumbers::PhoneNumberUtil::INTERNATIONAL, &phoneStr); 0655 return QString::fromStdString(phoneStr); 0656 } 0657 } 0658 } 0659 #else 0660 Q_UNUSED(addr) 0661 #endif 0662 return phoneNumber.simplified(); 0663 } 0664 0665 QVariantList ExtractorPostprocessorPrivate::processActions(QVariantList actions) const 0666 { 0667 // remove non-actions and actions with invalid URLs 0668 QUrl viewUrl; 0669 for (auto it = actions.begin(); it != actions.end();) { 0670 if (!JsonLd::canConvert<Action>(*it)) { 0671 it = actions.erase(it); 0672 continue; 0673 } 0674 0675 const auto action = JsonLd::convert<Action>(*it); 0676 if (!action.target().isValid()) { 0677 it = actions.erase(it); 0678 continue; 0679 } 0680 0681 if (JsonLd::isA<ViewAction>(*it)) { 0682 viewUrl = action.target(); 0683 } 0684 ++it; 0685 } 0686 0687 // normalize the order, so JSON comparison still yields correct results 0688 std::sort(actions.begin(), actions.end(), [](const QVariant &lhs, const QVariant &rhs) { 0689 return strcmp(lhs.typeName(), rhs.typeName()) < 0; 0690 }); 0691 0692 // remove actions that don't actually have their own target, or duplicates 0693 QUrl prevUrl; 0694 const char* prevType = nullptr; 0695 for (auto it = actions.begin(); it != actions.end();) { 0696 const auto action = JsonLd::convert<Action>(*it); 0697 const auto isDuplicate = action.target() == prevUrl && (prevType ? 
strcmp(prevType, (*it).typeName()) == 0 : false); 0698 if ((JsonLd::isA<ViewAction>(*it) || action.target() != viewUrl) && !isDuplicate) { 0699 prevUrl = action.target(); 0700 prevType = (*it).typeName(); 0701 ++it; 0702 } else { 0703 it = actions.erase(it); 0704 } 0705 } 0706 0707 return actions; 0708 } 0709 0710 template <typename T> 0711 QDateTime ExtractorPostprocessorPrivate::processTimeForLocation(QDateTime dt, const T &place) const 0712 { 0713 if (!dt.isValid() || (dt.timeSpec() == Qt::TimeZone && dt.timeZone() != QTimeZone::utc())) { 0714 return dt; 0715 } 0716 0717 const auto tz = KnowledgeDb::timezoneForLocation(place.geo().latitude(), place.geo().longitude(), place.address().addressCountry(), place.address().addressRegion()); 0718 if (!tz.isValid()) { 0719 return dt; 0720 } 0721 0722 // prefer our timezone over externally provided UTC offset, if they match 0723 if (dt.timeSpec() == Qt::OffsetFromUTC && tz.offsetFromUtc(dt) != dt.offsetFromUtc()) { 0724 qCDebug(Log) << "UTC offset clashes with expected timezone!" << dt << dt.offsetFromUtc() << tz.id() << tz.offsetFromUtc(dt); 0725 return dt; 0726 } 0727 0728 if (dt.timeSpec() == Qt::OffsetFromUTC || dt.timeSpec() == Qt::LocalTime) { 0729 dt.setTimeZone(tz); 0730 } else if (dt.timeSpec() == Qt::UTC || (dt.timeSpec() == Qt::TimeZone && dt.timeZone() == QTimeZone::utc())) { 0731 dt = dt.toTimeZone(tz); 0732 } 0733 return dt; 0734 }