File indexing completed on 2024-12-22 04:59:44

0001 /*
0002    SPDX-FileCopyrightText: 2017 Volker Krause <vkrause@kde.org>
0003 
0004    SPDX-License-Identifier: LGPL-2.0-or-later
0005 */
0006 
0007 #include "config-kitinerary.h"
0008 #include "extractorpostprocessor.h"
0009 #include "extractorpostprocessor_p.h"
0010 #include "extractorvalidator.h"
0011 #include "flightpostprocessor_p.h"
0012 #include "stringutil.h"
0013 
0014 #include "iata/iatabcbpparser.h"
0015 #include "jsonlddocument.h"
0016 #include "logging.h"
0017 #include "mergeutil.h"
0018 #include "sortutil.h"
0019 #include "text/addressparser_p.h"
0020 
0021 #include "knowledgedb/airportdb.h"
0022 #include "knowledgedb/timezonedb_p.h"
0023 #include "knowledgedb/trainstationdb.h"
0024 
0025 #include <KItinerary/Action>
0026 #include <KItinerary/BoatTrip>
0027 #include <KItinerary/BusTrip>
0028 #include <KItinerary/Event>
0029 #include <KItinerary/Flight>
0030 #include <KItinerary/Organization>
0031 #include <KItinerary/Person>
0032 #include <KItinerary/Place>
0033 #include <KItinerary/ProgramMembership>
0034 #include <KItinerary/RentalCar>
0035 #include <KItinerary/Reservation>
0036 #include <KItinerary/Taxi>
0037 #include <KItinerary/Ticket>
0038 #include <KItinerary/TrainTrip>
0039 #include <KItinerary/Visit>
0040 
0041 #include <KCountry>
0042 
0043 #include <QDebug>
0044 #include <QJsonArray>
0045 #include <QJsonDocument>
0046 #include <QTimeZone>
0047 #include <QUrl>
0048 
0049 #if HAVE_PHONENUMBER
0050 #include <phonenumbers/phonenumberutil.h>
0051 #endif
0052 
0053 #include <algorithm>
0054 #include <cstring>
0055 
0056 using namespace KItinerary;
0057 
0058 ExtractorPostprocessor::ExtractorPostprocessor()
0059     : d(new ExtractorPostprocessorPrivate)
0060 {
0061 }
0062 
0063 ExtractorPostprocessor::ExtractorPostprocessor(ExtractorPostprocessor &&) noexcept = default;
0064 ExtractorPostprocessor::~ExtractorPostprocessor() = default;
0065 
0066 void ExtractorPostprocessor::process(const QList<QVariant> &data) {
0067     d->m_resultFinalized = false;
0068     d->m_data.reserve(d->m_data.size() + data.size());
0069     for (auto elem : data) {
0070         // reservation types
0071         if (JsonLd::isA<FlightReservation>(elem)) {
0072             elem = d->processFlightReservation(elem.value<FlightReservation>());
0073         } else if (JsonLd::isA<TrainReservation>(elem)) {
0074             elem = d->processTrainReservation(elem.value<TrainReservation>());
0075         } else if (JsonLd::isA<LodgingReservation>(elem)) {
0076             elem = d->processLodgingReservation(elem.value<LodgingReservation>());
0077         } else if (JsonLd::isA<FoodEstablishmentReservation>(elem)) {
0078             elem = d->processFoodEstablishmentReservation(elem.value<FoodEstablishmentReservation>());
0079         } else if (JsonLd::isA<TouristAttractionVisit>(elem)) {
0080             elem = d->processTouristAttractionVisit(elem.value<TouristAttractionVisit>());
0081         } else if (JsonLd::isA<BusReservation>(elem)) {
0082             elem = d->processBusReservation(elem.value<BusReservation>());
0083         } else if (JsonLd::isA<BoatReservation>(elem)) {
0084             elem = d->processBoatReservation(elem.value<BoatReservation>());
0085         } else if (JsonLd::isA<EventReservation>(elem)) {
0086             elem = d->processEventReservation(elem.value<EventReservation>());
0087         } else if (JsonLd::isA<RentalCarReservation>(elem)) {
0088             elem = d->processRentalCarReservation(elem.value<RentalCarReservation>());
0089         } else if (JsonLd::isA<TaxiReservation>(elem)) {
0090             elem = d->processTaxiReservation(elem.value<TaxiReservation>());
0091         }
0092 
0093         // "reservationFor" types
0094         else if (JsonLd::isA<LodgingBusiness>(elem)) {
0095             elem = d->processPlace(elem.value<LodgingBusiness>());
0096         } else if (JsonLd::isA<FoodEstablishment>(elem)) {
0097             elem = d->processPlace(elem.value<FoodEstablishment>());
0098         } else if (JsonLd::isA<Event>(elem)) {
0099             elem = d->processEvent(elem.value<Event>());
0100         } else if (JsonLd::isA<LocalBusiness>(elem)) {
0101             elem = d->processPlace(elem.value<LocalBusiness>());
0102         }
0103 
0104         // non-reservation types
0105         else if (JsonLd::isA<ProgramMembership>(elem)) {
0106             elem = d->processProgramMembership(elem.value<ProgramMembership>());
0107         } else if (JsonLd::isA<Ticket>(elem)) {
0108             elem = d->processTicket(elem.value<Ticket>());
0109         }
0110 
0111         d->mergeOrAppend(elem);
0112     }
0113 }
0114 
0115 QList<QVariant> ExtractorPostprocessor::result() const {
0116     if (!d->m_resultFinalized) {
0117         // fold elements we have reservations for into those reservations
0118         for (auto it = d->m_data.begin(); it != d->m_data.end();) {
0119             if (JsonLd::isA<Reservation>(*it)) {
0120                 ++it;
0121                 continue;
0122             }
0123 
0124             bool merged = false;
0125             for (auto it2 = d->m_data.begin(); it2 != d->m_data.end(); ++it2) {
0126                 const auto resFor = JsonLdDocument::readProperty(*it2, "reservationFor");
0127                 if (MergeUtil::isSame(resFor, *it)) {
0128                     JsonLdDocument::writeProperty(*it2, "reservationFor", MergeUtil::merge(resFor, *it));
0129                     merged = true;
0130                 }
0131             }
0132 
0133             if (merged) {
0134                 it = d->m_data.erase(it);
0135             } else {
0136                 ++it;
0137             }
0138         }
0139 
0140         // search for "triangular" patterns, ie. a location change element that has a matching departure
0141         // and matching arrival to two different other location change elements (A->C vs A->B + B->C).
0142         // we remove those, as the fine-granular results are better
0143         if (d->m_data.size() >= 3) {
0144             for (auto it = d->m_data.begin(); it != d->m_data.end();) {
0145                 auto depIt = it;
0146                 auto arrIt = it;
0147                 for (auto it2 = d->m_data.begin(); it2 != d->m_data.end(); ++it2) {
0148                     if (it == it2) {
0149                         continue;
0150                     }
0151                     if (MergeUtil::hasSameDeparture(*it, *it2)) {
0152                         depIt = it2;
0153                     }
0154                     if (MergeUtil::hasSameArrival(*it, *it2)) {
0155                         arrIt = it2;
0156                     }
0157                 }
0158 
0159                 if (depIt != it && arrIt != it && depIt != arrIt) {
0160                     it = d->m_data.erase(it);
0161                 } else {
0162                     ++it;
0163                 }
0164             }
0165         }
0166 
0167         d->m_resultFinalized = true;
0168     }
0169 
0170     std::stable_sort(d->m_data.begin(), d->m_data.end(), SortUtil::isBefore);
0171     return d->m_data;
0172 }
0173 
0174 void ExtractorPostprocessor::setContextDate(const QDateTime& dt)
0175 {
0176     d->m_contextDate = dt;
0177 }
0178 
0179 void ExtractorPostprocessor::setValidationEnabled([[maybe_unused]] bool validate)
0180 {
0181 }
0182 
0183 void ExtractorPostprocessorPrivate::mergeOrAppend(const QVariant &elem)
0184 {
0185     const auto it = std::find_if(m_data.begin(), m_data.end(), [elem](const QVariant &other) {
0186         return MergeUtil::isSame(elem, other);
0187     });
0188 
0189     if (it == m_data.end()) {
0190         m_data.push_back(elem);
0191     } else {
0192         *it = MergeUtil::merge(*it, elem);
0193     }
0194 }
0195 
0196 QVariant ExtractorPostprocessorPrivate::processFlightReservation(FlightReservation res) const
0197 {
0198     // expand ticketToken for IATA BCBP data
0199     const auto bcbp = res.reservedTicket().value<Ticket>().ticketTokenData().toString();
0200     if (!bcbp.isEmpty()) {
0201         const auto bcbpData = IataBcbpParser::parse(bcbp, m_contextDate);
0202         if (bcbpData.size() == 1) {
0203             res = JsonLdDocument::apply(bcbpData.at(0), res).value<FlightReservation>();
0204             // standardize on the BCBP booking reference, not some secondary one we might have in structured data for example
0205             res.setReservationNumber(bcbpData.at(0).value<FlightReservation>().reservationNumber());
0206         } else {
0207             for (const auto &data : bcbpData) {
0208                 if (MergeUtil::isSame(res, data)) {
0209                     res = JsonLdDocument::apply(data, res).value<FlightReservation>();
0210                     break;
0211                 }
0212             }
0213         }
0214     }
0215 
0216     if (res.reservationFor().isValid()) {
0217         FlightPostProcessor p;
0218         res.setReservationFor(p.processFlight(res.reservationFor().value<Flight>()));
0219     }
0220     return processReservation(res);
0221 }
0222 
0223 TrainReservation ExtractorPostprocessorPrivate::processTrainReservation(TrainReservation res) const
0224 {
0225     if (res.reservationFor().isValid()) {
0226         res.setReservationFor(processTrainTrip(res.reservationFor().value<TrainTrip>()));
0227     }
0228     return processReservation(res);
0229 }
0230 
0231 TrainTrip ExtractorPostprocessorPrivate::processTrainTrip(TrainTrip trip) const
0232 {
0233     trip.setArrivalPlatform(trip.arrivalPlatform().trimmed());
0234     trip.setDeparturePlatform(trip.departurePlatform().trimmed());
0235     trip.setDepartureStation(processTrainStation(trip.departureStation()));
0236     trip.setArrivalStation(processTrainStation(trip.arrivalStation()));
0237     trip.setDepartureTime(processTrainTripTime(trip.departureTime(), trip.departureDay(), trip.departureStation()));
0238     trip.setArrivalTime(processTrainTripTime(trip.arrivalTime(), trip.departureDay(), trip.arrivalStation()));
0239     trip.setTrainNumber(trip.trainNumber().simplified());
0240     trip.setTrainName(trip.trainName().simplified());
0241 
0242     // arrival less than a day before departure is an indication of the extractor failing to detect day rollover
0243     const auto duration = trip.departureTime().secsTo(trip.arrivalTime());
0244     if (duration < 0 && duration > -3600*24 && trip.departureTime().timeSpec() == trip.arrivalTime().timeSpec()) {
0245         trip.setArrivalTime(trip.arrivalTime().addDays(1));
0246     }
0247 
0248     return trip;
0249 }
0250 
0251 static void applyStationData(const KnowledgeDb::TrainStation &record, TrainStation &station)
0252 {
0253     if (!station.geo().isValid() && record.coordinate.isValid()) {
0254         GeoCoordinates geo;
0255         geo.setLatitude(record.coordinate.latitude);
0256         geo.setLongitude(record.coordinate.longitude);
0257         station.setGeo(geo);
0258     }
0259     auto addr = station.address();
0260     if (addr.addressCountry().isEmpty() && record.country.isValid()) {
0261         addr.setAddressCountry(record.country.toString());
0262         station.setAddress(addr);
0263     }
0264 }
0265 
0266 static void applyStationCountry(const QString &isoCode, TrainStation &station)
0267 {
0268     auto addr = station.address();
0269     if (addr.addressCountry().isEmpty()) {
0270         addr.setAddressCountry(isoCode.toUpper());
0271         station.setAddress(addr);
0272     }
0273 }
0274 
0275 TrainStation ExtractorPostprocessorPrivate::processTrainStation(TrainStation station) const
0276 {
0277     const auto id = station.identifier();
0278     if (id.isEmpty()) { // empty -> null cleanup, to have more compact json-ld output
0279         station.setIdentifier(QString());
0280     } else if (id.startsWith(QLatin1StringView("sncf:")) && id.size() == 10) {
0281       const auto record = KnowledgeDb::stationForSncfStationId(
0282           KnowledgeDb::SncfStationId{id.mid(5)});
0283       applyStationData(record, station);
0284       applyStationCountry(id.mid(5, 2).toUpper(), station);
0285     } else if (id.startsWith(QLatin1StringView("ibnr:")) && id.size() == 12) {
0286       const auto record =
0287           KnowledgeDb::stationForIbnr(KnowledgeDb::IBNR{id.mid(5).toUInt()});
0288       applyStationData(record, station);
0289       const auto country =
0290           KnowledgeDb::countryIdForUicCode(QStringView(id).mid(5, 2).toUShort())
0291               .toString();
0292       applyStationCountry(country, station);
0293     } else if (id.startsWith(QLatin1StringView("uic:")) && id.size() == 11) {
0294       const auto record = KnowledgeDb::stationForUic(
0295           KnowledgeDb::UICStation{id.mid(4).toUInt()});
0296       applyStationData(record, station);
0297       const auto country =
0298           KnowledgeDb::countryIdForUicCode(QStringView(id).mid(4, 2).toUShort())
0299               .toString();
0300       applyStationCountry(country, station);
0301     } else if (id.startsWith(QLatin1StringView("ir:")) && id.size() > 4) {
0302       const auto record =
0303           KnowledgeDb::stationForIndianRailwaysStationCode(id.mid(3));
0304       applyStationData(record, station);
0305     } else if (id.startsWith(QLatin1StringView("benerail:")) &&
0306                id.size() == 14) {
0307       const auto record = KnowledgeDb::stationForBenerailId(
0308           KnowledgeDb::BenerailStationId(id.mid(9)));
0309       applyStationData(record, station);
0310       applyStationCountry(id.mid(9, 2).toUpper(), station);
0311     } else if (id.startsWith(QLatin1StringView("vrfi:")) && id.size() >= 7 &&
0312                id.size() <= 9) {
0313       const auto record = KnowledgeDb::stationForVRStationCode(
0314           KnowledgeDb::VRStationCode(id.mid(5)));
0315       applyStationData(record, station);
0316     } else if (id.startsWith(QLatin1StringView("iata:")) && id.size() == 8) {
0317       const auto iataCode = KnowledgeDb::IataCode(QStringView(id).mid(5));
0318       const auto record = KnowledgeDb::stationForIataCode(iataCode);
0319       applyStationData(record, station);
0320       // fall back to the airport with the matching IATA code for the country
0321       // information we cannot use the coordinate though, as that points to the
0322       // actual airport, not the station
0323       applyStationCountry(KnowledgeDb::countryForAirport(iataCode).toString(),
0324                           station);
0325     } else if (id.startsWith(QLatin1StringView("amtrak:")) && id.size() == 10) {
0326       const auto record = KnowledgeDb::stationForAmtrakStationCode(
0327           KnowledgeDb::AmtrakStationCode(QStringView(id).mid(7)));
0328       applyStationData(record, station);
0329     } else if (id.startsWith(QLatin1StringView("via:")) && id.size() == 8) {
0330       const auto record = KnowledgeDb::stationForViaRailStationCode(
0331           KnowledgeDb::ViaRailStationCode(QStringView(id).mid(4)));
0332       applyStationData(record, station);
0333     } else if (id.startsWith(QLatin1StringView("uk:")) && id.size() == 6) {
0334       const auto record = KnowledgeDb::stationForUkRailwayStationCode(
0335           KnowledgeDb::UKRailwayStationCode(QStringView(id).mid(3)));
0336       applyStationData(record, station);
0337     }
0338 
0339     return processPlace(station);
0340 }
0341 
0342 QDateTime ExtractorPostprocessorPrivate::processTrainTripTime(QDateTime dt, QDate departureDay, const TrainStation& station) const
0343 {
0344     if (!dt.isValid()) {
0345         return dt;
0346     }
0347 
0348     if (dt.date().year() <= 1970 && departureDay.isValid()) { // we just have the time, but not the day
0349         dt.setDate(departureDay);
0350     }
0351     return processTimeForLocation(dt, station);
0352 }
0353 
0354 BusReservation ExtractorPostprocessorPrivate::processBusReservation(BusReservation res) const
0355 {
0356     if (res.reservationFor().isValid()) {
0357         res.setReservationFor(processBusTrip(res.reservationFor().value<BusTrip>()));
0358     }
0359     return processReservation(res);
0360 }
0361 
0362 BusTrip ExtractorPostprocessorPrivate::processBusTrip(BusTrip trip) const
0363 {
0364     trip.setDepartureBusStop(processPlace(trip.departureBusStop()));
0365     trip.setArrivalBusStop(processPlace(trip.arrivalBusStop()));
0366     trip.setDepartureTime(processTimeForLocation(trip.departureTime(), trip.departureBusStop()));
0367     trip.setArrivalTime(processTimeForLocation(trip.arrivalTime(), trip.arrivalBusStop()));
0368     trip.setBusNumber(trip.busNumber().simplified());
0369     trip.setBusName(trip.busName().simplified());
0370     return trip;
0371 }
0372 
0373 BoatReservation ExtractorPostprocessorPrivate::processBoatReservation(BoatReservation res) const
0374 {
0375     if (res.reservationFor().isValid()) {
0376         res.setReservationFor(processBoatTrip(res.reservationFor().value<BoatTrip>()));
0377     }
0378     return processReservation(res);
0379 }
0380 
0381 BoatTrip ExtractorPostprocessorPrivate::processBoatTrip(BoatTrip trip) const
0382 {
0383     trip.setDepartureBoatTerminal(processPlace(trip.departureBoatTerminal()));
0384     trip.setArrivalBoatTerminal(processPlace(trip.arrivalBoatTerminal()));
0385     trip.setDepartureTime(processTimeForLocation(trip.departureTime(), trip.departureBoatTerminal()));
0386     trip.setArrivalTime(processTimeForLocation(trip.arrivalTime(), trip.arrivalBoatTerminal()));
0387 
0388     // arrival less than a day before departure is an indication of the extractor failing to detect day rollover
0389     const auto duration = trip.departureTime().secsTo(trip.arrivalTime());
0390     if (duration < 0 && duration > -3600*24) {
0391         trip.setArrivalTime(trip.arrivalTime().addDays(1));
0392     }
0393 
0394     return trip;
0395 }
0396 
0397 LodgingReservation ExtractorPostprocessorPrivate::processLodgingReservation(LodgingReservation res) const
0398 {
0399     if (res.reservationFor().isValid()) {
0400         res.setReservationFor(processPlace(res.reservationFor().value<LodgingBusiness>()));
0401         res.setCheckinTime(processTimeForLocation(res.checkinTime(), res.reservationFor().value<LodgingBusiness>()));
0402         res.setCheckoutTime(processTimeForLocation(res.checkoutTime(), res.reservationFor().value<LodgingBusiness>()));
0403     }
0404     return processReservation(res);
0405 }
0406 
0407 TaxiReservation ExtractorPostprocessorPrivate::processTaxiReservation(TaxiReservation res) const
0408 {
0409     res.setPickupLocation(processPlace(res.pickupLocation()));
0410     res.setPickupTime(processTimeForLocation(res.pickupTime(), res.pickupLocation()));
0411     return processReservation(res);
0412 }
0413 
0414 RentalCarReservation ExtractorPostprocessorPrivate::processRentalCarReservation(RentalCarReservation res) const
0415 {
0416     if (res.reservationFor().isValid()) {
0417         res.setReservationFor(processRentalCar(res.reservationFor().value<RentalCar>()));
0418     }
0419     res.setPickupLocation(processPlace(res.pickupLocation()));
0420     res.setDropoffLocation(processPlace(res.dropoffLocation()));
0421     res.setPickupTime(processTimeForLocation(res.pickupTime(), res.pickupLocation()));
0422     res.setDropoffTime(processTimeForLocation(res.dropoffTime(), res.dropoffLocation()));
0423     return processReservation(res);
0424 }
0425 
0426 RentalCar ExtractorPostprocessorPrivate::processRentalCar(RentalCar car) const
0427 {
0428     car.setName(car.name().trimmed());
0429     return car;
0430 }
0431 
0432 FoodEstablishmentReservation ExtractorPostprocessorPrivate::processFoodEstablishmentReservation(FoodEstablishmentReservation res) const
0433 {
0434     if (res.reservationFor().isValid()) {
0435         res.setReservationFor(processPlace(res.reservationFor().value<FoodEstablishment>()));
0436         res.setStartTime(processTimeForLocation(res.startTime(), res.reservationFor().value<FoodEstablishment>()));
0437         res.setEndTime(processTimeForLocation(res.endTime(), res.reservationFor().value<FoodEstablishment>()));
0438     }
0439     return processReservation(res);
0440 }
0441 
0442 TouristAttractionVisit ExtractorPostprocessorPrivate::processTouristAttractionVisit(TouristAttractionVisit visit) const
0443 {
0444     visit.setTouristAttraction(processPlace(visit.touristAttraction()));
0445     visit.setArrivalTime(processTimeForLocation(visit.arrivalTime(), visit.touristAttraction()));
0446     visit.setDepartureTime(processTimeForLocation(visit.departureTime(), visit.touristAttraction()));
0447     return visit;
0448 }
0449 
0450 EventReservation ExtractorPostprocessorPrivate::processEventReservation(EventReservation res) const
0451 {
0452     if (res.reservationFor().isValid()) {
0453         res.setReservationFor(processEvent(res.reservationFor().value<Event>()));
0454     }
0455     return processReservation(res);
0456 }
0457 
0458 KItinerary::Event ExtractorPostprocessorPrivate::processEvent(KItinerary::Event event) const
0459 {
0460     event.setName(StringUtil::clean(event.name()));
0461 
0462     // normalize location to be a Place
0463     if (JsonLd::isA<PostalAddress>(event.location())) {
0464         Place place;
0465         place.setAddress(event.location().value<PostalAddress>());
0466         event.setLocation(place);
0467     }
0468 
0469     if (JsonLd::isA<Place>(event.location())) {
0470         event.setLocation(processPlace(event.location().value<Place>()));
0471 
0472         // try to obtain timezones if we have a location
0473         event.setStartDate(processTimeForLocation(event.startDate(), event.location().value<Place>()));
0474         event.setEndDate(processTimeForLocation(event.endDate(), event.location().value<Place>()));
0475         event.setDoorTime(processTimeForLocation(event.doorTime(), event.location().value<Place>()));
0476     }
0477 
0478     return event;
0479 }
0480 
0481 static QString processCurrency(const QString &currency)
0482 {
0483     if (currency.size() != 3 || !std::all_of(currency.begin(), currency.end(), [](QChar c) { return c.isUpper(); })) {
0484         return {};
0485     }
0486     return currency;
0487 }
0488 
0489 Ticket ExtractorPostprocessorPrivate::processTicket(Ticket ticket) const
0490 {
0491     ticket.setName(StringUtil::clean(ticket.name()));
0492     ticket.setTicketNumber(ticket.ticketNumber().simplified());
0493     ticket.setUnderName(processPerson(ticket.underName()));
0494     ticket.setTicketedSeat(processSeat(ticket.ticketedSeat()));
0495     ticket.setPriceCurrency(processCurrency(ticket.priceCurrency()));
0496     return ticket;
0497 }
0498 
0499 ProgramMembership ExtractorPostprocessorPrivate::processProgramMembership(ProgramMembership program) const
0500 {
0501     // remove empty dummy entries found eg. in ERA FCB data
0502     if (const auto name = program.programName(); std::none_of(name.begin(), name.end(), [](QChar c) { return c.isLetter(); })) {
0503         program.setProgramName(QString());
0504     }
0505 
0506     program.setProgramName(program.programName().simplified());
0507     // avoid emitting spurious empty ProgramMembership objects caused by empty elements in JSON-LD/Microdata input
0508     if (program.programName().isEmpty() && !program.programName().isNull()) {
0509         program.setProgramName(QString());
0510     }
0511     program.setMember(processPerson(program.member()));
0512     return program;
0513 }
0514 
0515 Seat ExtractorPostprocessorPrivate::processSeat(Seat seat) const
0516 {
0517     seat.setSeatSection(seat.seatSection().simplified());
0518     seat.setSeatRow(seat.seatRow().simplified());
0519     seat.setSeatNumber(seat.seatNumber().simplified());
0520     seat.setSeatingType(seat.seatingType().simplified());
0521     return seat;
0522 }
0523 
0524 template <typename T>
0525 T ExtractorPostprocessorPrivate::processReservation(T res) const
0526 {
0527     res.setUnderName(processPerson(res.underName().template value<Person>()));
0528     res.setPotentialAction(processActions(res.potentialAction()));
0529     res.setReservationNumber(res.reservationNumber().trimmed());
0530     res.setProgramMembershipUsed(processProgramMembership(res.programMembershipUsed()));
0531     res.setPriceCurrency(processCurrency(res.priceCurrency()));
0532 
0533     if (JsonLd::isA<Ticket>(res.reservedTicket())) {
0534         res.setReservedTicket(processTicket(res.reservedTicket().template value<Ticket>()));
0535     }
0536     return res;
0537 }
0538 
// Honorifics that get stripped from the start or end of name parts.
// The order is relevant: the entries are tried one after the other and the
// first one that applies wins.
static constexpr const char* name_prefixes[] = {
    "DR",
    "MR",
    "MRS",
    "MS",
};
0542 
0543 static bool isSeparator(QChar c)
0544 {
0545     return c == QLatin1Char(' ') || c == QLatin1Char('/');
0546 }
0547 
0548 static QString simplifyNamePart(QString n)
0549 {
0550     n = n.simplified();
0551 
0552     for (auto prefix : name_prefixes) {
0553         const int prefixLen = std::strlen(prefix);
0554         if (n.size() > prefixLen + 2 &&
0555             n.startsWith(QLatin1StringView(prefix, prefixLen),
0556                          Qt::CaseInsensitive) &&
0557             isSeparator(n[prefixLen])) {
0558           return n.mid(prefixLen + 1);
0559         }
0560         if (n.size() > prefixLen + 2 &&
0561             n.endsWith(QLatin1StringView(prefix, prefixLen),
0562                        Qt::CaseInsensitive) &&
0563             isSeparator(n[n.size() - prefixLen - 1])) {
0564           return n.left(n.size() - prefixLen - 1);
0565         }
0566     }
0567 
0568     return n;
0569 }
0570 
0571 KItinerary::Person ExtractorPostprocessorPrivate::processPerson(KItinerary::Person person) const
0572 {
0573     person.setName(simplifyNamePart(person.name()));
0574     person.setFamilyName(simplifyNamePart(person.familyName()));
0575     person.setGivenName(simplifyNamePart(person.givenName()));
0576 
0577     // fill name with name parts, if it's empty
0578     if ((person.name().isEmpty() || person.name() == person.familyName() || person.name() == person.givenName())
0579         && !person.familyName().isEmpty() && !person.givenName().isEmpty())
0580     {
0581         person.setName(person.givenName() + QLatin1Char(' ') + person.familyName());
0582     }
0583 
0584     return person;
0585 }
0586 
0587 PostalAddress ExtractorPostprocessorPrivate::processAddress(PostalAddress addr, const QString &phoneNumber, const GeoCoordinates &geo)
0588 {
0589     // convert to ISO 3166-1 alpha-2 country codes
0590     if (addr.addressCountry().size() > 2) {
0591         QString alpha2Code;
0592 
0593         // try ISO 3166-1 alpha-3, we get that e.g. from Flixbus
0594         if (addr.addressCountry().size() == 3) {
0595             alpha2Code = KCountry::fromAlpha3(addr.addressCountry()).alpha2();
0596         }
0597         if (alpha2Code.isEmpty()) {
0598             alpha2Code = KCountry::fromName(addr.addressCountry()).alpha2();
0599         }
0600         if (!alpha2Code.isEmpty()) {
0601             addr.setAddressCountry(alpha2Code);
0602         }
0603     }
0604 
0605     // upper case country codes
0606     if (addr.addressCountry().size() == 2) {
0607         addr.setAddressCountry(addr.addressCountry().toUpper());
0608     }
0609 
0610     // normalize strings
0611     addr.setStreetAddress(addr.streetAddress().simplified());
0612     addr.setPostalCode(addr.postalCode().simplified());
0613     addr.setAddressLocality(addr.addressLocality().simplified());
0614     addr.setAddressRegion(addr.addressRegion().simplified());
0615 
0616 #if HAVE_PHONENUMBER
0617     // recover country from phone number, if we have that
0618     if (!phoneNumber.isEmpty() && addr.addressCountry().size() != 2) {
0619         const auto phoneStr = phoneNumber.toStdString();
0620         const auto util = i18n::phonenumbers::PhoneNumberUtil::GetInstance();
0621         i18n::phonenumbers::PhoneNumber number;
0622         if (util->ParseAndKeepRawInput(phoneStr, "ZZ", &number) == i18n::phonenumbers::PhoneNumberUtil::NO_PARSING_ERROR) {
0623             std::string isoCode;
0624             util->GetRegionCodeForNumber(number, &isoCode);
0625             if (!isoCode.empty() && isoCode != "ZZ") {
0626                 addr.setAddressCountry(QString::fromStdString(isoCode));
0627             }
0628         }
0629     }
0630 #endif
0631 
0632     if (geo.isValid() && addr.addressCountry().isEmpty()) {
0633         addr.setAddressCountry(KCountry::fromLocation(geo.latitude(), geo.longitude()).alpha2());
0634     }
0635 
0636     AddressParser addrParser;
0637     addrParser.setFallbackCountry(KCountry::fromQLocale(QLocale().territory()).alpha2());
0638     addrParser.parse(addr);
0639     addr = addrParser.result();
0640     return addr;
0641 }
0642 
0643 QString ExtractorPostprocessorPrivate::processPhoneNumber(const QString &phoneNumber, const PostalAddress &addr)
0644 {
0645 #if HAVE_PHONENUMBER
0646     // or complete the phone number if we know the country
0647     if (!phoneNumber.isEmpty() && addr.addressCountry().size() == 2) {
0648         auto phoneStr = phoneNumber.toStdString();
0649         const auto isoCode = addr.addressCountry().toStdString();
0650         const auto util = i18n::phonenumbers::PhoneNumberUtil::GetInstance();
0651         i18n::phonenumbers::PhoneNumber number;
0652         if (util->ParseAndKeepRawInput(phoneStr, isoCode, &number) == i18n::phonenumbers::PhoneNumberUtil::NO_PARSING_ERROR) {
0653             if (number.country_code_source() == i18n::phonenumbers::PhoneNumber_CountryCodeSource_FROM_DEFAULT_COUNTRY) {
0654                 util->Format(number, i18n::phonenumbers::PhoneNumberUtil::INTERNATIONAL, &phoneStr);
0655                 return QString::fromStdString(phoneStr);
0656             }
0657         }
0658     }
0659 #else
0660     Q_UNUSED(addr)
0661 #endif
0662     return phoneNumber.simplified();
0663 }
0664 
0665 QVariantList ExtractorPostprocessorPrivate::processActions(QVariantList actions) const
0666 {
0667     // remove non-actions and actions with invalid URLs
0668     QUrl viewUrl;
0669     for (auto it = actions.begin(); it != actions.end();) {
0670         if (!JsonLd::canConvert<Action>(*it)) {
0671             it = actions.erase(it);
0672             continue;
0673         }
0674 
0675         const auto action = JsonLd::convert<Action>(*it);
0676         if (!action.target().isValid()) {
0677             it = actions.erase(it);
0678             continue;
0679         }
0680 
0681         if (JsonLd::isA<ViewAction>(*it)) {
0682             viewUrl = action.target();
0683         }
0684         ++it;
0685     }
0686 
0687     // normalize the order, so JSON comparison still yields correct results
0688     std::sort(actions.begin(), actions.end(), [](const QVariant &lhs, const QVariant &rhs) {
0689         return strcmp(lhs.typeName(), rhs.typeName()) < 0;
0690     });
0691 
0692     // remove actions that don't actually have their own target, or duplicates
0693     QUrl prevUrl;
0694     const char* prevType = nullptr;
0695     for (auto it = actions.begin(); it != actions.end();) {
0696         const auto action = JsonLd::convert<Action>(*it);
0697         const auto isDuplicate = action.target() == prevUrl && (prevType ? strcmp(prevType, (*it).typeName()) == 0 : false);
0698         if ((JsonLd::isA<ViewAction>(*it) || action.target() != viewUrl) && !isDuplicate) {
0699             prevUrl = action.target();
0700             prevType = (*it).typeName();
0701             ++it;
0702         } else {
0703             it = actions.erase(it);
0704         }
0705     }
0706 
0707     return actions;
0708 }
0709 
0710 template <typename T>
0711 QDateTime ExtractorPostprocessorPrivate::processTimeForLocation(QDateTime dt, const T &place) const
0712 {
0713     if (!dt.isValid() || (dt.timeSpec() == Qt::TimeZone && dt.timeZone() != QTimeZone::utc())) {
0714         return dt;
0715     }
0716 
0717     const auto tz = KnowledgeDb::timezoneForLocation(place.geo().latitude(), place.geo().longitude(), place.address().addressCountry(), place.address().addressRegion());
0718     if (!tz.isValid()) {
0719         return dt;
0720     }
0721 
0722     // prefer our timezone over externally provided UTC offset, if they match
0723     if (dt.timeSpec() == Qt::OffsetFromUTC && tz.offsetFromUtc(dt) != dt.offsetFromUtc()) {
0724         qCDebug(Log) << "UTC offset clashes with expected timezone!" << dt << dt.offsetFromUtc() << tz.id() << tz.offsetFromUtc(dt);
0725         return dt;
0726     }
0727 
0728     if (dt.timeSpec() == Qt::OffsetFromUTC || dt.timeSpec() == Qt::LocalTime) {
0729         dt.setTimeZone(tz);
0730     } else if (dt.timeSpec() == Qt::UTC || (dt.timeSpec() == Qt::TimeZone && dt.timeZone() == QTimeZone::utc())) {
0731         dt = dt.toTimeZone(tz);
0732     }
0733     return dt;
0734 }