File indexing completed on 2024-12-29 04:50:00

0001 /*
0002     SPDX-FileCopyrightText: 2021 Volker Krause <vkrause@kde.org>
0003 
0004     SPDX-License-Identifier: LGPL-2.0-or-later
0005 */
0006 
0007 #include "genericboardingpassextractor.h"
0008 #include "flightutil_p.h"
0009 #include "locationutil.h"
0010 #include "logging.h"
0011 #include "stringutil.h"
0012 
0013 #include "knowledgedb/airportdb.h"
0014 #include "knowledgedb/airportnametokenizer_p.h"
0015 #include "pdf/pdfdocument.h"
0016 #include "text/terminalfinder_p.h"
0017 #include "text/timefinder_p.h"
0018 
0019 #include <KItinerary/ExtractorDocumentNode>
0020 #include <KItinerary/ExtractorResult>
0021 #include <KItinerary/Flight>
0022 #include <KItinerary/Reservation>
0023 
0024 #include <QDebug>
0025 #include <QTimeZone>
0026 
0027 #include <unordered_map>
0028 
0029 using namespace KItinerary;
0030 
0031 GenericBoardingPassExtractor::GenericBoardingPassExtractor()
0032 {
0033     m_filter.setMimeType(QStringLiteral("internal/iata-bcbp"));
0034     m_filter.setScope(ExtractorFilter::Descendants);
0035 }
0036 
0037 GenericBoardingPassExtractor::~GenericBoardingPassExtractor() = default;
0038 
0039 QString GenericBoardingPassExtractor::name() const
0040 {
0041     return QStringLiteral("<Generic PDF Boarding Pass>");
0042 }
0043 
0044 bool GenericBoardingPassExtractor::canHandle(const ExtractorDocumentNode &node) const
0045 {
0046     return node.content<PdfDocument*>() && m_filter.matches(node);
0047 }
0048 
0049 static void mergeOrAppend(QStringList &l, QStringView s)
0050 {
0051     for (auto &n : l) {
0052         if (n.compare(s, Qt::CaseInsensitive)  == 0) {
0053             n = StringUtil::betterString(n, s).toString();
0054             return;
0055         }
0056     }
0057     l.push_back(s.toString());
0058 }
0059 
0060 static int airportDistance(KnowledgeDb::IataCode from, KnowledgeDb::IataCode to)
0061 {
0062     const auto fromCoord = KnowledgeDb::coordinateForAirport(from);
0063     const auto toCoord = KnowledgeDb::coordinateForAirport(to);
0064     if (!fromCoord.isValid() || !toCoord.isValid()) {
0065         return std::numeric_limits<int>::max();
0066     }
0067     return LocationUtil::distance({fromCoord.latitude, fromCoord.longitude}, {toCoord.latitude, toCoord.longitude});
0068 }
0069 
0070 static bool isPlausibleBoardingTime(const QDateTime &boarding, const QDateTime &departure)
0071 {
0072     return boarding < departure && boarding.secsTo(departure) <= 3600;
0073 }
0074 
0075 static bool isPlausibleFlightTime(const QDateTime &fromTime, const QDateTime &toTime, KnowledgeDb::IataCode from, KnowledgeDb::IataCode to)
0076 {
0077     const auto distance = airportDistance(from, to);
0078 
0079     // times are local, so convert them to the right timezone first
0080     auto fromDt = fromTime;
0081     fromDt.setTimeZone(KnowledgeDb::timezoneForAirport(from));
0082     auto toDt = toTime;
0083     toDt.setTimeZone(KnowledgeDb::timezoneForAirport(to));
0084 
0085     const auto flightDuration = fromDt.secsTo(toDt);
0086     if (flightDuration < 3600) {
0087         return false;
0088     }
0089     return fromDt < toDt && FlightUtil::isPlausibleDistanceForDuration(distance, flightDuration);
0090 }
0091 
0092 [[nodiscard]] static qint64 flightDuration(const QDateTime &fromTime, const QDateTime &toTime, KnowledgeDb::IataCode from, KnowledgeDb::IataCode to)
0093 {
0094     // times are local, so convert them to the right timezone first
0095     auto fromDt = fromTime;
0096     fromDt.setTimeZone(KnowledgeDb::timezoneForAirport(from));
0097     auto toDt = toTime;
0098     toDt.setTimeZone(KnowledgeDb::timezoneForAirport(to));
0099     return fromDt.secsTo(toDt) / 60;
0100 }
0101 
0102 static bool conflictIfSet(const QDateTime &lhs, const QDateTime &rhs)
0103 {
0104     return lhs.isValid() && rhs.isValid() && lhs != rhs;
0105 }
0106 
0107 static void applyFlightTimes(QList<QVariant> &result, const QDateTime &boarding, const QDateTime &dep, const QDateTime &arr)
0108 {
0109     for (auto &res : result) {
0110         auto flightRes = res.value<FlightReservation>();
0111         auto flight = flightRes.reservationFor().value<Flight>();
0112 
0113         // check if already set times match, otherwise discard the entire set
0114         if (conflictIfSet(flight.boardingTime(), boarding) || conflictIfSet(flight.departureTime(), dep) || conflictIfSet(flight.arrivalTime(), arr)) {
0115             continue;
0116         }
0117 
0118         // apply not yet set times
0119         if (!flight.boardingTime().isValid() && boarding.isValid()) {
0120             flight.setBoardingTime(boarding);
0121         }
0122         if (!flight.departureTime().isValid() && dep.isValid()) {
0123             flight.setDepartureTime(dep);
0124         }
0125         if (!flight.arrivalTime().isValid() && arr.isValid()) {
0126             flight.setArrivalTime(arr);
0127         }
0128         flightRes.setReservationFor(flight);
0129         res = flightRes;
0130     }
0131 }
0132 
0133 ExtractorResult GenericBoardingPassExtractor::extract(const ExtractorDocumentNode &node, [[maybe_unused]] const ExtractorEngine *engine) const
0134 {
0135     static TerminalFinder terminalFinder(u"^", u"(?=\\b|\\s|$)");
0136 
0137     QList<QVariant> fullResult;
0138 
0139     const auto pdf = node.content<PdfDocument*>();
0140 
0141     std::vector<ExtractorDocumentNode> bcbpNodes;
0142     m_filter.allMatches(node, bcbpNodes);
0143     std::remove_if(bcbpNodes.begin(), bcbpNodes.end(), [](const auto &node) {
0144         return node.location().userType() != QMetaType::Int || node.result().isEmpty();
0145     });
0146     std::sort(bcbpNodes.begin(), bcbpNodes.end(), [](const auto &lhs, const auto &rhs) { return lhs.location().toInt() < rhs.location().toInt(); });
0147 
0148     for (auto it = bcbpNodes.begin(); it != bcbpNodes.end(); ++it) {
0149         QDate departureDay;
0150         KnowledgeDb::IataCode from, to;
0151         QList<QVariant> result;
0152 
0153         // 1 determine which airports we need to look for on the same page
0154         const auto pageNum = (*it).location().toInt();
0155         std::unordered_map<KnowledgeDb::IataCode, QStringList> airportNames;
0156         std::unordered_map<KnowledgeDb::IataCode, QString> terminalNames;
0157         for (auto it2 = it; it2 != bcbpNodes.end() && (*it2).location().toInt() == pageNum; ++it2) {
0158             const auto flightReservations = (*it).result().result();
0159             for (const auto &flightRes : flightReservations) {
0160                 const auto flight = flightRes.value<FlightReservation>().reservationFor().value<Flight>();
0161                 if (!flight.departureAirport().iataCode().isEmpty()) {
0162                     from = KnowledgeDb::IataCode{flight.departureAirport().iataCode()};
0163                     airportNames[from] = QStringList();
0164                     terminalNames[from] = QString();
0165                 }
0166                 if (!flight.arrivalAirport().iataCode().isEmpty()) {
0167                     to = KnowledgeDb::IataCode{flight.arrivalAirport().iataCode()};
0168                     airportNames[to] = QStringList();
0169                     terminalNames[to] = QString();
0170                 }
0171                 departureDay = flight.departureDay();
0172             }
0173         }
0174 
0175         // 2 tokenize the page and scan for airport names
0176         const auto page = pdf->page(pageNum);
0177         qCDebug(Log) << "scanning page" << pageNum << "for airport names";
0178         const auto pageText = page.text();
0179         AirportNameTokenizer tokenizer(pageText);
0180         while (tokenizer.hasNext()) {
0181             const auto s = tokenizer.next();
0182             if (s.compare(QLatin1StringView("international"),
0183                           Qt::CaseInsensitive) == 0) {
0184               qCDebug(Log) << "  ignoring" << s;
0185               continue;
0186             }
0187 
0188             // IATA code of one of the airports
0189             if (const auto code = KnowledgeDb::IataCode(s); !s.isNull() && airportNames.find(KnowledgeDb::IataCode{s}) != airportNames.end()) {
0190                 // also look for terminal information after the IATA code itself
0191                 const auto offset = s.size() + s.data() - pageText.data();
0192                 const auto res = terminalFinder.find(QStringView(pageText).mid(offset));
0193                 if (res.hasResult() && res.name != s.toString()) {
0194                     terminalNames[code] = res.name;
0195                 }
0196 
0197                 qCDebug(Log) << "  found own IATA code" << s;
0198                 continue;
0199             }
0200 
0201             const auto iataCodes = KnowledgeDb::iataCodesFromName(s);
0202             for (const auto code : iataCodes) {
0203                 auto it2 = airportNames.find(code);
0204                 if (it2 != airportNames.end()) {
0205                     qCDebug(Log) << "  found candidate:" << s << iataCodes;
0206                     mergeOrAppend((*it2).second, s);
0207 
0208                     // look for a following terminal name at the position after s
0209                     const auto offset = s.size() + s.data() - pageText.data();
0210                     const auto res = terminalFinder.find(QStringView(pageText).mid(offset));
0211                     if (res.hasResult() && res.name != code.toString()) {
0212                         terminalNames[(*it2).first] = res.name;
0213                     }
0214                 }
0215             }
0216         }
0217 
0218         // 3 augment the results with what we found
0219         const auto flightReservations = (*it).result().result();
0220         for (const auto &res : flightReservations) {
0221             auto flightRes = res.value<FlightReservation>();
0222             auto flight = flightRes.reservationFor().value<Flight>();
0223             auto airport = flight.departureAirport();
0224             airport.setName(airportNames[KnowledgeDb::IataCode{airport.iataCode()}].join(QLatin1Char(' ')));
0225             flight.setDepartureAirport(airport);
0226             flight.setDepartureTerminal(terminalNames[KnowledgeDb::IataCode{airport.iataCode()}]);
0227             airport = flight.arrivalAirport();
0228             airport.setName(airportNames[KnowledgeDb::IataCode{airport.iataCode()}].join(QLatin1Char(' ')));
0229             flight.setArrivalAirport(airport);
0230             flight.setArrivalTerminal(terminalNames[KnowledgeDb::IataCode{airport.iataCode()}]);
0231             flightRes.setReservationFor(flight);
0232             result.push_back(std::move(flightRes));
0233         }
0234 
0235         // 4 if there's only a single leg on this page, try to see if we can determine times
0236         if (airportNames.size() == 2) {
0237             TimeFinder timeFinder;
0238             timeFinder.find(pageText);
0239             std::vector<QDateTime> times;
0240             for (const auto &res : timeFinder.results()) {
0241                 switch (res.dateTime.userType()) {
0242                     case QMetaType::QTime:
0243                         times.push_back(QDateTime(departureDay, res.dateTime.toTime()));
0244                         break;
0245                     case QMetaType::QDateTime:
0246                         if (res.dateTime.toDateTime().date() == departureDay) {
0247                             times.push_back(res.dateTime.toDateTime());
0248                         }
0249                         break;
0250                     case QMetaType::QDate:
0251                     default:
0252                         break;
0253                 }
0254             }
0255             std::sort(times.begin(), times.end());
0256             times.erase(std::unique(times.begin(), times.end()), times.end());
0257             if (times.size() == 2) {
0258                 // boarding/departure only, and on the same day
0259                 if (isPlausibleBoardingTime(times[0], times[1]) && !isPlausibleFlightTime(times[0], times[1], from, to)) {
0260                     applyFlightTimes(result, times[0], times[1], {});
0261                 }
0262             } else if (times.size() == 3) {
0263                 // boarding/departure/arrival on the same day
0264                 if (isPlausibleBoardingTime(times[0], times[1]) && isPlausibleFlightTime(times[1], times[2], from, to)) {
0265                     applyFlightTimes(result, times[0], times[1], times[2]);
0266                 // boarding/departure on the same day, arrival on the next day
0267                 } else if (isPlausibleBoardingTime(times[1], times[2]) && isPlausibleFlightTime(times[2], times[0].addDays(1), from, to)) {
0268                     applyFlightTimes(result, times[1], times[2], times[0].addDays(1));
0269                 }
0270                 // TODO handle boarding before midnight
0271                 // departure/arrival/duration
0272                 else if (isPlausibleFlightTime(times[1], times[2], from, to) && flightDuration(times[1], times[2], from, to) == (times[0].time().hour() * 60 + times[0].time().minute())) {
0273                     applyFlightTimes(result, {}, times[1], times[2]);
0274                 }
0275             }
0276         }
0277 
0278         fullResult += result;
0279     }
0280 
0281     return fullResult;
0282 }