File indexing completed on 2024-12-22 04:59:44

0001 /*
0002    SPDX-FileCopyrightText: 2017 Volker Krause <vkrause@kde.org>
0003 
0004    SPDX-License-Identifier: LGPL-2.0-or-later
0005 */
0006 
0007 #pragma once
0008 
0009 #include "kitinerary_export.h"
0010 
0011 #include <QList>
0012 #include <QVariant>
0013 
0014 #include <memory>
0015 
0016 namespace KItinerary {
0017 
0018 class ExtractorPostprocessorPrivate;
0019 
0020 /** Post-process extracted data to filter out garbage and augment data from other sources.
0021  *
0022  *  In detail, this performs the tasks listed below for all data elements fed into it.
0023  *
0024  *  @section postproc_normalize Normalization
0025  *
0026  *  Basic normalization for e.g. renamed properties of older schema.org versions is already
0027  *  covered by JsonLdImportFilter, post-processing covers the more elaborate normalization steps,
0028  *  such as:
0029  *  - translate human readable and possibly localized country names into ISO 3166-1 codes.
0030  *  - expand IATA BCBP ticket tokens (see IataParser).
0031  *
0032  *  @section postproc_augment Augmentation
0033  *
0034  *  That is, add additional information derived from a built-in knowledge base (see KnowledgeDb).
0035  *  This includes:
0036  *  - Add timezone information to arrival and departure times.
0037  *  - Add geo coordinates and country information to known airports or train stations.
0038  *
0039  *  @section postproc_merge Merge Duplicates
0040  *
0041  *  Duplicate elements that might have been the result of two overlapping extractors (e.g. when
0042  *  extracting two different MIME parts of an email referring to the same reservation) are merged.
0043  *
0044  *  @section postproc_validation Validation
0045  *
0046  *  At this point, all invalid elements are discarded. The definition of invalid is fairly loose though,
0047  *  and typically only covers elements that are explicitly considered unusable. Examples:
0048  *  - A Flight missing a departure day or destination.
0049  *  - A LodigingReservation without an attached LodgingBusiness.
0050  *  - etc.
0051  *
0052  *  Validation can be disabled and done separately using KItinerary::ExtractorValidator, in case you
0053  *  want more control over which elements are considered valid. See setValidationEnabled().
0054  *
0055  *  @section postproc_sort Sorting
0056  *
0057  *  Finally the remaining elements are sorted based on their relevant date (see SortUtil). This
0058  *  makes the data usable for basic display right away, but it for example doesn't do multi-traveler
0059  *  aggregation, that's still left for the display layer.
0060  */
0061 class KITINERARY_EXPORT ExtractorPostprocessor
0062 {
0063 public:
0064     ExtractorPostprocessor();
0065     ExtractorPostprocessor(const ExtractorPostprocessor&) = delete;
0066     ExtractorPostprocessor(ExtractorPostprocessor&&) noexcept;
0067     ~ExtractorPostprocessor();
0068 
0069     /** This will normalize and augment the given data elements and merge them with
0070      *  already added data elements if applicable.
0071      */
0072     void process(const QList<QVariant> &data);
0073 
0074     /** This returns the final result of all previously executed processing steps
0075      *  followed by sorting and filtering out all invalid data elements.
0076      */
0077     QList<QVariant> result() const;
0078 
0079     /** The date the reservation(s) processed here have been made, if known.
0080      *  This is used for determining the year of incomplete dates provided by
0081      *  various sources. Therefore this has to be somewhen before the reservation
0082      *  becomes due.
0083      */
0084     void setContextDate(const QDateTime &dt);
0085 
0086     /** Enable or disable validation.
0087      *  By default this is enabled, and will discard all unknown types
0088      *  and incomplete items. If you need more control over this, disable
0089      *  this here and pass the items through ExtractorValidator yourself
0090      *  (or even use an entirely different validation mechanism entirely).
0091      *  @see ExtractorValidator.
0092      *  @deprecated Has no functionality anymore, remove and use ExtractorValidator
0093      *  explicitly instead.
0094      */
0095     [[deprecated("has no functionality anymore")]] void setValidationEnabled(bool validate);
0096 
0097 private:
0098     std::unique_ptr<ExtractorPostprocessorPrivate> d;
0099 };
0100 
0101 }
0102