File indexing completed on 2024-12-29 04:51:02

0001 /*
0002    SPDX-FileCopyrightText: 2018 Benjamin Port <benjamin.port@kde.org>
0003 
0004    SPDX-License-Identifier: LGPL-2.0-or-later
0005 */
0006 
/*
 * Per-locale regular expressions used to pick reservation details out of the
 * text of a confirmation. First level key: locale name (also passed on to
 * JsonLd.toDateTime by main()); second level key: field name.
 *
 * A locale that lacks 'bookingRef' (es_ES) is skipped by main() and only
 * contributes its 'person' pattern to parseHtmlCommon().
 */
var regExMap = {
    en_US: {
        bookingRef: /(?:Booking number|Confirmation:) +([0-9]*)\s+/,
        // 1: address, 2: city, 3: postal code, 4: country, 5: phone
        hotelInformation: / *(.+), (.+), (.+), (.+)(?: -|\n)\s+Phone: (\+[0-9 ]*)\s+/,
        hotelName: /(?:\[checkmark\.png\] |\d\n)(.*?)(?: is expecting you on|\n *\[)/,
        // 1: date (either "Monday 3 June 2019" or "Monday, June 3, 2019"), 2: time
        // Note: [A-Za-z] rather than [A-z], the latter also matches [ \ ] ^ _ and `
        arrivalDate: /Check-in *([A-Za-z]+ [0-9]{1,2} [A-Za-z]+ [0-9]+|[A-Za-z]+, [A-Za-z]+ \d{1,2}, \d{4}) \(f?r?o?m? ?([0-9]{1,2}:[0-9]{2})[^\)]*\)/,
        departureDate: /Check-out *([A-Za-z]+ [0-9]{1,2} [A-Za-z]+ [0-9]+|[A-Za-z]+, [A-Za-z]+ \d{1,2}, \d{4}) \(.*?([0-9]{1,2}:[0-9]{2})\)/,
        person: /Guest name[\n\s]+(.*?)(?:\n| Edit guest name)/,
    },

    fr_FR: {
        bookingRef: /Numéro de réservation : ([0-9]*)\s+/,
        // 1: address, 2: city, 3: postal code, 4: country, 5: phone
        hotelInformation: /(.+), (.+), (.+), (.+) -\s+Téléphone : (\+[0-9]*)\s+/,
        hotelName: /L'établissement (.*) vous attend le/,
        // 1: date, 2: start of the check-in window, 3: end of the check-in window
        arrivalDate: /Arrivée  ([a-z]+ [0-9]{1,2} [a-zûé]+ [0-9]+) \(([0-9]{1,2}:[0-9]{2}) - ([0-9]{1,2}:[0-9]{2})\)/,
        // 1: date, 2: end of the check-out window
        departureDate: /Départ  ([a-z]+ [0-9]{1,2} [a-zûé]+ [0-9]+) \([0-9]{1,2}:[0-9]{2} - ([0-9]{1,2}:[0-9]{2})\)/,
        person: /Clients[\n\s]+(.*?)(?:\n| Modifier le nom du client)/,
    },

    de_DE: {
        bookingRef: /Buchungsnummer: ([0-9]*)\s+/,
        // 1: address, 2: city, 3: postal code, 4: country, 5: phone
        hotelInformation: /(.+), (.+), (.+), (.+) -\s+Telefon: (\+[0-9 \-]+)\n/,
        hotelName: /\[checkmark\.png\] Die Unterkunft (.*)\s+erwartet Sie/,
        // 1: date, 2: time
        arrivalDate: /Anreise ([A-Z][a-z]+, [0-9]{1,2}\. \S+ [0-9]{4}) \(ab ([0-9]{1,2}:[0-9]{2})\)/,
        departureDate: /Abreise ([A-Z][a-z]+, [0-9]{1,2}\. \S+ [0-9]{4}) \(bis ([0-9]{1,2}:[0-9]{2})\)/,
        person: /Name des Gastes[\n\s]+(.*?)(?:\n| Name des Gastes bearbeiten)/,
    },

    es_ES: {
        person: /Nombre del huésped[\n\s]+(.*?)\n/,
    },
};
0037 
/**
 * Plain-text fallback extractor: builds a lodging reservation from the text
 * of a confirmation using the per-locale patterns in regExMap.
 *
 * The first locale whose 'bookingRef' pattern matches decides the outcome:
 * if any of its other patterns fails to match, extraction is given up
 * (null is returned) rather than trying further locales.
 *
 * @param text plain text content to search
 * @param node extractor node; when node.result is non-empty nothing is extracted
 * @returns the filled-in reservation object, or null when nothing could be extracted
 */
function main(text, node) {
    if (node.result.length > 0)
        return null; // this is just backup if we have no structured data
    var res = JsonLd.newLodgingReservation();

    // candidate date/time formats handed to JsonLd.toDateTime for all locales
    const dateTimeFormats = ["dddd d MMMM yyyy hh:mm", "dddd, d. MMMM yyyy hh:mm", "dddd, MMMM d, yyyy hh:mm"];

    for (var locale in regExMap) {
        const patterns = regExMap[locale];

        // Locales without a booking reference pattern cannot be handled here
        if (!patterns['bookingRef'])
            continue;
        // If no booking reference go to the next locale
        const bookingRef = text.match(patterns['bookingRef']);
        if (!bookingRef)
            continue;
        res.reservationNumber = bookingRef[1];

        const hotelName = text.match(patterns['hotelName']);
        if (!hotelName)
            return null;
        res.reservationFor.name = hotelName[1];

        const hotel = text.match(patterns['hotelInformation']);
        if (!hotel)
            return null;

        res.reservationFor.address.streetAddress = hotel[1];
        res.reservationFor.address.postalCode = hotel[3];
        res.reservationFor.address.addressLocality = hotel[2];
        res.reservationFor.address.addressCountry = hotel[4];
        res.reservationFor.telephone = hotel[5];

        // Dates are only searched for after the hotel information block.
        // Declared locally: the undeclared assignment used before leaked a
        // global and is an error in strict mode.
        let idx = hotel.index + hotel[0].length;

        const arrivalDate = text.substring(idx).match(patterns['arrivalDate']);
        if (!arrivalDate)
            return null;

        res.checkinTime = JsonLd.toDateTime(arrivalDate[1] + " " + arrivalDate[2], dateTimeFormats, locale);
        // match index is relative to the substring, so accumulate the offset
        idx += arrivalDate.index + arrivalDate[0].length;

        // the departure date is expected after the arrival date
        const departureDate = text.substring(idx).match(patterns['departureDate']);
        if (!departureDate)
            return null;
        res.checkoutTime = JsonLd.toDateTime(departureDate[1] + " " + departureDate[2], dateTimeFormats, locale);

        const name = text.match(patterns['person']);
        if (!name)
            return null;
        res.underName.name = name[1];

        return res;
    }

    // no locale had a matching booking reference
    return null;
}
0087 
/**
 * Convert the value of a "datetime" attribute into a local date/time value.
 *
 * @param attr attribute value, either starting with an ISO date/time
 *             ("yyyy-MM-ddTHH:mm" with optional ":ss") or with a plain date
 * @returns the ISO date/time prefix as a string without any UTC offset, or,
 *          for date-only input, whatever JsonLd.toDateTime returns for the
 *          first 10 characters
 */
function parseDateTimeAttribute(attr) {
    // valid ISO format (when time is present)
    // Seconds are optional in the accepted input, so take exactly what matched
    // instead of a fixed 19 characters - otherwise part of the offset
    // ("...T15:00+02") survives when seconds are missing.
    const isoDateTime = attr.match(/^\d{4}-\d\d-\d\dT\d\d:\d\d(?::\d\d)?/);
    if (isoDateTime) {
        // chop off UTC offset if present, that is based on time of booking, not time of travel
        return isoDateTime[0];
    }
    return JsonLd.toDateTime(attr.substring(0, 10), "yyyy-MM-dd", "en");
}
0096 
/**
 * Fill in the parts of a lodging reservation that are extracted the same way
 * for all HTML layouts: contact details and reservation number from links,
 * check-in/check-out from the first two <time> elements, and the guest name
 * from the document text.
 *
 * @param doc  HTML document being extracted
 * @param node extractor node (not used here)
 * @param res  reservation object to complete; modified in place
 * @returns res
 */
function parseHtmlCommon(doc, node, res)
{
    const aElems = doc.eval('//a');
    // 'const' matters: an undeclared loop variable leaks a global and is an
    // error in strict mode
    for (const aElem of aElems) {
        const href = aElem.attribute('href');
        if (href.startsWith('tel:')) {
            res.reservationFor.telephone = aElem.content;
        } else if (href.startsWith('mailto:')) {
            res.reservationFor.email = href.substring(7);
            // reservation id is the prefix in the mailto link, unlike other occurrences this seems most reliably present
            // ... but not every mailto link carries one, so don't fail on those
            const reservationId = href.match(/mailto:(\d+)-/);
            if (reservationId) {
                res.reservationNumber = reservationId[1];
            }
        } else if (aElem.attribute('universal') == 'true') {
            res.reservationFor.name = aElem.content;
        } else if (!res.modifyReservationUrl && href.startsWith("https:") && /pbsource=(?:email_change|conf_email_modify);/.test(href)) {
            // only the first matching link is kept
            res.modifyReservationUrl = href;
        }
    }

    // first <time> element is taken as check-in, second as check-out
    const times = doc.eval('//time');
    res.checkinTime = parseDateTimeAttribute(times[0].attribute("datetime"));
    res.checkoutTime = parseDateTimeAttribute(times[1].attribute("datetime"));

    // guest name: first locale whose 'person' pattern matches wins
    const text = doc.root.recursiveContent;
    for (const locale in regExMap) {
        const name = text.match(regExMap[locale]['person']);
        if (name) {
            res.underName.name = name[1];
            break;
        }
    }

    return res;
}
0130 
/**
 * HTML fallback extractor for confirmations that contain a
 * table.mg_conf_hotel_preview element with hotel name, address and phone.
 *
 * @param doc  HTML document being extracted
 * @param node extractor node; when node.result is non-empty nothing is extracted
 * @returns the reservation completed by parseHtmlCommon(), or null when
 *          structured data is already available
 */
function parseHtml(doc, node)
{
    if (node.result.length > 0)
        return null; // this is just backup if we have no structured data
    const res = JsonLd.newLodgingReservation();
    const elem = doc.eval("//table[@class=\"mg_conf_hotel_preview\"]")[0];
    res.reservationFor.name = elem.eval("(.//b|.//strong)")[0].content;

    // 1: street address, 2: city, 3: postal code, 4: country (plus whatever
    // follows up to an optional trailing "-")
    const addrRegex = /^(.*), (.*?), (.*?), ([^,]*?)\s*-?\s*$/;
    let fullAddr = elem.eval(".//tr")[1].recursiveContent;
    let addr = fullAddr.match(addrRegex);

    //HACK: Japanese addresses do not have the country set in Booking.com HTML
    // and have a different HTML structure
    if (!addr) {
        // The first two elements are the hotel name and the hotel name in
        // Japanese, skip to the third
        const addressElement = elem.eval("string(.//tr[3]//td)");
        // Booking.com addresses are always separated by "\n-\n"
        // We split and get the first part, which is the romanized address
        fullAddr = addressElement.split("\n-\n")[0];
        // Replace double spaces from the extraction
        fullAddr = fullAddr.replace(/\s+/g, ' ');
        addr = fullAddr.match(addrRegex);
        // here the components come in the opposite order: postal code first,
        // street address last
        res.reservationFor.address.streetAddress = addr[4];
        res.reservationFor.address.addressLocality = addr[2];
        res.reservationFor.address.postalCode = addr[1];
        // the country belongs to the address object, like in the branch below
        res.reservationFor.address.addressCountry = "Japan";
    } else {
        res.reservationFor.address.streetAddress = addr[1];
        res.reservationFor.address.addressLocality = addr[2];
        res.reservationFor.address.postalCode = addr[3];
        // group 4 can span several lines, the country is on the first one
        res.reservationFor.address.addressCountry = addr[4].split('\n')[0];

        if (fullAddr.match(/CANCELED$/)) {
            res.reservationStatus = "ReservationCancelled";
        }
    }
    res.reservationFor.telephone = elem.eval(".//*[@class=\"u-phone\"]")[0].content;
    return parseHtmlCommon(doc, node, res);
}
0172 
/**
 * HTML fallback extractor for the layout that carries the hotel address in
 * an <address> element, one component per line, separated by ",\n".
 *
 * @param doc  HTML document being extracted
 * @param node extractor node; when node.result is non-empty nothing is extracted
 * @returns the reservation completed by parseHtmlCommon(), or null when
 *          structured data is already available
 */
function parseHtmlAlternative(doc, node)
{
    // only a fallback for documents without structured data
    if (node.result.length > 0) {
        return null;
    }

    const reservation = JsonLd.newLodgingReservation();

    const addressText = doc.eval('//address')[0].content;
    const parts = addressText.split(',\n');
    const count = parts.length;

    // first component is the street, the last three are city, postal code
    // and country; anything in between is ignored
    reservation.reservationFor.address.streetAddress = parts[0];
    reservation.reservationFor.address.addressLocality = parts[count - 3];
    reservation.reservationFor.address.postalCode = parts[count - 2];
    reservation.reservationFor.address.addressCountry = parts[count - 1];

    return parseHtmlCommon(doc, node, reservation);
}