// File indexing completed on 2024-12-29 04:51:02
/*
   SPDX-FileCopyrightText: 2018 Benjamin Port <benjamin.port@kde.org>

   SPDX-License-Identifier: LGPL-2.0-or-later
*/

// Per-locale regular expressions used by the plain-text fallback extractor.
// Outer keys are locale names, inner keys name the field a pattern extracts.
// A locale may define only some of the fields (es_ES only has 'person').
//
// Capture groups of every 'hotelInformation' pattern, as consumed by main():
// 1: address, 2: city, 3: postal code, 4: country, 5: phone
var regExMap = {
    en_US: {
        bookingRef: /(?:Booking number|Confirmation:) +([0-9]*)\s+/,
        hotelInformation: / *(.+), (.+), (.+), (.+)(?: -|\n)\s+Phone: (\+[0-9 ]*)\s+/,
        hotelName: /(?:\[checkmark\.png\] |\d\n)(.*?)(?: is expecting you on|\n *\[)/,
        arrivalDate: /Check-in *([A-z]+ [0-9]{1,2} [A-z]+ [0-9]+|[A-z]+, [A-z]+ \d{1,2}, \d{4}) \(f?r?o?m? ?([0-9]{1,2}:[0-9]{2})[^\)]*\)/,
        departureDate: /Check-out *([A-z]+ [0-9]{1,2} [A-z]+ [0-9]+|[A-z]+, [A-z]+ \d{1,2}, \d{4}) \(.*?([0-9]{1,2}:[0-9]{2})\)/,
        person: /Guest name[\n\s]+(.*?)(?:\n| Edit guest name)/,
    },
    fr_FR: {
        bookingRef: /Numéro de réservation : ([0-9]*)\s+/,
        hotelInformation: /(.+), (.+), (.+), (.+) -\s+Téléphone : (\+[0-9]*)\s+/,
        hotelName: /L'établissement (.*) vous attend le/,
        arrivalDate: /Arrivée ([a-z]+ [0-9]{1,2} [a-zûé]+ [0-9]+) \(([0-9]{1,2}:[0-9]{2}) - ([0-9]{1,2}:[0-9]{2})\)/,
        departureDate: /Départ ([a-z]+ [0-9]{1,2} [a-zûé]+ [0-9]+) \([0-9]{1,2}:[0-9]{2} - ([0-9]{1,2}:[0-9]{2})\)/,
        person: /Clients[\n\s]+(.*?)(?:\n| Modifier le nom du client)/,
    },
    de_DE: {
        bookingRef: /Buchungsnummer: ([0-9]*)\s+/,
        hotelInformation: /(.+), (.+), (.+), (.+) -\s+Telefon: (\+[0-9 \-]+)\n/,
        hotelName: /\[checkmark.png\] Die Unterkunft (.*)\s+erwartet Sie/,
        arrivalDate: /Anreise ([A-Z][a-z]+, [0-9]{1,2}\. \S+ [0-9]{4}) \(ab ([0-9]{1,2}:[0-9]{2})\)/,
        departureDate: /Abreise ([A-Z][a-z]+, [0-9]{1,2}\. \S+ [0-9]{4}) \(bis ([0-9]{1,2}:[0-9]{2})\)/,
        person: /Name des Gastes[\n\s]+(.*?)(?:\n| Name des Gastes bearbeiten)/,
    },
    es_ES: {
        person: /Nombre del huésped[\n\s]+(.*?)\n/,
    },
};

/**
 * Plain-text fallback: builds a lodging reservation from the text of a
 * confirmation mail using the per-locale patterns in regExMap.
 *
 * @param text plain text to search
 * @param node extractor node; when node.result is non-empty nothing is done
 * @returns the filled reservation; null when results already exist or when a
 *          locale's booking reference matched but a later field did not;
 *          undefined when no locale's booking reference matched at all
 */
function main(text, node) {
    if (node.result.length > 0) {
        return null; // this is just backup if we have no structured data
    }

    // Candidate formats handed to JsonLd.toDateTime for "<date> <time>" strings.
    const dateTimeFormats = ["dddd d MMMM yyyy hh:mm", "dddd, d. MMMM yyyy hh:mm", "dddd, MMMM d, yyyy hh:mm"];
    const res = JsonLd.newLodgingReservation();

    for (const locale in regExMap) {
        const patterns = regExMap[locale];

        // Locales without a booking reference pattern cannot be identified.
        // Check this first: String.prototype.match(undefined) matches the
        // empty string and would otherwise look like a successful match.
        if (!patterns.bookingRef) {
            continue;
        }
        const bookingRef = text.match(patterns.bookingRef);
        // If no booking reference go to the next locale
        if (!bookingRef) {
            continue;
        }
        res.reservationNumber = bookingRef[1];

        const hotelName = text.match(patterns.hotelName);
        if (!hotelName) {
            return null;
        }
        res.reservationFor.name = hotelName[1];

        const hotel = text.match(patterns.hotelInformation);
        if (!hotel) {
            return null;
        }
        res.reservationFor.address.streetAddress = hotel[1];
        res.reservationFor.address.postalCode = hotel[3];
        res.reservationFor.address.addressLocality = hotel[2];
        res.reservationFor.address.addressCountry = hotel[4];
        res.reservationFor.telephone = hotel[5];

        // The dates are only searched for after the hotel information block;
        // idx is the absolute offset in text where the next search starts.
        let idx = hotel.index + hotel[0].length;

        const arrivalDate = text.slice(idx).match(patterns.arrivalDate);
        if (!arrivalDate) {
            return null;
        }
        res.checkinTime = JsonLd.toDateTime(arrivalDate[1] + " " + arrivalDate[2], dateTimeFormats, locale);
        // arrivalDate.index is relative to the slice, so add it to the offset.
        idx += arrivalDate.index + arrivalDate[0].length;

        const departureDate = text.slice(idx).match(patterns.departureDate);
        if (!departureDate) {
            return null;
        }
        res.checkoutTime = JsonLd.toDateTime(departureDate[1] + " " + departureDate[2], dateTimeFormats, locale);

        const name = text.match(patterns.person);
        if (!name) {
            return null;
        }
        res.underName.name = name[1];

        return res;
    }
}

/**
 * Converts the value of a "datetime" attribute.
 *
 * @param attr attribute value, either a full ISO date/time or a date only
 * @returns the first 19 characters of attr when it starts with an ISO
 *          date and time, otherwise the result of JsonLd.toDateTime for the
 *          first 10 characters parsed as "yyyy-MM-dd"
 */
function parseDateTimeAttribute(attr) {
    // valid ISO format (when time is present)
    if (/^\d{4}-\d\d-\d\dT\d\d:\d\d/.test(attr)) {
        // chop off UTC offset if present, that is based on time of booking, not time of travel
        return attr.slice(0, 19);
    }
    return JsonLd.toDateTime(attr.slice(0, 10), "yyyy-MM-dd", "en");
}

/**
 * Fills the fields shared by both HTML layouts: data taken from links,
 * check-in/check-out times from the first two <time> elements and the guest
 * name from the document text.
 *
 * @param doc HTML document to query
 * @param node extractor node (unused here, kept for a uniform signature)
 * @param res reservation to fill in; modified in place
 * @returns res
 */
function parseHtmlCommon(doc, node, res) {
    for (const aElem of doc.eval('//a')) {
        const href = aElem.attribute('href');
        if (href.startsWith('tel:')) {
            res.reservationFor.telephone = aElem.content;
        } else if (href.startsWith('mailto:')) {
            res.reservationFor.email = href.slice(7);
            // reservation id is the prefix in the mailto link, unlike other occurrences this seems most reliably present
            // Not every mailto link carries that prefix; leave the number untouched then instead of throwing.
            const reservationId = href.match(/mailto:(\d+)-/);
            if (reservationId) {
                res.reservationNumber = reservationId[1];
            }
        } else if (aElem.attribute('universal') === 'true') {
            res.reservationFor.name = aElem.content;
        } else if (!res.modifyReservationUrl && href.startsWith("https:") && /pbsource=(?:email_change|conf_email_modify);/.test(href)) {
            // only the first matching link is kept
            res.modifyReservationUrl = href;
        }
    }

    const times = doc.eval('//time');
    res.checkinTime = parseDateTimeAttribute(times[0].attribute("datetime"));
    res.checkoutTime = parseDateTimeAttribute(times[1].attribute("datetime"));

    // The guest name label is localized, use the first locale whose pattern matches.
    const text = doc.root.recursiveContent;
    for (const locale in regExMap) {
        const name = text.match(regExMap[locale]['person']);
        if (name) {
            res.underName.name = name[1];
            break;
        }
    }

    return res;
}

/**
 * HTML fallback for mails containing a "mg_conf_hotel_preview" table.
 *
 * @param doc HTML document to query
 * @param node extractor node; when node.result is non-empty nothing is done
 * @returns the filled reservation, or null when results already exist
 */
function parseHtml(doc, node) {
    if (node.result.length > 0) {
        return null; // this is just backup if we have no structured data
    }
    const res = JsonLd.newLodgingReservation();
    const elem = doc.eval("//table[@class=\"mg_conf_hotel_preview\"]")[0];
    res.reservationFor.name = elem.eval("(.//b|.//strong)")[0].content;

    // 1: street, 2: city, 3: postal code, 4: country (optionally followed by " -")
    const addrRegex = /^(.*), (.*?), (.*?), ([^,]*?)\s*-?\s*$/;
    let fullAddr = elem.eval(".//tr")[1].recursiveContent;

    //HACK: Japanese addresses do not have the country set in Booking.com HTML
    // and have a different HTML structure
    if (!addrRegex.test(fullAddr)) {
        // The first two elements are the hotel name and the hotel name in
        // Japanese, skip to the third
        const addressElement = elem.eval("string(.//tr[3]//td)");
        // Booking.com addresses are always separated by "\n-\n"
        // We split and get the first part, which is the romanized address
        fullAddr = addressElement.split("\n-\n")[0];
        // Replace double spaces from the extraction
        fullAddr = fullAddr.replace(/\s+/g, ' ');
        // Here the groups are in reverse order: postal code first, street last.
        const addr = fullAddr.match(addrRegex);
        res.reservationFor.address.streetAddress = addr[4];
        res.reservationFor.address.addressLocality = addr[2];
        res.reservationFor.address.postalCode = addr[1];
        // The country belongs to the address object, like in the branch below.
        res.reservationFor.address.addressCountry = "Japan";
    } else {
        const addr = fullAddr.match(addrRegex);
        res.reservationFor.address.streetAddress = addr[1];
        res.reservationFor.address.addressLocality = addr[2];
        res.reservationFor.address.postalCode = addr[3];
        // the last group can span several lines, only the first one is the country
        res.reservationFor.address.addressCountry = addr[4].split('\n')[0];

        if (fullAddr.match(/CANCELED$/)) {
            res.reservationStatus = "ReservationCancelled";
        }
    }
    res.reservationFor.telephone = elem.eval(".//*[@class=\"u-phone\"]")[0].content;
    return parseHtmlCommon(doc, node, res);
}

/**
 * HTML fallback for mails that carry the hotel address in an <address> element.
 *
 * @param doc HTML document to query
 * @param node extractor node; when node.result is non-empty nothing is done
 * @returns the filled reservation, or null when results already exist
 */
function parseHtmlAlternative(doc, node) {
    if (node.result.length > 0) {
        return null; // this is just backup if we have no structured data
    }
    const res = JsonLd.newLodgingReservation();

    // The first part is the street; city, postal code and country are
    // counted from the end of the list.
    const addrElems = doc.eval('//address')[0].content.split(',\n');
    res.reservationFor.address.streetAddress = addrElems[0];
    res.reservationFor.address.addressLocality = addrElems[addrElems.length - 3];
    res.reservationFor.address.postalCode = addrElems[addrElems.length - 2];
    res.reservationFor.address.addressCountry = addrElems[addrElems.length - 1];
    return parseHtmlCommon(doc, node, res);
}