Warning, /libraries/kopeninghours/src/lib/openinghourslexer.l is written in an unsupported language. File is not indexed.
%{
/*
    SPDX-FileCopyrightText: 2020 Volker Krause <vkrause@kde.org>
    SPDX-License-Identifier: LGPL-2.0-or-later
*/

#include "openinghoursparser_p.h"

#include <cstdlib>
#include <cstring>

/* Runs before every rule action: the new token starts where the previous one ended and spans yyleng bytes. */
#define YY_USER_ACTION yylloc->first_column = yylloc->last_column; yylloc->last_column += yyleng;

%}

%option warn nodefault
/* technically the case of all tokens is clearly defined, but reality ignores that in parts, so we do the same */
%option caseless
%option reentrant
%option noyywrap
%option nounput
%option never-interactive
%option bison-bridge
%option bison-locations
%option yylineno

/* NOTE(review): the three alternatives after the bracket expression are presumably distinct non-ASCII space
   characters; verify their bytes when editing, they are indistinguishable from a plain space in most editors. */
SPACE       ([ \t\r\n]| | | )+

CYRILLIC    (а|б|в|г|д|е|ё|ж|з|и|й|к|л|м|н|о|п|р|с|т|у|ф|х|ц|ч|ш|щ|ъ|ы|ь|э|ю|я)

%%

  /* Rule order matters: for matches of equal length the rule listed first wins. */

{SPACE} {}

  /* Plain digit sequences. Four-digit values are disambiguated into a year or an HHMM time, everything else is a generic integer. */
[0-9]+ {
    yylval->num = std::strtol(yytext, nullptr, 10);
    if (yyleng == 4) {
        // 2001-2099, or a value whose last two digits cannot be minutes, is treated as a year
        if ((yylval->num > 2000 && yylval->num < 2100) || (yylval->num >= 1000 && (yylval->num % 100) >= 60)) {
            return T_YEAR;
        }
        // otherwise anything up to 2400 with a valid minute part is a time without separator
        if (yylval->num <= 2400 && (yylval->num % 100) < 60) {
            return T_4DIGIT_TIME;
        }
    }
    return T_INTEGER;
}

;/. { return T_NORMAL_RULE_SEPARATOR; } // technically this should have space after the semicolon, but that is not always followed in OSM data
", " { return T_ADDITIONAL_RULE_SEPARATOR; }
"||" { return T_FALLBACK_SEPARATOR; } // technically this should have a space on either side, but that is not always followed in OSM data

"open"    { yylval->state = State::Open; return T_STATE; }
"closed"  { yylval->state = State::Closed; return T_STATE; }
"off"     { yylval->state = State::Off; return T_STATE; }
"unknown" { yylval->state = State::Unknown; return T_STATE; }

"24/7" { return T_24_7; }

"+" { return T_PLUS; }
-|‒|–|‑|—|―|-|−|ー { return T_MINUS; }
"/" { return T_SLASH; }
":" { return T_COLON; }
,/. { return T_COMMA; }

[,;] {} // eat trailing commas/semicolons, while invalid those occur commonly in OSM data. Practically this is done indirectly in combination with the above rules as we cannot lookahead to EOF

"dawn"    { yylval->time = { Time::Dawn, 0, 0 }; return T_EVENT; }
"sunrise" { yylval->time = { Time::Sunrise, 0, 0 }; return T_EVENT; }
"sunset"  { yylval->time = { Time::Sunset , 0, 0 }; return T_EVENT; }
"dusk"    { yylval->time = { Time::Dusk, 0, 0 }; return T_EVENT; }

"[" { return T_LBRACKET; }
"]" { return T_RBRACKET; }
"(" { return T_LPAREN; }
")" { return T_RPAREN; }

"PH" { return T_PH; }
"SH" { return T_SH; }

" day"    { return T_KEYWORD_DAY; }
" days"   { return T_KEYWORD_DAY; }
"week"    { return T_KEYWORD_WEEK; }
"easter"  { return T_EASTER; }
"whitsun" { return T_WHITSUN; } // non-standard, will be turned into "easter +49 days"

  /* am/pm time format support, non-standard. The position relative to the generic number rule is irrelevant:
     these patterns always consume at least one character more than the digits alone, and the longest match wins. */
[0-5]?[0-9](\ ?a\.?m\.?|a) { yylval->num = std::strtol(yytext, nullptr, 10); return T_ALT_TIME_AM; }
[0-5]?[0-9](\ ?p\.?m\.?|p) { yylval->num = std::strtol(yytext, nullptr, 10); return T_ALT_TIME_PM; }

  /* technically weekday names should be two letter English abbreviations, but reality is more creative */
Mondays?    { yylval->num = 1; return T_WEEKDAY; }
Tuesdays?   { yylval->num = 2; return T_WEEKDAY; }
Wednesdays? { yylval->num = 3; return T_WEEKDAY; }
Thursdays?  { yylval->num = 4; return T_WEEKDAY; }
Fridays?    { yylval->num = 5; return T_WEEKDAY; }
Saturdays?  { yylval->num = 6; return T_WEEKDAY; }
Sundays?    { yylval->num = 7; return T_WEEKDAY; }

Tues   { yylval->num = 2; return T_WEEKDAY; }
Thurs? { yylval->num = 4; return T_WEEKDAY; }

Mon? { yylval->num = 1; return T_WEEKDAY; }
Tue? { yylval->num = 2; return T_WEEKDAY; }
Wed? { yylval->num = 3; return T_WEEKDAY; }
Thu? { yylval->num = 4; return T_WEEKDAY; }
Fri? { yylval->num = 5; return T_WEEKDAY; }
Sat? { yylval->num = 6; return T_WEEKDAY; }
Sun? { yylval->num = 7; return T_WEEKDAY; }

Mo\. { yylval->num = 1; return T_WEEKDAY; }
Tu\. { yylval->num = 2; return T_WEEKDAY; }
We\. { yylval->num = 3; return T_WEEKDAY; }
Th\. { yylval->num = 4; return T_WEEKDAY; }
Fr\. { yylval->num = 5; return T_WEEKDAY; }
Sa\. { yylval->num = 6; return T_WEEKDAY; }
Su\. { yylval->num = 7; return T_WEEKDAY; }

  /* same for month names, technically those should be three letter English abbreviations */
  /* ("May" has no separate long form, it is covered by the abbreviation rule below) */
"January"   { yylval->num = 1; return T_MONTH; }
"February"  { yylval->num = 2; return T_MONTH; }
"March"     { yylval->num = 3; return T_MONTH; }
"April"     { yylval->num = 4; return T_MONTH; }
"June"      { yylval->num = 6; return T_MONTH; }
"July"      { yylval->num = 7; return T_MONTH; }
"August"    { yylval->num = 8; return T_MONTH; }
"September" { yylval->num = 9; return T_MONTH; }
"October"   { yylval->num = 10; return T_MONTH; }
"November"  { yylval->num = 11; return T_MONTH; }
"December"  { yylval->num = 12; return T_MONTH; }

Jan\.?   { yylval->num = 1; return T_MONTH; }
Feb\.?   { yylval->num = 2; return T_MONTH; }
Mar\.?   { yylval->num = 3; return T_MONTH; }
Apr\.?   { yylval->num = 4; return T_MONTH; }
May\.?   { yylval->num = 5; return T_MONTH; }
Jun\.?   { yylval->num = 6; return T_MONTH; }
Jul\.?   { yylval->num = 7; return T_MONTH; }
Aug\.?   { yylval->num = 8; return T_MONTH; }
Sept?\.? { yylval->num = 9; return T_MONTH; }
Oct\.?   { yylval->num = 10; return T_MONTH; }
Nov\.?   { yylval->num = 11; return T_MONTH; }
Dec\.?   { yylval->num = 12; return T_MONTH; }

  /* Month names in French */
"Janvier"   { yylval->num = 1; return T_MONTH; }
"Février"   { yylval->num = 2; return T_MONTH; }
"Mars"      { yylval->num = 3; return T_MONTH; }
"Avril"     { yylval->num = 4; return T_MONTH; }
"Mai"       { yylval->num = 5; return T_MONTH; }
"Juin"      { yylval->num = 6; return T_MONTH; }
"Juillet"   { yylval->num = 7; return T_MONTH; }
"Août"      { yylval->num = 8; return T_MONTH; }
"Septembre" { yylval->num = 9; return T_MONTH; }
"Octobre"   { yylval->num = 10; return T_MONTH; }
"Novembre"  { yylval->num = 11; return T_MONTH; }
"Décembre"  { yylval->num = 12; return T_MONTH; }

  /* Month names in Russian */
"Январь"   { yylval->num = 1; return T_MONTH; }
"Февраль"  { yylval->num = 2; return T_MONTH; }
"Март"     { yylval->num = 3; return T_MONTH; }
"Апрель"   { yylval->num = 4; return T_MONTH; }
"Май"      { yylval->num = 5; return T_MONTH; }
"Июнь"     { yylval->num = 6; return T_MONTH; }
"Июль"     { yylval->num = 7; return T_MONTH; }
"Август"   { yylval->num = 8; return T_MONTH; }
"Сентябрь" { yylval->num = 9; return T_MONTH; }
"Октябрь"  { yylval->num = 10; return T_MONTH; }
"Ноябрь"   { yylval->num = 11; return T_MONTH; }
"Декабрь"  { yylval->num = 12; return T_MONTH; }

  /* Month abbreviations in Dutch */
"Mrt" { yylval->num = 3; return T_MONTH; }
"Mei" { yylval->num = 5; return T_MONTH; }
"Okt" { yylval->num = 10; return T_MONTH; }

  /* different quote types are sometimes mixed and/or used nested, so this is a compromise to catch most of them */
  /* Comment in plain ASCII quotes: the referenced string excludes the one-byte quote on either side. */
["][^"]*["] {
    yylval->strRef.str = yytext + 1;
    yylval->strRef.len = yyleng - 2;
    return T_COMMENT;
}
  /* Comment opened by a typographic quote and closed by any supported quote.
     The scanner works on bytes, so the body is spelled out as "any byte except an ASCII quote, and any 0xE2-led
     sequence other than E2 80 9C / E2 80 9D / E2 80 9E" (the UTF-8 encodings of the three typographic quotes).
     Listing the quote characters in a negated bracket expression would exclude each of their bytes individually
     and thereby reject unrelated characters sharing one of those bytes. */
(“|”|„)([^"\xE2]|\xE2[\x81-\xBF]|\xE2\x80[\x80-\x9B\x9F-\xBF])*(\"|“|”|„) {
    // quotes have different byte lengths, so determine how much to strip on each side
    int startOffset = 1;
    int endOffset = 1;
    for (const auto quote : { "\"", "“", "”", "„" }) {
        const int len = std::strlen(quote);
        if (yyleng <= len) {
            continue;
        }
        if (std::strncmp(yytext, quote, len) == 0) {
            startOffset = len;
        }
        if (std::strncmp(yytext + yyleng - len, quote, len) == 0) {
            endOffset = len;
        }
    }
    yylval->strRef.str = yytext + startOffset;
    yylval->strRef.len = yyleng - startOffset - endOffset;
    return T_COMMENT;
}

  /* various alternative formats, none of this is remotely compliant with the specification, but appears in reality nevertheless */

  /* alternative time formats */
:|︓|ː|\. { return T_ALT_TIME_SEP; }
h|時 { return T_ALT_TIME_SEP_OR_SUFFIX; }

  /* alternative range separators */
~|~|〜|to|through|à|bis|a|ás|às|as|au|al|→|до|дo|пo { return T_ALT_RANGE_SEP; }

  /* localized state names */
ferm(e|é)|geschlossen|ruhetag|encerrado|chiuso|закры{CYRILLIC}*|Вых{CYRILLIC}*|cerrado|libre { yylval->state = State::Closed; return T_STATE; }
откры{CYRILLIC}*|abierto { yylval->state = State::Open; return T_STATE; }
неизв{CYRILLIC}* { yylval->state = State::Unknown; return T_STATE; }

  /* German localized day names. */
Montags?      { yylval->num = 1; return T_WEEKDAY; }
Die(nstags?)? { yylval->num = 2; return T_WEEKDAY; }
Mittwochs?    { yylval->num = 3; return T_WEEKDAY; }
Donnerstags?  { yylval->num = 4; return T_WEEKDAY; }
Freitags?     { yylval->num = 5; return T_WEEKDAY; }
Samstags?     { yylval->num = 6; return T_WEEKDAY; }
Sonntags?     { yylval->num = 7; return T_WEEKDAY; }
Feiertage?    { return T_PH; }

  /* French, Spanish, Italian */
Lu { yylval->num = 1; return T_WEEKDAY; }
  /* French, Italian */
Me { yylval->num = 3; return T_WEEKDAY; }
  /* German, Spanish, Italian */
Mi { yylval->num = 3; return T_WEEKDAY; }
  /* French */
Je { yylval->num = 4; return T_WEEKDAY; }
  /* Italian */
Gi { yylval->num = 4; return T_WEEKDAY; }
  /* French, Italian */
Ve { yylval->num = 5; return T_WEEKDAY; }
  /* German */
So { yylval->num = 7; return T_WEEKDAY; }

  /* "Ma" conflicts between Dutch and French+Spanish+Italian...
     "Di" conflicts between Dutch+German and French...
     "Do" conflicts between Dutch+German and Spanish+Italian... */

  /* French localized day names. */
Lun(di)?    { yylval->num = 1; return T_WEEKDAY; }
Mardi       { yylval->num = 2; return T_WEEKDAY; }
Mer(credi)? { yylval->num = 3; return T_WEEKDAY; }
Jeu(di)?    { yylval->num = 4; return T_WEEKDAY; }
Ven(dredi)? { yylval->num = 5; return T_WEEKDAY; }
Sam(edi)?   { yylval->num = 6; return T_WEEKDAY; }
Dim(anche)? { yylval->num = 7; return T_WEEKDAY; }
"jours fériés" { return T_PH; }

  /* Spanish localized day names */
Lunes         { yylval->num = 1; return T_WEEKDAY; }
Martes        { yylval->num = 2; return T_WEEKDAY; }
Mi(é|e)rcoles { yylval->num = 3; return T_WEEKDAY; }
Jueves        { yylval->num = 4; return T_WEEKDAY; }
Vie(rnes)?    { yylval->num = 5; return T_WEEKDAY; }
S(á|a)b(ado)? { yylval->num = 6; return T_WEEKDAY; }
Dom(ingo)?    { yylval->num = 7; return T_WEEKDAY; }

  /* Italian localized day names */
Luned(ì|i)      { yylval->num = 1; return T_WEEKDAY; }
Marted(ì|i)     { yylval->num = 2; return T_WEEKDAY; }
Mercoled(ì|i)   { yylval->num = 3; return T_WEEKDAY; }
Gio(ved(ì|i))?  { yylval->num = 4; return T_WEEKDAY; }
Venerd(ì|i)     { yylval->num = 5; return T_WEEKDAY; }
Sabato          { yylval->num = 6; return T_WEEKDAY; }
Domenica        { yylval->num = 7; return T_WEEKDAY; }

  /* Portuguese localized day names */
  /* NOTE(review): a bare "feira" is reported as Monday here - confirm this is intended for inputs like "terça-feira" */
feira     { yylval->num = 1; return T_WEEKDAY; }
segunda   { yylval->num = 1; return T_WEEKDAY; }
ter(ç|c)a { yylval->num = 2; return T_WEEKDAY; }
quarta    { yylval->num = 3; return T_WEEKDAY; }
quinta    { yylval->num = 4; return T_WEEKDAY; }
sexta     { yylval->num = 5; return T_WEEKDAY; }

  /* Japanese localized day names */
月|月曜|月曜日 { yylval->num = 1; return T_WEEKDAY; }
火|火曜|火曜日 { yylval->num = 2; return T_WEEKDAY; }
水|水曜|水曜日 { yylval->num = 3; return T_WEEKDAY; }
木|木曜|木曜日 { yylval->num = 4; return T_WEEKDAY; }
金|金曜|金曜日 { yylval->num = 5; return T_WEEKDAY; }
土|土曜|土曜日 { yylval->num = 6; return T_WEEKDAY; }
日|日曜|日曜日 { yylval->num = 7; return T_WEEKDAY; }
祝日 { return T_PH; }

  /* Indonesian localized day names */
Senin  { yylval->num = 1; return T_WEEKDAY; }
Selasa { yylval->num = 2; return T_WEEKDAY; }
Rabu   { yylval->num = 3; return T_WEEKDAY; }
Kamis  { yylval->num = 4; return T_WEEKDAY; }
Jumat  { yylval->num = 5; return T_WEEKDAY; }
Sabtu  { yylval->num = 6; return T_WEEKDAY; }
Minggu { yylval->num = 7; return T_WEEKDAY; }

  /* Russian localized day names */
  /* NOTE(review): "Пк" for Monday looks like it might be meant as "Пн" - confirm against real-world data */
Понедельник|Пон|Пк  { yylval->num = 1; return T_WEEKDAY; }
Вторник|Вто|Вт      { yylval->num = 2; return T_WEEKDAY; }
Среда|Сре|Ср        { yylval->num = 3; return T_WEEKDAY; }
Четверг|Чет|Чт      { yylval->num = 4; return T_WEEKDAY; }
Пятница|Пят|Пя|Пт   { yylval->num = 5; return T_WEEKDAY; }
Суббота|Суб|Су|Сб   { yylval->num = 6; return T_WEEKDAY; }
Воскресенье|Вос|Во  { yylval->num = 7; return T_WEEKDAY; }

  /* Dutch localized day names */
Maandag        { yylval->num = 1; return T_WEEKDAY; }
Dinsdag        { yylval->num = 2; return T_WEEKDAY; }
Wo(e(nsdag)?)? { yylval->num = 3; return T_WEEKDAY; }
Donderdag      { yylval->num = 4; return T_WEEKDAY; }
Vr(ijdag)?     { yylval->num = 5; return T_WEEKDAY; }
Za(terdag)?    { yylval->num = 6; return T_WEEKDAY; }
Zo(ndag)?      { yylval->num = 7; return T_WEEKDAY; }

  /* creative rule separators */
、|and|et|e|y|und|& { return T_ADDITIONAL_RULE_SEPARATOR; }

  /* skip filler words */
  /* Note: the 'с' is not an ASCII 'c'! */
from|von|du|de|le|das|分|uhr|"en continu"|с|от {}

  /* localized time event names */
рассвет{CYRILLIC}*           { yylval->time = { Time::Dawn, 0, 0 }; return T_EVENT; }
сумер{CYRILLIC}?к{CYRILLIC}* { yylval->time = { Time::Dusk, 0, 0 }; return T_EVENT; }
восход{CYRILLIC}*            { yylval->time = { Time::Sunrise, 0, 0 }; return T_EVENT; }
закат{CYRILLIC}*             { yylval->time = { Time::Sunset , 0, 0 }; return T_EVENT; }

  /* Catch-all, required by "nodefault": any byte not consumed by a rule above is reported as invalid. */
. {
    //printf("unexpected character: %s at %d:%d\n", yytext, yylloc->first_line, yylloc->first_column);
    return T_INVALID;
}

%%