Warning, /libraries/kopeninghours/src/lib/openinghourslexer.l is written in an unsupported language. File is not indexed.

0001 %{
0002 /*
0003     SPDX-FileCopyrightText: 2020 Volker Krause <vkrause@kde.org>
0004     SPDX-License-Identifier: LGPL-2.0-or-later
0005 */
0006 
0007 #include "openinghoursparser_p.h"
0008 
0009 #include <cstring>
0010 // Runs before the action of every matched rule: the new token starts at the column where the previous match ended and spans yyleng bytes. Line numbers are not touched here.
0011 #define YY_USER_ACTION yylloc->first_column = yylloc->last_column; yylloc->last_column += yyleng;
0012 
0013 %}
0014 
0015 %option warn nodefault
0016 /* technically the case of all tokens is clearly defined, but reality ignores that in parts, so we do the same */
0017 %option caseless
0018 %option reentrant
0019 %option noyywrap
0020 %option nounput
0021 %option never-interactive
0022 %option bison-bridge
0023 %option bison-locations
0024 %option yylineno
0025 
0026 SPACE       ([ \t\r\n]| | | )+
0027 
0028 CYRILLIC    (а|б|в|г|д|е|ё|ж|з|и|й|к|л|м|н|о|п|р|с|т|у|ф|х|ц|ч|ш|щ|ъ|ы|ь|э|ю|я)
0029 
0030 %%
0031 
0032 {SPACE} {}
0033 
0034 [0-9]+ {
0035     // plain integer, unless it is exactly four digits long: those are disambiguated into a year or a hhmm time
0036     yylval->num = std::strtol(yytext, nullptr, 10);
0037     if (yyleng != 4) {
0038         return T_INTEGER;
0039     }
0040     const bool validMinutes = (yylval->num % 100) < 60;
0041     if ((yylval->num > 2000 && yylval->num < 2100) || (yylval->num >= 1000 && !validMinutes)) {
0042         return T_YEAR;
0043     }
0044     return (yylval->num <= 2400 && validMinutes) ? T_4DIGIT_TIME : T_INTEGER;
0045 }
0046 
0047 ;/. { return T_NORMAL_RULE_SEPARATOR; } // technically this should have space after the semicolon, but that is not always followed in OSM data
0048 ", " { return T_ADDITIONAL_RULE_SEPARATOR; }
0049 "||" { return T_FALLBACK_SEPARATOR; } // technically this should have a space on either side, but that is not always followed in OSM data
0050 
0051 "open"    { yylval->state = State::Open;    return T_STATE; }
0052 "closed"  { yylval->state = State::Closed;  return T_STATE; }
0053 "off"     { yylval->state = State::Off;  return T_STATE; }
0054 "unknown" { yylval->state = State::Unknown; return T_STATE; }
0055 
0056 "24/7" { return T_24_7; }
0057 
0058 "+" { return T_PLUS; }
0059 -|‒|–|‑|—|―|-|−|ー { return T_MINUS; }
0060 "/" { return T_SLASH; }
0061 ":" { return T_COLON; }
0062 ,/. { return T_COMMA; }
0063 
0064 [,;] {} // eat trailing commas/semicolons; while invalid, those occur commonly in OSM data. We cannot look ahead to EOF, so this is done indirectly: the ; and , rules above carry the trailing context /. and therefore only match when another non-newline character follows, every other comma/semicolon ends up here and is dropped.
0065 
0066 "dawn"    { yylval->time = { Time::Dawn,    0, 0 }; return T_EVENT; }
0067 "sunrise" { yylval->time = { Time::Sunrise, 0, 0 }; return T_EVENT; }
0068 "sunset"  { yylval->time = { Time::Sunset , 0, 0 }; return T_EVENT; }
0069 "dusk"    { yylval->time = { Time::Dusk,    0, 0 }; return T_EVENT; }
0070 
0071 "[" { return T_LBRACKET; }
0072 "]" { return T_RBRACKET; }
0073 "(" { return T_LPAREN; }
0074 ")" { return T_RPAREN; }
0075 
0076 "PH" { return T_PH; }
0077 "SH" { return T_SH; }
0078 
0079 " day" { return T_KEYWORD_DAY; }
0080 " days" { return T_KEYWORD_DAY; }
0081 "week" { return T_KEYWORD_WEEK; }
0082 "easter" { return T_EASTER; }
0083 "whitsun" { return T_WHITSUN; } // non-standard, will be turned into "easter +49 days"
0084 
0085   /* am/pm time format support, non-standard. The position relative to the generic number token does not matter: the scanner prefers the longest match, and these patterns always match more than the digits alone. */
0086 [0-5]?[0-9](\ ?a\.?m\.?|a) { yylval->num = std::strtol(yytext, nullptr, 10); return T_ALT_TIME_AM; }
0087 [0-5]?[0-9](\ ?p\.?m\.?|p) { yylval->num = std::strtol(yytext, nullptr, 10); return T_ALT_TIME_PM; }
0088 
0089  /* technically weekday names should be two letter English abbreviations, but reality is more creative */
0090 Mondays?    { yylval->num = 1; return T_WEEKDAY; }
0091 Tuesdays?   { yylval->num = 2; return T_WEEKDAY; }
0092 Wednesdays? { yylval->num = 3; return T_WEEKDAY; }
0093 Thursdays?  { yylval->num = 4; return T_WEEKDAY; }
0094 Fridays?    { yylval->num = 5; return T_WEEKDAY; }
0095 Saturdays?  { yylval->num = 6; return T_WEEKDAY; }
0096 Sundays?    { yylval->num = 7; return T_WEEKDAY; }
0097 
0098 Tues  { yylval->num = 2; return T_WEEKDAY; }
0099 Thurs? { yylval->num = 4; return T_WEEKDAY; }
0100 
0101 Mon? { yylval->num = 1; return T_WEEKDAY; }
0102 Tue? { yylval->num = 2; return T_WEEKDAY; }
0103 Wed? { yylval->num = 3; return T_WEEKDAY; }
0104 Thu? { yylval->num = 4; return T_WEEKDAY; }
0105 Fri? { yylval->num = 5; return T_WEEKDAY; }
0106 Sat? { yylval->num = 6; return T_WEEKDAY; }
0107 Sun? { yylval->num = 7; return T_WEEKDAY; }
0108 
0109 Mo\. { yylval->num = 1; return T_WEEKDAY; }
0110 Tu\. { yylval->num = 2; return T_WEEKDAY; }
0111 We\. { yylval->num = 3; return T_WEEKDAY; }
0112 Th\. { yylval->num = 4; return T_WEEKDAY; }
0113 Fr\. { yylval->num = 5; return T_WEEKDAY; }
0114 Sa\. { yylval->num = 6; return T_WEEKDAY; }
0115 Su\. { yylval->num = 7; return T_WEEKDAY; }
0116 
0117  /* same for month names, technically those should be three letter English abbreviations */
0118 "January" { yylval->num = 1; return T_MONTH; }
0119 "February" { yylval->num = 2; return T_MONTH; }
0120 "March" { yylval->num = 3; return T_MONTH; }
0121 "April" { yylval->num = 4; return T_MONTH; }
0122 "June" { yylval->num = 6; return T_MONTH; }
0123 "July" { yylval->num = 7; return T_MONTH; }
0124 "August" { yylval->num = 8; return T_MONTH; }
0125 "September" { yylval->num = 9; return T_MONTH; }
0126 "October" { yylval->num = 10; return T_MONTH; }
0127 "November" { yylval->num = 11; return T_MONTH; }
0128 "December" { yylval->num = 12; return T_MONTH; }
0129 
0130 Jan\.? { yylval->num = 1; return T_MONTH; }
0131 Feb\.? { yylval->num = 2; return T_MONTH; }
0132 Mar\.? { yylval->num = 3; return T_MONTH; }
0133 Apr\.? { yylval->num = 4; return T_MONTH; }
0134 May\.? { yylval->num = 5; return T_MONTH; }
0135 Jun\.? { yylval->num = 6; return T_MONTH; }
0136 Jul\.? { yylval->num = 7; return T_MONTH; }
0137 Aug\.? { yylval->num = 8; return T_MONTH; }
0138 Sept?\.? { yylval->num = 9; return T_MONTH; }
0139 Oct\.? { yylval->num = 10; return T_MONTH; }
0140 Nov\.? { yylval->num = 11; return T_MONTH; }
0141 Dec\.? { yylval->num = 12; return T_MONTH; }
0142 
0143  /* Month names in French */
0144 "Janvier" { yylval->num = 1; return T_MONTH; }
0145 "Février" { yylval->num = 2; return T_MONTH; }
0146 "Mars" { yylval->num = 3; return T_MONTH; }
0147 "Avril" { yylval->num = 4; return T_MONTH; }
0148 "Mai" { yylval->num = 5; return T_MONTH; }
0149 "Juin" { yylval->num = 6; return T_MONTH; }
0150 "Juillet" { yylval->num = 7; return T_MONTH; }
0151 "Août" { yylval->num = 8; return T_MONTH; }
0152 "Septembre" { yylval->num = 9; return T_MONTH; }
0153 "Octobre" { yylval->num = 10; return T_MONTH; }
0154 "Novembre" { yylval->num = 11; return T_MONTH; }
0155 "Décembre" { yylval->num = 12; return T_MONTH; }
0156 
0157  /* Month names in Russian */
0158 "Январь"   { yylval->num = 1; return T_MONTH; }
0159 "Февраль"  { yylval->num = 2; return T_MONTH; }
0160 "Март"     { yylval->num = 3; return T_MONTH; }
0161 "Апрель"   { yylval->num = 4; return T_MONTH; }
0162 "Май"      { yylval->num = 5; return T_MONTH; }
0163 "Июнь"     { yylval->num = 6; return T_MONTH; }
0164 "Июль"     { yylval->num = 7; return T_MONTH; }
0165 "Август"   { yylval->num = 8; return T_MONTH; }
0166 "Сентябрь" { yylval->num = 9; return T_MONTH; }
0167 "Октябрь"  { yylval->num = 10; return T_MONTH; }
0168 "Ноябрь"   { yylval->num = 11; return T_MONTH; }
0169 "Декабрь"  { yylval->num = 12; return T_MONTH; }
0170 
0171  /* Month abbreviations in Dutch */
0172 "Mrt" { yylval->num = 3; return T_MONTH; }
0173 "Mei" { yylval->num = 5; return T_MONTH; }
0174 "Okt" { yylval->num = 10; return T_MONTH; }
0175 
0176  /* different quote types are sometimes mixed and/or used nested, so this is a compromise to catch most of them */
0177 ["][^"]*["] {
0178     yylval->strRef.str = yytext + 1; // skip the opening quote, the comment text is referenced inside yytext rather than copied
0179     yylval->strRef.len = yyleng - 2; // neither the opening nor the closing quote is part of the comment
0180     return T_COMMENT;
0181 }
0182 (“|”|„)([^\"\xE2]|\xE2[\x81-\xBF]|\xE2\x80[\x80-\x9B\x9F-\xBF])*(\"|“|”|„) { // comment opened by a typographic quote (U+201C, U+201D, U+201E); the body is matched byte-wise and may hold anything but an ASCII double quote or the UTF-8 encoding (E2 80 9C..9E) of those three quotes
0183     int startOffset = 1; int endOffset = 1; // delimiter sizes in bytes, 1 being the ASCII double quote
0184     for (const auto quote : { "\"", "“", "”", "„" }) {
0185         const int len = std::strlen(quote);
0186         if (yyleng <= len) { // match is too short to contain this delimiter plus anything else
0187             continue;
0188         }
0189         if (std::strncmp(yytext, quote, len) == 0) {
0190             startOffset = len;
0191         }
0192         if (std::strncmp(yytext + yyleng - len, quote, len) == 0) {
0193             endOffset = len;
0194         }
0195     }
0196     yylval->strRef.str = yytext + startOffset; // the comment text is referenced inside yytext rather than copied
0197     yylval->strRef.len = yyleng - startOffset - endOffset; // byte length without the two delimiters
0198     return T_COMMENT;
0199 }
0200 
0201   /* various alternative formats, none of this is remotely compliant with the specification, but appears in reality nevertheless */
0202 
0203   /* alternative time formats; the ASCII colon is not listed here as it is already lexed as T_COLON by an earlier rule */
0204 ：|︓|ː|\. { return T_ALT_TIME_SEP; }
0205 h|時 { return T_ALT_TIME_SEP_OR_SUFFIX; }
0206 
0207   /* alternative range separators */
0208 ~|~|〜|to|through|à|bis|a|ás|às|as|au|al|→|до|дo|пo { return T_ALT_RANGE_SEP; }
0209 
0210   /* localized state names */
0211 ferm(e|é)|geschlossen|ruhetag|encerrado|chiuso|закры{CYRILLIC}*|Вых{CYRILLIC}*|cerrado|libre { yylval->state = State::Closed; return T_STATE; }
0212 откры{CYRILLIC}*|abierto { yylval->state = State::Open;    return T_STATE; }
0213 неизв{CYRILLIC}* { yylval->state = State::Unknown; return T_STATE; }
0214 
0215  /* German localized day names. */
0216 Montags?     { yylval->num = 1; return T_WEEKDAY; }
0217 Die(nstags?)?   { yylval->num = 2; return T_WEEKDAY; }
0218 Mittwochs?   { yylval->num = 3; return T_WEEKDAY; }
0219 Donnerstags? { yylval->num = 4; return T_WEEKDAY; }
0220 Freitags?    { yylval->num = 5; return T_WEEKDAY; }
0221 Samstags?    { yylval->num = 6; return T_WEEKDAY; }
0222 Sonntags?    { yylval->num = 7; return T_WEEKDAY; }
0223 Feiertage?   { return T_PH; }
0224 
0225   /* French, Spanish, Italian */
0226 Lu { yylval->num = 1; return T_WEEKDAY; }
0227   /* French, Italian */
0228 Me { yylval->num = 3; return T_WEEKDAY; }
0229   /* German, Spanish, Italian */
0230 Mi { yylval->num = 3; return T_WEEKDAY; }
0231   /* French */
0232 Je { yylval->num = 4; return T_WEEKDAY; }
0233   /* Italian */
0234 Gi { yylval->num = 4; return T_WEEKDAY; }
0235   /* French, Italian */
0236 Ve { yylval->num = 5; return T_WEEKDAY; }
0237   /* German */
0238 So { yylval->num = 7; return T_WEEKDAY; }
0239 
0240  /* "Ma" conflicts between Dutch and French+Spanish+Italian...
0241     "Di" conflicts between Dutch+German and French...
0242     "Do" conflicts between Dutch+German and Spanish+Italian... */
0243 
0244   /* French localized day names. */
0245 Lun(di)?    { yylval->num = 1; return T_WEEKDAY; }
0246 Mardi       { yylval->num = 2; return T_WEEKDAY; }
0247 Mer(credi)? { yylval->num = 3; return T_WEEKDAY; }
0248 Jeu(di)?    { yylval->num = 4; return T_WEEKDAY; }
0249 Ven(dredi)? { yylval->num = 5; return T_WEEKDAY; }
0250 Sam(edi)?   { yylval->num = 6; return T_WEEKDAY; }
0251 Dim(anche)? { yylval->num = 7; return T_WEEKDAY; }
0252 "jours fériés" { return T_PH; }
0253 
0254   /* Spanish localized day names */
0255 Lunes         { yylval->num = 1; return T_WEEKDAY; }
0256 Martes        { yylval->num = 2; return T_WEEKDAY; }
0257 Mi(é|e)rcoles { yylval->num = 3; return T_WEEKDAY; }
0258 Jueves        { yylval->num = 4; return T_WEEKDAY; }
0259 Vie(rnes)?    { yylval->num = 5; return T_WEEKDAY; }
0260 S(á|a)b(ado)? { yylval->num = 6; return T_WEEKDAY; }
0261 Dom(ingo)?    { yylval->num = 7; return T_WEEKDAY; }
0262 
0263   /* Italian localized day names */
0264 Luned(ì|i)     { yylval->num = 1; return T_WEEKDAY; }
0265 Marted(ì|i)    { yylval->num = 2; return T_WEEKDAY; }
0266 Mercoled(ì|i)  { yylval->num = 3; return T_WEEKDAY; }
0267 Gio(ved(ì|i))? { yylval->num = 4; return T_WEEKDAY; }
0268 Venerd(ì|i)    { yylval->num = 5; return T_WEEKDAY; }
0269 Sabato         { yylval->num = 6; return T_WEEKDAY; }
0270 Domenica       { yylval->num = 7; return T_WEEKDAY; }
0271 
0272   /* Portuguese localized day names */
0273 feira   { yylval->num = 1; return T_WEEKDAY; }
0274 segunda { yylval->num = 1; return T_WEEKDAY; }
0275 ter(ç|c)a   { yylval->num = 2; return T_WEEKDAY; }
0276 quarta  { yylval->num = 3; return T_WEEKDAY; }
0277 quinta  { yylval->num = 4; return T_WEEKDAY; }
0278 sexta   { yylval->num = 5; return T_WEEKDAY; }
0279 
0280   /* Japanese localized day names */
0281 月|月曜|月曜日 { yylval->num = 1; return T_WEEKDAY; }
0282 火|火曜|火曜日 { yylval->num = 2; return T_WEEKDAY; }
0283 水|水曜|水曜日 { yylval->num = 3; return T_WEEKDAY; }
0284 木|木曜|木曜日 { yylval->num = 4; return T_WEEKDAY; }
0285 金|金曜|金曜日 { yylval->num = 5; return T_WEEKDAY; }
0286 土|土曜|土曜日 { yylval->num = 6; return T_WEEKDAY; }
0287 日|日曜|日曜日 { yylval->num = 7; return T_WEEKDAY; }
0288 祝日 { return T_PH; }
0289 
0290   /* Indonesian localized day names */
0291 Senin   { yylval->num = 1; return T_WEEKDAY; }
0292 Selasa  { yylval->num = 2; return T_WEEKDAY; }
0293 Rabu    { yylval->num = 3; return T_WEEKDAY; }
0294 Kamis   { yylval->num = 4; return T_WEEKDAY; }
0295 Jumat   { yylval->num = 5; return T_WEEKDAY; }
0296 Sabtu   { yylval->num = 6; return T_WEEKDAY; }
0297 Minggu  { yylval->num = 7; return T_WEEKDAY; }
0298 
0299   /* Russian localized day names */
0300 Понедельник|Пон|Пк  { yylval->num = 1; return T_WEEKDAY; }
0301 Вторник|Вто|Вт      { yylval->num = 2; return T_WEEKDAY; }
0302 Среда|Сре|Ср        { yylval->num = 3; return T_WEEKDAY; }
0303 Четверг|Чет|Чт      { yylval->num = 4; return T_WEEKDAY; }
0304 Пятница|Пят|Пя|Пт   { yylval->num = 5; return T_WEEKDAY; }
0305 Суббота|Суб|Су|Сб   { yylval->num = 6; return T_WEEKDAY; }
0306 Воскресенье|Вос|Во  { yylval->num = 7; return T_WEEKDAY; }
0307 
0308   /* Dutch localized day names */
0309 Maandag        { yylval->num = 1; return T_WEEKDAY; }
0310 Dinsdag        { yylval->num = 2; return T_WEEKDAY; }
0311 Wo(e(nsdag)?)? { yylval->num = 3; return T_WEEKDAY; }
0312 Donderdag      { yylval->num = 4; return T_WEEKDAY; }
0313 Vr(ijdag)?     { yylval->num = 5; return T_WEEKDAY; }
0314 Za(terdag)?    { yylval->num = 6; return T_WEEKDAY; }
0315 Zo(ndag)?      { yylval->num = 7; return T_WEEKDAY; }
0316 
0317   /* creative rule separators */
0318 、|and|et|e|y|und|& { return T_ADDITIONAL_RULE_SEPARATOR; }
0319 
0320   /* skip filler words */
0321   /* Note: the 'с' is not an ASCII 'c'! */
0322 from|von|du|de|le|das|分|uhr|"en continu"|с|от {}
0323 
0324   /* localized time event names */
0325 рассвет{CYRILLIC}*           { yylval->time = { Time::Dawn,    0, 0 }; return T_EVENT; }
0326 сумер{CYRILLIC}?к{CYRILLIC}* { yylval->time = { Time::Dusk,    0, 0 }; return T_EVENT; }
0327 восход{CYRILLIC}*            { yylval->time = { Time::Sunrise, 0, 0 }; return T_EVENT; }
0328 закат{CYRILLIC}*             { yylval->time = { Time::Sunset , 0, 0 }; return T_EVENT; }
0329 
0330 . { // catch-all for any single character (byte) that no other rule matched; needed because %option nodefault removes the implicit default rule
0331     //printf("unexpected character: %s at %d:%d\n", yytext, yylloc->first_line, yylloc->first_column);
0332     return T_INVALID;
0333 }
0334 
0335 %%