// File indexing completed on 2024-04-21 03:54:30
0001 /* 0002 SPDX-FileCopyrightText: 2021 Volker Krause <vkrause@kde.org> 0003 0004 SPDX-License-Identifier: LGPL-2.0-or-later 0005 */ 0006 0007 #include "kcountry.h" 0008 #include "isocodes_p.h" 0009 #include "isocodescache_p.h" 0010 #include "kcatalog_p.h" 0011 #include "klocalizedstring.h" 0012 #include "logging.h" 0013 #include "spatial_index_p.h" 0014 #include "timezonedata_p.h" 0015 0016 #include <cstring> 0017 0018 static_assert(sizeof(KCountry) == 2); 0019 0020 KCountry::KCountry() 0021 : d(0) 0022 { 0023 } 0024 0025 KCountry::KCountry(const KCountry &) = default; 0026 KCountry::~KCountry() = default; 0027 0028 KCountry &KCountry::operator=(const KCountry &) = default; 0029 0030 bool KCountry::operator==(const KCountry &other) const 0031 { 0032 return d == other.d; 0033 } 0034 0035 bool KCountry::operator!=(const KCountry &other) const 0036 { 0037 return d != other.d; 0038 } 0039 0040 bool KCountry::isValid() const 0041 { 0042 return d != 0; 0043 } 0044 0045 QString KCountry::alpha2() const 0046 { 0047 if (d == 0) { 0048 return {}; 0049 } 0050 0051 QString code(2, QLatin1Char('\0')); 0052 code[0] = QLatin1Char(d >> 8); 0053 code[1] = QLatin1Char(d & 0xff); 0054 return code; 0055 } 0056 0057 QString KCountry::alpha3() const 0058 { 0059 const auto cache = IsoCodesCache::instance(); 0060 const auto it = std::find_if(cache->countryAlpha3MapBegin(), cache->countryAlpha3MapEnd(), [this](auto entry) { 0061 return entry.value == d; 0062 }); 0063 if (it != cache->countryAlpha3MapEnd()) { 0064 uint16_t alpha3Key = (*it).key; 0065 QString code(3, QLatin1Char('\0')); 0066 code[2] = QLatin1Char(IsoCodes::mapFromAlphaNumKey(alpha3Key)); 0067 alpha3Key /= IsoCodes::AlphaNumKeyFactor; 0068 code[1] = QLatin1Char(IsoCodes::mapFromAlphaNumKey(alpha3Key)); 0069 alpha3Key /= IsoCodes::AlphaNumKeyFactor; 0070 code[0] = QLatin1Char(IsoCodes::mapFromAlphaNumKey(alpha3Key)); 0071 return code; 0072 } 0073 return {}; 0074 } 0075 0076 QString KCountry::name() const 0077 { 0078 if (d 
== 0) { 0079 return {}; 0080 } 0081 0082 auto cache = IsoCodesCache::instance(); 0083 cache->loadIso3166_1(); 0084 const auto it = std::lower_bound(cache->countryNameMapBegin(), cache->countryNameMapEnd(), d); 0085 if (it != cache->countryNameMapEnd() && (*it).key == d) { 0086 return i18nd("iso_3166-1", cache->countryStringTableLookup((*it).value)); 0087 } 0088 return {}; 0089 } 0090 0091 QString KCountry::emojiFlag() const 0092 { 0093 if (d == 0) { 0094 return {}; 0095 } 0096 0097 QString flag; 0098 char flagA[] = "\xF0\x9F\x87\xA6"; 0099 flagA[3] = 0xA6 + ((d >> 8) - 'A'); 0100 flag += QString::fromUtf8(flagA); 0101 flagA[3] = 0xA6 + ((d & 0xff) - 'A'); 0102 flag += QString::fromUtf8(flagA); 0103 return flag; 0104 } 0105 0106 QLocale::Country KCountry::country() const 0107 { 0108 if (d == 0) { 0109 return QLocale::AnyCountry; 0110 } 0111 0112 return QLocale::codeToTerritory(alpha2()); 0113 } 0114 0115 QList<const char *> KCountry::timeZoneIds() const 0116 { 0117 QList<const char *> tzs; 0118 if (d == 0) { 0119 return tzs; 0120 } 0121 0122 const auto countryIt = std::lower_bound(TimezoneData::countryTimezoneMapBegin(), TimezoneData::countryTimezoneMapEnd(), d); 0123 if (countryIt != TimezoneData::countryTimezoneMapEnd() && (*countryIt).key == d) { 0124 tzs.push_back(TimezoneData::ianaIdLookup((*countryIt).value)); 0125 return tzs; 0126 } 0127 0128 const auto [subdivBegin, subdivEnd] = 0129 std::equal_range(TimezoneData::subdivisionTimezoneMapBegin(), TimezoneData::subdivisionTimezoneMapEnd(), d, [](auto lhs, auto rhs) { 0130 if constexpr (std::is_same_v<decltype(lhs), uint16_t>) 0131 return lhs < (rhs.key >> 16); 0132 else 0133 return (lhs.key >> 16) < rhs; 0134 }); 0135 for (auto it = subdivBegin; it != subdivEnd; ++it) { 0136 const auto tzId = TimezoneData::ianaIdLookup((*it).value); 0137 if (!tzs.contains(tzId)) { 0138 tzs.push_back(tzId); 0139 } 0140 } 0141 0142 return tzs; 0143 } 0144 0145 QString KCountry::currencyCode() const 0146 { 0147 if (d == 0) { 0148 
return {}; 0149 } 0150 0151 QString currency; 0152 const auto ls = QLocale::matchingLocales(QLocale::AnyLanguage, QLocale::AnyScript, country()); 0153 for (const auto &l : ls) { 0154 if (currency.isEmpty()) { 0155 currency = l.currencySymbol(QLocale::CurrencyIsoCode); 0156 } else if (currency != l.currencySymbol(QLocale::CurrencyIsoCode)) { 0157 qCDebug(KI18NLD) << "conflicting currency information in QLocale for" << alpha2(); 0158 return {}; 0159 } 0160 } 0161 return currency; 0162 } 0163 0164 QList<KCountrySubdivision> KCountry::subdivisions() const 0165 { 0166 if (d == 0) { 0167 return {}; 0168 } 0169 0170 QList<KCountrySubdivision> l; 0171 auto cache = IsoCodesCache::instance(); 0172 cache->loadIso3166_2(); 0173 // we don't have a country->subdivisions map, instead we use the full list of subdivisions 0174 // (which is sorted by country due to the country being in the two most significant bytes of its key), 0175 // and check the child->parent subdivision map for root elements 0176 auto it = std::lower_bound(cache->subdivisionNameMapBegin(), cache->subdivisionNameMapEnd(), d, [](auto lhs, auto rhs) { 0177 return (lhs.key >> 16) < rhs; 0178 }); 0179 0180 auto [parentBegin, parentEnd] = std::equal_range(cache->subdivisionParentMapBegin(), cache->subdivisionParentMapEnd(), d, [](auto lhs, auto rhs) { 0181 if constexpr (std::is_same_v<decltype(lhs), uint16_t>) 0182 return lhs < (rhs.key >> 16); 0183 else 0184 return (lhs.key >> 16) < rhs; 0185 }); 0186 0187 for (; it != cache->subdivisionNameMapEnd() && ((*it).key >> 16) == d; ++it) { 0188 if (!std::binary_search(parentBegin, parentEnd, (*it).key)) { 0189 KCountrySubdivision s; 0190 s.d = (*it).key; 0191 l.push_back(s); 0192 } 0193 } 0194 0195 return l; 0196 } 0197 0198 static uint16_t validatedAlpha2Key(uint16_t alpha2Key) 0199 { 0200 if (!alpha2Key) { 0201 return 0; 0202 } 0203 0204 auto cache = IsoCodesCache::instance(); 0205 cache->loadIso3166_1(); 0206 const auto it = 
std::lower_bound(cache->countryNameMapBegin(), cache->countryNameMapEnd(), alpha2Key); 0207 if (it != cache->countryNameMapEnd() && (*it).key == alpha2Key) { 0208 return alpha2Key; 0209 } 0210 return 0; 0211 } 0212 0213 KCountry KCountry::fromAlpha2(QStringView alpha2Code) 0214 { 0215 KCountry c; 0216 c.d = validatedAlpha2Key(IsoCodes::alpha2CodeToKey(alpha2Code)); 0217 return c; 0218 } 0219 0220 KCountry KCountry::fromAlpha2(const char *alpha2Code) 0221 { 0222 KCountry c; 0223 if (!alpha2Code) { 0224 return c; 0225 } 0226 c.d = validatedAlpha2Key(IsoCodes::alpha2CodeToKey(alpha2Code, std::strlen(alpha2Code))); 0227 return c; 0228 } 0229 0230 static uint16_t alpha3Lookup(uint16_t alpha3Key) 0231 { 0232 if (!alpha3Key) { 0233 return 0; 0234 } 0235 0236 auto cache = IsoCodesCache::instance(); 0237 cache->loadIso3166_1(); 0238 const auto it = std::lower_bound(cache->countryAlpha3MapBegin(), cache->countryAlpha3MapEnd(), alpha3Key); 0239 if (it != cache->countryAlpha3MapEnd() && (*it).key == alpha3Key) { 0240 return (*it).value; 0241 } 0242 return 0; 0243 } 0244 0245 KCountry KCountry::fromAlpha3(QStringView alpha3Code) 0246 { 0247 KCountry c; 0248 c.d = alpha3Lookup(IsoCodes::alpha3CodeToKey(alpha3Code)); 0249 return c; 0250 } 0251 0252 KCountry KCountry::fromAlpha3(const char *alpha3Code) 0253 { 0254 KCountry c; 0255 if (!alpha3Code) { 0256 return c; 0257 } 0258 c.d = alpha3Lookup(IsoCodes::alpha3CodeToKey(alpha3Code, std::strlen(alpha3Code))); 0259 return c; 0260 } 0261 0262 KCountry KCountry::fromLocation(float latitude, float longitude) 0263 { 0264 const auto entry = SpatialIndex::lookup(latitude, longitude); 0265 KCountry c; 0266 c.d = entry.m_subdiv >> 16; 0267 return c; 0268 } 0269 0270 KCountry KCountry::fromQLocale(QLocale::Country country) 0271 { 0272 return fromAlpha2(QLocale::territoryToCode(country).data()); 0273 } 0274 0275 static QString normalizeCountryName(QStringView name) 0276 { 0277 QString res; 0278 res.reserve(name.size()); 0279 for (const auto c 
: name) { 0280 // the following needs to be done fairly fine-grained, as this can easily mess up scripts 0281 // that rely on some non-letter characters to work 0282 // all values used below were obtained by similar code in KContacts, which used to do 0283 // a full offline pre-computation of this and checked for ambiguities introduced by too 0284 // aggressive normalization 0285 switch (c.category()) { 0286 // strip decorative elements that don't contribute to identification (parenthesis, dashes, quotes, etc) 0287 case QChar::Punctuation_Connector: 0288 case QChar::Punctuation_Dash: 0289 case QChar::Punctuation_Open: 0290 case QChar::Punctuation_Close: 0291 case QChar::Punctuation_InitialQuote: 0292 case QChar::Punctuation_FinalQuote: 0293 case QChar::Punctuation_Other: 0294 continue; 0295 default: 0296 break; 0297 } 0298 0299 if (c.isSpace()) { 0300 continue; 0301 } 0302 0303 // if the character has a canonical decomposition skip the combining diacritic markers following it 0304 // this works particularly well for Latin, but messes up Hangul 0305 if (c.script() != QChar::Script_Hangul && c.decompositionTag() == QChar::Canonical) { 0306 res.push_back(c.decomposition().at(0).toCaseFolded()); 0307 } else { 0308 res.push_back(c.toCaseFolded()); 0309 } 0310 } 0311 0312 return res; 0313 } 0314 0315 static void checkSubstringMatch(QStringView lhs, QStringView rhs, uint16_t code, uint16_t &result) 0316 { 0317 if (result == std::numeric_limits<uint16_t>::max() || result == code || rhs.isEmpty()) { 0318 return; 0319 } 0320 const auto matches = lhs.startsWith(rhs) || rhs.startsWith(lhs) || lhs.endsWith(rhs) || rhs.endsWith(lhs); 0321 if (!matches) { 0322 return; 0323 } 0324 result = result == 0 ? 
code : std::numeric_limits<uint16_t>::max(); 0325 } 0326 0327 KCountry KCountry::fromName(QStringView name) 0328 { 0329 if (name.isEmpty()) { 0330 return {}; 0331 } 0332 const auto normalizedName = normalizeCountryName(name); 0333 0334 auto cache = IsoCodesCache::instance(); 0335 cache->loadIso3166_1(); 0336 0337 uint16_t substrMatch = 0; 0338 0339 // check untranslated names 0340 for (auto it = cache->countryNameMapBegin(); it != cache->countryNameMapEnd(); ++it) { 0341 const auto normalizedCountry = normalizeCountryName(QString::fromUtf8(cache->countryStringTableLookup((*it).value))); 0342 if (normalizedName == normalizedCountry) { 0343 KCountry c; 0344 c.d = (*it).key; 0345 return c; 0346 } 0347 checkSubstringMatch(normalizedName, normalizedCountry, (*it).key, substrMatch); 0348 } 0349 0350 // check translated names 0351 const auto langs = KCatalog::availableCatalogLanguages("iso_3166-1"); 0352 for (const auto &lang : langs) { 0353 const auto catalog = KCatalog("iso_3166-1", lang); 0354 for (auto it = cache->countryNameMapBegin(); it != cache->countryNameMapEnd(); ++it) { 0355 const auto normalizedCountry = normalizeCountryName(catalog.translate(cache->countryStringTableLookup((*it).value))); 0356 if (normalizedName == normalizedCountry) { 0357 KCountry c; 0358 c.d = (*it).key; 0359 return c; 0360 } 0361 checkSubstringMatch(normalizedName, normalizedCountry, (*it).key, substrMatch); 0362 } 0363 } 0364 0365 // unique prefix/suffix match 0366 if (substrMatch != std::numeric_limits<uint16_t>::max() && substrMatch != 0) { 0367 KCountry c; 0368 c.d = substrMatch; 0369 return c; 0370 } 0371 0372 // fallback to code lookups 0373 if (normalizedName.size() == 3) { 0374 return fromAlpha3(normalizedName); 0375 } 0376 if (normalizedName.size() == 2) { 0377 return fromAlpha2(normalizedName); 0378 } 0379 0380 return {}; 0381 } 0382 0383 QList<KCountry> KCountry::allCountries() 0384 { 0385 QList<KCountry> l; 0386 auto cache = IsoCodesCache::instance(); 0387 
cache->loadIso3166_1(); 0388 l.reserve(cache->countryCount()); 0389 std::transform(cache->countryNameMapBegin(), cache->countryNameMapEnd(), std::back_inserter(l), [](auto entry) { 0390 KCountry c; 0391 c.d = entry.key; 0392 return c; 0393 }); 0394 return l; 0395 } 0396 0397 QStringList KCountry::timeZoneIdsStringList() const 0398 { 0399 const auto tzIds = timeZoneIds(); 0400 QStringList l; 0401 l.reserve(tzIds.size()); 0402 std::transform(tzIds.begin(), tzIds.end(), std::back_inserter(l), [](const char *tzId) { 0403 return QString::fromUtf8(tzId); 0404 }); 0405 return l; 0406 } 0407 0408 #include "moc_kcountry.cpp"