File indexing completed on 2024-04-21 03:54:30

0001 /*
0002     SPDX-FileCopyrightText: 2021 Volker Krause <vkrause@kde.org>
0003 
0004     SPDX-License-Identifier: LGPL-2.0-or-later
0005 */
0006 
0007 #include "kcountry.h"
0008 #include "isocodes_p.h"
0009 #include "isocodescache_p.h"
0010 #include "kcatalog_p.h"
0011 #include "klocalizedstring.h"
0012 #include "logging.h"
0013 #include "spatial_index_p.h"
0014 #include "timezonedata_p.h"
0015 
0016 #include <cstring>
0017 
0018 static_assert(sizeof(KCountry) == 2);
0019 
0020 KCountry::KCountry()
0021     : d(0)
0022 {
0023 }
0024 
0025 KCountry::KCountry(const KCountry &) = default;
0026 KCountry::~KCountry() = default;
0027 
0028 KCountry &KCountry::operator=(const KCountry &) = default;
0029 
0030 bool KCountry::operator==(const KCountry &other) const
0031 {
0032     return d == other.d;
0033 }
0034 
0035 bool KCountry::operator!=(const KCountry &other) const
0036 {
0037     return d != other.d;
0038 }
0039 
0040 bool KCountry::isValid() const
0041 {
0042     return d != 0;
0043 }
0044 
0045 QString KCountry::alpha2() const
0046 {
0047     if (d == 0) {
0048         return {};
0049     }
0050 
0051     QString code(2, QLatin1Char('\0'));
0052     code[0] = QLatin1Char(d >> 8);
0053     code[1] = QLatin1Char(d & 0xff);
0054     return code;
0055 }
0056 
0057 QString KCountry::alpha3() const
0058 {
0059     const auto cache = IsoCodesCache::instance();
0060     const auto it = std::find_if(cache->countryAlpha3MapBegin(), cache->countryAlpha3MapEnd(), [this](auto entry) {
0061         return entry.value == d;
0062     });
0063     if (it != cache->countryAlpha3MapEnd()) {
0064         uint16_t alpha3Key = (*it).key;
0065         QString code(3, QLatin1Char('\0'));
0066         code[2] = QLatin1Char(IsoCodes::mapFromAlphaNumKey(alpha3Key));
0067         alpha3Key /= IsoCodes::AlphaNumKeyFactor;
0068         code[1] = QLatin1Char(IsoCodes::mapFromAlphaNumKey(alpha3Key));
0069         alpha3Key /= IsoCodes::AlphaNumKeyFactor;
0070         code[0] = QLatin1Char(IsoCodes::mapFromAlphaNumKey(alpha3Key));
0071         return code;
0072     }
0073     return {};
0074 }
0075 
0076 QString KCountry::name() const
0077 {
0078     if (d == 0) {
0079         return {};
0080     }
0081 
0082     auto cache = IsoCodesCache::instance();
0083     cache->loadIso3166_1();
0084     const auto it = std::lower_bound(cache->countryNameMapBegin(), cache->countryNameMapEnd(), d);
0085     if (it != cache->countryNameMapEnd() && (*it).key == d) {
0086         return i18nd("iso_3166-1", cache->countryStringTableLookup((*it).value));
0087     }
0088     return {};
0089 }
0090 
0091 QString KCountry::emojiFlag() const
0092 {
0093     if (d == 0) {
0094         return {};
0095     }
0096 
0097     QString flag;
0098     char flagA[] = "\xF0\x9F\x87\xA6";
0099     flagA[3] = 0xA6 + ((d >> 8) - 'A');
0100     flag += QString::fromUtf8(flagA);
0101     flagA[3] = 0xA6 + ((d & 0xff) - 'A');
0102     flag += QString::fromUtf8(flagA);
0103     return flag;
0104 }
0105 
0106 QLocale::Country KCountry::country() const
0107 {
0108     if (d == 0) {
0109         return QLocale::AnyCountry;
0110     }
0111 
0112     return QLocale::codeToTerritory(alpha2());
0113 }
0114 
0115 QList<const char *> KCountry::timeZoneIds() const
0116 {
0117     QList<const char *> tzs;
0118     if (d == 0) {
0119         return tzs;
0120     }
0121 
0122     const auto countryIt = std::lower_bound(TimezoneData::countryTimezoneMapBegin(), TimezoneData::countryTimezoneMapEnd(), d);
0123     if (countryIt != TimezoneData::countryTimezoneMapEnd() && (*countryIt).key == d) {
0124         tzs.push_back(TimezoneData::ianaIdLookup((*countryIt).value));
0125         return tzs;
0126     }
0127 
0128     const auto [subdivBegin, subdivEnd] =
0129         std::equal_range(TimezoneData::subdivisionTimezoneMapBegin(), TimezoneData::subdivisionTimezoneMapEnd(), d, [](auto lhs, auto rhs) {
0130             if constexpr (std::is_same_v<decltype(lhs), uint16_t>)
0131                 return lhs < (rhs.key >> 16);
0132             else
0133                 return (lhs.key >> 16) < rhs;
0134         });
0135     for (auto it = subdivBegin; it != subdivEnd; ++it) {
0136         const auto tzId = TimezoneData::ianaIdLookup((*it).value);
0137         if (!tzs.contains(tzId)) {
0138             tzs.push_back(tzId);
0139         }
0140     }
0141 
0142     return tzs;
0143 }
0144 
0145 QString KCountry::currencyCode() const
0146 {
0147     if (d == 0) {
0148         return {};
0149     }
0150 
0151     QString currency;
0152     const auto ls = QLocale::matchingLocales(QLocale::AnyLanguage, QLocale::AnyScript, country());
0153     for (const auto &l : ls) {
0154         if (currency.isEmpty()) {
0155             currency = l.currencySymbol(QLocale::CurrencyIsoCode);
0156         } else if (currency != l.currencySymbol(QLocale::CurrencyIsoCode)) {
0157             qCDebug(KI18NLD) << "conflicting currency information in QLocale for" << alpha2();
0158             return {};
0159         }
0160     }
0161     return currency;
0162 }
0163 
0164 QList<KCountrySubdivision> KCountry::subdivisions() const
0165 {
0166     if (d == 0) {
0167         return {};
0168     }
0169 
0170     QList<KCountrySubdivision> l;
0171     auto cache = IsoCodesCache::instance();
0172     cache->loadIso3166_2();
0173     // we don't have a country->subdivisions map, instead we use the full list of subdivisions
0174     // (which is sorted by country due to the country being in the two most significant bytes of its key),
0175     // and check the child->parent subdivision map for root elements
0176     auto it = std::lower_bound(cache->subdivisionNameMapBegin(), cache->subdivisionNameMapEnd(), d, [](auto lhs, auto rhs) {
0177         return (lhs.key >> 16) < rhs;
0178     });
0179 
0180     auto [parentBegin, parentEnd] = std::equal_range(cache->subdivisionParentMapBegin(), cache->subdivisionParentMapEnd(), d, [](auto lhs, auto rhs) {
0181         if constexpr (std::is_same_v<decltype(lhs), uint16_t>)
0182             return lhs < (rhs.key >> 16);
0183         else
0184             return (lhs.key >> 16) < rhs;
0185     });
0186 
0187     for (; it != cache->subdivisionNameMapEnd() && ((*it).key >> 16) == d; ++it) {
0188         if (!std::binary_search(parentBegin, parentEnd, (*it).key)) {
0189             KCountrySubdivision s;
0190             s.d = (*it).key;
0191             l.push_back(s);
0192         }
0193     }
0194 
0195     return l;
0196 }
0197 
0198 static uint16_t validatedAlpha2Key(uint16_t alpha2Key)
0199 {
0200     if (!alpha2Key) {
0201         return 0;
0202     }
0203 
0204     auto cache = IsoCodesCache::instance();
0205     cache->loadIso3166_1();
0206     const auto it = std::lower_bound(cache->countryNameMapBegin(), cache->countryNameMapEnd(), alpha2Key);
0207     if (it != cache->countryNameMapEnd() && (*it).key == alpha2Key) {
0208         return alpha2Key;
0209     }
0210     return 0;
0211 }
0212 
0213 KCountry KCountry::fromAlpha2(QStringView alpha2Code)
0214 {
0215     KCountry c;
0216     c.d = validatedAlpha2Key(IsoCodes::alpha2CodeToKey(alpha2Code));
0217     return c;
0218 }
0219 
0220 KCountry KCountry::fromAlpha2(const char *alpha2Code)
0221 {
0222     KCountry c;
0223     if (!alpha2Code) {
0224         return c;
0225     }
0226     c.d = validatedAlpha2Key(IsoCodes::alpha2CodeToKey(alpha2Code, std::strlen(alpha2Code)));
0227     return c;
0228 }
0229 
0230 static uint16_t alpha3Lookup(uint16_t alpha3Key)
0231 {
0232     if (!alpha3Key) {
0233         return 0;
0234     }
0235 
0236     auto cache = IsoCodesCache::instance();
0237     cache->loadIso3166_1();
0238     const auto it = std::lower_bound(cache->countryAlpha3MapBegin(), cache->countryAlpha3MapEnd(), alpha3Key);
0239     if (it != cache->countryAlpha3MapEnd() && (*it).key == alpha3Key) {
0240         return (*it).value;
0241     }
0242     return 0;
0243 }
0244 
0245 KCountry KCountry::fromAlpha3(QStringView alpha3Code)
0246 {
0247     KCountry c;
0248     c.d = alpha3Lookup(IsoCodes::alpha3CodeToKey(alpha3Code));
0249     return c;
0250 }
0251 
0252 KCountry KCountry::fromAlpha3(const char *alpha3Code)
0253 {
0254     KCountry c;
0255     if (!alpha3Code) {
0256         return c;
0257     }
0258     c.d = alpha3Lookup(IsoCodes::alpha3CodeToKey(alpha3Code, std::strlen(alpha3Code)));
0259     return c;
0260 }
0261 
0262 KCountry KCountry::fromLocation(float latitude, float longitude)
0263 {
0264     const auto entry = SpatialIndex::lookup(latitude, longitude);
0265     KCountry c;
0266     c.d = entry.m_subdiv >> 16;
0267     return c;
0268 }
0269 
0270 KCountry KCountry::fromQLocale(QLocale::Country country)
0271 {
0272     return fromAlpha2(QLocale::territoryToCode(country).data());
0273 }
0274 
0275 static QString normalizeCountryName(QStringView name)
0276 {
0277     QString res;
0278     res.reserve(name.size());
0279     for (const auto c : name) {
0280         // the following needs to be done fairly fine-grained, as this can easily mess up scripts
0281         // that rely on some non-letter characters to work
0282         // all values used below were obtained by similar code in KContacts, which used to do
0283         // a full offline pre-computation of this and checked for ambiguities introduced by too
0284         // aggressive normalization
0285         switch (c.category()) {
0286         // strip decorative elements that don't contribute to identification (parenthesis, dashes, quotes, etc)
0287         case QChar::Punctuation_Connector:
0288         case QChar::Punctuation_Dash:
0289         case QChar::Punctuation_Open:
0290         case QChar::Punctuation_Close:
0291         case QChar::Punctuation_InitialQuote:
0292         case QChar::Punctuation_FinalQuote:
0293         case QChar::Punctuation_Other:
0294             continue;
0295         default:
0296             break;
0297         }
0298 
0299         if (c.isSpace()) {
0300             continue;
0301         }
0302 
0303         // if the character has a canonical decomposition skip the combining diacritic markers following it
0304         // this works particularly well for Latin, but messes up Hangul
0305         if (c.script() != QChar::Script_Hangul && c.decompositionTag() == QChar::Canonical) {
0306             res.push_back(c.decomposition().at(0).toCaseFolded());
0307         } else {
0308             res.push_back(c.toCaseFolded());
0309         }
0310     }
0311 
0312     return res;
0313 }
0314 
0315 static void checkSubstringMatch(QStringView lhs, QStringView rhs, uint16_t code, uint16_t &result)
0316 {
0317     if (result == std::numeric_limits<uint16_t>::max() || result == code || rhs.isEmpty()) {
0318         return;
0319     }
0320     const auto matches = lhs.startsWith(rhs) || rhs.startsWith(lhs) || lhs.endsWith(rhs) || rhs.endsWith(lhs);
0321     if (!matches) {
0322         return;
0323     }
0324     result = result == 0 ? code : std::numeric_limits<uint16_t>::max();
0325 }
0326 
0327 KCountry KCountry::fromName(QStringView name)
0328 {
0329     if (name.isEmpty()) {
0330         return {};
0331     }
0332     const auto normalizedName = normalizeCountryName(name);
0333 
0334     auto cache = IsoCodesCache::instance();
0335     cache->loadIso3166_1();
0336 
0337     uint16_t substrMatch = 0;
0338 
0339     // check untranslated names
0340     for (auto it = cache->countryNameMapBegin(); it != cache->countryNameMapEnd(); ++it) {
0341         const auto normalizedCountry = normalizeCountryName(QString::fromUtf8(cache->countryStringTableLookup((*it).value)));
0342         if (normalizedName == normalizedCountry) {
0343             KCountry c;
0344             c.d = (*it).key;
0345             return c;
0346         }
0347         checkSubstringMatch(normalizedName, normalizedCountry, (*it).key, substrMatch);
0348     }
0349 
0350     // check translated names
0351     const auto langs = KCatalog::availableCatalogLanguages("iso_3166-1");
0352     for (const auto &lang : langs) {
0353         const auto catalog = KCatalog("iso_3166-1", lang);
0354         for (auto it = cache->countryNameMapBegin(); it != cache->countryNameMapEnd(); ++it) {
0355             const auto normalizedCountry = normalizeCountryName(catalog.translate(cache->countryStringTableLookup((*it).value)));
0356             if (normalizedName == normalizedCountry) {
0357                 KCountry c;
0358                 c.d = (*it).key;
0359                 return c;
0360             }
0361             checkSubstringMatch(normalizedName, normalizedCountry, (*it).key, substrMatch);
0362         }
0363     }
0364 
0365     // unique prefix/suffix match
0366     if (substrMatch != std::numeric_limits<uint16_t>::max() && substrMatch != 0) {
0367         KCountry c;
0368         c.d = substrMatch;
0369         return c;
0370     }
0371 
0372     // fallback to code lookups
0373     if (normalizedName.size() == 3) {
0374         return fromAlpha3(normalizedName);
0375     }
0376     if (normalizedName.size() == 2) {
0377         return fromAlpha2(normalizedName);
0378     }
0379 
0380     return {};
0381 }
0382 
0383 QList<KCountry> KCountry::allCountries()
0384 {
0385     QList<KCountry> l;
0386     auto cache = IsoCodesCache::instance();
0387     cache->loadIso3166_1();
0388     l.reserve(cache->countryCount());
0389     std::transform(cache->countryNameMapBegin(), cache->countryNameMapEnd(), std::back_inserter(l), [](auto entry) {
0390         KCountry c;
0391         c.d = entry.key;
0392         return c;
0393     });
0394     return l;
0395 }
0396 
0397 QStringList KCountry::timeZoneIdsStringList() const
0398 {
0399     const auto tzIds = timeZoneIds();
0400     QStringList l;
0401     l.reserve(tzIds.size());
0402     std::transform(tzIds.begin(), tzIds.end(), std::back_inserter(l), [](const char *tzId) {
0403         return QString::fromUtf8(tzId);
0404     });
0405     return l;
0406 }
0407 
0408 #include "moc_kcountry.cpp"