File indexing completed on 2024-04-28 15:25:29

0001 /*
0002     SPDX-FileCopyrightText: 2021 Volker Krause <vkrause@kde.org>
0003 
0004     SPDX-License-Identifier: LGPL-2.0-or-later
0005 */
0006 
0007 #include "kcountry.h"
0008 #include "isocodes_p.h"
0009 #include "isocodescache_p.h"
0010 #include "kcatalog_p.h"
0011 #include "klocalizedstring.h"
0012 #include "logging.h"
0013 #include "spatial_index_p.h"
0014 #include "timezonedata_p.h"
0015 
0016 #if QT_VERSION < QT_VERSION_CHECK(6, 2, 0)
0017 #include <private/qlocale_p.h>
0018 #endif
0019 
0020 #include <cstring>
0021 
0022 static_assert(sizeof(KCountry) == 2);
0023 
0024 KCountry::KCountry()
0025     : d(0)
0026 {
0027 }
0028 
0029 KCountry::KCountry(const KCountry &) = default;
0030 KCountry::~KCountry() = default;
0031 
0032 KCountry &KCountry::operator=(const KCountry &) = default;
0033 
0034 bool KCountry::operator==(const KCountry &other) const
0035 {
0036     return d == other.d;
0037 }
0038 
0039 bool KCountry::operator!=(const KCountry &other) const
0040 {
0041     return d != other.d;
0042 }
0043 
0044 bool KCountry::isValid() const
0045 {
0046     return d != 0;
0047 }
0048 
0049 QString KCountry::alpha2() const
0050 {
0051     if (d == 0) {
0052         return {};
0053     }
0054 
0055     QString code(2, QLatin1Char('\0'));
0056     code[0] = QLatin1Char(d >> 8);
0057     code[1] = QLatin1Char(d & 0xff);
0058     return code;
0059 }
0060 
0061 QString KCountry::alpha3() const
0062 {
0063     const auto cache = IsoCodesCache::instance();
0064     const auto it = std::find_if(cache->countryAlpha3MapBegin(), cache->countryAlpha3MapEnd(), [this](auto entry) {
0065         return entry.value == d;
0066     });
0067     if (it != cache->countryAlpha3MapEnd()) {
0068         uint16_t alpha3Key = (*it).key;
0069         QString code(3, QLatin1Char('\0'));
0070         code[2] = QLatin1Char(IsoCodes::mapFromAlphaNumKey(alpha3Key));
0071         alpha3Key /= IsoCodes::AlphaNumKeyFactor;
0072         code[1] = QLatin1Char(IsoCodes::mapFromAlphaNumKey(alpha3Key));
0073         alpha3Key /= IsoCodes::AlphaNumKeyFactor;
0074         code[0] = QLatin1Char(IsoCodes::mapFromAlphaNumKey(alpha3Key));
0075         return code;
0076     }
0077     return {};
0078 }
0079 
0080 QString KCountry::name() const
0081 {
0082     if (d == 0) {
0083         return {};
0084     }
0085 
0086     auto cache = IsoCodesCache::instance();
0087     cache->loadIso3166_1();
0088     const auto it = std::lower_bound(cache->countryNameMapBegin(), cache->countryNameMapEnd(), d);
0089     if (it != cache->countryNameMapEnd() && (*it).key == d) {
0090         return i18nd("iso_3166-1", cache->countryStringTableLookup((*it).value));
0091     }
0092     return {};
0093 }
0094 
0095 QString KCountry::emojiFlag() const
0096 {
0097     if (d == 0) {
0098         return {};
0099     }
0100 
0101     QString flag;
0102     char flagA[] = "\xF0\x9F\x87\xA6";
0103     flagA[3] = 0xA6 + ((d >> 8) - 'A');
0104     flag += QString::fromUtf8(flagA);
0105     flagA[3] = 0xA6 + ((d & 0xff) - 'A');
0106     flag += QString::fromUtf8(flagA);
0107     return flag;
0108 }
0109 
0110 QLocale::Country KCountry::country() const
0111 {
0112     if (d == 0) {
0113         return QLocale::AnyCountry;
0114     }
0115 
0116 #if QT_VERSION >= QT_VERSION_CHECK(6, 2, 0)
0117     return QLocale::codeToTerritory(alpha2());
0118 #else
0119     return QLocalePrivate::codeToCountry(alpha2());
0120 #endif
0121 }
0122 
0123 QList<const char *> KCountry::timeZoneIds() const
0124 {
0125     QList<const char *> tzs;
0126     if (d == 0) {
0127         return tzs;
0128     }
0129 
0130     const auto countryIt = std::lower_bound(TimezoneData::countryTimezoneMapBegin(), TimezoneData::countryTimezoneMapEnd(), d);
0131     if (countryIt != TimezoneData::countryTimezoneMapEnd() && (*countryIt).key == d) {
0132         tzs.push_back(TimezoneData::ianaIdLookup((*countryIt).value));
0133         return tzs;
0134     }
0135 
0136     const auto [subdivBegin, subdivEnd] =
0137         std::equal_range(TimezoneData::subdivisionTimezoneMapBegin(), TimezoneData::subdivisionTimezoneMapEnd(), d, [](auto lhs, auto rhs) {
0138             if constexpr (std::is_same_v<decltype(lhs), uint16_t>)
0139                 return lhs < (rhs.key >> 16);
0140             else
0141                 return (lhs.key >> 16) < rhs;
0142         });
0143     for (auto it = subdivBegin; it != subdivEnd; ++it) {
0144         const auto tzId = TimezoneData::ianaIdLookup((*it).value);
0145         if (!tzs.contains(tzId)) {
0146             tzs.push_back(tzId);
0147         }
0148     }
0149 
0150     return tzs;
0151 }
0152 
0153 QString KCountry::currencyCode() const
0154 {
0155     if (d == 0) {
0156         return {};
0157     }
0158 
0159     QString currency;
0160     const auto ls = QLocale::matchingLocales(QLocale::AnyLanguage, QLocale::AnyScript, country());
0161     for (const auto &l : ls) {
0162         if (currency.isEmpty()) {
0163             currency = l.currencySymbol(QLocale::CurrencyIsoCode);
0164         } else if (currency != l.currencySymbol(QLocale::CurrencyIsoCode)) {
0165             qCDebug(KI18NLD) << "conflicting currency information in QLocale for" << alpha2();
0166             return {};
0167         }
0168     }
0169     return currency;
0170 }
0171 
0172 QList<KCountrySubdivision> KCountry::subdivisions() const
0173 {
0174     if (d == 0) {
0175         return {};
0176     }
0177 
0178     QList<KCountrySubdivision> l;
0179     auto cache = IsoCodesCache::instance();
0180     cache->loadIso3166_2();
0181     // we don't have a country->subdivisions map, instead we use the full list of subdivisions
0182     // (which is sorted by country due to the country being in the two most significant bytes of its key),
0183     // and check the child->parent subdivision map for root elements
0184     auto it = std::lower_bound(cache->subdivisionNameMapBegin(), cache->subdivisionNameMapEnd(), d, [](auto lhs, auto rhs) {
0185         return (lhs.key >> 16) < rhs;
0186     });
0187 
0188     auto [parentBegin, parentEnd] = std::equal_range(cache->subdivisionParentMapBegin(), cache->subdivisionParentMapEnd(), d, [](auto lhs, auto rhs) {
0189         if constexpr (std::is_same_v<decltype(lhs), uint16_t>)
0190             return lhs < (rhs.key >> 16);
0191         else
0192             return (lhs.key >> 16) < rhs;
0193     });
0194 
0195     for (; it != cache->subdivisionNameMapEnd() && ((*it).key >> 16) == d; ++it) {
0196         if (!std::binary_search(parentBegin, parentEnd, (*it).key)) {
0197             KCountrySubdivision s;
0198             s.d = (*it).key;
0199             l.push_back(s);
0200         }
0201     }
0202 
0203     return l;
0204 }
0205 
0206 static uint16_t validatedAlpha2Key(uint16_t alpha2Key)
0207 {
0208     if (!alpha2Key) {
0209         return 0;
0210     }
0211 
0212     auto cache = IsoCodesCache::instance();
0213     cache->loadIso3166_1();
0214     const auto it = std::lower_bound(cache->countryNameMapBegin(), cache->countryNameMapEnd(), alpha2Key);
0215     if (it != cache->countryNameMapEnd() && (*it).key == alpha2Key) {
0216         return alpha2Key;
0217     }
0218     return 0;
0219 }
0220 
0221 KCountry KCountry::fromAlpha2(QStringView alpha2Code)
0222 {
0223     KCountry c;
0224     c.d = validatedAlpha2Key(IsoCodes::alpha2CodeToKey(alpha2Code));
0225     return c;
0226 }
0227 
0228 KCountry KCountry::fromAlpha2(const char *alpha2Code)
0229 {
0230     KCountry c;
0231     if (!alpha2Code) {
0232         return c;
0233     }
0234     c.d = validatedAlpha2Key(IsoCodes::alpha2CodeToKey(alpha2Code, std::strlen(alpha2Code)));
0235     return c;
0236 }
0237 
0238 static uint16_t alpha3Lookup(uint16_t alpha3Key)
0239 {
0240     if (!alpha3Key) {
0241         return 0;
0242     }
0243 
0244     auto cache = IsoCodesCache::instance();
0245     cache->loadIso3166_1();
0246     const auto it = std::lower_bound(cache->countryAlpha3MapBegin(), cache->countryAlpha3MapEnd(), alpha3Key);
0247     if (it != cache->countryAlpha3MapEnd() && (*it).key == alpha3Key) {
0248         return (*it).value;
0249     }
0250     return 0;
0251 }
0252 
0253 KCountry KCountry::fromAlpha3(QStringView alpha3Code)
0254 {
0255     KCountry c;
0256     c.d = alpha3Lookup(IsoCodes::alpha3CodeToKey(alpha3Code));
0257     return c;
0258 }
0259 
0260 KCountry KCountry::fromAlpha3(const char *alpha3Code)
0261 {
0262     KCountry c;
0263     if (!alpha3Code) {
0264         return c;
0265     }
0266     c.d = alpha3Lookup(IsoCodes::alpha3CodeToKey(alpha3Code, std::strlen(alpha3Code)));
0267     return c;
0268 }
0269 
0270 KCountry KCountry::fromLocation(float latitude, float longitude)
0271 {
0272     const auto entry = SpatialIndex::lookup(latitude, longitude);
0273     KCountry c;
0274     c.d = entry.m_subdiv >> 16;
0275     return c;
0276 }
0277 
0278 KCountry KCountry::fromQLocale(QLocale::Country country)
0279 {
0280 #if QT_VERSION >= QT_VERSION_CHECK(6, 2, 0)
0281     return fromAlpha2(QLocale::territoryToCode(country).data());
0282 #else
0283     return fromAlpha2(QLocalePrivate::countryToCode(country).data());
0284 #endif
0285 }
0286 
0287 static QString normalizeCountryName(QStringView name)
0288 {
0289     QString res;
0290     res.reserve(name.size());
0291     for (const auto c : name) {
0292         // the following needs to be done fairly fine-grained, as this can easily mess up scripts
0293         // that rely on some non-letter characters to work
0294         // all values used below were obtained by similar code in KContacts, which used to do
0295         // a full offline pre-computation of this and checked for ambiguities introduced by too
0296         // aggressive normalization
0297         switch (c.category()) {
0298         // strip decorative elements that don't contribute to identification (parenthesis, dashes, quotes, etc)
0299         case QChar::Punctuation_Connector:
0300         case QChar::Punctuation_Dash:
0301         case QChar::Punctuation_Open:
0302         case QChar::Punctuation_Close:
0303         case QChar::Punctuation_InitialQuote:
0304         case QChar::Punctuation_FinalQuote:
0305         case QChar::Punctuation_Other:
0306             continue;
0307         default:
0308             break;
0309         }
0310 
0311         if (c.isSpace()) {
0312             continue;
0313         }
0314 
0315         // if the character has a canonical decomposition skip the combining diacritic markers following it
0316         // this works particularly well for Latin, but messes up Hangul
0317         if (c.script() != QChar::Script_Hangul && c.decompositionTag() == QChar::Canonical) {
0318             res.push_back(c.decomposition().at(0).toCaseFolded());
0319         } else {
0320             res.push_back(c.toCaseFolded());
0321         }
0322     }
0323 
0324     return res;
0325 }
0326 
0327 static void checkSubstringMatch(QStringView lhs, QStringView rhs, uint16_t code, uint16_t &result)
0328 {
0329     if (result == std::numeric_limits<uint16_t>::max() || result == code || rhs.isEmpty()) {
0330         return;
0331     }
0332     const auto matches = lhs.startsWith(rhs) || rhs.startsWith(lhs) || lhs.endsWith(rhs) || rhs.endsWith(lhs);
0333     if (!matches) {
0334         return;
0335     }
0336     result = result == 0 ? code : std::numeric_limits<uint16_t>::max();
0337 }
0338 
0339 KCountry KCountry::fromName(QStringView name)
0340 {
0341     if (name.isEmpty()) {
0342         return {};
0343     }
0344     const auto normalizedName = normalizeCountryName(name);
0345 
0346     auto cache = IsoCodesCache::instance();
0347     cache->loadIso3166_1();
0348 
0349     uint16_t substrMatch = 0;
0350 
0351     // check untranslated names
0352     for (auto it = cache->countryNameMapBegin(); it != cache->countryNameMapEnd(); ++it) {
0353         const auto normalizedCountry = normalizeCountryName(QString::fromUtf8(cache->countryStringTableLookup((*it).value)));
0354         if (normalizedName == normalizedCountry) {
0355             KCountry c;
0356             c.d = (*it).key;
0357             return c;
0358         }
0359         checkSubstringMatch(normalizedName, normalizedCountry, (*it).key, substrMatch);
0360     }
0361 
0362     // check translated names
0363     const auto langs = KCatalog::availableCatalogLanguages("iso_3166-1");
0364     for (const auto &lang : langs) {
0365         const auto catalog = KCatalog("iso_3166-1", lang);
0366         for (auto it = cache->countryNameMapBegin(); it != cache->countryNameMapEnd(); ++it) {
0367             const auto normalizedCountry = normalizeCountryName(catalog.translate(cache->countryStringTableLookup((*it).value)));
0368             if (normalizedName == normalizedCountry) {
0369                 KCountry c;
0370                 c.d = (*it).key;
0371                 return c;
0372             }
0373             checkSubstringMatch(normalizedName, normalizedCountry, (*it).key, substrMatch);
0374         }
0375     }
0376 
0377     // unique prefix/suffix match
0378     if (substrMatch != std::numeric_limits<uint16_t>::max() && substrMatch != 0) {
0379         KCountry c;
0380         c.d = substrMatch;
0381         return c;
0382     }
0383 
0384     // fallback to code lookups
0385     if (normalizedName.size() == 3) {
0386         return fromAlpha3(normalizedName);
0387     }
0388     if (normalizedName.size() == 2) {
0389         return fromAlpha2(normalizedName);
0390     }
0391 
0392     return {};
0393 }
0394 
0395 QList<KCountry> KCountry::allCountries()
0396 {
0397     QList<KCountry> l;
0398     auto cache = IsoCodesCache::instance();
0399     cache->loadIso3166_1();
0400     l.reserve(cache->countryCount());
0401     std::transform(cache->countryNameMapBegin(), cache->countryNameMapEnd(), std::back_inserter(l), [](auto entry) {
0402         KCountry c;
0403         c.d = entry.key;
0404         return c;
0405     });
0406     return l;
0407 }
0408 
0409 QStringList KCountry::timeZoneIdsStringList() const
0410 {
0411     const auto tzIds = timeZoneIds();
0412     QStringList l;
0413     l.reserve(tzIds.size());
0414     std::transform(tzIds.begin(), tzIds.end(), std::back_inserter(l), [](const char *tzId) {
0415         return QString::fromUtf8(tzId);
0416     });
0417     return l;
0418 }
0419 
0420 #include "moc_kcountry.cpp"