File indexing completed on 2024-04-21 07:41:20

0001 /*
0002     SPDX-FileCopyrightText: 2021 Volker Krause <vkrause@kde.org>
0003 
0004     SPDX-License-Identifier: LGPL-2.0-or-later
0005 */
0006 
0007 #include "config-localedata.h"
0008 
0009 #include "isocodes_p.h"
0010 #include "isocodescache_p.h"
0011 #include "logging.h"
0012 
0013 #include <QDir>
0014 #include <QFile>
0015 #include <QFileInfo>
0016 #include <QJsonArray>
0017 #include <QJsonDocument>
0018 #include <QJsonObject>
0019 #include <QStandardPaths>
0020 
// Magic numbers written as the first 32 bit word of each binary cache file.
// Loading rejects any cache file whose first word differs from the expected
// value, which then triggers a rebuild of that cache from the JSON source.
// Increment those when changing the format.
// NOTE(review): the upper three bytes look like ASCII "KI1"/"KI2" and the
// lowest byte like a format revision counter - confirm before relying on it.
enum : uint32_t {
    Iso3166_1CacheHeader = 0x4B493101,
    Iso3166_2CacheHeader = 0x4B493201,
};
0026 
0027 static QString isoCodesPath(QStringView file)
0028 {
0029 #ifndef Q_OS_ANDROID
0030     auto path = QStandardPaths::locate(QStandardPaths::GenericDataLocation, QLatin1String("iso-codes/json/") + file, QStandardPaths::LocateFile);
0031     if (!path.isEmpty()) {
0032         return path;
0033     }
0034 
0035     // search manually in the compile-time determined prefix
0036     // needed for example for non-installed Windows binaries to work, such as unit tests
0037     for (const char *installLocation : {"/share", "/bin/data"}) {
0038         path = QLatin1String(ISO_CODES_PREFIX) + QLatin1String(installLocation) + QLatin1String("/iso-codes/json/") + file;
0039         if (QFileInfo::exists(path)) {
0040             return path;
0041         }
0042     }
0043 
0044     return {};
0045 #else
0046     return QLatin1String("assets:/share/iso-codes/json/") + file;
0047 #endif
0048 }
0049 
0050 static QString cachePath()
0051 {
0052     return QStandardPaths::writableLocation(QStandardPaths::GenericCacheLocation) + QLatin1String("/org.kde.ki18n/iso-codes/");
0053 }
0054 
0055 static QString cacheFilePath(QStringView file)
0056 {
0057     return cachePath() + file;
0058 }
0059 
// Destructor is defaulted here, out of line, rather than in the class declaration.
IsoCodesCache::~IsoCodesCache() = default;
0061 
0062 IsoCodesCache *IsoCodesCache::instance()
0063 {
0064     static IsoCodesCache s_cache;
0065     return &s_cache;
0066 }
0067 
0068 void IsoCodesCache::loadIso3166_1()
0069 {
0070     if (!m_iso3166_1CacheData && !loadIso3166_1Cache()) {
0071         QDir().mkpath(cachePath());
0072         createIso3166_1Cache(isoCodesPath(u"iso_3166-1.json"), cacheFilePath(u"iso_3166-1"));
0073         loadIso3166_1Cache();
0074     }
0075 }
0076 
0077 static std::unique_ptr<QFile> openCacheFile(QStringView cacheFileName, QStringView isoCodesFileName)
0078 {
0079     QFileInfo jsonFi(isoCodesPath(isoCodesFileName));
0080     if (!jsonFi.exists()) { // no source file means we can only use an embedded cache
0081         auto f = std::make_unique<QFile>(QLatin1String(":/org.kde.ki18n/iso-codes/cache/") + cacheFileName);
0082         if (!f->open(QFile::ReadOnly) || f->size() < 8) {
0083             return {};
0084         }
0085         return f;
0086     }
0087     auto f = std::make_unique<QFile>(cacheFilePath(cacheFileName));
0088     if (!f->open(QFile::ReadOnly) || f->fileTime(QFile::FileModificationTime) < jsonFi.lastModified() || f->size() < 8) {
0089         return {};
0090     }
0091     return f;
0092 }
0093 
0094 bool IsoCodesCache::loadIso3166_1Cache()
0095 {
0096     auto f = openCacheFile(u"iso_3166-1", u"iso_3166-1.json");
0097     if (!f) {
0098         return false;
0099     }
0100     m_iso3166_1CacheSize = f->size();
0101 
0102     // validate cache file is usable
0103     // header matches
0104     const auto data = f->map(0, m_iso3166_1CacheSize);
0105     if (*reinterpret_cast<const uint32_t *>(data) != Iso3166_1CacheHeader) {
0106         return false;
0107     }
0108     // lookup tables fit into the available size
0109     const auto size = *(reinterpret_cast<const uint32_t *>(data) + 1);
0110     if (sizeof(Iso3166_1CacheHeader) + sizeof(size) + size * sizeof(MapEntry<uint16_t>) * 2 >= m_iso3166_1CacheSize) {
0111         return false;
0112     }
0113     // string table is 0 terminated
0114     if (data[m_iso3166_1CacheSize - 1] != '\0') {
0115         return false;
0116     }
0117 
0118     m_iso3166_1CacheFile = std::move(f);
0119     m_iso3166_1CacheData = data;
0120     return true;
0121 }
0122 
0123 uint32_t IsoCodesCache::countryCount() const
0124 {
0125     return m_iso3166_1CacheData ? *(reinterpret_cast<const uint32_t *>(m_iso3166_1CacheData) + 1) : 0;
0126 }
0127 
0128 const MapEntry<uint16_t> *IsoCodesCache::countryNameMapBegin() const
0129 {
0130     return m_iso3166_1CacheData ? reinterpret_cast<const MapEntry<uint16_t> *>(m_iso3166_1CacheData + sizeof(uint32_t) * 2) : nullptr;
0131 }
0132 
0133 const MapEntry<uint16_t> *IsoCodesCache::countryAlpha3MapBegin() const
0134 {
0135     return m_iso3166_1CacheData ? countryNameMapBegin() + countryCount() : nullptr;
0136 }
0137 
0138 const char *IsoCodesCache::countryStringTableLookup(uint16_t offset) const
0139 {
0140     if (m_iso3166_1CacheData) {
0141         const auto pos = offset + 2 * sizeof(uint32_t) + 2 * countryCount() * sizeof(MapEntry<uint16_t>);
0142         return m_iso3166_1CacheSize > pos ? reinterpret_cast<const char *>(m_iso3166_1CacheData + pos) : nullptr;
0143     }
0144     return nullptr;
0145 }
0146 
0147 void IsoCodesCache::createIso3166_1Cache(const QString &isoCodesPath, const QString &cacheFilePath)
0148 {
0149     qCDebug(KI18NLD) << "Rebuilding ISO 3166-1 cache";
0150 
0151     QFile file(isoCodesPath);
0152     if (!file.open(QFile::ReadOnly)) {
0153         qCWarning(KI18NLD) << "Unable to open iso_3166-1.json" << isoCodesPath << file.errorString();
0154         return;
0155     }
0156 
0157     std::vector<MapEntry<uint16_t>> alpha2NameMap;
0158     std::vector<MapEntry<uint16_t>> alpha3alpha2Map;
0159     QByteArray iso3166_1stringTable;
0160 
0161     const auto doc = QJsonDocument::fromJson(file.readAll());
0162     const auto array = doc.object().value(QLatin1String("3166-1")).toArray();
0163     for (const auto &entryVal : array) {
0164         const auto entry = entryVal.toObject();
0165         const auto alpha2 = entry.value(QLatin1String("alpha_2")).toString();
0166         if (alpha2.size() != 2) {
0167             continue;
0168         }
0169         const auto alpha2Key = IsoCodes::alpha2CodeToKey(alpha2);
0170 
0171         assert(std::numeric_limits<uint16_t>::max() > iso3166_1stringTable.size());
0172         alpha2NameMap.push_back({alpha2Key, (uint16_t)iso3166_1stringTable.size()});
0173         iso3166_1stringTable.append(entry.value(QLatin1String("name")).toString().toUtf8());
0174         iso3166_1stringTable.append('\0');
0175 
0176         const auto alpha3Key = IsoCodes::alpha3CodeToKey(entry.value(QLatin1String("alpha_3")).toString());
0177         alpha3alpha2Map.push_back({alpha3Key, alpha2Key});
0178     }
0179 
0180     std::sort(alpha2NameMap.begin(), alpha2NameMap.end());
0181     std::sort(alpha3alpha2Map.begin(), alpha3alpha2Map.end());
0182 
0183     // write out binary cache file
0184     QFile cache(cacheFilePath);
0185     if (!cache.open(QFile::WriteOnly)) {
0186         qCWarning(KI18NLD) << "Failed to write ISO 3166-1 cache:" << cache.errorString() << cache.fileName();
0187         return;
0188     }
0189 
0190     uint32_t n = Iso3166_1CacheHeader;
0191     cache.write(reinterpret_cast<const char *>(&n), 4); // header
0192     n = alpha2NameMap.size();
0193     cache.write(reinterpret_cast<const char *>(&n), 4); // size
0194     for (auto entry : alpha2NameMap) {
0195         cache.write(reinterpret_cast<const char *>(&entry), sizeof(entry));
0196     }
0197     for (auto entry : alpha3alpha2Map) {
0198         cache.write(reinterpret_cast<const char *>(&entry), sizeof(entry));
0199     }
0200     cache.write(iso3166_1stringTable);
0201 }
0202 
0203 void IsoCodesCache::loadIso3166_2()
0204 {
0205     if (!m_iso3166_2CacheData && !loadIso3166_2Cache()) {
0206         QDir().mkpath(cachePath());
0207         createIso3166_2Cache(isoCodesPath(u"iso_3166-2.json"), cacheFilePath(u"iso_3166-2"));
0208         loadIso3166_2Cache();
0209     }
0210 }
0211 
0212 bool IsoCodesCache::loadIso3166_2Cache()
0213 {
0214     auto f = openCacheFile(u"iso_3166-2", u"iso_3166-2.json");
0215     if (!f) {
0216         return false;
0217     }
0218     m_iso3166_2CacheSize = f->size();
0219 
0220     // validate cache file is usable
0221     // header matches
0222     const auto data = f->map(0, m_iso3166_2CacheSize);
0223     if (*reinterpret_cast<const uint32_t *>(data) != Iso3166_2CacheHeader) {
0224         return false;
0225     }
0226     // name lookup table fits into the available size
0227     auto size = *(reinterpret_cast<const uint32_t *>(data) + 1);
0228     auto offset = 3 * sizeof(uint32_t) + size * sizeof(MapEntry<uint32_t>);
0229     if (offset >= m_iso3166_2CacheSize) {
0230         return false;
0231     }
0232     // hierarchy map boundary check
0233     size = *(reinterpret_cast<const uint32_t *>(data + offset) - 1);
0234     offset += size * sizeof(MapEntry<uint32_t>);
0235     if (offset >= m_iso3166_2CacheSize) {
0236         return false;
0237     }
0238     // string table is 0 terminated
0239     if (data[m_iso3166_2CacheSize - 1] != '\0') {
0240         return false;
0241     }
0242 
0243     m_iso3166_2CacheFile = std::move(f);
0244     m_iso3166_2CacheData = data;
0245     return true;
0246 }
0247 
0248 uint32_t IsoCodesCache::subdivisionCount() const
0249 {
0250     return m_iso3166_2CacheData ? *(reinterpret_cast<const uint32_t *>(m_iso3166_2CacheData) + 1) : 0;
0251 }
0252 
0253 const MapEntry<uint32_t> *IsoCodesCache::subdivisionNameMapBegin() const
0254 {
0255     return m_iso3166_2CacheData ? reinterpret_cast<const MapEntry<uint32_t> *>(m_iso3166_2CacheData + 2 * sizeof(uint32_t)) : nullptr;
0256 }
0257 
0258 uint32_t IsoCodesCache::subdivisionHierachyMapSize() const
0259 {
0260     return m_iso3166_2CacheData
0261         ? *(reinterpret_cast<const uint32_t *>(m_iso3166_2CacheData + 2 * sizeof(uint32_t) + subdivisionCount() * sizeof(MapEntry<uint32_t>)))
0262         : 0;
0263 }
0264 
0265 const MapEntry<uint32_t> *IsoCodesCache::subdivisionParentMapBegin() const
0266 {
0267     return m_iso3166_2CacheData
0268         ? reinterpret_cast<const MapEntry<uint32_t> *>(m_iso3166_2CacheData + 3 * sizeof(uint32_t) + subdivisionCount() * sizeof(MapEntry<uint32_t>))
0269         : nullptr;
0270 }
0271 
0272 const char *IsoCodesCache::subdivisionStringTableLookup(uint16_t offset) const
0273 {
0274     if (m_iso3166_2CacheData) {
0275         const auto pos = offset + 3 * sizeof(uint32_t) + (subdivisionCount() + subdivisionHierachyMapSize()) * sizeof(MapEntry<uint32_t>);
0276         return m_iso3166_2CacheSize > pos ? reinterpret_cast<const char *>(m_iso3166_2CacheData + pos) : nullptr;
0277     }
0278     return nullptr;
0279 }
0280 
0281 void IsoCodesCache::createIso3166_2Cache(const QString &isoCodesPath, const QString &cacheFilePath)
0282 {
0283     qCDebug(KI18NLD) << "Rebuilding ISO 3166-2 cache";
0284     QFile file(isoCodesPath);
0285     if (!file.open(QFile::ReadOnly)) {
0286         qCWarning(KI18NLD) << "Unable to open iso_3166-2.json" << isoCodesPath << file.errorString();
0287         return;
0288     }
0289 
0290     std::vector<MapEntry<uint32_t>> subdivNameMap;
0291     std::vector<MapEntry<uint32_t>> subdivParentMap;
0292     QByteArray iso3166_2stringTable;
0293 
0294     const auto doc = QJsonDocument::fromJson(file.readAll());
0295     const auto array = doc.object().value(QLatin1String("3166-2")).toArray();
0296     for (const auto &entryVal : array) {
0297         const auto entry = entryVal.toObject();
0298         const auto key = IsoCodes::subdivisionCodeToKey(entry.value(QLatin1String("code")).toString());
0299 
0300         assert(std::numeric_limits<uint16_t>::max() > iso3166_2stringTable.size());
0301         subdivNameMap.push_back({key, (uint16_t)iso3166_2stringTable.size()});
0302         iso3166_2stringTable.append(entry.value(QLatin1String("name")).toString().toUtf8());
0303         iso3166_2stringTable.append('\0');
0304 
0305         const auto parentKey = IsoCodes::parentCodeToKey(entry.value(QLatin1String("parent")).toString());
0306         if (parentKey) {
0307             subdivParentMap.push_back({key, parentKey});
0308         }
0309     }
0310 
0311     std::sort(subdivNameMap.begin(), subdivNameMap.end());
0312     std::sort(subdivParentMap.begin(), subdivParentMap.end());
0313 
0314     // write out binary cache file
0315     QFile cache(cacheFilePath);
0316     if (!cache.open(QFile::WriteOnly)) {
0317         qCWarning(KI18NLD) << "Failed to write ISO 3166-2 cache:" << cache.errorString() << cache.fileName();
0318         return;
0319     }
0320 
0321     uint32_t n = Iso3166_2CacheHeader;
0322     cache.write(reinterpret_cast<const char *>(&n), 4); // header
0323     n = subdivNameMap.size();
0324     cache.write(reinterpret_cast<const char *>(&n), 4); // size of the name map
0325     for (auto entry : subdivNameMap) {
0326         cache.write(reinterpret_cast<const char *>(&entry), sizeof(entry));
0327     }
0328     n = subdivParentMap.size();
0329     cache.write(reinterpret_cast<const char *>(&n), 4); // size of the hierarchy map
0330     for (auto entry : subdivParentMap) {
0331         cache.write(reinterpret_cast<const char *>(&entry), sizeof(entry));
0332     }
0333     cache.write(iso3166_2stringTable);
0334 }