File indexing completed on 2025-10-26 04:25:08

0001 /*
0002     SPDX-FileCopyrightText: 2020 Volker Krause <vkrause@kde.org>
0003 
0004     SPDX-License-Identifier: LGPL-2.0-or-later
0005 */
0006 
0007 #include "o5mparser.h"
0008 #include "datatypes.h"
0009 #include "datasetmergebuffer.h"
0010 
0011 #include <QDebug>
0012 
0013 #include <cstdlib>
0014 #include <cstring>
0015 
0016 using namespace OSM;
0017 
// Single-byte constants of the o5m encoding as used by this parser.
enum : uint8_t {
    // Block type markers: the first byte of every block in the input.
    O5M_BLOCK_NODE = 0x10,
    O5M_BLOCK_WAY = 0x11,
    O5M_BLOCK_RELATION = 0x12,
    O5M_BLOCK_BOUNDING_BOX = 0xdb,
    O5M_BLOCK_TIMESTAMP = 0xdc,
    O5M_BLOCK_HEADER = 0xe0,
    O5M_BLOCK_RESET = 0xff,

    // Variable-length number encoding: 7 payload bits per byte, top bit set when
    // more bytes follow; for signed numbers the lowest bit carries the sign.
    O5M_NUMBER_CONTINUATION = 0x80,
    O5M_NUMBER_MASK = 0x7f,
    O5M_NUMBER_SIGNED_BIT = 0x01,

    // First character of a relation member's type-and-role string.
    O5M_MEMTYPE_NODE = 0x30,
    O5M_MEMTYPE_WAY = 0x31,
    O5M_MEMTYPE_RELATION = 0x32,
};
0035 
// Parameters of the string reference table used by readString()/readStringPair().
enum : uint16_t {
    // Strings (or string pairs) longer than this are not entered into the table.
    O5M_STRING_TABLE_MAXLEN = 250,
    // Number of slots in the table; it is used as a ring buffer.
    O5M_STRING_TABLE_SIZE = 15000,
};
0040 
0041 O5mParser::O5mParser(DataSet *dataSet)
0042     : AbstractReader(dataSet)
0043 {
0044     m_stringLookupTable.resize(O5M_STRING_TABLE_SIZE);
0045 }
0046 
0047 void O5mParser::readFromData(const uint8_t* data, std::size_t len)
0048 {
0049     std::fill(m_stringLookupTable.begin(), m_stringLookupTable.end(), nullptr);
0050     resetDeltaCodingState();
0051 
0052     const auto endIt = data + len;
0053     for (auto it = data; it < endIt - 1;) {
0054         const auto blockType = (*it);
0055         if (blockType == O5M_BLOCK_RESET) {
0056             resetDeltaCodingState();
0057             ++it;
0058             continue;
0059         }
0060 
0061         auto blockSize = readUnsigned(++it, endIt);
0062         if (blockSize >= (uint64_t)(endIt - it)) {
0063             qWarning() << "premature end of file, or blocksize too large" << (endIt - it) << blockType << blockSize;
0064             break;
0065         }
0066         switch (blockType) {
0067             case O5M_BLOCK_HEADER:
0068                 if (blockSize != 4 || std::strncmp(reinterpret_cast<const char*>(it), "o5m2", 4) != 0) {
0069                     qWarning() << "Invalid file header";
0070                     return;
0071                 }
0072                 break;
0073             case O5M_BLOCK_BOUNDING_BOX:
0074             case O5M_BLOCK_TIMESTAMP:
0075                 // not of interest at the moment
0076                 break;
0077             case O5M_BLOCK_NODE:
0078                 readNode(it, it + blockSize);
0079                 break;
0080             case O5M_BLOCK_WAY:
0081                 readWay(it, it + blockSize);
0082                 break;
0083             case O5M_BLOCK_RELATION:
0084                 readRelation(it, it + blockSize);
0085                 break;
0086             default:
0087                 qDebug() << "unhandled o5m block type:" << (it - data) << blockType << blockSize;
0088         }
0089 
0090         it += blockSize;
0091     }
0092 }
0093 
0094 uint64_t O5mParser::readUnsigned(const uint8_t *&it, const uint8_t *endIt) const
0095 {
0096     uint64_t result = 0;
0097     int i = 0;
0098     for (; it < endIt && ((*it) & O5M_NUMBER_CONTINUATION); ++it, ++i) {
0099         result |= ((*it) & O5M_NUMBER_MASK) << (i * 7);
0100     }
0101     result |= ((uint64_t)(*it++) & O5M_NUMBER_MASK) << (i * 7);
0102     return result;
0103 }
0104 
0105 int64_t O5mParser::readSigned(const uint8_t *&it, const uint8_t *endIt) const
0106 {
0107     const uint64_t u = readUnsigned(it, endIt);
0108     return (u & O5M_NUMBER_SIGNED_BIT) ? (-(u >> 1) -1) : (u >> 1);
0109 }
0110 
0111 template <typename T>
0112 T O5mParser::readDelta(const uint8_t *&it, const uint8_t *endIt, T &deltaState)
0113 {
0114     deltaState += (T)readSigned(it, endIt);
0115     return deltaState;
0116 }
0117 
0118 const char* O5mParser::readString(const uint8_t *&it, const uint8_t *endIt)
0119 {
0120     auto ref = readUnsigned(it, endIt);
0121     if (ref) {
0122         return m_stringLookupTable[(m_stringLookupPosition + O5M_STRING_TABLE_SIZE - ref) % O5M_STRING_TABLE_SIZE];
0123     } else {
0124         const auto s = reinterpret_cast<const char*>(it);
0125         const auto len = std::strlen(s);
0126         if (len <= O5M_STRING_TABLE_MAXLEN) {
0127             m_stringLookupTable[m_stringLookupPosition] = s;
0128             m_stringLookupPosition = (m_stringLookupPosition + 1) % O5M_STRING_TABLE_SIZE;
0129         }
0130         it += len + 1;
0131         return s;
0132     }
0133 }
0134 
0135 std::pair<const char*, const char*> O5mParser::readStringPair(const uint8_t *&it, const uint8_t *endIt)
0136 {
0137     auto ref = readUnsigned(it, endIt);
0138     if (ref) {
0139         const auto s = m_stringLookupTable[(m_stringLookupPosition + O5M_STRING_TABLE_SIZE - ref) % O5M_STRING_TABLE_SIZE];
0140         if (!s) {
0141             return {};
0142         }
0143         const auto len1 = std::strlen(s);
0144         return std::make_pair(s, s + len1 + 1);
0145     } else {
0146         const auto s = reinterpret_cast<const char*>(it);
0147         const auto len1 = std::strlen(s);
0148         const auto len2 = std::strlen(s + len1 + 1);
0149 
0150         if (len1 + len2 <= O5M_STRING_TABLE_MAXLEN) {
0151             m_stringLookupTable[m_stringLookupPosition] = s;
0152             m_stringLookupPosition = (m_stringLookupPosition + 1) % O5M_STRING_TABLE_SIZE;
0153         }
0154 
0155         it += len1 + len2 + 2;
0156         return std::make_pair(s, s + len1 + 1);
0157     }
0158 }
0159 
0160 void O5mParser::skipVersionInformation(const uint8_t *&it, const uint8_t *end)
0161 {
0162     if (it >= end) { return; }
0163     const auto version = readUnsigned(it, end);
0164     if (version > 0) {
0165         qWarning() << "skipping changeset data not implemented yet!";
0166         //    timestamp (seconds since 1970, signed, delta-coded)
0167         //    author information – only if timestamp is not 0:
0168         //        changeset (signed, delta-coded)
0169         //        uid, user (string pair)
0170         it = end;
0171     }
0172 }
0173 
0174 template<typename Elem>
0175 void O5mParser::readTagOrBbox(Elem &e, const uint8_t *&it, const uint8_t *endIt)
0176 {
0177     const auto tagData = readStringPair(it, endIt);
0178     if (!tagData.first) {
0179         return;
0180     }
0181     if (std::strcmp(tagData.first, "bBox") == 0) {
0182         char *next = nullptr;
0183         const auto lon1 = std::strtod(tagData.second, &next);
0184         ++next;
0185         const auto lat1 = std::strtod(next, &next);
0186         ++next;
0187         const auto lon2 = std::strtod(next, &next);
0188         ++next;
0189         const auto lat2 = std::strtod(next, &next);
0190         e.bbox = OSM::BoundingBox(OSM::Coordinate(lat1, lon1), OSM::Coordinate(lat2, lon2));
0191         return;
0192     }
0193 
0194     OSM::Tag tag;
0195     tag.key = m_dataSet->makeTagKey(tagData.first, DataSet::StringIsTransient); // TODO make use of mmap'ed data for this
0196     tag.value = QByteArray(tagData.second);
0197     OSM::setTag(e, std::move(tag));
0198 }
0199 
0200 void O5mParser::readNode(const uint8_t *begin, const uint8_t *end)
0201 {
0202     OSM::Node node;
0203 
0204     auto it = begin;
0205     node.id = readDelta(it, end, m_nodeIdDelta);
0206     skipVersionInformation(it, end);
0207     if (it >= end) { return; }
0208 
0209     node.coordinate.longitude = (int64_t)readDelta(it, end, m_lonDelta) + 1'800'000'000ll;
0210     node.coordinate.latitude = (int64_t)readDelta(it, end, m_latDelata) + 900'000'000ll;
0211 
0212     while (it < end) {
0213         OSM::Tag tag;
0214         const auto tagData = readStringPair(it, end);
0215         if (tagData.first) {
0216             tag.key = m_dataSet->makeTagKey(tagData.first, DataSet::StringIsTransient); // TODO use the fact this is mmap'ed data here
0217             tag.value = QByteArray(tagData.second);
0218             OSM::setTag(node, std::move(tag));
0219         }
0220     }
0221 
0222     addNode(std::move(node));
0223 }
0224 
0225 void O5mParser::readWay(const uint8_t *begin, const uint8_t *end)
0226 {
0227     OSM::Way way;
0228 
0229     auto it = begin;
0230     way.id = readDelta(it, end, m_wayIdDelta);
0231     skipVersionInformation(it, end);
0232     if (it >= end) { return; }
0233 
0234     const auto nodesBlockSize = readUnsigned(it, end);
0235     if (it + nodesBlockSize > end) { return; }
0236 
0237     const auto nodesBlockEnd = it + nodesBlockSize;
0238     while(it < nodesBlockEnd) {
0239         way.nodes.push_back(readDelta(it, end, m_wayNodeIdDelta));
0240     }
0241 
0242     while (it < end) {
0243         readTagOrBbox(way, it, end);
0244     }
0245 
0246    addWay(std::move(way));
0247 }
0248 
0249 void O5mParser::readRelation(const uint8_t *begin, const uint8_t *end)
0250 {
0251     OSM::Relation rel;
0252 
0253     auto it = begin;
0254     rel.id = readDelta(it, end, m_relIdDelta);
0255     skipVersionInformation(it, end);
0256     if (it >= end) { return; }
0257 
0258     const auto relBlockSize = readUnsigned(it, end);
0259     if (it + relBlockSize > end) { return; }
0260 
0261     const auto relBlockEnd = it + relBlockSize;
0262     while (it < relBlockEnd) {
0263         const int64_t memId = readSigned(it, end);
0264         OSM::Member mem;
0265         const auto typeAndRole = readString(it, end);
0266         switch (typeAndRole[0]) {
0267             case O5M_MEMTYPE_NODE:
0268                 mem.id = m_relNodeMemberIdDelta += memId;
0269                 mem.setType(OSM::Type::Node);
0270                 break;
0271             case O5M_MEMTYPE_WAY:
0272                 mem.id = m_relWayMemberIdDelta += memId;
0273                 mem.setType(OSM::Type::Way);
0274                 break;
0275             case O5M_MEMTYPE_RELATION:
0276                 mem.id = m_relRelMemberIdDelta += memId;
0277                 mem.setType(OSM::Type::Relation);
0278                 break;
0279         }
0280         mem.setRole(m_dataSet->makeRole(typeAndRole + 1, DataSet::StringIsTransient));
0281 
0282         rel.members.push_back(std::move(mem));
0283     }
0284 
0285 
0286 
0287     while (it < end) {
0288         readTagOrBbox(rel, it, end);
0289     }
0290 
0291     addRelation(std::move(rel));
0292 }
0293 
0294 void O5mParser::resetDeltaCodingState()
0295 {
0296     m_nodeIdDelta = 0;
0297     m_latDelata = 0;
0298     m_lonDelta = 0;
0299 
0300     m_wayIdDelta = 0;
0301     m_wayNodeIdDelta = 0;
0302 
0303     m_relIdDelta = 0;
0304     m_relNodeMemberIdDelta = 0;
0305     m_relWayMemberIdDelta = 0;
0306     m_relRelMemberIdDelta = 0;
0307 }