File indexing completed on 2024-05-12 04:42:18

0001 /*
0002     SPDX-FileCopyrightText: 2020 Volker Krause <vkrause@kde.org>
0003 
0004     SPDX-License-Identifier: LGPL-2.0-or-later
0005 */
0006 
0007 #include "o5mparser.h"
0008 #include "o5m.h"
0009 #include "datatypes.h"
0010 #include "datasetmergebuffer.h"
0011 
0012 #include <QDebug>
0013 
0014 #include <cstdlib>
0015 #include <cstring>
0016 
0017 using namespace OSM;
0018 
0019 O5mParser::O5mParser(DataSet *dataSet)
0020     : AbstractReader(dataSet)
0021 {
0022     m_stringLookupTable.resize(O5M_STRING_TABLE_SIZE);
0023 }
0024 
0025 void O5mParser::readFromData(const uint8_t* data, std::size_t len)
0026 {
0027     std::fill(m_stringLookupTable.begin(), m_stringLookupTable.end(), nullptr);
0028     resetDeltaCodingState();
0029 
0030     const auto endIt = data + len;
0031     for (auto it = data; it < endIt - 1;) {
0032         const auto blockType = (*it);
0033         if (blockType == O5M_BLOCK_RESET) {
0034             resetDeltaCodingState();
0035             ++it;
0036             continue;
0037         }
0038 
0039         auto blockSize = readUnsigned(++it, endIt);
0040         if (blockSize >= (uint64_t)(endIt - it)) {
0041             qWarning() << "premature end of file, or blocksize too large" << (endIt - it) << blockType << blockSize;
0042             break;
0043         }
0044         switch (blockType) {
0045             case O5M_BLOCK_HEADER:
0046                 if (blockSize != 4 || std::strncmp(reinterpret_cast<const char*>(it), O5M_HEADER, 4) != 0) {
0047                     qWarning() << "Invalid file header";
0048                     return;
0049                 }
0050                 break;
0051             case O5M_BLOCK_BOUNDING_BOX:
0052             case O5M_BLOCK_TIMESTAMP:
0053                 // not of interest at the moment
0054                 break;
0055             case O5M_BLOCK_NODE:
0056                 readNode(it, it + blockSize);
0057                 break;
0058             case O5M_BLOCK_WAY:
0059                 readWay(it, it + blockSize);
0060                 break;
0061             case O5M_BLOCK_RELATION:
0062                 readRelation(it, it + blockSize);
0063                 break;
0064             default:
0065                 qDebug() << "unhandled o5m block type:" << (it - data) << blockType << blockSize;
0066         }
0067 
0068         it += blockSize;
0069     }
0070 }
0071 
0072 uint64_t O5mParser::readUnsigned(const uint8_t *&it, const uint8_t *endIt) const
0073 {
0074     uint64_t result = 0;
0075     int i = 0;
0076     for (; it < endIt && ((*it) & O5M_NUMBER_CONTINUATION); ++it, ++i) {
0077         result |= ((*it) & O5M_NUMBER_MASK) << (i * 7);
0078     }
0079     result |= ((uint64_t)(*it++) & O5M_NUMBER_MASK) << (i * 7);
0080     return result;
0081 }
0082 
0083 int64_t O5mParser::readSigned(const uint8_t *&it, const uint8_t *endIt) const
0084 {
0085     const uint64_t u = readUnsigned(it, endIt);
0086     return (u & O5M_NUMBER_SIGNED_BIT) ? (-(u >> 1) -1) : (u >> 1);
0087 }
0088 
0089 template <typename T>
0090 T O5mParser::readDelta(const uint8_t *&it, const uint8_t *endIt, T &deltaState)
0091 {
0092     deltaState += (T)readSigned(it, endIt);
0093     return deltaState;
0094 }
0095 
0096 const char* O5mParser::readString(const uint8_t *&it, const uint8_t *endIt)
0097 {
0098     auto ref = readUnsigned(it, endIt);
0099     if (ref) {
0100         return m_stringLookupTable[(m_stringLookupPosition + O5M_STRING_TABLE_SIZE - ref) % O5M_STRING_TABLE_SIZE];
0101     } else {
0102         const auto s = reinterpret_cast<const char*>(it);
0103         const auto len = std::strlen(s);
0104         if (len <= O5M_STRING_TABLE_MAXLEN) {
0105             m_stringLookupTable[m_stringLookupPosition] = s;
0106             m_stringLookupPosition = (m_stringLookupPosition + 1) % O5M_STRING_TABLE_SIZE;
0107         }
0108         it += len + 1;
0109         return s;
0110     }
0111 }
0112 
0113 std::pair<const char*, const char*> O5mParser::readStringPair(const uint8_t *&it, const uint8_t *endIt)
0114 {
0115     auto ref = readUnsigned(it, endIt);
0116     if (ref) {
0117         const auto s = m_stringLookupTable[(m_stringLookupPosition + O5M_STRING_TABLE_SIZE - ref) % O5M_STRING_TABLE_SIZE];
0118         if (!s) {
0119             return {};
0120         }
0121         const auto len1 = std::strlen(s);
0122         return std::make_pair(s, s + len1 + 1);
0123     } else {
0124         const auto s = reinterpret_cast<const char*>(it);
0125         const auto len1 = std::strlen(s);
0126         const auto len2 = std::strlen(s + len1 + 1);
0127 
0128         if (len1 + len2 <= O5M_STRING_TABLE_MAXLEN) {
0129             m_stringLookupTable[m_stringLookupPosition] = s;
0130             m_stringLookupPosition = (m_stringLookupPosition + 1) % O5M_STRING_TABLE_SIZE;
0131         }
0132 
0133         it += len1 + len2 + 2;
0134         return std::make_pair(s, s + len1 + 1);
0135     }
0136 }
0137 
0138 void O5mParser::skipVersionInformation(const uint8_t *&it, const uint8_t *end)
0139 {
0140     if (it >= end) { return; }
0141     const auto version = readUnsigned(it, end);
0142     if (version > 0) {
0143         qWarning() << "skipping changeset data not implemented yet!";
0144         //    timestamp (seconds since 1970, signed, delta-coded)
0145         //    author information – only if timestamp is not 0:
0146         //        changeset (signed, delta-coded)
0147         //        uid, user (string pair)
0148         it = end;
0149     }
0150 }
0151 
0152 template<typename Elem>
0153 void O5mParser::readTagOrBbox(Elem &e, const uint8_t *&it, const uint8_t *endIt)
0154 {
0155     const auto tagData = readStringPair(it, endIt);
0156     if (!tagData.first) {
0157         return;
0158     }
0159     if (std::strcmp(tagData.first, "bBox") == 0) {
0160         char *next = nullptr;
0161         const auto lon1 = std::strtod(tagData.second, &next);
0162         ++next;
0163         const auto lat1 = std::strtod(next, &next);
0164         ++next;
0165         const auto lon2 = std::strtod(next, &next);
0166         ++next;
0167         const auto lat2 = std::strtod(next, &next);
0168         e.bbox = OSM::BoundingBox(OSM::Coordinate(lat1, lon1), OSM::Coordinate(lat2, lon2));
0169         return;
0170     }
0171 
0172     OSM::Tag tag;
0173     tag.key = m_dataSet->makeTagKey(tagData.first, OSM::StringMemory::Transient); // TODO make use of mmap'ed data for this
0174     tag.value = QByteArray(tagData.second);
0175     e.tags.push_back(std::move(tag));
0176 }
0177 
0178 void O5mParser::readNode(const uint8_t *begin, const uint8_t *end)
0179 {
0180     OSM::Node node;
0181 
0182     auto it = begin;
0183     node.id = readDelta(it, end, m_nodeIdDelta);
0184     skipVersionInformation(it, end);
0185     if (it >= end) { return; }
0186 
0187     node.coordinate.longitude = (int64_t)readDelta(it, end, m_lonDelta) + 1'800'000'000ll;
0188     node.coordinate.latitude = (int64_t)readDelta(it, end, m_latDelata) + 900'000'000ll;
0189 
0190     while (it < end) {
0191         OSM::Tag tag;
0192         const auto tagData = readStringPair(it, end);
0193         if (tagData.first) {
0194             tag.key = m_dataSet->makeTagKey(tagData.first, OSM::StringMemory::Transient); // TODO use the fact this is mmap'ed data here
0195             tag.value = QByteArray(tagData.second);
0196             node.tags.push_back(std::move(tag));
0197         }
0198     }
0199     std::sort(node.tags.begin(), node.tags.end());
0200 
0201     addNode(std::move(node));
0202 }
0203 
0204 void O5mParser::readWay(const uint8_t *begin, const uint8_t *end)
0205 {
0206     OSM::Way way;
0207 
0208     auto it = begin;
0209     way.id = readDelta(it, end, m_wayIdDelta);
0210     skipVersionInformation(it, end);
0211     if (it >= end) { return; }
0212 
0213     const auto nodesBlockSize = readUnsigned(it, end);
0214     if (it + nodesBlockSize > end) { return; }
0215 
0216     const auto nodesBlockEnd = it + nodesBlockSize;
0217     while(it < nodesBlockEnd) {
0218         way.nodes.push_back(readDelta(it, end, m_wayNodeIdDelta));
0219     }
0220 
0221     while (it < end) {
0222         readTagOrBbox(way, it, end);
0223     }
0224     std::sort(way.tags.begin(), way.tags.end());
0225 
0226    addWay(std::move(way));
0227 }
0228 
0229 void O5mParser::readRelation(const uint8_t *begin, const uint8_t *end)
0230 {
0231     OSM::Relation rel;
0232 
0233     auto it = begin;
0234     rel.id = readDelta(it, end, m_relIdDelta);
0235     skipVersionInformation(it, end);
0236     if (it >= end) { return; }
0237 
0238     const auto relBlockSize = readUnsigned(it, end);
0239     if (it + relBlockSize > end) { return; }
0240 
0241     const auto relBlockEnd = it + relBlockSize;
0242     while (it < relBlockEnd) {
0243         const int64_t memId = readSigned(it, end);
0244         OSM::Member mem;
0245         const auto typeAndRole = readString(it, end);
0246         switch (typeAndRole[0]) {
0247             case O5M_MEMTYPE_NODE:
0248                 mem.id = m_relNodeMemberIdDelta += memId;
0249                 mem.setType(OSM::Type::Node);
0250                 break;
0251             case O5M_MEMTYPE_WAY:
0252                 mem.id = m_relWayMemberIdDelta += memId;
0253                 mem.setType(OSM::Type::Way);
0254                 break;
0255             case O5M_MEMTYPE_RELATION:
0256                 mem.id = m_relRelMemberIdDelta += memId;
0257                 mem.setType(OSM::Type::Relation);
0258                 break;
0259         }
0260         mem.setRole(m_dataSet->makeRole(typeAndRole + 1, OSM::StringMemory::Transient));
0261 
0262         rel.members.push_back(std::move(mem));
0263     }
0264 
0265 
0266 
0267     while (it < end) {
0268         readTagOrBbox(rel, it, end);
0269     }
0270     std::sort(rel.tags.begin(), rel.tags.end());
0271 
0272     addRelation(std::move(rel));
0273 }
0274 
0275 void O5mParser::resetDeltaCodingState()
0276 {
0277     m_nodeIdDelta = 0;
0278     m_latDelata = 0;
0279     m_lonDelta = 0;
0280 
0281     m_wayIdDelta = 0;
0282     m_wayNodeIdDelta = 0;
0283 
0284     m_relIdDelta = 0;
0285     m_relNodeMemberIdDelta = 0;
0286     m_relWayMemberIdDelta = 0;
0287     m_relRelMemberIdDelta = 0;
0288 }