File indexing completed on 2024-12-08 10:16:04
0001 /* 0002 SPDX-FileCopyrightText: 2020 Volker Krause <vkrause@kde.org> 0003 0004 SPDX-License-Identifier: LGPL-2.0-or-later 0005 */ 0006 0007 #include "o5mparser.h" 0008 #include "datatypes.h" 0009 #include "datasetmergebuffer.h" 0010 0011 #include <QDebug> 0012 0013 #include <cstdlib> 0014 #include <cstring> 0015 0016 using namespace OSM; 0017 0018 enum : uint8_t { 0019 O5M_BLOCK_RESET = 0xff, 0020 O5M_BLOCK_NODE = 0x10, 0021 O5M_BLOCK_WAY = 0x11, 0022 O5M_BLOCK_RELATION = 0x12, 0023 O5M_BLOCK_BOUNDING_BOX = 0xdb, 0024 O5M_BLOCK_TIMESTAMP = 0xdc, 0025 O5M_BLOCK_HEADER = 0xe0, 0026 0027 O5M_NUMBER_CONTINUATION = 0b1000'0000, 0028 O5M_NUMBER_MASK = 0b0111'1111, 0029 O5M_NUMBER_SIGNED_BIT = 0b1, 0030 0031 O5M_MEMTYPE_NODE = 0x30, 0032 O5M_MEMTYPE_WAY = 0x31, 0033 O5M_MEMTYPE_RELATION = 0x32, 0034 }; 0035 0036 enum : uint16_t { 0037 O5M_STRING_TABLE_SIZE = 15000, 0038 O5M_STRING_TABLE_MAXLEN = 250, 0039 }; 0040 0041 O5mParser::O5mParser(DataSet *dataSet) 0042 : AbstractReader(dataSet) 0043 { 0044 m_stringLookupTable.resize(O5M_STRING_TABLE_SIZE); 0045 } 0046 0047 void O5mParser::readFromData(const uint8_t* data, std::size_t len) 0048 { 0049 std::fill(m_stringLookupTable.begin(), m_stringLookupTable.end(), nullptr); 0050 resetDeltaCodingState(); 0051 0052 const auto endIt = data + len; 0053 for (auto it = data; it < endIt - 1;) { 0054 const auto blockType = (*it); 0055 if (blockType == O5M_BLOCK_RESET) { 0056 resetDeltaCodingState(); 0057 ++it; 0058 continue; 0059 } 0060 0061 auto blockSize = readUnsigned(++it, endIt); 0062 if (blockSize >= (uint64_t)(endIt - it)) { 0063 qWarning() << "premature end of file, or blocksize too large" << (endIt - it) << blockType << blockSize; 0064 break; 0065 } 0066 switch (blockType) { 0067 case O5M_BLOCK_HEADER: 0068 if (blockSize != 4 || std::strncmp(reinterpret_cast<const char*>(it), "o5m2", 4) != 0) { 0069 qWarning() << "Invalid file header"; 0070 return; 0071 } 0072 break; 0073 case O5M_BLOCK_BOUNDING_BOX: 0074 case O5M_BLOCK_TIMESTAMP: 0075 // not of interest at the moment 0076 break; 0077 case O5M_BLOCK_NODE: 0078 readNode(it, it + blockSize); 0079 break; 0080 case O5M_BLOCK_WAY: 0081 readWay(it, it + blockSize); 0082 break; 0083 case O5M_BLOCK_RELATION: 0084 readRelation(it, it + blockSize); 0085 break; 0086 default: 0087 qDebug() << "unhandled o5m block type:" << (it - data) << blockType << blockSize; 0088 } 0089 0090 it += blockSize; 0091 } 0092 } 0093 0094 uint64_t O5mParser::readUnsigned(const uint8_t *&it, const uint8_t *endIt) const 0095 { 0096 uint64_t result = 0; 0097 int i = 0; 0098 for (; it < endIt && ((*it) & O5M_NUMBER_CONTINUATION); ++it, ++i) { 0099 result |= ((*it) & O5M_NUMBER_MASK) << (i * 7); 0100 } 0101 result |= ((uint64_t)(*it++) & O5M_NUMBER_MASK) << (i * 7); 0102 return result; 0103 } 0104 0105 int64_t O5mParser::readSigned(const uint8_t *&it, const uint8_t *endIt) const 0106 { 0107 const uint64_t u = readUnsigned(it, endIt); 0108 return (u & O5M_NUMBER_SIGNED_BIT) ? (-(u >> 1) -1) : (u >> 1); 0109 } 0110 0111 template <typename T> 0112 T O5mParser::readDelta(const uint8_t *&it, const uint8_t *endIt, T &deltaState) 0113 { 0114 deltaState += (T)readSigned(it, endIt); 0115 return deltaState; 0116 } 0117 0118 const char* O5mParser::readString(const uint8_t *&it, const uint8_t *endIt) 0119 { 0120 auto ref = readUnsigned(it, endIt); 0121 if (ref) { 0122 return m_stringLookupTable[(m_stringLookupPosition + O5M_STRING_TABLE_SIZE - ref) % O5M_STRING_TABLE_SIZE]; 0123 } else { 0124 const auto s = reinterpret_cast<const char*>(it); 0125 const auto len = std::strlen(s); 0126 if (len <= O5M_STRING_TABLE_MAXLEN) { 0127 m_stringLookupTable[m_stringLookupPosition] = s; 0128 m_stringLookupPosition = (m_stringLookupPosition + 1) % O5M_STRING_TABLE_SIZE; 0129 } 0130 it += len + 1; 0131 return s; 0132 } 0133 } 0134 0135 std::pair<const char*, const char*> O5mParser::readStringPair(const uint8_t *&it, const uint8_t *endIt) 0136 { 0137 auto ref = readUnsigned(it, endIt); 0138 if (ref) { 0139 const auto s = m_stringLookupTable[(m_stringLookupPosition + O5M_STRING_TABLE_SIZE - ref) % O5M_STRING_TABLE_SIZE]; 0140 if (!s) { 0141 return {}; 0142 } 0143 const auto len1 = std::strlen(s); 0144 return std::make_pair(s, s + len1 + 1); 0145 } else { 0146 const auto s = reinterpret_cast<const char*>(it); 0147 const auto len1 = std::strlen(s); 0148 const auto len2 = std::strlen(s + len1 + 1); 0149 0150 if (len1 + len2 <= O5M_STRING_TABLE_MAXLEN) { 0151 m_stringLookupTable[m_stringLookupPosition] = s; 0152 m_stringLookupPosition = (m_stringLookupPosition + 1) % O5M_STRING_TABLE_SIZE; 0153 } 0154 0155 it += len1 + len2 + 2; 0156 return std::make_pair(s, s + len1 + 1); 0157 } 0158 } 0159 0160 void O5mParser::skipVersionInformation(const uint8_t *&it, const uint8_t *end) 0161 { 0162 if (it >= end) { return; } 0163 const auto version = readUnsigned(it, end); 0164 if (version > 0) { 0165 qWarning() << "skipping changeset data not implemented yet!"; 0166 // timestamp (seconds since 1970, signed, delta-coded) 0167 // author information – only if timestamp is not 0: 0168 // changeset (signed, delta-coded) 0169 // uid, user (string pair) 0170 it = end; 0171 } 0172 } 0173 0174 template<typename Elem> 0175 void O5mParser::readTagOrBbox(Elem &e, const uint8_t *&it, const uint8_t *endIt) 0176 { 0177 const auto tagData = readStringPair(it, endIt); 0178 if (!tagData.first) { 0179 return; 0180 } 0181 if (std::strcmp(tagData.first, "bBox") == 0) { 0182 char *next = nullptr; 0183 const auto lon1 = std::strtod(tagData.second, &next); 0184 ++next; 0185 const auto lat1 = std::strtod(next, &next); 0186 ++next; 0187 const auto lon2 = std::strtod(next, &next); 0188 ++next; 0189 const auto lat2 = std::strtod(next, &next); 0190 e.bbox = OSM::BoundingBox(OSM::Coordinate(lat1, lon1), OSM::Coordinate(lat2, lon2)); 0191 return; 0192 } 0193 0194 OSM::Tag tag; 0195 tag.key = m_dataSet->makeTagKey(tagData.first, DataSet::StringIsTransient); // TODO make use of mmap'ed data for this 0196 tag.value = QByteArray(tagData.second); 0197 OSM::setTag(e, std::move(tag)); 0198 } 0199 0200 void O5mParser::readNode(const uint8_t *begin, const uint8_t *end) 0201 { 0202 OSM::Node node; 0203 0204 auto it = begin; 0205 node.id = readDelta(it, end, m_nodeIdDelta); 0206 skipVersionInformation(it, end); 0207 if (it >= end) { return; } 0208 0209 node.coordinate.longitude = (int64_t)readDelta(it, end, m_lonDelta) + 1'800'000'000ll; 0210 node.coordinate.latitude = (int64_t)readDelta(it, end, m_latDelata) + 900'000'000ll; 0211 0212 while (it < end) { 0213 OSM::Tag tag; 0214 const auto tagData = readStringPair(it, end); 0215 if (tagData.first) { 0216 tag.key = m_dataSet->makeTagKey(tagData.first, DataSet::StringIsTransient); // TODO use the fact this is mmap'ed data here 0217 tag.value = QByteArray(tagData.second); 0218 OSM::setTag(node, std::move(tag)); 0219 } 0220 } 0221 0222 addNode(std::move(node)); 0223 } 0224 0225 void O5mParser::readWay(const uint8_t *begin, const uint8_t *end) 0226 { 0227 OSM::Way way; 0228 0229 auto it = begin; 0230 way.id = readDelta(it, end, m_wayIdDelta); 0231 skipVersionInformation(it, end); 0232 if (it >= end) { return; } 0233 0234 const auto nodesBlockSize = readUnsigned(it, end); 0235 if (it + nodesBlockSize > end) { return; } 0236 0237 const auto nodesBlockEnd = it + nodesBlockSize; 0238 while(it < nodesBlockEnd) { 0239 way.nodes.push_back(readDelta(it, end, m_wayNodeIdDelta)); 0240 } 0241 0242 while (it < end) { 0243 readTagOrBbox(way, it, end); 0244 } 0245 0246 addWay(std::move(way)); 0247 } 0248 0249 void O5mParser::readRelation(const uint8_t *begin, const uint8_t *end) 0250 { 0251 OSM::Relation rel; 0252 0253 auto it = begin; 0254 rel.id = readDelta(it, end, m_relIdDelta); 0255 skipVersionInformation(it, end); 0256 if (it >= end) { return; } 0257 0258 const auto relBlockSize = readUnsigned(it, end); 0259 if (it + relBlockSize > end) { return; } 0260 0261 const auto relBlockEnd = it + relBlockSize; 0262 while (it < relBlockEnd) { 0263 const int64_t memId = readSigned(it, end); 0264 OSM::Member mem; 0265 const auto typeAndRole = readString(it, end); 0266 switch (typeAndRole[0]) { 0267 case O5M_MEMTYPE_NODE: 0268 mem.id = m_relNodeMemberIdDelta += memId; 0269 mem.setType(OSM::Type::Node); 0270 break; 0271 case O5M_MEMTYPE_WAY: 0272 mem.id = m_relWayMemberIdDelta += memId; 0273 mem.setType(OSM::Type::Way); 0274 break; 0275 case O5M_MEMTYPE_RELATION: 0276 mem.id = m_relRelMemberIdDelta += memId; 0277 mem.setType(OSM::Type::Relation); 0278 break; 0279 } 0280 mem.setRole(m_dataSet->makeRole(typeAndRole + 1, DataSet::StringIsTransient)); 0281 0282 rel.members.push_back(std::move(mem)); 0283 } 0284 0285 0286 0287 while (it < end) { 0288 readTagOrBbox(rel, it, end); 0289 } 0290 0291 addRelation(std::move(rel)); 0292 } 0293 0294 void O5mParser::resetDeltaCodingState() 0295 { 0296 m_nodeIdDelta = 0; 0297 m_latDelata = 0; 0298 m_lonDelta = 0; 0299 0300 m_wayIdDelta = 0; 0301 m_wayNodeIdDelta = 0; 0302 0303 m_relIdDelta = 0; 0304 m_relNodeMemberIdDelta = 0; 0305 m_relWayMemberIdDelta = 0; 0306 m_relRelMemberIdDelta = 0; 0307 }