File indexing completed on 2024-11-24 04:44:39
0001 /* 0002 * SPDX-FileCopyrightText: 2021 Volker Krause <vkrause@kde.org> 0003 * SPDX-License-Identifier: LGPL-2.0-or-later 0004 */ 0005 0006 #include "rdf_p.h" 0007 0008 #include <QBuffer> 0009 #include <QCryptographicHash> 0010 #include <QHash> 0011 #include <QMap> 0012 #include <QIODevice> 0013 0014 bool Rdf::Term::operator<(const Rdf::Term& other) const 0015 { 0016 if (type == other.type) { 0017 if (value == other.value) { 0018 return literalType < other.literalType; 0019 } 0020 return value < other.value; 0021 } 0022 return type < other.type; 0023 } 0024 0025 bool Rdf::Term::operator==(const Rdf::Term& other) const 0026 { 0027 return type == other.type && value == other.value && literalType == other.literalType; 0028 } 0029 0030 bool Rdf::Quad::operator<(const Rdf::Quad &other) const 0031 { 0032 if (subject == other.subject) { 0033 if (predicate == other.predicate) { 0034 return object < other.object; 0035 } 0036 return predicate < other.predicate; 0037 } 0038 return subject < other.subject; 0039 } 0040 0041 // see https://json-ld.github.io/rdf-dataset-canonicalization/spec/#hash-first-degree-quads 0042 static QByteArray hashFirstDegreeQuads(const std::vector<Rdf::Quad> &quads, const QString &refBlankNode) 0043 { 0044 const auto renameBlankNode = [&refBlankNode](Rdf::Term &term) { 0045 if (term.type == Rdf::Term::BlankNode) { 0046 if (term.value == refBlankNode) { 0047 term.value = QStringLiteral("a"); 0048 } else { 0049 term.value = QStringLiteral("z"); 0050 } 0051 } 0052 }; 0053 0054 std::vector<Rdf::Quad> toHash; 0055 for (auto quad : quads) { 0056 renameBlankNode(quad.subject); 0057 renameBlankNode(quad.predicate); 0058 renameBlankNode(quad.object); 0059 toHash.push_back(std::move(quad)); 0060 } 0061 0062 std::sort(toHash.begin(), toHash.end()); 0063 return QCryptographicHash::hash(serialize(toHash), QCryptographicHash::Sha256).toHex(); 0064 } 0065 0066 QByteArray Rdf::serialize(const std::vector<Rdf::Quad>& quads) 0067 { 0068 QByteArray out; 0069 QBuffer buffer(&out); 0070 buffer.open(QIODevice::WriteOnly); 0071 Rdf::serialize(&buffer, quads); 0072 buffer.close(); 0073 return out; 0074 } 0075 0076 void Rdf::normalize(std::vector<Rdf::Quad>& quads) 0077 { 0078 // see https://json-ld.github.io/rdf-dataset-canonicalization/spec/#algorithm 0079 QHash<QString, std::vector<Rdf::Quad>> blankNodeToQuadMap; 0080 for (const auto &quad : quads) { 0081 // ignores predicates and the same blank nodes used multiple times in a quad, as that doesn't happen for us 0082 if (quad.subject.type == Rdf::Term::BlankNode) { 0083 blankNodeToQuadMap[quad.subject.value].push_back(quad); 0084 } 0085 if (quad.object.type == Rdf::Term::BlankNode) { 0086 blankNodeToQuadMap[quad.object.value].push_back(quad); 0087 } 0088 } 0089 0090 QMap<QByteArray, QString> hashToBlankNodeMap; 0091 for (auto it = blankNodeToQuadMap.begin(); it != blankNodeToQuadMap.end(); ++it) { 0092 hashToBlankNodeMap.insert(hashFirstDegreeQuads(it.value(), it.key()), it.key()); 0093 } 0094 0095 int c14nIdCounter = 0; 0096 QHash<QString, QString> blankNodeC14nMap; 0097 for (auto it = hashToBlankNodeMap.begin(); it != hashToBlankNodeMap.end(); ++it) { 0098 blankNodeC14nMap.insert(it.value(), QLatin1String("c14n") + QString::number(c14nIdCounter++)); 0099 } 0100 0101 const auto translateBlankNode = [&blankNodeC14nMap](Rdf::Term &term) { 0102 if (term.type == Rdf::Term::BlankNode) { 0103 const auto it = blankNodeC14nMap.constFind(term.value); 0104 if (it != blankNodeC14nMap.constEnd()) { 0105 term.value = it.value(); 0106 } 0107 } 0108 }; 0109 for (auto &quad : quads) { 0110 translateBlankNode(quad.subject); 0111 translateBlankNode(quad.predicate); 0112 translateBlankNode(quad.object); 0113 } 0114 0115 std::sort(quads.begin(), quads.end()); 0116 quads.erase(std::unique(quads.begin(), quads.end(), [](const auto &lhs, const auto &rhs) { 0117 return lhs.subject == rhs.subject && lhs.predicate == rhs.predicate && lhs.object == rhs.object; 0118 }), quads.end()); 0119 } 0120 0121 void Rdf::serialize(QIODevice *out, const std::vector<Rdf::Quad> &quads) 0122 { 0123 for (const auto &quad : quads) { 0124 serialize(out, quad); 0125 } 0126 } 0127 0128 void Rdf::serialize(QIODevice *out, const Rdf::Quad &quad) 0129 { 0130 serialize(out, quad.subject); 0131 out->write(" "); 0132 serialize(out, quad.predicate); 0133 out->write(" "); 0134 serialize(out, quad.object); 0135 out->write(" .\n"); 0136 } 0137 0138 void Rdf::serialize(QIODevice* out, const Rdf::Term &term) 0139 { 0140 switch (term.type) { 0141 case Term::IRI: 0142 out->write("<"); 0143 out->write(term.value.toUtf8()); 0144 out->write(">"); 0145 break; 0146 case Term::BlankNode: 0147 out->write("_:"); 0148 out->write(term.value.toUtf8()); 0149 break; 0150 case Term::Literal: 0151 out->write("\""); 0152 out->write(term.value.toUtf8()); 0153 out->write("\""); 0154 if (!term.literalType.isEmpty()) { 0155 out->write("^^<"); 0156 out->write(term.literalType.toUtf8()); 0157 out->write(">"); 0158 } 0159 break; 0160 case Term::Undefined: 0161 out->write(term.value.toUtf8()); 0162 break; 0163 } 0164 }