File indexing completed on 2024-05-05 05:52:23
0001 /*************************************************************************** 0002 pseudoDtd.cpp 0003 copyright : (C) 2001-2002 by Daniel Naber 0004 email : daniel.naber@t-online.de 0005 ***************************************************************************/ 0006 0007 /*************************************************************************** 0008 This program is free software; you can redistribute it and/or 0009 modify it under the terms of the GNU General Public License 0010 as published by the Free Software Foundation; either version 2 0011 of the License, or ( at your option ) any later version. 0012 0013 This program is distributed in the hope that it will be useful, 0014 but WITHOUT ANY WARRANTY; without even the implied warranty of 0015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 0016 GNU General Public License for more details. 0017 0018 You should have received a copy of the GNU General Public License 0019 along with this program; if not, write to the Free Software 0020 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 0021 ***************************************************************************/ 0022 0023 #include "pseudo_dtd.h" 0024 0025 // #include <QRegExp> 0026 0027 #include <KLocalizedString> 0028 #include <KMessageBox> 0029 0030 PseudoDTD::PseudoDTD() 0031 { 0032 // "SGML support" only means case-insensivity, because HTML is case-insensitive up to version 4: 0033 m_sgmlSupport = true; // TODO: make this an run-time option ( maybe automatically set ) 0034 } 0035 0036 PseudoDTD::~PseudoDTD() 0037 { 0038 } 0039 0040 void PseudoDTD::analyzeDTD(QString &metaDtdUrl, QString &metaDtd) 0041 { 0042 QDomDocument doc(QStringLiteral("dtdIn_xml")); 0043 if (!doc.setContent(metaDtd)) { 0044 KMessageBox::error(nullptr, 0045 i18n("The file '%1' could not be parsed. " 0046 "Please check that the file is well-formed XML.", 0047 metaDtdUrl), 0048 i18n("XML Plugin Error")); 0049 return; 0050 } 0051 0052 if (doc.doctype().name() != QLatin1String("dtd")) { 0053 KMessageBox::error(nullptr, 0054 i18n("The file '%1' is not in the expected format. " 0055 "Please check that the file is of this type:\n" 0056 "-//Norman Walsh//DTD DTDParse V2.0//EN\n" 0057 "You can produce such files with dtdparse. " 0058 "See the Kate Plugin documentation for more information.", 0059 metaDtdUrl), 0060 i18n("XML Plugin Error")); 0061 return; 0062 } 0063 0064 uint listLength = 0; 0065 listLength += doc.elementsByTagName(QStringLiteral("entity")).count(); 0066 listLength += doc.elementsByTagName(QStringLiteral("element")).count(); 0067 // count this twice, as it will be iterated twice ( TODO: optimize that? ): 0068 listLength += doc.elementsByTagName(QStringLiteral("attlist")).count() * 2; 0069 0070 QProgressDialog progress(i18n("Analyzing meta DTD..."), i18n("Cancel"), 0, listLength); 0071 progress.setMinimumDuration(400); 0072 progress.setValue(0); 0073 0074 // Get information from meta DTD and put it in Qt data structures for fast access: 0075 if (!parseEntities(&doc, &progress)) { 0076 return; 0077 } 0078 0079 if (!parseElements(&doc, &progress)) { 0080 return; 0081 } 0082 0083 if (!parseAttributes(&doc, &progress)) { 0084 return; 0085 } 0086 0087 if (!parseAttributeValues(&doc, &progress)) { 0088 return; 0089 } 0090 0091 progress.setValue(listLength); // just to make sure the dialog disappears 0092 } 0093 0094 // ======================================================================== 0095 // DOM stuff: 0096 0097 /** 0098 * Iterate through the XML to get a mapping which sub-elements are allowed for 0099 * all elements. 0100 */ 0101 bool PseudoDTD::parseElements(QDomDocument *doc, QProgressDialog *progress) 0102 { 0103 m_elementsList.clear(); 0104 // We only display a list, i.e. we pretend that the content model is just 0105 // a set, so we use a map. This is necessary e.g. for xhtml 1.0's head element, 0106 // which would otherwise display some elements twice. 0107 std::map<QString, bool> subelementList; // the bool is not used 0108 0109 QDomNodeList list = doc->elementsByTagName(QStringLiteral("element")); 0110 uint listLength = list.count(); // speedup (really! ) 0111 0112 for (uint i = 0; i < listLength; i++) { 0113 if (progress->wasCanceled()) { 0114 return false; 0115 } 0116 0117 progress->setValue(progress->value() + 1); 0118 // FIXME!: 0119 // qApp->processEvents(); 0120 0121 subelementList.clear(); 0122 QDomNode node = list.item(i); 0123 QDomElement elem = node.toElement(); 0124 0125 if (!elem.isNull()) { 0126 // Enter the expanded content model, which may also include stuff not allowed. 0127 // We do not care if it's a <sequence-group> or whatever. 0128 QDomNodeList contentModelList = elem.elementsByTagName(QStringLiteral("content-model-expanded")); 0129 QDomNode contentModelNode = contentModelList.item(0); 0130 QDomElement contentModelElem = contentModelNode.toElement(); 0131 if (!contentModelElem.isNull()) { 0132 // check for <pcdata/>: 0133 QDomNodeList pcdataList = contentModelElem.elementsByTagName(QStringLiteral("pcdata")); 0134 0135 // check for other sub elements: 0136 QDomNodeList subList = contentModelElem.elementsByTagName(QStringLiteral("element-name")); 0137 uint subListLength = subList.count(); 0138 for (uint l = 0; l < subListLength; l++) { 0139 QDomNode subNode = subList.item(l); 0140 QDomElement subElem = subNode.toElement(); 0141 if (!subElem.isNull()) { 0142 subelementList[subElem.attribute(QStringLiteral("name"))] = true; 0143 } 0144 } 0145 0146 // anders: check if this is an EMPTY element, and put "__EMPTY" in the 0147 // sub list, so that we can insert tags in empty form if required. 0148 QDomNodeList emptyList = elem.elementsByTagName(QStringLiteral("empty")); 0149 if (emptyList.count()) { 0150 subelementList[QStringLiteral("__EMPTY")] = true; 0151 } 0152 } 0153 0154 // Now remove the elements not allowed (e.g. <a> is explicitly not allowed in <a> 0155 // in the HTML 4.01 Strict DTD): 0156 QDomNodeList exclusionsList = elem.elementsByTagName(QStringLiteral("exclusions")); 0157 if (exclusionsList.length() > 0) { 0158 // sometimes there are no exclusions ( e.g. in XML DTDs there are never exclusions ) 0159 QDomNode exclusionsNode = exclusionsList.item(0); 0160 QDomElement exclusionsElem = exclusionsNode.toElement(); 0161 if (!exclusionsElem.isNull()) { 0162 QDomNodeList subList = exclusionsElem.elementsByTagName(QStringLiteral("element-name")); 0163 uint subListLength = subList.count(); 0164 for (uint l = 0; l < subListLength; l++) { 0165 QDomNode subNode = subList.item(l); 0166 QDomElement subElem = subNode.toElement(); 0167 if (!subElem.isNull()) { 0168 auto it = subelementList.find(subElem.attribute(QStringLiteral("name"))); 0169 if (it != subelementList.end()) { 0170 subelementList.erase(it); 0171 } 0172 } 0173 } 0174 } 0175 } 0176 0177 // turn the map into a list: 0178 QStringList subelementListTmp; 0179 for (auto it = subelementList.begin(); it != subelementList.end(); ++it) { 0180 subelementListTmp.append(it->first); 0181 } 0182 0183 m_elementsList.insert_or_assign(elem.attribute(QStringLiteral("name")), subelementListTmp); 0184 } 0185 0186 } // end iteration over all <element> nodes 0187 return true; 0188 } 0189 0190 /** 0191 * Check which elements are allowed inside a parent element. This returns 0192 * a list of allowed elements, but it doesn't care about order or if only a certain 0193 * number of occurrences is allowed. 0194 */ 0195 QStringList PseudoDTD::allowedElements(const QString &parentElement) 0196 { 0197 if (m_sgmlSupport) { 0198 // find the matching element, ignoring case: 0199 for (const auto &[key, elements] : m_elementsList) { 0200 if (key.compare(parentElement, Qt::CaseInsensitive) == 0) { 0201 return elements; 0202 } 0203 } 0204 } else if (auto it = m_elementsList.find(parentElement); it != m_elementsList.end()) { 0205 return it->second; 0206 } 0207 0208 return QStringList(); 0209 } 0210 0211 /** 0212 * Iterate through the XML to get a mapping which attributes are allowed inside 0213 * all elements. 0214 */ 0215 bool PseudoDTD::parseAttributes(QDomDocument *doc, QProgressDialog *progress) 0216 { 0217 m_attributesList.clear(); 0218 // QStringList allowedAttributes; 0219 QDomNodeList list = doc->elementsByTagName(QStringLiteral("attlist")); 0220 uint listLength = list.count(); 0221 0222 for (uint i = 0; i < listLength; i++) { 0223 if (progress->wasCanceled()) { 0224 return false; 0225 } 0226 0227 progress->setValue(progress->value() + 1); 0228 // FIXME!! 0229 // qApp->processEvents(); 0230 0231 ElementAttributes attrs; 0232 QDomNode node = list.item(i); 0233 QDomElement elem = node.toElement(); 0234 if (!elem.isNull()) { 0235 QDomNodeList attributeList = elem.elementsByTagName(QStringLiteral("attribute")); 0236 uint attributeListLength = attributeList.count(); 0237 for (uint l = 0; l < attributeListLength; l++) { 0238 QDomNode attributeNode = attributeList.item(l); 0239 QDomElement attributeElem = attributeNode.toElement(); 0240 0241 if (!attributeElem.isNull()) { 0242 if (attributeElem.attribute(QStringLiteral("type")) == QLatin1String("#REQUIRED")) { 0243 attrs.requiredAttributes.append(attributeElem.attribute(QStringLiteral("name"))); 0244 } else { 0245 attrs.optionalAttributes.append(attributeElem.attribute(QStringLiteral("name"))); 0246 } 0247 } 0248 } 0249 m_attributesList.insert_or_assign(elem.attribute(QStringLiteral("name")), attrs); 0250 } 0251 } 0252 0253 return true; 0254 } 0255 0256 /** Check which attributes are allowed for an element. 0257 */ 0258 QStringList PseudoDTD::allowedAttributes(const QString &element) 0259 { 0260 if (m_sgmlSupport) { 0261 // find the matching element, ignoring case: 0262 for (const auto &[key, attributes] : m_attributesList) { 0263 if (key.compare(element, Qt::CaseInsensitive) == 0) { 0264 return attributes.optionalAttributes + attributes.requiredAttributes; 0265 } 0266 } 0267 } else if (auto it = m_attributesList.find(element); it != m_attributesList.end()) { 0268 return it->second.optionalAttributes + it->second.requiredAttributes; 0269 } 0270 0271 return QStringList(); 0272 } 0273 0274 QStringList PseudoDTD::requiredAttributes(const QString &element) const 0275 { 0276 if (m_sgmlSupport) { 0277 for (const auto &[key, attributes] : m_attributesList) { 0278 if (key.compare(element, Qt::CaseInsensitive) == 0) { 0279 return attributes.requiredAttributes; 0280 } 0281 } 0282 } else if (auto it = m_attributesList.find(element); it != m_attributesList.end()) { 0283 return it->second.requiredAttributes; 0284 } 0285 0286 return QStringList(); 0287 } 0288 0289 /** 0290 * Iterate through the XML to get a mapping which attribute values are allowed 0291 * for all attributes inside all elements. 0292 */ 0293 bool PseudoDTD::parseAttributeValues(QDomDocument *doc, QProgressDialog *progress) 0294 { 0295 m_attributevaluesList.clear(); // 1 element : n possible attributes 0296 std::map<QString, QStringList> attributevaluesTmp; // 1 attribute : n possible values 0297 QDomNodeList list = doc->elementsByTagName(QStringLiteral("attlist")); 0298 uint listLength = list.count(); 0299 0300 for (uint i = 0; i < listLength; i++) { 0301 if (progress->wasCanceled()) { 0302 return false; 0303 } 0304 0305 progress->setValue(progress->value() + 1); 0306 // FIXME! 0307 // qApp->processEvents(); 0308 0309 attributevaluesTmp.clear(); 0310 QDomNode node = list.item(i); 0311 QDomElement elem = node.toElement(); 0312 if (!elem.isNull()) { 0313 // Enter the list of <attribute>: 0314 QDomNodeList attributeList = elem.elementsByTagName(QStringLiteral("attribute")); 0315 uint attributeListLength = attributeList.count(); 0316 for (uint l = 0; l < attributeListLength; l++) { 0317 QDomNode attributeNode = attributeList.item(l); 0318 QDomElement attributeElem = attributeNode.toElement(); 0319 if (!attributeElem.isNull()) { 0320 QString value = attributeElem.attribute(QStringLiteral("value")); 0321 attributevaluesTmp.insert_or_assign(attributeElem.attribute(QStringLiteral("name")), value.split(QChar(' '))); 0322 } 0323 } 0324 m_attributevaluesList.insert_or_assign(elem.attribute(QStringLiteral("name")), attributevaluesTmp); 0325 } 0326 } 0327 return true; 0328 } 0329 0330 /** 0331 * Check which attributes values are allowed for an attribute in an element 0332 * (the element is necessary because e.g. "href" inside <a> could be different 0333 * to an "href" inside <link>): 0334 */ 0335 QStringList PseudoDTD::attributeValues(const QString &element, const QString &attribute) 0336 { 0337 // Direct access would be faster than iteration of course but not always correct, 0338 // because we need to be case-insensitive. 0339 if (m_sgmlSupport) { 0340 // first find the matching element, ignoring case: 0341 for (const auto &[key, attrVals] : m_attributevaluesList) { 0342 if (key.compare(element, Qt::CaseInsensitive) == 0) { 0343 // then find the matching attribute for that element, ignoring case: 0344 for (const auto &[k, attrs] : attrVals) { 0345 if (k.compare(attribute, Qt::CaseInsensitive) == 0) { 0346 return attrs; 0347 } 0348 } 0349 } 0350 } 0351 } else if (auto it = m_attributevaluesList.find(element); it != m_attributevaluesList.end()) { 0352 const std::map<QString, QStringList> &attrVals = it->second; 0353 if (auto it = attrVals.find(attribute); it != attrVals.end()) { 0354 return it->second; 0355 } 0356 } 0357 0358 // no predefined values available: 0359 return QStringList(); 0360 } 0361 0362 /** 0363 * Iterate through the XML to get a mapping of all entity names and their expanded 0364 * version, e.g. nbsp =>  . Parameter entities are ignored. 0365 */ 0366 bool PseudoDTD::parseEntities(QDomDocument *doc, QProgressDialog *progress) 0367 { 0368 m_entityList.clear(); 0369 QDomNodeList list = doc->elementsByTagName(QStringLiteral("entity")); 0370 uint listLength = list.count(); 0371 0372 for (uint i = 0; i < listLength; i++) { 0373 if (progress->wasCanceled()) { 0374 return false; 0375 } 0376 0377 progress->setValue(progress->value() + 1); 0378 // FIXME!! 0379 // qApp->processEvents(); 0380 QDomNode node = list.item(i); 0381 QDomElement elem = node.toElement(); 0382 if (!elem.isNull() && elem.attribute(QStringLiteral("type")) != QLatin1String("param")) { 0383 // TODO: what's cdata <-> gen ? 0384 QDomNodeList expandedList = elem.elementsByTagName(QStringLiteral("text-expanded")); 0385 QDomNode expandedNode = expandedList.item(0); 0386 QDomElement expandedElem = expandedNode.toElement(); 0387 if (!expandedElem.isNull()) { 0388 QString exp = expandedElem.text(); 0389 // TODO: support more than one &#...; in the expanded text 0390 /* TODO include do this when the unicode font problem is solved: 0391 if( exp.contains(QRegularExpression("^&#x[a-zA-Z0-9]+;$")) ) { 0392 // hexadecimal numbers, e.g. "ȶ" 0393 uint end = exp.find( ";" ); 0394 exp = exp.mid( 3, end-3 ); 0395 exp = QChar(); 0396 } else if( exp.contains(QRegularExpression("^&#[0-9]+;$")) ) { 0397 // decimal numbers, e.g. "ì" 0398 uint end = exp.find( ";" ); 0399 exp = exp.mid( 2, end-2 ); 0400 exp = QChar( exp.toInt() ); 0401 } 0402 */ 0403 m_entityList.insert_or_assign(elem.attribute(QStringLiteral("name")), exp); 0404 } else { 0405 m_entityList.insert_or_assign(elem.attribute(QStringLiteral("name")), QString()); 0406 } 0407 } 0408 } 0409 return true; 0410 } 0411 0412 /** 0413 * Get a list of all ( non-parameter ) entities that start with a certain string. 0414 */ 0415 QStringList PseudoDTD::entities(const QString &start) 0416 { 0417 QStringList entities; 0418 for (const auto &[key, value] : m_entityList) { 0419 if (value.startsWith(start)) { 0420 const QString &str = key; 0421 /* TODO: show entities as unicode character 0422 if( !it.data().isEmpty() ) { 0423 //str += " -- " + it.data(); 0424 QRegExp re( "&#(\\d+);" ); 0425 if( re.search(it.data()) != -1 ) { 0426 uint ch = re.cap( 1).toUInt(); 0427 str += " -- " + QChar( ch).decomposition(); 0428 } 0429 //qDebug() << "#" << it.data(); 0430 } 0431 */ 0432 entities.append(str); 0433 // TODO: later use a table view 0434 } 0435 } 0436 return entities; 0437 } 0438 0439 // kate: space-indent on; indent-width 4; replace-tabs on; mixed-indent off;