File indexing completed on 2024-05-05 05:52:23

0001 /***************************************************************************
0002     pseudoDtd.cpp
0003     copyright           : (C) 2001-2002 by Daniel Naber
0004     email               : daniel.naber@t-online.de
0005  ***************************************************************************/
0006 
0007 /***************************************************************************
0008  This program is free software; you can redistribute it and/or
0009  modify it under the terms of the GNU General Public License
0010  as published by the Free Software Foundation; either version 2
0011  of the License, or ( at your option ) any later version.
0012 
0013  This program is distributed in the hope that it will be useful,
0014  but WITHOUT ANY WARRANTY; without even the implied warranty of
0015  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0016  GNU General Public License for more details.
0017 
0018  You should have received a copy of the GNU General Public License
0019  along with this program; if not, write to the Free Software
0020  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
0021  ***************************************************************************/
0022 
0023 #include "pseudo_dtd.h"
0024 
0025 // #include <QRegExp>
0026 
0027 #include <KLocalizedString>
0028 #include <KMessageBox>
0029 
0030 PseudoDTD::PseudoDTD()
0031 {
0032     // "SGML support" only means case-insensivity, because HTML is case-insensitive up to version 4:
0033     m_sgmlSupport = true; // TODO: make this an run-time option ( maybe automatically set )
0034 }
0035 
0036 PseudoDTD::~PseudoDTD()
0037 {
0038 }
0039 
0040 void PseudoDTD::analyzeDTD(QString &metaDtdUrl, QString &metaDtd)
0041 {
0042     QDomDocument doc(QStringLiteral("dtdIn_xml"));
0043     if (!doc.setContent(metaDtd)) {
0044         KMessageBox::error(nullptr,
0045                            i18n("The file '%1' could not be parsed. "
0046                                 "Please check that the file is well-formed XML.",
0047                                 metaDtdUrl),
0048                            i18n("XML Plugin Error"));
0049         return;
0050     }
0051 
0052     if (doc.doctype().name() != QLatin1String("dtd")) {
0053         KMessageBox::error(nullptr,
0054                            i18n("The file '%1' is not in the expected format. "
0055                                 "Please check that the file is of this type:\n"
0056                                 "-//Norman Walsh//DTD DTDParse V2.0//EN\n"
0057                                 "You can produce such files with dtdparse. "
0058                                 "See the Kate Plugin documentation for more information.",
0059                                 metaDtdUrl),
0060                            i18n("XML Plugin Error"));
0061         return;
0062     }
0063 
0064     uint listLength = 0;
0065     listLength += doc.elementsByTagName(QStringLiteral("entity")).count();
0066     listLength += doc.elementsByTagName(QStringLiteral("element")).count();
0067     // count this twice, as it will be iterated twice ( TODO: optimize that? ):
0068     listLength += doc.elementsByTagName(QStringLiteral("attlist")).count() * 2;
0069 
0070     QProgressDialog progress(i18n("Analyzing meta DTD..."), i18n("Cancel"), 0, listLength);
0071     progress.setMinimumDuration(400);
0072     progress.setValue(0);
0073 
0074     // Get information from meta DTD and put it in Qt data structures for fast access:
0075     if (!parseEntities(&doc, &progress)) {
0076         return;
0077     }
0078 
0079     if (!parseElements(&doc, &progress)) {
0080         return;
0081     }
0082 
0083     if (!parseAttributes(&doc, &progress)) {
0084         return;
0085     }
0086 
0087     if (!parseAttributeValues(&doc, &progress)) {
0088         return;
0089     }
0090 
0091     progress.setValue(listLength); // just to make sure the dialog disappears
0092 }
0093 
0094 // ========================================================================
0095 // DOM stuff:
0096 
0097 /**
0098  * Iterate through the XML to get a mapping which sub-elements are allowed for
0099  * all elements.
0100  */
0101 bool PseudoDTD::parseElements(QDomDocument *doc, QProgressDialog *progress)
0102 {
0103     m_elementsList.clear();
0104     // We only display a list, i.e. we pretend that the content model is just
0105     // a set, so we use a map. This is necessary e.g. for xhtml 1.0's head element,
0106     // which would otherwise display some elements twice.
0107     std::map<QString, bool> subelementList; // the bool is not used
0108 
0109     QDomNodeList list = doc->elementsByTagName(QStringLiteral("element"));
0110     uint listLength = list.count(); // speedup (really! )
0111 
0112     for (uint i = 0; i < listLength; i++) {
0113         if (progress->wasCanceled()) {
0114             return false;
0115         }
0116 
0117         progress->setValue(progress->value() + 1);
0118         // FIXME!:
0119         // qApp->processEvents();
0120 
0121         subelementList.clear();
0122         QDomNode node = list.item(i);
0123         QDomElement elem = node.toElement();
0124 
0125         if (!elem.isNull()) {
0126             // Enter the expanded content model, which may also include stuff not allowed.
0127             // We do not care if it's a <sequence-group> or whatever.
0128             QDomNodeList contentModelList = elem.elementsByTagName(QStringLiteral("content-model-expanded"));
0129             QDomNode contentModelNode = contentModelList.item(0);
0130             QDomElement contentModelElem = contentModelNode.toElement();
0131             if (!contentModelElem.isNull()) {
0132                 // check for <pcdata/>:
0133                 QDomNodeList pcdataList = contentModelElem.elementsByTagName(QStringLiteral("pcdata"));
0134 
0135                 // check for other sub elements:
0136                 QDomNodeList subList = contentModelElem.elementsByTagName(QStringLiteral("element-name"));
0137                 uint subListLength = subList.count();
0138                 for (uint l = 0; l < subListLength; l++) {
0139                     QDomNode subNode = subList.item(l);
0140                     QDomElement subElem = subNode.toElement();
0141                     if (!subElem.isNull()) {
0142                         subelementList[subElem.attribute(QStringLiteral("name"))] = true;
0143                     }
0144                 }
0145 
0146                 // anders: check if this is an EMPTY element, and put "__EMPTY" in the
0147                 // sub list, so that we can insert tags in empty form if required.
0148                 QDomNodeList emptyList = elem.elementsByTagName(QStringLiteral("empty"));
0149                 if (emptyList.count()) {
0150                     subelementList[QStringLiteral("__EMPTY")] = true;
0151                 }
0152             }
0153 
0154             // Now remove the elements not allowed (e.g. <a> is explicitly not allowed in <a>
0155             // in the HTML 4.01 Strict DTD):
0156             QDomNodeList exclusionsList = elem.elementsByTagName(QStringLiteral("exclusions"));
0157             if (exclusionsList.length() > 0) {
0158                 // sometimes there are no exclusions ( e.g. in XML DTDs there are never exclusions )
0159                 QDomNode exclusionsNode = exclusionsList.item(0);
0160                 QDomElement exclusionsElem = exclusionsNode.toElement();
0161                 if (!exclusionsElem.isNull()) {
0162                     QDomNodeList subList = exclusionsElem.elementsByTagName(QStringLiteral("element-name"));
0163                     uint subListLength = subList.count();
0164                     for (uint l = 0; l < subListLength; l++) {
0165                         QDomNode subNode = subList.item(l);
0166                         QDomElement subElem = subNode.toElement();
0167                         if (!subElem.isNull()) {
0168                             auto it = subelementList.find(subElem.attribute(QStringLiteral("name")));
0169                             if (it != subelementList.end()) {
0170                                 subelementList.erase(it);
0171                             }
0172                         }
0173                     }
0174                 }
0175             }
0176 
0177             // turn the map into a list:
0178             QStringList subelementListTmp;
0179             for (auto it = subelementList.begin(); it != subelementList.end(); ++it) {
0180                 subelementListTmp.append(it->first);
0181             }
0182 
0183             m_elementsList.insert_or_assign(elem.attribute(QStringLiteral("name")), subelementListTmp);
0184         }
0185 
0186     } // end iteration over all <element> nodes
0187     return true;
0188 }
0189 
0190 /**
0191  * Check which elements are allowed inside a parent element. This returns
0192  * a list of allowed elements, but it doesn't care about order or if only a certain
0193  * number of occurrences is allowed.
0194  */
0195 QStringList PseudoDTD::allowedElements(const QString &parentElement)
0196 {
0197     if (m_sgmlSupport) {
0198         // find the matching element, ignoring case:
0199         for (const auto &[key, elements] : m_elementsList) {
0200             if (key.compare(parentElement, Qt::CaseInsensitive) == 0) {
0201                 return elements;
0202             }
0203         }
0204     } else if (auto it = m_elementsList.find(parentElement); it != m_elementsList.end()) {
0205         return it->second;
0206     }
0207 
0208     return QStringList();
0209 }
0210 
0211 /**
0212  * Iterate through the XML to get a mapping which attributes are allowed inside
0213  * all elements.
0214  */
0215 bool PseudoDTD::parseAttributes(QDomDocument *doc, QProgressDialog *progress)
0216 {
0217     m_attributesList.clear();
0218     //   QStringList allowedAttributes;
0219     QDomNodeList list = doc->elementsByTagName(QStringLiteral("attlist"));
0220     uint listLength = list.count();
0221 
0222     for (uint i = 0; i < listLength; i++) {
0223         if (progress->wasCanceled()) {
0224             return false;
0225         }
0226 
0227         progress->setValue(progress->value() + 1);
0228         // FIXME!!
0229         // qApp->processEvents();
0230 
0231         ElementAttributes attrs;
0232         QDomNode node = list.item(i);
0233         QDomElement elem = node.toElement();
0234         if (!elem.isNull()) {
0235             QDomNodeList attributeList = elem.elementsByTagName(QStringLiteral("attribute"));
0236             uint attributeListLength = attributeList.count();
0237             for (uint l = 0; l < attributeListLength; l++) {
0238                 QDomNode attributeNode = attributeList.item(l);
0239                 QDomElement attributeElem = attributeNode.toElement();
0240 
0241                 if (!attributeElem.isNull()) {
0242                     if (attributeElem.attribute(QStringLiteral("type")) == QLatin1String("#REQUIRED")) {
0243                         attrs.requiredAttributes.append(attributeElem.attribute(QStringLiteral("name")));
0244                     } else {
0245                         attrs.optionalAttributes.append(attributeElem.attribute(QStringLiteral("name")));
0246                     }
0247                 }
0248             }
0249             m_attributesList.insert_or_assign(elem.attribute(QStringLiteral("name")), attrs);
0250         }
0251     }
0252 
0253     return true;
0254 }
0255 
0256 /** Check which attributes are allowed for an element.
0257  */
0258 QStringList PseudoDTD::allowedAttributes(const QString &element)
0259 {
0260     if (m_sgmlSupport) {
0261         // find the matching element, ignoring case:
0262         for (const auto &[key, attributes] : m_attributesList) {
0263             if (key.compare(element, Qt::CaseInsensitive) == 0) {
0264                 return attributes.optionalAttributes + attributes.requiredAttributes;
0265             }
0266         }
0267     } else if (auto it = m_attributesList.find(element); it != m_attributesList.end()) {
0268         return it->second.optionalAttributes + it->second.requiredAttributes;
0269     }
0270 
0271     return QStringList();
0272 }
0273 
0274 QStringList PseudoDTD::requiredAttributes(const QString &element) const
0275 {
0276     if (m_sgmlSupport) {
0277         for (const auto &[key, attributes] : m_attributesList) {
0278             if (key.compare(element, Qt::CaseInsensitive) == 0) {
0279                 return attributes.requiredAttributes;
0280             }
0281         }
0282     } else if (auto it = m_attributesList.find(element); it != m_attributesList.end()) {
0283         return it->second.requiredAttributes;
0284     }
0285 
0286     return QStringList();
0287 }
0288 
0289 /**
0290  * Iterate through the XML to get a mapping which attribute values are allowed
0291  * for all attributes inside all elements.
0292  */
0293 bool PseudoDTD::parseAttributeValues(QDomDocument *doc, QProgressDialog *progress)
0294 {
0295     m_attributevaluesList.clear(); // 1 element : n possible attributes
0296     std::map<QString, QStringList> attributevaluesTmp; // 1 attribute : n possible values
0297     QDomNodeList list = doc->elementsByTagName(QStringLiteral("attlist"));
0298     uint listLength = list.count();
0299 
0300     for (uint i = 0; i < listLength; i++) {
0301         if (progress->wasCanceled()) {
0302             return false;
0303         }
0304 
0305         progress->setValue(progress->value() + 1);
0306         // FIXME!
0307         // qApp->processEvents();
0308 
0309         attributevaluesTmp.clear();
0310         QDomNode node = list.item(i);
0311         QDomElement elem = node.toElement();
0312         if (!elem.isNull()) {
0313             // Enter the list of <attribute>:
0314             QDomNodeList attributeList = elem.elementsByTagName(QStringLiteral("attribute"));
0315             uint attributeListLength = attributeList.count();
0316             for (uint l = 0; l < attributeListLength; l++) {
0317                 QDomNode attributeNode = attributeList.item(l);
0318                 QDomElement attributeElem = attributeNode.toElement();
0319                 if (!attributeElem.isNull()) {
0320                     QString value = attributeElem.attribute(QStringLiteral("value"));
0321                     attributevaluesTmp.insert_or_assign(attributeElem.attribute(QStringLiteral("name")), value.split(QChar(' ')));
0322                 }
0323             }
0324             m_attributevaluesList.insert_or_assign(elem.attribute(QStringLiteral("name")), attributevaluesTmp);
0325         }
0326     }
0327     return true;
0328 }
0329 
0330 /**
0331  * Check which attributes values are allowed for an attribute in an element
0332  * (the element is necessary because e.g. "href" inside <a> could be different
0333  * to an "href" inside <link>):
0334  */
0335 QStringList PseudoDTD::attributeValues(const QString &element, const QString &attribute)
0336 {
0337     // Direct access would be faster than iteration of course but not always correct,
0338     // because we need to be case-insensitive.
0339     if (m_sgmlSupport) {
0340         // first find the matching element, ignoring case:
0341         for (const auto &[key, attrVals] : m_attributevaluesList) {
0342             if (key.compare(element, Qt::CaseInsensitive) == 0) {
0343                 // then find the matching attribute for that element, ignoring case:
0344                 for (const auto &[k, attrs] : attrVals) {
0345                     if (k.compare(attribute, Qt::CaseInsensitive) == 0) {
0346                         return attrs;
0347                     }
0348                 }
0349             }
0350         }
0351     } else if (auto it = m_attributevaluesList.find(element); it != m_attributevaluesList.end()) {
0352         const std::map<QString, QStringList> &attrVals = it->second;
0353         if (auto it = attrVals.find(attribute); it != attrVals.end()) {
0354             return it->second;
0355         }
0356     }
0357 
0358     // no predefined values available:
0359     return QStringList();
0360 }
0361 
0362 /**
0363  * Iterate through the XML to get a mapping of all entity names and their expanded
0364  * version, e.g. nbsp => &#160;. Parameter entities are ignored.
0365  */
0366 bool PseudoDTD::parseEntities(QDomDocument *doc, QProgressDialog *progress)
0367 {
0368     m_entityList.clear();
0369     QDomNodeList list = doc->elementsByTagName(QStringLiteral("entity"));
0370     uint listLength = list.count();
0371 
0372     for (uint i = 0; i < listLength; i++) {
0373         if (progress->wasCanceled()) {
0374             return false;
0375         }
0376 
0377         progress->setValue(progress->value() + 1);
0378         // FIXME!!
0379         // qApp->processEvents();
0380         QDomNode node = list.item(i);
0381         QDomElement elem = node.toElement();
0382         if (!elem.isNull() && elem.attribute(QStringLiteral("type")) != QLatin1String("param")) {
0383             // TODO: what's cdata <-> gen ?
0384             QDomNodeList expandedList = elem.elementsByTagName(QStringLiteral("text-expanded"));
0385             QDomNode expandedNode = expandedList.item(0);
0386             QDomElement expandedElem = expandedNode.toElement();
0387             if (!expandedElem.isNull()) {
0388                 QString exp = expandedElem.text();
0389                 // TODO: support more than one &#...; in the expanded text
0390                 /* TODO include do this when the unicode font problem is solved:
0391                 if( exp.contains(QRegularExpression("^&#x[a-zA-Z0-9]+;$")) ) {
0392                 // hexadecimal numbers, e.g. "&#x236;"
0393                 uint end = exp.find( ";" );
0394                 exp = exp.mid( 3, end-3 );
0395                 exp = QChar();
0396                 } else if( exp.contains(QRegularExpression("^&#[0-9]+;$")) ) {
0397                 // decimal numbers, e.g. "&#236;"
0398                 uint end = exp.find( ";" );
0399                 exp = exp.mid( 2, end-2 );
0400                 exp = QChar( exp.toInt() );
0401                 }
0402                 */
0403                 m_entityList.insert_or_assign(elem.attribute(QStringLiteral("name")), exp);
0404             } else {
0405                 m_entityList.insert_or_assign(elem.attribute(QStringLiteral("name")), QString());
0406             }
0407         }
0408     }
0409     return true;
0410 }
0411 
0412 /**
0413  * Get a list of all ( non-parameter ) entities that start with a certain string.
0414  */
0415 QStringList PseudoDTD::entities(const QString &start)
0416 {
0417     QStringList entities;
0418     for (const auto &[key, value] : m_entityList) {
0419         if (value.startsWith(start)) {
0420             const QString &str = key;
0421             /* TODO: show entities as unicode character
0422             if( !it.data().isEmpty() ) {
0423             //str += " -- " + it.data();
0424             QRegExp re( "&#(\\d+);" );
0425             if( re.search(it.data()) != -1 ) {
0426             uint ch = re.cap( 1).toUInt();
0427             str += " -- " + QChar( ch).decomposition();
0428             }
0429             //qDebug() << "#" << it.data();
0430             }
0431             */
0432             entities.append(str);
0433             // TODO: later use a table view
0434         }
0435     }
0436     return entities;
0437 }
0438 
0439 // kate: space-indent on; indent-width 4; replace-tabs on; mixed-indent off;