Warning, file /frameworks/khtml/src/xpath/step.cpp was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).

0001 /*
0002  * step.cc - Copyright 2005 Frerich Raabe   <raabe@kde.org>
0003  *           Copyright 2010 Maksim Orlovich <maksim@kde.org>
0004  *
0005  * Redistribution and use in source and binary forms, with or without
0006  * modification, are permitted provided that the following conditions
0007  * are met:
0008  *
0009  * 1. Redistributions of source code must retain the above copyright
0010  *    notice, this list of conditions and the following disclaimer.
0011  * 2. Redistributions in binary form must reproduce the above copyright
0012  *    notice, this list of conditions and the following disclaimer in the
0013  *    documentation and/or other materials provided with the distribution.
0014  *
0015  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
0016  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
0017  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
0018  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
0019  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
0020  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
0021  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
0022  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
0023  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
0024  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0025  */
0026 #include "step.h"
0027 
0028 #include "dom/dom3_xpath.h"
0029 #include "xml/dom_docimpl.h"
0030 #include "xml/dom_nodeimpl.h"
0031 #include "xml/dom_textimpl.h"
0032 #include "xml/dom3_xpathimpl.h"
0033 
0034 #include "step.h"
0035 
0036 #include <QtDebug>
0037 
0038 using namespace DOM;
0039 using namespace DOM::XPath;
0040 using namespace khtml;
0041 using namespace khtml::XPath;
0042 
0043 static bool areAdjacentTextNodes(NodeImpl *n, NodeImpl *m)
0044 {
0045     if (!n || !m) {
0046         return false;
0047     }
0048 
0049     if (n->nodeType() != Node::TEXT_NODE && n->nodeType() != Node::CDATA_SECTION_NODE) {
0050         return false;
0051     }
0052     if (m->nodeType() != Node::TEXT_NODE && m->nodeType() != Node::CDATA_SECTION_NODE) {
0053         return false;
0054     }
0055 
0056     // ###
0057 #ifdef __GNUC__
0058 #warning "Might need more generic adjacency -- check"
0059 #endif
0060 
0061     return (n->nextSibling() && (n->nextSibling() == m)) ||
0062            (m->nextSibling() && (m->nextSibling() == n));
0063 }
0064 
0065 static DomNodeList compressTextNodes(const DomNodeList &nodes)
0066 {
0067     DomNodeList outNodes = new StaticNodeListImpl;
0068 
0069     for (unsigned long n = 0; n < nodes->length(); ++n) {
0070         NodeImpl *node = nodes->item(n);
0071         NodeImpl *next = n + 1 < nodes->length() ? nodes->item(n + 1) : nullptr;
0072 
0073         if (!next || !areAdjacentTextNodes(node, next)) {
0074             outNodes->append(node);
0075         } else if (areAdjacentTextNodes(node, next)) {
0076             QString s = node->nodeValue().string();
0077 
0078             // n2 is a potential successor, and is always in-range
0079             unsigned long n2 = n + 1;
0080             while (n2 < nodes->length() && areAdjacentTextNodes(nodes->item(n2), nodes->item(n2 - 1))) {
0081                 s += nodes->item(n2)->nodeValue().string();
0082                 ++n2;
0083             }
0084             outNodes->append(node->document()->createTextNode(new DOMStringImpl(s.data(), s.length())));
0085         }
0086     }
0087     return outNodes;
0088 }
0089 
0090 QString Step::axisAsString(AxisType axis)
0091 {
0092     switch (axis) {
0093     case AncestorAxis: return "ancestor";
0094     case AncestorOrSelfAxis: return "ancestor-or-self";
0095     case AttributeAxis: return "attribute";
0096     case ChildAxis: return "child";
0097     case DescendantAxis: return "descendant";
0098     case DescendantOrSelfAxis: return "descendant-or-self";
0099     case FollowingAxis: return "following";
0100     case FollowingSiblingAxis: return "following-sibling";
0101     case NamespaceAxis: return "namespace";
0102     case ParentAxis: return "parent";
0103     case PrecedingAxis: return "preceding";
0104     case PrecedingSiblingAxis: return "preceding-sibling";
0105     case SelfAxis: return "self";
0106     }
0107     return QString();
0108 }
0109 
0110 Step::Step(): m_compileState(NotCompiled)
0111 {
0112 }
0113 
0114 Step::Step(AxisType axis, const DOMString &nodeTest,
0115            const QList<Predicate *> &predicates)
0116     : m_axis(axis),
0117       m_nodeTest(nodeTest),
0118       m_compileState(NotCompiled),
0119       m_predicates(predicates)
0120 {
0121 }
0122 
0123 Step::~Step()
0124 {
0125     qDeleteAll(m_predicates);
0126 }
0127 
0128 DomNodeList Step::evaluate(NodeImpl *context) const
0129 {
0130     assert(context);
0131 #ifdef XPATH_VERBOSE
0132     qCDebug(KHTML_LOG) << "Evaluating step, axis='" << axisAsString(m_axis) << "'"
0133              << ", nodetest='" << m_nodeTest << "'"
0134              << ", " << m_predicates.count() << " predicates";
0135     qCDebug(KHTML_LOG) << DOM::getPrintableName(context->id());
0136 #endif
0137 
0138     DomNodeList inNodes = nodesInAxis(context), outNodes;
0139 
0140     // ### optimization opportunity: can say DocumentOrder for most
0141     StaticNodeListImpl::NormalizationKind known = StaticNodeListImpl::AxisOrder;
0142     inNodes->setKnownNormalization(known);
0143 
0144 #ifdef XPATH_VERBOSE
0145     qCDebug(KHTML_LOG) << "Axis " << axisAsString(m_axis) << " matches " << inNodes->length() << " nodes.";
0146 #endif
0147 
0148     inNodes = nodeTestMatches(context, inNodes);
0149     inNodes->setKnownNormalization(known); // nodeTest doesn't change order
0150 
0151 #ifdef XPATH_VERBOSE
0152     qCDebug(KHTML_LOG) << "\tNodetest " << m_nodeTest << " trims this number to " << inNodes->length();
0153 #endif
0154 
0155     if (m_predicates.isEmpty()) {
0156         return inNodes;
0157     }
0158 
0159     foreach (Predicate *predicate, m_predicates) {
0160         outNodes = new StaticNodeListImpl;
0161         Expression::evaluationContext().size = int(inNodes->length());
0162         Expression::evaluationContext().position = 1;
0163 
0164         for (unsigned long n = 0; n < inNodes->length(); ++n) {
0165             NodeImpl *node = inNodes->item(n);
0166             Expression::evaluationContext().node = node;
0167             EvaluationContext backupCtx = Expression::evaluationContext();
0168 #ifdef XPATH_VERBOSE
0169             qCDebug(KHTML_LOG) << Expression::evaluationContext().position << "/" << node;
0170 #endif
0171             if (predicate->evaluate()) {
0172                 outNodes->append(node);
0173             }
0174             Expression::evaluationContext() = backupCtx;
0175             ++Expression::evaluationContext().position;
0176         }
0177 #ifdef XPATH_VERBOSE
0178         qCDebug(KHTML_LOG) << "\tPredicate trims this number to " << outNodes->length();
0179 #endif
0180         inNodes = outNodes;
0181         inNodes->setKnownNormalization(known); // predicates don't change order
0182     }
0183 
0184     return outNodes;
0185 }
0186 
0187 DomNodeList Step::nodesInAxis(NodeImpl *context) const
0188 {
0189     //assert(context);
0190     DomNodeList nodes = new StaticNodeListImpl;
0191     switch (m_axis) {
0192     case ChildAxis: {
0193         NodeImpl *n = xpathFirstChild(context);
0194         while (n) {
0195             nodes->append(n);
0196             n = n->nextSibling();
0197         }
0198         return nodes;
0199     }
0200     case DescendantAxis: {
0201         collectChildrenRecursively(nodes, context);
0202         return nodes;
0203     }
0204     case ParentAxis: {
0205         NodeImpl *p = xpathParentNode(context);
0206 
0207         if (p) {
0208             nodes->append(p);
0209         }
0210         return nodes;
0211     }
0212     case AncestorAxis: {
0213         NodeImpl *n = xpathParentNode(context);
0214         while (n) {
0215             nodes->append(n);
0216             n = xpathParentNode(n);
0217         }
0218         return nodes;
0219     }
0220     case FollowingSiblingAxis: {
0221         if (context->nodeType() == Node::ATTRIBUTE_NODE ||
0222                 context->nodeType() == Node::XPATH_NAMESPACE_NODE) {
0223             return nodes; // empty
0224         }
0225 
0226         NodeImpl *n = context->nextSibling();
0227         while (n) {
0228             nodes->append(n);
0229             n = n->nextSibling();
0230         }
0231         return nodes;
0232     }
0233     case PrecedingSiblingAxis: {
0234         if (context->nodeType() == Node::ATTRIBUTE_NODE ||
0235                 context->nodeType() == Node::XPATH_NAMESPACE_NODE) {
0236             return nodes; // empty
0237         }
0238 
0239         NodeImpl *n = context->previousSibling();
0240         while (n) {
0241             nodes->append(n);
0242             n = n->previousSibling();
0243         }
0244         return nodes;
0245     }
0246     case FollowingAxis: {
0247         NodeImpl *p = context;
0248         while (!isRootDomNode(p)) {
0249             NodeImpl *n = nextSiblingForFollowing(p);
0250             while (n) {
0251                 nodes->append(n);
0252                 collectChildrenRecursively(nodes, n);
0253                 n = n->nextSibling();
0254             }
0255             p = xpathParentNode(p);
0256         }
0257         return nodes;
0258     }
0259     case PrecedingAxis: {
0260         NodeImpl *p = context;
0261         while (!isRootDomNode(p)) {
0262             NodeImpl *n = p->previousSibling();
0263             while (n) {
0264                 collectChildrenReverse(nodes, n);
0265                 nodes->append(n);
0266                 n = n->previousSibling();
0267             }
0268             p = xpathParentNode(p);
0269         }
0270         return nodes;
0271     }
0272     case AttributeAxis: {
0273         if (context->nodeType() != Node::ELEMENT_NODE) {
0274             return nodes; // empty
0275         }
0276 
0277         NamedAttrMapImpl *attrs = static_cast<ElementImpl *>(context)->attributes(true /*read-only*/);
0278         if (!attrs) {
0279             return nodes;
0280         }
0281 
0282         for (unsigned long i = 0; i < attrs->length(); ++i) {
0283             nodes->append(attrs->item(i));
0284         }
0285         return nodes;
0286     }
0287     case NamespaceAxis: {
0288         // ### TODO: Need to implement this, but not a priority, since
0289         // other impls don't.
0290 #if 0
0291         if (context->nodeType() != Node::ELEMENT_NODE) {
0292             return nodes;
0293         }
0294 
0295         bool foundXmlNsNode = false;
0296         NodeImpl *node = context;
0297         while (node) {
0298             NamedAttrMapImpl *attrs =
0299                 node->isElementNode() ? static_cast<ElementImpl *>(node)->attributes() : 0;
0300             if (!attrs) {
0301                 node = xpathParentNode(node);
0302                 continue;
0303             }
0304 
0305             for (unsigned long i = 0; i < attrs->length(); ++i) {
0306                 NodeImpl *n = attrs->item(i);
0307                 if (n->nodeName().string().startsWith("xmlns:")) {
0308                     nodes->append(n);
0309                 } else if (n->nodeName() == "xmlns" &&
0310                            !foundXmlNsNode
0311                           ) {
0312                     foundXmlNsNode = true;
0313                     if (!n->nodeValue().string().isEmpty()) {
0314                         nodes->append(n);
0315                     }
0316                 }
0317             }
0318             node = xpathParentNode(node);
0319         }
0320 #endif
0321         return nodes;
0322     }
0323     case SelfAxis:
0324         nodes->append(context);
0325         return nodes;
0326     case DescendantOrSelfAxis:
0327         nodes->append(context);
0328         collectChildrenRecursively(nodes, context);
0329         return nodes;
0330     case AncestorOrSelfAxis: {
0331         nodes->append(context);
0332         NodeImpl *n = xpathParentNode(context);
0333         while (n) {
0334             nodes->append(n);
0335             n = xpathParentNode(n);
0336         }
0337         return nodes;
0338     }
0339     default:
0340         qCWarning(KHTML_LOG) << "Unknown axis " << axisAsString(m_axis) << " passed to Step::nodesInAxis";
0341     }
0342 
0343     return nodes; // empty
0344 }
0345 
0346 void Step::compileNodeTest(bool htmlCompat) const
0347 {
0348     m_compileState = htmlCompat ? CompiledForHTML : CompiledForXML;
0349 
0350     if (m_nodeTest == "*") {
0351         m_nodeTestType = NT_Star;
0352     } else if (m_nodeTest == "text()") {
0353         m_nodeTestType = NT_Text;
0354     } else if (m_nodeTest == "comment()") {
0355         m_nodeTestType = NT_Comment;
0356     } else if (m_nodeTest.startsWith("processing-instruction")) {
0357         DOMString param;
0358 
0359         // ### is this right? (parens?)
0360         const int space = m_nodeTest.find(' ');
0361         if (space > -1) {
0362             param = m_nodeTest.substring(space + 1);
0363         }
0364 
0365         if (param.isEmpty()) {
0366             m_nodeTestType = NT_PI;
0367         } else {
0368             m_nodeTestType = NT_PI_Lit;
0369             m_piInfo = param;
0370         }
0371     } else if (m_nodeTest == "node()") {
0372         m_nodeTestType = NT_AnyNode;
0373     } else {
0374         // Some sort of name combo.
0375         PrefixName prefix;
0376         LocalName localName;
0377 
0378         splitPrefixLocalName(m_nodeTest,  prefix, localName, htmlCompat);
0379 
0380         if (prefix.id() == DOM::emptyPrefix) {
0381             // localname only
0382             m_nodeTestType = NT_LocalName;
0383             m_localName    = localName;
0384         } else if (localName.toString() == "*") {
0385             // namespace only
0386             m_nodeTestType = NT_Namespace;
0387             m_namespace    = NamespaceName::fromString(namespaceFromNodetest(m_nodeTest));
0388         } else {
0389             // Both parts.
0390             m_nodeTestType = NT_QName;
0391             m_localName    = localName;
0392             m_namespace    = NamespaceName::fromString(namespaceFromNodetest(m_nodeTest));
0393         }
0394     }
0395 }
0396 
0397 DomNodeList Step::nodeTestMatches(NodeImpl *ctx, const DomNodeList &nodes) const
0398 {
0399     CompileState desired = ctx->htmlCompat() ? CompiledForHTML : CompiledForXML;
0400     if (m_compileState != desired) {
0401         compileNodeTest(ctx->htmlCompat());
0402     }
0403 
0404     if (m_nodeTestType == NT_AnyNode) { // no need for a new set
0405         return nodes;
0406     }
0407 
0408     DomNodeList matches = new StaticNodeListImpl;
0409 
0410     // A lot of things can be handled by just checking the type,
0411     // and maybe a hair more special handling
0412     int matchType;
0413     switch (m_nodeTestType) {
0414     case NT_Star:
0415         matchType = primaryNodeType(m_axis);
0416         break;
0417     case NT_Comment:
0418         matchType = Node::COMMENT_NODE;
0419         break;
0420     case NT_Text:
0421         matchType = Node::TEXT_NODE;
0422         break;
0423     case NT_PI:
0424     case NT_PI_Lit:
0425         matchType = Node::PROCESSING_INSTRUCTION_NODE;
0426         break;
0427     default:
0428         matchType = -1;
0429     }
0430 
0431     if (matchType != -1) {
0432         for (unsigned long n = 0; n < nodes->length(); ++n) {
0433             NodeImpl *node = nodes->item(n);
0434             int nodeType   = node->nodeType();
0435             if (nodeType == matchType) {
0436                 if (m_nodeTestType == NT_PI_Lit && node->nodeName() != m_piInfo) {
0437                     continue;
0438                 }
0439 
0440                 matches->append(node);
0441             }
0442 
0443             if (matchType == NT_Text && nodeType == Node::CDATA_SECTION_NODE) {
0444                 matches->append(node);
0445             }
0446         }
0447         return matches;
0448     }
0449 
0450     // Now we are checking by name. We always want to filter out
0451     // the primary axis here
0452     // ### CHECK: this used to have special handling for some axes.
0453     matchType = primaryNodeType(m_axis);
0454     for (unsigned long n = 0; n < nodes->length(); ++n) {
0455         NodeImpl *node = nodes->item(n);
0456         if (node->nodeType() != matchType) {
0457             continue;
0458         }
0459 
0460         if (m_nodeTestType == NT_LocalName) {
0461             if (localNamePart(node->id()) == m_localName.id()) {
0462                 matches->append(node);
0463             }
0464         } else if (m_nodeTestType == NT_Namespace) {
0465             if (namespacePart(node->id()) == m_namespace.id()) {
0466                 matches->append(node);
0467             }
0468         } else {
0469             // NT_QName
0470             if (node->id() == makeId(m_namespace.id(), m_localName.id())) {
0471                 matches->append(node);
0472             }
0473         }
0474     }
0475     return matches;
0476 }
0477 
0478 void Step::optimize()
0479 {
0480     foreach (Predicate *predicate, m_predicates) {
0481         predicate->optimize();
0482     }
0483 }
0484 
0485 QString Step::dump() const
0486 {
0487     QString s = QString("<step axis=\"%1\" nodetest=\"%2\">").arg(axisAsString(m_axis)).arg(m_nodeTest.string());
0488     foreach (Predicate *predicate, m_predicates) {
0489         s += predicate->dump();
0490     }
0491     s += "</step>";
0492     return s;
0493 }
0494 
0495 DOMString Step::namespaceFromNodetest(const DOMString &nodeTest) const
0496 {
0497     int i = nodeTest.find(':');
0498     if (i == -1) {
0499         return DOMString();
0500     }
0501 
0502     DOMString prefix(nodeTest.substring(0, i));
0503     XPathNSResolverImpl *resolver = Expression::evaluationContext().resolver;
0504 
0505     DOM::DOMString ns;
0506     if (resolver) {
0507         ns = resolver->lookupNamespaceURI(prefix);
0508     }
0509 
0510     if (ns.isNull()) {
0511         Expression::reportNamespaceErr();
0512     }
0513 
0514     return ns;
0515 }
0516 
0517 unsigned int Step::primaryNodeType(AxisType axis) const
0518 {
0519     switch (axis) {
0520     case AttributeAxis:
0521         return Node::ATTRIBUTE_NODE;
0522     case NamespaceAxis:
0523         return Node::XPATH_NAMESPACE_NODE;
0524     default:
0525         return Node::ELEMENT_NODE;
0526     }
0527 }
0528