File indexing completed on 2024-05-05 12:16:50
0001 /* 0002 * step.cc - Copyright 2005 Frerich Raabe <raabe@kde.org> 0003 * Copyright 2010 Maksim Orlovich <maksim@kde.org> 0004 * 0005 * Redistribution and use in source and binary forms, with or without 0006 * modification, are permitted provided that the following conditions 0007 * are met: 0008 * 0009 * 1. Redistributions of source code must retain the above copyright 0010 * notice, this list of conditions and the following disclaimer. 0011 * 2. Redistributions in binary form must reproduce the above copyright 0012 * notice, this list of conditions and the following disclaimer in the 0013 * documentation and/or other materials provided with the distribution. 0014 * 0015 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 0016 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 0017 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 0018 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 0019 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 0020 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 0021 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 0022 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 0023 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 0024 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 0025 */ 0026 #include "step.h" 0027 0028 #include "dom/dom3_xpath.h" 0029 #include "xml/dom_docimpl.h" 0030 #include "xml/dom_nodeimpl.h" 0031 #include "xml/dom_textimpl.h" 0032 #include "xml/dom3_xpathimpl.h" 0033 0034 #include "step.h" 0035 0036 #include <QtDebug> 0037 0038 using namespace DOM; 0039 using namespace DOM::XPath; 0040 using namespace khtml; 0041 using namespace khtml::XPath; 0042 0043 static bool areAdjacentTextNodes(NodeImpl *n, NodeImpl *m) 0044 { 0045 if (!n || !m) { 0046 return false; 0047 } 0048 0049 if (n->nodeType() != Node::TEXT_NODE && n->nodeType() != Node::CDATA_SECTION_NODE) { 0050 return false; 0051 } 0052 if (m->nodeType() != Node::TEXT_NODE && m->nodeType() != Node::CDATA_SECTION_NODE) { 0053 return false; 0054 } 0055 0056 // ### 0057 #ifdef __GNUC__ 0058 #warning "Might need more generic adjacency -- check" 0059 #endif 0060 0061 return (n->nextSibling() && (n->nextSibling() == m)) || 0062 (m->nextSibling() && (m->nextSibling() == n)); 0063 } 0064 0065 static DomNodeList compressTextNodes(const DomNodeList &nodes) 0066 { 0067 DomNodeList outNodes = new StaticNodeListImpl; 0068 0069 for (unsigned long n = 0; n < nodes->length(); ++n) { 0070 NodeImpl *node = nodes->item(n); 0071 NodeImpl *next = n + 1 < nodes->length() ? nodes->item(n + 1) : nullptr; 0072 0073 if (!next || !areAdjacentTextNodes(node, next)) { 0074 outNodes->append(node); 0075 } else if (areAdjacentTextNodes(node, next)) { 0076 QString s = node->nodeValue().string(); 0077 0078 // n2 is a potential successor, and is always in-range 0079 unsigned long n2 = n + 1; 0080 while (n2 < nodes->length() && areAdjacentTextNodes(nodes->item(n2), nodes->item(n2 - 1))) { 0081 s += nodes->item(n2)->nodeValue().string(); 0082 ++n2; 0083 } 0084 outNodes->append(node->document()->createTextNode(new DOMStringImpl(s.data(), s.length()))); 0085 } 0086 } 0087 return outNodes; 0088 } 0089 0090 QString Step::axisAsString(AxisType axis) 0091 { 0092 switch (axis) { 0093 case AncestorAxis: return "ancestor"; 0094 case AncestorOrSelfAxis: return "ancestor-or-self"; 0095 case AttributeAxis: return "attribute"; 0096 case ChildAxis: return "child"; 0097 case DescendantAxis: return "descendant"; 0098 case DescendantOrSelfAxis: return "descendant-or-self"; 0099 case FollowingAxis: return "following"; 0100 case FollowingSiblingAxis: return "following-sibling"; 0101 case NamespaceAxis: return "namespace"; 0102 case ParentAxis: return "parent"; 0103 case PrecedingAxis: return "preceding"; 0104 case PrecedingSiblingAxis: return "preceding-sibling"; 0105 case SelfAxis: return "self"; 0106 } 0107 return QString(); 0108 } 0109 0110 Step::Step(): m_compileState(NotCompiled) 0111 { 0112 } 0113 0114 Step::Step(AxisType axis, const DOMString &nodeTest, 0115 const QList<Predicate *> &predicates) 0116 : m_axis(axis), 0117 m_nodeTest(nodeTest), 0118 m_compileState(NotCompiled), 0119 m_predicates(predicates) 0120 { 0121 } 0122 0123 Step::~Step() 0124 { 0125 qDeleteAll(m_predicates); 0126 } 0127 0128 DomNodeList Step::evaluate(NodeImpl *context) const 0129 { 0130 assert(context); 0131 #ifdef XPATH_VERBOSE 0132 qCDebug(KHTML_LOG) << "Evaluating step, axis='" << axisAsString(m_axis) << "'" 0133 << ", nodetest='" << m_nodeTest << "'" 0134 << ", " << m_predicates.count() << " predicates"; 0135 qCDebug(KHTML_LOG) << DOM::getPrintableName(context->id()); 0136 #endif 0137 0138 DomNodeList inNodes = nodesInAxis(context), outNodes; 0139 0140 // ### optimization opportunity: can say DocumentOrder for most 0141 StaticNodeListImpl::NormalizationKind known = StaticNodeListImpl::AxisOrder; 0142 inNodes->setKnownNormalization(known); 0143 0144 #ifdef XPATH_VERBOSE 0145 qCDebug(KHTML_LOG) << "Axis " << axisAsString(m_axis) << " matches " << inNodes->length() << " nodes."; 0146 #endif 0147 0148 inNodes = nodeTestMatches(context, inNodes); 0149 inNodes->setKnownNormalization(known); // nodeTest doesn't change order 0150 0151 #ifdef XPATH_VERBOSE 0152 qCDebug(KHTML_LOG) << "\tNodetest " << m_nodeTest << " trims this number to " << inNodes->length(); 0153 #endif 0154 0155 if (m_predicates.isEmpty()) { 0156 return inNodes; 0157 } 0158 0159 foreach (Predicate *predicate, m_predicates) { 0160 outNodes = new StaticNodeListImpl; 0161 Expression::evaluationContext().size = int(inNodes->length()); 0162 Expression::evaluationContext().position = 1; 0163 0164 for (unsigned long n = 0; n < inNodes->length(); ++n) { 0165 NodeImpl *node = inNodes->item(n); 0166 Expression::evaluationContext().node = node; 0167 EvaluationContext backupCtx = Expression::evaluationContext(); 0168 #ifdef XPATH_VERBOSE 0169 qCDebug(KHTML_LOG) << Expression::evaluationContext().position << "/" << node; 0170 #endif 0171 if (predicate->evaluate()) { 0172 outNodes->append(node); 0173 } 0174 Expression::evaluationContext() = backupCtx; 0175 ++Expression::evaluationContext().position; 0176 } 0177 #ifdef XPATH_VERBOSE 0178 qCDebug(KHTML_LOG) << "\tPredicate trims this number to " << outNodes->length(); 0179 #endif 0180 inNodes = outNodes; 0181 inNodes->setKnownNormalization(known); // predicates don't change order 0182 } 0183 0184 return outNodes; 0185 } 0186 0187 DomNodeList Step::nodesInAxis(NodeImpl *context) const 0188 { 0189 //assert(context); 0190 DomNodeList nodes = new StaticNodeListImpl; 0191 switch (m_axis) { 0192 case ChildAxis: { 0193 NodeImpl *n = xpathFirstChild(context); 0194 while (n) { 0195 nodes->append(n); 0196 n = n->nextSibling(); 0197 } 0198 return nodes; 0199 } 0200 case DescendantAxis: { 0201 collectChildrenRecursively(nodes, context); 0202 return nodes; 0203 } 0204 case ParentAxis: { 0205 NodeImpl *p = xpathParentNode(context); 0206 0207 if (p) { 0208 nodes->append(p); 0209 } 0210 return nodes; 0211 } 0212 case AncestorAxis: { 0213 NodeImpl *n = xpathParentNode(context); 0214 while (n) { 0215 nodes->append(n); 0216 n = xpathParentNode(n); 0217 } 0218 return nodes; 0219 } 0220 case FollowingSiblingAxis: { 0221 if (context->nodeType() == Node::ATTRIBUTE_NODE || 0222 context->nodeType() == Node::XPATH_NAMESPACE_NODE) { 0223 return nodes; // empty 0224 } 0225 0226 NodeImpl *n = context->nextSibling(); 0227 while (n) { 0228 nodes->append(n); 0229 n = n->nextSibling(); 0230 } 0231 return nodes; 0232 } 0233 case PrecedingSiblingAxis: { 0234 if (context->nodeType() == Node::ATTRIBUTE_NODE || 0235 context->nodeType() == Node::XPATH_NAMESPACE_NODE) { 0236 return nodes; // empty 0237 } 0238 0239 NodeImpl *n = context->previousSibling(); 0240 while (n) { 0241 nodes->append(n); 0242 n = n->previousSibling(); 0243 } 0244 return nodes; 0245 } 0246 case FollowingAxis: { 0247 NodeImpl *p = context; 0248 while (!isRootDomNode(p)) { 0249 NodeImpl *n = nextSiblingForFollowing(p); 0250 while (n) { 0251 nodes->append(n); 0252 collectChildrenRecursively(nodes, n); 0253 n = n->nextSibling(); 0254 } 0255 p = xpathParentNode(p); 0256 } 0257 return nodes; 0258 } 0259 case PrecedingAxis: { 0260 NodeImpl *p = context; 0261 while (!isRootDomNode(p)) { 0262 NodeImpl *n = p->previousSibling(); 0263 while (n) { 0264 collectChildrenReverse(nodes, n); 0265 nodes->append(n); 0266 n = n->previousSibling(); 0267 } 0268 p = xpathParentNode(p); 0269 } 0270 return nodes; 0271 } 0272 case AttributeAxis: { 0273 if (context->nodeType() != Node::ELEMENT_NODE) { 0274 return nodes; // empty 0275 } 0276 0277 NamedAttrMapImpl *attrs = static_cast<ElementImpl *>(context)->attributes(true /*read-only*/); 0278 if (!attrs) { 0279 return nodes; 0280 } 0281 0282 for (unsigned long i = 0; i < attrs->length(); ++i) { 0283 nodes->append(attrs->item(i)); 0284 } 0285 return nodes; 0286 } 0287 case NamespaceAxis: { 0288 // ### TODO: Need to implement this, but not a priority, since 0289 // other impls don't. 0290 #if 0 0291 if (context->nodeType() != Node::ELEMENT_NODE) { 0292 return nodes; 0293 } 0294 0295 bool foundXmlNsNode = false; 0296 NodeImpl *node = context; 0297 while (node) { 0298 NamedAttrMapImpl *attrs = 0299 node->isElementNode() ? static_cast<ElementImpl *>(node)->attributes() : 0; 0300 if (!attrs) { 0301 node = xpathParentNode(node); 0302 continue; 0303 } 0304 0305 for (unsigned long i = 0; i < attrs->length(); ++i) { 0306 NodeImpl *n = attrs->item(i); 0307 if (n->nodeName().string().startsWith("xmlns:")) { 0308 nodes->append(n); 0309 } else if (n->nodeName() == "xmlns" && 0310 !foundXmlNsNode 0311 ) { 0312 foundXmlNsNode = true; 0313 if (!n->nodeValue().string().isEmpty()) { 0314 nodes->append(n); 0315 } 0316 } 0317 } 0318 node = xpathParentNode(node); 0319 } 0320 #endif 0321 return nodes; 0322 } 0323 case SelfAxis: 0324 nodes->append(context); 0325 return nodes; 0326 case DescendantOrSelfAxis: 0327 nodes->append(context); 0328 collectChildrenRecursively(nodes, context); 0329 return nodes; 0330 case AncestorOrSelfAxis: { 0331 nodes->append(context); 0332 NodeImpl *n = xpathParentNode(context); 0333 while (n) { 0334 nodes->append(n); 0335 n = xpathParentNode(n); 0336 } 0337 return nodes; 0338 } 0339 default: 0340 qCWarning(KHTML_LOG) << "Unknown axis " << axisAsString(m_axis) << " passed to Step::nodesInAxis"; 0341 } 0342 0343 return nodes; // empty 0344 } 0345 0346 void Step::compileNodeTest(bool htmlCompat) const 0347 { 0348 m_compileState = htmlCompat ? CompiledForHTML : CompiledForXML; 0349 0350 if (m_nodeTest == "*") { 0351 m_nodeTestType = NT_Star; 0352 } else if (m_nodeTest == "text()") { 0353 m_nodeTestType = NT_Text; 0354 } else if (m_nodeTest == "comment()") { 0355 m_nodeTestType = NT_Comment; 0356 } else if (m_nodeTest.startsWith("processing-instruction")) { 0357 DOMString param; 0358 0359 // ### is this right? (parens?) 0360 const int space = m_nodeTest.find(' '); 0361 if (space > -1) { 0362 param = m_nodeTest.substring(space + 1); 0363 } 0364 0365 if (param.isEmpty()) { 0366 m_nodeTestType = NT_PI; 0367 } else { 0368 m_nodeTestType = NT_PI_Lit; 0369 m_piInfo = param; 0370 } 0371 } else if (m_nodeTest == "node()") { 0372 m_nodeTestType = NT_AnyNode; 0373 } else { 0374 // Some sort of name combo. 0375 PrefixName prefix; 0376 LocalName localName; 0377 0378 splitPrefixLocalName(m_nodeTest, prefix, localName, htmlCompat); 0379 0380 if (prefix.id() == DOM::emptyPrefix) { 0381 // localname only 0382 m_nodeTestType = NT_LocalName; 0383 m_localName = localName; 0384 } else if (localName.toString() == "*") { 0385 // namespace only 0386 m_nodeTestType = NT_Namespace; 0387 m_namespace = NamespaceName::fromString(namespaceFromNodetest(m_nodeTest)); 0388 } else { 0389 // Both parts. 0390 m_nodeTestType = NT_QName; 0391 m_localName = localName; 0392 m_namespace = NamespaceName::fromString(namespaceFromNodetest(m_nodeTest)); 0393 } 0394 } 0395 } 0396 0397 DomNodeList Step::nodeTestMatches(NodeImpl *ctx, const DomNodeList &nodes) const 0398 { 0399 CompileState desired = ctx->htmlCompat() ? CompiledForHTML : CompiledForXML; 0400 if (m_compileState != desired) { 0401 compileNodeTest(ctx->htmlCompat()); 0402 } 0403 0404 if (m_nodeTestType == NT_AnyNode) { // no need for a new set 0405 return nodes; 0406 } 0407 0408 DomNodeList matches = new StaticNodeListImpl; 0409 0410 // A lot of things can be handled by just checking the type, 0411 // and maybe a hair more special handling 0412 int matchType; 0413 switch (m_nodeTestType) { 0414 case NT_Star: 0415 matchType = primaryNodeType(m_axis); 0416 break; 0417 case NT_Comment: 0418 matchType = Node::COMMENT_NODE; 0419 break; 0420 case NT_Text: 0421 matchType = Node::TEXT_NODE; 0422 break; 0423 case NT_PI: 0424 case NT_PI_Lit: 0425 matchType = Node::PROCESSING_INSTRUCTION_NODE; 0426 break; 0427 default: 0428 matchType = -1; 0429 } 0430 0431 if (matchType != -1) { 0432 for (unsigned long n = 0; n < nodes->length(); ++n) { 0433 NodeImpl *node = nodes->item(n); 0434 int nodeType = node->nodeType(); 0435 if (nodeType == matchType) { 0436 if (m_nodeTestType == NT_PI_Lit && node->nodeName() != m_piInfo) { 0437 continue; 0438 } 0439 0440 matches->append(node); 0441 } 0442 0443 if (matchType == NT_Text && nodeType == Node::CDATA_SECTION_NODE) { 0444 matches->append(node); 0445 } 0446 } 0447 return matches; 0448 } 0449 0450 // Now we are checking by name. We always want to filter out 0451 // the primary axis here 0452 // ### CHECK: this used to have special handling for some axes. 0453 matchType = primaryNodeType(m_axis); 0454 for (unsigned long n = 0; n < nodes->length(); ++n) { 0455 NodeImpl *node = nodes->item(n); 0456 if (node->nodeType() != matchType) { 0457 continue; 0458 } 0459 0460 if (m_nodeTestType == NT_LocalName) { 0461 if (localNamePart(node->id()) == m_localName.id()) { 0462 matches->append(node); 0463 } 0464 } else if (m_nodeTestType == NT_Namespace) { 0465 if (namespacePart(node->id()) == m_namespace.id()) { 0466 matches->append(node); 0467 } 0468 } else { 0469 // NT_QName 0470 if (node->id() == makeId(m_namespace.id(), m_localName.id())) { 0471 matches->append(node); 0472 } 0473 } 0474 } 0475 return matches; 0476 } 0477 0478 void Step::optimize() 0479 { 0480 foreach (Predicate *predicate, m_predicates) { 0481 predicate->optimize(); 0482 } 0483 } 0484 0485 QString Step::dump() const 0486 { 0487 QString s = QString("<step axis=\"%1\" nodetest=\"%2\">").arg(axisAsString(m_axis)).arg(m_nodeTest.string()); 0488 foreach (Predicate *predicate, m_predicates) { 0489 s += predicate->dump(); 0490 } 0491 s += "</step>"; 0492 return s; 0493 } 0494 0495 DOMString Step::namespaceFromNodetest(const DOMString &nodeTest) const 0496 { 0497 int i = nodeTest.find(':'); 0498 if (i == -1) { 0499 return DOMString(); 0500 } 0501 0502 DOMString prefix(nodeTest.substring(0, i)); 0503 XPathNSResolverImpl *resolver = Expression::evaluationContext().resolver; 0504 0505 DOM::DOMString ns; 0506 if (resolver) { 0507 ns = resolver->lookupNamespaceURI(prefix); 0508 } 0509 0510 if (ns.isNull()) { 0511 Expression::reportNamespaceErr(); 0512 } 0513 0514 return ns; 0515 } 0516 0517 unsigned int Step::primaryNodeType(AxisType axis) const 0518 { 0519 switch (axis) { 0520 case AttributeAxis: 0521 return Node::ATTRIBUTE_NODE; 0522 case NamespaceAxis: 0523 return Node::XPATH_NAMESPACE_NODE; 0524 default: 0525 return Node::ELEMENT_NODE; 0526 } 0527 } 0528