File indexing completed on 2024-05-12 06:02:27

0001 <?php
0002 /**
0003  * Zend Framework
0004  *
0005  * LICENSE
0006  *
0007  * This source file is subject to the new BSD license that is bundled
0008  * with this package in the file LICENSE.txt.
0009  * It is also available through the world-wide-web at this URL:
0010  * http://framework.zend.com/license/new-bsd
0011  * If you did not receive a copy of the license and are unable to
0012  * obtain it through the world-wide-web, please send an email
0013  * to license@zend.com so we can send you a copy immediately.
0014  *
0015  * @category   Zend
0016  * @package    Zend_Dom
0017  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0018  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0019  * @version    $Id$
0020  */
0021 
0022 /**
0023  * @see Zend_Dom_Query_Css2Xpath
0024  */
0025 // require_once 'Zend/Dom/Query/Css2Xpath.php';
0026 
0027 /**
0028  * @see Zend_Dom_Query_Result
0029  */
0030 // require_once 'Zend/Dom/Query/Result.php';
0031 
0032 /** @see Zend_Xml_Security */
0033 // require_once 'Zend/Xml/Security.php';
0034 
0035 /** @see Zend_Xml_Exception */
0036 // require_once 'Zend/Xml/Exception.php';
0037 
0038 /**
0039  * Query DOM structures based on CSS selectors and/or XPath
0040  *
0041  * @package    Zend_Dom
0042  * @subpackage Query
0043  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0044  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0045  */
class Zend_Dom_Query
{
    /**#@+
     * Document types
     */
    const DOC_DOM   = 'docDom';
    const DOC_XML   = 'docXml';
    const DOC_HTML  = 'docHtml';
    const DOC_XHTML = 'docXhtml';
    /**#@-*/

    /**
     * Registered document: raw markup, a DOMDocument, or null when nothing
     * has been registered yet.
     * @var null|string|DOMDocument
     */
    protected $_document;

    /**
     * libxml errors collected by the most recent parse that produced any.
     * The value is only overwritten when a parse reports errors; a later
     * clean parse does not reset it.
     * @var false|array
     */
    protected $_documentErrors = false;

    /**
     * Document type (one of the DOC_* constants once a document is set)
     * @var null|string
     */
    protected $_docType;

    /**
     * Document encoding
     * @var null|string
     */
    protected $_encoding;

    /**
     * XPath namespaces, iterated as prefix => namespace URI when a query runs
     * @var array
     */
    protected $_xpathNamespaces = array();

    /**
     * Constructor
     *
     * Both arguments are optional; a document can be registered later via
     * setDocument() or one of the setDocument*() methods.
     *
     * @param null|string|DOMDocument $document
     * @param null|string $encoding
     */
    public function __construct($document = null, $encoding = null)
    {
        $this->setEncoding($encoding);
        $this->setDocument($document);
    }

    /**
     * Set document encoding
     *
     * @param  null|string $encoding Null clears the encoding; anything else is cast to string
     * @return Zend_Dom_Query
     */
    public function setEncoding($encoding)
    {
        $this->_encoding = (null === $encoding) ? null : (string) $encoding;
        return $this;
    }

    /**
     * Get document encoding
     *
     * @return null|string
     */
    public function getEncoding()
    {
        return $this->_encoding;
    }

    /**
     * Set document to query
     *
     * Detects the document flavour and delegates to the matching
     * setDocument*() method:
     *  - DOMDocument instance               => setDocumentDom()
     *  - starts with an XML declaration and
     *    has an <html xmlns="..."> element  => setDocumentXhtml()
     *  - starts with an XML declaration     => setDocumentXml()
     *  - contains the text "DTD XHTML"      => setDocumentXhtml()
     *  - anything else                      => setDocumentHtml()
     *
     * A null or empty document is ignored and leaves the current state
     * untouched.
     *
     * @param  null|string|DOMDocument $document
     * @param  null|string $encoding Document encoding
     * @return Zend_Dom_Query
     */
    public function setDocument($document, $encoding = null)
    {
        if ($document instanceof DOMDocument) {
            return $this->setDocumentDom($document);
        }
        // The constructor passes null by default; strlen(null) is deprecated
        // since PHP 8.1, so test for null explicitly before measuring.
        if (null === $document || 0 === strlen((string) $document)) {
            return $this;
        }
        // breaking XML declaration to make syntax highlighting work
        if ('<' . '?xml' === substr(trim($document), 0, 5)) {
            if (preg_match('/<html[^>]*xmlns="([^"]+)"[^>]*>/i', $document, $matches)) {
                // NOTE(review): the URI is appended under a numeric key, which
                // _getNodeList() later passes to registerNamespace() as the
                // prefix - confirm this is intended before relying on it.
                $this->_xpathNamespaces[] = $matches[1];
                return $this->setDocumentXhtml($document, $encoding);
            }
            return $this->setDocumentXml($document, $encoding);
        }
        if (false !== strpos($document, 'DTD XHTML')) {
            return $this->setDocumentXhtml($document, $encoding);
        }
        return $this->setDocumentHtml($document, $encoding);
    }

    /**
     * Set DOMDocument to query
     *
     * The document's own encoding, when it has one, replaces the current
     * encoding.
     *
     * @param  DOMDocument $document
     * @return Zend_Dom_Query
     */
    public function setDocumentDom(DOMDocument $document)
    {
        $this->_document = $document;
        $this->_docType  = self::DOC_DOM;
        if (null !== $document->encoding) {
            $this->setEncoding($document->encoding);
        }
        return $this;
    }

    /**
     * Register HTML document
     *
     * @param  string $document
     * @param  null|string $encoding Document encoding; null keeps the current encoding
     * @return Zend_Dom_Query
     */
    public function setDocumentHtml($document, $encoding = null)
    {
        $this->_document = (string) $document;
        $this->_docType  = self::DOC_HTML;
        if (null !== $encoding) {
            $this->setEncoding($encoding);
        }
        return $this;
    }

    /**
     * Register XHTML document
     *
     * @param  string $document
     * @param  null|string $encoding Document encoding; null keeps the current encoding
     * @return Zend_Dom_Query
     */
    public function setDocumentXhtml($document, $encoding = null)
    {
        $this->_document = (string) $document;
        $this->_docType  = self::DOC_XHTML;
        if (null !== $encoding) {
            $this->setEncoding($encoding);
        }
        return $this;
    }

    /**
     * Register XML document
     *
     * @param  string $document
     * @param  null|string $encoding Document encoding; null keeps the current encoding
     * @return Zend_Dom_Query
     */
    public function setDocumentXml($document, $encoding = null)
    {
        $this->_document = (string) $document;
        $this->_docType  = self::DOC_XML;
        if (null !== $encoding) {
            $this->setEncoding($encoding);
        }
        return $this;
    }

    /**
     * Retrieve current document
     *
     * @return null|string|DOMDocument
     */
    public function getDocument()
    {
        return $this->_document;
    }

    /**
     * Get document type
     *
     * @return null|string
     */
    public function getDocumentType()
    {
        return $this->_docType;
    }

    /**
     * Get any DOMDocument errors found
     *
     * @return false|array False until a parse has reported libxml errors
     */
    public function getDocumentErrors()
    {
        return $this->_documentErrors;
    }

    /**
     * Perform a CSS selector query
     *
     * The selector is translated with Zend_Dom_Query_Css2Xpath::transform()
     * and then executed through queryXpath().
     *
     * @param  string $query
     * @return Zend_Dom_Query_Result
     */
    public function query($query)
    {
        $xpathQuery = Zend_Dom_Query_Css2Xpath::transform($query);
        return $this->queryXpath($xpathQuery, $query);
    }

    /**
     * Perform an XPath query
     *
     * String documents are parsed into a fresh DOMDocument on every call; a
     * registered DOMDocument is queried directly. libxml errors raised while
     * parsing are captured into the document-errors property, and the
     * caller's libxml error mode is restored afterwards.
     *
     * @param  string|array $xpathQuery
     * @param  string       $query CSS selector query
     * @throws Zend_Dom_Exception When no document is registered, when the XML
     *                            scan raises Zend_Xml_Exception, or when
     *                            parsing fails
     * @return Zend_Dom_Query_Result
     */
    public function queryXpath($xpathQuery, $query = null)
    {
        if (null === ($document = $this->getDocument())) {
            // require_once 'Zend/Dom/Exception.php';
            throw new Zend_Dom_Exception('Cannot query; no document registered');
        }

        $type = $this->getDocumentType();

        // Keep the caller's setting so it can be put back afterwards instead
        // of unconditionally switching internal errors off.
        $previousErrorMode = libxml_use_internal_errors(true);

        switch ($type) {
            case self::DOC_DOM:
                $domDoc  = $document;
                $success = true;
                break;
            case self::DOC_XML:
                try {
                    $domDoc  = Zend_Xml_Security::scan($document, $this->_createDomDocument());
                    $success = ($domDoc !== false);
                } catch (Zend_Xml_Exception $e) {
                    // Drain the error buffer and restore libxml state before
                    // leaving, otherwise internal error mode would leak out.
                    $this->_collectLibxmlErrors($previousErrorMode);
                    // require_once 'Zend/Dom/Exception.php';
                    // NOTE(review): assumes Zend_Dom_Exception accepts
                    // ($message, $code, $previous) like Zend_Exception in
                    // ZF 1.10+ - confirm against the bundled class.
                    throw new Zend_Dom_Exception($e->getMessage(), 0, $e);
                }
                break;
            case self::DOC_HTML:
            case self::DOC_XHTML:
            default:
                $domDoc  = $this->_createDomDocument();
                $success = $domDoc->loadHTML($document);
                break;
        }
        $this->_collectLibxmlErrors($previousErrorMode);

        if (!$success) {
            // require_once 'Zend/Dom/Exception.php';
            throw new Zend_Dom_Exception(sprintf('Error parsing document (type == %s)', $type));
        }

        $nodeList = $this->_getNodeList($domDoc, $xpathQuery);
        return new Zend_Dom_Query_Result($query, $xpathQuery, $domDoc, $nodeList);
    }

    /**
     * Register XPath namespaces
     *
     * Replaces any previously registered namespaces.
     *
     * @param  array $xpathNamespaces Map of prefix => namespace URI
     * @return Zend_Dom_Query
     */
    public function registerXpathNamespaces($xpathNamespaces)
    {
        $this->_xpathNamespaces = $xpathNamespaces;
        return $this;
    }

    /**
     * Prepare node list
     *
     * Registers the configured namespaces, then runs the query. When the
     * query contains "[contains(@attr, ' ..." tests, every node carrying that
     * attribute first has its value wrapped in single spaces so that
     * space-delimited word matches also hit the first and last word.
     * This rewrites the attribute values of the passed document in place.
     *
     * @param  DOMDocument  $document
     * @param  string|array $xpathQuery An array is joined with the XPath union operator "|"
     * @return DOMNodeList|false Result of DOMXPath::query()
     */
    protected function _getNodeList($document, $xpathQuery)
    {
        $xpath = new DOMXPath($document);
        foreach ($this->_xpathNamespaces as $prefix => $namespaceUri) {
            $xpath->registerNamespace($prefix, $namespaceUri);
        }

        // A plain (string) cast would turn an array into the literal "Array".
        if (is_array($xpathQuery)) {
            $xpathQuery = implode('|', $xpathQuery);
        }
        $xpathQuery = (string) $xpathQuery;

        if (preg_match_all('|\[contains\((@[a-z0-9_-]+),\s?\' |i', $xpathQuery, $matches)) {
            foreach ($matches[1] as $attribute) {
                $queryString   = '//*[' . $attribute . ']';
                $attributeName = substr($attribute, 1); // drop the leading "@"
                $nodes = $xpath->query($queryString);
                foreach ($nodes as $node) {
                    $attr = $node->attributes->getNamedItem($attributeName);
                    $attr->value = ' ' . $attr->value . ' ';
                }
            }
        }
        return $xpath->query($xpathQuery);
    }

    /**
     * Create an empty DOMDocument honouring the configured encoding
     *
     * @return DOMDocument
     */
    protected function _createDomDocument()
    {
        $encoding = $this->getEncoding();
        if (null === $encoding) {
            return new DOMDocument('1.0');
        }
        return new DOMDocument('1.0', $encoding);
    }

    /**
     * Store pending libxml errors and restore the libxml error mode
     *
     * @param  bool $previousErrorMode Value returned by the libxml_use_internal_errors(true) call
     * @return void
     */
    protected function _collectLibxmlErrors($previousErrorMode)
    {
        $errors = libxml_get_errors();
        if (!empty($errors)) {
            $this->_documentErrors = $errors;
            libxml_clear_errors();
        }
        libxml_use_internal_errors($previousErrorMode);
    }
}