File indexing completed on 2025-01-19 05:21:04
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Dom
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 * @version    $Id$
 */

/**
 * @see Zend_Dom_Query_Css2Xpath
 */
// require_once 'Zend/Dom/Query/Css2Xpath.php';

/**
 * @see Zend_Dom_Query_Result
 */
// require_once 'Zend/Dom/Query/Result.php';

/** @see Zend_Xml_Security */
// require_once 'Zend/Xml/Security.php';

/** @see Zend_Xml_Exception */
// require_once 'Zend/Xml/Exception.php';

/**
 * Query DOM structures based on CSS selectors and/or XPath
 *
 * A document is registered either as a ready-made DOMDocument or as a markup
 * string; strings are parsed lazily, each time a query is executed.
 *
 * @package    Zend_Dom
 * @subpackage Query
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Dom_Query
{
    /**#@+
     * Document types
     */
    const DOC_DOM   = 'docDom';
    const DOC_XML   = 'docXml';
    const DOC_HTML  = 'docHtml';
    const DOC_XHTML = 'docXhtml';
    /**#@-*/

    /**
     * Registered document: raw markup, a DOMDocument, or null when none has
     * been registered yet
     * @var null|string|DOMDocument
     */
    protected $_document;

    /**
     * DOMDocument errors, if any
     *
     * Holds the libxml errors collected by the most recent query that
     * produced any; it is not reset by queries that produce none.
     * @var false|array
     */
    protected $_documentErrors = false;

    /**
     * Document type (one of the DOC_* constants)
     * @var string
     */
    protected $_docType;

    /**
     * Document encoding
     * @var null|string
     */
    protected $_encoding;

    /**
     * XPath namespaces, registered on every DOMXPath as prefix => URI
     * @var array
     */
    protected $_xpathNamespaces = array();

    /**
     * Constructor
     *
     * The encoding is stored first so that a DOMDocument carrying its own
     * encoding can override it while the document is registered.
     *
     * @param null|string|DOMDocument $document
     * @param null|string $encoding
     */
    public function __construct($document = null, $encoding = null)
    {
        $this->setEncoding($encoding);
        $this->setDocument($document);
    }

    /**
     * Set document encoding
     *
     * @param  null|string $encoding Null clears the encoding; anything else is cast to string
     * @return Zend_Dom_Query
     */
    public function setEncoding($encoding)
    {
        $this->_encoding = (null === $encoding) ? null : (string) $encoding;
        return $this;
    }

    /**
     * Get document encoding
     *
     * @return null|string
     */
    public function getEncoding()
    {
        return $this->_encoding;
    }

    /**
     * Set document to query
     *
     * Dispatches to the type-specific setter: DOMDocument instances are used
     * as-is; strings starting with an XML declaration are treated as XML, or
     * as XHTML when the <html> element declares an xmlns; strings mentioning
     * "DTD XHTML" are treated as XHTML; everything else is treated as HTML.
     * Null and empty input leave the current state untouched.
     *
     * @param  null|string|DOMDocument $document
     * @param  null|string $encoding Document encoding
     * @return Zend_Dom_Query
     */
    public function setDocument($document, $encoding = null)
    {
        if ($document instanceof DOMDocument) {
            return $this->setDocumentDom($document);
        }

        // Cast before measuring: the constructor passes null by default and
        // strlen(null) raises a deprecation notice on PHP >= 8.1.
        $document = (string) $document;
        if (0 === strlen($document)) {
            return $this;
        }

        // breaking XML declaration to make syntax highlighting work
        if ('<' . '?xml' === substr(trim($document), 0, 5)) {
            if (preg_match('/<html[^>]*xmlns="([^"]+)"[^>]*>/i', $document, $matches)) {
                // NOTE(review): the URI is appended under a numeric key, and
                // _getNodeList() passes that key to registerNamespace() as the
                // prefix - confirm this is the intended registration.
                $this->_xpathNamespaces[] = $matches[1];
                return $this->setDocumentXhtml($document, $encoding);
            }
            return $this->setDocumentXml($document, $encoding);
        }

        if (false !== strpos($document, 'DTD XHTML')) {
            return $this->setDocumentXhtml($document, $encoding);
        }

        return $this->setDocumentHtml($document, $encoding);
    }

    /**
     * Set DOMDocument to query
     *
     * Adopts the document's own encoding when it declares one.
     *
     * @param  DOMDocument $document
     * @return Zend_Dom_Query
     */
    public function setDocumentDom(DOMDocument $document)
    {
        $this->_document = $document;
        $this->_docType  = self::DOC_DOM;
        if (null !== $document->encoding) {
            $this->setEncoding($document->encoding);
        }
        return $this;
    }

    /**
     * Register HTML document
     *
     * @param  string $document
     * @param  null|string $encoding Document encoding; null keeps the current encoding
     * @return Zend_Dom_Query
     */
    public function setDocumentHtml($document, $encoding = null)
    {
        $this->_document = (string) $document;
        $this->_docType  = self::DOC_HTML;
        if (null !== $encoding) {
            $this->setEncoding($encoding);
        }
        return $this;
    }

    /**
     * Register XHTML document
     *
     * @param  string $document
     * @param  null|string $encoding Document encoding; null keeps the current encoding
     * @return Zend_Dom_Query
     */
    public function setDocumentXhtml($document, $encoding = null)
    {
        $this->_document = (string) $document;
        $this->_docType  = self::DOC_XHTML;
        if (null !== $encoding) {
            $this->setEncoding($encoding);
        }
        return $this;
    }

    /**
     * Register XML document
     *
     * @param  string $document
     * @param  null|string $encoding Document encoding; null keeps the current encoding
     * @return Zend_Dom_Query
     */
    public function setDocumentXml($document, $encoding = null)
    {
        $this->_document = (string) $document;
        $this->_docType  = self::DOC_XML;
        if (null !== $encoding) {
            $this->setEncoding($encoding);
        }
        return $this;
    }

    /**
     * Retrieve current document
     *
     * @return null|string|DOMDocument
     */
    public function getDocument()
    {
        return $this->_document;
    }

    /**
     * Get document type
     *
     * @return string
     */
    public function getDocumentType()
    {
        return $this->_docType;
    }

    /**
     * Get any DOMDocument errors found
     *
     * @return false|array
     */
    public function getDocumentErrors()
    {
        return $this->_documentErrors;
    }

    /**
     * Perform a CSS selector query
     *
     * The selector is translated to XPath and handed to queryXpath().
     *
     * @param  string $query
     * @return Zend_Dom_Query_Result
     */
    public function query($query)
    {
        $xpathQuery = Zend_Dom_Query_Css2Xpath::transform($query);
        return $this->queryXpath($xpathQuery, $query);
    }

    /**
     * Perform an XPath query
     *
     * String documents are parsed on every call. libxml errors raised while
     * parsing are collected into getDocumentErrors() rather than emitted as
     * PHP warnings, and the caller's libxml error-handling mode is restored
     * before this method returns or throws.
     *
     * @param  string|array $xpathQuery
     * @param  string $query CSS selector query
     * @throws Zend_Dom_Exception When no document is registered or it cannot be parsed
     * @return Zend_Dom_Query_Result
     */
    public function queryXpath($xpathQuery, $query = null)
    {
        if (null === ($document = $this->getDocument())) {
            // require_once 'Zend/Dom/Exception.php';
            throw new Zend_Dom_Exception('Cannot query; no document registered');
        }

        $encoding = $this->getEncoding();

        // Remember the caller's setting so it can be put back afterwards
        // instead of being unconditionally switched off.
        $previousErrorMode = libxml_use_internal_errors(true);

        if (null === $encoding) {
            $domDoc = new DOMDocument('1.0');
        } else {
            $domDoc = new DOMDocument('1.0', $encoding);
        }

        $type = $this->getDocumentType();
        switch ($type) {
            case self::DOC_DOM:
                $domDoc  = $this->_document;
                $success = true;
                break;
            case self::DOC_XML:
                try {
                    $domDoc  = Zend_Xml_Security::scan($document, $domDoc);
                    $success = ($domDoc !== false);
                } catch (Zend_Xml_Exception $e) {
                    // Do not leak buffered errors or the internal-errors
                    // mode to the caller when bailing out.
                    libxml_clear_errors();
                    libxml_use_internal_errors($previousErrorMode);
                    // require_once 'Zend/Dom/Exception.php';
                    throw new Zend_Dom_Exception(
                        $e->getMessage()
                    );
                }
                break;
            case self::DOC_HTML:
            case self::DOC_XHTML:
            default:
                $success = $domDoc->loadHTML($document);
                break;
        }

        $errors = libxml_get_errors();
        if (!empty($errors)) {
            $this->_documentErrors = $errors;
            libxml_clear_errors();
        }
        libxml_use_internal_errors($previousErrorMode);

        if (!$success) {
            // require_once 'Zend/Dom/Exception.php';
            throw new Zend_Dom_Exception(sprintf('Error parsing document (type == %s)', $type));
        }

        $nodeList = $this->_getNodeList($domDoc, $xpathQuery);
        return new Zend_Dom_Query_Result($query, $xpathQuery, $domDoc, $nodeList);
    }

    /**
     * Register XPath namespaces
     *
     * Replaces any previously registered namespaces.
     *
     * @param array $xpathNamespaces Map of prefix => namespace URI
     */
    public function registerXpathNamespaces($xpathNamespaces)
    {
        $this->_xpathNamespaces = $xpathNamespaces;
    }

    /**
     * Prepare node list
     *
     * Registers the configured namespaces and runs the XPath query. When the
     * query contains a "[contains(@attr, ' ..." test, the value of that
     * attribute is first wrapped in single spaces on every element carrying
     * it (presumably so space-delimited tokens at either end of the value
     * still match). This rewrites the attributes of $document in place,
     * including a caller-supplied DOMDocument, and repeats on every query.
     *
     * @param  DOMDocument $document
     * @param  string|array $xpathQuery
     * @return DOMNodeList|false Result of DOMXPath::query()
     */
    protected function _getNodeList($document, $xpathQuery)
    {
        $xpath = new DOMXPath($document);
        foreach ($this->_xpathNamespaces as $prefix => $namespaceUri) {
            $xpath->registerNamespace($prefix, $namespaceUri);
        }

        $xpathQuery = (string) $xpathQuery;
        if (preg_match_all('|\[contains\((@[a-z0-9_-]+),\s?\' |i', $xpathQuery, $matches)) {
            foreach ($matches[1] as $attribute) {
                $queryString   = '//*[' . $attribute . ']';
                $attributeName = substr($attribute, 1); // strip the leading "@"
                $nodes         = $xpath->query($queryString);
                foreach ($nodes as $node) {
                    $attr        = $node->attributes->getNamedItem($attributeName);
                    $attr->value = ' ' . $attr->value . ' ';
                }
            }
        }

        return $xpath->query($xpathQuery);
    }
}