File indexing completed on 2025-01-19 05:21:04

0001 <?php
0002 /**
0003  * Zend Framework
0004  *
0005  * LICENSE
0006  *
0007  * This source file is subject to the new BSD license that is bundled
0008  * with this package in the file LICENSE.txt.
0009  * It is also available through the world-wide-web at this URL:
0010  * http://framework.zend.com/license/new-bsd
0011  * If you did not receive a copy of the license and are unable to
0012  * obtain it through the world-wide-web, please send an email
0013  * to license@zend.com so we can send you a copy immediately.
0014  *
0015  * @category   Zend
0016  * @package    Zend_Dom
0017  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0018  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0019  */
0020 
0021 /**
0022  * Transform CSS selectors to XPath
0023  *
0024  * @package    Zend_Dom
0025  * @subpackage Query
0026  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0027  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0028  * @version    $Id$
0029  */
class Zend_Dom_Query_Css2Xpath
{
    /**
     * PCRE fragment (without delimiters) matching one quoted attribute
     * selector: [name="value"], [name~="value"] or [name*="value"].
     *
     * The value may contain whitespace, commas, dots, hashes and '>' but,
     * as in the individual attribute patterns used below, no quote characters.
     */
    const ATTRIBUTE_SELECTOR = '\[[a-z0-9_-]+[~*]?=[\'"][^\'"]+[\'"]\]';

    /**
     * Transform CSS expression to XPath
     *
     * Comma-separated selectors are transformed individually and joined with
     * the XPath union operator "|". Within one selector, whitespace separates
     * descendant segments and ">" marks a direct child. Quoted attribute
     * selectors are treated as opaque units, so commas, whitespace and ">"
     * inside an attribute value never split the selector.
     *
     * When a segment after the first tokenizes to a bare "[contains(...)]"
     * predicate (e.g. ".bar"), two alternatives are emitted for every path
     * built so far: the predicate applied to any descendant, and the
     * predicate applied to the preceding node itself.
     *
     * @param  string $path CSS selector; surrounding whitespace is ignored
     * @return string XPath expression ("//" for an empty selector)
     */
    public static function transform($path)
    {
        $path = trim((string) $path);

        $alternatives = preg_split(self::_outsideAttributes(','), $path);
        if (count($alternatives) > 1) {
            $expressions = array();
            foreach ($alternatives as $alternative) {
                // transform() trims its own input, so " b" in "a, b" is fine
                $expressions[] = self::transform($alternative);
            }
            return implode('|', $expressions);
        }

        // Collapse optional whitespace around the child combinator so that
        // "a > b", "a> b" and "a >b" all end up in a single segment.
        $path     = preg_replace(self::_outsideAttributes('\s*>\s*'), '>', $path);
        $segments = preg_split(self::_outsideAttributes('\s+'), $path);

        $paths = array('//');
        foreach ($segments as $index => $segment) {
            $pathSegment     = self::_tokenize($segment);
            $isBarePredicate = (0 === strpos($pathSegment, '[contains('));

            if (0 === $index) {
                // A predicate needs a node test in front of it
                $paths[0] .= $isBarePredicate ? '*' . $pathSegment : $pathSegment;
                continue;
            }

            if (!$isBarePredicate) {
                foreach ($paths as $pathKey => $xpath) {
                    $paths[$pathKey] = $xpath . '//' . $pathSegment;
                }
                continue;
            }

            // Keep the historical ordering: all "descendant" variants first,
            // followed by all "same node" variants.
            $descendants = array();
            $sameNodes   = array();
            foreach ($paths as $xpath) {
                $descendants[] = $xpath . '//*' . $pathSegment;
                $sameNodes[]   = $xpath . $pathSegment;
            }
            $paths = array_merge($descendants, $sameNodes);
        }

        return implode('|', $paths);
    }

    /**
     * Tokenize CSS expressions to XPath
     *
     * The expression is cut into alternating pieces: text outside quoted
     * attribute selectors (even indexes) and the attribute selectors
     * themselves (odd indexes). ID, class and child-combinator rewriting is
     * applied only to the former, so characters such as ".", "#" and ">"
     * inside an attribute value are left untouched.
     *
     * @param  string $expression A single whitespace-free selector segment
     * @return string
     */
    protected static function _tokenize($expression)
    {
        $parts = preg_split(
            '/(' . self::ATTRIBUTE_SELECTOR . ')/i',
            (string) $expression,
            -1,
            PREG_SPLIT_DELIM_CAPTURE
        );

        foreach ($parts as $index => $part) {
            // With one capture group, odd indexes hold the captured delimiters
            $parts[$index] = ($index % 2)
                ? self::_tokenizeAttribute($part)
                : self::_tokenizeStructure($part);
        }

        return implode('', $parts);
    }

    /**
     * Build a pattern that matches $pattern only outside attribute selectors
     *
     * Attribute selectors are consumed first and then discarded with the
     * PCRE verbs (*SKIP)(*FAIL), which makes the regex engine resume
     * searching after the closing bracket.
     *
     * @param  string $pattern PCRE fragment without delimiters
     * @return string Complete, delimited, case-insensitive pattern
     */
    protected static function _outsideAttributes($pattern)
    {
        return '/' . self::ATTRIBUTE_SELECTOR . '(*SKIP)(*FAIL)|' . $pattern . '/i';
    }

    /**
     * Translate the part of a segment that lies outside attribute selectors
     *
     * @param  string $expression
     * @return string
     */
    protected static function _tokenizeStructure($expression)
    {
        // Child selectors
        $expression = str_replace('>', '/', $expression);

        // IDs; an ID predicate that does not follow a tag name gets a "*"
        $expression = preg_replace('|#([a-z][a-z0-9_-]*)|i', '[@id=\'$1\']', $expression);
        $expression = preg_replace('|(?<![a-z0-9_-])(\[@id=)|i', '*$1', $expression);

        // Classes
        $expression = preg_replace(
            '|\.([a-z][a-z0-9_-]*)|i',
            "[contains(concat(' ', normalize-space(@class), ' '), ' \$1 ')]",
            $expression
        );

        /** ZF-9764 -- remove double asterisk (e.g. "*#id" => "**[@id=...]") */
        return str_replace('**', '*', $expression);
    }

    /**
     * Translate a single quoted attribute selector to an XPath predicate
     *
     * Exactly one of the three patterns matches a given selector, depending
     * on its operator ("=", "~=" or "*=").
     *
     * @param  string $selector Text matched by ATTRIBUTE_SELECTOR
     * @return string
     */
    protected static function _tokenizeAttribute($selector)
    {
        // arbitrary attribute strict equality
        $selector = preg_replace_callback(
            '|\[([a-z0-9_-]+)=[\'"]([^\'"]+)[\'"]\]|i',
            array(__CLASS__, '_createEqualityExpression'),
            $selector
        );

        // arbitrary attribute contains full word
        $selector = preg_replace_callback(
            '|\[([a-z0-9_-]+)~=[\'"]([^\'"]+)[\'"]\]|i',
            array(__CLASS__, '_normalizeSpaceAttribute'),
            $selector
        );

        // arbitrary attribute contains specified content
        $selector = preg_replace_callback(
            '|\[([a-z0-9_-]+)\*=[\'"]([^\'"]+)[\'"]\]|i',
            array(__CLASS__, '_createContainsExpression'),
            $selector
        );

        return $selector;
    }

    /**
     * Callback for creating equality expressions
     *
     * @param  array $matches [1] attribute name, [2] attribute value
     * @return string Predicate of the form [@name='value']; name is lower-cased
     */
    protected static function _createEqualityExpression($matches)
    {
        return '[@' . strtolower($matches[1]) . "='" . $matches[2] . "']";
    }

    /**
     * Callback for creating expressions to match one or more attribute values
     *
     * Matches the value as a whole whitespace-delimited word of the attribute.
     *
     * @param  array $matches [1] attribute name, [2] word to look for
     * @return string
     */
    protected static function _normalizeSpaceAttribute($matches)
    {
        return "[contains(concat(' ', normalize-space(@" . strtolower($matches[1]) . "), ' '), ' "
             . $matches[2] . " ')]";
    }

    /**
     * Callback for creating a strict "contains" expression
     *
     * @param  array $matches [1] attribute name, [2] substring to look for
     * @return string
     */
    protected static function _createContainsExpression($matches)
    {
        return "[contains(@" . strtolower($matches[1]) . ", '"
             . $matches[2] . "')]";
    }
}