File indexing completed on 2024-12-22 05:37:14

0001 <?php
0002 /**
0003  * Zend Framework
0004  *
0005  * LICENSE
0006  *
0007  * This source file is subject to the new BSD license that is bundled
0008  * with this package in the file LICENSE.txt.
0009  * It is also available through the world-wide-web at this URL:
0010  * http://framework.zend.com/license/new-bsd
0011  * If you did not receive a copy of the license and are unable to
0012  * obtain it through the world-wide-web, please send an email
0013  * to license@zend.com so we can send you a copy immediately.
0014  *
0015  * @category   Zend
0016  * @package    Zend_Xml
0017  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0018  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0019  * @version    $Id$
0020  */
0021 
0022  
/**
 * Guards XML loading against XXE (external entity) and XEE (entity
 * expansion) attacks.
 *
 * Two strategies are used, selected by {@link isPhpFpm()}:
 *
 * - libxml based: the document is parsed and rejected if its DOCTYPE
 *   declares any entity;
 * - heuristic: the raw bytes are searched for an "<!ENTITY" token in every
 *   encoding the document may be using.
 *
 * @category   Zend
 * @package    Zend_Xml_SecurityScan
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Xml_Security
{
    const ENTITY_DETECT = 'Detected use of ENTITY in XML, disabled to prevent XXE/XEE attacks';

    /**
     * Heuristic scan to detect entity in XML
     *
     * Searches the raw string for the "<!ENTITY" token, encoded in each
     * candidate encoding returned by getEntityComparison().
     *
     * @param  string $xml
     * @throws Zend_Xml_Exception If entity expansion or external entity declaration was discovered.
     */
    protected static function heuristicScan($xml)
    {
        foreach (self::getEntityComparison($xml) as $compare) {
            if (strpos($xml, $compare) !== false) {
                throw new Zend_Xml_Exception(self::ENTITY_DETECT);
            }
        }
    }

    /**
     * Error handler installed around DOMDocument::loadXML().
     *
     * Returns true (error handled) for messages raised by
     * DOMDocument::loadXML(), and false for anything else so that PHP's
     * standard error handling still processes it.
     *
     * @param integer $errno
     * @param string $errstr
     * @param string $errfile
     * @param integer $errline
     * @return bool
     */
    public static function loadXmlErrorHandler($errno, $errstr, $errfile, $errline)
    {
        return false !== strpos($errstr, 'DOMDocument::loadXML()');
    }

    /**
     * Scan XML string for potential XXE and XEE attacks
     *
     * When no DOMDocument is supplied, a new one is created and the result
     * is returned as a SimpleXMLElement; otherwise the supplied document is
     * populated and returned.
     *
     * @param   string $xml
     * @param   DomDocument $dom
     * @throws  Zend_Xml_Exception If an ENTITY declaration is detected.
     * @return  SimpleXMLElement|DomDocument|boolean False if the XML is empty or cannot be loaded.
     */
    public static function scan($xml, DOMDocument $dom = null)
    {
        // DOMDocument::loadXML() cannot load an empty source: it emits a
        // warning and returns false on PHP 5/7 but throws a ValueError on
        // PHP 8. Report it as a plain load failure on every version.
        if (null === $xml || false === $xml || '' === $xml) {
            return false;
        }

        $isPhpFpm = self::isPhpFpm();

        // If running with PHP-FPM we perform an heuristic scan
        // We cannot use libxml_disable_entity_loader because of this bug
        // @see https://bugs.php.net/bug.php?id=64938
        if ($isPhpFpm) {
            self::heuristicScan($xml);
        }

        $returnSimpleXml = (null === $dom);
        if ($returnSimpleXml) {
            $dom = new DOMDocument();
        }

        // libxml_disable_entity_loader() is deprecated as of PHP 8.0, where
        // external entity loading is already disabled by default; only
        // toggle it on older versions.
        $toggleEntityLoader   = !$isPhpFpm && version_compare(PHP_VERSION, '8.0.0', 'lt');
        $loadEntities         = null;
        $useInternalXmlErrors = null;
        if ($toggleEntityLoader) {
            $loadEntities = libxml_disable_entity_loader(true);
        }
        if (!$isPhpFpm) {
            $useInternalXmlErrors = libxml_use_internal_errors(true);
        }

        // Load XML with network access disabled (LIBXML_NONET).
        // Warnings raised by loadXML() itself are swallowed by
        // loadXmlErrorHandler(); this matters in the PHP-FPM scenario, where
        // libxml internal error collection is not enabled above.
        set_error_handler(array(__CLASS__, 'loadXmlErrorHandler'), E_WARNING);
        $result = $dom->loadXML($xml, LIBXML_NONET);
        restore_error_handler();

        // Put the libxml settings back immediately, before any return or
        // throw below, so no code path leaks the modified global state.
        if ($toggleEntityLoader) {
            libxml_disable_entity_loader($loadEntities);
        }
        if (!$isPhpFpm) {
            libxml_use_internal_errors($useInternalXmlErrors);
        }

        if (!$result) {
            return false;
        }

        // Scan for potential XEE attacks using ENTITY, if not PHP-FPM
        if (!$isPhpFpm) {
            foreach ($dom->childNodes as $child) {
                if ($child->nodeType === XML_DOCUMENT_TYPE_NODE
                    && $child->entities->length > 0
                ) {
                    throw new Zend_Xml_Exception(self::ENTITY_DETECT);
                }
            }
        }

        if ($returnSimpleXml) {
            $result = simplexml_import_dom($dom);
            if (!$result instanceof SimpleXMLElement) {
                return false;
            }
            return $result;
        }
        return $dom;
    }

    /**
     * Scan XML file for potential XXE/XEE attacks
     *
     * Reads the whole file and delegates to scan().
     *
     * @param  string $file
     * @param  DOMDocument $dom
     * @throws Zend_Xml_Exception If the file does not exist or an ENTITY declaration is detected.
     * @return SimpleXMLElement|DomDocument|boolean
     */
    public static function scanFile($file, DOMDocument $dom = null)
    {
        if (!file_exists($file)) {
            throw new Zend_Xml_Exception(
                "The file $file specified doesn't exist"
            );
        }
        return self::scan(file_get_contents($file), $dom);
    }

    /**
     * Return true if PHP is running with PHP-FPM
     *
     * This method is mainly used to determine whether or not heuristic checks
     * (vs libxml checks) should be made, due to threading issues in libxml;
     * under php-fpm, threading becomes a concern.
     *
     * However, PHP versions 5.5.22+ and 5.6.6+ contain a patch to the
     * libxml support in PHP that makes the libxml checks viable; in such
     * versions, this method will return false to enforce those checks, which
     * are more strict and accurate than the heuristic checks.
     *
     * @return boolean
     */
    public static function isPhpFpm()
    {
        $isVulnerableVersion = (
            version_compare(PHP_VERSION, '5.5.22', 'lt')
            || (
                version_compare(PHP_VERSION, '5.6', 'gte')
                && version_compare(PHP_VERSION, '5.6.6', 'lt')
            )
        );

        return $isVulnerableVersion && substr(php_sapi_name(), 0, 3) === 'fpm';
    }

    /**
     * Determine and return the string(s) to use for the <!ENTITY comparison.
     *
     * One comparison string is generated per candidate encoding reported by
     * detectXmlEncoding().
     *
     * @param string $xml
     * @return string[]
     */
    protected static function getEntityComparison($xml)
    {
        return array_map(
            array(__CLASS__, 'generateEntityComparison'),
            self::detectXmlEncoding($xml, self::detectStringEncoding($xml))
        );
    }

    /**
     * Determine the string encoding.
     *
     * Determines string encoding from either a detected BOM or a
     * heuristic.
     *
     * @param string $xml
     * @return string File encoding
     */
    protected static function detectStringEncoding($xml)
    {
        $encoding = self::detectBom($xml);
        return ($encoding) ? $encoding : self::detectXmlStringEncoding($xml);
    }

    /**
     * Attempt to match a known BOM.
     *
     * Iterates through the return of getBomMap(), comparing the initial bytes
     * of the provided string to the BOM of each; if a match is determined,
     * it returns the encoding.
     *
     * @param string $string
     * @return false|string Returns encoding on success.
     */
    protected static function detectBom($string)
    {
        foreach (self::getBomMap() as $criteria) {
            if (0 === strncmp($string, $criteria['bom'], $criteria['length'])) {
                return $criteria['encoding'];
            }
        }
        return false;
    }

    /**
     * Attempt to detect the string encoding of an XML string.
     *
     * Compares the start of the string against the "<?xml" prefix as encoded
     * by each generator in getAsciiEncodingMap(); the first match wins.
     *
     * @param string $xml
     * @return string Encoding
     */
    protected static function detectXmlStringEncoding($xml)
    {
        foreach (self::getAsciiEncodingMap() as $encoding => $generator) {
            // Concatenated so the literal is never mistaken for a PHP/XML
            // opening tag.
            $prefix = call_user_func($generator, '<' . '?xml');
            if (0 === strncmp($xml, $prefix, strlen($prefix))) {
                return $encoding;
            }
        }

        // Fallback
        return 'UTF-8';
    }

    /**
     * Attempt to detect the specified XML encoding.
     *
     * Using the file's encoding, determines if an "encoding" attribute is
     * present and well-formed in the XML declaration; if so, it returns a
     * list with both the ASCII representation of that declaration and the
     * original file encoding.
     *
     * If not, a list containing only the provided file encoding is returned.
     *
     * Only double-quoted encoding attributes are recognised.
     *
     * @param string $xml
     * @param string $fileEncoding
     * @return string[] Potential XML encodings
     */
    protected static function detectXmlEncoding($xml, $fileEncoding)
    {
        $encodingMap = self::getAsciiEncodingMap();
        $generator   = $encodingMap[$fileEncoding];
        $encAttr     = call_user_func($generator, 'encoding="');
        $quote       = call_user_func($generator, '"');
        $close       = call_user_func($generator, '>');

        $closePos    = strpos($xml, $close);
        if (false === $closePos) {
            return array($fileEncoding);
        }

        // The attribute must appear before the first closing angle bracket.
        $encPos = strpos($xml, $encAttr);
        if (false === $encPos
            || $encPos > $closePos
        ) {
            return array($fileEncoding);
        }

        $encPos   += strlen($encAttr);
        $quotePos = strpos($xml, $quote, $encPos);
        if (false === $quotePos) {
            return array($fileEncoding);
        }

        $encoding = self::substr($xml, $encPos, $quotePos);
        return array(
            // Strip the NUL padding bytes of multi-byte encodings to obtain
            // the ASCII encoding name. The needle must be double-quoted:
            // '\0' in single quotes is a backslash followed by a zero.
            // This works because we're only supporting 8-bit safe encodings at this time.
            str_replace("\0", '', $encoding), // detected encoding
            $fileEncoding,                    // file encoding
        );
    }

    /**
     * Return a list of BOM maps.
     *
     * Returns a list of common encoding -> BOM maps, along with the character
     * length to compare against. Four-byte BOMs are listed before the
     * two-byte BOMs that are their prefixes so the longer match is tried
     * first.
     *
     * @link https://en.wikipedia.org/wiki/Byte_order_mark
     * @return array
     */
    protected static function getBomMap()
    {
        return array(
            array(
                'encoding' => 'UTF-32BE',
                'bom'      => pack('CCCC', 0x00, 0x00, 0xfe, 0xff),
                'length'   => 4,
            ),
            array(
                'encoding' => 'UTF-32LE',
                'bom'      => pack('CCCC', 0xff, 0xfe, 0x00, 0x00),
                'length'   => 4,
            ),
            array(
                'encoding' => 'GB-18030',
                'bom'      => pack('CCCC', 0x84, 0x31, 0x95, 0x33),
                'length'   => 4,
            ),
            array(
                'encoding' => 'UTF-16BE',
                'bom'      => pack('CC', 0xfe, 0xff),
                'length'   => 2,
            ),
            array(
                'encoding' => 'UTF-16LE',
                'bom'      => pack('CC', 0xff, 0xfe),
                'length'   => 2,
            ),
            array(
                'encoding' => 'UTF-8',
                'bom'      => pack('CCC', 0xef, 0xbb, 0xbf),
                'length'   => 3,
            ),
        );
    }

    /**
     * Return a map of encoding => generator pairs.
     *
     * Returns a map of encoding => generator pairs, where the generator is a
     * callable that accepts a string and returns the appropriate byte order
     * sequence of that string for the encoding.
     *
     * @return array
     */
    protected static function getAsciiEncodingMap()
    {
        return array(
            'UTF-32BE'   => array(__CLASS__, 'encodeToUTF32BE'),
            'UTF-32LE'   => array(__CLASS__, 'encodeToUTF32LE'),
            'UTF-32odd1' => array(__CLASS__, 'encodeToUTF32odd1'),
            'UTF-32odd2' => array(__CLASS__, 'encodeToUTF32odd2'),
            'UTF-16BE'   => array(__CLASS__, 'encodeToUTF16BE'),
            'UTF-16LE'   => array(__CLASS__, 'encodeToUTF16LE'),
            'UTF-8'      => array(__CLASS__, 'encodeToUTF8'),
            'GB-18030'   => array(__CLASS__, 'encodeToUTF8'),
        );
    }

    /**
     * Byte-wise substring between two offsets.
     *
     * Copies the bytes from offset $start up to, but not including, offset
     * $end. Note that the third argument is an end offset, not a length.
     *
     * NOTE(review): presumably hand-rolled so the result stays byte-based
     * even if the native substr() is overloaded (mbstring.func_overload) -
     * confirm before replacing it with substr().
     *
     * @param string $string
     * @param int $start
     * @param int $end
     * @return string
     */
    protected static function substr($string, $start, $end)
    {
        $substr = '';
        for ($i = $start; $i < $end; $i += 1) {
            $substr .= $string[$i];
        }
        return $substr;
    }

    /**
     * Generate an entity comparison based on the given encoding.
     *
     * Encodings missing from getAsciiEncodingMap() fall back to the UTF-8
     * generator.
     *
     * This patch is internal only, and public only so it can be used as a
     * callable to pass to array_map.
     *
     * @internal
     * @param string $encoding
     * @return string
     */
    public static function generateEntityComparison($encoding)
    {
        $encodingMap = self::getAsciiEncodingMap();
        $generator   = isset($encodingMap[$encoding]) ? $encodingMap[$encoding] : $encodingMap['UTF-8'];
        return call_user_func($generator, '<!ENTITY');
    }

    /**
     * Encode an ASCII string to UTF-32BE
     *
     * Each byte is prefixed with three NUL bytes. The "s" modifier makes
     * the pattern cover newlines as well.
     *
     * @internal
     * @param string $ascii
     * @return string
     */
    public static function encodeToUTF32BE($ascii)
    {
        return preg_replace('/(.)/s', "\0\0\0\\1", $ascii);
    }

    /**
     * Encode an ASCII string to UTF-32LE
     *
     * Each byte is followed by three NUL bytes. The "s" modifier makes
     * the pattern cover newlines as well.
     *
     * @internal
     * @param string $ascii
     * @return string
     */
    public static function encodeToUTF32LE($ascii)
    {
        return preg_replace('/(.)/s', "\\1\0\0\0", $ascii);
    }

    /**
     * Encode an ASCII string to UTF-32odd1
     *
     * Each byte is placed second in a four-byte unit, the rest being NUL.
     * The "s" modifier makes the pattern cover newlines as well.
     *
     * @internal
     * @param string $ascii
     * @return string
     */
    public static function encodeToUTF32odd1($ascii)
    {
        return preg_replace('/(.)/s', "\0\\1\0\0", $ascii);
    }

    /**
     * Encode an ASCII string to UTF-32odd2
     *
     * Each byte is placed third in a four-byte unit, the rest being NUL.
     * The "s" modifier makes the pattern cover newlines as well.
     *
     * @internal
     * @param string $ascii
     * @return string
     */
    public static function encodeToUTF32odd2($ascii)
    {
        return preg_replace('/(.)/s', "\0\0\\1\0", $ascii);
    }

    /**
     * Encode an ASCII string to UTF-16BE
     *
     * Each byte is prefixed with one NUL byte. The "s" modifier makes the
     * pattern cover newlines as well.
     *
     * @internal
     * @param string $ascii
     * @return string
     */
    public static function encodeToUTF16BE($ascii)
    {
        return preg_replace('/(.)/s', "\0\\1", $ascii);
    }

    /**
     * Encode an ASCII string to UTF-16LE
     *
     * Each byte is followed by one NUL byte. The "s" modifier makes the
     * pattern cover newlines as well.
     *
     * @internal
     * @param string $ascii
     * @return string
     */
    public static function encodeToUTF16LE($ascii)
    {
        return preg_replace('/(.)/s', "\\1\0", $ascii);
    }

    /**
     * Encode an ASCII string to UTF-8
     *
     * ASCII is already valid UTF-8, so the string is returned unchanged.
     *
     * @internal
     * @param string $ascii
     * @return string
     */
    public static function encodeToUTF8($ascii)
    {
        return $ascii;
    }
}