File indexing completed on 2024-12-22 05:37:14
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Xml
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 * @version    $Id$
 */


/**
 * Loads untrusted XML while rejecting documents that declare entities
 * (XXE / XEE attack vectors).
 *
 * Two strategies are used, selected by isPhpFpm():
 *  - a libxml based check that inspects the parsed DOCTYPE for entities;
 *  - a byte-level heuristic that searches the raw input for "<!ENTITY" in
 *    every encoding the document may be using.
 *
 * @category   Zend
 * @package    Zend_Xml_SecurityScan
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Xml_Security
{
    const ENTITY_DETECT = 'Detected use of ENTITY in XML, disabled to prevent XXE/XEE attacks';

    /**
     * Heuristic scan to detect entity in XML
     *
     * Searches the raw bytes of $xml for each candidate encoding of the
     * "<!ENTITY" marker returned by getEntityComparison().
     *
     * @param  string $xml
     * @return void
     * @throws Zend_Xml_Exception If entity expansion or external entity declaration was discovered.
     */
    protected static function heuristicScan($xml)
    {
        foreach (self::getEntityComparison($xml) as $compare) {
            if (strpos($xml, $compare) !== false) {
                throw new Zend_Xml_Exception(self::ENTITY_DETECT);
            }
        }
    }

    /**
     * Error handler installed around DOMDocument::loadXML() in scan().
     *
     * Reports a warning as handled (true) only when its message mentions
     * DOMDocument::loadXML(); any other warning is reported as unhandled
     * (false) so PHP's normal error handling continues.
     *
     * @param  integer $errno
     * @param  string  $errstr
     * @param  string  $errfile
     * @param  integer $errline
     * @return bool
     */
    public static function loadXmlErrorHandler($errno, $errstr, $errfile, $errline)
    {
        return substr_count($errstr, 'DOMDocument::loadXML()') > 0;
    }

    /**
     * Scan XML string for potential XXE and XEE attacks
     *
     * @param  string           $xml
     * @param  DOMDocument|null $dom Document to load into; when omitted a new
     *                               one is created and the result is returned
     *                               as a SimpleXMLElement.
     * @throws Zend_Xml_Exception When an ENTITY declaration is detected.
     * @return SimpleXMLElement|DomDocument|boolean False when the input is
     *                               empty or cannot be parsed.
     */
    public static function scan($xml, DOMDocument $dom = null)
    {
        // DOMDocument::loadXML() rejects empty input (a warning on older PHP,
        // a ValueError on PHP 8); report it as a plain parse failure instead.
        if (null === $xml || false === $xml || '' === $xml) {
            return false;
        }

        // If running with PHP-FPM we perform an heuristic scan
        // We cannot use libxml_disable_entity_loader because of this bug
        // @see https://bugs.php.net/bug.php?id=64938
        $heuristicOnly = self::isPhpFpm();
        if ($heuristicOnly) {
            self::heuristicScan($xml);
        }

        $returnSimpleXml = (null === $dom);
        if ($returnSimpleXml) {
            $dom = new DOMDocument();
        }

        // Previous libxml settings; null means "not changed by this call".
        $loadEntities         = null;
        $useInternalXmlErrors = null;
        if (!$heuristicOnly) {
            // libxml 2.9.0+ does not load external entities unless asked to,
            // and libxml_disable_entity_loader() is deprecated as of PHP 8.0,
            // so the loader is only toggled on older libxml builds.
            if (LIBXML_VERSION < 20900) {
                $loadEntities = libxml_disable_entity_loader(true);
            }
            $useInternalXmlErrors = libxml_use_internal_errors(true);
        }

        // Load XML with network access disabled (LIBXML_NONET); warnings
        // raised by loadXML() itself are swallowed by loadXmlErrorHandler().
        set_error_handler(array(__CLASS__, 'loadXmlErrorHandler'), E_WARNING);
        $result = $dom->loadXML($xml, LIBXML_NONET);
        restore_error_handler();

        if (!$result) {
            self::restoreLibxmlState($loadEntities, $useInternalXmlErrors);
            return false;
        }

        // Scan for potential XEE attacks using ENTITY, if not PHP-FPM
        $entityFound = false;
        if (!$heuristicOnly) {
            foreach ($dom->childNodes as $child) {
                if ($child->nodeType === XML_DOCUMENT_TYPE_NODE
                    && $child->entities->length > 0
                ) {
                    $entityFound = true;
                    break;
                }
            }
        }

        // Restore the global libxml settings before leaving, including when
        // the document is about to be rejected.
        self::restoreLibxmlState($loadEntities, $useInternalXmlErrors);

        if ($entityFound) {
            throw new Zend_Xml_Exception(self::ENTITY_DETECT);
        }

        if ($returnSimpleXml) {
            $result = simplexml_import_dom($dom);
            if (!$result instanceof SimpleXMLElement) {
                return false;
            }
            return $result;
        }
        return $dom;
    }

    /**
     * Put the global libxml settings changed by scan() back to their
     * previous values; a null argument means that setting was not touched.
     *
     * @param  bool|null $loadEntities         Previous entity-loader flag.
     * @param  bool|null $useInternalXmlErrors Previous internal-errors flag.
     * @return void
     */
    private static function restoreLibxmlState($loadEntities, $useInternalXmlErrors)
    {
        if (null !== $loadEntities) {
            libxml_disable_entity_loader($loadEntities);
        }
        if (null !== $useInternalXmlErrors) {
            libxml_use_internal_errors($useInternalXmlErrors);
        }
    }

    /**
     * Scan XML file for potential XXE/XEE attacks
     *
     * Reads the whole file and delegates to scan().
     *
     * @param  string      $file
     * @param  DOMDocument $dom
     * @throws Zend_Xml_Exception When the file does not exist or an ENTITY
     *                            declaration is detected.
     * @return SimpleXMLElement|DomDocument
     */
    public static function scanFile($file, DOMDocument $dom = null)
    {
        if (!file_exists($file)) {
            throw new Zend_Xml_Exception(
                "The file $file specified doesn't exist"
            );
        }
        return self::scan(file_get_contents($file), $dom);
    }

    /**
     * Return true if PHP is running with PHP-FPM
     *
     * This method is mainly used to determine whether or not heuristic checks
     * (vs libxml checks) should be made, due to threading issues in libxml;
     * under php-fpm, threading becomes a concern.
     *
     * However, PHP versions 5.5.22+ and 5.6.6+ contain a patch to the
     * libxml support in PHP that makes the libxml checks viable; in such
     * versions, this method will return false to enforce those checks, which
     * are more strict and accurate than the heuristic checks.
     *
     * @return boolean
     */
    public static function isPhpFpm()
    {
        $isVulnerableVersion = (
            version_compare(PHP_VERSION, '5.5.22', 'lt')
            || (
                version_compare(PHP_VERSION, '5.6', 'gte')
                && version_compare(PHP_VERSION, '5.6.6', 'lt')
            )
        );

        return substr(php_sapi_name(), 0, 3) === 'fpm' && $isVulnerableVersion;
    }

    /**
     * Determine and return the string(s) to use for the <!ENTITY comparison.
     *
     * One comparison string is produced per candidate encoding reported by
     * detectXmlEncoding().
     *
     * @param  string $xml
     * @return string[]
     */
    protected static function getEntityComparison($xml)
    {
        return array_map(
            array(__CLASS__, 'generateEntityComparison'),
            self::detectXmlEncoding($xml, self::detectStringEncoding($xml))
        );
    }

    /**
     * Determine the string encoding.
     *
     * Determines string encoding from either a detected BOM or a
     * heuristic.
     *
     * @param  string $xml
     * @return string File encoding
     */
    protected static function detectStringEncoding($xml)
    {
        $encoding = self::detectBom($xml);
        return ($encoding) ? $encoding : self::detectXmlStringEncoding($xml);
    }

    /**
     * Attempt to match a known BOM.
     *
     * Iterates through the return of getBomMap(), comparing the initial bytes
     * of the provided string to the BOM of each; if a match is determined,
     * it returns the encoding.
     *
     * @param  string $string
     * @return false|string Returns encoding on success.
     */
    protected static function detectBom($string)
    {
        foreach (self::getBomMap() as $criteria) {
            if (0 === strncmp($string, $criteria['bom'], $criteria['length'])) {
                return $criteria['encoding'];
            }
        }
        return false;
    }

    /**
     * Attempt to detect the string encoding of an XML string.
     *
     * Encodes the XML declaration opener with every generator from
     * getAsciiEncodingMap() and returns the first encoding whose result is a
     * prefix of $xml.
     *
     * @param  string $xml
     * @return string Encoding; 'UTF-8' when nothing matches.
     */
    protected static function detectXmlStringEncoding($xml)
    {
        foreach (self::getAsciiEncodingMap() as $encoding => $generator) {
            $prefix = call_user_func($generator, '<' . '?xml');
            if (0 === strncmp($xml, $prefix, strlen($prefix))) {
                return $encoding;
            }
        }

        // Fallback
        return 'UTF-8';
    }

    /**
     * Attempt to detect the specified XML encoding.
     *
     * Using the file's encoding, determines if an "encoding" attribute is
     * present and well-formed in the XML declaration; if so, it returns a
     * list with both the ASCII representation of that declaration and the
     * original file encoding.
     *
     * If not, a list containing only the provided file encoding is returned.
     *
     * Only the double-quoted form (encoding="...") is recognised.
     *
     * @param  string $xml
     * @param  string $fileEncoding
     * @return string[] Potential XML encodings
     */
    protected static function detectXmlEncoding($xml, $fileEncoding)
    {
        $encodingMap = self::getAsciiEncodingMap();
        $generator   = $encodingMap[$fileEncoding];
        $encAttr     = call_user_func($generator, 'encoding="');
        $quote       = call_user_func($generator, '"');
        $close       = call_user_func($generator, '>');

        $closePos = strpos($xml, $close);
        if (false === $closePos) {
            return array($fileEncoding);
        }

        // The attribute must appear before the first '>' to be considered.
        $encPos = strpos($xml, $encAttr);
        if (false === $encPos
            || $encPos > $closePos
        ) {
            return array($fileEncoding);
        }

        $encPos   += strlen($encAttr);
        $quotePos = strpos($xml, $quote, $encPos);
        if (false === $quotePos) {
            return array($fileEncoding);
        }

        $encoding = self::substr($xml, $encPos, $quotePos);
        return array(
            // Strip the NUL padding bytes of UTF-16/32 to get the ASCII name.
            // The needle must be double-quoted: '\0' in single quotes is a
            // literal backslash followed by a zero, not a NUL byte.
            // Following line works because we're only supporting 8-bit safe encodings at this time.
            str_replace("\0", '', $encoding), // detected encoding
            $fileEncoding,                    // file encoding
        );
    }

    /**
     * Return a list of BOM maps.
     *
     * Returns a list of common encoding -> BOM maps, along with the character
     * length to compare against. Four-byte BOMs are listed before the
     * two-byte BOMs that share their leading bytes.
     *
     * @link https://en.wikipedia.org/wiki/Byte_order_mark
     * @return array
     */
    protected static function getBomMap()
    {
        return array(
            array(
                'encoding' => 'UTF-32BE',
                'bom'      => pack('CCCC', 0x00, 0x00, 0xfe, 0xff),
                'length'   => 4,
            ),
            array(
                'encoding' => 'UTF-32LE',
                'bom'      => pack('CCCC', 0xff, 0xfe, 0x00, 0x00),
                'length'   => 4,
            ),
            array(
                'encoding' => 'GB-18030',
                'bom'      => pack('CCCC', 0x84, 0x31, 0x95, 0x33),
                'length'   => 4,
            ),
            array(
                'encoding' => 'UTF-16BE',
                'bom'      => pack('CC', 0xfe, 0xff),
                'length'   => 2,
            ),
            array(
                'encoding' => 'UTF-16LE',
                'bom'      => pack('CC', 0xff, 0xfe),
                'length'   => 2,
            ),
            array(
                'encoding' => 'UTF-8',
                'bom'      => pack('CCC', 0xef, 0xbb, 0xbf),
                'length'   => 3,
            ),
        );
    }

    /**
     * Return a map of encoding => generator pairs.
     *
     * Returns a map of encoding => generator pairs, where the generator is a
     * callable that accepts a string and returns the appropriate byte order
     * sequence of that string for the encoding.
     *
     * @return array
     */
    protected static function getAsciiEncodingMap()
    {
        return array(
            'UTF-32BE'   => array(__CLASS__, 'encodeToUTF32BE'),
            'UTF-32LE'   => array(__CLASS__, 'encodeToUTF32LE'),
            'UTF-32odd1' => array(__CLASS__, 'encodeToUTF32odd1'),
            'UTF-32odd2' => array(__CLASS__, 'encodeToUTF32odd2'),
            'UTF-16BE'   => array(__CLASS__, 'encodeToUTF16BE'),
            'UTF-16LE'   => array(__CLASS__, 'encodeToUTF16LE'),
            'UTF-8'      => array(__CLASS__, 'encodeToUTF8'),
            'GB-18030'   => array(__CLASS__, 'encodeToUTF8'),
        );
    }

    /**
     * Byte-wise substring between two offsets.
     *
     * Copies the bytes at indexes $start .. $end - 1 one at a time. Note that
     * the third argument is an end offset, not a length.
     *
     * NOTE(review): presumably written this way to stay byte-accurate even
     * where substr() is overloaded by mbstring - confirm before replacing it
     * with a plain substr() call.
     *
     * @param  string $string
     * @param  int    $start First byte index to copy.
     * @param  int    $end   Byte index to stop before.
     * @return string
     */
    protected static function substr($string, $start, $end)
    {
        $substr = '';
        for ($i = $start; $i < $end; $i += 1) {
            $substr .= $string[$i];
        }
        return $substr;
    }

    /**
     * Generate an entity comparison based on the given encoding.
     *
     * Encodings missing from getAsciiEncodingMap() fall back to UTF-8.
     *
     * This patch is internal only, and public only so it can be used as a
     * callable to pass to array_map.
     *
     * @internal
     * @param  string $encoding
     * @return string
     */
    public static function generateEntityComparison($encoding)
    {
        $encodingMap = self::getAsciiEncodingMap();
        $generator   = isset($encodingMap[$encoding]) ? $encodingMap[$encoding] : $encodingMap['UTF-8'];
        return call_user_func($generator, '<!ENTITY');
    }

    /**
     * Encode an ASCII string to UTF-32BE
     *
     * @internal
     * @param  string $ascii
     * @return string
     */
    public static function encodeToUTF32BE($ascii)
    {
        return preg_replace('/(.)/', "\0\0\0\\1", $ascii);
    }

    /**
     * Encode an ASCII string to UTF-32LE
     *
     * @internal
     * @param  string $ascii
     * @return string
     */
    public static function encodeToUTF32LE($ascii)
    {
        return preg_replace('/(.)/', "\\1\0\0\0", $ascii);
    }

    /**
     * Encode an ASCII string to UTF-32odd1
     *
     * @internal
     * @param  string $ascii
     * @return string
     */
    public static function encodeToUTF32odd1($ascii)
    {
        return preg_replace('/(.)/', "\0\\1\0\0", $ascii);
    }

    /**
     * Encode an ASCII string to UTF-32odd2
     *
     * @internal
     * @param  string $ascii
     * @return string
     */
    public static function encodeToUTF32odd2($ascii)
    {
        return preg_replace('/(.)/', "\0\0\\1\0", $ascii);
    }

    /**
     * Encode an ASCII string to UTF-16BE
     *
     * @internal
     * @param  string $ascii
     * @return string
     */
    public static function encodeToUTF16BE($ascii)
    {
        return preg_replace('/(.)/', "\0\\1", $ascii);
    }

    /**
     * Encode an ASCII string to UTF-16LE
     *
     * @internal
     * @param  string $ascii
     * @return string
     */
    public static function encodeToUTF16LE($ascii)
    {
        return preg_replace('/(.)/', "\\1\0", $ascii);
    }

    /**
     * Encode an ASCII string to UTF-8
     *
     * @internal
     * @param  string $ascii
     * @return string
     */
    public static function encodeToUTF8($ascii)
    {
        return $ascii;
    }
}