File indexing completed on 2024-12-22 05:36:21
<?php

/**
 * Class that handles operations involving percent-encoding in URIs.
 *
 * An instance carries a lookup table of byte values that are left
 * untouched by encode() and that normalize() decodes back to their
 * literal form.
 *
 * @warning
 *      Be careful when reusing instances of PercentEncoder. The object
 *      you use for normalize() SHOULD NOT be used for encode(), or
 *      vice-versa.
 */
class HTMLPurifier_PercentEncoder
{

    /**
     * Lookup table of characters to preserve, keyed by byte value
     * (as returned by ord()); every present key maps to true.
     * @type array
     */
    protected $preserve = array();

    /**
     * Builds the table of preserved characters.
     *
     * The table always contains the unreserved set: ASCII digits, ASCII
     * letters, dash, period, underscore and tilde. Every byte of
     * $preserve, when given, is added on top of that.
     *
     * @param string|false $preserve Extra characters that should not be
     *      escaped, as a plain string; false (the default) adds nothing.
     */
    public function __construct($preserve = false)
    {
        // unreserved letters, ought to const-ify
        for ($i = 48; $i <= 57; $i++) { // digits
            $this->preserve[$i] = true;
        }
        for ($i = 65; $i <= 90; $i++) { // upper-case
            $this->preserve[$i] = true;
        }
        for ($i = 97; $i <= 122; $i++) { // lower-case
            $this->preserve[$i] = true;
        }
        $this->preserve[45] = true; // Dash         -
        $this->preserve[46] = true; // Period       .
        $this->preserve[95] = true; // Underscore   _
        $this->preserve[126] = true; // Tilde        ~

        // extra letters not to escape
        if ($preserve !== false) {
            for ($i = 0, $c = strlen($preserve); $i < $c; $i++) {
                $this->preserve[ord($preserve[$i])] = true;
            }
        }
    }

    /**
     * Our replacement for urlencode: percent-encodes every byte of the
     * string EXCEPT the unreserved characters and any extra characters
     * that were passed to the constructor to be preserved.
     *
     * @note
     *      Assumes that the string has already been normalized, making any
     *      and all percent escape sequences valid. Percents will not be
     *      re-escaped, regardless of their status in $preserve
     * @param string $string String to be encoded
     * @return string Encoded string; escapes use upper-case hex digits.
     */
    public function encode($string)
    {
        $ret = '';
        for ($i = 0, $c = strlen($string); $i < $c; $i++) {
            $char = $string[$i];
            // '%' is always copied through so that existing escape
            // sequences are not double-encoded.
            if ($char === '%' || isset($this->preserve[ord($char)])) {
                $ret .= $char;
            } else {
                $ret .= '%' . sprintf('%02X', ord($char));
            }
        }
        return $ret;
    }

    /**
     * Fix up percent-encoding by decoding unreserved characters and normalizing.
     *
     * For every '%' in the input:
     *  - if it is not followed by two hex digits, the '%' itself is
     *    escaped as '%25' and the following text is kept as-is;
     *  - if the escape denotes a preserved character, it is replaced by
     *    that literal character;
     *  - otherwise the escape is kept, with its hex digits upper-cased.
     *
     * @warning This function is affected by $preserve, even though the
     *          usual desired behavior is for this not to preserve those
     *          characters. Be careful when reusing instances of PercentEncoder!
     * @param string $string String to normalize
     * @return string
     */
    public function normalize($string)
    {
        if ($string == '') {
            return '';
        }
        $parts = explode('%', $string);
        // Everything before the first '%' needs no processing.
        $ret = array_shift($parts);
        foreach ($parts as $part) {
            $length = strlen($part);
            if ($length < 2) {
                // Too short to hold two hex digits: stray percent sign.
                $ret .= '%25' . $part;
                continue;
            }
            $encoding = substr($part, 0, 2);
            $text = substr($part, 2);
            if (!ctype_xdigit($encoding)) {
                // Not a valid escape sequence: stray percent sign.
                $ret .= '%25' . $part;
                continue;
            }
            $int = hexdec($encoding);
            if (isset($this->preserve[$int])) {
                // Escaped form of a preserved character: decode it.
                $ret .= chr($int) . $text;
                continue;
            }
            $encoding = strtoupper($encoding);
            $ret .= '%' . $encoding . $text;
        }
        return $ret;
    }
}

// vim: et sw=4 sts=4