File indexing completed on 2024-12-22 05:36:21

0001 <?php
0002 
0003 /**
0004  * Class that handles operations involving percent-encoding in URIs.
0005  *
0006  * @warning
0007  *      Be careful when reusing instances of PercentEncoder. The object
0008  *      you use for normalize() SHOULD NOT be used for encode(), or
0009  *      vice-versa.
0010  */
class HTMLPurifier_PercentEncoder
{

    /**
     * Lookup table of bytes that must not be percent-encoded.
     *
     * Keys are byte values as returned by ord(); every stored value is
     * true, so membership is tested with isset().
     * @type array
     */
    protected $preserve = array();

    /**
     * Builds the table of preserved bytes.
     *
     * The table always contains the ASCII digits, upper- and lower-case
     * letters, and the four marks "-", ".", "_" and "~". Every byte of
     * $preserve is added on top of that set.
     *
     * @param string|bool $preserve Extra characters that should be left
     *      untouched, or false for none. Any non-string value is treated
     *      as "no extra characters".
     */
    public function __construct($preserve = false)
    {
        // unreserved letters, ought to const-ify
        for ($i = 48; $i <= 57; $i++) { // digits
            $this->preserve[$i] = true;
        }
        for ($i = 65; $i <= 90; $i++) { // upper-case
            $this->preserve[$i] = true;
        }
        for ($i = 97; $i <= 122; $i++) { // lower-case
            $this->preserve[$i] = true;
        }
        $this->preserve[45] = true; // Dash         -
        $this->preserve[46] = true; // Period       .
        $this->preserve[95] = true; // Underscore   _
        $this->preserve[126]= true; // Tilde        ~

        // extra letters not to escape
        // Only a string can be walked byte by byte. Other values (e.g. a
        // literal true) used to be indexed like a string, which raised
        // warnings and registered byte 0 as preserved.
        if (is_string($preserve)) {
            for ($i = 0, $c = strlen($preserve); $i < $c; $i++) {
                $this->preserve[ord($preserve[$i])] = true;
            }
        }
    }

    /**
     * Our replacement for urlencode: percent-encodes every byte that is
     * neither in the preserve table nor a literal percent sign.
     * @note
     *      Assumes that the string has already been normalized, making any
     *      and all percent escape sequences valid. Percents will not be
     *      re-escaped, regardless of their status in $preserve
     * @param string $string String to be encoded
     * @return string Encoded string; escapes use upper-case hex digits.
     */
    public function encode($string)
    {
        $ret = '';
        for ($i = 0, $c = strlen($string); $i < $c; $i++) {
            $char = $string[$i];
            $int = ord($char);
            if ($char === '%' || isset($this->preserve[$int])) {
                // Existing escapes and preserved bytes pass through as-is.
                $ret .= $char;
            } else {
                $ret .= '%' . sprintf('%02X', $int);
            }
        }
        return $ret;
    }

    /**
     * Fix up percent-encoding by decoding preserved characters and
     * normalizing everything else.
     *
     * For each "%" in the input:
     *  - if it is not followed by two hex digits, the percent sign itself
     *    is escaped as "%25" and the following text is kept;
     *  - if the escape denotes a byte in the preserve table, it is decoded
     *    to that byte;
     *  - otherwise the escape is kept with its hex digits upper-cased.
     *
     * @warning This function is affected by $preserve, even though the
     *          usual desired behavior is for this not to preserve those
     *          characters. Be careful when reusing instances of PercentEncoder!
     * @param string $string String to normalize
     * @return string
     */
    public function normalize($string)
    {
        if ($string == '') {
            return '';
        }
        // Everything before the first "%" is copied verbatim; each later
        // element is the text that followed one "%" in the input.
        $parts = explode('%', $string);
        $ret = array_shift($parts);
        foreach ($parts as $part) {
            $length = strlen($part);
            if ($length < 2) {
                // Too short to hold two hex digits: escape the bare "%".
                $ret .= '%25' . $part;
                continue;
            }
            $encoding = substr($part, 0, 2);
            $text     = substr($part, 2);
            if (!ctype_xdigit($encoding)) {
                // Not a valid escape: escape the bare "%".
                $ret .= '%25' . $part;
                continue;
            }
            $int = hexdec($encoding);
            if (isset($this->preserve[$int])) {
                // Needlessly escaped preserved byte: decode it.
                $ret .= chr($int) . $text;
                continue;
            }
            $encoding = strtoupper($encoding);
            $ret .= '%' . $encoding . $text;
        }
        return $ret;
    }
}
0110 
0111 // vim: et sw=4 sts=4