File indexing completed on 2024-12-22 05:36:21

0001 <?php
0002 
0003 /**
0004  * Base class for all validating attribute definitions.
0005  *
0006  * This family of classes forms the core for not only HTML attribute validation,
0007  * but also any sort of string that needs to be validated or cleaned (which
0008  * means CSS properties and composite definitions are defined here too).
0009  * Besides defining (through code) what precisely makes the string valid,
0010  * subclasses are also responsible for cleaning the code if possible.
0011  */
0012 
abstract class HTMLPurifier_AttrDef
{

    /**
     * Tells us whether or not an HTML attribute is minimized.
     * Has no meaning in other contexts.
     * @type bool
     */
    public $minimized = false;

    /**
     * Tells us whether or not an HTML attribute is required.
     * Has no meaning in other contexts.
     * @type bool
     */
    public $required = false;

    /**
     * Validates and cleans passed string according to a definition.
     *
     * @param string $string String to be validated and cleaned.
     * @param HTMLPurifier_Config $config Mandatory HTMLPurifier_Config object.
     * @param HTMLPurifier_Context $context Mandatory HTMLPurifier_Context object.
     */
    abstract public function validate($string, $config, $context);

    /**
     * Convenience method that parses a string as if it were CDATA.
     *
     * This method processes a string roughly in the manner specified at
     * <http://www.w3.org/TR/html4/types.html#h-6.2>: leading and trailing
     * whitespace is removed first, then every remaining line feed, tab and
     * carriage return is replaced with a single space.  While most useful
     * for HTML attributes specified as CDATA, it can also be applied to
     * most CSS values.
     *
     * @note This method is not entirely standards compliant, as trim() removes
     *       more types of whitespace than specified in the spec. In practice,
     *       this is rarely a problem, as those extra characters usually have
     *       already been removed by HTMLPurifier_Encoder.
     *
     * @warning This processing is inconsistent with XML's whitespace handling
     *          as specified by section 3.3.3 and referenced XHTML 1.0 section
     *          4.7.  However, note that we are NOT necessarily
     *          parsing XML, thus, this behavior may still be correct. We
     *          assume that newlines have been normalized.
     *
     * @param string $string String to normalize
     * @return string Trimmed string with interior newlines, tabs and
     *                carriage returns turned into spaces
     */
    public function parseCDATA($string)
    {
        $string = trim($string);
        $string = str_replace(array("\n", "\t", "\r"), ' ', $string);
        return $string;
    }

    /**
     * Factory method for creating this class from a string.
     * @param string $string String construction info
     * @return HTMLPurifier_AttrDef Created AttrDef object corresponding to $string
     */
    public function make($string)
    {
        // default implementation, return a flyweight of this object.
        // If $string has an effect on the returned object (i.e. you
        // need to overload this method), it is best
        // to clone or instantiate new copies. (Instantiation is safer.)
        return $this;
    }

    /**
     * Removes spaces from rgb(0, 0, 0) so that shorthand CSS properties work
     * properly. THIS IS A HACK!
     *
     * Every rgb()/hsl() and rgba()/hsla() functional colour found in the
     * string is rewritten without whitespace around its components. Both
     * families are handled in the same pass, so a value mixing e.g. rgb()
     * and rgba() has all of its colours munged.
     *
     * @param string $string a CSS colour definition
     * @return string
     */
    protected function mungeRgb($string)
    {
        // One colour component: a non-negative number with optional
        // fraction and optional "%", surrounded by optional whitespace.
        // It opens three capture groups; only the first (the bare
        // component without whitespace) is used in the replacements.
        $p = '\s*(\d+(\.\d+)?([%]?))\s*';

        // Group 1 is the function name; since each $p contributes three
        // groups, the components live in groups 2, 5, 8 (and 11 for alpha).
        // The patterns are applied in order. The second one cannot match
        // inside "rgba("/"hsla(" because it requires "(" right after the
        // three-letter name, so the two never interfere.
        return preg_replace(
            array(
                '/(rgba|hsla)\('.$p.','.$p.','.$p.','.$p.'\)/',
                '/(rgb|hsl)\('.$p.','.$p.','.$p.'\)/',
            ),
            array(
                '${1}(${2},${5},${8},${11})',
                '${1}(${2},${5},${8})',
            ),
            $string
        );
    }

    /**
     * Parses a possibly escaped CSS string and returns the "pure"
     * version of it.
     *
     * Handling of a backslash depends on what follows it:
     *  - one to six hex digits: replaced by the character with that code
     *    point (dropped if HTMLPurifier_Encoder::cleanUTF8() rejects it);
     *    a single whitespace character directly after the digits is
     *    consumed as the escape terminator;
     *  - a line feed: both characters are removed;
     *  - any other character: that character is kept literally;
     *  - nothing (backslash is the last byte): the backslash is kept.
     *
     * @param string $string CSS string, possibly containing escapes
     * @return string String with the escapes expanded
     */
    protected function expandCSSEscape($string)
    {
        // flexibly parse it
        $ret = '';
        for ($i = 0, $c = strlen($string); $i < $c; $i++) {
            if ($string[$i] === '\\') {
                $i++;
                if ($i >= $c) {
                    // lone trailing backslash, keep it as-is
                    $ret .= '\\';
                    break;
                }
                if (ctype_xdigit($string[$i])) {
                    // collect at most six hex digits
                    $code = $string[$i];
                    for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
                        if (!ctype_xdigit($string[$i])) {
                            break;
                        }
                        $code .= $string[$i];
                    }
                    // We have to be extremely careful when adding
                    // new characters, to make sure we're not breaking
                    // the encoding.
                    $char = HTMLPurifier_Encoder::unichr(hexdec($code));
                    if (HTMLPurifier_Encoder::cleanUTF8($char) !== '') {
                        $ret .= $char;
                    }
                    // $i now indexes the character right after the hex
                    // digits. If it is whitespace it terminates the escape
                    // and is consumed by the loop's $i++. Otherwise step
                    // back so that the loop processes it normally. This
                    // must also happen when the escaped character was
                    // rejected above, or the following character would be
                    // silently lost.
                    if ($i < $c && trim($string[$i]) !== '') {
                        $i--;
                    }
                    continue;
                }
                if ($string[$i] === "\n") {
                    // escaped newline: line continuation, emit nothing
                    continue;
                }
            }
            $ret .= $string[$i];
        }
        return $ret;
    }
}
0143 
0144 // vim: et sw=4 sts=4