File indexing completed on 2024-12-22 05:36:21
<?php

/**
 * Base class for all validating attribute definitions.
 *
 * This family of classes forms the core for not only HTML attribute validation,
 * but also any sort of string that needs to be validated or cleaned (which
 * means CSS properties and composite definitions are defined here too).
 * Besides defining (through code) what precisely makes the string valid,
 * subclasses are also responsible for cleaning the code if possible.
 */

abstract class HTMLPurifier_AttrDef
{

    /**
     * Tells us whether or not an HTML attribute is minimized.
     * Has no meaning in other contexts.
     * @type bool
     */
    public $minimized = false;

    /**
     * Tells us whether or not an HTML attribute is required.
     * Has no meaning in other contexts.
     * @type bool
     */
    public $required = false;

    /**
     * Validates and cleans passed string according to a definition.
     *
     * The return contract is defined by the implementing subclasses.
     *
     * @param string $string String to be validated and cleaned.
     * @param HTMLPurifier_Config $config Mandatory HTMLPurifier_Config object.
     * @param HTMLPurifier_Context $context Mandatory HTMLPurifier_Context object.
     */
    abstract public function validate($string, $config, $context);

    /**
     * Convenience method that parses a string as if it were CDATA.
     *
     * This method processes a string along the lines of
     * <http://www.w3.org/TR/html4/types.html#h-6.2>: leading and trailing
     * whitespace is removed, and every remaining line feed, carriage return
     * and tab is replaced with a single space. While most useful for HTML
     * attributes specified as CDATA, it can also be applied to most CSS
     * values.
     *
     * @note The referenced section describes line feeds as being ignored;
     *       this implementation replaces them with a space instead, exactly
     *       like carriage returns and tabs.
     *
     * @note This method is not entirely standards compliant, as trim() removes
     *       more types of whitespace than specified in the spec. In practice,
     *       this is rarely a problem, as those extra characters usually have
     *       already been removed by HTMLPurifier_Encoder.
     *
     * @warning This processing is inconsistent with XML's whitespace handling
     *          as specified by section 3.3.3 and referenced XHTML 1.0 section
     *          4.7.  However, note that we are NOT necessarily
     *          parsing XML, thus, this behavior may still be correct. We
     *          assume that newlines have been normalized.
     *
     * @param string $string String to normalize.
     * @return string Trimmed string with "\n", "\t" and "\r" turned into spaces.
     */
    public function parseCDATA($string)
    {
        $string = trim($string);
        // Interior line breaks and tabs become spaces; they are not dropped,
        // so adjacent tokens stay separated.
        $string = str_replace(array("\n", "\t", "\r"), ' ', $string);
        return $string;
    }

    /**
     * Factory method for creating this class from a string.
     * @param string $string String construction info
     * @return HTMLPurifier_AttrDef Created AttrDef object corresponding to $string
     */
    public function make($string)
    {
        // default implementation, return a flyweight of this object.
        // If $string has an effect on the returned object (i.e. you
        // need to overload this method), it is best
        // to clone or instantiate new copies. (Instantiation is safer.)
        return $this;
    }

    /**
     * Removes spaces from rgb(0, 0, 0) so that shorthand CSS properties work
     * properly. THIS IS A HACK!
     *
     * Both the three-component forms (rgb(), hsl()) and the four-component
     * forms (rgba(), hsla()) are handled, including when they occur together
     * in the same string. Text that matches neither form is left untouched.
     *
     * @param string $string a CSS colour definition
     * @return string The munged string, or $string unchanged if the regular
     *                expression engine reports an error.
     */
    protected function mungeRgb($string)
    {
        // One colour component: optional surrounding whitespace around an
        // integer or decimal number with an optional trailing percent sign.
        // Every use of $p adds three capture groups; the replacements only
        // reference the outermost one (number plus percent sign), which is
        // why the back-references advance in steps of three.
        $p = '\s*(\d+(\.\d+)?([%]?))\s*';

        // The three-component pattern requires "(" directly after "rgb" or
        // "hsl", so it can never match an "rgba(" / "hsla(" occurrence;
        // applying both patterns in sequence is therefore safe.
        $munged = preg_replace(
            array(
                '/(rgba|hsla)\('.$p.','.$p.','.$p.','.$p.'\)/',
                '/(rgb|hsl)\('.$p.','.$p.','.$p.'\)/',
            ),
            array(
                '\1(\2,\5,\8,\11)',
                '\1(\2,\5,\8)',
            ),
            $string
        );

        // preg_replace() yields null on a PCRE error; keep the documented
        // string return type by falling back to the unmodified input.
        return $munged === null ? $string : $munged;
    }

    /**
     * Parses a possibly escaped CSS string and returns the "pure"
     * version of it.
     *
     * Handled escapes:
     *  - a backslash followed by one to six hexadecimal digits is replaced
     *    by the character HTMLPurifier_Encoder::unichr() produces for that
     *    code; a single whitespace character directly after the digits is
     *    consumed as the escape terminator;
     *  - a backslash followed by a line feed is removed together with the
     *    line feed;
     *  - a backslash followed by any other character yields that character;
     *  - a lone backslash at the very end of the string is kept.
     *
     * A hex escape whose character is reduced to an empty string by
     * HTMLPurifier_Encoder::cleanUTF8() is dropped, without affecting the
     * characters that follow it.
     *
     * @param string $string CSS string that may contain backslash escapes
     * @return string The string with escapes expanded
     */
    protected function expandCSSEscape($string)
    {
        // flexibly parse it
        $ret = '';
        for ($i = 0, $c = strlen($string); $i < $c; $i++) {
            if ($string[$i] === '\\') {
                $i++;
                if ($i >= $c) {
                    // Nothing follows the backslash: keep it literally.
                    $ret .= '\\';
                    break;
                }
                if (ctype_xdigit($string[$i])) {
                    // Collect at most six hex digits in total.
                    $code = $string[$i];
                    for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
                        if (!ctype_xdigit($string[$i])) {
                            break;
                        }
                        $code .= $string[$i];
                    }
                    // $i now indexes the first character after the digits.
                    // Whitespace there terminates the escape and is skipped
                    // by the loop's own $i++; anything else has to be
                    // processed normally, so step back onto the last digit.
                    // This is done before the validity check so that a
                    // rejected escape does not swallow the next character.
                    if ($i < $c && trim($string[$i]) !== '') {
                        $i--;
                    }
                    // We have to be extremely careful when adding
                    // new characters, to make sure we're not breaking
                    // the encoding.
                    $char = HTMLPurifier_Encoder::unichr(hexdec($code));
                    if (HTMLPurifier_Encoder::cleanUTF8($char) !== '') {
                        $ret .= $char;
                    }
                    continue;
                }
                if ($string[$i] === "\n") {
                    // Escaped line feed: drop both characters.
                    continue;
                }
            }
            $ret .= $string[$i];
        }
        return $ret;
    }
}

// vim: et sw=4 sts=4