File indexing completed on 2025-01-05 05:24:27
<?php

/**
 * Represents an XHTML 1.1 module, with information on elements, tags
 * and attributes.
 * @note Even though this is technically XHTML 1.1, it is also used for
 *       regular HTML parsing. We are using modulization as a convenient
 *       way to represent the internals of HTMLDefinition, and our
 *       implementation is by no means conforming and does not directly
 *       use the normative DTDs or XML schemas.
 * @note The public variables in a module should almost directly
 *       correspond to the variables in HTMLPurifier_HTMLDefinition.
 *       However, the prefix info carries no special meaning in these
 *       objects (include it anyway if that's the correspondence though).
 * @todo Consider making some member functions protected
 */

class HTMLPurifier_HTMLModule
{

    // -- Overloadable ----------------------------------------------------

    /**
     * Short unique string identifier of the module.
     * @type string
     */
    public $name;

    /**
     * Informally, a list of elements this module changes.
     * Not used in any significant way.
     * @type array
     */
    public $elements = array();

    /**
     * Associative array of element names to element definitions.
     * Some definitions may be incomplete, to be merged in later
     * with the full definition.
     * @type array
     */
    public $info = array();

    /**
     * Associative array of content set names to content set additions.
     * This is commonly used to, say, add an A element to the Inline
     * content set. This corresponds to an internal variable $content_sets
     * and NOT info_content_sets member variable of HTMLDefinition.
     * @type array
     */
    public $content_sets = array();

    /**
     * Associative array of attribute collection names to attribute
     * collection additions. More rarely used for adding attributes to
     * the global collections. Example is the StyleAttribute module adding
     * the style attribute to the Core. Corresponds to HTMLDefinition's
     * attr_collections->info, since the object's data is only info,
     * with extra behavior associated with it.
     * @type array
     */
    public $attr_collections = array();

    /**
     * Associative array of deprecated tag name to HTMLPurifier_TagTransform.
     * @type array
     */
    public $info_tag_transform = array();

    /**
     * List of HTMLPurifier_AttrTransform to be performed before validation.
     * @type array
     */
    public $info_attr_transform_pre = array();

    /**
     * List of HTMLPurifier_AttrTransform to be performed after validation.
     * @type array
     */
    public $info_attr_transform_post = array();

    /**
     * List of HTMLPurifier_Injector to be performed during well-formedness fixing.
     * An injector will only be invoked if all of its prerequisites are met;
     * if an injector fails setup, there will be no error; it will simply be
     * silently disabled.
     * @type array
     */
    public $info_injector = array();

    /**
     * Boolean flag that indicates whether or not getChildDef is implemented.
     * For optimization reasons: may save a call to a function. Be sure
     * to set it if you do implement getChildDef(), otherwise it will have
     * no effect!
     * @type bool
     */
    public $defines_child_def = false;

    /**
     * Boolean flag whether or not this module is safe. If it is not safe, all
     * of its members are unsafe. Modules are safe by default (this might be
     * slightly dangerous, but it doesn't make much sense to force HTML Purifier,
     * which is based off of safe HTML, to explicitly say, "This is safe," even
     * though there are modules which are "unsafe")
     *
     * @type bool
     * @note Previously, safety could be applied at an element level granularity.
     *       We've removed this ability, so in order to add "unsafe" elements
     *       or attributes, a dedicated module with this property set to false
     *       must be used.
     */
    public $safe = true;

    /**
     * Retrieves a proper HTMLPurifier_ChildDef subclass based on
     * content_model and content_model_type member variables of
     * the HTMLPurifier_ElementDef class. There is a similar function
     * in HTMLPurifier_HTMLDefinition.
     *
     * This base implementation ignores $def and always returns false;
     * it is meant to be overridden (see $defines_child_def).
     *
     * @param HTMLPurifier_ElementDef $def
     * @return HTMLPurifier_ChildDef|bool subclass, or false in this base
     *         implementation
     */
    public function getChildDef($def)
    {
        return false;
    }

    // -- Convenience -----------------------------------------------------

    /**
     * Convenience function that sets up a new element
     * @param string $element Name of element to add
     * @param string|bool $type What content set should element be registered to?
     *              Set as false to skip this step.
     * @param string|object $contents Allowed children in form of:
     *              "$content_model_type: $content_model". A non-string value
     *              is assigned directly as the element's child definition.
     * @param array|string $attr_includes What attribute collections to register to
     *              element?
     * @param array $attr What unique attributes does the element define?
     * @see HTMLPurifier_ElementDef:: for in-depth descriptions of these parameters.
     * @return HTMLPurifier_ElementDef Created element definition object, so you
     *         can set advanced parameters
     */
    public function addElement($element, $type, $contents, $attr_includes = array(), $attr = array())
    {
        $this->elements[] = $element;
        // parse content_model (yields two nulls when $contents is not a string)
        list($content_model_type, $content_model) = $this->parseContents($contents);
        // merge in attribute inclusions (stored under key 0 of $attr)
        $this->mergeInAttrIncludes($attr, $attr_includes);
        // add element to content sets
        if ($type) {
            $this->addElementToContentSet($element, $type);
        }
        // create element
        $this->info[$element] = HTMLPurifier_ElementDef::create(
            $content_model,
            $content_model_type,
            $attr
        );
        // literal object $contents means direct child manipulation
        if (!is_string($contents)) {
            $this->info[$element]->child = $contents;
        }
        return $this->info[$element];
    }

    /**
     * Convenience function that creates a totally blank, non-standalone
     * element. If a definition for the element already exists in this
     * module, an error is triggered and the existing definition is
     * returned unchanged.
     * @param string $element Name of element to create
     * @return HTMLPurifier_ElementDef Created element
     */
    public function addBlankElement($element)
    {
        if (!isset($this->info[$element])) {
            $this->elements[] = $element;
            $this->info[$element] = new HTMLPurifier_ElementDef();
            $this->info[$element]->standalone = false;
        } else {
            trigger_error("Definition for $element already exists in module, cannot redefine");
        }
        return $this->info[$element];
    }

    /**
     * Convenience function that registers an element to a content set.
     * Elements are accumulated into a single ' | '-separated string per
     * content set.
     * @param string $element Element to register
     * @param string $type Name content set (warning: case sensitive, usually upper-case
     *        first letter)
     */
    public function addElementToContentSet($element, $type)
    {
        if (!isset($this->content_sets[$type])) {
            $this->content_sets[$type] = '';
        } else {
            $this->content_sets[$type] .= ' | ';
        }
        $this->content_sets[$type] .= $element;
    }

    /**
     * Convenience function that transforms single-string contents
     * into separate content model and content model type
     * @param string $contents Allowed children in form of:
     *                  "$content_model_type: $content_model"
     *                  or one of the shorthands 'Empty', 'Inline', 'Flow'.
     * @return array Two-element array: (content model type, content model)
     * @note If contents is an object, an array of two nulls will be
     *       returned, and the caller needs to take the original $contents
     *       and use it directly.
     * @note Only the first colon separates the type from the model, so a
     *       content model may itself contain colons. If there is no colon
     *       at all, the whole string is treated as the type and the model
     *       is the empty string.
     */
    public function parseContents($contents)
    {
        if (!is_string($contents)) {
            // defer: the caller handles non-string contents itself
            return array(null, null);
        }
        switch ($contents) {
            // check for shorthand content model forms
            case 'Empty':
                return array('empty', '');
            case 'Inline':
                return array('optional', 'Inline | #PCDATA');
            case 'Flow':
                return array('optional', 'Flow | #PCDATA');
        }
        // Split on the first colon only (limit 2) so that colons inside the
        // content model are preserved, and pad to two parts so a string
        // without any colon does not produce an undefined-offset error.
        $parts = array_pad(explode(':', $contents, 2), 2, '');
        $content_model_type = strtolower(trim($parts[0]));
        $content_model = trim($parts[1]);
        return array($content_model_type, $content_model);
    }

    /**
     * Convenience function that merges a list of attribute includes into
     * an attribute array. The includes are stored under index 0 of $attr,
     * replacing whatever was there.
     * @param array $attr Reference to attr array to modify
     * @param array|string $attr_includes Array of includes / string include to merge in;
     *        an empty non-array value is treated as no includes
     */
    public function mergeInAttrIncludes(&$attr, $attr_includes)
    {
        if (!is_array($attr_includes)) {
            if (empty($attr_includes)) {
                $attr_includes = array();
            } else {
                // single include given as a scalar: wrap it in a list
                $attr_includes = array($attr_includes);
            }
        }
        $attr[0] = $attr_includes;
    }

    /**
     * Convenience function that generates a lookup table with boolean
     * true as value. Null entries are skipped.
     * @param array|string $list List of values to turn into a lookup
     * @note You can also pass an arbitrary number of arguments in
     *       place of the regular argument
     * @return array array equivalent of list
     */
    public function makeLookup($list)
    {
        if (is_string($list)) {
            // variadic form: every argument is a value of the lookup
            $list = func_get_args();
        }
        $ret = array();
        foreach ($list as $value) {
            if (is_null($value)) {
                continue;
            }
            $ret[$value] = true;
        }
        return $ret;
    }

    /**
     * Lazy load construction of the module after determining whether
     * or not it's needed, and also when a finalized configuration object
     * is available. The base implementation does nothing.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
    }
}

// vim: et sw=4 sts=4