File indexing completed on 2024-12-22 05:36:21
<?php

/**
 * Definition of the purified HTML that describes allowed children,
 * attributes, and many other things.
 *
 * Conventions:
 *
 * All member variables that are prefixed with info
 * (including the main $info array) are used by HTML Purifier internals
 * and should not be directly edited when customizing the HTMLDefinition.
 * They can usually be set via configuration directives or custom
 * modules.
 *
 * On the other hand, member variables without the info prefix are used
 * internally by the HTMLDefinition and MUST NOT be used by other HTML
 * Purifier internals. Many of them, however, are public, and may be
 * edited by userspace code to tweak the behavior of HTMLDefinition.
 *
 * @note This class is inspected by Printer_HTMLDefinition; please
 *       update that class if things here change.
 *
 * @warning Directives that change this object's structure must be in
 *          the HTML or Attr namespace!
 */
class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
{

    // FULLY-PUBLIC VARIABLES ---------------------------------------------

    /**
     * Associative array of element names to HTMLPurifier_ElementDef.
     * @type HTMLPurifier_ElementDef[]
     */
    public $info = array();

    /**
     * Associative array of global attribute name to attribute definition.
     * @type array
     */
    public $info_global_attr = array();

    /**
     * String name of parent element HTML will be going into.
     * @type string
     */
    public $info_parent = 'div';

    /**
     * Definition for parent element, allows parent element to be a
     * tag that's not allowed inside the HTML fragment.
     * @type HTMLPurifier_ElementDef
     */
    public $info_parent_def;

    /**
     * String name of element used to wrap inline elements in block context.
     * @type string
     * @note This is rarely used except for BLOCKQUOTEs in strict mode
     */
    public $info_block_wrapper = 'p';

    /**
     * Associative array of deprecated tag name to HTMLPurifier_TagTransform.
     * @type array
     */
    public $info_tag_transform = array();

    /**
     * Indexed list of HTMLPurifier_AttrTransform to be performed before validation.
     * @type HTMLPurifier_AttrTransform[]
     */
    public $info_attr_transform_pre = array();

    /**
     * Indexed list of HTMLPurifier_AttrTransform to be performed after validation.
     * @type HTMLPurifier_AttrTransform[]
     */
    public $info_attr_transform_post = array();

    /**
     * Nested lookup array of content set name (Block, Inline) to
     * element name to whether or not it belongs in that content set.
     * @type array
     */
    public $info_content_sets = array();

    /**
     * Indexed list of HTMLPurifier_Injector to be used.
     * @type HTMLPurifier_Injector[]
     */
    public $info_injector = array();

    /**
     * Doctype object
     * @type HTMLPurifier_Doctype
     */
    public $doctype;



    // RAW CUSTOMIZATION STUFF --------------------------------------------

    /**
     * Adds a custom attribute to a pre-existing element
     * @note This is strictly convenience, and does not have a corresponding
     *       method in HTMLPurifier_HTMLModule
     * @param string $element_name Element name to add attribute to
     * @param string $attr_name Name of attribute
     * @param mixed $def Attribute definition, can be string or object, see
     *             HTMLPurifier_AttrTypes for details
     */
    public function addAttribute($element_name, $attr_name, $def)
    {
        $module = $this->getAnonymousModule();
        // Reuse the element if the anonymous module already knows it,
        // otherwise register a blank element to hang the attribute on.
        if (!isset($module->info[$element_name])) {
            $element = $module->addBlankElement($element_name);
        } else {
            $element = $module->info[$element_name];
        }
        $element->attr[$attr_name] = $def;
    }

    /**
     * Adds a custom element to your HTML definition
     * @see HTMLPurifier_HTMLModule::addElement() for detailed
     *       parameter and return value descriptions.
     */
    public function addElement($element_name, $type, $contents, $attr_collections, $attributes = array())
    {
        $module = $this->getAnonymousModule();
        // assume that if the user is calling this, the element
        // is safe. This may not be a good idea
        $element = $module->addElement($element_name, $type, $contents, $attr_collections, $attributes);
        return $element;
    }

    /**
     * Adds a blank element to your HTML definition, for overriding
     * existing behavior
     * @param string $element_name
     * @return HTMLPurifier_ElementDef
     * @see HTMLPurifier_HTMLModule::addBlankElement() for detailed
     *       parameter and return value descriptions.
     */
    public function addBlankElement($element_name)
    {
        $module = $this->getAnonymousModule();
        $element = $module->addBlankElement($element_name);
        return $element;
    }

    /**
     * Retrieves a reference to the anonymous module, so you can
     * bust out advanced features without having to make your own
     * module. The module is created lazily on first use and named
     * 'Anonymous'.
     * @return HTMLPurifier_HTMLModule
     */
    public function getAnonymousModule()
    {
        if (!$this->_anonModule) {
            $this->_anonModule = new HTMLPurifier_HTMLModule();
            $this->_anonModule->name = 'Anonymous';
        }
        return $this->_anonModule;
    }

    /**
     * Lazily-created module collecting user customizations; null until
     * getAnonymousModule() is first called, and reset to null once
     * processModules() has handed it to the module manager.
     * @type HTMLPurifier_HTMLModule|null
     */
    private $_anonModule = null;

    // PUBLIC BUT INTERNAL VARIABLES --------------------------------------

    /**
     * @type string
     */
    public $type = 'HTML';

    /**
     * @type HTMLPurifier_HTMLModuleManager
     */
    public $manager;

    /**
     * Performs low-cost, preliminary initialization.
     */
    public function __construct()
    {
        $this->manager = new HTMLPurifier_HTMLModuleManager();
    }

    /**
     * Builds the definition: pulls data out of the module manager,
     * applies configuration-driven filtering, then discards the manager
     * and per-element content model scratch fields.
     * @param HTMLPurifier_Config $config
     */
    protected function doSetup($config)
    {
        $this->processModules($config);
        $this->setupConfigStuff($config);
        // The manager is only needed while setting up.
        unset($this->manager);

        // cleanup some of the element definitions
        foreach ($this->info as $k => $v) {
            unset($this->info[$k]->content_model);
            unset($this->info[$k]->content_model_type);
        }
    }

    /**
     * Extract out the information from the manager
     * @param HTMLPurifier_Config $config
     */
    protected function processModules($config)
    {
        if ($this->_anonModule) {
            // for user specific changes
            // this is late-loaded so we don't have to deal with PHP4
            // reference wonky-ness
            $this->manager->addModule($this->_anonModule);
            // Reset rather than unset(): the property is declared on the
            // class, and unsetting it would make any later call to
            // getAnonymousModule() read an undefined property (a warning
            // on PHP 8) instead of cleanly re-creating the module.
            $this->_anonModule = null;
        }

        $this->manager->setup($config);
        $this->doctype = $this->manager->doctype;

        // Merge each module's transforms and injectors in module order.
        // A value of exactly false is a removal marker: it deletes the
        // entry registered under the same key by an earlier module.
        foreach ($this->manager->modules as $module) {
            foreach ($module->info_tag_transform as $k => $v) {
                if ($v === false) {
                    unset($this->info_tag_transform[$k]);
                } else {
                    $this->info_tag_transform[$k] = $v;
                }
            }
            foreach ($module->info_attr_transform_pre as $k => $v) {
                if ($v === false) {
                    unset($this->info_attr_transform_pre[$k]);
                } else {
                    $this->info_attr_transform_pre[$k] = $v;
                }
            }
            foreach ($module->info_attr_transform_post as $k => $v) {
                if ($v === false) {
                    unset($this->info_attr_transform_post[$k]);
                } else {
                    $this->info_attr_transform_post[$k] = $v;
                }
            }
            foreach ($module->info_injector as $k => $v) {
                if ($v === false) {
                    unset($this->info_injector[$k]);
                } else {
                    $this->info_injector[$k] = $v;
                }
            }
        }
        $this->info = $this->manager->getElements();
        $this->info_content_sets = $this->manager->contentSets->lookup;
    }

    /**
     * Sets up stuff based on config. We need a better way of doing this.
     *
     * Reads HTML.BlockWrapper, HTML.Parent, HTML.AllowedElements,
     * HTML.AllowedAttributes, HTML.Allowed, HTML.ForbiddenElements and
     * HTML.ForbiddenAttributes, prunes $info / $info_global_attr
     * accordingly, and finally drops injectors whose checkNeeded() does
     * not return false. Configuration problems are reported through
     * trigger_error().
     *
     * @note NOTE(review): trigger_error() with E_USER_ERROR is deprecated
     *       as of PHP 8.4; left as-is because callers may rely on the
     *       current error level.
     * @param HTMLPurifier_Config $config
     */
    protected function setupConfigStuff($config)
    {
        $block_wrapper = $config->get('HTML.BlockWrapper');
        if (isset($this->info_content_sets['Block'][$block_wrapper])) {
            $this->info_block_wrapper = $block_wrapper;
        } else {
            // $info_block_wrapper keeps its previous value if a custom
            // error handler lets execution continue.
            trigger_error(
                'Cannot use non-block element as block wrapper',
                E_USER_ERROR
            );
        }

        $parent = $config->get('HTML.Parent');
        $def = $this->manager->getElement($parent, true);
        if ($def) {
            $this->info_parent = $parent;
            $this->info_parent_def = $def;
        } else {
            trigger_error(
                'Cannot use unrecognized element as parent',
                E_USER_ERROR
            );
            // Only reached when an error handler swallowed the error:
            // fall back to the definition of the current parent name.
            $this->info_parent_def = $this->manager->getElement($this->info_parent, true);
        }

        // support template text
        $support = "(for information on implementing this, see the support forums) ";

        // setup allowed elements -----------------------------------------

        $allowed_elements = $config->get('HTML.AllowedElements');
        $allowed_attributes = $config->get('HTML.AllowedAttributes'); // retrieve early

        // HTML.Allowed is only consulted when neither of the two explicit
        // directives produced an array.
        if (!is_array($allowed_elements) && !is_array($allowed_attributes)) {
            $allowed = $config->get('HTML.Allowed');
            if (is_string($allowed)) {
                list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed);
            }
        }

        if (is_array($allowed_elements)) {
            foreach ($this->info as $name => $d) {
                if (!isset($allowed_elements[$name])) {
                    unset($this->info[$name]);
                }
                // Whatever remains afterwards names elements we don't know.
                unset($allowed_elements[$name]);
            }
            // emit errors
            foreach ($allowed_elements as $element => $d) {
                $element = htmlspecialchars($element); // PHP doesn't escape errors, be careful!
                trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
            }
        }

        // setup allowed attributes ---------------------------------------

        // Working copy: every key that matches a known attribute is removed,
        // so the leftovers are the entries to complain about below.
        $allowed_attributes_mutable = $allowed_attributes; // by copy!
        if (is_array($allowed_attributes)) {
            // This actually doesn't do anything, since we went away from
            // global attributes. It's possible that userland code uses
            // it, but HTMLModuleManager doesn't!
            foreach ($this->info_global_attr as $attr => $x) {
                $keys = array($attr, "*@$attr", "*.$attr");
                $delete = true;
                foreach ($keys as $key) {
                    if ($delete && isset($allowed_attributes[$key])) {
                        $delete = false;
                    }
                    if (isset($allowed_attributes_mutable[$key])) {
                        unset($allowed_attributes_mutable[$key]);
                    }
                }
                if ($delete) {
                    unset($this->info_global_attr[$attr]);
                }
            }

            foreach ($this->info as $tag => $info) {
                foreach ($info->attr as $attr => $x) {
                    // All spellings under which this attribute may be allowed.
                    $keys = array("$tag@$attr", $attr, "*@$attr", "$tag.$attr", "*.$attr");
                    $delete = true;
                    foreach ($keys as $key) {
                        if ($delete && isset($allowed_attributes[$key])) {
                            $delete = false;
                        }
                        if (isset($allowed_attributes_mutable[$key])) {
                            unset($allowed_attributes_mutable[$key]);
                        }
                    }
                    if ($delete) {
                        if ($this->info[$tag]->attr[$attr]->required) {
                            trigger_error(
                                "Required attribute '$attr' in element '$tag' " .
                                "was not allowed, which means '$tag' will not be allowed either",
                                E_USER_WARNING
                            );
                        }
                        unset($this->info[$tag]->attr[$attr]);
                    }
                }
            }
            // emit errors
            foreach ($allowed_attributes_mutable as $elattr => $d) {
                // Split "element.attr" / "element@attr" into at most two parts.
                $bits = preg_split('/[.@]/', $elattr, 2);
                $c = count($bits);
                switch ($c) {
                    case 2:
                        if ($bits[0] !== '*') {
                            $element = htmlspecialchars($bits[0]);
                            $attribute = htmlspecialchars($bits[1]);
                            if (!isset($this->info[$element])) {
                                // Raised as a warning like every other
                                // allow-list diagnostic in this method.
                                trigger_error(
                                    "Cannot allow attribute '$attribute' if element " .
                                    "'$element' is not allowed/supported $support",
                                    E_USER_WARNING
                                );
                            } else {
                                trigger_error(
                                    "Attribute '$attribute' in element '$element' not supported $support",
                                    E_USER_WARNING
                                );
                            }
                            break;
                        }
                        // otherwise fall through
                    case 1:
                        // NOTE: when arriving here via fall-through ("*.attr"
                        // or "*@attr"), $bits[0] is the '*' wildcard.
                        $attribute = htmlspecialchars($bits[0]);
                        trigger_error(
                            "Global attribute '$attribute' is not ".
                            "supported in any elements $support",
                            E_USER_WARNING
                        );
                        break;
                }
            }
        }

        // setup forbidden elements ---------------------------------------

        $forbidden_elements = $config->get('HTML.ForbiddenElements');
        $forbidden_attributes = $config->get('HTML.ForbiddenAttributes');

        foreach ($this->info as $tag => $info) {
            if (isset($forbidden_elements[$tag])) {
                unset($this->info[$tag]);
                continue;
            }
            foreach ($info->attr as $attr => $x) {
                if (isset($forbidden_attributes["$tag@$attr"]) ||
                    isset($forbidden_attributes["*@$attr"]) ||
                    isset($forbidden_attributes[$attr])
                ) {
                    unset($this->info[$tag]->attr[$attr]);
                    continue;
                } elseif (isset($forbidden_attributes["$tag.$attr"])) { // this segment might get removed eventually
                    // $tag.$attr are not user supplied, so no worries!
                    trigger_error(
                        "Error with $tag.$attr: tag.attr syntax not supported for " .
                        "HTML.ForbiddenAttributes; use tag@attr instead",
                        E_USER_WARNING
                    );
                }
            }
        }
        // Warn about "*.attr" keys, which the loop above does not honour.
        foreach ($forbidden_attributes as $key => $v) {
            if (strlen($key) < 2) {
                continue;
            }
            if ($key[0] != '*') {
                continue;
            }
            if ($key[1] == '.') {
                trigger_error(
                    "Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead",
                    E_USER_WARNING
                );
            }
        }

        // setup injectors -----------------------------------------------------
        foreach ($this->info_injector as $i => $injector) {
            if ($injector->checkNeeded($config) !== false) {
                // remove injector that does not have its required
                // elements/attributes present, and is thus not needed.
                unset($this->info_injector[$i]);
            }
        }
    }

    /**
     * Parses a TinyMCE-flavored Allowed Elements and Attributes list into
     * separate lists for processing. Format is element[attr1|attr2],element2...
     *
     * Spaces and tabs are stripped first; entries are separated by commas
     * or line breaks. The element name '*' is not added to the element
     * list, but its attributes are still recorded (as "*.attr").
     *
     * @warning Although it's largely drawn from TinyMCE's implementation,
     *      it is different, and you'll probably have to modify your lists
     * @param string $list String list to parse
     * @return array Two-item list: array(elements, attributes), where
     *      elements maps element name => true and attributes maps
     *      "element.attr" => true
     * @todo Give this its own class, probably static interface
     */
    public function parseTinyMCEAllowedList($list)
    {
        $list = str_replace(array(' ', "\t"), '', $list);

        $elements = array();
        $attributes = array();

        $chunks = preg_split('/(,|[\n\r]+)/', $list);
        foreach ($chunks as $chunk) {
            if (empty($chunk)) {
                continue;
            }
            // remove TinyMCE element control characters
            // NOTE: a '[' at offset 0 is treated the same as no '[' at all,
            // so such a chunk is taken verbatim as an element name.
            if (!strpos($chunk, '[')) {
                $element = $chunk;
                $attr = false;
            } else {
                list($element, $attr) = explode('[', $chunk);
            }
            if ($element !== '*') {
                $elements[$element] = true;
            }
            if (!$attr) {
                continue;
            }
            // The last character is dropped unconditionally; it is expected
            // to be the closing bracket.
            $attr = substr($attr, 0, strlen($attr) - 1); // remove trailing ]
            $attr = explode('|', $attr);
            foreach ($attr as $key) {
                $attributes["$element.$key"] = true;
            }
        }
        return array($elements, $attributes);
    }
}

// vim: et sw=4 sts=4