File indexing completed on 2024-12-22 05:36:21
<?php

/**
 * Keeps track of the doctypes and HTML modules that are available, works
 * out which modules are active for the configured doctype, and merges the
 * per-module element definitions into final element definitions.
 */
class HTMLPurifier_HTMLModuleManager
{

    /**
     * Registry of known doctypes; the constructor fills it with the
     * built-in HTML 4.01 and XHTML doctypes.
     * @type HTMLPurifier_DoctypeRegistry
     */
    public $doctypes;

    /**
     * Instance of current doctype, as returned by $this->doctypes->make()
     * in setup(). This is an object, not a string: setup() reads its
     * ->modules and ->tidyModules properties (presumably an
     * HTMLPurifier_Doctype; confirm against HTMLPurifier_DoctypeRegistry).
     * @type object
     */
    public $doctype;

    /**
     * @type HTMLPurifier_AttrTypes
     */
    public $attrTypes;

    /**
     * Active instances of modules for the specified doctype are
     * indexed, by name, in this array.
     * @type HTMLPurifier_HTMLModule[]
     */
    public $modules = array();

    /**
     * Array of recognized HTMLPurifier_HTMLModule instances,
     * indexed by the module's name property. This array is usually lazy
     * loaded, but a user can overload a module by pre-emptively
     * registering it.
     * @type HTMLPurifier_HTMLModule[]
     */
    public $registeredModules = array();

    /**
     * List of names of extra modules that were added by the user
     * using addModule(). These get unconditionally merged into the
     * current doctype, whatever it may be.
     * @type string[]
     */
    public $userModules = array();

    /**
     * Associative array of element name to list of names of modules that
     * have definitions for the element; this array is filled by setup().
     * @type array
     */
    public $elementLookup = array();

    /**
     * List of prefixes we should use for registering small names.
     * @type array
     */
    public $prefixes = array('HTMLPurifier_HTMLModule_');

    /**
     * @type HTMLPurifier_ContentSets
     */
    public $contentSets;

    /**
     * @type HTMLPurifier_AttrCollections
     */
    public $attrCollections;

    /**
     * If set to true, unsafe elements and attributes will be allowed.
     * @type bool
     */
    public $trusted = false;

    /**
     * Creates the editable helper objects and registers the built-in
     * doctypes together with the module lists each of them uses.
     */
    public function __construct()
    {
        // editable internal objects
        $this->attrTypes = new HTMLPurifier_AttrTypes();
        $this->doctypes  = new HTMLPurifier_DoctypeRegistry();

        // setup basic modules
        $common = array(
            'CommonAttributes', 'Text', 'Hypertext', 'List',
            'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
            'StyleAttribute',
            // Unsafe:
            'Scripting', 'Object', 'Forms',
            // Sorta legacy, but present in strict:
            'Name',
        );
        $transitional = array('Legacy', 'Target', 'Iframe');
        $xml = array('XMLCommonAttributes');
        $non_xml = array('NonXMLCommonAttributes');

        // setup basic doctypes
        $this->doctypes->register(
            'HTML 4.01 Transitional',
            false,
            array_merge($common, $transitional, $non_xml),
            array('Tidy_Transitional', 'Tidy_Proprietary'),
            array(),
            '-//W3C//DTD HTML 4.01 Transitional//EN',
            'http://www.w3.org/TR/html4/loose.dtd'
        );

        $this->doctypes->register(
            'HTML 4.01 Strict',
            false,
            array_merge($common, $non_xml),
            array('Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
            array(),
            '-//W3C//DTD HTML 4.01//EN',
            'http://www.w3.org/TR/html4/strict.dtd'
        );

        $this->doctypes->register(
            'XHTML 1.0 Transitional',
            true,
            array_merge($common, $transitional, $xml, $non_xml),
            array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'),
            array(),
            '-//W3C//DTD XHTML 1.0 Transitional//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
        );

        // NOTE(review): 'Tidy_Strict' is listed twice here and in the
        // XHTML 1.1 list below. setup() keys active modules by name, so the
        // visible effect is that the module's setup() runs twice. Left
        // untouched; confirm whether the repetition is intentional.
        $this->doctypes->register(
            'XHTML 1.0 Strict',
            true,
            array_merge($common, $xml, $non_xml),
            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
            array(),
            '-//W3C//DTD XHTML 1.0 Strict//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
        );

        $this->doctypes->register(
            'XHTML 1.1',
            true,
            // Iframe is a real XHTML 1.1 module, despite being
            // "transitional"!
            array_merge($common, $xml, array('Ruby', 'Iframe')),
            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1
            array(),
            '-//W3C//DTD XHTML 1.1//EN',
            'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
        );

    }

    /**
     * Registers a module to the recognized module list, useful for
     * overloading pre-existing modules.
     * @param $module Mixed: string module name, with or without
     *                HTMLPurifier_HTMLModule prefix, or instance of
     *                subclass of HTMLPurifier_HTMLModule.
     * @param $overload Boolean whether or not to overload previous modules.
     *                  If this is not set, and you do overload a module,
     *                  HTML Purifier will complain with a warning.
     * @note This function performs no include/require itself. Classes are
     *       looked up with class_exists(), so they must already be loaded
     *       or be resolvable by an autoloader you have registered.
     * @note If a string is passed as a module name, different variants
     *       will be tested in this order:
     *          - Check all prefixes with $name in the order they were
     *            added (HTMLPurifier_HTMLModule_ is the first prefix)
     *          - Check for literal object name
     *          - Trigger an E_USER_ERROR
     *       If your object name collides with an internal class, specify
     *       your module manually.
     * @note The module is stored under its own name property, which is
     *       not necessarily the string that was passed in.
     */
    public function registerModule($module, $overload = false)
    {
        if (is_string($module)) {
            // attempt to load the module
            $original_module = $module;
            $ok = false;
            foreach ($this->prefixes as $prefix) {
                $module = $prefix . $original_module;
                if (class_exists($module)) {
                    $ok = true;
                    break;
                }
            }
            if (!$ok) {
                // no prefixed class matched: fall back to the literal name
                $module = $original_module;
                if (!class_exists($module)) {
                    trigger_error(
                        $original_module . ' module does not exist',
                        E_USER_ERROR
                    );
                    return;
                }
            }
            $module = new $module();
        }
        if (empty($module->name)) {
            trigger_error('Module instance of ' . get_class($module) . ' must have name');
            return;
        }
        if (!$overload && isset($this->registeredModules[$module->name])) {
            // warn, but still replace the previous registration below
            trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING);
        }
        $this->registeredModules[$module->name] = $module;
    }

    /**
     * Adds a module to the current doctype by first registering it,
     * and then tacking it on to the active doctype
     * @param $module Mixed: module name or module instance, as accepted
     *                by registerModule(). Only the name is remembered in
     *                $userModules.
     */
    public function addModule($module)
    {
        $this->registerModule($module);
        if (is_object($module)) {
            $module = $module->name;
        }
        $this->userModules[] = $module;
    }

    /**
     * Adds a class prefix that registerModule() will use to resolve a
     * string name to a concrete class
     * @param string $prefix Class name prefix, tried after the prefixes
     *                       that were added before it
     */
    public function addPrefix($prefix)
    {
        $this->prefixes[] = $prefix;
    }

    /**
     * Performs processing on modules, after being called you may
     * use getElement() and getElements()
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        $this->trusted = $config->get('HTML.Trusted');

        // generate
        $this->doctype = $this->doctypes->make($config);
        $modules = $this->doctype->modules;

        // take out the default modules that aren't allowed
        $lookup = $config->get('HTML.AllowedModules');
        $special_cases = $config->get('HTML.CoreModules');

        if (is_array($lookup)) {
            foreach ($modules as $k => $m) {
                // core modules are kept even if they are not allowed explicitly
                if (isset($special_cases[$m])) {
                    continue;
                }
                if (!isset($lookup[$m])) {
                    unset($modules[$k]);
                }
            }
        }

        // custom modules
        if ($config->get('HTML.Proprietary')) {
            $modules[] = 'Proprietary';
        }
        if ($config->get('HTML.SafeObject')) {
            $modules[] = 'SafeObject';
        }
        if ($config->get('HTML.SafeEmbed')) {
            $modules[] = 'SafeEmbed';
        }
        if ($config->get('HTML.SafeScripting') !== array()) {
            $modules[] = 'SafeScripting';
        }
        if ($config->get('HTML.Nofollow')) {
            $modules[] = 'Nofollow';
        }
        if ($config->get('HTML.TargetBlank')) {
            $modules[] = 'TargetBlank';
        }
        // NB: HTML.TargetNoreferrer and HTML.TargetNoopener must be AFTER HTML.TargetBlank
        // so that its post-attr-transform gets run afterwards.
        if ($config->get('HTML.TargetNoreferrer')) {
            $modules[] = 'TargetNoreferrer';
        }
        if ($config->get('HTML.TargetNoopener')) {
            $modules[] = 'TargetNoopener';
        }

        // merge in custom modules
        $modules = array_merge($modules, $this->userModules);

        foreach ($modules as $module) {
            $this->processModule($module);
            $this->modules[$module]->setup($config);
        }

        foreach ($this->doctype->tidyModules as $module) {
            $this->processModule($module);
            $this->modules[$module]->setup($config);
        }

        // prepare any injectors: instantiate those given by name and
        // re-index every module's injector list by injector name
        foreach ($this->modules as $module) {
            $n = array();
            foreach ($module->info_injector as $injector) {
                if (!is_object($injector)) {
                    $class = "HTMLPurifier_Injector_$injector";
                    $injector = new $class;
                }
                $n[$injector->name] = $injector;
            }
            $module->info_injector = $n;
        }

        // setup lookup table based on all valid modules
        foreach ($this->modules as $module) {
            foreach ($module->info as $name => $def) {
                if (!isset($this->elementLookup[$name])) {
                    $this->elementLookup[$name] = array();
                }
                $this->elementLookup[$name][] = $module->name;
            }
        }

        // note the different choice
        $this->contentSets = new HTMLPurifier_ContentSets(
            // content set assembly deals with all possible modules,
            // not just ones deemed to be "safe"
            $this->modules
        );
        $this->attrCollections = new HTMLPurifier_AttrCollections(
            $this->attrTypes,
            // there is no way to directly disable a global attribute,
            // but using AllowedAttributes or simply not including
            // the module in your custom doctype should be sufficient
            $this->modules
        );
    }

    /**
     * Takes a module and adds it to the active module collection,
     * registering it if necessary.
     * @param $module Mixed: module name, or module instance. An instance
     *                is always (re-)registered and is then activated
     *                under its name property.
     */
    public function processModule($module)
    {
        if (is_object($module)) {
            // An object cannot be used as an array offset, so register the
            // instance and continue with its name as the key.
            $this->registerModule($module);
            if (empty($module->name)) {
                // registerModule() has already reported the missing name
                return;
            }
            $module = $module->name;
        } elseif (!isset($this->registeredModules[$module])) {
            $this->registerModule($module);
        }
        $this->modules[$module] = $this->registeredModules[$module];
    }

    /**
     * Retrieves merged element definitions.
     * @return HTMLPurifier_ElementDef[] Merged definitions, indexed by
     *         element name
     */
    public function getElements()
    {
        $elements = array();
        foreach ($this->modules as $module) {
            // elements of unsafe modules are only listed in trusted mode
            if (!$this->trusted && !$module->safe) {
                continue;
            }
            foreach ($module->info as $name => $v) {
                if (isset($elements[$name])) {
                    continue;
                }
                $elements[$name] = $this->getElement($name);
            }
        }

        // remove dud elements, this happens when an element that
        // appeared to be safe actually wasn't
        foreach ($elements as $n => $v) {
            if ($v === false) {
                unset($elements[$n]);
            }
        }

        return $elements;

    }

    /**
     * Retrieves a single merged element definition
     * @param string $name Name of element
     * @param bool $trusted Boolean trusted overriding parameter: set to true
     *                      if you want the full version of an element;
     *                      null (the default) uses $this->trusted
     * @return HTMLPurifier_ElementDef|bool Merged HTMLPurifier_ElementDef,
     *         or false if no usable definition exists for the element
     * @note You may notice that modules are getting iterated over twice (once
     *       in getElements() and once here). This is because getElements()
     *       only discovers which element names exist, while this method
     *       has to visit every module listed for the name in
     *       $elementLookup in order to merge their definitions.
     */
    public function getElement($name, $trusted = null)
    {
        if (!isset($this->elementLookup[$name])) {
            return false;
        }

        // setup global state variables
        $def = false;
        if ($trusted === null) {
            $trusted = $this->trusted;
        }

        // iterate through each module that has registered itself to this
        // element
        foreach ($this->elementLookup[$name] as $module_name) {
            $module = $this->modules[$module_name];

            // refuse to create/merge from a module that is deemed unsafe--
            // pretend the module doesn't exist--when trusted mode is not on.
            if (!$trusted && !$module->safe) {
                continue;
            }

            // clone is used because, ideally speaking, the original
            // definition should not be modified. Usually, this will
            // make no difference, but for consistency's sake
            $new_def = clone $module->info[$name];

            if (!$def && $new_def->standalone) {
                $def = $new_def;
            } elseif ($def) {
                // This will occur even if $new_def is standalone. In practice,
                // this will usually result in a full replacement.
                $def->mergeIn($new_def);
            } else {
                // :TODO:
                // non-standalone definitions that don't have a standalone
                // to merge into could be deferred to the end
                // HOWEVER, it is perfectly valid for a non-standalone
                // definition to lack a standalone definition, even
                // after all processing: this allows us to safely
                // specify extra attributes for elements that may not be
                // enabled all in one place.  In particular, this might
                // be the case for trusted elements.  WARNING: care must
                // be taken that the /extra/ definitions are all safe.
                continue;
            }

            // attribute value expansions
            $this->attrCollections->performInclusions($def->attr);
            $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes);

            // descendants_are_inline, for ChildDef_Chameleon
            if (is_string($def->content_model) &&
                strpos($def->content_model, 'Inline') !== false) {
                if ($name != 'del' && $name != 'ins') {
                    // this is for you, ins/del
                    $def->descendants_are_inline = true;
                }
            }

            $this->contentSets->generateChildDef($def, $module);
        }

        // This can occur if there is a blank definition, but no base to
        // mix it in with
        if (!$def) {
            return false;
        }

        // add information on required attributes
        foreach ($def->attr as $attr_name => $attr_def) {
            if ($attr_def->required) {
                $def->required_attr[] = $attr_name;
            }
        }
        return $def;
    }
}

// vim: et sw=4 sts=4