File indexing completed on 2025-01-05 05:24:27

0001 <?php
0002 
0003 /**
0004  * Definition of the purified HTML that describes allowed children,
0005  * attributes, and many other things.
0006  *
0007  * Conventions:
0008  *
0009  * All member variables that are prefixed with info
0010  * (including the main $info array) are used by HTML Purifier internals
0011  * and should not be directly edited when customizing the HTMLDefinition.
0012  * They can usually be set via configuration directives or custom
0013  * modules.
0014  *
0015  * On the other hand, member variables without the info prefix are used
0016  * internally by the HTMLDefinition and MUST NOT be used by other HTML
0017  * Purifier internals. Many of them, however, are public, and may be
0018  * edited by userspace code to tweak the behavior of HTMLDefinition.
0019  *
0020  * @note This class is inspected by Printer_HTMLDefinition; please
0021  *       update that class if things here change.
0022  *
0023  * @warning Directives that change this object's structure must be in
0024  *          the HTML or Attr namespace!
0025  */
0026 class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
0027 {
0028 
0029     // FULLY-PUBLIC VARIABLES ---------------------------------------------
0030 
0031     /**
0032      * Associative array of element names to HTMLPurifier_ElementDef.
0033      * @type HTMLPurifier_ElementDef[]
0034      */
0035     public $info = array();
0036 
0037     /**
0038      * Associative array of global attribute name to attribute definition.
0039      * @type array
0040      */
0041     public $info_global_attr = array();
0042 
0043     /**
0044      * String name of parent element HTML will be going into.
0045      * @type string
0046      */
0047     public $info_parent = 'div';
0048 
0049     /**
0050      * Definition for parent element, allows parent element to be a
0051      * tag that's not allowed inside the HTML fragment.
0052      * @type HTMLPurifier_ElementDef
0053      */
0054     public $info_parent_def;
0055 
0056     /**
0057      * String name of element used to wrap inline elements in block context.
0058      * @type string
0059      * @note This is rarely used except for BLOCKQUOTEs in strict mode
0060      */
0061     public $info_block_wrapper = 'p';
0062 
0063     /**
0064      * Associative array of deprecated tag name to HTMLPurifier_TagTransform.
0065      * @type array
0066      */
0067     public $info_tag_transform = array();
0068 
0069     /**
0070      * Indexed list of HTMLPurifier_AttrTransform to be performed before validation.
0071      * @type HTMLPurifier_AttrTransform[]
0072      */
0073     public $info_attr_transform_pre = array();
0074 
0075     /**
0076      * Indexed list of HTMLPurifier_AttrTransform to be performed after validation.
0077      * @type HTMLPurifier_AttrTransform[]
0078      */
0079     public $info_attr_transform_post = array();
0080 
0081     /**
0082      * Nested lookup array of content set name (Block, Inline) to
0083      * element name to whether or not it belongs in that content set.
0084      * @type array
0085      */
0086     public $info_content_sets = array();
0087 
0088     /**
0089      * Indexed list of HTMLPurifier_Injector to be used.
0090      * @type HTMLPurifier_Injector[]
0091      */
0092     public $info_injector = array();
0093 
0094     /**
0095      * Doctype object
0096      * @type HTMLPurifier_Doctype
0097      */
0098     public $doctype;
0099 
0100 
0101 
0102     // RAW CUSTOMIZATION STUFF --------------------------------------------
0103 
0104     /**
0105      * Adds a custom attribute to a pre-existing element
0106      * @note This is strictly convenience, and does not have a corresponding
0107      *       method in HTMLPurifier_HTMLModule
0108      * @param string $element_name Element name to add attribute to
0109      * @param string $attr_name Name of attribute
0110      * @param mixed $def Attribute definition, can be string or object, see
0111      *             HTMLPurifier_AttrTypes for details
0112      */
0113     public function addAttribute($element_name, $attr_name, $def)
0114     {
0115         $module = $this->getAnonymousModule();
0116         if (!isset($module->info[$element_name])) {
0117             $element = $module->addBlankElement($element_name);
0118         } else {
0119             $element = $module->info[$element_name];
0120         }
0121         $element->attr[$attr_name] = $def;
0122     }
0123 
0124     /**
0125      * Adds a custom element to your HTML definition
0126      * @see HTMLPurifier_HTMLModule::addElement() for detailed
0127      *       parameter and return value descriptions.
0128      */
0129     public function addElement($element_name, $type, $contents, $attr_collections, $attributes = array())
0130     {
0131         $module = $this->getAnonymousModule();
0132         // assume that if the user is calling this, the element
0133         // is safe. This may not be a good idea
0134         $element = $module->addElement($element_name, $type, $contents, $attr_collections, $attributes);
0135         return $element;
0136     }
0137 
0138     /**
0139      * Adds a blank element to your HTML definition, for overriding
0140      * existing behavior
0141      * @param string $element_name
0142      * @return HTMLPurifier_ElementDef
0143      * @see HTMLPurifier_HTMLModule::addBlankElement() for detailed
0144      *       parameter and return value descriptions.
0145      */
0146     public function addBlankElement($element_name)
0147     {
0148         $module  = $this->getAnonymousModule();
0149         $element = $module->addBlankElement($element_name);
0150         return $element;
0151     }
0152 
0153     /**
0154      * Retrieves a reference to the anonymous module, so you can
0155      * bust out advanced features without having to make your own
0156      * module.
0157      * @return HTMLPurifier_HTMLModule
0158      */
0159     public function getAnonymousModule()
0160     {
0161         if (!$this->_anonModule) {
0162             $this->_anonModule = new HTMLPurifier_HTMLModule();
0163             $this->_anonModule->name = 'Anonymous';
0164         }
0165         return $this->_anonModule;
0166     }
0167 
0168     private $_anonModule = null;
0169 
0170     // PUBLIC BUT INTERNAL VARIABLES --------------------------------------
0171 
0172     /**
0173      * @type string
0174      */
0175     public $type = 'HTML';
0176 
0177     /**
0178      * @type HTMLPurifier_HTMLModuleManager
0179      */
0180     public $manager;
0181 
0182     /**
0183      * Performs low-cost, preliminary initialization.
0184      */
0185     public function __construct()
0186     {
0187         $this->manager = new HTMLPurifier_HTMLModuleManager();
0188     }
0189 
0190     /**
0191      * @param HTMLPurifier_Config $config
0192      */
0193     protected function doSetup($config)
0194     {
0195         $this->processModules($config);
0196         $this->setupConfigStuff($config);
0197         unset($this->manager);
0198 
0199         // cleanup some of the element definitions
0200         foreach ($this->info as $k => $v) {
0201             unset($this->info[$k]->content_model);
0202             unset($this->info[$k]->content_model_type);
0203         }
0204     }
0205 
0206     /**
0207      * Extract out the information from the manager
0208      * @param HTMLPurifier_Config $config
0209      */
0210     protected function processModules($config)
0211     {
0212         if ($this->_anonModule) {
0213             // for user specific changes
0214             // this is late-loaded so we don't have to deal with PHP4
0215             // reference wonky-ness
0216             $this->manager->addModule($this->_anonModule);
0217             unset($this->_anonModule);
0218         }
0219 
0220         $this->manager->setup($config);
0221         $this->doctype = $this->manager->doctype;
0222 
0223         foreach ($this->manager->modules as $module) {
0224             foreach ($module->info_tag_transform as $k => $v) {
0225                 if ($v === false) {
0226                     unset($this->info_tag_transform[$k]);
0227                 } else {
0228                     $this->info_tag_transform[$k] = $v;
0229                 }
0230             }
0231             foreach ($module->info_attr_transform_pre as $k => $v) {
0232                 if ($v === false) {
0233                     unset($this->info_attr_transform_pre[$k]);
0234                 } else {
0235                     $this->info_attr_transform_pre[$k] = $v;
0236                 }
0237             }
0238             foreach ($module->info_attr_transform_post as $k => $v) {
0239                 if ($v === false) {
0240                     unset($this->info_attr_transform_post[$k]);
0241                 } else {
0242                     $this->info_attr_transform_post[$k] = $v;
0243                 }
0244             }
0245             foreach ($module->info_injector as $k => $v) {
0246                 if ($v === false) {
0247                     unset($this->info_injector[$k]);
0248                 } else {
0249                     $this->info_injector[$k] = $v;
0250                 }
0251             }
0252         }
0253         $this->info = $this->manager->getElements();
0254         $this->info_content_sets = $this->manager->contentSets->lookup;
0255     }
0256 
0257     /**
0258      * Sets up stuff based on config. We need a better way of doing this.
0259      * @param HTMLPurifier_Config $config
0260      */
0261     protected function setupConfigStuff($config)
0262     {
0263         $block_wrapper = $config->get('HTML.BlockWrapper');
0264         if (isset($this->info_content_sets['Block'][$block_wrapper])) {
0265             $this->info_block_wrapper = $block_wrapper;
0266         } else {
0267             trigger_error(
0268                 'Cannot use non-block element as block wrapper',
0269                 E_USER_ERROR
0270             );
0271         }
0272 
0273         $parent = $config->get('HTML.Parent');
0274         $def = $this->manager->getElement($parent, true);
0275         if ($def) {
0276             $this->info_parent = $parent;
0277             $this->info_parent_def = $def;
0278         } else {
0279             trigger_error(
0280                 'Cannot use unrecognized element as parent',
0281                 E_USER_ERROR
0282             );
0283             $this->info_parent_def = $this->manager->getElement($this->info_parent, true);
0284         }
0285 
0286         // support template text
0287         $support = "(for information on implementing this, see the support forums) ";
0288 
0289         // setup allowed elements -----------------------------------------
0290 
0291         $allowed_elements = $config->get('HTML.AllowedElements');
0292         $allowed_attributes = $config->get('HTML.AllowedAttributes'); // retrieve early
0293 
0294         if (!is_array($allowed_elements) && !is_array($allowed_attributes)) {
0295             $allowed = $config->get('HTML.Allowed');
0296             if (is_string($allowed)) {
0297                 list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed);
0298             }
0299         }
0300 
0301         if (is_array($allowed_elements)) {
0302             foreach ($this->info as $name => $d) {
0303                 if (!isset($allowed_elements[$name])) {
0304                     unset($this->info[$name]);
0305                 }
0306                 unset($allowed_elements[$name]);
0307             }
0308             // emit errors
0309             foreach ($allowed_elements as $element => $d) {
0310                 $element = htmlspecialchars($element); // PHP doesn't escape errors, be careful!
0311                 trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
0312             }
0313         }
0314 
0315         // setup allowed attributes ---------------------------------------
0316 
0317         $allowed_attributes_mutable = $allowed_attributes; // by copy!
0318         if (is_array($allowed_attributes)) {
0319             // This actually doesn't do anything, since we went away from
0320             // global attributes. It's possible that userland code uses
0321             // it, but HTMLModuleManager doesn't!
0322             foreach ($this->info_global_attr as $attr => $x) {
0323                 $keys = array($attr, "*@$attr", "*.$attr");
0324                 $delete = true;
0325                 foreach ($keys as $key) {
0326                     if ($delete && isset($allowed_attributes[$key])) {
0327                         $delete = false;
0328                     }
0329                     if (isset($allowed_attributes_mutable[$key])) {
0330                         unset($allowed_attributes_mutable[$key]);
0331                     }
0332                 }
0333                 if ($delete) {
0334                     unset($this->info_global_attr[$attr]);
0335                 }
0336             }
0337 
0338             foreach ($this->info as $tag => $info) {
0339                 foreach ($info->attr as $attr => $x) {
0340                     $keys = array("$tag@$attr", $attr, "*@$attr", "$tag.$attr", "*.$attr");
0341                     $delete = true;
0342                     foreach ($keys as $key) {
0343                         if ($delete && isset($allowed_attributes[$key])) {
0344                             $delete = false;
0345                         }
0346                         if (isset($allowed_attributes_mutable[$key])) {
0347                             unset($allowed_attributes_mutable[$key]);
0348                         }
0349                     }
0350                     if ($delete) {
0351                         if ($this->info[$tag]->attr[$attr]->required) {
0352                             trigger_error(
0353                                 "Required attribute '$attr' in element '$tag' " .
0354                                 "was not allowed, which means '$tag' will not be allowed either",
0355                                 E_USER_WARNING
0356                             );
0357                         }
0358                         unset($this->info[$tag]->attr[$attr]);
0359                     }
0360                 }
0361             }
0362             // emit errors
0363             foreach ($allowed_attributes_mutable as $elattr => $d) {
0364                 $bits = preg_split('/[.@]/', $elattr, 2);
0365                 $c = count($bits);
0366                 switch ($c) {
0367                     case 2:
0368                         if ($bits[0] !== '*') {
0369                             $element = htmlspecialchars($bits[0]);
0370                             $attribute = htmlspecialchars($bits[1]);
0371                             if (!isset($this->info[$element])) {
0372                                 trigger_error(
0373                                     "Cannot allow attribute '$attribute' if element " .
0374                                     "'$element' is not allowed/supported $support"
0375                                 );
0376                             } else {
0377                                 trigger_error(
0378                                     "Attribute '$attribute' in element '$element' not supported $support",
0379                                     E_USER_WARNING
0380                                 );
0381                             }
0382                             break;
0383                         }
0384                         // otherwise fall through
0385                     case 1:
0386                         $attribute = htmlspecialchars($bits[0]);
0387                         trigger_error(
0388                             "Global attribute '$attribute' is not ".
0389                             "supported in any elements $support",
0390                             E_USER_WARNING
0391                         );
0392                         break;
0393                 }
0394             }
0395         }
0396 
0397         // setup forbidden elements ---------------------------------------
0398 
0399         $forbidden_elements   = $config->get('HTML.ForbiddenElements');
0400         $forbidden_attributes = $config->get('HTML.ForbiddenAttributes');
0401 
0402         foreach ($this->info as $tag => $info) {
0403             if (isset($forbidden_elements[$tag])) {
0404                 unset($this->info[$tag]);
0405                 continue;
0406             }
0407             foreach ($info->attr as $attr => $x) {
0408                 if (isset($forbidden_attributes["$tag@$attr"]) ||
0409                     isset($forbidden_attributes["*@$attr"]) ||
0410                     isset($forbidden_attributes[$attr])
0411                 ) {
0412                     unset($this->info[$tag]->attr[$attr]);
0413                     continue;
0414                 } elseif (isset($forbidden_attributes["$tag.$attr"])) { // this segment might get removed eventually
0415                     // $tag.$attr are not user supplied, so no worries!
0416                     trigger_error(
0417                         "Error with $tag.$attr: tag.attr syntax not supported for " .
0418                         "HTML.ForbiddenAttributes; use tag@attr instead",
0419                         E_USER_WARNING
0420                     );
0421                 }
0422             }
0423         }
0424         foreach ($forbidden_attributes as $key => $v) {
0425             if (strlen($key) < 2) {
0426                 continue;
0427             }
0428             if ($key[0] != '*') {
0429                 continue;
0430             }
0431             if ($key[1] == '.') {
0432                 trigger_error(
0433                     "Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead",
0434                     E_USER_WARNING
0435                 );
0436             }
0437         }
0438 
0439         // setup injectors -----------------------------------------------------
0440         foreach ($this->info_injector as $i => $injector) {
0441             if ($injector->checkNeeded($config) !== false) {
0442                 // remove injector that does not have it's required
0443                 // elements/attributes present, and is thus not needed.
0444                 unset($this->info_injector[$i]);
0445             }
0446         }
0447     }
0448 
0449     /**
0450      * Parses a TinyMCE-flavored Allowed Elements and Attributes list into
0451      * separate lists for processing. Format is element[attr1|attr2],element2...
0452      * @warning Although it's largely drawn from TinyMCE's implementation,
0453      *      it is different, and you'll probably have to modify your lists
0454      * @param array $list String list to parse
0455      * @return array
0456      * @todo Give this its own class, probably static interface
0457      */
0458     public function parseTinyMCEAllowedList($list)
0459     {
0460         $list = str_replace(array(' ', "\t"), '', $list);
0461 
0462         $elements = array();
0463         $attributes = array();
0464 
0465         $chunks = preg_split('/(,|[\n\r]+)/', $list);
0466         foreach ($chunks as $chunk) {
0467             if (empty($chunk)) {
0468                 continue;
0469             }
0470             // remove TinyMCE element control characters
0471             if (!strpos($chunk, '[')) {
0472                 $element = $chunk;
0473                 $attr = false;
0474             } else {
0475                 list($element, $attr) = explode('[', $chunk);
0476             }
0477             if ($element !== '*') {
0478                 $elements[$element] = true;
0479             }
0480             if (!$attr) {
0481                 continue;
0482             }
0483             $attr = substr($attr, 0, strlen($attr) - 1); // remove trailing ]
0484             $attr = explode('|', $attr);
0485             foreach ($attr as $key) {
0486                 $attributes["$element.$key"] = true;
0487             }
0488         }
0489         return array($elements, $attributes);
0490     }
0491 }
0492 
0493 // vim: et sw=4 sts=4