File indexing completed on 2024-12-22 05:36:21

0001 <?php
0002 
0003 /**
0004  * Represents an XHTML 1.1 module, with information on elements, tags
0005  * and attributes.
0006  * @note Even though this is technically XHTML 1.1, it is also used for
0007  *       regular HTML parsing. We are using modularization as a convenient
0008  *       way to represent the internals of HTMLDefinition, and our
0009  *       implementation is by no means conforming and does not directly
0010  *       use the normative DTDs or XML schemas.
0011  * @note The public variables in a module should almost directly
0012  *       correspond to the variables in HTMLPurifier_HTMLDefinition.
0013  *       However, the prefix info carries no special meaning in these
0014  *       objects (include it anyway if that's the correspondence though).
0015  * @todo Consider making some member functions protected
0016  */
0017 
class HTMLPurifier_HTMLModule
{

    // -- Overloadable ----------------------------------------------------

    /**
     * Short unique string identifier of the module.
     * @type string
     */
    public $name;

    /**
     * Informally, a list of elements this module changes.
     * Not used in any significant way.
     * @type array
     */
    public $elements = array();

    /**
     * Associative array of element names to element definitions.
     * Some definitions may be incomplete, to be merged in later
     * with the full definition.
     * @type array
     */
    public $info = array();

    /**
     * Associative array of content set names to content set additions.
     * This is commonly used to, say, add an A element to the Inline
     * content set. This corresponds to an internal variable $content_sets
     * and NOT info_content_sets member variable of HTMLDefinition.
     * @type array
     */
    public $content_sets = array();

    /**
     * Associative array of attribute collection names to attribute
     * collection additions. More rarely used for adding attributes to
     * the global collections. Example is the StyleAttribute module adding
     * the style attribute to the Core. Corresponds to HTMLDefinition's
     * attr_collections->info, since the object's data is only info,
     * with extra behavior associated with it.
     * @type array
     */
    public $attr_collections = array();

    /**
     * Associative array of deprecated tag name to HTMLPurifier_TagTransform.
     * @type array
     */
    public $info_tag_transform = array();

    /**
     * List of HTMLPurifier_AttrTransform to be performed before validation.
     * @type array
     */
    public $info_attr_transform_pre = array();

    /**
     * List of HTMLPurifier_AttrTransform to be performed after validation.
     * @type array
     */
    public $info_attr_transform_post = array();

    /**
     * List of HTMLPurifier_Injector to be performed during well-formedness fixing.
     * An injector will only be invoked if all of its pre-requisites are met;
     * if an injector fails setup, there will be no error; it will simply be
     * silently disabled.
     * @type array
     */
    public $info_injector = array();

    /**
     * Boolean flag that indicates whether or not getChildDef is implemented.
     * For optimization reasons: may save a call to a function. Be sure
     * to set it if you do implement getChildDef(), otherwise it will have
     * no effect!
     * @type bool
     */
    public $defines_child_def = false;

    /**
     * Boolean flag whether or not this module is safe. If it is not safe, all
     * of its members are unsafe. Modules are safe by default (this might be
     * slightly dangerous, but it doesn't make much sense to force HTML Purifier,
     * which is based off of safe HTML, to explicitly say, "This is safe," even
     * though there are modules which are "unsafe")
     *
     * @type bool
     * @note Previously, safety could be applied at an element level granularity.
     *       We've removed this ability, so in order to add "unsafe" elements
     *       or attributes, a dedicated module with this property set to false
     *       must be used.
     */
    public $safe = true;

    /**
     * Retrieves a proper HTMLPurifier_ChildDef subclass based on
     * content_model and content_model_type member variables of
     * the HTMLPurifier_ElementDef class. There is a similar function
     * in HTMLPurifier_HTMLDefinition.
     *
     * This base implementation handles nothing and always returns false;
     * modules that override it should also set $defines_child_def to true.
     *
     * @param HTMLPurifier_ElementDef $def
     * @return HTMLPurifier_ChildDef|bool Child definition, or false when this
     *         module does not provide one (always the case here)
     */
    public function getChildDef($def)
    {
        return false;
    }

    // -- Convenience -----------------------------------------------------

    /**
     * Convenience function that sets up a new element
     * @param string $element Name of element to add
     * @param string|bool $type What content set should element be registered to?
     *              Set as false to skip this step.
     * @param string|object $contents Allowed children in form of:
     *              "$content_model_type: $content_model"
     *              Any non-string value (presumably an HTMLPurifier_ChildDef
     *              instance) is instead assigned directly to the element's
     *              child property.
     * @param array|string $attr_includes What attribute collections to register to
     *              element? A single non-empty string is treated as a one-item list.
     * @param array $attr What unique attributes does the element define?
     * @see HTMLPurifier_ElementDef:: for in-depth descriptions of these parameters.
     * @return HTMLPurifier_ElementDef Created element definition object, so you
     *         can set advanced parameters
     */
    public function addElement($element, $type, $contents, $attr_includes = array(), $attr = array())
    {
        $this->elements[] = $element;
        // parse content_model (yields two nulls when $contents is not a string)
        list($content_model_type, $content_model) = $this->parseContents($contents);
        // merge in attribute inclusions (stored under key 0 of $attr)
        $this->mergeInAttrIncludes($attr, $attr_includes);
        // add element to content sets
        if ($type) {
            $this->addElementToContentSet($element, $type);
        }
        // create element
        $this->info[$element] = HTMLPurifier_ElementDef::create(
            $content_model,
            $content_model_type,
            $attr
        );
        // literal object $contents means direct child manipulation
        if (!is_string($contents)) {
            $this->info[$element]->child = $contents;
        }
        return $this->info[$element];
    }

    /**
     * Convenience function that creates a totally blank, non-standalone
     * element. If the element is already defined in this module, an error
     * is triggered and the existing definition is returned untouched.
     * @param string $element Name of element to create
     * @return HTMLPurifier_ElementDef Created (or pre-existing) element
     */
    public function addBlankElement($element)
    {
        if (!isset($this->info[$element])) {
            $this->elements[] = $element;
            $this->info[$element] = new HTMLPurifier_ElementDef();
            $this->info[$element]->standalone = false;
        } else {
            trigger_error("Definition for $element already exists in module, cannot redefine");
        }
        return $this->info[$element];
    }

    /**
     * Convenience function that registers an element to a content set.
     * The set is stored as a ' | '-separated string of element names.
     * @param string $element Element to register
     * @param string $type Name content set (warning: case sensitive, usually upper-case
     *        first letter)
     */
    public function addElementToContentSet($element, $type)
    {
        if (!isset($this->content_sets[$type])) {
            $this->content_sets[$type] = '';
        } else {
            $this->content_sets[$type] .= ' | ';
        }
        $this->content_sets[$type] .= $element;
    }

    /**
     * Convenience function that transforms single-string contents
     * into separate content model and content model type
     * @param string $contents Allowed children in form of:
     *                  "$content_model_type: $content_model"
     *                  The shorthands 'Empty', 'Inline' and 'Flow' are
     *                  also recognized (case-sensitive).
     * @return array Two-item list: lower-cased content model type, then
     *               the trimmed content model
     * @note If contents is an object, an array of two nulls will be
     *       returned, and the caller needs to take the original $contents
     *       and use it directly.
     * @note A string without a colon yields an empty content model.
     */
    public function parseContents($contents)
    {
        if (!is_string($contents)) {
            return array(null, null);
        } // defer
        switch ($contents) {
            // check for shorthand content model forms
            case 'Empty':
                return array('empty', '');
            case 'Inline':
                return array('optional', 'Inline | #PCDATA');
            case 'Flow':
                return array('optional', 'Flow | #PCDATA');
        }
        // Split on the first colon only, so a colon inside the content model
        // is kept, and pad so a missing colon does not leave the second
        // slot undefined (which used to raise a notice/warning).
        $parts = array_pad(explode(':', $contents, 2), 2, '');
        $content_model_type = strtolower(trim($parts[0]));
        $content_model = trim($parts[1]);
        return array($content_model_type, $content_model);
    }

    /**
     * Convenience function that merges a list of attribute includes into
     * an attribute array. The includes are stored under key 0 of $attr,
     * replacing whatever was there.
     * @param array $attr Reference to attr array to modify
     * @param array $attr_includes Array of includes / string include to merge in
     */
    public function mergeInAttrIncludes(&$attr, $attr_includes)
    {
        if (!is_array($attr_includes)) {
            // normalize: empty value means no includes, anything else is
            // wrapped as a single include
            if (empty($attr_includes)) {
                $attr_includes = array();
            } else {
                $attr_includes = array($attr_includes);
            }
        }
        $attr[0] = $attr_includes;
    }

    /**
     * Convenience function that generates a lookup table with boolean
     * true as value.
     * @param string $list List of values to turn into a lookup
     * @note You can also pass an arbitrary number of arguments in
     *       place of the regular argument
     * @note Null values are skipped.
     * @return array array equivalent of list
     */
    public function makeLookup($list)
    {
        if (is_string($list)) {
            // called as makeLookup('a', 'b', ...): use every argument
            $list = func_get_args();
        }
        $ret = array();
        foreach ($list as $value) {
            if (is_null($value)) {
                continue;
            }
            $ret[$value] = true;
        }
        return $ret;
    }

    /**
     * Lazy load construction of the module after determining whether
     * or not it's needed, and also when a finalized configuration object
     * is available. Does nothing in this base class.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
    }
}
0283 
0284 // vim: et sw=4 sts=4