File indexing completed on 2024-12-22 05:37:18
<?php

/*! @mainpage
 *
 * HTML Purifier is an HTML filter that will take an arbitrary snippet of
 * HTML and rigorously test, validate and filter it into a version that
 * is safe for output onto webpages. It achieves this by:
 *
 *  -# Lexing (parsing into tokens) the document,
 *  -# Executing various strategies on the tokens:
 *      -# Removing all elements not in the whitelist,
 *      -# Making the tokens well-formed,
 *      -# Fixing the nesting of the nodes, and
 *      -# Validating attributes of the nodes; and
 *  -# Generating HTML from the purified tokens.
 *
 * However, most users will only need to interface with the HTMLPurifier
 * and HTMLPurifier_Config.
 */

/*
    HTML Purifier 4.9.3 - Standards Compliant HTML Filtering
    Copyright (C) 2006-2008 Edward Z. Yang

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */

/**
 * Facade that coordinates HTML Purifier's subsystems in order to purify HTML.
 *
 * @note There are several points in which configuration can be specified
 *       for HTML Purifier.  The precedence of these (from lowest to
 *       highest) is as follows:
 *          -# Instance: new HTMLPurifier($config)
 *          -# Invocation: purify($html, $config)
 *       These configurations are entirely independent of each other and
 *       are *not* merged (this behavior may change in the future).
 *
 * @todo We need an easier way to inject strategies using the configuration
 *       object.
 */
class HTMLPurifier
{

    /**
     * Version of HTML Purifier.
     * @type string
     */
    public $version = '4.9.3';

    /**
     * Constant with version of HTML Purifier.
     */
    const VERSION = '4.9.3';

    /**
     * Global configuration object.
     * @type HTMLPurifier_Config
     */
    public $config;

    /**
     * Array of extra filter objects to run on HTML,
     * for backwards compatibility.
     * @type HTMLPurifier_Filter[]
     */
    private $filters = array();

    /**
     * Single instance of HTML Purifier.
     * @type HTMLPurifier
     */
    private static $instance;

    /**
     * @type HTMLPurifier_Strategy_Core
     */
    protected $strategy;

    /**
     * @type HTMLPurifier_Generator
     */
    protected $generator;

    /**
     * Resultant context of last run purification.
     * After purifyArray() this is an array of contexts keyed like the input
     * array (nested input arrays yield nested arrays of contexts).
     * @type HTMLPurifier_Context|array
     */
    public $context;

    /**
     * Initializes the purifier.
     *
     * @param HTMLPurifier_Config|mixed $config Optional HTMLPurifier_Config object
     *                for all instances of the purifier, if omitted, a default
     *                configuration is supplied (which can be overridden on a
     *                per-use basis).
     *                The parameter can also be any type that
     *                HTMLPurifier_Config::create() supports.
     */
    public function __construct($config = null)
    {
        $this->config = HTMLPurifier_Config::create($config);
        $this->strategy = new HTMLPurifier_Strategy_Core();
    }

    /**
     * Adds a filter to process the output. First come first serve
     *
     * Always raises an E_USER_WARNING because this entry point is
     * deprecated; the filter is still registered afterwards.
     *
     * @param HTMLPurifier_Filter $filter HTMLPurifier_Filter object
     */
    public function addFilter($filter)
    {
        trigger_error(
            'HTMLPurifier->addFilter() is deprecated, use configuration directives' .
            ' in the Filter namespace or Filter.Custom',
            E_USER_WARNING
        );
        $this->filters[] = $filter;
    }

    /**
     * Filters an HTML snippet/document to be XSS-free and standards-compliant.
     *
     * @param string $html String of HTML to purify
     * @param HTMLPurifier_Config|mixed $config Config object for this operation,
     *                if omitted, defaults to the config object specified during this
     *                object's construction. The parameter can also be any type
     *                that HTMLPurifier_Config::create() supports.
     *
     * @return string Purified HTML
     */
    public function purify($html, $config = null)
    {
        // :TODO: make the config merge in, instead of replace
        // NOTE: any falsy $config (null, empty array, ...) selects the
        // instance-level configuration.
        $config = $config ? HTMLPurifier_Config::create($config) : $this->config;

        // implementation is partially environment dependent, partially
        // configuration dependent
        $lexer = HTMLPurifier_Lexer::create($config);

        $context = new HTMLPurifier_Context();

        // setup HTML generator
        $this->generator = new HTMLPurifier_Generator($config, $context);
        $context->register('Generator', $this->generator);

        // set up global context variables
        if ($config->get('Core.CollectErrors')) {
            // may get moved out if other facilities use it
            $language_factory = HTMLPurifier_LanguageFactory::instance();
            $language = $language_factory->create($config, $context);
            $context->register('Locale', $language);

            $error_collector = new HTMLPurifier_ErrorCollector($context);
            $context->register('ErrorCollector', $error_collector);
        }

        // setup id_accumulator context, necessary due to the fact that
        // AttrValidator can be called from many places
        $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
        $context->register('IDAccumulator', $id_accumulator);

        $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);

        // setup filters
        $filters = $this->collectFilters($config);
        // maybe prepare(), but later

        foreach ($filters as $filter) {
            $html = $filter->preFilter($html, $config, $context);
        }

        // un-purified HTML -> un-purified tokens -> purified tokens -> purified HTML
        $tokens = $lexer->tokenizeHTML($html, $config, $context);
        $tokens = $this->strategy->execute($tokens, $config, $context);
        $html = $this->generator->generateFromTokens($tokens);

        // post-filters run in the reverse of the pre-filter order
        foreach (array_reverse($filters) as $filter) {
            $html = $filter->postFilter($html, $config, $context);
        }

        $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
        // reference assignment retained from the original implementation
        $this->context =& $context;
        return $html;
    }

    /**
     * Builds the ordered list of filters for one purification run:
     * filters enabled through flags in the Filter config namespace, then
     * the entries of Filter.Custom, then filters registered via addFilter().
     *
     * @param HTMLPurifier_Config $config Effective configuration for this run
     *
     * @return HTMLPurifier_Filter[] Sequentially indexed list of filters
     */
    private function collectFilters($config)
    {
        $filter_flags = $config->getBatch('Filter');
        $custom_filters = $filter_flags['Custom'];
        unset($filter_flags['Custom']);

        $filters = array();
        foreach ($filter_flags as $filter => $flag) {
            // skip disabled flags, and directive names containing a dot
            // (presumably settings of a filter rather than an on/off flag)
            if (!$flag || strpos($filter, '.') !== false) {
                continue;
            }
            $class = "HTMLPurifier_Filter_$filter";
            $filters[] = new $class;
        }
        foreach ($custom_filters as $filter) {
            // maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat
            $filters[] = $filter;
        }
        return array_merge($filters, $this->filters);
    }

    /**
     * Filters an array of HTML snippets
     *
     * Keys are preserved. Elements that are themselves arrays are purified
     * recursively, so arbitrarily nested arrays of snippets are supported.
     *
     * @param array $array_of_html Array of html snippets (strings or nested
     *                arrays of snippets)
     * @param HTMLPurifier_Config|mixed $config Optional config object for this
     *                operation. See HTMLPurifier::purify() for more details.
     *
     * @return array Array of purified HTML with the same shape as the input
     */
    public function purifyArray($array_of_html, $config = null)
    {
        $context_array = array();
        foreach ($array_of_html as $key => $html) {
            if (is_array($html)) {
                $array_of_html[$key] = $this->purifyArray($html, $config);
            } else {
                $array_of_html[$key] = $this->purify($html, $config);
            }
            // after either call, $this->context describes the element just
            // processed (a context, or an array of contexts for nested input)
            $context_array[$key] = $this->context;
        }
        $this->context = $context_array;
        return $array_of_html;
    }

    /**
     * Singleton for enforcing just one HTML Purifier in your system
     *
     * @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
     *                   HTMLPurifier instance to overload singleton with,
     *                   or HTMLPurifier_Config instance to configure the
     *                   generated version with.
     *
     * @return HTMLPurifier
     */
    public static function instance($prototype = null)
    {
        // reuse the existing singleton unless a prototype forces a replacement
        if (self::$instance && !$prototype) {
            return self::$instance;
        }

        if ($prototype instanceof HTMLPurifier) {
            self::$instance = $prototype;
        } elseif ($prototype) {
            self::$instance = new HTMLPurifier($prototype);
        } else {
            self::$instance = new HTMLPurifier();
        }
        return self::$instance;
    }

    /**
     * Singleton for enforcing just one HTML Purifier in your system
     *
     * @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
     *                   HTMLPurifier instance to overload singleton with,
     *                   or HTMLPurifier_Config instance to configure the
     *                   generated version with.
     *
     * @return HTMLPurifier
     * @note Backwards compatibility, see instance()
     */
    public static function getInstance($prototype = null)
    {
        return self::instance($prototype);
    }
}

// vim: et sw=4 sts=4