File indexing completed on 2024-03-24 06:03:43

0001 <?php
0002 
0003 /*! @mainpage
0004  *
0005  * HTML Purifier is an HTML filter that will take an arbitrary snippet of
0006  * HTML and rigorously test, validate and filter it into a version that
0007  * is safe for output onto webpages. It achieves this by:
0008  *
0009  *  -# Lexing (parsing into tokens) the document,
0010  *  -# Executing various strategies on the tokens:
0011  *      -# Removing all elements not in the whitelist,
0012  *      -# Making the tokens well-formed,
0013  *      -# Fixing the nesting of the nodes, and
0014  *      -# Validating attributes of the nodes; and
0015  *  -# Generating HTML from the purified tokens.
0016  *
0017  * However, most users will only need to interface with the HTMLPurifier
0018  * and HTMLPurifier_Config.
0019  */
0020 
0021 /*
0022     HTML Purifier 4.9.3 - Standards Compliant HTML Filtering
0023     Copyright (C) 2006-2008 Edward Z. Yang
0024 
0025     This library is free software; you can redistribute it and/or
0026     modify it under the terms of the GNU Lesser General Public
0027     License as published by the Free Software Foundation; either
0028     version 2.1 of the License, or (at your option) any later version.
0029 
0030     This library is distributed in the hope that it will be useful,
0031     but WITHOUT ANY WARRANTY; without even the implied warranty of
0032     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0033     Lesser General Public License for more details.
0034 
0035     You should have received a copy of the GNU Lesser General Public
0036     License along with this library; if not, write to the Free Software
0037     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
0038  */
0039 
0040 /**
0041  * Facade that coordinates HTML Purifier's subsystems in order to purify HTML.
0042  *
0043  * @note There are several points in which configuration can be specified
0044  *       for HTML Purifier.  The precedence of these (from lowest to
0045  *       highest) is as follows:
0046  *          -# Instance: new HTMLPurifier($config)
0047  *          -# Invocation: purify($html, $config)
0048  *       These configurations are entirely independent of each other and
0049  *       are *not* merged (this behavior may change in the future).
0050  *
0051  * @todo We need an easier way to inject strategies using the configuration
0052  *       object.
0053  */
class HTMLPurifier
{

    /**
     * Version of HTML Purifier.
     * @type string
     */
    public $version = '4.9.3';

    /**
     * Constant with version of HTML Purifier.
     */
    const VERSION = '4.9.3';

    /**
     * Global configuration object, used by purify() whenever no
     * per-call configuration is supplied.
     * @type HTMLPurifier_Config
     */
    public $config;

    /**
     * Array of extra filter objects to run on HTML,
     * for backwards compatibility. Populated by addFilter().
     * @type HTMLPurifier_Filter[]
     */
    private $filters = array();

    /**
     * Single instance of HTML Purifier, managed by instance().
     * @type HTMLPurifier
     */
    private static $instance;

    /**
     * Strategy that turns un-purified tokens into purified tokens.
     * @type HTMLPurifier_Strategy_Core
     */
    protected $strategy;

    /**
     * Generator used by the most recent purify() call; it is rebuilt on
     * every call because it depends on the effective configuration.
     * @type HTMLPurifier_Generator
     */
    protected $generator;

    /**
     * Resultant context of last run purification.
     * Is an array of contexts (mirroring the keys, and the nesting, of the
     * input) if the last called method was purifyArray().
     * @type HTMLPurifier_Context|array
     */
    public $context;

    /**
     * Initializes the purifier.
     *
     * @param HTMLPurifier_Config|mixed $config Optional HTMLPurifier_Config object
     *                for all instances of the purifier, if omitted, a default
     *                configuration is supplied (which can be overridden on a
     *                per-use basis).
     *                The parameter can also be any type that
     *                HTMLPurifier_Config::create() supports.
     */
    public function __construct($config = null)
    {
        $this->config = HTMLPurifier_Config::create($config);
        $this->strategy = new HTMLPurifier_Strategy_Core();
    }

    /**
     * Adds a filter to process the output. First come first serve
     *
     * @note Deprecated: every call raises an E_USER_WARNING, but the filter
     *       is still registered and will be run by purify().
     *
     * @param HTMLPurifier_Filter $filter HTMLPurifier_Filter object
     */
    public function addFilter($filter)
    {
        trigger_error(
            'HTMLPurifier->addFilter() is deprecated, use configuration directives' .
            ' in the Filter namespace or Filter.Custom',
            E_USER_WARNING
        );
        $this->filters[] = $filter;
    }

    /**
     * Filters an HTML snippet/document to be XSS-free and standards-compliant.
     *
     * The steps are: convert the input to UTF-8, run every filter's
     * preFilter(), tokenize, execute the core strategy on the tokens,
     * generate HTML from the result, run every filter's postFilter() in
     * reverse order, and finally convert back from UTF-8. The context built
     * for this run is stored in $this->context.
     *
     * @param string $html String of HTML to purify
     * @param HTMLPurifier_Config|mixed $config Config object for this operation,
     *                if omitted, defaults to the config object specified during this
     *                object's construction. The parameter can also be any type
     *                that HTMLPurifier_Config::create() supports.
     *
     * @return string Purified HTML
     */
    public function purify($html, $config = null)
    {
        // :TODO: make the config merge in, instead of replace
        $config = $config ? HTMLPurifier_Config::create($config) : $this->config;

        // implementation is partially environment dependent, partially
        // configuration dependent
        $lexer = HTMLPurifier_Lexer::create($config);

        $context = new HTMLPurifier_Context();

        // setup HTML generator
        $this->generator = new HTMLPurifier_Generator($config, $context);
        $context->register('Generator', $this->generator);

        // set up global context variables
        if ($config->get('Core.CollectErrors')) {
            // may get moved out if other facilities use it
            $language_factory = HTMLPurifier_LanguageFactory::instance();
            $language = $language_factory->create($config, $context);
            $context->register('Locale', $language);

            $error_collector = new HTMLPurifier_ErrorCollector($context);
            $context->register('ErrorCollector', $error_collector);
        }

        // setup id_accumulator context, necessary due to the fact that
        // AttrValidator can be called from many places
        $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
        $context->register('IDAccumulator', $id_accumulator);

        $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);

        // setup filters: built-in ones enabled through Filter.* flags come
        // first, then Filter.Custom objects, then legacy addFilter() objects
        $filter_flags = $config->getBatch('Filter');
        $custom_filters = $filter_flags['Custom'];
        unset($filter_flags['Custom']);
        $filters = array();
        foreach ($filter_flags as $filter => $flag) {
            if (!$flag) {
                continue;
            }
            // NOTE(review): dotted names are presumably sub-directives of a
            // filter rather than on/off switches - confirm against the schema
            if (strpos($filter, '.') !== false) {
                continue;
            }
            $class = "HTMLPurifier_Filter_$filter";
            $filters[] = new $class;
        }
        foreach ($custom_filters as $filter) {
            // maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat
            $filters[] = $filter;
        }
        // array_merge() reindexes, so $filters is a plain 0..n-1 list
        $filters = array_merge($filters, $this->filters);
        // maybe prepare(), but later

        foreach ($filters as $filter) {
            $html = $filter->preFilter($html, $config, $context);
        }

        // purified HTML
        $html =
            $this->generator->generateFromTokens(
                // list of tokens
                $this->strategy->execute(
                    // list of un-purified tokens
                    $lexer->tokenizeHTML(
                        // un-purified HTML
                        $html,
                        $config,
                        $context
                    ),
                    $config,
                    $context
                )
            );

        // post-filters unwind in the opposite order of the pre-filters
        foreach (array_reverse($filters) as $filter) {
            $html = $filter->postFilter($html, $config, $context);
        }

        $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
        // objects are handles, so a plain assignment is enough to expose
        // this run's context
        $this->context = $context;
        return $html;
    }

    /**
     * Filters an array of HTML snippets
     *
     * Keys are preserved. Elements that are themselves arrays are purified
     * recursively, so both the returned array and $this->context mirror the
     * shape of the input.
     *
     * @param array $array_of_html Array of html snippets (strings), possibly
     *                nested in further arrays
     * @param HTMLPurifier_Config|mixed $config Optional config object for this
     *                operation. See HTMLPurifier::purify() for more details.
     *
     * @return array Array of purified HTML, with the same keys and nesting
     *               as the input
     */
    public function purifyArray($array_of_html, $config = null)
    {
        $context_array = array();
        $purified = array();
        foreach ($array_of_html as $key => $value) {
            if (is_array($value)) {
                $purified[$key] = $this->purifyArray($value, $config);
            } else {
                $purified[$key] = $this->purify($value, $config);
            }
            // after either call $this->context describes exactly this element:
            // a single context for a string, an array of contexts for an array
            $context_array[$key] = $this->context;
        }
        $this->context = $context_array;
        return $purified;
    }

    /**
     * Singleton for enforcing just one HTML Purifier in your system
     *
     * A truthy $prototype always replaces the stored instance; without one
     * the stored instance is created on first use and reused afterwards.
     *
     * @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
     *                   HTMLPurifier instance to overload singleton with,
     *                   or HTMLPurifier_Config instance to configure the
     *                   generated version with.
     *
     * @return HTMLPurifier
     */
    public static function instance($prototype = null)
    {
        if (!self::$instance || $prototype) {
            if ($prototype instanceof HTMLPurifier) {
                self::$instance = $prototype;
            } elseif ($prototype) {
                self::$instance = new HTMLPurifier($prototype);
            } else {
                self::$instance = new HTMLPurifier();
            }
        }
        return self::$instance;
    }

    /**
     * Singleton for enforcing just one HTML Purifier in your system
     *
     * @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
     *                   HTMLPurifier instance to overload singleton with,
     *                   or HTMLPurifier_Config instance to configure the
     *                   generated version with.
     *
     * @return HTMLPurifier
     * @note Backwards compatibility, see instance()
     */
    public static function getInstance($prototype = null)
    {
        return self::instance($prototype);
    }
}
0291 
0292 // vim: et sw=4 sts=4