File indexing completed on 2024-05-26 06:02:31

0001 <?php
0002 
/**
 * Attribute definition for NMTOKENS-typed values: a whitespace-separated
 * list of name tokens.
 */
class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
{

    /**
     * Validates a token list and returns it in normalized form.
     *
     * The input is trimmed, broken into tokens by split(), passed through
     * filter(), and the surviving tokens are re-joined with single spaces.
     *
     * @param string $string Raw attribute value.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string The space-joined token list, or false when the
     *                     input is empty/'0' or no token survives.
     */
    public function validate($string, $config, $context)
    {
        $string = trim($string);

        // Early abort: the only two strings PHP treats as false are '' and
        // '0', and neither can ever be a valid token list.
        if ($string === '' || $string === '0') {
            return false;
        }

        // Tokenize first, then let filter() (overridable) discard tokens.
        $accepted = $this->filter(
            $this->split($string, $config, $context),
            $config,
            $context
        );

        return empty($accepted) ? false : implode(' ', $accepted);
    }

    /**
     * Extracts the well-formed tokens from a space separated list.
     *
     * A token is kept only when it is delimited by whitespace or the string
     * boundaries on both sides and consists of either "--" or an optional
     * "-" followed by an ASCII letter/underscore, then any run of ASCII
     * letters, digits, underscores and hyphens. Anything else is dropped.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array List of matched tokens, in order of appearance.
     */
    protected function split($string, $config, $context)
    {
        // Optimization opportunity: a single preg_match that captures all
        // subpatterns for reformulation could replace this.

        // Codepoints U+00A1 and above, as well as escapes, are deliberately
        // not supported: they are awkward to express in the regexp, would
        // complicate optimization, and are rarely seen in practice.
        $leftBoundary = '/(?:(?<=\s)|\A)'; // preceded by whitespace or string start
        $tokenCapture = '((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)';
        $rightBoundary = '(?:(?=\s)|\z)/'; // followed by whitespace or string end

        preg_match_all($leftBoundary . $tokenCapture . $rightBoundary, $string, $found);

        // Group 1 holds the bare tokens without surrounding whitespace.
        return $found[1];
    }

    /**
     * Template method: hook for discarding tokens by arbitrary criteria.
     *
     * This base implementation keeps every token; it exists so that an
     * overriding class can narrow the list.
     *
     * @note A more functional design would use array_filter with a
     *       callback; this class intentionally does not.
     * @param array $tokens
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array The tokens to keep (here: all of them, unchanged).
     */
    protected function filter($tokens, $config, $context)
    {
        return $tokens;
    }
}
0069 
0070 // vim: et sw=4 sts=4