File indexing completed on 2024-05-12 06:02:06

0001 <?php
0002 
0003 /**
0004  * Removes all unrecognized tags from the list of tokens.
0005  *
0006  * This strategy iterates through all the tokens and removes unrecognized
0007  * tokens. If a token is not recognized but a TagTransform is defined for
0008  * that element, the element will be transformed accordingly.
0009  */
0010 
class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
{

    /**
     * Walks the token list once and keeps only the tokens that survive
     * filtering:
     *
     *  - tag tokens are first passed through any registered tag transform;
     *    tags known to the HTML definition are kept (after a required
     *    attribute check), unknown tags are either converted to text or
     *    dropped depending on Core.EscapeInvalidTags;
     *  - dropping the start tag of an unknown "hidden" element also drops
     *    everything up to the next tag token carrying the same name;
     *  - comments are turned into text inside a kept hidden element,
     *    cleaned up and kept when trusted/whitelisted, removed otherwise;
     *  - text tokens are kept as they are, anything else is discarded.
     *
     * While iterating, the token being processed is exposed through the
     * context under the name 'CurrentToken'.
     *
     * @param HTMLPurifier_Token[] $tokens
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|HTMLPurifier_Token[]
     */
    public function execute($tokens, $config, $context)
    {
        $htmlDef = $config->getHTMLDefinition();
        $tokenPrinter = new HTMLPurifier_Generator($config, $context);
        $kept = array();

        $escapeForeign = $config->get('Core.EscapeInvalidTags');
        $dropInvalidImg = $config->get('Core.RemoveInvalidImg');

        // these three settings only influence what happens to comments
        $trusted = $config->get('HTML.Trusted');
        $allowedComments = $config->get('HTML.AllowedComments');
        $allowedCommentPattern = $config->get('HTML.AllowedCommentsRegexp');
        $filterComments = $allowedCommentPattern !== null || $allowedComments !== array();

        $scriptContentsFlag = $config->get('Core.RemoveScriptContents');
        $hiddenTags = $config->get('Core.HiddenElements');

        // Core.RemoveScriptContents compatibility: a strict true/false
        // forces 'script' into / out of the hidden element lookup, any
        // other value leaves the lookup untouched
        if ($scriptContentsFlag === true) {
            $hiddenTags['script'] = true;
        } elseif ($scriptContentsFlag === false) {
            unset($hiddenTags['script']);
        }

        $attrChecker = new HTMLPurifier_AttrValidator();

        // tag name we are skipping towards, or false when not skipping
        $skipUntilTag = false;

        // name of the kept hidden element whose comments become text,
        // or false when no such element is open
        $textifyInside = false;

        // $token is bound by reference so the context always sees the
        // token currently being processed
        $token = false;
        $context->register('CurrentToken', $token);

        $errors = false;
        if ($config->get('Core.CollectErrors')) {
            $errors =& $context->get('ErrorCollector');
        }

        foreach ($tokens as $token) {
            // skipping mode: ignore everything except a tag token with
            // the name we are waiting for
            if ($skipUntilTag && (empty($token->is_tag) || $token->name !== $skipUntilTag)) {
                continue;
            }

            if (!empty($token->is_tag)) {
                // give a registered transform the first shot at the tag
                if (isset($htmlDef->info_tag_transform[$token->name])) {
                    $nameBeforeTransform = $token->name;
                    $transform = $htmlDef->info_tag_transform[$nameBeforeTransform];
                    $token = $transform->transform($token, $config, $context);
                    if ($errors) {
                        $errors->send(
                            E_NOTICE,
                            'Strategy_RemoveForeignElements: Tag transform',
                            $nameBeforeTransform
                        );
                    }
                }

                if (!isset($htmlDef->info[$token->name])) {
                    if (!$escapeForeign) {
                        // unknown tag that must be dropped; for hidden
                        // elements also decide whether to start or stop
                        // swallowing the tokens that follow
                        $isHidden = isset($hiddenTags[$token->name]);
                        if ($isHidden && !($token instanceof HTMLPurifier_Token_Empty)) {
                            $skipUntilTag = ($token instanceof HTMLPurifier_Token_Start)
                                ? $token->name
                                : false;
                        }
                        if ($errors) {
                            $errors->send(
                                E_ERROR,
                                $isHidden
                                    ? 'Strategy_RemoveForeignElements: Foreign meta element removed'
                                    : 'Strategy_RemoveForeignElements: Foreign element removed'
                            );
                        }
                        continue;
                    }

                    // unknown tag, but escaping is on: keep its HTML
                    // representation as a text token
                    if ($errors) {
                        $errors->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
                    }
                    $token = new HTMLPurifier_Token_Text(
                        $tokenPrinter->generateFromToken($token)
                    );
                } else {
                    $elementDef = $htmlDef->info[$token->name];
                    $opensElement = $token instanceof HTMLPurifier_Token_Start
                        || $token instanceof HTMLPurifier_Token_Empty;
                    // img is exempt from the check unless Core.RemoveInvalidImg is on
                    $imgExempt = $token->name == 'img' && !$dropInvalidImg;

                    if ($opensElement && $elementDef->required_attr && !$imgExempt) {
                        $attrChecker->validateToken($token, $config, $context);

                        // find the first required attribute that did not
                        // survive validation, if any
                        $missingAttr = null;
                        foreach ($elementDef->required_attr as $attrName) {
                            if (!isset($token->attr[$attrName])) {
                                $missingAttr = $attrName;
                                break;
                            }
                        }

                        if ($missingAttr !== null) {
                            if ($errors) {
                                $errors->send(
                                    E_ERROR,
                                    'Strategy_RemoveForeignElements: Missing required attribute',
                                    $missingAttr
                                );
                            }
                            continue;
                        }

                        // attributes were validated here already; flag the token accordingly
                        $token->armor['ValidateAttributes'] = true;
                    }

                    // track whether we are inside a kept hidden element
                    if ($token instanceof HTMLPurifier_Token_Start && isset($hiddenTags[$token->name])) {
                        $textifyInside = $token->name;
                    } elseif ($token instanceof HTMLPurifier_Token_End && $token->name === $textifyInside) {
                        $textifyInside = false;
                    }
                }
            } elseif ($token instanceof HTMLPurifier_Token_Comment) {
                if ($textifyInside !== false) {
                    // comment inside a kept hidden element: keep its data as text
                    $token = new HTMLPurifier_Token_Text($token->data);
                } else {
                    if (!$trusted && !$filterComments) {
                        // comments are not allowed at all
                        if ($errors) {
                            $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                        }
                        continue;
                    }

                    // the trailing hyphen is only looked at when errors are collected
                    $hadTrailingHyphen = $errors && substr($token->data, -1) == '-';

                    // clean-up: strip trailing hyphens, then collapse
                    // every run of hyphens down to a single one
                    $token->data = rtrim($token->data, '-');
                    $collapsedHyphens = strpos($token->data, '--') !== false;
                    if ($collapsedHyphens) {
                        do {
                            $token->data = str_replace('--', '-', $token->data);
                        } while (strpos($token->data, '--') !== false);
                    }

                    $commentText = trim($token->data);
                    $commentAllowed = $trusted
                        || !empty($allowedComments[$commentText])
                        || ($allowedCommentPattern !== null && preg_match($allowedCommentPattern, $commentText));

                    if (!$commentAllowed) {
                        if ($errors) {
                            $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                        }
                        continue;
                    }

                    // comment is kept; report what the clean-up changed
                    if ($errors) {
                        if ($hadTrailingHyphen) {
                            $errors->send(
                                E_NOTICE,
                                'Strategy_RemoveForeignElements: Trailing hyphen in comment removed'
                            );
                        }
                        if ($collapsedHyphens) {
                            $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
                        }
                    }
                }
            } elseif (!($token instanceof HTMLPurifier_Token_Text)) {
                // neither tag, comment nor text: discard
                continue;
            }

            $kept[] = $token;
        }

        if ($skipUntilTag && $errors) {
            // the awaited tag never showed up, so tokens were dropped up to the end
            $errors->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $skipUntilTag);
        }

        $context->destroy('CurrentToken');
        return $kept;
    }
}
0206 
0207 // vim: et sw=4 sts=4