File indexing completed on 2025-02-02 05:43:44
<?php

/**
 * Filters a token list down to the elements the HTML definition recognizes.
 *
 * Each token is visited once. Tags that have a TagTransform registered are
 * transformed first; tags that are still unknown afterwards are either
 * escaped into text or dropped, depending on configuration. Comments are
 * kept, converted to text, or removed, and any token that is neither a tag,
 * a comment, nor text is discarded.
 */

class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
{

    /**
     * Runs the filter over the token list and returns the surviving tokens.
     *
     * While the loop runs, the token being processed is exposed through the
     * context under the name 'CurrentToken'; the entry is destroyed again
     * before returning. Comment tokens that are kept have their data
     * rewritten in place (trailing hyphens stripped, hyphen runs collapsed).
     *
     * @param HTMLPurifier_Token[] $tokens
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|HTMLPurifier_Token[]
     */
    public function execute($tokens, $config, $context)
    {
        $html_def = $config->getHTMLDefinition();
        $generator = new HTMLPurifier_Generator($config, $context);
        $kept = array();

        $escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
        $remove_invalid_img = $config->get('Core.RemoveInvalidImg');

        // The next four values only influence how comments are treated.
        $trusted = $config->get('HTML.Trusted');
        $comment_lookup = $config->get('HTML.AllowedComments');
        $comment_regexp = $config->get('HTML.AllowedCommentsRegexp');
        $check_comments = $comment_lookup !== array() || $comment_regexp !== null;

        $remove_script_contents = $config->get('Core.RemoveScriptContents');
        $hidden = $config->get('Core.HiddenElements');

        // Core.RemoveScriptContents overrides the 'script' entry of the
        // hidden-element lookup when it is strictly true or strictly false.
        if ($remove_script_contents === true) {
            $hidden['script'] = true;
        } elseif ($remove_script_contents === false) {
            if (isset($hidden['script'])) {
                unset($hidden['script']);
            }
        }

        $attr_validator = new HTMLPurifier_AttrValidator();

        // Name of the foreign hidden element whose contents are currently
        // being discarded, or false when nothing is being skipped.
        $skip_until = false;

        // Name of the recognized hidden element we are inside of (comments
        // become text tokens there), or false when outside of one.
        $textify_inside = false;

        // $token is registered by reference, so the context always sees the
        // token the loop below is currently working on.
        $token = false;
        $context->register('CurrentToken', $token);

        $errors = false;
        if ($config->get('Core.CollectErrors')) {
            $errors =& $context->get('ErrorCollector');
        }

        foreach ($tokens as $token) {
            // While skipping, ignore everything except a tag carrying the
            // name we are waiting for.
            if ($skip_until && (empty($token->is_tag) || $token->name !== $skip_until)) {
                continue;
            }

            // ---- non-tag tokens ------------------------------------------
            if (empty($token->is_tag)) {
                if ($token instanceof HTMLPurifier_Token_Comment) {
                    if ($textify_inside !== false) {
                        // inside a recognized hidden element: comment becomes text
                        $token = new HTMLPurifier_Token_Text($token->data);
                    } elseif (!$trusted && !$check_comments) {
                        // comments are not allowed at all
                        if ($errors) {
                            $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                        }
                        continue;
                    } else {
                        // The trailing-hyphen flag is only needed for error
                        // reporting, so it is only computed when collecting.
                        $had_trailing_hyphen = ($errors && substr($token->data, -1) == '-');

                        $cleaned = rtrim($token->data, '-');
                        $had_double_hyphen = strpos($cleaned, '--') !== false;
                        while (strpos($cleaned, '--') !== false) {
                            $cleaned = str_replace('--', '-', $cleaned);
                        }
                        $token->data = $cleaned;

                        $allowed = $trusted;
                        if (!$allowed) {
                            $key = trim($token->data);
                            $allowed = !empty($comment_lookup[$key]) ||
                                ($comment_regexp !== null && preg_match($comment_regexp, $key));
                        }

                        if (!$allowed) {
                            if ($errors) {
                                $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                            }
                            continue;
                        }

                        if ($errors && $had_trailing_hyphen) {
                            $errors->send(
                                E_NOTICE,
                                'Strategy_RemoveForeignElements: Trailing hyphen in comment removed'
                            );
                        }
                        if ($errors && $had_double_hyphen) {
                            $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
                        }
                    }
                } elseif (!($token instanceof HTMLPurifier_Token_Text)) {
                    // neither tag, comment nor text: discard
                    continue;
                }

                $kept[] = $token;
                continue;
            }

            // ---- tag tokens ----------------------------------------------

            // Give a registered transform the first shot at the element.
            if (isset($html_def->info_tag_transform[$token->name])) {
                $original_name = $token->name;
                $token = $html_def->info_tag_transform[$original_name]->transform($token, $config, $context);
                if ($errors) {
                    $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $original_name);
                }
            }

            if (!isset($html_def->info[$token->name])) {
                if ($escape_invalid_tags) {
                    // unknown tag: keep it as the text of its own markup
                    if ($errors) {
                        $errors->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
                    }
                    $token = new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token)
                    );
                    $kept[] = $token;
                    continue;
                }

                // unknown tag: drop it, and for hidden elements its children too
                if (isset($hidden[$token->name])) {
                    if ($token instanceof HTMLPurifier_Token_Start) {
                        $skip_until = $token->name;
                    } elseif (!($token instanceof HTMLPurifier_Token_Empty)) {
                        // anything other than a start or empty tag ends the skip
                        $skip_until = false;
                    }
                    if ($errors) {
                        $errors->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed');
                    }
                } elseif ($errors) {
                    $errors->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed');
                }
                continue;
            }

            // Recognized element: verify required attributes where applicable.
            $opens_element = $token instanceof HTMLPurifier_Token_Start
                || $token instanceof HTMLPurifier_Token_Empty;
            if ($opens_element &&
                $html_def->info[$token->name]->required_attr &&
                ($token->name != 'img' || $remove_invalid_img) // img honours Core.RemoveInvalidImg
            ) {
                $attr_validator->validateToken($token, $config, $context);
                foreach ($html_def->info[$token->name]->required_attr as $required) {
                    if (isset($token->attr[$required])) {
                        continue;
                    }
                    // first missing required attribute: drop the whole token
                    if ($errors) {
                        $errors->send(
                            E_ERROR,
                            'Strategy_RemoveForeignElements: Missing required attribute',
                            $required
                        );
                    }
                    continue 2;
                }
                $token->armor['ValidateAttributes'] = true;
            }

            // Track whether we are inside a recognized hidden element.
            if ($token instanceof HTMLPurifier_Token_Start && isset($hidden[$token->name])) {
                $textify_inside = $token->name;
            } elseif ($token instanceof HTMLPurifier_Token_End && $token->name === $textify_inside) {
                $textify_inside = false;
            }

            $kept[] = $token;
        }

        if ($errors && $skip_until) {
            // input ended while we were still discarding a hidden element
            $errors->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $skip_until);
        }

        $context->destroy('CurrentToken');
        return $kept;
    }
}

// vim: et sw=4 sts=4