File indexing completed on 2025-01-26 05:29:10
<?php

/**
 * HTML Purifier's internal representation of a URI.
 * @note
 *      Internal data-structures are completely escaped. If the data needs
 *      to be used in a non-URI context (which is very unlikely), be sure
 *      to decode it first. The URI may not necessarily be well-formed until
 *      validate() is called.
 */
class HTMLPurifier_URI
{
    /**
     * Scheme component, lowercased by the constructor; null when absent.
     * @type string|null
     */
    public $scheme;

    /**
     * Userinfo component (the part before "@" in the authority); null when absent.
     * @type string|null
     */
    public $userinfo;

    /**
     * Host component. Null means "no authority"; an empty string means an
     * empty authority (rendered as "//" by toString()).
     * @type string|null
     */
    public $host;

    /**
     * Port component, cast to int by the constructor; null when absent.
     * @type int|null
     */
    public $port;

    /**
     * Path component.
     * @type string
     */
    public $path;

    /**
     * Query component (without the leading "?"); null when absent.
     * @type string|null
     */
    public $query;

    /**
     * Fragment component (without the leading "#"); null when absent.
     * @type string|null
     */
    public $fragment;

    /**
     * @param string $scheme
     * @param string $userinfo
     * @param string $host
     * @param int $port
     * @param string $path
     * @param string $query
     * @param string $fragment
     * @note Automatically normalizes scheme and port
     */
    public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment)
    {
        // Only pay for strtolower() when the scheme is not already lowercase.
        if ($scheme === null || ctype_lower($scheme)) {
            $this->scheme = $scheme;
        } else {
            $this->scheme = strtolower($scheme);
        }
        $this->userinfo = $userinfo;
        $this->host = $host;
        $this->port = ($port === null) ? null : (int)$port;
        $this->path = $path;
        $this->query = $query;
        $this->fragment = $fragment;
    }

    /**
     * Retrieves a scheme object corresponding to the URI's scheme/default
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_URIScheme|false Scheme object appropriate for
     *         validating this URI, or false when none could be obtained
     */
    public function getSchemeObj($config, $context)
    {
        $registry = HTMLPurifier_URISchemeRegistry::instance();

        if ($this->scheme !== null) {
            $scheme_obj = $registry->getScheme($this->scheme, $config, $context);
            if (!$scheme_obj) {
                // invalid scheme, clean it out
                return false;
            }
            return $scheme_obj;
        }

        // no scheme: retrieve the default one
        $def = $config->getDefinition('URI');
        $scheme_obj = $def->getDefaultScheme($config, $context);
        if (!$scheme_obj) {
            if ($def->defaultScheme !== null) {
                // something funky happened to the default scheme object
                trigger_error(
                    'Default scheme object "' . $def->defaultScheme . '" was not readable',
                    E_USER_WARNING
                );
            } // suppress error if it's null
            return false;
        }
        return $scheme_obj;
    }

    /**
     * Generic validation method applicable for all schemes. May modify
     * this URI in order to get it into a compliant form.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool True if validation/filtering succeeds, false if failure
     */
    public function validate($config, $context)
    {
        // ABNF definitions from RFC 3986
        $chars_sub_delims = '!$&\'()*+,;=';
        $chars_pchar = $chars_sub_delims . ':@';

        // validate host
        if ($this->host !== null) {
            $host_def = new HTMLPurifier_AttrDef_URI_Host();
            $this->host = $host_def->validate($this->host, $config, $context);
            if ($this->host === false) {
                $this->host = null;
            }
        }

        // validate scheme
        // NOTE: It's not appropriate to check whether or not this
        // scheme is in our registry, since a URIFilter may convert a
        // URI that we don't allow into one we do.  So instead, we just
        // check if the scheme can be dropped because there is no host
        // and it is our default scheme.
        //
        // The host test is parenthesised on purpose: && binds tighter
        // than ||, so without the grouping a scheme-less URI with an
        // empty host would also enter this branch for no effect.
        if ($this->scheme !== null && ($this->host === null || $this->host === '')) {
            // support for relative paths is pretty abysmal when the
            // scheme is present, so axe it when possible
            $def = $config->getDefinition('URI');
            if ($def->defaultScheme === $this->scheme) {
                $this->scheme = null;
            }
        }

        // validate username
        if ($this->userinfo !== null) {
            $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
            $this->userinfo = $encoder->encode($this->userinfo);
        }

        // validate port
        if ($this->port !== null) {
            if ($this->port < 1 || $this->port > 65535) {
                $this->port = null;
            }
        }

        // validate path
        $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
        if ($this->host !== null) { // this catches $this->host === ''
            // path-abempty (hier and relative)
            // http://www.example.com/my/path
            // //www.example.com/my/path (looks odd, but works, and
            //                            recognized by most browsers)
            // (this set is valid or invalid on a scheme by scheme
            // basis, so we'll deal with it later)
            // file:///my/path
            // ///my/path
            $this->path = $segments_encoder->encode($this->path);
        } elseif ($this->path !== '') {
            if ($this->path[0] === '/') {
                // path-absolute (hier and relative)
                // http:/my/path
                // /my/path
                if (strlen($this->path) >= 2 && $this->path[1] === '/') {
                    // This could happen if the host gets stripped out:
                    // http://my/path
                    // //my/path
                    // Without an authority, a path starting with "//"
                    // would be re-read as one, so it is discarded.
                    $this->path = '';
                } else {
                    $this->path = $segments_encoder->encode($this->path);
                }
            } elseif ($this->scheme !== null) {
                // path-rootless (hier)
                // http:my/path
                // Short circuit evaluation means we don't need to check nz
                $this->path = $segments_encoder->encode($this->path);
            } else {
                // path-noscheme (relative)
                // my/path
                // (once again, not checking nz)
                // The first segment is encoded without ':' in the allowed
                // set, so it cannot be mistaken for a scheme.
                $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
                $c = strpos($this->path, '/');
                if ($c !== false) {
                    $this->path =
                        $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
                        $segments_encoder->encode(substr($this->path, $c));
                } else {
                    $this->path = $segment_nc_encoder->encode($this->path);
                }
            }
        } else {
            // path-empty (hier and relative)
            $this->path = ''; // just to be safe
        }

        // qf = query and fragment
        $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');

        if ($this->query !== null) {
            $this->query = $qf_encoder->encode($this->query);
        }

        if ($this->fragment !== null) {
            $this->fragment = $qf_encoder->encode($this->fragment);
        }
        return true;
    }

    /**
     * Convert URI back to string
     * @return string URI appropriate for output
     */
    public function toString()
    {
        // reconstruct authority
        $authority = null;
        // there is a rendering difference between a null authority
        // (http:foo-bar) and an empty string authority
        // (http:///foo-bar).
        if ($this->host !== null) {
            $authority = '';
            if ($this->userinfo !== null) {
                $authority .= $this->userinfo . '@';
            }
            $authority .= $this->host;
            if ($this->port !== null) {
                $authority .= ':' . $this->port;
            }
        }

        // Reconstruct the result
        // One might wonder about parsing quirks from browsers after
        // this reconstruction.  Unfortunately, parsing behavior depends
        // on what *scheme* was employed (file:///foo is handled *very*
        // differently than http:///foo), so unfortunately we have to
        // defer to the schemes to do the right thing.
        $result = '';
        if ($this->scheme !== null) {
            $result .= $this->scheme . ':';
        }
        if ($authority !== null) {
            $result .= '//' . $authority;
        }
        $result .= $this->path;
        if ($this->query !== null) {
            $result .= '?' . $this->query;
        }
        if ($this->fragment !== null) {
            $result .= '#' . $this->fragment;
        }

        return $result;
    }

    /**
     * Returns true if this URL might be considered a 'local' URL given
     * the current context.  This is true when the host is null, or
     * when it matches the host supplied to the configuration.
     *
     * Note that this does not do any scheme checking, so it is mostly
     * only appropriate for metadata that doesn't care about protocol
     * security.  isBenign is probably what you actually want.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function isLocal($config, $context)
    {
        if ($this->host === null) {
            return true;
        }
        $uri_def = $config->getDefinition('URI');
        return $uri_def->host === $this->host;
    }

    /**
     * Returns true if this URL should be considered a 'benign' URL,
     * that is:
     *
     *      - It is a local URL (isLocal), and
     *      - It has an equal or better level of security
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function isBenign($config, $context)
    {
        if (!$this->isLocal($config, $context)) {
            return false;
        }

        $scheme_obj = $this->getSchemeObj($config, $context);
        if (!$scheme_obj) {
            return false;
        } // conservative approach

        $current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context);
        // The default scheme may be unavailable (getSchemeObj() handles the
        // same falsy result). In that case there is no secure baseline to
        // compare against, so the URI is not rejected on security grounds.
        if ($current_scheme_obj && $current_scheme_obj->secure && !$scheme_obj->secure) {
            return false;
        }
        return true;
    }
}

// vim: et sw=4 sts=4