File indexing completed on 2024-12-22 05:36:22
<?php

/**
 * Parses a URI into the components and fragment identifier as specified
 * by RFC 3986.
 */
class HTMLPurifier_URIParser
{

    /**
     * Instance of HTMLPurifier_PercentEncoder to do normalization with.
     * @type HTMLPurifier_PercentEncoder
     */
    protected $percentEncoder;

    /**
     * Creates the parser together with the percent-encoder it uses to
     * normalize every URI before splitting it.
     */
    public function __construct()
    {
        $this->percentEncoder = new HTMLPurifier_PercentEncoder();
    }

    /**
     * Parses a URI.
     *
     * The URI is first run through the percent-encoder's normalize(), then
     * split into scheme, authority, path, query and fragment with a single
     * regular expression. When an authority is present it is split further
     * into userinfo, host and port; otherwise those three are null.
     *
     * @param string $uri URI to parse
     * @return HTMLPurifier_URI|bool Representation of the URI, or false when
     *         the splitting expression does not match. The representation has
     *         not been validated yet and may not conform to the RFC.
     */
    public function parse($uri)
    {
        $uri = $this->percentEncoder->normalize($uri);

        // Regexp is as per Appendix B.
        // Note that ["<>] are an addition to the RFC's recommended
        // characters, because they represent external delimiters.
        // The numbers below are the capturing groups that hold each bare
        // component; the group just before each one (1, 3, 6, 8) also
        // contains the component's delimiter.
        $r_URI = '!'.
            '(([a-zA-Z0-9\.\+\-]+):)?'. // 2. Scheme
            '(//([^/?#"<>]*))?'. // 4. Authority
            '([^?#"<>]*)'.       // 5. Path
            '(\?([^#"<>]*))?'.   // 7. Query
            '(#([^"<>]*))?'.     // 9. Fragment
            '!';

        $matches = array();
        $result = preg_match($r_URI, $uri, $matches);

        if (!$result) {
            return false; // *really* invalid URI
        }

        // Separate out parts. Presence is tested on the delimiter-bearing
        // wrapper group, which can never be the string "0", so empty() is
        // safe here and an empty-but-present component (e.g. "?") is kept
        // as '' rather than null.
        $scheme     = !empty($matches[1]) ? $matches[2] : null;
        $authority  = !empty($matches[3]) ? $matches[4] : null;
        $path       = $matches[5]; // always present, can be empty
        $query      = !empty($matches[6]) ? $matches[7] : null;
        $fragment   = !empty($matches[8]) ? $matches[9] : null;

        // further parse authority
        if ($authority !== null) {
            $r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
            $matches = array();
            preg_match($r_authority, $authority, $matches);
            $userinfo   = !empty($matches[1]) ? $matches[2] : null;
            // The host group carries no delimiter, so it may legitimately
            // be "0". empty("0") is true in PHP, so test for presence with
            // isset() instead; a missing capture still falls back to ''.
            $host       = isset($matches[3]) ? $matches[3] : '';
            $port       = !empty($matches[4]) ? (int) $matches[5] : null;
        } else {
            $port = $host = $userinfo = null;
        }

        return new HTMLPurifier_URI(
            $scheme, $userinfo, $host, $port, $path, $query, $fragment);
    }

}

// vim: et sw=4 sts=4