Warning, file /webapps/ocs-webserver/library/HTMLPurifier/URI.php was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).

0001 <?php
0002 
0003 /**
0004  * HTML Purifier's internal representation of a URI.
0005  * @note
0006  *      Internal data-structures are completely escaped. If the data needs
0007  *      to be used in a non-URI context (which is very unlikely), be sure
0008  *      to decode it first. The URI may not necessarily be well-formed until
0009  *      validate() is called.
0010  */
class HTMLPurifier_URI
{
    /**
     * Scheme component, lowercased by the constructor; null when absent.
     * @type string|null
     */
    public $scheme;

    /**
     * Userinfo component (the part before "@" in the authority); null when absent.
     * @type string|null
     */
    public $userinfo;

    /**
     * Host component. Null means "no authority"; an empty string means an
     * empty authority. toString() renders the two differently.
     * @type string|null
     */
    public $host;

    /**
     * Port component, cast to int by the constructor; null when absent.
     * @type int|null
     */
    public $port;

    /**
     * Path component.
     * @type string
     */
    public $path;

    /**
     * Query component, without the leading "?"; null when absent.
     * @type string|null
     */
    public $query;

    /**
     * Fragment component, without the leading "#"; null when absent.
     * @type string|null
     */
    public $fragment;

    /**
     * @param string|null $scheme
     * @param string|null $userinfo
     * @param string|null $host
     * @param int|string|null $port
     * @param string $path
     * @param string|null $query
     * @param string|null $fragment
     * @note Automatically normalizes scheme (to lowercase) and port (to int)
     */
    public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment)
    {
        // Skip strtolower() when the scheme is absent or already lowercase.
        $this->scheme = ($scheme === null || ctype_lower($scheme)) ? $scheme : strtolower($scheme);
        $this->userinfo = $userinfo;
        $this->host = $host;
        $this->port = ($port === null) ? null : (int)$port;
        $this->path = $path;
        $this->query = $query;
        $this->fragment = $fragment;
    }

    /**
     * Retrieves a scheme object corresponding to the URI's scheme, or the
     * configured default scheme when this URI has none.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_URIScheme|bool Scheme object appropriate for
     *         validating this URI, or false if none could be obtained
     */
    public function getSchemeObj($config, $context)
    {
        $registry = HTMLPurifier_URISchemeRegistry::instance();
        if ($this->scheme !== null) {
            $scheme_obj = $registry->getScheme($this->scheme, $config, $context);
            if (!$scheme_obj) {
                // invalid scheme, clean it out
                return false;
            }
            return $scheme_obj;
        }

        // no scheme: retrieve the default one
        $def = $config->getDefinition('URI');
        $scheme_obj = $def->getDefaultScheme($config, $context);
        if (!$scheme_obj) {
            if ($def->defaultScheme !== null) {
                // something funky happened to the default scheme object
                trigger_error(
                    'Default scheme object "' . $def->defaultScheme . '" was not readable',
                    E_USER_WARNING
                );
            } // suppress error if it's null
            return false;
        }
        return $scheme_obj;
    }

    /**
     * Generic validation method applicable for all schemes. May modify
     * this URI in order to get it into a compliant form.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool True if validation/filtering succeeds, false if failure
     */
    public function validate($config, $context)
    {
        // ABNF definitions from RFC 3986
        $chars_sub_delims = '!$&\'()*+,;=';
        $chars_pchar = $chars_sub_delims . ':@';

        // validate host
        if ($this->host !== null) {
            $host_def = new HTMLPurifier_AttrDef_URI_Host();
            $this->host = $host_def->validate($this->host, $config, $context);
            if ($this->host === false) {
                $this->host = null;
            }
        }

        // validate scheme
        // NOTE: It's not appropriate to check whether or not this
        // scheme is in our registry, since a URIFilter may convert a
        // URI that we don't allow into one we do.  So instead, we just
        // check if the scheme can be dropped because there is no host
        // and it is our default scheme.
        //
        // The host test is parenthesized explicitly: "&&" binds tighter
        // than "||", so without the parentheses the block was also entered
        // for an empty host with no scheme at all, where there is nothing
        // to drop.
        if ($this->scheme !== null && ($this->host === null || $this->host === '')) {
            // support for relative paths is pretty abysmal when the
            // scheme is present, so axe it when possible
            $def = $config->getDefinition('URI');
            if ($def->defaultScheme === $this->scheme) {
                $this->scheme = null;
            }
        }

        // validate username
        if ($this->userinfo !== null) {
            $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
            $this->userinfo = $encoder->encode($this->userinfo);
        }

        // validate port: anything outside 1..65535 is discarded
        if ($this->port !== null) {
            if ($this->port < 1 || $this->port > 65535) {
                $this->port = null;
            }
        }

        // validate path
        // A null path is normalized to the empty string up front so that
        // the string offset access below is never applied to null.
        if ($this->path === null) {
            $this->path = '';
        }
        $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
        if ($this->host !== null) { // this catches $this->host === ''
            // path-abempty (hier and relative)
            // http://www.example.com/my/path
            // //www.example.com/my/path (looks odd, but works, and
            //                            recognized by most browsers)
            // (this set is valid or invalid on a scheme by scheme
            // basis, so we'll deal with it later)
            // file:///my/path
            // ///my/path
            $this->path = $segments_encoder->encode($this->path);
        } elseif ($this->path !== '') {
            if ($this->path[0] === '/') {
                // path-absolute (hier and relative)
                // http:/my/path
                // /my/path
                if (strlen($this->path) >= 2 && $this->path[1] === '/') {
                    // This could happen if the host gets stripped out:
                    // http://my/path
                    // //my/path
                    // Without a host, a leading "//" would be re-read as
                    // an authority, so the path is dropped entirely.
                    $this->path = '';
                } else {
                    $this->path = $segments_encoder->encode($this->path);
                }
            } elseif ($this->scheme !== null) {
                // path-rootless (hier)
                // http:my/path
                // Short circuit evaluation means we don't need to check nz
                $this->path = $segments_encoder->encode($this->path);
            } else {
                // path-noscheme (relative)
                // my/path
                // (once again, not checking nz)
                // The first segment is encoded without ":" in the preserved
                // set; the remainder uses the regular segment encoder.
                $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
                $c = strpos($this->path, '/');
                if ($c !== false) {
                    $this->path =
                        $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
                        $segments_encoder->encode(substr($this->path, $c));
                } else {
                    $this->path = $segment_nc_encoder->encode($this->path);
                }
            }
        }
        // else: path-empty (hier and relative), nothing to encode

        // qf = query and fragment
        $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');

        if ($this->query !== null) {
            $this->query = $qf_encoder->encode($this->query);
        }

        if ($this->fragment !== null) {
            $this->fragment = $qf_encoder->encode($this->fragment);
        }
        return true;
    }

    /**
     * Convert URI back to string
     * @return string URI appropriate for output
     */
    public function toString()
    {
        // reconstruct authority
        $authority = null;
        // there is a rendering difference between a null authority
        // (http:foo-bar) and an empty string authority
        // (http:///foo-bar).
        if ($this->host !== null) {
            $authority = '';
            if ($this->userinfo !== null) {
                $authority .= $this->userinfo . '@';
            }
            $authority .= $this->host;
            if ($this->port !== null) {
                $authority .= ':' . $this->port;
            }
        }

        // Reconstruct the result
        // One might wonder about parsing quirks from browsers after
        // this reconstruction.  Unfortunately, parsing behavior depends
        // on what *scheme* was employed (file:///foo is handled *very*
        // differently than http:///foo), so unfortunately we have to
        // defer to the schemes to do the right thing.
        $result = '';
        if ($this->scheme !== null) {
            $result .= $this->scheme . ':';
        }
        if ($authority !== null) {
            $result .= '//' . $authority;
        }
        $result .= $this->path;
        if ($this->query !== null) {
            $result .= '?' . $this->query;
        }
        if ($this->fragment !== null) {
            $result .= '#' . $this->fragment;
        }

        return $result;
    }

    /**
     * Returns true if this URL might be considered a 'local' URL given
     * the current context.  This is true when the host is null, or
     * when it matches the host supplied to the configuration.
     *
     * Note that this does not do any scheme checking, so it is mostly
     * only appropriate for metadata that doesn't care about protocol
     * security.  isBenign is probably what you actually want.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function isLocal($config, $context)
    {
        if ($this->host === null) {
            return true;
        }
        $uri_def = $config->getDefinition('URI');
        return $uri_def->host === $this->host;
    }

    /**
     * Returns true if this URL should be considered a 'benign' URL,
     * that is:
     *
     *      - It is a local URL (isLocal), and
     *      - It has a equal or better level of security
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function isBenign($config, $context)
    {
        if (!$this->isLocal($config, $context)) {
            return false;
        }

        $scheme_obj = $this->getSchemeObj($config, $context);
        if (!$scheme_obj) {
            return false;
        } // conservative approach

        // The default scheme lookup may yield no object (getSchemeObj()
        // handles the same case), so it is checked before reading ->secure.
        // With no default scheme there is no security level to fall below,
        // and the URI is still reported as benign.
        $current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context);
        if ($current_scheme_obj && $current_scheme_obj->secure && !$scheme_obj->secure) {
            return false;
        }
        return true;
    }
}
0315 
0316 // vim: et sw=4 sts=4