File indexing completed on 2024-12-22 05:36:48

0001 <?php
0002 /**
0003  * Zend Framework
0004  *
0005  * LICENSE
0006  *
0007  * This source file is subject to the new BSD license that is bundled
0008  * with this package in the file LICENSE.txt.
0009  * It is also available through the world-wide-web at this URL:
0010  * http://framework.zend.com/license/new-bsd
0011  * If you did not receive a copy of the license and are unable to
0012  * obtain it through the world-wide-web, please send an email
0013  * to license@zend.com so we can send you a copy immediately.
0014  *
0015  * @category   Zend
0016  * @package    Zend_Json
0017  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0018  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0019  * @version    $Id$
0020  */
0021 
0022 /**
0023  * @see Zend_Json
0024  */
0025 // require_once 'Zend/Json.php';
0026 
0027 /**
0028  * Decode JSON encoded string to PHP variable constructs
0029  *
0030  * @category   Zend
0031  * @package    Zend_Json
0032  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0033  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0034  */
class Zend_Json_Decoder
{
    /**
     * Parse tokens used to decode the JSON object. These are not
     * for public consumption, they are just used internally to the
     * class.
     */
    const EOF      = 0;
    const DATUM    = 1;
    const LBRACE   = 2;
    const LBRACKET = 3;
    const RBRACE   = 4;
    const RBRACKET = 5;
    const COMMA    = 6;
    const COLON    = 7;

    /**
     * The source being decoded, after {@link decodeUnicodeString()} has
     * been applied to it
     *
     * @var string
     */
    protected $_source;

    /**
     * Caches the source length (in bytes)
     *
     * @var int
     */
    protected $_sourceLength;

    /**
     * The byte offset within the source being decoded
     *
     * @var int
     */
    protected $_offset;

    /**
     * The current token being considered in the parser cycle
     *
     * @var int
     */
    protected $_token;

    /**
     * The PHP value carried by the current token when it is a DATUM
     * (string, number, boolean or null); null for every other token
     *
     * @var mixed
     */
    protected $_tokenValue;

    /**
     * Flag indicating how objects should be decoded
     *
     * @var int
     * @access protected
     */
    protected $_decodeType;

    /**
     * Constructor
     *
     * Expands \uXXXX escapes in the whole source, normalizes the decode
     * type and positions the tokenizer on the first token.
     *
     * @param string $source String source to decode
     * @param int $decodeType How objects should be decoded -- see
     * {@link Zend_Json::TYPE_ARRAY} and {@link Zend_Json::TYPE_OBJECT} for
     * valid values
     * @return void
     */
    protected function __construct($source, $decodeType)
    {
        // Set defaults
        $this->_source       = self::decodeUnicodeString($source);
        $this->_sourceLength = strlen($this->_source);
        $this->_token        = self::EOF;
        $this->_tokenValue   = null;
        $this->_offset       = 0;

        // Normalize and set $decodeType. The comparison is intentionally
        // non-strict so that values which merely compare equal to one of
        // the constants keep being accepted as before.
        if (!in_array($decodeType, array(Zend_Json::TYPE_ARRAY, Zend_Json::TYPE_OBJECT))) {
            $decodeType = Zend_Json::TYPE_ARRAY;
        }
        $this->_decodeType = $decodeType;

        // Set pointer at first token
        $this->_getNextToken();
    }

    /**
     * Decode a JSON source string
     *
     * Decodes a JSON encoded string. The value returned will be one of the
     * following:
     *      - integer
     *      - float
     *      - boolean
     *      - null
     *      - string
     *      - StdClass
     *      - array
     *          - array of one or more of the above types
     *
     * By default, decoded objects will be returned as associative arrays; to
     * return a StdClass object instead, pass {@link Zend_Json::TYPE_OBJECT} to
     * the $objectDecodeType parameter.
     *
     * Only the first JSON value of the source is decoded; this method does
     * not inspect whatever follows it.
     *
     * @static
     * @access public
     * @param string $source String to be decoded
     * @param int $objectDecodeType How objects should be decoded; should be
     * either {@link Zend_Json::TYPE_ARRAY} or
     * {@link Zend_Json::TYPE_OBJECT}; defaults to TYPE_ARRAY
     * @return mixed
     * @throws Zend_Json_Exception if the source is null, is not a string,
     * or contains a malformed token
     */
    public static function decode($source = null, $objectDecodeType = Zend_Json::TYPE_ARRAY)
    {
        if (null === $source) {
            // require_once 'Zend/Json/Exception.php';
            throw new Zend_Json_Exception('Must specify JSON encoded source for decoding');
        }

        if (!is_string($source)) {
            // require_once 'Zend/Json/Exception.php';
            throw new Zend_Json_Exception('Can only decode JSON encoded strings');
        }

        $decoder = new self($source, $objectDecodeType);

        return $decoder->_decodeValue();
    }

    /**
     * Recursive driving routine: decodes the value that starts at the
     * current token
     *
     * A DATUM yields its scalar value, "{" delegates to
     * {@link _decodeObject()} and "[" to {@link _decodeArray()}. Any other
     * token (including EOF) yields null without consuming input.
     *
     * @return mixed
     */
    protected function _decodeValue()
    {
        switch ($this->_token) {
            case self::DATUM:
                $result = $this->_tokenValue;
                $this->_getNextToken();
                return $result;

            case self::LBRACE:
                return $this->_decodeObject();

            case self::LBRACKET:
                return $this->_decodeArray();

            default:
                return null;
        }
    }

    /**
     * Decodes an object of the form:
     *  { "attribute" : value, "attribute2" : value,...}
     *
     * Every key is treated as plain data; no key (such as __className)
     * receives special handling here.
     *
     * Decodes to either an array or StdClass object, based on the value of
     * {@link $_decodeType}. If invalid $_decodeType present, returns as an
     * array. When building a StdClass, an empty key is stored under the
     * property name "_empty_".
     *
     * Reaching the end of the source before the closing "}" ends the object
     * silently with the members collected so far.
     *
     * @return array|StdClass
     * @throws Zend_Json_Exception on a missing key, ":" or ","
     */
    protected function _decodeObject()
    {
        $members = array();
        $tok     = $this->_getNextToken(); // Move past the '{'

        while ($tok && $tok != self::RBRACE) {
            if ($tok != self::DATUM || !is_string($this->_tokenValue)) {
                // require_once 'Zend/Json/Exception.php';
                throw new Zend_Json_Exception('Missing key in object encoding: ' . $this->_source);
            }

            $key = $this->_tokenValue;
            $tok = $this->_getNextToken();

            if ($tok != self::COLON) {
                // require_once 'Zend/Json/Exception.php';
                throw new Zend_Json_Exception('Missing ":" in object encoding: ' . $this->_source);
            }

            // Step onto the first token of the value, then decode it;
            // _decodeValue() leaves the tokenizer on the token after it.
            $this->_getNextToken();
            $members[$key] = $this->_decodeValue();
            $tok = $this->_token;

            if ($tok == self::RBRACE) {
                break;
            }

            if ($tok != self::COMMA) {
                // require_once 'Zend/Json/Exception.php';
                throw new Zend_Json_Exception('Missing "," in object encoding: ' . $this->_source);
            }

            $tok = $this->_getNextToken();
        }

        switch ($this->_decodeType) {
            case Zend_Json::TYPE_OBJECT:
                // Create new StdClass and populate with $members
                $result = new StdClass();
                foreach ($members as $key => $value) {
                    if ($key === '') {
                        $key = '_empty_';
                    }
                    $result->$key = $value;
                }
                break;

            case Zend_Json::TYPE_ARRAY:
            default:
                $result = $members;
                break;
        }

        // Move past the '}'
        $this->_getNextToken();

        return $result;
    }

    /**
     * Decodes a JSON array format:
     *    [element, element2,...,elementN]
     *
     * Reaching the end of the source before the closing "]" ends the array
     * silently with the elements collected so far.
     *
     * @return array
     * @throws Zend_Json_Exception when two elements are not separated by ","
     */
    protected function _decodeArray()
    {
        $result = array();
        $tok    = $this->_getNextToken(); // Move past the '['
        $index  = 0;

        while ($tok && $tok != self::RBRACKET) {
            $result[$index++] = $this->_decodeValue();

            $tok = $this->_token;

            if ($tok == self::RBRACKET || !$tok) {
                break;
            }

            if ($tok != self::COMMA) {
                // require_once 'Zend/Json/Exception.php';
                throw new Zend_Json_Exception('Missing "," in array encoding: ' . $this->_source);
            }

            $tok = $this->_getNextToken();
        }

        // Move past the ']'
        $this->_getNextToken();

        return $result;
    }

    /**
     * Advances the offset past any whitespace characters (tab, backspace,
     * form feed, line feed, carriage return, space) at the current position
     *
     * @return void
     */
    protected function _eatWhitespace()
    {
        // The offset can legitimately sit past the end of the source (for
        // example after an unterminated string), so guard before scanning.
        if ($this->_offset < $this->_sourceLength) {
            $this->_offset += strspn($this->_source, "\t\x08\x0c\n\r ", $this->_offset);
        }
    }

    /**
     * Retrieves the next token from the source stream
     *
     * Updates {@link $_token}, {@link $_tokenValue} and {@link $_offset}.
     * An unterminated string literal is tolerated: its value is whatever
     * was read up to the end of the source.
     *
     * @return int Token constant value specified in class definition
     * @throws Zend_Json_Exception on an illegal escape sequence, an
     * illegal number, or a character that cannot start a token
     */
    protected function _getNextToken()
    {
        // Escape character (the one following the backslash) => decoded byte
        static $escapes = array(
            '"'  => '"',
            '\\' => '\\',
            '/'  => '/',
            'b'  => "\x08",
            'f'  => "\x0c",
            'n'  => "\x0a",
            'r'  => "\x0d",
            't'  => "\x09",
            '\'' => '\'',
        );

        $this->_token      = self::EOF;
        $this->_tokenValue = null;
        $this->_eatWhitespace();

        if ($this->_offset >= $this->_sourceLength) {
            return self::EOF;
        }

        $str       = $this->_source;
        $strLength = $this->_sourceLength;
        $start     = $this->_offset;
        $i         = $start; // index of the last character of the token

        switch ($str[$start]) {
            case '{':
                $this->_token = self::LBRACE;
                break;
            case '}':
                $this->_token = self::RBRACE;
                break;
            case '[':
                $this->_token = self::LBRACKET;
                break;
            case ']':
                $this->_token = self::RBRACKET;
                break;
            case ',':
                $this->_token = self::COMMA;
                break;
            case ':':
                $this->_token = self::COLON;
                break;
            case '"':
                $result = '';
                do {
                    $i++;
                    if ($i >= $strLength) {
                        break;
                    }

                    $chr = $str[$i];

                    if ($chr === '"') {
                        // Closing quote
                        break;
                    }

                    if ($chr !== '\\') {
                        $result .= $chr;
                        continue;
                    }

                    // Backslash escape: look at the character after it
                    $i++;
                    if ($i >= $strLength) {
                        break;
                    }
                    $chr = $str[$i];
                    if (!isset($escapes[$chr])) {
                        // require_once 'Zend/Json/Exception.php';
                        throw new Zend_Json_Exception("Illegal escape "
                            .  "sequence '" . $chr . "'");
                    }
                    $result .= $escapes[$chr];
                } while ($i < $strLength);

                $this->_token      = self::DATUM;
                $this->_tokenValue = $result;
                break;
            case 't':
                // Only a complete literal is consumed; anything else is
                // reported as an illegal token further down.
                if (substr($str, $start, 4) === 'true') {
                    $this->_token      = self::DATUM;
                    $this->_tokenValue = true;
                    $i += 3;
                }
                break;
            case 'f':
                if (substr($str, $start, 5) === 'false') {
                    $this->_token      = self::DATUM;
                    $this->_tokenValue = false;
                    $i += 4;
                }
                break;
            case 'n':
                if (substr($str, $start, 4) === 'null') {
                    $this->_token      = self::DATUM;
                    $this->_tokenValue = null;
                    $i += 3;
                }
                break;
        }

        if ($this->_token != self::EOF) {
            $this->_offset = $i + 1; // Consume the last token character
            return $this->_token;
        }

        // No token recognized so far: it is either a number or an error.
        $chr = $str[$start];
        if ($chr == '-' || $chr == '.' || ($chr >= '0' && $chr <= '9')) {
            if (preg_match('/-?([0-9])*(\.[0-9]*)?((e|E)((-|\+)?)[0-9]+)?/s',
                $str, $matches, PREG_OFFSET_CAPTURE, $start) && $matches[0][1] == $start) {

                $datum = $matches[0][0];

                if (!is_numeric($datum)) {
                    // require_once 'Zend/Json/Exception.php';
                    throw new Zend_Json_Exception("Illegal number format: $datum");
                }

                if (preg_match('/^0\d+$/', $datum)) {
                    // require_once 'Zend/Json/Exception.php';
                    throw new Zend_Json_Exception("Octal notation not supported by JSON (value: $datum)");
                }

                // Prefer an integer whenever it represents the same value
                $val  = intval($datum);
                $fVal = floatval($datum);
                $this->_tokenValue = ($val == $fVal ? $val : $fVal);

                $this->_token  = self::DATUM;
                $this->_offset = $start + strlen($datum);
            }
        } else {
            // require_once 'Zend/Json/Exception.php';
            throw new Zend_Json_Exception('Illegal Token');
        }

        return $this->_token;
    }

    /**
     * Decode Unicode Characters from \u0000 ASCII syntax.
     *
     * Walks the input byte by byte and builds a new string in which:
     *  - every \uXXXX escape is replaced by the UTF-8 encoding of that
     *    code unit; a high surrogate escape immediately followed by a low
     *    surrogate escape is decoded as one supplementary character;
     *  - bytes 0x20-0x7F are copied unchanged;
     *  - a multi-byte lead byte (110xxxxx up to 1111110x) is copied together
     *    with the following bytes of its sequence, without validating them;
     *  - every other byte (below 0x20, stray continuation bytes, 0xFE and
     *    0xFF) is dropped.
     *
     * This algorithm was originally developed for the
     * Solar Framework by Paul M. Jones
     *
     * @link   http://solarphp.com/
     * @link   http://svn.solarphp.com/core/trunk/Solar/Json.php
     * @param  string $chrs
     * @return string
     */
    public static function decodeUnicodeString($chrs)
    {
        $utf8   = '';
        $length = strlen($chrs);

        for ($i = 0; $i < $length; $i++) {
            $char = $chrs[$i];
            $ord  = ord($char);

            // Only a backslash can start an escape, so the pattern match is
            // skipped for every other byte.
            $unit = ($char === '\\') ? self::_readUnicodeEscape($chrs, $i) : null;

            if ($unit !== null) {
                $utf16    = chr($unit >> 8) . chr($unit & 0xFF);
                $consumed = 6;

                if ($unit >= 0xD800 && $unit <= 0xDBFF) {
                    // High surrogate: join it with a directly following
                    // low surrogate escape, if there is one.
                    $low = self::_readUnicodeEscape($chrs, $i + 6);
                    if ($low !== null && $low >= 0xDC00 && $low <= 0xDFFF) {
                        $utf16   .= chr($low >> 8) . chr($low & 0xFF);
                        $consumed = 12;
                    }
                }

                $utf8 .= self::_utf162utf8($utf16);
                $i    += $consumed - 1;
                continue;
            }

            if ($ord >= 0x20 && $ord <= 0x7F) {
                $utf8 .= $char;
                continue;
            }

            // Length of the sequence announced by a multi-byte lead byte,
            // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
            if (($ord & 0xE0) == 0xC0) {
                $sequenceLength = 2; // U-00000080 - U-000007FF, mask 110XXXXX
            } elseif (($ord & 0xF0) == 0xE0) {
                $sequenceLength = 3; // U-00000800 - U-0000FFFF, mask 1110XXXX
            } elseif (($ord & 0xF8) == 0xF0) {
                $sequenceLength = 4; // U-00010000 - U-001FFFFF, mask 11110XXX
            } elseif (($ord & 0xFC) == 0xF8) {
                $sequenceLength = 5; // U-00200000 - U-03FFFFFF, mask 111110XX
            } elseif (($ord & 0xFE) == 0xFC) {
                $sequenceLength = 6; // U-04000000 - U-7FFFFFFF, mask 1111110X
            } else {
                $sequenceLength = 0; // not copied at all
            }

            if ($sequenceLength > 0) {
                $utf8 .= substr($chrs, $i, $sequenceLength);
                $i    += $sequenceLength - 1;
            }
        }

        return $utf8;
    }

    /**
     * Reads the \uXXXX escape located at $offset, if there is one
     *
     * The match is case-insensitive for both the "u" and the hex digits.
     *
     * @param  string $chrs   String being scanned
     * @param  int    $offset Byte offset at which the escape must start
     * @return int|null The 16-bit code unit, or null when no escape starts
     *                  at that offset
     */
    protected static function _readUnicodeEscape($chrs, $offset)
    {
        $candidate = (string) substr($chrs, $offset, 6);

        if (preg_match('/^\\\\u([0-9A-F]{4})$/iD', $candidate, $match)) {
            return hexdec($match[1]);
        }

        return null;
    }

    /**
     * Convert a string from one UTF-16 char to one UTF-8 char.
     *
     * The input is big-endian UTF-16: either one 2-byte code unit or a
     * 4-byte surrogate pair.
     *
     * Normally should be handled by mb_convert_encoding, but
     * provides a slower PHP-only method for installations
     * that lack the multibyte string extension.
     *
     * This method is from the Solar Framework by Paul M. Jones
     *
     * @link   http://solarphp.com
     * @param  string $utf16 UTF-16 character
     * @return string UTF-8 character
     */
    protected static function _utf162utf8($utf16)
    {
        // Check for mb extension otherwise do by hand.
        if (function_exists('mb_convert_encoding')) {
            // The byte order is stated explicitly so that U+FEFF / U+FFFE
            // cannot be taken for a byte-order mark.
            return mb_convert_encoding($utf16, 'UTF-8', 'UTF-16BE');
        }

        if (strlen($utf16) === 4) {
            $high = (ord($utf16[0]) << 8) | ord($utf16[1]);
            $low  = (ord($utf16[2]) << 8) | ord($utf16[3]);

            if ($high < 0xD800 || $high > 0xDBFF || $low < 0xDC00 || $low > 0xDFFF) {
                return '';
            }

            // return a 4-byte UTF-8 character
            $codePoint = 0x10000 + (($high - 0xD800) << 10) + ($low - 0xDC00);

            return chr(0xF0 | (($codePoint >> 18) & 0x07))
                 . chr(0x80 | (($codePoint >> 12) & 0x3F))
                 . chr(0x80 | (($codePoint >> 6) & 0x3F))
                 . chr(0x80 | ($codePoint & 0x3F));
        }

        $bytes = (ord($utf16[0]) << 8) | ord($utf16[1]);

        if ((0x7F & $bytes) == $bytes) {
            // ASCII range: a single byte
            // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
            return chr(0x7F & $bytes);
        }

        if ((0x07FF & $bytes) == $bytes) {
            // return a 2-byte UTF-8 character
            // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
            return chr(0xC0 | (($bytes >> 6) & 0x1F))
                 . chr(0x80 | ($bytes & 0x3F));
        }

        if ((0xFFFF & $bytes) == $bytes) {
            // return a 3-byte UTF-8 character
            // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
            return chr(0xE0 | (($bytes >> 12) & 0x0F))
                 . chr(0x80 | (($bytes >> 6) & 0x3F))
                 . chr(0x80 | ($bytes & 0x3F));
        }

        return '';
    }
}
0581