File indexing completed on 2024-05-12 17:25:55

0001 <?php
0002 /////////////////////////////////////////////////////////////////
0003 /// getID3() by James Heinrich <info@getid3.org>               //
0004 //  available at http://getid3.sourceforge.net                 //
0005 //            or http://www.getid3.org                         //
0006 //          also https://github.com/JamesHeinrich/getID3       //
0007 /////////////////////////////////////////////////////////////////
0008 // See readme.txt for more details                             //
0009 /////////////////////////////////////////////////////////////////
0010 //                                                             //
0011 // module.archive.gzip.php                                     //
0012 // module for analyzing GZIP files                             //
0013 // dependencies: NONE                                          //
0014 //                                                            ///
0015 /////////////////////////////////////////////////////////////////
0016 //                                                             //
0017 // Module originally written by                                //
0018 //      Mike Mozolin <teddybearØmail*ru>                       //
0019 //                                                             //
0020 /////////////////////////////////////////////////////////////////
0021 
0022 
/**
 * getID3 handler for GZIP archives (RFC 1952).
 *
 * The whole file is read into memory, split into members on the
 * "\x1F\x8B\x08" signature (ID1, ID2, CM=deflate) and every member header is
 * decoded into $info['gzip']['member_header'][n] (n is 1-based).
 */
class getid3_gzip extends getid3_handler {

  /**
   * Optional file list - disable for speed.
   * When true, members are inflated (if gzinflate() is available) and the
   * inflated data is parsed recursively (.tar.gz for example).
   *
   * @var bool
   */
  public $option_gzip_parse_contents = false;

  /**
   * Parse every gzip member of the current file into $this->getid3->info.
   *
   * @return bool false if the file is too large to load into memory or a
   *              member header cannot be decoded (a message is appended to
   *              $info['error']); true otherwise.
   */
  public function Analyze() {
    $info = &$this->getid3->info;

    $info['fileformat'] = 'gzip';

    $magic = "\x1F\x8B\x08"; // ID1, ID2 and CM (8 = deflate)
    $start_length = 10;      // size of the fixed member header
    $unpack_header = 'a1id1/a1id2/a1cmethod/a1flags/a4mtime/a1xflags/a1os';
    //+---+---+---+---+---+---+---+---+---+---+
    //|ID1|ID2|CM |FLG|     MTIME     |XFL|OS |
    //+---+---+---+---+---+---+---+---+---+---+

    // Only enforce the limit when one is actually reported: comparing the
    // filesize against a false/0/missing value would reject every non-empty file.
    // NOTE(review): presumably a falsy php_memory_limit means "no limit" - confirm against the caller.
    if (!empty($info['php_memory_limit']) && ($info['filesize'] > $info['php_memory_limit'])) {
      $info['error'][] = 'File is too large ('.number_format($info['filesize']).' bytes) to read into memory (limit: '.number_format($info['php_memory_limit'] / 1048576).'MB)';
      return false;
    }
    $this->fseek(0);
    $buffer = $this->fread($info['filesize']);

    // Split on the member signature. The signature can also occur by chance
    // inside compressed data, so every chunk whose header does not look valid
    // (too short, or unknown OS byte) is glued back, signature included, onto
    // the last chunk that did validate.
    $arr_members = explode($magic, $buffer);
    $num_members = count($arr_members);
    $last_valid = -1;
    for ($i = 0; $i < $num_members; $i++) {
      if (($i == 0) && (strlen($arr_members[0]) == 0)) {
        // file starts with the signature: nothing precedes the first member
        continue;
      }
      $buf = $magic.$arr_members[$i];

      $is_member = false;
      if (strlen($buf) >= $start_length) {
        $attr = unpack($unpack_header, substr($buf, 0, $start_length));
        $is_member = (bool) $this->get_os_type(ord($attr['os']));
      }
      if ($is_member) {
        $last_valid = $i;
        continue;
      }
      if ($last_valid >= 0) {
        $arr_members[$last_valid] .= $buf;
      }
      // data that is not a member and has no member before it is discarded
      $arr_members[$i] = '';
    }

    $info['gzip']['files'] = array();

    $idx = 0; // 1-based index of the member inside $info['gzip']['member_header']
    for ($i = 0; $i < $num_members; $i++) {
      if (strlen($arr_members[$i]) == 0) {
        continue;
      }
      $thisInfo = &$info['gzip']['member_header'][++$idx];

      $buff = $magic.$arr_members[$i];

      $attr = unpack($unpack_header, substr($buff, 0, $start_length));
      $thisInfo['filemtime']      = getid3_lib::LittleEndian2Int($attr['mtime']);
      $thisInfo['raw']['id1']     = ord($attr['id1']);
      $thisInfo['raw']['id2']     = ord($attr['id2']);
      $thisInfo['raw']['cmethod'] = ord($attr['cmethod']);
      $thisInfo['raw']['os']      = ord($attr['os']);
      $thisInfo['raw']['xflags']  = ord($attr['xflags']);
      $thisInfo['raw']['flags']   = ord($attr['flags']);

      $thisInfo['flags']['crc16']    = (bool) ($thisInfo['raw']['flags'] & 0x02);
      $thisInfo['flags']['extra']    = (bool) ($thisInfo['raw']['flags'] & 0x04);
      $thisInfo['flags']['filename'] = (bool) ($thisInfo['raw']['flags'] & 0x08);
      $thisInfo['flags']['comment']  = (bool) ($thisInfo['raw']['flags'] & 0x10);

      $thisInfo['compression'] = $this->get_xflag_type($thisInfo['raw']['xflags']);

      $thisInfo['os'] = $this->get_os_type($thisInfo['raw']['os']);
      if (!$thisInfo['os']) {
        $info['error'][] = 'Read error on gzip file';
        return false;
      }

      // offset of the first optional field, directly after the fixed header
      $fpointer = $start_length;

      // bit 2 - FLG.FEXTRA
      //+---+---+=================================+
      //| XLEN  |...XLEN bytes of "extra field"...|
      //+---+---+=================================+
      if ($thisInfo['flags']['extra']) {
        $xlen = getid3_lib::LittleEndian2Int(substr($buff, $fpointer, 2));
        $fpointer += 2;

        // The extra field is exposed undecoded; its subfields (SI1, SI2, LEN, data)
        // are not interpreted here.
        $thisInfo['raw']['xfield'] = substr($buff, $fpointer, $xlen);
        $fpointer += $xlen;
      }

      // bit 3 - FLG.FNAME
      //+=========================================+
      //|...original file name, zero-terminated...|
      //+=========================================+
      // GZIP files may have only one file, with no filename, so assume original filename is current filename without .gz
      $thisInfo['filename'] = preg_replace('#\\.gz$#i', '', $info['filename']);
      if ($thisInfo['flags']['filename']) {
        $thisInfo['filename'] = $this->read_zero_terminated($buff, $fpointer);
      }

      // bit 4 - FLG.FCOMMENT
      //+===================================+
      //|...file comment, zero-terminated...|
      //+===================================+
      if ($thisInfo['flags']['comment']) {
        $thisInfo['comment'] = $this->read_zero_terminated($buff, $fpointer);
      }

      // bit 1 - FLG.FHCRC
      //+---+---+
      //| CRC16 |
      //+---+---+
      if ($thisInfo['flags']['crc16']) {
        $thisInfo['crc16'] = getid3_lib::LittleEndian2Int(substr($buff, $fpointer, 2));
        $fpointer += 2;
      }

      // bit 0 - FLG.FTEXT is ignored
      // bits 5, 6, 7 - reserved

      // Member trailer: CRC32 followed by ISIZE, both 4 bytes little-endian
      $thisInfo['crc32']    = getid3_lib::LittleEndian2Int(substr($buff, strlen($buff) - 8, 4));
      $thisInfo['filesize'] = getid3_lib::LittleEndian2Int(substr($buff, strlen($buff) - 4));

      $info['gzip']['files'] = getid3_lib::array_merge_clobber($info['gzip']['files'], getid3_lib::CreateDeepArray($thisInfo['filename'], '/', $thisInfo['filesize']));

      if (!$this->option_gzip_parse_contents || !function_exists('gzinflate')) {
        continue;
      }

      // Try to inflate GZip: the deflate stream sits between the header and the 8-byte trailer
      $cdata = (string) substr($buff, $fpointer, -8);
      $inflated = gzinflate($cdata);
      if ($inflated === false) {
        // corrupt or truncated stream: nothing to checksum or to parse
        $thisInfo['crc32_valid'] = false;
        continue;
      }

      // Calculate CRC32 for inflated content
      $thisInfo['crc32_valid'] = (bool) (sprintf('%u', crc32($inflated)) == $thisInfo['crc32']);

      // determine format
      $getid3_temp = new getID3();
      $determined_format = $getid3_temp->GetFileFormat(substr($inflated, 0, 32774));
      unset($getid3_temp);

      $module = ((is_array($determined_format) && isset($determined_format['module'])) ? $determined_format['module'] : '');
      if ($module !== 'tar') {
        // unknown or unhandled format
        continue;
      }

      // view TAR-file info
      $include_file = GETID3_INCLUDEPATH.$determined_format['include'];
      if (file_exists($include_file) && include_once($include_file)) {
        $temp_tar_filename = tempnam(GETID3_TEMP_DIR, 'getID3');
        if ($temp_tar_filename === false) {
          // can't find anywhere to create a temp file, abort
          $info['error'][] = 'Unable to create temp file to parse TAR inside GZIP file';
        } elseif ($fp_temp_tar = fopen($temp_tar_filename, 'w+b')) {
          fwrite($fp_temp_tar, $inflated);
          fclose($fp_temp_tar);
          $getid3_temp = new getID3();
          $getid3_temp->openfile($temp_tar_filename);
          $getid3_tar = new getid3_tar($getid3_temp);
          $getid3_tar->Analyze();
          if (isset($getid3_temp->info['tar'])) {
            // $thisInfo is bound to the current member's header entry
            $thisInfo['tar'] = $getid3_temp->info['tar'];
          }
          unset($getid3_temp, $getid3_tar);
          unlink($temp_tar_filename);
        } else {
          $info['error'][] = 'Unable to fopen() temp file to parse TAR inside GZIP file';
          // tempnam() already created the file, do not leave it behind
          if (file_exists($temp_tar_filename)) {
            unlink($temp_tar_filename);
          }
        }
      }
    }
    return true;
  }

  /**
   * Read a zero-terminated string from $data starting at $offset.
   *
   * @param string $data   buffer to read from
   * @param int    $offset start position; on return it points just past the
   *                       terminating NUL, or at the end of $data when no
   *                       terminator was found
   *
   * @return string the bytes before the terminator (the remainder of $data
   *                when the terminator is missing, '' when $offset is at or
   *                beyond the end of $data)
   */
  private function read_zero_terminated($data, &$offset) {
    $length = strlen($data);
    if ($offset >= $length) {
      $offset = $length;
      return '';
    }
    $end = strpos($data, "\x00", $offset);
    if ($end === false) {
      $string = (string) substr($data, $offset);
      $offset = $length;
      return $string;
    }
    $string = (string) substr($data, $offset, $end - $offset);
    $offset = $end + 1;
    return $string;
  }

  /**
   * Converts the OS type
   *
   * @param int $key value of the OS byte of a member header
   *
   * @return string name of the originating system, '' if the value is not in the table
   */
  public function get_os_type($key) {
    static $os_type = array(
      '0'   => 'FAT filesystem (MS-DOS, OS/2, NT/Win32)',
      '1'   => 'Amiga',
      '2'   => 'VMS (or OpenVMS)',
      '3'   => 'Unix',
      '4'   => 'VM/CMS',
      '5'   => 'Atari TOS',
      '6'   => 'HPFS filesystem (OS/2, NT)',
      '7'   => 'Macintosh',
      '8'   => 'Z-System',
      '9'   => 'CP/M',
      '10'  => 'TOPS-20',
      '11'  => 'NTFS filesystem (NT)',
      '12'  => 'QDOS',
      '13'  => 'Acorn RISCOS',
      '255' => 'unknown'
    );
    return (isset($os_type[$key]) ? $os_type[$key] : '');
  }

  /**
   * Converts the eXtra FLags
   *
   * @param int $key value of the XFL byte of a member header
   *
   * @return string description of the compression setting, '' if the value is not in the table
   */
  public function get_xflag_type($key) {
    static $xflag_type = array(
      '0' => 'unknown',
      '2' => 'maximum compression',
      '4' => 'fastest algorithm'
    );
    return (isset($xflag_type[$key]) ? $xflag_type[$key] : '');
  }
}
0281