File indexing completed on 2025-01-19 05:21:25
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Document
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 * @version    $Id$
 */


/** Zend_Search_Lucene_Document */
// require_once 'Zend/Search/Lucene/Document.php';

/** Zend_Xml_Security */
// require_once 'Zend/Xml/Security.php';

/**
 * OpenXML document.
 *
 * Base class for OpenXML-backed documents: it provides the schema URIs used
 * when walking an OpenXML package, a reader for the package's core-properties
 * part, and a helper that normalises part names into zip entry names.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Document
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
abstract class Zend_Search_Lucene_Document_OpenXml extends Zend_Search_Lucene_Document
{
    /**
     * Xml Schema - Relationships
     *
     * @var string
     */
    const SCHEMA_RELATIONSHIP = 'http://schemas.openxmlformats.org/package/2006/relationships';

    /**
     * Xml Schema - Office document
     *
     * @var string
     */
    const SCHEMA_OFFICEDOCUMENT = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument';

    /**
     * Xml Schema - Core properties (relationship type)
     *
     * This is the value of the "Type" attribute on the relationship that
     * points at the core-properties part. It is NOT the namespace of the
     * elements inside that part; see SCHEMA_COREPROPERTIES_NAMESPACE.
     *
     * @var string
     */
    const SCHEMA_COREPROPERTIES = 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties';

    /**
     * Xml Schema - Core properties (element namespace)
     *
     * Namespace of the "cp:" elements (keywords, lastModifiedBy, category,
     * revision, ...) inside the core-properties part.
     *
     * @var string
     */
    const SCHEMA_COREPROPERTIES_NAMESPACE = 'http://schemas.openxmlformats.org/package/2006/metadata/core-properties';

    /**
     * Xml Schema - Dublin Core
     *
     * @var string
     */
    const SCHEMA_DUBLINCORE = 'http://purl.org/dc/elements/1.1/';

    /**
     * Xml Schema - Dublin Core Terms
     *
     * @var string
     */
    const SCHEMA_DUBLINCORETERMS = 'http://purl.org/dc/terms/';

    /**
     * Extract metadata from document
     *
     * Reads "_rels/.rels", follows every relationship whose type is
     * SCHEMA_COREPROPERTIES and collects the text of the child elements of
     * the referenced part, keyed by local element name. When two elements
     * share a local name, the one read last wins.
     *
     * A package without a readable "_rels/.rels", or a relationship whose
     * target part is missing or unparsable, contributes no metadata.
     *
     * @param ZipArchive $package    ZipArchive OpenXML package
     * @return array    Key-value pairs containing document meta data
     */
    protected function extractMetaData(ZipArchive $package)
    {
        // Data holders
        $coreProperties = array();

        // Read relations and search for core properties
        $relations = $this->readPackageXml($package, '_rels/.rels');
        if ($relations === null) {
            return $coreProperties;
        }

        // Namespaces whose children are collected, in read order. The
        // relationship-type URI is kept after the real "cp:" namespace so
        // that anything the previous implementation returned is still returned.
        $namespaces = array(
            self::SCHEMA_DUBLINCORE,
            self::SCHEMA_COREPROPERTIES_NAMESPACE,
            self::SCHEMA_COREPROPERTIES,
            self::SCHEMA_DUBLINCORETERMS,
        );

        foreach ($relations->Relationship as $rel) {
            if ((string) $rel['Type'] !== self::SCHEMA_COREPROPERTIES) {
                continue;
            }

            // Found core properties! Normalise the target first: zip entry
            // names carry no leading "/" or "./", but relationship targets may.
            $contents = $this->readPackageXml(
                $package,
                $this->absoluteZipPath((string) $rel['Target'])
            );
            if ($contents === null) {
                continue;
            }

            foreach ($namespaces as $namespace) {
                foreach ($contents->children($namespace) as $child) {
                    $coreProperties[$child->getName()] = (string) $child;
                }
            }
        }

        return $coreProperties;
    }

    /**
     * Determine absolute zip path
     *
     * Treats both "/" and "\" as separators, drops empty and "." segments,
     * resolves ".." against the segments collected so far (a ".." with
     * nothing left to remove is ignored) and joins the result with "/".
     * The returned path never starts or ends with a separator.
     *
     * @param string $path
     * @return string
     */
    protected function absoluteZipPath($path)
    {
        $absolutes = array();

        foreach (explode('/', str_replace('\\', '/', $path)) as $part) {
            if ($part === '' || $part === '.') {
                continue;
            }

            if ($part === '..') {
                array_pop($absolutes);
                continue;
            }

            $absolutes[] = $part;
        }

        return implode('/', $absolutes);
    }

    /**
     * Read one entry of the package and parse it as XML
     *
     * Exceptions raised by Zend_Xml_Security::scan() are not caught here.
     *
     * @param ZipArchive $package    ZipArchive OpenXML package
     * @param string     $name       Zip entry name
     * @return SimpleXMLElement|null Parsed entry, or null when the entry is
     *                               missing, empty or did not parse
     */
    private function readPackageXml(ZipArchive $package, $name)
    {
        // ZipArchive::getFromName() returns false when the entry does not exist.
        $xml = $package->getFromName($name);
        if ($xml === false || $xml === '') {
            return null;
        }

        // Zend_Xml_Security is defined outside this file; any result that is
        // not a SimpleXMLElement is treated as a parse failure.
        $parsed = Zend_Xml_Security::scan($xml);

        return ($parsed instanceof SimpleXMLElement) ? $parsed : null;
    }
}