File indexing completed on 2025-01-19 05:21:25

0001 <?php
0002 /**
0003  * Zend Framework
0004  *
0005  * LICENSE
0006  *
0007  * This source file is subject to the new BSD license that is bundled
0008  * with this package in the file LICENSE.txt.
0009  * It is also available through the world-wide-web at this URL:
0010  * http://framework.zend.com/license/new-bsd
0011  * If you did not receive a copy of the license and are unable to
0012  * obtain it through the world-wide-web, please send an email
0013  * to license@zend.com so we can send you a copy immediately.
0014  *
0015  * @category   Zend
0016  * @package    Zend_Search_Lucene
0017  * @subpackage Document
0018  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0019  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0020  * @version    $Id$
0021  */
0022 
0023 
0024 /** Zend_Search_Lucene_Document */
0025 // require_once 'Zend/Search/Lucene/Document.php';
0026 
0027 /** Zend_Xml_Security */
0028 // require_once 'Zend/Xml/Security.php';
0029 
0030 /**
0031  * OpenXML document.
0032  *
0033  * @category   Zend
0034  * @package    Zend_Search_Lucene
0035  * @subpackage Document
0036  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0037  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0038  */
abstract class Zend_Search_Lucene_Document_OpenXml extends Zend_Search_Lucene_Document
{
    /**
     * Xml Schema - Relationships
     *
     * @var string
     */
    const SCHEMA_RELATIONSHIP = 'http://schemas.openxmlformats.org/package/2006/relationships';

    /**
     * Relationship type - Office document
     *
     * @var string
     */
    const SCHEMA_OFFICEDOCUMENT = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument';

    /**
     * Relationship type - Core properties
     *
     * This is the value of the "Type" attribute of the relationship that
     * points at the core properties part. It is NOT the XML namespace of the
     * elements inside that part; see SCHEMA_COREPROPERTIES_NAMESPACE.
     *
     * @var string
     */
    const SCHEMA_COREPROPERTIES = 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties';

    /**
     * Xml Schema - Core properties elements (conventionally prefixed "cp:")
     *
     * @var string
     */
    const SCHEMA_COREPROPERTIES_NAMESPACE = 'http://schemas.openxmlformats.org/package/2006/metadata/core-properties';

    /**
     * Xml Schema - Dublin Core
     *
     * @var string
     */
    const SCHEMA_DUBLINCORE = 'http://purl.org/dc/elements/1.1/';

    /**
     * Xml Schema - Dublin Core Terms
     *
     * @var string
     */
    const SCHEMA_DUBLINCORETERMS = 'http://purl.org/dc/terms/';

    /**
     * Extract metadata from document
     *
     * Reads the package relationships ("_rels/.rels"), follows every
     * relationship of type SCHEMA_COREPROPERTIES and collects the child
     * elements of the referenced part that live in the Dublin Core, core
     * properties and Dublin Core Terms namespaces. Element local names are
     * used as keys; when the same name occurs more than once the last value
     * read wins.
     *
     * A missing or unreadable relationships/properties part yields no
     * entries instead of raising PHP warnings.
     *
     * @param ZipArchive $package    ZipArchive OpenXML package
     * @return array    Key-value pairs containing document meta data
     */
    protected function extractMetaData(ZipArchive $package)
    {
        // Data holders
        $coreProperties = array();

        // ZipArchive::getFromName() returns false when the entry is absent.
        $relationsXml = $package->getFromName('_rels/.rels');
        if ($relationsXml === false) {
            return $coreProperties;
        }

        $relations = Zend_Xml_Security::scan($relationsXml);
        if (!($relations instanceof SimpleXMLElement)) {
            return $coreProperties;
        }

        // Namespaces to harvest, in the order they are merged into the result.
        // SCHEMA_COREPROPERTIES is kept for backward compatibility although it
        // is a relationship type; the "cp:" elements live in
        // SCHEMA_COREPROPERTIES_NAMESPACE.
        $namespaces = array(
            self::SCHEMA_DUBLINCORE,
            self::SCHEMA_COREPROPERTIES,
            self::SCHEMA_COREPROPERTIES_NAMESPACE,
            self::SCHEMA_DUBLINCORETERMS,
        );

        // Read relations and search for core properties
        foreach ($relations->Relationship as $rel) {
            if ((string) $rel['Type'] !== self::SCHEMA_COREPROPERTIES) {
                continue;
            }

            // Targets may be written as "/docProps/core.xml" or "core.xml";
            // normalise them to the slash-separated name used inside the zip.
            $partName = $this->absoluteZipPath((string) $rel['Target']);

            $partXml = $package->getFromName($partName);
            if ($partXml === false) {
                continue;
            }

            $contents = Zend_Xml_Security::scan($partXml);
            if (!($contents instanceof SimpleXMLElement)) {
                continue;
            }

            foreach ($namespaces as $namespace) {
                foreach ($contents->children($namespace) as $child) {
                    $coreProperties[$child->getName()] = (string) $child;
                }
            }
        }

        return $coreProperties;
    }

    /**
     * Determine absolute zip path
     *
     * Treats both "/" and "\" as separators, drops empty and "." segments,
     * lets ".." remove the previously kept segment (if any) and joins the
     * remaining segments with "/". The result never starts or ends with a
     * slash.
     *
     * @param string $path
     * @return string
     */
    protected function absoluteZipPath($path)
    {
        $segments  = explode('/', str_replace('\\', '/', (string) $path));
        $absolutes = array();

        foreach ($segments as $segment) {
            if ($segment === '' || $segment === '.') {
                continue;
            }

            if ($segment === '..') {
                // Popping an empty stack is a no-op, so excess ".." is ignored.
                array_pop($absolutes);
            } else {
                $absolutes[] = $segment;
            }
        }

        return implode('/', $absolutes);
    }
}