File indexing completed on 2024-05-12 06:03:06

0001 <?php
0002 
0003 /**
0004  * Copyright (c) 2007-2011, Servigistics, Inc.
0005  * All rights reserved.
0006  *
0007  * Redistribution and use in source and binary forms, with or without
0008  * modification, are permitted provided that the following conditions are met:
0009  *
0010  *  - Redistributions of source code must retain the above copyright notice,
0011  *    this list of conditions and the following disclaimer.
0012  *  - Redistributions in binary form must reproduce the above copyright
0013  *    notice, this list of conditions and the following disclaimer in the
0014  *    documentation and/or other materials provided with the distribution.
0015  *  - Neither the name of Servigistics, Inc. nor the names of
0016  *    its contributors may be used to endorse or promote products derived from
0017  *    this software without specific prior written permission.
0018  *
0019  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
0020  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
0021  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
0022  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
0023  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
0024  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
0025  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
0026  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
0027  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
0028  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
0029  * POSSIBILITY OF SUCH DAMAGE.
0030  *
0031  * @copyright Copyright 2007-2011 Servigistics, Inc. (http://servigistics.com)
0032  * @license http://solr-php-client.googlecode.com/svn/trunk/COPYING New BSD
0033  * @version $Id: Service.php 59 2011-02-08 20:38:59Z donovan.jimenez $
0034  *
0035  * @package Apache
0036  * @subpackage Solr
0037  * @author Donovan Jimenez <djimenez@conduit-it.com>
0038  */
0039 require_once 'Zend/Service/Solr/Exception.php';
0040 require_once 'Zend/Service/Solr/HttpTransportException.php';
0041 require_once 'Zend/Service/Solr/InvalidArgumentException.php';
0042 require_once 'Zend/Service/Solr/Document.php';
0043 require_once 'Zend/Service/Solr/Response.php';
0044 require_once 'Zend/Service/Solr/HttpTransport/Interface.php';
0045 
0046 /**
0047  * Starting point for the Solr API. Represents a Solr server resource and has
0048  * methods for pinging, adding, deleting, committing, optimizing and searching.
0049  *
0050  * Example Usage:
0051  * <code>
0052  * ...
 * $solr = new Zend_Service_Solr(); //or explicitly new Zend_Service_Solr('localhost', 8180, '/solr')
0054  *
0055  * if ($solr->ping())
0056  * {
0057  *    $solr->deleteByQuery('*:*'); //deletes ALL documents - be careful :)
0058  *
0059  *    $document = new Zend_Service_Solr_Document();
0060  *    $document->id = uniqid(); //or something else suitably unique
0061  *
0062  *    $document->title = 'Some Title';
0063  *    $document->content = 'Some content for this wonderful document. Blah blah blah.';
0064  *
0065  *    $solr->addDocument($document);  //if you're going to be adding documents in bulk using addDocuments
0066  *                    //with an array of documents is faster
0067  *
0068  *    $solr->commit(); //commit to see the deletes and the document
0069  *    $solr->optimize(); //merges multiple segments into one
0070  *
0071  *    //and the one we all care about, search!
0072  *    //any other common or custom parameters to the request handler can go in the
0073  *    //optional 4th array argument.
0074  *    $solr->search('content:blah', 0, 10, array('sort' => 'timestamp desc'));
0075  * }
0076  * ...
0077  * </code>
0078  *
0079  * @todo Investigate using other HTTP clients other than file_get_contents built-in handler. Could provide performance
0080  * improvements when dealing with multiple requests by using HTTP's keep alive functionality
0081  */
0082 class Zend_Service_Solr {
0083     /**
0084      * SVN Revision meta data for this class
0085      */
0086     const SVN_REVISION = '$Revision: 59 $';
0087 
0088     /**
0089      * SVN ID meta data for this class
0090      */
0091     const SVN_ID = '$Id: Service.php 59 2011-02-08 20:38:59Z donovan.jimenez $';
0092 
0093     /**
0094      * Response writer we'll request - JSON. See http://code.google.com/p/solr-php-client/issues/detail?id=6#c1 for reasoning
0095      */
0096     const SOLR_WRITER = 'json';
0097 
0098     /**
0099      * NamedList Treatment constants
0100      */
0101     const NAMED_LIST_FLAT = 'flat';
0102     const NAMED_LIST_MAP = 'map';
0103 
0104     /**
0105      * Search HTTP Methods
0106      */
0107     const METHOD_GET = 'GET';
0108     const METHOD_POST = 'POST';
0109 
0110     /**
0111      * Servlet mappings
0112      */
0113     const PING_SERVLET = 'admin/ping';
0114     const UPDATE_SERVLET = 'update';
0115     const SEARCH_SERVLET = 'select';
0116     const SPELL_SERVLET = 'spell';
0117     const THREADS_SERVLET = 'admin/threads';
0118     const EXTRACT_SERVLET = 'update/extract';
0119 
0120     /**
0121      * Server identification strings
0122      *
0123      * @var string
0124      */
0125     protected $_host, $_port, $_path;
0126 
0127     /**
0128      * Whether {@link Zend_Service_Solr_Response} objects should create {@link Zend_Service_Solr_Document}s in
0129      * the returned parsed data
0130      *
0131      * @var boolean
0132      */
0133     protected $_createDocuments = true;
0134 
0135     /**
0136      * Whether {@link Zend_Service_Solr_Response} objects should have multivalue fields with only a single value
0137      * collapsed to appear as a single value would.
0138      *
0139      * @var boolean
0140      */
0141     protected $_collapseSingleValueArrays = true;
0142 
0143     /**
     * How NamedLists should be formatted in the output.  This specifically affects facet counts. Valid values
     * are {@link Zend_Service_Solr::NAMED_LIST_MAP} (default) or {@link Zend_Service_Solr::NAMED_LIST_FLAT}.
0146      *
0147      * @var string
0148      */
0149     protected $_namedListTreatment = self::NAMED_LIST_MAP;
0150 
0151     /**
0152      * Query delimiters. Someone might want to be able to change
0153      * these (to use &amp; instead of & for example), so I've provided them.
0154      *
0155      * @var string
0156      */
0157     protected $_queryDelimiter = '?', $_queryStringDelimiter = '&', $_queryBracketsEscaped = true;
0158 
0159     /**
0160      * Constructed servlet full path URLs
0161      *
0162      * @var string
0163      */
0164     protected $_pingUrl, $_updateUrl, $_spellUrl, $_searchUrl, $_threadsUrl;
0165 
0166     /**
0167      * Keep track of whether our URLs have been constructed
0168      *
0169      * @var boolean
0170      */
0171     protected $_urlsInited = false;
0172 
0173     /**
0174      * HTTP Transport implementation (pluggable)
0175      *
0176      * @var Zend_Service_Solr_HttpTransport_Interface
0177      */
0178     protected $_httpTransport = false;
0179 
0180     /**
0181      * Escape a value for special query characters such as ':', '(', ')', '*', '?', etc.
0182      *
0183      * NOTE: inside a phrase fewer characters need escaped, use {@link Zend_Service_Solr_Service::escapePhrase()} instead
0184      *
0185      * @param string $value
0186      * @return string
0187      */
0188     static public function escape($value) {
0189         //list taken from http://lucene.apache.org/java/docs/queryparsersyntax.html#Escaping%20Special%20Characters
0190         $pattern = '/(\+|-|&&|\|\||!|\(|\)|\{|}|\[|]|\^|"|~|\*|\?|:|\\\)/';
0191         $replace = '\\\$1';
0192 
0193         return preg_replace($pattern, $replace, $value);
0194     }
0195 
0196     /**
0197      * Escape a value meant to be contained in a phrase for special query characters
0198      *
0199      * @param string $value
0200      * @return string
0201      */
0202     static public function escapePhrase($value) {
0203         $pattern = '/("|\\\)/';
0204         $replace = '\\\$1';
0205 
0206         return preg_replace($pattern, $replace, $value);
0207     }
0208 
0209     /**
0210      * Convenience function for creating phrase syntax from a value
0211      *
0212      * @param string $value
0213      * @return string
0214      */
0215     static public function phrase($value) {
0216         return '"' . self::escapePhrase($value) . '"';
0217     }
0218 
0219     /**
0220      * Constructor. All parameters are optional and will take on default values
0221      * if not specified.
0222      *
0223      * @param string $host
0224      * @param string $port
0225      * @param string $path
0226      * @param Zend_Service_Solr_HttpTransport_Interface $httpTransport
0227      */
0228     public function __construct($host = 'localhost', $port = 8983, $path = '/solr/', $httpTransport = false) {
0229         $this->setHost($host);
0230         $this->setPort($port);
0231         $this->setPath($path);
0232 
0233         $this->_initUrls();
0234 
0235         if ($httpTransport) {
0236             $this->setHttpTransport($httpTransport);
0237         }
0238 
0239         // check that our php version is >= 5.1.3 so we can correct for http_build_query behavior later
0240         $this->_queryBracketsEscaped = version_compare(phpversion(), '5.1.3', '>=');
0241     }
0242 
0243     /**
0244      * Return a valid http URL given this server's host, port and path and a provided servlet name
0245      *
0246      * @param string $servlet
0247      * @return string
0248      */
0249     protected function _constructUrl($servlet, $params = array()) {
0250         if (count($params)) {
0251             //escape all parameters appropriately for inclusion in the query string
0252             $escapedParams = array();
0253 
0254             foreach ($params as $key => $value) {
0255                 $escapedParams[] = urlencode($key) . '=' . urlencode($value);
0256             }
0257 
0258             $queryString = $this->_queryDelimiter . implode($this->_queryStringDelimiter, $escapedParams);
0259         } else {
0260             $queryString = '';
0261         }
0262 
0263         return 'http://' . $this->_host . ':' . $this->_port . $this->_path . $servlet . $queryString;
0264     }
0265 
0266     /**
0267      * Construct the Full URLs for the three servlets we reference
0268      */
0269     protected function _initUrls() {
0270         //Initialize our full servlet URLs now that we have server information
0271         $this->_extractUrl = $this->_constructUrl(self::EXTRACT_SERVLET);
0272         $this->_pingUrl = $this->_constructUrl(self::PING_SERVLET);
0273         $this->_spellUrl = $this->_constructUrl(self::SPELL_SERVLET);
0274         $this->_searchUrl = $this->_constructUrl(self::SEARCH_SERVLET);
0275         $this->_threadsUrl = $this->_constructUrl(self::THREADS_SERVLET, array('wt' => self::SOLR_WRITER));
0276         $this->_updateUrl = $this->_constructUrl(self::UPDATE_SERVLET, array('wt' => self::SOLR_WRITER));
0277 
0278         $this->_urlsInited = true;
0279     }
0280 
0281     protected function _generateQueryString($params) {
0282         // use http_build_query to encode our arguments because its faster
0283         // than urlencoding all the parts ourselves in a loop
0284         //
0285     // because http_build_query treats arrays differently than we want to, correct the query
0286         // string by changing foo[#]=bar (# being an actual number) parameter strings to just
0287         // multiple foo=bar strings. This regex should always work since '=' will be urlencoded
0288         // anywhere else the regex isn't expecting it
0289         //
0290     // NOTE: before php 5.1.3 brackets were not url encoded by http_build query - we've checked
0291         // the php version in the constructor and put the results in the instance variable. Also, before
0292         // 5.1.2 the arg_separator parameter was not available, so don't use it
0293         if ($this->_queryBracketsEscaped) {
0294             $queryString = http_build_query($params, null, $this->_queryStringDelimiter);
0295             return preg_replace('/%5B(?:[0-9]|[1-9][0-9]+)%5D=/', '=', $queryString);
0296         } else {
0297             $queryString = http_build_query($params);
0298             return preg_replace('/\\[(?:[0-9]|[1-9][0-9]+)\\]=/', '=', $queryString);
0299         }
0300     }
0301 
0302     /**
0303      * Central method for making a get operation against this Solr Server
0304      *
0305      * @param string $url
0306      * @param float $timeout Read timeout in seconds
0307      * @return Zend_Service_Solr_Response
0308      *
0309      * @throws Zend_Service_Solr_HttpTransportException If a non 200 response status is returned
0310      */
0311     protected function _sendRawGet($url, $timeout = FALSE) {
0312         $httpTransport = $this->getHttpTransport();
0313 
0314         $httpResponse = $httpTransport->performGetRequest($url, $timeout);
0315         $solrResponse = new Zend_Service_Solr_Response($httpResponse, $this->_createDocuments, $this->_collapseSingleValueArrays);
0316 
0317         if ($solrResponse->getHttpStatus() != 200) {
0318             throw new Zend_Service_Solr_HttpTransportException($solrResponse);
0319         }
0320 
0321         return $solrResponse;
0322     }
0323 
0324     /**
0325      * Central method for making a post operation against this Solr Server
0326      *
0327      * @param string $url
0328      * @param string $rawPost
0329      * @param float $timeout Read timeout in seconds
0330      * @param string $contentType
0331      * @return Zend_Service_Solr_Response
0332      *
0333      * @throws Zend_Service_Solr_HttpTransportException If a non 200 response status is returned
0334      */
0335     protected function _sendRawPost($url, $rawPost, $timeout = FALSE, $contentType = 'text/xml; charset=UTF-8') {
0336         $httpTransport = $this->getHttpTransport();
0337 
0338         $httpResponse = $httpTransport->performPostRequest($url, $rawPost, $contentType, $timeout);
0339         $solrResponse = new Zend_Service_Solr_Response($httpResponse, $this->_createDocuments, $this->_collapseSingleValueArrays);
0340 
0341         if ($solrResponse->getHttpStatus() != 200) {
0342             throw new Zend_Service_Solr_HttpTransportException($solrResponse);
0343         }
0344 
0345         return $solrResponse;
0346     }
0347 
0348     /**
0349      * Returns the set host
0350      *
0351      * @return string
0352      */
0353     public function getHost() {
0354         return $this->_host;
0355     }
0356 
0357     /**
0358      * Set the host used. If empty will fallback to constants
0359      *
0360      * @param string $host
0361      *
0362      * @throws Zend_Service_Solr_InvalidArgumentException If the host parameter is empty
0363      */
0364     public function setHost($host) {
0365         //Use the provided host or use the default
0366         if (empty($host)) {
0367             throw new Zend_Service_Solr_InvalidArgumentException('Host parameter is empty');
0368         } else {
0369             $this->_host = $host;
0370         }
0371 
0372         if ($this->_urlsInited) {
0373             $this->_initUrls();
0374         }
0375     }
0376 
0377     /**
0378      * Get the set port
0379      *
0380      * @return integer
0381      */
0382     public function getPort() {
0383         return $this->_port;
0384     }
0385 
0386     /**
0387      * Set the port used. If empty will fallback to constants
0388      *
0389      * @param integer $port
0390      *
0391      * @throws Zend_Service_Solr_InvalidArgumentException If the port parameter is empty
0392      */
0393     public function setPort($port) {
0394         //Use the provided port or use the default
0395         $port = (int) $port;
0396 
0397         if ($port <= 0) {
0398             throw new Zend_Service_Solr_InvalidArgumentException('Port is not a valid port number');
0399         } else {
0400             $this->_port = $port;
0401         }
0402 
0403         if ($this->_urlsInited) {
0404             $this->_initUrls();
0405         }
0406     }
0407 
0408     /**
0409      * Get the set path.
0410      *
0411      * @return string
0412      */
0413     public function getPath() {
0414         return $this->_path;
0415     }
0416 
0417     /**
0418      * Set the path used. If empty will fallback to constants
0419      *
0420      * @param string $path
0421      */
0422     public function setPath($path) {
0423         $path = trim($path, '/');
0424 
0425         $this->_path = '/' . $path . '/';
0426 
0427         if ($this->_urlsInited) {
0428             $this->_initUrls();
0429         }
0430     }
0431 
0432     /**
0433      * Get the current configured HTTP Transport
0434      *
0435      * @return HttpTransportInterface
0436      */
0437     public function getHttpTransport() {
0438         // lazy load a default if one has not be set
0439         if ($this->_httpTransport === false) {
0440             require_once 'Zend/Service/Solr/HttpTransport/FileGetContents.php';
0441             $this->_httpTransport = new Zend_Service_Solr_HttpTransport_FileGetContents();
0442         }
0443 
0444         return $this->_httpTransport;
0445     }
0446 
0447     /**
0448      * Set the HTTP Transport implemenation that will be used for all HTTP requests
0449      *
0450      * @param Zend_Service_Solr_HttpTransport_Interface
0451      */
0452     public function setHttpTransport(Zend_Service_Solr_HttpTransport_Interface $httpTransport) {
0453         $this->_httpTransport = $httpTransport;
0454     }
0455 
0456     /**
0457      * Set the create documents flag. This determines whether {@link Zend_Service_Solr_Response} objects will
0458      * parse the response and create {@link Zend_Service_Solr_Document} instances in place.
0459      *
0460      * @param boolean $createDocuments
0461      */
0462     public function setCreateDocuments($createDocuments) {
0463         $this->_createDocuments = (bool) $createDocuments;
0464     }
0465 
0466     /**
0467      * Get the current state of teh create documents flag.
0468      *
0469      * @return boolean
0470      */
0471     public function getCreateDocuments() {
0472         return $this->_createDocuments;
0473     }
0474 
0475     /**
0476      * Set the collapse single value arrays flag.
0477      *
0478      * @param boolean $collapseSingleValueArrays
0479      */
0480     public function setCollapseSingleValueArrays($collapseSingleValueArrays) {
0481         $this->_collapseSingleValueArrays = (bool) $collapseSingleValueArrays;
0482     }
0483 
0484     /**
0485      * Get the current state of the collapse single value arrays flag.
0486      *
0487      * @return boolean
0488      */
0489     public function getCollapseSingleValueArrays() {
0490         return $this->_collapseSingleValueArrays;
0491     }
0492 
0493     /**
0494      * Get the current default timeout setting (initially the default_socket_timeout ini setting)
0495      * in seconds
0496      *
0497      * @return float
0498      *
0499      * @deprecated Use the getDefaultTimeout method on the HTTP transport implementation
0500      */
0501     public function getDefaultTimeout() {
0502         return $this->getHttpTransport()->getDefaultTimeout();
0503     }
0504 
0505     /**
0506      * Set the default timeout for all calls that aren't passed a specific timeout
0507      *
0508      * @param float $timeout Timeout value in seconds
0509      *
0510      * @deprecated Use the setDefaultTimeout method on the HTTP transport implementation
0511      */
0512     public function setDefaultTimeout($timeout) {
0513         $this->getHttpTransport()->setDefaultTimeout($timeout);
0514     }
0515 
0516     /**
0517      * Set how NamedLists should be formatted in the response data. This mainly effects
0518      * the facet counts format.
0519      *
0520      * @param string $namedListTreatment
0521      * @throws Zend_Service_Solr_InvalidArgumentException If invalid option is set
0522      */
0523     public function setNamedListTreatment($namedListTreatment) {
0524         switch ((string) $namedListTreatment) {
0525             case Zend_Service_Solr::NAMED_LIST_FLAT:
0526                 $this->_namedListTreatment = Zend_Service_Solr::NAMED_LIST_FLAT;
0527                 break;
0528 
0529             case Zend_Service_Solr::NAMED_LIST_MAP:
0530                 $this->_namedListTreatment = Zend_Service_Solr::NAMED_LIST_MAP;
0531                 break;
0532 
0533             default:
0534                 throw new Zend_Service_Solr_InvalidArgumentException('Not a valid named list treatement option');
0535         }
0536     }
0537 
0538     /**
0539      * Get the current setting for named list treatment.
0540      *
0541      * @return string
0542      */
0543     public function getNamedListTreatment() {
0544         return $this->_namedListTreatment;
0545     }
0546 
0547     /**
0548      * Set the string used to separate the path form the query string.
0549      * Defaulted to '?'
0550      *
0551      * @param string $queryDelimiter
0552      */
0553     public function setQueryDelimiter($queryDelimiter) {
0554         $this->_queryDelimiter = $queryDelimiter;
0555     }
0556 
0557     /**
0558      * Set the string used to separate the parameters in thequery string
0559      * Defaulted to '&'
0560      *
0561      * @param string $queryStringDelimiter
0562      */
0563     public function setQueryStringDelimiter($queryStringDelimiter) {
0564         $this->_queryStringDelimiter = $queryStringDelimiter;
0565     }
0566 
0567     /**
0568      * Call the /admin/ping servlet, can be used to quickly tell if a connection to the
0569      * server is able to be made.
0570      *
0571      * @param float $timeout maximum time to wait for ping in seconds, -1 for unlimited (default is 2)
0572      * @return float Actual time taken to ping the server, FALSE if timeout or HTTP error status occurs
0573      */
0574     public function ping($timeout = 2) {
0575         $start = microtime(true);
0576 
0577         $httpTransport = $this->getHttpTransport();
0578 
0579         $httpResponse = $httpTransport->performHeadRequest($this->_pingUrl, $timeout);
0580         $solrResponse = new Zend_Service_Solr_Response($httpResponse, $this->_createDocuments, $this->_collapseSingleValueArrays);
0581 
0582         if ($solrResponse->getHttpStatus() == 200) {
0583             return microtime(true) - $start;
0584         } else {
0585             return false;
0586         }
0587     }
0588 
0589     /**
0590      * Call the /admin/threads servlet and retrieve information about all threads in the
0591      * Solr servlet's thread group. Useful for diagnostics.
0592      *
0593      * @return Zend_Service_Solr_Response
0594      *
0595      * @throws Zend_Service_Solr_HttpTransportException If an error occurs during the service call
0596      */
0597     public function threads() {
0598         return $this->_sendRawGet($this->_threadsUrl);
0599     }
0600 
0601     /**
0602      * Raw Add Method. Takes a raw post body and sends it to the update service.  Post body
0603      * should be a complete and well formed "add" xml document.
0604      *
0605      * @param string $rawPost
0606      * @return Zend_Service_Solr_Response
0607      *
0608      * @throws Zend_Service_Solr_HttpTransportException If an error occurs during the service call
0609      */
0610     public function add($rawPost) {
0611         return $this->_sendRawPost($this->_updateUrl, $rawPost);
0612     }
0613 
0614     /**
0615      * Add a Solr Document to the index
0616      *
0617      * @param Zend_Service_Solr_Document $document
0618      * @param boolean $allowDups
0619      * @param boolean $overwritePending
0620      * @param boolean $overwriteCommitted
0621      * @param integer $commitWithin The number of milliseconds that a document must be committed within, see @{link http://wiki.apache.org/solr/UpdateXmlMessages#The_Update_Schema} for details.  If left empty this property will not be set in the request.
0622      * @return Zend_Service_Solr_Response
0623      *
0624      * @throws Zend_Service_Solr_HttpTransportException If an error occurs during the service call
0625      */
0626     public function addDocument(Zend_Service_Solr_Document $document, $allowDups = false, $overwritePending = true, $overwriteCommitted = true, $commitWithin = 0) {
0627         $dupValue = $allowDups ? 'true' : 'false';
0628         $pendingValue = $overwritePending ? 'true' : 'false';
0629         $committedValue = $overwriteCommitted ? 'true' : 'false';
0630 
0631         $commitWithin = (int) $commitWithin;
0632         $commitWithinString = $commitWithin > 0 ? " commitWithin=\"{$commitWithin}\"" : '';
0633 
0634         $rawPost = "<add allowDups=\"{$dupValue}\" overwritePending=\"{$pendingValue}\" overwriteCommitted=\"{$committedValue}\"{$commitWithinString}>";
0635         $rawPost .= $this->_documentToXmlFragment($document);
0636         $rawPost .= '</add>';
0637 
0638         return $this->add($rawPost);
0639     }
0640 
0641     /**
0642      * Add an array of Solr Documents to the index all at once
0643      *
0644      * @param array $documents Should be an array of Zend_Service_Solr_Document instances
0645      * @param boolean $allowDups
0646      * @param boolean $overwritePending
0647      * @param boolean $overwriteCommitted
0648      * @param integer $commitWithin The number of milliseconds that a document must be committed within, see @{link http://wiki.apache.org/solr/UpdateXmlMessages#The_Update_Schema} for details.  If left empty this property will not be set in the request.
0649      * @return Zend_Service_Solr_Response
0650      *
0651      * @throws Zend_Service_Solr_HttpTransportException If an error occurs during the service call
0652      */
0653     public function addDocuments($documents, $allowDups = false, $overwritePending = true, $overwriteCommitted = true, $commitWithin = 0) {
0654         $dupValue = $allowDups ? 'true' : 'false';
0655         $pendingValue = $overwritePending ? 'true' : 'false';
0656         $committedValue = $overwriteCommitted ? 'true' : 'false';
0657 
0658         $commitWithin = (int) $commitWithin;
0659         $commitWithinString = $commitWithin > 0 ? " commitWithin=\"{$commitWithin}\"" : '';
0660 
0661         $rawPost = "<add allowDups=\"{$dupValue}\" overwritePending=\"{$pendingValue}\" overwriteCommitted=\"{$committedValue}\"{$commitWithinString}>";
0662 
0663         foreach ($documents as $document) {
0664             if ($document instanceof Zend_Service_Solr_Document) {
0665                 $rawPost .= $this->_documentToXmlFragment($document);
0666             }
0667         }
0668 
0669         $rawPost .= '</add>';
0670 
0671         return $this->add($rawPost);
0672     }
0673 
0674     /**
0675      * Create an XML fragment from a {@link Zend_Service_Solr_Document} instance appropriate for use inside a Solr add call
0676      *
0677      * @return string
0678      */
0679     protected function _documentToXmlFragment(Zend_Service_Solr_Document $document) {
0680         $xml = '<doc';
0681 
0682         if ($document->getBoost() !== false) {
0683             $xml .= ' boost="' . $document->getBoost() . '"';
0684         }
0685 
0686         $xml .= '>';
0687 
0688         foreach ($document as $key => $value) {
0689             $key = htmlspecialchars($key, ENT_QUOTES, 'UTF-8');
0690             $fieldBoost = $document->getFieldBoost($key);
0691 
0692             if (is_array($value)) {
0693                 foreach ($value as $multivalue) {
0694                     $xml .= '<field name="' . $key . '"';
0695 
0696                     if ($fieldBoost !== false) {
0697                         $xml .= ' boost="' . $fieldBoost . '"';
0698 
0699                         // only set the boost for the first field in the set
0700                         $fieldBoost = false;
0701                     }
0702 
0703                     $multivalue = htmlspecialchars($multivalue, ENT_NOQUOTES, 'UTF-8');
0704 
0705                     $xml .= '>' . $multivalue . '</field>';
0706                 }
0707             } else {
0708                 $xml .= '<field name="' . $key . '"';
0709 
0710                 if ($fieldBoost !== false) {
0711                     $xml .= ' boost="' . $fieldBoost . '"';
0712                 }
0713 
0714                 $value = htmlspecialchars($value, ENT_NOQUOTES, 'UTF-8');
0715 
0716                 $xml .= '>' . $value . '</field>';
0717             }
0718         }
0719 
0720         $xml .= '</doc>';
0721 
0722         // replace any control characters to avoid Solr XML parser exception
0723         return $this->_stripCtrlChars($xml);
0724     }
0725 
0726     /**
0727      * Replace control (non-printable) characters from string that are invalid to Solr's XML parser with a space.
0728      *
0729      * @param string $string
0730      * @return string
0731      */
0732     protected function _stripCtrlChars($string) {
0733         // See:  http://w3.org/International/questions/qa-forms-utf-8.html
0734         // Printable utf-8 does not include any of these chars below x7F
0735         return preg_replace('@[\x00-\x08\x0B\x0C\x0E-\x1F]@', ' ', $string);
0736     }
0737 
0738     /**
0739      * Send a commit command.  Will be synchronous unless both wait parameters are set to false.
0740      *
0741      * @param boolean $expungeDeletes Defaults to false, merge segments with deletes away
0742      * @param boolean $waitFlush Defaults to true,  block until index changes are flushed to disk
0743      * @param boolean $waitSearcher Defaults to true, block until a new searcher is opened and registered as the main query searcher, making the changes visible
0744      * @param float $timeout Maximum expected duration (in seconds) of the commit operation on the server (otherwise, will throw a communication exception). Defaults to 1 hour
0745      * @return Zend_Service_Solr_Response
0746      *
0747      * @throws Zend_Service_Solr_HttpTransportException If an error occurs during the service call
0748      */
0749     public function commit($expungeDeletes = false, $waitFlush = true, $waitSearcher = true, $timeout = 3600) {
0750         $expungeValue = $expungeDeletes ? 'true' : 'false';
0751         $flushValue = $waitFlush ? 'true' : 'false';
0752         $searcherValue = $waitSearcher ? 'true' : 'false';
0753 
0754         $rawPost = '<commit expungeDeletes="' . $expungeValue . '" waitFlush="' . $flushValue . '" waitSearcher="' . $searcherValue . '" />';
0755 
0756         return $this->_sendRawPost($this->_updateUrl, $rawPost, $timeout);
0757     }
0758 
0759     /**
0760      * Raw Delete Method. Takes a raw post body and sends it to the update service. Body should be
0761      * a complete and well formed "delete" xml document
0762      *
0763      * @param string $rawPost Expected to be utf-8 encoded xml document
0764      * @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception)
0765      * @return Zend_Service_Solr_Response
0766      *
0767      * @throws Zend_Service_Solr_HttpTransportException If an error occurs during the service call
0768      */
0769     public function delete($rawPost, $timeout = 3600) {
0770         return $this->_sendRawPost($this->_updateUrl, $rawPost, $timeout);
0771     }
0772 
/**
 * Build a delete document for a single document ID and post it through delete().
 *
 * @param string $id Expected to be utf-8 encoded
 * @param boolean $fromPending Rendered as "true" / "false" in the fromPending attribute of the delete element
 * @param boolean $fromCommitted Rendered as "true" / "false" in the fromCommitted attribute of the delete element
 * @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception)
 * @return Zend_Service_Solr_Response
 *
 * @throws Zend_Service_Solr_HttpTransportException If an error occurs during the service call
 */
public function deleteById($id, $fromPending = true, $fromCommitted = true, $timeout = 3600) {
    // the id ends up as element text, so only the xml markup characters are escaped (quotes are left alone)
    $escapedId = htmlspecialchars($id, ENT_NOQUOTES, 'UTF-8');

    $rawPost = sprintf(
        '<delete fromPending="%s" fromCommitted="%s"><id>%s</id></delete>',
        $fromPending ? 'true' : 'false',
        $fromCommitted ? 'true' : 'false',
        $escapedId
    );

    return $this->delete($rawPost, $timeout);
}
0795 
/**
 * Build one delete document covering several document IDs and post it through delete().
 *
 * Every entry of $ids becomes its own id element inside a single delete element, so the whole
 * batch is sent in one request.
 *
 * @param array $ids Expected to be utf-8 encoded strings
 * @param boolean $fromPending Rendered as "true" / "false" in the fromPending attribute of the delete element
 * @param boolean $fromCommitted Rendered as "true" / "false" in the fromCommitted attribute of the delete element
 * @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception)
 * @return Zend_Service_Solr_Response
 *
 * @throws Zend_Service_Solr_HttpTransportException If an error occurs during the service call
 */
public function deleteByMultipleIds($ids, $fromPending = true, $fromCommitted = true, $timeout = 3600) {
    // collect the id elements first; each id is xml escaped as element text (quotes untouched)
    $idElements = '';

    foreach ($ids as $currentId) {
        $idElements .= '<id>' . htmlspecialchars($currentId, ENT_NOQUOTES, 'UTF-8') . '</id>';
    }

    // wrap the collected elements in the delete element carrying both flags
    $rawPost = sprintf(
        '<delete fromPending="%s" fromCommitted="%s">%s</delete>',
        $fromPending ? 'true' : 'false',
        $fromCommitted ? 'true' : 'false',
        $idElements
    );

    return $this->delete($rawPost, $timeout);
}
0824 
/**
 * Build a delete document from a query and post it through delete().
 *
 * @param string $rawQuery Expected to be utf-8 encoded
 * @param boolean $fromPending Rendered as "true" / "false" in the fromPending attribute of the delete element
 * @param boolean $fromCommitted Rendered as "true" / "false" in the fromCommitted attribute of the delete element
 * @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception)
 * @return Zend_Service_Solr_Response
 *
 * @throws Zend_Service_Solr_HttpTransportException If an error occurs during the service call
 */
public function deleteByQuery($rawQuery, $fromPending = true, $fromCommitted = true, $timeout = 3600) {
    // the query is placed as element text, so the xml markup characters are escaped (quotes are left alone)
    $escapedQuery = htmlspecialchars($rawQuery, ENT_NOQUOTES, 'UTF-8');

    $rawPost = sprintf(
        '<delete fromPending="%s" fromCommitted="%s"><query>%s</query></delete>',
        $fromPending ? 'true' : 'false',
        $fromCommitted ? 'true' : 'false',
        $escapedQuery
    );

    return $this->delete($rawPost, $timeout);
}
0847 
/**
 * Use Solr Cell to extract document contents. See {@link http://wiki.apache.org/solr/ExtractingRequestHandler} for information on how
 * to use Solr Cell and what parameters are available.
 *
 * A $file beginning with "http://" or "https://" is handed to extractFromUrl(); anything else is read with
 * file_get_contents() and handed to extractFromString(). When no "resource.name" parameter is given, the
 * base name of $file is used for it.
 *
 * NOTE: when passing a Zend_Service_Solr_Document instance, field names and boosts will automatically be prepended by "literal." and "boost."
 * as appropriate. Any keys from the $params array will NOT be treated this way. Any mappings from the document will overwrite key / value
 * pairs in the params array if they have the same name (e.g. you pass a "literal.id" key and value in your $params array but you also
 * pass in a document instance with an "id" field - the document's value(s) will take precedence).
 *
 * @param string $file Path to file to extract data from
 * @param array $params optional array of key value pairs that will be sent with the post (see Solr Cell documentation)
 * @param Zend_Service_Solr_Document $document optional document that will be used to generate post parameters (literal.* and boost.* params)
 * @param string $mimetype optional mimetype specification (for the file being extracted)
 *
 * @return Zend_Service_Solr_Response
 *
 * @throws Zend_Service_Solr_InvalidArgumentException if $params is neither an array nor null, or if $file could not be read.
 */
public function extract($file, $params = array(), $document = null, $mimetype = 'application/octet-stream') {
    // null is accepted as shorthand for an empty parameter list; any other non-array value is rejected
    if (is_null($params)) {
        $params = array();
    } else if (!is_array($params)) {
        throw new Zend_Service_Solr_InvalidArgumentException("\$params must be a valid array or null");
    }

    // http(s) locations are not read here - extractFromUrl fetches them instead
    $isHttpLocation = substr($file, 0, 7) == 'http://' || substr($file, 0, 8) == 'https://';

    if ($isHttpLocation) {
        return $this->extractFromUrl($file, $params, $document, $mimetype);
    }

    // warnings are suppressed on purpose; a failed read is detected through the false return value
    $contents = @file_get_contents($file);

    if ($contents === false) {
        throw new Zend_Service_Solr_InvalidArgumentException("File '{$file}' is empty or could not be read");
    }

    // default the resource.name parameter to the file's base name when the caller did not set one
    if (!isset($params['resource.name'])) {
        $params['resource.name'] = basename($file);
    }

    // the remaining work (parameter generation and the post itself) happens in extractFromString
    return $this->extractFromString($contents, $params, $document, $mimetype);
}
0896 
/**
 * Use Solr Cell to extract document contents. See {@link http://wiki.apache.org/solr/ExtractingRequestHandler} for information on how
 * to use Solr Cell and what parameters are available.
 *
 * The "wt" and "json.nl" parameters are always set by this method and replace any caller supplied values.
 * The parameters travel in the query string of the extract URL, while $data is sent as the post body.
 *
 * NOTE: when passing a Zend_Service_Solr_Document instance, field names and boosts will automatically be prepended by "literal." and "boost."
 * as appropriate. Any keys from the $params array will NOT be treated this way. Any mappings from the document will overwrite key / value
 * pairs in the params array if they have the same name (e.g. you pass a "literal.id" key and value in your $params array but you also
 * pass in a document instance with an "id" field - the document's value(s) will take precedence).
 *
 * @param string $data Data that will be passed to Solr Cell
 * @param array $params optional array of key value pairs that will be sent with the post (see Solr Cell documentation)
 * @param Zend_Service_Solr_Document $document optional document that will be used to generate post parameters (literal.* and boost.* params); a value that is not a Zend_Service_Solr_Document instance is ignored
 * @param string $mimetype optional mimetype specification (for the file being extracted)
 *
 * @return Zend_Service_Solr_Response
 *
 * @throws Zend_Service_Solr_InvalidArgumentException if $params is neither an array nor null.
 *
 * @todo Should be using multipart/form-data to post parameter values, but I could not get my implementation to work. Needs to be revisited.
 */
public function extractFromString($data, $params = array(), $document = null, $mimetype = 'application/octet-stream') {
    // null is accepted as shorthand for an empty parameter list; any other non-array value is rejected
    if (is_null($params)) {
        $params = array();
    } else if (!is_array($params)) {
        throw new Zend_Service_Solr_InvalidArgumentException("\$params must be a valid array or null");
    }

    // force the response writer and the named list treatment configured on this service
    $params['wt'] = self::SOLR_WRITER;
    $params['json.nl'] = $this->_namedListTreatment;

    // instanceof is already false for null, so a missing document simply skips this section
    if ($document instanceof Zend_Service_Solr_Document) {
        foreach ($document as $fieldName => $value) {
            // a boost.* parameter is only added when the document reports a boost for the field
            $boost = $document->getFieldBoost($fieldName);

            if ($boost !== false) {
                $params['boost.' . $fieldName] = $boost;
            }

            // every field is sent as a literal.* parameter
            $params['literal.' . $fieldName] = $value;
        }
    }

    // params are sent to SOLR in the QUERY STRING
    $queryString = $this->_generateQueryString($params);
    $url = $this->_extractUrl . $this->_queryDelimiter . $queryString;

    // the data is sent to SOLR as the POST BODY - application/octet-stream is the default mimetype
    return $this->_sendRawPost($url, $data, false, $mimetype);
}
0953 
/**
 * Use Solr Cell to extract document contents. See {@link http://wiki.apache.org/solr/ExtractingRequestHandler} for information on how
 * to use Solr Cell and what parameters are available.
 *
 * The URL is fetched with the configured HTTP transport and, on a 200 response, its body is handed to
 * extractFromString(). When no "resource.name" parameter is given, the URL itself is used for it.
 *
 * NOTE: when passing a Zend_Service_Solr_Document instance, field names and boosts will automatically be prepended by "literal." and "boost."
 * as appropriate. Any keys from the $params array will NOT be treated this way. Any mappings from the document will overwrite key / value
 * pairs in the params array if they have the same name (e.g. you pass a "literal.id" key and value in your $params array but you also
 * pass in a document instance with an "id" field - the document's value(s) will take precedence).
 *
 * @param string $url URL
 * @param array $params optional array of key value pairs that will be sent with the post (see Solr Cell documentation)
 * @param Zend_Service_Solr_Document $document optional document that will be used to generate post parameters (literal.* and boost.* params)
 * @param string $mimetype optional mimetype specification (for the file being extracted)
 *
 * @return Zend_Service_Solr_Response
 *
 * @throws Zend_Service_Solr_InvalidArgumentException if $params is neither an array nor null, or if the URL does not answer with a 200 status code.
 */
public function extractFromUrl($url, $params = array(), $document = null, $mimetype = 'application/octet-stream') {
    // null is accepted as shorthand for an empty parameter list; any other non-array value is rejected
    if (is_null($params)) {
        $params = array();
    } else if (!is_array($params)) {
        throw new Zend_Service_Solr_InvalidArgumentException("\$params must be a valid array or null");
    }

    // fetch the URL through the configured Http Transport; no timeout argument is passed
    $httpResponse = $this->getHttpTransport()->performGetRequest($url);

    // anything other than a 200 response is reported as an invalid argument
    if ($httpResponse->getStatusCode() != 200) {
        throw new Zend_Service_Solr_InvalidArgumentException("URL '{$url}' returned non 200 response code");
    }

    // default the resource.name parameter to the URL when the caller did not set one
    if (!isset($params['resource.name'])) {
        $params['resource.name'] = $url;
    }

    // the remaining work (parameter generation and the post itself) happens in extractFromString
    return $this->extractFromString($httpResponse->getBody(), $params, $document, $mimetype);
}
1000 
/**
 * Send an optimize command.  Will be synchronous unless both wait parameters are set
 * to false.
 *
 * @param boolean $waitFlush Rendered as "true" / "false" in the waitFlush attribute of the optimize element
 * @param boolean $waitSearcher Rendered as "true" / "false" in the waitSearcher attribute of the optimize element
 * @param float $timeout Maximum expected duration of the optimize operation on the server (otherwise, will throw a communication exception)
 * @return Zend_Service_Solr_Response
 *
 * @throws Zend_Service_Solr_HttpTransportException If an error occurs during the service call
 */
public function optimize($waitFlush = true, $waitSearcher = true, $timeout = 3600) {
    // each flag is rendered as the literal text "true" or "false" according to its truthiness
    $rawPost = sprintf(
        '<optimize waitFlush="%s" waitSearcher="%s" />',
        $waitFlush ? 'true' : 'false',
        $waitSearcher ? 'true' : 'false'
    );

    // the optimize message goes to the update URL, using the caller supplied timeout
    return $this->_sendRawPost($this->_updateUrl, $rawPost, $timeout);
}
1020 
/**
 * Simple Search interface
 *
 * The "wt", "json.nl", "q", "start" and "rows" parameters are always set by this method and replace any
 * values supplied for those keys in $params.
 *
 * @param string $query The raw query string
 * @param int $offset The starting offset for result documents
 * @param int $limit The maximum number of result documents to return
 * @param array $params key / value pairs for other query parameters (see Solr documentation), use arrays for parameter keys used more than once (e.g. facet.field)
 * @param string $method The HTTP method (Zend_Service_Solr_Service::METHOD_GET or Zend_Service_Solr_Service::METHOD_POST)
 * @return Zend_Service_Solr_Response
 *
 * @throws Zend_Service_Solr_HttpTransportException If an error occurs during the service call
 * @throws Zend_Service_Solr_InvalidArgumentException If an invalid HTTP method is used, or $params is neither an array nor null
 */
public function search($query, $offset = 0, $limit = 10, $params = array(), $method = self::METHOD_GET) {
    // null is accepted as shorthand for an empty parameter list; any other non-array value is rejected
    if (is_null($params)) {
        $params = array();
    } else if (!is_array($params)) {
        throw new Zend_Service_Solr_InvalidArgumentException("\$params must be a valid array or null");
    }

    // parameters owned by this interface; they are assigned in this order and win over caller supplied values
    $forcedParams = array(
        'wt' => self::SOLR_WRITER,
        'json.nl' => $this->_namedListTreatment,
        'q' => $query,
        'start' => $offset,
        'rows' => $limit
    );

    foreach ($forcedParams as $name => $value) {
        $params[$name] = $value;
    }

    $queryString = $this->_generateQueryString($params);

    // GET: the parameters are appended to the search URL
    if ($method == self::METHOD_GET) {
        return $this->_sendRawGet($this->_searchUrl . $this->_queryDelimiter . $queryString);
    }

    // POST: the same query string becomes a form encoded request body
    if ($method == self::METHOD_POST) {
        return $this->_sendRawPost($this->_searchUrl, $queryString, false, 'application/x-www-form-urlencoded; charset=UTF-8');
    }

    throw new Zend_Service_Solr_InvalidArgumentException("Unsupported method '$method', please use the Zend_Service_Solr_Service::METHOD_* constants");
}
1064 
/**
 * Simple Spell interface
 *
 * Sends the query to the spell URL with spell checking and extended results switched on and returns the
 * raw response body. Unlike search(), the request is issued directly with file_get_contents() instead of
 * the service's raw get / post helpers, so the result is NOT wrapped in a Zend_Service_Solr_Response and
 * no Zend_Service_Solr_HttpTransportException is raised by this method.
 *
 * The "wt", "json.nl", "q", "spellcheck.q", "spellcheck" and "spellcheck.extendedResults" parameters are
 * always set by this method and replace any values supplied for those keys in $params.
 *
 * @param string $query The raw query string (also sent as spellcheck.q)
 * @param int $offset The starting offset for result documents; sent as "start" unless $params already contains a "start" value
 * @param int $limit The maximum number of result documents to return; sent as "rows" unless $params already contains a "rows" value
 * @param array $params key / value pairs for other query parameters (see Solr documentation), use arrays for parameter keys used more than once (e.g. facet.field)
 * @param string $method Accepted for signature parity with search() but currently unused - the request is always issued with file_get_contents()
 * @return string|false The raw response body, or false when file_get_contents() fails
 *
 * @throws Zend_Service_Solr_InvalidArgumentException If $params is neither an array nor null
 */
public function spell($query, $offset = 0, $limit = 10, $params = array(), $method = self::METHOD_GET) {
    // ensure params is an array
    if (!is_null($params)) {
        if (!is_array($params)) {
            // params was specified but was not an array - invalid
            throw new Zend_Service_Solr_InvalidArgumentException("\$params must be a valid array or null");
        }
    } else {
        $params = array();
    }

    // construct our full parameters
    // common parameters in this interface
    $params['wt'] = self::SOLR_WRITER;
    $params['json.nl'] = $this->_namedListTreatment;

    $params['q'] = $query;

    // honor the documented paging arguments (they used to be silently dropped); values the caller
    // already placed in $params keep working exactly as before and therefore take precedence
    if (!isset($params['start'])) {
        $params['start'] = $offset;
    }

    if (!isset($params['rows'])) {
        $params['rows'] = $limit;
    }

    $params['spellcheck.q'] = $query;
    $params['spellcheck'] = 'true';
    $params['spellcheck.extendedResults'] = 'true';

    $queryString = $this->_generateQueryString($params);

    // NOTE(review): search() joins URL and query string with $this->_queryDelimiter; the hard-coded '?'
    // is kept here so the request URL does not change for existing callers - confirm which is intended
    return file_get_contents($this->_spellUrl . '?' . $queryString);
}
1104 
1105 }