File indexing completed on 2025-01-26 05:29:57
0001 <?php 0002 0003 /** 0004 * Copyright (c) 2007-2011, Servigistics, Inc. 0005 * All rights reserved. 0006 * 0007 * Redistribution and use in source and binary forms, with or without 0008 * modification, are permitted provided that the following conditions are met: 0009 * 0010 * - Redistributions of source code must retain the above copyright notice, 0011 * this list of conditions and the following disclaimer. 0012 * - Redistributions in binary form must reproduce the above copyright 0013 * notice, this list of conditions and the following disclaimer in the 0014 * documentation and/or other materials provided with the distribution. 0015 * - Neither the name of Servigistics, Inc. nor the names of 0016 * its contributors may be used to endorse or promote products derived from 0017 * this software without specific prior written permission. 0018 * 0019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 0020 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 0021 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 0022 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 0023 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 0024 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 0025 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 0026 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 0027 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 0028 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 0029 * POSSIBILITY OF SUCH DAMAGE. 0030 * 0031 * @copyright Copyright 2007-2011 Servigistics, Inc. 
(http://servigistics.com) 0032 * @license http://solr-php-client.googlecode.com/svn/trunk/COPYING New BSD 0033 * @version $Id: Service.php 59 2011-02-08 20:38:59Z donovan.jimenez $ 0034 * 0035 * @package Apache 0036 * @subpackage Solr 0037 * @author Donovan Jimenez <djimenez@conduit-it.com> 0038 */ 0039 require_once 'Zend/Service/Solr/Exception.php'; 0040 require_once 'Zend/Service/Solr/HttpTransportException.php'; 0041 require_once 'Zend/Service/Solr/InvalidArgumentException.php'; 0042 require_once 'Zend/Service/Solr/Document.php'; 0043 require_once 'Zend/Service/Solr/Response.php'; 0044 require_once 'Zend/Service/Solr/HttpTransport/Interface.php'; 0045 0046 /** 0047 * Starting point for the Solr API. Represents a Solr server resource and has 0048 * methods for pinging, adding, deleting, committing, optimizing and searching. 0049 * 0050 * Example Usage: 0051 * <code> 0052 * ... 0053 * $solr = new Zend_Service_Solr(); //or explicitly new Zend_Service_Solr_Service('localhost', 8180, '/solr') 0054 * 0055 * if ($solr->ping()) 0056 * { 0057 * $solr->deleteByQuery('*:*'); //deletes ALL documents - be careful :) 0058 * 0059 * $document = new Zend_Service_Solr_Document(); 0060 * $document->id = uniqid(); //or something else suitably unique 0061 * 0062 * $document->title = 'Some Title'; 0063 * $document->content = 'Some content for this wonderful document. Blah blah blah.'; 0064 * 0065 * $solr->addDocument($document); //if you're going to be adding documents in bulk using addDocuments 0066 * //with an array of documents is faster 0067 * 0068 * $solr->commit(); //commit to see the deletes and the document 0069 * $solr->optimize(); //merges multiple segments into one 0070 * 0071 * //and the one we all care about, search! 0072 * //any other common or custom parameters to the request handler can go in the 0073 * //optional 4th array argument. 0074 * $solr->search('content:blah', 0, 10, array('sort' => 'timestamp desc')); 0075 * } 0076 * ... 
* </code>
 *
 * @todo Investigate using other HTTP clients other than file_get_contents built-in handler. Could provide performance
 * improvements when dealing with multiple requests by using HTTP's keep alive functionality
 */
class Zend_Service_Solr {
    /**
     * SVN Revision meta data for this class
     */
    const SVN_REVISION = '$Revision: 59 $';

    /**
     * SVN ID meta data for this class
     */
    const SVN_ID = '$Id: Service.php 59 2011-02-08 20:38:59Z donovan.jimenez $';

    /**
     * Response writer we'll request - JSON. See http://code.google.com/p/solr-php-client/issues/detail?id=6#c1 for reasoning
     */
    const SOLR_WRITER = 'json';

    /**
     * NamedList Treatment constants
     */
    const NAMED_LIST_FLAT = 'flat';
    const NAMED_LIST_MAP = 'map';

    /**
     * Search HTTP Methods
     */
    const METHOD_GET = 'GET';
    const METHOD_POST = 'POST';

    /**
     * Servlet mappings (paths relative to the configured Solr path)
     */
    const PING_SERVLET = 'admin/ping';
    const UPDATE_SERVLET = 'update';
    const SEARCH_SERVLET = 'select';
    const SPELL_SERVLET = 'spell';
    const THREADS_SERVLET = 'admin/threads';
    const EXTRACT_SERVLET = 'update/extract';

    /**
     * Server identification strings
     *
     * @var string
     */
    protected $_host, $_port, $_path;

    /**
     * Whether {@link Zend_Service_Solr_Response} objects should create {@link Zend_Service_Solr_Document}s in
     * the returned parsed data
     *
     * @var boolean
     */
    protected $_createDocuments = true;

    /**
     * Whether {@link Zend_Service_Solr_Response} objects should have multivalue fields with only a single value
     * collapsed to appear as a single value would.
     *
     * @var boolean
     */
    protected $_collapseSingleValueArrays = true;

    /**
     * How NamedLists should be formatted in the output. This specifically affects facet counts. Valid values
     * are {@link Zend_Service_Solr::NAMED_LIST_MAP} (default) or {@link Zend_Service_Solr::NAMED_LIST_FLAT}.
     *
     * @var string
     */
    protected $_namedListTreatment = self::NAMED_LIST_MAP;

    /**
     * Query delimiters. Someone might want to be able to change
     * these (to use &amp; instead of & for example), so I've provided them.
     *
     * $_queryBracketsEscaped is a boolean that the constructor overwrites with the
     * result of a PHP version check.
     *
     * @var string
     */
    protected $_queryDelimiter = '?', $_queryStringDelimiter = '&', $_queryBracketsEscaped = true;

    /**
     * Constructed servlet full path URLs.
     *
     * $_extractUrl is declared here because _initUrls() assigns it; left undeclared it
     * would be created as a dynamic property, which is deprecated as of PHP 8.2.
     *
     * @var string
     */
    protected $_extractUrl, $_pingUrl, $_updateUrl, $_spellUrl, $_searchUrl, $_threadsUrl;

    /**
     * Keep track of whether our URLs have been constructed
     *
     * @var boolean
     */
    protected $_urlsInited = false;

    /**
     * HTTP Transport implementation (pluggable). Holds false until a transport is
     * set explicitly or lazily created by getHttpTransport().
     *
     * @var Zend_Service_Solr_HttpTransport_Interface|false
     */
    protected $_httpTransport = false;

    /**
     * Escape a value for special query characters such as ':', '(', ')', '*', '?', etc.
*
     * NOTE: inside a phrase fewer characters need to be escaped; use {@link Zend_Service_Solr::escapePhrase()} instead
     *
     * @param string $value
     * @return string
     */
    static public function escape($value) {
        // special character list taken from http://lucene.apache.org/java/docs/queryparsersyntax.html#Escaping%20Special%20Characters
        // each match is prefixed with a single backslash
        return preg_replace(
            '/(\+|-|&&|\|\||!|\(|\)|\{|}|\[|]|\^|"|~|\*|\?|:|\\\)/',
            '\\\$1',
            $value
        );
    }

    /**
     * Escape a value that will be placed inside a quoted phrase. Only the double
     * quote and the backslash are prefixed with a backslash.
     *
     * @param string $value
     * @return string
     */
    static public function escapePhrase($value) {
        return preg_replace('/("|\\\)/', '\\\$1', $value);
    }

    /**
     * Build phrase syntax from a value: the phrase-escaped value wrapped in double quotes.
     *
     * @param string $value
     * @return string
     */
    static public function phrase($value) {
        $escaped = self::escapePhrase($value);

        return sprintf('"%s"', $escaped);
    }

    /**
     * Constructor. All parameters are optional and will take on default values
     * if not specified.
*
     * @param string $host
     * @param string $port
     * @param string $path
     * @param Zend_Service_Solr_HttpTransport_Interface $httpTransport
     */
    public function __construct($host = 'localhost', $port = 8983, $path = '/solr/', $httpTransport = false) {
        $this->setHost($host);
        $this->setPort($port);
        $this->setPath($path);

        $this->_initUrls();

        if ($httpTransport) {
            $this->setHttpTransport($httpTransport);
        }

        // check that our php version is >= 5.1.3 so we can correct for http_build_query behavior later
        $this->_queryBracketsEscaped = version_compare(phpversion(), '5.1.3', '>=');
    }

    /**
     * Return a valid http URL given this server's host, port and path and a provided servlet name
     *
     * @param string $servlet
     * @param array $params optional key => value pairs appended as a urlencoded query string
     * @return string
     */
    protected function _constructUrl($servlet, $params = array()) {
        if (count($params)) {
            //escape all parameters appropriately for inclusion in the query string
            $escapedParams = array();

            foreach ($params as $key => $value) {
                $escapedParams[] = urlencode($key) . '=' . urlencode($value);
            }

            $queryString = $this->_queryDelimiter . implode($this->_queryStringDelimiter, $escapedParams);
        } else {
            $queryString = '';
        }

        return 'http://' . $this->_host . ':' . $this->_port . $this->_path . $servlet . $queryString;
    }

    /**
     * Construct the full URLs for every servlet we reference and mark them as initialized
     */
    protected function _initUrls() {
        //Initialize our full servlet URLs now that we have server information
        $this->_extractUrl = $this->_constructUrl(self::EXTRACT_SERVLET);
        $this->_pingUrl = $this->_constructUrl(self::PING_SERVLET);
        $this->_spellUrl = $this->_constructUrl(self::SPELL_SERVLET);
        $this->_searchUrl = $this->_constructUrl(self::SEARCH_SERVLET);
        $this->_threadsUrl = $this->_constructUrl(self::THREADS_SERVLET, array('wt' => self::SOLR_WRITER));
        $this->_updateUrl = $this->_constructUrl(self::UPDATE_SERVLET, array('wt' => self::SOLR_WRITER));

        $this->_urlsInited = true;
    }

    /**
     * Build a query string from a parameter array, flattening numerically indexed
     * array values (foo[0]=a&foo[1]=b) into repeated parameters (foo=a&foo=b).
     *
     * @param array $params
     * @return string
     */
    protected function _generateQueryString($params) {
        // use http_build_query to encode our arguments because its faster
        // than urlencoding all the parts ourselves in a loop
        //
        // because http_build_query treats arrays differently than we want to, correct the query
        // string by changing foo[#]=bar (# being an actual number) parameter strings to just
        // multiple foo=bar strings. This regex should always work since '=' will be urlencoded
        // anywhere else the regex isn't expecting it
        //
        // NOTE: before php 5.1.3 brackets were not url encoded by http_build query - we've checked
        // the php version in the constructor and put the results in the instance variable. Also, before
        // 5.1.2 the arg_separator parameter was not available, so don't use it
        if ($this->_queryBracketsEscaped) {
            // pass '' (not null) as the numeric prefix: null for this string parameter
            // is deprecated as of PHP 8.1 and '' has the same "no prefix" meaning
            $queryString = http_build_query($params, '', $this->_queryStringDelimiter);
            return preg_replace('/%5B(?:[0-9]|[1-9][0-9]+)%5D=/', '=', $queryString);
        } else {
            $queryString = http_build_query($params);
            return preg_replace('/\\[(?:[0-9]|[1-9][0-9]+)\\]=/', '=', $queryString);
        }
    }

    /**
     * Central method for making a get operation against this Solr Server
     *
     * @param string $url
     * @param float $timeout Read timeout in seconds
     * @return Zend_Service_Solr_Response
     *
     * @throws Zend_Service_Solr_HttpTransportException If a non 200 response status is returned
     */
    protected function _sendRawGet($url, $timeout = FALSE) {
        $httpTransport = $this->getHttpTransport();

        $httpResponse = $httpTransport->performGetRequest($url, $timeout);
        $solrResponse = new Zend_Service_Solr_Response($httpResponse, $this->_createDocuments, $this->_collapseSingleValueArrays);

        if ($solrResponse->getHttpStatus() != 200) {
            throw new Zend_Service_Solr_HttpTransportException($solrResponse);
        }

        return $solrResponse;
    }

    /**
     * Central method for making a post operation against this Solr Server
     *
     * @param string $url
     * @param string $rawPost
     * @param float $timeout Read timeout in seconds
     * @param string $contentType
     * @return Zend_Service_Solr_Response
     *
     * @throws Zend_Service_Solr_HttpTransportException If a non 200 response status is returned
     */
    protected function _sendRawPost($url, $rawPost, $timeout = FALSE, $contentType = 'text/xml; charset=UTF-8') {
        $httpTransport = $this->getHttpTransport();

        $httpResponse = $httpTransport->performPostRequest($url, $rawPost, $contentType, $timeout);
        $solrResponse = new Zend_Service_Solr_Response($httpResponse, $this->_createDocuments, $this->_collapseSingleValueArrays);

        if ($solrResponse->getHttpStatus() != 200) {
            throw new Zend_Service_Solr_HttpTransportException($solrResponse);
        }

        return $solrResponse;
    }

    /**
     * Returns the set host
     *
     * @return string
     */
    public function getHost() {
        return $this->_host;
    }

    /**
     * Set the host used. Servlet URLs are rebuilt if they were already initialized.
     *
     * @param string $host
     *
     * @throws Zend_Service_Solr_InvalidArgumentException If the host parameter is empty
     */
    public function setHost($host) {
        if (empty($host)) {
            throw new Zend_Service_Solr_InvalidArgumentException('Host parameter is empty');
        } else {
            $this->_host = $host;
        }

        if ($this->_urlsInited) {
            $this->_initUrls();
        }
    }

    /**
     * Get the set port
     *
     * @return integer
     */
    public function getPort() {
        return $this->_port;
    }

    /**
     * Set the port used. The value is cast to an integer; servlet URLs are rebuilt
     * if they were already initialized.
     *
     * @param integer $port
     *
     * @throws Zend_Service_Solr_InvalidArgumentException If the port is not a positive integer
     */
    public function setPort($port) {
        $port = (int) $port;

        if ($port <= 0) {
            throw new Zend_Service_Solr_InvalidArgumentException('Port is not a valid port number');
        } else {
            $this->_port = $port;
        }

        if ($this->_urlsInited) {
            $this->_initUrls();
        }
    }

    /**
     * Get the set path.
     *
     * @return string
     */
    public function getPath() {
        return $this->_path;
    }

    /**
     * Set the path used. The stored path always starts and ends with exactly one
     * slash; servlet URLs are rebuilt if they were already initialized.
     *
     * @param string $path
     */
    public function setPath($path) {
        $path = trim($path, '/');

        // an empty (root) path must collapse to '/', not '//', otherwise every
        // servlet URL would contain a double slash after the port
        $this->_path = ($path === '') ? '/' : '/' . $path . '/';

        if ($this->_urlsInited) {
            $this->_initUrls();
        }
    }

    /**
     * Get the current configured HTTP Transport
     *
     * @return Zend_Service_Solr_HttpTransport_Interface
     */
    public function getHttpTransport() {
        // lazy load a default if one has not been set
        if ($this->_httpTransport === false) {
            require_once 'Zend/Service/Solr/HttpTransport/FileGetContents.php';
            $this->_httpTransport = new Zend_Service_Solr_HttpTransport_FileGetContents();
        }

        return $this->_httpTransport;
    }

    /**
     * Set the HTTP Transport implementation that will be used for all HTTP requests
     *
     * @param Zend_Service_Solr_HttpTransport_Interface $httpTransport
     */
    public function setHttpTransport(Zend_Service_Solr_HttpTransport_Interface $httpTransport) {
        $this->_httpTransport = $httpTransport;
    }

    /**
     * Set the create documents flag. This determines whether {@link Zend_Service_Solr_Response} objects will
     * parse the response and create {@link Zend_Service_Solr_Document} instances in place.
     *
     * @param boolean $createDocuments
     */
    public function setCreateDocuments($createDocuments) {
        $this->_createDocuments = (bool) $createDocuments;
    }

    /**
     * Get the current state of the create documents flag.
     *
     * @return boolean
     */
    public function getCreateDocuments() {
        return $this->_createDocuments;
    }

    /**
     * Set the collapse single value arrays flag.
     *
     * @param boolean $collapseSingleValueArrays
     */
    public function setCollapseSingleValueArrays($collapseSingleValueArrays) {
        $this->_collapseSingleValueArrays = (bool) $collapseSingleValueArrays;
    }

    /**
     * Get the current state of the collapse single value arrays flag.
*
     * @return boolean
     */
    public function getCollapseSingleValueArrays() {
        return $this->_collapseSingleValueArrays;
    }

    /**
     * Read the default timeout (in seconds) from the current HTTP transport.
     *
     * @return float
     *
     * @deprecated Use the getDefaultTimeout method on the HTTP transport implementation
     */
    public function getDefaultTimeout() {
        $transport = $this->getHttpTransport();

        return $transport->getDefaultTimeout();
    }

    /**
     * Forward a new default timeout to the current HTTP transport; it applies to
     * calls that are not passed a specific timeout.
     *
     * @param float $timeout Timeout value in seconds
     *
     * @deprecated Use the setDefaultTimeout method on the HTTP transport implementation
     */
    public function setDefaultTimeout($timeout) {
        $transport = $this->getHttpTransport();

        $transport->setDefaultTimeout($timeout);
    }

    /**
     * Choose how NamedLists are formatted in the response data (mainly the facet
     * counts format). Only the two NAMED_LIST_* constants are accepted.
     *
     * @param string $namedListTreatment
     * @throws Zend_Service_Solr_InvalidArgumentException If invalid option is set
     */
    public function setNamedListTreatment($namedListTreatment) {
        $requested = (string) $namedListTreatment;
        $allowed = array(self::NAMED_LIST_FLAT, self::NAMED_LIST_MAP);

        if (!in_array($requested, $allowed, true)) {
            throw new Zend_Service_Solr_InvalidArgumentException('Not a valid named list treatement option');
        }

        $this->_namedListTreatment = $requested;
    }

    /**
     * Get the current setting for named list treatment.
     *
     * @return string
     */
    public function getNamedListTreatment() {
        return $this->_namedListTreatment;
    }

    /**
     * Set the string used to separate the path from the query string.
     * Defaulted to '?'
*
     * @param string $queryDelimiter
     */
    public function setQueryDelimiter($queryDelimiter) {
        $this->_queryDelimiter = $queryDelimiter;
    }

    /**
     * Set the string used to separate the parameters in the query string.
     * Defaulted to '&'
     *
     * @param string $queryStringDelimiter
     */
    public function setQueryStringDelimiter($queryStringDelimiter) {
        $this->_queryStringDelimiter = $queryStringDelimiter;
    }

    /**
     * Issue a HEAD request against the /admin/ping servlet; a quick way to tell
     * whether a connection to the server can be made.
     *
     * @param float $timeout maximum time to wait for ping in seconds, -1 for unlimited (default is 2)
     * @return float Actual time taken to ping the server, FALSE if timeout or HTTP error status occurs
     */
    public function ping($timeout = 2) {
        $startedAt = microtime(true);

        $httpResponse = $this->getHttpTransport()->performHeadRequest($this->_pingUrl, $timeout);
        $solrResponse = new Zend_Service_Solr_Response($httpResponse, $this->_createDocuments, $this->_collapseSingleValueArrays);

        // anything other than a 200 status counts as a failed ping
        if ($solrResponse->getHttpStatus() != 200) {
            return false;
        }

        return microtime(true) - $startedAt;
    }

    /**
     * Call the /admin/threads servlet and retrieve information about all threads in the
     * Solr servlet's thread group. Useful for diagnostics.
     *
     * @return Zend_Service_Solr_Response
     *
     * @throws Zend_Service_Solr_HttpTransportException If an error occurs during the service call
     */
    public function threads() {
        $threadsResponse = $this->_sendRawGet($this->_threadsUrl);

        return $threadsResponse;
    }

    /**
     * Raw Add Method. Takes a raw post body and sends it to the update service. Post body
     * should be a complete and well formed "add" xml document.
*
     * @param string $rawPost
     * @return Zend_Service_Solr_Response
     *
     * @throws Zend_Service_Solr_HttpTransportException If an error occurs during the service call
     */
    public function add($rawPost) {
        return $this->_sendRawPost($this->_updateUrl, $rawPost);
    }

    /**
     * Add a Solr Document to the index
     *
     * @param Zend_Service_Solr_Document $document
     * @param boolean $allowDups
     * @param boolean $overwritePending
     * @param boolean $overwriteCommitted
     * @param integer $commitWithin The number of milliseconds that a document must be committed within, see {@link http://wiki.apache.org/solr/UpdateXmlMessages#The_Update_Schema} for details. If left empty this property will not be set in the request.
     * @return Zend_Service_Solr_Response
     *
     * @throws Zend_Service_Solr_HttpTransportException If an error occurs during the service call
     */
    public function addDocument(Zend_Service_Solr_Document $document, $allowDups = false, $overwritePending = true, $overwriteCommitted = true, $commitWithin = 0) {
        $dupValue = $allowDups ? 'true' : 'false';
        $pendingValue = $overwritePending ? 'true' : 'false';
        $committedValue = $overwriteCommitted ? 'true' : 'false';

        // the commitWithin attribute is only emitted for a positive integer value
        $commitWithin = (int) $commitWithin;
        $commitWithinString = $commitWithin > 0 ? " commitWithin=\"{$commitWithin}\"" : '';

        $rawPost = "<add allowDups=\"{$dupValue}\" overwritePending=\"{$pendingValue}\" overwriteCommitted=\"{$committedValue}\"{$commitWithinString}>";
        $rawPost .= $this->_documentToXmlFragment($document);
        $rawPost .= '</add>';

        return $this->add($rawPost);
    }

    /**
     * Add an array of Solr Documents to the index all at once. Entries that are not
     * Zend_Service_Solr_Document instances are skipped.
     *
     * @param array $documents Should be an array of Zend_Service_Solr_Document instances
     * @param boolean $allowDups
     * @param boolean $overwritePending
     * @param boolean $overwriteCommitted
     * @param integer $commitWithin The number of milliseconds that a document must be committed within, see {@link http://wiki.apache.org/solr/UpdateXmlMessages#The_Update_Schema} for details. If left empty this property will not be set in the request.
     * @return Zend_Service_Solr_Response
     *
     * @throws Zend_Service_Solr_HttpTransportException If an error occurs during the service call
     */
    public function addDocuments($documents, $allowDups = false, $overwritePending = true, $overwriteCommitted = true, $commitWithin = 0) {
        $dupValue = $allowDups ? 'true' : 'false';
        $pendingValue = $overwritePending ? 'true' : 'false';
        $committedValue = $overwriteCommitted ? 'true' : 'false';

        // the commitWithin attribute is only emitted for a positive integer value
        $commitWithin = (int) $commitWithin;
        $commitWithinString = $commitWithin > 0 ? " commitWithin=\"{$commitWithin}\"" : '';

        $rawPost = "<add allowDups=\"{$dupValue}\" overwritePending=\"{$pendingValue}\" overwriteCommitted=\"{$committedValue}\"{$commitWithinString}>";

        foreach ($documents as $document) {
            if ($document instanceof Zend_Service_Solr_Document) {
                $rawPost .= $this->_documentToXmlFragment($document);
            }
        }

        $rawPost .= '</add>';

        return $this->add($rawPost);
    }

    /**
     * Create an XML fragment from a {@link Zend_Service_Solr_Document} instance appropriate for use inside a Solr add call
     *
     * @param Zend_Service_Solr_Document $document
     * @return string
     */
    protected function _documentToXmlFragment(Zend_Service_Solr_Document $document) {
        $xml = '<doc';

        if ($document->getBoost() !== false) {
            $xml .= ' boost="' . $document->getBoost() . '"';
        }

        $xml .= '>';

        foreach ($document as $key => $value) {
            // look the boost up with the raw field name BEFORE escaping it; the escaped
            // form differs for names containing XML special characters and would miss
            $fieldBoost = $document->getFieldBoost($key);
            $key = htmlspecialchars($key, ENT_QUOTES, 'UTF-8');

            // treat a single value as a one element list so both cases share one loop
            $values = is_array($value) ? $value : array($value);

            foreach ($values as $singleValue) {
                $xml .= '<field name="' . $key . '"';

                if ($fieldBoost !== false) {
                    $xml .= ' boost="' . $fieldBoost . '"';

                    // only set the boost for the first field in the set
                    $fieldBoost = false;
                }

                $xml .= '>' . htmlspecialchars($singleValue, ENT_NOQUOTES, 'UTF-8') . '</field>';
            }
        }

        $xml .= '</doc>';

        // replace any control characters to avoid Solr XML parser exception
        return $this->_stripCtrlChars($xml);
    }

    /**
     * Replace control (non-printable) characters from string that are invalid to Solr's XML parser with a space.
     *
     * @param string $string
     * @return string
     */
    protected function _stripCtrlChars($string) {
        // See: http://w3.org/International/questions/qa-forms-utf-8.html
        // Printable utf-8 does not include any of these chars below x7F
        return preg_replace('@[\x00-\x08\x0B\x0C\x0E-\x1F]@', ' ', $string);
    }

    /**
     * Send a commit command. Will be synchronous unless both wait parameters are set to false.
     *
     * @param boolean $expungeDeletes Defaults to false, merge segments with deletes away
     * @param boolean $waitFlush Defaults to true, block until index changes are flushed to disk
     * @param boolean $waitSearcher Defaults to true, block until a new searcher is opened and registered as the main query searcher, making the changes visible
     * @param float $timeout Maximum expected duration (in seconds) of the commit operation on the server (otherwise, will throw a communication exception). Defaults to 1 hour
     * @return Zend_Service_Solr_Response
     *
     * @throws Zend_Service_Solr_HttpTransportException If an error occurs during the service call
     */
    public function commit($expungeDeletes = false, $waitFlush = true, $waitSearcher = true, $timeout = 3600) {
        $expungeValue = $expungeDeletes ? 'true' : 'false';
        $flushValue = $waitFlush ? 'true' : 'false';
        $searcherValue = $waitSearcher ? 'true' : 'false';

        $rawPost = '<commit expungeDeletes="' . $expungeValue . '" waitFlush="' . $flushValue . '" waitSearcher="' . $searcherValue . '" />';

        return $this->_sendRawPost($this->_updateUrl, $rawPost, $timeout);
    }

    /**
     * Raw Delete Method.
* Takes a raw post body and sends it to the update service. Body should be
     * a complete and well formed "delete" xml document
     *
     * @param string $rawPost Expected to be utf-8 encoded xml document
     * @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception)
     * @return Zend_Service_Solr_Response
     *
     * @throws Zend_Service_Solr_HttpTransportException If an error occurs during the service call
     */
    public function delete($rawPost, $timeout = 3600) {
        $response = $this->_sendRawPost($this->_updateUrl, $rawPost, $timeout);

        return $response;
    }

    /**
     * Build a delete message for a single document ID and post it via delete().
     *
     * @param string $id Expected to be utf-8 encoded
     * @param boolean $fromPending
     * @param boolean $fromCommitted
     * @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception)
     * @return Zend_Service_Solr_Response
     *
     * @throws Zend_Service_Solr_HttpTransportException If an error occurs during the service call
     */
    public function deleteById($id, $fromPending = true, $fromCommitted = true, $timeout = 3600) {
        // the id is element content, so XML special characters are escaped (quotes left alone)
        $escapedId = htmlspecialchars($id, ENT_NOQUOTES, 'UTF-8');

        $rawPost = sprintf(
            '<delete fromPending="%s" fromCommitted="%s"><id>%s</id></delete>',
            $fromPending ? 'true' : 'false',
            $fromCommitted ? 'true' : 'false',
            $escapedId
        );

        return $this->delete($rawPost, $timeout);
    }

    /**
     * Create and post a delete document based on multiple document IDs.
*
     * @param array $ids Expected to be utf-8 encoded strings
     * @param boolean $fromPending
     * @param boolean $fromCommitted
     * @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception)
     * @return Zend_Service_Solr_Response
     *
     * @throws Zend_Service_Solr_HttpTransportException If an error occurs during the service call
     */
    public function deleteByMultipleIds($ids, $fromPending = true, $fromCommitted = true, $timeout = 3600) {
        // one <id> element per entry, each XML-escaped as element content
        $idElements = '';

        foreach ($ids as $id) {
            $idElements .= '<id>' . htmlspecialchars($id, ENT_NOQUOTES, 'UTF-8') . '</id>';
        }

        $rawPost = sprintf(
            '<delete fromPending="%s" fromCommitted="%s">%s</delete>',
            $fromPending ? 'true' : 'false',
            $fromCommitted ? 'true' : 'false',
            $idElements
        );

        return $this->delete($rawPost, $timeout);
    }

    /**
     * Build a delete message for a query and post it via delete().
     *
     * @param string $rawQuery Expected to be utf-8 encoded
     * @param boolean $fromPending
     * @param boolean $fromCommitted
     * @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception)
     * @return Zend_Service_Solr_Response
     *
     * @throws Zend_Service_Solr_HttpTransportException If an error occurs during the service call
     */
    public function deleteByQuery($rawQuery, $fromPending = true, $fromCommitted = true, $timeout = 3600) {
        // the query is element content, so XML special characters are escaped (quotes left alone)
        $escapedQuery = htmlspecialchars($rawQuery, ENT_NOQUOTES, 'UTF-8');

        $rawPost = sprintf(
            '<delete fromPending="%s" fromCommitted="%s"><query>%s</query></delete>',
            $fromPending ? 'true' : 'false',
            $fromCommitted ? 'true' : 'false',
            $escapedQuery
        );

        return $this->delete($rawPost, $timeout);
    }

    /**
     * Use Solr Cell to extract document contents. See {@link http://wiki.apache.org/solr/ExtractingRequestHandler} for information on how
     * to use Solr Cell and what parameters are available.
     *
     * NOTE: when passing a Zend_Service_Solr_Document instance, field names and boosts will automatically be prepended by "literal." and "boost."
     * as appropriate. Any keys from the $params array will NOT be treated this way. Any mappings from the document will overwrite key / value
     * pairs in the params array if they have the same name (e.g. you pass a "literal.id" key and value in your $params array but you also
     * pass in a document instance with an "id" field - the document's value(s) will take precedence).
     *
     * @param string $file Path to file to extract data from
     * @param array $params optional array of key value pairs that will be sent with the post (see Solr Cell documentation)
     * @param Zend_Service_Solr_Document $document optional document that will be used to generate post parameters (literal.* and boost.* params)
     * @param string $mimetype optional mimetype specification (for the file being extracted)
     *
     * @return Zend_Service_Solr_Response
     *
     * @throws Zend_Service_Solr_InvalidArgumentException if $file, $params, or $document are invalid.
0865 */ 0866 public function extract($file, $params = array(), $document = null, $mimetype = 'application/octet-stream') { 0867 // check if $params is an array (allow null for default empty array) 0868 if (!is_null($params)) { 0869 if (!is_array($params)) { 0870 throw new Zend_Service_Solr_InvalidArgumentException("\$params must be a valid array or null"); 0871 } 0872 } else { 0873 $params = array(); 0874 } 0875 0876 // if $file is an http request, defer to extractFromUrl instead 0877 if (substr($file, 0, 7) == 'http://' || substr($file, 0, 8) == 'https://') { 0878 return $this->extractFromUrl($file, $params, $document, $mimetype); 0879 } 0880 0881 // read the contents of the file 0882 $contents = @file_get_contents($file); 0883 0884 if ($contents !== false) { 0885 // add the resource.name parameter if not specified 0886 if (!isset($params['resource.name'])) { 0887 $params['resource.name'] = basename($file); 0888 } 0889 0890 // delegate the rest to extractFromString 0891 return $this->extractFromString($contents, $params, $document, $mimetype); 0892 } else { 0893 throw new Zend_Service_Solr_InvalidArgumentException("File '{$file}' is empty or could not be read"); 0894 } 0895 } 0896 0897 /** 0898 * Use Solr Cell to extract document contents. See {@link http://wiki.apache.org/solr/ExtractingRequestHandler} for information on how 0899 * to use Solr Cell and what parameters are available. 0900 * 0901 * NOTE: when passing an Zend_Service_Solr_Document instance, field names and boosts will automatically be prepended by "literal." and "boost." 0902 * as appropriate. Any keys from the $params array will NOT be treated this way. Any mappings from the document will overwrite key / value 0903 * pairs in the params array if they have the same name (e.g. you pass a "literal.id" key and value in your $params array but you also 0904 * pass in a document isntance with an "id" field" - the document's value(s) will take precedence). 
0905 * 0906 * @param string $data Data that will be passed to Solr Cell 0907 * @param array $params optional array of key value pairs that will be sent with the post (see Solr Cell documentation) 0908 * @param Zend_Service_Solr_Document $document optional document that will be used to generate post parameters (literal.* and boost.* params) 0909 * @param string $mimetype optional mimetype specification (for the file being extracted) 0910 * 0911 * @return Zend_Service_Solr_Response 0912 * 0913 * @throws Zend_Service_Solr_InvalidArgumentException if $file, $params, or $document are invalid. 0914 * 0915 * @todo Should be using multipart/form-data to post parameter values, but I could not get my implementation to work. Needs revisisted. 0916 */ 0917 public function extractFromString($data, $params = array(), $document = null, $mimetype = 'application/octet-stream') { 0918 // check if $params is an array (allow null for default empty array) 0919 if (!is_null($params)) { 0920 if (!is_array($params)) { 0921 throw new Zend_Service_Solr_InvalidArgumentException("\$params must be a valid array or null"); 0922 } 0923 } else { 0924 $params = array(); 0925 } 0926 0927 // make sure we receive our response in JSON and have proper name list treatment 0928 $params['wt'] = self::SOLR_WRITER; 0929 $params['json.nl'] = $this->_namedListTreatment; 0930 0931 // check if $document is an Zend_Service_Solr_Document instance 0932 if (!is_null($document) && $document instanceof Zend_Service_Solr_Document) { 0933 // iterate document, adding literal.* and boost.* fields to $params as appropriate 0934 foreach ($document as $field => $fieldValue) { 0935 // check if we need to add a boost.* parameters 0936 $fieldBoost = $document->getFieldBoost($field); 0937 0938 if ($fieldBoost !== false) { 0939 $params["boost.{$field}"] = $fieldBoost; 0940 } 0941 0942 // add the literal.* parameter 0943 $params["literal.{$field}"] = $fieldValue; 0944 } 0945 } 0946 0947 // params will be sent to SOLR in the QUERY 
STRING 0948 $queryString = $this->_generateQueryString($params); 0949 0950 // the file contents will be sent to SOLR as the POST BODY - we use application/octect-stream as default mimetype 0951 return $this->_sendRawPost($this->_extractUrl . $this->_queryDelimiter . $queryString, $data, false, $mimetype); 0952 } 0953 0954 /** 0955 * Use Solr Cell to extract document contents. See {@link http://wiki.apache.org/solr/ExtractingRequestHandler} for information on how 0956 * to use Solr Cell and what parameters are available. 0957 * 0958 * NOTE: when passing an Zend_Service_Solr_Document instance, field names and boosts will automatically be prepended by "literal." and "boost." 0959 * as appropriate. Any keys from the $params array will NOT be treated this way. Any mappings from the document will overwrite key / value 0960 * pairs in the params array if they have the same name (e.g. you pass a "literal.id" key and value in your $params array but you also 0961 * pass in a document isntance with an "id" field" - the document's value(s) will take precedence). 0962 * 0963 * @param string $url URL 0964 * @param array $params optional array of key value pairs that will be sent with the post (see Solr Cell documentation) 0965 * @param Zend_Service_Solr_Document $document optional document that will be used to generate post parameters (literal.* and boost.* params) 0966 * @param string $mimetype optional mimetype specification (for the file being extracted) 0967 * 0968 * @return Zend_Service_Solr_Response 0969 * 0970 * @throws Zend_Service_Solr_InvalidArgumentException if $url, $params, or $document are invalid. 
0971 */ 0972 public function extractFromUrl($url, $params = array(), $document = null, $mimetype = 'application/octet-stream') { 0973 // check if $params is an array (allow null for default empty array) 0974 if (!is_null($params)) { 0975 if (!is_array($params)) { 0976 throw new Zend_Service_Solr_InvalidArgumentException("\$params must be a valid array or null"); 0977 } 0978 } else { 0979 $params = array(); 0980 } 0981 0982 $httpTransport = $this->getHttpTransport(); 0983 0984 // read the contents of the URL using our configured Http Transport and default timeout 0985 $httpResponse = $httpTransport->performGetRequest($url); 0986 0987 // check that its a 200 response 0988 if ($httpResponse->getStatusCode() == 200) { 0989 // add the resource.name parameter if not specified 0990 if (!isset($params['resource.name'])) { 0991 $params['resource.name'] = $url; 0992 } 0993 0994 // delegate the rest to extractFromString 0995 return $this->extractFromString($httpResponse->getBody(), $params, $document, $mimetype); 0996 } else { 0997 throw new Zend_Service_Solr_InvalidArgumentException("URL '{$url}' returned non 200 response code"); 0998 } 0999 } 1000 1001 /** 1002 * Send an optimize command. Will be synchronous unless both wait parameters are set 1003 * to false. 1004 * 1005 * @param boolean $waitFlush 1006 * @param boolean $waitSearcher 1007 * @param float $timeout Maximum expected duration of the commit operation on the server (otherwise, will throw a communication exception) 1008 * @return Zend_Service_Solr_Response 1009 * 1010 * @throws Zend_Service_Solr_HttpTransportException If an error occurs during the service call 1011 */ 1012 public function optimize($waitFlush = true, $waitSearcher = true, $timeout = 3600) { 1013 $flushValue = $waitFlush ? 'true' : 'false'; 1014 $searcherValue = $waitSearcher ? 'true' : 'false'; 1015 1016 $rawPost = '<optimize waitFlush="' . $flushValue . '" waitSearcher="' . $searcherValue . 
'" />'; 1017 1018 return $this->_sendRawPost($this->_updateUrl, $rawPost, $timeout); 1019 } 1020 1021 /** 1022 * Simple Search interface 1023 * 1024 * @param string $query The raw query string 1025 * @param int $offset The starting offset for result documents 1026 * @param int $limit The maximum number of result documents to return 1027 * @param array $params key / value pairs for other query parameters (see Solr documentation), use arrays for parameter keys used more than once (e.g. facet.field) 1028 * @param string $method The HTTP method (Zend_Service_Solr_Service::METHOD_GET or Zend_Service_Solr_Service::METHOD::POST) 1029 * @return Zend_Service_Solr_Response 1030 * 1031 * @throws Zend_Service_Solr_HttpTransportException If an error occurs during the service call 1032 * @throws Zend_Service_Solr_InvalidArgumentException If an invalid HTTP method is used 1033 */ 1034 public function search($query, $offset = 0, $limit = 10, $params = array(), $method = self::METHOD_GET) { 1035 // ensure params is an array 1036 if (!is_null($params)) { 1037 if (!is_array($params)) { 1038 // params was specified but was not an array - invalid 1039 throw new Zend_Service_Solr_InvalidArgumentException("\$params must be a valid array or null"); 1040 } 1041 } else { 1042 $params = array(); 1043 } 1044 1045 // construct our full parameters 1046 // common parameters in this interface 1047 $params['wt'] = self::SOLR_WRITER; 1048 $params['json.nl'] = $this->_namedListTreatment; 1049 1050 $params['q'] = $query; 1051 $params['start'] = $offset; 1052 $params['rows'] = $limit; 1053 1054 $queryString = $this->_generateQueryString($params); 1055 1056 if ($method == self::METHOD_GET) { 1057 return $this->_sendRawGet($this->_searchUrl . $this->_queryDelimiter . 
$queryString); 1058 } else if ($method == self::METHOD_POST) { 1059 return $this->_sendRawPost($this->_searchUrl, $queryString, FALSE, 'application/x-www-form-urlencoded; charset=UTF-8'); 1060 } else { 1061 throw new Zend_Service_Solr_InvalidArgumentException("Unsupported method '$method', please use the Zend_Service_Solr_Service::METHOD_* constants"); 1062 } 1063 } 1064 1065 /** 1066 * Simple Spell interface 1067 * 1068 * @param string $query The raw query string 1069 * @param int $offset The starting offset for result documents 1070 * @param int $limit The maximum number of result documents to return 1071 * @param array $params key / value pairs for other query parameters (see Solr documentation), use arrays for parameter keys used more than once (e.g. facet.field) 1072 * @param string $method The HTTP method (Zend_Service_Solr_Service::METHOD_GET or Zend_Service_Solr_Service::METHOD::POST) 1073 * @return Zend_Service_Solr_Response 1074 * 1075 * @throws Zend_Service_Solr_HttpTransportException If an error occurs during the service call 1076 * @throws Zend_Service_Solr_InvalidArgumentException If an invalid HTTP method is used 1077 */ 1078 public function spell($query, $offset = 0, $limit = 10, $params = array(), $method = self::METHOD_GET) { 1079 // ensure params is an array 1080 if (!is_null($params)) { 1081 if (!is_array($params)) { 1082 // params was specified but was not an array - invalid 1083 throw new Zend_Service_Solr_InvalidArgumentException("\$params must be a valid array or null"); 1084 } 1085 } else { 1086 $params = array(); 1087 } 1088 1089 // construct our full parameters 1090 // common parameters in this interface 1091 $params['wt'] = self::SOLR_WRITER; 1092 $params['json.nl'] = $this->_namedListTreatment; 1093 1094 $params['q'] = $query; 1095 1096 $params['spellcheck.q'] = $query; 1097 $params['spellcheck'] = 'true'; 1098 $params['spellcheck.extendedResults'] = 'true'; 1099 1100 $queryString = $this->_generateQueryString($params); 1101 $results = 
file_get_contents($this->_spellUrl . '?' . $queryString); 1102 return $results; 1103 } 1104 1105 }