dokuwiki-sphinxsearch-plugin – Blame information for rev 1
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
1 | office | 1 | <?php |
2 | |||
3 | // |
||
4 | // $Id: sphinxapi.php 2055 2009-11-06 23:09:58Z shodan $ |
||
5 | // |
||
6 | |||
7 | // |
||
8 | // Copyright (c) 2001-2008, Andrew Aksyonoff. All rights reserved. |
||
9 | // |
||
10 | // This program is free software; you can redistribute it and/or modify |
||
11 | // it under the terms of the GNU General Public License. You should have |
||
12 | // received a copy of the GPL license along with this program; if you |
||
13 | // did not, you can find it at http://www.gnu.org/ |
||
14 | // |
||
15 | |||
16 | ///////////////////////////////////////////////////////////////////////////// |
||
17 | // PHP version of Sphinx searchd client (PHP API) |
||
18 | ///////////////////////////////////////////////////////////////////////////// |
||
19 | |||
20 | /// known searchd commands |
||
define('SEARCHD_COMMAND_SEARCH', 0);
define('SEARCHD_COMMAND_EXCERPT', 1);
define('SEARCHD_COMMAND_UPDATE', 2);
define('SEARCHD_COMMAND_KEYWORDS', 3);
define('SEARCHD_COMMAND_PERSIST', 4);
define('SEARCHD_COMMAND_STATUS', 5);
define('SEARCHD_COMMAND_QUERY', 6);

/// client-side implementation version of each command
define('VER_COMMAND_SEARCH', 0x116);
define('VER_COMMAND_EXCERPT', 0x100);
define('VER_COMMAND_UPDATE', 0x102);
define('VER_COMMAND_KEYWORDS', 0x100);
define('VER_COMMAND_STATUS', 0x100);
define('VER_COMMAND_QUERY', 0x100);

/// status codes returned by searchd
define('SEARCHD_OK', 0);
define('SEARCHD_ERROR', 1);
define('SEARCHD_RETRY', 2);
define('SEARCHD_WARNING', 3);

/// match modes
define('SPH_MATCH_ALL', 0);
define('SPH_MATCH_ANY', 1);
define('SPH_MATCH_PHRASE', 2);
define('SPH_MATCH_BOOLEAN', 3);
define('SPH_MATCH_EXTENDED', 4);
define('SPH_MATCH_FULLSCAN', 5);
define('SPH_MATCH_EXTENDED2', 6); // extended engine V2 (TEMPORARY, WILL BE REMOVED)

/// ranking modes (ext2 only)
define('SPH_RANK_PROXIMITY_BM25', 0); ///< default mode, phrase proximity major factor and BM25 minor one
define('SPH_RANK_BM25', 1); ///< statistical mode, BM25 ranking only (faster but worse quality)
define('SPH_RANK_NONE', 2); ///< no ranking, all matches get a weight of 1
define('SPH_RANK_WORDCOUNT', 3); ///< simple word-count weighting, rank is a weighted sum of per-field keyword occurrence counts
define('SPH_RANK_PROXIMITY', 4);
define('SPH_RANK_MATCHANY', 5);
define('SPH_RANK_FIELDMASK', 6);

/// sort modes
define('SPH_SORT_RELEVANCE', 0);
define('SPH_SORT_ATTR_DESC', 1);
define('SPH_SORT_ATTR_ASC', 2);
define('SPH_SORT_TIME_SEGMENTS', 3);
define('SPH_SORT_EXTENDED', 4);
define('SPH_SORT_EXPR', 5);

/// filter types
define('SPH_FILTER_VALUES', 0);
define('SPH_FILTER_RANGE', 1);
define('SPH_FILTER_FLOATRANGE', 2);

/// attribute types
define('SPH_ATTR_INTEGER', 1);
define('SPH_ATTR_TIMESTAMP', 2);
define('SPH_ATTR_ORDINAL', 3);
define('SPH_ATTR_BOOL', 4);
define('SPH_ATTR_FLOAT', 5);
define('SPH_ATTR_BIGINT', 6);
define('SPH_ATTR_MULTI', 0x40000000);

/// grouping functions
define('SPH_GROUPBY_DAY', 0);
define('SPH_GROUPBY_WEEK', 1);
define('SPH_GROUPBY_MONTH', 2);
define('SPH_GROUPBY_YEAR', 3);
define('SPH_GROUPBY_ATTR', 4);
define('SPH_GROUPBY_ATTRPAIR', 5);
||
90 | |||
91 | // important properties of PHP's integers: |
||
92 | // - always signed (one bit short of PHP_INT_SIZE) |
||
93 | // - conversion from string to int is saturated |
||
94 | // - float is double |
||
95 | // - div converts arguments to floats |
||
96 | // - mod converts arguments to ints |
||
97 | |||
98 | // the packing code below works as follows: |
||
99 | // - when we got an int, just pack it |
||
100 | // if performance is a problem, this is the branch users should aim for |
||
101 | // |
||
102 | // - otherwise, we got a number in string form |
||
103 | // this might be due to different reasons, but we assume that this is |
||
104 | // because it didn't fit into PHP int |
||
105 | // |
||
106 | // - factor the string into high and low ints for packing |
||
107 | // - if we have bcmath, then it is used |
||
108 | // - if we don't, we have to do it manually (this is the fun part) |
||
109 | // |
||
110 | // - x64 branch does factoring using ints |
||
111 | // - x32 (ab)uses floats, since we can't fit unsigned 32-bit number into an int |
||
112 | // |
||
113 | // unpacking routines are pretty much the same. |
||
114 | // - return ints if we can |
||
115 | // - otherwise format number into a string |
||
116 | |||
117 | /// pack 64-bit signed |
||
/// Pack a signed 64-bit number (int or numeric string) into 8 bytes,
/// high 32-bit word first, each word in network byte order.
function sphPackI64($v)
{
    assert(is_numeric($v));

    // 64-bit PHP: a native int holds the whole value
    if (PHP_INT_SIZE >= 8) {
        $n = (int)$v;
        $high = $n >> 32;
        $low = $n & 0xFFFFFFFF;
        return pack("NN", $high, $low);
    }

    // 32-bit PHP, native int: the high word is just the sign extension
    if (is_int($v)) {
        $high = ($v < 0) ? -1 : 0;
        return pack("NN", $high, $v);
    }

    // 32-bit PHP, numeric string, bcmath available
    if (function_exists("bcmul")) {
        $num = $v;
        if (bccomp($num, 0) == -1) {
            // negative: add 2^64 to obtain the unsigned bit pattern
            $num = bcadd("18446744073709551616", $num);
        }
        $high = bcdiv($num, "4294967296", 0);
        $low = bcmod($num, "4294967296");
        // floats on purpose: an int cast would lose the top bit of each word
        return pack("NN", (float)$high, (float)$low);
    }

    // 32-bit PHP, numeric string, no bcmath: split the decimal string into
    // its last 13 digits and the rest, then rebuild both words with floats
    $split = max(0, strlen($v) - 13);
    $tail = abs((float)substr($v, $split));
    $head = abs((float)substr($v, 0, $split));

    $mix = $tail + $head * 1316134912.0; // (10 ^ 13) % (1 << 32) = 1316134912
    $carry = floor($mix / 4294967296.0);
    $low = $mix - ($carry * 4294967296.0);
    $high = $head * 2328.0 + $carry; // (10 ^ 13) / (1 << 32) = 2328

    // negate the magnitude by hand (two's complement across both words)
    if ($v < 0) {
        if ($low == 0) {
            $high = 4294967296.0 - $high;
        } else {
            $high = 4294967295.0 - $high;
            $low = 4294967296.0 - $low;
        }
    }

    return pack("NN", $high, $low);
}
||
161 | |||
162 | /// pack 64-bit unsigned |
||
/// Pack an unsigned 64-bit number (int or numeric string) into 8 bytes,
/// high 32-bit word first, each word in network byte order.
function sphPackU64($v)
{
    assert(is_numeric($v));

    if (PHP_INT_SIZE >= 8) {
        assert($v >= 0);

        // 64-bit PHP, native int
        if (is_int($v)) {
            $high = $v >> 32;
            $low = $v & 0xFFFFFFFF;
            return pack("NN", $high, $low);
        }

        // 64-bit PHP, numeric string, bcmath available
        if (function_exists("bcmul")) {
            $high = bcdiv($v, 4294967296, 0);
            $low = bcmod($v, 4294967296);
            return pack("NN", $high, $low);
        }

        // 64-bit PHP, numeric string, no bcmath: split off the last 13
        // decimal digits and recombine using integer arithmetic
        $split = max(0, strlen($v) - 13);
        $tail = (int)substr($v, $split);
        $head = (int)substr($v, 0, $split);

        $mix = $tail + $head * 1316134912;
        $low = $mix % 4294967296;
        $high = $head * 2328 + (int)($mix / 4294967296);

        return pack("NN", $high, $low);
    }

    // 32-bit PHP, native int: the high word is always zero
    if (is_int($v)) {
        return pack("NN", 0, $v);
    }

    // 32-bit PHP, numeric string, bcmath available
    if (function_exists("bcmul")) {
        $high = bcdiv($v, "4294967296", 0);
        $low = bcmod($v, "4294967296");
        // floats on purpose: an int cast would lose the top bit of each word
        return pack("NN", (float)$high, (float)$low);
    }

    // 32-bit PHP, numeric string, no bcmath: same split, done with floats
    $split = max(0, strlen($v) - 13);
    $tail = (float)substr($v, $split);
    $head = (float)substr($v, 0, $split);

    $mix = $tail + $head * 1316134912.0;
    $carry = floor($mix / 4294967296.0);
    $low = $mix - ($carry * 4294967296.0);
    $high = $head * 2328.0 + $carry;

    return pack("NN", $high, $low);
}
||
217 | |||
218 | // unpack 64-bit unsigned |
||
/// Unpack 8 bytes (two network-order 32-bit words, high word first) as an
/// unsigned 64-bit number. Returns an int when the value fits into one,
/// otherwise a decimal string.
function sphUnpackU64($v)
{
    list($hi, $lo) = array_values(unpack("N*N*", $v));

    if (PHP_INT_SIZE >= 8) {
        // unpack() on PHP 5.2.2 to 5.2.5 may hand back negative words; fix them up
        if ($hi < 0) {
            $hi += (1 << 32);
        }
        if ($lo < 0) {
            $lo += (1 << 32);
        }

        // 64-bit PHP: result fits into a signed int
        if ($hi <= 2147483647) {
            return ($hi << 32) + $lo;
        }

        // 64-bit PHP, bcmath available
        if (function_exists("bcmul")) {
            return bcadd($lo, bcmul($hi, "4294967296"));
        }

        // 64-bit PHP, no bcmath: work in base 100000 and glue the digits together
        $base = 100000;
        $head = ((int)($hi / $base) << 32) + (int)($lo / $base);
        $tail = (($hi % $base) << 32) + ($lo % $base);
        if ($tail > $base) {
            $head += (int)($tail / $base);
            $tail = $tail % $base;
        }

        if ($head == 0) {
            return $tail;
        }
        return sprintf("%d%05d", $head, $tail);
    }

    // 32-bit PHP: value fits into the low word
    if ($hi == 0) {
        if ($lo > 0) {
            return $lo;
        }
        return sprintf("%u", $lo);
    }

    $hi = sprintf("%u", $hi);
    $lo = sprintf("%u", $lo);

    // 32-bit PHP, bcmath available
    if (function_exists("bcmul")) {
        return bcadd($lo, bcmul($hi, "4294967296"));
    }

    // 32-bit PHP, no bcmath: float arithmetic in base 10^7
    $hi = (float)$hi;
    $lo = (float)$lo;

    $quot = floor($hi / 10000000.0);
    $rem = $hi - $quot * 10000000.0;
    $mix = $lo + $rem * 4967296.0;
    $mixQuot = floor($mix / 10000000.0);
    $tail = $mix - $mixQuot * 10000000.0;
    $head = $quot * 4294967296.0 + $rem * 429.0 + $mixQuot;

    $head = sprintf("%.0f", $head);
    $tail = sprintf("%07.0f", $tail);
    if ($head == "0") {
        return sprintf("%.0f", (float)$tail);
    }
    return $head . $tail;
}
||
280 | |||
281 | // unpack 64-bit signed |
||
/// Unpack 8 bytes (two network-order 32-bit words, high word first) as a
/// signed 64-bit number. Returns an int when the value fits into one,
/// otherwise a decimal string.
function sphUnpackI64($v)
{
    list($hi, $lo) = array_values(unpack("N*N*", $v));

    // 64-bit PHP: shifting the high word into place yields the signed value
    if (PHP_INT_SIZE >= 8) {
        // unpack() on PHP 5.2.2 to 5.2.5 may hand back negative words; fix them up
        if ($hi < 0) {
            $hi += (1 << 32);
        }
        if ($lo < 0) {
            $lo += (1 << 32);
        }

        return ($hi << 32) + $lo;
    }

    // 32-bit PHP: small non-negative value
    if ($hi == 0) {
        if ($lo > 0) {
            return $lo;
        }
        return sprintf("%u", $lo);
    } elseif ($hi == -1) {
        // 32-bit PHP: small negative value (high word is pure sign extension)
        if ($lo < 0) {
            return $lo;
        }
        return sprintf("%.0f", $lo - 4294967296.0);
    }

    // general case: for negatives, invert both words and remember to add one
    $sign = "";
    $addOne = 0;
    if ($hi < 0) {
        $hi = ~$hi;
        $lo = ~$lo;
        $addOne = 1;
        $sign = "-";
    }

    $hi = sprintf("%u", $hi);
    $lo = sprintf("%u", $lo);

    // 32-bit PHP, bcmath available
    if (function_exists("bcmul")) {
        return $sign . bcadd(bcadd($lo, bcmul($hi, "4294967296")), $addOne);
    }

    // 32-bit PHP, no bcmath: float arithmetic in base 10^7
    $hi = (float)$hi;
    $lo = (float)$lo;

    $quot = floor($hi / 10000000.0);
    $rem = $hi - $quot * 10000000.0;
    $mix = $lo + $rem * 4967296.0;
    $mixQuot = floor($mix / 10000000.0);
    $tail = $mix - $mixQuot * 10000000.0 + $addOne;
    $head = $quot * 4294967296.0 + $rem * 429.0 + $mixQuot;
    if ($tail == 10000000) {
        // the +1 overflowed the low digits; carry into the high part
        $tail = 0;
        $head += 1;
    }

    $head = sprintf("%.0f", $head);
    $tail = sprintf("%07.0f", $tail);
    if ($head == "0") {
        return $sign . sprintf("%.0f", (float)$tail);
    }
    return $sign . $head . $tail;
}
||
344 | |||
345 | |||
/// Reinterpret a 32-bit value produced by unpack() as unsigned.
/// Returns an int on 64-bit PHP and a "%u"-formatted string on 32-bit PHP.
function sphFixUint($value)
{
    if (PHP_INT_SIZE < 8) {
        // 32-bit PHP has no unsigned ints; format the value as an unsigned decimal string
        return sprintf("%u", $value);
    }

    // 64-bit PHP: unpack() may return a negative number, shift it back into range
    return ($value < 0) ? $value + (1 << 32) : $value;
}
||
357 | |||
358 | if (!class_exists('SphinxClient')) { |
||
359 | /// sphinx searchd client class |
||
360 | class SphinxClient |
||
361 | { |
||
var $_host; ///< searchd host (default is "localhost")
var $_port; ///< searchd port (default is 9312)
var $_path = false; ///< unix socket URL ("unix://..."), set by SetServer(); false/empty means TCP
var $_socket = false; ///< stored socket handle reused by _Connect(); false when none is held
var $_offset; ///< how many records to seek from result-set start (default is 0)
var $_limit; ///< how many records to return from result-set starting at offset (default is 20)
var $_mode; ///< query matching mode (default is SPH_MATCH_ALL)
var $_weights; ///< per-field weights (default is 1 for all fields)
var $_sort; ///< match sorting mode (default is SPH_SORT_RELEVANCE)
var $_sortby; ///< attribute to sort by (default is "")
var $_min_id; ///< min ID to match (default is 0, which means no limit)
var $_max_id; ///< max ID to match (default is 0, which means no limit)
var $_filters; ///< search filters
var $_groupby; ///< group-by attribute name
var $_groupfunc; ///< group-by function (to pre-process group-by attribute value with)
var $_groupsort; ///< group-by sorting clause (to sort groups in result set with)
var $_groupdistinct; ///< group-by count-distinct attribute
var $_maxmatches; ///< max matches to retrieve
var $_cutoff; ///< cutoff to stop searching at (default is 0)
var $_retrycount; ///< distributed retries count
var $_retrydelay; ///< distributed retries delay
var $_anchor; ///< geographical anchor point
var $_indexweights; ///< per-index weights
var $_ranker; ///< ranking mode (default is SPH_RANK_PROXIMITY_BM25)
var $_maxquerytime; ///< max query time, milliseconds (default is 0, do not limit)
var $_fieldweights; ///< per-field-name weights
var $_overrides; ///< per-query attribute values overrides
var $_select; ///< select-list (attributes or expressions, with optional aliases)

var $_error; ///< last error message
var $_warning; ///< last warning message
var $_connerror; ///< connection error vs remote error flag

var $_reqs; ///< requests array for multi-query
var $_mbenc; ///< stored mbstring encoding
var $_arrayresult; ///< whether $result["matches"] should be a hash or an array
var $_timeout; ///< connect timeout
||
397 | |||
398 | ///////////////////////////////////////////////////////////////////////////// |
||
399 | // common stuff |
||
400 | ///////////////////////////////////////////////////////////////////////////// |
||
401 | |||
402 | /// create a new client object and fill defaults |
||
/// Create a new client object and fill in the defaults.
/// Declared as __construct() so that `new SphinxClient()` initialises the
/// object on PHP 8, where a method named after the class is no longer
/// treated as a constructor.
function __construct()
{
    // per-client-object settings
    $this->_host = "localhost";
    $this->_port = 9312;
    $this->_path = false;
    $this->_socket = false;

    // per-query settings
    $this->_offset = 0;
    $this->_limit = 20;
    $this->_mode = SPH_MATCH_ALL;
    $this->_weights = array();
    $this->_sort = SPH_SORT_RELEVANCE;
    $this->_sortby = "";
    $this->_min_id = 0;
    $this->_max_id = 0;
    $this->_filters = array();
    $this->_groupby = "";
    $this->_groupfunc = SPH_GROUPBY_DAY;
    $this->_groupsort = "@group desc";
    $this->_groupdistinct = "";
    $this->_maxmatches = 1000;
    $this->_cutoff = 0;
    $this->_retrycount = 0;
    $this->_retrydelay = 0;
    $this->_anchor = array();
    $this->_indexweights = array();
    $this->_ranker = SPH_RANK_PROXIMITY_BM25;
    $this->_maxquerytime = 0;
    $this->_fieldweights = array();
    $this->_overrides = array();
    $this->_select = "*";

    // per-reply fields (for single-query case)
    $this->_error = "";
    $this->_warning = "";
    $this->_connerror = false;

    // requests storage (for multi-query case)
    $this->_reqs = array();
    $this->_mbenc = "";
    $this->_arrayresult = false;
    $this->_timeout = 0;
}

/// PHP 4 style constructor name, kept for code that calls it explicitly
/// (e.g. parent::SphinxClient() in a subclass). It must stay below
/// __construct() so that PHP 5 does not report a redefined constructor.
function SphinxClient()
{
    $this->__construct();
}
||
446 | |||
/// Close the stored socket, if one is held, when the object is destroyed.
function __destruct()
{
    if ($this->_socket === false) {
        return;
    }
    fclose($this->_socket);
}
||
452 | |||
453 | /// get last error message (string) |
||
/// Return the stored error message ("" when none has been recorded).
function GetLastError()
{
    $message = $this->_error;
    return $message;
}
||
458 | |||
459 | /// get last warning message (string) |
||
/// Return the stored warning message ("" when none has been recorded).
function GetLastWarning()
{
    $message = $this->_warning;
    return $message;
}
||
464 | |||
465 | /// get last error flag (to tell network connection errors from searchd errors or broken responses) |
||
/// Return the connection-error flag, which is set when connecting or sending
/// fails (as opposed to an error reported by searchd or a broken response).
function IsConnectError()
{
    $flag = $this->_connerror;
    return $flag;
}
||
470 | |||
471 | /// set searchd host name (string) and port (integer) |
||
/// Set the searchd location.
/// $host is either a host name, an absolute unix socket path ("/..."), or a
/// "unix://" URL. $port is used for TCP hosts only; 0 (the default) selects
/// the standard searchd port 9312 instead of storing an unusable port 0.
function SetServer($host, $port = 0)
{
    assert(is_string($host));

    // absolute path: unix socket (substr() is safe on an empty string, $host[0] is not)
    if (substr($host, 0, 1) == '/') {
        $this->_path = 'unix://' . $host;
        return;
    }

    // already a unix:// URL
    if (substr($host, 0, 7) == "unix://") {
        $this->_path = $host;
        return;
    }

    assert(is_int($port));
    $this->_host = $host;
    $this->_port = ($port == 0) ? 9312 : $port;
    $this->_path = '';
}
||
489 | |||
490 | /// set server connection timeout (0 to remove) |
||
/// Store the connect timeout passed to fsockopen(); values <= 0 mean
/// that no explicit timeout is passed.
function SetConnectTimeout($timeout)
{
    assert(is_numeric($timeout));

    $this->_timeout = $timeout;
}
||
496 | |||
497 | |||
/// Write $length bytes of $data to $handle.
/// Returns true on success; on EOF or a short write records a connection
/// error and returns false.
function _Send($handle, $data, $length)
{
    // fwrite() is only attempted when the stream is not already at EOF
    $sent = !feof($handle) && fwrite($handle, $data, $length) === $length;
    if ($sent) {
        return true;
    }

    $this->_error = 'connection unexpectedly closed (timed out?)';
    $this->_connerror = true;
    return false;
}
||
507 | |||
508 | ///////////////////////////////////////////////////////////////////////////// |
||
509 | |||
510 | /// enter mbstring workaround mode |
||
/// Enter the mbstring workaround mode: when mbstring overloads the string
/// functions (func_overload bit 2), remember the current internal encoding
/// and switch to latin1.
function _MBPush()
{
    $this->_mbenc = "";

    if (!(ini_get("mbstring.func_overload") & 2)) {
        return;
    }

    $this->_mbenc = mb_internal_encoding();
    mb_internal_encoding("latin1");
}
||
519 | |||
520 | /// leave mbstring workaround mode |
||
/// Leave the mbstring workaround mode: restore the encoding saved by _MBPush(), if any.
function _MBPop()
{
    if (!$this->_mbenc) {
        return;
    }
    mb_internal_encoding($this->_mbenc);
}
||
526 | |||
527 | /// connect to searchd server |
||
/// Connect to searchd and perform the protocol version handshake.
/// Returns the socket handle, or false on failure (with the error message set).
function _Connect()
{
    // a stored socket exists: reuse it while it is still alive
    if ($this->_socket !== false) {
        if (!@feof($this->_socket)) {
            return $this->_socket;
        }

        // stored socket is dead; forget it and open a new one below
        $this->_socket = false;
    }

    $errno = 0;
    $errstr = "";
    $this->_connerror = false;

    // unix socket path takes precedence over host:port
    $host = $this->_path ? $this->_path : $this->_host;
    $port = $this->_path ? 0 : $this->_port;

    $fp = ($this->_timeout <= 0)
        ? @fsockopen($host, $port, $errno, $errstr)
        : @fsockopen($host, $port, $errno, $errstr, $this->_timeout);

    if (!$fp) {
        $location = $this->_path
            ? $this->_path
            : "{$this->_host}:{$this->_port}";

        $errstr = trim($errstr);
        $this->_error = "connection to $location failed (errno=$errno, msg=$errstr)";
        $this->_connerror = true;
        return false;
    }

    // Send our version BEFORE reading searchd's one: otherwise, under some
    // conditions (reported on FreeBSD for instance), the TCP stack could
    // throttle the write-write-read pattern because of Nagle.
    $sent = $this->_Send($fp, pack("N", 1), 4);
    if (!$sent) {
        fclose($fp);
        $this->_error = "failed to send client protocol version";
        return false;
    }

    // read and check the server protocol version
    list(, $v) = unpack("N*", fread($fp, 4));
    $v = (int)$v;
    if ($v < 1) {
        fclose($fp);
        $this->_error = "expected searchd protocol version 1+, got version '$v'";
        return false;
    }

    return $fp;
}
||
590 | |||
591 | /// get and check response packet from searchd server |
||
/// Read one response packet from $fp and validate it.
/// Returns the response body (with a leading warning stripped and stored),
/// or false on a read failure or a searchd-reported error.
function _GetResponse($fp, $client_ver)
{
    $response = "";
    $len = 0;

    // 8-byte header: status (16 bit), version (16 bit), body length (32 bit)
    $header = fread($fp, 8);
    if (strlen($header) == 8) {
        list($status, $ver, $len) = array_values(unpack("n2a/Nb", $header));
        for ($left = $len; $left > 0 && !feof($fp);) {
            $chunk = fread($fp, $left);
            if ($chunk) {
                $response .= $chunk;
                $left -= strlen($chunk);
            }
        }
    }

    // no stored socket on the client: this handle is ours to close
    if ($this->_socket === false) {
        fclose($fp);
    }

    // check response size
    $read = strlen($response);
    if (!$response || $read != $len) {
        if ($len) {
            $this->_error = "failed to read searchd response (status=$status, ver=$ver, len=$len, read=$read)";
        } else {
            $this->_error = "received zero-sized searchd response";
        }
        return false;
    }

    // check status
    switch ($status) {
        case SEARCHD_WARNING:
            // body starts with a length-prefixed warning message
            list(, $wlen) = unpack("N*", substr($response, 0, 4));
            $this->_warning = substr($response, 4, $wlen);
            return substr($response, 4 + $wlen);

        case SEARCHD_ERROR:
            $this->_error = "searchd error: " . substr($response, 4);
            return false;

        case SEARCHD_RETRY:
            $this->_error = "temporary searchd error: " . substr($response, 4);
            return false;

        case SEARCHD_OK:
            break;

        default:
            $this->_error = "unknown status code '$status'";
            return false;
    }

    // check version
    if ($ver < $client_ver) {
        $this->_warning = sprintf(
            "searchd command v.%d.%d older than client's v.%d.%d, some options might not work",
            $ver >> 8,
            $ver & 0xff,
            $client_ver >> 8,
            $client_ver & 0xff
        );
    }

    return $response;
}
||
653 | |||
654 | ///////////////////////////////////////////////////////////////////////////// |
||
655 | // searching |
||
656 | ///////////////////////////////////////////////////////////////////////////// |
||
657 | |||
658 | /// set offset and count into result set, |
||
659 | /// and optionally set max-matches and cutoff limits |
||
/// Set the result-set offset and limit; $max and $cutoff replace the stored
/// max-matches and cutoff values only when they are greater than zero.
function SetLimits($offset, $limit, $max = 0, $cutoff = 0)
{
    assert(is_int($offset));
    assert(is_int($limit));
    assert($offset >= 0);
    assert($limit > 0);
    assert($max >= 0);

    $this->_offset = $offset;
    $this->_limit = $limit;

    if ($max > 0) {
        $this->_maxmatches = $max;
    }
    if ($cutoff > 0) {
        $this->_cutoff = $cutoff;
    }
}
||
674 | |||
675 | /// set maximum query time, in milliseconds, per-index |
||
676 | /// integer, 0 means "do not limit" |
||
/// Store the maximum query time (non-negative integer).
function SetMaxQueryTime($max)
{
    assert(is_int($max));
    assert($max >= 0);

    $this->_maxquerytime = $max;
}
||
683 | |||
684 | /// set matching mode |
||
/// Store the matching mode; must be one of the SPH_MATCH_* constants.
function SetMatchMode($mode)
{
    $known = array(
        SPH_MATCH_ALL,
        SPH_MATCH_ANY,
        SPH_MATCH_PHRASE,
        SPH_MATCH_BOOLEAN,
        SPH_MATCH_EXTENDED,
        SPH_MATCH_FULLSCAN,
        SPH_MATCH_EXTENDED2,
    );
    // loose comparison on purpose, same as a chain of == checks
    assert(in_array($mode, $known));

    $this->_mode = $mode;
}
||
696 | |||
697 | /// set ranking mode |
||
/// Store the ranking mode; must be one of the SPH_RANK_* constants.
/// Every ranker defined by this file is accepted, including
/// SPH_RANK_MATCHANY and SPH_RANK_FIELDMASK.
function SetRankingMode($ranker)
{
    assert($ranker == SPH_RANK_PROXIMITY_BM25
        || $ranker == SPH_RANK_BM25
        || $ranker == SPH_RANK_NONE
        || $ranker == SPH_RANK_WORDCOUNT
        || $ranker == SPH_RANK_PROXIMITY
        || $ranker == SPH_RANK_MATCHANY
        || $ranker == SPH_RANK_FIELDMASK);

    $this->_ranker = $ranker;
}
||
707 | |||
708 | /// set matches sorting mode |
||
/// Store the sorting mode (one of the SPH_SORT_* constants) and the sort-by
/// clause; $sortby must be non-empty unless the mode is SPH_SORT_RELEVANCE.
function SetSortMode($mode, $sortby = "")
{
    $known = array(
        SPH_SORT_RELEVANCE,
        SPH_SORT_ATTR_DESC,
        SPH_SORT_ATTR_ASC,
        SPH_SORT_TIME_SEGMENTS,
        SPH_SORT_EXTENDED,
        SPH_SORT_EXPR,
    );
    // loose comparison on purpose, same as a chain of == checks
    assert(in_array($mode, $known));
    assert(is_string($sortby));
    assert($mode == SPH_SORT_RELEVANCE || strlen($sortby) > 0);

    $this->_sort = $mode;
    $this->_sortby = $sortby;
}
||
725 | |||
726 | /// bind per-field weights by order |
||
727 | /// DEPRECATED; use SetFieldWeights() instead |
||
/// Store per-field weights given by position (array of ints).
/// DEPRECATED; use SetFieldWeights() instead.
function SetWeights($weights)
{
    assert(is_array($weights));
    foreach ($weights as $w) {
        assert(is_int($w));
    }

    $this->_weights = $weights;
}
||
736 | |||
737 | /// bind per-field weights by name |
||
/// Store per-field weights given as a hash of field name => int weight.
function SetFieldWeights($weights)
{
    assert(is_array($weights));
    foreach ($weights as $field => $w) {
        assert(is_string($field));
        assert(is_int($w));
    }

    $this->_fieldweights = $weights;
}
||
747 | |||
748 | /// bind per-index weights by name |
||
/// Store per-index weights given as a hash of index name => int weight.
function SetIndexWeights($weights)
{
    assert(is_array($weights));
    foreach ($weights as $indexName => $w) {
        assert(is_string($indexName));
        assert(is_int($w));
    }

    $this->_indexweights = $weights;
}
||
758 | |||
759 | /// set IDs range to match |
||
760 | /// only match records if document ID is between $min and $max (inclusive) |
||
/// Store the document ID range [$min, $max]; both must be numeric and $min <= $max.
function SetIDRange($min, $max)
{
    assert(is_numeric($min));
    assert(is_numeric($max));
    assert($min <= $max);

    $this->_min_id = $min;
    $this->_max_id = $max;
}
||
769 | |||
770 | /// set values set filter |
||
771 | /// only match records where $attribute value is in given set |
||
/// Append a value-set filter on $attribute; $values is a non-empty array of
/// numeric values and $exclude is stored with the filter as given.
/// Nothing is appended when $values is not an array or is empty.
function SetFilter($attribute, $values, $exclude = false)
{
    assert(is_string($attribute));
    assert(is_array($values));
    assert(count($values));

    if (!is_array($values) || !count($values)) {
        return;
    }

    foreach ($values as $item) {
        assert(is_numeric($item));
    }

    $this->_filters[] = array(
        "type" => SPH_FILTER_VALUES,
        "attr" => $attribute,
        "exclude" => $exclude,
        "values" => $values
    );
}
||
785 | |||
786 | /// set range filter |
||
787 | /// only match records if $attribute value is between $min and $max (inclusive) |
||
/// Append a numeric range filter [$min, $max] on $attribute;
/// $exclude is stored with the filter as given.
function SetFilterRange($attribute, $min, $max, $exclude = false)
{
    assert(is_string($attribute));
    assert(is_numeric($min));
    assert(is_numeric($max));
    assert($min <= $max);

    $filter = array(
        "type" => SPH_FILTER_RANGE,
        "attr" => $attribute,
        "exclude" => $exclude,
        "min" => $min,
        "max" => $max
    );
    $this->_filters[] = $filter;
}
||
797 | |||
798 | /// set float range filter |
||
799 | /// only match records if $attribute value is between $min and $max (inclusive) |
||
/// Append a float range filter [$min, $max] on $attribute; both bounds must
/// be floats. $exclude is stored with the filter as given.
function SetFilterFloatRange($attribute, $min, $max, $exclude = false)
{
    assert(is_string($attribute));
    assert(is_float($min));
    assert(is_float($max));
    assert($min <= $max);

    $filter = array(
        "type" => SPH_FILTER_FLOATRANGE,
        "attr" => $attribute,
        "exclude" => $exclude,
        "min" => $min,
        "max" => $max
    );
    $this->_filters[] = $filter;
}
||
809 | |||
810 | /// setup anchor point for geosphere distance calculations |
||
811 | /// required to use @geodist in filters and sorting |
||
812 | /// latitude and longitude must be in radians |
||
/// Store the geo anchor: the names of the latitude/longitude attributes and
/// the anchor coordinates (both floats).
function SetGeoAnchor($attrlat, $attrlong, $lat, $long)
{
    assert(is_string($attrlat));
    assert(is_string($attrlong));
    assert(is_float($lat));
    assert(is_float($long));

    $anchor = array(
        "attrlat" => $attrlat,
        "attrlong" => $attrlong,
        "lat" => $lat,
        "long" => $long
    );
    $this->_anchor = $anchor;
}
||
822 | |||
823 | /// set grouping attribute and function |
||
/// Store the group-by attribute, grouping function (one of the SPH_GROUPBY_*
/// constants) and the group sorting clause.
function SetGroupBy($attribute, $func, $groupsort = "@group desc")
{
    assert(is_string($attribute));
    assert(is_string($groupsort));

    $known = array(
        SPH_GROUPBY_DAY,
        SPH_GROUPBY_WEEK,
        SPH_GROUPBY_MONTH,
        SPH_GROUPBY_YEAR,
        SPH_GROUPBY_ATTR,
        SPH_GROUPBY_ATTRPAIR,
    );
    // loose comparison on purpose, same as a chain of == checks
    assert(in_array($func, $known));

    $this->_groupby = $attribute;
    $this->_groupfunc = $func;
    $this->_groupsort = $groupsort;
}
||
839 | |||
840 | /// set count-distinct attribute for group-by queries |
||
/// Store the count-distinct attribute name used with group-by queries.
function SetGroupDistinct($attribute)
{
    assert(is_string($attribute));

    $this->_groupdistinct = $attribute;
}
||
846 | |||
847 | /// set distributed retries count and delay |
||
/// Store the distributed retry count and delay (both non-negative ints).
function SetRetries($count, $delay = 0)
{
    assert(is_int($count) && $count >= 0);
    assert(is_int($delay) && $delay >= 0);

    $this->_retrycount = $count;
    $this->_retrydelay = $delay;
}
||
855 | |||
856 | /// set result set format (hash or array; hash by default) |
||
857 | /// PHP specific; needed for group-by-MVA result sets that may contain duplicate IDs |
||
/// Store the result-format flag: true for matches as an array, false for a hash.
function SetArrayResult($arrayresult)
{
    assert(is_bool($arrayresult));

    $this->_arrayresult = $arrayresult;
}
||
863 | |||
864 | /// set attribute values override |
||
865 | /// there can be only one override per attribute |
||
866 | /// $values must be a hash that maps document IDs to attribute values |
||
/// Store an attribute value override, keyed by attribute name (a later call
/// for the same attribute replaces the earlier one).
/// $values must be an array; $attrtype one of the allowed SPH_ATTR_* types.
function SetOverride($attrname, $attrtype, $values)
{
    $allowedTypes = array(SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_BIGINT);

    assert(is_string($attrname));
    assert(in_array($attrtype, $allowedTypes));
    assert(is_array($values));

    $this->_overrides[$attrname] = array(
        "attr" => $attrname,
        "type" => $attrtype,
        "values" => $values
    );
}
||
875 | |||
876 | /// set select-list (attributes or expressions), SQL-like syntax |
||
/// Store the select-list string.
function SetSelect($select)
{
    assert(is_string($select));

    $this->_select = $select;
}
||
882 | |||
883 | ////////////////////////////////////////////////////////////////////////////// |
||
884 | |||
885 | /// clear all filters (for multi-queries) |
||
/// drop every configured filter and the anchor point as well
function ResetFilters()
{
    $this->_anchor = array();
    $this->_filters = array();
}
||
891 | |||
892 | /// clear groupby settings (for multi-queries) |
||
/// put all four group-by settings (attribute, function, sort clause,
/// distinct attribute) back to the values assigned below
function ResetGroupBy()
{
    $this->_groupfunc = SPH_GROUPBY_DAY;
    $this->_groupsort = "@group desc";

    // empty strings mean "no group-by attribute" / "no distinct attribute"
    $this->_groupby = "";
    $this->_groupdistinct = "";
}
||
900 | |||
901 | /// clear all attribute value overrides (for multi-queries) |
||
/// forget every override registered through SetOverride()
function ResetOverrides()
{
    $this->_overrides = array();
}
||
906 | |||
907 | ////////////////////////////////////////////////////////////////////////////// |
||
908 | |||
909 | /// connect to searchd server, run given search query through given indexes, |
||
910 | /// and return the search results |
||
/// single-query convenience wrapper: queues one query with AddQuery(),
/// runs it with RunQueries() and unwraps the only result set.
/// returns false when RunQueries() fails or the result status is
/// SEARCHD_ERROR; otherwise returns the result array.
/// the result's error and warning texts are copied into the client
function Query($query, $index = "*", $comment = "")
{
    // must not be mixed with a pending multi-query batch
    assert(empty($this->_reqs));

    $this->AddQuery($query, $index, $comment);
    $results = $this->RunQueries();

    // just in case it failed too early and left the request queued
    $this->_reqs = array();

    if (!is_array($results)) {
        // probably network error; error message should be already filled
        return false;
    }

    $first = $results[0];
    $this->_error = $first["error"];
    $this->_warning = $first["warning"];

    return ($first["status"] == SEARCHD_ERROR) ? false : $first;
}
||
929 | |||
930 | /// helper to pack floats in network byte order |
||
/// encode $f as a 4-byte float and return those bytes in network
/// (big-endian) order
function _PackFloat($f)
{
    // reinterpret the machine-order float bytes as a machine-order integer
    $words = unpack("L*", pack("f", $f));

    // unpack() numbers its results from 1; re-emit that integer big-endian
    return pack("N", $words[1]);
}
||
937 | |||
938 | /// add query to multi-query batch |
||
939 | /// returns index into results array from RunQueries() call |
||
/// serialize the current client settings together with $query, $index and
/// $comment into one request body and append it to the pending batch.
/// strings are written as a 32-bit length followed by the bytes.
/// returns the 0-based position of the new request in the batch
function AddQuery($query, $index = "*", $comment = "")
{
    // mbstring workaround
    $this->_MBPush();

    // request pieces, concatenated in this exact order at the end
    $chunks = array();

    // offset, limit, match mode, ranker, sort mode
    $chunks[] = pack("NNNNN", $this->_offset, $this->_limit, $this->_mode, $this->_ranker, $this->_sort);

    // sort-by clause, then the query itself
    $chunks[] = pack("N", strlen($this->_sortby)) . $this->_sortby;
    $chunks[] = pack("N", strlen($query)) . $query;

    // weights: count first, then every weight as an integer
    $chunks[] = pack("N", count($this->_weights));
    foreach ($this->_weights as $w) {
        $chunks[] = pack("N", (int)$w);
    }

    // indexes
    $chunks[] = pack("N", strlen($index)) . $index;

    // id64 range marker, followed by the id64 range
    $chunks[] = pack("N", 1);
    $chunks[] = sphPackU64($this->_min_id) . sphPackU64($this->_max_id);

    // filters: count, then attribute name, type, type-specific payload
    // and the exclude flag for each one
    $chunks[] = pack("N", count($this->_filters));
    foreach ($this->_filters as $flt) {
        $kind = $flt["type"];

        $chunks[] = pack("N", strlen($flt["attr"])) . $flt["attr"];
        $chunks[] = pack("N", $kind);

        if ($kind == SPH_FILTER_VALUES) {
            $chunks[] = pack("N", count($flt["values"]));
            foreach ($flt["values"] as $v) {
                $chunks[] = sphPackI64($v);
            }
        } elseif ($kind == SPH_FILTER_RANGE) {
            $chunks[] = sphPackI64($flt["min"]) . sphPackI64($flt["max"]);
        } elseif ($kind == SPH_FILTER_FLOATRANGE) {
            $chunks[] = $this->_PackFloat($flt["min"]) . $this->_PackFloat($flt["max"]);
        } else {
            assert(0 && "internal error: unhandled filter type");
        }

        $chunks[] = pack("N", $flt["exclude"]);
    }

    // group-by clause, max-matches count, group-sort clause, cutoff count
    $chunks[] = pack("NN", $this->_groupfunc, strlen($this->_groupby)) . $this->_groupby;
    $chunks[] = pack("N", $this->_maxmatches);
    $chunks[] = pack("N", strlen($this->_groupsort)) . $this->_groupsort;
    $chunks[] = pack("NNN", $this->_cutoff, $this->_retrycount, $this->_retrydelay);
    $chunks[] = pack("N", strlen($this->_groupdistinct)) . $this->_groupdistinct;

    // anchor point: a 0 marker when unset, otherwise a 1 marker followed by
    // both attribute names and both coordinates
    if (empty($this->_anchor)) {
        $chunks[] = pack("N", 0);
    } else {
        $anchor = $this->_anchor;
        $chunks[] = pack("N", 1);
        $chunks[] = pack("N", strlen($anchor["attrlat"])) . $anchor["attrlat"];
        $chunks[] = pack("N", strlen($anchor["attrlong"])) . $anchor["attrlong"];
        $chunks[] = $this->_PackFloat($anchor["lat"]) . $this->_PackFloat($anchor["long"]);
    }

    // per-index weights
    $chunks[] = pack("N", count($this->_indexweights));
    foreach ($this->_indexweights as $name => $w) {
        $chunks[] = pack("N", strlen($name)) . $name . pack("N", $w);
    }

    // max query time
    $chunks[] = pack("N", $this->_maxquerytime);

    // per-field weights
    $chunks[] = pack("N", count($this->_fieldweights));
    foreach ($this->_fieldweights as $name => $w) {
        $chunks[] = pack("N", strlen($name)) . $name . pack("N", $w);
    }

    // comment
    $chunks[] = pack("N", strlen($comment)) . $comment;

    // attribute overrides: name, type and value count, then (id, value)
    // pairs whose value encoding depends on the attribute type
    $chunks[] = pack("N", count($this->_overrides));
    foreach ($this->_overrides as $override) {
        $type = $override["type"];

        $chunks[] = pack("N", strlen($override["attr"])) . $override["attr"];
        $chunks[] = pack("NN", $type, count($override["values"]));

        foreach ($override["values"] as $docid => $val) {
            assert(is_numeric($docid));
            assert(is_numeric($val));

            $chunks[] = sphPackU64($docid);
            if ($type == SPH_ATTR_FLOAT) {
                $chunks[] = $this->_PackFloat($val);
            } elseif ($type == SPH_ATTR_BIGINT) {
                $chunks[] = sphPackI64($val);
            } else {
                $chunks[] = pack("N", $val);
            }
        }
    }

    // select-list
    $chunks[] = pack("N", strlen($this->_select)) . $this->_select;

    // mbstring workaround
    $this->_MBPop();

    // store request to requests array
    $this->_reqs[] = implode("", $chunks);
    return count($this->_reqs) - 1;
}
||
1050 | |||
1051 | /// connect to searchd, run queries batch, and return an array of result sets |
||
/// send every request queued by AddQuery() as one search command and
/// return the parsed result sets.
/// returns false (with the error message set) when nothing is queued,
/// and false when connecting, sending or receiving fails
function RunQueries()
{
    if (empty($this->_reqs)) {
        $this->_error = "no queries defined, issue AddQuery() first";
        return false;
    }

    // mbstring workaround; popped here on failure, and at the end of
    // _ParseSearchResponse() on success
    $this->_MBPush();

    $fp = $this->_Connect();
    if (!$fp) {
        $this->_MBPop();
        return false;
    }

    // body is the request count (4 bytes) followed by all queued requests
    $nreqs = count($this->_reqs);
    $body = implode("", $this->_reqs);
    $len = 4 + strlen($body);

    // header: command, command version, body length; then count and body
    $packet = pack("nnNN", SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, $len, $nreqs) . $body;

    // send query, get response; the response is only read if the send worked
    $response = false;
    if ($this->_Send($fp, $packet, $len + 8)) {
        $response = $this->_GetResponse($fp, VER_COMMAND_SEARCH);
    }
    if (!$response) {
        $this->_MBPop();
        return false;
    }

    // query sent ok; we can reset reqs now
    $this->_reqs = array();

    // parse and return response
    return $this->_ParseSearchResponse($response, $nreqs);
}
||
1087 | |||
1088 | /// parse and return search query (or queries) response |
||
/// decode the raw reply to a search command carrying up to $nreqs queries.
/// returns a list with one entry per decoded query; every entry has
/// "error", "warning" and "status", and - unless the query failed -
/// "fields", "attrs", "total", "total_found", "time", plus "matches" and
/// "words" when the reply contains any.
/// calls _MBPop() before returning
function _ParseSearchResponse($response, $nreqs)
{
    $pos = 0; // read cursor into $response
    $end = strlen($response); // cursor limit for checks, to protect against broken responses

    $results = array();
    for ($n = 0; $n < $nreqs && $pos < $end; $n++) {
        $set = array("error" => "", "warning" => "");

        // extract status
        list(, $status) = unpack("N*", substr($response, $pos, 4));
        $pos += 4;
        $set["status"] = $status;

        if ($status != SEARCHD_OK) {
            // any non-OK status is followed by a length-prefixed message
            list(, $msglen) = unpack("N*", substr($response, $pos, 4));
            $pos += 4;
            $message = substr($response, $pos, $msglen);
            $pos += $msglen;

            if ($status != SEARCHD_WARNING) {
                // nothing else is decoded for this query
                $set["error"] = $message;
                $results[] = $set;
                continue;
            }
            $set["warning"] = $message;
        }

        // read schema: field names first
        $fields = array();
        list(, $nfields) = unpack("N*", substr($response, $pos, 4));
        $pos += 4;
        while ($nfields-- > 0 && $pos < $end) {
            list(, $namelen) = unpack("N*", substr($response, $pos, 4));
            $pos += 4;
            $fields[] = substr($response, $pos, $namelen);
            $pos += $namelen;
        }
        $set["fields"] = $fields;

        // then attributes, as name => type
        $attrs = array();
        list(, $nattrs) = unpack("N*", substr($response, $pos, 4));
        $pos += 4;
        while ($nattrs-- > 0 && $pos < $end) {
            list(, $namelen) = unpack("N*", substr($response, $pos, 4));
            $pos += 4;
            $attrname = substr($response, $pos, $namelen);
            $pos += $namelen;
            list(, $attrtype) = unpack("N*", substr($response, $pos, 4));
            $pos += 4;
            $attrs[$attrname] = $attrtype;
        }
        $set["attrs"] = $attrs;

        // read match count and the flag telling whether document ids are 64-bit
        list(, $count) = unpack("N*", substr($response, $pos, 4));
        $pos += 4;
        list(, $id64) = unpack("N*", substr($response, $pos, 4));
        $pos += 4;

        // read matches
        $idx = -1;
        while ($count-- > 0 && $pos < $end) {
            // index into result array
            $idx++;

            // parse document id and weight
            if ($id64) {
                $doc = sphUnpackU64(substr($response, $pos, 8));
                $pos += 8;
                list(, $weight) = unpack("N*", substr($response, $pos, 4));
                $pos += 4;
            } else {
                $pair = array_values(unpack("N*N*", substr($response, $pos, 8)));
                list($doc, $weight) = $pair;
                $pos += 8;
                $doc = sphFixUint($doc);
            }
            $weight = sprintf("%u", $weight);

            // create match entry
            if ($this->_arrayresult) {
                $set["matches"][$idx] = array("id" => $doc, "weight" => $weight);
            } else {
                $set["matches"][$doc]["weight"] = $weight;
            }

            // parse and create attributes, in schema order
            $attrvals = array();
            foreach ($attrs as $attrname => $attrtype) {
                if ($attrtype == SPH_ATTR_BIGINT) {
                    // handle 64bit ints
                    $attrvals[$attrname] = sphUnpackI64(substr($response, $pos, 8));
                    $pos += 8;
                } elseif ($attrtype == SPH_ATTR_FLOAT) {
                    // handle floats: read the 32-bit value, reinterpret its bits as a float
                    list(, $raw) = unpack("N*", substr($response, $pos, 4));
                    $pos += 4;
                    list(, $float) = unpack("f*", pack("L", $raw));
                    $attrvals[$attrname] = $float;
                } else {
                    // handle everything else as unsigned ints
                    list(, $val) = unpack("N*", substr($response, $pos, 4));
                    $pos += 4;
                    if ($attrtype & SPH_ATTR_MULTI) {
                        // multi-value: the value just read is the element count
                        $attrvals[$attrname] = array();
                        $nvalues = $val;
                        while ($nvalues-- > 0 && $pos < $end) {
                            list(, $val) = unpack("N*", substr($response, $pos, 4));
                            $pos += 4;
                            $attrvals[$attrname][] = sphFixUint($val);
                        }
                    } else {
                        $attrvals[$attrname] = sphFixUint($val);
                    }
                }
            }

            if ($this->_arrayresult) {
                $set["matches"][$idx]["attrs"] = $attrvals;
            } else {
                $set["matches"][$doc]["attrs"] = $attrvals;
            }
        }

        // totals, elapsed time and the number of per-word entries
        $stats = array_values(unpack("N*N*N*N*", substr($response, $pos, 16)));
        list($total, $total_found, $msecs, $words) = $stats;
        $set["total"] = sprintf("%u", $total);
        $set["total_found"] = sprintf("%u", $total_found);
        $set["time"] = sprintf("%.3f", $msecs / 1000);
        $pos += 16;

        // per-word statistics
        while ($words-- > 0 && $pos < $end) {
            list(, $wordlen) = unpack("N*", substr($response, $pos, 4));
            $pos += 4;
            $word = substr($response, $pos, $wordlen);
            $pos += $wordlen;
            list($docs, $hits) = array_values(unpack("N*N*", substr($response, $pos, 8)));
            $pos += 8;
            $set["words"][$word] = array(
                "docs" => sprintf("%u", $docs),
                "hits" => sprintf("%u", $hits)
            );
        }

        $results[] = $set;
    }

    $this->_MBPop();
    return $results;
}
||
1246 | |||
1247 | ///////////////////////////////////////////////////////////////////////////// |
||
1248 | // excerpts generation |
||
1249 | ///////////////////////////////////////////////////////////////////////////// |
||
1250 | |||
1251 | /// connect to searchd server, and generate excerpts (snippets) |
||
1252 | /// of given documents for given query. returns false on failure, |
||
1253 | /// an array of snippets on success |
||
/// ask searchd to build one excerpt per entry of $docs for the query
/// $words, using the settings of $index.
/// options missing from $opts (or set to null) get the defaults below.
/// returns a list of excerpt strings, or false on connection failure or
/// when the reply is shorter than announced (error "incomplete reply")
function BuildExcerpts($docs, $index, $words, $opts = array())
{
    assert(is_array($docs));
    assert(is_string($index));
    assert(is_string($words));
    assert(is_array($opts));

    $this->_MBPush();

    $fp = $this->_Connect();
    if (!$fp) {
        $this->_MBPop();
        return false;
    }

    /////////////////
    // fixup options
    /////////////////

    $defaults = array(
        "before_match" => "<b>",
        "after_match" => "</b>",
        "chunk_separator" => " ... ",
        "limit" => 256,
        "around" => 5,
        "exact_phrase" => false,
        "single_passage" => false,
        "use_boundaries" => false,
        "weight_order" => false
    );
    foreach ($defaults as $name => $value) {
        if (!isset($opts[$name])) {
            $opts[$name] = $value;
        }
    }

    /////////////////
    // build request
    /////////////////

    // v.1.0 req
    // bit 1 (remove spaces) is always set; each enabled option adds its own bit
    $flags = 1;
    $flagbits = array(
        "exact_phrase" => 2,
        "single_passage" => 4,
        "use_boundaries" => 8,
        "weight_order" => 16
    );
    foreach ($flagbits as $name => $bit) {
        if ($opts[$name]) {
            $flags |= $bit;
        }
    }

    $body = pack("NN", 0, $flags); // mode=0, flags=$flags
    $body .= pack("N", strlen($index)) . $index; // req index
    $body .= pack("N", strlen($words)) . $words; // req words

    // options
    foreach (array("before_match", "after_match", "chunk_separator") as $name) {
        $body .= pack("N", strlen($opts[$name])) . $opts[$name];
    }
    $body .= pack("N", (int)$opts["limit"]);
    $body .= pack("N", (int)$opts["around"]);

    // documents
    $ndocs = count($docs);
    $body .= pack("N", $ndocs);
    foreach ($docs as $doc) {
        assert(is_string($doc));
        $body .= pack("N", strlen($doc)) . $doc;
    }

    ////////////////////////////
    // send query, get response
    ////////////////////////////

    $len = strlen($body);
    $packet = pack("nnN", SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, $len) . $body; // add header

    // the response is only read if the send worked
    $response = false;
    if ($this->_Send($fp, $packet, $len + 8)) {
        $response = $this->_GetResponse($fp, VER_COMMAND_EXCERPT);
    }
    if (!$response) {
        $this->_MBPop();
        return false;
    }

    //////////////////
    // parse response
    //////////////////

    // one length-prefixed string is expected per submitted document
    $pos = 0;
    $excerpts = array();
    $rlen = strlen($response);
    for ($i = 0; $i < $ndocs; $i++) {
        list(, $slen) = unpack("N*", substr($response, $pos, 4));
        $pos += 4;

        if ($pos + $slen > $rlen) {
            $this->_error = "incomplete reply";
            $this->_MBPop();
            return false;
        }

        $excerpts[] = $slen ? substr($response, $pos, $slen) : "";
        $pos += $slen;
    }

    $this->_MBPop();
    return $excerpts;
}
||
1347 | |||
1348 | |||
1349 | ///////////////////////////////////////////////////////////////////////////// |
||
1350 | // keyword generation |
||
1351 | ///////////////////////////////////////////////////////////////////////////// |
||
1352 | |||
1353 | /// connect to searchd server, and generate keyword list for a given query |
||
1354 | /// returns false on failure, |
||
1355 | /// an array of words on success |
||
/// ask searchd to split $query into keywords using the settings of $index.
/// returns a list of arrays with "tokenized" and "normalized" forms, plus
/// "docs" and "hits" counters when $hits is true; returns false on
/// connection failure or when the reply is truncated ("incomplete reply")
function BuildKeywords($query, $index, $hits)
{
    assert(is_string($query));
    assert(is_string($index));
    assert(is_bool($hits));

    $this->_MBPush();

    $fp = $this->_Connect();
    if (!$fp) {
        $this->_MBPop();
        return false;
    }

    /////////////////
    // build request
    /////////////////

    // v.1.0 req
    $body = pack("N", strlen($query)) . $query; // req query
    $body .= pack("N", strlen($index)) . $index; // req index
    $body .= pack("N", (int)$hits);

    ////////////////////////////
    // send query, get response
    ////////////////////////////

    $len = strlen($body);
    $packet = pack("nnN", SEARCHD_COMMAND_KEYWORDS, VER_COMMAND_KEYWORDS, $len) . $body; // add header

    // the response is only read if the send worked
    $response = false;
    if ($this->_Send($fp, $packet, $len + 8)) {
        $response = $this->_GetResponse($fp, VER_COMMAND_KEYWORDS);
    }
    if (!$response) {
        $this->_MBPop();
        return false;
    }

    //////////////////
    // parse response
    //////////////////

    $pos = 0;
    $keywords = array();
    $rlen = strlen($response);

    list(, $nwords) = unpack("N*", substr($response, $pos, 4));
    $pos += 4;

    for ($i = 0; $i < $nwords; $i++) {
        // tokenized form
        list(, $slen) = unpack("N*", substr($response, $pos, 4));
        $pos += 4;
        $tokenized = $slen ? substr($response, $pos, $slen) : "";
        $pos += $slen;

        // normalized form
        list(, $slen) = unpack("N*", substr($response, $pos, 4));
        $pos += 4;
        $normalized = $slen ? substr($response, $pos, $slen) : "";
        $pos += $slen;

        $entry = array("tokenized" => $tokenized, "normalized" => $normalized);

        // counters are only present when they were requested
        if ($hits) {
            list($ndocs, $nhits) = array_values(unpack("N*N*", substr($response, $pos, 8)));
            $pos += 8;
            $entry["docs"] = $ndocs;
            $entry["hits"] = $nhits;
        }
        $keywords[] = $entry;

        // the cursor ran past the end of the data: the reply was cut short
        if ($pos > $rlen) {
            $this->_error = "incomplete reply";
            $this->_MBPop();
            return false;
        }
    }

    $this->_MBPop();
    return $keywords;
}
||
1431 | |||
/// return $string with a backslash put in front of each of these
/// characters: \ ( ) | - ! @ ~ " & / ^ $ =
function EscapeString($string)
{
    // the backslash must stay first so that the backslashes added for the
    // later characters are not escaped a second time
    $specials = array('\\', '(', ')', '|', '-', '!', '@', '~', '"', '&', '/', '^', '$', '=');

    $escaped = array();
    foreach ($specials as $char) {
        $escaped[] = '\\' . $char;
    }

    return str_replace($specials, $escaped, $string);
}
||
1439 | |||
1440 | ///////////////////////////////////////////////////////////////////////////// |
||
1441 | // attribute updates |
||
1442 | ///////////////////////////////////////////////////////////////////////////// |
||
1443 | |||
1444 | /// batch update given attributes in given rows in given indexes |
||
1445 | /// returns amount of updated documents (0 or more) on success, or -1 on failure |
||
/// send new values for the attributes named in $attrs to the given index.
/// $values maps document IDs to arrays holding one value per attribute,
/// in the same order as $attrs; with $mva set, each value is itself an
/// array of integers.
/// returns the number read from the server reply on success, -1 on failure.
///
/// the request is built under the same mbstring workaround
/// (_MBPush/_MBPop) as every other request builder in this class, because
/// the string lengths written to the wire come from strlen()
function UpdateAttributes($index, $attrs, $values, $mva = false)
{
    // verify everything
    assert(is_string($index));
    assert(is_bool($mva));

    assert(is_array($attrs));
    foreach ($attrs as $attr)
        assert(is_string($attr));

    assert(is_array($values));
    foreach ($values as $id => $entry) {
        assert(is_numeric($id));
        assert(is_array($entry));
        assert(count($entry) == count($attrs));
        foreach ($entry as $v) {
            if ($mva) {
                assert(is_array($v));
                foreach ($v as $vv)
                    assert(is_int($vv));
            } else
                assert(is_int($v));
        }
    }

    // mbstring workaround; must be undone on every return path below
    $this->_MBPush();

    // build request
    $req = pack("N", strlen($index)) . $index;

    // attribute names, each followed by its multi-value flag
    $req .= pack("N", count($attrs));
    foreach ($attrs as $attr) {
        $req .= pack("N", strlen($attr)) . $attr;
        $req .= pack("N", $mva ? 1 : 0);
    }

    // per-document rows: 64-bit id, then one value per attribute
    // (for MVA: element count followed by the elements)
    $req .= pack("N", count($values));
    foreach ($values as $id => $entry) {
        $req .= sphPackU64($id);
        foreach ($entry as $v) {
            $req .= pack("N", $mva ? count($v) : $v);
            if ($mva)
                foreach ($v as $vv)
                    $req .= pack("N", $vv);
        }
    }

    // connect, send query, get response
    if (!($fp = $this->_Connect())) {
        $this->_MBPop();
        return -1;
    }

    $len = strlen($req);
    $req = pack("nnN", SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, $len) . $req; // add header
    if (!$this->_Send($fp, $req, $len + 8)) {
        $this->_MBPop();
        return -1;
    }

    if (!($response = $this->_GetResponse($fp, VER_COMMAND_UPDATE))) {
        $this->_MBPop();
        return -1;
    }

    // parse response
    list(, $updated) = unpack("N*", substr($response, 0, 4));
    $this->_MBPop();
    return $updated;
}
||
1507 | |||
1508 | ///////////////////////////////////////////////////////////////////////////// |
||
1509 | // persistent connections |
||
1510 | ///////////////////////////////////////////////////////////////////////////// |
||
1511 | |||
/// connect, send the "persist" command and keep the connection in
/// $this->_socket.
/// returns true on success; false if a socket is already stored (error
/// "already connected"), or if connecting or sending fails
function Open()
{
    if ($this->_socket !== false) {
        $this->_error = 'already connected';
        return false;
    }

    $fp = $this->_Connect();
    if (!$fp) {
        return false;
    }

    // command, command version = 0, body length = 4, body = 1
    $packet = pack("nnNN", SEARCHD_COMMAND_PERSIST, 0, 4, 1);
    if (!$this->_Send($fp, $packet, 12)) {
        return false;
    }

    $this->_socket = $fp;
    return true;
}
||
1529 | |||
/// close the connection stored in $this->_socket and clear it.
/// returns true on success, false (error "not connected") when no
/// connection is stored
function Close()
{
    if ($this->_socket === false) {
        $this->_error = 'not connected';
        return false;
    }

    fclose($this->_socket);
    $this->_socket = false;
    return true;
}
||
1542 | |||
1543 | ////////////////////////////////////////////////////////////////////////// |
||
1544 | // status |
||
1545 | ////////////////////////////////////////////////////////////////////////// |
||
1546 | |||
/// send the status command and decode the reply.
/// the reply starts with a row count and a column count, followed by
/// rows*cols length-prefixed strings.
/// returns an array of rows, each a list of its column strings; returns
/// false on connection failure or when the reply is shorter than announced
/// (error "incomplete reply")
function Status()
{
    $this->_MBPush();
    if (!($fp = $this->_Connect())) {
        $this->_MBPop();
        return false;
    }

    $req = pack("nnNN", SEARCHD_COMMAND_STATUS, VER_COMMAND_STATUS, 4, 1); // len=4, body=1
    if (
        !($this->_Send($fp, $req, 12)) ||
        !($response = $this->_GetResponse($fp, VER_COMMAND_STATUS))
    ) {
        $this->_MBPop();
        return false;
    }

    $rlen = strlen($response);

    // the row and column counts take the first 8 bytes
    if ($rlen < 8) {
        $this->_error = "incomplete reply";
        $this->_MBPop();
        return false;
    }

    $p = 0;
    list($rows, $cols) = array_values(unpack("N*N*", substr($response, $p, 8)));
    $p += 8;

    $res = array();
    for ($i = 0; $i < $rows; $i++)
        for ($j = 0; $j < $cols; $j++) {
            // there must be room for the 4-byte length prefix
            if ($p + 4 > $rlen) {
                $this->_error = "incomplete reply";
                $this->_MBPop();
                return false;
            }
            list(, $len) = unpack("N*", substr($response, $p, 4));
            $p += 4;

            // ... and for the announced number of bytes after it
            if ($p + $len > $rlen) {
                $this->_error = "incomplete reply";
                $this->_MBPop();
                return false;
            }
            $res[$i][] = substr($response, $p, $len);
            $p += $len;
        }

    $this->_MBPop();
    return $res;
}
||
1581 | } |
||
1582 | } |
||
1583 | |||
1584 | // |
||
1585 | // $Id: sphinxapi.php 2055 2009-11-06 23:09:58Z shodan $ |
||
1586 | // |