dokuwiki-sphinxsearch-plugin – Blame information for rev 1
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
1 | office | 1 | <?php |
2 | /* |
||
3 | * To change this template, choose Tools | Templates |
||
4 | * and open the template in the editor. |
||
5 | */ |
||
6 | |||
/**
 * Wrapper around SphinxClient used to build extended-syntax queries over the
 * namespace, pagename, title and body fields, run them, and turn the matches
 * into ACL-filtered page lists with highlighted excerpts.
 */
class SphinxSearch
{
    /** SphinxClient instance created in the constructor. */
    private $_sphinx = null;
    /** Result of the last Query() call; always an array (empty when the query failed). */
    private $_result = array();
    /** Index name handed to Query() and BuildExcerpts(). */
    private $_index = null;
    /** Query string built by one of the setSearch*() methods. */
    private $_query = '';
    /** Number of mapped matches inspected by the last getPages() call. */
    private $_offset = 0;

    /** 'limit' option passed to BuildExcerpts(). */
    private $_snippetSize = 256;
    /** 'around' option passed to BuildExcerpts(). */
    private $_aroundKeyword = 5;
    /** Number of distinct readable pages getPages() collects before it stops. */
    private $_resultsPerPage = 10;

    /** Field weights passed to SetFieldWeights() in search(). */
    private $_titlePriority = 1;
    private $_bodyPriority = 1;
    private $_namespacePriority = 1;
    private $_pagenamePriority = 1;

    /**
     * Creates the Sphinx client, points it at the given server and selects
     * the SPH_MATCH_EXTENDED2 match mode.
     *
     * @param string $host  passed to SphinxClient::SetServer()
     * @param int    $port  passed to SphinxClient::SetServer()
     * @param string $index index name used for queries and excerpts
     */
    public function __construct($host, $port, $index)
    {
        $this->_sphinx = new SphinxClient();
        $this->_sphinx->SetServer($host, $port);
        $this->_sphinx->SetMatchMode(SPH_MATCH_EXTENDED2);

        $this->_index = $index;
    }

    /**
     * Builds a query that matches the star-wrapped keywords in
     * namespace/pagename OR the plain keywords in body/title.
     *
     * @param string $keywords   raw keywords; escaped with EscapeString() first
     * @param mixed  $categories not used by this method (kept for interface compatibility)
     */
    public function setSearchAllQuery($keywords, $categories)
    {
        $keywords = $this->_sphinx->EscapeString($keywords);
        $keywords = $this->_enableQuotesAndDefis($keywords);
        $starKeyword = $this->starQuery($keywords);
        $this->_query = "(@(namespace,pagename) $starKeyword) | (@(body,title) {$keywords})";
    }

    /**
     * Builds a query that requires $categories in namespace/pagename AND the
     * keywords in body/title (plain) or namespace/pagename (star-wrapped).
     *
     * A leading "-" in $categories is rewritten to the form -"rest".
     * NOTE(review): $categories is interpolated into the query without
     * escaping; confirm that callers pass trusted or pre-escaped values.
     *
     * @param string $keywords   raw keywords; escaped with EscapeString() first
     * @param string $categories category expression inserted into the query
     */
    public function setSearchAllQueryWithCategoryFilter($keywords, $categories)
    {
        $keywords = $this->_sphinx->EscapeString($keywords);
        $keywords = $this->_enableQuotesAndDefis($keywords);
        $starKeyword = $this->starQuery($keywords);
        if (strpos($categories, "-") === 0) {
            $categories = '-"' . substr($categories, 1) . '"';
        }
        $this->_query = "(@(namespace,pagename) {$categories}) & ((@(body,title) {$keywords}) | (@(namespace,pagename) {$starKeyword}))";
    }

    /**
     * Builds a query restricted to namespace/pagename, containing the
     * star-wrapped keywords and, when non-empty, $categories in front of them.
     *
     * @param string $keywords   raw keywords; escaped with EscapeString() first
     * @param string $categories optional category expression (inserted unescaped)
     */
    public function setSearchCategoryQuery($keywords, $categories)
    {
        $keywords = $this->_sphinx->EscapeString($keywords);
        $keywords = $this->_enableQuotesAndDefis($keywords);

        $starKeyword = $this->starQuery($keywords);
        if (!empty($categories)) {
            $this->_query = "(@(namespace,pagename) $categories $starKeyword)";
        } else {
            $this->_query = "(@(namespace,pagename) $starKeyword)";
        }
    }

    /**
     * Wraps the query built so far in a pagename-only field restriction.
     */
    public function setSearchOnlyPagename()
    {
        $this->_query = "(@(pagename) {$this->_query})";
    }

    /**
     * Applies the configured field weights and runs the current query.
     *
     * The limit sent to Sphinx is $resultsPerPage + 100 (max 1000), so more
     * matches are fetched than getPages() will finally return.
     *
     * @param int $start          offset passed to SetLimits()
     * @param int $resultsPerPage page size remembered for getPages()
     * @return bool true when the result contains at least one match
     */
    public function search($start, $resultsPerPage = 10)
    {
        $this->_resultsPerPage = $resultsPerPage;

        $this->_sphinx->SetFieldWeights(
            array(
                'namespace' => $this->_namespacePriority,
                'pagename' => $this->_pagenamePriority,
                'title' => $this->_titlePriority,
                'body' => $this->_bodyPriority
            )
        );

        $this->_sphinx->SetLimits($start, $resultsPerPage + 100, 1000);

        // Keep $_result an array even if Query() reports failure with a
        // non-array value, so later array accesses stay well-defined.
        $result = $this->_sphinx->Query($this->_query, $this->_index);
        $this->_result = is_array($result) ? $result : array();

        return !empty($this->_result['matches']);
    }

    /**
     * Turns the matches of the last search() into result rows with excerpts.
     *
     * Matches are mapped through getPagesIds(), entries the current user may
     * not read (auth_quickaclcheck() < AUTH_READ) are dropped and subtracted
     * from 'total_found', and collection stops once _resultsPerPage distinct
     * pages have been gathered. getOffset() afterwards reports how many
     * mapped matches were inspected.
     *
     * @param string $keywords keywords used for excerpt highlighting
     * @return array|false rows keyed like the mapped matches, each with
     *                     'page', 'bodyExcerpt', 'titleTextExcerpt', 'hid',
     *                     'title' and 'title_text'; false when there is
     *                     nothing to show
     */
    public function getPages($keywords)
    {
        if (empty($this->_result['matches'])) {
            return false;
        }

        $pagesIdsAll = $this->getPagesIds();
        $this->_offset = 0;
        $distinctPages = 0;
        $seenPages = array();
        $pagesIds = array();
        foreach ($pagesIdsAll as $id => $pageData) {
            $this->_offset++;
            if (auth_quickaclcheck($pageData['page']) >= AUTH_READ) {
                // Several entries may belong to the same page; only the first
                // one counts towards the page limit.
                if (!isset($seenPages[$pageData['page']])) {
                    $seenPages[$pageData['page']] = 1;
                    $distinctPages++;
                }
                $pagesIds[$id] = $pageData;
                if ($distinctPages == $this->_resultsPerPage) {
                    break;
                }
            } else {
                // decrease total found counter for the first page if the page is filtered
                $this->_result['total_found']--;
            }
        }
        if (empty($pagesIds)) {
            return false;
        }

        $body = array();
        $titleText = array();
        foreach ($pagesIds as $crc => $data) {
            if (empty($data['page'])) {
                continue;
            }
            if (!empty($data['hid'])) {
                // Entry refers to a section: render only that section.
                $info = array();
                $bodyHtml = p_render('xhtml', p_get_instructions(getSectionByTitleLevel($data['page'], $data['title'], true)), $info);
            } else {
                $bodyHtml = p_wiki_xhtml($data['page']);
            }
            // Put a ";" after each list item so items stay separated once the
            // tags are stripped.
            $bodyHtml = preg_replace("#[\s]+?</li>#", "</li>;", $bodyHtml);
            $bodyHtml = htmlspecialchars_decode($bodyHtml);
            $body[$crc] = strip_tags($bodyHtml);
            if (!empty($data['title_text'])) {
                $titleText[$crc] = strip_tags($data['title_text']);
            } else {
                $titleText[$crc] = $data['page'];
            }
        }

        // Excerpts are read back by position, in the same order as $body.
        // A failed excerpt call yields null excerpts instead of indexing a
        // non-array value.
        $bodyExcerpt = $this->getExcerpt($body, $keywords);
        if (!is_array($bodyExcerpt)) {
            $bodyExcerpt = array();
        }
        $titleTextExcerpt = $this->getExcerpt($titleText, $keywords);
        if (!is_array($titleTextExcerpt)) {
            $titleTextExcerpt = array();
        }

        $position = 0;
        $results = array();
        foreach ($body as $crc => $notused) {
            $results[$crc] = array(
                'page' => $pagesIds[$crc]['page'],
                'bodyExcerpt' => isset($bodyExcerpt[$position]) ? $bodyExcerpt[$position] : null,
                'titleTextExcerpt' => isset($titleTextExcerpt[$position]) ? $titleTextExcerpt[$position] : null,
                'hid' => $pagesIds[$crc]['hid'],
                'title' => $pagesIds[$crc]['title'],
                'title_text' => $pagesIds[$crc]['title_text']
            );
            $position++;
        }
        return $results;
    }

    /**
     * Maps the keys of the current matches through PageMapper::getByCrc().
     *
     * @return mixed whatever PageMapper::getByCrc() returns; an empty array
     *               when the last search produced no matches
     */
    public function getPagesIds()
    {
        // Without matches there is nothing to map, and array_keys() must not
        // be handed a missing value.
        if (empty($this->_result['matches'])) {
            return array();
        }

        $pageMapper = new PageMapper();

        return $pageMapper->getByCrc(array_keys($this->_result['matches']));
    }

    /**
     * @return int number of mapped matches inspected by the last getPages()
     *             call (0 before the first call)
     */
    public function getOffset()
    {
        return $this->_offset;
    }

    /**
     * Lists the readable pages among the current matches.
     *
     * @return array map of page => hid for every mapped match that passes
     *               the auth_quickaclcheck() >= AUTH_READ test
     */
    public function getPageNames()
    {
        $pageIds = $this->getPagesIds();

        $matchPages = array();
        foreach ($pageIds as $page) {
            if (auth_quickaclcheck($page['page']) < AUTH_READ) {
                continue;
            }
            $matchPages[$page['page']] = $page['hid'];
        }

        return $matchPages;
    }

    /**
     * @return string value of SphinxClient::GetLastError()
     */
    public function getError()
    {
        return $this->_sphinx->GetLastError();
    }

    /**
     * @return int 'total_found' of the last result (after any decrements made
     *             by getPages()), or 0 when it is missing or empty
     */
    public function getTotalFound()
    {
        return !empty($this->_result['total_found']) ? $this->_result['total_found'] : 0;
    }

    /**
     * Builds excerpts for the given texts via SphinxClient::BuildExcerpts(),
     * using the configured snippet size and around-keyword count.
     *
     * @param array  $data  texts to excerpt
     * @param string $query keywords to highlight
     * @return mixed the BuildExcerpts() return value, passed through unchanged
     */
    public function getExcerpt($data, $query)
    {
        return $this->_sphinx->BuildExcerpts(
            $data,
            $this->_index,
            $query,
            array(
                'limit' => $this->_snippetSize,
                'around' => $this->_aroundKeyword,
                'weight_order' => 1,
                'sp' => 1
            )
        );
    }

    /**
     * Wraps every space-separated word of the query in "*...*".
     *
     * Existing leading/trailing stars are removed first. Empty tokens (empty
     * query, repeated spaces, a word made only of stars) are skipped so that
     * no bare "**" term ends up in the query.
     *
     * @param string $query
     * @return string
     */
    public function starQuery($query)
    {
        $starred = array();
        foreach (explode(" ", $this->removeStars($query)) as $word) {
            if ($word === '') {
                continue;
            }
            $starred[] = "*" . $word . "*";
        }
        return implode(" ", $starred);
    }

    /**
     * Strips leading and trailing "*" from every space-separated word.
     *
     * @param string $query
     * @return string
     */
    public function removeStars($query)
    {
        $words = explode(" ", $query);
        foreach ($words as $id => $word) {
            $words[$id] = trim($word, "*");
        }
        return implode(" ", $words);
    }

    /**
     * @return string the query string built so far
     */
    public function getQuery()
    {
        return $this->_query;
    }

    /**
     * @param int $symbols value for the 'limit' excerpt option
     */
    public function setSnippetSize($symbols = 256)
    {
        $this->_snippetSize = $symbols;
    }

    /**
     * @param int $words value for the 'around' excerpt option
     */
    public function setArroundWordsCount($words = 5)
    {
        $this->_aroundKeyword = $words;
    }

    /**
     * @param int $priority weight of the 'title' field
     */
    public function setTitlePriority($priority)
    {
        $this->_titlePriority = $priority;
    }

    /**
     * @param int $priority weight of the 'body' field
     */
    public function setBodyPriority($priority)
    {
        $this->_bodyPriority = $priority;
    }

    /**
     * @param int $priority weight of the 'namespace' field
     */
    public function setNamespacePriority($priority)
    {
        $this->_namespacePriority = $priority;
    }

    /**
     * @param int $priority weight of the 'pagename' field
     */
    public function setPagenamePriority($priority)
    {
        $this->_pagenamePriority = $priority;
    }

    /**
     * Re-enables phrase quotes and the leading "-" operator in an already
     * escaped query.
     *
     * Escaped quotes (\") are turned back into plain quotes only when the
     * query contains an even, non-zero number of them. An escaped hyphen that
     * follows whitespace and precedes a word character becomes a plain "-".
     *
     * @param string $query escaped query
     * @return string
     */
    private function _enableQuotesAndDefis($query)
    {
        // A leading space lets the hyphen pattern also match at the very
        // start of the query; it is removed again before returning.
        $query = ' ' . $query;
        $quotesCount = substr_count($query, '"');
        if ($quotesCount > 0 && $quotesCount % 2 == 0) {
            $query = str_replace('\"', '"', $query);
        }

        // preg_replace() yields null on error (e.g. invalid UTF-8 with the
        // "u" modifier); keep the query as it was rather than dropping it.
        $unescaped = preg_replace("#\s\\\-(\w)#ui", " -$1", $query);
        if ($unescaped !== null) {
            $query = $unescaped;
        }

        return substr($query, 1);
    }
}