dokuwiki-sphinxsearch-plugin – Blame information for rev 1

Subversion Repositories:
Rev:
Rev Author Line No. Line
1 office 1 <?php
2 /*
3 * To change this template, choose Tools | Templates
4 * and open the template in the editor.
5 */
6  
/**
 * Render one document as a <sphinx:document> XML fragment.
 *
 * The text fields (title, body, namespace, pagename) are passed through
 * escapeTextValue() and wrapped in CDATA sections; id, level and modified
 * are inserted as given.
 *
 * All placeholders are substituted in a single pass, so a field value that
 * happens to contain a literal placeholder such as "{body}" or "{level}"
 * is emitted verbatim instead of being replaced by another field's value.
 *
 * NOTE(review): every CDATA section opens with an extra "[" that becomes
 * part of the indexed text. It is kept as found; confirm whether it is
 * intentional.
 *
 * @param array $data expects the keys id, title_to_index, body, namespace,
 *                    pagename, level and modified
 * @return string the XML fragment, surrounded by newlines
 */
function formatXml($data)
{
    $xmlFormat = '
<sphinx:document id="{id}">
<title><![CDATA[[{title}]]></title>
<body><![CDATA[[{body}]]></body>
<namespace><![CDATA[[{namespace}]]></namespace>
<pagename><![CDATA[[{pagename}]]></pagename>
<level>{level}</level>
<modified>{modified}</modified>
</sphinx:document>

';

    // strtr() with an array never re-scans text it has already inserted,
    // unlike the array form of str_replace() which applies pairs one by one.
    return strtr($xmlFormat, array(
        '{id}'        => (string) $data['id'],
        '{title}'     => (string) escapeTextValue($data['title_to_index']),
        '{body}'      => (string) escapeTextValue($data['body']),
        '{namespace}' => (string) escapeTextValue($data['namespace']),
        '{pagename}'  => (string) escapeTextValue($data['pagename']),
        '{level}'     => (string) $data['level'],
        '{modified}'  => (string) $data['modified'],
    ));
}
33  
/**
 * Prepare a text value for embedding inside a CDATA section.
 *
 * Tags are removed with strip_tags(), the result is passed through
 * stripInvalidXml(), and every "]]>" sequence is split across several
 * CDATA sections so that it cannot terminate the enclosing one early.
 *
 * @param string $value raw text
 * @return string text that is safe to place between <![CDATA[ and ]]>
 */
function escapeTextValue($value)
{
    // An empty string needs no processing at all.
    if ($value === "") {
        return "";
    }

    $plainText = stripInvalidXml(strip_tags($value));

    // Close the section, emit "]]" and ">" in sections of their own, reopen.
    $cdataEndReplacement = "]]><![CDATA[]]]]><![CDATA[>]]><![CDATA[";

    return str_replace("]]>", $cdataEndReplacement, $plainText);
}
44  
/**
 * Replace bytes that are not allowed in XML 1.0 documents with a space.
 *
 * The string is examined byte by byte: control characters below 0x20 other
 * than tab (0x09), line feed (0x0A) and carriage return (0x0D) become a
 * single space. Every other byte, including the bytes of multi-byte UTF-8
 * sequences, passes through unchanged.
 *
 * Null, false, the empty string and non-scalar values yield "". The string
 * "0" is ordinary content and is returned as is.
 *
 * @param string $value text to clean
 * @return string cleaned text with the same byte length as the input
 */
function stripInvalidXml($value)
{
    if (!is_scalar($value) || $value === false || $value === '') {
        return "";
    }

    // No /u modifier: the match is deliberately byte-wise, so malformed
    // UTF-8 input cannot make the regex fail.
    return preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F]/', ' ', (string) $value);
}
66  
/**
 * Split a page into one indexable document per table-of-contents entry.
 *
 * Each TOC row produces an entry keyed by the row's 'hid', holding the
 * section text, level, title, a display title ('title_text') and
 * 'title_to_index'. If there is text before the first heading, an extra
 * entry keyed by the page id is added with level 0.
 *
 * A row deeper than level 1 gets the most recently remembered title
 * prepended to its display title, joined by " &raquo; ". Any other row
 * becomes the remembered title itself.
 *
 * @param string $id       page id
 * @param array  $metadata page metadata; the TOC is read from
 *                         ['description']['tableofcontents']
 * @return array|false documents keyed by heading id, or false when the
 *                     metadata or the TOC is empty
 */
function getDocumentsByHeadings($id, $metadata)
{
    if (empty($metadata) || empty($metadata['description']['tableofcontents'])) {
        return false;
    }

    $documents = array();
    $baseLevel = 1;
    $parentTitle = '';
    $isFirstRow = true;

    foreach ($metadata['description']['tableofcontents'] as $row) {
        $title = $row['title'];
        $hid = $row['hid'];

        // Text before the very first heading is indexed under the page id.
        if ($isFirstRow) {
            $isFirstRow = false;
            $intro = getZeroSectionContent($id, $title);
            if ($intro) {
                $documents[$id] = array(
                    'section' => $intro,
                    'level' => 0,
                    'title' => $id,
                    'title_to_index' => $id
                );
            }
        }

        $documents[$hid] = array(
            'section' => getSectionByTitleLevel($id, $title, false),
            'level' => $row['level'],
            'title' => $title
        );

        $isNested = $row['level'] > $baseLevel && !empty($parentTitle);
        if ($isNested) {
            $documents[$hid]['title_text'] = $parentTitle . " &raquo; " . $title;
        } else {
            $documents[$hid]['title_text'] = $title;
            $parentTitle = $title;
        }

        $documents[$hid]['title_to_index'] = $title;
    }

    return $documents;
}
103  
/**
 * Return the raw page text that precedes the given heading.
 *
 * The heading is located with a case-insensitive match on
 * "=..= header =..=" (one to six "=" on each side), and everything before
 * the start of that match is returned.
 *
 * The cut position is the offset of the matched heading itself. Searching
 * the page for the heading's "=" run instead would stop at any earlier
 * occurrence of those characters (for example "a == b" in the intro text).
 *
 * @param string $id     page id
 * @param string $header heading title to look for
 * @return string|false text before the heading, or false when the heading
 *                      is not found or nothing precedes it
 */
function getZeroSectionContent($id, $header)
{
    $headerReg = preg_quote($header, '/');
    $pattern = "/(={1,6})\s*({$headerReg})\s*(={1,6})/i";
    $doc = io_readFile(wikiFN($id));

    $matches = array();
    if (!preg_match($pattern, $doc, $matches, PREG_OFFSET_CAPTURE)) {
        return false;
    }

    // With PREG_OFFSET_CAPTURE, $matches[0] is array(matched text, byte offset).
    $end = $matches[0][1];
    if ($end <= 0) {
        // The heading is the first thing on the page: no leading content.
        return false;
    }

    return substr($doc, 0, $end);
}
123  
/**
 * Return the raw text of the section that follows the given heading.
 *
 * The heading is located with a case-insensitive match on
 * "=..= header =..=" (one to six "=" per side). The section runs from the
 * end of that heading up to the next heading written with four to six "="
 * per side, or to the end of the page when there is none. The result is
 * trimmed.
 *
 * In extended mode, when that section is empty and another heading follows,
 * the text after that following heading (up to the next one) is returned
 * instead.
 *
 * All positions come from the regex match offsets. The following heading is
 * therefore always searched for after the start heading, and an identical
 * heading earlier on the page cannot be picked up by mistake.
 *
 * @param string $id       page id
 * @param string $header   heading title to look for
 * @param bool   $extended fall back to the next section when the first one
 *                         is empty
 * @return string trimmed section text; '' when the heading is not found
 */
function getSectionByTitleLevel($id, $header, $extended = false)
{
    $headerReg = preg_quote($header, '/');
    $doc = io_readFile(wikiFN($id));

    $headerPattern = "/(={1,6})\s*({$headerReg})\s*(={1,6})/i";
    $nextHeaderPattern = '/(={4,6})(.*?)(={4,6})/i';

    if (!preg_match($headerPattern, $doc, $matches, PREG_OFFSET_CAPTURE)) {
        return '';
    }

    // Absolute byte position right after the matched heading.
    $cursor = $matches[0][1] + strlen($matches[0][0]);

    // One pass normally; a second pass only in extended mode.
    $passes = $extended ? 2 : 1;
    $section = '';

    for ($pass = 0; $pass < $passes; $pass++) {
        $rest = (string) substr($doc, $cursor);

        if (preg_match($nextHeaderPattern, $rest, $next, PREG_OFFSET_CAPTURE)) {
            // $next[0][1] is relative to $rest, i.e. a length from $cursor.
            $section = trim((string) substr($doc, $cursor, $next[0][1]));
            $afterNextHeader = $cursor + $next[0][1] + strlen($next[0][0]);
        } else {
            $section = trim($rest);
            $afterNextHeader = null;
        }

        if ($section !== '' || $afterNextHeader === null) {
            break;
        }

        // Empty body: continue after the heading that ended it.
        $cursor = $afterNextHeader;
    }

    return $section;
}
170  
/**
 * Extract the raw text of the section introduced by the given header,
 * using positions reported by the DokuWiki parser.
 *
 * The page is parsed with only the 'header' and 'listblock' modes enabled.
 * The instruction list and raw text of the most recently parsed page are
 * kept in static variables, so repeated calls for the same page do not
 * re-parse it. Only one page is cached at a time.
 *
 * The section starts at the position stored with the first 'header'
 * instruction whose trimmed title equals $header, and ends at the position
 * stored with the next 'section_close' instruction.
 *
 * @param string $id     page id
 * @param string $header header title to look for
 * @return string the text between the two positions
 */
function getSection($id, $header)
{
    static $cacheInstructions = null;
    static $cacheDoc = null;

    if (!empty($cacheDoc[$id])) {
        // Same page as last time: reuse the stored parse result.
        $instructions = $cacheInstructions[$id];
        $doc = $cacheDoc[$id];
    } else {
        // Build a parser that only recognises headers and list blocks.
        $Parser = new Doku_Parser();
        $Parser->Handler = new Doku_Handler();
        $Parser->addMode('header', new Doku_Parser_Mode_Header());
        $Parser->addMode('listblock', new Doku_Parser_Mode_ListBlock());

        // Read the raw wiki text and turn it into instructions.
        $doc = io_readFile(wikiFN($id));
        $instructions = $Parser->parse($doc);

        unset($Parser->Handler);
        unset($Parser);

        // Drop whatever was cached before, then remember this page only.
        $cacheInstructions = null;
        $cacheDoc = null;
        $cacheInstructions[$id] = $instructions;
        $cacheDoc[$id] = $doc;
    }

    $startPos = 0;
    $endPos = 0;
    $inSection = FALSE;

    foreach ($instructions as $instruction) {
        if ($inSection) {
            // Inside the wanted section: stop at the first section close.
            if ($instruction[0] == 'section_close') {
                $endPos = $instruction[2];
                break;
            }
            continue;
        }

        // Still searching for the header that opens the section.
        if ($instruction[0] == 'header' && trim($instruction[1][0]) == $header) {
            $startPos = $instruction[2];
            $inSection = TRUE;
        }
    }

    // Normalise line endings and pad with newlines so that the positions
    // line up with the text (presumably mirrors what the parser does
    // internally; verify against Doku_Parser).
    $normalized = "\n" . str_replace("\r\n", "\n", $doc) . "\n";

    return substr($normalized, $startPos, ($endPos - $startPos));
}
246  
/**
 * Build a space-separated list of the namespace paths containing a page.
 *
 * For "a:b:c" the result is "a:b a  ": the full namespace, then each
 * shorter parent, each followed by a space. The final pass adds an empty
 * path, which leaves one extra trailing space.
 *
 * @param string $id page id
 * @return string namespace list, or '' when the id is empty or has no
 *                namespace part
 */
function getCategories($id)
{
    if (empty($id) || strpos($id, ":") === false) {
        return '';
    }

    $segments = explode(":", $id);
    $list = '';

    // Walk from the deepest namespace down to the empty prefix.
    for ($depth = count($segments) - 1; $depth >= 0; $depth--) {
        $list .= implode(':', array_slice($segments, 0, $depth)) . ' ';
    }

    return $list;
}
271  
/**
 * Return the page name, i.e. the part of the id after the last colon.
 *
 * @param string $id page id, optionally prefixed with namespaces
 * @return string the last id segment; the id itself when it contains no
 *                colon; '' for an empty id
 */
function getPagename($id)
{
    if (empty($id)) {
        return '';
    }

    // No namespace separator: the id already is the page name.
    if (strpos($id, ":") === false) {
        return $id;
    }

    $segments = explode(":", $id);

    return array_pop($segments);
}
283  
284  
285  
/**
 * Collect all wiki pages below the configured data directory.
 *
 * Runs DokuWiki's search() with the 'search_allpages' callback and the
 * 'skipacl' option set, and returns whatever that call put into the result
 * array, in the order it produced it.
 *
 * @global array $conf DokuWiki configuration; 'datadir' is read
 * @return array the entries collected by search()
 */
function getPagesList()
{
    global $conf;

    // search() fills the array passed as its first argument. Sorting it
    // before the scan would act on an empty array and do nothing, so no
    // sort is performed here.
    $pages = array();
    search($pages, $conf['datadir'], 'search_allpages', array('skipacl' => 1), '');

    return $pages;
}
301  
/**
 * Build search links for every namespace level of a page id.
 *
 * For each prefix of the id ("a:", "a:b:", ...) the prefix is resolved with
 * resolve_pageid(). A trailing ":start" is cut from the resolved page unless
 * the prefix itself ends in ":start:". A link that repeats the search
 * restricted to that namespace is then recorded. The link titles are the id
 * segments, passed through $search->getExcerpt() with the starred query.
 *
 * @param string $id       page id
 * @param string $keywords search keywords, inserted into the link as given
 * @param object $search   object providing getExcerpt() and starQuery()
 * @return array list of array('link' => ..., 'title' => ...)
 */
function getNsLinks($id, $keywords, $search)
{
    $segments = explode(':', $id);

    $links = array();
    $titles = array();
    $prefix = '';

    foreach ($segments as $segment) {
        $prefix .= $segment . ':';

        // resolve_pageid() receives $page and $exists by reference.
        $page = $prefix;
        resolve_pageid('', $page, $exists);

        $pageEndsInStart = preg_match("#:start$#", $page);
        if ($pageEndsInStart && !preg_match("#:start:$#", $prefix)) {
            $page = substr($page, 0, strpos($page, ":start"));
        }

        // Titles are keyed by URL, so prefixes that resolve to the same
        // URL share a single entry.
        $titles[wl($page)] = $segment;
        $links[] = array('link' => "?do=search&id={$keywords}" . urlencode(" @ns $page"));
    }

    $titleExcerpt = $search->getExcerpt($titles, $search->starQuery($keywords));

    // $links is a plain list, so its keys are the running position.
    foreach (array_keys($links) as $position) {
        $links[$position]['title'] = $titleExcerpt[$position];
    }

    return $links;
}
336  
/**
 * Print an HTML list of links to the pages whose id matches the query.
 *
 * Spaces in the query are replaced by underscores before it is handed to
 * ft_pageLookup(). At most 20 results are printed, each as a link produced
 * by tpl_link().
 *
 * @param string $query text to look up
 * @return false|null false when nothing matched (nothing is printed),
 *                    otherwise no value
 */
function printNamespaces($query)
{
    $ids = ft_pageLookup(str_replace(" ", "_", $query), false);

    // empty() also covers a null result, where count() would raise an
    // error on current PHP versions.
    if (empty($ids)) return false;

    print '<ul>';
    foreach (array_slice($ids, 0, 20) as $id) {
        print '<li>';
        tpl_link(wl($id), $id, "class='wikilink1'");
        print '</li>';
    }
    print '</ul>';
}
365  
/**
 * Print an HTML list of links for the given pages.
 *
 * Only the array keys (page ids) are used; the values are ignored. At most
 * 10 entries are printed. Each entry links to wl($id) and shows the id as
 * its text.
 *
 * @param array $pageNames map of page id => header
 * @return false|null false when the list is empty (nothing is printed),
 *                    otherwise no value
 */
function printNamespacesNew($pageNames)
{
    if (empty($pageNames)) return false;

    $limit = 10;

    print '<ul>';
    // Keys are preserved so that numeric-looking page ids are not renumbered.
    foreach (array_keys(array_slice($pageNames, 0, $limit, true)) as $id) {
        print '<li>';
        print '<a href="' . wl($id) . '" ' . "class='wikilink1'>" . $id . "</a>";
        print '</li>';
    }
    print '</ul>';
}
394  
if (!function_exists('shorten')) {
    /**
     * Shorten a string by cutting characters out of its middle.
     *
     * The string is supplied in two parts. $keep is always returned in
     * full. $short is appended in full if it fits within $max characters
     * overall. If it does not fit, its middle is replaced by $char, but only
     * when at least $min characters remain for it after $keep. When fewer
     * remain, $short is dropped and $keep alone is returned.
     *
     * @param string $keep  the part that is never shortened
     * @param string $short the part that may be shortened
     * @param int    $max   maximum length wanted for the whole string
     * @param int    $min   minimum room required to show a shortened $short
     * @param string $char  marker inserted where characters were removed
     * @return string the combined string
     */
    function shorten($keep, $short, $max, $min = 9, $char = '⌇')
    {
        // Room left for $short once $keep has been accounted for.
        $room = $max - utf8_strlen($keep);
        if ($room < $min) {
            return $keep;
        }

        $shortLength = utf8_strlen($short);
        if ($shortLength <= $room) {
            return $keep . $short;
        }

        // Keep roughly half the room from each end of $short.
        $half = floor($room / 2);
        $head = utf8_substr($short, 0, $half - 1);
        $tail = utf8_substr($short, $shortLength - $half);

        return $keep . $head . $char . $tail;
    }
}