dokuwiki-sphinxsearch-plugin – Blame information for rev 2

Subversion Repositories:
Rev:
Rev Author Line No. Line
1 office 1 <?php
2 /*
3 * To change this template, choose Tools | Templates
4 * and open the template in the editor.
5 */
6  
/**
 * Render one document record as a <sphinx:document> XML fragment.
 *
 * Text fields (title, body, namespace, pagename) are passed through
 * escapeTextValue() and wrapped in CDATA sections; id, level and modified
 * are inserted verbatim.
 *
 * @param array $data record with the keys 'id', 'title_to_index', 'body',
 *                    'namespace', 'pagename', 'level' and 'modified'
 * @return string the XML fragment
 */
function formatXml($data)
{
    // NOTE(review): every CDATA payload starts with a literal "[" in front of
    // the placeholder; kept as-is because changing it would alter what is indexed.
    $template = '
<sphinx:document id="{id}">
<title><![CDATA[[{title}]]></title>
<body><![CDATA[[{body}]]></body>
<namespace><![CDATA[[{namespace}]]></namespace>
<pagename><![CDATA[[{pagename}]]></pagename>
<level>{level}</level>
<modified>{modified}</modified>
</sphinx:document>

';

    // Placeholder => value, in the order the placeholders are substituted.
    $substitutions = array(
        '{id}'        => $data['id'],
        '{title}'     => escapeTextValue($data['title_to_index']),
        '{body}'      => escapeTextValue($data['body']),
        '{namespace}' => escapeTextValue($data['namespace']),
        '{pagename}'  => escapeTextValue($data['pagename']),
        '{level}'     => $data['level'],
        '{modified}'  => $data['modified'],
    );

    return str_replace(
        array_keys($substitutions),
        array_values($substitutions),
        $template
    );
}
33  
/**
 * Prepare a text value for embedding inside a CDATA section.
 *
 * Markup tags are removed with strip_tags(), the result is passed through
 * stripInvalidXml(), and every "]]>" terminator is split across separate
 * CDATA sections so it cannot close the surrounding section early.
 *
 * @param string $value raw text
 * @return string text that is safe to place between <![CDATA[ and ]]>
 */
function escapeTextValue($value)
{
    if ($value === "") {
        return "";
    }

    $plainText = stripInvalidXml(strip_tags($value));

    // Close the current section, emit "]]" and ">" in sections of their own,
    // then reopen a section for the remaining text.
    $terminatorSplice = "]]><![CDATA[]]]]><![CDATA[>]]><![CDATA[";

    return str_replace("]]>", $terminatorSplice, $plainText);
}
44  
/**
 * Replace bytes that are not allowed in XML 1.0 documents with a space.
 *
 * The string is processed byte by byte: control bytes below 0x20 other than
 * TAB (0x09), LF (0x0A) and CR (0x0D) become a single space, every other
 * byte is kept unchanged. Because the check is byte-wise, multi-byte UTF-8
 * sequences (all bytes >= 0x80) always pass through untouched.
 *
 * Unlike an empty() guard, the string "0" is a valid value and is returned
 * as-is instead of being dropped.
 *
 * @param string|null $value text to clean; null and non-scalar values yield ""
 * @return string the cleaned text, same byte length as the input
 */
function stripInvalidXml($value)
{
    if (!is_scalar($value)) {
        return "";
    }

    $value = (string) $value;
    if ($value === "") {
        return "";
    }

    // No "u" modifier on purpose: the replacement must work on raw bytes so
    // that input which is not valid UTF-8 is still processed.
    return preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F]/', ' ', $value);
}
65  
/**
 * Split a wiki page into one document per table-of-contents heading.
 *
 * For every entry in $metadata['description']['tableofcontents'] a record is
 * created under the entry's 'hid', holding the section text, the heading
 * level, the title, a display title ('title_text') and the title to index.
 * If getZeroSectionContent() finds text in front of the first listed heading,
 * that text is stored as an additional level-0 record keyed by the page id.
 *
 * @param string $id       page id
 * @param array  $metadata page metadata containing the table of contents
 * @return array|false     map of key => section record, or false when no
 *                         table of contents is available
 */
function getDocumentsByHeadings($id, $metadata)
{
    if (empty($metadata) || empty($metadata['description']['tableofcontents'])) {
        return false;
    }

    $documents = array();
    $baseLevel = 1;
    $parentTitle = '';
    $introPending = true;

    foreach ($metadata['description']['tableofcontents'] as $heading) {
        // Only the first TOC entry is used to look for leading page content.
        if ($introPending) {
            $introPending = false;
            $intro = getZeroSectionContent($id, $heading['title']);
            if ($intro) {
                $documents[$id] = array(
                    'section' => $intro,
                    'level' => 0,
                    'title' => $id,
                    'title_to_index' => $id
                );
            }
        }

        $title = $heading['title'];

        // Headings deeper than the base level are displayed with the most
        // recently remembered parent title in front of them.
        $isNested = $heading['level'] > $baseLevel && !empty($parentTitle);

        $documents[$heading['hid']] = array(
            'section' => getSectionByTitleLevel($id, $title, false),
            'level' => $heading['level'],
            'title' => $title,
            'title_text' => $isNested ? $parentTitle . " &raquo; " . $title : $title,
            'title_to_index' => $title
        );

        if (!$isNested) {
            $parentTitle = $title;
        }
    }

    return $documents;
}
102  
/**
 * Return the raw page text that precedes the given heading.
 *
 * The heading is located with a case-insensitive pattern of the form
 * "=...= title =...=" (1 to 6 "=" on each side). The text returned is
 * everything in front of that exact match; the match offset is taken from
 * preg_match() itself, so an earlier, unrelated run of "=" characters in
 * the page does not cut the result short.
 *
 * @param string $id     page id; the file is read via io_readFile(wikiFN($id))
 * @param string $header heading title to look for
 * @return string|false  the leading text, or false when the heading is not
 *                       found or nothing precedes it
 */
function getZeroSectionContent($id, $header)
{
    $headerReg = preg_quote($header, '/');
    $regex = "(={1,6})\s*({$headerReg})\s*(={1,6})";
    $doc = io_readFile(wikiFN($id));

    $matches = array();
    if (!preg_match("/$regex/i", $doc, $matches, PREG_OFFSET_CAPTURE)) {
        return false;
    }

    // Byte offset of the whole heading match within the page.
    $end = $matches[0][1];
    if ($end === 0) {
        // The heading is the very first thing on the page.
        return false;
    }

    return substr($doc, 0, $end);
}
122  
/**
 * Return the raw page text that follows the given heading.
 *
 * The heading is located with a case-insensitive pattern of the form
 * "=...= title =...=" (1 to 6 "=" on each side). The section runs from the
 * end of that heading up to the next heading written with 4 to 6 "=" on
 * each side, or to the end of the page when no such heading follows.
 * All positions are tracked as offsets into the page itself, so repeated
 * heading text elsewhere on the page cannot shift the section boundaries.
 *
 * In extended mode, when the section body is empty (the heading is
 * immediately followed by another 4-6 "=" heading), the body of that
 * following heading is returned instead. Only one such step is taken.
 *
 * @param string $id       page id; the file is read via io_readFile(wikiFN($id))
 * @param string $header   heading title to look for
 * @param bool   $extended fall through to the next section when the first is empty
 * @return string          trimmed section text, "" when the heading is not found
 */
function getSectionByTitleLevel($id, $header, $extended = false)
{
    $headerReg = preg_quote($header, '/');
    $doc = io_readFile(wikiFN($id));

    $matches = array();
    $startRegex = "(={1,6})\s*({$headerReg})\s*(={1,6})";
    if (!preg_match("/$startRegex/i", $doc, $matches, PREG_OFFSET_CAPTURE)) {
        return '';
    }

    // The section body begins right after the matched heading.
    $bodyStart = $matches[0][1] + strlen($matches[0][0]);

    $boundaryRegex = '(={4,6})(.*?)(={4,6})';
    $attempts = $extended ? 2 : 1;
    $section = '';

    while ($attempts-- > 0) {
        $next = array();
        if (!preg_match("/$boundaryRegex/i", $doc, $next, PREG_OFFSET_CAPTURE, $bodyStart)) {
            // No further boundary heading: the section runs to the end of the page.
            $section = trim((string) substr($doc, $bodyStart));
            break;
        }

        $section = trim((string) substr($doc, $bodyStart, $next[0][1] - $bodyStart));
        if ($section !== '') {
            break;
        }

        // Empty body: continue after the boundary heading (extended mode only).
        $bodyStart = $next[0][1] + strlen($next[0][0]);
    }

    return $section;
}
169  
/**
 * Extract the raw text of the section that starts at the given heading,
 * using the wiki parser's instruction list to find the byte positions.
 *
 * The page is parsed with only the 'header' and 'listblock' modes loaded.
 * The parse result and the raw text of the most recently requested page are
 * kept in static variables, so repeated calls for the same page id do not
 * parse again; asking for a different page replaces the cached entry.
 *
 * @param string $id     page id; the file is read via io_readFile(wikiFN($id))
 * @param string $header heading text (compared against the trimmed header title)
 * @return string        text between the header instruction's position and the
 *                       position of the following 'section_close' instruction
 */
function getSection($id, $header)
{
    static $cacheInstructions = null;
    static $cacheDoc = null;

    if (!empty($cacheDoc[$id])) {
        $instructions = $cacheInstructions[$id];
        $doc = $cacheDoc[$id];
    } else {
        // Build a parser that only knows about headers and list blocks.
        $Parser = new Doku_Parser();
        $Parser->Handler = new Doku_Handler();
        $Parser->addMode('header', new Doku_Parser_Mode_Header());
        $Parser->addMode('listblock', new Doku_Parser_Mode_ListBlock());

        // Read the raw wiki text and turn it into an instruction list.
        $doc = io_readFile(wikiFN($id));
        $instructions = $Parser->parse($doc);

        unset($Parser->Handler);
        unset($Parser);

        // Keep a single page in the cache: drop whatever was stored before.
        $cacheInstructions = array($id => $instructions);
        $cacheDoc = array($id => $doc);
    }

    $insideSection = false;
    $startPos = 0;
    $endPos = 0;

    foreach ($instructions as $step) {
        if ($insideSection) {
            // The first section_close after the wanted header ends the section.
            if ($step[0] == 'section_close') {
                $endPos = $step[2];
                break;
            }
            continue;
        }

        if ($step[0] == 'header' && trim($step[1][0]) == $header) {
            $startPos = $step[2];
            $insideSection = true;
        }
    }

    // Normalize line endings and pad with newlines before slicing; the
    // original comment states this mirrors what the parser does so that the
    // byte indexes line up.
    $normalized = "\n" . str_replace("\r\n", "\n", $doc) . "\n";

    return substr($normalized, $startPos, ($endPos - $startPos));
}
245  
/**
 * List every ancestor namespace of a page id, longest first.
 *
 * For "a:b:c" the result is "a:b a  ": each namespace prefix is followed by
 * one space, and a final space is appended for the empty (root) prefix.
 * Ids without a ":" yield an empty string.
 *
 * @param string $id page id
 * @return string space-separated namespace prefixes
 */
function getCategories($id)
{
    if (empty($id)) {
        return '';
    }

    $segments = explode(":", $id);
    if (count($segments) < 2) {
        // No namespace separator in the id.
        return '';
    }

    $categories = '';
    // Walk from the full namespace down to the empty prefix (depth 0).
    for ($depth = count($segments) - 1; $depth >= 0; $depth--) {
        $categories .= implode(':', array_slice($segments, 0, $depth)) . ' ';
    }

    return $categories;
}
270  
/**
 * Return the last ":"-separated segment of a page id.
 *
 * @param string $id page id
 * @return string the part after the last ":", the id itself when it has no
 *                ":", or "" for an empty id
 */
function getPagename($id)
{
    if (empty($id)) {
        return '';
    }

    if (strpos($id, ":") === false) {
        return $id;
    }

    $segments = explode(":", $id);

    return array_pop($segments);
}
282  
283  
284  
/**
 * Collect the wiki pages found under the configured data directory.
 *
 * Delegates to search() with the 'search_allpages' callback and the
 * 'skipacl' option set to 1; the entries are returned in whatever order
 * search() produces them.
 *
 * @global array $conf DokuWiki configuration; 'datadir' is the directory searched
 * @return array entries collected by search()
 */
function getPagesList()
{
    global $conf;

    $pages = array();
    search($pages, $conf['datadir'], 'search_allpages', array('skipacl' => 1), '');

    return $pages;
}
300  
/**
 * Build one search link per namespace level of a page id.
 *
 * For every ":"-separated prefix of $id the prefix is resolved with
 * resolve_pageid(); when the resolved page ends in ":start" (and the prefix
 * itself is not a "start" namespace) that trailing ":start" is removed.
 * Each level yields a link that repeats the search restricted to that
 * namespace ("@ns <page>"), and a title taken from the excerpts returned by
 * $search->getExcerpt() for the segment names.
 *
 * NOTE(review): $keywords is placed into the link without URL encoding,
 * as before; confirm callers pass an already encoded value.
 *
 * @param string $id       page id
 * @param string $keywords search keywords
 * @param object $search   object providing getExcerpt() and starQuery()
 * @return array           list of array('link' => ..., 'title' => ...)
 */
function getNsLinks($id, $keywords, $search)
{
    $parts = explode(':', $id);
    $count = count($parts);

    $part = '';
    $data = array();
    $titles = array();
    for ($i = 0; $i < $count; $i++) {
        $part .= $parts[$i] . ':';
        $page = $part;
        // $page and $exists are filled in by reference.
        resolve_pageid('', $page, $exists);

        if (preg_match('#:start$#', $page) && !preg_match('#:start:$#', $part)) {
            // Cut at the LAST ":start": an earlier segment may itself begin
            // with "start" (e.g. "ns:starters:start" must become "ns:starters").
            $page = substr($page, 0, strrpos($page, ':start'));
        }

        // The title is the segment name whether or not the page exists.
        $titles[wl($page)] = $parts[$i];
        $data[] = array('link' => "?do=search&id={$keywords}" . urlencode(" @ns $page"));
    }

    $titleExcerpt = $search->getExcerpt($titles, $search->starQuery($keywords));
    $i = 0;
    foreach ($data as $key => $notused) {
        $data[$key]['title'] = $titleExcerpt[$i++];
    }

    return $data;
}
335  
/**
 * Print an HTML list of links to the pages whose names match the query.
 *
 * Spaces in the query are replaced by underscores before it is handed to
 * ft_pageLookup(). At most 20 entries are printed.
 *
 * @param string $query search text
 * @return false|null false when nothing matched, otherwise nothing
 */
function printNamespaces($query)
{
    $pages = ft_pageLookup(str_replace(" ", "_", $query), false);

    if (!count($pages)) {
        return false;
    }

    $maxItems = 20;
    $printed = 0;

    print '<ul>';
    foreach ($pages as $id) {
        print '<li>';

        // NOTE(review): the shortened display name is computed but never used;
        // the link text below is the full page id.
        $ns = getNS($id);
        $name = $ns ? shorten(noNS($id), ' (' . $ns . ')', 30) : $id;

        tpl_link(wl($id), $id, "class='wikilink1'");
        print '</li>';

        if (++$printed == $maxItems) {
            break;
        }
    }
    print '</ul>';
}
364  
/**
 * Print an HTML list of links for the given pages.
 *
 * Each key of $pageNames is a page id; the value (a header) is currently not
 * used in the output. At most 10 entries are printed.
 *
 * @param array $pageNames map of page id => header
 * @return false|null false when the list is empty, otherwise nothing
 */
function printNamespacesNew($pageNames)
{
    if (empty($pageNames)) {
        return false;
    }

    $limit = 10;
    $printed = 0;

    print '<ul>';
    foreach ($pageNames as $id => $header) {
        // NOTE(review): the shortened display name is computed but never used;
        // the link text below is the full page id.
        $ns = getNS($id);
        $name = $ns ? shorten(noNS($id), ' (' . $ns . ')', 30) : $id;

        print '<li>';
        print '<a href="' . wl($id) . '" ' . "class='wikilink1'>" . $id . "</a>";
        print '</li>';

        if (++$printed == $limit) {
            break;
        }
    }
    print '</ul>';
}
393  
if (!function_exists('shorten')) {
    /**
     * Shorten a string by cutting characters out of its middle.
     *
     * The string is given in two parts. $keep is always returned in full.
     * $short is appended unchanged when it fits into the remaining space;
     * otherwise its middle is replaced by $char. When fewer than $min
     * characters remain after $keep, $short is left off entirely.
     *
     * Lengths are measured with utf8_strlen() and cuts are made with
     * utf8_substr(), both defined outside this file.
     *
     * @param string $keep  the part to keep
     * @param string $short the part to shorten
     * @param int    $max   maximum number of characters for the whole string
     * @param int    $min   minimum number of characters that must be left for $short
     * @param string $char  the character marking the removed middle
     * @return string the combined, possibly shortened string
     */
    function shorten($keep, $short, $max, $min = 9, $char = '⌇')
    {
        // Space left for $short once $keep has been accounted for.
        $budget = $max - utf8_strlen($keep);
        if ($budget < $min) {
            return $keep;
        }

        $shortLength = utf8_strlen($short);
        if ($shortLength <= $budget) {
            return $keep . $short;
        }

        $half = floor($budget / 2);
        $head = utf8_substr($short, 0, $half - 1);
        $tail = utf8_substr($short, $shortLength - $half);

        return $keep . $head . $char . $tail;
    }
}