dokuwiki-sphinxsearch-plugin – Rev 2

Subversion Repositories:
 * To change this template, choose Tools | Templates
 * and open the template in the editor.

/**
 * Fill the `<sphinx:document id="{id}">` XML template with values from $data.
 *
 * The placeholders {id}, {title}, {body}, {namespace}, {pagename}, {level}
 * and {modified} are substituted with str_replace(). Of the replacement
 * values visible here, 'title_to_index' is passed through escapeTextValue()
 * while 'id', 'level' and 'modified' are inserted unchanged.
 *
 * NOTE(review): in this copy of the file the body of the template string, the
 * replacement values for {body}, {namespace} and {pagename}, and the closing
 * delimiters of the statement are missing. Restore them from the original
 * source before relying on this function.
 *
 * @param array $data document fields; keys read here: 'id', 'title_to_index',
 *                    'level', 'modified'
 * @return string the template with all placeholders replaced
 */
function formatXml($data)
    $xmlFormat = '
<sphinx:document id="{id}">


    return str_replace(
        array('{id}', '{title}', '{body}', '{namespace}', '{pagename}', '{level}', '{modified}'),
            $data['id'], escapeTextValue($data['title_to_index']),
            $data['level'], $data['modified']

/**
 * Clean a text value before it is inserted into the XML output.
 *
 * Steps, in order:
 *  1. strip HTML/PHP tags with strip_tags();
 *  2. pass the result through stripInvalidXml();
 *  3. replace every "]]>" with a sequence of CDATA close/open markers.
 *
 * NOTE(review): step 3 only makes sense if the caller wraps the value in a
 * CDATA section - confirm against the XML template used by formatXml().
 *
 * @param string|null $value raw text
 * @return string cleaned text; "" for an empty or null input
 */
function escapeTextValue($value)
{
    // Nothing to clean; the null check also avoids handing null to strip_tags().
    if ($value === null || "" === $value) {
        return "";
    }

    $value = strip_tags($value);
    $value = stripInvalidXml($value);

    return str_replace("]]>", "]]><![CDATA[]]]]><![CDATA[>]]><![CDATA[", $value);
}

/**
 * Replace bytes that are not allowed in XML text with a space.
 *
 * The string is processed byte by byte (no multi-byte decoding), so the only
 * bytes that can ever be rejected are the control characters below 0x20 other
 * than TAB (0x09), LF (0x0A) and CR (0x0D). Every byte >= 0x20 - including the
 * bytes of multi-byte UTF-8 sequences - is kept unchanged.
 *
 * @param string|null $value text to clean
 * @return string cleaned text; "" when $value is null, non-scalar or empty
 */
function stripInvalidXml($value)
{
    if (!is_scalar($value)) {
        return "";
    }

    // Compare against "" explicitly: empty() would also discard the valid text "0".
    $value = (string) $value;
    if ($value === "") {
        return "";
    }

    // No "u" modifier on purpose: matching stays byte-wise, so input that is
    // not valid UTF-8 is still processed instead of making PCRE fail.
    return preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F]/', ' ', $value);
}

/**
 * Split a wiki page into one entry per table-of-contents heading.
 *
 * The result is keyed by the heading's 'hid'. Each entry holds:
 *  - 'section'        text returned by getSectionByTitleLevel() for the heading
 *  - 'level'          the heading's 'level' from the table of contents
 *  - 'title'          the heading's title
 *  - 'title_text'     the title, prefixed with "<previous title> &raquo; "
 *                     when the heading's level is greater than 1 and an
 *                     earlier heading has been recorded
 *  - 'title_to_index' the heading's title
 *
 * If getZeroSectionContent() returns text for the first heading, an extra
 * entry keyed by $id is added first with level 0 and $id as its title. That
 * entry has no 'title_text' key.
 *
 * @param string $id       page id
 * @param array  $metadata page metadata; $metadata['description']['tableofcontents']
 *                         must be a list of rows with 'hid', 'title' and 'level'
 * @return array|false sections keyed by hid, or false when there is no table of contents
 */
function getDocumentsByHeadings($id, $metadata)
{
    if (empty($metadata) || empty($metadata['description']['tableofcontents'])) {
        return false;
    }

    $sections = array();
    $level = 1;
    $previousTitle = '';
    $firstSection = true;

    foreach ($metadata['description']['tableofcontents'] as $row) {
        if ($firstSection) {
            // Text in front of the first heading becomes its own entry.
            $zerocontent = getZeroSectionContent($id, $row['title']);
            if ($zerocontent) {
                $sections[$id] = array(
                    'section' => $zerocontent,
                    'level' => 0,
                    'title' => $id,
                    'title_to_index' => $id
                );
            }
            $firstSection = false;
        }

        $hid = $row['hid'];
        $sections[$hid] = array(
            'section' => getSectionByTitleLevel($id, $row['title'], false),
            'level' => $row['level'],
            'title' => $row['title']
        );

        if ($row['level'] > $level && !empty($previousTitle)) {
            $sections[$hid]['title_text'] = $previousTitle . " &raquo; " . $row['title'];
        } else {
            $sections[$hid]['title_text'] = $row['title'];
            // Only headings taken through this branch become the prefix for later ones.
            $previousTitle = $row['title'];
        }
        $sections[$hid]['title_to_index'] = $row['title'];
    }

    return $sections;
}

/**
 * Return the raw page text that precedes the given heading.
 *
 * The heading is located with a case-insensitive pattern of the form
 * "=...= <header> =...=" (1 to 6 "=" on each side). The returned text runs
 * from the start of the page up to the first character of that match.
 *
 * @param string $id     page id
 * @param string $header heading title to look for
 * @return string|false text before the heading; false when the heading is not
 *                      found or nothing precedes it
 */
function getZeroSectionContent($id, $header)
{
    $headerReg = preg_quote($header, '/');
    $regex = "(={1,6})\s*({$headerReg})\s*(={1,6})";
    $doc = io_readFile(wikiFN($id));

    $matches = array();
    if (!preg_match("/$regex/i", $doc, $matches, PREG_OFFSET_CAPTURE)) {
        return false;
    }

    // Use the offset of the match itself. Searching the document for the
    // heading's "=" run would stop at any earlier run of "=" characters.
    $end = $matches[0][1];
    if ($end <= 0) {
        return false;
    }

    return substr($doc, 0, $end);
}

/**
 * Return the trimmed raw wiki text that follows a heading.
 *
 * The start heading is matched case-insensitively as "=...= <header> =...="
 * with 1 to 6 "=" on each side. The section ends at the next heading written
 * with 4 to 6 "=" on each side, or at the end of the page if there is none.
 *
 * In extended mode, when that text is blank, the text following the next
 * such heading is returned instead (one extra step only).
 *
 * All positions are absolute offsets into the page text and every search
 * starts after the previously matched heading, so an identical heading
 * earlier in the page cannot be mistaken for the end of the section.
 *
 * @param string $id       page id
 * @param string $header   heading title to look for
 * @param bool   $extended fall through to the next section when the first is blank
 * @return string section text, or "" when the heading is not found
 */
function getSectionByTitleLevel($id, $header, $extended = false)
{
    $doc = io_readFile(wikiFN($id));
    $headerReg = preg_quote($header, '/');
    $regex = "(={1,6})\s*({$headerReg})\s*(={1,6})";

    $matches = array();
    if (!preg_match("/$regex/i", $doc, $matches, PREG_OFFSET_CAPTURE)) {
        return '';
    }
    $bodyStart = $matches[0][1] + strlen($matches[0][0]);

    $regex = '(={4,6})(.*?)(={4,6})';
    $section = '';
    // One pass normally; a second one only in extended mode.
    $passes = $extended ? 2 : 1;
    for ($pass = 0; $pass < $passes; $pass++) {
        if (!preg_match("/$regex/i", $doc, $matches, PREG_OFFSET_CAPTURE, $bodyStart)) {
            // No further heading: the section runs to the end of the page.
            return trim(substr($doc, $bodyStart));
        }

        $endHeaderPos = $matches[0][1];
        $section = trim(substr($doc, $bodyStart, $endHeaderPos - $bodyStart));
        if ($section !== '') {
            break;
        }

        // Blank body: continue right after the heading that ended it.
        $bodyStart = $endHeaderPos + strlen($matches[0][0]);
    }

    return $section;
}

/**
 * Return the raw text of the section introduced by the given header, using
 * the DokuWiki parser's instruction list to find its byte range.
 *
 * The page is parsed with only the 'header' and 'listblock' modes loaded.
 * The parsed instructions and the raw text of the most recently requested
 * page are kept in static variables, so repeated calls for the same $id do
 * not parse the page again; asking for another page replaces the cache.
 *
 * @param string $id     page id
 * @param string $header header text to look for (compared after trim())
 * @return string raw section text, or "" when the header is not found
 */
function getSection($id, $header)
{
    static $cacheInstructions = null;
    static $cacheDoc = null;

    if (empty($cacheDoc[$id])) {
        // Create the parser
        $Parser = new Doku_Parser();

        // Add the Handler
        $Parser->Handler = new Doku_Handler();

        // Load the header mode to find headers
        $Parser->addMode('header', new Doku_Parser_Mode_Header());
        $Parser->addMode('listblock', new Doku_Parser_Mode_ListBlock());

        // Load the raw wiki document
        $doc = io_readFile(wikiFN($id));

        // Get a list of instructions
        $instructions = $Parser->parse($doc);

        // Free the old cache: only one page is kept at a time
        $cacheInstructions = null;
        $cacheDoc = null;

        // Initialize the new cache
        $cacheInstructions[$id] = $instructions;
        $cacheDoc[$id] = $doc;
    } else {
        $instructions = $cacheInstructions[$id];
        $doc = $cacheDoc[$id];
    }

    // Tracks whether we are inside the wanted section
    $inSection = false;
    $startPos = 0;
    $endPos = 0;

    foreach ($instructions as $instruction) {
        if (!$inSection) {
            // Look for the requested header. The comparison is strict so that
            // numeric-looking titles such as "1.0" and "1" are not confused.
            if (
                $instruction[0] == 'header' &&
                trim($instruction[1][0]) === (string) $header
            ) {
                $startPos = $instruction[2];
                $inSection = true;
            }
        } else {
            // The first section_close after the header ends the section;
            // later ones belong to following sections.
            if ($instruction[0] == 'section_close') {
                $endPos = $instruction[2];
                break;
            }
        }
    }

    if (!$inSection) {
        return '';
    }

    // Normalize and pad the document in the same way the parser does
    // so that byte indexes match
    $doc = "\n" . str_replace("\r\n", "\n", $doc) . "\n";

    if ($endPos <= $startPos) {
        // No closing instruction was seen: take everything after the header
        // rather than passing a negative length to substr().
        return (string) substr($doc, $startPos);
    }

    return substr($doc, $startPos, $endPos - $startPos);
}

/**
 * List the namespaces of a page id from most to least specific.
 *
 * Each namespace prefix is followed by one space; after the shortest prefix
 * an additional empty entry (a lone space) is appended. Example:
 * "a:b:c" gives "a:b a  " (note the two trailing spaces). The trailing empty
 * entry is kept deliberately so the output stays identical for existing
 * callers.
 *
 * @param string $id page id
 * @return string space-separated namespace prefixes; "" when $id is empty or
 *                contains no ":"
 */
function getCategories($id)
{
    if (empty($id)) {
        return '';
    }

    if (false === strpos($id, ":")) {
        return '';
    }

    // Everything except the last segment (the page name) is namespace.
    $namespaces = explode(":", $id);
    array_pop($namespaces);

    $result = '';
    // Depth 0 yields the final empty entry described above.
    for ($depth = count($namespaces); $depth >= 0; $depth--) {
        $result .= implode(':', array_slice($namespaces, 0, $depth)) . ' ';
    }

    return $result;
}

/**
 * Return the page name of a page id, i.e. the part after the last ":".
 *
 * @param string $id page id, optionally namespaced ("ns:sub:page")
 * @return string the last segment; $id itself when it contains no ":";
 *                "" for an empty, null or non-scalar id
 */
function getPagename($id)
{
    if (!is_scalar($id)) {
        return '';
    }

    // Compare against '' explicitly: empty() would also reject the id "0".
    $id = (string) $id;
    if ($id === '') {
        return '';
    }

    $pos = strrpos($id, ":");
    if (false === $pos) {
        return $id;
    }

    // The cast covers PHP versions where substr() returns false at end of string.
    return (string) substr($id, $pos + 1);
}

/**
 * Returns the names of all wiki pages.
 *
 * @global array $conf
 * @return array
 */
function getPagesList()
{
    // Only the 'datadir' entry of the configuration is read here.
    global $conf;

    // Results are collected in $data, which search() presumably fills by
    // reference - it is passed in empty and returned afterwards.
    // 'skipacl' => 1 is handed to the 'search_allpages' callback as an option.
    $data = array();
    search($data, $conf['datadir'], 'search_allpages', array('skipacl' => 1), '');

    return $data;
}

/**
 * Build one search link per namespace level of a page id.
 *
 * For every prefix of $id ("a:", "a:b:", ...) the prefix is resolved with
 * resolve_pageid(); a trailing ":start" added by that resolution is removed
 * again. Each level gives an entry with:
 *  - 'link'  "?do=search&id=<keywords>" followed by the url-encoded " @ns <page>"
 *  - 'title' the excerpt returned by $search->getExcerpt() for that level
 *
 * NOTE(review): $keywords is inserted into the link without URL encoding,
 * exactly as before. Confirm whether callers encode it; if not, it should be
 * passed through urlencode().
 *
 * @param string $id       page id
 * @param string $keywords search keywords
 * @param object $search   search client providing getExcerpt() and starQuery()
 * @return array list of arrays with the keys 'link' and 'title'
 */
function getNsLinks($id, $keywords, $search)
{
    $parts = explode(':', $id);
    $count = count($parts);

    // Collect intermediate namespace links
    $part = '';
    $data = array();
    $titles = array();
    for ($i = 0; $i < $count; $i++) {
        $part .= $parts[$i] . ':';
        $page = $part;
        // $page is rewritten in place; $exists is required by the call but
        // has no influence on the result.
        resolve_pageid('', $page, $exists);

        if (preg_match("#:start$#", $page) && !preg_match("#:start:$#", $part)) {
            // Cut at the LAST ":start". Cutting at the first occurrence would
            // truncate ids such as "a:started:start" to "a".
            $page = substr($page, 0, strrpos($page, ":start"));
        }

        $titles[wl($page)] = $parts[$i];
        $data[] = array('link' => "?do=search&id={$keywords}" . urlencode(" @ns $page"));
    }

    $titleExcerpt = $search->getExcerpt($titles, $search->starQuery($keywords));
    $i = 0;
    foreach ($data as $key => $notused) {
        $data[$key]['title'] = $titleExcerpt[$i++];
    }

    return $data;
}

/**
 * Print an HTML list of links to pages whose name matches the query.
 *
 * Spaces in the query are replaced by underscores before the lookup with
 * ft_pageLookup(). At most 20 entries are printed.
 *
 * @param string $query search query
 * @return false|null false when nothing matched; otherwise nothing is returned
 */
function printNamespaces($query)
{
    $query = str_replace(" ", "_", $query);
    $data = ft_pageLookup($query, false);

    if (!count($data)) {
        return false;
    }

    print '<ul>';
    $counter = 0;
    foreach ($data as $id) {
        print '<li>';
        // NOTE(review): $name is computed but never printed - the link below
        // shows the full $id. Kept so the calls made here stay unchanged.
        $ns = getNS($id);
        if ($ns) {
            $name = shorten(noNS($id), ' (' . $ns . ')', 30);
        } else {
            $name = $id;
        }
        $href = wl($id);

        tpl_link($href, $id, "class='wikilink1'");
        print '</li>';

        // Stop after 20 entries.
        if (++$counter == 20) {
            break;
        }
    }
    print '</ul>';
}

/**
 * Print an HTML list of links to the given pages.
 *
 * At most 10 entries are printed. Only the array keys (page ids) are used;
 * the header values are ignored by the current output.
 *
 * @param array $pageNames map of page id => header
 * @return false|null false when $pageNames is empty; otherwise nothing is returned
 */
function printNamespacesNew($pageNames)
{
    if (empty($pageNames)) {
        return false;
    }

    $limit = 10;
    print '<ul>';
    $counter = 0;
    foreach ($pageNames as $id => $header) {
        // NOTE(review): $name is computed but never printed - the link below
        // shows the full $id. Kept so the calls made here stay unchanged.
        $ns = getNS($id);
        if ($ns) {
            $name = shorten(noNS($id), ' (' . $ns . ')', 30);
        } else {
            $name = $id;
        }

        print '<li>';
        print '<a href="' . wl($id) . '" ' . "class='wikilink1'>" . $id . "</a>";
        print '</li>';

        // Stop once the limit is reached.
        if (++$counter == $limit) {
            break;
        }
    }
    print '</ul>';
}

if (!function_exists('shorten')) {
    /**
     * Shorten a given string by removing data from the middle
     *
     * You can give the string in two parts, the first part $keep
     * will never be shortened. The second part $short will be cut
     * in the middle to shorten but only if at least $min chars are
     * left to display it. Otherwise it will be left off.
     *
     * @param string $keep   the part to keep
     * @param string $short  the part to shorten
     * @param int    $max    maximum chars you want for the whole string
     * @param int    $min    minimum number of chars to have left for middle shortening
     * @param string $char   the shortening character to use
     * @return string the combined, possibly shortened string
     */
    function shorten($keep, $short, $max, $min = 9, $char = '⌇')
    {
        // Room left for $short once $keep has been accounted for.
        $max = $max - utf8_strlen($keep);
        if ($max < $min) {
            return $keep;
        }

        $len = utf8_strlen($short);
        if ($len <= $max) {
            return $keep . $short;
        }

        // floor() returns a float; cast so the substring helpers get integers.
        $half = (int) floor($max / 2);

        return $keep . utf8_substr($short, 0, $half - 1) . $char . utf8_substr($short, $len - $half);
    }
}