dokuwiki-sphinxsearch-plugin – Rev 2

Subversion Repositories:
Rev:
<?php
/**
 * XML feed export
 *
 * @author     Ivinco <opensource@ivinco.com>
 */

// Remember the current display_errors setting (it is restored at the end of
// the script) and switch on-screen error output off while the feed is built,
// presumably so PHP diagnostics cannot end up inside the XML stream.
$deStatus = ini_get('display_errors');
ini_set('display_errors', '0');

/* Initialization */

// Locate the DokuWiki root and plugin directory relative to this file unless
// the including environment has already defined them.
if (!defined('DOKU_INC')) {
    define('DOKU_INC', dirname(__FILE__) . '/../../../');
}
if (!defined('DOKU_PLUGIN')) {
    define('DOKU_PLUGIN', DOKU_INC . 'lib/plugins/');
}

// Guarded like the constants above so an already defined NOSESSION does not
// trigger a redefinition diagnostic.
// NOTE(review): DokuWiki's init is expected to skip session handling when this
// constant is set - confirm against inc/init.php.
if (!defined('NOSESSION')) {
    define('NOSESSION', true);
}

require_once(DOKU_INC . 'inc/init.php');
require_once(DOKU_INC . 'inc/common.php');
//require_once(DOKU_INC . 'inc/events.php');
require_once(DOKU_INC . 'inc/parserutils.php');
//require_once(DOKU_INC.'inc/feedcreator.class.php');
require_once(DOKU_INC . 'inc/auth.php');
require_once(DOKU_INC . 'inc/pageutils.php');
require_once(DOKU_INC . 'inc/search.php');
require_once(DOKU_INC . 'inc/parser/parser.php');


require_once(DOKU_PLUGIN . 'sphinxsearch/PageMapper.php');
require_once(DOKU_PLUGIN . 'sphinxsearch/functions.php');

// Resolve the DokuWiki data directory: first try the configured savedir as
// given, then relative to the DokuWiki root. Abort if neither exists.
$dataPath = fullpath($conf['savedir']);
if (!@file_exists($dataPath)) {
    $dataPath = fullpath(DOKU_INC . $conf['savedir']);
    if (!@file_exists($dataPath)) die('invalid DokuWiki savedir');
}

// Make sure the plugin's own sub-directory exists inside the data directory.
// A failed mkdir() used to be ignored silently (display_errors is off at this
// point); now the script stops with a message instead. The second is_dir()
// check accepts the case where another process created the directory between
// the first check and mkdir().
$fullSphinxPath = $dataPath . '/sphinxsearch/';
if (!is_dir($fullSphinxPath) && !@mkdir($fullSphinxPath) && !is_dir($fullSphinxPath)) {
    die('unable to create sphinxsearch data directory');
}

// Rows describing the pages to export; each row is read through its 'id' key
// by the export loop further down.
$pagesList = getPagesList();

// Opening part of the feed: XML declaration, docset root element and the
// schema listing the full-text fields plus the integer "level" attribute.
// The trailing empty entry yields the final line break after the schema.
$schemaLines = array(
    '<?xml version="1.0" encoding="utf-8"?>',
    '<sphinx:docset>',
    '',
    '<sphinx:schema>',
    '<sphinx:field name="title"/>',
    '<sphinx:field name="body"/>',
    '<sphinx:field name="namespace"/>',
    '<sphinx:field name="pagename"/>',
    '<sphinx:field name="level"/>',
    '<sphinx:field name="modified"/>',
    '<sphinx:attr name="level" type="int" bits="8" default="1"/>',
    '</sphinx:schema>',
    '',
);
echo implode("\n", $schemaLines);

// Export loop: writes one XML document per non-empty section of every
// existing, non-hidden page (or one document for the whole page when it has
// no sections) and registers each document with the page mapper.
$pageMapper = new PageMapper();

// The hidepages setting does not change while the feed is generated, so the
// full pattern is built once instead of on every iteration.
$hiddenPattern = empty($conf['hidepages']) ? null : '/' . $conf['hidepages'] . '/';

// Fields whose text is converted to UTF-8 before being written to the feed.
$textFields = array('title_to_index', 'body');

foreach ($pagesList as $row) {
    $dokuPageId = $row['id'];
    if (empty($dokuPageId)) {
        continue;
    }

    // Check the existence of this very page. The previous code passed an
    // undefined $page variable, so the current page id was never tested.
    // resolve_pageid() takes the page by reference, hence the copy.
    $page = $dokuPageId;
    $exists = false;
    resolve_pageid('', $page, $exists);
    if (!$exists) { //do not include not exists page
        continue;
    }

    //check hidepages pattern to exclude hidden pages
    if ($hiddenPattern !== null && preg_match($hiddenPattern, ':' . $dokuPageId)) {
        continue;
    }

    //get meta data
    $metadata = p_get_metadata($dokuPageId);
    $sections = getDocumentsByHeadings($dokuPageId, $metadata);

    // The 'date' metadata entry has been seen both as an array carrying a
    // 'modified' key and as a plain value (indexing the latter raised
    // "Illegal string offset 'modified'"). Normalise it once so both branches
    // below export a scalar instead of a whole array or an undefined index.
    $modified = null;
    if (isset($metadata['date'])) {
        if (!is_array($metadata['date'])) {
            $modified = $metadata['date'];
        } elseif (isset($metadata['date']['modified'])) {
            $modified = $metadata['date']['modified'];
        }
    }

    if (empty($sections)) {
        // Page without sections: export the raw page text as one document.
        $pageTitle = isset($metadata['title']) ? $metadata['title'] : null;

        $data = array();
        $data['id'] = sprintf('%u', crc32($dokuPageId));
        $data['namespace'] = getCategories($dokuPageId);
        $data['pagename'] = getPagename($dokuPageId);
        $data['level'] = 1;
        $data['modified'] = $modified;
        $data['title'] = strip_tags((string) $pageTitle);
        $data['title_to_index'] = $pageTitle;
        $data['body'] = io_readFile(wikiFN($dokuPageId));

        if (empty($data['body'])) {
            continue;
        }

        //convert to utf-8 encoding
        foreach ($textFields as $field) {
            $text = (string) $data[$field];
            $encoding = mb_detect_encoding($text, 'auto');
            // Detection can fail (false); converting with an unknown source
            // encoding would lose the text, so it is then left unchanged.
            if ($encoding !== false) {
                $text = mb_convert_encoding($text, 'UTF-8', $encoding);
            }
            $data[$field] = $text;
        }

        echo formatXml($data) . "\n";
        $pageMapper->add($dokuPageId, $pageTitle, $pageTitle);
        continue;
    }

    // Page with sections: one document per non-empty section.
    foreach ($sections as $hid => $section) {
        if (empty($section['section'])) {
            continue;
        }

        //parse meta data for headers, abstract, date, authors
        $data = array();
        // Document id derived from the page id plus the section key.
        $data['id'] = sprintf('%u', crc32($dokuPageId . $hid));
        $data['namespace'] = getCategories($dokuPageId);
        $data['pagename'] = getPagename($dokuPageId);
        $data['level'] = $section['level'];
        $data['modified'] = $modified;
        $data['title'] = strip_tags($section['title_text']);
        $data['title_to_index'] = $section['title_to_index'];
        $data['body'] = $section['section'];

        //convert to utf-8 encoding
        foreach ($textFields as $field) {
            $text = (string) $data[$field];
            $encoding = mb_detect_encoding($text, 'auto');
            // Detection can fail (false); converting with an unknown source
            // encoding would lose the text, so it is then left unchanged.
            if ($encoding !== false) {
                $text = mb_convert_encoding($text, 'UTF-8', $encoding);
            }
            $data[$field] = $text;
        }

        echo formatXml($data) . "\n";
        $pageMapper->add($dokuPageId, $data['title'], $section['title'], $hid);
    }
}
echo '</sphinx:docset>';

// Put the display_errors setting back to the value saved at the top.
ini_set('display_errors', $deStatus);