dokuwiki-sphinxsearch-plugin – Rev 1

Subversion Repositories:
Rev:
<?php
/*
 * SphinxSearch: wrapper around SphinxClient used by the DokuWiki
 * Sphinx search plugin to build queries and collect page results.
 */

/**
 * Builds Sphinx extended-syntax queries over the namespace, pagename, title
 * and body fields, runs them through a SphinxClient and turns the matches
 * into page records with highlighted excerpts.
 */
class SphinxSearch
{
    /** SphinxClient instance created in the constructor. */
    private $_sphinx = null;
    /** Result of the last search() call; an empty array when nothing was run or the query failed. */
    private $_result = array();
    /** Index name handed to Query() and BuildExcerpts(). */
    private $_index = null;
    /** Query string assembled by the setSearch*() methods. */
    private $_query = '';
    /** Number of mapped matches inspected by the last getPages() call. */
    private $_offset = 0;

    private $_snippetSize = 256;
    private $_aroundKeyword = 5;
    private $_resultsPerPage = 10;

    private $_titlePriority = 1;
    private $_bodyPriority = 1;
    private $_namespacePriority = 1;
    private $_pagenamePriority = 1;

    /**
     * Creates the Sphinx client and remembers which index to query.
     *
     * @param string $host  searchd host
     * @param int    $port  searchd port
     * @param string $index index name used for queries and excerpts
     */
    public function __construct($host, $port, $index)
    {
        $this->_sphinx = new SphinxClient();
        $this->_sphinx->SetServer($host, $port);
        $this->_sphinx->SetMatchMode(SPH_MATCH_EXTENDED2);

        $this->_index = $index;
    }

    /**
     * Searches the wildcarded keywords in namespace/pagename OR the plain
     * keywords in body/title.
     *
     * @param string $keywords   raw user keywords
     * @param mixed  $categories not used by this query; kept for a uniform signature
     */
    public function setSearchAllQuery($keywords, $categories)
    {
        $keywords = $this->_prepareKeywords($keywords);
        $starKeyword = $this->starQuery($keywords);
        $this->_query = "(@(namespace,pagename) $starKeyword) | (@(body,title) {$keywords})";
    }

    /**
     * Same as setSearchAllQuery() but additionally requires the category
     * expression to match in namespace/pagename.
     *
     * @param string $keywords   raw user keywords
     * @param string $categories category expression; a leading "-" turns it into
     *                           an exclusion of the quoted remainder. It is inserted
     *                           into the query without escaping.
     */
    public function setSearchAllQueryWithCategoryFilter($keywords, $categories)
    {
        $keywords = $this->_prepareKeywords($keywords);
        $starKeyword = $this->starQuery($keywords);
        if (strpos($categories, "-") === 0) {
            $categories = '-"' . substr($categories, 1) . '"';
        }
        $this->_query = "(@(namespace,pagename) {$categories}) & ((@(body,title) {$keywords}) | (@(namespace,pagename) {$starKeyword}))";
    }

    /**
     * Searches only namespace/pagename for the wildcarded keywords, optionally
     * prefixed with a category expression.
     *
     * @param string $keywords   raw user keywords
     * @param string $categories category expression, may be empty
     */
    public function setSearchCategoryQuery($keywords, $categories)
    {
        $keywords = $this->_prepareKeywords($keywords);
        $starKeyword = $this->starQuery($keywords);

        if (!empty($categories)) {
            $this->_query = "(@(namespace,pagename) $categories $starKeyword)";
        } else {
            $this->_query = "(@(namespace,pagename) $starKeyword)";
        }
    }

    /**
     * Wraps the query built so far into a pagename-only field restriction.
     */
    public function setSearchOnlyPagename()
    {
        $this->_query = "(@(pagename) {$this->_query})";
    }

    /**
     * Runs the current query.
     *
     * @param int $start          offset of the first match to fetch
     * @param int $resultsPerPage number of distinct pages getPages() should return
     *
     * @return bool true when at least one match was returned
     */
    public function search($start, $resultsPerPage = 10)
    {
        $this->_resultsPerPage = $resultsPerPage;

        $this->_sphinx->SetFieldWeights(
            array(
                'namespace' => $this->_namespacePriority,
                'pagename' => $this->_pagenamePriority,
                'title' => $this->_titlePriority,
                'body' => $this->_bodyPriority
            )
        );

        // 100 more matches than a page needs are requested; getPages() drops
        // unreadable pages, so presumably the surplus refills the page — TODO confirm.
        $this->_sphinx->SetLimits($start, $resultsPerPage + 100, 1000);

        // Query() yields false on failure; normalise so later array access is safe.
        $result = $this->_sphinx->Query($this->_query, $this->_index);
        $this->_result = is_array($result) ? $result : array();

        return !empty($this->_result['matches']);
    }

    /**
     * Converts the matches of the last search into page records with excerpts.
     *
     * Pages the current user may not read are skipped (and subtracted from the
     * total-found counter); collection stops once the configured number of
     * distinct pages has been reached.
     *
     * @param string $keywords words to highlight in the excerpts
     *
     * @return array|false records keyed like the PageMapper result, each with
     *                     'page', 'bodyExcerpt', 'titleTextExcerpt', 'hid',
     *                     'title' and 'title_text'; false when nothing is readable
     */
    public function getPages($keywords)
    {
        if (empty($this->_result['matches'])) {
            return false;
        }

        $pagesIds = $this->_collectReadablePages($this->getPagesIds());
        if (empty($pagesIds)) {
            return false;
        }

        $body = array();
        $titleText = array();
        foreach ($pagesIds as $crc => $data) {
            if (empty($data['page'])) {
                continue;
            }
            $body[$crc] = $this->_renderPlainBody($data);
            $titleText[$crc] = !empty($data['title_text'])
                ? strip_tags($data['title_text'])
                : $data['page'];
        }
        if (empty($body)) {
            // Nothing to excerpt; skip the round trip to searchd.
            return array();
        }

        // BuildExcerpts() returns a positional list (or false on failure),
        // so excerpts are matched to records by position, not by key.
        $bodyExcerpt = $this->getExcerpt($body, $keywords);
        $titleTextExcerpt = $this->getExcerpt($titleText, $keywords);

        $results = array();
        $position = 0;
        foreach (array_keys($body) as $crc) {
            $results[$crc] = array(
                'page' => $pagesIds[$crc]['page'],
                'bodyExcerpt' => $this->_excerptAt($bodyExcerpt, $position),
                'titleTextExcerpt' => $this->_excerptAt($titleTextExcerpt, $position),
                'hid' => $pagesIds[$crc]['hid'],
                'title' => $pagesIds[$crc]['title'],
                'title_text' => $pagesIds[$crc]['title_text']
            );
            $position++;
        }
        return $results;
    }

    /**
     * Maps the ids of the last search's matches to page data via PageMapper.
     *
     * @return array PageMapper::getByCrc() result, or an empty array when the
     *               last search produced no matches
     */
    public function getPagesIds()
    {
        if (empty($this->_result['matches'])) {
            return array();
        }

        $pageMapper = new PageMapper();

        return $pageMapper->getByCrc(array_keys($this->_result['matches']));
    }

    /**
     * @return int number of mapped matches the last getPages() call walked through
     */
    public function getOffset()
    {
        return $this->_offset;
    }

    /**
     * Lists the readable pages among the matches of the last search.
     *
     * @return array map of page id => 'hid' value of that match
     */
    public function getPageNames()
    {
        $matchPages = array();
        foreach ($this->getPagesIds() as $page) {
            if (auth_quickaclcheck($page['page']) >= AUTH_READ) {
                $matchPages[$page['page']] = $page['hid'];
            }
        }

        return $matchPages;
    }

    /**
     * @return string last error reported by the Sphinx client
     */
    public function getError()
    {
        return $this->_sphinx->GetLastError();
    }

    /**
     * @return int total_found of the last search (after getPages() adjustments), 0 if unknown
     */
    public function getTotalFound()
    {
        return !empty($this->_result['total_found']) ? $this->_result['total_found'] : 0;
    }

    /**
     * Asks Sphinx for highlighted excerpts of the given texts.
     *
     * @param array  $data  texts to excerpt
     * @param string $query words to highlight
     *
     * @return array|false whatever SphinxClient::BuildExcerpts() returns
     */
    public function getExcerpt($data, $query)
    {
        return $this->_sphinx->BuildExcerpts(
            $data,
            $this->_index,
            $query,
            array(
                'limit' => $this->_snippetSize,
                'around' => $this->_aroundKeyword,
                'weight_order' => 1,
                'sp' => 1
            )
        );
    }

    /**
     * Surrounds every space-separated word with "*" wildcards.
     *
     * Empty tokens (empty query, repeated spaces, words consisting only of
     * stars) are dropped instead of being turned into a bare "**" term.
     *
     * @param string $query
     *
     * @return string
     */
    public function starQuery($query)
    {
        $starred = array();
        foreach (explode(" ", $this->removeStars($query)) as $word) {
            if ($word === '') {
                continue;
            }
            $starred[] = "*" . $word . "*";
        }
        return implode(" ", $starred);
    }

    /**
     * Strips leading and trailing "*" from every space-separated word.
     *
     * @param string $query
     *
     * @return string
     */
    public function removeStars($query)
    {
        $words = explode(" ", $query);
        foreach ($words as $id => $word) {
            $words[$id] = trim($word, "*");
        }
        return implode(" ", $words);
    }

    /**
     * @return string the query string built so far
     */
    public function getQuery()
    {
        return $this->_query;
    }

    public function setSnippetSize($symbols = 256)
    {
        $this->_snippetSize = $symbols;
    }

    public function setArroundWordsCount($words = 5)
    {
        $this->_aroundKeyword = $words;
    }

    public function setTitlePriority($priority)
    {
        $this->_titlePriority = $priority;
    }

    public function setBodyPriority($priority)
    {
        $this->_bodyPriority = $priority;
    }

    public function setNamespacePriority($priority)
    {
        $this->_namespacePriority = $priority;
    }

    public function setPagenamePriority($priority)
    {
        $this->_pagenamePriority = $priority;
    }

    /**
     * Escapes the keywords for Sphinx, then re-enables balanced quotes and
     * leading minus operators.
     */
    private function _prepareKeywords($keywords)
    {
        return $this->_enableQuotesAndDefis($this->_sphinx->EscapeString($keywords));
    }

    /**
     * Keeps the pages the current user may read, stopping once the configured
     * number of distinct pages is reached. Updates the offset and decrements
     * total_found for every filtered page.
     */
    private function _collectReadablePages($pagesIdsAll)
    {
        $this->_offset = 0;
        $distinctPages = 0;
        $seenPages = array();
        $readable = array();

        foreach ($pagesIdsAll as $id => $pageData) {
            $this->_offset++;
            if (auth_quickaclcheck($pageData['page']) < AUTH_READ) {
                // decrease total found counter for the first page if the page is filtered
                $this->_result['total_found']--;
                continue;
            }
            if (!isset($seenPages[$pageData['page']])) {
                $seenPages[$pageData['page']] = 1;
                $distinctPages++;
            }
            $readable[$id] = $pageData;
            if ($distinctPages == $this->_resultsPerPage) {
                break;
            }
        }

        return $readable;
    }

    /**
     * Renders a page (or just the matched section when 'hid' is set) to XHTML
     * and reduces it to plain text for excerpt building.
     */
    private function _renderPlainBody($data)
    {
        if (!empty($data['hid'])) {
            $info = array();
            $bodyHtml = p_render('xhtml', p_get_instructions(getSectionByTitleLevel($data['page'], $data['title'], true)), $info);
        } else {
            $bodyHtml = p_wiki_xhtml($data['page']);
        }
        $bodyHtml = preg_replace("#[\s]+?</li>#", "</li>;", $bodyHtml);
        $bodyHtml = htmlspecialchars_decode($bodyHtml);

        return strip_tags($bodyHtml);
    }

    /**
     * Returns the excerpt at the given position, or null when excerpt building
     * failed or returned fewer entries than expected.
     */
    private function _excerptAt($excerpts, $position)
    {
        return (is_array($excerpts) && isset($excerpts[$position])) ? $excerpts[$position] : null;
    }

    /**
     * Undoes part of the escaping so users can still type phrase quotes and
     * the minus (exclusion) operator.
     *
     * Escaped quotes are restored only when the query holds an even, non-zero
     * number of them; an escaped "-" is restored only at the start of a word.
     */
    private function _enableQuotesAndDefis($query)
    {
        // Leading space lets the \s in the pattern match a minus on the first word.
        $query = ' ' . $query;
        $quotesCount = substr_count($query, '"');
        if ($quotesCount && $quotesCount % 2 == 0) {
            $query = str_replace('\"', '"', $query);
        }

        // With the "u" modifier preg_replace() returns null on invalid UTF-8;
        // keep the query untouched in that case instead of wiping it.
        $unescaped = preg_replace("#\s\\\-(\w)#ui", " -$1", $query);
        if ($unescaped !== null) {
            $query = $unescaped;
        }

        return substr($query, 1);
    }
}