Was.OrcSearch – Rev 1

Subversion Repositories:
Rev:
using System;
using System.Collections.Generic;
using System.Linq;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Was.OrcSearch.EventArgs;
using Was.OrcSearch.Extensions;
using Was.OrcSearch.Metadata.Interfaces;
using Was.OrcSearch.Services.Interfaces;

namespace Was.OrcSearch.Services
{
    public abstract class SearchServiceBase : ISearchService
    {
        #region Constructors

        protected SearchServiceBase(ISearchQueryService searchQueryService)
        {
            _searchQueryService = searchQueryService;
        }

        #endregion

        #region Properties

        public int IndexedObjectCount
        {
            get
            {
                lock (_lockObject)
                {
                    return _indexedObjects.Count;
                }
            }
        }

        #endregion

        #region Constants

        private const string IndexId = "__index_id";
        private static int _currentIndex;

        #endregion

        #region Fields

        private readonly object _lockObject = new object();

        private readonly ISearchQueryService _searchQueryService;

        private readonly Dictionary<int, ISearchable> _indexedObjects = new Dictionary<int, ISearchable>();
        private readonly Dictionary<ISearchable, int> _searchableIndexes = new Dictionary<ISearchable, int>();

        private readonly Dictionary<string, ISearchableMetadata> _searchableMetadata =
            new Dictionary<string, ISearchableMetadata>();

        private bool _initialized;

        private Directory _indexDirectory;

        #endregion

        #region Events

        public event EventHandler<System.EventArgs> Updating;

        public event EventHandler<System.EventArgs> Updated;

        public event EventHandler<SearchEventArgs> Searching;

        public event EventHandler<SearchEventArgs> Searched;

        #endregion

        #region Methods

        public virtual IEnumerable<ISearchableMetadata> GetSearchableMetadata()
        {
            lock (_lockObject)
            {
                var searchableMetadata = new List<ISearchableMetadata>(_searchableMetadata.Values);
                return searchableMetadata;
            }
        }

        public virtual void AddObjects(IEnumerable<ISearchable> searchables)
        {
            Initialize();

            Updating?.Invoke(this, null);

            lock (_lockObject)
            {
                using (var analyzer = new StandardAnalyzer(LuceneDefaults.Version))
                {
                    using (var writer =
                        new IndexWriter(_indexDirectory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
                    {
                        foreach (var searchable in searchables)
                        {
                            var index = _currentIndex++;
                            _indexedObjects.Add(index, searchable);
                            _searchableIndexes.Add(searchable, index);

                            var document = new Document();
                            document.Add(
                                new Field(IndexId, index.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));

                            var metadata = searchable.MetadataCollection;
                            var searchableMetadatas = metadata.All.OfType<ISearchableMetadata>();

                            foreach (var searchableMetadata in searchableMetadatas)
                            {
                                var searchableMetadataValue = searchableMetadata.GetValue(searchable.Instance);
                                // TODO implement object to string helper.
                                var searchableMetadataValueAsString =
                                    searchableMetadataValue
                                        .ToString(); //ObjectToStringHelper.ToString(searchableMetadataValue);

                                var field = new Field(searchableMetadata.SearchName, searchableMetadataValueAsString,
                                    Field.Store.YES,
                                    searchableMetadata.Analyze ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED,
                                    Field.TermVector.NO);

                                document.Add(field);

                                if (!_searchableMetadata.ContainsKey(searchableMetadata.SearchName))
                                    _searchableMetadata.Add(searchableMetadata.SearchName, searchableMetadata);
                            }

                            writer.AddDocument(document);
                        }

                        writer.Optimize();
                        writer.Commit();
                    }
                }
            }

            Updated?.Invoke(this, null);
        }

        public virtual void RemoveObjects(IEnumerable<ISearchable> searchables)
        {
            Initialize();

            lock (_lockObject)
            {
                Updating?.Invoke(this, null);

                using (var analyzer = new StandardAnalyzer(LuceneDefaults.Version))
                {
                    using (var writer =
                        new IndexWriter(_indexDirectory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
                    {
                        foreach (var searchable in searchables)
                        {
                            int index;
                            if (!_searchableIndexes.TryGetValue(searchable, out index)) continue;

                            var queryAsText = $"{IndexId}:{index}";
                            var parser = new QueryParser(LuceneDefaults.Version, string.Empty, analyzer);
                            var query = parser.Parse(queryAsText);

                            writer.DeleteDocuments(query);

                            _searchableIndexes.Remove(searchable);
                            _indexedObjects.Remove(index);
                        }

                        writer.Optimize();
                        writer.Commit();
                    }
                }

                Updated?.Invoke(this, null);
            }
        }

        public void ClearAllObjects()
        {
            Initialize();

            Updating?.Invoke(this, null);

            lock (_lockObject)
            {
                using (var analyzer = new StandardAnalyzer(LuceneDefaults.Version))
                {
                    using (var writer =
                        new IndexWriter(_indexDirectory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
                    {
                        _indexedObjects.Clear();
                        _searchableMetadata.Clear();

                        writer.DeleteAll();

                        writer.Optimize();
                        writer.Commit();
                    }
                }
            }
        }

        public virtual IEnumerable<ISearchable> Search(string filter, int maxResults = SearchDefaults.DefaultResults)
        {
            Initialize();

            var results = new List<ISearchable>();

            lock (_lockObject)
            {
                try
                {
                    Searching?.Invoke(this, new SearchEventArgs(filter, results));

                    Query finalQuery = null;

                    // Note: There are two issues with using regex here
                    //       1. Lucene uses lower case interpretation of each string for indexing.
                    //          That means in regular expression we can use only lower case characters
                    //       2. escape sequences do not work. Not sure why
                    //      
                    //       In order to fix (1), we have to force Lucene to index differently. Probably we need to have two 
                    //       versions if indeces. One for regular search and another for regex
                    //var regexString = filter.ExtractRegexString();
                    //if (!string.IsNullOrWhiteSpace(regexString))
                    //{
                    //    var searchableMetadatas = GetSearchableMetadata();

                    //    var booleanQuery = new BooleanQuery();
                    //    foreach (var searchableMetadata in searchableMetadatas)
                    //    {
                    //        var query = new RegexQuery(new Term(searchableMetadata.SearchName, regexString));
                    //        var booleanClause = new BooleanClause(query, Occur.SHOULD);

                    //        booleanQuery.Add(booleanClause);
                    //    }

                    //    if (booleanQuery.Any())
                    //    {
                    //        finalQuery = booleanQuery;
                    //    }
                    //}

                    // ReSharper disable once ConditionIsAlwaysTrueOrFalse
                    if (finalQuery == null && filter.IsValidOrcSearchFilter())
                        using (var analyzer = new StandardAnalyzer(LuceneDefaults.Version))
                        {
                            var queryAsText = _searchQueryService.GetSearchQuery(filter, GetSearchableMetadata());

                            var parser = new QueryParser(LuceneDefaults.Version, string.Empty, analyzer);
                            finalQuery = parser.Parse(queryAsText);
                        }

                    if (finalQuery != null)
                        using (var searcher = new IndexSearcher(_indexDirectory))
                        {
                            var search = searcher.Search(finalQuery, maxResults);
                            foreach (var scoreDoc in search.ScoreDocs)
                            {
                                var score = scoreDoc.Score;
                                var docId = scoreDoc.Doc;
                                var doc = searcher.Doc(docId);

                                var index = int.Parse(doc.Get(IndexId));
                                results.Add(_indexedObjects[index]);
                            }
                        }
                }
                catch (ParseException ex)
                {
                    //Log.Warning(ex, "Failed to parse search pattern");
                    throw ex;
                }
                catch (Exception ex)
                {
                    //Log.Error(ex, "An error occurred while searching, returning default results");
                    throw ex;
                }
                finally
                {
                    Searched?.Invoke(this, new SearchEventArgs(filter, results));
                }
            }

            return results;
        }

        private void Initialize()
        {
            if (_initialized) return;

            _initialized = true;

            _indexDirectory = GetDirectory();

            // Required to create empty index, which is required for our reader
            using (var analyzer = new StandardAnalyzer(LuceneDefaults.Version))
            {
                using (var indexWriter =
                    new IndexWriter(_indexDirectory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    indexWriter.Commit();
                }
            }
        }

        protected abstract Directory GetDirectory();

        #endregion
    }
}

Generated by GNU Enscript 1.6.5.90.