Was.OrcSearch – Blame information for rev 1

Subversion Repositories:
Rev:
Rev Author Line No. Line
1 office 1 using System;
2 using System.Collections.Generic;
3 using System.Linq;
4 using Lucene.Net.Analysis.Standard;
5 using Lucene.Net.Documents;
6 using Lucene.Net.Index;
7 using Lucene.Net.QueryParsers;
8 using Lucene.Net.Search;
9 using Lucene.Net.Store;
10 using Was.OrcSearch.EventArgs;
11 using Was.OrcSearch.Extensions;
12 using Was.OrcSearch.Metadata.Interfaces;
13 using Was.OrcSearch.Services.Interfaces;
14  
15 namespace Was.OrcSearch.Services
16 {
/// <summary>
/// Base class for search services that keep a Lucene index of <see cref="ISearchable"/>
/// objects. Derived classes only decide where the index lives (see <see cref="GetDirectory"/>).
/// </summary>
/// <remarks>
/// All access to the index and to the in-memory lookup tables is serialized through a single
/// private lock. <see cref="Updating"/> / <see cref="Updated"/> are raised outside that lock;
/// <see cref="Searching"/> / <see cref="Searched"/> are raised while it is held.
/// </remarks>
public abstract class SearchServiceBase : ISearchService
{
    #region Constructors

    /// <summary>
    /// Initializes a new instance of the <see cref="SearchServiceBase"/> class.
    /// </summary>
    /// <param name="searchQueryService">Service that turns a user filter into Lucene query text.</param>
    protected SearchServiceBase(ISearchQueryService searchQueryService)
    {
        _searchQueryService = searchQueryService;
    }

    #endregion

    #region Properties

    /// <summary>
    /// Gets the number of objects currently registered in the index.
    /// </summary>
    public int IndexedObjectCount
    {
        get
        {
            lock (_lockObject)
            {
                return _indexedObjects.Count;
            }
        }
    }

    #endregion

    #region Constants

    /// <summary>Name of the stored, non-analyzed field that holds the internal object id.</summary>
    private const string IndexId = "__index_id";

    #endregion

    #region Fields

    /// <summary>
    /// Next internal object id. Shared by all service instances, therefore only ever
    /// modified through <see cref="NextIndex"/>.
    /// </summary>
    private static int _currentIndex;

    private readonly object _lockObject = new object();

    private readonly ISearchQueryService _searchQueryService;

    // Two-way mapping between internal ids (stored in the Lucene documents) and the objects.
    private readonly Dictionary<int, ISearchable> _indexedObjects = new Dictionary<int, ISearchable>();
    private readonly Dictionary<ISearchable, int> _searchableIndexes = new Dictionary<ISearchable, int>();

    // Every searchable metadata seen so far, keyed by its search name.
    private readonly Dictionary<string, ISearchableMetadata> _searchableMetadata =
        new Dictionary<string, ISearchableMetadata>();

    private bool _initialized;

    private Directory _indexDirectory;

    #endregion

    #region Events

    /// <summary>Raised before objects are added to, removed from or cleared out of the index.</summary>
    public event EventHandler<System.EventArgs> Updating;

    /// <summary>Raised after an index update has been committed.</summary>
    public event EventHandler<System.EventArgs> Updated;

    /// <summary>Raised before a search is executed.</summary>
    public event EventHandler<SearchEventArgs> Searching;

    /// <summary>Raised after a search has finished, whether it succeeded or failed.</summary>
    public event EventHandler<SearchEventArgs> Searched;

    #endregion

    #region Methods

    /// <summary>
    /// Gets a snapshot of all searchable metadata registered by the objects added so far.
    /// </summary>
    /// <returns>A new list; later index updates do not change it.</returns>
    public virtual IEnumerable<ISearchableMetadata> GetSearchableMetadata()
    {
        lock (_lockObject)
        {
            return new List<ISearchableMetadata>(_searchableMetadata.Values);
        }
    }

    /// <summary>
    /// Adds the specified objects to the index.
    /// </summary>
    /// <param name="searchables">The objects to index.</param>
    /// <exception cref="ArgumentNullException"><paramref name="searchables"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentException">One of the objects is already indexed.</exception>
    public virtual void AddObjects(IEnumerable<ISearchable> searchables)
    {
        if (searchables == null)
        {
            throw new ArgumentNullException(nameof(searchables));
        }

        UpdateIndex(writer =>
        {
            foreach (var searchable in searchables)
            {
                var index = NextIndex();
                var document = CreateDocument(index, searchable);

                // Register in the table that can reject a duplicate first, so a failure
                // leaves both lookup tables and the index untouched for this object.
                _searchableIndexes.Add(searchable, index);
                _indexedObjects.Add(index, searchable);

                writer.AddDocument(document);
            }
        });
    }

    /// <summary>
    /// Removes the specified objects from the index. Objects that are not indexed are ignored.
    /// </summary>
    /// <param name="searchables">The objects to remove.</param>
    /// <exception cref="ArgumentNullException"><paramref name="searchables"/> is <c>null</c>.</exception>
    public virtual void RemoveObjects(IEnumerable<ISearchable> searchables)
    {
        if (searchables == null)
        {
            throw new ArgumentNullException(nameof(searchables));
        }

        UpdateIndex(writer =>
        {
            foreach (var searchable in searchables)
            {
                int index;
                if (!_searchableIndexes.TryGetValue(searchable, out index))
                {
                    continue;
                }

                // The id field is stored NOT_ANALYZED, so an exact term match identifies the document.
                writer.DeleteDocuments(new Term(IndexId, index.ToString()));

                _searchableIndexes.Remove(searchable);
                _indexedObjects.Remove(index);
            }
        });
    }

    /// <summary>
    /// Removes every object from the index and forgets all registered searchable metadata.
    /// </summary>
    public void ClearAllObjects()
    {
        UpdateIndex(writer =>
        {
            writer.DeleteAll();

            // Both directions of the id mapping must be reset together, otherwise a cleared
            // object could never be added again.
            _indexedObjects.Clear();
            _searchableIndexes.Clear();
            _searchableMetadata.Clear();
        });
    }

    /// <summary>
    /// Searches the index.
    /// </summary>
    /// <param name="filter">The user-supplied search filter.</param>
    /// <param name="maxResults">The maximum number of hits requested from Lucene.</param>
    /// <returns>
    /// The matching objects in the order Lucene returned them; empty when the filter is not a
    /// valid search filter.
    /// </returns>
    /// <remarks>
    /// Exceptions (including <see cref="ParseException"/> for an unparsable query) propagate to
    /// the caller with their original stack trace; <see cref="Searched"/> is raised regardless.
    /// </remarks>
    public virtual IEnumerable<ISearchable> Search(string filter, int maxResults = SearchDefaults.DefaultResults)
    {
        Initialize();

        var results = new List<ISearchable>();

        lock (_lockObject)
        {
            try
            {
                Searching?.Invoke(this, new SearchEventArgs(filter, results));

                var query = CreateQuery(filter);
                if (query != null)
                {
                    using (var searcher = new IndexSearcher(_indexDirectory))
                    {
                        var hits = searcher.Search(query, maxResults);
                        foreach (var scoreDoc in hits.ScoreDocs)
                        {
                            var document = searcher.Doc(scoreDoc.Doc);
                            var index = int.Parse(document.Get(IndexId));

                            results.Add(_indexedObjects[index]);
                        }
                    }
                }
            }
            finally
            {
                Searched?.Invoke(this, new SearchEventArgs(filter, results));
            }
        }

        return results;
    }

    /// <summary>
    /// Returns the Lucene directory that stores the index. Called once, on first use of the service.
    /// </summary>
    /// <returns>The directory to create the index in.</returns>
    protected abstract Directory GetDirectory();

    /// <summary>
    /// Runs <paramref name="update"/> against a fresh index writer, then optimizes and commits.
    /// <see cref="Updating"/> is raised before and <see cref="Updated"/> after the update, both
    /// outside the lock. <see cref="Updated"/> is not raised when the update throws.
    /// </summary>
    private void UpdateIndex(Action<IndexWriter> update)
    {
        Initialize();

        Updating?.Invoke(this, System.EventArgs.Empty);

        lock (_lockObject)
        {
            using (var analyzer = new StandardAnalyzer(LuceneDefaults.Version))
            using (var writer = new IndexWriter(_indexDirectory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                update(writer);

                writer.Optimize();
                writer.Commit();
            }
        }

        Updated?.Invoke(this, System.EventArgs.Empty);
    }

    /// <summary>
    /// Builds the Lucene document for <paramref name="searchable"/>: one id field plus one field
    /// per searchable metadata. Newly seen metadata is registered as a side effect.
    /// </summary>
    private Document CreateDocument(int index, ISearchable searchable)
    {
        var document = new Document();
        document.Add(new Field(IndexId, index.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));

        var searchableMetadatas = searchable.MetadataCollection.All.OfType<ISearchableMetadata>();
        foreach (var searchableMetadata in searchableMetadatas)
        {
            // TODO implement object to string helper.
            // A missing value is indexed as an empty string instead of failing the whole batch.
            object value = searchableMetadata.GetValue(searchable.Instance);
            var valueAsString = value?.ToString() ?? string.Empty;

            var fieldIndex = searchableMetadata.Analyze ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED;
            document.Add(new Field(searchableMetadata.SearchName, valueAsString, Field.Store.YES, fieldIndex,
                Field.TermVector.NO));

            if (!_searchableMetadata.ContainsKey(searchableMetadata.SearchName))
            {
                _searchableMetadata.Add(searchableMetadata.SearchName, searchableMetadata);
            }
        }

        return document;
    }

    /// <summary>
    /// Translates <paramref name="filter"/> into a Lucene query, or returns <c>null</c> when the
    /// filter is not a valid search filter.
    /// </summary>
    /// <remarks>
    /// Regex search (one RegexQuery per searchable field, combined with SHOULD clauses) was
    /// tried and is intentionally not implemented, for two reasons:
    /// 1. Lucene indexes the lower-case form of each string, so a regular expression could only
    ///    use lower-case characters. Fixing that probably needs a second, differently built
    ///    index dedicated to regex search.
    /// 2. Escape sequences did not work; the cause is unknown.
    /// </remarks>
    private Query CreateQuery(string filter)
    {
        if (!filter.IsValidOrcSearchFilter())
        {
            return null;
        }

        using (var analyzer = new StandardAnalyzer(LuceneDefaults.Version))
        {
            var queryAsText = _searchQueryService.GetSearchQuery(filter, GetSearchableMetadata());

            var parser = new QueryParser(LuceneDefaults.Version, string.Empty, analyzer);
            return parser.Parse(queryAsText);
        }
    }

    /// <summary>
    /// Creates the index on first use. The service is only marked as initialized after the
    /// empty index has been written, so a failed attempt is retried on the next call.
    /// </summary>
    private void Initialize()
    {
        lock (_lockObject)
        {
            if (_initialized)
            {
                return;
            }

            var directory = GetDirectory();

            // Required to create empty index, which is required for our reader
            using (var analyzer = new StandardAnalyzer(LuceneDefaults.Version))
            using (var indexWriter = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                indexWriter.Commit();
            }

            _indexDirectory = directory;
            _initialized = true;
        }
    }

    /// <summary>
    /// Hands out the next internal object id (0, 1, 2, ...). The counter is static, so it is
    /// advanced atomically rather than under the per-instance lock.
    /// </summary>
    private static int NextIndex()
    {
        return System.Threading.Interlocked.Increment(ref _currentIndex) - 1;
    }

    #endregion
}
318 }