Was.OrcSearch – Blame information for rev 2

Subversion Repositories:
Rev:
Rev Author Line No. Line
1 office 1 using System;
2 using System.Collections.Generic;
3 using System.Linq;
4 using Lucene.Net.Analysis.Standard;
5 using Lucene.Net.Documents;
6 using Lucene.Net.Index;
7 using Lucene.Net.QueryParsers;
8 using Lucene.Net.Search;
9 using Lucene.Net.Store;
10 using Was.OrcSearch.EventArgs;
11 using Was.OrcSearch.Extensions;
2 office 12 using Was.OrcSearch.Helpers;
1 office 13 using Was.OrcSearch.Metadata.Interfaces;
14 using Was.OrcSearch.Services.Interfaces;
15  
16 namespace Was.OrcSearch.Services
17 {
/// <summary>
/// Base implementation of <see cref="ISearchService"/> that indexes <see cref="ISearchable"/>
/// objects in a Lucene index stored in the <see cref="Directory"/> supplied by
/// <see cref="GetDirectory"/>, and maps search hits back to the indexed objects.
/// </summary>
/// <remarks>
/// All access to the index and to the in-memory lookup tables is serialized through a single
/// private lock. Events are raised outside of that lock so that handlers cannot dead-lock the
/// service by calling back into it from another thread.
/// </remarks>
public abstract class SearchServiceBase : ISearchService
{
    #region Constructors

    /// <summary>
    /// Initializes a new instance of the <see cref="SearchServiceBase"/> class.
    /// </summary>
    /// <param name="searchQueryService">
    /// Service used by <see cref="Search"/> to turn a user filter into the query text that is
    /// handed to the Lucene query parser.
    /// </param>
    protected SearchServiceBase(ISearchQueryService searchQueryService)
    {
        _searchQueryService = searchQueryService;
    }

    #endregion

    #region Properties

    /// <summary>
    /// Gets the number of objects currently registered with this service.
    /// </summary>
    public int IndexedObjectCount
    {
        get
        {
            lock (_lockObject)
            {
                return _indexedObjects.Count;
            }
        }
    }

    #endregion

    #region Constants

    // Name of the stored, non-analyzed field that carries the numeric id of each indexed object.
    private const string IndexId = "__index_id";

    #endregion

    #region Fields

    // Source of unique ids. Shared by all instances, hence only ever changed through Interlocked.
    private static int _currentIndex;

    private readonly object _lockObject = new object();

    private readonly ISearchQueryService _searchQueryService;

    // id -> object and object -> id lookups; the two must always be kept in sync.
    private readonly Dictionary<int, ISearchable> _indexedObjects = new Dictionary<int, ISearchable>();
    private readonly Dictionary<ISearchable, int> _searchableIndexes = new Dictionary<ISearchable, int>();

    // Searchable metadata seen so far, keyed by search name.
    private readonly Dictionary<string, ISearchableMetadata> _searchableMetadata =
        new Dictionary<string, ISearchableMetadata>();

    private bool _initialized;

    private Directory _indexDirectory;

    #endregion

    #region Events

    /// <summary>Raised before the index is modified.</summary>
    public event EventHandler<System.EventArgs> Updating;

    /// <summary>Raised after the index has been modified successfully.</summary>
    public event EventHandler<System.EventArgs> Updated;

    /// <summary>Raised before a search is executed.</summary>
    public event EventHandler<SearchEventArgs> Searching;

    /// <summary>Raised after a search has finished, whether it succeeded or threw.</summary>
    public event EventHandler<SearchEventArgs> Searched;

    #endregion

    #region Methods

    /// <summary>
    /// Gets a snapshot of the searchable metadata collected from the objects added so far.
    /// </summary>
    /// <returns>A new list, so the caller can enumerate it without holding the lock.</returns>
    public virtual IEnumerable<ISearchableMetadata> GetSearchableMetadata()
    {
        lock (_lockObject)
        {
            var searchableMetadata = new List<ISearchableMetadata>(_searchableMetadata.Values);
            return searchableMetadata;
        }
    }

    /// <summary>
    /// Adds the given objects to the index.
    /// </summary>
    /// <param name="searchables">The objects to index.</param>
    /// <exception cref="ArgumentNullException"><paramref name="searchables"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentException">One of the objects is already indexed.</exception>
    public virtual void AddObjects(IEnumerable<ISearchable> searchables)
    {
        if (searchables == null)
        {
            throw new ArgumentNullException(nameof(searchables));
        }

        Initialize();

        Updating?.Invoke(this, System.EventArgs.Empty);

        lock (_lockObject)
        {
            using (var analyzer = new StandardAnalyzer(LuceneDefaults.Version))
            using (var writer = new IndexWriter(_indexDirectory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                foreach (var searchable in searchables)
                {
                    // Equivalent to "_currentIndex++", but safe across instances because the
                    // counter is static while the lock is per instance.
                    var index = System.Threading.Interlocked.Increment(ref _currentIndex) - 1;

                    // Register the reverse lookup first: it is the one that throws for a null or
                    // already indexed object, so a failure leaves both lookups untouched.
                    _searchableIndexes.Add(searchable, index);
                    _indexedObjects.Add(index, searchable);

                    writer.AddDocument(CreateDocument(searchable, index));
                }

                writer.Optimize();
                writer.Commit();
            }
        }

        Updated?.Invoke(this, System.EventArgs.Empty);
    }

    /// <summary>
    /// Removes the given objects from the index. Objects that are not indexed are ignored.
    /// </summary>
    /// <param name="searchables">The objects to remove.</param>
    /// <exception cref="ArgumentNullException"><paramref name="searchables"/> is <c>null</c>.</exception>
    public virtual void RemoveObjects(IEnumerable<ISearchable> searchables)
    {
        if (searchables == null)
        {
            throw new ArgumentNullException(nameof(searchables));
        }

        Initialize();

        Updating?.Invoke(this, System.EventArgs.Empty);

        lock (_lockObject)
        {
            using (var analyzer = new StandardAnalyzer(LuceneDefaults.Version))
            using (var writer = new IndexWriter(_indexDirectory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                foreach (var searchable in searchables)
                {
                    int index;
                    if (!_searchableIndexes.TryGetValue(searchable, out index))
                    {
                        continue;
                    }

                    // The id field is stored NOT_ANALYZED, so an exact term match identifies
                    // the document; no query parsing is needed.
                    writer.DeleteDocuments(new Term(IndexId, FormatIndex(index)));

                    _searchableIndexes.Remove(searchable);
                    _indexedObjects.Remove(index);
                }

                writer.Optimize();
                writer.Commit();
            }
        }

        Updated?.Invoke(this, System.EventArgs.Empty);
    }

    /// <summary>
    /// Removes every document from the index and forgets all indexed objects and metadata.
    /// </summary>
    public void ClearAllObjects()
    {
        Initialize();

        Updating?.Invoke(this, System.EventArgs.Empty);

        lock (_lockObject)
        {
            using (var analyzer = new StandardAnalyzer(LuceneDefaults.Version))
            using (var writer = new IndexWriter(_indexDirectory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                _indexedObjects.Clear();
                // Must be cleared together with _indexedObjects; otherwise re-adding a previously
                // indexed object fails with a duplicate key and RemoveObjects sees stale ids.
                _searchableIndexes.Clear();
                _searchableMetadata.Clear();

                writer.DeleteAll();

                writer.Optimize();
                writer.Commit();
            }
        }

        Updated?.Invoke(this, System.EventArgs.Empty);
    }

    /// <summary>
    /// Searches the index with the given filter.
    /// </summary>
    /// <param name="filter">The search filter entered by the user.</param>
    /// <param name="maxResults">The maximum number of hits requested from the index.</param>
    /// <returns>
    /// The indexed objects matching the filter; empty when the filter is not a valid search
    /// filter. Hits that no longer map to a registered object are skipped.
    /// </returns>
    /// <remarks>
    /// Exceptions thrown while parsing or executing the query (for example
    /// <see cref="ParseException"/>) propagate to the caller with their original stack trace.
    /// <see cref="Searched"/> is raised in every case.
    /// </remarks>
    public virtual IEnumerable<ISearchable> Search(string filter, int maxResults = SearchDefaults.DefaultResults)
    {
        Initialize();

        var results = new List<ISearchable>();

        try
        {
            Searching?.Invoke(this, new SearchEventArgs(filter, results));

            lock (_lockObject)
            {
                var query = CreateQuery(filter);
                if (query != null)
                {
                    using (var searcher = new IndexSearcher(_indexDirectory))
                    {
                        var hits = searcher.Search(query, maxResults);
                        foreach (var scoreDoc in hits.ScoreDocs)
                        {
                            var document = searcher.Doc(scoreDoc.Doc);

                            // The directory may contain documents this instance did not register
                            // (Initialize opens it without recreating it), so skip hits that
                            // carry no usable id or no matching in-memory object.
                            int index;
                            ISearchable searchable;
                            if (int.TryParse(
                                    document.Get(IndexId),
                                    System.Globalization.NumberStyles.Integer,
                                    System.Globalization.CultureInfo.InvariantCulture,
                                    out index)
                                && _indexedObjects.TryGetValue(index, out searchable))
                            {
                                results.Add(searchable);
                            }
                        }
                    }
                }
            }
        }
        finally
        {
            Searched?.Invoke(this, new SearchEventArgs(filter, results));
        }

        return results;
    }

    /// <summary>
    /// Returns the Lucene directory in which the index is stored. Called once, under the
    /// service lock, the first time the index is needed.
    /// </summary>
    protected abstract Directory GetDirectory();

    // Builds the Lucene document for one object and records any searchable metadata not seen
    // before. Must be called while holding _lockObject.
    private Document CreateDocument(ISearchable searchable, int index)
    {
        var document = new Document();
        document.Add(new Field(IndexId, FormatIndex(index), Field.Store.YES, Field.Index.NOT_ANALYZED));

        var searchableMetadatas = searchable.MetadataCollection.All.OfType<ISearchableMetadata>();
        foreach (var searchableMetadata in searchableMetadatas)
        {
            var searchableMetadataValue = searchableMetadata.GetValue(searchable.Instance);

            // Original: ObjectToStringHelper.ToString(searchableMetadataValue);
            // TODO Support more serializable types.
            var searchableMetadataValueAsString = string.Join(" ", searchableMetadataValue.Stringify());

            var field = new Field(
                searchableMetadata.SearchName,
                searchableMetadataValueAsString,
                Field.Store.YES,
                searchableMetadata.Analyze ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED,
                Field.TermVector.NO);

            document.Add(field);

            if (!_searchableMetadata.ContainsKey(searchableMetadata.SearchName))
            {
                _searchableMetadata.Add(searchableMetadata.SearchName, searchableMetadata);
            }
        }

        return document;
    }

    // Translates the user filter into a Lucene query, or returns null when the filter is not a
    // valid search filter.
    //
    // Note: regex search (RegexQuery over every searchable field) was tried here and is
    // disabled because of two issues:
    //   1. Lucene indexes the lower-case form of each string, so a regular expression could
    //      only use lower-case characters. Fixing this probably requires two sets of indices,
    //      one for regular search and one for regex search.
    //   2. Escape sequences did not work (reason unknown).
    private Query CreateQuery(string filter)
    {
        if (!filter.IsValidOrcSearchFilter())
        {
            return null;
        }

        using (var analyzer = new StandardAnalyzer(LuceneDefaults.Version))
        {
            var queryAsText = _searchQueryService.GetSearchQuery(filter, GetSearchableMetadata());

            var parser = new QueryParser(LuceneDefaults.Version, string.Empty, analyzer);
            return parser.Parse(queryAsText);
        }
    }

    // Culture-independent text form of an object id, used both when writing and when deleting.
    private static string FormatIndex(int index)
    {
        return index.ToString(System.Globalization.CultureInfo.InvariantCulture);
    }

    // Lazily opens the index directory. Runs under the lock and only publishes the directory
    // and the "initialized" flag once everything succeeded, so no caller can observe a
    // half-initialized service and a failed attempt is retried on the next call.
    private void Initialize()
    {
        lock (_lockObject)
        {
            if (_initialized)
            {
                return;
            }

            var directory = GetDirectory();

            // Required to create empty index, which is required for our reader
            using (var analyzer = new StandardAnalyzer(LuceneDefaults.Version))
            using (var indexWriter = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                indexWriter.Commit();
            }

            _indexDirectory = directory;
            _initialized = true;
        }
    }

    #endregion
}
319 }