Was.OrcSearch – Blame information for rev 3

Subversion Repositories:
Rev:
Rev Author Line No. Line
1 office 1 using System;
2 using System.Collections.Generic;
3 using System.Linq;
4 using Lucene.Net.Analysis.Standard;
5 using Lucene.Net.Documents;
6 using Lucene.Net.Index;
7 using Lucene.Net.QueryParsers;
8 using Lucene.Net.Search;
9 using Lucene.Net.Store;
10 using Was.OrcSearch.EventArgs;
11 using Was.OrcSearch.Extensions;
2 office 12 using Was.OrcSearch.Helpers;
1 office 13 using Was.OrcSearch.Metadata.Interfaces;
14 using Was.OrcSearch.Services.Interfaces;
15  
16 namespace Was.OrcSearch.Services
17 {
/// <summary>
/// Base class for search services that index <see cref="ISearchable" /> objects in a Lucene
/// <see cref="Directory" /> supplied by the derived class through <see cref="GetDirectory" />.
/// Indexed objects are kept in memory and are linked to their Lucene documents through a numeric id
/// stored in the <c>__index_id</c> field of every document.
/// </summary>
public abstract class SearchServiceBase : ISearchService
{
    #region Constructors

    /// <summary>
    /// Creates the service.
    /// </summary>
    /// <param name="searchQueryService">
    /// Service used by <see cref="Search" /> to turn a user filter into Lucene query text.
    /// </param>
    protected SearchServiceBase(ISearchQueryService searchQueryService)
    {
        _searchQueryService = searchQueryService;
    }

    #endregion

    #region Properties

    /// <summary>
    /// Gets the number of objects currently registered in the in-memory id map.
    /// </summary>
    public int IndexedObjectCount
    {
        get
        {
            lock (_lockObject)
            {
                return _indexedObjects.Count;
            }
        }
    }

    #endregion

    #region Constants

    /// <summary>
    /// Name of the stored, non-analyzed document field holding the id of the indexed object.
    /// </summary>
    private const string IndexId = "__index_id";

    #endregion

    #region Fields

    // Next id to hand out. Static, so the counter is shared by every instance; it is only
    // modified while holding an instance's _lockObject (see AddObjects).
    private static int _currentIndex;

    // Guards the dictionaries below, the initialization state and all index access.
    private readonly object _lockObject = new object();

    private readonly ISearchQueryService _searchQueryService;

    // id -> indexed object, and the reverse map. Both must always be updated together.
    private readonly Dictionary<int, ISearchable> _indexedObjects = new Dictionary<int, ISearchable>();
    private readonly Dictionary<ISearchable, int> _searchableIndexes = new Dictionary<ISearchable, int>();

    // Search name -> first metadata seen with that name while indexing.
    private readonly Dictionary<string, ISearchableMetadata> _searchableMetadata =
        new Dictionary<string, ISearchableMetadata>();

    private bool _initialized;

    private Directory _indexDirectory;

    #endregion

    #region Events

    /// <summary>Raised before objects are added to, removed from or cleared out of the index.</summary>
    public event EventHandler<System.EventArgs> Updating;

    /// <summary>Raised after objects were added to, removed from or cleared out of the index.</summary>
    public event EventHandler<System.EventArgs> Updated;

    /// <summary>Raised at the start of <see cref="Search" />, before the query is built.</summary>
    public event EventHandler<SearchEventArgs> Searching;

    /// <summary>Raised at the end of <see cref="Search" />, whether or not it succeeded.</summary>
    public event EventHandler<SearchEventArgs> Searched;

    #endregion

    #region Methods

    /// <summary>
    /// Returns a snapshot of all searchable metadata registered so far by <see cref="AddObjects" />.
    /// </summary>
    /// <returns>A new list; later index changes do not affect it.</returns>
    public virtual IEnumerable<ISearchableMetadata> GetSearchableMetadata()
    {
        lock (_lockObject)
        {
            return new List<ISearchableMetadata>(_searchableMetadata.Values);
        }
    }

    /// <summary>
    /// Adds the given objects to the index, one Lucene document per object.
    /// <see cref="Updating" /> is raised before and <see cref="Updated" /> after the index is modified.
    /// </summary>
    /// <param name="searchables">Objects to index.</param>
    /// <exception cref="ArgumentException">A searchable is already indexed.</exception>
    public virtual void AddObjects(IEnumerable<ISearchable> searchables)
    {
        Initialize();

        // System.EventArgs is fully qualified because the Was.OrcSearch.EventArgs namespace is imported.
        Updating?.Invoke(this, System.EventArgs.Empty);

        lock (_lockObject)
        {
            using (var analyzer = new StandardAnalyzer(LuceneDefaults.Version))
            using (var writer =
                new IndexWriter(_indexDirectory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                foreach (var searchable in searchables)
                {
                    var index = _currentIndex++;

                    // Register the reverse map first: it is the call that throws for a duplicate
                    // (or null) searchable, so a failure leaves both maps untouched and in sync.
                    _searchableIndexes.Add(searchable, index);
                    _indexedObjects.Add(index, searchable);

                    writer.AddDocument(CreateDocument(index, searchable));
                }

                writer.Optimize();
                writer.Commit();
            }
        }

        Updated?.Invoke(this, System.EventArgs.Empty);
    }

    /// <summary>
    /// Removes the given objects from the index. Objects that are not indexed are ignored.
    /// <see cref="Updating" /> and <see cref="Updated" /> are raised while the lock is held.
    /// </summary>
    /// <param name="searchables">Objects to remove.</param>
    public virtual void RemoveObjects(IEnumerable<ISearchable> searchables)
    {
        Initialize();

        lock (_lockObject)
        {
            Updating?.Invoke(this, System.EventArgs.Empty);

            using (var analyzer = new StandardAnalyzer(LuceneDefaults.Version))
            using (var writer =
                new IndexWriter(_indexDirectory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                foreach (var searchable in searchables)
                {
                    int index;
                    if (!_searchableIndexes.TryGetValue(searchable, out index))
                    {
                        continue;
                    }

                    // The id field is stored NOT_ANALYZED, so an exact term delete matches the
                    // document without building and parsing a text query for every object.
                    writer.DeleteDocuments(new Term(IndexId, index.ToString()));

                    _searchableIndexes.Remove(searchable);
                    _indexedObjects.Remove(index);
                }

                writer.Optimize();
                writer.Commit();
            }

            Updated?.Invoke(this, System.EventArgs.Empty);
        }
    }

    /// <summary>
    /// Removes every object and all registered metadata from the service and deletes all documents
    /// from the index. <see cref="Updating" /> is raised before and <see cref="Updated" /> after.
    /// </summary>
    public void ClearAllObjects()
    {
        Initialize();

        Updating?.Invoke(this, System.EventArgs.Empty);

        lock (_lockObject)
        {
            using (var analyzer = new StandardAnalyzer(LuceneDefaults.Version))
            using (var writer =
                new IndexWriter(_indexDirectory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                _indexedObjects.Clear();
                // The reverse map has to be cleared as well; otherwise re-adding a previously
                // indexed object fails with a duplicate key and RemoveObjects acts on stale ids.
                _searchableIndexes.Clear();
                _searchableMetadata.Clear();

                writer.DeleteAll();

                writer.Optimize();
                writer.Commit();
            }
        }

        Updated?.Invoke(this, System.EventArgs.Empty);
    }

    /// <summary>
    /// Searches the index with the given filter.
    /// <see cref="Searching" /> is raised first and <see cref="Searched" /> is always raised last,
    /// both with the same result list instance that is returned.
    /// </summary>
    /// <param name="filter">User filter; it is only executed when it is a valid OrcSearch filter.</param>
    /// <param name="maxResults">Maximum number of hits requested from Lucene.</param>
    /// <returns>The matching objects in the order Lucene returned them; empty when the filter is not valid.</returns>
    /// <exception cref="ParseException">The generated query text could not be parsed.</exception>
    public virtual IEnumerable<ISearchable> Search(string filter, int maxResults = SearchDefaults.DefaultResults)
    {
        Initialize();

        var results = new List<ISearchable>();

        lock (_lockObject)
        {
            // No catch blocks: errors propagate to the caller with their original stack trace,
            // while the finally block still guarantees the Searched notification.
            try
            {
                Searching?.Invoke(this, new SearchEventArgs(filter, results));

                Query finalQuery = null;

                // Note: There are two issues with using regex here
                // 1. Lucene uses lower case interpretation of each string for indexing.
                //    That means in regular expression we can use only lower case characters
                // 2. escape sequences do not work. Not sure why
                //
                // In order to fix (1), we have to force Lucene to index differently. Probably we need to
                // have two versions of indices. One for regular search and another for regex
                //var regexString = filter.ExtractRegexString();
                //if (!string.IsNullOrWhiteSpace(regexString))
                //{
                //    var searchableMetadatas = GetSearchableMetadata();
                //
                //    var booleanQuery = new BooleanQuery();
                //    foreach (var searchableMetadata in searchableMetadatas)
                //    {
                //        var query = new RegexQuery(new Term(searchableMetadata.SearchName, regexString));
                //        var booleanClause = new BooleanClause(query, Occur.SHOULD);
                //
                //        booleanQuery.Add(booleanClause);
                //    }
                //
                //    if (booleanQuery.Any())
                //    {
                //        finalQuery = booleanQuery;
                //    }
                //}

                if (filter.IsValidOrcSearchFilter())
                {
                    using (var analyzer = new StandardAnalyzer(LuceneDefaults.Version))
                    {
                        var queryAsText = _searchQueryService.GetSearchQuery(filter, GetSearchableMetadata());

                        var parser = new QueryParser(LuceneDefaults.Version, string.Empty, analyzer);
                        finalQuery = parser.Parse(queryAsText);
                    }
                }

                if (finalQuery != null)
                {
                    using (var searcher = new IndexSearcher(_indexDirectory))
                    {
                        var hits = searcher.Search(finalQuery, maxResults);
                        foreach (var scoreDoc in hits.ScoreDocs)
                        {
                            var doc = searcher.Doc(scoreDoc.Doc);

                            // Map the stored id back to the in-memory object.
                            var index = int.Parse(doc.Get(IndexId));
                            results.Add(_indexedObjects[index]);
                        }
                    }
                }
            }
            finally
            {
                Searched?.Invoke(this, new SearchEventArgs(filter, results));
            }
        }

        return results;
    }

    /// <summary>
    /// Builds the Lucene document for one searchable and registers any metadata whose search name
    /// has not been seen before. Must be called while holding <see cref="_lockObject" />.
    /// </summary>
    /// <param name="index">Id stored in the document's id field.</param>
    /// <param name="searchable">Object whose searchable metadata values become document fields.</param>
    /// <returns>The document, not yet added to any writer.</returns>
    private Document CreateDocument(int index, ISearchable searchable)
    {
        var document = new Document();
        document.Add(new Field(IndexId, index.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));

        var searchableMetadatas = searchable.MetadataCollection.All.OfType<ISearchableMetadata>();

        foreach (var searchableMetadata in searchableMetadatas)
        {
            var searchableMetadataValue = searchableMetadata.GetValue(searchable.Instance);

            // Original: ObjectToStringHelper.ToString(searchableMetadataValue);
            // TODO Support more serializable types.
            var searchableMetadataValueAsString = string.Join(" ", searchableMetadataValue.Stringify());

            var field = new Field(searchableMetadata.SearchName, searchableMetadataValueAsString,
                Field.Store.YES,
                searchableMetadata.Analyze ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED,
                Field.TermVector.YES);

            document.Add(field);

            if (!_searchableMetadata.ContainsKey(searchableMetadata.SearchName))
            {
                _searchableMetadata.Add(searchableMetadata.SearchName, searchableMetadata);
            }
        }

        return document;
    }

    /// <summary>
    /// Obtains the index directory from <see cref="GetDirectory" /> on first use and commits an
    /// empty index to it. Later calls do nothing.
    /// </summary>
    private void Initialize()
    {
        // Taken under the lock so concurrent first calls cannot both initialize, and no caller
        // can observe the flag as set before the directory is actually available.
        lock (_lockObject)
        {
            if (_initialized)
            {
                return;
            }

            var directory = GetDirectory();

            // Required to create empty index, which is required for our reader
            using (var analyzer = new StandardAnalyzer(LuceneDefaults.Version))
            using (var indexWriter =
                new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                indexWriter.Commit();
            }

            // Publish only after everything succeeded, so a failed attempt is retried on the next call.
            _indexDirectory = directory;
            _initialized = true;
        }
    }

    /// <summary>
    /// Supplies the Lucene directory that backs the index. Called from <see cref="Initialize" />
    /// while the service lock is held, until one call completes successfully.
    /// </summary>
    /// <returns>The directory to index into.</returns>
    protected abstract Directory GetDirectory();

    #endregion
}
322 }