Was.OrcSearch – Blame information for rev 4

Subversion Repositories:
Rev:
Rev Author Line No. Line
1 office 1 using System;
2 using System.Collections.Generic;
3 using System.Linq;
4 using Lucene.Net.Analysis.Standard;
5 using Lucene.Net.Documents;
6 using Lucene.Net.Index;
7 using Lucene.Net.QueryParsers;
8 using Lucene.Net.Search;
9 using Lucene.Net.Store;
10 using Was.OrcSearch.EventArgs;
11 using Was.OrcSearch.Extensions;
12 using Was.OrcSearch.Metadata.Interfaces;
4 office 13 using Was.OrcSearch.Services.Extensions;
1 office 14 using Was.OrcSearch.Services.Interfaces;
15  
16 namespace Was.OrcSearch.Services
17 {
/// <summary>
/// Base class for search services that index <see cref="ISearchable"/> objects with Lucene.
/// The Lucene directory is supplied by the derived class through <see cref="GetDirectory"/>; this class
/// keeps in-memory maps between the id stored in each Lucene document and the indexed object.
/// </summary>
public abstract class SearchServiceBase : ISearchService
{
    #region Constants

    // Name of the stored, non-analyzed document field that links a Lucene document to its in-memory object.
    private const string IndexId = "__index_id";

    #endregion

    #region Fields

    // Next document id to hand out. Static, so it is shared by every service instance and must be
    // advanced atomically rather than under the per-instance lock.
    private static int _currentIndex;

    private readonly object _lockObject = new object();

    private readonly ISearchQueryService _searchQueryService;

    // Document id -> indexed object, and the reverse lookup. Both maps must always be updated together.
    private readonly Dictionary<int, ISearchable> _indexedObjects = new Dictionary<int, ISearchable>();
    private readonly Dictionary<ISearchable, int> _searchableIndexes = new Dictionary<ISearchable, int>();

    // Search name -> metadata, collected from every object that has been indexed.
    private readonly Dictionary<string, ISearchableMetadata> _searchableMetadata =
        new Dictionary<string, ISearchableMetadata>();

    private bool _initialized;

    private Directory _indexDirectory;

    #endregion

    #region Constructors

    /// <summary>
    /// Initializes the service with the query service used to turn a filter into a Lucene query string.
    /// </summary>
    /// <param name="searchQueryService">The service that builds the query text in <see cref="Search"/>.</param>
    protected SearchServiceBase(ISearchQueryService searchQueryService)
    {
        _searchQueryService = searchQueryService;
    }

    #endregion

    #region Properties

    /// <summary>
    /// Gets the number of objects currently held in the in-memory index map.
    /// </summary>
    public int IndexedObjectCount
    {
        get
        {
            lock (_lockObject)
            {
                return _indexedObjects.Count;
            }
        }
    }

    #endregion

    #region Events

    /// <summary>Raised before objects are added, removed or cleared.</summary>
    public event EventHandler<System.EventArgs> Updating;

    /// <summary>Raised after objects have been added, removed or cleared.</summary>
    public event EventHandler<System.EventArgs> Updated;

    /// <summary>Raised at the start of <see cref="Search"/>, before the query is built.</summary>
    public event EventHandler<SearchEventArgs> Searching;

    /// <summary>Raised at the end of <see cref="Search"/>, also when the search failed.</summary>
    public event EventHandler<SearchEventArgs> Searched;

    #endregion

    #region Methods

    /// <summary>
    /// Returns a snapshot of the metadata collected from all indexed objects.
    /// </summary>
    /// <returns>A new list; later index changes do not affect it.</returns>
    public virtual IEnumerable<ISearchableMetadata> GetSearchableMetadata()
    {
        lock (_lockObject)
        {
            return _searchableMetadata.Values.ToList();
        }
    }

    /// <summary>
    /// Adds the given objects to the Lucene index and to the in-memory maps.
    /// Objects that are already indexed are skipped.
    /// </summary>
    /// <param name="searchables">The objects to index.</param>
    /// <exception cref="ArgumentNullException"><paramref name="searchables"/> is <c>null</c>.</exception>
    public virtual void AddObjects(IEnumerable<ISearchable> searchables)
    {
        if (searchables == null)
        {
            throw new ArgumentNullException(nameof(searchables));
        }

        Initialize();

        Updating?.Invoke(this, System.EventArgs.Empty);

        lock (_lockObject)
        {
            using (var analyzer = new StandardAnalyzer(LuceneDefaults.Version))
            using (var writer = new IndexWriter(_indexDirectory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                foreach (var searchable in searchables)
                {
                    // Already indexed: skip, so a duplicate cannot leave the two lookup maps out of sync.
                    if (_searchableIndexes.ContainsKey(searchable))
                    {
                        continue;
                    }

                    // Increment returns the new value; subtract one to keep the post-increment numbering.
                    var index = System.Threading.Interlocked.Increment(ref _currentIndex) - 1;

                    // Write the document first so a failure here does not register an object
                    // that has no document in the index.
                    writer.AddDocument(CreateDocument(searchable, index));

                    _indexedObjects.Add(index, searchable);
                    _searchableIndexes.Add(searchable, index);
                }

                writer.Optimize();
                writer.Commit();
            }
        }

        Updated?.Invoke(this, System.EventArgs.Empty);
    }

    /// <summary>
    /// Removes the given objects from the Lucene index and from the in-memory maps.
    /// Objects that are not indexed are ignored.
    /// </summary>
    /// <param name="searchables">The objects to remove.</param>
    /// <exception cref="ArgumentNullException"><paramref name="searchables"/> is <c>null</c>.</exception>
    public virtual void RemoveObjects(IEnumerable<ISearchable> searchables)
    {
        if (searchables == null)
        {
            throw new ArgumentNullException(nameof(searchables));
        }

        Initialize();

        // Events are raised outside the lock, matching AddObjects, so handlers never run while
        // this service holds its lock.
        Updating?.Invoke(this, System.EventArgs.Empty);

        lock (_lockObject)
        {
            using (var analyzer = new StandardAnalyzer(LuceneDefaults.Version))
            using (var writer = new IndexWriter(_indexDirectory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                foreach (var searchable in searchables)
                {
                    if (!_searchableIndexes.TryGetValue(searchable, out var index))
                    {
                        continue;
                    }

                    // The id field is stored NOT_ANALYZED, so an exact term identifies the document;
                    // no query parsing is needed.
                    var idText = index.ToString(System.Globalization.CultureInfo.InvariantCulture);
                    writer.DeleteDocuments(new Term(IndexId, idText));

                    _searchableIndexes.Remove(searchable);
                    _indexedObjects.Remove(index);
                }

                writer.Optimize();
                writer.Commit();
            }
        }

        Updated?.Invoke(this, System.EventArgs.Empty);
    }

    /// <summary>
    /// Removes every document from the Lucene index and empties all in-memory maps.
    /// </summary>
    public void ClearAllObjects()
    {
        Initialize();

        Updating?.Invoke(this, System.EventArgs.Empty);

        lock (_lockObject)
        {
            using (var analyzer = new StandardAnalyzer(LuceneDefaults.Version))
            using (var writer = new IndexWriter(_indexDirectory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                writer.DeleteAll();

                writer.Optimize();
                writer.Commit();
            }

            // The reverse lookup has to be cleared as well; otherwise re-adding a cleared object
            // would be rejected as a duplicate and RemoveObjects would see stale ids.
            _indexedObjects.Clear();
            _searchableIndexes.Clear();
            _searchableMetadata.Clear();
        }

        Updated?.Invoke(this, System.EventArgs.Empty);
    }

    /// <summary>
    /// Searches the index with the given filter.
    /// </summary>
    /// <param name="filter">The filter text; a filter that is not a valid search filter yields no results.</param>
    /// <param name="maxResults">The maximum number of hits requested from Lucene.</param>
    /// <returns>The indexed objects that matched, in the order Lucene returned the hits.</returns>
    /// <exception cref="ParseException">The generated query text could not be parsed.</exception>
    public virtual IEnumerable<ISearchable> Search(string filter, int maxResults = SearchDefaults.DefaultResults)
    {
        Initialize();

        var results = new List<ISearchable>();

        lock (_lockObject)
        {
            // No catch blocks: parse and search failures propagate to the caller unchanged.
            // The finally block still guarantees that Searched is raised.
            try
            {
                Searching?.Invoke(this, new SearchEventArgs(filter, results));

                var query = BuildQuery(filter);
                if (query != null)
                {
                    using (var searcher = new IndexSearcher(_indexDirectory))
                    {
                        var hits = searcher.Search(query, maxResults);
                        foreach (var scoreDoc in hits.ScoreDocs)
                        {
                            var doc = searcher.Doc(scoreDoc.Doc);

                            // Skip hits that cannot be mapped back to an in-memory object (no usable
                            // id field, or an id this instance does not know) instead of failing the search.
                            if (int.TryParse(doc.Get(IndexId), System.Globalization.NumberStyles.Integer,
                                    System.Globalization.CultureInfo.InvariantCulture, out var index) &&
                                _indexedObjects.TryGetValue(index, out var searchable))
                            {
                                results.Add(searchable);
                            }
                        }
                    }
                }
            }
            finally
            {
                Searched?.Invoke(this, new SearchEventArgs(filter, results));
            }
        }

        return results;
    }

    /// <summary>
    /// Returns the Lucene directory that holds the index. Called once, on first use of the service.
    /// </summary>
    protected abstract Directory GetDirectory();

    // Builds the Lucene document for one object: the id field plus one field per searchable metadata.
    // Also records each metadata under its search name. Must be called while holding _lockObject.
    private Document CreateDocument(ISearchable searchable, int index)
    {
        var document = new Document();
        document.Add(new Field(IndexId, index.ToString(System.Globalization.CultureInfo.InvariantCulture),
            Field.Store.YES, Field.Index.NOT_ANALYZED));

        var searchableMetadatas = searchable.MetadataCollection.All.OfType<ISearchableMetadata>();
        foreach (var searchableMetadata in searchableMetadatas)
        {
            var value = searchableMetadata.GetValue(searchable.Instance);
            var valueAsString = string.Join(" ", value.Stringify());

            var field = new Field(searchableMetadata.SearchName, valueAsString,
                Field.Store.YES,
                searchableMetadata.Analyze ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED,
                Field.TermVector.NO);

            document.Add(field);

            if (!_searchableMetadata.ContainsKey(searchableMetadata.SearchName))
            {
                _searchableMetadata.Add(searchableMetadata.SearchName, searchableMetadata);
            }
        }

        return document;
    }

    // Turns the filter into a Lucene query, or returns null when the filter is not a valid search filter.
    //
    // Note: regex search (a RegexQuery per searchable field, combined with SHOULD clauses) was tried
    // here and is disabled because of two issues:
    // 1. Lucene uses the lower-case interpretation of each string for indexing, so a regular
    //    expression could only use lower-case characters.
    // 2. Escape sequences do not work; the reason is unknown.
    // Fixing (1) requires indexing differently, probably with two versions of the indices:
    // one for regular search and another for regex.
    private Query BuildQuery(string filter)
    {
        if (!filter.IsValidOrcSearchFilter())
        {
            return null;
        }

        using (var analyzer = new StandardAnalyzer(LuceneDefaults.Version))
        {
            var queryAsText = _searchQueryService.GetSearchQuery(filter, GetSearchableMetadata());

            var parser = new QueryParser(LuceneDefaults.Version, string.Empty, analyzer);
            return parser.Parse(queryAsText);
        }
    }

    // Obtains the directory and makes sure an (empty) index exists in it. Runs at most once
    // successfully; the initialized flag is only set after everything succeeded, so a failed
    // attempt is retried on the next call instead of leaving the service with a null directory.
    private void Initialize()
    {
        lock (_lockObject)
        {
            if (_initialized)
            {
                return;
            }

            var directory = GetDirectory();

            // Required to create an empty index, which is required for our reader.
            using (var analyzer = new StandardAnalyzer(LuceneDefaults.Version))
            using (var indexWriter = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                indexWriter.Commit();
            }

            _indexDirectory = directory;
            _initialized = true;
        }
    }

    #endregion
}
323 }