Horizon – Blame information for rev 15
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
11 | office | 1 | using Horizon.Database; |
15 | office | 2 | using Horizon.Searching; |
3 | using Horizon.Snapshots; |
||
11 | office | 4 | using Serilog; |
5 | using System; |
||
6 | using System.Collections.Generic; |
||
7 | using System.Data.SQLite; |
||
13 | office | 8 | using System.Diagnostics; |
11 | office | 9 | using System.Drawing; |
10 | using System.IO; |
||
11 | using System.Linq; |
||
13 | office | 12 | using System.Runtime.CompilerServices; |
11 | office | 13 | using System.Text; |
13 | office | 14 | using System.Text.RegularExpressions; |
11 | office | 15 | using System.Threading; |
16 | using System.Threading.Tasks; |
||
13 | office | 17 | using System.Threading.Tasks.Dataflow; |
18 | using Tesseract; |
||
11 | office | 19 | using TrackedFolders; |
20 | |||
21 | namespace Horizon |
||
22 | { |
||
23 | public static class Extensions |
||
24 | { |
||
/// <summary>
/// Gate with a single permit: tesseract can only process a single image at once,
/// so OCR callers take this before touching <see cref="_tesseractEngine"/>.
/// </summary>
private static readonly SemaphoreSlim _tesseractSemaphoreSlim = new SemaphoreSlim(initialCount: 1, maxCount: 1);

/// <summary>
/// Stopwatch shared by all OCR calls to time text extraction.
/// </summary>
private static readonly Stopwatch _tesseractStopWatch = new Stopwatch();

/// <summary>
/// Folder named "tessdata" located next to the application binaries.
/// </summary>
private static readonly string _tessDataDirectory = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "tessdata");

/// <summary>
/// Single shared OCR engine, created once when this class is initialized,
/// using the "eng" language data and the default engine mode.
/// </summary>
private static readonly TesseractEngine _tesseractEngine = new TesseractEngine(
    _tessDataDirectory,
    "eng",
    EngineMode.Default);

/// <summary>
/// Splits text into words: a run of non-whitespace characters delimited by word boundaries,
/// optionally followed by one more character when that character comes right after
/// a '.' and a word character.
/// Source: https://stackoverflow.com/questions/7311734/split-sentence-into-words-but-having-trouble-with-the-punctuations-in-c-sharp
/// </summary>
private static readonly Regex _splitWordRegex = new Regex(
    @"((\b[^\s]+\b)((?<=\.\w).)?)",
    RegexOptions.Compiled);
||
35 | |||
/// <summary>
/// Takes a snapshot of every file located directly inside <paramref name="path"/>
/// (subdirectories are not visited) and indexes each snapshot that was created.
/// </summary>
/// <param name="path">Directory whose files are snapshotted.</param>
/// <param name="trackedFolders">Used to look up the color associated with a file's directory.</param>
/// <param name="snapshotDatabase">Database the snapshots are created in.</param>
/// <param name="searchEngine">Search engine that indexes every created snapshot.</param>
/// <param name="cancellationToken">Forwarded to the database and the search engine; once cancellation is requested no further files are processed.</param>
/// <returns>A task that completes when all files have been processed.</returns>
public static async Task TakeSnapshot(string path, TrackedFolders.TrackedFolders trackedFolders, SnapshotDatabase snapshotDatabase, SearchEngine searchEngine, CancellationToken cancellationToken)
{
    await TakeSnapshotsAsync(path, SearchOption.TopDirectoryOnly, trackedFolders, snapshotDatabase, searchEngine, cancellationToken);
}

/// <summary>
/// Takes a snapshot of every file inside <paramref name="path"/> and all of its subdirectories
/// and indexes each snapshot that was created.
/// </summary>
/// <param name="path">Root directory whose files are snapshotted.</param>
/// <param name="trackedFolders">Used to look up the color associated with a file's directory.</param>
/// <param name="snapshotDatabase">Database the snapshots are created in.</param>
/// <param name="searchEngine">Search engine that indexes every created snapshot.</param>
/// <param name="cancellationToken">Forwarded to the database and the search engine; once cancellation is requested no further files are processed.</param>
/// <returns>A task that completes when all files have been processed.</returns>
public static async Task TakeSnapshotRecursive(string path, TrackedFolders.TrackedFolders trackedFolders, SnapshotDatabase snapshotDatabase, SearchEngine searchEngine, CancellationToken cancellationToken)
{
    await TakeSnapshotsAsync(path, SearchOption.AllDirectories, trackedFolders, snapshotDatabase, searchEngine, cancellationToken);
}

/// <summary>
/// Shared implementation of <see cref="TakeSnapshot"/> and <see cref="TakeSnapshotRecursive"/>:
/// enumerates the files selected by <paramref name="searchOption"/>, creates a snapshot per file
/// and indexes it. A failure on one file is logged and does not stop the remaining files.
/// </summary>
private static async Task TakeSnapshotsAsync(string path, SearchOption searchOption, TrackedFolders.TrackedFolders trackedFolders, SnapshotDatabase snapshotDatabase, SearchEngine searchEngine, CancellationToken cancellationToken)
{
    foreach (var file in Directory.EnumerateFiles(path, "*.*", searchOption))
    {
        // Stop quietly once cancellation is requested; otherwise every remaining file
        // would fail in turn and be logged as an error.
        if (cancellationToken.IsCancellationRequested)
        {
            return;
        }

        var fileName = Path.GetFileName(file);

        // The directory has to come from the full path: a bare file name has no
        // directory part, so the lookup below would always receive an empty string.
        // NOTE(review): assumes TryGet is keyed by the containing directory path - confirm.
        var directory = Path.GetDirectoryName(file);

        var color = Color.Empty;
        if (trackedFolders.TryGet(directory, out var folder))
        {
            color = folder.Color;
        }

        try
        {
            if (await snapshotDatabase.CreateSnapshotAsync(fileName, file, color, cancellationToken) is Snapshot snapshot)
            {
                await searchEngine.Index(snapshot, cancellationToken);
            }
        }
        catch (SQLiteException exception) when (exception.ResultCode == SQLiteErrorCode.Constraint)
        {
            // A constraint violation is reported as a duplicate snapshot, which is not an error.
            Log.Information(exception, "Snapshot already exists.");
        }
        catch (Exception exception)
        {
            // Includes every other SQLite failure, which previously vanished without a trace.
            // ":l" keeps the path unquoted so the rendered message reads as before.
            Log.Error(exception, "Could not take snapshot of file: {FilePath:l}", file);
        }
    }
}
||
13 | office | 103 | |
/// <summary>
/// Runs Tesseract OCR over <paramref name="screenCapture"/> and streams the recognized
/// text one word at a time, split with <c>_splitWordRegex</c>.
/// </summary>
/// <param name="screenCapture">Image to recognize; it is encoded as BMP in memory before being handed to Tesseract.</param>
/// <param name="cancellationToken">Cancels waiting for the OCR engine, collecting the words and streaming them.</param>
/// <returns>
/// The recognized words in the order they were matched. Failures (including cancellation)
/// are logged and end the sequence early instead of being thrown to the consumer.
/// </returns>
public static async IAsyncEnumerable<string> RecognizeStrings(Bitmap screenCapture, [EnumeratorCancellation] CancellationToken cancellationToken)
{
    // "yield" is not allowed inside a try block that has a catch clause, so the words
    // are gathered first and streamed once the engine has been released.
    var terms = new List<string>();
    var semaphoreTaken = false;

    try
    {
        // tesseract can only process a single image at once
        await _tesseractSemaphoreSlim.WaitAsync(cancellationToken);
        semaphoreTaken = true;

        // ocr image
        using var memoryStream = new MemoryStream();
        screenCapture.Save(memoryStream, System.Drawing.Imaging.ImageFormat.Bmp);

        // ToArray copies the whole buffer regardless of the stream position.
        using var pix = Pix.LoadFromMemory(memoryStream.ToArray());
        using var page = _tesseractEngine.Process(pix);

        // Restart rather than Start: the stopwatch is shared between calls, and Start
        // alone would keep adding to the time measured by earlier calls.
        _tesseractStopWatch.Restart();
        var text = page.GetText();
        _tesseractStopWatch.Stop();

        Log.Information("Tesseract OCR complete in {Elapsed}", _tesseractStopWatch.Elapsed);

        foreach (Match match in _splitWordRegex.Matches(text))
        {
            cancellationToken.ThrowIfCancellationRequested();
            terms.Add(match.Value);
        }
    }
    catch (Exception exception)
    {
        Log.Error(exception, "Exception thrown while processing images with OCR");
    }
    finally
    {
        // Only release what was actually acquired; a cancelled wait never took the semaphore.
        if (semaphoreTaken)
        {
            _tesseractSemaphoreSlim.Release();
        }
    }

    foreach (var term in terms)
    {
        // End the sequence without throwing when the consumer cancels mid-stream.
        if (cancellationToken.IsCancellationRequested)
        {
            yield break;
        }

        yield return term;
    }
}
||
11 | office | 152 | } |
153 | } |