Horizon – Blame information for rev 13
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
11 | office | 1 | using Horizon.Database; |
2 | using Serilog; |
||
3 | using System; |
||
4 | using System.Collections.Generic; |
||
5 | using System.Data.SQLite; |
||
13 | office | 6 | using System.Diagnostics; |
11 | office | 7 | using System.Drawing; |
8 | using System.IO; |
||
9 | using System.Linq; |
||
13 | office | 10 | using System.Runtime.CompilerServices; |
11 | office | 11 | using System.Text; |
13 | office | 12 | using System.Text.RegularExpressions; |
11 | office | 13 | using System.Threading; |
14 | using System.Threading.Tasks; |
||
13 | office | 15 | using System.Threading.Tasks.Dataflow; |
16 | using Tesseract; |
||
11 | office | 17 | using TrackedFolders; |
18 | |||
19 | namespace Horizon |
||
20 | { |
||
/// <summary>
/// Static helpers that store file snapshots in a <see cref="SnapshotDatabase"/> and
/// extract words from screen captures using Tesseract OCR.
/// </summary>
public static class Extensions
{
    /// <summary>
    /// Serializes access to <see cref="_tesseractEngine"/>: tesseract can only process a single image at once.
    /// </summary>
    private static readonly SemaphoreSlim _tesseractSemaphoreSlim = new SemaphoreSlim(1, 1);

    /// <summary>
    /// Measures the duration of the OCR text extraction; only used while
    /// <see cref="_tesseractSemaphoreSlim"/> is held.
    /// </summary>
    private static readonly Stopwatch _tesseractStopWatch = new Stopwatch();

    /// <summary>
    /// Shared OCR engine for English, loading its data from the "tessdata" folder
    /// under the application base directory.
    /// </summary>
    private static readonly TesseractEngine _tesseractEngine = new TesseractEngine(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "tessdata"), "eng", EngineMode.Default);

    /// <summary>
    /// Pattern used to split recognized text into words. Taken from
    /// https://stackoverflow.com/questions/7311734/split-sentence-into-words-but-having-trouble-with-the-punctuations-in-c-sharp
    /// </summary>
    private static readonly Regex _splitWordRegex = new Regex(@"((\b[^\s]+\b)((?<=\.\w).)?)", RegexOptions.Compiled);

    /// <summary>
    /// Creates a snapshot for every file located directly inside <paramref name="path"/>.
    /// </summary>
    /// <param name="path">The directory whose files are snapshotted (subdirectories are not visited).</param>
    /// <param name="trackedFolders">Used to look up the color of the folder that contains each file.</param>
    /// <param name="snapshotDatabase">The database that receives the snapshots.</param>
    /// <param name="cancellationToken">Passed on to the snapshot creation call.</param>
    /// <returns>A task that completes once all files have been attempted.</returns>
    public static async Task TakeSnapshot(string path, TrackedFolders.TrackedFolders trackedFolders, SnapshotDatabase snapshotDatabase, CancellationToken cancellationToken)
    {
        await TakeSnapshotsAsync(path, SearchOption.TopDirectoryOnly, trackedFolders, snapshotDatabase, cancellationToken);
    }

    /// <summary>
    /// Creates a snapshot for every file inside <paramref name="path"/> and all of its subdirectories.
    /// </summary>
    /// <param name="path">The root directory whose files are snapshotted recursively.</param>
    /// <param name="trackedFolders">Used to look up the color of the folder that contains each file.</param>
    /// <param name="snapshotDatabase">The database that receives the snapshots.</param>
    /// <param name="cancellationToken">Passed on to the snapshot creation call.</param>
    /// <returns>A task that completes once all files have been attempted.</returns>
    public static async Task TakeSnapshotRecursive(string path, TrackedFolders.TrackedFolders trackedFolders, SnapshotDatabase snapshotDatabase, CancellationToken cancellationToken)
    {
        await TakeSnapshotsAsync(path, SearchOption.AllDirectories, trackedFolders, snapshotDatabase, cancellationToken);
    }

    /// <summary>
    /// Shared implementation of <see cref="TakeSnapshot"/> and <see cref="TakeSnapshotRecursive"/>:
    /// enumerates the files under <paramref name="path"/> and creates one snapshot per file.
    /// A failure on one file is logged and does not stop the remaining files from being processed.
    /// </summary>
    private static async Task TakeSnapshotsAsync(string path, SearchOption searchOption, TrackedFolders.TrackedFolders trackedFolders, SnapshotDatabase snapshotDatabase, CancellationToken cancellationToken)
    {
        foreach (var file in Directory.EnumerateFiles(path, "*.*", searchOption))
        {
            try
            {
                var fileName = Path.GetFileName(file);

                // The directory must come from the full path: the bare file name has
                // no directory component, so the folder lookup could never match.
                var directory = Path.GetDirectoryName(file);

                var color = Color.Empty;
                if (trackedFolders.TryGet(directory, out var folder))
                {
                    color = folder.Color;
                }

                await snapshotDatabase.CreateSnapshotAsync(fileName, file, color, cancellationToken);
            }
            catch (SQLiteException exception) when (exception.ResultCode == SQLiteErrorCode.Constraint)
            {
                Log.Information(exception, "Snapshot already exists.");
            }
            catch (Exception exception)
            {
                // Also reached by SQLite errors other than constraint violations,
                // which would otherwise be swallowed without a trace.
                Log.Error(exception, $"Could not take snapshot of file: {file}");
            }
        }
    }

    /// <summary>
    /// Runs OCR over <paramref name="screenCapture"/> and yields every word found in the recognized text.
    /// </summary>
    /// <param name="screenCapture">The image to recognize text in.</param>
    /// <param name="cancellationToken">
    /// Cancels the internal buffer and the pending sends into it.
    /// </param>
    /// <returns>
    /// The recognized words. Errors during recognition are logged and end the sequence
    /// instead of being thrown to the consumer.
    /// </returns>
    public static async IAsyncEnumerable<string> RecognizeStrings(Bitmap screenCapture, [EnumeratorCancellation] CancellationToken cancellationToken)
    {
        // "yield return" is not allowed inside a try block that has a catch clause, so the
        // words are collected into a buffer first and yielded after the try statement.
        var bufferBlock = new BufferBlock<string>(new DataflowBlockOptions { CancellationToken = cancellationToken, EnsureOrdered = false });

        // tesseract can only process a single image at once
        await _tesseractSemaphoreSlim.WaitAsync();
        try
        {
            // ocr image
            using var memoryStream = new MemoryStream();
            screenCapture.Save(memoryStream, System.Drawing.Imaging.ImageFormat.Bmp);
            var imageData = memoryStream.ToArray();
            using var pix = Pix.LoadFromMemory(imageData);
            using var page = _tesseractEngine.Process(pix);

            // Restart (not Start): the stopwatch is shared between calls and would
            // otherwise report the accumulated time of every OCR run so far.
            _tesseractStopWatch.Restart();
            var text = page.GetText();
            _tesseractStopWatch.Stop();
            var time = _tesseractStopWatch.Elapsed;

            Log.Information($"Tesseract OCR complete in {time}");

            foreach (var match in _splitWordRegex.Matches(text))
            {
                await bufferBlock.SendAsync($"{match}", cancellationToken);
            }
        }
        catch (Exception exception)
        {
            Log.Error(exception, $"Exception thrown while processing images with OCR");
        }
        finally
        {
            // Always complete the buffer so the drain loop below terminates.
            bufferBlock.Complete();
            _tesseractSemaphoreSlim.Release();
        }

        while (await bufferBlock.OutputAvailableAsync())
        {
            if (bufferBlock.TryReceive(out var term))
            {
                yield return term;
            }
        }
    }
}
143 | } |