Horizon – Blame information for rev 13

Subversion Repositories:
Rev:
Rev Author Line No. Line
11 office 1 using Horizon.Database;
2 using Serilog;
3 using System;
4 using System.Collections.Generic;
5 using System.Data.SQLite;
13 office 6 using System.Diagnostics;
11 office 7 using System.Drawing;
8 using System.IO;
9 using System.Linq;
13 office 10 using System.Runtime.CompilerServices;
11 office 11 using System.Text;
13 office 12 using System.Text.RegularExpressions;
11 office 13 using System.Threading;
14 using System.Threading.Tasks;
13 office 15 using System.Threading.Tasks.Dataflow;
16 using Tesseract;
11 office 17 using TrackedFolders;
18  
19 namespace Horizon
20 {
/// <summary>
/// Static helpers that snapshot the files of a folder into a <see cref="SnapshotDatabase"/>
/// and that run Tesseract OCR over a screen capture.
/// </summary>
public static class Extensions
{
    // The shared Tesseract engine below is used by one caller at a time; this gate enforces that.
    private static readonly SemaphoreSlim _tesseractSemaphoreSlim = new SemaphoreSlim(1, 1);

    // Only touched while the semaphore is held, so sharing a single instance is safe.
    private static readonly Stopwatch _tesseractStopWatch = new Stopwatch();

    // Loads the English model from the "tessdata" folder next to the application binaries.
    private static readonly TesseractEngine _tesseractEngine = new TesseractEngine(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "tessdata"), "eng", EngineMode.Default);

    /// <summary>
    /// https://stackoverflow.com/questions/7311734/split-sentence-into-words-but-having-trouble-with-the-punctuations-in-c-sharp
    /// </summary>
    private static readonly Regex _splitWordRegex = new Regex(@"((\b[^\s]+\b)((?<=\.\w).)?)", RegexOptions.Compiled);

    /// <summary>
    /// Creates a snapshot for every file located directly inside <paramref name="path"/>.
    /// </summary>
    /// <param name="path">The folder whose files are snapshotted (sub-folders are not visited).</param>
    /// <param name="trackedFolders">Looked up by each file's directory to obtain the snapshot color.</param>
    /// <param name="snapshotDatabase">The database the snapshots are written to.</param>
    /// <param name="cancellationToken">Forwarded to the database; cancelling stops the enumeration.</param>
    /// <returns>A task that completes once every file has been attempted.</returns>
    public static Task TakeSnapshot(string path, TrackedFolders.TrackedFolders trackedFolders, SnapshotDatabase snapshotDatabase, CancellationToken cancellationToken)
    {
        return TakeSnapshotsAsync(path, SearchOption.TopDirectoryOnly, trackedFolders, snapshotDatabase, cancellationToken);
    }

    /// <summary>
    /// Creates a snapshot for every file inside <paramref name="path"/> and all of its sub-folders.
    /// </summary>
    /// <param name="path">The root folder whose files are snapshotted.</param>
    /// <param name="trackedFolders">Looked up by each file's directory to obtain the snapshot color.</param>
    /// <param name="snapshotDatabase">The database the snapshots are written to.</param>
    /// <param name="cancellationToken">Forwarded to the database; cancelling stops the enumeration.</param>
    /// <returns>A task that completes once every file has been attempted.</returns>
    public static Task TakeSnapshotRecursive(string path, TrackedFolders.TrackedFolders trackedFolders, SnapshotDatabase snapshotDatabase, CancellationToken cancellationToken)
    {
        return TakeSnapshotsAsync(path, SearchOption.AllDirectories, trackedFolders, snapshotDatabase, cancellationToken);
    }

    /// <summary>
    /// Shared implementation of <see cref="TakeSnapshot"/> and <see cref="TakeSnapshotRecursive"/>:
    /// enumerates the files selected by <paramref name="searchOption"/> and snapshots them one by one.
    /// A failure on one file is logged and does not prevent the remaining files from being processed.
    /// </summary>
    private static async Task TakeSnapshotsAsync(string path, SearchOption searchOption, TrackedFolders.TrackedFolders trackedFolders, SnapshotDatabase snapshotDatabase, CancellationToken cancellationToken)
    {
        foreach (var file in Directory.EnumerateFiles(path, "*.*", searchOption))
        {
            try
            {
                var fileName = Path.GetFileName(file);

                // The directory must come from the full path: the directory of a bare file name is
                // always empty, so the tracked-folder lookup would never be given a real folder.
                var directory = Path.GetDirectoryName(file);

                var color = Color.Empty;
                if (trackedFolders.TryGet(directory, out var folder))
                {
                    color = folder.Color;
                }

                await snapshotDatabase.CreateSnapshotAsync(fileName, file, color, cancellationToken);
            }
            catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
            {
                // Every remaining file would be cancelled as well; stop instead of logging an error per file.
                Log.Information("Snapshot operation was cancelled.");
                return;
            }
            catch (SQLiteException exception) when (exception.ResultCode == SQLiteErrorCode.Constraint)
            {
                Log.Information(exception, "Snapshot already exists.");
            }
            catch (Exception exception)
            {
                // Also reached for SQLite failures other than constraint violations, which must not pass silently.
                // ":l" renders the path without quotes, matching the previous message text.
                Log.Error(exception, "Could not take snapshot of file: {File:l}", file);
            }
        }
    }

    /// <summary>
    /// Runs OCR over <paramref name="screenCapture"/> and streams the recognized text split into words.
    /// </summary>
    /// <param name="screenCapture">The image to recognize.</param>
    /// <param name="cancellationToken">Cancels waiting for the OCR engine and the production of words.</param>
    /// <returns>
    /// The words matched by the word-splitting expression. The sequence is empty when recognition
    /// fails or is cancelled; failures are logged rather than thrown.
    /// </returns>
    public static async IAsyncEnumerable<string> RecognizeStrings(Bitmap screenCapture, [EnumeratorCancellation] CancellationToken cancellationToken)
    {
        var bufferBlock = new BufferBlock<string>(new DataflowBlockOptions { CancellationToken = cancellationToken, EnsureOrdered = false });

        // Tracks whether the semaphore was actually obtained so that it is only released when held.
        var semaphoreAcquired = false;
        try
        {
            // tesseract can only process a single image at once
            await _tesseractSemaphoreSlim.WaitAsync(cancellationToken);
            semaphoreAcquired = true;

            // ocr image
            using var memoryStream = new MemoryStream();
            screenCapture.Save(memoryStream, System.Drawing.Imaging.ImageFormat.Bmp);
            var imageData = memoryStream.ToArray();
            using var pix = Pix.LoadFromMemory(imageData);
            using var page = _tesseractEngine.Process(pix);

            // Restart rather than Start: the stopwatch is shared, so Start would keep adding
            // to the time measured by earlier calls.
            _tesseractStopWatch.Restart();
            var text = page.GetText();
            _tesseractStopWatch.Stop();

            Log.Information("Tesseract OCR complete in {Elapsed}", _tesseractStopWatch.Elapsed);

            foreach (Match match in _splitWordRegex.Matches(text))
            {
                await bufferBlock.SendAsync(match.Value, cancellationToken);
            }
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            Log.Information("OCR was cancelled.");
        }
        catch (Exception exception)
        {
            Log.Error(exception, "Exception thrown while processing images with OCR");
        }
        finally
        {
            if (semaphoreAcquired)
            {
                _tesseractSemaphoreSlim.Release();
            }

            // Always complete the block, otherwise the drain loop below would wait forever.
            bufferBlock.Complete();
        }

        while (await bufferBlock.OutputAvailableAsync())
        {
            if (bufferBlock.TryReceive(out var term))
            {
                yield return term;
            }
        }
    }
}
143 }