Horizon – Diff between revs 11 and 13

Subversion Repositories:
Rev:
Show entire file | Ignore whitespace
Rev 11 Rev 13
Line 1... Line 1...
1 using Horizon.Database; 1 using Horizon.Database;
2 using Serilog; 2 using Serilog;
3 using System; 3 using System;
4 using System.Collections.Generic; 4 using System.Collections.Generic;
5 using System.Data.SQLite; 5 using System.Data.SQLite;
-   6 using System.Diagnostics;
6 using System.Drawing; 7 using System.Drawing;
7 using System.IO; 8 using System.IO;
8 using System.Linq; 9 using System.Linq;
-   10 using System.Runtime.CompilerServices;
9 using System.Text; 11 using System.Text;
-   12 using System.Text.RegularExpressions;
10 using System.Threading; 13 using System.Threading;
11 using System.Threading.Tasks; 14 using System.Threading.Tasks;
-   15 using System.Threading.Tasks.Dataflow;
-   16 using Tesseract;
12 using TrackedFolders; 17 using TrackedFolders;
Line 13... Line 18...
13   18  
14 namespace Horizon 19 namespace Horizon
15 { 20 {
16 public static class Extensions 21 public static class Extensions
-   22 {
-   23 private static readonly SemaphoreSlim _tesseractSemaphoreSlim = new SemaphoreSlim(1, 1);
-   24  
-   25 private static readonly Stopwatch _tesseractStopWatch = new Stopwatch();
-   26  
-   27 private static readonly TesseractEngine _tesseractEngine = new TesseractEngine(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "tessdata"), "eng", EngineMode.Default);
-   28  
-   29 /// <summary>
-   30 /// https://stackoverflow.com/questions/7311734/split-sentence-into-words-but-having-trouble-with-the-punctuations-in-c-sharp
-   31 /// </summary>
-   32 private static readonly Regex _splitWordRegex = new Regex(@"((\b[^\s]+\b)((?<=\.\w).)?)", RegexOptions.Compiled);
17 { 33  
18 public static async Task TakeSnapshot(string path, TrackedFolders.TrackedFolders trackedFolders, SnapshotDatabase snapshotDatabase, CancellationToken cancellationToken) 34 public static async Task TakeSnapshot(string path, TrackedFolders.TrackedFolders trackedFolders, SnapshotDatabase snapshotDatabase, CancellationToken cancellationToken)
19 { 35 {
20 foreach (var file in Directory.EnumerateFiles(path, "*.*", SearchOption.TopDirectoryOnly)) 36 foreach (var file in Directory.EnumerateFiles(path, "*.*", SearchOption.TopDirectoryOnly))
21 { 37 {
Line 72... Line 88...
72 { 88 {
73 Log.Error(exception, $"Could not take snapshot of file: {file}"); 89 Log.Error(exception, $"Could not take snapshot of file: {file}");
74 } 90 }
75 } 91 }
76 } 92 }
-   93  
-   94 public static async IAsyncEnumerable<string> RecognizeStrings(Bitmap screenCapture, [EnumeratorCancellation] CancellationToken cancellationToken)
-   95 {
-   96 var bufferBlock = new BufferBlock<string>(new DataflowBlockOptions { CancellationToken = cancellationToken, EnsureOrdered = false });
-   97 // tesseract can only process a single image at once
-   98 await _tesseractSemaphoreSlim.WaitAsync();
-   99 try
-   100 {
-   101 // ocr image
-   102 using var memoryStream = new MemoryStream();
-   103 screenCapture.Save(memoryStream, System.Drawing.Imaging.ImageFormat.Bmp);
-   104 memoryStream.Position = 0L;
-   105 var imageData = memoryStream.ToArray();
-   106 using var pix = Pix.LoadFromMemory(imageData);
-   107 using var page = _tesseractEngine.Process(pix);
-   108 _tesseractStopWatch.Start();
-   109 var text = page.GetText();
-   110 _tesseractStopWatch.Stop();
-   111 var time = _tesseractStopWatch.Elapsed;
-   112  
-   113 Log.Information($"Tesseract OCR complete in {time}");
-   114  
-   115 foreach (var match in _splitWordRegex.Matches(text))
-   116 {
-   117 await bufferBlock.SendAsync($"{match}", cancellationToken);
-   118 }
-   119  
-   120 bufferBlock.Complete();
-   121 }
-   122 catch (Exception exception)
-   123 {
-   124 Log.Error(exception, $"Exception thrown while processing images with OCR");
-   125  
-   126 bufferBlock.Complete();
-   127 }
-   128 finally
-   129 {
-   130 _tesseractSemaphoreSlim.Release();
-   131 }
-   132  
-   133 //await bufferBlock.Completion;
-   134 while (await bufferBlock.OutputAvailableAsync())
-   135 {
-   136 if (bufferBlock.TryReceive(out var term))
-   137 {
-   138 yield return term;
-   139 }
-   140 }
-   141 }
77 } 142 }
78 } 143 }