HuntnGather – Blame information for rev 14

Subversion Repositories:
Rev:
Rev Author Line No. Line
1 office 1 ///////////////////////////////////////////////////////////////////////////
2 // Copyright (C) 2021 Wizardry and Steamworks - License: MIT //
3 ///////////////////////////////////////////////////////////////////////////
4  
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <dirent.h>
9 #include <signal.h>
10  
11 #include <sys/types.h>
12 #include <sys/stat.h>
13  
14 #include <proto/dos.h>
15 #include <proto/exec.h>
16  
17 #include "StringStack.h"
18  
5 office 19 #if !defined ___HAVE_GETOPT___
1 office 20 #include "getopt.h"
21 #endif
22  
5 office 23 #if defined ___AmigaOS___
24 /*************************************************************************/
25 /* Version string used for querying the program version. */
26 /*************************************************************************/
27 TEXT version_string[] =
11 office 28 "\0$VER: Gather 1.7 "__DATE__" by Wizardry and Steamworks";
5 office 29 #endif
1 office 30  
// Boolean constants for toolchains that do not provide them.
// No trailing semicolon: the macros must be usable inside expressions.
#if !defined TRUE
#define TRUE 1
#endif

#if !defined FALSE
#define FALSE 0
#endif

// Database size (in bytes) that Gather() divides the database size by in
// order to decide how many temporary files the sort is split into.
#define MAX_MEM 262144
// Initial sizes (in bytes) of the growable name, path and line buffers.
#define NAME_BUF 32
#define PATH_BUF 128
#define LINE_BUF 256
// Database file used when none is selected with the -d option.
#define DEFAULT_DATABASE_FILE "S:gather.db"
1 office 44  
/* Running totals collected while a directory tree is being indexed. */
typedef struct {
    unsigned int dirs;  /* number of directories encountered */
    unsigned int files; /* number of files written to the database */
} stats;
49  
50 int run = TRUE;
51 int verbose = TRUE;
52  
53 void SignalHandler(int sig) {
54 // Toggle the run flag to stop execution.
55 run = FALSE;
56 }
57  
/*
 * qsort() comparator for an array of C strings ("char *" elements).
 *
 * Compares the complete strings. A comparison limited to the length of the
 * first string would treat "abc" and "abcd" as equal in one direction and
 * different in the other, which is not a consistent ordering for qsort().
 *
 * Returns a value less than, equal to or greater than zero.
 */
int compare(const void *a, const void *b) {
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
63  
64 /*
65 *
66 * Sorts a database file lexicographically.
67 */
68 void SortDatabase(char *dbFile) {
69 FILE *fp;
70 char *name = NULL;
71 char *path = NULL;
72 char **database;
73 char c;
74 int i;
75 int side;
76 unsigned int line;
11 office 77 int name_size;
78 int path_size;
1 office 79  
80 // Open database file for reading.
81 if((fp = fopen(dbFile, "r")) == NULL) {
82 fprintf(stderr, "Unable to open gather database for reading.\n");
83 return;
84 }
85  
11 office 86 database = malloc(sizeof(char *));
87 name_size = NAME_BUF;
88 name = malloc(name_size * sizeof(char));
89 path_size = PATH_BUF;
90 path = malloc(path_size * sizeof(char));
1 office 91 line = 0;
92 side = 0;
93 i = 0;
94  
95 if(verbose) {
96 fprintf(stdout, "Sorting database: '%s'\n", dbFile);
97 }
98  
99 while(run && fscanf(fp, "%c", &c) == 1) {
100 #if defined ___AmigaOS___
101 // Check if CTRL+C was pressed and abort the program.
102 if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
103 run = FALSE;
104 continue;
105 }
106 #endif
107 switch(c) {
108 case '\n':
109 // Load up the name and path into the database variable.
11 office 110 database = realloc(database, (line + 1) * sizeof(char *));
111 database[line] = malloc((strlen(name) + strlen(path) + 1 + 1) * sizeof(char));
1 office 112 sprintf(database[line], "%s\t%s", name, path);
113 ++line;
114  
115 free(name);
11 office 116 name_size = NAME_BUF;
117 name = malloc(name_size * sizeof(char));
1 office 118 --side;
119 i = 0;
120  
121 break;
122 case '\t':
123 free(path);
11 office 124 path_size = PATH_BUF;
125 path = malloc(path_size * sizeof(char));
1 office 126 ++side;
127 i = 0;
128 break;
129 default:
130 switch(side) {
131 case 0:
11 office 132 if(strlen(name) == name_size) {
133 name_size = name_size * 1.5;
134 name = realloc(name, name_size * sizeof(char));
135 }
136 //name = realloc(name, (i + 1 + 1) * sizeof(char));
1 office 137 name[i] = c;
138 name[i + 1] = '\0';
139 break;
140 case 1:
11 office 141 if(strlen(path) == path_size) {
142 path_size = path_size * 1.5;
143 path = realloc(path, path_size * sizeof(char));
144 }
145 //path = realloc(path, (i + 1 + 1) * sizeof(char));
1 office 146 path[i] = c;
147 path[i + 1] = '\0';
148 break;
149 default:
150 fprintf(stderr, "Database corrupted.\n");
151 break;
152 }
153 ++i;
154 break;
155 }
156 }
157  
158 fclose(fp);
159  
160 // Sort the database.
161 qsort(database, line, sizeof(char *), compare);
162  
163 // Write the database lines back to the database.
164 if((fp = fopen(dbFile, "w+")) == NULL) {
165 fprintf(stderr, "Unable to open gather database for writing.\n");
166 return;
167 }
168  
169 for(i = 0; i < line; ++i) {
170 fprintf(fp, "%s\n", database[i]);
171 }
172  
173 free(database);
174 fclose(fp);
175 }
176  
177 /*
178 *
179 * Updates a database file "dbFile".
180 */
181 void UpdateDatabase(char *dbFile, stringStack *dirStack, stats *stats) {
182 FILE *fp;
183 DIR *dir;
184 struct dirent *dirEntry;
185 struct stat dirStat;
186 unsigned int size;
187 char *path;
188 char *subPath;
189  
190 if((fp = fopen(dbFile, "w+")) == NULL) {
191 fprintf(stderr, "Unable to open gather database for writing.\n");
192 return;
193 }
194  
195 while(run && !stringStackIsEmpty(dirStack)) {
196 #if defined ___AmigaOS___
197 // Check if CTRL+C was pressed and abort the program.
198 if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
199 run = FALSE;
200 }
201 #endif
202 if((path = stringStackPop(dirStack)) == NULL) {
203 return;
204 }
205  
206 if((dir = opendir(path)) == NULL) {
207 return;
208 }
209  
210 while(run && (dirEntry = readdir(dir)) != NULL) {
211 #if defined ___AmigaOS___
212 // Check if CTRL+C was pressed and abort the program.
213 if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
214 run = FALSE;
215 }
216 #endif
217 size = sizeof(path) + sizeof(dirEntry->d_name) + 1;
218 switch(path[strlen(path) - 1]) {
219 case '/':
220 case ':': // This is a drive path.
11 office 221 subPath = malloc(size);
1 office 222 sprintf(subPath, "%s%s", path, dirEntry->d_name);
223 break;
224 default:
11 office 225 subPath = malloc(size + 1);
1 office 226 sprintf(subPath, "%s/%s", path, dirEntry->d_name);
227 break;
228 }
229 stat(subPath, &dirStat);
230 if(S_ISDIR(dirStat.st_mode)) {
231 stringStackPush(dirStack, subPath);
232  
233 ++stats->dirs;
234  
235 if(verbose) {
236 fprintf(stdout,
237 "Gathered %d directories and %d files.\r",
238 stats->dirs,
239 stats->files);
240 }
241  
242 free(subPath);
243 continue;
244 }
245  
246 // Write to database file.
247 fprintf(fp, "%s\t%s\n", dirEntry->d_name, subPath);
248  
249 ++stats->files;
250  
251 if(verbose) {
252 fprintf(stdout,
253 "Gathered %d directories and %d files.\r",
254 stats->dirs,
255 stats->files);
256 }
257  
258 free(subPath);
259 }
260  
261 closedir(dir);
262 free(path);
263 }
264  
265 if(verbose) {
266 fprintf(stdout, "\n");
267 }
268  
269 fclose(fp);
270  
271 }
272  
/*
 *
 * Gets the size of a database "dbFile".
 *
 * Returns the size in bytes as reported by ftell() at the end of the
 * file, or 0 when the file cannot be opened or its size cannot be
 * determined.
 */
int GetDatabaseSize(char *dbFile) {
    FILE *fp;
    long size = -1;

    if((fp = fopen(dbFile, "r")) == NULL) {
        // Nothing to close here: there is no open stream.
        fprintf(stderr, "Unable to open gather database for reading.\n");
        return 0;
    }

    if(fseek(fp, 0L, SEEK_END) == 0) {
        size = ftell(fp);
    }

    fclose(fp);

    if(size < 0) {
        return 0;
    }

    return (int)size;
}
293  
/*
 *
 * Counts the lines in a database file "dbFile".
 *
 * Returns the number of newline characters in the file, or 0 when the
 * file cannot be opened.
 */
int CountDatabaseLines(char *dbFile) {
    FILE *fp;
    int lines = 0;
    int c;

    if((fp = fopen(dbFile, "r")) == NULL) {
        // Nothing to close here: there is no open stream.
        fprintf(stderr, "Unable to open gather database for reading.\n");
        return 0;
    }

    while((c = fgetc(fp)) != EOF) {
        if(c == '\n') {
            ++lines;
        }
    }

    fclose(fp);

    return lines;
}
322  
323 /*
324 *
325 * Creates "files" temporary filenames.
326 */
327 char **CreateTempFiles(int files) {
328 char **tmpNames;
329 int count;
330  
11 office 331 tmpNames = malloc(files * sizeof(char *));
1 office 332  
333 if(verbose) {
334 fprintf(stdout, "Creating temporary files.\r");
335 }
336  
337 count = files;
338 while(--count > -1) {
339 #if defined ___AmigaOS___
340 // Check if CTRL+C was pressed and abort the program.
341 if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
342 run = FALSE;
343 }
344 #endif
345 tmpNames[count] = tmpnam(NULL);
346  
347 if(verbose) {
348 fprintf(stdout, "Creating temporary files: %d%%\r", 100 - (int)(((float)count / files) * 100.0));
349 }
350 }
351  
352 if(verbose) {
353 fprintf(stdout, "\n");
354 }
355  
356 return tmpNames;
357 }
358  
359 /*
360 *
361 * Writes lines from the database "dbFile" to temporary filenames "tmpNames".
362 */
363 void WriteTempFiles(char *dbFile, char **tmpNames, int tmpFiles, int tmpLines, int total) {
364 FILE *fp, *tp;
365 char c;
366 int lines;
367 int linesWritten;
368  
369 if((fp = fopen(dbFile, "r")) == NULL) {
370 fprintf(stderr, "Unable to open gather database for reading.\n");
371 return;
372 }
373  
374 if((tp = fopen(tmpNames[--tmpFiles], "w+")) == NULL) {
375 fprintf(stderr, "Unable to open temporary file '%s' for writing.\n", tmpNames[tmpFiles]);
376 fclose(fp);
377 return;
378 }
379  
380 if(verbose) {
381 fprintf(stdout, "Writing to temporary files.\r");
382 }
383  
384 linesWritten = 0;
385 lines = 0;
386 while(run && fscanf(fp, "%c", &c) == 1) {
387 #if defined ___AmigaOS___
388 // Check if CTRL+C was pressed and abort the program.
389 if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
390 run = FALSE;
391 }
392 #endif
393 switch(c) {
394 case '\n':
395 // Increment the total written lines.
396 ++linesWritten;
397  
398 if(verbose) {
399 fprintf(stdout, "Writing to temporary files: %d%%.\r", (int)(((float)linesWritten / total) * 100.0));
400 }
401  
402 // Write the newline character back.
403 if(fprintf(tp, "%c", c) != 1) {
404 fprintf(stderr, "Unable to write to temporary file '%s'.\n", tmpNames[tmpFiles]);
405 fclose(tp);
406 fclose(fp);
407 return;
408 }
409 // Switch to the next temporary file.
410 if(++lines >= tmpLines) {
411 // If there are no temporary files left then run till the end.
412 if(tmpFiles - 1 < 0) {
413 break;
414 }
415  
416 // Close the previous temporary file and write to the next temporary file.
417 fclose(tp);
418 if((tp = fopen(tmpNames[--tmpFiles], "w+")) == NULL) {
419 fprintf(stderr, "Unable to open temporary file '%s' for writing.\n", tmpNames[tmpFiles]);
420 fclose(tp);
421 fclose(fp);
422 }
423 lines = 0;
424 break;
425 }
426 break;
427 default:
428 if(fprintf(tp, "%c", c) != 1) {
429 fprintf(stderr, "Unable to write to temporary file '%s'.\n", tmpNames[tmpFiles]);
430 fclose(tp);
431 fclose(fp);
432 return;
433 }
434 break;
435 }
436 }
437  
438 fprintf(stdout, "\n");
439  
440 fclose(tp);
441 fclose(fp);
442 }
443  
/*
 *
 * Skips a line in a database file "fp".
 *
 * Consumes characters up to and including the next newline, or up to the
 * end of the file when no newline is left.
 */
void SkipDatabaseLine(FILE *fp) {
    int ch;

    do {
        ch = fgetc(fp);
    } while(ch != EOF && ch != '\n');
}
459  
460 /*
461 *
462 * Reads a line from the database file "fp".
463 */
464 char *ReadDatabaseLine(FILE *fp) {
465 char c;
466 char *line;
11 office 467 int line_size;
468 int i;
1 office 469  
11 office 470 line_size = LINE_BUF;
471 line = malloc(line_size * sizeof(char));
1 office 472  
11 office 473 i = 0;
1 office 474 while(run && fscanf(fp, "%c", &c) == 1) {
475 #if defined ___AmigaOS___
476 // Check if CTRL+C was pressed and abort the program.
477 if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
478 run = FALSE;
479 }
480 #endif
481 switch(c) {
482 case '\n':
483 // Rewind the file by the number of read characters.
11 office 484 fseek(fp, -(i + 1), SEEK_CUR);
1 office 485 return line;
486 default:
11 office 487 if(strlen(line) == line_size) {
488 line_size = line_size * 1.5;
489 line = realloc(line, line_size * sizeof(char));
490 }
491 //line = realloc(line, (chars + 1 + 1) * sizeof(char));
492 line[i] = c;
493 line[i + 1] = '\0';
1 office 494 break;
495 }
11 office 496 ++i;
1 office 497 }
498  
499 return NULL;
500 }
501  
502 /*
503 *
504 * Merges temporary files "tmpNames" into a database "dbFile".
505 */
506 void MergeDatabase(char *dbFile, char **tmpNames, int files, int lines) {
507 FILE *fp;
508 FILE **tp;
509 int i;
14 office 510 int j;
1 office 511 char *tmp;
14 office 512 char *min;
1 office 513 int count;
514  
515 if((fp = fopen(dbFile, "w+")) == NULL) {
516 fprintf(stderr, "Unable to open gather database for writing.\n");
517 return;
518 }
519  
520 // Allocate as many file pointers as temporary files.
11 office 521 tp = malloc(files * sizeof(FILE *));
1 office 522  
523 // Open all temporary files for reading.
524 for(i = 0; i < files; ++i) {
525 if((tp[i] = fopen(tmpNames[i], "r")) == NULL) {
526 fprintf(stderr, "Unable to open temporary file '%s' for reading.\n", tmpNames[i]);
527 // Close all temporary files.
528 --i;
529 while(i >= 0) {
530 fclose(tp[i]);
531 }
532 return;
533 }
534 }
535  
536 if(verbose) {
537 fprintf(stdout, "Merging all database lines in temporary files.\r");
538 }
539  
540 count = lines;
14 office 541 j = 0;
1 office 542 while(run && --count > -1) {
543 #if defined ___AmigaOS___
544 // Check if CTRL+C was pressed and abort the program.
545 if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
546 run = FALSE;
547 }
548 #endif
549 // Find the smallest line in all temporary files.
550 if(verbose) {
551 fprintf(stdout, "Merging all database lines in temporary files: %d%%.\r", 100 - (int)(((float)count / lines) * 100.0));
552 }
553  
14 office 554 min = NULL;
1 office 555 for(i = 0; i < files; ++i) {
556 tmp = ReadDatabaseLine(tp[i]);
557 if(tmp == NULL) {
558 continue;
559 }
14 office 560 if(min == NULL || strncmp(tmp, min, strlen(tmp)) < 0) {
561 if(min != NULL) {
2 office 562 // Free previous instance.
14 office 563 free(min);
2 office 564 }
14 office 565 min = malloc((strlen(tmp) + 1) * sizeof(char));
566 sprintf(min, "%s", tmp);
1 office 567 // Remember the index of the file where the smallest entry has been found.
14 office 568 j = i;
1 office 569 free(tmp);
570 continue;
571 }
572 free(tmp);
573 }
574  
575 // Forward the file where the smallest line was found.
14 office 576 SkipDatabaseLine(tp[j]);
1 office 577  
578 // Write the smallest line.
14 office 579 if(min != NULL) {
580 fprintf(fp, "%s\n", min);
581 free(min);
1 office 582 }
583 }
584  
585 // Write out any remaining contents from the temporary files.
586 for(i = 0; i < files; ++i) {
587 tmp = ReadDatabaseLine(tp[i]);
588 if(tmp == NULL) {
589 continue;
590 }
591 fprintf(fp, "%s\n", tmp);
14 office 592 free(tmp);
1 office 593 }
594  
595 // Close and delete all temporary files.
596 for(i = 0; i < files; ++i) {
597 fclose(tp[i]);
598 // Delete temporary file.
599 remove(tmpNames[i]);
600 }
601  
602 if(verbose) {
603 fprintf(stdout, "\n");
604 }
605  
606 fclose(fp);
607 }
608  
609 /*
610 *
611 * Indexes a "path" by creating a database "dbFile".
612 */
613 void Gather(char *dbFile, char *path) {
614 stringStack *stack = stringStackCreate(1);
615 stats *stats = malloc(sizeof(stats));
616 char **tmpNames;
617 int dbSize, dbLines, tmpFiles, tmpLines;
618 int i;
619  
620 // Initialize metrics.
621 stats->dirs = 0;
622 stats->files = 0;
623  
624 // Push the first path onto the stack.
625 stringStackPush(stack, path);
626  
627 // Generate the database file.
628 UpdateDatabase(dbFile, stack, stats);
629  
630 // Get the database metrics.
631 dbSize = GetDatabaseSize(dbFile);
632 dbLines = CountDatabaseLines(dbFile);
633  
634 // Compute the amount of temporary files needed.
635 tmpFiles = dbSize / MAX_MEM;
636  
637 // In case no temporary files are required,
638 // just sort the database and terminate.
2 office 639 if(tmpFiles <= 1) {
1 office 640 SortDatabase(dbFile);
641 return;
642 }
643  
644 tmpLines = dbLines / tmpFiles;
645  
646 // Create temporary files.
647 if((tmpNames = CreateTempFiles(tmpFiles)) == NULL) {
648 fprintf(stderr, "Unable to create temporary files.\n");
649 return;
650 }
651  
652 // Write "tmpLines" to temporary files in "tmpFiles" from "dbFile".
653 WriteTempFiles(dbFile, tmpNames, tmpFiles, tmpLines, dbLines);
654  
655 // Sort the temporary files.
656 for(i = 0; i < tmpFiles; ++i) {
657 SortDatabase(tmpNames[i]);
658 }
659  
660 MergeDatabase(dbFile, tmpNames, tmpFiles, dbLines);
661 }
662  
/*
 * Prints the help text to the standard output. "name" is the name the
 * program is referred to in the text.
 *
 * The synopsis matches the option parsing in main(): -q, -h and
 * -d DATABASE are options, and the positional argument is the directory
 * that gets indexed.
 */
void usage(char *name) {
    fprintf(stdout, "Hunt & Gather - %s, a file index generating tool. \n", name);
    fprintf(stdout, " \n");
    fprintf(stdout, "SYNTAX: %s [-q] [-d DATABASE] PATH \n", name);
    fprintf(stdout, " \n");
    fprintf(stdout, " -q Do not print out any messages. \n");
    fprintf(stdout, " -d DATABASE Store the index in DATABASE. \n");
    fprintf(stdout, " -h Print this help text. \n");
    fprintf(stdout, " \n");
    fprintf(stdout, "PATH is the directory to be indexed. \n");
    fprintf(stdout, " \n");
    fprintf(stdout, "DATABASE is a path to where the indexed results will be \n");
    fprintf(stdout, "stored for searching with the Hunt tool. \n");
    fprintf(stdout, " \n");
    fprintf(stdout, "(c) 2021 Wizardry and Steamworks, MIT. \n");
}
675  
1 office 676 /*
677 *
678 * Main entry point.
679 */
680 int main(int argc, char **argv) {
681 int option;
2 office 682 char *dbFile;
10 office 683 struct stat dirStat;
1 office 684  
685 // Bind handler to SIGINT.
686 signal(SIGINT, SignalHandler);
687  
2 office 688 dbFile = DEFAULT_DATABASE_FILE;
689 while((option = getopt(argc, argv, "hqd:")) != -1) {
1 office 690 switch(option) {
2 office 691 case 'd':
692 dbFile = optarg;
693 break;
1 office 694 case 'q':
695 verbose = FALSE;
696 break;
697 case 'h':
11 office 698 usage(argv[0]);
2 office 699 return 0;
1 office 700 case '?':
701 fprintf(stderr, "Invalid option %ct.\n", optopt);
702 return 1;
703 }
704 }
705  
10 office 706  
707 if(optind >= argc) {
11 office 708 usage(argv[0]);
1 office 709 return 1;
710 }
711  
10 office 712 stat(argv[optind], &dirStat);
713 if(!S_ISDIR(dirStat.st_mode)) {
5 office 714 fprintf(stderr, "Path '%s' is not a directory.\n", argv[optind]);
1 office 715 return 1;
716 }
717  
2 office 718 if(verbose) {
719 fprintf(stdout, "Gathering to database file: %s\n", dbFile);
720 }
721  
1 office 722 // Gather.
2 office 723 Gather(dbFile, argv[optind]);
1 office 724  
725 return 0;
726 }