HuntnGather – Blame information for rev 15

Subversion Repositories:
Rev:
Rev Author Line No. Line
1 office 1 ///////////////////////////////////////////////////////////////////////////
2 // Copyright (C) 2021 Wizardry and Steamworks - License: MIT //
3 ///////////////////////////////////////////////////////////////////////////
4  
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <dirent.h>
9 #include <signal.h>
10  
11 #include <sys/types.h>
12 #include <sys/stat.h>
13  
14 #include <proto/dos.h>
15 #include <proto/exec.h>
16  
17 #include "StringStack.h"
18  
5 office 19 #if !defined ___HAVE_GETOPT___
1 office 20 #include "getopt.h"
21 #endif
22  
#if defined ___AmigaOS___
/*
 * Version string used for querying the program version.
 * Only compiled in when building for AmigaOS.
 */
TEXT version_string[] =
    "\0$VER: Gather 1.7 "__DATE__" by Wizardry and Steamworks";
#endif
1 office 30  
/*
 * Boolean constants, only defined when the platform headers have not
 * already provided them. The values are parenthesis-free plain integers
 * without a trailing semicolon so that they can be used inside
 * expressions (comparisons, arguments, initializers) as well as in
 * assignments.
 */
#if !defined TRUE
#define TRUE 1
#endif

#if !defined FALSE
#define FALSE 0
#endif

// Database size in bytes above which sorting is split over temporary files.
#define MAX_MEM 262144
// Initial buffer sizes (in bytes) for a name, a path and a whole line.
#define NAME_BUF 32
#define PATH_BUF 128
#define LINE_BUF 256
// Database used when no "-d" option is given on the command line.
#define DEFAULT_DATABASE_FILE "S:gather.db"
1 office 44  
/*
 * Running totals collected while a directory tree is being indexed.
 */
typedef struct {
    // Number of directories encountered so far.
    unsigned int dirs;
    // Number of files written to the database so far.
    unsigned int files;
} stats;
49  
50 int run = TRUE;
51 int verbose = TRUE;
52  
53 void SignalHandler(int sig) {
54 // Toggle the run flag to stop execution.
55 run = FALSE;
56 }
57  
/*
 *
 * qsort() comparison callback for an array of C strings.
 *
 * "a" and "b" point at two elements of the array (each element being a
 * "char *"). Returns a negative, zero or positive value when the first
 * string sorts before, equal to or after the second one.
 *
 * The whole strings are compared: comparing only the first strlen(*a)
 * characters would report a string as equal to any longer string it is
 * a prefix of while the reverse comparison reports "greater", which is
 * not a consistent ordering for qsort().
 */
int compare(const void *a, const void *b) {
    const char *const *lhs = (const char *const *)a;
    const char *const *rhs = (const char *const *)b;

    return strcmp(*lhs, *rhs);
}
63  
64 /*
65 *
66 * Sorts a database file lexicographically.
67 */
68 void SortDatabase(char *dbFile) {
69 FILE *fp;
70 char *name = NULL;
71 char *path = NULL;
72 char **database;
73 char c;
74 int i;
75 int side;
76 unsigned int line;
11 office 77 int name_size;
78 int path_size;
1 office 79  
80 // Open database file for reading.
81 if((fp = fopen(dbFile, "r")) == NULL) {
82 fprintf(stderr, "Unable to open gather database for reading.\n");
83 return;
84 }
85  
11 office 86 database = malloc(sizeof(char *));
87 name_size = NAME_BUF;
88 name = malloc(name_size * sizeof(char));
89 path_size = PATH_BUF;
90 path = malloc(path_size * sizeof(char));
1 office 91 line = 0;
92 side = 0;
93 i = 0;
94  
95 if(verbose) {
96 fprintf(stdout, "Sorting database: '%s'\n", dbFile);
97 }
98  
99 while(run && fscanf(fp, "%c", &c) == 1) {
100 #if defined ___AmigaOS___
101 // Check if CTRL+C was pressed and abort the program.
102 if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
103 run = FALSE;
104 continue;
105 }
106 #endif
107 switch(c) {
108 case '\n':
109 // Load up the name and path into the database variable.
11 office 110 database = realloc(database, (line + 1) * sizeof(char *));
111 database[line] = malloc((strlen(name) + strlen(path) + 1 + 1) * sizeof(char));
1 office 112 sprintf(database[line], "%s\t%s", name, path);
113 ++line;
114  
115 free(name);
11 office 116 name_size = NAME_BUF;
117 name = malloc(name_size * sizeof(char));
1 office 118 --side;
119 i = 0;
120  
121 break;
122 case '\t':
123 free(path);
11 office 124 path_size = PATH_BUF;
125 path = malloc(path_size * sizeof(char));
1 office 126 ++side;
127 i = 0;
128 break;
129 default:
130 switch(side) {
131 case 0:
11 office 132 if(strlen(name) == name_size) {
133 name_size = name_size * 1.5;
134 name = realloc(name, name_size * sizeof(char));
135 }
136 //name = realloc(name, (i + 1 + 1) * sizeof(char));
1 office 137 name[i] = c;
138 name[i + 1] = '\0';
139 break;
140 case 1:
11 office 141 if(strlen(path) == path_size) {
142 path_size = path_size * 1.5;
143 path = realloc(path, path_size * sizeof(char));
144 }
145 //path = realloc(path, (i + 1 + 1) * sizeof(char));
1 office 146 path[i] = c;
147 path[i + 1] = '\0';
148 break;
149 default:
150 fprintf(stderr, "Database corrupted.\n");
151 break;
152 }
153 ++i;
154 break;
155 }
156 }
157  
158 fclose(fp);
159  
160 // Sort the database.
161 qsort(database, line, sizeof(char *), compare);
162  
163 // Write the database lines back to the database.
164 if((fp = fopen(dbFile, "w+")) == NULL) {
165 fprintf(stderr, "Unable to open gather database for writing.\n");
166 return;
167 }
168  
169 for(i = 0; i < line; ++i) {
170 fprintf(fp, "%s\n", database[i]);
171 }
172  
173 free(database);
174 fclose(fp);
175 }
176  
177 /*
178 *
179 * Updates a database file "dbFile".
180 */
181 void UpdateDatabase(char *dbFile, stringStack *dirStack, stats *stats) {
182 FILE *fp;
183 DIR *dir;
184 struct dirent *dirEntry;
185 struct stat dirStat;
186 unsigned int size;
187 char *path;
188 char *subPath;
189  
190 if((fp = fopen(dbFile, "w+")) == NULL) {
191 fprintf(stderr, "Unable to open gather database for writing.\n");
192 return;
193 }
194  
195 while(run && !stringStackIsEmpty(dirStack)) {
196 #if defined ___AmigaOS___
197 // Check if CTRL+C was pressed and abort the program.
198 if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
199 run = FALSE;
200 }
201 #endif
202 if((path = stringStackPop(dirStack)) == NULL) {
203 return;
204 }
205  
206 if((dir = opendir(path)) == NULL) {
207 return;
208 }
209  
210 while(run && (dirEntry = readdir(dir)) != NULL) {
211 #if defined ___AmigaOS___
212 // Check if CTRL+C was pressed and abort the program.
213 if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
214 run = FALSE;
215 }
216 #endif
217 size = sizeof(path) + sizeof(dirEntry->d_name) + 1;
218 switch(path[strlen(path) - 1]) {
219 case '/':
220 case ':': // This is a drive path.
11 office 221 subPath = malloc(size);
1 office 222 sprintf(subPath, "%s%s", path, dirEntry->d_name);
223 break;
224 default:
11 office 225 subPath = malloc(size + 1);
1 office 226 sprintf(subPath, "%s/%s", path, dirEntry->d_name);
227 break;
228 }
229 stat(subPath, &dirStat);
230 if(S_ISDIR(dirStat.st_mode)) {
231 stringStackPush(dirStack, subPath);
232  
233 ++stats->dirs;
234  
235 if(verbose) {
236 fprintf(stdout,
237 "Gathered %d directories and %d files.\r",
238 stats->dirs,
239 stats->files);
240 }
241  
242 free(subPath);
243 continue;
244 }
245  
246 // Write to database file.
15 office 247  
248 #if defined ___NOCASE_FS___
249 strupr(dirEntry->d_name);
250 #endif
251  
1 office 252 fprintf(fp, "%s\t%s\n", dirEntry->d_name, subPath);
253  
254 ++stats->files;
255  
256 if(verbose) {
257 fprintf(stdout,
258 "Gathered %d directories and %d files.\r",
259 stats->dirs,
260 stats->files);
261 }
262  
263 free(subPath);
264 }
265  
266 closedir(dir);
267 free(path);
268 }
269  
270 if(verbose) {
271 fprintf(stdout, "\n");
272 }
273  
274 fclose(fp);
275  
276 }
277  
/*
 *
 * Gets the size of a database "dbFile".
 *
 * Returns the size in bytes as reported by ftell() at the end of the
 * file, or 0 when the file cannot be opened or its size cannot be
 * determined.
 */
int GetDatabaseSize(char *dbFile) {
    FILE *fp;
    long size;

    if((fp = fopen(dbFile, "r")) == NULL) {
        // There is no stream to close here: fclose(NULL) is undefined.
        fprintf(stderr, "Unable to open gather database for reading.\n");
        return 0;
    }

    size = -1L;
    if(fseek(fp, 0L, SEEK_END) == 0) {
        size = ftell(fp);
    }

    fclose(fp);

    if(size < 0) {
        return 0;
    }

    return (int)size;
}
298  
/*
 *
 * Counts the lines in a database file "dbFile".
 *
 * Returns the number of newline characters in the file, or 0 when the
 * file cannot be opened. A final line without a newline is not counted.
 */
int CountDatabaseLines(char *dbFile) {
    FILE *fp;
    int lines;
    int c;

    if((fp = fopen(dbFile, "r")) == NULL) {
        // There is no stream to close here: fclose(NULL) is undefined.
        fprintf(stderr, "Unable to open gather database for reading.\n");
        return 0;
    }

    lines = 0;
    while((c = fgetc(fp)) != EOF) {
        if(c == '\n') {
            ++lines;
        }
    }

    fclose(fp);

    return lines;
}
327  
328 /*
329 *
330 * Creates "files" temporary filenames.
331 */
332 char **CreateTempFiles(int files) {
333 char **tmpNames;
334 int count;
335  
11 office 336 tmpNames = malloc(files * sizeof(char *));
1 office 337  
338 if(verbose) {
339 fprintf(stdout, "Creating temporary files.\r");
340 }
341  
342 count = files;
343 while(--count > -1) {
344 #if defined ___AmigaOS___
345 // Check if CTRL+C was pressed and abort the program.
346 if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
347 run = FALSE;
348 }
349 #endif
350 tmpNames[count] = tmpnam(NULL);
351  
352 if(verbose) {
353 fprintf(stdout, "Creating temporary files: %d%%\r", 100 - (int)(((float)count / files) * 100.0));
354 }
355 }
356  
357 if(verbose) {
358 fprintf(stdout, "\n");
359 }
360  
361 return tmpNames;
362 }
363  
364 /*
365 *
366 * Writes lines from the database "dbFile" to temporary filenames "tmpNames".
367 */
368 void WriteTempFiles(char *dbFile, char **tmpNames, int tmpFiles, int tmpLines, int total) {
369 FILE *fp, *tp;
370 char c;
371 int lines;
372 int linesWritten;
373  
374 if((fp = fopen(dbFile, "r")) == NULL) {
375 fprintf(stderr, "Unable to open gather database for reading.\n");
376 return;
377 }
378  
379 if((tp = fopen(tmpNames[--tmpFiles], "w+")) == NULL) {
380 fprintf(stderr, "Unable to open temporary file '%s' for writing.\n", tmpNames[tmpFiles]);
381 fclose(fp);
382 return;
383 }
384  
385 if(verbose) {
386 fprintf(stdout, "Writing to temporary files.\r");
387 }
388  
389 linesWritten = 0;
390 lines = 0;
391 while(run && fscanf(fp, "%c", &c) == 1) {
392 #if defined ___AmigaOS___
393 // Check if CTRL+C was pressed and abort the program.
394 if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
395 run = FALSE;
396 }
397 #endif
398 switch(c) {
399 case '\n':
400 // Increment the total written lines.
401 ++linesWritten;
402  
403 if(verbose) {
404 fprintf(stdout, "Writing to temporary files: %d%%.\r", (int)(((float)linesWritten / total) * 100.0));
405 }
406  
407 // Write the newline character back.
408 if(fprintf(tp, "%c", c) != 1) {
409 fprintf(stderr, "Unable to write to temporary file '%s'.\n", tmpNames[tmpFiles]);
410 fclose(tp);
411 fclose(fp);
412 return;
413 }
414 // Switch to the next temporary file.
415 if(++lines >= tmpLines) {
416 // If there are no temporary files left then run till the end.
417 if(tmpFiles - 1 < 0) {
418 break;
419 }
420  
421 // Close the previous temporary file and write to the next temporary file.
422 fclose(tp);
423 if((tp = fopen(tmpNames[--tmpFiles], "w+")) == NULL) {
424 fprintf(stderr, "Unable to open temporary file '%s' for writing.\n", tmpNames[tmpFiles]);
425 fclose(tp);
426 fclose(fp);
427 }
428 lines = 0;
429 break;
430 }
431 break;
432 default:
433 if(fprintf(tp, "%c", c) != 1) {
434 fprintf(stderr, "Unable to write to temporary file '%s'.\n", tmpNames[tmpFiles]);
435 fclose(tp);
436 fclose(fp);
437 return;
438 }
439 break;
440 }
441 }
442  
443 fprintf(stdout, "\n");
444  
445 fclose(tp);
446 fclose(fp);
447 }
448  
/*
 *
 * Skips a line in a database file "fp".
 *
 * Characters are consumed up to and including the next newline, or up
 * to the end of the file when no newline follows.
 */
void SkipDatabaseLine(FILE *fp) {
    int ch;

    do {
        ch = fgetc(fp);
    } while(ch != EOF && ch != '\n');
}
464  
465 /*
466 *
467 * Reads a line from the database file "fp".
468 */
469 char *ReadDatabaseLine(FILE *fp) {
470 char c;
471 char *line;
11 office 472 int line_size;
473 int i;
1 office 474  
11 office 475 line_size = LINE_BUF;
476 line = malloc(line_size * sizeof(char));
1 office 477  
11 office 478 i = 0;
1 office 479 while(run && fscanf(fp, "%c", &c) == 1) {
480 #if defined ___AmigaOS___
481 // Check if CTRL+C was pressed and abort the program.
482 if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
483 run = FALSE;
484 }
485 #endif
486 switch(c) {
487 case '\n':
488 // Rewind the file by the number of read characters.
11 office 489 fseek(fp, -(i + 1), SEEK_CUR);
1 office 490 return line;
491 default:
11 office 492 if(strlen(line) == line_size) {
493 line_size = line_size * 1.5;
494 line = realloc(line, line_size * sizeof(char));
495 }
496 //line = realloc(line, (chars + 1 + 1) * sizeof(char));
497 line[i] = c;
498 line[i + 1] = '\0';
1 office 499 break;
500 }
11 office 501 ++i;
1 office 502 }
503  
504 return NULL;
505 }
506  
507 /*
508 *
509 * Merges temporary files "tmpNames" into a database "dbFile".
510 */
511 void MergeDatabase(char *dbFile, char **tmpNames, int files, int lines) {
512 FILE *fp;
513 FILE **tp;
514 int i;
14 office 515 int j;
1 office 516 char *tmp;
14 office 517 char *min;
1 office 518 int count;
519  
520 if((fp = fopen(dbFile, "w+")) == NULL) {
521 fprintf(stderr, "Unable to open gather database for writing.\n");
522 return;
523 }
524  
525 // Allocate as many file pointers as temporary files.
11 office 526 tp = malloc(files * sizeof(FILE *));
1 office 527  
528 // Open all temporary files for reading.
529 for(i = 0; i < files; ++i) {
530 if((tp[i] = fopen(tmpNames[i], "r")) == NULL) {
531 fprintf(stderr, "Unable to open temporary file '%s' for reading.\n", tmpNames[i]);
532 // Close all temporary files.
533 --i;
534 while(i >= 0) {
535 fclose(tp[i]);
536 }
537 return;
538 }
539 }
540  
541 if(verbose) {
542 fprintf(stdout, "Merging all database lines in temporary files.\r");
543 }
544  
545 count = lines;
14 office 546 j = 0;
1 office 547 while(run && --count > -1) {
548 #if defined ___AmigaOS___
549 // Check if CTRL+C was pressed and abort the program.
550 if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
551 run = FALSE;
552 }
553 #endif
554 // Find the smallest line in all temporary files.
555 if(verbose) {
556 fprintf(stdout, "Merging all database lines in temporary files: %d%%.\r", 100 - (int)(((float)count / lines) * 100.0));
557 }
558  
14 office 559 min = NULL;
1 office 560 for(i = 0; i < files; ++i) {
561 tmp = ReadDatabaseLine(tp[i]);
562 if(tmp == NULL) {
563 continue;
564 }
14 office 565 if(min == NULL || strncmp(tmp, min, strlen(tmp)) < 0) {
566 if(min != NULL) {
2 office 567 // Free previous instance.
14 office 568 free(min);
2 office 569 }
14 office 570 min = malloc((strlen(tmp) + 1) * sizeof(char));
571 sprintf(min, "%s", tmp);
1 office 572 // Remember the index of the file where the smallest entry has been found.
14 office 573 j = i;
1 office 574 free(tmp);
575 continue;
576 }
577 free(tmp);
578 }
579  
580 // Forward the file where the smallest line was found.
14 office 581 SkipDatabaseLine(tp[j]);
1 office 582  
583 // Write the smallest line.
14 office 584 if(min != NULL) {
585 fprintf(fp, "%s\n", min);
586 free(min);
1 office 587 }
588 }
589  
590 // Write out any remaining contents from the temporary files.
591 for(i = 0; i < files; ++i) {
592 tmp = ReadDatabaseLine(tp[i]);
593 if(tmp == NULL) {
594 continue;
595 }
596 fprintf(fp, "%s\n", tmp);
14 office 597 free(tmp);
1 office 598 }
599  
600 // Close and delete all temporary files.
601 for(i = 0; i < files; ++i) {
602 fclose(tp[i]);
603 // Delete temporary file.
604 remove(tmpNames[i]);
605 }
606  
607 if(verbose) {
608 fprintf(stdout, "\n");
609 }
610  
611 fclose(fp);
612 }
613  
614 /*
615 *
616 * Indexes a "path" by creating a database "dbFile".
617 */
618 void Gather(char *dbFile, char *path) {
619 stringStack *stack = stringStackCreate(1);
620 stats *stats = malloc(sizeof(stats));
621 char **tmpNames;
622 int dbSize, dbLines, tmpFiles, tmpLines;
623 int i;
624  
625 // Initialize metrics.
626 stats->dirs = 0;
627 stats->files = 0;
628  
629 // Push the first path onto the stack.
630 stringStackPush(stack, path);
631  
632 // Generate the database file.
633 UpdateDatabase(dbFile, stack, stats);
634  
635 // Get the database metrics.
636 dbSize = GetDatabaseSize(dbFile);
637 dbLines = CountDatabaseLines(dbFile);
638  
639 // Compute the amount of temporary files needed.
640 tmpFiles = dbSize / MAX_MEM;
641  
642 // In case no temporary files are required,
643 // just sort the database and terminate.
2 office 644 if(tmpFiles <= 1) {
1 office 645 SortDatabase(dbFile);
646 return;
647 }
648  
649 tmpLines = dbLines / tmpFiles;
650  
651 // Create temporary files.
652 if((tmpNames = CreateTempFiles(tmpFiles)) == NULL) {
653 fprintf(stderr, "Unable to create temporary files.\n");
654 return;
655 }
656  
657 // Write "tmpLines" to temporary files in "tmpFiles" from "dbFile".
658 WriteTempFiles(dbFile, tmpNames, tmpFiles, tmpLines, dbLines);
659  
660 // Sort the temporary files.
661 for(i = 0; i < tmpFiles; ++i) {
662 SortDatabase(tmpNames[i]);
663 }
664  
665 MergeDatabase(dbFile, tmpNames, tmpFiles, dbLines);
666 }
667  
/*
 *
 * Prints the command line help to stdout; "name" is the program name
 * shown in the banner and in the syntax line.
 *
 * The text mirrors the options that main() parses: "-q", "-h",
 * "-d DATABASE" and one positional argument naming the directory to
 * index.
 */
void usage(char *name) {
    fprintf(stdout, "Hunt & Gather - %s, a file index generating tool.\n", name);
    fprintf(stdout, "\n");
    fprintf(stdout, "SYNTAX: %s [-q] [-d DATABASE] PATH\n", name);
    fprintf(stdout, "\n");
    fprintf(stdout, "    -q           Do not print out any messages.\n");
    fprintf(stdout, "    -d DATABASE  Write the index to DATABASE.\n");
    fprintf(stdout, "    -h           Print this help and exit.\n");
    fprintf(stdout, "\n");
    fprintf(stdout, "PATH is the directory to index.\n");
    fprintf(stdout, "DATABASE is a path to where the indexed results will be\n");
    fprintf(stdout, "stored for searching with the Hunt tool.\n");
    fprintf(stdout, "\n");
    fprintf(stdout, "(c) 2021 Wizardry and Steamworks, MIT.\n");
}
680  
1 office 681 /*
682 *
683 * Main entry point.
684 */
685 int main(int argc, char **argv) {
686 int option;
2 office 687 char *dbFile;
10 office 688 struct stat dirStat;
1 office 689  
690 // Bind handler to SIGINT.
691 signal(SIGINT, SignalHandler);
692  
2 office 693 dbFile = DEFAULT_DATABASE_FILE;
694 while((option = getopt(argc, argv, "hqd:")) != -1) {
1 office 695 switch(option) {
2 office 696 case 'd':
697 dbFile = optarg;
698 break;
1 office 699 case 'q':
700 verbose = FALSE;
701 break;
702 case 'h':
11 office 703 usage(argv[0]);
2 office 704 return 0;
1 office 705 case '?':
706 fprintf(stderr, "Invalid option %ct.\n", optopt);
707 return 1;
708 }
709 }
710  
10 office 711  
712 if(optind >= argc) {
11 office 713 usage(argv[0]);
1 office 714 return 1;
715 }
716  
10 office 717 stat(argv[optind], &dirStat);
718 if(!S_ISDIR(dirStat.st_mode)) {
5 office 719 fprintf(stderr, "Path '%s' is not a directory.\n", argv[optind]);
1 office 720 return 1;
721 }
722  
2 office 723 if(verbose) {
724 fprintf(stdout, "Gathering to database file: %s\n", dbFile);
725 }
726  
1 office 727 // Gather.
2 office 728 Gather(dbFile, argv[optind]);
1 office 729  
730 return 0;
731 }