HuntnGather – Blame information for rev 16

Subversion Repositories:
Rev:
Rev Author Line No. Line
1 office 1 ///////////////////////////////////////////////////////////////////////////
2 // Copyright (C) 2021 Wizardry and Steamworks - License: MIT //
3 ///////////////////////////////////////////////////////////////////////////
4  
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <dirent.h>
9 #include <signal.h>
10  
11 #include <sys/types.h>
12 #include <sys/stat.h>
13  
14 #include <proto/dos.h>
15 #include <proto/exec.h>
16  
17 #include "StringStack.h"
18  
5 office 19 #if !defined ___HAVE_GETOPT___
1 office 20 #include "getopt.h"
21 #endif
22  
#if defined ___AmigaOS___
/*
 *
 * Version string used for querying the program version.
 * The build date is pasted in by the preprocessor through __DATE__.
 */
TEXT version_string[] =
    "\0$VER: Gather 1.7 " __DATE__ " by Wizardry and Steamworks";
#endif
1 office 30  
/*
 *
 * Boolean constants, only defined when no included header has provided
 * them already. They deliberately carry no trailing semicolon so that they
 * can be used inside expressions as well as in plain assignments.
 */
#if !defined TRUE
#define TRUE 1
#endif

#if !defined FALSE
#define FALSE 0
#endif

// Gather() divides the database size by this value to decide how many
// temporary files the database is split into before sorting.
#define MAX_MEM 262144
// Initial sizes, in bytes, of the buffers that hold a file name, a path
// and a complete database line; the buffers are grown on demand.
#define NAME_BUF 32
#define PATH_BUF 128
#define LINE_BUF 256
// Database file used when no "-d" option is given on the command line.
#define DEFAULT_DATABASE_FILE "S:gather.db"
1 office 44  
/*
 *
 * Running totals kept while the directory tree is being walked.
 */
typedef struct {
    // Number of directories found so far.
    unsigned int dirs;
    // Number of files written to the database so far.
    unsigned int files;
} stats;
49  
50 int run = TRUE;
51 int verbose = TRUE;
52  
53 void SignalHandler(int sig) {
54 // Toggle the run flag to stop execution.
55 run = FALSE;
56 }
57  
/*
 *
 * qsort() comparator ordering two database lines lexicographically.
 *
 * "a" and "b" each point at an element of an array of "char *", that is,
 * at a pointer to a NUL terminated string.
 *
 * Returns a value less than, equal to or greater than zero when the first
 * string sorts before, equal to or after the second one. The whole strings
 * are compared: comparing only the first strlen(*a) characters would make
 * a string "equal" to every longer string that starts with it, which is
 * not a consistent ordering and must not be handed to qsort().
 */
int compare(const void *a, const void *b) {
    const char *const *lhs = (const char *const *)a;
    const char *const *rhs = (const char *const *)b;

    return strcmp(*lhs, *rhs);
}
63  
64 /*
65 *
66 * Sorts a database file lexicographically.
67 */
68 void SortDatabase(char *dbFile) {
69 FILE *fp;
70 char *name = NULL;
71 char *path = NULL;
72 char **database;
73 char c;
74 int i;
75 int side;
76 unsigned int line;
11 office 77 int name_size;
78 int path_size;
1 office 79  
80 // Open database file for reading.
81 if((fp = fopen(dbFile, "r")) == NULL) {
82 fprintf(stderr, "Unable to open gather database for reading.\n");
83 return;
84 }
85  
11 office 86 database = malloc(sizeof(char *));
87 name_size = NAME_BUF;
88 name = malloc(name_size * sizeof(char));
89 path_size = PATH_BUF;
90 path = malloc(path_size * sizeof(char));
1 office 91 line = 0;
92 side = 0;
93 i = 0;
94  
95 if(verbose) {
96 fprintf(stdout, "Sorting database: '%s'\n", dbFile);
97 }
98  
99 while(run && fscanf(fp, "%c", &c) == 1) {
100 #if defined ___AmigaOS___
101 // Check if CTRL+C was pressed and abort the program.
102 if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
103 run = FALSE;
104 continue;
105 }
106 #endif
107 switch(c) {
108 case '\n':
109 // Load up the name and path into the database variable.
11 office 110 database = realloc(database, (line + 1) * sizeof(char *));
111 database[line] = malloc((strlen(name) + strlen(path) + 1 + 1) * sizeof(char));
1 office 112 sprintf(database[line], "%s\t%s", name, path);
113 ++line;
114  
115 free(name);
11 office 116 name_size = NAME_BUF;
117 name = malloc(name_size * sizeof(char));
1 office 118 --side;
119 i = 0;
120  
121 break;
122 case '\t':
123 free(path);
11 office 124 path_size = PATH_BUF;
125 path = malloc(path_size * sizeof(char));
1 office 126 ++side;
127 i = 0;
128 break;
129 default:
130 switch(side) {
131 case 0:
11 office 132 if(strlen(name) == name_size) {
133 name_size = name_size * 1.5;
134 name = realloc(name, name_size * sizeof(char));
135 }
136 //name = realloc(name, (i + 1 + 1) * sizeof(char));
1 office 137 name[i] = c;
138 name[i + 1] = '\0';
139 break;
140 case 1:
11 office 141 if(strlen(path) == path_size) {
142 path_size = path_size * 1.5;
143 path = realloc(path, path_size * sizeof(char));
144 }
145 //path = realloc(path, (i + 1 + 1) * sizeof(char));
1 office 146 path[i] = c;
147 path[i + 1] = '\0';
148 break;
149 default:
150 fprintf(stderr, "Database corrupted.\n");
151 break;
152 }
153 ++i;
154 break;
155 }
156 }
157  
158 fclose(fp);
159  
160 // Sort the database.
161 qsort(database, line, sizeof(char *), compare);
162  
163 // Write the database lines back to the database.
164 if((fp = fopen(dbFile, "w+")) == NULL) {
165 fprintf(stderr, "Unable to open gather database for writing.\n");
166 return;
167 }
168  
169 for(i = 0; i < line; ++i) {
170 fprintf(fp, "%s\n", database[i]);
171 }
172  
173 free(database);
174 fclose(fp);
175 }
176  
177 /*
178 *
179 * Updates a database file "dbFile".
180 */
181 void UpdateDatabase(char *dbFile, stringStack *dirStack, stats *stats) {
182 FILE *fp;
183 DIR *dir;
184 struct dirent *dirEntry;
185 struct stat dirStat;
186 unsigned int size;
187 char *path;
188 char *subPath;
189  
190 if((fp = fopen(dbFile, "w+")) == NULL) {
191 fprintf(stderr, "Unable to open gather database for writing.\n");
192 return;
193 }
194  
195 while(run && !stringStackIsEmpty(dirStack)) {
196 #if defined ___AmigaOS___
197 // Check if CTRL+C was pressed and abort the program.
198 if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
199 run = FALSE;
200 }
201 #endif
16 office 202 if((path = stringStackPop(dirStack)) == NULL ||
203 strlen(path) == 0) {
1 office 204 return;
205 }
206  
207 if((dir = opendir(path)) == NULL) {
208 return;
209 }
210  
211 while(run && (dirEntry = readdir(dir)) != NULL) {
212 #if defined ___AmigaOS___
213 // Check if CTRL+C was pressed and abort the program.
214 if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
215 run = FALSE;
216 }
217 #endif
218 size = sizeof(path) + sizeof(dirEntry->d_name) + 1;
219 switch(path[strlen(path) - 1]) {
220 case '/':
221 case ':': // This is a drive path.
11 office 222 subPath = malloc(size);
1 office 223 sprintf(subPath, "%s%s", path, dirEntry->d_name);
224 break;
225 default:
11 office 226 subPath = malloc(size + 1);
1 office 227 sprintf(subPath, "%s/%s", path, dirEntry->d_name);
228 break;
229 }
230 stat(subPath, &dirStat);
231 if(S_ISDIR(dirStat.st_mode)) {
232 stringStackPush(dirStack, subPath);
233  
234 ++stats->dirs;
235  
236 if(verbose) {
237 fprintf(stdout,
238 "Gathered %d directories and %d files.\r",
239 stats->dirs,
240 stats->files);
241 }
242  
243 free(subPath);
244 continue;
245 }
246  
247 // Write to database file.
15 office 248  
249 #if defined ___NOCASE_FS___
250 strupr(dirEntry->d_name);
251 #endif
252  
1 office 253 fprintf(fp, "%s\t%s\n", dirEntry->d_name, subPath);
254  
255 ++stats->files;
256  
257 if(verbose) {
258 fprintf(stdout,
259 "Gathered %d directories and %d files.\r",
260 stats->dirs,
261 stats->files);
262 }
263  
264 free(subPath);
265 }
266  
267 closedir(dir);
268 free(path);
269 }
270  
271 if(verbose) {
272 fprintf(stdout, "\n");
273 }
274  
275 fclose(fp);
276  
277 }
278  
/*
 *
 * Gets the size of a database "dbFile".
 *
 * The size is the stream position reported after seeking to the end of
 * the file. Zero is returned, after printing a message to stderr, when
 * the file cannot be opened.
 */
int GetDatabaseSize(char *dbFile) {
    int bytes = 0;
    FILE *db = fopen(dbFile, "r");

    if(db != NULL) {
        // Jump to the end of the file and read the position back.
        fseek(db, 0L, SEEK_END);
        bytes = ftell(db);
        fclose(db);
        return bytes;
    }

    fprintf(stderr, "Unable to open gather database for reading.\n");
    return bytes;
}
298  
/*
 *
 * Counts the lines in a database file "dbFile".
 *
 * Every newline character counts as one line, so text following the last
 * newline is not counted. Zero is returned, after printing a message to
 * stderr, when the file cannot be opened.
 */
int CountDatabaseLines(char *dbFile) {
    FILE *db;
    int newlines;
    int ch;

    db = fopen(dbFile, "r");
    if(db == NULL) {
        fprintf(stderr, "Unable to open gather database for reading.\n");
        return 0;
    }

    // Walk the file one character at a time, tallying the newlines.
    for(newlines = 0; (ch = fgetc(db)) != EOF; ) {
        if(ch == '\n') {
            ++newlines;
        }
    }

    fclose(db);

    return newlines;
}
326  
327 /*
328 *
329 * Creates "files" temporary filenames.
330 */
331 char **CreateTempFiles(int files) {
332 char **tmpNames;
333 int count;
334  
11 office 335 tmpNames = malloc(files * sizeof(char *));
1 office 336  
337 if(verbose) {
338 fprintf(stdout, "Creating temporary files.\r");
339 }
340  
341 count = files;
342 while(--count > -1) {
343 #if defined ___AmigaOS___
344 // Check if CTRL+C was pressed and abort the program.
345 if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
346 run = FALSE;
347 }
348 #endif
349 tmpNames[count] = tmpnam(NULL);
350  
351 if(verbose) {
352 fprintf(stdout, "Creating temporary files: %d%%\r", 100 - (int)(((float)count / files) * 100.0));
353 }
354 }
355  
356 if(verbose) {
357 fprintf(stdout, "\n");
358 }
359  
360 return tmpNames;
361 }
362  
363 /*
364 *
365 * Writes lines from the database "dbFile" to temporary filenames "tmpNames".
366 */
367 void WriteTempFiles(char *dbFile, char **tmpNames, int tmpFiles, int tmpLines, int total) {
368 FILE *fp, *tp;
369 char c;
370 int lines;
371 int linesWritten;
372  
373 if((fp = fopen(dbFile, "r")) == NULL) {
374 fprintf(stderr, "Unable to open gather database for reading.\n");
375 return;
376 }
377  
378 if((tp = fopen(tmpNames[--tmpFiles], "w+")) == NULL) {
379 fprintf(stderr, "Unable to open temporary file '%s' for writing.\n", tmpNames[tmpFiles]);
380 return;
381 }
382  
383 if(verbose) {
384 fprintf(stdout, "Writing to temporary files.\r");
385 }
386  
387 linesWritten = 0;
388 lines = 0;
389 while(run && fscanf(fp, "%c", &c) == 1) {
390 #if defined ___AmigaOS___
391 // Check if CTRL+C was pressed and abort the program.
392 if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
393 run = FALSE;
394 }
395 #endif
396 switch(c) {
397 case '\n':
398 // Increment the total written lines.
399 ++linesWritten;
400  
401 if(verbose) {
402 fprintf(stdout, "Writing to temporary files: %d%%.\r", (int)(((float)linesWritten / total) * 100.0));
403 }
404  
405 // Write the newline character back.
406 if(fprintf(tp, "%c", c) != 1) {
407 fprintf(stderr, "Unable to write to temporary file '%s'.\n", tmpNames[tmpFiles]);
408 fclose(fp);
409 return;
410 }
411 // Switch to the next temporary file.
412 if(++lines >= tmpLines) {
413 // If there are no temporary files left then run till the end.
414 if(tmpFiles - 1 < 0) {
415 break;
416 }
417  
418 // Close the previous temporary file and write to the next temporary file.
419 fclose(tp);
420 if((tp = fopen(tmpNames[--tmpFiles], "w+")) == NULL) {
421 fprintf(stderr, "Unable to open temporary file '%s' for writing.\n", tmpNames[tmpFiles]);
422 fclose(fp);
423 }
424 lines = 0;
425 break;
426 }
427 break;
428 default:
429 if(fprintf(tp, "%c", c) != 1) {
430 fprintf(stderr, "Unable to write to temporary file '%s'.\n", tmpNames[tmpFiles]);
431 fclose(tp);
432 fclose(fp);
433 return;
434 }
435 break;
436 }
437 }
438  
439 fprintf(stdout, "\n");
440  
441 fclose(tp);
442 fclose(fp);
443 }
444  
/*
 *
 * Skips a line in a database file "fp".
 *
 * Characters are consumed up to and including the next newline, or up to
 * the end of the file when no newline follows.
 */
void SkipDatabaseLine(FILE *fp) {
    int ch;

    do {
        ch = fgetc(fp);
    } while(ch != EOF && ch != '\n');
}
460  
461 /*
462 *
463 * Reads a line from the database file "fp".
464 */
465 char *ReadDatabaseLine(FILE *fp) {
466 char c;
467 char *line;
11 office 468 int line_size;
469 int i;
1 office 470  
11 office 471 line_size = LINE_BUF;
472 line = malloc(line_size * sizeof(char));
1 office 473  
11 office 474 i = 0;
1 office 475 while(run && fscanf(fp, "%c", &c) == 1) {
476 #if defined ___AmigaOS___
477 // Check if CTRL+C was pressed and abort the program.
478 if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
479 run = FALSE;
480 }
481 #endif
482 switch(c) {
483 case '\n':
484 // Rewind the file by the number of read characters.
11 office 485 fseek(fp, -(i + 1), SEEK_CUR);
1 office 486 return line;
487 default:
11 office 488 if(strlen(line) == line_size) {
489 line_size = line_size * 1.5;
490 line = realloc(line, line_size * sizeof(char));
491 }
492 //line = realloc(line, (chars + 1 + 1) * sizeof(char));
493 line[i] = c;
494 line[i + 1] = '\0';
1 office 495 break;
496 }
11 office 497 ++i;
1 office 498 }
499  
500 return NULL;
501 }
502  
503 /*
504 *
505 * Merges temporary files "tmpNames" into a database "dbFile".
506 */
507 void MergeDatabase(char *dbFile, char **tmpNames, int files, int lines) {
508 FILE *fp;
509 FILE **tp;
510 int i;
14 office 511 int j;
1 office 512 char *tmp;
14 office 513 char *min;
1 office 514 int count;
515  
516 if((fp = fopen(dbFile, "w+")) == NULL) {
517 fprintf(stderr, "Unable to open gather database for writing.\n");
518 return;
519 }
520  
521 // Allocate as many file pointers as temporary files.
11 office 522 tp = malloc(files * sizeof(FILE *));
1 office 523  
524 // Open all temporary files for reading.
525 for(i = 0; i < files; ++i) {
526 if((tp[i] = fopen(tmpNames[i], "r")) == NULL) {
527 fprintf(stderr, "Unable to open temporary file '%s' for reading.\n", tmpNames[i]);
528 // Close all temporary files.
529 --i;
530 while(i >= 0) {
531 fclose(tp[i]);
532 }
533 return;
534 }
535 }
536  
537 if(verbose) {
538 fprintf(stdout, "Merging all database lines in temporary files.\r");
539 }
540  
541 count = lines;
14 office 542 j = 0;
1 office 543 while(run && --count > -1) {
544 #if defined ___AmigaOS___
545 // Check if CTRL+C was pressed and abort the program.
546 if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
547 run = FALSE;
548 }
549 #endif
550 // Find the smallest line in all temporary files.
551 if(verbose) {
552 fprintf(stdout, "Merging all database lines in temporary files: %d%%.\r", 100 - (int)(((float)count / lines) * 100.0));
553 }
554  
14 office 555 min = NULL;
1 office 556 for(i = 0; i < files; ++i) {
557 tmp = ReadDatabaseLine(tp[i]);
558 if(tmp == NULL) {
559 continue;
560 }
14 office 561 if(min == NULL || strncmp(tmp, min, strlen(tmp)) < 0) {
562 if(min != NULL) {
2 office 563 // Free previous instance.
14 office 564 free(min);
2 office 565 }
14 office 566 min = malloc((strlen(tmp) + 1) * sizeof(char));
567 sprintf(min, "%s", tmp);
1 office 568 // Remember the index of the file where the smallest entry has been found.
14 office 569 j = i;
1 office 570 free(tmp);
571 continue;
572 }
573 free(tmp);
574 }
575  
576 // Forward the file where the smallest line was found.
14 office 577 SkipDatabaseLine(tp[j]);
1 office 578  
579 // Write the smallest line.
14 office 580 if(min != NULL) {
581 fprintf(fp, "%s\n", min);
582 free(min);
1 office 583 }
584 }
585  
586 // Write out any remaining contents from the temporary files.
587 for(i = 0; i < files; ++i) {
588 tmp = ReadDatabaseLine(tp[i]);
589 if(tmp == NULL) {
590 continue;
591 }
592 fprintf(fp, "%s\n", tmp);
14 office 593 free(tmp);
1 office 594 }
595  
596 // Close and delete all temporary files.
597 for(i = 0; i < files; ++i) {
598 fclose(tp[i]);
599 // Delete temporary file.
600 remove(tmpNames[i]);
601 }
602  
603 if(verbose) {
604 fprintf(stdout, "\n");
605 }
606  
607 fclose(fp);
608 }
609  
610 /*
611 *
612 * Indexes a "path" by creating a database "dbFile".
613 */
614 void Gather(char *dbFile, char *path) {
615 stringStack *stack = stringStackCreate(1);
616 stats *stats = malloc(sizeof(stats));
617 char **tmpNames;
618 int dbSize, dbLines, tmpFiles, tmpLines;
619 int i;
620  
621 // Initialize metrics.
622 stats->dirs = 0;
623 stats->files = 0;
624  
625 // Push the first path onto the stack.
626 stringStackPush(stack, path);
627  
628 // Generate the database file.
629 UpdateDatabase(dbFile, stack, stats);
630  
631 // Get the database metrics.
632 dbSize = GetDatabaseSize(dbFile);
633 dbLines = CountDatabaseLines(dbFile);
634  
635 // Compute the amount of temporary files needed.
636 tmpFiles = dbSize / MAX_MEM;
637  
638 // In case no temporary files are required,
639 // just sort the database and terminate.
2 office 640 if(tmpFiles <= 1) {
1 office 641 SortDatabase(dbFile);
642 return;
643 }
644  
645 tmpLines = dbLines / tmpFiles;
646  
647 // Create temporary files.
648 if((tmpNames = CreateTempFiles(tmpFiles)) == NULL) {
649 fprintf(stderr, "Unable to create temporary files.\n");
650 return;
651 }
652  
653 // Write "tmpLines" to temporary files in "tmpFiles" from "dbFile".
654 WriteTempFiles(dbFile, tmpNames, tmpFiles, tmpLines, dbLines);
655  
656 // Sort the temporary files.
657 for(i = 0; i < tmpFiles; ++i) {
658 SortDatabase(tmpNames[i]);
659 }
660  
661 MergeDatabase(dbFile, tmpNames, tmpFiles, dbLines);
662 }
663  
11 office 664 void usage(char *name) {
665 fprintf(stdout, "Hunt & Gather - %s, a file index generating tool. \n", name);
666 fprintf(stdout, " \n");
667 fprintf(stdout, "SYNTAX: %s [-q] DATABASE \n", name);
668 fprintf(stdout, " \n");
669 fprintf(stdout, " -q Do not print out any messages. \n");
670 fprintf(stdout, " \n");
671 fprintf(stdout, "DATABASE is a path to where the indexed results will be \n");
672 fprintf(stdout, "stored for searching with the Hunt tool. \n");
673 fprintf(stdout, " \n");
674 fprintf(stdout, "(c) 2021 Wizardry and Steamworks, MIT. \n");
675 }
676  
1 office 677 /*
678 *
679 * Main entry point.
680 */
681 int main(int argc, char **argv) {
682 int option;
2 office 683 char *dbFile;
10 office 684 struct stat dirStat;
1 office 685  
686 // Bind handler to SIGINT.
687 signal(SIGINT, SignalHandler);
688  
2 office 689 dbFile = DEFAULT_DATABASE_FILE;
690 while((option = getopt(argc, argv, "hqd:")) != -1) {
1 office 691 switch(option) {
2 office 692 case 'd':
693 dbFile = optarg;
694 break;
1 office 695 case 'q':
696 verbose = FALSE;
697 break;
698 case 'h':
11 office 699 usage(argv[0]);
2 office 700 return 0;
1 office 701 case '?':
702 fprintf(stderr, "Invalid option %ct.\n", optopt);
703 return 1;
704 }
705 }
706  
10 office 707  
708 if(optind >= argc) {
11 office 709 usage(argv[0]);
1 office 710 return 1;
711 }
712  
10 office 713 stat(argv[optind], &dirStat);
714 if(!S_ISDIR(dirStat.st_mode)) {
5 office 715 fprintf(stderr, "Path '%s' is not a directory.\n", argv[optind]);
1 office 716 return 1;
717 }
718  
2 office 719 if(verbose) {
720 fprintf(stdout, "Gathering to database file: %s\n", dbFile);
721 }
722  
1 office 723 // Gather.
2 office 724 Gather(dbFile, argv[optind]);
1 office 725  
726 return 0;
727 }