HuntnGather – Blame information for rev 19

Subversion Repositories:
Rev:
Rev Author Line No. Line
1 office 1 ///////////////////////////////////////////////////////////////////////////
2 // Copyright (C) 2021 Wizardry and Steamworks - License: MIT //
3 ///////////////////////////////////////////////////////////////////////////
4  
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <dirent.h>
9 #include <signal.h>
10  
11 #include <sys/types.h>
12 #include <sys/stat.h>
19 office 13 #include <sys/syslimits.h>
1 office 14  
15 #include <proto/dos.h>
16 #include <proto/exec.h>
17  
18 #include "StringStack.h"
19  
5 office 20 #if !defined ___HAVE_GETOPT___
1 office 21 #include "getopt.h"
22 #endif
23  
19 office 24 #define PROGRAM_VERSION "1.7.2"
25  
5 office 26 #if defined ___AmigaOS___
27 /*************************************************************************/
28 /* Version string used for querying the program version. */
29 /*************************************************************************/
30 TEXT version_string[] =
19 office 31 "\0$VER: Gather " PROGRAM_VERSION " "__DATE__" by Wizardry and Steamworks";
5 office 32 #endif
1 office 33  
// Boolean constants for platforms whose headers do not provide them. They
// expand to plain expressions (no trailing semicolon) so that they can be
// used inside conditions and larger expressions as well as in assignments.
#if !defined TRUE
#define TRUE 1
#endif

#if !defined FALSE
#define FALSE 0
#endif
41  
// Divisor, in bytes, that Gather applies to the database size in order to
// decide how many temporary files the sort is split across.
#define MAX_MEM (256 * 1024)
// Initial buffer sizes, in characters, for names, paths and whole lines.
#define NAME_BUF (32)
#define PATH_BUF (128)
#define LINE_BUF (256)
// Database used when no other database file is requested.
#define DEFAULT_DATABASE_FILE "S:gather.db"
1 office 47  
// Running totals kept while the directory tree is being walked.
typedef struct {
    unsigned dirs;  // directories encountered
    unsigned files; // files written to the database
} stats;
52  
53 int run = TRUE;
54 int verbose = TRUE;
55  
56 void SignalHandler(int sig) {
57 // Toggle the run flag to stop execution.
58 run = FALSE;
59 }
60  
/*
 *
 * qsort() comparator ordering two database lines lexicographically.
 *
 * "a" and "b" each point to an element of an array of C strings. The
 * result is negative, zero or positive when the first line sorts before,
 * equal to or after the second one.
 */
int compare(const void *a, const void *b) {
    const char *const *p = a;
    const char *const *q = b;

    // Compare the complete strings: bounding the comparison by the length
    // of the first line makes a line equal to every longer line it is a
    // prefix of, which gives qsort() an inconsistent ordering.
    return strcmp(*p, *q);
}
66  
67 /*
68 *
69 * Sorts a database file lexicographically.
70 */
71 void SortDatabase(char *dbFile) {
72 FILE *fp;
73 char *name = NULL;
74 char *path = NULL;
75 char **database;
76 char c;
77 int i;
78 int side;
79 unsigned int line;
11 office 80 int name_size;
81 int path_size;
1 office 82  
83 // Open database file for reading.
84 if((fp = fopen(dbFile, "r")) == NULL) {
85 fprintf(stderr, "Unable to open gather database for reading.\n");
86 return;
87 }
88  
19 office 89 database = malloc(sizeof(*database));
11 office 90 name_size = NAME_BUF;
19 office 91 name = malloc(name_size * sizeof(*name));
11 office 92 path_size = PATH_BUF;
19 office 93 path = malloc(path_size * sizeof(*path));
1 office 94 line = 0;
95 side = 0;
96 i = 0;
97  
98 if(verbose) {
99 fprintf(stdout, "Sorting database: '%s'\n", dbFile);
100 }
101  
102 while(run && fscanf(fp, "%c", &c) == 1) {
103 #if defined ___AmigaOS___
104 // Check if CTRL+C was pressed and abort the program.
105 if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
106 run = FALSE;
107 continue;
108 }
109 #endif
110 switch(c) {
111 case '\n':
112 // Load up the name and path into the database variable.
19 office 113 database = realloc(database, (line + 1) * sizeof(*database));
114 database[line] = malloc((strlen(name) + strlen(path) + 1 + 1) * sizeof(*database[line]));
1 office 115 sprintf(database[line], "%s\t%s", name, path);
116 ++line;
117  
118 free(name);
11 office 119 name_size = NAME_BUF;
19 office 120 name = malloc(name_size * sizeof(*name));
1 office 121 --side;
122 i = 0;
123  
124 break;
125 case '\t':
126 free(path);
11 office 127 path_size = PATH_BUF;
19 office 128 path = malloc(path_size * sizeof(*path));
1 office 129 ++side;
130 i = 0;
131 break;
132 default:
133 switch(side) {
134 case 0:
11 office 135 if(strlen(name) == name_size) {
136 name_size = name_size * 1.5;
19 office 137 name = realloc(name, name_size * sizeof(*name));
11 office 138 }
139 //name = realloc(name, (i + 1 + 1) * sizeof(char));
1 office 140 name[i] = c;
141 name[i + 1] = '\0';
142 break;
143 case 1:
11 office 144 if(strlen(path) == path_size) {
145 path_size = path_size * 1.5;
19 office 146 path = realloc(path, path_size * sizeof(*path));
11 office 147 }
148 //path = realloc(path, (i + 1 + 1) * sizeof(char));
1 office 149 path[i] = c;
150 path[i + 1] = '\0';
151 break;
152 default:
153 fprintf(stderr, "Database corrupted.\n");
154 break;
155 }
156 ++i;
157 break;
158 }
159 }
160  
161 fclose(fp);
162  
163 // Sort the database.
164 qsort(database, line, sizeof(char *), compare);
165  
166 // Write the database lines back to the database.
167 if((fp = fopen(dbFile, "w+")) == NULL) {
168 fprintf(stderr, "Unable to open gather database for writing.\n");
169 return;
170 }
171  
172 for(i = 0; i < line; ++i) {
173 fprintf(fp, "%s\n", database[i]);
174 }
175  
176 free(database);
177 fclose(fp);
178 }
179  
180 /*
181 *
182 * Updates a database file "dbFile".
183 */
184 void UpdateDatabase(char *dbFile, stringStack *dirStack, stats *stats) {
185 FILE *fp;
186 DIR *dir;
187 struct dirent *dirEntry;
188 struct stat dirStat;
189 unsigned int size;
190 char *path;
191 char *subPath;
192  
193 if((fp = fopen(dbFile, "w+")) == NULL) {
194 fprintf(stderr, "Unable to open gather database for writing.\n");
195 return;
196 }
197  
198 while(run && !stringStackIsEmpty(dirStack)) {
199 #if defined ___AmigaOS___
200 // Check if CTRL+C was pressed and abort the program.
201 if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
202 run = FALSE;
203 }
204 #endif
19 office 205 if((path = stringStackPop(dirStack)) == NULL) {
1 office 206 return;
207 }
208  
209 if((dir = opendir(path)) == NULL) {
210 return;
211 }
212  
213 while(run && (dirEntry = readdir(dir)) != NULL) {
214 #if defined ___AmigaOS___
215 // Check if CTRL+C was pressed and abort the program.
216 if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
217 run = FALSE;
218 }
219 #endif
220 size = sizeof(path) + sizeof(dirEntry->d_name) + 1;
221 switch(path[strlen(path) - 1]) {
222 case '/':
223 case ':': // This is a drive path.
11 office 224 subPath = malloc(size);
1 office 225 sprintf(subPath, "%s%s", path, dirEntry->d_name);
226 break;
227 default:
11 office 228 subPath = malloc(size + 1);
1 office 229 sprintf(subPath, "%s/%s", path, dirEntry->d_name);
230 break;
231 }
232 stat(subPath, &dirStat);
233 if(S_ISDIR(dirStat.st_mode)) {
234 stringStackPush(dirStack, subPath);
235  
236 ++stats->dirs;
237  
238 if(verbose) {
239 fprintf(stdout,
240 "Gathered %d directories and %d files.\r",
241 stats->dirs,
242 stats->files);
243 }
244  
245 free(subPath);
246 continue;
247 }
248  
249 // Write to database file.
15 office 250  
251 #if defined ___NOCASE_FS___
252 strupr(dirEntry->d_name);
253 #endif
254  
1 office 255 fprintf(fp, "%s\t%s\n", dirEntry->d_name, subPath);
256  
257 ++stats->files;
258  
259 if(verbose) {
260 fprintf(stdout,
261 "Gathered %d directories and %d files.\r",
262 stats->dirs,
263 stats->files);
264 }
265  
266 free(subPath);
267 }
268  
269 closedir(dir);
270 free(path);
271 }
272  
273 if(verbose) {
274 fprintf(stdout, "\n");
275 }
276  
277 fclose(fp);
278  
279 }
280  
/*
 *
 * Gets the size of a database "dbFile".
 *
 * Returns the size in bytes, or 0 when the file cannot be opened or its
 * size cannot be determined.
 */
int GetDatabaseSize(char *dbFile) {
    FILE *fp;
    long size;

    // There is no stream to close when the open fails.
    if((fp = fopen(dbFile, "r")) == NULL) {
        fprintf(stderr, "Unable to open gather database for reading.\n");
        return 0;
    }

    size = -1L;
    if(fseek(fp, 0L, SEEK_END) == 0) {
        size = ftell(fp);
    }

    fclose(fp);

    if(size < 0) {
        fprintf(stderr, "Unable to determine the size of the gather database.\n");
        return 0;
    }

    return (int)size;
}
301  
/*
 *
 * Counts the lines in a database file "dbFile".
 *
 * Returns the number of newline characters in the file, or 0 when the
 * file cannot be opened.
 */
int CountDatabaseLines(char *dbFile) {
    FILE *fp;
    int lines;
    int c;

    // There is no stream to close when the open fails.
    if((fp = fopen(dbFile, "r")) == NULL) {
        fprintf(stderr, "Unable to open gather database for reading.\n");
        return 0;
    }

    lines = 0;
    while((c = fgetc(fp)) != EOF) {
        if(c == '\n') {
            ++lines;
        }
    }

    fclose(fp);

    return lines;
}
330  
331 /*
332 *
333 * Creates "files" temporary filenames.
334 */
335 char **CreateTempFiles(int files) {
336 char **tmpNames;
337 int count;
338  
11 office 339 tmpNames = malloc(files * sizeof(char *));
1 office 340  
341 if(verbose) {
342 fprintf(stdout, "Creating temporary files.\r");
343 }
344  
345 count = files;
346 while(--count > -1) {
347 #if defined ___AmigaOS___
348 // Check if CTRL+C was pressed and abort the program.
349 if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
350 run = FALSE;
351 }
352 #endif
353 tmpNames[count] = tmpnam(NULL);
354  
355 if(verbose) {
356 fprintf(stdout, "Creating temporary files: %d%%\r", 100 - (int)(((float)count / files) * 100.0));
357 }
358 }
359  
360 if(verbose) {
361 fprintf(stdout, "\n");
362 }
363  
364 return tmpNames;
365 }
366  
367 /*
368 *
369 * Writes lines from the database "dbFile" to temporary filenames "tmpNames".
370 */
371 void WriteTempFiles(char *dbFile, char **tmpNames, int tmpFiles, int tmpLines, int total) {
372 FILE *fp, *tp;
373 char c;
374 int lines;
375 int linesWritten;
376  
377 if((fp = fopen(dbFile, "r")) == NULL) {
378 fprintf(stderr, "Unable to open gather database for reading.\n");
379 return;
380 }
381  
382 if((tp = fopen(tmpNames[--tmpFiles], "w+")) == NULL) {
383 fprintf(stderr, "Unable to open temporary file '%s' for writing.\n", tmpNames[tmpFiles]);
19 office 384 fclose(fp);
1 office 385 return;
386 }
387  
388 if(verbose) {
389 fprintf(stdout, "Writing to temporary files.\r");
390 }
391  
392 linesWritten = 0;
393 lines = 0;
394 while(run && fscanf(fp, "%c", &c) == 1) {
395 #if defined ___AmigaOS___
396 // Check if CTRL+C was pressed and abort the program.
397 if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
398 run = FALSE;
399 }
400 #endif
401 switch(c) {
402 case '\n':
403 // Increment the total written lines.
404 ++linesWritten;
405  
406 if(verbose) {
407 fprintf(stdout, "Writing to temporary files: %d%%.\r", (int)(((float)linesWritten / total) * 100.0));
408 }
409  
410 // Write the newline character back.
411 if(fprintf(tp, "%c", c) != 1) {
412 fprintf(stderr, "Unable to write to temporary file '%s'.\n", tmpNames[tmpFiles]);
19 office 413 fclose(tp);
1 office 414 fclose(fp);
415 return;
416 }
417 // Switch to the next temporary file.
418 if(++lines >= tmpLines) {
419 // If there are no temporary files left then run till the end.
420 if(tmpFiles - 1 < 0) {
421 break;
422 }
423  
424 // Close the previous temporary file and write to the next temporary file.
425 fclose(tp);
426 if((tp = fopen(tmpNames[--tmpFiles], "w+")) == NULL) {
427 fprintf(stderr, "Unable to open temporary file '%s' for writing.\n", tmpNames[tmpFiles]);
19 office 428 fclose(tp);
1 office 429 fclose(fp);
430 }
431 lines = 0;
432 break;
433 }
434 break;
435 default:
436 if(fprintf(tp, "%c", c) != 1) {
437 fprintf(stderr, "Unable to write to temporary file '%s'.\n", tmpNames[tmpFiles]);
438 fclose(tp);
439 fclose(fp);
440 return;
441 }
442 break;
443 }
444 }
445  
446 fprintf(stdout, "\n");
447  
448 fclose(tp);
449 fclose(fp);
450 }
451  
/*
 *
 * Advances the database file "fp" past the end of the current line.
 *
 * Characters are consumed up to and including the next newline, or up to
 * the end of the file when no newline is left.
 */
void SkipDatabaseLine(FILE *fp) {
    int ch;

    do {
        ch = fgetc(fp);
    } while(ch != EOF && ch != '\n');
}
467  
468 /*
469 *
470 * Reads a line from the database file "fp".
471 */
472 char *ReadDatabaseLine(FILE *fp) {
473 char c;
474 char *line;
11 office 475 int line_size;
476 int i;
1 office 477  
11 office 478 line_size = LINE_BUF;
19 office 479 line = malloc(line_size * sizeof(*line));
1 office 480  
11 office 481 i = 0;
1 office 482 while(run && fscanf(fp, "%c", &c) == 1) {
483 #if defined ___AmigaOS___
484 // Check if CTRL+C was pressed and abort the program.
485 if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
486 run = FALSE;
487 }
488 #endif
489 switch(c) {
490 case '\n':
491 // Rewind the file by the number of read characters.
11 office 492 fseek(fp, -(i + 1), SEEK_CUR);
1 office 493 return line;
494 default:
11 office 495 if(strlen(line) == line_size) {
496 line_size = line_size * 1.5;
19 office 497 line = realloc(line, line_size * sizeof(*line));
11 office 498 }
499 //line = realloc(line, (chars + 1 + 1) * sizeof(char));
500 line[i] = c;
501 line[i + 1] = '\0';
1 office 502 break;
503 }
11 office 504 ++i;
1 office 505 }
506  
507 return NULL;
508 }
509  
510 /*
511 *
512 * Merges temporary files "tmpNames" into a database "dbFile".
513 */
514 void MergeDatabase(char *dbFile, char **tmpNames, int files, int lines) {
515 FILE *fp;
516 FILE **tp;
517 int i;
14 office 518 int j;
1 office 519 char *tmp;
14 office 520 char *min;
1 office 521 int count;
522  
523 if((fp = fopen(dbFile, "w+")) == NULL) {
524 fprintf(stderr, "Unable to open gather database for writing.\n");
525 return;
526 }
527  
528 // Allocate as many file pointers as temporary files.
19 office 529 tp = malloc(files * sizeof(*tp));
1 office 530  
531 // Open all temporary files for reading.
532 for(i = 0; i < files; ++i) {
533 if((tp[i] = fopen(tmpNames[i], "r")) == NULL) {
534 fprintf(stderr, "Unable to open temporary file '%s' for reading.\n", tmpNames[i]);
535 // Close all temporary files.
536 --i;
537 while(i >= 0) {
538 fclose(tp[i]);
539 }
540 return;
541 }
542 }
543  
544 if(verbose) {
545 fprintf(stdout, "Merging all database lines in temporary files.\r");
546 }
547  
548 count = lines;
14 office 549 j = 0;
1 office 550 while(run && --count > -1) {
551 #if defined ___AmigaOS___
552 // Check if CTRL+C was pressed and abort the program.
553 if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
554 run = FALSE;
555 }
556 #endif
557 // Find the smallest line in all temporary files.
558 if(verbose) {
559 fprintf(stdout, "Merging all database lines in temporary files: %d%%.\r", 100 - (int)(((float)count / lines) * 100.0));
560 }
561  
14 office 562 min = NULL;
1 office 563 for(i = 0; i < files; ++i) {
564 tmp = ReadDatabaseLine(tp[i]);
565 if(tmp == NULL) {
566 continue;
567 }
14 office 568 if(min == NULL || strncmp(tmp, min, strlen(tmp)) < 0) {
569 if(min != NULL) {
2 office 570 // Free previous instance.
14 office 571 free(min);
2 office 572 }
19 office 573 min = malloc((strlen(tmp) + 1) * sizeof(*min));
14 office 574 sprintf(min, "%s", tmp);
1 office 575 // Remember the index of the file where the smallest entry has been found.
14 office 576 j = i;
1 office 577 free(tmp);
578 continue;
579 }
580 free(tmp);
581 }
582  
583 // Forward the file where the smallest line was found.
14 office 584 SkipDatabaseLine(tp[j]);
1 office 585  
586 // Write the smallest line.
14 office 587 if(min != NULL) {
588 fprintf(fp, "%s\n", min);
589 free(min);
1 office 590 }
591 }
592  
593 // Write out any remaining contents from the temporary files.
594 for(i = 0; i < files; ++i) {
595 tmp = ReadDatabaseLine(tp[i]);
596 if(tmp == NULL) {
597 continue;
598 }
599 fprintf(fp, "%s\n", tmp);
14 office 600 free(tmp);
1 office 601 }
602  
603 // Close and delete all temporary files.
604 for(i = 0; i < files; ++i) {
605 fclose(tp[i]);
606 // Delete temporary file.
607 remove(tmpNames[i]);
608 }
609  
610 if(verbose) {
611 fprintf(stdout, "\n");
612 }
613  
614 fclose(fp);
615 }
616  
617 /*
618 *
619 * Indexes a "path" by creating a database "dbFile".
620 */
621 void Gather(char *dbFile, char *path) {
622 stringStack *stack = stringStackCreate(1);
623 stats *stats = malloc(sizeof(stats));
624 char **tmpNames;
625 int dbSize, dbLines, tmpFiles, tmpLines;
626 int i;
627  
628 // Initialize metrics.
629 stats->dirs = 0;
630 stats->files = 0;
631  
632 // Push the first path onto the stack.
633 stringStackPush(stack, path);
634  
635 // Generate the database file.
636 UpdateDatabase(dbFile, stack, stats);
637  
638 // Get the database metrics.
639 dbSize = GetDatabaseSize(dbFile);
640 dbLines = CountDatabaseLines(dbFile);
641  
642 // Compute the amount of temporary files needed.
643 tmpFiles = dbSize / MAX_MEM;
644  
645 // In case no temporary files are required,
646 // just sort the database and terminate.
2 office 647 if(tmpFiles <= 1) {
1 office 648 SortDatabase(dbFile);
649 return;
650 }
651  
652 tmpLines = dbLines / tmpFiles;
653  
654 // Create temporary files.
655 if((tmpNames = CreateTempFiles(tmpFiles)) == NULL) {
656 fprintf(stderr, "Unable to create temporary files.\n");
657 return;
658 }
659  
660 // Write "tmpLines" to temporary files in "tmpFiles" from "dbFile".
661 WriteTempFiles(dbFile, tmpNames, tmpFiles, tmpLines, dbLines);
662  
663 // Sort the temporary files.
664 for(i = 0; i < tmpFiles; ++i) {
665 SortDatabase(tmpNames[i]);
666 }
667  
668 MergeDatabase(dbFile, tmpNames, tmpFiles, dbLines);
669 }
670  
11 office 671 void usage(char *name) {
672 fprintf(stdout, "Hunt & Gather - %s, a file index generating tool. \n", name);
19 office 673 fprintf(stdout, "Version: %s \n", PROGRAM_VERSION);
11 office 674 fprintf(stdout, " \n");
675 fprintf(stdout, "SYNTAX: %s [-q] DATABASE \n", name);
676 fprintf(stdout, " \n");
677 fprintf(stdout, " -q Do not print out any messages. \n");
678 fprintf(stdout, " \n");
679 fprintf(stdout, "DATABASE is a path to where the indexed results will be \n");
680 fprintf(stdout, "stored for searching with the Hunt tool. \n");
681 fprintf(stdout, " \n");
682 fprintf(stdout, "(c) 2021 Wizardry and Steamworks, MIT. \n");
683 }
684  
1 office 685 /*
686 *
687 * Main entry point.
688 */
689 int main(int argc, char **argv) {
690 int option;
2 office 691 char *dbFile;
19 office 692 char *path;
10 office 693 struct stat dirStat;
19 office 694 #if defined ___AmigaOS___
695 BPTR lock;
696 #endif
1 office 697  
698 // Bind handler to SIGINT.
699 signal(SIGINT, SignalHandler);
700  
2 office 701 dbFile = DEFAULT_DATABASE_FILE;
702 while((option = getopt(argc, argv, "hqd:")) != -1) {
1 office 703 switch(option) {
2 office 704 case 'd':
705 dbFile = optarg;
706 break;
1 office 707 case 'q':
708 verbose = FALSE;
709 break;
710 case 'h':
11 office 711 usage(argv[0]);
2 office 712 return 0;
1 office 713 case '?':
714 fprintf(stderr, "Invalid option %ct.\n", optopt);
715 return 1;
716 }
717 }
718  
10 office 719  
720 if(optind >= argc) {
11 office 721 usage(argv[0]);
1 office 722 return 1;
723 }
724  
19 office 725 #if defined ___AmigaOS___
726 path = malloc(PATH_MAX * sizeof(*path));
727 lock = Lock(argv[optind], SHARED_LOCK);
728 NameFromLock(lock, path, PATH_MAX);
729 UnLock(lock);
730 #else
731 path = realpath(argv[optind], NULL);
732 #endif
733  
734 stat(path, &dirStat);
10 office 735 if(!S_ISDIR(dirStat.st_mode)) {
5 office 736 fprintf(stderr, "Path '%s' is not a directory.\n", argv[optind]);
1 office 737 return 1;
738 }
739  
2 office 740 if(verbose) {
741 fprintf(stdout, "Gathering to database file: %s\n", dbFile);
742 }
743  
1 office 744 // Gather.
19 office 745 Gather(dbFile, path);
1 office 746  
19 office 747 free(path);
748  
1 office 749 return 0;
750 }