HuntnGather – Rev 13
?pathlinks?
///////////////////////////////////////////////////////////////////////////
// Copyright (C) 2021 Wizardry and Steamworks - License: MIT //
///////////////////////////////////////////////////////////////////////////
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <proto/dos.h>
#include <proto/exec.h>
#include "StringStack.h"
#if !defined ___HAVE_GETOPT___
#include "getopt.h"
#endif
#if defined ___AmigaOS___
/*************************************************************************/
/* Version string used for querying the program version.                 */
/* NOTE(review): the leading NUL presumably hides the "$VER:" tag from   */
/* code that treats this array as a C string - confirm.                  */
/*************************************************************************/
TEXT version_string[] =
"\0$VER: Gather 1.7 "__DATE__" by Wizardry and Steamworks";
#endif
/*
 * Boolean constants, only defined when no system header provides them.
 * They must expand to plain expressions (no trailing semicolon) so that
 * they can be used inside conditions and larger expressions.
 */
#if !defined TRUE
#define TRUE 1
#endif
#if !defined FALSE
#define FALSE 0
#endif
// Gather() divides the database size by this value to decide into how many
// temporary files the database is split for sorting.
#define MAX_MEM 262144
// Initial buffer size used for file names read back from the database.
#define NAME_BUF 32
// Initial buffer size used for paths read back from the database.
#define PATH_BUF 128
// Initial buffer size used when reading a whole database line.
#define LINE_BUF 256
// Database file used when no "-d" option is given.
#define DEFAULT_DATABASE_FILE "S:gather.db"
// Counters updated while walking the directory tree.
typedef struct {
    unsigned int dirs;  // number of directories found
    unsigned int files; // number of files written to the database
} stats;
// Cleared by SignalHandler() (and by the CTRL+C checks) to stop all loops.
// Written from a signal handler, hence volatile sig_atomic_t.
volatile sig_atomic_t run = TRUE;
// Non-zero while progress messages should be printed ("-q" clears it).
int verbose = TRUE;
/*
 * Signal handler installed by main() for SIGINT.
 *
 * The signal number itself is not inspected; the handler only clears the
 * global "run" flag so that the processing loops terminate.
 */
void SignalHandler(int sig) {
    (void)sig;
    run = FALSE;
}
/*
 * qsort() comparator for an array of C strings ("char *" elements).
 *
 * "a" and "b" point at two array elements. The strings are compared over
 * their full length: a prefix-only comparison would report "ab" == "abc"
 * in one direction and "abc" > "ab" in the other, which is not a
 * consistent ordering for qsort().
 *
 * Returns a negative, zero or positive value like strcmp().
 */
int compare(const void *a, const void *b) {
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
/*
 * Reads one line (without the trailing newline) from "fp" into a freshly
 * allocated, NUL-terminated buffer that the caller must free().
 *
 * Returns NULL at end of file, on a read error or when memory runs out;
 * feof(fp) tells the end-of-file case apart from the failures.
 */
static char *SortDatabaseReadLine(FILE *fp) {
    size_t capacity = NAME_BUF + PATH_BUF;
    size_t length = 0;
    char *line;
    char *grown;
    int c;

    if((line = malloc(capacity)) == NULL) {
        return NULL;
    }
    while((c = fgetc(fp)) != EOF && c != '\n') {
        // Always keep one byte spare for the terminator.
        if(length + 1 >= capacity) {
            capacity += capacity / 2;
            if((grown = realloc(line, capacity)) == NULL) {
                free(line);
                return NULL;
            }
            line = grown;
        }
        line[length++] = (char)c;
    }
    // Nothing left to read, or the stream failed.
    if(c == EOF && (length == 0 || ferror(fp))) {
        free(line);
        return NULL;
    }
    line[length] = '\0';
    return line;
}

/*
 *
 * Sorts a database file lexicographically.
 *
 * The whole file "dbFile" is loaded into memory line by line, sorted with
 * compare() and written back in place. Lines are kept verbatim; a line
 * without a tab separator is reported as "Database corrupted." but is
 * still preserved.
 *
 * The file is only rewritten when it has been read completely: if the
 * run flag is cleared or loading fails, the database is left untouched
 * instead of being truncated to the part that was read.
 */
void SortDatabase(char *dbFile) {
    FILE *fp;
    char **database = NULL;
    char **grown;
    char *entry;
    size_t capacity = 0;
    size_t count = 0;
    size_t i;
    int failed = 0;

    // Open database file for reading.
    if((fp = fopen(dbFile, "r")) == NULL) {
        fprintf(stderr, "Unable to open gather database for reading.\n");
        return;
    }
    if(verbose) {
        fprintf(stdout, "Sorting database: '%s'\n", dbFile);
    }
    while(run && !failed && (entry = SortDatabaseReadLine(fp)) != NULL) {
#if defined ___AmigaOS___
        // Check if CTRL+C was pressed and abort the program.
        if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
            run = FALSE;
        }
#endif
        if(strchr(entry, '\t') == NULL) {
            fprintf(stderr, "Database corrupted.\n");
        }
        // Grow the line table geometrically.
        if(count == capacity) {
            capacity = (capacity == 0) ? 64 : capacity * 2;
            grown = realloc(database, capacity * sizeof *database);
            if(grown == NULL) {
                free(entry);
                failed = 1;
                continue;
            }
            database = grown;
        }
        database[count++] = entry;
    }
    // The reader returned NULL without reaching the end of the file.
    if(run && !failed && !feof(fp)) {
        failed = 1;
    }
    fclose(fp);

    if(failed) {
        fprintf(stderr, "Unable to load gather database into memory.\n");
    } else if(run) {
        // Sort the database.
        if(count > 1) {
            qsort(database, count, sizeof *database, compare);
        }
        // Write the database lines back to the database.
        if((fp = fopen(dbFile, "w+")) == NULL) {
            fprintf(stderr, "Unable to open gather database for writing.\n");
        } else {
            for(i = 0; i < count; ++i) {
                fprintf(fp, "%s\n", database[i]);
            }
            fclose(fp);
        }
    }

    for(i = 0; i < count; ++i) {
        free(database[i]);
    }
    free(database);
}
/*
 *
 * Updates a database file "dbFile".
 *
 * Directories are taken from "dirStack" until it is empty (or the run flag
 * is cleared). Every entry of a popped directory is either pushed back
 * onto the stack (sub-directory) or written to the database as
 * "name<TAB>path" (anything else). "stats" receives the running totals.
 *
 * "." and ".." are skipped so that the walk cannot recurse into itself.
 * A directory that cannot be opened is reported and skipped instead of
 * aborting the whole walk.
 */
void UpdateDatabase(char *dbFile, stringStack *dirStack, stats *stats) {
    FILE *fp;
    DIR *dir;
    struct dirent *dirEntry;
    struct stat dirStat;
    size_t pathLength;
    const char *separator;
    char *path;
    char *subPath;
    int failed = 0;

    if((fp = fopen(dbFile, "w+")) == NULL) {
        fprintf(stderr, "Unable to open gather database for writing.\n");
        return;
    }
    while(run && !failed && !stringStackIsEmpty(dirStack)) {
#if defined ___AmigaOS___
        // Check if CTRL+C was pressed and abort the program.
        if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
            run = FALSE;
        }
#endif
        if((path = stringStackPop(dirStack)) == NULL) {
            break;
        }
        if((dir = opendir(path)) == NULL) {
            fprintf(stderr, "Unable to open directory '%s'.\n", path);
            free(path);
            continue;
        }
        // Paths ending in '/' or ':' (a drive path) need no extra separator.
        pathLength = strlen(path);
        separator = "/";
        if(pathLength == 0 ||
           path[pathLength - 1] == '/' ||
           path[pathLength - 1] == ':') {
            separator = "";
        }
        while(run && (dirEntry = readdir(dir)) != NULL) {
#if defined ___AmigaOS___
            // Check if CTRL+C was pressed and abort the program.
            if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
                run = FALSE;
            }
#endif
            if(strcmp(dirEntry->d_name, ".") == 0 ||
               strcmp(dirEntry->d_name, "..") == 0) {
                continue;
            }
            // Size from the actual string lengths plus the terminator.
            subPath = malloc(pathLength + strlen(separator) +
                             strlen(dirEntry->d_name) + 1);
            if(subPath == NULL) {
                fprintf(stderr, "Out of memory while gathering '%s'.\n", path);
                failed = 1;
                break;
            }
            sprintf(subPath, "%s%s%s", path, separator, dirEntry->d_name);
            if(stat(subPath, &dirStat) == 0 && S_ISDIR(dirStat.st_mode)) {
                // NOTE(review): subPath is freed below, as in the original
                // code, so the stack is assumed to keep its own copy.
                stringStackPush(dirStack, subPath);
                ++stats->dirs;
            } else {
                // Write to database file. Entries that cannot be stat()ed
                // are recorded as files rather than silently dropped.
                fprintf(fp, "%s\t%s\n", dirEntry->d_name, subPath);
                ++stats->files;
            }
            if(verbose) {
                fprintf(stdout,
                        "Gathered %u directories and %u files.\r",
                        stats->dirs,
                        stats->files);
            }
            free(subPath);
        }
        closedir(dir);
        free(path);
    }
    if(verbose) {
        fprintf(stdout, "\n");
    }
    fclose(fp);
}
/*
 *
 * Gets the size of a database "dbFile".
 *
 * Returns the position reported by ftell() after seeking to the end of the
 * file, or 0 when the file cannot be opened or its size cannot be
 * determined (an error message is printed in both cases).
 */
int GetDatabaseSize(char *dbFile) {
    FILE *fp;
    long size = -1L;

    if((fp = fopen(dbFile, "r")) == NULL) {
        // Nothing to close here: fp is NULL.
        fprintf(stderr, "Unable to open gather database for reading.\n");
        return 0;
    }
    if(fseek(fp, 0L, SEEK_END) == 0) {
        size = ftell(fp);
    }
    fclose(fp);
    if(size < 0) {
        fprintf(stderr, "Unable to determine the size of the gather database.\n");
        return 0;
    }
    return (int)size;
}
/*
 *
 * Counts the lines in a database file "dbFile".
 *
 * Returns the number of newline characters in the file, or 0 when the file
 * cannot be opened (an error message is printed in that case).
 */
int CountDatabaseLines(char *dbFile) {
    FILE *fp;
    int lines = 0;
    int c;

    if((fp = fopen(dbFile, "r")) == NULL) {
        // Nothing to close here: fp is NULL.
        fprintf(stderr, "Unable to open gather database for reading.\n");
        return 0;
    }
    while((c = fgetc(fp)) != EOF) {
        if(c == '\n') {
            ++lines;
        }
    }
    fclose(fp);
    return lines;
}
/*
 *
 * Creates "files" temporary filenames.
 *
 * Returns an array of "files" individually allocated names, or NULL when
 * "files" is not positive, memory runs out or no name can be generated.
 *
 * Each name is generated into its own L_tmpnam-sized buffer: tmpnam(NULL)
 * returns a pointer to one internal static buffer, so storing that pointer
 * in every slot would make all slots refer to the same (last) name.
 */
char **CreateTempFiles(int files) {
    char **tmpNames;
    int count;
    int i;

    if(files <= 0) {
        return NULL;
    }
    if((tmpNames = malloc((size_t)files * sizeof *tmpNames)) == NULL) {
        return NULL;
    }
    if(verbose) {
        fprintf(stdout, "Creating temporary files.\r");
    }
    count = files;
    while(--count > -1) {
#if defined ___AmigaOS___
        // Check if CTRL+C was pressed and abort the program.
        if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
            run = FALSE;
        }
#endif
        tmpNames[count] = malloc(L_tmpnam);
        if(tmpNames[count] == NULL || tmpnam(tmpNames[count]) == NULL) {
            // Slots count..files-1 have been assigned; release them.
            for(i = count; i < files; ++i) {
                free(tmpNames[i]);
            }
            free(tmpNames);
            if(verbose) {
                fprintf(stdout, "\n");
            }
            return NULL;
        }
        if(verbose) {
            fprintf(stdout, "Creating temporary files: %d%%\r", 100 - (int)(((float)count / files) * 100.0));
        }
    }
    if(verbose) {
        fprintf(stdout, "\n");
    }
    return tmpNames;
}
/*
 *
 * Writes lines from the database "dbFile" to temporary filenames "tmpNames".
 *
 * The temporary files are filled from the last name towards the first,
 * "tmpLines" lines each; once no unused name is left, all remaining lines
 * go to the file that is currently open. "total" is the number of lines in
 * the database and is only used for the progress display.
 *
 * On any open or write failure an error is printed, every open stream is
 * closed and the function returns.
 */
void WriteTempFiles(char *dbFile, char **tmpNames, int tmpFiles, int tmpLines, int total) {
    FILE *fp, *tp;
    int c;
    int lines;
    int linesWritten;

    if(tmpFiles <= 0) {
        return;
    }
    if((fp = fopen(dbFile, "r")) == NULL) {
        fprintf(stderr, "Unable to open gather database for reading.\n");
        return;
    }
    if((tp = fopen(tmpNames[--tmpFiles], "w+")) == NULL) {
        fprintf(stderr, "Unable to open temporary file '%s' for writing.\n", tmpNames[tmpFiles]);
        fclose(fp);
        return;
    }
    if(verbose) {
        fprintf(stdout, "Writing to temporary files.\r");
    }
    linesWritten = 0;
    lines = 0;
    while(run && (c = fgetc(fp)) != EOF) {
#if defined ___AmigaOS___
        // Check if CTRL+C was pressed and abort the program.
        if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
            run = FALSE;
        }
#endif
        // Copy the character, newline included.
        if(fputc(c, tp) == EOF) {
            fprintf(stderr, "Unable to write to temporary file '%s'.\n", tmpNames[tmpFiles]);
            fclose(tp);
            fclose(fp);
            return;
        }
        if(c != '\n') {
            continue;
        }
        // Increment the total written lines.
        ++linesWritten;
        if(verbose && total > 0) {
            fprintf(stdout, "Writing to temporary files: %d%%.\r", (int)(((float)linesWritten / total) * 100.0));
        }
        // Stay on the current temporary file until it is full.
        if(++lines < tmpLines) {
            continue;
        }
        // If there are no temporary files left then run till the end.
        if(tmpFiles - 1 < 0) {
            continue;
        }
        // Close the previous temporary file and write to the next temporary file.
        fclose(tp);
        if((tp = fopen(tmpNames[--tmpFiles], "w+")) == NULL) {
            fprintf(stderr, "Unable to open temporary file '%s' for writing.\n", tmpNames[tmpFiles]);
            // tp is NULL here, only the database stream is still open.
            fclose(fp);
            return;
        }
        lines = 0;
    }
    if(verbose) {
        fprintf(stdout, "\n");
    }
    fclose(tp);
    fclose(fp);
}
/*
 *
 * Skips a line in a database file "fp".
 *
 * Characters are consumed up to and including the next newline; reading
 * stops early when the end of the file is reached.
 */
void SkipDatabaseLine(FILE *fp) {
    int ch;

    do {
        ch = fgetc(fp);
    } while(ch != EOF && ch != '\n');
}
/*
 *
 * Reads a line from the database file "fp".
 *
 * The line is only peeked at: the file position is restored to where it
 * was on entry, so the caller has to use SkipDatabaseLine() to actually
 * consume it.
 *
 * Returns a newly allocated, NUL-terminated copy of the line without its
 * newline (the caller must free() it). An empty line yields an empty
 * string. Returns NULL at end of file, on a read or memory error, or when
 * the run flag has been cleared.
 */
char *ReadDatabaseLine(FILE *fp) {
    char *line;
    char *grown;
    size_t capacity;
    size_t length;
    long start;
    int c;

    // Remember where the line starts; an absolute seek back to a position
    // obtained from ftell() is also valid for text streams.
    if((start = ftell(fp)) < 0) {
        return NULL;
    }
    capacity = LINE_BUF;
    if((line = malloc(capacity)) == NULL) {
        return NULL;
    }
    length = 0;
    c = EOF;
    while(run && (c = fgetc(fp)) != EOF && c != '\n') {
#if defined ___AmigaOS___
        // Check if CTRL+C was pressed and abort the program.
        if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
            run = FALSE;
        }
#endif
        // Always keep one byte spare for the terminator.
        if(length + 1 >= capacity) {
            capacity += capacity / 2;
            if((grown = realloc(line, capacity)) == NULL) {
                free(line);
                fseek(fp, start, SEEK_SET);
                return NULL;
            }
            line = grown;
        }
        line[length++] = (char)c;
    }
    // Aborted, stream failure, or nothing left to read.
    if(!run || (c == EOF && (length == 0 || ferror(fp)))) {
        free(line);
        return NULL;
    }
    // Rewind the file to the start of the line.
    if(fseek(fp, start, SEEK_SET) != 0) {
        free(line);
        return NULL;
    }
    line[length] = '\0';
    return line;
}
/*
 *
 * Merges temporary files "tmpNames" into a database "dbFile".
 *
 * Every temporary file is expected to be sorted already. The first
 * unconsumed line of each file is kept in memory; on each step the
 * smallest of those lines (full strcmp() comparison, lowest file index on
 * ties) is written to the database and replaced by the next line of the
 * same file. "lines" is the total number of lines and is only used for
 * the progress display.
 *
 * The temporary files are opened before the database is truncated, so a
 * failure to open one of them leaves the database as it was. After the
 * merge loop all temporary files are closed and removed.
 */
void MergeDatabase(char *dbFile, char **tmpNames, int files, int lines) {
    FILE *fp;
    FILE **tp;
    char **heads;
    int i;
    int idxMin;
    int written;

    if(files <= 0) {
        return;
    }
    // Allocate as many file pointers and line slots as temporary files.
    tp = calloc((size_t)files, sizeof *tp);
    heads = calloc((size_t)files, sizeof *heads);
    if(tp == NULL || heads == NULL) {
        fprintf(stderr, "Unable to allocate memory for merging.\n");
        free(heads);
        free(tp);
        return;
    }
    // Open all temporary files for reading.
    for(i = 0; i < files; ++i) {
        if((tp[i] = fopen(tmpNames[i], "r")) == NULL) {
            fprintf(stderr, "Unable to open temporary file '%s' for reading.\n", tmpNames[i]);
            // Close the temporary files opened so far.
            while(--i >= 0) {
                fclose(tp[i]);
            }
            free(heads);
            free(tp);
            return;
        }
    }
    if((fp = fopen(dbFile, "w+")) == NULL) {
        fprintf(stderr, "Unable to open gather database for writing.\n");
        for(i = 0; i < files; ++i) {
            fclose(tp[i]);
        }
        free(heads);
        free(tp);
        return;
    }
    if(verbose) {
        fprintf(stdout, "Merging all database lines in temporary files.\r");
    }
    // Load the first line of every temporary file.
    for(i = 0; i < files; ++i) {
        heads[i] = ReadDatabaseLine(tp[i]);
    }
    written = 0;
    while(run) {
#if defined ___AmigaOS___
        // Check if CTRL+C was pressed and abort the program.
        if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
            run = FALSE;
        }
#endif
        // Find the smallest line in all temporary files.
        idxMin = -1;
        for(i = 0; i < files; ++i) {
            if(heads[i] == NULL) {
                continue;
            }
            if(idxMin < 0 || strcmp(heads[i], heads[idxMin]) < 0) {
                idxMin = i;
            }
        }
        // All temporary files are exhausted.
        if(idxMin < 0) {
            break;
        }
        // Write the smallest line.
        fprintf(fp, "%s\n", heads[idxMin]);
        free(heads[idxMin]);
        ++written;
        if(verbose && lines > 0) {
            fprintf(stdout, "Merging all database lines in temporary files: %d%%.\r", (int)(((float)written / lines) * 100.0));
        }
        // Forward the file where the smallest line was found and load its
        // next line.
        SkipDatabaseLine(tp[idxMin]);
        heads[idxMin] = ReadDatabaseLine(tp[idxMin]);
    }
    // Close and delete all temporary files.
    for(i = 0; i < files; ++i) {
        // Only non-NULL when the merge was interrupted.
        free(heads[i]);
        fclose(tp[i]);
        // Delete temporary file.
        remove(tmpNames[i]);
    }
    free(heads);
    free(tp);
    if(verbose) {
        fprintf(stdout, "\n");
    }
    fclose(fp);
}
/*
 *
 * Indexes a "path" by creating a database "dbFile".
 *
 * The directory tree below "path" is written to "dbFile" and the result
 * is sorted. When the database size divided by MAX_MEM is at most 1 the
 * database is sorted in one go; otherwise it is split into that many
 * temporary files, each of which is sorted and then merged back.
 */
void Gather(char *dbFile, char *path) {
    stringStack *stack = stringStackCreate(1);
    // Kept on the stack: "sizeof(stats)" next to a variable that is itself
    // called "stats" measures the pointer, not the structure.
    stats gathered;
    char **tmpNames;
    int dbSize, dbLines, tmpFiles, tmpLines;
    int i;

    if(stack == NULL) {
        fprintf(stderr, "Unable to create the directory stack.\n");
        return;
    }
    // Initialize metrics.
    gathered.dirs = 0;
    gathered.files = 0;
    // Push the first path onto the stack.
    stringStackPush(stack, path);
    // Generate the database file.
    // NOTE(review): the stack is never released here; the string stack API
    // is not visible in this file - add the matching destroy call.
    UpdateDatabase(dbFile, stack, &gathered);
    // Get the database metrics.
    dbSize = GetDatabaseSize(dbFile);
    dbLines = CountDatabaseLines(dbFile);
    // Compute the amount of temporary files needed.
    tmpFiles = dbSize / MAX_MEM;
    // In case no temporary files are required,
    // just sort the database and terminate.
    if(tmpFiles <= 1) {
        SortDatabase(dbFile);
        return;
    }
    tmpLines = dbLines / tmpFiles;
    // Create temporary files.
    if((tmpNames = CreateTempFiles(tmpFiles)) == NULL) {
        fprintf(stderr, "Unable to create temporary files.\n");
        return;
    }
    // Write "tmpLines" to temporary files in "tmpFiles" from "dbFile".
    WriteTempFiles(dbFile, tmpNames, tmpFiles, tmpLines, dbLines);
    // Sort the temporary files.
    for(i = 0; i < tmpFiles; ++i) {
        SortDatabase(tmpNames[i]);
    }
    MergeDatabase(dbFile, tmpNames, tmpFiles, dbLines);
    // Release the name table itself.
    free(tmpNames);
}
/*
 * Prints the help text to stdout; "name" is the program name to show.
 *
 * The text describes the command line as parsed by main(): the options
 * -h, -q and -d DATABASE followed by the directory to index.
 */
void usage(char *name) {
    fprintf(stdout, "Hunt & Gather - %s, a file index generating tool. \n", name);
    fprintf(stdout, " \n");
    fprintf(stdout, "SYNTAX: %s [-h] [-q] [-d DATABASE] DIRECTORY \n", name);
    fprintf(stdout, " \n");
    fprintf(stdout, " -h Print this help message. \n");
    fprintf(stdout, " -q Do not print out any messages. \n");
    fprintf(stdout, " -d Use DATABASE instead of the default database. \n");
    fprintf(stdout, " \n");
    fprintf(stdout, "DIRECTORY is the directory that will be indexed. \n");
    fprintf(stdout, " \n");
    fprintf(stdout, "DATABASE is a path to where the indexed results will be \n");
    fprintf(stdout, "stored for searching with the Hunt tool. \n");
    fprintf(stdout, " \n");
    fprintf(stdout, "(c) 2021 Wizardry and Steamworks, MIT. \n");
}
/*
 *
 * Main entry point.
 *
 * Options: -h prints the help text, -q disables progress messages and
 * -d FILE selects the database file (DEFAULT_DATABASE_FILE otherwise).
 * The first non-option argument is the directory to index.
 *
 * Returns 0 on success (or after -h) and 1 on an invalid option, a
 * missing directory argument or an argument that is not a directory.
 */
int main(int argc, char **argv) {
    int option;
    char *dbFile;
    struct stat dirStat;

    // Bind handler to SIGINT.
    signal(SIGINT, SignalHandler);
    dbFile = DEFAULT_DATABASE_FILE;
    while((option = getopt(argc, argv, "hqd:")) != -1) {
        switch(option) {
            case 'd':
                dbFile = optarg;
                break;
            case 'q':
                verbose = FALSE;
                break;
            case 'h':
                usage(argv[0]);
                return 0;
            case '?':
            default:
                fprintf(stderr, "Invalid option %c.\n", optopt);
                return 1;
        }
    }
    if(optind >= argc) {
        usage(argv[0]);
        return 1;
    }
    // dirStat is only meaningful when stat() succeeded.
    if(stat(argv[optind], &dirStat) != 0 || !S_ISDIR(dirStat.st_mode)) {
        fprintf(stderr, "Path '%s' is not a directory.\n", argv[optind]);
        return 1;
    }
    if(verbose) {
        fprintf(stdout, "Gathering to database file: %s\n", dbFile);
    }
    // Gather.
    Gather(dbFile, argv[optind]);
    return 0;
}