// HuntnGather – Rev 1
//
// Subversion Repositories:
// Rev:
///////////////////////////////////////////////////////////////////////////
//    Copyright (C) 2021 Wizardry and Steamworks - License: MIT          //
///////////////////////////////////////////////////////////////////////////

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>
#include <signal.h>

#include <sys/types.h>
#include <sys/stat.h>

#include <proto/dos.h>
#include <proto/exec.h>

#include "StringStack.h"

#if defined ___AmigaOS___
#include "getopt.h"
#endif


// Boolean constants, defined only when no earlier header provides them.
// They deliberately carry no trailing semicolon so that they expand to plain
// expressions usable inside conditions and argument lists.
#if !defined TRUE
#define TRUE 1
#endif

#if !defined FALSE
#define FALSE 0
#endif

// Database size in bytes that one sorting pass is expected to handle; Gather
// divides the database size by this value to get the number of temporary files.
#define MAX_MEM 524288

/*************************************************************************/
/*        Version string used for querying the program version.          */
/*************************************************************************/
TEXT version_string[] =
        "\0$VER: Gather 1.2 "__DATE__" by Wizardry and Steamworks";

// Counters updated by UpdateDatabase while it walks the directory tree.
typedef struct {
        // Number of entries that were identified as directories.
        unsigned int dirs;
        // Number of entries that were written to the database as files.
        unsigned int files;
} stats;

// Cleared to request that the processing loops in this file stop. It is
// written from SignalHandler, so it has to be a volatile sig_atomic_t for the
// store to be well-defined inside an asynchronous signal handler.
volatile sig_atomic_t run = TRUE;
// Non-zero while progress messages are printed to stdout.
int verbose = TRUE;

/*
        *
        * Signal handler that requests termination by clearing "run".
        * The signal number "sig" is not used.
        */
void SignalHandler(int sig) {
        (void)sig;

        // Toggle the run flag to stop execution.
        run = FALSE;
}

/*
        *
        * qsort comparison callback for an array of C strings: "a" and "b"
        * each point at a "char *" element and are ordered with strcmp.
        */
int compare(const void *a, const void *b) {
        const char *lhs = *(const char * const *)a;
        const char *rhs = *(const char * const *)b;

        return strcmp(lhs, rhs);
}

/*
        *
        * Appends the character "c" to the string "*str" that currently holds
        * "len" characters (a NULL "*str" is treated as an empty string) and
        * keeps the result NUL-terminated.
        *
        * Returns 1 on success and 0 if memory could not be allocated, in
        * which case "*str" is left untouched.
        */
static int SortDatabaseAppend(char **str, int len, char c) {
        char *grown;

        grown = (char *) realloc(*str, (len + 1 + 1) * sizeof(char));
        if(grown == NULL) {
                return 0;
        }

        grown[len] = c;
        grown[len + 1] = '\0';
        *str = grown;
        return 1;
}

/*
        *
        * Sorts a database file lexicographically.
        *
        * Every line of "dbFile" is expected to be "name<TAB>path". All
        * complete lines are loaded into memory, sorted with "compare" and
        * written back to the same file. A trailing line without a newline is
        * dropped. If memory runs out the file is left untouched.
        */
void SortDatabase(char *dbFile) {
        FILE *fp;
        char *name = NULL;
        char *path = NULL;
        char **database = NULL;
        char **grown;
        char *entry;
        const char *nameText;
        const char *pathText;
        char c;
        int i;
        int side;
        int failed;
        unsigned int line;
        unsigned int n;

        // Open database file for reading.
        if((fp = fopen(dbFile, "r")) == NULL) {
                fprintf(stderr, "Unable to open gather database for reading.\n");
                return;
        }

        line = 0;
        side = 0;
        i = 0;
        failed = 0;

        if(verbose) {
                fprintf(stdout, "Sorting database: '%s'\n", dbFile);
        }

        while(!failed && run && fscanf(fp, "%c", &c) == 1) {
#if defined ___AmigaOS___
                // Check if CTRL+C was pressed and abort the program.
                if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
                        run = FALSE;
                        continue;
                }
#endif
                switch(c) {
                        case '\n':
                                // A field that received no characters is still NULL; treat it as empty.
                                nameText = name != NULL ? name : "";
                                pathText = path != NULL ? path : "";

                                // Load up the name and path into the database variable.
                                entry = (char *) malloc((strlen(nameText) + strlen(pathText) + 1 + 1) * sizeof(char));
                                grown = (char **) realloc(database, (line + 1) * sizeof(char *));
                                if(grown != NULL) {
                                        database = grown;
                                }
                                if(entry == NULL || grown == NULL) {
                                        free(entry);
                                        failed = 1;
                                        break;
                                }
                                sprintf(entry, "%s\t%s", nameText, pathText);
                                database[line] = entry;
                                ++line;

                                // Start the next record from a clean state so that one
                                // malformed line cannot corrupt the lines after it.
                                free(name);
                                name = NULL;
                                free(path);
                                path = NULL;
                                side = 0;
                                i = 0;
                                break;
                        case '\t':
                                // The tab separates the name from the path.
                                free(path);
                                path = NULL;
                                ++side;
                                i = 0;
                                break;
                        default:
                                switch(side) {
                                        case 0:
                                                failed = !SortDatabaseAppend(&name, i, c);
                                                break;
                                        case 1:
                                                failed = !SortDatabaseAppend(&path, i, c);
                                                break;
                                        default:
                                                // More than one tab on the line.
                                                fprintf(stderr, "Database corrupted.\n");
                                                break;
                                }
                                ++i;
                                break;
                }
        }

        fclose(fp);
        free(name);
        free(path);

        if(failed) {
                fprintf(stderr, "Unable to allocate memory for sorting the gather database.\n");
        } else {
                // Sort the database.
                if(line > 1) {
                        qsort(database, line, sizeof(char *), compare);
                }

                // Write the database lines back to the database.
                // NOTE(review): after an interruption only the lines read so far are
                // written back, which matches the previous behaviour.
                if((fp = fopen(dbFile, "w+")) == NULL) {
                        fprintf(stderr, "Unable to open gather database for writing.\n");
                } else {
                        for(n = 0; n < line; ++n) {
                                fprintf(fp, "%s\n", database[n]);
                        }
                        fclose(fp);
                }
        }

        // Release every stored line and the array itself.
        for(n = 0; n < line; ++n) {
                free(database[n]);
        }
        free(database);
}

/*
        *
        * Updates a database file "dbFile".
        *
        * Pops directories from "dirStack" until it is empty. Every entry that
        * is not a directory is written to "dbFile" as "name<TAB>path"; every
        * directory is pushed back onto "dirStack" to be visited later. The
        * counters in "stats" are incremented accordingly. A directory that
        * cannot be opened is reported and skipped.
        */
void UpdateDatabase(char *dbFile, stringStack *dirStack, stats *stats) {
        FILE *fp;
        DIR *dir;
        struct dirent *dirEntry;
        struct stat dirStat;
        size_t length;
        const char *separator;
        char *path;
        char *subPath;
        int isDirectory;
        int failed;

        if((fp = fopen(dbFile, "w+")) == NULL) {
                fprintf(stderr, "Unable to open gather database for writing.\n");
                return;
        }

        failed = 0;
        while(!failed && run && !stringStackIsEmpty(dirStack)) {
#if defined ___AmigaOS___
                // Check if CTRL+C was pressed and abort the program.
                if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
                        run = FALSE;
                }
#endif
                if((path = stringStackPop(dirStack)) == NULL) {
                        break;
                }

                if((dir = opendir(path)) == NULL) {
                        fprintf(stderr, "Unable to open directory '%s'.\n", path);
                        free(path);
                        continue;
                }

                // A path ending in '/' or ':' (a drive path) needs no separator.
                length = strlen(path);
                separator = "/";
                if(length == 0 || path[length - 1] == '/' || path[length - 1] == ':') {
                        separator = "";
                }

                while(run && (dirEntry = readdir(dir)) != NULL) {
#if defined ___AmigaOS___
                        // Check if CTRL+C was pressed and abort the program.
                        if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
                                run = FALSE;
                        }
#endif
                        // Following "." or ".." would revisit the same directories forever.
                        if(strcmp(dirEntry->d_name, ".") == 0 || strcmp(dirEntry->d_name, "..") == 0) {
                                continue;
                        }

                        // Size the buffer from the actual string lengths plus the terminator.
                        subPath = (char *) malloc(length + strlen(separator) + strlen(dirEntry->d_name) + 1);
                        if(subPath == NULL) {
                                fprintf(stderr, "Unable to allocate memory for path.\n");
                                failed = 1;
                                break;
                        }
                        sprintf(subPath, "%s%s%s", path, separator, dirEntry->d_name);

                        // An entry that cannot be examined is recorded as a file.
                        isDirectory = stat(subPath, &dirStat) == 0 && S_ISDIR(dirStat.st_mode);

                        if(isDirectory) {
                                // subPath is freed below, so the stack presumably keeps its own
                                // copy - NOTE(review): confirm against StringStack.h.
                                stringStackPush(dirStack, subPath);
                                ++stats->dirs;
                        } else {
                                // Write to database file.
                                fprintf(fp, "%s\t%s\n", dirEntry->d_name, subPath);
                                ++stats->files;
                        }

                        if(verbose) {
                                fprintf(stdout,
                                        "Gathered %u directories and %u files.\r",
                                        stats->dirs,
                                        stats->files);
                        }

                        free(subPath);
                }

                closedir(dir);
                free(path);
        }

        if(verbose) {
                fprintf(stdout, "\n");
        }

        fclose(fp);
}

/*
        *
        * Gets the size of a database "dbFile".
        *
        * Returns the size in bytes as reported by seeking to the end of the
        * file, or 0 if the file cannot be opened or measured.
        */
int GetDatabaseSize(char *dbFile) {
        FILE *fp;
        long size;

        if((fp = fopen(dbFile, "r")) == NULL) {
                // There is no stream to close when fopen fails.
                fprintf(stderr, "Unable to open gather database for reading.\n");
                return 0;
        }

        size = -1L;
        if(fseek(fp, 0L, SEEK_END) == 0) {
                size = ftell(fp);
        }

        fclose(fp);

        if(size < 0) {
                fprintf(stderr, "Unable to determine the size of the gather database.\n");
                return 0;
        }

        return (int) size;
}

/*
        *
        * Counts the lines in a database file "dbFile".
        *
        * Returns the number of newline characters in the file, or 0 if the
        * file cannot be opened.
        */
int CountDatabaseLines(char *dbFile) {
        FILE *fp;
        int lines;
        int c;

        if((fp = fopen(dbFile, "r")) == NULL) {
                // There is no stream to close when fopen fails.
                fprintf(stderr, "Unable to open gather database for reading.\n");
                return 0;
        }

        lines = 0;
        while((c = fgetc(fp)) != EOF) {
                if(c == '\n') {
                        ++lines;
                }
        }

        fclose(fp);

        return lines;
}

/*
        *
        * Creates "files" temporary filenames.
        *
        * Returns a heap-allocated array of "files" heap-allocated names, or
        * NULL if "files" is not positive, a name could not be generated or
        * memory ran out.
        */
char **CreateTempFiles(int files) {
        char **tmpNames;
        char *generated;
        int count;

        if(files <= 0) {
                return NULL;
        }

        tmpNames = (char **) malloc(files * sizeof(char *));
        if(tmpNames == NULL) {
                return NULL;
        }

        if(verbose) {
                fprintf(stdout, "Creating temporary files.\r");
        }

        count = files;
        while(--count > -1) {
#if defined ___AmigaOS___
                // Check if CTRL+C was pressed and abort the program.
                if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
                        run = FALSE;
                }
#endif
                // tmpnam(NULL) returns a pointer to an internal buffer that the next
                // call overwrites, so every name has to be copied.
                generated = tmpnam(NULL);
                tmpNames[count] = NULL;
                if(generated != NULL) {
                        tmpNames[count] = (char *) malloc((strlen(generated) + 1) * sizeof(char));
                }

                if(tmpNames[count] == NULL) {
                        // Release the names stored so far; they occupy the upper indices.
                        while(++count < files) {
                                free(tmpNames[count]);
                        }
                        free(tmpNames);
                        return NULL;
                }
                strcpy(tmpNames[count], generated);

                if(verbose) {
                        fprintf(stdout, "Creating temporary files: %d%%\r", 100 - (int)(((float)count / files) * 100.0));
                }
        }

        if(verbose) {
                fprintf(stdout, "\n");
        }

        return tmpNames;
}

/*
        *
        * Writes lines from the database "dbFile" to temporary filenames "tmpNames".
        *
        * The "tmpFiles" names are used from the last index down to the first.
        * Each file receives "tmpLines" lines, except the last one used, which
        * takes all remaining lines. "total" is the expected number of lines
        * and only drives the progress display.
        */
void WriteTempFiles(char *dbFile, char **tmpNames, int tmpFiles, int tmpLines, int total) {
        FILE *fp, *tp;
        int c;
        int lines;
        int linesWritten;

        // Without at least one temporary file there is nowhere to write to.
        if(tmpFiles < 1) {
                return;
        }

        if((fp = fopen(dbFile, "r")) == NULL) {
                fprintf(stderr, "Unable to open gather database for reading.\n");
                return;
        }

        if((tp = fopen(tmpNames[--tmpFiles], "w+")) == NULL) {
                fprintf(stderr, "Unable to open temporary file '%s' for writing.\n", tmpNames[tmpFiles]);
                fclose(fp);
                return;
        }

        if(verbose) {
                fprintf(stdout, "Writing to temporary files.\r");
        }

        linesWritten = 0;
        lines = 0;
        while(run && (c = fgetc(fp)) != EOF) {
#if defined ___AmigaOS___
                // Check if CTRL+C was pressed and abort the program.
                if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
                        run = FALSE;
                }
#endif
                // Copy the character, newline included, to the current temporary file.
                if(fputc(c, tp) == EOF) {
                        fprintf(stderr, "Unable to write to temporary file '%s'.\n", tmpNames[tmpFiles]);
                        fclose(tp);
                        fclose(fp);
                        return;
                }

                if(c != '\n') {
                        continue;
                }

                // Increment the total written lines.
                ++linesWritten;

                // The percentage is undefined when no lines are expected.
                if(verbose && total > 0) {
                        fprintf(stdout, "Writing to temporary files: %d%%.\r", (int)(((float)linesWritten / total) * 100.0));
                }

                // Stay on the current temporary file until it holds "tmpLines" lines.
                if(++lines < tmpLines) {
                        continue;
                }

                // If there are no temporary files left then run till the end.
                if(tmpFiles - 1 < 0) {
                        continue;
                }

                // Close the previous temporary file and write to the next temporary file.
                fclose(tp);
                if((tp = fopen(tmpNames[--tmpFiles], "w+")) == NULL) {
                        // Only the database stream is still open here.
                        fprintf(stderr, "Unable to open temporary file '%s' for writing.\n", tmpNames[tmpFiles]);
                        fclose(fp);
                        return;
                }
                lines = 0;
        }

        if(verbose) {
                fprintf(stdout, "\n");
        }

        fclose(tp);
        fclose(fp);
}

/*
        *
        * Advances the database file "fp" past the end of the current line.
        * Reading stops after the first newline or at the end of the file.
        */
void SkipDatabaseLine(FILE *fp) {
        int ch;

        do {
                ch = fgetc(fp);
        } while(ch != EOF && ch != '\n');
}

/*
        *
        * Reads a line from the database file "fp".
        *
        * The line is returned without its newline in a heap-allocated string
        * that the caller must free. The file position is restored to the
        * start of the line, so the same line is read again until the caller
        * moves past it.
        *
        * Returns NULL if no complete line (terminated by a newline) is left,
        * if the program has been asked to stop, or if memory runs out.
        */
char *ReadDatabaseLine(FILE *fp) {
        int c;
        char *line;
        char *grown;
        int chars;
        long start;

        // Remember where the line starts; seeking to an absolute position
        // obtained from ftell is the only portable rewind on a text stream.
        start = ftell(fp);
        if(start < 0) {
                return NULL;
        }

        line = (char *) malloc(sizeof(char));
        if(line == NULL) {
                return NULL;
        }
        // An empty line must still yield a valid empty string.
        line[0] = '\0';

        chars = 0;
        while(run && (c = fgetc(fp)) != EOF) {
#if defined ___AmigaOS___
                // Check if CTRL+C was pressed and abort the program.
                if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
                        run = FALSE;
                }
#endif
                if(c == '\n') {
                        // Rewind the file to the start of the line.
                        fseek(fp, start, SEEK_SET);
                        return line;
                }

                grown = (char *) realloc(line, (chars + 1 + 1) * sizeof(char));
                if(grown == NULL) {
                        break;
                }
                line = grown;
                line[chars] = (char) c;
                line[chars + 1] = '\0';
                ++chars;
        }

        // No complete line was read; do not leak the partial buffer.
        free(line);
        return NULL;
}

/*
        *
        * Merges temporary files "tmpNames" into a database "dbFile".
        *
        * Each of the "files" temporary files is expected to be sorted. The
        * smallest pending line across all files is written to "dbFile"
        * repeatedly until every file is exhausted or the program is asked to
        * stop. "lines" is the expected number of lines and only drives the
        * progress display. Once all temporary files could be opened they are
        * closed and deleted before returning.
        */
void MergeDatabase(char *dbFile, char **tmpNames, int files, int lines) {
        FILE *fp;
        FILE **tp;
        int i;
        char *tmp;
        char *tmpMin;
        int idxMin;
        int count;

        if((fp = fopen(dbFile, "w+")) == NULL) {
                fprintf(stderr, "Unable to open gather database for writing.\n");
                return;
        }

        // Allocate as many file pointers as temporary files.
        tp = (FILE **) malloc((files > 0 ? files : 1) * sizeof(FILE *));
        if(tp == NULL) {
                fprintf(stderr, "Unable to allocate memory for merging the gather database.\n");
                fclose(fp);
                return;
        }

        // Open all temporary files for reading.
        for(i = 0; i < files; ++i) {
                if((tp[i] = fopen(tmpNames[i], "r")) == NULL) {
                        fprintf(stderr, "Unable to open temporary file '%s' for reading.\n", tmpNames[i]);
                        // Close all temporary files opened so far.
                        while(--i >= 0) {
                                fclose(tp[i]);
                        }
                        free(tp);
                        fclose(fp);
                        return;
                }
        }

        if(verbose) {
                fprintf(stdout, "Merging all database lines in temporary files.\r");
        }

        count = lines;
        while(run) {
#if defined ___AmigaOS___
                // Check if CTRL+C was pressed and abort the program.
                if(SetSignal(0L, SIGBREAKF_CTRL_C) & SIGBREAKF_CTRL_C) {
                        run = FALSE;
                }
#endif
                // Find the smallest line in all temporary files.
                tmpMin = NULL;
                idxMin = -1;
                for(i = 0; i < files; ++i) {
                        tmp = ReadDatabaseLine(tp[i]);
                        if(tmp == NULL) {
                                continue;
                        }
                        if(tmpMin == NULL || strcmp(tmp, tmpMin) < 0) {
                                // Take ownership of the new minimum and drop the old one.
                                free(tmpMin);
                                tmpMin = tmp;
                                // Remember the index of the file where the smallest entry has been found.
                                idxMin = i;
                                continue;
                        }
                        free(tmp);
                }

                // Every temporary file is exhausted (or reading was interrupted).
                if(tmpMin == NULL) {
                        break;
                }

                if(count > 0) {
                        --count;
                }
                if(verbose && lines > 0) {
                        fprintf(stdout, "Merging all database lines in temporary files: %d%%.\r", 100 - (int)(((float)count / lines) * 100.0));
                }

                // Forward the file where the smallest line was found.
                SkipDatabaseLine(tp[idxMin]);

                // Write the smallest line.
                fprintf(fp, "%s\n", tmpMin);
                free(tmpMin);
        }

        // Close and delete all temporary files.
        for(i = 0; i < files; ++i) {
                fclose(tp[i]);
                // Delete temporary file.
                remove(tmpNames[i]);
        }
        free(tp);

        if(verbose) {
                fprintf(stdout, "\n");
        }

        fclose(fp);
}

/*
        *
        * Indexes a "path" by creating a database "dbFile".
        *
        * The directory tree below "path" is written to "dbFile" and then
        * sorted. A database smaller than MAX_MEM bytes is sorted in one pass;
        * a larger one is split into temporary files that are sorted
        * individually and merged back into "dbFile".
        */
void Gather(char *dbFile, char *path) {
        stringStack *stack;
        // Kept on the stack: the previous "malloc(sizeof(stats))" measured the
        // pointer variable instead of the structure.
        stats gathered;
        char **tmpNames;
        int dbSize, dbLines, tmpFiles, tmpLines;
        int i;

        if((stack = stringStackCreate(1)) == NULL) {
                fprintf(stderr, "Unable to create the directory stack.\n");
                return;
        }

        // Initialize metrics.
        gathered.dirs = 0;
        gathered.files = 0;

        // Push the first path onto the stack.
        stringStackPush(stack, path);

        // Generate the database file.
        // NOTE(review): the stack is never released here; use the matching
        // destructor from StringStack.h if one exists.
        UpdateDatabase(dbFile, stack, &gathered);

        // Get the database metrics.
        dbSize = GetDatabaseSize(dbFile);
        dbLines = CountDatabaseLines(dbFile);

        // Compute the amount of temporary files needed.
        tmpFiles = dbSize / MAX_MEM;

        // In case no temporary files are required,
        // just sort the database and terminate.
        if(tmpFiles == 0) {
                SortDatabase(dbFile);
                return;
        }

        tmpLines = dbLines / tmpFiles;

        // Create temporary files.
        if((tmpNames = CreateTempFiles(tmpFiles)) == NULL) {
                fprintf(stderr, "Unable to create temporary files.\n");
                return;
        }

        // Write "tmpLines" to temporary files in "tmpFiles" from "dbFile".
        WriteTempFiles(dbFile, tmpNames, tmpFiles, tmpLines, dbLines);

        // Sort the temporary files.
        for(i = 0; i < tmpFiles; ++i) {
                SortDatabase(tmpNames[i]);
        }

        MergeDatabase(dbFile, tmpNames, tmpFiles, dbLines);

        // Only the array is released. NOTE(review): confirm how CreateTempFiles
        // allocates the individual names before freeing them as well.
        free(tmpNames);
}

/*
        *
        * Main entry point.
        *
        * Usage: PROGRAM [-q] DIRECTORY
        *   -q  suppress progress output
        *   -h  print the syntax and exit
        *
        * Returns 0 on success and 1 on a usage error or when DIRECTORY is not
        * a directory.
        */
int main(int argc, char **argv) {
        int option;
        struct stat path;

        // Bind handler to SIGINT.
        signal(SIGINT, SignalHandler);

        while((option = getopt(argc, argv, "hq")) != -1) {
                switch(option) {
                        case 'q':
                                verbose = FALSE;
                                break;
                        case 'h':
                                // Asking for help is not an error and must not start indexing.
                                fprintf(stdout, "SYNTAX: %s [-q] DIRECTORY\n", argv[0]);
                                return 0;
                        case '?':
                                fprintf(stderr, "Invalid option %c.\n", optopt);
                                fprintf(stdout, "SYNTAX: %s [-q] DIRECTORY\n", argv[0]);
                                return 1;
                        default:
                                break;
                }
        }

        // argv[optind] is only a real argument while optind is below argc.
        if(optind >= argc) {
                fprintf(stdout, "SYNTAX: %s [-q] DIRECTORY\n", argv[0]);
                return 1;
        }

        // A path that cannot be examined is treated like a non-directory.
        if(stat(argv[optind], &path) != 0 || !S_ISDIR(path.st_mode)) {
                fprintf(stdout, "%s is not a directory.\n", argv[optind]);
                return 1;
        }

        // Gather.
        Gather("S:gather.db", argv[optind]);

        return 0;
}