From e3e323daf9e3fde721ebf8f76aa19fdcfedf87e3 Mon Sep 17 00:00:00 2001 From: Urban Wallasch Date: Sun, 6 Jun 2021 16:21:43 +0200 Subject: [PATCH] * Improved directory handling. * Improved error handling and reporting. * Corrected command line usage synopsis. * Corrected old spelling (imdupe) to new (imgdupe) in README.md. * Minor edits in README.md. --- README.md | 34 +++++++++++++++++++--------------- main.c | 44 ++++++++++++++++++++++---------------------- 2 files changed, 41 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index 6871076..4b5315d 100644 --- a/README.md +++ b/README.md @@ -1,23 +1,24 @@ -# Imdupe +# Imgdupe -Imdupe is a command line tool to find potential duplicate images in a +Imgdupe is a command line tool to find potential duplicate images in a directory tree by comparing perceptual hash values of image files. It writes a shell script to stdout which can in turn be used to further process the scan results. -The perceptual hash for an image is calculated by: +The perceptual hash for an image is calculated by applying the following +transformations in the given order: - * flattening the image (merging layers/frames) - * transforming the color space to gray scale - * resizing to 16 by 16 pixel using box blur - * normalizing contrast and brightness - * conversion to monochromatic image - * interpreting the pixel values as 256 bit hash value + * flatten the image (merge layers/frames) + * transform the color space to gray scale + * resize to 16 by 16 pixels using a box blur + * normalize contrast and brightness + * convert to hard black and white + * interpret the resulting pixel values as a 256-bit hash value ## Usage ``` -imgdupe [OPTIONS] DIR ... +imgdupe [OPTIONS] [DIR ...] OPTIONS: -d n max directory recursion depth; default: 0 (unlimited) @@ -33,8 +34,11 @@ OPTIONS: ``` **Notes** + * If no directory is specified on the command line, `imgdupe` scans + the current working directory. 
+ * `-d` limits the recursion depth when scanning a directory tree; a - value of 1 causes `imdupe` to only scan files in the directories + value of 1 causes `imgdupe` to only scan files in the directories specified on the command line and not descend into any sub-directories, 0 (the default) means unlimited depth. @@ -62,7 +66,7 @@ OPTIONS: * `-I` allows to include a custom lead-in in the output, which is in particular useful to replace the default dummy versions of the `VIEW()` - and `END()` functions which are used by `imdupe` to structure its + and `END()` functions which are used by `imgdupe` to structure its output. * `-T` allows to set the number of hashing threads to use; for optimal @@ -73,10 +77,10 @@ OPTIONS: ## Build The [GraphicsMagick](http://www.graphicsmagick.org/) library must be -installed as a prerequisite prior to building `imdupe`, as it is required +installed as a prerequisite prior to building `imgdupe`, as it is required to manipulate images during perceptual hash calculation. -Run `make` in the project directory to build the `imdupe` executable. +Run GNU `make` in the project directory to build the `imgdupe` executable. Though it was only tested on GNU/Linux, it should work with few (if any) modifications on other Unix-like systems. @@ -90,7 +94,7 @@ are only portable between systems of equal endianness. ## License -Imdupe is distributed under the Modified ("3-clause") BSD License. +Imgdupe is distributed under the Modified ("3-clause") BSD License. See `LICENSE` file for more information. 
---------------------------------------------------------------------- diff --git a/main.c b/main.c index 7d5d1b3..426a253 100644 --- a/main.c +++ b/main.c @@ -66,7 +66,7 @@ void *worker(void *arg) { rc = db_insert(thrinf->db, entry, cfg.rescan, cfg.blur); if ( 0 != rc ) { if ( 0 > rc ) - eprintf("not fingerprinting '%s'\n", entry->fname); + eprintf("WARNING: not fingerprinting '%s'\n", entry->fname); s_free(entry->fname); s_free(entry); } @@ -111,7 +111,7 @@ int scan_dir(const char *dir, queue_t *q, db_t *db) { s_free(dirpath); } else - eprintf("%s: '%s'\n", strerror(errno), dir); + eprintf("ERROR: '%s': %s\n", dir, strerror(errno)); /* wait for workers to finish */ q_set_complete(q); for ( int i = 0; i < cfg.nthreads; ++i ) @@ -139,7 +139,7 @@ static int find_dupes_cb(db_entry_t *dupes) { static void usage(char *pname, int ec) { char *prog = basename(pname); printf("%s - find potentially duplicate images\n", prog); - printf("USAGE: %s [OPTIONS] DIR ...\n", prog); + printf("USAGE: %s [OPTIONS] [DIR ...]\n", prog); printf("OPTIONS:\n" " -d n max directory recursion depth; default: 0 (unlimited)\n" " -f file database file\n" @@ -157,7 +157,8 @@ static void usage(char *pname, int ec) { /* main function */ int main(int argc, char *argv[]) { - int c, rc = 0, n; + int c, rc = 0, n, err = 0; + char **dirs = NULL; char *cp; db_t *db ; const char *lead_in = @@ -225,43 +226,42 @@ int main(int argc, char *argv[]) { } if ( cfg.db_prune ) { - dprintf("pruning\n"); + dprintf("pruning missing files from database\n"); db_prune(db); } q = q_init(); - if (optind >= argc) { - dprintf("scanning '.'\n"); - rc = scan_dir(".", q, db); - } else { - while ( optind < argc ){//&& 0 == rc ) { - dprintf("scanning '%s'\n", argv[optind]); - q_reset_complete(q); - rc = scan_dir(argv[optind++], q, db); + dirs = optind < argc ? 
&argv[optind] : (char *[]){".", NULL}; + for ( int i = 0; NULL != dirs[i]; ++i ) { + dprintf("scanning '%s'\n", dirs[i]); + q_reset_complete(q); + rc = scan_dir(dirs[i], q, db); + if ( 0 != rc ) { + eprintf("ERROR: scanning '%s' failed\n", dirs[i]); + ++err; } } q_destroy(&q); - if ( 0 != rc ) - goto DONE; if ( NULL != cfg.db_outfile ) { int cnt; cnt = db_write(db, cfg.db_outfile); if ( 0 <= cnt ) dprintf("%d entries written\n", cnt); - else - eprintf("writing '%s' failed\n", cfg.db_outfile); + else { + eprintf("ERROR: writing '%s' failed\n", cfg.db_outfile); + ++err; + } } - dprintf("searching dupes ...\n"); + dprintf("searching for potential duplicates ...\n"); printf("%s", lead_in); - rc = db_find_dupes(db, cfg.thresh, find_dupes_cb); + db_find_dupes(db, cfg.thresh, find_dupes_cb); printf("END\n"); - DONE: db_destroy(&db); - dprintf("done.\n"); - exit(rc ? EXIT_FAILURE : EXIT_SUCCESS); + dprintf("done, encountered %d error%s\n", err, err==1?"":"s"); + exit(err ? EXIT_FAILURE : EXIT_SUCCESS); } /* EOF */ -- 2.30.2