-# Imdupe
+# Imgdupe
-Imdupe is a command line tool to find potential duplicate images in a
+Imgdupe is a command line tool to find potential duplicate images in a
directory tree by comparing perceptual hash values of image files. It
writes a shell script to stdout which can in turn be used to further
process the scan results.
-The perceptual hash for an image is calculated by:
+The perceptual hash for an image is calculated by applying the following
+transformations in the given order:
- * flattening the image (merging layers/frames)
- * transforming the color space to gray scale
- * resizing to 16 by 16 pixel using box blur
- * normalizing contrast and brightness
- * conversion to monochromatic image
- * interpreting the pixel values as 256 bit hash value
+ * flatten image (merge layers/frames)
+ * transform color space to gray scale
+ * resize to 16 by 16 pixels using box blur
+ * normalize
+ * convert to hard black and white
+ * interpret the resulting pixel values as a 256-bit hash value
## Usage
```
-imgdupe [OPTIONS] DIR ...
+imgdupe [OPTIONS] [DIR ...]
OPTIONS:
-d n max directory recursion depth; default: 0 (unlimited)
```
**Notes**
+ * If no directory is specified on the command line, `imgdupe` scans
+ the current working directory.
+
* `-d` limits the recursion depth when scanning a directory tree; a
- value of 1 causes `imdupe` to only scan files in the directories
+ value of 1 causes `imgdupe` to only scan files in the directories
specified on the command line and not descend into any sub-directories,
0 (the default) means unlimited depth.
* `-I` allows to include a custom lead-in in the output, which is in
particular useful to replace the default dummy versions of the `VIEW()`
- and `END()` functions which are used by `imdupe` to structure its
+ and `END()` functions which are used by `imgdupe` to structure its
output.
* `-T` allows to set the number of hashing threads to use; for optimal
## Build
The [GraphicsMagick](http://www.graphicsmagick.org/) library must be
-installed as a prerequisite prior to building `imdupe`, as it is required
+installed as a prerequisite prior to building `imgdupe`, as it is required
to manipulate images during perceptual hash calculation.
-Run `make` in the project directory to build the `imdupe` executable.
+Run GNU `make` in the project directory to build the `imgdupe` executable.
Though it was only tested on GNU/Linux, it should work with few (if any)
modifications on other Unix-like systems.
## License
-Imdupe is distributed under the Modified ("3-clause") BSD License.
+Imgdupe is distributed under the Modified ("3-clause") BSD License.
See `LICENSE` file for more information.
----------------------------------------------------------------------
rc = db_insert(thrinf->db, entry, cfg.rescan, cfg.blur);
if ( 0 != rc ) {
if ( 0 > rc )
- eprintf("not fingerprinting '%s'\n", entry->fname);
+ eprintf("WARNING: not fingerprinting '%s'\n", entry->fname);
s_free(entry->fname);
s_free(entry);
}
s_free(dirpath);
}
else
- eprintf("%s: '%s'\n", strerror(errno), dir);
+ eprintf("ERROR: '%s': %s\n", dir, strerror(errno));
/* wait for workers to finish */
q_set_complete(q);
for ( int i = 0; i < cfg.nthreads; ++i )
static void usage(char *pname, int ec) {
char *prog = basename(pname);
printf("%s - find potentially duplicate images\n", prog);
- printf("USAGE: %s [OPTIONS] DIR ...\n", prog);
+ printf("USAGE: %s [OPTIONS] [DIR ...]\n", prog);
printf("OPTIONS:\n"
" -d n max directory recursion depth; default: 0 (unlimited)\n"
" -f file database file\n"
/* main function */
int main(int argc, char *argv[]) {
- int c, rc = 0, n;
+ int c, rc = 0, n, err = 0;
+ char **dirs = NULL;
char *cp;
db_t *db ;
const char *lead_in =
}
if ( cfg.db_prune ) {
- dprintf("pruning\n");
+ dprintf("pruning missing files from database\n");
db_prune(db);
}
q = q_init();
- if (optind >= argc) {
- dprintf("scanning '.'\n");
- rc = scan_dir(".", q, db);
- } else {
- while ( optind < argc ){//&& 0 == rc ) {
- dprintf("scanning '%s'\n", argv[optind]);
- q_reset_complete(q);
- rc = scan_dir(argv[optind++], q, db);
+ dirs = optind < argc ? &argv[optind] : (char *[]){".", NULL};
+ for ( int i = 0; NULL != dirs[i]; ++i ) {
+ dprintf("scanning '%s'\n", dirs[i]);
+ q_reset_complete(q);
+ rc = scan_dir(dirs[i], q, db);
+ if ( 0 != rc ) {
+ eprintf("ERROR: scanning '%s' failed\n", dirs[i]);
+ ++err;
}
}
q_destroy(&q);
- if ( 0 != rc )
- goto DONE;
if ( NULL != cfg.db_outfile ) {
int cnt;
cnt = db_write(db, cfg.db_outfile);
if ( 0 <= cnt )
dprintf("%d entries written\n", cnt);
- else
- eprintf("writing '%s' failed\n", cfg.db_outfile);
+ else {
+ eprintf("ERROR: writing '%s' failed\n", cfg.db_outfile);
+ ++err;
+ }
}
- dprintf("searching dupes ...\n");
+ dprintf("searching for potential duplicates ...\n");
printf("%s", lead_in);
- rc = db_find_dupes(db, cfg.thresh, find_dupes_cb);
+ db_find_dupes(db, cfg.thresh, find_dupes_cb);
printf("END\n");
- DONE:
db_destroy(&db);
- dprintf("done.\n");
- exit(rc ? EXIT_FAILURE : EXIT_SUCCESS);
+ dprintf("done, encountered %d error%s\n", err, err==1?"":"s");
+ exit(err ? EXIT_FAILURE : EXIT_SUCCESS);
}
/* EOF */