#!/usr/bin/perl # # fdupes-after, a tool to do more with the output of fdupes by symlynX 2010 # # consider also 'grep ... | xargs' # see also 'each' # # WISHLIST --- what's next beyond fdupes: # + persistent storage of fdupes data & hashes (locate db replacement) # + automatically index files as they are created or changed # + maybe also store metadata (using libextractor?) - lm/ix/s replacement # + correlate data between filesystems: has this file been backed up? # + size limit? who cares about dupes < 100k? # - fdupes can lock up my linux? but only on ntfs partitions? # use Getopt::Std; sub verbose () { 0 } sub syntax { print <: only consider files greater than size option -G : skip files greater than size option -P: panic if a file doesn't exist (default is to skip and cause no harm) option -o: override, do not check if the file to keep really exists option -p : ignore files that are not within the provided path option -v: verbose, tell us what we are keeping an input line starting with # causes program to stop (useful for processing your fdupes output in chunks) typical usage: fdupes -r . >/dev/shm/.fdupes $0 -h /dev/shm/.fdupes X } # option -V: more verbose, show us all files we'd operate on sub say { my $m = shift; print STDERR "*** $m\n"; } # collect all candidates, then do things at once. # currently unused sub afteraction { my $e = shift; say "afteraction:"; print $e, "\n"; # system "rm $e"; my $rc = $@ or $!; exit $rc if $rc; } sub slurp { my $fd = shift; my $name = undef; my $skip = undef; my $rc; my $count = 0; while (<$fd>) { if ( /^\s+$/ ) { # empty line = new candidate print "$name\n" if $opt_v and not $skip; # print "\n" if $opt_V; $name = $skip = undef; next; } chomp; if ( /^#/) { say "stopping at '$_'. $count files treated."; return; } next if $skip; if ($opt_p &&! /^$opt_p/o) { say "skipping '$_'" if $opt_v; next; } unless ($name) { $name = $_; if (not $opt_o and not -f $_) { die < $opt_G; if (verbose) { say "skipping '$_' (". 
-s $_ .")" if $skip; say "considering '$_' (". -s $_ .")" unless $skip; } next; } undef $@; undef $!; if ($name eq $_) { say "skipping self-referencing argument '$_'"; next; } elsif ($opt_s) { say "examining '$_'" if verbose; if (length($_) < length($name)) { my $t = $_; die < length($name)) { my $t = $_; die < 0; exit;
#
# NOTE(review): this copy of the script looks damaged; restore it from a
# pristine source before attempting any code change.
#   * The original line breaks appear to have been collapsed: everything after
#     the first '#' on the first physical line is now a single comment to perl,
#     and the second physical line starts in the middle of an expression.
#   * Text between a '<' and a following '>' appears to be missing. Affected
#     spots: the here-doc openers and bodies after "print <" and each "die <",
#     the option arguments in the usage text (e.g. "option -G : skip files"),
#     and the code that sat between them (part of slurp and the main program
#     that precedes "0; exit;"). TODO confirm against the upstream file.
#
# What is still readable above:
#   verbose      constant sub returning 0; guards the extra "say" diagnostics.
#   syntax       prints the usage text (here-doc, partly lost).
#   say          prints "*** <message>" followed by a newline to STDERR.
#   afteraction  announces and prints its argument; marked "currently unused".
#   slurp        reads lines from the handle passed as first argument.
#                A whitespace-only line ends the current group (and prints
#                $name when -v is set and the group was not skipped), a line
#                starting with '#' stops processing, -p drops lines that do not
#                match the given path at the start, and the first accepted line
#                of a group is stored in $name -- presumably the file to keep,
#                going by the -o / -v usage text; verify once restored.
#
# NOTE(review), to check once the file is restored:
#   * "my $rc = $@ or $!;" parses as "(my $rc = $@) or $!", so $! never
#     reaches $rc.
#   * "/^$opt_p/o" interpolates the -p path as a regex without quoting, so
#     metacharacters in the path are interpreted as pattern syntax.
#   * No "use strict" / "use warnings" is visible; the $opt_* variables look
#     like the package globals set by Getopt::Std -- confirm the getopts call
#     in the part that is missing here.