#!/usr/bin/perl
#
# fdupes-after, a tool to do more with the output of fdupes by symlynX 2010
#
# consider also 'grep ... | xargs'
# see also 'each'
#
# WISHLIST --- what's next beyond fdupes:
#	+ persistent storage of fdupes data & hashes (locate db replacement)
#	+ automatically index files as they are created or changed
#	+ maybe also store metadata (using libextractor?) - lm/ix/s replacement
#	+ correlate data between filesystems: has this file been backed up?
#	+ size limit? who cares about dupes < 100k?
#	- fdupes can lock up my linux? but only on ntfs partitions?
#
use Getopt::Std;

sub verbose () { 0 }

sub syntax {
	print <<X;
fdupes-after: do things with the output of an 'fdupes' (without -f)

action -h: turn identical files into hardlinks
		CAUTION: when '/bin/ln -f' cannot create a hard link,
			 the file will end up being deleted!
action -d: delete copies of files
action -D: shred them (you need 'shred' in your path)
without an action, a test run is performed

option -s: keep the candidate with the shortest path instead of the first
option -S: keep the candidate with the longest path instead of the first
option -g <size>: only consider files greater than size
option -G <size>: skip files greater than size
option -P: panic if a file doesn't exist (default is to skip and cause no harm)
option -o: override, do not check if the file to keep really exists
option -p <prefix>: ignore files that are not within the provided path
option -v: verbose, tell us what we are keeping

an input line starting with # causes program to stop
(useful for processing your fdupes output in chunks)

typical usage:
	fdupes -r . >/dev/shm/.fdupes
	$0 -h /dev/shm/.fdupes
X
}
# option -V: more verbose, show us all files we'd operate on

sub say {
	my $m = shift;
	print STDERR "*** $m\n";
}

# collect all candidates, then do things at once.
# currently unused
sub afteraction {
	my $e = shift;
	say "afteraction:";
	print $e, "\n";
#	system "rm $e";
	my $rc = $@ or $!;
	exit $rc if $rc;
}

sub slurp {
	my $fd = shift;
	my $name = undef;
	my $skip = undef;
	my $rc;
	my $count = 0;

	while (<$fd>) {
		if ( /^\s+$/ ) {
			# empty line = new candidate
			print "$name\n" if $opt_v and not $skip;
#			print "\n" if $opt_V;
			$name = $skip = undef;
			next;
		}
		chomp;
		if ( /^#/) {
			say "stopping at '$_'. $count files treated.";
			return;
		}
		next if $skip;
		if ($opt_p &&! /^$opt_p/o) {
			say "skipping '$_'" if $opt_v;
			next;
		}
		unless ($name) {
			$name = $_;
                        if (not $opt_o and not -f $_) {
				die <<X if $opt_P;
'$_' does not exist! $count files treated.
X
                                say "skipping '$_': it does not exist!";
                                $skip=1;
                                next;
                        }
			$skip=1 if $opt_g and -s $_ < $opt_g;
			$skip=1 if $opt_G and -s $_ > $opt_G;
			if (verbose) {
			    say "skipping '$_' (". -s $_ .")" if $skip;
			    say "considering '$_' (". -s $_ .")" unless $skip;
			}
			next;
		}

		undef $@; undef $!;
		if ($name eq $_) {
			say "skipping self-referencing argument '$_'";
			next;
		} elsif ($opt_s) {
			say "examining '$_'" if verbose;
			if (length($_) < length($name)) {
				my $t = $_;
				die <<X if not $opt_o and not -f $_;
'$_' does not exist! $count files treated.
X
				say "using '$_' instead" if verbose;
				$_ = $name;
				$name = $t;
			}
		} elsif ($opt_S) {
			say "examining '$_'" if verbose;
			if (length($_) > length($name)) {
				my $t = $_;
				die <<X if not $opt_o and not -f $_;
'$_' does not exist! $count files treated.
X
				say "using '$_' instead" if verbose;
				$_ = $name;
				$name = $t;
			}
		}
		if ($opt_h) {
##
## CAUTION:
## when 'ln -f' cannot create a hard link, the file will end up being deleted!
##
			print "ln -f \"$name\" \"$_\"\n" if $opt_v;
			#$rc = `ln -f "$name" "$_"`;
			$rc = system('/bin/ln', '-f', $name, $_);
			++$count unless $rc;
		} elsif ($opt_d) {
			my $ndeleted = unlink($_);
			$count += $ndeleted;
			$rc = $ndeleted != 1;
		} elsif ($opt_D) {
			print "shred \"$_\"\n" if $opt_v;
			#$rc = `shred "$_"`;
			$rc = system('shred', $_);
			++$count unless $rc;
#		} elsif ($opt_a) {
#			say "pushing '$_' into afteraction queue" if verbose;
#			push @after, $_;
		} else {
			print "$_\n"; # if $opt_V;
			++$count unless $rc;
		}
		say "$_: $rc /@ $@ /! $! /" if $rc or $! or $@;
	}
	say "finished input. $count files treated.";
}

MAIN:
	getopt('pgG');

	my @a = @ARGV;
	my $f = shift;

	if ( -r $f ) {
		shift @a;
		open (I, $f) or die $!;
		slurp *I;
		close I;
		goto OPER8;
	}
	if ( $f eq '-' ) {
		slurp *STDIN;
		goto OPER8;
	}
	&syntax;
	exit;
OPER8:
#	afteraction join(' ', @after) if $#a > 0;
	exit;

