
# Flush STDOUT after every write so progress messages show up immediately.
$| = 1;

# F1 and F2 are the bareword file handles shared by the subs below.
local (*F1, *F2);

# Maps a file size to the list of path names that have that size.
my %farray = ();

# [link count, inode number] of the file most recently opened on F1.
my $statF1;
# ------------------------------
# scan DIR
#
# Recursively walk the directory tree rooted at DIR and record every file
# found in the file-level hash %farray: the key is the file size as reported
# by -s, the value is an array of path names with that size.
#
# Symbolic links, sockets, named pipes and character/block devices are
# skipped.  Because the symlink test comes first, symlinked directories are
# not followed.  Dies if a directory cannot be opened.
sub scan ($) {
    my ($dir) = @_;

    # A lexical handle keeps each recursion level independent of the others.
    opendir(my $dh, $dir) or die "($dir) $!";

    # Drop the "." and ".." entries, otherwise the recursion never ends.
    my @entries = grep { !/^\.\.?$/ } readdir($dh);
    closedir($dh);

    for my $path (map { "$dir/$_" } @entries) {
        next if -l $path or -S $path or -p $path or -c $path or -b $path;

        if (-d $path) {
            scan($path);
        }
        else {
            my $size = -s $path;
            push @{ $farray{$size} }, $path;
        }
    }
    return;
}
# ------------------------------
# getchunk FSIZE, PFNAME
#
# Open the file named by the scalar reference PFNAME on the shared handle F1
# (left open for the caller), record its [link count, inode number] in the
# file-level $statF1, and return the first 32 bytes of the file, or the whole
# content when the file is shorter and its length equals FSIZE.
#
# Returns undef when the file cannot be opened or read, or when fewer bytes
# than expected were available.  Callers use the result in scalar context.
sub getchunk ($$) {
    my ($fsize, $pfname) = @_;
    my $chunksize = 32;

    # Three-argument open takes the name literally: no whitespace trimming
    # and no mode characters smuggled in through the file name.
    open(F1, '<', $$pfname) or return undef;
    binmode F1;

    # stat fields 3 and 1 are the link count and the inode number.
    $statF1 = [ (stat F1)[3, 1] ];

    my $buff;
    my $nread = read(F1, $buff, $chunksize);
    return undef unless defined $nread;    # read error

    # -s yields an empty value for zero-length files; treat that as 0.
    my $expected = $fsize || 0;
    return "$buff" if $nread == $chunksize || $nread == $expected;
    return undef;
}
# ------------------------------
# mycmp FPTR
#
# Compare the file currently open on the shared handle F1 with the file named
# by the scalar reference FPTR.
#
# Returns 0 when the two files are identical and -1 when they differ or when
# the second file cannot be opened or read.  F1 is rewound to its start and
# stays open; F2 is closed again before returning.
sub mycmp ($) {
    my ($fptr) = @_;
    my $buffsize = 16 * 1024;

    # Three-argument open takes the file name literally.
    open(F2, '<', $$fptr) or return -1;

    # Hard-link shortcut: same device and inode means the very same file, so
    # the content need not be read.  Inode numbers are only unique within one
    # device, hence the device comparison.
    my ($dev1, $ino1) = (stat F1)[0, 1];
    my ($dev2, $ino2, $nlink2) = (stat F2)[0, 1, 3];
    if (   defined $ino1
        && defined $ino2
        && $nlink2 > 1
        && $dev1 == $dev2
        && $ino1 == $ino2)
    {
        close F2;
        return 0;
    }

    binmode F2;
    seek(F1, 0, 0);

    my $differ = 0;
    while (1) {
        my ($buffa, $buffb);
        my $nread1 = read(F1, $buffa, $buffsize);
        my $nread2 = read(F2, $buffb, $buffsize);

        # A read error (undef) on either side counts as a difference.
        if (   !defined $nread1
            || !defined $nread2
            || $nread1 != $nread2
            || $buffa ne $buffb)
        {
            $differ = 1;
            last;
        }
        last unless $nread1;    # both files reached end-of-file together
    }

    close F2;
    return $differ ? -1 : 0;
}
# ------------------------------
print "collecting files and sizes ...\n";
if (-t STDIN) {
    # Interactive use: scan the directories given on the command line,
    # falling back to the working directory when none are given.
    @ARGV = ('.') unless @ARGV;
    scan($_) for @ARGV;
}
else {
    # File names are piped in, one per line.
    while (my $name = <STDIN>) {
        $name =~ s/[\r\n]+\z//;    # strip LF or CRLF line endings

        # Only plain, existing files can be compared; never follow symlinks.
        next if -l $name or not -f $name;

        my $size = -s $name;
        push @{ $farray{$size} }, $name;
    }
}

print "now comparing ...\n";
for my $fsize (sort { $b <=> $a } keys %farray) {
    my $files = $farray{$fsize};

    # skip files with unique file size
    next if @$files < 2;

    my $pnum = 0;    # number of distinct contents seen so far for this size
    my %dupes;       # group number => refs to the names of identical files
    my %index;       # leading chunk => group numbers that start with it

  FILE:
    for my $nx (0 .. $#$files) {
        my $fptr  = \$files->[$nx];             # ref to the current file name
        my $chunk = getchunk($fsize, $fptr);    # also opens the file on F1

        unless (defined $chunk) {
            warn "skipping unreadable file: $$fptr\n";
            next FILE;
        }

        # Fully compare only against groups whose leading chunk matches.
        for my $i (@{ $index{$chunk} || [] }) {
            my $fref = $dupes{$i}[0];
            unless (mycmp($fref)) {
                # found duplicate, collecting
                push @{ $dupes{$i} }, $fptr;
                next FILE;
            }
        }

        # nothing found, start a new group
        push @{ $dupes{$pnum} }, $fptr;
        push @{ $index{$chunk} }, $pnum++;
    }

    # show found dupes for actual size, in the order the groups were created
    my $shown = $fsize || 0;    # -s reports an empty value for empty files
    for my $i (sort { $a <=> $b } keys %dupes) {
        next if @{ $dupes{$i} } < 2;
        print "\n size: $shown\n\n";
        print $$_, "\n" for @{ $dupes{$i} };
    }
}
close F1;
close F2;
# Stray web-page navigation text left over from extraction (not part of the script):
# 新闻热点
# 疑难解答