#!/usr/bin/perl -w
# $Revision: 1.1 $
# $Date: 2007-05-08 15:13:34 $
# Luis Mondesi < lemsx1@gmail.com >
#
# DESCRIPTION: diff two files (after sorting by a key)
# USAGE: $0 file0 file1
# LICENSE: GPL
# BUGS:
# * assumes lines are unique
# * if -k is passed, it assumes the field is unique. try using -k and without it to compare output
=pod

=head1 NAME

diff-lists - diff two list files after sorting them by a key

=head1 DESCRIPTION

This script takes two files as input and prints a diff-style output. It combines the functionality of diff and sort.

=cut
use strict;
$|++;
my $revision = '1.1'; # version
# standard Perl modules
use Getopt::Long;
Getopt::Long::Configure('bundling');
#use POSIX; # cwd() ... man POSIX
#use File::Spec::Functions; # abs2rel() and other dir/filename specific
#use File::Copy;
#use File::Find; # find();
use File::Basename; # basename() && dirname()
#use FileHandle; # for progressbar
use Digest::MD5 qw/ md5_hex /;
# Command-line settings and their defaults.  GetOptions() further down
# overwrites these with whatever the user passed.
my ( $PVERSION, $HELP, $USAGE ) = ( 0, 0, 0 );    # -v, -h, -U: print something and exit
my ( $DEBUG, $VERBOSE ) = ( 0, 0 );               # -D, -V: extra output
my $DELIMITER = ",";                              # -d: field separator
my $SKIP      = "";                               # -s: comma-separated field numbers to ignore
my $KEY       = 0;                                # -k: field number used as key (0 = none given)
=pod

=head1 SYNOPSIS

B<diff-lists>
[-D,--debug]
[-d,--delimiter S]
[-h,--help]
[-k,--key N]
[-s,--skip LIST]
[-U,--usage]
[-v,--version]
[-V,--verbose]
file0 file1

=head1 OPTIONS

=over 8

=item -v,--version

Prints version and exits

=item -D,--debug

Enables debug mode

=item -h,--help

Prints this help and exits

=item -U,--usage

Prints usage information and exits

=item -d,--delimiter STRING

String to use as separator (default ,)

=item -s,--skip LIST

Comma-separated list of field numbers (counting from 1) to skip when comparing

=item -k,--key N

Use field N (counting from 1) as the key when sorting and when making the list unique

=item -V,--verbose

Be verbose and display warning messages as script finds errors

=back

=cut
# Parse the command line.  Every option stores its value in the file-level
# variable of the same purpose; whatever is left in @ARGV afterwards is
# treated as the input files.
#
# GetOptions() returns false when it met an unknown or malformed option (it
# has already warned about it by then).  Stop with the usage text in that
# case instead of carrying on with half-parsed settings.
GetOptions(
    # flags
    'v|version' => \$PVERSION,
    'h|help'    => \$HELP,
    'D|debug'   => \$DEBUG,
    'U|usage'   => \$USAGE,
    'V|verbose' => \$VERBOSE,

    # options that take a value
    'd|delimiter=s' => \$DELIMITER,
    's|skip=s'      => \$SKIP,
    'k|key=i'       => \$KEY,
) or _usage();
# -h/--help: render this script's own POD as plain text (78 columns wide)
# on STDOUT, then quit successfully.
if ($HELP)
{
    use Pod::Text;
    my %format        = ( sentence => 0, width => 78 );
    my $pod_formatter = Pod::Text->new(%format);
    $pod_formatter->parse_from_file( $0, \*STDOUT );
    exit 0;
}
# _usage()
#
# Takes no arguments.  Hands over to Pod::Usage::pod2usage, asking it for an
# exit value of 1; pod2usage prints the usage text and ends the program, so
# callers should not expect this sub to return.
sub _usage
{
    use Pod::Usage;
    pod2usage( -exitval => 1 );
}
# -U/--usage: delegate to _usage().  The exit that follows is only a safety
# net; the original author notes it is never reached.
if ($USAGE) { _usage(); exit 0; }

# -v/--version: print the script revision on its own line and stop.
if ($PVERSION)
{
    print STDOUT "$revision\n";
    exit 0;
}
# Main: load the input files and print a diff-style report.
#
# Each readable argument is turned into a hash reference (key => chomped
# line) by split_to_hash(); an argument that is missing or not readable
# yields undef.  `defined` is used instead of plain truth so that a file
# literally named "0" is accepted.
my $file0 = ( defined $ARGV[0] and -r $ARGV[0] ) ? split_to_hash( $ARGV[0] ) : undef;
my $file1 = ( defined $ARGV[1] and -r $ARGV[1] ) ? split_to_hash( $ARGV[1] ) : undef;

if ( $file0 and $file1 )
{
    # Hash keys are unique, so a plain string sort fully orders them.
    my @file0_sorted = sort keys %$file0;
    my @file1_sorted = sort keys %$file1;

    print "--- " . basename( $ARGV[0] ) . "\n";
    print "+++ " . basename( $ARGV[1] ) . "\n";

    # Algorithm:
    #
    # if line exists only in file0, we print as removed from file1 (prepend -)
    # if line exists only in file1, we print as added in file1 (prepend +)
    # if line exists in both but the line content is not the same, we print
    #   as modified (prepend - to file0 and + to file1)
    # if line exists in both and they are the same, we skip them
    #
    # The number shown in each @@ header is the 1-based position of the key
    # in that file's sorted key list, not a line number of the input file.
    my $line_n = 0;
    foreach my $_k (@file0_sorted)
    {
        $line_n++;
        if ( exists $file1->{$_k} )
        {
            # Same key on both sides: compare with the --skip fields removed.
            next if ( clean_line( $file0->{$_k} ) eq clean_line( $file1->{$_k} ) );
            print "\@\@ $line_n modified \@\@\n";

            # TODO find differences and color output
            print "-" . $file0->{$_k} . "\n";
            print "+" . $file1->{$_k} . "\n";
        }
        else
        {
            print "\@\@ $line_n removed \@\@\n";
            print "-" . $file0->{$_k} . "\n";
        }
    }

    # Second pass: keys that only the second file has.
    $line_n = 0;
    foreach my $_k (@file1_sorted)
    {
        $line_n++;
        next if ( exists $file0->{$_k} );
        print "\@\@ $line_n added \@\@\n";
        print "+" . $file1->{$_k} . "\n";
    }
}
elsif ($file0)
{
    # Only the first file could be read: print its lines ordered by key.
    # The stored lines were chomped, so the newline is added back here.
    print $file0->{$_}, "\n" for sort keys %$file0;
}
else
{
    # Nothing usable to work on: say why (if a name was given) and show usage.
    warn "Cannot read '$ARGV[0]'\n" if ( defined $ARGV[0] );
    _usage();
}
# split_to_hash($path)
#
# Reads the file at $path and returns a reference to a hash that maps a key
# to the (chomped) line the key was derived from.
#
#   * With -k/--key N ($KEY is non-zero) the key is the N-th field of the
#     line, fields being separated by $DELIMITER and N counting from 1.
#     Lines that do not have an N-th field are skipped; a warning is printed
#     for each of them when $VERBOSE is set.
#   * Without -k the key is the MD5 hex digest of clean_line($line).
#
# Lines are visited in clean_line() sort order, so when two lines yield the
# same key, the one that sorts last is the one that is kept.
#
# Returns a reference to an empty hash when $path is undefined.
# Dies when the file cannot be opened.
sub split_to_hash
{
    my $f     = shift;
    my %_file = ();
    return \%_file if ( not defined $f );
    print $f, "\n" if ($DEBUG);

    # Three-argument open with a lexical handle: the file name can no longer
    # be misread as an open mode, and the handle is private to this sub.
    open( my $fh, '<', $f ) or die("Cannot open $f. $!\n");
    my @lines = <$fh>;
    close($fh);

    my @sorted_lines = sort { clean_line($a) cmp clean_line($b) } @lines;

    # Turn the 1-based field number into an array index (never below 0).
    my $key_index = 0;
    if ($KEY)
    {
        $key_index = $KEY - 1;
        $key_index = 0 if ( $key_index < 0 );
    }

    foreach my $line (@sorted_lines)
    {
        chomp($line);
        my $key;
        if ($KEY)
        {
            # \Q..\E: the delimiter is a plain string, not a pattern, so
            # characters such as | or . must not act as regex operators.
            # Limit -1 keeps trailing empty fields so the count is accurate.
            my @fields = split( /\Q$DELIMITER\E/, $line, -1 );

            # The key field must actually exist; index $key_index needs at
            # least $key_index + 1 fields.
            if ( @fields <= $key_index )
            {
                warn "Skipped line '$line'\n" if ($VERBOSE);
                next;
            }
            $key = $fields[$key_index];
        }
        else
        {
            $key = md5_hex( clean_line($line) );
        }
        print "key: $key ($line)\n" if ($DEBUG);
        $_file{$key} = $line;
    }
    return \%_file;
}
# clean_line($line)
#
# Returns $line with the fields named in --skip removed, so that two lines
# can be compared while ignoring those fields.
#
# The line is split on $DELIMITER, every field whose 1-based position is
# listed in $SKIP (a comma-separated list of numbers) is dropped, and the
# remaining fields are joined with $DELIMITER again.
#
# Returns "" when $line is undefined.  Prints the line when $DEBUG is set.
sub clean_line
{
    my $l = shift;
    return "" if ( not defined $l );
    print "LINE: ", $l, "\n" if ($DEBUG);

    # \Q..\E: the delimiter is a plain string, not a pattern, so characters
    # such as | or . must not act as regex operators.  Limit -1 keeps
    # trailing empty fields; without it "a,b" and "a,b,," would look equal.
    my @fields = split( /\Q$DELIMITER\E/, $l, -1 );

    # Positions to drop, looked up by number.  Entries that are not plain
    # non-negative integers are ignored.
    my %skip_field;
    foreach my $entry ( split( /,/, $SKIP ) )
    {
        $skip_field{ $1 + 0 } = 1 if ( $entry =~ /^\s*(\d+)\s*$/ );
    }

    # Field positions are 1-based, array indices 0-based.
    my @kept = map { $fields[$_] } grep { not $skip_field{ $_ + 1 } } 0 .. $#fields;
    return join( $DELIMITER, @kept );
}
=pod

=head1 AUTHORS

Luis Mondesi <lemsx1@gmail.com>

=cut