| Chess | Tools | Data | Blog | Poetry | Why? | Wiki | Admin | Logout |

Download


#!/usr/bin/perl -w
# $Revision: 1.1 $
# $Date: 2007-05-08 15:13:34 $
# Luis Mondesi < lemsx1@gmail.com >
#
# DESCRIPTION: diff two files (after sorting by a key)
# USAGE: $0 file0 file1
# LICENSE: GPL
# BUGS:
# * assumes lines are unique
# * if -k is passed, it assumes the field is unique. try using -k and without it to compare output

=pod

=head1 NAME

diff-lists - diff-lists script for Perl

=head1 DESCRIPTION 

    This script takes two files as input and returns a diff output. It combines the functionality of diff and sort

=cut

use strict;

# Switch on autoflush for the currently selected output handle.
$| = 1;

# Version string reported by -v/--version.
my $revision = '1.1';

# standard Perl modules
use Getopt::Long;
Getopt::Long::Configure('bundling');
#use POSIX;                    # cwd() ... man POSIX
#use File::Spec::Functions;    # abs2rel() and other dir/filename specific
#use File::Copy;
#use File::Find;               # find();
use File::Basename;           # basename() && dirname()
#use FileHandle;               # for progressbar
use Digest::MD5 qw/ md5_hex /;

# Command-line option state. Every value below is a default that
# GetOptions() may overwrite.
my ( $PVERSION, $HELP, $USAGE, $DEBUG, $VERBOSE ) = ( 0, 0, 0, 0, 0 );    # boolean flags
my $DELIMITER = ",";    # -d: field separator
my $SKIP      = "";     # -s: comma separated list of field numbers to ignore
my $KEY       = 0;      # -k: field number used as the key (0 = no key field)

=pod

=head1 SYNOPSIS

B<diff-lists>
                [-D,--debug] 
                [-d,--delimiter S]
                [-h,--help]
                [-k,--key N]
                [-s,--skip LIST]
                [-U,--usage]
                [-v,--version]
                [-V,--verbose]

=head1 OPTIONS

=over 8

=item -v,--version

Prints version and exits

=item -D,--debug

Enables debug mode

=item -h,--help

Prints this help and exits

=item -U,--usage

Prints usage information and exits

=item -d,--delimiter STRING

String to use as separator (default ,)

=item -s,--skip LIST

Comma separated list of fields to skip when comparing

=item -k,--key N

Use field N as the key when sorting and when making the list unique

=item -V,--verbose

Be verbose and display warning messages as script finds errors

=back

=cut

# Parse the command line.
#
# GetOptions() returns false when it meets an unknown or malformed option
# (it has already printed a diagnostic at that point). Stop with the usage
# text and a non-zero exit status instead of running the diff with
# half-parsed arguments.
use Pod::Usage;
GetOptions(

    # flags
    'v|version' => \$PVERSION,
    'h|help'    => \$HELP,
    'D|debug'   => \$DEBUG,
    'U|usage'   => \$USAGE,

    # options taking a value
    'd|delimiter=s' => \$DELIMITER,
    's|skip=s'      => \$SKIP,
    'k|key=i'       => \$KEY,
    'V|verbose'     => \$VERBOSE,
) or pod2usage(2);

# -h/--help: render this script's own POD as plain text on STDOUT and stop.
if ($HELP)
{
    use Pod::Text;
    my $parser = Pod::Text->new(sentence => 0, width => 78);
    $parser->parse_from_file($0, \*STDOUT);
    exit 0;
}

# _usage()
#
# Hands over to Pod::Usage::pod2usage() with an exit status of 1, which
# prints the usage text taken from this script's POD.
sub _usage
{
    use Pod::Usage;
    pod2usage( -exitval => 1 );
}

# -U/--usage: show the usage text. The exit is only a safety net; control
# is not expected to come back from _usage().
do { _usage(); exit 0 } if $USAGE;

# -v/--version: print the revision number and stop.
if ($PVERSION)
{
    print STDOUT $revision, "\n";
    exit 0;
}

# Load each input file (when given and readable) into a key => line hash.
# defined() is used instead of a plain truth test so that a file literally
# named "0" is still accepted.
my $file0 = ( defined $ARGV[0] and -r $ARGV[0] ) ? split_to_hash($ARGV[0]) : undef;
my $file1 = ( defined $ARGV[1] and -r $ARGV[1] ) ? split_to_hash($ARGV[1]) : undef;

if ( $file0 and $file1 )
{
    # Hash keys are unique, so a plain string sort already gives a total
    # order; no tie-breaker is needed.
    my @file0_sorted = sort keys %$file0;
    my @file1_sorted = sort keys %$file1;

    #print "Sorted 0: ".join("\n",@file0_sorted)."\n";
    #print "Sorted 1: ".join("\n",@file1_sorted)."\n";

    print "--- ".basename($ARGV[0])."\n";
    print "+++ ".basename($ARGV[1])."\n";

    # Algorithm:
    #
    # if line exists only in file0, we print as removed from file1 (prepend -)
    # if line exists only in file1, we print as added in file1 (prepend +)
    # if line exists in both but the line content is not the same, we print
    #   as modified (prepend - to file0 and + to file1)
    # if line exists in both and they are the same, we skip them
    #
    # $line_n is the position of the key in the sorted key list of the file
    # being walked, not a line number of the original file.

    # Pass 1: everything that was removed from, or changed since, file0.
    my $line_n = 0;
    foreach my $_k (@file0_sorted)
    {
        $line_n++;
        if ( exists $file1->{$_k} )
        {
            next if ( clean_line($file0->{$_k}) eq clean_line($file1->{$_k}) );
            print "\@\@ $line_n modified \@\@\n";
            # TODO find differences and color output
            print "-".$file0->{$_k}."\n";
            print "+".$file1->{$_k}."\n";
        } else {
            print "\@\@ $line_n removed \@\@\n";
            print "-".$file0->{$_k}."\n";
        }
    }

    # Pass 2: everything that only exists in file1.
    $line_n = 0;
    foreach my $_k (@file1_sorted)
    {
        $line_n++;
        next if ( exists $file0->{$_k} );

        print "\@\@ $line_n added \@\@\n";
        print "+".$file1->{$_k}."\n";
    }
}
elsif ($file0)
{
    # Only the first file is usable: list its lines ordered by key. The
    # stored lines were chomped when they were read, so the newline has to
    # be added back here.
    warn "Cannot read '$ARGV[1]'; listing $ARGV[0] only\n" if ( defined $ARGV[1] );
    print $file0->{$_}, "\n" for sort keys %$file0;
}
else
{
    # Nothing to work with: say so instead of exiting silently.
    my $what = defined $ARGV[0] ? "Cannot read '$ARGV[0]'" : "No input file given";
    die "$what\nUSAGE: $0 file0 file1\n";
}

# split_to_hash($path)
#
# Reads the file at $path and returns a reference to a hash holding one
# entry per line (without its trailing newline):
#
#   * with -k/--key N, the hash key is field N of the line after splitting
#     on $DELIMITER; a line with exactly N-1 fields falls back to its first
#     field, and a line with fewer fields than that is skipped;
#   * without -k, the hash key is the MD5 hex digest of clean_line($line).
#
# Lines are processed in clean_line() sort order, so when two lines produce
# the same key the one that sorts last is the one kept.
#
# Returns a reference to an empty hash when $path is undefined.
# Dies when the file cannot be opened.
sub split_to_hash
{
    my $f = shift;
    my %_file = ();
    return \%_file if ( not defined $f );

    print $f,"\n" if ($DEBUG);

    # Three-argument open with a lexical handle: the file name is taken
    # literally (no mode characters or surrounding whitespace are
    # interpreted) and the handle is private to this call.
    open( my $fh, '<', $f ) or die("Cannot open $f. $!\n");
    my @_file = <$fh>;
    close($fh);

    my @_sorted_file = sort { clean_line($a) cmp clean_line($b) } @_file;

    # Convert the 1-based field number from the command line into an
    # array index, never going below the first field.
    my $_key_n = 0;
    if ($KEY)
    {
        $_key_n = $KEY - 1;
        $_key_n = 0 if ( $_key_n < 0 );
    }

    foreach my $line (@_sorted_file)
    {
        chomp($line);
        my $key;
        if ($KEY)
        {
            # if key was passed, attempt to use this as your key
            my @_fields = split( /$DELIMITER/, $line );
            if ( @_fields >= $_key_n )
            {
                $key = ( exists $_fields[$_key_n] ) ? $_fields[$_key_n] : $_fields[0];
            }

            # No key: either the line has too few fields, or it produced
            # no fields at all (e.g. a blank line), so there is nothing to
            # index it by.
            if ( not defined $key )
            {
                warn "Skipped line '$line'\n" if ($VERBOSE);
                next;
            }
        } else {
            $key = md5_hex( clean_line($line) );
        }
        print "key: $key ($line)\n" if ($DEBUG);
        $_file{$key} = $line;
    }
    return \%_file;
}

# clean_line($line)
#
# Returns $line with the fields listed in $SKIP removed. The line is split
# on $DELIMITER (interpolated as a regular expression), fields are numbered
# starting at 1, and the remaining fields are joined back together with
# $DELIMITER.
#
# Returns "" when $line is undefined.
sub clean_line
{
    my $line = shift;
    return "" if ( not defined $line );

    print "LINE: ",$line,"\n" if ($DEBUG);

    # Build the set of field numbers to drop once per call. The "+ 0"
    # normalises each entry numerically, so entries such as " 2" or "02"
    # still select field 2, as they did with a numeric comparison.
    my %_skip = map { ( $_ + 0 ) => 1 } split( /,/, $SKIP );

    # A LIMIT of -1 keeps trailing empty fields. Without it "a,b," and
    # "a,b" clean to the same string and are reported as identical.
    my @fields = split( /$DELIMITER/, $line, -1 );

    my @_out = map { $fields[$_] } grep { not $_skip{ $_ + 1 } } 0 .. $#fields;

    return join( $DELIMITER, @_out );
}

=pod

=head1 AUTHORS

Luis Mondesi <lemsx1@gmail.com>

=cut



Advertisement