21 July 2010

Perl: Finding the MD5 checksum of files recursively in a directory



#! /usr/bin/perl -w
use strict;
use Digest::MD5 qw(md5_base64);
use constant SUCESS => 0; # Exit code: everything was processed.
use constant ERROR => 1; # Exit code: bad usage or an unusable directory.
my $exit_status = 0;
my $digest = Digest::MD5->new;

# At least one directory has to be named on the command line.
unless (@ARGV) {
    print " Inssuficient number of arguments";
    print "\nUsage : $0 [dir1]..\n";
    exit ERROR;
}

#
# Walk over every directory given on the command line.
# An argument that is not a directory, or is not readable,
# is reported and remembered as an error; the remaining
# arguments are still processed.
#
for my $dirname (@ARGV) {
    chomp $dirname;
    $dirname = trimall($dirname);

    unless (-d $dirname) {
        print "$dirname is not directory \n";
        $exit_status = ERROR;
        next;
    }

    unless (-r $dirname) {
        print "$dirname is not readable. \n";
        $exit_status = ERROR;
        next;
    }

    processfiles($dirname);
}

# Terminate the listing with a newline, then report the overall status.
print "\n";
exit $exit_status;

#
# processfiles:
# Read recursively through the directory.
# For every entry print its full path; for every entry that
# is not a directory also print the MD5 check sum returned by
# getmd5checksum (file metadata plus file content).
# Hidden entries (names starting with '.') are included; only
# the '.' and '..' entries themselves are skipped.
#
# parameters:
# string dirname: directory to process.
# return type: integer: 0 on success, 1 if this directory or
# any directory below it could not be resolved or opened.
#
# NOTE(review): directories reached through symbolic links are
# followed, so a link cycle would recurse without end.
#

sub processfiles
{
    use Cwd 'abs_path';

    # abs_path may yield undef when the path cannot be resolved.
    my $dirname = abs_path($_[0]);
    if (!defined $dirname) {
        my $given = defined $_[0] ? $_[0] : '';
        print STDERR "Cannot resolve path '$given': $!\n";
        return 1;
    }

    # A lexical handle is used because this sub calls itself; a failed
    # opendir is reported instead of being silently treated as empty.
    my $dirh;
    if (!opendir($dirh, $dirname)) {
        print STDERR "Cannot open directory $dirname: $!\n";
        return 1;
    }

    #
    # Read all the files and directories excluding the current
    # '.' and parent directory '..' (and nothing else).
    #
    my @files = sort grep { $_ ne '.' && $_ ne '..' } readdir($dirh);
    closedir($dirh);

    my $status = 0;
    foreach my $file (@files) {
        my $fullpath = $dirname . "/" . $file;
        print "\n$fullpath";
        if (-d $fullpath) {
            # Remember a failure below, but keep going with the siblings.
            $status = 1 if processfiles($fullpath) != 0;
        } else {
            print "\t" . getmd5checksum($fullpath);
        }
    }
    return $status;
}

#
# getmd5checksum:
# Generate a base64 encoded MD5 checksum over the string
# [userid] [groupid] [permission] [inode] (from getfileinfo)
# followed by the file contents.
#
# parameters: string file: file of which MD5 check sum need
# to be calculated.
# return type: string: base64 MD5 checksum of the conceptual
# string plus the contents of the file;
# "Not readable" if the file is not readable;
# "" if the file could not be opened.
#

sub getmd5checksum
{
    my $file = shift;
    return "Not readable" if !-r $file;

    # Three-argument open: the name is always treated as a plain path,
    # so names starting with '>' or ending in '|' cannot truncate files
    # or run commands.
    open(my $fh, '<', $file) or return "";

    # Hash the raw bytes; without this the digest differs on platforms
    # that translate line endings.
    binmode($fh);

    # A private digest object, so no state is shared between calls.
    my $md5 = Digest::MD5->new;
    my $fileInfo = getfileinfo($file);
    $md5->add($fileInfo);
    $md5->addfile($fh);
    close($fh);
    return $md5->b64digest;
}

#
# getfileinfo:
# Build the metadata string for a file from its stat() record:
# [uid] [gid] [mode] [ino]
# where mode is the permission bits (mode & 07777) in octal.
#
# parameters:
# string filename: file whose metadata string is wanted.
# return type: string: the four fields separated by single
# spaces, in the order uid, gid, permission, inode.
#

sub getfileinfo
{
    my ($path) = @_;

    # stat() fields used: 1 = inode, 2 = mode, 4 = uid, 5 = gid.
    my @st = stat($path);
    my ($inode, $mode, $owner, $group) = @st[1, 2, 4, 5];

    # Keep only the permission bits and render them in octal.
    my $perms = sprintf("%o", $mode & 07777);

    return "$owner $group $perms $inode";
}

#
# trimall:
# Strip white space characters from both ends of a string.
# White space inside the string is left untouched.
# parameters:
# string str: the string to be trimmed.
# return type : string
# a trimmed copy; the argument itself is not modified.
#

sub trimall
{
    # Work on a copy so the caller's value stays as it was.
    (my $trimmed = shift) =~ s/^\s+|\s+$//g;
    return $trimmed;
}

3 comments:

  1. I made some modifications to make it work on Windows, because the script did not return the expected MD5 value.

    for module definition:

    use Digest::MD5 qw(md5_hex);

    In sub getmd5checksum after opening the file added:

    binmode(FILE);

    Commented:

    # my $fileInfo = getfileinfo($file);
    # $digest->add($fileInfo);

    and return:

    return $digest->hexdigest;

    That's it, now working fine.

    Regards, Oscar.

    ReplyDelete
  2. It shows only "insufficient arguments". Can you please tell me where to give the directory name in the code, and what the directory path should look like on Windows?

    ReplyDelete
    Replies
    1. Try giving the directory path as a command-line argument to the script, e.g. perl -w md5.pl <directory>
      Suppose you have saved the script in the file md5.pl and the directory path is D:\MyFiles; then the command will look like this:

      perl -w md5.pl D:\MyFiles

      Delete