#!/usr/local/bin/perl -w
# SNmigrate.pl
# Version 1.0
# 12 June 2009
#
# Justin Mercier
#
# This script moves files specified in a listfile from one place to another.
# It was primarily designed to migrate data off a StorNext system to our NAS.
# It will create any subdirectories that are required and will also verify file copies.
# It takes no arguments; instead, key variables must be defined in this script.
#
# CHANGE LOG
# 0.1   - Basic copy and file verification engine.
# 0.2   - Added logic for doRecover, doExistingCompare, and doDbUpd.
# 0.2.5 - Fixed logic for doRecover and doDbUpd.
# 0.3   - Added logic for alternating target paths, as well as some cleanup.
#       - Changed journal structure to support multiple target paths.
#       - Added environment checking for SYBASE.
#       - Added debug function.
# 0.9   - Port to Windows, first successful test against a live instance.
#       - Added path squashing.
# 0.9.1 - Minor bug fixes.
# 0.9.2 - Minor bug fixes.
#       - Added rejection and non-journalling of files that fail path substitution.
#       - Added better handling for various forms of Windows paths, i.e. \\ or //, etc.
#       - Improved readability of output.
# 0.9.3 - Changed reporting of database update to report rows affected.
#       - Split reporting of successful or failed copies to separate files.
#       - Journal file is now merely saved for troubleshooting as journal.sav.
# 1.0   - 0.9.3 passes testing and is released as version 1.0.

# Specify pragmas and modules to use
use strict;
use warnings;
use File::Copy;
use File::Basename;
use File::Path;
use File::Compare;
use Fcntl;
use Tie::File;

# Declare our subroutines, which are defined at the end of the script.
sub replacePath;
sub squashCase;
sub copyFile;
sub checkPath;
sub compareFile;
sub trim;
sub logline;
sub logdie;
sub timestamp;
sub updateDbPath;
sub readLastLine;
sub crashRecovery;
sub seekFileByLineMatch;
sub switchTarget;
sub debug;
sub trace;

# Enable debug messages. Either comment out or set to 0 to disable.
my $doDebug = 0; # standard messages
my $doTrace = 0; # low-level tracing

# The file with a list of source files to copy.
# Each line should have a single absolute path to a file and nothing else.
my $srcfilelist = "filelistA.txt";

# The log files to log results.
my $logfile  = "migrate-A.log";
my $errfile  = "error-A.txt";
my $compfile = "completed-A.txt";

# Open our logfile for writing.
open (LOGFILE, ">>$logfile") or logdie("Can not open log file for writing: $logfile");

# The portion of the source file's path we will replace with $newpath.
my $oldpath = "x:/stornext";

# The new base path to which we will copy the file.
# You can enter multiple target directories as elements in the array.
# This will copy the first file to the first target, the second file to the second target,
# and will repeat as necessary.
# To use just a single destination, enter a single element.
# Sub-directories will be created automatically.
my @migrateToPath;
$migrateToPath[0] = "n:/stornext";
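# For example (hypothetical share name), adding a second element would make the copies
# alternate between the two destinations:
# $migrateToPath[1] = "m:/stornext";   # hypothetical second target, not enabled by default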

# A boolean-like setting to enable or disable updating the file path in Sybase.
# It can be commented out, set to undef, or set to 0 to disable; any other value is true.
# The block below tests for true and loads the necessary modules.
my $doDbUpd = 1;
if ($doDbUpd) {
    # Check to make sure that the proper environment variables are defined.
    if ($ENV{'SYBASE'}) {
        debug("SYBASE is set and installed in $ENV{'SYBASE'}");
    } else {
        logdie("SYBASE environment variable is not set. Please source the Sybase environment and try again.");
    }
    # If the above check succeeds we now import the necessary modules.
    require DBI;
    require DBD::Sybase;

    # Database connection parameters
    my $dbi_server = "myDS";
    my $dbi_db     = "mydb";
    my $dbi_user   = "user";
    my $dbi_pass   = "password";
    my $dbi_result = undef;
    my $sql_string = undef;

    # Open the database connection.
    $main::dbi_handle = DBI->connect("DBI:Sybase:server=$dbi_server;database=$dbi_db", $dbi_user, $dbi_pass);
    logline("INFO", "Connecting to Sybase server $dbi_server:$dbi_db as $dbi_user.");
}

# The following can be used if doDbUpd is set to true, and are used to set an alternate path
# in the database PATH field. This is useful if you use Windows paths on a Unix fileserver.
# If these are not set then $oldpath and $newpath will be used.
my $oldDbPath = undef;
my $newDbPath = undef;

# A boolean-like setting to enable or disable comparison of pre-existing files.
# This can be useful for verifying already-copied files in the event of a crash,
# but it can also cause tape thrash if the HSM needs to retrieve a lot of truncated
# files to compare with, in which case doRecover is better.
my $doExistingCompare = 0;
if ($doExistingCompare) {
    debug("doExistingCompare is enabled.");
} else {
    debug("doExistingCompare is disabled.");
}

# A boolean-like setting to enable or disable crash recovery. If this is set
# then the script will simply log the current operation until it is fully
# processed. If the script or system crashes then this 'journal' file will be
# referenced to pick up where it left off.
my $doRecover = 1;
my $recoverJournalFile = "migrate_jnl.txt";
if ($doRecover) {
    debug("Journalling enabled, using $recoverJournalFile.");
} else {
    debug("Recovery mode is disabled.");
}

# An arbitrary counter needed for switchTarget() to work correctly.
my $count = 0;

# Perform crash recovery if doRecover is true.
if ($doRecover) {
    if ( -e $recoverJournalFile ) {
        $main::recoveryMode = 1;
        my $lastline = readLastLine("$recoverJournalFile");
        debug("readLastLine returned $lastline");
        ($main::lastfile, my $lastpath) = split(/,/, $lastline);
        if ( -e $main::lastfile ) {
            crashRecovery($main::lastfile, $lastpath);
        } else {
            logdie("Failed to read last file from journal.");
        }
        open (JOURNAL, "+<", $recoverJournalFile) or logdie("Cannot open journal file in read-write mode: $recoverJournalFile");
    } else {
        open (JOURNAL, ">", $recoverJournalFile) or logdie("Cannot create or overwrite journal file: $recoverJournalFile");
    }
}

# Write a preamble to our logfile and open the error and completion logs.
if ( ! $main::recoveryMode ) {
    logline("BEGIN", "Starting file migration from $oldpath.");
    open (ERRFILE, ">$errfile") or logdie("Can not create or overwrite error log: $errfile");
    open (COMPFILE, ">$compfile") or logdie("Can not create or overwrite completion log: $compfile");
} else {
    open (ERRFILE, ">>$errfile") or logdie("Can not open error log for appending: $errfile");
    open (COMPFILE, ">>$compfile") or logdie("Can not open completion log for appending: $compfile");
}

# This will make our filehandles unbuffered so writes happen immediately.
if ($doRecover) { select(JOURNAL); $| = 1; }
select(LOGFILE);  $| = 1;
select(COMPFILE); $| = 1;
select(ERRFILE);  $| = 1;
select(STDOUT);

# Open our source file list for reading.
open(FILELIST, "$srcfilelist") or logdie("Could not open listfile: $srcfilelist");
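# The filelist is expected to hold one absolute path per line, e.g. (hypothetical entries):
#   x:/stornext/projects/clip001.mov
#   x:/stornext/projects/clip002.mov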

# This is our 'main function'
#
# If we are in recovery mode then we need to fast-forward into the file list.
if ($main::recoveryMode) {
    debug("Seeking to $main::lastfile in source file list.");
    seekFileByLineMatch(*FILELIST, $main::lastfile);
    $main::loopcount = 2; # anything over 1 forces a leading newline to separate journal records
} else {
    $main::loopcount = 1; # we're starting over and don't want a leading newline (see 10 lines down)
}

# Read the filelist and copy each file.
while (my $srcfile = <FILELIST>) {
    chomp($srcfile);
    $srcfile = trim($srcfile);
    logline("NEW*", "=== Starting work on $srcfile");
    ($srcfile, my $newfile, $oldpath, my $newpath) = replacePath($srcfile, $oldpath);
    if ( "$newfile" eq "Error" ) {
        print ERRFILE "$srcfile\n";
        next;
    }
    if ($doRecover) {
        if ( $main::loopcount > 1 ) { print JOURNAL "\n"; }
        print JOURNAL "$srcfile,$newpath";
        $main::loopcount++;
    }
    if ( -e $srcfile ) {
        if ( ! -e $newfile ) {
            copyFile($srcfile, $newfile);
        } else {
            logline("INFO", "$newfile already exists and will be verified.");
            my $status = compareFile($srcfile, $newfile);
            if ( $status == 1 ) {
                logline("INFO", "$newfile was successfully verified.");
            } else {
                logline("WARN", "$newfile failed verification, file will be re-copied.");
                unlink($newfile) or logdie("Can not delete $newfile for recopy.");
                copyFile($srcfile, $newfile);
            }
        }
        updateDbPath($srcfile, $newfile, $oldDbPath, $newDbPath);
        print "\n"; # print a final newline to visually separate console output.
    } else {
        logline("*ERR", "Source file $srcfile does not exist.");
    }
}

logline("DONE", "Completed file migration from $oldpath.");
close (FILELIST);
if ($doRecover) { close (JOURNAL); }
if ($doDbUpd)   { $main::dbi_handle->disconnect(); }
close (LOGFILE);
move($recoverJournalFile, "journal.sav");
exit;

### SUBROUTINES ###

sub replacePath($$)
# This function determines the target file path by substituting the new path for the old path.
# However, due to the case-sensitivity of Perl and the case-insensitivity of Windows (which makes
# the case of any file path unpredictable), we need to first squash the case of the paths (but not
# the file name) so the regex can do its job.
# All resulting paths on Windows will be squashed permanently.
{
    my ($srcfile, $oldpath) = @_;
    my $newpath = switchTarget();

    $srcfile =~ s/\\\\/\//g;  # convert double backslash to single forward slash
    $srcfile =~ s/\\/\//g;    # convert single backslash to single forward slash
    $srcfile =~ s/\/\//\//g;  # convert double forward slash to single forward slash
    $srcfile = squashCase(dirname($srcfile)) . "/" . basename($srcfile);

    $oldpath =~ s/\\\\/\//g;  # convert double backslash to single forward slash
    $oldpath =~ s/\\/\//g;    # convert single backslash to single forward slash
    $oldpath =~ s/\/\//\//g;  # convert double forward slash to single forward slash
    $oldpath = squashCase($oldpath);

    $newpath =~ s/\\\\/\//g;  # convert double backslash to single forward slash
    $newpath =~ s/\\/\//g;    # convert single backslash to single forward slash
    $newpath =~ s/\/\//\//g;  # convert double forward slash to single forward slash
    $newpath = squashCase($newpath);

    my $newfile = $srcfile;
    $newfile =~ s/$oldpath/$newpath/;
    if ( "$newfile" eq "$srcfile" ) {
        logline("WARN", "Path substitution failed for $srcfile, old path: $oldpath, new path: $newpath");
        $newfile = "Error";
    }
    return ($srcfile, $newfile, $oldpath, $newpath);
}

sub squashCase($)
# This function simply converts a string to lowercase if the OS we're running on is Windows.
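# For example, 'X:/StorNext/Projects' becomes 'x:/stornext/projects' on Windows;
# on any other platform the string is returned unchanged.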
{
    my $string = shift;
    if ( "$^O" eq "MSWin32" ) {
        $string =~ tr/A-Z/a-z/;
    }
    return $string;
}

sub copyFile($$)
# Copies the specified file with comparison checking.
# Requires two arguments as strings for the source file and destination file.
{
    my ($srcfile, $newfile) = @_;
    checkPath($newfile);
    logline("COPY", "Copying $srcfile->$newfile");
    copy($srcfile, $newfile) or logdie("Failure during copy: $srcfile->$newfile");
    my $status = compareFile($srcfile, $newfile);
    if ( $status == 1 ) {
        logline("PASS", "$newfile was successfully copied and verified.");
        print COMPFILE "$srcfile\n";
        return 1;
    } else {
        logline("ERROR", "$newfile was copied but failed verification.");
        print ERRFILE "$srcfile\n";
        return 0;
    }
}

sub checkPath($)
# Checks that the full destination path exists, and creates it if necessary.
# Requires a single argument as a string specifying the absolute destination filename.
{
    my $newfile = shift;
    my $newpath = dirname($newfile);
    if (! -d $newpath) {
        mkpath($newpath) or logdie("Cannot make file path: $newpath \n");
    }
}

sub compareFile($$)
# Compares the source and destination file to verify the copy.
# Requires two arguments as strings specifying the absolute source and destination file paths.
{
    my ($srcfile, $newfile) = @_;
    logline("INFO", "Comparing $srcfile->$newfile.");
    if (compare($srcfile, $newfile) == 0) {
        return 1;
    } else {
        return 0;
    }
}

sub trim($)
# Trims leading and trailing whitespace.
# Requires a single string argument to trim.
{
    my $string = shift;
    $string =~ s/^\s+//;
    $string =~ s/\s+$//;
    return $string;
}

sub logline($$)
# Logs a level and a string to the logfile with a timestamp, as well as to stdout.
# Requires two string arguments: the info level (i.e. INFO, WARN, ERROR) and the message to log.
{
    my ($level, $string) = @_;
    chomp($string);
    print LOGFILE timestamp("standard") . " [" . $level . "]\t" . $string . "\n";
    print timestamp("standard") . " [" . $level . "]\t" . $string . "\n";
}

sub logdie($)
# Logs a fatal error and dies.
# Requires a single string to log and output to console.
{
    my $string = shift;
    chomp $string;
    logline("FATAL", $string);
    die $string;
}

sub timestamp($)
# Creates and returns a formatted time stamp.
# Requires a string argument which is either 'standard' or 'unspaced'.
# i.e. standard will produce: Apr 28 14:26:33
# unspaced will produce 0428142633
{
    my $format = shift;
    my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);
    $year += 1900;
    my @month_abbr = qw( Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec );
    if ($mon < 10)  { $mon  = "0$mon"; }
    if ($hour < 10) { $hour = "0$hour"; }
    if ($min < 10)  { $min  = "0$min"; }
    if ($sec < 10)  { $sec  = "0$sec"; }
    if ($format eq "standard") {
        return "$month_abbr[$mon] $mday $hour:$min:$sec";
    } elsif ($format eq "unspaced") {
        $mon += 1;
        return "$mon$mday$hour$min$sec";
    } else {
        logline("WARN", "Error in sub timestamp: argument $format is invalid. Defaulting to UNIX time.");
        return time();
    }
}

sub readLastLine($)
# Reads the last line from a file. Because perl is not aware of 'lines' in filehandles
# we must use Tie::File. Takes a single argument: the file to tie.
# This function does not support FILEHANDLES.
{
    my $FILE = shift;
    debug("readLastLine using journal file $FILE");
    tie my @file, 'Tie::File', "$FILE";
    debug("readLastLine found $file[-1]");
    return $file[-1];
}
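
# Each journal record written by the main loop is a single 'sourcefile,targetpath' line,
# e.g. (hypothetical): x:/stornext/projects/clip001.mov,n:/stornext
# On restart, readLastLine() returns that record and crashRecovery() below verifies or
# re-copies the interrupted file before the main loop resumes.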
sub crashRecovery($$)
# This subroutine performs crash recovery based on the last entry in the journal file.
# It will skip to the aborted file in the filelist and then do file comparisons
# and re-copy as necessary. This is much more efficient than doExistingCompare.
# It takes two arguments: the file and destination path for the last operation.
{
    if ( -e $recoverJournalFile ) {
        my ($srcfile, $newpath) = @_;
        chomp($srcfile);
        $srcfile = trim($srcfile);
        $srcfile =~ tr/\\/\//;
        $srcfile = squashCase(dirname($srcfile)) . "/" . basename($srcfile);
        chomp($newpath);
        $newpath = trim($newpath);
        $newpath =~ tr/\\/\//;
        $newpath = squashCase($newpath);
        my $oldpath = trim($oldpath);
        chomp($oldpath);
        $oldpath =~ tr/\\/\//;
        $oldpath = squashCase($oldpath);
        my $newfile = $srcfile;
        $newfile =~ s/$oldpath/$newpath/;
        if ( "$srcfile" eq "$newfile" ) { logdie("srcfile and newfile are the same."); }
        logline("RCVR", "Process was aborted while processing $srcfile. Recovering.");
        my $status = compareFile($srcfile, $newfile);
        if ( $status == 1 ) {
            logline("RCVR", "$newfile already exists and was successfully verified.");
            updateDbPath($srcfile, $newfile, $oldDbPath, $newDbPath);
            return 1;
        } else {
            logline("RCVR", "$newfile failed verification, file will be re-copied.");
            $status = copyFile($srcfile, $newfile);
            if ( $status == 1 ) {
                updateDbPath($srcfile, $newfile, $oldDbPath, $newDbPath);
                return 1;
            } else {
                logdie("Crash recovery failed to copy $srcfile.");
            }
        }
    }
}

sub seekFileByLineMatch($$)
# This subroutine simply advances a filehandle until it reaches a specified line.
# It takes two arguments: the file to advance and the line to match.
{
    my ($FILE, $lastline) = @_;
    chomp($lastline);
    debug("seekFileByLineMatch attempting to jump to \"$lastline\"");
    while( my $line = <$FILE> ) {
        chomp($line);
        $line = trim($line);
        $line =~ tr/\\/\//;
        $line = squashCase(dirname($line)) . "/" . basename($line);
        trace("Comparing with line \"$line\"");
        if ( "$line" eq "$lastline" ) {
            debug("seekFileByLineMatch found $line");
            last;
        }
    }
}

sub updateDbPath($$$$)
# This subroutine updates the SOMEPATH in SOMETABLE. It can either update the raw paths
# or it can use specified values in $oldDbPath and $newDbPath.
# It takes four arguments: the old Unix file path, the new Unix file path, the old DB path,
# and the new DB path. The latter two can be undef, which will then default to the Unix paths.
{
    my ( $oldfile, $newfile, $oldDbPath, $newDbPath ) = @_;
    if ($doDbUpd) {
        my $filename = basename($oldfile);
        if (! $oldDbPath) { $oldDbPath = dirname($oldfile); }
        if (! $newDbPath) { $newDbPath = dirname($newfile); }
        chomp($filename);
        my $sqlstring = "update SOMETABLE set SOMEPATH='" . $newDbPath . "' where FILENAME='" . $filename . "'";
        logline("*DB*", "Updating PATH - " . $sqlstring . "\n");
        my $dbi_result = $main::dbi_handle->do($sqlstring);
        if (! $dbi_result) {
            logline("ERROR", "Could not update PATH for $filename.");
        } else {
            if ($dbi_result == 1) {
                logline("PASS", "Database PATH update reports 1 row updated.");
            } else {
                logline("*ERR", "Database update error, $dbi_result rows were updated.");
            }
        }
    } else {
        logline("INFO", "doDbUpd is set to false, so PATH update will be skipped.");
    }
}

sub switchTarget()
# This subroutine simply iterates through an array's elements using a modulus hack.
# This allows file copies to alternate between multiple destinations.
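# For example, with two targets configured, successive calls return element 0, 1, 0, 1, ...
# because $count++ % @migrateToPath cycles through the array indices.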
# Thanks to the Perl Monks (http://www.perlmonks.org/?node_id=13834)
{
    my $newpath = $migrateToPath[ $count++ % @migrateToPath ];
    if ($newpath) {
        if ($doDebug) { logline("DEBUG", "Alternating destination path to $newpath."); }
        return $newpath;
    } else {
        logdie("Cannot swap path variable.");
    }
}

sub debug($)
# This subroutine will log debug messages if doDebug is true.
# Uses a single argument specifying the debug message.
{
    my $string = shift;
    if ($doDebug) { logline("DEBUG", $string); }
}

sub trace($)
# This subroutine will log trace messages if doTrace is true.
# Uses a single argument specifying the trace message.
# This can generate lots of output.
{
    my $string = shift;
    if ($doTrace) { logline("TRACE", $string); }
}