#!/usr/bin/perl -w

# reflect - file synchronization tool
# Copyright (C) 2003 by John J. Chew, III <jjchew@math.utoronto.ca>

# This script creates and maintains a mirrored copy of a directory.
#
# I use it to keep redundant backups of my important files.
# I wrote it, because I was unable to find any free software to do
# this that understood MacOS resource forks on HFS volumes under OS/X.
# 
# To use it, use a text editor to create a configuration file for 
# each directory that you want to mirror.  Then at the command-line
# or in your crontab file, enter the name of this script followed
# by the names of your configuration files as arguments.
# 
# Configuration files are Perl scripts themselves, and look like this:
#
#   @mirrors = ({
#     # (optional) the Mac volume on which your important files live 
#     # If omitted, 'from' is interpreted as a path on the local host.
#     # You want to use this if the volume is remote, as you can't
#     # guarantee that it hasn't been mounted at '/Volumes/Remote HD-n/'
#     # instead of '/Volumes/Remote HD'.
#     'from_volume' => 'Local HD',
#     # the path where your important files live
#     'from' => '/Applications/Games/Diablo II Folder/Diablo II Files/Save',
#     # (optional) the Mac volume where you want to keep a mirrored copy
#     # if omitted, 'to' is interpreted as a path on the local host
#     'to_volume' => 'Remote HD',
#     # the path where you want to keep a mirrored copy
#     'to'=> '/Backup/Diablo',
#     # (optional) the Mac volume where copies of deleted files go
#     # if omitted, 'trash' is interpreted as a path on the local host
#     'trash_volume' => 'Remote HD',
#     # the path where copies of deleted files go, must be on the same volume as 'to'
#     'trash'=> '/Backup/Trash',
#     # change 0 to 1 if you want the script to abort if something goes wrong
#     'abort_on_error' => 0,
#     # change 0 to 1 if you want the script to mirror things like owner, group, mode too
#     'update_status' => 0, # unimplemented
#     # change 0 to 1 if you want to see everything that the script is doing
#     'verbose' => 0, 
#     # change 0 to 1 if you don't want to see when a file is copied
#     'quiet' => 0, 
#     # change 1 to 0 if you don't want resource forks copied
#     'copy_resource_forks' => 1,
#     # change 1 to 0 if you want resource forks checked for size changes
#     'stat_resource_forks' => 0, # expensive and unimplemented
#   });
#
# For the technically inclined, here is what the script does.  
# It recursively compares the 'from' and 'to' directories, and does
# a stat() on each file and resource fork in each.  If modification
# times, file sizes or file existence don't match, then action is
# taken.  If the file exists on 'from' but not on 'to', it's copied
# using a forked call to 'ditto'.  If the file exists on both but
# has changed, ditto.  If the file exists on 'to' but not 'from',
# it's moved from 'to' to 'trash'.  It's up to you to empty the 
# trash.
#
# TO-DO (e-mail me if you want any of these done)
#
# - Consider reimplementing 'ditto' in Perl.  Probably not worthwhile,
#   as I suspect the bottleneck is the file copying, not the process
#   forking overhead.
# - Write code for a few unusual situations, which currently 
#   die with an "unimplemented" message, but never turn up on my
#   own system.
# - Consider optionally journalling files by copying them to the trash
#   whenever they've changed.
# 
# VERSION HISTORY
#
# 1.100  Search for volumes by Mac name

use strict;
use warnings;

use Fcntl ':mode';
use File::Spec::Functions qw(catfile curdir);

our $VERSION = "1.100";
our $mirrorp;
our %mountpoints;
our $trash_base = time;
our $trash_serial = 0;

sub CheckVolumes ();
sub CopyFile ($);
sub Debug ($);
sub DirectoryChanged ($$$);
sub Error ($);
sub FileChanged ($$$);
sub FromMissing ($);
sub FromStatFailed ($);
sub Log ($);
sub Main ();
sub Path ($$);
sub RecursiveMirror ($);
sub StatChanged ($);
sub ToStatFailed ($);
sub TypeChanged ($$$);

Main;

# look up mount points of MacOS volumes
sub CheckVolumes () {
  my %disktool;
  for my $type (qw(from to trash)) {
    my $volume = $mirrorp->{"${type}_volume"};
    next unless defined $volume;
    next if defined $mountpoints{$volume};
    if (%mountpoints) {
      die "Unknown volume: $volume\nAborting";
      }
    unless (%disktool) {
      open(DISKTOOL, "disktool -l|") or die "Can't launch disktool: $!\n";
      while (<DISKTOOL>) {
	if (m!^\*\*\*Disk Appeared \('(.*)',Mountpoint = '/Volumes/(.*)', fsType = '.*', volName = '.*'\)\s*$!) {
	  $disktool{$2} = $1;
	  }
#	else { print "No match: $_\n"; }
	}
      close(DISKTOOL);
      my %versions;
      for my $mountpoint (sort { $b cmp $a } keys %disktool) {
	my $version = 0;
	my $base = $mountpoint;
	if ($mountpoint =~ /^(.*)-(\d+)$/) {
	  $version = $2;
	  $base = $1;
	  }
	if (($versions{$base} || -1) < $version) {
	  $versions{$base} = $version;
	  $mountpoints{$base} = "/Volumes/$mountpoint";
	  }
        }
      }
    unless (defined $mountpoints{$volume}) {
      die "Unknown volume: $volume\nAborting";
      }
    $mountpoints{':checked'} = 1; # in case no volumes were found
    }
  }

# actually copies a file
sub CopyFile ($) {
  my $subpath = shift;

  Log "Copying: $subpath";
  my $rv = system(
    'ditto', 
    ($mirrorp->{'copy_resource_forks'} ? ('-rsrcFork') : ()),
    Path('from', $subpath),
    Path('to', $subpath),
    ) >> 8;
  Error "copy($subpath) returned $rv" if $rv;
  }

# print if verbose
sub Debug ($) {
  my $message = shift;
  print $message, "\n" if $mirrorp->{'verbose'};
  }

# handles changed directory contents
sub DirectoryChanged ($$$) {
  my $subpath = shift;
  my $atime = shift;
  my $mtime = shift;

  Debug "Target directory utime needs updating: $subpath";
  my $rv = utime $atime, $mtime, Path('to', $subpath);
  Error "target utime() for $subpath returned $rv: $!" if $rv != 1;
  }

# emit error and exit if appropriate
sub Error ($) {
  my $message = shift;
  print $message, "\n";
  exit 1 if $mirrorp->{'abort_on_error'};
  }

# handles changed file contents
sub FileChanged ($$$) {
  my $subpath = shift;
  my $atime = shift;
  my $mtime = shift;

  Debug "File is out-of-date: $subpath";
  CopyFile $subpath;
  }

# handles missing source file
sub FromMissing ($) {
  my $subpath = shift;

  Log "Moving to trash: $subpath";
  unless (open(TRASH_INFO, '>' .  Path('trash',
    $trash_base . '.' . $trash_serial . '.text'))) {
    Error "Can't create trash info file for $subpath: $!\n";
    return;
    }
  unless (print TRASH_INFO "$subpath\n") {
    Error "Can't write to trash info file for $subpath: $!\n";
    return;
    }
  close(TRASH_INFO);
  my $rv = rename Path('to', $subpath), 
    Path('trash', $trash_base . '.' . $trash_serial);
  if ($rv != 1) {
    Error "Could not move $subpath to trash: $!\n";
    return;
    }
  $trash_serial++;
  }

# handles failure of stat() on a source file
sub FromStatFailed ($) {
  my $subpath = shift;

  my $error = $!;
  Debug "Source stat failed: $subpath\n";
  # if stat failed because file does not exist
  if (-e _) {
    FromMissing $subpath;
    return;
    }
  # otherwise, e.g. because of a permissions problem
  Error "source stat() failed for $subpath: $error";
  }

# print unless quiet
sub Log ($) {
  my $message = shift;
  print $message, "\n" unless $mirrorp->{'quiet'};
  }

# main routine
sub Main () {
  our @ARGV;
  for my $ARGV (@ARGV) {
    my $script;
    open(ARGV, "<$ARGV") or die "Can't open script file $ARGV: $!\n";
    { local($/) = undef; $script = <ARGV>; }
    close(ARGV);
    my @mirrors = ();
    eval $script;
    unless (@mirrors) {
      print "Script file $ARGV did not set up \@mirrors data.\n";
      }
    for $mirrorp (@mirrors) {
      CheckVolumes;
      for my $key (qw(from to trash abort_on_error update_status 
	quiet verbose copy_resource_forks stat_resource_forks)) {
	die "Script file $ARGV did not define a value for key '$key'.\n"
	  unless defined $mirrorp->{$key};
	}
      for my $key (qw(from to trash)) {
        my $volume = $mirrorp->{"${key}_volume"};
	my $dir = $mirrorp->{$key};
	if (defined $volume) {
	  $mirrorp->{$key} = $dir = catfile $mountpoints{$volume}, $dir;
	  }
	die "Can't process script file $ARGV: `$key' directory $dir does not exist.\n"
	  unless -d $dir;
	}
      Log "Mirroring: $mirrorp->{'from'} to $mirrorp->{'to'}";
      RecursiveMirror curdir();
      }
    }
  exit 0;
  }

# return a full pathname to a file under 'from', 'to' or 'trash'
sub Path ($$) {
  my $rootkind = shift;
  my $subpath = shift;
  catfile($mirrorp->{$rootkind}, $subpath);
  }

# recursively called to mirror a file structure
sub RecursiveMirror ($) {
  my $subpath = shift;
  my $from = Path('from', $subpath);
  my $to = Path('to', $subpath);
  Debug "Recursion: $subpath";

  # try to stat source file
  my (@from_stat) = stat $from;
  FromStatFailed $subpath unless @from_stat;

  # try to stat target file
  my (@to_stat) = stat $to;
  unless (@to_stat) {
    my $try_again = ToStatFailed $subpath;
    if ($try_again) {
      @to_stat = stat $to;
      unless (@to_stat) {
	Error "stat($to) failed after copy: $!";
	return;
	}
      }
    else {
      return;
      }
    }

  # see if file type has changed (e.g. from regular file to directory)
  TypeChanged $subpath, S_IFMT($from_stat[2]), S_IFMT($to_stat[2])
    if S_IFMT($from_stat[2]) != S_IFMT($to_stat[2]);

  # check for changed mode, uid, gid, rdev, atime, ctime
  if ($mirrorp->{'update_status'}) {
    for my $i (2, 4, 5, 6, 8, 10) {
      if ($from_stat[$i] != $to_stat[$i]) {
	StatChanged $subpath;
	last;
	}
      }
    }

  # rest depends on type of file
  if (S_ISDIR($from_stat[2])) { # directory
    unless (opendir(DIR, $from)) {
      Error "opendir($from) failed: $!";
      return;
      }
    my @from_files = 
      grep { $_ !~ /^\.{1,2}$/ } # TO-DO: do this in an OS-independent way
      readdir(DIR);
    closedir(DIR);
    my %from_files = map { $_ => 1 } @from_files;
    for my $file (@from_files) {
      RecursiveMirror catfile($subpath, $file);
      }
    unless (opendir(DIR, $to)) {
      Error "opendir($to) failed: $!";
      return;
      }
    my @to_files = 
      grep { $_ !~ /^\.{1,2}$/ } # TO-DO: do this in an OS-independent way
      readdir(DIR);
    closedir(DIR);
    for my $file (@to_files) {
      next if $from_files{$file};
      FromMissing catfile($subpath, $file);
      }
    # update directory modtime 
    DirectoryChanged $subpath, $from_stat[8], $from_stat[9]
      if $from_stat[9] != $to_stat[9]; # mtime changed
    }
  elsif (S_ISREG($from_stat[2])) { # regular file
#   print "$from $from_stat[7] $to_stat[7] $from_stat[9] $to_stat[9]\n";
    if ($from_stat[7] != $to_stat[7] # size changed
    ||  $from_stat[9] != $to_stat[9] # mtime changed
       ) {
      FileChanged $subpath, $from_stat[8], $from_stat[9];
      }
    elsif ($mirrorp->{'stat_resource_forks'}) {
      die "stat_resource_forks: unimplemented.\n";
      }
    }
  else {
    die "$from is neither a regular file nor a directory.\n";
    }
  }

sub StatChanged ($) {
  my $subpath = shift;

  print "stat() for $subpath does not match.\n";
  die "Unimplemented at $.\n";
  }

# handles missing target file
sub ToMissing ($) {
  my $subpath = shift;

  Debug "Target missing: $subpath";
  CopyFile $subpath;
  }

# handles failure of stat() on a target file
sub ToStatFailed ($) {
  my $subpath = shift;

  my $error = $!;
  Debug "Target stat failed: $subpath";
  # if stat failed because file does not exist
  if (!-e _) {
    ToMissing $subpath;
    return 1; # try another stat
    }
  # otherwise, e.g. because of a permissions problem
  Error "target stat() failed for $subpath: $error";
  return 0; # don't try another stat
  }

sub TypeChanged ($$$) {
  my $subpath = shift;
  my $from_type = shift;
  my $to_type = shift;

  Debug "Mode changed: $subpath";
  die "Unimplemented (source mode $from_type, target mode $to_type)\n";
  }

