#!/usr/bin/perl -w

# ====================================================================
# cvs-sync.pl: Keep a CVS working copy and repository up to date with
# a master Subversion repository
#
# For usage, see the usage subroutine or run the script with no
# command line arguments.
#
# ====================================================================
# Copyright (c) 2004 Brian Bassett.  All rights reserved.
# ====================================================================

use strict;
use Carp;

######################################################################
# Configuration section.

# Svnlook path.
my $svnlook = "/usr/bin/svnlook";

# CVS path.
my $cvs = "/usr/bin/cvs";

# Since the path to svnlook depends upon the local installation
# preferences, check that the required programs exist to insure that
# the administrator has set up the script properly.
{
  my $ok = 1;
  foreach my $program ($svnlook, $cvs)
    {
      if (-e $program)
        {
          unless (-x $program)
            {
              warn "$0: required program `$program' is not executable, ",
                   "edit $0.\n";
              $ok = 0;
            }
        }
      else
        {
          warn "$0: required program `$program' does not exist, edit $0.\n";
          $ok = 0;
        }
    }
  exit 1 unless $ok;
}


######################################################################
# Initial setup/command-line handling.

# Each value in this array holds a hash reference which contains the
# associated email information for one project.  Start with an
# implicit rule that matches all paths.
my @project_settings_list = ();

# Process the command line arguments till there are none left.  The
# first two arguments that are not used by a command line option are
# the repository path and the revision number.
my $repos;
my $rev;

# Use the reference to the first project to populate.
my $current_project = &new_project;

# This hash matches the command line option to the hash key in the
# project.  If a key exists but has a false value (''), then the
# command line option is allowed but requires special handling.
my %opt_to_hash_key = ('-l'     => 'log_file',
		       '-m'	=> '');

while (@ARGV)
  {
    my $arg = shift @ARGV;
    if ($arg =~ /^-/)
      {
        my $hash_key = $opt_to_hash_key{$arg};
        unless (defined $hash_key)
          {
            die "$0: command line option `$arg' is not recognized.\n";
          }

        unless (@ARGV)
          {
            die "$0: command line option `$arg' is missing a value.\n";
          }
        my $value = shift @ARGV;

        if ($hash_key)
          {
            $current_project->{$hash_key} = $value;
          }
        else
          {
            # Here handle -m.
            unless ($arg eq '-m')
              {
                die "$0: internal error: should only handle -m here.\n";
              }
            $current_project                = &new_project;
            $current_project->{match_regex} = $value;
            push(@project_settings_list, $current_project);
          }
      }
    elsif ($arg =~ /^-/)
      {
        die "$0: command line option `$arg' is not recognized.\n";
      }
    else
      {
        if (! defined $repos)
          {
            $repos = $arg;
          }
        elsif (! defined $rev)
          {
            $rev = $arg;
          }
        else
          {
            $current_project->{cvs_working_copy} = $arg;
          }
      }
  }

# If the revision number is undefined, then there were not enough
# command line arguments.
&usage("$0: too few arguments.") unless defined $rev;

# Check the validity of the command line arguments.  Check that the
# revision is an integer greater than 0 and that the repository
# directory exists.
unless ($rev =~ /^\d+/ and $rev > 0)
  {
    &usage("$0: revision number `$rev' must be an integer > 0.");
  }
unless (-e $repos)
  {
    &usage("$0: repos directory `$repos' does not exist.");
  }
unless (-d _)
  {
    &usage("$0: repos directory `$repos' is not a directory.");
  }

# Check that all of the regular expressions can be compiled and
# compile them.
{
  my $ok = 1;
  for (my $i=0; $i<@project_settings_list; ++$i)
    {
      my $match_regex = $project_settings_list[$i]->{match_regex};

      # To help users that automatically write regular expressions
      # that match the root directory using ^/, remove the / character
      # because subversion paths, while they start at the root level,
      # do not begin with a /.
      $match_regex =~ s#^\^/#^#;

      my $match_re;
      eval { $match_re = qr/$match_regex/ };
      if ($@)
        {
          warn "$0: -m regex #$i `$match_regex' does not compile:\n$@\n";
          $ok = 0;
          next;
        }
      $project_settings_list[$i]->{match_re} = $match_re;
    }
  exit 1 unless $ok;
}

######################################################################
# Harvest data using svnlook.

# Change into /tmp so that svnlook diff can create its .svnlook
# directory.
my $tmp_dir = '/tmp';
chdir($tmp_dir)
  or die "$0: cannot chdir `$tmp_dir': $!\n";

# Get the author, date, and log from svnlook.
my @svnlooklines = &read_from_process($svnlook, 'info', $repos, '-r', $rev);
my $author = shift @svnlooklines;
my $date = shift @svnlooklines;
shift @svnlooklines;
my @log = map { "$_\n" } @svnlooklines;
push @log, "\n";
push @log, "[[ cvs-sync.pl: Commited into SVN revision $rev on $date by $author ]]\n";

# Figure out what files have changed using svnlook.
@svnlooklines = &read_from_process($svnlook, 'changed', $repos, '-r', $rev);

# Parse the changed nodes.
my @adds;
my @dels;
my @mods;
foreach my $line (@svnlooklines)
  {
    my $path = '';
    my $code = '';

    # Split the line up into the modification code and path, ignoring
    # property modifications.
    if ($line =~ /^(.).  (.*)$/)
      {
        $code = $1;
        $path = $2;
      }

    if ($code eq 'A')
      {
        push(@adds, $path);
      }
    elsif ($code eq 'D')
      {
        push(@dels, $path);
      }
    else
      {
        push(@mods, $path);
      }
  }

# Start plugging away with updating the adds, dels, and mods
my @loglines;
my ($dir, $file);

foreach my $add (@adds)
  {
    foreach my $proj (&find_matching_projects($add))
      {
	# Mark this project as dirty
	$proj->{dirty} = 1;

	# Find the real dir/filename
	($dir, $file) = &svn_path_to_cvs_working_dir($add, $proj);

	# If we added a directory, make it and cvs add it
	if ($add =~ /\/$/)
	  {
	    chdir($dir) or die "$0: cannot chdir `$dir': $!\n";
	    unless (-e $file)
	      {
		mkdir($file) or die "$0: cannot mkdir `$dir/$file': $!\n";
	      }
	  }
	else
	  {
	    # Pull it out of svn, put it in cvs, and add it
	    chdir($tmp_dir) or die "$0: cannot chdir `$tmp_dir': $!\n";
	    my @filelines = &read_from_process($svnlook, 'cat', $repos,
					       '-r', $rev, $add);
	    &spit_out_file($dir, $file, @filelines);
	  }
	chdir($dir) or die "$0: cannot chdir `$dir': $!\n";
      	push @{$proj->{log_lines}}, "[[ Adding in $dir ]]";
      	push @{$proj->{log_lines}}, &read_from_process($cvs, 'add', $file);
      }
  }

foreach my $del (@dels)
  {
    foreach my $proj (&find_matching_projects($del))
      {
	# Find the real dir/filename
	($dir, $file) = &svn_path_to_cvs_working_dir($del, $proj);

	# If it's not a directory we deleted, delete it
	chdir($dir) or die "$0: cannot chdir `$dir': $!\n";
	if ($del !~ /\/$/)
	  {
	    # Mark this project as dirty
	    $proj->{dirty} = 1;

	    push @{$proj->{log_lines}}, "[[ Deleting in $dir ]]";
	    push @{$proj->{log_lines}}, &read_from_process($cvs, 'rm', '-f', $file);
	  }
      }
  }

foreach my $mod (@mods)
  {
    foreach my $proj (&find_matching_projects($mod))
      {
	# Mark this project as dirty
	$proj->{dirty} = 1;

	# Find the real dir/filename
	($dir, $file) = &svn_path_to_cvs_working_dir($mod, $proj);

	# Pull the new version out of svn and put it in the working copy
	chdir($tmp_dir) or die "$0: cannot chdir `$tmp_dir': $!\n";
	my @filelines = &read_from_process($svnlook, 'cat', $repos, '-r',
					   $rev, $mod);
	chdir($dir) or die "$0: cannot chdir `$dir': $!\n";
	&spit_out_file($dir, $file, @filelines);
      }
  }
 
# Prep the logfile for the commit
my $log_tmp_file = "cvs-sync.$$";
&spit_out_file($tmp_dir, $log_tmp_file, @log);

# Do the commit in each of the spec'd CVS working dirs (if nothing was
#  changed, the cvs commit is idempotent)
foreach my $project (@project_settings_list)
  {
    next unless $project->{dirty} == 1;

    my $wd = $project->{cvs_working_copy};
    push @{$project->{log_lines}}, "[[ Committing in $wd ]]";
    chdir($wd) or die "$0: cannot chdir `$wd': $!\n";
    push @{$project->{log_lines}}, &read_from_process($cvs, '-q', 'commit', '-F',
						      "$tmp_dir/$log_tmp_file");

    # Write out our log
    if ($project->{log_file} =~ /\w/)
      {
	if (open(LOGFILE, ">>$project->{log_file}"))
	  {
	    print LOGFILE map { "$_\n" } @{$project->{log_lines}};
	    print LOGFILE "\cL";
	    close LOGFILE or warn "$0: error in closing `$project->{log_file}' for appending: $!\n";
	  }
	else
	  {
	    warn "$0: cannot open `$project->{log_file}' for appending: $!\n";
	  }
      }
  }

# Clean up the logfile
unlink("$tmp_dir/$log_tmp_file") or die "$0: cannot unlink temporary file `$tmp_dir/$log_tmp_file': $!\n";
  
exit 0;  

  
#######
#

sub svn_path_to_cvs_working_dir
{
  my ($svnpath, $project) = @_;

  # Break a repo path down into directory and file
  my @splitdir = split /\//, $svnpath;
  my $svnfile = pop @splitdir;
  my $svndir = join '/', @splitdir;

  # Substitute the location of the working dir for the matched portion
  # of the match_regex
  $svndir =~ s/$project->{match_re}/$project->{cvs_working_copy}/;

  # Tell the world about our results.
  return ($svndir, $svnfile);
}

sub find_matching_projects
{
  my ($svnpath) = @_;
  my @projects = ();

  foreach my $each (@project_settings_list)
    {
      my $match_re = $each->{match_re};
      if ($svnpath =~ $match_re)
   	{
	  push @projects, $each;
     	}
    }

  return @projects;
}

sub spit_out_file
{
  my ($dir, $filename, @lines) = @_;

  open SPITTOON, ">$dir/$filename" or warn "$0: cannot open file `$dir/$filename' for writing: $!\n";
  print SPITTOON map { "$_\n" } @lines;
  close SPITTOON or warn "$0: error in closing `$dir/$filename': $!\n";
}
			   
###########
#

sub usage
{
  warn "@_\n" if @_;
  die "usage: $0 REPOS REVNUM [[-m regex] [options] cvs_working_copy] ...\n",
      "options are\n",
      "  -l logfile            Append entries to this log file\n",
      "  -m regex              Regular expression to match committed path\n",
      "\n",
      "This script supports a single repository with multiple equivalent\n",
      "CVS working copies, where each CVS working copy is synchronized for\n",
      "commits that modify that portion of the Subversion repository.  A\n",
      "portion of the Subversion repository is identified by using the -m\n",
      "command line with a regular expression argument.  If a commit has a\n",
      "path that matches the regular expression, then the entire commit\n",
      "matches.  Any of the following -l command line options and\n",
      "following CVS working copy are associated with this project.  The\n",
      "next -m resets the -l command line options and the CVS working\n",
      "copy.\n",
}

# Return a new hash data structure for a new empty project that
# matches any modifications to the repository.
sub new_project
{
  return {cvs_working_copy => '',
          log_file         => '',
          match_regex      => '',
	  log_lines	   => [],
	  dirty		   => 1};
}

# Start a child process safely without using /bin/sh.
sub safe_read_from_pipe
{
  unless (@_)
    {
      croak "$0: safe_read_from_pipe passed no arguments.\n";
    }

  my $pid = open(SAFE_READ, '-|');
  unless (defined $pid)
    {
      die "$0: cannot fork: $!\n";
    }
  unless ($pid)
    {
      open(STDERR, ">&STDOUT")
        or die "$0: cannot dup STDOUT: $!\n";
      exec(@_)
        or die "$0: cannot exec `@_': $!\n";
    }
  my @output;
  while (<SAFE_READ>)
    {
      s/[\r\n]+$//;
      push(@output, $_);
    }
  close(SAFE_READ);
  my $result = $?;
  my $exit   = $result >> 8;
  my $signal = $result & 127;
  my $cd     = $result & 128 ? "with core dump" : "";
  if ($signal or $cd)
    {
      warn "$0: pipe from `@_' failed $cd: exit=$exit signal=$signal\n";
    }
  if (wantarray)
    {
      return ($result, @output);
    }
  else
    {
      return $result;
    }
}

# Use safe_read_from_pipe to start a child process safely and return
# the output if it succeeded or an error message followed by the output
# if it failed.
sub read_from_process
{
  unless (@_)
    {
      croak "$0: read_from_process passed no arguments.\n";
    }
  my ($status, @output) = &safe_read_from_pipe(@_);
  if ($status)
    {
      return ("$0: `@_' failed with this output:", @output);
    }
  else
    {
      return @output;
    }
}

