#! /usr/bin/perl -w
#
# $Id: rcs2svn.pl 661 2010-01-31 09:55:18Z coelho $
#

use strict;

# script version
my $script = $0;
$script =~ s/.*\///;

my $revision = '$Revision: 661 $';
$revision =~ tr/0-9//cd;

=head1 NAME

B<rcs2svn> convert tree of simple RCS files to SVN.

=head1 SYNOPSIS

rcs2svn [-fghmvpn] [-[crs] cmd] [-u svn-url] [-d dir] [rcs-files]

=head1 DESCRIPTION

The B<rcs2svn> command converts a tree of simple RCS (Revision
Control System) files to SVN (Subversion).

The script is not very clean and may be error prone. You may
consider running B<rcs2cvs> and B<cvs2svn> or others instead.

SVN commits are performed for every revision of every file,
in the same I<global> order as the RCS checkins were performed.
RCS informations are stored as properties in SVN:

=over 4

=item B<rcs:date>

RCS checkin date.

=item B<rcs:author>

RCS checkin author, also put as SVN commit author.

=item B<rcs:rev>

RCS revision numbers, such as '1.28'.

=item B<rcs:state>

RCS revision status, such as 'Exp', 'Rel'...

=item B<rcs:lines>

RCS lines differences, such as '+15 -8'.

=back

=head1 OPTIONS

=over 4

=item B<--help> or B<-h>

show help about command usage.

=item B<--man> or B<-m>

show full manual page.

=item B<--version> or B<-V>

show script revision.

=item B<--verbose> or B<-v>.

be verbose. Repeat for more.

=item B<--quiet> or B<-q>

no interactive stuff.

=item B<--go> or B<-g>

do it! Otherwise, nothing is done.

=item B<--no-user> or B<-n>

do not issue SVN username on commits.
Useful for HTTP repositories, where the username is authenticated.

=item B<--pdel> or B<-p>

remove all "rcs:*" SVN properties in a last commit.
Otherwise they are kept indefinitely.

=item B<--co cmd> or B<-c cmd>

RCS checkout command, default to "co".

=item B<--rlog cmd> or B<-r cmd>

RCS log command, default to "rlog".

=item B<--svn cmd> or B<-s cmd>

SVN client command, default to "svn".

=item B<--url svn-url> or B<-u svn-url>

specify destination repository and possible subdirectory.
default to none, that it assume that the target directory is
an svn working copy.

=item B<--directory path> or B<-d path>

directory to convert. default is '.'.

=item B<--find> or B<-f>

look for RCS files in directory hierarchy. Quite advisable.

=item B<--filter-message cmd> or B<--fm cmd>

run filter on RCS checkin messages.

=item B<--temporary dir> or B<--tmp dir>

temporary directory to be used. default is '/tmp'.

=item B<--time> or B<-t>

use RCS checkin time as SVN commit time.
It is better to do so, but it requires to enable revision property edition.
See SVN repository administration documentation.

If you want to add an RCS project as a subdirectory in an existing SVN
repository, you may consider first converting to a new repository, and
then to merge the repositories so as to keep an overall date order,
but at the price of loosing the initial revision numbers.

=back

=head1 ARGUMENTS

RCS files to convert. Use simpler B<--find> option if possible.

=head1 EXAMPLES

A project added as a directory in an existing repository:

 sh> rcs2svn -d /to/project -u file:///to/repos/proj -f -v -n -g

A new repository that stores the project into "trunk":

 # administration
 sh> svnadmin create /to/repos
 # enable revision property changes:
 sh> echo -e "#! /bin/sh\nexit 0" > /to/repos/hooks/pre-revprop-change
 sh> chmod +x /to/repos/hooks/pre-revprop-change
 sh> svn mkdir -m 'initial setup' file:///to/repos/{trunk,branches,tags}

 # conversion
 sh> cd /to/rcs/project/hierarchy/copy
 sh> svn co file:///to/repos/trunk .
 sh> rcs2svn -f -v -p -t -g

=head1 LICENSE

=for html
<img src="http://www.gnu.org/graphics/gplv3-127x51.png"
alt="GNU GPLv3" align="right" />

(c) 2005-2010 Fabien Coelho <rcs2svn at coelho dot net>
L<http://www.coelho.net/>

This is free software, both inexpensive and available with sources.
The GNU General Public License v3 or more applies (GPLv3+).
The brief summary is: You get as much as you paid for, and
I am not responsible for anything.
See L<http://www.gnu.org/copyleft/gpl.html> for details.

=head1 CAVEAT

Beware! Run at your own risks.
This software may destroy all your data.
You might lose your hairs or your friends because of it.
Think again.

Converting a significant projet requires a lot of shell (process
forks) and file activity. Doing so with a remote (http, svn, file+nfs)
repository is not a good idea. Consider doing that with a local
temporary repository, for instance in "/tmp", and then to move the
stuff later, possibly with svnadmin dump/load.

There are many reasons why the conversion may fail.
Thus consider that it WILL fail, and act accordingly.
Perform your operations on a copy of your directory structure and
on a temporary repository.

=head1 BUGS

Here is a subset of the great bugs provided freely with this software:

=over 4

=item *

the C<--url> option does not work if there is no subdirectory
to create in the repository.

=item *

RCS branches are not handled.

=item *

SVN stops on unexpected string encoding in commit messages.
Fix the message file, relaunch the commit manually, and continue.

=item *

The converted directories are left in a mess, with .svn and RCS
sub-directories everywhere, and files checked out in strange states.

=item *

"rlog" output parsing is fuzzy and may fail in some case.

=item *

SVN username cannot be issued directly on HTTP repositories.

=item *

"rcs:*" properties are kept forever or deleted, although
they would be best removed on the first SVN update.

=item *

Instead of doing all the processing in the target directory, the
program should copy all RCS files to some temporary directory hierarchy.

=item *

Maybe all initial revisions (file creations) should be performed
together at the first revision? Or it could be offered as an option?

=back

=head1 DOWNLOAD

The latest version of the perl script is available at
L<http://www.coelho.net/rcs2svn.pl>.

This additionnal script may be useful to handle
iso-8859-1 encoded RCS comments:
L<http://www.coelho.net/accents_fr.pl>.

=head1 SEE ALSO

Subversion site at L<http://subversion.apache.org/>.

=head1 VERSION

This documentation is about $Revision: 661 $ of the script.

=cut

################################################################ HANDLE OPTIONS

# option defaults
my ($url, $directory, $tmpdir) = ('', '.', '/tmp');
my ($co, $rlog, $svn, $flt) = ('co', 'rlog', 'svn', '');
my ($go, $quiet, $verbose, $find, $user, $del, $time) = (0, 0, 0, 0, 1, 0, 0);

use Pod::Usage;
pod2usage(-verbose => 0) unless @ARGV;

use Getopt::Long qw(:config no_ignore_case);
GetOptions('go|g!' => \$go,
	   # misc
	   'quiet|q!' => \$quiet,
	   'verbose|v+' => \$verbose,
	   # what to do
	   'directory|dir|d=s' => \$directory,
	   'url|u=s' => \$url,
	   'find|f' => \$find,
	   'no-user|nu|n' => sub { $user=0; },
	   'prop-delete|delete|pdel|del|pd|p' => \$del,
	   'filter-message|filter|fm=s' => \$flt,
	   'time|t!' => \$time,
	   'temporary|tmp=s' => \$tmpdir,
	   # get help
	   'help|h' => sub { pod2usage(-verbose => 1); },
	   'man|m' => sub { pod2usage(-verbose => 2); },
	   'version|V' =>
	     sub {
	       print "$script revision=$revision\n",
	         "license is GNU-GPL v3+ (http://www.gnu.org/copyleft/gpl.html)\n",
	         "see http://www.coelho.net/rcs2svn.html\n";
	       exit 0;
	     },
	   # rcs and svn commands
	   'co|c=s' => \$co,
	   'rlog|r=s' => \$rlog,
	   'svn|s=s' => \$svn)
    or pod2usage(-verbose => 0);

######################################################################### UTILS

# collected data
my %history = ();
my %seen = ('.' => 1);
my $tmp = $tmpdir . '/rcs2svn.' . $$;

# compare two dates for sort...
# ??? would the perl cmp operator be enough?
sub cmp_date($$)
{
  my ($date1,$date2) = @_;
  my ($y1,$M1,$d1,$h1,$m1,$s1) = split /[ :\/]+/, $date1;
  my ($y2,$M2,$d2,$h2,$m2,$s2) = split /[ :\/]+/, $date2;
  return $y1 <=> $y2 if $y1 != $y2;
  return $M1 <=> $M2 if $M1 != $M2;
  return $d1 <=> $d2 if $d1 != $d2;
  return $h1 <=> $h2 if $h1 != $h2;
  return $m1 <=> $m2 if $m1 != $m2;
  return $s1 <=> $s2;
}

# convert rcs file name to dirname and basename
sub dir_basename($)
{
  my ($file) = @_;
  return ($1,$3) if $file =~ /^(.*?)(\/RCS)?\/([^\/]*),v$/;
  die "\abad rcs file name ($file)";
}

# get data from rlog. put result in %history.
# the parser is somehow fuzzy, it may fail in some cases.
sub parse_rlog($)
{
  my ($file) = @_;
  my (@description, $kw, $rev, $date, $author, $state, $lines, @comments);

  # explicit -z to chose the UTC format, which may have been overwritten
  # in the environment by RCSINIT.
  open RCS, "$rlog -z " . shell_escape($file) . " |"
    or die "\acannot $rlog '$file' ($!)";

  # get headers
  while (<RCS>)
  {
    $kw = $1 if /^keyword substitution: (\w+)/;
    push @description, $_ if /^description:/ ... /^-{28}-*$/;
    last if /^-{28}/;
  }

  # get revisions
  while (<RCS>)
  {
    $rev = $1 if /^revision\s(\d+\.\d+)(|\s+locked by: \w+;)$/;
    # otherwise, silently skipping...
    if (defined $rev)
    {
      ($date, $author, $state, $lines) = ($1, $2, $3, $5)
	  if /^date:\s+([0-9:\ \/]+);\s+
	       author:\s+(\w+);\s+
	       state:\s+(\w+);
               (\s+lines:\s+([-+0-9\ ]+?)\s*)?$/x;
      # there should not be a ^[-=]{28} line in the comments...
      push @comments, $_ if /^date: / ... /^[-=]{28}/;
      if (/^[-=]{28}/)
      {
	# special case to handle the initial file description
	@comments = @description if $rev eq '1.1' and @description>2;
	# strip out first and last line
	@comments = @comments[1 .. $#comments-1];
	my $com = join '', @comments;
	$com =~ s/\s+$//;
	# stop if in doubt... ($lines may legitimaly be undefined).
	unless (defined $file and defined $rev and defined $author and
		defined $com and defined $kw and defined $state) {
	  # quite explicit error message if we stop here.
	  warn "\$file undef" unless defined $file;
	  warn "\$rev undef\n" unless defined $rev;
	  warn "\$author undef\n" unless defined $author;
	  warn "\$com undef\n" unless defined $com;
	  warn "\$kw undef\n" unless defined $kw;
	  warn "\$state undef\n" unless defined $state;
	  die "\arlog parse error, undefined variable found!";
	}
	# record
	push @{$history{$date}},
	  [ $file, $rev, $author, $com, $kw, $state, $lines ];
	# cleanup for next entry
	undef $date; undef $rev; undef $author; undef $com;
	undef $state; undef $lines; undef @comments;
      }
    }
  }

  close RCS or die "\acannot close ($!)";
}

# escape string (possibly a filename) for the shell
sub shell_escape($)
{
  my ($str) = @_;
  $str =~ s/([^-\/A-Za-z0-9_.])/\\$1/g;
  return $str;
}

# run a command under a shell.
# all arguments but the first are shell escaped...
sub sys($@)
{
  my ($cmd, @args) = @_;
  for my $arg (@args) {
    $arg = shell_escape($arg);
  }
  print STDERR "# $cmd @args\n" if $verbose;
  if ($go && system("$cmd @args"))
  {
    die "\acannot $cmd @args ($!)" if $quiet;
    # else ask for a confirmation
    warn "\acannot $cmd @args ($!)";
    my $answer = '1';
    while (defined($answer))
    {
      print STDERR "continue? (yes|no): ";
      $answer = <STDIN>;
      last if $answer =~ /^\s*(yes|y|no|n)\s*$/i;
    }
    die "interactive stop" unless $answer =~ /^y/i;
  }
}

# add intermediate directories if necessary
sub check_directory_hierarchy($)
{
  my ($dir) = @_;
  return if exists $seen{$dir};
  my $subdir = '';
  for my $d (split '/', $dir)
  {
    $subdir .= ($subdir? '/': '') . $d ;
    sys "$svn add --non-recursive", $subdir unless $seen{$subdir}++;
  }
}

#################################################################### DO THE JOB

print "BEWARE! This program may destroy all your precious data.\n",
      "Use --go option to really run it\n" unless $go;

die "\atemporary directory '$tmpdir' not available" unless -d $tmpdir;

# where we do our job
chdir $directory or die "\acannot chdir to $directory" if $directory;
use Cwd;
my $here = getcwd;

# find rcs files...
if ($find)
{
  use File::Find;
  find sub {
    push(@ARGV, $File::Find::name)
      if $File::Find::name =~ /\/[^\/]*,v$/;
  }, '.';
}

# create repository sub-directory if necessary
die "\atarget directory must be a working copy or use option --url"
  unless $url xor -d '.svn';

# ??? this does not work if there is no subdirectory to url
if ($url)
{
  #system "$svn ls $url" or
  sys "$svn --message='created by rcs2svn' mkdir", $url;
  sys "$svn co", $url, ".";
}

# first collect data from RCS files
for my $file (@ARGV)
{
  die "\ainvalid absolute filename ($file)" if $file =~ /^\//;
  $file = "./$file" unless $file =~ /^\./;
  parse_rlog($file);
}

# then walk thru the history and put everything into svn
for my $date (sort cmp_date keys %history)
{
  print STDERR "### handling $date\n" if $verbose;
  for my $fd (@{$history{$date}})
  {
    my ($rcs_file, $rev, $author, $comments, $kw, $state, $lines) = @$fd;
    my ($dir, $base) = dir_basename($rcs_file);

    print STDERR "## doing $dir $base $rev $author\n" if $verbose;
    check_directory_hierarchy($dir);

    chdir $dir or die "\acannot chdir to $dir ($!)";

    # remove so as to fix 'w' right set by svn
    unlink $base or die "\acannot remove $base ($!)" if $go and -f $base;
    sys "$co -r$rev", $base;

    # only the first time: add file, fix keywords and execution permission
    if ($rev eq '1.1')
    {
      $seen{"$dir/$base"} = 1;
      sys "$svn add -q", $base;

      # RCS/SVN compatible keyword substitutions
      # RCS has:
      # - Author Date Header Id Locker Log RCSfile Revision Source State
      # SVN has:
      # - URL HeadURL, Author LastChangedBy, Date LastChangedDate,
      #   Rev Revision LastChangedRevision, Id
      sys "$svn pset -q svn:keywords 'Author Date Revision Id'", $base
	unless $kw eq 'k' or $kw eq 'o';

      sys "$svn pset -q svn:executable 1", $base if -x $base;
    }

    sys "$svn pset -q rcs:date '$date'", $base;
    sys "$svn pset -q rcs:author '$author'", $base;
    sys "$svn pset -q rcs:rev '$rev'", $base;
    sys "$svn pset -q rcs:state '$state'", $base;
    sys "$svn pset -q rcs:lines '$lines'", $base if $lines;

    # back
    chdir $here or die "\acannot chdir to $here ($!)";

    # commit
    open TMP, ($flt? "|$flt":'') .">$tmp" or die "\acannot open $tmp ($!)";
    print TMP "# on $date (UTC), $author did:\n$comments\n";
    close TMP or die "\acannot close ($!)";
    my $username = $user? "--username=$author": '';
    sys "$svn commit -q --non-interactive $username -F $tmp";
    unlink $tmp or die "\acannot unlink $tmp ($!)";

    # fix commit date
    if ($time)
    {
      my $svndate = "$date.Z";
      $svndate =~ tr/\/ /-T/;
      # COMMITTED?
      sys "$svn pset svn:date --revprop -r HEAD '$svndate'";
      sys "$svn commit";
    }
  }
}

if ($del)
{
  print STDERR "### deleting rcs:* properties\n" if $verbose;
  sys "$svn pdel -q --non-interactive rcs:date -R .";
  sys "$svn pdel -q --non-interactive rcs:author -R .";
  sys "$svn pdel -q --non-interactive rcs:rev -R .";
  sys "$svn pdel -q --non-interactive rcs:state -R .";
  sys "$svn pdel -q --non-interactive rcs:lines -R .";
  sys "$svn commit -q --non-interactive -m 'rcs:* properties removed'";
}
