#!/usr/local/bin/perl
# metaclone - clone gophers

# usage:
# metaclone [www style gopher reference]
# metaclone gopher://gopher.msen.com:70/cicnet

# original NNTP client suggested by eci386!clewis
# socket code mailed to me by cmf@obie.cis.pitt.edu (Carl M. Fongheiser)
# adaptation for gopher by emv@msen.com (Edward Vielmetti)
# modification to indexer by alberti@boombox.micro.umn.edu (Bob Alberti)
# Hacked into metaclone by benseb@sdsc.edu ( Booker C. Bense ) 11/2/92
# This was tested with perl.4.19
# Note: this assumes it is running in the directory underneath which
# you want the metacenter info to appear, i.e. At SDSC we would run 
# it in the MetaCenter directory. 

# Configuration information -- change to reflect your site.

$_ = $ARGV[0] ? $ARGV[0] : 'gopher://darth.sdsc.edu:70/1MetaCenter';
$my_host = "grumpy.sdsc.edu" ;  # who is my gopher server 
				# Might be the same as hostname
#If an argument exists, use it, otherwise use default

($service, $host, $port, $path) = (/^(gopher:\/\/)([^:]+):(\d+)\/.\/(.*)/); 

#If debug = 0, metaclone runs silent.  =1 is a verbose run.  Commented
#debug lines are annoyingly thorough

$DEBUG = 1;              #set this to 0 for silent operation

# stuff for recursion levels and their_host

$last_level = 2 ;		# directories are links at this level
$too_deep = $last_level ;	# Don't go greater than this level 


if ($host && $port && $path) {
    $DEBUG && print "host=$host; port=$port; path=$path\n";
    # Here's how to make your own socket.ph
    # cp /usr/include/sys/socket.h socket.h
    # h2ph socket
    require 'sys/socket.ph';	
    chop($hostname = `hostname`); # get host name in variable

    $their_host = $host ;	# Remember original host !
    ($N) = &tcpconnect($host, $hostname);# open connection 
	if ($path eq "/") {
	    $path = "";
	}
    $recurse_level = 0; 
    &gopherlevel($host, $hostname, $path, N); # clone the gopher

    close(N);			# close the connection.  NOTHING TO IT!
}
else {
   print "Command format:\n\n";
   print "   metaclone service://host.name:port/path/\n\n";
   print "If a directory in the path includes multiple words separated by spaces,\n";
   print "(i.e. /path name/), surround the parameter string with single quotes:\n\n";
   print "   metaclone 'service://host.name:port/path name/'\n\n";

}

sub gopherlevel {  
	     

   # Build a level of gopher directory before recursion
   local($host, $hostname, $path, $N) = @_;
                           $DEBUG && print "sending path=$path\n";

   $recurse_level += 1;		# Actually this is not needed, but I'm paranoid
	if ( $recurse_level  > $too_deep ) {
		$DEBUG && print "Recurse Level too deep $recurse_level\n";
		return ; 
	}

   $path =~ s%^/(\d+)%\1/%; #swap first / and char ( Must have type!)
   send(N,"$path\r\n",0);
                           $DEBUG && print STDERR "$path\r\n";
   local($dirnum, $docnum, $i, @doc, @dir); #avoid scoping errors
   @doc = 0;               #call me a fuddy-duddy but I like to Know
   @dir = 0;
   $filename = sprintf(".Remote@%s",$host); 
   open(FILE, ">>$filename") || die "Couldn't open new file $filename: $!\n";
   while(<N>)  {		#While receiving data
       chop;chop;		# trim data
       next if /^[\. ]*$/;	# quit if a period
       s/^(.)// && ( $type = $1); # otherwise Type is first character
       @G= split(/\t/);		# and split other fields on tabs



       if (($type == 1 && $recurse_level < $last_level ) && $G[2] eq $their_host )  {	
# Add directories to the list of directories
	   $dirnum += 1;
	   $dir[$dirnum] = $G[1]; # to be built after all information received
	   $DEBUG && print "$dirnum: $dir[$dirnum]\n";   
# need to make .cap entries .... 
	   @path = split('/',$dir[$dirnum]); # split off leading entries in path;
	   $dirname = $path[$#path]; # take last item as name

	   $_ = $dirname;	#Bah, this is ungraceful, but 
	   if (/^\S/) {		#sometimes $dirname is blank.
	       if ( ! -d ".cap" ) {
		   mkdir (".cap", 0xfff ) || print  "Mkdir .cap: $!\n"; }
	       if ( ! -f ".cap/$dirname" ) {
		   open(CAPFILE, ">.cap/$dirname") 
		       || die "Couldn't open new file .cap/$dirname: $!\n";
		   print CAPFILE "Name=$G[0]\n" ;
		   close(CAPFILE); }
	   } ; 
       } else {
	   if ( $G[2] ne $my_host ) { # Should check for redundant entries here !
	       #  Something for the next version BCB 11/2/92 
	       #  The server is smart enough in version 1.03 to
	       #  not print redundant entries
	       print FILE "#\nType=$type\n";
	       print FILE "Name=$G[0]\n";
	       print FILE "Path=$G[1]\n";
	       print FILE "Host=$G[2]\n";
	       print FILE "Port=$G[3]\n";
	   } else {
	       $DEBUG && print "@G is :$my_host:$G[2]: \n"; }
       }
   }
   close(FILE);
   
   close(N);

   for ($i = 1; $i <= $dirnum; $i++) {  # Make directories
       @path = split('/',$dir[$i]);     # split off leading entries in path;
       $dirname = $path[$#path];        # take last item as name
       $DEBUG && print "dirname: $dirname\n";
       $_ = $dirname;		        #Bah, this is ungraceful, but 
       if (/^\S/) {		        #sometimes $dirname is blank.
	   if ( ! -d $dirname ) {
	       mkdir ($dirname, 0xfff) || print "Mkdir $dirname: $!\n"; }
       }
       else {
           next;
       }
       chdir ($dirname)        || die print "Chdir $dirname: $!\n";

       $DEBUG && print "Connecting to $host from $hostname\n"; 
       ($N) = &tcpconnect($host, $hostname);

       if ($N) {
	   &gopherlevel($host, $hostname, $dir[$i], N);
	   $recurse_level -= 1; # pop recurse_level on return 
	   sleep(2);		#arbitrary sleeps give sockets time to close
	   chdir("..")          || die print "chdir up: $!\n"; 
       }
       else {
	   die "Couldn't open tcp connection $N: $!\n"; 
       }
       close(N);
   }  
}

sub tcpconnect {                    #Get TCP info in place
   local($host, $hostname) = @_;
   $sockaddr = 'S n a4 x8';

                            #$DEBUG && print "host: $host, me: $hostname\n";

   ($name,$aliases,$proto) = getprotobyname('tcp');
   ($name,$aliases,$port) = getservbyname($port, 'tcp')
        unless $port =~ /^\d+$/;
   ($name,$aliases,$type,$len,$thisaddr) = gethostbyname($hostname);
   ($name,$aliases,$type,$len,$thataddr) = gethostbyname($host);

   $this = pack($sockaddr, &AF_INET, 0, $thisaddr);
   $that = pack($sockaddr, &AF_INET, $port, $thataddr);

   sleep(2);

   socket(N, &PF_INET, &SOCK_STREAM, $proto) || die "socket: $!";
   bind(N, $this)                            || die "bind: $!";
   connect(N, $that)                         || die "connect: $!";

   return(N);
}
