/*
>From: EVERHART%ARISIA.decnet@GE-CRD.ARPA
Newsgroups: comp.os.vms
Subject: Small upgrade to TAR reader
Message-ID: <8901280458.AA21037@ucbvax.Berkeley.EDU>
Date: 27 Jan 89 21:23:00 GMT
Sender: daemon@ucbvax.BERKELEY.EDU
Organization: The Internet
Lines: 489

Folks -
  I noticed that some TAR tape directory trees are too deep for VMS when
trying to use Sid Penstone's TAR reader here. To fix things up, I added a
small edit. Its effect is to add an s option to the reader to effectively
partly flatten directory trees. This is done by replacing the / between
levels of directories after the first with _. As a result, one can
arrange that a directory that might have been created as, for
example
[x11r3.lib.clx.debug]
  gets created instead as
[x11r3.lib_clx.debug]
  and so on. Using the s option more than once allows multiple such
replacements to occur. Thus, basically no information is lost so long
as the total directory length is short enough to fit in a VMS file
specification. The trees remain shallow enough for VMS to handle them and
you can read them in.
  I send it out in thanks to Sid Penstone for the original TAR reader,
which works very nicely.
        Glenn Everhart
        Everhart%Arisia.decnet@ge-crd.arpa

----------- cut here ----------
*/
/* Read a TAR format tape or file , move files into VMS directories */
/* Copyright 1986, Sid Penstone,
*  Department of Electrical Engineering,
*  Queen's University,
*  Kingston, Ontario, Canada K7L3N6
* (613)-545-5925
* BITNET:          PENSTONE@QUCDNEE1  (Preferred)
*       or  PENSTONE@QUCDN
*
* Version 2.3a, Jan. 17, 1989
* mods: - Added a few more chars to VMS forbidden list and allowed -
*       - in filenames. Added "s" option to skip a directory spec,
*       - done by changing the . for subdirs into a _. Multiple s's
*       - cause multiple dirs to be so "skipped". gce.
*       - Use ss to pass one dir, sss for two and so on.
* Version 2.3, Jan.9,1987
* mods: - corrected header size (thanks to Eric Gisin, U .of Waterloo)
*       - No more of the dreaded QIO's ( "  "  " )
*       - tried to sort out link flag format
*       - uses a tape or a file as input
*       - NOTE: default is NO conversion to vms standard text format (cr)
* 2.1   - trapped commas in file names, converted to '_'
* 2.2   - reported translations of names
*       - continued after error in opening output file
*       - exit correctly after error opening input file
* 2.3   - fixed bug in make_new on top level file (thanks to Ursula Natrup,
*                                       natrup@vax.hmi.dfn )
*       - reject "@" in filenames
*/


/* The input data is in record format, length 512, blocks of 10240 bytes;
 */


#include stdio
#include time
#include ssdef
#include iodef
#include descrip
#include ctype

#define ERROR1 -1               /* Failure value compared against creat() in copyfile() */
#define BUFFSIZE 512            /* Not referenced in the code visible in this file */
#define ISDIRE 1                /* scan_title() result: the entry is a directory */
#define ISFILE 0                /* scan_title() result: the entry is a file */
#define NAMSIZE 100             /* Size of the name buffers and of the header name fields */
#define SIZE 20480              /* Block size; not referenced in the code visible here.
                                 * NOTE(review): the comment above quotes 10240-byte
                                 * blocks - confirm which figure is intended. */
#define DSIZE 512               /* Data block size: one header or data record per read() */

/* One tar header record as read from the input by hdr_read().
 * All members are char arrays, so the field sizes add up to exactly
 * 512 bytes (DSIZE). The numeric fields are octal text and are parsed
 * with sscanf("%o") in decode_header().
 */
struct                  /* A Tar header */
    {
    char title[NAMSIZE];        /* File name as stored in the archive */
    char protection[8];         /* Octal mode; decoded into "mode" */
    char field1[8];             /* this is the user id; decoded into "uic1" */
    char field2[8];             /*  this is the group id; decoded into "uic2" */
    char count[12];             /*  Octal byte count of the data; was 11 in error */
    char time[12];              /* UNIX format date, octal; passed to ctime() */
    char chksum[8];             /* Header Checksum (ignored) */
    char linkcount;             /* Link flag; decode_header() accepts it either
                                 * as a digit character or as a binary value */
    char linkname[NAMSIZE]; /* Space for the name of the link */
    char dummy[255];    /* and the rest: pads the record to 512 bytes */
    } header;

/* Shared record buffer: used for data reads in copyfile() and tarskip(),
 * and to format the "linked to" note in copyfile().
 */
static char buffer[DSIZE];      /* BUFFER for a record */

/* Function flags, options (set from the command line in main): */
int extract,            /* x option (default when no arguments are given) */
    list,                       /* t option : list tape contents */
    verbose,            /* v option, report actions */
    wait;               /* w option; set in main, not read in the code visible here */

/* Miscellaneous globals, etc. */

char *tarfile = "tape", /* Input file name passed to open() in opentar() */
    pathname[NAMSIZE],  /* File name as found on tape (UNIX) */
    directory[NAMSIZE], /* Current directory; initialised to "top" in main */
    new_directory[NAMSIZE],     /* Directory of current file (VMS form) */
    top[NAMSIZE],               /* Top level or root, from getenv("PATH") */
    newfile[NAMSIZE],   /* VMS format of file name */
    outfile[NAMSIZE],   /* Complete output file specification */
    temp[256],          /* Scratch; holds the reply to the EOF prompt */
    creation[NAMSIZE],  /* Date as extracted from the TAR file (ctime text) */
    *ctime(),           /* System function */
    linkname[NAMSIZE];  /* Linked file name  */

int bytecount,  mode, uic1, uic2, linktype;/* Data from header, set by decode_header() */
int tarfd;                      /* The input file descriptor */
int dirskp;             /* directories to skip in input: number of 's' options given */

main(argc,argv)
int argc;
char *argv[];
{
int isterm,status,file_type,j,c, flag;
char *make_directory(), *cp;

/* Decode the options and parameters: */

    dirskp=0;
    if(argc ==1)
        {
        extract = 1;            /* Default for now */
        verbose = 1;
        wait = 0;               /* Don't wait for prompt */
        }
    while(--argc > 0)
        {
        cp = argv[1];
        while(c = *cp++)
            {
            switch(c)
            {
            case 't':
                list=1;
                break;
            case 'x':
                extract=1;
                break;
            case 'v':
                verbose=1;
                break;
            case 'w':
                wait=1;
                break;
            case 's':
                dirskp++;
                break;
            default:
                printf("Option '%c' not recognized.\n",c);
            }
       }
   }


/* Find if this is a terminal */
    isterm = isatty(0);

/* Set up directory names */
    strcpy(top,getenv("PATH"));

/* Start with the default as the top */
    strcpy(directory,top);

/* open the file for reading */
    if((tarfd = opentar()) <= 0)
        {
        printf("Error opening the Tar tape\n");
        exit(2);
        }
/* Now keep reading headers from this file, and decode the names, etc. */

    while((status=hdr_read(&header))==DSIZE)    /* 0 on end of file */
        {
        if(strlen(header.title)!=0)     /* Valid header */
            {
            decode_header();
            if(extract)
                {
                file_type=scan_title(pathname,new_directory,newfile);
                if( make_new(new_directory)!=0)
                    printf("Error creating %s\n",new_directory);
                if(file_type == ISDIRE)
                    {}
                if(file_type == ISFILE)
/*  Now move the data into the output file */
                    if(bytecount>0)
                        {
                        strcpy(outfile,new_directory);
                        strcat(outfile,newfile);
                        if((j=copyfile(outfile,bytecount))<0)
                            printf("Error writing file %s\n",outfile);
                        }
                }
            else                        /* listing only */
                {
                printf("%o %6d %s %s\n",
                    mode,bytecount,creation+4,pathname);
                if(linktype == 0)
                    tarskip(bytecount);
                else
                    printf("     *****( Linked to file: %s)\n",linkname);
                }
            }
        else                    /* Empty header means the end!!! */
            {
            status = 1;
            printf("End of Tar file found.\n");
            break;
            }

        }       /* end while  */
    if(status == 1)                     /* Empty header */
        {
        printf("Do you wish to move past the EOF mark ? y/n\n");
        gets(temp);
        if(tolower(temp[0]) == 'y')
            while((status=hdr_read(&header)) >0);
        else
            exit(SS$_NORMAL);
        }
    if(status==0)                       /* End of tar file  */
        {
        printf("End of file encountered\n");
        exit(SS$_NORMAL);
        }
    if(status<0)                        /* An error  */
        {
        printf("Error reading input.\n");
        exit(2);
        }
}


/* Copy the data of the current archive member to a new output file,
 * with no conversion.
 *
 *   outfile  complete name of the file to create
 *   nbytes   number of data bytes that follow the header in the archive
 *
 * For a link entry (linktype != 0) no data is read; the file receives a
 * one-line note naming the link target instead.
 *
 * Returns 0 on success, -1 if the input ends (or fails) early or the
 * output cannot be written, -2 if the output file cannot be created.
 * In the -2 case the member's data is skipped so that the next read
 * starts at the following header.
 */

int copyfile(outfile,nbytes)
char outfile[]; /* name of output version */
int nbytes;

{
int inbytes, fil, chunk;
/*  Open the output file */
    if((fil=creat(outfile,0)) == ERROR1)
        {
        printf(" Creation error in opening %s \n",outfile);
        tarskip(nbytes);
        return(-2);
        }
    if(linktype !=0)
        {
        sprintf(buffer,"This file is linked to %s\n",linkname);
        /* The length must be that of the text just formatted */
        write(fil,buffer,strlen(buffer));
        }
    else
        {
        while(nbytes>0)
            {
            if((inbytes=read(tarfd,buffer,DSIZE)) <= 0)
                {
                printf("End of input file detected\n");
                close(fil);
                return(-1);
                }
            /* Never write more than was read, nor more than is left;
             * the last record of a member is padded in the archive. */
            chunk = (nbytes > inbytes)? inbytes : nbytes;
            nbytes -= inbytes;
            if(write(fil,buffer,chunk) < 0)
                {
                tarskip(nbytes);        /* Stay in step with the archive */
                close(fil);
                return(-1);
                }
            }
        }
/* Close the file */
    close(fil);
    if(verbose)
        {
        printf("CREATED: %s\n",outfile);
        if(linktype!=0)
            printf(" *** REAL DATA IS IN: %s\n",linkname);
        }
    return(0);
}

/* Split a UNIX path name from the archive into a VMS directory
 * specification and a file name.
 *
 *   line   the full title as read from the header; a leading "./" is
 *          removed from it in place
 *   dire   receives the directory: the top level directory, extended by
 *          the directory part of "line" when there is one
 *   fname  receives the file name, empty when "line" ends with '/'
 *
 * Returns ISDIRE when no file name is left, ISFILE otherwise.
 *
 * Characters rejected by vms_cleanup() become '_'. In the directory part
 * the first "dirskp" separators become '_' and the rest '.'. In the file
 * name every '.' after the first becomes '_'. When verbose is set, any
 * such change to the file name is reported.
 *
 * NOTE(review): assumes "top" is non-empty and ends in ']', and that the
 * combined directory fits the caller's buffer - neither is checked here.
 */

int scan_title(line,dire,fname)
char line[],dire[],fname[];
{
char temp[NAMSIZE],*end1,*src,*dst;
int i,ind;
int dlvl;
/* The format will be UNIX at input, so we have to scan for the
* UNIX directory separator '/'
* If the name ends with '/' then it is actually a directory name.
* If the directory consists only of '.', then don't add a subdirectory
* The output directory will be a complete file spec, based on the default
* directory.
*/
    strcpy(dire,top);                   /* Start with the top level */
    if(strncmp(line,"./",2)==0)
        {
        /* ignore "./" : shifted by hand, because strcpy() between
         * overlapping strings is undefined */
        for(dst=line,src=line+2;(*dst++ = *src++)!=0;)
            ;
        }
    strcpy(temp,line);                  /* Start in local buffer */
    ind=vms_cleanup(temp);              /* Remove illegal vms characters */
    if((end1=strrchr(temp,'/'))==0)     /* No directory at all  ? */
        strcpy(fname,temp);             /* Only a file name */
    else
        {                               /* End of directory name is '/' */
        *end1 = 0;                      /* Terminate directory name */
        strcpy(fname,end1+1);           /* File name without directory */
        dlvl=0;
        for (i=0;temp[i];i++)           /* Change '/' to '.' in directory */
            if(temp[i]=='/')
                {
                /* The first "dirskp" separators become '_' (s option) */
                temp[i] = (dlvl<dirskp)? '_' : '.';
                dlvl++;
                }
        dire[strlen(dire)-1] = (temp[0]=='.')?0:'.' ;
                 /* "." to indicate a subdirectory (unless already there )*/
        strcat(dire,temp);      /* Add on the new directory  */
        strcat(dire,"]") ;              /* And close with ']' */
        }
    if(strlen(fname)==0)        /* Could this cause problems ? */
        return(ISDIRE);

    for(i=0,end1=fname;*end1;end1++)    /* Replace multiple . */
        if(*end1 == '.')
            if(i++)*end1 = '_';         /* After the first */
    if((i>1||ind)&& verbose )           /* Any translations ? */
        printf("****RENAMED: %s \n         TO: %s\n",line,fname);
    return(ISFILE);
}

/* Make sure the directory "want" exists, creating missing levels.
 *
 * "want" is a full specification such as "DEV:[top.sub1.sub2]". It is
 * split at its last '.' into the parent "DEV:[top.sub1]" and the
 * directory file "sub2.dir". When that file cannot be accessed, the
 * parent is dealt with first by recursion (unless the parent is the top
 * level directory) and then "want" itself is created.
 *
 * A specification without any '.' (a file at top level) needs nothing
 * created and is accepted as it stands.
 *
 * Returns 0 on success or when nothing had to be done, -1 when a
 * directory could not be created.
 */

int make_new(want)
char want[];
{
char leaf[NAMSIZE],upper[NAMSIZE],dirfile[NAMSIZE],*dot;

    strcpy(upper,want);
    upper[strlen(upper)-1] = 0;         /* Drop the closing "]" */
    dot = strrchr(upper,'.');           /* Start of the last level */
    if(dot == NULL)
        return(0);                      /* Top level: nothing to create */

    strcpy(leaf,dot+1);                 /* Name of the last level ... */
    strcat(leaf,".dir");                /* ... as a directory file */
    dot[0] = ']';                       /* Close the parent here */
    dot[1] = 0;
    strcpy(dirfile,upper);
    strcat(dirfile,leaf);

    if(access(dirfile,0) >= 0)
        return(0);                      /* Already there */

    /* Missing: build the levels above first, unless at the top */
    if(strcmp(upper,top)!=0 && make_new(upper))
        return(-1);
    if(mkdir(want,0755,0,0,0))
        return(-1);
    if(verbose)
        printf("CREATED: %s\n",want);
    return(0);
}

 /* Function to open and get data from the blocked input file */
int opentar()
{
/* Opens the input named by "tarfile" for reading, passing the
 * "rfm = fix" and "mrs = 512" option strings to open().
 * Returns the descriptor, or 0 (after a message) when open() fails.
 */
int handle;

    handle = open(tarfile, 0, "rfm = fix","mrs = 512");
    if(handle >= 0)
        return(handle);

    printf("Can't open input file \n");
    return(0);
}

/* Read the next DSIZE-byte record from the input into "buffer".
 * The value of read() is handed back unchanged: the caller treats DSIZE
 * as a complete header and 0 as end of file.
 */
int hdr_read(buffer)
char *buffer;
{
    return(read(tarfd,buffer,DSIZE));
}


/* Skip over the data of an archive member.
 *
 *   bytes  number of data bytes to pass over
 *
 * The input is consumed one DSIZE-byte record at a time until at least
 * "bytes" bytes have been read. Used for listings and after a failure
 * to create an output file.
 *
 * Returns 0 on success, -1 on end of file or on a read error.
 */
int tarskip(bytes)
int bytes;
{
int got;
    while(bytes > 0)
        {
        got = read(tarfd,buffer,DSIZE);
        if(got < 0)
            {
            /* Without this test a failing read() would add to the
             * count and the loop would never finish */
            printf("Error reading input while skipping.\n");
            return(-1);
            }
        if(got == 0)
            {
            printf("End of file encountered while skipping.\n");
            return(-1);
            }
        bytes -= got;
        }
    return(0);
}

/* Decode the fields of the header */

int decode_header()
{
int idate, *bintim;
char ll;
bintim = &idate;
    linktype=0; strcpy(linkname,"");
    strcpy(pathname,header.title);
    sscanf(header.time,"%o",bintim);
    strcpy(creation,ctime(bintim));     /* Work on this! */
    creation[24]=0;
    sscanf(header.count,"%o",&bytecount);
    sscanf(header.protection,"%o",&mode);
    sscanf(header.field1,"%o",&uic1);
    sscanf(header.field2,"%o",&uic2);
    /* We may have the link written as binary or as character:  */
    linktype = isdigit(header.linkcount)?
            (header.linkcount - '0'):header.linkcount;
    if(linktype != 0)
        sscanf(header.linkname,"%s",linkname);
    return(0);
}


/* Replace characters that are not accepted in VMS directory and file
 * names with underscores, in place.
 *
 *   string  NUL terminated name to clean up
 *
 * The characters replaced are , + @ ^ & ? and *. Hyphens are left
 * alone. Returns the number of characters that were replaced.
 */
int vms_cleanup(string)
char string[];
{
int i,flag=0;
char c;
    for(i=0;(c=string[i])!=0;i++)
        {
        switch (c)
            {
/* allow hyphens; ok since vms 4.4 */
            case ',':           /* No commas in file names  */
            case '+':           /* No plusses in file names */
            case '@':           /* No '@' allowed in a name  */
            case '^':           /* No '^' allowed in a name  */
            case '&':           /* No '&' allowed in a name  */
            case '?':           /* No '?' allowed in a name  */
            case '*':           /* No '*' allowed in a name  */
                string[i]= '_';
                flag++;         /* Record if any changes were made */
                break;
            default:
                break;
            }
        }
    return(flag);
}
