/* ------------------------------------------------------------------------ *
 *                            K R 2 A N S I . C                             *
 * ------------------------------------------------------------------------ *
 *                                                                    9/23/91
 *  Author:   Harry Karayiannis
 *            ______________________E-MAIL:_____________________
 *            INTERnet:                | BITnet:
 *               harryk@bucsf.bu.edu   |   cscrzcc@buacca.bu.edu
 *            --------------------------------------------------
 *
 *  Purpose:  Read a C-code source file written in Kernighan-Ritchie's
 *            programming style, and produce ANSI prototypes for all
 *            functions. (Hint: using output redirection you can create
 *            a file with prototypes of all the functions in your K&R C 
 *            source-file. The program is automagically putting the word
 *            "extern" in front of the prototype-line, so you can #include
 *            the file in your current source code and compile it with an
 *            ANSI compiler).
 *
 *  Usage:    kr2ansi [-p] [ [-r file1] | [usr_def1 ... usr_defn] ] file2
 *               -p       : show parameters
 *               -r file1 : read user-defined types from 'file1'
 *                usr_def1 ... usr_defn :
 *                          user-defined types present in 'file2'
 *                file2   : the file to read C-code from
 *
 *  Notes:    This program is useful for people (like myself) who like
 *            K&R programming style, but want to take advantage of the
 *            parameter-checking, during compilation, typically done by
 *            ANSI compilers (e.g. gcc, Turbo-C, Prospero-C, etc).
 *            Others simply want their K&R code to compile under an ANSI
 *            compiler without watching all those "irritating" warnings
 *            saying: "Function call without prototyping".
 *              For either case, kr2ansi can prove very handy.....
 *
 *  Caveats:  The program is not bug-free. I tried to make it work with
 *            files that follow the programming style presented in
 *            "The C Programming Language (1st Edition)", by Kernighan
 *            and Ritchie.
 *            In other words, it expects K&R function declarations to
 *            be in the following form:
 *            (NOTE: I write comments with backslashes '\' cause
 *                   MWC does not parse nested comments)
 *
 *              FUNCTION-TYPE FUNCTION-NAME(P1,P2,...,PN) \* comments *\
 *              \* also you can have ... *\
 *              \* ...more comments here *\
 *                  PARAMETER-TYPE  P1,P2;      \* comments *\
 *              \* ... or here *\
 *                  PARAMETER-TYPE  P3;         \* more comments *\
 *                  PARAMETER-TYPE  P4,...,PN;  \* yet more comments *\
 *              {                       \* ... or even here *\
 *                function-body
 *              }
 *
 *            The program gets confused when it finds 1) comments inside the
 *            parameter list, 2) semicolons placed after comments, 3) comments
 *            that open in one line and close in a different one, 4) curly
 *            braces '{' that do not appear as the first char in a separate
 *            line, and perhaps in some more cases I've not figured out yet.
 *            But you shouldn't use such a bad programming style anyways 8*)
 *            (Hint: I think that unix-like preprocessors (like "cpp" in
 *                   MWc, gcc, etc) take a flag that causes comments to be
 *                   removed from the source-file...check it out)
 *
 *            However, it is almost guaranteed to work with files that follow
 *            K & R, or Rochkind's programming style (except in one case...)
 *
 *
 *  Bugs:     kr2ansi fails to find integer functions that do not include
 *            the data-type in the beginning of the function-declaration:
 *            e.g.
 *                  main(argc,argv)
 *                      int   argc;
 *                      char  *argv[];
 *                  {
 *                    ...
 *                  }
 *
 *            The reason is that the program identifies a K&R function header
 *            by checking the first word of the line, which *must* be a valid
 *            data-type (standard or user-defined). If you come up with a bet-
 *            ter algorithm please let me know. (Please don't tell me to write
 *            a complete C parser, cause I won't)
 */



#include <stdio.h>              /* don't tell me you don't know this one */
#include "kr2ansi.h"            /* constants, macros & user-defined types */

/* Name the program was started with; main() sets it from argv[0]. */
char  *progname;                    /* the name of the program */
/* Table of recognised data-type names, initialised from the DATA_TYPES
 * macro (presumably supplied by kr2ansi.h). parse() and set_dtypes()
 * store user-defined type names into its DT_USR1.. slots, and valid()
 * searches it. */
char *data_types[]={ DATA_TYPES };  /* array of valid data-types */
/* Command-line flags; parse() ORs SHOW_PARA / RD_TYPES into it and
 * construct_ANSI_declaration() tests SHOW_PARA. */
unsigned options = NONE;            /* bit map with command-line options */

/* ======================================================================== *
 *                                main
 * ======================================================================== */

int main(argc, argv)
    int   argc;
    char  *argv[];
{
/* Program entry point: remember the program name, let parse() digest
 * the command line and then either convert the requested file or
 * print the usage line. The function always returns 0.
 */

  GLOBAL char *data_types[];      /* table of valid data-types */
  char  input_name[MAXNAME];      /* receives the name of the input file */

  BOOLEAN parse();                /* FALSE means: bad command line */
  void    kr2ansi();              /* does the actual conversion */
  extern void usage();            /* in file: ERROR.C */


  progname = argv[0];             /* set the program's name */

  if ( parse(argc, argv, data_types, input_name) )
    kr2ansi(input_name);
  else
    usage("[-p] [ [-r file1] | [usr_def1 ... usr_def10] ] file2");

  return(0);
}


/* ======================================================================== *
 *                                parse
 * ======================================================================== */

BOOLEAN parse(argc, argv, data_types, fname_in)
    int   argc;               /* number of args in command-line */
    char  *argv[];            /* the arguments themselves */
    char  *data_types[];      /* array of valid data-types */
    char  *fname_in;          /* name of the input file */
{
/* Check the validity of the command line,
 * make valid specified user-defined data-types,
 * and assign the specified filename to : fname_in
 */

  register int i,u;           /* integer counters */
  void set_dtypes();          /* read user-def. types from a file */
  GLOBAL unsigned options;    /* bit map with command-line options */
  STD_CLIB char *strcpy();    /* part of the standard C-library */


  if (argc < 2)                       /* too few arguments */
    return(FALSE);

  i=1;             /* you can add your own options in this loop */
  while ( argv[i][0] == '-' )
  {
    if ( !strcmp(argv[i],"-p") )      /* option -p: */
    {                                   /* show parameters */
      if (i == argc-1)
        return(FALSE);
      options |= SHOW_PARA;
    }
    else if ( !strcmp(argv[i],"-r") ) /* option -r: */
    {                                   /* read user-defined types from file */
      if (i != argc-3)
        return(FALSE);
      options |= RD_TYPES;
      set_dtypes(argv[i+1], data_types);
    }
    else if ( !strcmp(argv[i],"") )
      options |= UNUSED4;               /* unused slot */
    else if ( !strcmp(argv[i],"") )
      options |= UNUSED5;               /* unused slot */
    else if ( !strcmp(argv[i],"") )
      options |= UNUSED6;               /* unused slot */
    else if ( !strcmp(argv[i],"") )
      options |= UNUSED7;               /* unused slot */
    else if ( !strcmp(argv[i],"") )
      options |= UNUSED8;               /* unused slot */
    else                              /* invalid option */
      return(FALSE);                    /* return FALSE */
    i++;
  }
 
  if ( !(options & RD_TYPES) )      /* get user-def types from command-line */
    for (u=i; (u<=N_DT_USR  &&  u<argc-1); u++)
      strcpy(data_types[DT_USR1+u-i], argv[u]);

  strcpy(fname_in, argv[argc-1]);   /* get name of the input-file */

  return(TRUE);
}



/* ======================================================================== *
 *                                kr2ansi
 * ======================================================================== */

void kr2ansi(fname)
    char  *fname;             /* name of the input file */
{
/* Open 'fname' and scan it line by line. Every line that
 * is_KR_header() accepts as a K&R function header is handed to
 * make_ansi(), which rewrites it into an ANSI prototype, and the
 * result is printed on the standard output. All other lines are
 * ignored.
 */

  char  line[MAXLINE];        /* the line currently examined */
  FILE  *fp_in;               /* stream attached to the file: fname */

  void  make_ansi();          /* turn a K&R header into an ANSI prototype */
  BOOLEAN is_KR_header();     /* TRUE if its argument is a K&R header */
  extern void fatal();        /* in file: ERROR.C */
  STD_CLIB char *fgets();     /* part of the standard C-library  */



  fp_in = fopen(fname, "r");
  demand(fp_in != NULL, "file2 does not exist");

  for (;;)
  {
    if (fgets(line, MAXLINE, fp_in) == NULL)  /* end of file: we are done */
      break;

    if ( !is_KR_header(line) )                /* not a function header */
      continue;

    make_ansi(fp_in, line);                   /* also reads the param-decls */
    printf("%s",line);
  }

  fclose(fp_in);
}



/* ======================================================================== *
 *                              set_dtypes()
 * ======================================================================== */

void set_dtypes(fname, data_types)
    char  *fname;         /* name of the file to read user-def types from */
    char  *data_types[];  /* array with valid data-types */
{
/*
 * set_dtypes() reads up to N_DT_USR words from file 'fname' and assigns
 * them to the array 'data_types[]', starting at slot DT_USR1. Words are
 * separated by characters for which IS_BLANK() is true. Each word can be
 * up to DT_MAXWORD-1 characters long (longer words are truncated).
 *
 * The function does not return a value; a missing file is reported
 * through demand(), an empty (or all-blank) file through fatal().
 */

  FILE  *fp;              /* used for reading the file: fname */
  int  c;
  BOOLEAN onword;         /* TRUE if we are on a word */
  register int ccount;    /* counter for word's letters (up to DT_MAXWORD) */
  register int wcount;    /* counter for words (up to N_DT_USR) */
  extern void fatal();    /* in file: ERROR.C */
  STD_CLIB int fgetc();   /* part of the standard C-library */


  fp = fopen(fname, "r");
  demand( fp != NULL, "file1 does not exist");
  while ((c=fgetc(fp)) != EOF  && IS_BLANK(c)); /* skip leading blanks */
  if (c == EOF)                                 /* if file is empty, exit */
  {
    fclose(fp);
    fatal("file1 is empty");
    return;               /* in case fatal() ever returns: nothing to store */
  }

  /*
   * IMPORTANT:
   *    at this point we know for a fact that 'c' is
   *    the first letter of the first word in the file
   */

  wcount = DT_USR1;                   /* the first slot for user-def types */
  ccount = 0;                         /* initialize char-counter */
  data_types[wcount][ccount++] = c;   /* set the first character */
  onword = TRUE;                      /* we are on the first word */
  while ( wcount < DT_USR1+N_DT_USR  &&  (c=fgetc(fp)) != EOF )
  {
    if ( IS_BLANK(c) )                /* we are on a blank character */
    {
      if (onword)                       /* if it immediately follows a word */
      {                                 /* we should take care of some stuff */
        onword = FALSE;                     /* we're not on a word anymore */
        data_types[wcount][ccount] = '\0';  /* terminate previous word */
        wcount++;                           /* increase word-counter */
        ccount = 0;                         /* reset char-counter */
      }
    }
    else                              /* we are on a letter... */
    {
      onword = TRUE;                    /* ..thus we are on a word */
      if (ccount < DT_MAXWORD-1)        /* no more DT_MAXWORD chars allowed */
        data_types[wcount][ccount++] = c;   /* append 'c' in current data-type */
    }
  }

  /* If the file ended in the middle of a word (no blank or newline after
   * the last word), that word has not been terminated yet. 'onword' can
   * only be TRUE here when wcount is still a valid slot, because wcount
   * is advanced only at the moment onword is cleared.
   */
  if (onword)
    data_types[wcount][ccount] = '\0';

  fclose(fp);
}


/* ======================================================================== *
 *                              is_KR_header
 * ======================================================================== */

BOOLEAN is_KR_header(header)
    char  *header;            /* potential K&R func. header */
{
/* This function recognises a "valid" K&R func. header line
 * by testing three basic conditions (the order is significant):
 *
 *  1. the first word _must_ be a valid data-type.
 *
 *  2. the line _must not_ contain a semicolon.
 *     (BUG: we screw up if line contains ';' inside a comment)
 *
 *  3. a) after we remove potentially commented characters,
 *     b) the last non-blank character _must_ be a closing parenthesis: ')'
 *
 * If any of the above conditions fail then the function returns FALSE,
 * otherwise the line is considered to be valid, it is modified a little
 * (see below), and the function returns TRUE.
 *
 * If the function concludes that the line is a valid K&R func-header
 * it modifies the line in order to bring it in the form expected by
 * the function make_ansi(): a) removes any white spaces and potential
 * comments after the closing parenthesis, and b) appends a semicolon
 * and a newline character.
 * (Actually potential comments are _always_ removed)
 */

  char  word[MAXWORD];        /* the first word of the line */
  char  *get_1st_word();      /* returns the 1st word in a string */
  register int i;             /* just a counter */
  BOOLEAN valid();            /* TRUE if the 1st param. is a valid data-type */
  STD_CLIB int strlen();      /* part of the standard C-library */ 
  STD_CLIB char *strcpy();    /* part of the standard C-library */



  strcpy(word,get_1st_word(header));/* C o n d i t i o n   # 1: */
  if ( !valid(word) )               /*  1st word must be a valid data-type */
    return(FALSE);

  i = strlen(header);               /* C o n d i t i o n   # 2: */
  while (i > 0){                    /*  no ';' allowed in the header */
    if ( header[i] == ';' )
      return(FALSE);
    i--;
  }
                                    /* C o n d i t i o n   # 3: */
  i = 0;                            /*  a. remove potential comments */
  while (header[i] != '\0' && header[i] != '/') i++;
  header[i] = '\0';
  i = strlen(header)-1;             /*  b. last non-blank char must be ')' */
  while ( IS_BLANK(header[i]) && i>0 ) i--;
  if ( header[i] != ')' )
    return(FALSE);
                                    /* M o d i f y  L i n e: */
  header[++i] = ';';                /* append a semicolon, and */
  header[++i] = '\n';               /* a newline character     */
  header[++i] = '\0';

  return(TRUE);
}



/* ======================================================================== *
 *                             get_1st_word
 * ======================================================================== */

char *get_1st_word(line)
    char  *line;
{
/* Return the first word in the parameter string, i.e. the characters
 * between the leading blanks (as defined by IS_BLANK) and the next
 * blank or the end of the string. 'line' itself is not modified.
 *
 *  If the word does not fit in MAXWORD characters (terminator
 *  included), the function returns W_TOO_LONG instead, so whatever
 *  comes back from the word path always fits in a char[MAXWORD].
 *  NOTE:
 *    W_TOO_LONG should contain _at most_ MAXWORD chars in the quotes
 *
 *  The word is returned in a static buffer: it stays valid after the
 *  function returns (an automatic array would not), but it is
 *  overwritten by the next call, so callers must copy it at once --
 *  which is what every caller in this file does with strcpy().
 */

  static char word[MAXWORD];  /* the result; must outlive this call */
  register int i;             /* index into 'line' */
  register int n;             /* number of chars stored in 'word' */



  i=0;                            /* skip leading blanks and... */
  while (line[i] != '\0' && IS_BLANK(line[i]))
    i++;

  n=0;                            /* ...copy the 1st word only */
  while (line[i] != '\0' && !IS_BLANK(line[i]))
  {
    if (n >= MAXWORD-1)             /* no room left for the terminator */
      return(W_TOO_LONG);
    word[n++] = line[i++];
  }
  word[n] = '\0';

  return(word);
}




/* ======================================================================== *
 *                                valid
 * ======================================================================== */

BOOLEAN valid(word)
    char  *word;
{
/* Tell whether 'word' names a known data-type.
 * The slots DT_STD1 up to and including LAST_DT_STD+N_DT_USR of the
 * global table 'data_types' are searched in order; the answer is
 * TRUE as soon as one of them equals 'word', FALSE if none does.
 */

  GLOBAL char *data_types[];  /* array with valid data-types */
  STD_CLIB int strcmp();      /* part of the standard C-library */
  register int slot;          /* the table entry being compared */


  slot = DT_STD1;
  while ( slot <= (LAST_DT_STD+N_DT_USR) )
  {
    if ( strcmp(word, data_types[slot]) == 0 )
      return(TRUE);
    slot++;
  }

  return(FALSE);
}





/* ======================================================================== *
 *                              make_ansi
 * ======================================================================== */


void make_ansi(fp, proto)
    FILE  *fp;                    /* pointer to file: fname_in */
    char  *proto;                 /* ANSI-prototyping to be produced */
{
/* This function takes the string 'proto' and converts it to
 * a valid ANSI function prototype:
 *
 *   First it inserts the word "extern" into the string 'proto',
 * just in front of the function's data-type. (If the line is so
 * long that the word does not fit in MAXLINE characters it is left
 * out; a function declaration means the same without it.)
 *   Then it checks if the parameter-list is empty, via the
 * function: has_params(), which also removes all the chars after
 * the opening parenthesis (i.e. 'proto' becomes:
 *                            "extern fn_type fn_name(" ).
 *   Now, if the parameter list was empty, the string: "void);\n"
 * is appended to 'proto' and the function returns. Otherwise it
 * is expecting to find the parameter-declarations between the
 * function-declaration and the first '{' character. For each
 * such line, it removes potential comments and checks the first
 * word (parameter-type) against all valid data-types.
 *   If everything is ok, the parameter-type along with the
 * parameter itself are appended to 'proto' (via the function:
 * append_param() ). Otherwise the parameter-type is "assumed"
 * to be invalid and the string constant: UNDEFINED_DATA_TYPE
 * is used instead.
 *   Reading stops at the first '{', at end of file, or as soon
 * as append_param() reports an error (no further line is read
 * in that case).
 * BUG: The above algorithm fails when a comment is opened in
 *      one line and is closed in a different line.
 *      The result is that in the output line commented words
 *      will appear as a parameters of type UNDEFINED_DATA_TYPE.
 *
 * 'proto' is assumed to be a buffer of MAXLINE characters holding
 * a line prepared by is_KR_header() (i.e. ending in ");\n").
 */


  int   len;                  /* length of 'proto' */
  char  par_decl[MAXLINE];    /* parameter-declaration line */
  char  par_type[MAXWORD];    /* parameter's data-type */
  char  *get_1st_word();
  BOOLEAN no_err = TRUE;
  BOOLEAN append_param();     /* see below */
  BOOLEAN has_params();       /* see below */
  BOOLEAN valid();
  STD_CLIB int  strlen();     /* part of the standard C-library */
  STD_CLIB char *strcpy();    /* part of the standard C-library */
  STD_CLIB char *strcat();    /* part of the standard C-library */
  STD_CLIB char *strchr();    /* part of the standard C-library */



  if ( strlen(proto) + strlen("extern ") < MAXLINE )
  {                                /* only when the result still fits */
    strcpy(par_decl, "extern ");     /* put "extern" in the front */
    strcat(par_decl, proto);         /* (note: here we use 'par_decl' */
    strcpy(proto, par_decl);         /*  as temporary string storage) */
  }

  if ( !has_params(proto) )        /* check & remove parameter-list */
  {
    /* has_params() cut off at least ")" ";" and "\n", so ");\n" is    */
    /* sure to fit; "void);\n" is longer and has to be checked first.  */
    if ( strlen(proto) + strlen("void);\n") < MAXLINE )
      strcat(proto, "void);\n");
    else
      strcat(proto, ");\n");
    return;
  }
                                              /* get next 'par_decl' */
  while ( no_err  &&  fgets(par_decl, MAXLINE, fp) != NULL )
  {
    char  *ptr;

    if ( (ptr = strchr(par_decl,'/')) != NULL ) /* remove potential comments */
      *ptr = '\0';

    strcpy(par_type, get_1st_word(par_decl)); /* get the parameter-type */

    if (par_type[0] == '{')                   /* if we hit a '{' we stop */
      break;

    if ( !valid(par_type) )                   /* check for valid 'par_type' */
      strcpy(par_type, UNDEFINED_DATA_TYPE);
                                              /* append ANSI parameter-list */
    no_err = append_param(proto, par_type, par_decl);
  }


  /* The function append_param() converts 'proto' to the following form: */
  /*    "extern fn_type fn_name(ptype p1, ptype p2, ..., ptype pn, "    */
  /*  So we need to fix 'proto''s tail by  1. erasing the last two chars */
  /*  (namely ' ' and ',')  and  2. appending the string: ");\n"  */
  /*  If no parameter was appended at all (e.g. the declarations are     */
  /*  missing), 'proto' still ends in '(' and nothing must be erased:    */
  /*  we just close the parenthesis, giving "fn_type fn_name();"         */

  len = strlen(proto);
  if ( len >= 2  &&  proto[len-2] == ','  &&  proto[len-1] == ' ' )
    strcpy(proto + (len-2), ");\n");  /* overwrite the trailing ", " */
  else
    strcat(proto, ");\n");            /* fits: see the note on has_params */
}


/* ======================================================================== *
 *                              has_params
 * ======================================================================== */

BOOLEAN has_params(header)
    char  *header;        /* the function-header line */
{
/* This function checks if the parameter-list is empty,
 * and removes all chars after the opening parenthesis.
 * Its task is to modify 'header' and to return TRUE if
 * the parameter-list contains something, or FALSE if it
 * is empty.
 * NOTICE that the parameter-list is considered empty
 * when either the char ')' comes right after char '('
 *          or it consists of white(BLANK) characters.
 * If 'header' contains no '(' at all, it is left as it
 * is and the function returns FALSE.
 */

  char    *cp1, *cp2;           /* temporary pointers */
  BOOLEAN param_yes = FALSE;    /* what the function returns */
  STD_CLIB char *strchr();      /* part of the standard C-library */



  cp1 = strchr(header,'(');       /* find the start of the param-list */
  if (cp1 == NULL)                /* no '(' : nothing to examine or cut */
    return(FALSE);

                                  /* skip the blanks that follow '('   */
  cp2 = cp1+1;                    /* (start _after_ the '(' itself)    */
  while ( *cp2 != '\0'  &&  IS_BLANK(*cp2) )
    cp2++;
  if (*cp2 != ')')                /* something other than ')' follows: */
    param_yes = TRUE;             /*   the list is not empty           */

  *(cp1+1) = '\0';                /* remove all chars coming after '(' */

  return(param_yes);
}



/* ======================================================================== *
 *                              append_param
 * ======================================================================== */

BOOLEAN append_param(proto, par_type, par_decl)
    char  *proto;                 /* the output ANSI-prototype */
    char  *par_type;              /* the data-type of the parameter */
    char  *par_decl;              /* the parameter-declaration line */
{
/* Get a parameter-declaration line, construct the appropriate
 * ANSI-prototyped-declaration string, and append it to the ANSI
 * prototype.
 *                  *** I m p o r t a n t ***
 *  'par_decl' has been checked by function make_ansi(): its 1st
 *  word is 'par_type' (or was replaced by UNDEFINED_DATA_TYPE).
 *  So the string coming after the 1st word should be a list of
 *  parameters. BUT if 'par_type' is one of the strings "unsigned",
 *  "short" or "long", then the 2nd word might be "int" (which should
 *  not be treated as a parameter, but as part of the parameters'
 *  data-type). Furthermore, 'par_type' may be "register", in which
 *  case we keep the 2nd word only (or the string "register" if the
 *  2nd word is not a valid data-type).
 *
 *  Returns TRUE if every parameter on the line was appended, or
 *  FALSE if the output line would become longer than MAXLINE; in
 *  that case the marker "<...>, " is appended (when it still fits)
 *  and the remaining parameters are dropped.
 *
 *  Both 'par_type' and 'par_decl' are modified (the latter by
 *  strtok()). 'par_type' is assumed to have room for MAXWORD
 *  characters and 'proto' for MAXLINE, as in make_ansi().
 */

  char  *s, *param;
  char  second[MAXWORD];      /* the 2nd word of 'par_decl' */
  register int i;

  void construct_ANSI_declaration();
  char  *get_1st_word();
  BOOLEAN valid();
  STD_CLIB int  strcmp();     /* returns int, _not_ a pointer */
  STD_CLIB int  strlen();
  STD_CLIB char *strcpy(), *strcat(), *strtok();



  i = 0;                      /* skip the first word of 'par_decl' */
  while (par_decl[i] != '\0' &&  IS_BLANK(par_decl[i])) i++;
  while (par_decl[i] != '\0' && !IS_BLANK(par_decl[i])) i++;
  s = &par_decl[i];
                              /* check for: register data-type,*
                               * or unsigned/short/long int    */
  strcpy(second, get_1st_word(s));
  if ( valid(second) )
  {
    if ( !strcmp(par_type,"register") )         /* handle "register" cases */
      strcpy(par_type, second);
    else if ( strlen(par_type) + 1 + strlen(second) < MAXWORD )
    {                                           /* handle unsigned/long/short */
      strcat(par_type, " ");                      /* cat " int" in 'par_type'*/
      strcat(par_type, second);
    }
    /* (a combined type too long for 'par_type' is left uncombined) */
    while( *s != '\0'  &&   IS_BLANK(*s) ) s++; /* skip the 2nd word: "int" */
    while( *s != '\0'  &&  !IS_BLANK(*s) ) s++;
  }
                              /* append ANSI param-declaration to 'proto' */
  while ( (param = strtok(s, ",; \t\n")) != NULL )
  {
    if ( strlen(proto)+strlen(par_type)+strlen(param)+4 >= MAXLINE )
    {                         /* error-check for "output line too long" */
      /* the marker itself must fit too, leaving one more char for  */
      /* the tail ");\n" that make_ansi() puts in place of ", "     */
      if ( strlen(proto) + strlen("<...>, ") + 1 < MAXLINE )
        strcat(proto, "<...>, ");
      return(FALSE);
    }
    construct_ANSI_declaration(proto, par_type, param);
    s = (char *)NULL;         /* make strtok() continue on the same line */
  }

  return(TRUE);
}


/* ======================================================================== *
 *                      construct_ANSI_declaration
 * ======================================================================== */

void construct_ANSI_declaration(proto, par_type, param)
    char  *proto;         /* the function prototype */
    char  *par_type;      /* parameter's data-type */
    char  *param;         /* the parameter itself */
{
/* Append one ANSI parameter-declaration, followed by ", ",
 * to the prototype being built in 'proto'.
 * When the SHOW_PARA bit is set in 'options' the declaration is
 * "par_type param"; otherwise the parameter's name is left out
 * and only one '*' is written for every '*' or '[' found in it
 * (so the pointer level is still visible in the prototype).
 */

  GLOBAL unsigned options;  /* bit map with command-line options */
  STD_CLIB int  strlen();   /* part of the standard C-library */
  STD_CLIB char *strcat();  /* part of the standard C-library */
  char  *scan;              /* walks through 'param' */
  int   last;               /* index of the last char in 'proto' */


  strcat(proto, par_type);        /* the data-type always goes first */
  strcat(proto, " ");

  if ( !(options & SHOW_PARA) )   /* name is hidden: keep only the stars */
  {
    for (scan = param; *scan != '\0'; scan++)
      if (*scan == '*'  ||  *scan == '[')
        strcat(proto, "*");
  }
  else                            /* name is shown as it was written */
    strcat(proto, param);

  last = strlen(proto)-1;         /* drop a blank left at the very end */
  if ( proto[last] == ' ' )
    proto[last] = '\0';
  strcat(proto, ", ");            /* separate parameters with ", " */
}
