/*
 * main.c -- Expression tree constructors and main program for gawk. 
 */

/* 
 * Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
 * 
 * This file is part of GAWK, the GNU implementation of the
 * AWK Programming Language.
 * 
 * GAWK is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 * 
 * GAWK is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with GAWK; see the file COPYING.  If not, write to
 * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include "getopt.h"
#include "awk.h"
#include "patchlevel.h"

/*
 * Forward declarations for the routines defined in this file.
 * P(()) wraps the parameter list; it is defined outside this file
 * (presumably so pre-ANSI compilers can drop the prototype -- confirm
 * against awk.h).
 */
static void usage P((int exitval));
static void copyleft P((void));
static void cmdline_fs P((char *str));
static void init_args P((int argc0, int argc, char *argv0, char **argv));
static void init_vars P((void));
static void pre_assign P((char *v));
SIGTYPE catchsig P((int sig, int code));
static void gawk_option P((char *optstr));
static void nostalgia P((void));
static void version P((void));
char *gawk_name P((char *filespec));

#ifdef MSDOS
extern int isatty P((int));
#endif

/* defined elsewhere; main() calls it before option parsing to set up regex handling */
extern void resetup P((void));

/*
 * These nodes store all the special variables AWK uses.  Most are
 * installed by init_vars(); ARGV/ARGC by init_args() and ENVIRON by
 * load_environ().
 */
NODE *FS_node, *NF_node, *RS_node, *NR_node;
NODE *FILENAME_node, *OFS_node, *ORS_node, *OFMT_node;
NODE *CONVFMT_node;
NODE *ERRNO_node;
NODE *FNR_node, *RLENGTH_node, *RSTART_node, *SUBSEP_node;
NODE *ENVIRON_node, *IGNORECASE_node;
NODE *ARGC_node, *ARGV_node, *ARGIND_node;
NODE *FIELDWIDTHS_node;

/*
 * C-level variables named after awk built-ins.  They are only defined
 * here; presumably the set_XXX() routines listed in varinit[] keep them
 * in sync with the nodes above -- confirm in the files that define them.
 */
long NF;
long NR;
long FNR;
int IGNORECASE;
char *RS;
char *OFS;
char *ORS;
char *OFMT;
char *CONVFMT;

/*
 * NOTE(review): an earlier comment here said that the parse tree and
 * field nodes are stored here, with Parse_end as a dummy item used to
 * free unneeded fields.  No such objects are defined in this part of
 * the file; the remark looks stale.
 */
int errcount = 0;	/* error counter, used by yyerror() */

/* The global null string */
NODE *Nnull_string;

/* The name the program was invoked under, for error messages */
const char *myname;

/* A block of AWK code to be run before running the program */
NODE *begin_block = 0;

/* A block of AWK code to be run after the last input file */
NODE *end_block = 0;

int exiting = 0;		/* Was an "exit" statement executed? */
int exit_val = 0;		/* optional exit value */

#if defined(YYDEBUG) || defined(DEBUG)
extern int yydebug;
#endif

struct src *srcfiles = NULL;		/* source file name(s) */
int numfiles = -1;		/* index of last srcfiles[] entry; -1 means none yet */

int do_unix = 0;		/* turn off gnu extensions */
int do_posix = 0;		/* turn off gnu and unix extensions */
int do_lint = 0;		/* provide warnings about questionable stuff */
int do_nostalgia = 0;		/* provide a blast from the past */

int in_begin_rule = 0;		/* we're in a BEGIN rule */
int in_end_rule = 0;		/* we're in an END rule */

int output_is_tty = 0;		/* control flushing of output */

extern char *version_string;	/* current version, for printing */

/* tested by main() together with begin_block/end_block to decide whether there is a program */
NODE *expression_value;

/*
 * Long option table handed to getopt_long() in main().
 * Columns: option name, argument requirement, flag pointer, value.
 * Entries with a non-NULL flag pointer store the value into that
 * variable (main() then sees a return of 0); entries with a NULL flag
 * make getopt_long() return the value, which main() switches on.
 */
static struct option optab[] = {
	{ "compat",		no_argument,		& do_unix,	1 },
	{ "lint",		no_argument,		& do_lint,	1 },
	{ "posix",		no_argument,		& do_posix,	1 },
	{ "nostalgia",		no_argument,		& do_nostalgia,	1 },
	{ "copyleft",		no_argument,		NULL,		'C' },
	{ "copyright",		no_argument,		NULL,		'C' },
	{ "field-separator",	required_argument,	NULL,		'F' },
	{ "file",		required_argument,	NULL,		'f' },
	{ "assign",		required_argument,	NULL,		'v' },
	{ "version",		no_argument,		NULL,		'V' },
	{ "usage",		no_argument,		NULL,		'u' },
	{ "help",		no_argument,		NULL,		'u' },
	{ "source",		required_argument,	NULL,		's' },
#ifdef DEBUG
	{ "parsedebug",		no_argument,		NULL,		'D' },
#endif
	{ 0, 0, 0, 0 }	/* terminator */
};

/*
 * main --- program entry point.
 *
 * Installs the signal handlers, sets up the null string and the
 * built-in variables, parses the command line, reads the awk program
 * (yyparse), then runs the BEGIN block, the input loop and the END
 * block.  Exits with exit_val; a non-zero result from close_io() turns
 * an otherwise zero exit status into 1.
 */
int
main(argc, argv)
int argc;
char **argv;
{
	int c;
	char *scan;
	/* the + on the front tells GNU getopt not to rearrange argv */
	const char *optlist = "+F:f:v:W:m:";
	int stopped_early = 0;
	int old_optind;
	extern int optind;
	extern int opterr;
	extern char *optarg;

#ifdef __EMX__
	_response(&argc, &argv);
	_wildcard(&argc, &argv);
	setvbuf(stdout, NULL, _IOLBF, BUFSIZ);
#endif

	(void) signal(SIGFPE,  (SIGTYPE (*) P((int))) catchsig);
	(void) signal(SIGSEGV, (SIGTYPE (*) P((int))) catchsig);
#ifdef SIGBUS
	(void) signal(SIGBUS,  (SIGTYPE (*) P((int))) catchsig);
#endif

	myname = gawk_name(argv[0]);
	argv[0] = (char *)myname;
#ifdef VMS
	vms_arg_fixup(&argc, &argv); /* emulate redirection, expand wildcards */
#endif

	/* remove sccs gunk */
	if (strncmp(version_string, "@(#)", 4) == 0)
		version_string += 4;

	if (argc < 2)
		usage(1);

	/* initialize the null string */
	Nnull_string = make_string("", 0);
	Nnull_string->numbr = 0.0;
	Nnull_string->type = Node_val;
	Nnull_string->flags = (PERM|STR|STRING|NUM|NUMBER);

	/* Set up the special variables */
	/*
	 * Note that this must be done BEFORE arg parsing else -F
	 * breaks horribly 
	 */
	init_vars();

	/* worst case: every argument names a program source */
	emalloc(srcfiles, struct src *, argc * sizeof(struct src), "main");
	memset(srcfiles, '\0', argc * sizeof(struct src));

	/* Tell the regex routines how they should work. . . */
	resetup();

	/* we do error messages ourselves on invalid options */
	opterr = 0;

	/* option processing. ready, set, go! */
	for (optopt = 0, old_optind = 1;
	     (c = getopt_long(argc, argv, optlist, optab, NULL)) != EOF;
	     optopt = 0, old_optind = optind) {
		if (do_posix)
			opterr = 1;
		switch (c) {
		case 'F':
			cmdline_fs(optarg);
			break;

		case 'f':
			/*
			 * a la MKS awk, allow multiple -f options.
			 * this makes function libraries real easy.
			 * most of the magic is in the scanner.
			 */
			/* The following is to allow for whitespace at the end
			 * of a #! /bin/gawk line in an executable file
			 */
			scan = optarg;
			/* cast: <ctype.h> routines are undefined for negative char values */
			while (isspace((unsigned char) *scan))
				scan++;
			++numfiles;
			srcfiles[numfiles].stype = SOURCEFILE;
			/*
			 * An all-blank argument means the real file name is
			 * the next word of argv.
			 * NOTE(review): if no word follows, argv[optind] is
			 * the terminating NULL of argv -- confirm the scanner
			 * copes with a NULL source name.
			 */
			if (*scan == '\0')
				srcfiles[numfiles].val = argv[optind++];
			else
				srcfiles[numfiles].val = optarg;
			break;

		case 'v':
			pre_assign(optarg);
			break;

		case 'm':
			/*
			 * Research awk extension.
			 *	-mf=nnn		set # fields, gawk ignores
			 *	-mr=nnn		set record length, ditto
			 */
			if (do_lint)
				warning("-m[fr] option irrelevant");
			if ((optarg[0] != 'r' && optarg[0] != 'f')
			    || optarg[1] != '=')
				warning("-m option usage: -m[fr]=nnn");
			break;

		case 'W':       /* gawk specific options */
			gawk_option(optarg);
			break;

		/* These can only come from long form options */
		case 'V':
			version();
			break;

		case 'C':
			copyleft();
			break;

		case 'u':
			usage(0);
			break;

		case 's':
			if (optarg[0] == '\0')
				warning("empty argument to --source ignored");
			else {
				srcfiles[++numfiles].stype = CMDLINE;
				srcfiles[numfiles].val = optarg;
			}
			break;

#ifdef DEBUG
		case 'D':
			yydebug = 2;
			break;
#endif

		case 0:
			/*
			 * getopt_long found an option that sets a variable
			 * instead of returning a letter. Do nothing, just
			 * cycle around for the next one.
			 */
			break;

		case '?':
		default:
			/*
			 * New behavior.  If not posix, an unrecognized
			 * option stops argument processing so that it can
			 * go into ARGV for the awk program to see. This
			 * makes use of ``#! /bin/gawk -f'' easier.
			 *
			 * However, it's never simple. If optopt is set,
			 * an option that requires an argument didn't get the
			 * argument. We care because if opterr is 0, then
			 * getopt_long won't print the error message for us.
			 */
			if (! do_posix
			    && (optopt == 0 || strchr(optlist, optopt) == NULL)) {
				/*
				 * can't just do optind--. In case of an
				 * option with >=2 letters, getopt_long
				 * won't have incremented optind.
				 */
				optind = old_optind;
				stopped_early = 1;
				goto out;
			} else if (optopt)
				/* Use 1003.2 required message format */
				fprintf (stderr,
				"%s: option requires an argument -- %c\n",
					myname, optopt);
			/* else
				let getopt print error message for us */
			break;
		}
	}
out:

	if (do_nostalgia)
		nostalgia();

	/* check for POSIXLY_CORRECT environment variable */
	if (! do_posix && getenv("POSIXLY_CORRECT") != NULL) {
		do_posix = 1;
		if (do_lint)
			warning(
	"environment variable `POSIXLY_CORRECT' set: turning on --posix");
	}

	/* POSIX compliance also implies no Unix extensions either */
	if (do_posix)
		do_unix = 1;

#ifdef DEBUG
	setbuf(stdout, (char *) NULL);	/* make debugging easier */
#endif
	if (isatty(fileno(stdout)))
		output_is_tty = 1;
	/* No -f or --source options, use next arg */
	if (numfiles == -1) {
		if (optind > argc - 1 || stopped_early) /* no args left or no program */
			usage(1);
		srcfiles[++numfiles].stype = CMDLINE;
		srcfiles[numfiles].val = argv[optind];
		optind++;
	}
	/* whatever is left on the command line becomes ARGV[1..] */
	init_args(optind, argc, (char *) myname, argv);
	(void) tokexpand();

	/* Read in the program */
	if (yyparse() || errcount)
		exit(1);

	/* Set up the field variables */
	init_fields();

	if (do_lint && begin_block == NULL && expression_value == NULL
	     && end_block == NULL)
		warning("no program");

	if (begin_block) {
		in_begin_rule = 1;
		(void) interpret(begin_block);
	}
	in_begin_rule = 0;
	/* input is read only if there are main rules or an END block */
	if (!exiting && (expression_value || end_block))
		do_input();
	if (end_block) {
		in_end_rule = 1;
		(void) interpret(end_block);
	}
	in_end_rule = 0;
	if (close_io() != 0 && exit_val == 0)
		exit_val = 1;
	exit(exit_val);		/* more portable */
	return exit_val;	/* to suppress warnings */
}

/*
 * usage --- write the version banner, the two invocation synopses and
 * the option summary to stderr, then exit with status `exitval'.
 */

static void
usage(exitval)
int exitval;
{
	/* option summary, one output line per entry, NULL terminated */
	static const char *const option_help[] = {
		/* GNU long options info. Gack. */
		"POSIX options:\t\tGNU long options:\n",
		"\t-f progfile\t\t--file=progfile\n",
		"\t-F fs\t\t\t--field-separator=fs\n",
		"\t-v var=val\t\t--assign=var=val\n",
		"\t-m[fr]=val\n",
		"\t-W compat\t\t--compat\n",
		"\t-W copyleft\t\t--copyleft\n",
		"\t-W copyright\t\t--copyright\n",
		"\t-W help\t\t\t--help\n",
		"\t-W lint\t\t\t--lint\n",
#ifdef NOSTALGIA
		"\t-W nostalgia\t\t--nostalgia\n",
#endif
#ifdef DEBUG
		"\t-W parsedebug\t\t--parsedebug\n",
#endif
		"\t-W posix\t\t--posix\n",
		"\t-W source=program-text\t--source=program-text\n",
		"\t-W usage\t\t--usage\n",
		"\t-W version\t\t--version\n",
		NULL
	};
	const char *const *line;
	const char *common_opts = " [POSIX or GNU style options]";
	const char *file_form = " -f progfile [--]";
	/* systems whose command interpreters want double quotes */
#if defined(MSDOS) || defined(OS2) || defined(VMS)
	const char *text_form = " [--] \"program\"";
#else
	const char *text_form = " [--] 'program'";
#endif

	fprintf(stderr, "%s, patchlevel %d\n", version_string, PATCHLEVEL);
	fprintf(stderr, "Usage:\t%s%s%s file ...\n\t%s%s%s file ...\n",
		myname, common_opts, file_form,
		myname, common_opts, text_form);

	for (line = option_help; *line != NULL; line++)
		fputs(*line, stderr);

	exit(exitval);
}

/*
 * copyleft --- write the copyright and licensing notice to stderr.
 * Returns to the caller; it does not exit.
 */
static void
copyleft ()
{
	/* the notice is kept in three separate strings */
	static char terms[] =
"Copyright (C) 1989, 1991, 1992, Free Software Foundation.\n\
\n\
This program is free software; you can redistribute it and/or modify\n\
it under the terms of the GNU General Public License as published by\n\
the Free Software Foundation; either version 2 of the License, or\n\
(at your option) any later version.\n\
\n";
	static char warranty[] =
"This program is distributed in the hope that it will be useful,\n\
but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n\
GNU General Public License for more details.\n\
\n";
	static char where_to_get[] =
"You should have received a copy of the GNU General Public License\n\
along with this program; if not, write to the Free Software\n\
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.\n";
	static char *notice[] = { terms, warranty, where_to_get };
	int part;

	for (part = 0; part < 3; part++)
		fputs(notice[part], stderr);
	fflush(stderr);
}

/*
 * cmdline_fs --- install the argument of -F as the value of FS.
 *
 * The old value of FS is released, the new one is built from `str'
 * with escape sequences processed (SCAN), and set_FS() is called.
 * `str' may be modified in place (see below).
 */
static void
cmdline_fs(str)
char *str;
{
	register NODE **fs_slot;
	int lone_t;

	fs_slot = get_lhs(FS_node, (Func_ptr *) 0);
	unref(*fs_slot);

	/*
	 * Traditional awk documents -F\t as "tab", but the shell hands
	 * us -Ft.  Honor that special case only in compatibility mode
	 * without POSIX mode; POSIX did not adopt it.
	 */
	lone_t = (str[0] == 't' && str[1] == '\0');
	if (lone_t && do_lint)
		warning("-Ft does not set FS to tab in POSIX awk");
	if (lone_t && do_unix && ! do_posix)
		str[0] = '\t';

	*fs_slot = make_str_node(str, strlen(str), SCAN); /* do process escapes */
	set_FS();
}

static void
init_args(argc0, argc, argv0, argv)
int argc0, argc;
char *argv0;
char **argv;
{
	int i, j;
	NODE **aptr;

	ARGV_node = install("ARGV", node(Nnull_string, Node_var, (NODE *)NULL));
	aptr = assoc_lookup(ARGV_node, tmp_number(0.0));
	*aptr = make_string(argv0, strlen(argv0));
	(*aptr)->flags |= MAYBE_NUM;
	for (i = argc0, j = 1; i < argc; i++) {
		aptr = assoc_lookup(ARGV_node, tmp_number((AWKNUM) j));
		*aptr = make_string(argv[i], strlen(argv[i]));
		(*aptr)->flags |= MAYBE_NUM;
		j++;
	}
	ARGC_node = install("ARGC",
			node(make_number((AWKNUM) j), Node_var, (NODE *) NULL));
}

/*
 * Set all the special variables to their initial values.
 *
 * One varinit entry describes one built-in variable; init_vars()
 * walks the table until it reaches the entry whose name is 0.
 */
struct varinit {
	NODE **spec;		/* where init_vars() stores the installed node */
	const char *name;	/* variable name passed to install(); 0 ends the table */
	NODETYPE type;		/* node type passed to node() */
	const char *strval;	/* initial string value; 0 means use numval instead */
	AWKNUM numval;		/* initial numeric value, used only when strval is 0 */
	Func_ptr assign;	/* if non-zero, called right after the variable is installed */
};
static struct varinit varinit[] = {
{&NF_node,	"NF",		Node_NF,		0,	-1, set_NF },
{&FIELDWIDTHS_node, "FIELDWIDTHS", Node_FIELDWIDTHS,	"",	0,  0 },
{&NR_node,	"NR",		Node_NR,		0,	0,  set_NR },
{&FNR_node,	"FNR",		Node_FNR,		0,	0,  set_FNR },
{&FS_node,	"FS",		Node_FS,		" ",	0,  0 },
{&RS_node,	"RS",		Node_RS,		"\n",	0,  set_RS },
{&IGNORECASE_node, "IGNORECASE", Node_IGNORECASE,	0,	0,  set_IGNORECASE },
{&FILENAME_node, "FILENAME",	Node_var,		"",	0,  0 },
{&OFS_node,	"OFS",		Node_OFS,		" ",	0,  set_OFS },
{&ORS_node,	"ORS",		Node_ORS,		"\n",	0,  set_ORS },
{&OFMT_node,	"OFMT",		Node_OFMT,		"%.6g",	0,  set_OFMT },
{&CONVFMT_node,	"CONVFMT",	Node_CONVFMT,		"%.6g",	0,  set_CONVFMT },
{&RLENGTH_node, "RLENGTH",	Node_var,		0,	0,  0 },
{&RSTART_node,	"RSTART",	Node_var,		0,	0,  0 },
{&SUBSEP_node,	"SUBSEP",	Node_var,		"\034",	0,  0 },
{&ARGIND_node,	"ARGIND",	Node_var,		0,	0,  0 },
{&ERRNO_node,	"ERRNO",	Node_var,		0,	0,  0 },
{0,		0,		Node_illegal,		0,	0,  0 },	/* end marker */
};

static void
init_vars()
{
	register struct varinit *vp;

	for (vp = varinit; vp->name; vp++) {
		*(vp->spec) = install((char *) vp->name,
		  node(vp->strval == 0 ? make_number(vp->numval)
				: make_string((char *) vp->strval,
					strlen(vp->strval)),
		       vp->type, (NODE *) NULL));
		if (vp->assign)
			(*(vp->assign))();
	}
}

/*
 * load_environ --- create the awk array ENVIRON from the process
 * environment.
 *
 * Each "name=value" entry becomes ENVIRON["name"] = "value", flagged
 * MAYBE_NUM.  An entry with no '=' gets an empty value.  The entry is
 * split in place and put back together afterwards.
 */
void
load_environ()
{
#if !defined(MSDOS) && !defined(OS2) && !(defined(VMS) && defined(__DECC))
	extern char **environ;
#endif
	static char no_value[] = "";
	register char **envp;
	register char *name, *value;
	NODE **slot;

	ENVIRON_node = install("ENVIRON", 
			node(Nnull_string, Node_var, (NODE *) NULL));

	for (envp = environ; *envp; envp++) {
		name = *envp;
		value = strchr(name, '=');
		if (value == NULL)
			value = no_value;
		else
			*value++ = '\0';	/* cut the name off at the '=' */

		slot = assoc_lookup(ENVIRON_node,
				tmp_string(name, strlen (name)));
		*slot = make_string(value, strlen (value));
		(*slot)->flags |= MAYBE_NUM;

		/* restore '=' so that system() gets a valid environment */
		if (value != no_value)
			value[-1] = '=';
	}
}

/*
 * arg_assign --- process a command-line assignment of the form
 * "var=value".
 *
 * If `arg' contains no '=', nothing is done and NULL is returned.
 * Otherwise the text before the first '=' must be a valid variable
 * name (a letter or '_' followed by letters, digits or '_'); anything
 * else is a fatal error.  The value has escape sequences processed,
 * is flagged MAYBE_NUM and is stored in the variable, after which any
 * assignment hook reported by get_lhs() is run.
 *
 * `arg' is split at the '=' while this runs and restored before
 * returning.  The return value points at that '=' inside `arg'.
 */
char *
arg_assign(arg)
char *arg;
{
	char *cp, *cp2;
	int badvar;
	Func_ptr after_assign = NULL;
	NODE *var;
	NODE *it;
	NODE **lhs;

	cp = strchr(arg, '=');
	if (cp != NULL) {
		*cp++ = '\0';
		/*
		 * first check that the variable name has valid syntax.
		 * The casts matter: the <ctype.h> routines are undefined
		 * for negative arguments, which a plain char can be.
		 */
		badvar = 0;
		if (! isalpha((unsigned char) arg[0]) && arg[0] != '_')
			badvar = 1;
		else
			for (cp2 = arg+1; *cp2; cp2++)
				if (! isalnum((unsigned char) *cp2)
				    && *cp2 != '_') {
					badvar = 1;
					break;
				}
		if (badvar)
			fatal("illegal name `%s' in variable assignment", arg);

		/*
		 * Recent versions of nawk expand escapes inside assignments.
		 * This makes sense, so we do it too.
		 */
		it = make_str_node(cp, strlen(cp), SCAN);
		it->flags |= MAYBE_NUM;
		var = variable(arg, 0);
		lhs = get_lhs(var, &after_assign);
		unref(*lhs);
		*lhs = it;
		if (after_assign)
			(*after_assign)();
		*--cp = '=';	/* restore original text of ARGV */
	}
	return cp;
}

/*
 * pre_assign --- handle the argument of -v.  If arg_assign() rejects
 * it for lacking an '=', complain on stderr and leave via usage(1).
 */
static void
pre_assign(v)
char *v;
{
	if (arg_assign(v) != NULL)
		return;		/* assignment done */

	fprintf (stderr,
		"%s: '%s' argument to -v not in 'var=value' form\n",
			myname, v);
	usage(1);
}

/*
 * catchsig --- handler installed by main() for SIGFPE, SIGSEGV and,
 * where it exists, SIGBUS.
 *
 * SIGFPE is reported through fatal().  SIGSEGV and SIGBUS print an
 * internal-error message and abort().  Any other signal is treated
 * as a "can't happen" condition.  `code' is not used.
 */
SIGTYPE
catchsig(sig, code)
int sig, code;
{
	int memory_fault;

#ifdef lint
	code = 0; sig = code; code = sig;
#endif
	memory_fault = (sig == SIGSEGV);
#ifdef SIGBUS
	if (sig == SIGBUS)
		memory_fault = 1;
#endif

	if (sig == SIGFPE) {
		fatal("floating point exception");
	} else if (memory_fault) {
		msg("fatal error: internal error");
		/* fatal won't abort() if not compiled for debugging */
		abort();
	} else {
		cant_happen();
	}
	/* NOTREACHED */
}

/*
 * gawk_option --- do gawk specific things (the argument of -W).
 *
 * `optstr' holds one or more option words separated by blanks, tabs
 * or commas.  Words are matched without regard to case and only by
 * their leading characters, so anything following a recognized word
 * is scanned as the start of the next word.  A character that starts
 * no known word draws a message on stderr and is skipped.
 *
 * "source=text" takes everything up to the end of `optstr' as program
 * text, so it must come last.
 *
 * After a word is recognized, cp is left on the word's last character
 * so that the loop's cp++ moves on to whatever follows it.
 */

static void
gawk_option(optstr)
char *optstr;
{
	char *cp;

	for (cp = optstr; *cp; cp++) {
		switch (*cp) {
		case ' ':
		case '\t':
		case ',':
			break;
		case 'v':
		case 'V':
			/* print version */
			if (strncasecmp(cp, "version", 7) != 0)
				goto unknown;
			else
				cp += 6;
			version();
			break;
		case 'c':
		case 'C':
			if (strncasecmp(cp, "copyright", 9) == 0) {
				cp += 8;
				copyleft();
			} else if (strncasecmp(cp, "copyleft", 8) == 0) {
				cp += 7;
				copyleft();
			} else if (strncasecmp(cp, "compat", 6) == 0) {
				cp += 5;
				do_unix = 1;
			} else
				goto unknown;
			break;
		case 'n':
		case 'N':
			/*
			 * Undocumented feature,
			 * inspired by nostalgia, and a T-shirt
			 */
			if (strncasecmp(cp, "nostalgia", 9) != 0)
				goto unknown;
			/* no need to advance cp: nostalgia() aborts */
			nostalgia();
			break;
		case 'p':
		case 'P':
#ifdef DEBUG
			if (strncasecmp(cp, "parsedebug", 10) == 0) {
				cp += 9;
				yydebug = 2;
				break;
			}
#endif
			if (strncasecmp(cp, "posix", 5) != 0)
				goto unknown;
			cp += 4;
			do_posix = do_unix = 1;
			break;
		case 'l':
		case 'L':
			if (strncasecmp(cp, "lint", 4) != 0)
				goto unknown;
			cp += 3;
			do_lint = 1;
			break;
		case 'H':
		case 'h':
			if (strncasecmp(cp, "help", 4) != 0)
				goto unknown;
			cp += 3;
			usage(0);
			break;
		case 'U':
		case 'u':
			if (strncasecmp(cp, "usage", 5) != 0)
				goto unknown;
			cp += 4;
			usage(0);
			break;
		case 's':
		case 'S':
			if (strncasecmp(cp, "source=", 7) != 0)
				goto unknown;
			cp += 7;
			if (cp[0] == '\0')
				warning("empty argument to -Wsource ignored");
			else {
				srcfiles[++numfiles].stype = CMDLINE;
				srcfiles[numfiles].val = cp;
			}
			/*
			 * The program text, empty or not, runs to the end
			 * of optstr, so there is nothing left to scan.
			 * Returning here, rather than breaking out of the
			 * switch, also keeps the loop's cp++ from stepping
			 * past the terminating NUL when the text is empty.
			 */
			return;
		default:
		unknown:
			fprintf(stderr, "'%c' -- unknown option, ignored\n",
				*cp);
			break;
		}
	}
}

/*
 * nostalgia --- emit the classic awk failure message on stderr, then
 * abort(); never returns.
 */

static void
nostalgia()
{
	fputs("awk: bailing out near line 1\n", stderr);
	abort();
}

/*
 * version --- report the version string and patch level on stderr and
 * exit with status 0; never returns.
 */

static void
version()
{
	(void) fprintf(stderr, "%s, patchlevel %d\n",
			version_string, PATCHLEVEL);

	/* per GNU coding standards, exit successfully, do nothing else */
	exit(0);
}

/*
 * gawk_name --- reduce the path the program was invoked with to a
 * bare program name.  Exactly one of three variants is compiled:
 *
 *   VMS:  "device:[root.][directory.subdir]GAWK.EXE;n" -> "GAWK";
 *         the result is a strdup()'d copy.
 *   MSDOS, OS2, atarist:  backslashes become slashes, the directory
 *         part and everything from the first '.' of the name are
 *         dropped, and the name is lower-cased; `filespec' itself is
 *         modified.
 *   otherwise:  "path/name" -> "name"; the result points into
 *         `filespec', which is left untouched.
 */
char *
gawk_name(filespec)
char *filespec;
{
#if defined(VMS)
	char *bracket, *angle, *name, *dot;

	bracket = strrchr(filespec, ']');	/* directory punctuation */
	angle = strrchr(filespec, '>');		/* alternate <international> punct */

	/* use whichever delimiter comes last, if there is one at all */
	if (bracket == NULL || angle > bracket)
		bracket = angle;
	name = strdup(bracket == NULL ? filespec : (bracket + 1));
	dot = strrchr(name, '.');
	if (dot != NULL)
		*dot = '\0';			/* strip .typ;vers */

	return name;
#elif defined(MSDOS) || defined(OS2) || defined(atarist)
	char *scan, *name, *mark;

	/* normalize the directory separators */
	for (scan = strchr(filespec, '\\'); scan != NULL;
	     scan = strchr(scan, '\\'))
		*scan = '/';

	name = filespec;
	mark = strrchr(name, '/');
	if (mark != NULL)
		name = mark + 1;
	mark = strchr(name, '.');
	if (mark != NULL)
		*mark = '\0';
	strlwr(name);

	return name;
#else
	/* "path/name" -> "name" */
	char *slash = strrchr(filespec, '/');

	if (slash != NULL)
		return slash + 1;
	return filespec;
#endif
}
