static char * sccsid = "@(#)  strings.c  (v1.6.9 5/23/89)";
/*
 * strings
 * =======
 *
 * Find and output the printable strings with a certain minimal length or
 * more in any files.
 *
 * This is a rewrite of 4BSD strings, which had some errors:
 * - ^L is a printable character (4.3BSD, SUN OS)
 * - 0x80 is a printable character (4.3BSD, SUN OS)
 * - on some systems (MX2) it does not get the segment of initialized data
 *   correctly
 *
 * A printable string is any sequence of printable characters. A printable
 * character is any printable ASCII character, including blank. If flag -t
 * is specified, TAB is considered a printable character too. If flag -c is
 * specified on the command line, only strings that are followed by LF or
 * NUL are output, just like the original BSD manual entry claimed.
 *
 */

# include "strings.h"

# include <ctype.h>

/*
 * Lookup table of printable characters: an entry is non-zero if the
 * corresponding character value counts as printable.
 * isp_mid is set up by init() to point at the entry for character value 0
 * (isp minus the smallest value a plain char can hold), so that the table
 * can be indexed directly with a char even where char is signed.
 */
char isp [256];
char * isp_mid;
/* Non-zero if character c is printable; only valid after init() has run. */
# define IS_PRINTABLE(c)	(isp_mid[c])

int fd;						/* Descriptor of the input being scanned; 0 for stdin */

CHAR_TYPE buf [IN_BUF_LEN];	/* Current input block, filled by read() in strings() */

int ind_whole	= 0;		/* Shall we examine the whole file ? (-a or -) */
int ind_offset	= 0;		/* Offsets required ? (-o) */
int min_str_len = 4;		/* A string is output only if it has at least this many characters (-N) */
int ind_file	= 0;		/* # of filenames collected from the command line */
int ind_c		= 0;		/* Only strings followed by NUL or LF ? (-c) */
int ind_tab		= 0;		/* Is TAB a printable character ? (-t) */
int ind_prefix  = 0;		/* Should the filename be added before each string ? (-p) */
int ind_version = 0;		/* Print version information only ? (-v) */

LSEEK_TYPE offset;			/* File position of buf[0]; advanced by strings() after each block */

char * cur_file_name;		/* Name of the file being scanned; NULL while reading stdin */

/*
 * State of the output buffer. These objects are defined outside this file;
 * here they are only touched to discard or commit a candidate string.
 * NOTE(review): presumably 'level' is the current fill position, 'num_out_buf'
 * the number of committed bytes and 'saved' the number of candidate characters
 * buffered so far -- confirm against the output module.
 */
extern CHAR_TYPE * out_buf, * level;
extern int num_out_buf;
extern int saved;

/* States of the automaton implemented in examine(). */
# define SEARCH		1
# define DECIDE		2
# define TRY 		3

/*
 * usage
 *
 * Write the command line synopsis through out() and terminate the
 * process with exit status n. This function does not return.
 *
 * The option letters listed here have to agree with the switch in
 * options(): the "next word is a filename" flag is parsed there as 'f',
 * so that is the letter which is documented.
 */
usage (n)
int n;
{
	out ("usage: strings [options] [file ...]\n");
# ifdef I_SPECIAL
	out ("-a : look in whole file. Default : only look in initialized data\n");
	out ("-  : the same as -a\n");
# endif
	out ("-o : output offset in decimal before each string.\n");
	out ("-N : only output sequences of length >= N; N is a number > 0.\n");
	out ("-c : only output C strings; terminated by \\0 or \\n\n");
	out ("-f : the next word is taken as a filename, even if it starts with '-'.\n");
	out ("-t : TAB is considered a printable character too.\n");
	out ("-p : the name of the input file is output before each string.\n");
	out ("-v : only print version information, examine no files.\n");
# ifdef I_SPECIAL
	out ("Options can be combined like '-a20ot'.\n");
# else
	out ("Options can be combined like '-20o'.\n");
	out ("The whole file is scanned.\n");
# endif
	exit (n);
}

/*
 * out_int
 *
 * Convert n to its decimal representation and pass the text to out().
 * Negative values are written with a leading '-'.
 *
 * The digits are generated starting with the least significant one, so
 * the buffer is filled from its end towards its start; the finished
 * text begins at s + i + 1.
 */
out_int (n)
register int n;
{
	/* 3 characters per byte is a safe upper bound; +2 for sign and NUL. */
	char s [3 * sizeof (int) + 2];
	register int i;
	register unsigned int u;

	i = (int) sizeof (s) - 1;
	s [i--] = '\0';
	/*
	 * Work on the magnitude as an unsigned value: n % 10 is negative for
	 * a negative n, and -n overflows for the most negative int.
	 */
	u = (n < 0) ? 0 - (unsigned int) n : (unsigned int) n;
	do {
		s [i--] = '0' + (int) (u % 10);
		u /= 10;
	} while (u != 0);
	if (n < 0)
		s [i--] = '-';
	out (s + i + 1);
}

/*
 * print_version
 *
 * Write the SCCS identification string followed by the values of the
 * compile time configuration macros, using out() and out_int().
 * FOUND is passed to out() and therefore has to expand to a string;
 * FCNTL, WHAT_LSEEK, IN_BUF_LEN, OUT_BUF_LEN and THRESHOLD are passed to
 * out_int() and therefore have to expand to integer expressions.
 *
 * The labels behind #endif are written as comments: bare tokens after
 * #endif are not valid in ISO C and draw a diagnostic from current
 * compilers.
 */
print_version()
{
	out (sccsid); out ("\n\n");
	out ("Compilation flags:\n");
	out ("FOUND = \""); out (FOUND); out ("\"\n");
# ifdef FCNTL
	out ("FCNTL = "); out_int (FCNTL); out ("\n");
# else
	out ("FCNTL is not defined.\n");
# endif /* FCNTL */
# ifdef FAST_COPY
	out ("FAST_COPY is defined.\n");
# else
	out ("FAST_COPY is not defined.\n");
# endif /* FAST_COPY */
# ifdef WHAT_LSEEK
	out ("WHAT_LSEEK = "); out_int (WHAT_LSEEK); out ("\n");
# else
	out ("WHAT_LSEEK is not defined.\n");
# endif /* WHAT_LSEEK */
# ifdef I_SPECIAL
	out ("I_SPECIAL is defined.\n");
# else
	out ("I_SPECIAL is not defined.\n");
# endif /* I_SPECIAL */
	out ("IN_BUFLEN  = "); out_int (IN_BUF_LEN); out ("\n");
	out ("OUT_BUFLEN = "); out_int (OUT_BUF_LEN); out ("\n");
	out ("THRESHOLD  = "); out_int (THRESHOLD); out ("\n");
}

/*
 * options
 *
 * Parse the command line and set the global ind_* flags and min_str_len.
 *
 * ac, av : argument count and vector as passed to main().
 * f      : array supplied by the caller; every word that is not an option
 *          is stored here in command line order and counted in ind_file.
 *          At most ac-1 entries are written.
 *
 * A word starting with '-' is a group of option letters which may be
 * combined, e.g. "-20ot". A run of digits sets min_str_len. A lone "-"
 * selects the whole file. After 'f' (or its alias 'e', the letter some
 * help texts name) the next word is taken as a filename even if it starts
 * with '-'; the remaining letters of the current word are still parsed as
 * options.
 *
 * Every option may be given only once. On any error usage() is called with
 * a code identifying the problem; usage() does not return.
 * NOTE(review): an unknown letter and a repeated -t both report code 8.
 */
options (ac, av, f)
int ac;
char * av [];
char ** f;
{
	register int i, j;
	int take_file = 0;				/* is the next word a file ? */
	int had_whole, had_offset, had_min_str_len;
	int had_c, had_tab, had_prefix, had_version;
	int digit, max_int;

	/* Largest int, computed without relying on <limits.h>. */
	max_int = (int) ((unsigned) ~0 >> 1);

	had_whole = had_offset = had_min_str_len = had_c = had_tab = 0;
	had_prefix = had_version = 0;
	for (i = 1; i < ac; i++) {
		if (take_file == 0 && av[i][0] == '-') {
			if (av[i][1] == '\0') {
				/* A lone "-" : examine the whole file. */
				if (had_whole == 1)
					usage (5);
				had_whole = 1;
				ind_whole = 1;
			} else {
				for (j = 1; av[i][j] != '\0'; j++) {
					if ('0' <= av[i][j] && av[i][j] <= '9') {
						if (had_min_str_len == 1)
							usage (3);
						for (min_str_len = 0; '0' <= av[i][j]  && av[i][j] <= '9'; j++) {
							digit = av[i][j] - '0';
							/*
							 * Refuse numbers that do not fit into an int
							 * instead of overflowing silently.
							 */
							if (min_str_len > (max_int - digit) / 10)
								usage (4);
							min_str_len = min_str_len * 10 + digit;
						}
						j--;	/* So we don't lose a character */
						had_min_str_len = 1;
					} else
						switch (av[i][j]) {
# ifdef I_SPECIAL
							case 'a':
							case '-':
								if (had_whole == 1)
									usage (5);
								had_whole = 1;
								ind_whole = 1;
								break;
# endif
							case 'o':
								if (had_offset == 1)
									usage (6);
								had_offset = ind_offset = 1;
								break;
							case 'c':
								if (had_c == 1)
									usage (7);
								had_c = ind_c = 1;
								break;
							case 'e':	/* alias of 'f' */
							case 'f':
								take_file = 1;
								break;
							case 't':
								if (had_tab == 1)
									usage (8);
								had_tab = ind_tab = 1;
								break;
							case 'p':
								if (had_prefix == 1)
									usage (9);
								had_prefix = ind_prefix = 1;
								break;
							case 'v':
								if (had_version == 1)
									usage (10);
								had_version = ind_version = 1;
								break;
							default:
								usage (8);
								break;
						}
				}
			}
		} else {
			/* Not an option (or forced by -f): remember it as a file. */
			f [ind_file++] = av[i];
			take_file = 0;
		}
	}
	if (min_str_len <= 0)
		usage (4);
# ifdef DEBUG
	fprintf (prot, "ind_offset  = %3d\n", ind_offset);
	fprintf (prot, "ind_whole   = %3d\n", ind_whole);
	fprintf (prot, "ind_file    = %3d\n", ind_file);
	fprintf (prot, "ind_c       = %3d\n", ind_c);
	fprintf (prot, "min_str_len = %3d\n", min_str_len);
	fprintf (prot, "ind_tab     = %3d\n", ind_tab);
	fprintf (prot, "ind_prefix  = %3d\n", ind_prefix);
	fprintf (prot, "ind_version = %3d\n", ind_version);
	if (ind_file == 0)
		fprintf (prot, "had no files on command line\n");
	else {
		fprintf (prot, "had %1d files on command line\n", ind_file);
		for (i = 0; i < ind_file; i++)
			fprintf (prot, "%s\n", f[i]);
	}
# endif
}

/*
 * init
 *
 * Build the table of printable characters and initialize the output
 * module.
 *
 * The table is indexed with plain char values, which may be negative.
 * Therefore the smallest value a char can take is determined first and
 * isp_mid is biased by it, so that isp_mid[c] is inside isp for every c.
 * A character is printable if it is ASCII and isprint() accepts it; TAB
 * is added if flag -t was given.
 */
init ()
{
	register int code;
	int lowest;
	char ch;

	/* Run through all 256 codes and note the lowest resulting char. */
	lowest = 0;
	code = 0;
	while (code < 256) {
		ch = code;
		if (ch < lowest)
			lowest = ch;
		code++;
	}
	isp_mid = isp - lowest;

	/* Classify every character value. */
	for (code = 255; code >= 0; code--) {
		ch = code;
		if (isascii(ch) && isprint(ch))
			isp_mid [ch] = 1;
		else
			isp_mid [ch] = 0;
	}
	if (ind_tab != 0)
		isp_mid['\t'] = 1;

	init_output ();
}

/*
 * main
 *
 * Parse the command line, then either print the version information or
 * scan every named file (standard input if no file was named).
 * If more than one file was named, a banner with the filename is output
 * before the strings of each file.
 *
 * Exit status: 0 on normal completion, 1 if the list of filenames could
 * not be allocated (or, with DEBUG, the protocol file could not be
 * opened). Command line errors terminate through usage().
 */
main (argc, argv)
int argc;
char * argv[];
{
	register int i;
	char ** f;

# ifdef DEBUG
	if ((prot = fopen ("prot", "w")) == NULL) {
		fprintf (stderr, "could not open prot\n");
		exit (1);
	}
# endif
	/*
	 * One slot per argument is always enough for the filenames.
	 * Never ask for zero bytes, the result of that would be ambiguous.
	 */
	f = (char **) malloc ((unsigned)(sizeof (char *) * (argc > 0 ? argc : 1)));
	if (f == (char **) 0) {
		perror ("malloc");
		exit (1);
	}
	options (argc, argv, f);
	if (ind_version) {
		print_version ();
		exit (0);
	}
	init ();

	if (ind_file == 0)
		strings ((char*)NULL);
	else
		for (i = 0; i < ind_file; i++) {
			if (ind_file != 1)
				out_name (f[i]);
			strings (f[i]);
		}
	exit (0);
}

/*
 * out_name
 *
 * Output a banner line of the form "---- name ----" for the file name b.
 * The number of dashes is chosen so that the line is 80 characters wide;
 * if the remaining width is odd, the right hand run gets the extra dash.
 * A name that leaves no room for dashes is output with the blanks only.
 *
 * The pieces are handed to append() as [start, end) ranges.
 * NOTE(review): the last call passes 1 instead of 0 as third argument,
 * presumably to complete the line -- confirm against append().
 */
out_name (b)
register CHAR_TYPE * b;
{
	CHAR_TYPE s [45];
	CHAR_TYPE * s2 = (CHAR_TYPE*) "  ";
	register int n, i;

	for (i = 0; i < 45; i++)
		s [i] = '-';
	/*
	 * s holds dashes only and is not NUL terminated, so its length must
	 * be taken from the array size and not from strlen().
	 */
	n = (int) (sizeof (s) / sizeof (s [0]));
	i = strlen (b);
# ifdef DEBUG
	fprintf (prot, "out_name :: n = %d, i = %d\n", n, i);
# endif
	if (n*2 + (i+2) > 80)
		n = (80 - (i+2)) / 2;
	/* A very long name leaves no room for dashes at all. */
	if (n < 0)
		n = 0;
# ifdef DEBUG
	fprintf (prot, "out_name :: first string is %d long.\n", n);
# endif
	(void) append (s, s+n, 0);

	/* One blank on either side of the name. */
	(void) append (s2, s2+1, 0);
	(void) append (b, b+i, 0);
	(void) append (s2, s2+1, 0);
	
	if (2*n + (i+2) < 80)
		n++;
# ifdef DEBUG
	fprintf (prot, "out_name :: second string is %d long.\n", n);
# endif
	(void) append (s, s+n, 1);
}

int
examine (state, n)
register int state;
int n;
/*
 * Find strings of printable characters in buf and append them to
 * the output buffer, if they meet certain conditions.
 *
 * state : the state in which the automaton is entered; SEARCH for the
 *         first block of a file, afterwards the value returned by the
 *         previous call.
 * n     : number of valid characters in buf; must be greater than 0.
 * Returns the state in which the automaton has to be re-entered for the
 * next block: SEARCH, or TRY if a sequence reached the end of the block.
 *
 * The main part of this routine is a DFA (deterministic finite automaton) with
 * three states.
 * These states are
 * SEARCH : search for a printable character by examining characters in
 *			distance min_str_len. If found, set b1 to the start
 *			of the sequence and enter state TRY.
 * TRY    : We have found a printable character. Set b2 to the first character
 *			after the end of the sequence by single stepping. Set state to
 *			DECIDE.
 * DECIDE : We have found a sequence of printable characters. If the first
 *			character after the sequence is in the buffer, then we can decide
 *			what to do with the sequence (even if flag -c was not specified).
 *			If not then the stuff is buffered, state set to TRY, and returned
 *			to the caller to read a new block of input.
 *			It is tested whether the sequence meets the requirements.
 *			Either it is output by placing it permanently into the output
 *			buffer, or it is forgotten.
 *
 * All pointers are kept inside [buf, end]; stepping a pointer further
 * than one element beyond the array is undefined in C and could wrap
 * around for a large min_str_len.
 */
{
	register CHAR_TYPE * b, * b1, * b2, * end;

	end = buf + n;

	b = b1 = b2 = buf;
	for (;b < end; b = b2+1) {
# ifdef DEBUG
		fprintf (prot, "state = %s; b at %d\n",
			state == SEARCH ? "SEARCH" : (state == DECIDE ? "DECIDE" : "TRY"),
			(int)(b - buf));
# endif
		b1 = b;
		switch (state) {
			case SEARCH:
				/*
				 * Search a character which might be in a sequence of
				 * printable characters. Note that it suffices to examine
				 * characters in distance min_str_len.
				 * If the next step would leave the buffer, stop at its
				 * end; the end of the buffer is examined below.
				 */
				while (b2 < end && !IS_PRINTABLE(*b2))
					b2 = (end - b2 > min_str_len) ? b2 + min_str_len : end;
				/*
				 * Find the start of the current sequence: walk back over
				 * the printable characters in front of b2.
				 */
				b1 = b2;
				while (b1 > buf && IS_PRINTABLE(b1[-1]))
					b1--;
				/*
				 * Nothing printable up to the end of the block.
				 */
				if (b1 >= end)
					return (SEARCH);
				/* FALL THROUGH */
			case TRY:
				/*
				 * Find  the end of the current sequence. Set b2 one beyond.
				 */
				while (b2 < end && IS_PRINTABLE(*b2))
					b2++;
# ifdef DEBUG
				fprintf (prot, "found seq between %1d and %1d -->",
					(int)(b1-buf), (int)(b2-buf));
				{ CHAR_TYPE * tmp;
					for (tmp = b1; tmp < b2; tmp++)
						if (IS_PRINTABLE(*tmp))
							fputc (*tmp, prot);
						else
							fputc ('.', prot);
				}
				fprintf (prot, "<--\n");
# endif
				/*
				 * Should set state to DECIDE; but we don't need it.
				 * state will be reset anyway.
				 */
				/* FALL THROUGH */
			case DECIDE:
				/*
				 * Can we decide what to do with the sequence which
				 * we have found? We cannot, if we are at the end of
				 * the block, because we need just one more character.
				 */
				if (b2 >= end) {
# ifdef DEBUG
					fprintf (prot, "I cannot decide. Must read a new block.\n");
# endif
					(void) append (b1, b2, 0);
					return (TRY);
				}
# ifdef DEBUG
				fprintf (prot, "I can decide.\n");
				if (ind_c) {
					if (*b2 == '\0' || *b2 == '\n')
						fprintf (prot, "String is a C string; followed by %s\n",
							*b2 == '\0'?"NUL":"\\n");
				}
# endif
				/*
				 * 'saved' counts the characters of this sequence which
				 * were buffered while examining earlier blocks.
				 */
				if (((int)(b2-b1)+saved >= min_str_len) &&
					(!ind_c || (*b2 == '\0' || *b2 == '\n'))) {
					/*
					 * String is accepted. Copy it to the output buffer.
					 */
# ifdef DEBUG
					fprintf (prot, "Accept string.\n");
# endif
					(void) append (b1, b2, 1);
				} else {
					/*
					 * String is refused. Forget any temporarily buffered
					 * stuff in output buffer.
					 */
# ifdef DEBUG
					fprintf (prot, "String refused.\n");
# endif
					level = out_buf + num_out_buf;
					saved = 0;
				}
				state = SEARCH;
		}	/* switch */
	}	/* for (;b < end; ... */
	return (state);
}

strings (name)
char * name;
/*
 * Find strings in a file or an input stream.
 * This routine sets the limits to handle a file, either to the
 * whole file, or to the initialized data only.
 * In a loop it reads blocks from the file and calls the DFA ('examine').
 * Examine returns its state, so that it can be reentered at the
 * right place.
 */
{
	register int n, state;
	LSEEK_TYPE l, first, last;

	if (name == NULL) {
		fd = 0;
	} else
		if ((fd = open (name, O_RDONLY, 0)) == -1) {
			perror (name);
			return;
		}
	if (name == NULL || ind_whole == 1) {
		first = (LSEEK_TYPE)0;
		last = (LSEEK_TYPE)(-1); 		/* --> no limit */
	} else {
# ifdef I_SPECIAL
		/*
		 * Get the limits for reading.
		 * If the file is not an object, then we look at whole file.
		 */
		get_limits (fd, &first, &last);
# ifdef DEBUG
		fprintf (prot, "lseek to %ld; last = %ld\n", (long)first, (long)last);
# endif
		if (lseek (fd, first, 0) != first) {
			perror ("lseek");
			return;
		}
# else I_SPECIAL
		first = (LSEEK_TYPE)0;
		last = (LSEEK_TYPE)(-1); 		/* --> no limit */
# endif I_SPECIAL
	}

	cur_file_name = name;
	offset = first;
	state = SEARCH;
	for (;;) {
		/*
		 * Do we really have to read a block ?
		 * How much should we read? The difficult thing here
		 * is to watch out not to read beyond the limits of
		 * initialized data.
		 */
		if (last != (LSEEK_TYPE)(-1)) {
			l = last - offset;
			if (l <= 0)
				break;
			if (l > IN_BUF_LEN)
				l = IN_BUF_LEN;
		} else
				l = IN_BUF_LEN;
# ifdef DEBUG
		fprintf (prot, "reading %1ld chars\n", l);
# endif
		if ((n = read (fd, buf, (int)l)) <= 0)
			break;
# ifdef DEBUG
		fprintf (prot, "read %1d characters\n", n);
		fflush (prot);
# endif

		state = examine (state, n);

		offset += n;
	}
	if (n == -1)
		perror ("read");
	/*
	 * If the piece of the file ended with a string of printable characters,
	 * we must check whether this string is valid.
	 * We need not peek at the first character after the strings, as we know
	 * that it cannot be \0 or \n.
	 */
	if (saved > 0 && !ind_c)
		if (saved >= min_str_len)
			(void) append (buf, buf, 1);
	/*
	 * We must flush the output buffer.
	 */
	flush_output ();
	if (name != 0)
		(void)close (fd);
}

# ifndef FAST_COPY
/*
 * FAST_COPY
 *
 * Portable fallback, compiled only if no FAST_COPY macro was configured.
 * Copy count characters from 'from' to 'to', lowest address first.
 * Note the argument order: source first, destination second.
 * Nothing is copied if count is zero or negative.
 * Returns the start of the destination.
 */
char *
FAST_COPY (from, to, count)
register char * from, * to;
register int count;
{
	register char * tmp;

	tmp = to;
	/* "> 0" keeps a negative count from running through all of memory. */
	while (count-- > 0)
		*to++ = *from++;
	return (tmp);
}
# endif /* FAST_COPY */
