/* This file extracts the To, From, Subject, Cc and Bcc lists from the file
   named in argv[1]. If it discovers a line containing just \n it stops
   processing, as we may be inside the body of the message. On discovering a
   From line it reads the first word and discards the rest (many mailers
   generate From lines like
      From: tim.graves@uk.sun.com (Tim Graves Principal Sales Support Specialist)
   and the address is all we are interested in).
   To, Cc and Bcc (should we ever see a Bcc list!) can be multi-line;
   to handle these we just join the lines together until we see a line
   containing a ':'. In the future we should remove whitespace as well.

   After extracting the information it is written to a file whose name is
   based on the input file, as defined by the define HDRSKELETON. This
   information is in a format ready for the meta routines apart from the
   header and trailer strings.

   NOTE this is based around the way the Sun mailer works. I don't have
   information about other mail delivery systems. */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
/* header field names, matched (ignoring case) against the start of a line */
#define TOSTR "To: "
#define FROMSTR "From: "
#define CCSTR "Cc: "
#define BCCSTR "Bcc: "
#define SUBJECTSTR "Subject: "

/* printf format that turns the mail file name into the output file name */
#define HDRSKELETON "%s.hdr"
/* the maximum size used in a header, note the psion currently uses 100 as this value */
/* we should be OK with this as I think SMTP limits header stuff to 80 chars */
#define MAXHDR 1024
/* DO not change this without changing the HDRLEN #define in the mail.c program for the psion */
#define PSIONHDR 100
/* return values from getline (it can also return EOF) */
#define COLON 3
#define NOCOLON 2
#define TRUE 1
#define FALSE 0
/* temp location for the rewritten message when headers are being deleted */
#define TMPNAME "/tmp/nohdr.msg"
/* globals, makes life easier in this case */
/* the extracted header values */
char to[MAXHDR], from[MAXHDR], cc[MAXHDR], bcc[MAXHDR], subject[MAXHDR] ;
/* the current header line with its whitespace squeezed, filled in by getline */
char inpline[MAXHDR] ;
/* the mail file being read; main reuses it for the .hdr output file */
FILE * fd ;
/* the temp file (TMPNAME), only opened when delflag is TRUE */
FILE * outfd ;
/* TRUE when inpline holds a line that the next getline call must hand back */
int dontfgets = FALSE ;
/* TRUE when -d was given and the temp file could be opened */
int delflag = FALSE ;
/* set to TRUE by shortstr when it has to cut a value down */
int truncflag = FALSE ;

/* Declarations for the functions in this file, so that no call relies on
   an implicit declaration (which stopped being valid C in C99).
   process is declared with empty parentheses on purpose: that stays
   compatible with a call site that passes it an argument. */
int init(void) ;
int process() ;
int output(FILE * fd, char * fname) ;
int doout(FILE * fd, char * str) ;
int shortstr(char * str) ;
int dofrom(void) ;
int dosubject(void) ;
int domulti(char * targetstr, char * hdr) ;
int multiline(char * targetstr) ;
int copyremaining(void) ;
int getline(void) ;


/*
 * extractheadr mailfile [-d]
 *
 * Reads the headers of the mail file argv[1] and writes the extracted
 * values to a second file named by HDRSKELETON ("<mailfile>.hdr").
 * With -d, and provided the end of the headers was found, the mail file
 * is also replaced by the temp file TMPNAME, which holds the header lines
 * that were picked out followed by the rest of the message.
 * An unrecognised second argument prints the usage line but is not fatal.
 *
 * Exit status: 0 on completion, 1 for a usage error, 2 if the mail file
 * cannot be opened, 3 if the .hdr file cannot be named or opened.
 */
int main(int argc, char * argv[])
{
	char fname[MAXHDR] ;
	char cmd[1024] ;
	int ret ;
	int len ;

	if ((argc < 2) || (argc > 3))
	{
		printf("Usage: extractheadr mailfile [-d]\n") ;
		exit(1) ;
	}
	if (argc == 3) 
	{
		if (strcmp("-d", argv[2]) == 0)
			delflag = TRUE ;
		else
		{
			printf("Usage:  extractheadr mailfile [-d]\n") ;
			delflag = FALSE ;
		}
	}
	if ((fd = fopen(argv[1], "r"))== NULL)
	{
		printf("extractheadr, cant open input %s\n", argv[1]) ;
		exit(2) ;
	}
	/* build the output name before doing any work, so that a name which
	   does not fit is refused instead of overrunning fname */
	len = snprintf(fname, sizeof(fname), HDRSKELETON, argv[1]) ;
	if ((len < 0) || ((size_t) len >= sizeof(fname)))
	{
		printf("extractheadr, file name %s is too long\n", argv[1]) ;
		exit(3) ;
	}
	init() ;
	ret = process() ;
	if (ret == EOF)
	{
		printf("Error found EOF in headers\n") ;
	}
	/* init may have cleared delflag if the temp file could not be opened */
	if (delflag == TRUE)
	{
		if (ret == FALSE)
		{
			/* if truncflag is TRUE one or more header lines will have been truncated */
			if (truncflag == TRUE)
				fprintf(outfd, "WARNING - One or more header lines have been truncated\n") ;
			/* we are deleting the headers, copy the rest of the message over */
			copyremaining() ;
			fclose(outfd) ;
			/* NOTE(review): the file name reaches the shell unquoted, so a
			   name containing spaces or shell characters will misbehave */
			len = snprintf(cmd, sizeof(cmd), "mv %s %s", TMPNAME, argv[1]) ;
			if ((len < 0) || ((size_t) len >= sizeof(cmd)))
				printf("extractheadr, file name %s is too long, headers not removed\n", argv[1]) ;
			else if (system(cmd) != 0)
				printf("extractheadr, could not replace %s with %s\n", argv[1], TMPNAME) ;
		}
		else
		{
			/* the headers never ended, leave the mail file alone */
			fclose(outfd) ;
		}
	}
	fclose(fd) ;
	if ((fd = fopen(fname, "w")) == NULL) 
	{
		printf("extractheadr, cant open output file %s\n", fname) ;
		exit(3) ;
	}
	output(fd, argv[1]) ;
	fclose(fd) ;
	return(0) ;
}
/*
 * Write the extracted information to fd, one item per line, in this
 * fixed order: the mail file name, to, from, subject, cc, bcc and
 * finally the status letter "N".
 *
 * fd    - the stream to write to
 * fname - the name of the mail file the headers came from
 *
 * Always returns 0.
 */
int output(FILE * fd, char * fname)
{
	doout(fd, fname) ;
	doout(fd, to) ;
	doout(fd, from) ;
	doout(fd, subject) ;
	doout(fd, cc) ;
	doout(fd, bcc) ;
	doout(fd, "N") ; /* mark the email as unread */
	return(0) ;
}
/*
 * Write str to fd followed by a newline.
 *
 * fd  - the stream to write to
 * str - the NUL terminated text for the line
 *
 * Returns 0 if the line was written, -1 if fprintf reported an error.
 */
int doout(FILE * fd, char * str)
{
	if (fprintf(fd, "%s\n", str) < 0)
		return(-1) ;
	return(0) ;
}
/*
 * Cut str down, in place, so that it is short enough for the psion.
 *
 * Strings shorter than PSIONHDR characters are left alone. Anything
 * longer is cut at the last whitespace character found at or before
 * index PSIONHDR (the whitespace itself is dropped as well). If there is
 * no whitespace in that range at all, which a long address without
 * spaces will cause, the string is simply cut to PSIONHDR - 1
 * characters rather than searching backwards past the start of the
 * buffer. truncflag is set to TRUE whenever something was removed.
 *
 * str - the NUL terminated string to shorten, modified in place
 *
 * Always returns 0.
 */
int shortstr(char * str)
{
	int i ;

	/* output upto PSIONHDR chars AT MOST */
	if (strlen(str) < ((size_t) PSIONHDR))
		return(0) ;
	/* look for the first whitespace at or before PSIONHDR, never
	   stepping below the first character of the string */
	i = PSIONHDR ;
	while ((i > 0) && (! isspace((unsigned char) str[i])))
		i -- ;
	/* nothing to break on, fall back to a hard cut */
	if (! isspace((unsigned char) str[i]))
		i = PSIONHDR - 1 ;
	/* terminate here, this also removes the whitespace we stopped on */
	str[i] = '\0' ;
	truncflag = TRUE ;
	return(0) ;
}

/*
 * Empty all of the extracted header strings and, if delflag is TRUE,
 * open the temp file TMPNAME for writing as outfd.
 * If the temp file cannot be opened a warning is printed and delflag is
 * set back to FALSE so that no header removal is attempted.
 *
 * NOTE(review): TMPNAME is a fixed name in /tmp, so two copies of the
 * program running at once would share it - confirm this cannot happen.
 *
 * Always returns 0.
 */
int init(void)
{
	to[0] = '\0' ;
	from[0] = '\0' ;
	subject[0] = '\0' ;
	cc[0] = '\0' ;
	bcc[0] = '\0' ;
	if (delflag == TRUE)
	{
		if ((outfd = fopen(TMPNAME, "w")) == NULL)
		{
			printf("Warning, could not open %s as temp file, cannot remove unwanted headers\n", TMPNAME) ;
			delflag = FALSE ;
		}
	}
	return(0) ;
}

/*
 * Read the header lines one at a time with getline and hand each one
 * that starts with a known field name (compared ignoring case) to the
 * routine that extracts it. Lines starting with anything else are
 * skipped. The loop ends when getline, or one of the handlers, reports
 * the end of the headers or the end of the file.
 *
 * Returns FALSE if the blank line ending the headers was found, or EOF
 * if the file ran out first.
 */
int process(void)
{
	int ret;

	ret = getline() ;
	while(( ret != FALSE) && (ret != EOF))
	{
		/* do the comparisons, sizeof - 1 is the length of the constant
		   without its terminating NUL */
		if (strncasecmp(FROMSTR, inpline, sizeof(FROMSTR) - 1) == 0)
			ret = dofrom() ;
		else if(strncasecmp(TOSTR, inpline, sizeof(TOSTR) - 1) == 0)
			ret = domulti(to, TOSTR) ;
		else if(strncasecmp(SUBJECTSTR, inpline, sizeof(SUBJECTSTR) - 1) == 0)
			ret = dosubject() ;
		else if(strncasecmp(CCSTR, inpline, sizeof(CCSTR) - 1) == 0)
			ret = domulti(cc, CCSTR) ;
		else if(strncasecmp(BCCSTR, inpline, sizeof(BCCSTR) - 1) == 0)
			ret = domulti(bcc, BCCSTR) ;
		/* a handler may already have hit the end of the headers */
		if((ret != FALSE) && (ret != EOF))
			ret = getline() ;
	}
	return (ret) ;
}
dofrom()
{
	char * start, *end ;
	/* ths line is probabaly of the format
	   From: tim.graves@uk.sun.com (Tim Graves, Sun UK Principal Sales Support)
	   fortunatly we know we have removed all the whitespace down to
	   single spaces to the character after the first space is the
	   start of the address and the next space (If it is there)
	   is the end */
	start = strchr(inpline, ' ') ;
	/* if start is NULL there is no from address (dogy of what) 
	   hopefully it will be found later */
	if (start == NULL)
		return(COLON) ;
	/* move over the space */
	start ++ ;
	/* find the next space */
	end = strchr(start, ' ') ;
	/* if end is null no space otherwise terminate the string */
	if (end != NULL)
	{
		*end = '\0' ;
	}
	/* copy the string over */
	strcpy(from, start) ;
	/* reduce the string to something manageable */
	shortstr(from) ;
	/* if delflag write the line out */
	if (delflag == TRUE)
	{
		fprintf(outfd, "%s%s\n", FROMSTR, from) ;
	}
	return(COLON) ; /* there MUST have been a colon here for us to be called */
}
dosubject()
{
	char * start ;
	/* this is even easier, we just need to find the first space and then copy over */
	start = strchr(inpline, ' ') ;
	/* if start is NULL there is no subject */
	if (start == NULL)
		return(COLON) ;
	start ++ ; /* move over the space */
	strcpy(subject, start) ;
	/* reduce the string to something manageable */
	shortstr(subject) ;
	/* ifdelflag write the line out */
	if (delflag == TRUE)
	{
		fprintf(outfd, "%s%s\n", SUBJECTSTR, subject) ;
	}
	return(COLON) ;
}

domulti(targetstr, hdr)
char * targetstr, *hdr ;
{
	char * start ;
	int ret ;
	/* as per subject but to cc and bcc can be multi line
	   copy over the first line and then call multiline to add the 
	   following lines */
	start = strchr(inpline, ' ') ;
	if (start == NULL)
		return(COLON) ;
	start ++ ;
	strcpy(targetstr, start) ;
	ret = multiline(targetstr) ;
	/* reduce the string to something manageable */
	shortstr(targetstr) ;
	if (delflag == TRUE)
	{
		fprintf(outfd, "%s%s\n", hdr, targetstr) ;
	}
	return(ret) ;
}
/*
 * Add the continuation lines of a multi line field to targetstr.
 *
 * Lines are read with getline for as long as it returns NOCOLON, and
 * each one is added to targetstr after a single space. If the next line
 * would not fit, it and the remaining continuation lines are read and
 * thrown away. The line that ended the field has already been read when
 * we stop, so dontfgets is set to make the next getline call hand it
 * back instead of reading a new one.
 *
 * Note that getline strips leading whitespace, so a continuation line
 * which happens to contain a ':' cannot be told apart from the start of
 * a new field and ends this one.
 *
 * targetstr - the value so far, in a buffer assumed to be MAXHDR bytes
 *
 * Returns the value getline gave for the line that ended the field
 * (COLON, FALSE or EOF).
 */
int multiline(char * targetstr)
{
	int ret ;
	size_t inlen, targetlen ;

	ret = getline() ;
	while (ret == NOCOLON)
	{
		/* work out the lengths */
		inlen = strlen(inpline) ;
		targetlen = strlen(targetstr) ;
		/* we need room for the joining space and the terminating NUL
		   as well as the text itself */
		if ((targetlen + inlen + 2) > ((size_t) MAXHDR))
		{
			/* no space left, eat up the rest of this field */
			while ((ret = getline()) == NOCOLON)
				;
			break ;
		}
		/* append a ' ' and then the new line to targetstr */
		targetstr[targetlen] = ' ' ;
		strcpy(targetstr + targetlen + 1, inpline) ;
		ret = getline() ;
	}
	/* leave the line we stopped on for whoever calls getline next */
	dontfgets = TRUE ;
	return(ret) ;
}
/*
 * Copy everything that is left to read on fd to outfd.
 *
 * The copy stops as soon as fgets has nothing more to give, so nothing
 * is written for the failed read: the last piece of the message is
 * written once only, and an empty remainder writes nothing.
 *
 * Always returns 0.
 */
int copyremaining(void)
{
	char tmp[1024] ;

	while (fgets(tmp, sizeof(tmp), fd) != NULL)
	{
		fputs(tmp, outfd) ;
	}
	return(0) ;
}
/*
 * Read the next header line from fd into inpline, squeezing whitespace.
 *
 * Leading whitespace is dropped, every other run of whitespace becomes
 * one space, and a space left at the end of the line (normally from the
 * newline) is removed.
 *
 * If dontfgets is TRUE nothing is read: the flag is cleared and COLON is
 * returned, because inpline already holds a line left over from earlier
 * processing.
 *
 * A line holding nothing but "\n" ends the headers. A line holding only
 * other whitespace (for example "\r\n") does not; it comes back as an
 * empty inpline with NOCOLON.
 *
 * NOTE(review): POSIX declares a different function called getline in
 * stdio.h. Where that declaration is visible this definition will not
 * compile; renaming it means changing every caller as well.
 *
 * Returns EOF at the end of the file, FALSE for the line ending the
 * headers, COLON if the line contains a ':' and NOCOLON otherwise.
 */
int getline(void)
{
	char tmp[MAXHDR] ;
	int inctr, outctr ;
	int white ;
	int len ;

	if (dontfgets == TRUE)
	{
		dontfgets = FALSE ; /* dont want to get into a loop */
		return(COLON) ;
	}
	/* at the EOF fgets gives NULL so return */
	if (fgets(tmp, MAXHDR, fd) == NULL)
		return(EOF) ;
	/* if the line is just a newline return FALSE */
	len = strlen(tmp) ;
	if ((len == 1) && (tmp[0] == '\n'))
	{
		return(FALSE) ;
	}
	/* copy the line over squeezing the whitespace, white is TRUE while
	   we are inside a run of whitespace */
	outctr = 0 ;
	white = FALSE ;
	for (inctr = 0 ; inctr < len ; inctr ++)
	{
		/* the cast keeps isspace defined for characters above 127 */
		if (isspace((unsigned char) tmp[inctr]))
		{
			/* one ' ' for each run, and none at the start of a line */
			if ((white == FALSE) && (outctr > 0))
			{
				inpline[outctr] = ' ' ;
				outctr ++ ;
			}
			white = TRUE ;
		}
		else
		{
			inpline[outctr] = tmp[inctr] ;
			outctr ++ ;
			white = FALSE ;
		}
	}
	/* if there is a space at the end of the line remove it, taking care
	   that the line may have come out empty */
	if ((outctr > 0) && (inpline[outctr - 1] == ' '))
		outctr -- ;
	/* terminate inpline */
	inpline[outctr] = '\0' ;
	/* if there is a : in the line return COLON otherwise return NOCOLON */
	if (strchr(inpline, ':') == NULL)
		return(NOCOLON) ;
	else
		return(COLON) ;
}
