/*
**  Header-cracking and address re-writing routines, with sincere
**  apologies to Upas and Sendmail.
*/
#include "gate.h"
#if	defined(DO_GETHOSTBYNAME)
#include <netdb.h>
#endif	/* defined(DO_GETHOSTBYNAME) */
#if	defined(RCSID)
static char RCS[] =
	"$Header: /nfs/papaya/u2/rsalz/src/newsgate/RCS/hdr.c,v 1.21 91/07/18 20:00:57 rsalz Exp $";
#endif	/* defined(RCSID) */


#if	defined(TEST)
#if	!defined(DO_FIX_ADDRESS)
#define DO_FIX_ADDRESS
#endif	/* !defined(DO_FIX_ADDRESS) */
#if	!defined(DO_ADDRESS_CLEANUP)
#define DO_ADDRESS_CLEANUP
#endif	/* !defined(DO_ADDRESS_CLEANUP) */
#else
#define dprintf(string, buff)	/* NULL */
#endif	/* defined(TEST) */


#if	defined(DO_ADDRESS_CLEANUP)
/*
**  List of domains that we recognize.  Percent() compares the tail of the
**  host in a "user%host@gateway" address against these entries.  Every
**  entry starts with its leading '.' and is spelled in upper case; the
**  comparison in Percent() also accepts the lower-case form of each letter.
*/
STATIC char	*Domains[] = {
    /* These aren't official domains, but we use them. */
	".BITNET",
	".UUCP",
    /* Official organizational domains. */
	".ARPA",	".COM",	".EDU",	".GOV",	".INT",	".MIL",
	".NATO",	".NET",	".ORG",
    /* Official national domains. */
	".AR",	".AT",	".AU",	".BE",	".BR",	".CA",	".CH",	".CL",
	".CN",	".CR",	".CS",	".DE",	".DK",	".EC",	".EG",	".ES",
	".FI",	".FR",	".GR",	".HK",	".HU",	".IE",	".IL",	".IN",
	".IS",	".IT",	".JP",	".KR",	".LK",	".MX",	".MY",	".NI",
	".NL",	".NO",	".NZ",	".PH",	".PL",	".PR",	".PT",	".SE",
	".SG",	".SU",	".TH",	".TR",	".TW",	".UK",	".US",	".UY",
	".YU",	".ZA"
};


/*
**  Strip the route from a route-address, in place:
**	@cruft:joe@site --> joe@site
**  Only strings that begin with '@' are touched.  The text following the
**  last ':' that precedes the final '@' is slid down to the start of the
**  buffer; if there is no such ':' the string is left unchanged.
*/
STATIC void
RouteAddr(p)
    register ADDRCHAR	*p;
{
    register ADDRCHAR	*scan;
    register ADDRCHAR	*from;

    /* Anything that doesn't start with '@' is not a route. */
    if (*p != '@')
	return;

    /* Walk to the terminator, then back up to the last '@'.  The leading
     * '@' guarantees that the backward walk stops inside the string. */
    scan = p;
    while (*scan)
	scan++;
    do {
	scan--;
    } while (*scan != '@');

    /* Back up further to the preceding colon, if there is one. */
    while (scan > p && *scan != ':')
	scan--;
    if (scan <= p)
	return;

    /* Slide everything after the colon, terminator included, to the front. */
    for (from = scan + 1; (*p = *from) != 0; p++, from++)
	continue;
}


/*
**  Handle the '%' syntax, in place:
**	joe%site.EDU@gateway.DOMAIN -> joe@site.EDU
**  Starting from the rightmost '@', find the nearest '%' to its left.  If
**  the host between that '%' and the '@' ends in one of the entries in
**  Domains, the '@' and everything after it is cut off and the '%' becomes
**  the new '@'.  This repeats for further '%' hops and stops at the first
**  hop whose host is not in a recognized domain.
*/
STATIC void
Percent(p)
    register ADDRCHAR	*p;
{
    register ADDRCHAR	*q;
    register ADDRCHAR	*r;
    register ADDRCHAR	*s;
    register char	**dp;
    register char	*pat;

    /* Nothing to rewrite in an empty address; bailing out here also keeps
     * the backward scan below from stepping in front of the buffer. */
    if (*p == '\0')
	return;

    /* Find the rightmost '@'. */
    for (r = p; *r; r++)
	continue;
    while (--r > p)
	if (*r == '@')
	    break;

    for (; *r == '@'; r = q) {
	/* Find the '%' just before the '@'. */
	for (q = r; q > p && *q != '%'; q--)
	    continue;
	if (*q != '%')
	    break;

	for (dp = Domains; dp < ENDOF(Domains); dp++) {
	    /* Compare the domain against the host right-to-left, starting
	     * with the last character of each. */
	    pat = *dp + strlen(*dp);
	    for (s = r, pat--, s--; pat > *dp && s > q; pat--, s--)
		if (*s != *pat
		 && (!isupper(*pat) || *s != tolower(*pat)))
		    break;
	    if (*s != *pat)
		/* Assume **dp == '.' or not letter. */
		continue;

	    /* Known domain: drop "@gateway" and promote the '%'. */
	    *r = '\0';
	    *q = '@';
	    break;
	}
    }
}


/*
**  Turn a pure bang path into a hybrid "!" and "@" address, in place:
**	a!site!joe@site --> leave alone (already has an '@')
**	joe --> leave alone (no '!')
**	a!site!joe --> site!joe@a.UUCP
**	a.EDU!site!joe --> site!joe@a.EDU
**  Only the first hop is peeled off and moved behind an '@'; ".UUCP" is
**  appended when that hop contains no '.'.  The result is longer than the
**  input and the buffer past the terminator is used as work space, so the
**  caller's buffer must have spare room.
**
**  NOTE: an earlier version of this comment also listed
**	a!site.EDU!joe --> joe@site.EDU
**  but the code below turns that input into site.EDU!joe@a.UUCP.
*/
STATIC void
Hybrid(p)
    register ADDRCHAR	*p;
{
    static char		suffix[] = ".UUCP";
    register ADDRCHAR	*tail;
    register ADDRCHAR	*bang;
    register ADDRCHAR	*src;
    register ADDRCHAR	*dst;
    register char	*cp;

    /* Find the end of the string; give up if we meet an '@' on the way. */
    for (tail = p; *tail; tail++)
	if (*tail == '@')
	    return;

    /* Find the first '!'; give up if there isn't one. */
    bang = p;
    while (*bang && *bang != '!')
	bang++;
    if (*bang != '!')
	return;

    /* Cut off the first hop and append "@hop" after the old end. */
    *bang = '\0';
    *tail++ = '@';
    for (src = p; *src; src++)
	*tail++ = *src;
    *tail = '\0';

    /* Slide "rest@hop" down to the beginning of the buffer. */
    dst = p;
    for (src = bang + 1; *src; src++)
	*dst++ = *src;
    *dst = '\0';

    /* If the hop already looks like a domain name, we are done. */
    for (src = dst; *src != '@'; src--)
	if (*src == '.')
	    return;

    /* Otherwise tack on the pseudo-domain, terminator included. */
    for (cp = suffix; (*dst++ = *cp++) != '\0'; )
	continue;
}
#endif	/* defined(DO_ADDRESS_CLEANUP) */


#if	defined(TEST)
/*
**  Debugging aid for the TEST build:  print "<prompt> returns:  " followed
**  by the address, showing every element that has a QUOTE_MASK bit set with
**  a backslash in front of it.
*/
STATIC void
dprintf(prompt, p)
    char	*prompt;
    ADDRCHAR	*p;
{
    (void)printf("%s returns:  ", prompt);
    while (*p) {
	if (*p & QUOTE_MASK)
	    (void)putchar('\\');
	(void)putchar(*p & UNQUOTE_MASK);
	p++;
    }
    (void)putchar('\n');
}
#endif	/* defined(TEST) */

/*
**  General address canonicalizer.  When DO_ADDRESS_CLEANUP is defined the
**  address is first rewritten in place by RouteAddr, Percent and Hybrid.
**  It is then copied, with the quote bits masked off, into a static buffer.
**  If no element of the address satisfies NETCHR the address is taken to
**  be local and "@<our hostname>" is appended.
**
**  Returns a pointer to the static buffer, which is overwritten by the
**  next call.  An over-long address is truncated to fit the buffer.
**  Exits with EX_TEMPFAIL if the local hostname cannot be obtained.
*/
STATIC char *
FixAddress(p)
    register ADDRCHAR	*p;
{
    static char		buff[1024];
    register char	*cp;
    register char	*end;
    register char	*h;
    register int	local;
    char		host[128];

#if	defined(DO_ADDRESS_CLEANUP)
    RouteAddr(p);
    dprintf("RouteAddr", p);
    Percent(p);
    dprintf("  Percent", p);
    Hybrid(p);
    dprintf("   Hybrid", p);
#endif	/* defined(DO_ADDRESS_CLEANUP) */

    /* Copy address, removing all quoting, and see if it's local.  Stop
     * one short of the end of buff so there is always room for the
     * terminator. */
    end = &buff[sizeof buff - 1];
    for (local = 1, cp = buff;
	 cp < end && (*cp = *p & UNQUOTE_MASK) != '\0';
	 cp++, p++)
	if (NETCHR(*p))
	    local = 0;
    *cp = '\0';

    if (local) {
	if (gethostname(host, sizeof host) < 0) {
	    Fprintf(stderr, "%s:  Can't get my hostname, %s.\n",
		    Pname, strerror(errno));
	    exit(EX_TEMPFAIL);
	}
	/* gethostname need not terminate a name that was truncated. */
	host[sizeof host - 1] = '\0';

	/* Append "@host" by hand so we never run off the end of buff. */
	if (cp < end)
	    *cp++ = '@';
	for (h = host; *h && cp < end; )
	    *cp++ = *h++;
	*cp = '\0';
    }
    return buff;
}


/*
**  This subroutine is a concession to the realities of the Internet and
**  the USENET.  Much as the idea is distasteful and likely to get me in
**  trouble, I have to hack message-ids into a format that the USENET
**  won't choke on.  Pray that if we're doing multiple insertion point
**  gatewaying that ALL the gateways mung exactly the same things.
**
**  (Death to HERMES! Death to UNIX/MM-11! Death to EAN!)
**
**  The string is edited in place:  tabs, spaces, slashes and double quotes
**  become underscores, and the string is cut off right after the first
**  '>'.  Returns TRUE if the ID starts with '<', has a closing '>' and
**  contains at least one '.' or '@'.  Returns FALSE as soon as a second
**  '<', a non-ASCII character, or any other control or whitespace
**  character is seen; the string may already have been partly edited by
**  then.
*/
STATIC int
FixMessageID(s)
    register char	*s;
{
    register int	sawdomain;
    register int	sawclose;

    /* Quickie test -- an ID has to open with an angle bracket. */
    if (*s != '<')
	return FALSE;

    sawdomain = FALSE;
    sawclose = FALSE;
    while (*++s) {
	if (*s == '<')
	    /* Already got one. */
	    return FALSE;

	if (*s == '>') {
	    /* I hope no one is stupid enough to quote this... */
	    sawclose = TRUE;
	    s[1] = '\0';
	}
	else if (*s == '.' || *s == '@')
	    /* We should check for a domain spec, not just either/or. */
	    sawdomain = TRUE;
	else if (*s == '\t' || *s == ' ' || *s == '/' || *s == '"')
	    /* Avoid various problem characters. */
	    *s = '_';
	else if (!isascii(*s) || iscntrl(*s) || isspace(*s))
	    return FALSE;
    }

    return sawdomain && sawclose;
}


/*
**  Fix up the contents of In-Reply-To: fields and References: fields.
**  Every "<...>" in hp->followid is run through FixMessageID; the ones it
**  accepts are collected, separated by single spaces, and the result
**  replaces hp->followid.  Scanning stops at the first unterminated ID or
**  when the collected list would no longer fit in the work buffer.  If no
**  ID is accepted hp->followid becomes the empty string.
*/
STATIC void
FixReferences(hp)
    register HBUF		*hp;
{
    register char		*cp;
    register char		*ep;
    register char		*p;
    register char		*max;
    char			scratch[LG_SIZE];

    /* Start with an empty list so the final copy is well defined even
     * when nothing is accepted. */
    scratch[0] = '\0';

    cp = hp->followid;
    /* FixMessageID writes a NUL after each '>', so remember where the
     * field really ends. */
    max = cp + strlen(cp);
    for (p = scratch; (cp = IDX(cp, '<')) != NULL; ) {
	if ((ep = IDX(cp, '>')) == NULL
	 || ((ep - cp) + 1) > sizeof scratch - (p - scratch + 2))
	    /* Unterminated ID, or no more room. */
	    break;

	if (FixMessageID(cp)) {
	    /* Separate IDs with a single space.  No terminator goes in
	     * here; APPEND presumably copies the terminator along with the
	     * ID and returns the ID's length -- confirm in gate.h. */
	    if (p > scratch)
		*p++ = ' ';
	    p += APPEND(p, cp);
	}

	/* Step over the '>' and the character that follows it. */
	cp = ep + 2;
	if (cp >= max)
	    break;
    }
    Strcpy(hp->followid, scratch);
}


/*
**  Return how many '@' characters the string contains.
*/
STATIC int
AtCount(s)
    register char	*s;
{
    register int	count;

    count = 0;
    while (*s)
	if (*s++ == '@')
	    count++;
    return count;
}


/*
**  Canonicalize the "From:" line into the form
**	From: local-part@domain (full-name)
**  RFC822 doesn't require the comment to be at the end of the string
**  like that.
**
**  hp->from is cracked into an address and a full name, the address is run
**  through FixAddress, and (with DO_GETHOSTBYNAME) the host is replaced by
**  its official name.  hp->path is set from FIXED_PATH or built from the
**  host and local part.  If the address doesn't contain exactly one '@'
**  it is used unsplit, both in the path and in the rewritten From: line.
**  The result has its high bits stripped.
*/
STATIC void
FixFrom(hp)
    register HBUF		*hp;
{
    register char		*p;
#if	defined(DO_GETHOSTBYNAME)
    register struct hostent	*host;
#endif	/* defined(DO_GETHOSTBYNAME) */
    char			address[LG_SIZE];
    char			fullname[LG_SIZE];
    char			scratch[sizeof address];
    ADDRCHAR			temp[LG_SIZE];

    /* We should handle "Full-Name:" too, but it doesn't get read by the
     * news header reader. */
    (void)CrackFrom(temp, fullname, hp->from);
    Strcpy(address, FixAddress(temp));

    if (AtCount(address) != 1)
	/* Can't tell local part from host; keep the address whole. */
	p = NULL;
    else {
	/* Split into local part (address) and host (p). */
	p = RDX(address, '@');
	*p++ = '\0';

#if	defined(DO_GETHOSTBYNAME)
	/* If we can find the host's official name use that. */
	if ((host = gethostbyname(p)) != NULL)
	    p = host->h_name;
#endif	/* defined(DO_GETHOSTBYNAME) */

	/* We now have the canonical hostname; glue back together, then
	 * split again so that p points into address. */
	Sprintf(scratch, "%s@%s", address, p);
	Strncpy(address, scratch, sizeof address);
	address[sizeof address - 1] = '\0';
	p = IDX(address, '@');
	*p++ = '\0';
    }

    /* Policy decision; what to put in the path? */
#if	defined(FIXED_PATH)
    Strcpy(hp->path, FIXED_PATH);
#else
    /* p is NULL when there was no single '@'; never hand that to %s. */
#if	defined(GATEWAY_NAME)
    if (p)
	Sprintf(scratch, "%s!%s!%s", GATEWAY_NAME, p, address);
    else
	Sprintf(scratch, "%s!%s", GATEWAY_NAME, address);
#else
    if (p)
	Sprintf(scratch, "%s!%s", p, address);
    else
	Strcpy(scratch, address);
#endif	/* defined(GATEWAY_NAME) */
    Strncpy(hp->path, scratch, sizeof hp->path);
    hp->path[sizeof hp->path - 1] = '\0';
#endif	/* defined(FIXED_PATH) */

    /* Restore the @ if we took it out. */
    if (p)
	*--p = '@';

    /* Add " (full-name)", but only if it fits:  we need room for the
     * space, both parentheses and the terminator. */
    if (fullname[0]
     && strlen(address) + strlen(fullname) + 4 <= sizeof address) {
	p = address + strlen(address);
	*p++ = ' ';
	*p++ = '(';
	p += APPEND(p, fullname);
	*p++ = ')';
	*p++ = '\0';
    }

    /* Stick the canonicalized From: back in. */
    Strcpy(hp->from, address);
    for (p = hp->from; *p; p++)
	*p &= 0x7F;
}


/*
**  Message returned by HackHeader when FixMessageID rejects the Message-ID.
*/
#define ERROR "\
Message-ID syntax error.\n\
*** Please refer to page 23, paragraph 4.6.1. and Appendix D\n\
*** of NIC RFC #822 for the correct syntax, and fix your mailer."

/*
**  Check an RFC822 header for validity and hack it to RFC1036 spec.
**  The Message-ID, Subject, From and References fields of hp are fixed up
**  in place.  Returns NULL if everything is OK, or a pointer to an error
**  message describing the first problem found.  A missing Subject is only
**  an error if SubjectRequired is non-zero; otherwise it becomes "(none)".
*/
char *
HackHeader(hp, SubjectRequired)
    register HBUF		*hp;
    int				SubjectRequired;
{
    /* Message-ID:  if there is one it has to be usable. */
    if (hp->ident[0] != '\0') {
	if (!FixMessageID(hp->ident))
	    return ERROR;
    }
#if	defined(REQUIRE_MESSAGE_ID)
    else
	/* Sendmail (almost) always has a Message-ID; MMDF doesn't, which
	 * is why this is optional. */
	return "Message-ID header missing";
#endif	/* defined(REQUIRE_MESSAGE_ID) */

    /* Newsgroups */
    if (hp->nbuf[0] == '\0')
	return "Newsgroups header missing";

    /* Subject */
    if (hp->title[0] == '\0') {
	if (!SubjectRequired)
	    Strcpy(hp->title, "(none)");
	else
	    return "Subject header missing";
    }

    /* From */
    if (hp->from[0] == '\0')
	return "From header missing";
    FixFrom(hp);

    /* References and In-Reply-To */
    if (hp->followid[0] != '\0')
	FixReferences(hp);

    return NULL;
}


#if	defined(TEST)

char	*Pname = "address-test";

#if	!defined(HAVE_STRERROR)
/*
**  Return a printable representation of errno:  the sys_errlist entry if
**  x is in range, otherwise "Error code <x>" in a static buffer that is
**  overwritten by the next out-of-range call.
*/
char *
strerror(x)
    int			x;
{
    /* Room for the text (sizeof counts its terminator), a sign, and up to
     * three decimal digits per byte of int. */
    static char		buff[sizeof "Error code " + 3 * sizeof(int) + 1];

    if (x >= 0 && x < sys_nerr)
	return sys_errlist[x];
    Sprintf(buff, "Error code %d", x);
    return buff;
}
#endif	/* !defined(HAVE_STRERROR) */

/*
**  Test driver:  read addresses from standard input, one per line, run
**  each through FixFrom and print the result.  Empty lines and lines that
**  start with '#' are skipped.  When input is not a terminal each input
**  line is echoed before its result.
*/
int
main()
{
    HBUF	hp;
    char	buff[BUFSIZ];
    char	*nl;
    int		interactive;

    interactive = isatty(0);
    if (interactive)
	(void)printf("Enter addresses, EOF to exit:\n");
    for ( ; ; ) {
	if (interactive) {
	    (void)printf(">  ");
	    (void)fflush(stdout);
	}
	/* Bounded read; gets() cannot be told how big buff is. */
	if (fgets(buff, sizeof buff, stdin) == NULL)
	    break;
	/* Unlike gets(), fgets() keeps the newline; cut the line there. */
	for (nl = buff; *nl && *nl != '\n'; nl++)
	    continue;
	*nl = '\0';

	if (buff[0] != '#' && buff[0] != '\0') {
	    if (!interactive) {
		(void)printf("%s\n", buff);
		(void)fflush(stdout);
	    }
	    /* NOTE(review): assumes hp.from can hold a BUFSIZ-long line --
	     * confirm against the HBUF declaration. */
	    (void)strcpy(hp.from, buff);
	    FixFrom(&hp);
	    (void)printf("\t-> %s\n\n", hp.from);
	}
    }

    exit(0);
}
#endif	/* defined(TEST) */
