/*
** extract_title.c					Version 1.0
**
**    -	extracts the title of an HTML document.
**	Scans at most the first 20 lines of the file.
**	If the <TITLE> tag is not found, prints out
**	the file name (the part relative to WWW home).
**
** USAGE:
**	extract_title dir path
**
**	where:	dir is the home directory of WWW doc tree	
**		path is the pathname of the file relative
**		to WWW home.
** EXAMPLE:
**	extract_title /a/b /c/d.html
**
**	- the file is physically /a/b/c/d.html
**	- if <TITLE> is not found /c/d.html is printed out
**	  (i.e. WITHOUT the WWW home path in front of it)
**
** NOTE:
**	This program is designed to be a part of the index
**	search server. This is the reason for funny parameters.
**
** WRITTEN BY:
**	Ari Luotonen, CERN, 8 Sep 1993, luotonen@dxcern.cern.ch
*/

#include <ctype.h>
#include <stdio.h>
#include <string.h>

/* Minimal boolean type and truth values, used for the scanner state flags in main(). */
typedef int BOOL;
#define TRUE 1
#define FALSE 0

/*
** Returns non-zero when the text at s starts with prefix, comparing
** without regard to ASCII letter case (HTML tag names are
** case-insensitive, so "<title>" must match "<TITLE>").
** A string shorter than prefix never matches: its terminating NUL
** differs from the corresponding prefix character.
*/
static int has_tag_prefix(const char *s, const char *prefix)
{
    while (*prefix) {
        if (toupper((unsigned char)*s) != toupper((unsigned char)*prefix))
            return 0;
        s++;
        prefix++;
    }
    return 1;
}

/*
** Program entry point.
**
** argv[1] is the WWW home directory, argv[2] the document path relative
** to it.  The text between <TITLE> and </TITLE> is written to stdout on
** a single line (line breaks inside the title become single spaces and
** any other tags inside it are dropped).  If no <TITLE> tag is seen in
** the first 20 lines read, argv[2] is printed instead.
**
** Exit status:
**	0  a title (or the fallback path) was printed
**	1  wrong number of arguments
**	2  the combined path is too long or the file cannot be opened
*/
int main(int argc, char **argv)
{
    FILE *fp;
    char filename[256];
    char buffer[1024];
    char *p;
    size_t len;
    BOOL found = FALSE;		/* inside <TITLE> ... </TITLE> */
    BOOL tag = FALSE;		/* inside some other <...> tag */
    BOOL newline;		/* current chunk ended with a newline */
    int lines = 0;

    if (argc != 3) {
        fprintf(stderr,
                "\nThis program takes an HTML document and extracts to its stdout\n"
                "the TITLE of the document, all in one line.\n\n"
                "Usage:\n"
                "\t%s WWW-home-directory relative-path\n\n", argv[0]);
        return 1;
    }

    /* Room is needed for both parts, the '/' separator and the NUL. */
    if (strlen(argv[1]) + strlen(argv[2]) + 2 > sizeof filename) {
        fprintf(stderr, "%s: Path \"%s/%s\" is too long\n",
                argv[0], argv[1], argv[2]);
        return 2;
    }
    strcpy(filename, argv[1]);
    if (*filename)
        strcat(filename, "/");
    strcat(filename, argv[2]);

    if (!(fp = fopen(filename, "r"))) {
        fprintf(stderr, "%s: Unable to open file \"%s\"\n",
                argv[0], filename);
        return 2;
    }

    while (lines++ < 20 &&	/* Scan only first 20 lines */
           NULL != (p = fgets(buffer, sizeof buffer, fp))) {
        /*
        ** Strip the line terminator only if one is really there: the
        ** last line of a file, or a line longer than the buffer, ends
        ** without '\n' and must keep its final character.
        */
        len = strlen(p);
        newline = FALSE;
        if (len > 0 && p[len - 1] == '\n') {
            p[--len] = '\0';
            newline = TRUE;
            if (len > 0 && p[len - 1] == '\r')	/* CRLF line ending */
                p[--len] = '\0';
        }

        while (p && *p) {
            if (tag) {
                /* Skip the rest of a non-title tag. */
                p = strchr(p, '>');
                if (!p)
                    break;	/* tag continues on the next line */
                p++;
                tag = FALSE;
            }

            /* Consume text up to the next tag, echoing it if in title. */
            if (found)
                while (*p && *p != '<') fputc(*(p++), stdout);
            else
                while (*p && *p != '<') p++;

            if (!*p) {
                /* A line break inside the title becomes one space. */
                if (found && newline)
                    fputc(' ', stdout);
                break;
            }
            else if (!found && has_tag_prefix(p, "<TITLE>")) {
                p += 7;
                found = TRUE;
            }
            else if (found && has_tag_prefix(p, "</TITLE>")) {
                fclose(fp);
                fputc('\n', stdout);
                return 0;
            }
            else tag = TRUE;
        } /* while stuff in buffer */
    } /* while not EOF and not very many lines read */

    fclose(fp);

    if (found) {
        /*
        ** The title was opened but not closed within the scanned
        ** lines.  Part of it is already on stdout, so just finish the
        ** line instead of appending the file name to it.
        */
        fputc('\n', stdout);
    }
    else {
        /* No title among the first few lines: fall back to the path. */
        printf("%s\n", argv[2]);
    }
    return 0;
}

