/*
** mg_parse.c for  in 
** 
** Made by vianney rancurel
** Login   <vianney@epita.fr>
** 
** Started on  Wed Aug 25 12:14:11 1999 vianney rancurel
** Last update Thu Oct 28 20:15:46 1999 
*/
#include <ctype.h>
#include "mg.h"

/* parse variables from a file descriptor.
   Reads fd until end of file and, for every entry of the form
   "name sep value newline" (e.g. "name = value"), calls
   proc(dict,name,value).
   Syntax handled by the scanner:
   - '#' starts a comment that runs up to, but not including, the newline,
   - a backslash makes the next byte literal; \n, \r and \t are translated
     to newline, carriage return and tab,
   - between double quotes every byte (except the backslash escape and the
     closing quote) is kept, including blanks, '#', sep and newlines,
   - unquoted spaces and tabs are dropped,
   - only the first unquoted sep of an entry switches from name to value.
   Entries whose name is empty are ignored.
   Name and value are accumulated with str_cat_char(3) in BUFSIZ-byte
   buffers; its error status is returned unchanged.
   An entry that is not terminated by a newline before end of file is
   still handed to proc.
   Returns 0 if OK, ERR_MG_READ if read(2) fails. Might return the errors
   of str_cat_char(3) and of proc */
t_status		parse_vars(fd,sep,dict,proc)
int			fd;		/* File descriptor, e.g. 0  */
int			sep;		/* Separator, e.g. '=' */
t_dict			*dict;		/* Dictionary of strings */
t_dict_str_add_proc	proc;		/* Allows the user to choose
					   between dict_str_add(3) and
					   dict_str_override(3) */
{
  t_boolean		backslash;	/* previous byte was a backslash */
  t_boolean		comment;	/* skipping bytes up to newline */
  t_boolean		quote;		/* inside double quotes */
  char			name[BUFSIZ];
  char			value[BUFSIZ];
  char			*current;	/* buffer being filled: name or value */
  t_status		status;

  quote = FALSE;
  backslash = FALSE;
  comment = FALSE;
  current = name;
  name[0] = 0;
  value[0] = 0;
  while (1)
    {
      char		buf[BUFSIZ];
      int		cc;
      int		i;

      if ((cc = read(fd,buf,sizeof (buf))) < 0)
	return (ERR_MG_READ);
      if (cc == 0)
	{
	  /* end of file: an entry lacking its final newline is still
	     pending in name/value, do not lose it */
	  if (name[0] != 0)
	    {
	      if ((status = proc(dict,name,value)) != 0)
		return (status);
	    }
	  return (0);
	}
      i = 0;
      while (i < cc)
	{
	  char		c;

	  c = buf[i];
	  if (comment)
	    {
	      if (c != '\n')
		{
		  i++;
		  continue ;
		}
	      /* the newline ends the comment and must also terminate
		 the entry, so it is processed below */
	      comment = FALSE;
	    }
	  if (backslash)
	    {
	      switch (c)
		{
		case 'n':
		  c = '\n';
		  break ;
		case 'r':
		  c = '\r';
		  break ;
		case 't':
		  c = '\t';
		  break ;
		default:
		  break ;
		}
	      if ((status = str_cat_char(current,BUFSIZ,c)) != 0)
		return (status);
	      backslash = FALSE;
	      i++;
	      continue ;
	    }
	  if (c == '\\')
	    {
	      backslash = TRUE;
	      i++;
	      continue ;
	    }
	  if (quote)
	    {
	      if (c == '"')
		quote = FALSE;
	      else
		{
		  if ((status = str_cat_char(current,BUFSIZ,c)) != 0)
		    return (status);
		}
	      i++;
	      continue ;
	    }
	  if (c == '"')
	    {
	      quote = TRUE;
	      i++;
	      continue ;
	    }
	  if (c == '#')
	    {
	      comment = TRUE;
	      i++;
	      continue ;
	    }
	  /* once in the value, further separators are ordinary bytes */
	  if (current != value && c == sep)
	    {
	      current = value;
	      i++;
	      continue ;
	    }
	  if (c == ' ' || c == '\t')
	    {
	      i++;
	      continue ;
	    }
	  if (c == '\n')
	    {
	      if (name[0] != 0)
		{
		  if ((status = proc(dict,name,value)) != 0)
		    return (status);
		}
	      name[0] = 0;
	      value[0] = 0;
	      current = name;
	      i++;
	      continue ;
	    }
	  if ((status = str_cat_char(current,BUFSIZ,c)) != 0)
	    return (status);
	  i++;
	}
    }
}

/* sets up the private members of a parse_lines_context.
   The caller must already have filled vec, wordbuf, wordseps and lineseps
   (this is asserted); see a_parse.h for the other user-supplied members.
   The function leaves comment mode, creates the vector that will collect
   the words of the current line (subvec) with the allocation procedures
   of plc->vec, and empties the word buffer.
   Returns 0 if OK, otherwise the status reported by vec_new(3) */
t_status		parse_lines_init(plc)
t_parse_lines_context	*plc;
{
  t_status		err;

  assert(plc->vec && plc->wordbuf && plc->wordseps && plc->lineseps);
  plc->comment = FALSE;
  plc->subvec = vec_new(VEC_BASE,
			FALSE,
			plc->vec->alloc_algorithm_proc,
			plc->vec->alloc_proc,
			plc->vec->realloc_proc,
			plc->vec->free_proc,
			"parse_lines_init",
			&err);
  if (plc->subvec == NULL)
    return (err);
  /* the word buffer is only reset once the line vector exists */
  plc->wordbuf[0] = 0;
  return (0);
}

/* treats the final newline case and releases the private parts of the
   context.
   A word still pending in wordbuf is appended to the current line vector
   (subvec). Then subvec is either passed to vec_str_delete(3) when it is
   empty, or added to plc->vec as the last line.
   Afterwards wordbuf is emptied and subvec is set to NULL, so that the
   context no longer refers to a vector it does not own; calling this
   function again on the same context is then a no-op.
   Returns 0 if OK. Might return the errors of vec_str_add(3) and
   vec_add(3); in that case subvec is left untouched */
t_status		parse_lines_finnish(plc)
t_parse_lines_context	*plc;
{
  t_status		status;

  /* nothing left to finish (already finished, or no line vector) */
  if (plc->subvec == NULL)
    return (0);
  if (plc->wordbuf[0] != 0)
    {
      if ((status = vec_str_add(plc->subvec,
				plc->wordbuf)) != 0)
	return (status);
      plc->wordbuf[0] = 0;
    }
  if (VEC_COUNT(plc->subvec) == 0)
    vec_str_delete(plc->subvec);
  else
    {
      if ((status = vec_add(plc->vec,plc->subvec)) != 0)
	return (status);
    }
  /* subvec was either deleted or handed over to plc->vec */
  plc->subvec = NULL;
  return (0);
}

/* splits a buffer into lines of words.
   Every completed line is a vector of words (subvec) that is added to the
   main vector plc->vec, so neither the number of lines nor the number of
   words per line is bounded by this function. A single word is limited by
   the wordbuflen member of the context.
   Rules applied to each byte:
   - '#' starts a comment; following bytes are skipped up to the newline,
     which is then processed like any other byte,
   - a byte found in wordseps ends the current word (empty words are not
     stored),
   - a byte found in lineseps ends the current word and the current line
     (empty lines are not stored) and a fresh subvec is created,
   - any other byte is appended to the word buffer with str_cat_char(3).
   The parsing state lives in the context, so the function can be called
   repeatedly on consecutive buffers.
   Returns 0 if OK. Might return the errors of vec_str_add(3), vec_add(3),
   vec_new(3) and str_cat_char(3) */
t_status		parse_lines(plc,buf,len)
t_parse_lines_context	*plc;
char			*buf;		/* Buffer to be parsed	*/
int			len;		/* Length of the buffer */
{
  t_status		err;
  int			pos;

  pos = 0;
  while (pos < len)
    {
      char		c;

      c = buf[pos];
      if (plc->comment)
	{
	  if (c != '\n')
	    {
	      pos++;
	      continue ;
	    }
	  /* the newline closes the comment and is examined below */
	  plc->comment = FALSE;
	}
      if (c == '#')
	{
	  plc->comment = TRUE;
	  pos++;
	  continue ;
	}
      if (index(plc->wordseps,c))
	{
	  /* end of word: store it unless it is empty */
	  if (plc->wordbuf[0] != 0)
	    {
	      err = vec_str_add(plc->subvec,plc->wordbuf);
	      if (err != 0)
		return (err);
	      plc->wordbuf[0] = 0;
	    }
	}
      else if (index(plc->lineseps,c))
	{
	  /* end of line: first flush the word in progress */
	  if (plc->wordbuf[0] != 0)
	    {
	      err = vec_str_add(plc->subvec,plc->wordbuf);
	      if (err != 0)
		return (err);
	      plc->wordbuf[0] = 0;
	    }
	  /* then store the line, unless it holds no word at all */
	  if (VEC_COUNT(plc->subvec) != 0)
	    {
	      err = vec_add(plc->vec,plc->subvec);
	      if (err != 0)
		return (err);
	      plc->subvec = vec_new(VEC_BASE,
				    FALSE,
				    plc->vec->alloc_algorithm_proc,
				    plc->vec->alloc_proc,
				    plc->vec->realloc_proc,
				    plc->vec->free_proc,
				    plc->vec->comment,
				    &err);
	      if (plc->subvec == NULL)
		return (err);
	    }
	}
      else
	{
	  err = str_cat_char(plc->wordbuf,plc->wordbuflen,c);
	  if (err != 0)
	    return (err);
	}
      pos++;
    }
  return (0);
}

/* sets up the private part of a bufferize_context.
   Creates the vec_buf used to accumulate data and marks the read buffer
   as empty (len = 0). The members marked as MUST BE SET in a_parse.h
   are not touched here and must be filled by the user.
   Returns 0 if OK, otherwise the status reported by vec_new(3) */
t_status			bufferize_init(bc)
t_bufferize_context		*bc;
{
  t_status			err;

  bc->vec_buf = vec_new(VEC_BASE,
			FALSE,
			alloc_algorithm_factor2,
			alloc_malloc,
			realloc_realloc,
			free_free,
			"bufferize_init",
			&err);
  if (bc->vec_buf == NULL)
    return (err);
  bc->len = 0;
  return (0);
}

/* destroys the private part of a bufferize_context.
   Only the vec_buf member is released, through vec_buf_delete(3); the
   other members of the context (fd, buf, ...) are not touched here and
   remain under the responsibility of the user. */
VOID_FUNC			bufferize_destroy(bc)
t_bufferize_context		*bc;
{
  vec_buf_delete(bc->vec_buf);
}

/* this function extracts the next newline-terminated line from a fd.
   When the read buffer is empty it is refilled with read(2). If the
   buffer holds a newline, the bytes before it are appended to the vec_buf,
   the accumulated line is obtained with vec_buf_str(3), the vec_buf is
   reset with vec_buf_destroy(3) and the bytes following the newline are
   moved to the start of the buffer for the next call. If no newline is
   found the whole buffer is appended to the vec_buf and another read is
   performed, and so on.
   Returns the string built by vec_buf_str(3), without the newline.
   Returns NULL on errors, with the cause in (*status). If NULL is returned
   and (*status) is 0, it means that read(2) reported end of file; the
   trailing bytes that were not followed by a newline can be recovered
   from the vec_buf member.
   After a read error the context still describes an empty buffer, so the
   function can safely be called again */
char				*get_next_line(bc,status)
t_bufferize_context		*bc;
t_status			*status;
{
  while (1)
    {
      char			*nl;

      if (bc->len == 0)
	{
	  int			cc;

	  /* read into a local first: a negative count must never be
	     stored in bc->len, it would be used as a buffer length by
	     the next call */
	  if ((cc = read(bc->fd,bc->buf,bc->buflen)) < 0)
	    {
	      (*status) = ERR_MG_READ;
	      return (NULL);
	    }
	  bc->len = cc;
	}
      if (bc->len == 0)
	{
	  (*status) = 0;
	  return (NULL);
	}
      if ((nl = bindex(bc->buf,bc->len,'\n')) != NULL)
	{
	  char			*str;
	  int			newlen;

	  /* the line is made of what was accumulated so far plus the
	     bytes preceding the newline */
	  if (((*status) = vec_buf_add(bc->vec_buf,
				       bc->buf,
				       nl - bc->buf)) != 0)
	    return (NULL);
	  if ((str = vec_buf_str(bc->vec_buf,status)) == NULL)
	    return (NULL);
	  vec_buf_destroy(bc->vec_buf);
	  /* skip the newline and keep the remainder for the next call */
	  nl++;
	  newlen = bc->len - (nl - bc->buf);
	  if (newlen > 0)
	    {
	      bcopy(nl,bc->buf,newlen);
	      bc->len = newlen;
	    }
	  else
	    bc->len = 0;
	  return (str);
	}
      /* no newline yet: stash the whole buffer and read again */
      if (((*status) = vec_buf_add(bc->vec_buf,
				   bc->buf,
				   bc->len)) != 0)
	return (NULL);
      bc->len = 0;
    }
}

/* bufferizes a file descriptor into a vec_buf.
   Reads bc->fd by chunks of at most bc->buflen bytes into bc->buf and
   appends each chunk to bc->vec_buf until read(2) reports end of file.
   On end of file and on read error bc->len is 0, so the context never
   holds a negative buffer length.
   Returns 0 if OK, ERR_MG_READ if read(2) fails. Might return the errors
   of vec_buf_add(3) */
t_status			bufferize(bc)
t_bufferize_context		*bc;
{
  t_status			status;

  while (1)
    {
      int			cc;

      cc = read(bc->fd,bc->buf,bc->buflen);
      if (cc < 0)
	{
	  bc->len = 0;
	  return (ERR_MG_READ);
	}
      bc->len = cc;
      if (cc == 0)
	return (0);
      if ((status = vec_buf_add(bc->vec_buf,
				bc->buf,
				bc->len)) != 0)
	return (status);
    }
}

/* resets the private part of a tmpl_context: the parser starts outside
   of any variable, with an empty variable buffer.
   varbuf must already point to the user-supplied buffer; the other
   members must be filled by the user as well.
   Always returns 0 */
t_status		tmpl_context_init(tc)
t_tmpl_context		*tc;
{
  tc->invar = FALSE;
  tc->varlen = 0;
  tc->varbuf[0] = 0;
  return (0);
}

/* checks if the parse is not in a variable.
   Returns 0 if OK, returns ERR_MG_BADMATCH if in a variable (meaning
   that the template contains a missing right separator) */
t_status		tmpl_finnish(tc)
t_tmpl_context		*tc;
{
  if (tc->varlen != 0)
    return (ERR_MG_BAD_MATCH);
  return (0);
}

/* applies a template to a buffer.
   If a variable is found (between left separator and right separator) then
   do_proc is called (even if it is a zero string).
   Otherwise write_proc is called. 
   For the moment, the mechanism calling write_proc is not very optimized
   because it calls for each byte but write_proc can bufferize it to
   perform an IO operation.
   Return 0 if OK. Might return various errors */
t_status		tmpl_replace(tc,buf,len)
t_tmpl_context		*tc;
char			*buf;
int			len;
{
  int			i;
  t_status		status;

  i = 0;
  while (i < len)
    {
      char		c;
      
      c = buf[i];
      if (tc->invar)
	{
	  if (c == tc->right_sep)
	    {
	      tc->varbuf[tc->varlen] = 0;
	      tc->varlen = 0;
	      tc->invar = FALSE;
	      if ((status = tc->do_proc(tc->do_data,
					tc->varbuf,
					tc->data)) != 0)
		return (status);
	      i++;
	      continue ;
	    }
	  else
	    {
	      if (tc->varlen < tc->varbuflen)
		tc->varbuf[(tc->varlen)++] = c;
	      else
		return (ERR_MG_BO);
	    }
	}
      else
	{
	  if (c == tc->left_sep)
	    {
	      tc->invar = TRUE;
	      i++;
	      continue ;
	    }
	  else
	    {
	      if ((status = tc->write_proc(tc->write_data,
					   &c,
					   1,
					   tc->data)) != 0)
		return (0);
	    }
	}
      i++;
    }
  return (0);
}

/* is a t_tmpl_write_proc.
   It is used internally by tmpl_str_to_str(3). */
t_status		tmpl_str_write(bs,buf,len,data)
t_bridled_str		*bs;
char			*buf;
int			len;
VOID_PTR		data;
{
  return (str_cat_buf(bs->str,bs->max_len,buf,len));
}

/* is a convenience routine of tmpl_replace(3) for strings.
   It applies the template and catenates the result into a bridled
   string. Left and right separators are assumed to be '%'.
   Do_proc must be provided by the user.
   Returns 0 if OK. Might return various errors */
t_status		tmpl_str_to_str(tmpl,do_proc,data,str,max_len)
char			*tmpl;		/* The template with '%' */
t_tmpl_do_proc		do_proc;	/* The substitution procedure */
VOID_PTR		data;		/* The data passed to do_proc */
char			*str;		/* The destination string */
int			max_len;	/* Maximum allowed length */
{
  char			varbuf[STR_BUFSIZ];
  t_tmpl_context	tc;
  t_bridled_str		bs;
  t_status		status;

  bs.str = str;
  bs.max_len = max_len;
  tc.varbuf = varbuf;
  tc.varbuflen = sizeof (varbuf);
  tc.left_sep = '%';
  tc.right_sep = '%';
  tc.do_proc = (t_tmpl_do_proc)do_proc;
  tc.do_data = (VOID_PTR)(&bs);
  tc.write_proc = (t_tmpl_write_proc)tmpl_str_write;
  tc.write_data = (VOID_PTR)(&bs);
  tc.data = data;
  if ((status = tmpl_context_init(&tc)) != 0)
    return (status);
  if ((status = tmpl_replace(&tc,tmpl,strlen(tmpl))) != 0)
    return (status);
  if ((status = tmpl_finnish(&tc)) != 0)
    return (status);
  return (0);
}

/* is a t_tmpl_write_proc.
   It is used internally by tmpl_file_to_file(3) */
t_status		tmpl_file_write(ofile,buf,len,data)
FILE			*ofile;
char			*buf;
int			len;
VOID_PTR		data;
{
  return (fwrite(buf,sizeof (char),len,ofile));
}

/* is a convenience routine of tmpl_replace(3) for streams.
   It applies the template and fwrite the result into a stream.
   Left and right separators are assumed to be '%'.
   Do_proc must be provided by the user.
   Returns 0 if OK. Might return various errors */
t_status		tmpl_file_to_file(ifile,do_proc,data,ofile)
FILE			*ifile;		/* The input stream */
t_tmpl_do_proc		do_proc;	/* The substitution procedure */
VOID_PTR		data;		/* Data passed to do_proc */
FILE			*ofile;		/* The output file */
{
  char			varbuf[STR_BUFSIZ];
  t_tmpl_context	tc;
  t_status		status;

  tc.varbuf = varbuf;
  tc.varbuflen = sizeof (varbuf);
  tc.left_sep = '%';
  tc.right_sep = '%';
  tc.do_proc = (t_tmpl_do_proc)do_proc;
  tc.do_data = (VOID_PTR)ofile;
  tc.write_proc = (t_tmpl_write_proc)tmpl_file_write;
  tc.write_data = (VOID_PTR)ofile;
  tc.data = data;
  if ((status = tmpl_context_init(&tc)) != 0)
    return (status);
  while (!feof(ifile))
    {
      int		c;
      char		c2;

      c = fgetc(ifile);
      if (c == -1)
	break ;
      c2 = c;
      if ((status = tmpl_replace(&tc,&c2,1)) != 0)
	return (status);
    }
  if ((status = tmpl_finnish(&tc)) != 0)
    return (status);
  return (0);
}
