From decwrl!uunet!allbery Sun Mar 25 18:44:18 PST 1990 Article 1461 of comp.sources.misc: Path: decwrl!uunet!allbery From: csirmaz@poe.rutgers.edu Newsgroups: comp.sources.misc Subject: v11i088: texpp - TeX preprocessor, 02/02 Message-ID: <82385@uunet.UU.NET> Date: 26 Mar 90 00:33:45 GMT Sender: allbery@uunet.UU.NET Lines: 1630 Approved: allbery@uunet.UU.NET (Brandon S. Allbery - comp.sources.misc) Posting-number: Volume 11, Issue 88 Submitted-by: csirmaz@poe.rutgers.edu Archive-name: texpp/part02 #! /bin/sh # This is a shell archive. Remove anything before this line, then unpack # it by saving it into a file and typing "sh file". To overwrite existing # files, type "sh file -c". You can also feed this as standard input via # unshar, or by typing "sh 'texpp.c' <<'END_OF_FILE' X/*************************************************************************** X * * X * texpp TeX preprocessor, Version 1.2. * X * Laci Csirmaz, DIMACS - Rutgers * X * Feb. 25, 1990 * X * * X *************************************************************************** X X You are granted to use, modify, copy, redistribute this program any X way you want. However no warranties are made for this program or the X accompanying documentation. X X To compile the program simply invoke cc by typing X cc texpp.c -o texpp X On certain computers the 'strdup()' function is missing from the standard X C library. In this case, recompile the preprocessor by X cc -DSTRDUP texpp.c -o texpp X X Please send your comments, suggestions, etc. 
to: X csirmaz@cs.rutgers.edu X X ***************************************************************************/ X X X/*-------------------------------------------------------------------------* X | include files | X *-------------------------------------------------------------------------*/ X#include X#include X#include X#include X#include X#include /* NOTE(review): the six header names after #include are missing in this copy -- restore them from the original posting */ X X/*-------------------------------------------------------------------------* X | prototypes not in UNIX | X *-------------------------------------------------------------------------*/ X#define byte unsigned char /* shorthand: tokens and buffers are stored as unsigned char */ Xchar *calloc(); char *sprintf(); /* NOTE(review): K&R-style declarations; ISO C declares calloc as returning void * and sprintf as returning int, so these may conflict with the standard headers -- confirm before building with a modern compiler */ X X/*-------------------------------------------------------------------------* X | mode and style | X *-------------------------------------------------------------------------*/ X#define MATH_MODE 0x01 /* global_mode bit: in math mode */ X#define DISP_MODE 0x02 /* global_mode bit: in display mode */ X#define DEFINE_MODE 0x04 /* global_mode bit: reading a TeXpp definition */ X#define COMMENT_MODE 0x08 /* global_mode bit: skipping a comment */ X X#define SIMPLE_STYLE 0 /* style for `$' or `$$' */ X#define DEFINE_STYLE (-1) /* style for %mdefine */ X Xint global_mode; /* mode flags - in math, display mode; X skipping a comment, or reading a TeXpp X definition. */ Xint mode_style; /* MATH and DISP style number to distinguish X between different pairs of math and display X mode switches. 
*/ X /* Tests and updates of the global_mode bits. in_comment_mode() and in_def_mode() yield the masked bit itself, the other tests yield 0 or 1; the set_* macros expand to a braced statement. */ X#define in_comment_mode() (global_mode&COMMENT_MODE) X#define in_def_mode() (global_mode&DEFINE_MODE) X#define in_disp_mode() ((global_mode&DISP_MODE)!=0) X#define in_math_mode() ((global_mode&MATH_MODE)!=0) X#define in_plain_mode() ((global_mode&(DISP_MODE|MATH_MODE))==0) X#define set_plain_mode() {global_mode&= ~(DISP_MODE|MATH_MODE);} X#define set_disp_mode() {global_mode |= DISP_MODE;} X#define set_math_mode() {global_mode |= MATH_MODE;} X X/*--------------------------------------------------------------------------*/ X/* input/output variables */ X/*--------------------------------------------------------------------------*/ XFILE *input_file; /* stream to read from */ XFILE *output_file; /* stream to write to */ XFILE *error_file; /* stream for error messages */ Xchar *input_file_name; /* the actual input file name */ Xint input_line_number; /* which line we are in */ X Xint exit_value=0; /* 1 if an error occurred */ X Xunsigned output_position=0; /* where the next token goes */ Xunsigned last_output_position=0;/* output position where the last symbol starts */ Xunsigned error_position=0; /* end of last error position */ X X#define LAST_OUT (last_output_position) X#define CURRENT_OUT (output_position) X#define ERROR_OUT (error_position) X X/*-------------------------------------------------------------------------* X | characters used as special tokens | X *-------------------------------------------------------------------------*/ X#define E_LBRACE ('{'|0x80) /* replaces \{ */ X#define E_RBRACE ('}'|0x80) /* replaces \} */ X#define E_PCT_MARK ('%'|0x80) /* replaces \% */ X#define E_BACKSLASH ('\\'|0x80) /* replaces \\ */ X#define E_DOLLAR ('$'|0x80) /* replaces \$ */ X#define E_EOF 255 /* replaces EOF */ X#define E_KEYWORD 254 /* keyword ahead */ X X#define make_par(x) (((x)-'1')|0x80)/* replaces #1 ... 
#9 */ X#define extract_par(x) ((x)-0x80) /* parameter token back to its index 0..8 */ X#define is_par(x) (0x80<=(x)&&(x)<0x89) /* true for the tokens 0x80..0x88 produced by make_par() */ X X/*-------------------------------------------------------------------------* X | Storing and retrieving macro text | X *-------------------------------------------------------------------------*/ Xtypedef struct MACRO { /* NOTE(review): the typedef names no alias; only the tag `struct MACRO' is introduced */ X byte type; /* K_* flag bits, see the MACRO flags */ X byte leftpar,rightpar; /* number of left and right pars */ X int style; /* style if mode switch word */ X byte *name; /* macro name */ X byte *body; /* macro body, can be NULL */ X struct MACRO *link; /* pointer to the next macro in the same hash_table chain */ X struct MACRO *keyword; /* pointer to the next \-keyword on the mode_keywords list */ X}; X X/*--------------------------- MACRO flags ---------------------------------*/ X#define K_MATH 0x01 /* 1 for MATH and DISP mode only */ X#define K_PRESERVE 0x02 /* 1 for \preserve keyword */ X#define K_BACKSLASH 0x04 /* 1 if starts with backslash */ X#define K_CHECKLETTER 0x08 /* 1 if cannot be followed by a letter */ X#define K_INOUT 0x10 /* bit for IN (0) or OUT (1) mode switch */ X#define K_MATHDISP 0x20 /* bit for MATH (0) or DISP (1) mode switch */ X#define K_STANDALONE 0x40 /* 1 for identical IN and OUT mode switch */ X /* Accessors and flag tests; each takes a pointer to a struct MACRO. The flag tests yield the masked bit, is_math_k() and is_in_k() yield 1 when the bit is clear, and is_modeswitch_k() is true for a positive style number. */ X#define is_math_macro(x) ((x)->type & K_MATH) X#define word_length_k(x) strlen((char *)((x)->name)) X#define style_k(x) ((x)->style) X#define body_k(x) ((x)->body) X#define left_pars_k(x) ((x)->leftpar) X#define right_pars_k(x) ((x)->rightpar) X#define is_preserve_k(x) ((x)->type & K_PRESERVE) X#define is_backslash_k(x) ((x)->type & K_BACKSLASH) X#define is_modeswitch_k(x) ((x)->style > 0) X#define is_math_k(x) (((x)->type & K_MATHDISP)==0) X#define is_in_k(x) (((x)->type & K_INOUT)==0) X#define is_standalone_k(x) ((x)->type & K_STANDALONE) X#define check_letter_k(x) ((x)->type & K_CHECKLETTER) X X/*-------------------------------------------------------------------------* X | symbols | X *-------------------------------------------------------------------------*/ X#define SOFT_DELIMITER 1 /* space, tab, newline */ 
X#define HARD_DELIMITER 2 /* newline in DEFINE_MODE */ X#define DELIMITER 3 /* control character, not soft delimiter */ X#define MACRO_DELIM 4 /* macro text delimiter (%) in DEFINE_MODE */ X#define MATH_IN 5 /* entering math mode */ X#define MATH_OUT 6 /* leaving math mode */ X#define DISP_IN 7 /* entering displayed mode */ X#define DISP_OUT 8 /* leaving displayed mode */ X#define PRESERVE 9 /* \preserve keyword */ X#define DEF_KEYWORD 10 /* %define keyword */ X#define MDEF_KEYWORD 11 /* %mdefine keyword */ X#define UNDEF_KEYWORD 12 /* %undefine keyword */ X#define MATH_KEYWORD 13 /* %mathmode keyword */ X#define DISP_KEYWORD 14 /* %dispmode keyword */ X#define COMMENT 15 /* comment in a line */ X#define EMPTY_LINE 16 /* empty line, cannot be in a macro */ X#define WORD 17 /* a word of visible characters */ X#define OPEN 18 /* { */ X#define CLOSE 19 /* } */ X#define ENDFILE 20 /* at end of file */ X#define PARAMETER 21 /* #1 .. #9 */ X Xint SYMBOL; /* type of the last symbol produced by next_symbol(): one of the symbol codes */ Xint S_aux1; /* if SYMBOL==SOFT_DELIMITER then S_aux1=0 says that the delimiter vanishes at substitution (next_symbol() stores 1 for a newline); if SYMBOL==PARAMETER then the parameter's value (0..8) */ Xstruct MACRO *S_aux2; /* if SYMBOL==WORD then the corresponding X MACRO entry, or NULL if none */ X X/*-------------------------------------------------------------------------* X | Preprocessor units: result codes, e.g. as returned by deal_range() | X *-------------------------------------------------------------------------*/ X#define X_PARAMETER 0 /* a parameter */ X#define X_DMODE_OUT 1 /* $ or $$ leaving mode */ X#define X_XMODE_OUT 2 /* other mode closing symbol */ X#define X_CLOSE 3 /* closing brace */ X#define X_ERROR 4 /* error encountered */ X#define X_OTHER 5 /* other special symbol */ X X/*-------------------------------------------------------------------------* X | TeX and TeXpp texts | X *-------------------------------------------------------------------------*/ X#define T_DEFINE "define" /* TeXpp keywords after % */ X#define T_DEFINE_LEN 6 /* number of characters in T_DEFINE */ X#define T_MDEFINE "mdefine" 
X#define T_MDEFINE_LEN 7 /* each T_xxx_LEN is the number of characters in the matching T_xxx text */ X#define T_UNDEFINE "undefine" X#define T_UNDEFINE_LEN 8 X#define T_MATHMODE "mathmode" X#define T_MATHMODE_LEN 8 X#define T_DISPMODE "dispmode" X#define T_DISPMODE_LEN 8 X X#define T_PRESERVE "\\preserve" /* should start with backslash!!! */ X#define T_PRESERVE_LEN 9 /* the backslash plus the eight letters */ X X#define TeXpp_MACRO_DEFINITION "%%% TeXpp macro definition %" X /* text that close_macro_definition() stores in the output in place of a TeXpp definition */ X X/*-------------------------------------------------------------------------* X | error message texts | X *-------------------------------------------------------------------------*/ X#define TEX_ERROR_FORMAT "%%%%%%TeXpp error in %s line %d: " X /* used as a format to insert error message into TeX text */ X#define ERR_ERROR_FORMAT "Error in %s line %d: " X /* used as a format to write error message into stderr */ X /* Message texts handed to error(); those containing %d or %s are formats whose arguments follow in the call, e.g. error(PARAMETER_TWICE,S_aux1+1). */ X#define CANNOT_OPEN_FILE "cannot open the file" X#define WRONG_FORMAL_PARAMETER "no digit after #-mark" X#define PARAMETER_TWICE "parameter #%d declared twice" X#define WRONG_MACRO_NAME "macro name expected" X#define WRONG_MODE_SWITCH_DEF "wrong mode switch keyword definition" X#define MISSING_DELIMITER "missing macro text delimiter %% " X#define TOO_LESS_LEFT_PARAMS "less than %d left parameters for %s " X#define TOO_LESS_PARAMS "less than %d parameters for %s " X#define TOO_LONG_MACRO_DEF "too long definition for macro %s " X#define TOO_LONG_PARAMETER "too long parameter for macro %s " X#define UNDEFINED_PARAMETER "parameter #%d wasn't declared" X#define WRONG_DOLLAR_SWITCH "erroneous $ mode switch" X#define WRONG_CLOSING_DOLLAR "erroneous closing $ mode switch" X#define EMPTY_LINE_IN_MODE "empty line in %s mode" X#define ENDFILE_IN_MODE "end of file in %s mode" X#define WRONG_MODE_SWITCH "erroneous %s mode switch" X#define OUT_OF_MEMORY "no more memory" X Xvoid error(); /*VARARGS1*/ /* declared early because it is called with a varying number of arguments throughout the code */ X X/*=========================================================================*/ X/* standard procedures not in UNIX */ 
X/*=========================================================================*/ X#define upper(x) ((x)|0x20) /* despite its name, sets bit 0x20: maps ASCII letters to lower case */ X Xint stricmp(left,right) char *left,*right; X/* compares strings with no case -- works only with ASCII characters; returns 0 when the strings agree, otherwise the difference of the characters where the scan stopped */ X{ X while(*left != 0 && (*left == *right || X (isalpha(*left) && isalpha(*right) && upper(*left)==upper(*right))) X ) { left++; right++;} X return(*left-*right); X} X Xvoid setmem(to,len,c) byte *to; unsigned len; byte c; X/* fills `len' bytes starting from `to' by the value of `c'. NOTE(review): in this copy the rest of the loop and the opening of the comment that follows are missing. */ X{unsigned i; X for(i=0;i126, returns E_EOF on end of file, and makes some | X | local translations depending on the mode stored in `global_mode': | X | o in COMMENT_MODE, all characters are returned. In | X | o in other modes the pairs \{ \} \$ \% and \\ are coded as single chars| X | o in DEFINE_MODE pairs #1 .. #9 are recognized as parameters; and ## | X | is replaced by a single # char. | X *-------------------------------------------------------------------------*/ X Xint next_char() /* lowest level reading from `input_file' */ X{int c,c1; X while((c=getc(input_file))==0 || c>0x7E);/* skip 0 and >126 chars */ X if(c<0) return(E_EOF); /* here is the end of the file */ X if(in_comment_mode()) return(c); /* skipping a comment */ X if(c=='\\'){ /* the char is backslash */ X switch(c1=getc(input_file)){ /* next char */ Xcase '%': c=E_PCT_MARK; break; Xcase '{': c=E_LBRACE; break; Xcase '}': c=E_RBRACE; break; Xcase '\\': c=E_BACKSLASH; break; Xcase '$': c=E_DOLLAR; break; Xdefault: ungetc(c1,input_file); break;/* simply put back the ahead char */ X } X } else if(c=='#' && in_def_mode()){ /* check formal parameters */ X c1=getc(input_file); X if('1'<=c1 && c1<='9') c=make_par(c1); X else if(c1!='#'){ X error(WRONG_FORMAL_PARAMETER); X ungetc(c1,input_file); X } X } X return(c); X} X X/*-------------------------------------------------------------------------* X | On the medium level, values given by `next_char()' are passed 
over as | X | tokens. But tokens can be read AHEAD, so special procedures are used to | X | deal with them. The circular buffer `token_ahead[]' stores the tokens | X | read ahead; its size must be a power of 2. Procedures handling tokes: | X | -- initialize_token_reading() should be called first. | X | -- spy_token() returns the next token but does not advances ahead. | X | -- spy_string_ahead() returns TRUE(!=0) if the next item agrees with | X | the parameter, and checks whether the item following the string is | X | a white space or is NOT a letter (to agree with TeX's backslash | X | convention). | X | -- get_next_token() simply returns the next character. | X | -- skip_tokens(n) skips `n' tokens ahead. | X | To comply with the mode dependent character reading, the spied chars | X | should not change the mode -- so be careful when spying ahead ... | X *-------------------------------------------------------------------------*/ X X#define MAX_TOKEN_AHEAD 128 /* must be a power of 2 */ Xbyte token_ahead[MAX_TOKEN_AHEAD]; /* circular buffer */ Xint token_ahead_in=0, token_ahead_out=0; /* buffer pointers */ X X#define initialize_token_reading() {token_ahead_in=token_ahead_out=0;} X Xbyte spy_token_ahead() /* Returns the next token but does not advances */ X{ X if(token_ahead_in==token_ahead_out){ /* ahead buffer is empty */ X token_ahead[token_ahead_in]=next_char(); X token_ahead_in=(token_ahead_in+1)&(MAX_TOKEN_AHEAD-1); X } X return(token_ahead[token_ahead_out]); X} X X#define FOLLOW_NOTHING 0 X#define FOLLOW_NO_LETTER 1 X#define FOLLOW_SPACE 2 X Xint spy_string_ahead(str,follow_up) char *str; int follow_up; X{int t,i,same; byte tt; X t=token_ahead_out; same=1; X while(same && (*str || follow_up)){ X if(t==token_ahead_in){ /* should read ahead */ X i=(token_ahead_in+1)&(MAX_TOKEN_AHEAD-1); X if(i!=token_ahead_out){ X token_ahead[t]=next_char(); token_ahead_in=i; X } else return(0); /* ahead buffer is full, not found */ X } X tt=token_ahead[t]; X if(*str){ X 
same=((unsigned char)(*str))==tt; X str++; t=(t+1)&(MAX_TOKEN_AHEAD-1); X } else { X same=follow_up==FOLLOW_NO_LETTER ? ((tt > 127) || !isalpha(tt)) : X (tt==' ' || tt=='\t'); X follow_up=0; X } X } X return(same); X} X Xint get_next_token() /* gives the next token */ X{byte res; X if(token_ahead_in==token_ahead_out) X return(next_char()); X res=token_ahead[token_ahead_out]; X token_ahead_out=(token_ahead_out+1)&(MAX_TOKEN_AHEAD-1); X return(res); X} X Xvoid skip_tokens(n) int n; /* skips the next `n' subsequent tokens */ X{int stored; X stored=(token_ahead_in+MAX_TOKEN_AHEAD-token_ahead_out) X &(MAX_TOKEN_AHEAD-1); X if(n 0) next_char(); X } X} X X/*=========================================================================*/ X/* token output */ X/*=========================================================================*/ X X/*-------------------------------------------------------------------------* X | Output is done through double buffering: OUT_BUFFER and OTHER_OUT_ | X | BUFFER hold the output until the other is full. This means that every | X | time the last OUT_BUFFER_LEN output tokens are recoverable. This | X | mechanism is used to store macro parameters which are erased after | X | substitution. | X | -- output_position is used as an absolute position pointer. | X | -- alloc_outbuffers() allocates memory for the buffers. | X | -- store_token(t) puts `t' into the output buffer. | X | -- store_string(str) puts the tokens of `str' into the output buffer. | X | -- flush_output() flushes output buffers. | X | -- set_output_position(pos) erases all output written after the absolute| X | position `pos', if it is possible. | X | -- retrieve_out(from,till,to) reads back the output between positions | X | `from' and `till' and stores it at `to'. 
| X *-------------------------------------------------------------------------*/ X X#define OUT_BUFFER_LEN 16384 /* should be a power of 2 */ X Xbyte *OUT_BUFFER,*OTHER_OUT_BUFFER; X Xint other_buffer_is_full=0; /* indicates if OTHER_OUT_BUFFER is full */ Xint output_index=0; /* next free place in OUT_BUFFER */ X Xint alloc_outbuffers(){ X OUT_BUFFER=(byte*)malloc(OUT_BUFFER_LEN); X OTHER_OUT_BUFFER=(byte*)malloc(OUT_BUFFER_LEN); X return(OUT_BUFFER==NULL || OTHER_OUT_BUFFER==NULL); X} X Xvoid write_output(from,len) byte *from; int len; X/* writes `len' tokens to `output_file' with appropriate translation */ X{byte token; X while(len-- > 0){ X switch(token = *from){ Xcase E_LBRACE: putc('\\',output_file); putc('{',output_file); break; Xcase E_RBRACE: putc('\\',output_file); putc('}',output_file); break; Xcase E_PCT_MARK: putc('\\',output_file); putc('%',output_file); break; Xcase E_BACKSLASH: putc('\\',output_file); putc('\\',output_file); break; Xcase E_DOLLAR: putc('\\',output_file); putc('$',output_file); break; Xdefault: if(token < 128) putc((char)token,output_file); X } X from++; X } X} X Xvoid store_token(t) int t; /* puts token `t' into OUT_BUFFER */ X{byte *bf; X OUT_BUFFER[output_index]=t; X output_index++; output_index &= OUT_BUFFER_LEN-1; X output_position++; X if(output_index==0){ /* overturn */ X if(other_buffer_is_full!=0){ /* write OTHER_OUT_BUFFER */ X write_output(OTHER_OUT_BUFFER,OUT_BUFFER_LEN); X } X other_buffer_is_full=1; X bf=OUT_BUFFER; OUT_BUFFER=OTHER_OUT_BUFFER; OTHER_OUT_BUFFER=bf; X } X} X Xvoid store_string(str) char *str; /* stores the elements of the string */ X{ X while(*str){ store_token(*str); str++;} X} X Xvoid flush_output() /* writes everything out */ X{ X if(other_buffer_is_full) X write_output(OTHER_OUT_BUFFER,OUT_BUFFER_LEN); X other_buffer_is_full=0; X write_output(OUT_BUFFER,output_index); X output_index=0; X} X Xint set_out_position(pos) unsigned pos; X/* erases everything which was written after position `pos' -- if 
possible */ X{unsigned back; byte *bf; X if(posname=(byte *)strdup(word))==NULL){ X error(OUT_OF_MEMORY); return(NULL); X } X } X new_macro_entry->link=NULL; X return(new_macro_entry); X} X Xvoid insert_macro() /* inserts `new_macro_entry' into its place */ X{ X if(new_macro_entry==NULL) return; X new_macro_entry->link=hash_table[new_hashcode]; X hash_table[new_hashcode]=new_macro_entry; X} X Xvoid unlink_macro(old,hashcode) struct MACRO *old; unsigned hashcode; X/* unlinks "old" from the hash table */ X{struct MACRO *k,*k1; X hashcode%=PRIME; k=hash_table[hashcode]; /* unlink from hash table */ X if(k==old) hash_table[hashcode]=old->link; X else { X while(k1=k->link, k1!=NULL && k1!=old) k=k1; X k->link=old->link; X } X if(is_backslash_k(old)){ /* unlink from keyword */ X if(mode_keywords==old) mode_keywords=old->keyword; X else { X k=mode_keywords; X while(k1=k->keyword, k1!=NULL && k1!=old) k=k1; X k->keyword=old->keyword; X } X } X} X Xint set_macro_structure(k,type,left_par,right_par,len) X struct MACRO *k; int type,left_par,right_par; unsigned len; X/* fills k with the given values */ X{ X k->type &= ~K_MATH; /* clear K_MATH bit */ X if(type) k->type |= K_MATH; /* set K_MATH bit if necessary */ X k->leftpar=left_par; X k->rightpar=right_par; X if(macro_text_length+len < TEXT_LENGTH){ /* reserved memory */ X k->body=macro_text+macro_text_length; X macro_text_length+=len; X return(0); X } X if((k->body=(byte *)malloc(len))==NULL){ X error(OUT_OF_MEMORY); return(1); X } X return(0); X} X Xvoid set_modeswitch(s,disp,standalone,out) X struct MACRO *s; int disp,standalone,out; X/* sets the appropriate mode for "s". 
Also puts it on the mode_keyword list */ X{int last_char; struct MACRO *k; X if(s==NULL) return; X if(out==0) next_style_number++; /* an IN or standalone keyword opens a new style number; an OUT keyword reuses the current one */ X s->style=next_style_number; X s->type &= ~(K_INOUT | K_MATHDISP | K_STANDALONE); X if(standalone) s->type |= K_STANDALONE; X if(disp) s->type |= K_MATHDISP; X if(out) s->type |= K_INOUT; X if(*(s->name)=='\\' && !is_backslash_k(s)){ /* starts with backslash */ X last_char=(s->name)[word_length_k(s)-1]; X if(last_char < 128 && isalpha(last_char)) /* name ends in a letter */ X s->type |= K_CHECKLETTER; X s->type |= K_BACKSLASH; X k=mode_keywords; /* is it on the list ? */ X while(k!=NULL && k!=s)k=k->keyword; X if(k==NULL){ /* not yet: push it on the front */ X s->keyword=mode_keywords; X mode_keywords=s; X } X } X} X X/*---------------------------------------------------------------------------*/ Xstruct MACRO *search_word(word,hashcode) byte *word; unsigned hashcode; X/* walks the hash_table chain selected by `hashcode' and returns the structure whose name agrees with `word', or NULL if there is none. */ X{struct MACRO *k; X k=hash_table[hashcode%PRIME]; X while(k!=NULL){ X if(strcmp(word,k->name)==0) return(k); X k=k->link; X } X return(NULL); X} X Xstruct MACRO *check_backslash_keyword(i) int i; X/* scans the mode_keywords list and returns the first structure whose `name', taken from its `i'-th character on, is accepted by spy_string_ahead(); returns NULL if there is none. */ X{struct MACRO *k; X k=mode_keywords; while(k!=NULL){ X if(spy_string_ahead((char *)((k->name)+i), X check_letter_k(k) ? FOLLOW_NO_LETTER : FOLLOW_NOTHING)) X return(k); /* found */ X k=k->keyword; X } X return(NULL); X} X X/*=========================================================================*/ X/* Word handling */ X/*=========================================================================*/ X X/*-------------------------------------------------------------------------* X | Words, i.e. character sequences between white spaces are stored sepa- | X | rately (not only in the output buffers); also their hash code is | X | computed "on the fly". Macro handling routines get their appropriate | X | parameters here. 
| X | -- alloc_word() allocates initial memory. | X | -- clear_word_store() should be called before a new word is dealt with. | X | -- store_word_token(t) store `t' as the next word constituent. | X | -- close_word_store() closes the word. | X | -- prepare_new_macro_entry() the last word is becoming a new macro. | X | -- remove_macro() the last word is a macro to be "undefined". | X | -- look_up_word() searches the stored word as a macro. | X *-------------------------------------------------------------------------*/ X#define MAX_WORD_LENGTH 512 /* size of WORD_STORE in bytes; longer words are not stored in full */ Xbyte *WORD_STORE; /* tokens of the last word */ Xint word_store_index; /* index to WORD_STORE */ Xunsigned word_hash_code; /* hash code computed on the fly */ X Xint alloc_word() /* allocates WORD_STORE; returns nonzero if the allocation failed */ X{ WORD_STORE=(byte*)malloc(MAX_WORD_LENGTH); X return(WORD_STORE==NULL); X} X /* empties the word store and resets the hash code to its start value */ X#define clear_word_store() {word_store_index=0;word_hash_code=952;} X Xvoid store_word_token(t) int t; X/* stores the word constituent `t' in `WORD_STORE[]', and updates the hash code of the word "on the fly". Once the store is full the index is stepped back, so further tokens overwrite the last slot. */ X{ X WORD_STORE[word_store_index++]=t; X word_hash_code = ((t+word_hash_code)<<4)+t; X if(word_store_index==MAX_WORD_LENGTH) word_store_index--; X} X /* terminates the stored word with a 0 byte */ X#define close_word_store() {WORD_STORE[word_store_index]=0;} X X#define prepare_new_macro_entry() \ X new_macro(S_aux2,WORD_STORE,word_hash_code) X X#define remove_macro() unlink_macro(S_aux2,word_hash_code) X X#define look_up_word() search_word(WORD_STORE,word_hash_code) X X/*========================================================================*/ X/* symbols */ X/*========================================================================*/ X X/*------------------------------------------------------------------------* X | Highest level reading. The input text is broken into "symbol"s which | X | are passed to the main loop. A "symbol" is a sequence of tokens; and | X | `store_token()' is called with all tokens in it. 
| X | o SYMBOL is the type of the symbol read; | X | o LAST_OUT holds the output position where the tokens forming the | X | last symbol start; | X | o S_aux1 contains some extra information about the SYMBOL; | X | o S_aux2 is the associated macro definition for the symbol if it is a | X | WORD. | X | o LAST_TOKEN and last_keyword are auxiliary variables to prevent | X | double parsing of certain sequences. | X | The procedures which are called outside: | X | -- initialize_symbol_reading() should be called first. | X | -- next_symbol() produces the next symbol. | X | -- skip_line_as_comment() skips everything till the end of line. | X | -- skip_soft_delimiters() reads until the SYMBOL differs from | X | SOFT_DELIMITER. | X *------------------------------------------------------------------------*/ X Xint LAST_TOKEN='\n'; /* last token dealt with */ Xstruct MACRO *last_keyword; /* parsed keyword */ X X#define initialize_symbol_reading() {LAST_TOKEN='\n';} X Xint check_token(t) int t; /* checks the type of the next token */ X{ X t &= 0xFF; X if(t<=' ' || is_par(t)) return(0); /* word boundary */ X switch(t){ Xcase '{': case '}': case '%': case E_EOF: Xcase '$': return(0); /* word boundary */ Xcase '\\': if(in_def_mode() && spy_string_ahead("\\\n",FOLLOW_NOTHING)) X return(0); /* word boundary */ X return(2); /* check for keywords */ Xdefault: return(1); /* word constituent */ X } X} X Xvoid next_symbol() /* produces the next SYMBOL */ X{int t,lt,len,i; struct MACRO *k; X LAST_OUT=CURRENT_OUT; /* where SYMBOL output starts */ X if(in_comment_mode()){ /* read until the end of line */ X while((t=get_next_token())!='\n' && t!=E_EOF) store_token(t); X input_line_number++; X if(t==E_EOF) t='\n'; X LAST_TOKEN=t; store_token(t); X SYMBOL=COMMENT; return; X } Xtry_again: /* after \newline in def mode */ X t=get_next_token(); lt=LAST_TOKEN; LAST_TOKEN=t; X store_token(t); X clear_word_store(); store_word_token(t); X switch(t){ Xcase E_EOF: LAST_TOKEN='\n'; SYMBOL=ENDFILE; 
return; Xcase '{': SYMBOL=OPEN; return; Xcase '}': SYMBOL=CLOSE; return; Xcase '%': if(in_def_mode()) {SYMBOL=MACRO_DELIM; return;} X SYMBOL=COMMENT; X if(lt=='\n'){ /* check for %keywords */ X len=0; X if(spy_string_ahead(T_DEFINE,FOLLOW_SPACE)){ X len=T_DEFINE_LEN; SYMBOL=DEF_KEYWORD; X } else if(spy_string_ahead(T_MDEFINE,FOLLOW_SPACE)){ X len=T_MDEFINE_LEN; SYMBOL=MDEF_KEYWORD; X } else if(spy_string_ahead(T_UNDEFINE,FOLLOW_SPACE)){ X len=T_UNDEFINE_LEN; SYMBOL=UNDEF_KEYWORD; X } else if(spy_string_ahead(T_MATHMODE,FOLLOW_SPACE)){ X len=T_MATHMODE_LEN; SYMBOL=MATH_KEYWORD; X } else if(spy_string_ahead(T_DISPMODE,FOLLOW_SPACE)){ X len=T_DISPMODE_LEN; SYMBOL=DISP_KEYWORD; X } X if(len>0) skip_tokens(len); X } X return; Xcase ' ': case '\t': S_aux1=0; SYMBOL=SOFT_DELIMITER; return; X /* S_aux1==0 says that the delimiter vanishes at substitution */ Xcase '\n': input_line_number++; X S_aux1=1; SYMBOL= lt=='\n' ? EMPTY_LINE : X in_def_mode() ? HARD_DELIMITER : SOFT_DELIMITER; X return; Xcase '$': X if(in_math_mode() && mode_style==SIMPLE_STYLE) /* single $ */ X SYMBOL=MATH_OUT; X else if(spy_token_ahead()=='$'){ /* double $$ */ X skip_tokens(1); store_token('$'); X SYMBOL=in_disp_mode() && mode_style==SIMPLE_STYLE ? X DISP_OUT : DISP_IN; X } else SYMBOL=MATH_IN; X return; Xcase '\\': /* E_KEYWORD means a \keyword was succesfully parsed */ X k=lt==E_KEYWORD ? 
last_keyword : check_backslash_keyword(1); X if(k!=NULL){ /* LAST_TOKEN=='\\' */ X len=word_length_k(k)-1; /* number of tokens in k */ X for(i=0;i=0;i--){ X if(pop_par(&replace,&start[i],&pend[i])) return(0); X len += pend[i]-start[i]+1; X } X if(*from < replace) replace = *from; /* place to replace from */ X *from=replace; X if(len>0){ X memory=malloc(len); X p=(byte *)memory; X if(p==NULL){ X error(OUT_OF_MEMORY); X return(0); X } X for(i=0;iname); X free(memory); return(0); X } X p+=pend[i]-start[i]+1; X } X } X if(set_out_position(replace)){ X error(TOO_LONG_PARAMETER,k->name); X if(memory!=NULL) free(memory); return(0); X } X body=k->body; X while(t = *body++){ X if(is_par(t)){ X p=parameters[extract_par(t)]; X while(t = *p++) store_token((int)t); X } else store_token((int)t); X } X if(memory!=NULL) free(memory); X return(1); X} X X/*=========================================================================*/ X/* Macro definition */ X/*=========================================================================*/ X X/*-------------------------------------------------------------------------* X | This part deals with macro and keyword definitions. All of them vanish | X | from the output text, and are replaced by TeXpp_MACRO_DEFINITION. The | X | macro text is expanded in the output buffer, and copied into the memory | X | later. Calling `translate_parameters()' changes all references to the | X | formal parameters from their face value into their position. | X | -- init_macro_definition() saves the old mode, the actual output posi- | X | tion, and changes into DEFINE_MODE. | X | -- close_macro_definition() restores the original mode rewinds the | X | output, and inserts the appropriate text. | X | -- read_macro_definition(type) handles the macro definition. The `type' | X | tells whether the definition was %mdefine (=1) or not (=0). | X | -- undefine_macro() handler the case %undefine. The macro is unliked | X | both from the hash table and the keyword list. 
| X | -- define_keyword(type) deals with the %mathmode and %dispmode keywords | X *-------------------------------------------------------------------------*/ X Xint old_mode, old_style; Xunsigned save_out_position, start_macro_text; Xint params[9]; X Xvoid translate_parameters(body) byte *body; X/* replaces parameter #i by its absolute position */ X{byte p; X while((p = *body)){ X if(is_par(p)) *body=make_par(params[extract_par(p)]+'0'); X body++; X } X} X Xvoid init_macro_definition() X{int i; X old_mode=global_mode; old_style=mode_style; X global_mode=DEFINE_MODE; /* save old mode, switch to define */ X save_out_position=CURRENT_OUT; /* only a single % has been stored */ X shrink_par_stack(); /* no previous parameter */ X flush_output(); /* no backtrack beyond this point */ X for(i=0;i<9;i++)params[i]=0; /* no parameters defined */ X} X Xvoid close_macro_definition() X{ X set_out_position(save_out_position);/* cancel garbage */ X store_string(TeXpp_MACRO_DEFINITION); X skip_line_as_comment(); /* do not deal with the rest */ X global_mode=old_mode; mode_style=old_style; X X} X Xvoid read_macro_definition(type) int type; X/* reads a macro definition -- issues appropriate error messages */ X{int result; struct MACRO *k; int left_params,all_params; X init_macro_definition(); X left_params=0; Xnext_left_param: /* read leftist parameters */ X next_symbol(); skip_soft_delimiters(); X if(SYMBOL==PARAMETER){ X if(params[S_aux1]!=0){ /* declared twice */ X error(PARAMETER_TWICE,S_aux1+1); X close_macro_definition(); return; X } X params[S_aux1]= ++left_params; X goto next_left_param; X } X if(SYMBOL!=WORD){ X error(WRONG_MACRO_NAME); close_macro_definition(); return; X } X k=prepare_new_macro_entry(); /* if NULL, then no memory */ X if(k==NULL){ close_macro_definition(); return; } X all_params=left_params; Xnext_right_param: /* read rightist parameters */ X next_symbol(); skip_soft_delimiters(); X if(SYMBOL==PARAMETER){ X if(params[S_aux1]!=0){ /* declared twice */ X 
error(PARAMETER_TWICE,S_aux1+1); X close_macro_definition(); return; X } X params[S_aux1]= ++all_params; X goto next_right_param; X } X if(SYMBOL!=MACRO_DELIM){ X error(MISSING_DELIMITER); close_macro_definition(); return; X } X start_macro_text=CURRENT_OUT; X if(type!=0){ /* %mdefine */ X global_mode |= MATH_MODE; mode_style=DEFINE_STYLE; X } X do{ next_symbol();} while((result=deal_range())==X_CLOSE); X if(result==X_ERROR){ X close_macro_definition(); return; X } X if(SYMBOL!=MACRO_DELIM) error(MISSING_DELIMITER); X if(set_macro_structure( k,type,left_params,all_params-left_params, X LAST_OUT-start_macro_text+1)){ /* no more memory */ X close_macro_definition(); return; X } X if(retrieve_out(start_macro_text,LAST_OUT,k->body)){ X error(TOO_LONG_MACRO_DEF,k->name); X close_macro_definition(); return; X } X translate_parameters(k->body); X insert_macro(); X close_macro_definition(); X} X Xvoid undefine_macro() /* %undefine */ X{ X init_macro_definition(); X next_symbol(); skip_soft_delimiters(); X if(SYMBOL==WORD && S_aux2!=NULL){ /* delete it */ X remove_macro(); X } else error(WRONG_MACRO_NAME); X close_macro_definition(); X} X Xvoid define_mode_keyword(type) int type; /* %mathmode or %dispmode */ X{struct MACRO *k1,*k2; X init_macro_definition(); X next_symbol(); skip_soft_delimiters(); /* to mode keyword */ X if(SYMBOL!=WORD){ X error(WRONG_MODE_SWITCH_DEF); X close_macro_definition(); X return; X } X k1=prepare_new_macro_entry(); X insert_macro(); /* puts to the "keywords" */ X next_symbol(); skip_soft_delimiters(); X switch(SYMBOL){ /* from mode keyword */ Xcase MACRO_DELIM: case HARD_DELIMITER: X set_modeswitch(k1,type,1,0); /* single keyword */ X break; Xcase WORD: X if(k1==S_aux2) k2=NULL; X else {k2=prepare_new_macro_entry(); insert_macro();} X next_symbol(); skip_soft_delimiters(); X if(SYMBOL==MACRO_DELIM || SYMBOL==HARD_DELIMITER){ X set_modeswitch(k1,type,k2==NULL,0); /* single keyword ? 
*/ X set_modeswitch(k2,type,0,1); X break; X } Xdefault: X error(WRONG_MODE_SWITCH_DEF); break; X } X close_macro_definition(); X} X X/*=========================================================================*/ X/* Macro and mode switch handling */ X/*=========================================================================*/ X X/*-------------------------------------------------------------------------* X | -- deal_range() reads things between {...} and returns X_CLOSE, X_ERROR | X | or X_OTHER, skipping unbalanced mode switches. At the end does | X | not advances. | X | -- set_mode(k) switches into mode as given by struct MACRO parameter k. | X | Returns !=0 if the switch is unbalanced. | X | -- store_mode_block() stores a block enclosed by mode switches. This | X | behaves as a single (unbreakable) parameter. | X | -- deal_mode_switch(k) decides whether `k' is a also a mode switch. If | X | not, then pushes it as a paramter. If yes, skips until the closing | X | switch. | X | -- deal_word() checks whether the last word is a macro name, or is a | X | mode switch. Performs the appropriate actions in each case. | X *-------------------------------------------------------------------------*/ X Xint deal_range() X/* Reads things between {...} and returns X_CLOSE, X_ERROR or X_OTHER, X skipping unbalanced mode switches. At the end does not advance. */ X{int result; X while(1){ X while((result=store_parameter(1))==X_PARAMETER); X if(result==X_ERROR) return(X_ERROR); X if(result==X_OTHER){ switch(SYMBOL){ Xcase CLOSE: return(X_CLOSE); Xcase DELIMITER: break; /* allowed withing braces */ Xdefault: return(X_OTHER); X }} X shrink_par_stack(); next_symbol(); X } X} X Xint set_mode(k) struct MACRO *k; X/* Switches into math or disp mode. Returns !=0 if the switch is wrong. */ X{ X if(is_standalone_k(k) || is_in_k(k)){ X global_mode |= is_math_k(k) ? 
MATH_MODE : DISP_MODE; X mode_style=style_k(k); X return(0); X } X return(1); X} X Xint store_mode_block(replace,from,mode_out) X unsigned replace,from; int mode_out; X/* advances and stores a mode block closed by `mode_out' */ X{int result; X open_range(); next_symbol(); X while((result=store_parameter(1))==X_PARAMETER); X close_range(); X if(result!=mode_out) return(result); X push_par(replace,from,CURRENT_OUT); X return(X_PARAMETER); X} X Xint deal_mode_switch(k,replace,from) X struct MACRO *k; unsigned replace,from; X/* checks whether the last word is also a mode switch */ X{ X if(k==NULL || !is_modeswitch_k(k)){ /* not a mode switch */ X push_par(replace,from,CURRENT_OUT); X return(X_PARAMETER); X } X if(in_plain_mode()){ /* switch to mode */ X if(set_mode(k)){ /* wrong switch */ X error(WRONG_MODE_SWITCH,k->name); X return(X_XMODE_OUT); X } X return(store_mode_block(replace,from,X_XMODE_OUT)); X } X if(mode_style!=style_k(k) || (!is_standalone_k(k) && is_in_k(k))){ X error(WRONG_MODE_SWITCH,k->name); X set_plain_mode(); set_mode(k); X } else set_plain_mode(); X return(X_XMODE_OUT); X} X Xint deal_word(replace_from,advance) unsigned replace_from; int advance; X/* Checks whether the word is a macro name. Also checks for mode switch. */ X{struct MACRO *k; int i,replaced,result,right_pars; X k=S_aux2; X if(k==NULL || body_k(k)==NULL || (is_math_macro(k) && in_plain_mode())){ X replaced=0; X result=deal_mode_switch(k,replace_from,LAST_OUT); X } else { /* macro name */ X if(stack_depth() < left_pars_k(k)) { X error(TOO_LESS_LEFT_PARAMS,left_pars_k(k),k->name); X return(X_ERROR); X } X right_pars=right_pars_k(k); X if(right_pars>0) next_symbol(); X result=X_PARAMETER; X for(i=1; result==X_PARAMETER && i<=right_pars;i++) X result=store_parameter(iname); X return(X_ERROR); X } X replaced=macro_substitution(&replace_from,k); X result=deal_mode_switch(k,replace_from,replace_from); X } X if(result==X_PARAMETER && advance){ X replaced &= SYMBOL!=CLOSE; /***** ?????? 
******/ X next_symbol(); /* skip whitespace after a WORD */ X if(replaced && SYMBOL==SOFT_DELIMITER && S_aux1==0){ X set_out_position(LAST_OUT); next_symbol(); X } X } X return(result); X} X X/*=========================================================================*/ X/* Reading parameters */ X/*=========================================================================*/ X X/*-------------------------------------------------------------------------* X | -- skip_balanced_expression() used skipping a {...} parameter for the | X | keyword \preserve. | X | -- skip_word() skips until the next SOFT_DELIMITER after \preserve. | X | -- store_parameter(advance) stores and handles the next SYMBOL. If | X | `advance' is TRUE (!=0) then reads ahead one more SYMBOL. | X *-------------------------------------------------------------------------*/ X Xvoid skip_balanced_expression() /* skip until an unbalanced CLOSE */ X{int level=0; X while(1){ X next_symbol(); switch(SYMBOL){ Xcase HARD_DELIMITER: case EMPTY_LINE: case ENDFILE: X return; Xcase OPEN: level++; break; Xcase CLOSE: level--; if(level<0) return; Xdefault: break; X } X } X} X Xvoid skip_word() /* skips a word after \preserve */ X{ X while(1){ switch(SYMBOL){ Xcase SOFT_DELIMITER: case HARD_DELIMITER: case EMPTY_LINE: case ENDFILE: X return; Xdefault: X next_symbol(); break; X }} X} X Xint store_parameter(advance) int advance; X/* Stores a single parameter. 
If returns !=0 or advance==0 then does not X advances */ X{unsigned replace_from,start; int whitespace; int result; X whitespace=0; Xagain: X if(whitespace==0) replace_from=LAST_OUT; X switch(SYMBOL){ Xcase SOFT_DELIMITER: /* if S_aux1==0 the delimiter vanishes at substitution */ X if(S_aux1==0){ whitespace=1; replace_from=LAST_OUT;} X else whitespace=0; X next_symbol(); X goto again; Xcase WORD: X return(deal_word(replace_from,advance)); Xcase PARAMETER: /* formal parameter in macro text */ X if(params[S_aux1]==0){ X error(UNDEFINED_PARAMETER,1+S_aux1); X return(X_ERROR); X } X push_par(replace_from,LAST_OUT,CURRENT_OUT); X if(advance) next_symbol(); X return(X_PARAMETER); Xcase PRESERVE: /* \preserve keyword */ X start=LAST_OUT; X do{ set_out_position(LAST_OUT); next_symbol();} X while(SYMBOL==SOFT_DELIMITER); /* skip soft delimiters */ X if(SYMBOL==OPEN){ /* skip until the corresponding CLOSE */ X set_out_position(LAST_OUT); /* do not copy OPEN */ X skip_balanced_expression(); X } else skip_word(); X set_out_position(LAST_OUT); /* do not copy CLOSE */ X if(advance) next_symbol(); X push_par(replace_from,start,LAST_OUT); X return(X_PARAMETER); Xcase MATH_IN: case DISP_IN: X if(!in_plain_mode()){ X error(WRONG_DOLLAR_SWITCH); X set_plain_mode(); X } X global_mode|= SYMBOL==MATH_IN ? MATH_MODE : DISP_MODE; X mode_style=SIMPLE_STYLE; X result=store_mode_block(replace_from,LAST_OUT,X_DMODE_OUT); X if(result==X_PARAMETER && advance) next_symbol(); X return(result); Xcase MATH_OUT: /* do not advance! */ X if(!in_math_mode() || mode_style!=SIMPLE_STYLE){ X error(WRONG_CLOSING_DOLLAR); X } X set_plain_mode(); X return(X_DMODE_OUT); Xcase DISP_OUT: /* do not advance! 
*/ X if(!in_disp_mode() || mode_style!=SIMPLE_STYLE){ X error(WRONG_CLOSING_DOLLAR); X } X set_plain_mode(); X return(X_DMODE_OUT); Xcase OPEN: X replace_from=LAST_OUT; start=CURRENT_OUT; X open_range(); X next_symbol(); /* advance */ X result=deal_range(); X close_range(); X if(result!=X_CLOSE) return(result); X push_par(replace_from,start,LAST_OUT); X if(advance) next_symbol(); /* what comes after CLOSE */ X return(X_PARAMETER); Xdefault: /* do not advance! */ X return(X_OTHER); X } X} X X/*=========================================================================*/ X/* Main cycle */ X/*=========================================================================*/ X X/*-------------------------------------------------------------------------* X | read_file() is the main cycle of the program. It is called with all | X | input files. The procedure reads in a cycle until the end of the file, | X | flushing all the output and shrinking the parameter stack in each | X | iteration. Macro parameters cannot go over these constructs, e.g. empty | X | line, comment, TeX commands, etc. | X *-------------------------------------------------------------------------*/ Xvoid read_file() /* goes through a file */ X{int result; X initialize_token_reading(); X initialize_symbol_reading(); Xagain: X shrink_par_stack(); /* no previous parameter */ X flush_output(); /* no backtrack beyond this point */ X next_symbol(); /* first symbol to read */ X while((result=store_parameter(1))==X_PARAMETER); X /* read until can */ X if(result!=X_OTHER) goto again; X shrink_par_stack(); /* no parameters to deal with */ X switch(SYMBOL){ /* what caused the trouble */ Xcase EMPTY_LINE: X if(!in_plain_mode()){ /* check math and disp mode */ X error(EMPTY_LINE_IN_MODE,in_math_mode() ? "math" : "display"); X set_plain_mode(); X } X goto again; Xcase ENDFILE: /* end of everything */ X if(!in_plain_mode()){ X error(ENDFILE_IN_MODE,in_math_mode() ? 
"math":"display"); X set_plain_mode(); X } X break; Xcase COMMENT: /* a % sign somewhere */ X skip_line_as_comment(); X goto again; Xcase DELIMITER: /* control character */ Xcase CLOSE: /* unmatched closing bracket */ X goto again; Xcase DEF_KEYWORD: /* %define */ X read_macro_definition(0); X goto again; Xcase MDEF_KEYWORD: /* %mdefine */ X read_macro_definition(1); X goto again; Xcase UNDEF_KEYWORD: /* %undefine */ X undefine_macro(); X goto again; Xcase MATH_KEYWORD: /* %matmode */ X define_mode_keyword(0); X goto again; Xcase DISP_KEYWORD: /* %dispmode */ X define_mode_keyword(1); X goto again; X/*** case MATH_IN: case MATH_OUT: case DISP_IN: case DISP_OUT: case PRESERVE: X case HARD_DELIMITER: case OPEN: case WORD: case MACRO_DELIM: X case PARAMETER: case SOFT_DELIMITER: ***/ Xdefault: /* something which should not occur */ X fprintf(stderr,"Unreachable symbol: %d\n"); break; X } X} X X/*=========================================================================*/ X/* Command line arguments */ X/*=========================================================================*/ X X/*-------------------------------------------------------------------------* X | Arguments in the command line are the files which are to be processed. | X | Argument STDIN_ARG means the standard input; a file name preceeded by | X | WRITE_ARG or APPEND_ARG is considered the output file. If no output | X | file is present then the output goes to the standard output. In this | X | latter case error messages are not repeated at stderr. | X *-------------------------------------------------------------------------*/ X#define WRITE_ARG "-w" X#define APPEND_ARG "-a" X#define STDIN_ARG "-" X#define HELP_ARG "-h" X XFILE *find_output(argc,argv) int argc; char *argv[]; X/* searches the argument list for NOPAR_ARG */ X{FILE *f; int i,found; X for(i=1;i