/*
**      $VER: Funcs.c 1.0
**
**      Library functions for HTML datatype
*/

#define __USE_SYSBASE

#include <exec/types.h>
#include <exec/memory.h>

#include <proto/exec.h>
#include <proto/dos.h>

#include <string.h>

#include "ixdt.h"

/*
** Every iX Datatype has three functions:
**
** DTF_Identify() which performs some tests on given file
** to determine if that file should be read by this datatype.
** Function returns an integer value between 0 and 9.
** Value of 9 means that file is recognized 100%.
** Value of 0 means that file is not recognized.
** Value of e.g. 5 means that datatype is about 50% sure that
** file is of required type.
** Value of e.g. 8 means that datatype is almost 100% sure that
** file is of required type etc.
**
** DTF_FileStruct() which is used to determine begin-end of
** some logical parts of file.
** For example, AmigaGuide database is made up of nodes, and this
** function should find begin, end and name of every node.
**  
** DTF_ProcessChunk() which reads one part of file and returns
** information on that part in iX_ChunkInfo form readable by iX-Guide
*/

/*
** Parser state shared by the support functions below and by
** DTF_ProcessChunk():
**
**  ec        - index of the next free slot in the entry arrays
**  ecc       - number of entries written since the arrays were last grown
**  ParserECS - number of entries the arrays are grown by at a time
**
** These are file-scope variables, so every call works on the same state.
*/
long ec,ecc,ParserECS;

/*
** DTF_Identify() - decide whether the named file looks like HTML.
**
** name - name of the file to examine
**
** Returns 9 if the first tag of the file is <HTML> (any case), where
** leading blanks, line ends, tabs and <!...> tags are skipped first.
** Returns 0 in every other case, including when the file cannot be
** opened or ends before a complete tag was seen.
*/
ULONG __saveds __asm DTF_Identify(register __a0 STRPTR name)
{
char ch,hbuff[5];
BPTR fh;
ULONG score=0L;

 fh=Open(name,MODE_OLDFILE);
 if (fh)
 {
  while(FRead(fh,&ch,1,1)==1)
   {
    if (ch=='<')
     {
       // look at the first character of the tag only, so that a short
       // tag such as <!x> is not swallowed together with what follows it
       if (FRead(fh,hbuff,1,1)!=1) break;
       if (*hbuff=='!')
         {
           // <!...> tag: skip up to and including the closing '>'
           while(FRead(fh,&ch,1,1)==1) if (ch=='>') break;
         }
       else
         {
           // first real tag: it decides; a short read can never match
           if ((FRead(fh,hbuff+1,1,4)==4) && (strnicmp(hbuff,"HTML>",5)==0))
             score=9L;
           break;
         };
     }
    else if ((ch!=' ') && (ch!='\n') && (ch!='\r') && (ch!='\t')) break;
   };
  Close(fh);
 };
 return(score);
}

/*
** DTF_FileStruct() - describe the logical parts (nodes) of the file.
**
** fh         - handle of the opened file
** from       - 0 on the first call (start of file); non-zero afterwards
** save_index - set to FALSE: with a single node there is no point in
**              iX-Guide saving a node index
**
** An HTML file is treated as one node called "main" that spans the whole
** file.  Returns that node on the first call, and NULL on later calls or
** when memory could not be allocated.  The file position is left at the
** end of the file.
*/
struct iX_Node * __saveds __asm DTF_FileStruct(register __a0 BPTR fh,
                                               register __d0 long from,
                                               register __a1 BOOL *save_index)
{
 struct iX_Node *nd;

 *save_index = FALSE;

 if (from) return(NULL);   // called again: we have only one node

 nd = AllocVec(sizeof(struct iX_Node),MEMF_CLEAR);
 if (!nd) return(NULL);

 nd->name = AllocVec(sizeof("main"),MEMF_ANY);
 if (!nd->name)
  {
   FreeVec(nd);            // do not hand out a node without a name
   return(NULL);
  };

 strcpy(nd->name,"main");  // "main" node will be loaded first
 nd->node_begin=0L;        // node begins at the beginning of the file

 // Seek() returns the previous position, so after moving to the end
 // a zero-distance seek reports the offset of the end of file
 Seek(fh,0L,OFFSET_END);
 nd->node_end=Seek(fh,0L,OFFSET_CURRENT);

 return(nd);
}

/************ support functions ************/
/*
** Realloc() - move a memory block into a newly allocated one.
**
** oldptr - block obtained from AllocVec(), or NULL
** newsz  - size in bytes of the new block
** oldsz  - number of bytes of oldptr that are in use
**
** Returns the new block, or NULL if it could not be allocated.
** The old block is freed in both cases, so the caller must not use
** oldptr afterwards.
*/
void *Realloc(void *oldptr,long newsz,long oldsz)
{
 void *mempt;

 mempt = AllocVec(newsz,MEMF_ANY);

 // copy only when there is a destination and a source, and never more
 // than the new block can hold
 if (mempt && oldptr && (oldsz>0))
   CopyMem(oldptr,mempt,(oldsz<newsz) ? oldsz : newsz);

 FreeVec(oldptr);          // FreeVec() accepts NULL

 return(mempt);
}
BOOL CheckMemory(struct iX_ChunkInfo *tpp,long *chunk_c,long entry_c,long ParserECS)
{
     if (*chunk_c==ParserECS)
       {
        *chunk_c=0;
        tpp->ci_entry=Realloc(tpp->ci_entry,entry_c+ParserECS,entry_c);
        tpp->cie_position=Realloc(tpp->cie_position,4*entry_c+ParserECS*4,4*entry_c);
        tpp->cie_length=Realloc(tpp->cie_length,2*entry_c+ParserECS*2,2*entry_c);
       };

if (!tpp->ci_entry || !tpp->cie_position || !tpp->cie_length) return(FALSE);
}

/*
** InsertEntries() - append num_e entry codes from array to the chunk.
**
** Every code is stored at the current global entry index ec; ec and ecc
** are advanced and CheckMemory() is called after each one so the arrays
** are grown when needed.  Only ci_entry is written here; position and
** length of the new entries are left for the caller to fill in.
*/
void InsertEntries(struct iX_ChunkInfo *tpp,UBYTE *array,long num_e)
{
 UBYTE *src=array;
 long left=num_e;

 while(left-- > 0)
  {
   tpp->ci_entry[ec] = *src++;
   ec++;
   ecc++;
   CheckMemory(tpp,&ecc,ec,ParserECS);
  };
}
/*
** FindN() - look up string t in the NULL terminated table arr,
** ignoring case.
**
** Returns the 1-based position of the first match.  If t is not in the
** table the result is the number of table entries plus one.
*/
long FindN(char *t,char **arr)
{
 char **cur;

 for(cur=arr;*cur;cur++)
  {
    if (!stricmp(t,*cur)) break;
  };
 return((long)(cur-arr)+1);
}
/********************************/
/*** parser stuff ***/
/*
** Token classes: DTF_ProcessChunk() classifies the character at the
** current parse position as one of these before handling it.
*/
#define TYPE_TAG    1
#define TYPE_WORD   2
#define TYPE_RTRN   3
#define TYPE_SPC    4

/*** HTML tags ***/
/*
** Each value is the 1-based position of the tag name in the HTMLtags[]
** table inside DTF_ProcessChunk(), which is what FindN() returns for
** that name.  The values and the table must be kept in the same order.
*/
#define TAG_A           1
#define TAG_TITLE       2
#define TAG_B           3
#define TAG_I           4
#define TAG_U           5
#define TAG_H1          6
#define TAG_H2          7
#define TAG_H3          8
#define TAG_H4          9
#define TAG_H5         10
#define TAG_H6         11  
#define TAG_CENTER     12
#define TAG_P          13
#define TAG_BR         14
#define TAG_OL         15
#define TAG_UL         16
#define TAG_LI         17
#define TAG_IMG        18

struct iX_ChunkInfo * __saveds __asm DTF_ProcessChunk(register __a0 STRPTR name,
                                                      register __d0 long from,
                                                      register __d1 long to,
                                                      register __d2 BOOL head)
{
  struct iX_ChunkInfo *ci=NULL;
  BPTR fhan;
  char *author="Created by HTML datatype V12.27";    // this will show up
                                                    // in iXG INFO window
  char *deffont="F1";
  char *dp,store,*beg;
  UBYTE header[]={ ET_STAG,ET_DATABASE,ET_ATTR,ET_RTRN, // @database tag
                   ET_STAG,ET_AUTHOR,ET_ATTR,ET_RTRN,  // @author tag
                   ET_STAG,ET_FONT,ET_ATTR,ET_RTRN,
                   ET_STAG,ET_IXF,ET_RTRN,ET_END};  // @ixf tag (enable iXG formatting)
  UBYTE link[]={ET_ATTR,ET_LINK,ET_ATTR};
  UBYTE fontspec[]={ET_STAG,ET_FONT,ET_ATTR,ET_RTRN};
  UBYTE align[]={ET_A,ET_ATTR,ET_ATTR};
  UBYTE img[]={ET_ATTR,ET_PIC};
  
  char *HTMLtags[]={"a","title","b","i","u","h1","h2","h3","h4",
                    "h5","h6","center","p","br","ol","ul","li",
                    "img",NULL};
  
  char *ixg_strings = "F0F1F2F3F4F5F6DEFFLCRDWUPDEFA";
                   //       fonts        alignement
  long nn,etype;
  BOOL neg,hreffnd,inlink;
                         
  if (head)          // iX-Guide wants header of database
   {
    ci = AllocVec(sizeof(struct iX_ChunkInfo),MEMF_CLEAR);
    ci->ci_data = AllocVec(strlen(author)+strlen(name)+strlen(deffont)+2,MEMF_CLEAR);
    strcpy(ci->ci_data,author);strcat(ci->ci_data,name);strcat(ci->ci_data,deffont);
    ci->ci_datalen=strlen(author)+strlen(name)+strlen(deffont)+2;
    ci->ci_entrynum=16;
    ci->ci_entry=AllocVec(16,MEMF_CLEAR);
    ec=0;ecc=0;
    InsertEntries(ci,header,16);
    ci->cie_position=AllocVec(16*4,MEMF_CLEAR);
    ci->cie_position[2] = strlen(author);ci->cie_position[6] = 0L;
    ci->cie_position[10] = strlen(author)+strlen(name);
    ci->cie_length=AllocVec(16*2,MEMF_CLEAR);
    ci->cie_length[2] = strlen(name); ci->cie_length[6]=strlen(author);
    ci->cie_length[10] = strlen(deffont);
   }
  else  // process chunk
   {
    ParserECS=1000;
    if ((to-from) > 50000) ParserECS=5000;
    else if ((to-from) > 20000) ParserECS=3000;
    
    ci = AllocVec(sizeof(struct iX_ChunkInfo),MEMF_CLEAR);
    ci->ci_entry = AllocVec(ParserECS,MEMF_ANY);
    ci->cie_position = AllocVec(ParserECS*4,MEMF_ANY);
    ci->cie_length = AllocVec(ParserECS*2,MEMF_ANY);
    ci->ci_data = AllocVec(to-from+2+strlen(ixg_strings),MEMF_ANY);

    if (!ci || !ci->ci_entry || !ci->cie_position || !ci->cie_length || !ci->ci_data)
      return(NULL);
    
    if (fhan=Open(name,MODE_OLDFILE))
       {
        ci->ci_datalen=strlen(ixg_strings);
        strcpy(ci->ci_data,ixg_strings);
                    
        Seek(fhan,from,OFFSET_BEGINING);
        ci->ci_datalen += FRead(fhan,ci->ci_data+strlen(ixg_strings),1,to-from+1);
        ci->ci_data[ci->ci_datalen] = 0;
        
        dp = ci->ci_data+strlen(ixg_strings);
        ec=0;ecc=0;
 
      while(*dp)
       {
         etype=0;
         switch(*dp)
          {
            case '<':
              etype=TYPE_TAG;
             break;
            case ' ':
              etype=TYPE_SPC;
             break;
            case '\n':
              etype=TYPE_RTRN;
             break;
            default:
              etype=TYPE_WORD;
             break;
          };
          
        switch(etype)
         {
           case TYPE_WORD:
            ci->ci_entry[ec]=ET_WORD;
            ci->cie_position[ec]=dp-ci->ci_data;
            ci->cie_length[ec]=0L;
            while(*dp)
             {
               if (*dp==' ') {dp++;ci->cie_length[ec]++;break;};
               if (*dp=='\n') break;
               if (*dp=='<') break;
               dp++;ci->cie_length[ec]++;
             };
            ec++;ecc++;CheckMemory(ci,&ecc,ec,ParserECS);
           break;
           
           case TYPE_SPC:
            ci->ci_entry[ec]=ET_SPC;
            ci->cie_length[ec]=0L;
            while(*dp && (*dp==' ')) {dp++;ci->cie_length[ec]++;};
            ec++;ecc++;CheckMemory(ci,&ecc,ec,ParserECS);
           break;
             
           case TYPE_RTRN:
            ci->ci_entry[ec]=ET_RTRN;
            *dp=' ';
            dp++;
            ec++;ecc++;CheckMemory(ci,&ecc,ec,ParserECS);
           break;
           
           case TYPE_TAG:
            ci->ci_entry[ec]=ET_TAG;
            ec++;ecc++;CheckMemory(ci,&ecc,ec,ParserECS);
            dp++;
            if (*dp=='!') while(*dp && (*dp!='>')) dp++;  // comment
            else
              {
                if (*dp=='/') {neg=TRUE;dp++;} else neg=FALSE;
                while(*dp==' ') dp++;
                beg=dp;
                while(*dp && (*dp!='>') && (*dp!=' ')) dp++;
                store=*dp;
                *dp='\0';
                nn = FindN(beg,HTMLtags);
                *dp=store;
      
                switch(nn)
                   {
                    case TAG_A:
                      if (neg) inlink=FALSE;
                      hreffnd=FALSE;
                      while (*dp && (*dp!='>'))
                        {
                          while(*dp==' ') dp++;
                          if (strnicmp(dp,"HREF=",5)==0)
                           {
                            hreffnd=TRUE;inlink=TRUE;
                            dp+=5;
                            if(*dp=='"')
                               {
                                dp++;
                                if (strnicmp(dp,"file://localhost/",17)==0) dp+=17;
                                else if (strnicmp(dp,"file:///",8)==0) dp+=8;
                                beg=dp;
                                while(*dp && (*dp!='"')) dp++;
                               }
                             else
                               {
                                if (strnicmp(dp,"file://localhost/",17)==0) dp+=17;
                                else if (strnicmp(dp,"file:///",8)==0) dp+=8;
                                beg=dp;
                                while(*dp && (*dp!=' ') && (*dp!='>')) dp++;
                               };
                                store=*dp;*dp='\0';
                                strcpy(beg-5,beg);
                                if (!strchr(beg-5,'#')) strcat(beg-5,"/main");
                                InsertEntries(ci,link,3);
                                ci->cie_position[ec-1]=beg-ci->ci_data-5;
                                ci->cie_length[ec-1]=strlen(beg-5);
                                ci->cie_position[ec-3]=beg-ci->ci_data-5;
                                ci->cie_length[ec-3]=strlen(beg-5);
                                *dp=store;
                                
                                while(*dp && (*dp!='>')) dp++;
                                if (*dp=='>') dp++;
                                if (*dp!='<')
                                {
                                ci->cie_position[ec-3]=dp-ci->ci_data;
                                ci->cie_length[ec-3]=0;
                                while(*dp && (*dp!='<')) {dp++;ci->cie_length[ec-3]++;};
                                };
                            break;
                           }
                          else while(*dp && (*dp!='>') && (*dp!=' ')) dp++;
                        };
                     break;
                     
                     case TAG_IMG:
                      while (*dp && (*dp!='>'))
                        {
                          while(*dp==' ') dp++;
                          if (strnicmp(dp,"SRC=",4)==0)
                           {
                            dp+=4;
                            if(*dp=='"')
                               {
                                dp++;
                                if (strnicmp(dp,"file://localhost/",17)==0) dp+=17;
                                else if (strnicmp(dp,"file:///",8)==0) dp+=8;
                                beg=dp;
                                while(*dp && (*dp!='"')) dp++;
                               }
                             else
                               {
                                if (strnicmp(dp,"file://localhost/",17)==0) dp+=17;
                                else if (strnicmp(dp,"file:///",8)==0) dp+=8;
                                beg=dp;
                                while(*dp && (*dp!=' ') && (*dp!='>')) dp++;
                               };
                                store=*dp;*dp='\0';
                                InsertEntries(ci,img,2);
                                ci->cie_position[ec-2]=beg-ci->ci_data;
                                ci->cie_length[ec-2]=strlen(beg);
                                *dp=store;
                            break;
                           }
                          else while(*dp && (*dp!='>') && (*dp!=' ')) dp++;
                        };
                     break;
                     
                     case TAG_TITLE:
                      ci->ci_entry[ec-1]=ET_RTRN;
                      InsertEntries(ci,fontspec,4);
                      if (neg)
                      ci->cie_position[ec-2]=2;     // FONT F1 if tag is </title>
                      else
                      ci->cie_position[ec-2]=10;    // FONT 5 in ixg_strings
                      ci->cie_length[ec-2]=2;
                     break;
                     
                     case TAG_H1:
                     case TAG_H2:
                     case TAG_H3:
                     case TAG_H4:
                     case TAG_H5:
                     case TAG_H6:
                      ci->ci_entry[ec-1]=ET_RTRN;
                      InsertEntries(ci,fontspec,4);
                      if (neg)
                      ci->cie_position[ec-2]=2;    // F1 if </Hx>
                      else
                      ci->cie_position[ec-2]=12-(nn-6)*2; // H1 is F6, H2 is F5...
                      ci->cie_length[ec-2]=2;
                     break;
                     
                     case TAG_B:
                      if (neg)
                      ci->ci_entry[ec]=ET_UB;
                      else
                      ci->ci_entry[ec]=ET_B;
                      ec++;ecc++;CheckMemory(ci,&ecc,ec,ParserECS);
                     break;
                     case TAG_I:
                      if (neg)
                      ci->ci_entry[ec]=ET_UI;
                      else
                      ci->ci_entry[ec]=ET_I;
                      ec++;ecc++;CheckMemory(ci,&ecc,ec,ParserECS);
                     break;
                     case TAG_U:
                      if (neg)
                      ci->ci_entry[ec]=ET_UU;
                      else
                      ci->ci_entry[ec]=ET_U;
                      ec++;ecc++;CheckMemory(ci,&ecc,ec,ParserECS);
                     break;
                     
                     case TAG_CENTER:
                      InsertEntries(ci,align,3);
                      if (neg)
                      {
                      ci->cie_position[ec-2]=18;
                      ci->cie_position[ec-1]=21;
                      }
                      else
                      {
                      ci->cie_position[ec-2]=19;
                      ci->cie_position[ec-1]=21;
                      };
                      ci->cie_length[ec-2]=1;
                      ci->cie_length[ec-1]=2;
                     break;
                     
                     case TAG_P:
                      ci->ci_entry[ec]=ET_N;
                      ec++;ecc++;CheckMemory(ci,&ecc,ec,ParserECS);
                      ci->ci_entry[ec]=ET_N;
                      ec++;ecc++;CheckMemory(ci,&ecc,ec,ParserECS);
                     break;
                     
                     case TAG_BR:
                      ci->ci_entry[ec]=ET_N;
                      ec++;ecc++;CheckMemory(ci,&ecc,ec,ParserECS);
                     break;
                     
                     case TAG_OL:
                     case TAG_UL:
                     if (neg)
                      ci->ci_entry[ec]=ET_UUL;
                     else
                      ci->ci_entry[ec]=ET_UL;
                     ec++;ecc++;CheckMemory(ci,&ecc,ec,ParserECS);
                     break;
                     case TAG_LI:
                      ci->ci_entry[ec]=ET_LI;
                      ec++;ecc++;CheckMemory(ci,&ecc,ec,ParserECS);
                     break;
                   };
              };
            while(*dp && (*dp!='>')) dp++; // skip tag
            if (*dp=='>') dp++;
           break;
         };         
       };
       
       ci->ci_entry[ec]=ET_END;
       ci->ci_entrynum=ec;
       Close(fhan);
       };
   };
   
  return(ci);
}
