
/* diverse.c  22-3-92  code for computing the diversity index */
/*** Diverse: Version 3.1  Copyright (c) 1990, 1991, 1992  Tom Ray ***/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <time.h>
#include <ctype.h>
#ifdef __TURBOC__
#include <alloc.h>
#endif /* __TURBOC__ */

/*
 * Prototype compatibility glue.  P_(args) expands to the full argument list
 * on ANSI compilers and to an empty list "()" on pre-ANSI compilers, so the
 * same declarations serve both.  On pre-ANSI compilers the `const` keyword
 * is defined away; on ANSI compilers it is left unchanged.
 */
#ifdef __STDC__           /* ANSI prototyping */
#define P_(A) A
#define const const
#else                 /* non-ANSI prototyping */
#define P_(A) ()
#define const
#endif

typedef unsigned int   Uint;
typedef unsigned long  Ulong;
typedef long int       I32s;

/*
 * Per-event size-class record.  In binary output mode main fills one of
 * these for every input record and writes it verbatim with fwrite, so the
 * member order and types determine the on-disk layout.
 */
struct siz {
    long   itime;         /* time value parsed from the current input record */
    long   TotPop;        /* total population after applying the event */
    long   num_sizes;     /* number of size classes with a positive population */
    float  siz_div;       /* size-class diversity index */
    long   ave_size_age;  /* average age of the living size classes */
    } ;

/*
 * Per-event genotype record.  In binary output mode, when the input carries
 * genotype labels, main writes one of these with fwrite directly after each
 * struct siz record.
 */
struct gen {
    long   num_genos;     /* number of genotypes with a positive population */
    float  geno_div;      /* genotype diversity index */
    long   ave_geno_age;  /* average age of the living genotypes */
    } ;

/* NOTE(review): not referenced by any code visible in this file. */
struct event {  /* structure for time measured in executed instructions */
    long  m;   /* count of millions of instructions */
    long  i;   /* count of instructions */
    } ;

/*
 * State of the most recently parsed input record.  t_read updates only the
 * fields that are present on a line, so the remaining fields keep the values
 * of the previous record.
 */
struct last_out {
    Ulong  time;      /* running sum of record times, kept below 1,000,000 by t_read */
    Ulong  ctime;     /* incremented by t_read whenever `time` reaches 1,000,000 */
    Ulong  itime;     /* time value (parsed as hex) of the current record */
    char   bd;        /* event code: AddTree tests for 'b' (birth) and 'd' (death) */
    Uint   size;      /* genome size, parsed as unsigned decimal */
    char   label[4];  /* genotype label; Lbl2Int reads its first three characters */
    } ;

/*
 * Pairs a size-class node with an index.
 * NOTE(review): only referenced by struct snode; no code visible in this
 * file reads or writes it.  Presumably `index` selects an entry of t->g --
 * confirm before relying on it.
 */
struct gene_dat {
    struct tnode  *t;
    long          index;
    } ;

/*
 * Node of a binary tree keyed on a maximum-population value.
 * NOTE(review): not referenced by any code visible in this file.
 */
struct snode {
    long             max;  /* max pop value upon which tree is sorted */
    long             num;  /* number of genotypes of this max value */
    long             gsiz; /* allocated size of *g array */
    struct gene_dat  *g;   /* array of gene_dat entries */
    struct snode     *l;  /* left sub-tree */
    struct snode     *r;  /* right sub-tree */
    } ;

/*
 * Population bookkeeping for one size class or one genotype.  AddTree
 * adjusts `end` on every birth/death; divCountTree adds the current record's
 * time to `age` for every entry whose `end` is positive.
 */
struct pop_dat {
    long  end; /* current population of this size or genotype */
    long  age; /* current age of this size or genotype */
    } ;

/*
 * Node of the binary search tree of size classes, ordered by `size`
 * (smaller sizes in the left sub-tree, larger in the right).  Nodes are
 * created by AddTree and released by FreeTTree.
 */
struct tnode {
    long            size;  /* genome size */
    struct pop_dat  sd; /* pop_dat for this size class */
    int             gsize; /* allocated size of *g array */
    struct pop_dat  *g; /* array of pop_dat structures, indexed by Lbl2Int(label) */
    struct tnode    *l;  /* left sub-tree */
    struct tnode    *r;  /* right sub-tree */
    } ;

/*
 * Running totals maintained incrementally as records are applied.  main
 * compares them against the values recounted by divCountTree and prints a
 * diagnostic when they disagree.
 */
struct Totals {
    long int  TotPop;    /* total population; adjusted in main on every record */
    long int  TotSizes;  /* living size classes; adjusted in AddTree */
    long int  TotGenos;  /* living genotypes; adjusted in AddTree */
    double    time;      /* sum of the time values of all records processed */
    } totals;

/*
 * Results of one full traversal of the size tree.  main zeroes the fields
 * before each call to divCountTree, which then accumulates into them.
 */
struct DivDat {
    float     size_div;      /* sum over living size classes of -(p * log(p)), p = pop / TotPop */
    float     geno_div;      /* same sum taken over living genotypes */
    long int  num_sizes;     /* count of size classes with positive population */
    long int  num_genos;     /* count of genotypes with positive population */
    long int  num_cells_s;   /* population summed over size classes */
    long int  num_cells_g;   /* population summed over genotypes */
    double    tot_size_age;  /* sum of the ages of living size classes */
    double    tot_geno_age;  /* sum of the ages of living genotypes */
    } divdat;

/*
 * Min/max/count tracker updated by minmax2 and reported by doranges.
 * Instances, as fed by main: timer (cumulative time), sdivr / gdivr (size
 * and genotype diversity), nsr / ngr (number of sizes / genotypes),
 * ncr (total population), asar / agar (average size / genotype age).
 */
struct range {
    double    n;  /* minimum value */
    double    x;  /* maximum value */
    long int  m;  /* count of values */
    int       f;  /* first time through = 1 */
    } timer, sdivr, gdivr, nsr, ngr, ncr, asar, agar;

/*
 * Forward declarations.  Each argument list is wrapped in P_() so that it
 * collapses to "()" on pre-ANSI compilers; the definitions below use the
 * old-style (K&R) parameter syntax to match.
 */
void main P_((int  argc, char  **argv));
void doranges P_(());
struct tnode * AddTree P_((struct tnode  *p, struct last_out  *lo));
void divCountTree P_((struct tnode  *t, struct DivDat  *d));
void minmax2 P_((double  v, struct range  *r));
int Lbl2Int P_((char  s[]));
void FreeTTree P_((struct tnode  *p));
int t_read P_((char  data[], struct last_out  *lo, int  *first,
     int  *genotypes));

/* Non-zero when the input records carry genotype labels; set by t_read from
   the field count of the first record. */
int     genotypes;
/* Average ages computed in main for the current record. */
double  ave_size_age, ave_geno_age;
/* ifile/ofile: current input/output file names; bifile/bofile: their base
   names (text before the first '.'); data: the current input line.
   NOTE: the name buffers hold at most 12 characters plus the terminator. */
char    ifile[13], ofile[13], bifile[13], bofile[13], data[81];
/* Current input and output streams. */
FILE    *inf, *ouf;

/* The most recently parsed record; filled by t_read, read by AddTree and
   divCountTree. */
struct last_out rlo;

/*
 * main: command-line driver.
 *
 * Usage: prog in_file [out_file [format [break_size]]]
 *   in_file     first input file; when it is exhausted the program continues
 *               with <base>.2, <base>.3, ... (base = in_file up to the first
 *               '.') until one of them cannot be opened or is empty.
 *   out_file    base name of the output (default "divdat"); output goes to
 *               <base>.1, <base>.2, ...
 *   format      "binary" selects binary records, anything else ascii
 *               (binary is also the default when format is omitted).
 *   break_size  size in K after which a new output file is started
 *               (default 4096).
 *
 * For every input record the size tree is updated (AddTree), the diversity
 * figures are recounted (divCountTree), one output record is written and the
 * min/max ranges are updated.  At the end doranges writes the "divrange"
 * summary.  Only the first output file receives the format header.
 *
 * NOTE: file names are kept in 13-byte global buffers and are not length
 * checked here; names longer than 12 characters overflow them.
 */
void main (argc, argv)
int  argc;
char  **argv;
{   unsigned long  size, fsize = 0;
    I32s           mtime = 0, otime = 0;
    int            binum = 1, bonum = 1;
    int            first = 1, first2 = 1, format = 0;
    char           c;
    struct tnode   *troot = NULL;
    struct gen  ge;
    struct siz  si;

    timer.n = timer.x = sdivr.n = sdivr.x = gdivr.n = gdivr.x = 0.;
    nsr.n = nsr.x = ngr.n = ngr.x = ncr.n = ncr.x = 0.;
    asar.n = asar.x = agar.n = agar.x = 0.;
    timer.f = sdivr.f = gdivr.f = nsr.f = ngr.f = ncr.f = asar.f = agar.f = 1;
    timer.m = sdivr.m = gdivr.m = nsr.m = ngr.m = ncr.m = asar.m = agar.m = 0;
    totals.TotPop = totals.TotSizes = totals.TotGenos = 0;
    totals.time = 0.;

    if(argc == 1)
    {   printf (" Usage: %s in_file out_file format break_size\n",argv[0]);
        printf ("        break_size in K: out_file.1, out_file.2 ...\n");
        printf ("        output format: binary or ascii\n\n");
        exit (1);
    }
    if(argc == 2)
        sprintf(ofile,"divdat");
    if(argc == 3)
    {   sscanf(argv[2],"%[^.]", ofile);
    }
    size = 4096;
    if(argc == 4)
    {   if(!strcmp(argv[3],"binary"))
            format = 0;
        else format = 1;
        sscanf(argv[2],"%[^.]", ofile);
    }
    if(argc == 5)
    {   if(!strcmp(argv[3],"binary"))
            format = 0;
        else format = 1;
        sscanf(argv[2],"%[^.]", ofile);
        sscanf(argv[4],"%lu", &size);
    }
    size *= 1024;   /* break size is given in K */
    strcpy(ifile,argv[1]);
    sscanf(ifile,"%[^.]", bifile);
    inf = fopen(ifile,"r");
    if(inf == NULL)
    {   printf("\n Input File %s NOT found. \n", ifile);
        exit(1);
    }
    strcpy(bofile,ofile);
    strcat(ofile,".1");
    rlo.time = rlo.ctime = rlo.size = 0;
    for(;;)
    {   if(fgets(data,80,inf) == NULL)
        {   /* current input exhausted: move on to the next numbered file */
            binum++;
            sprintf(ifile,"%s.%d", bifile, binum);
            fclose(inf);
            inf = fopen(ifile,"r");
            if(inf == NULL)
                break ;
            if(fgets(data,80,inf) == NULL)
                break ;
        }
        /* t_read returns 0 for lines that carry no event; skip them so the
           previous record is not applied a second time */
        if(!t_read(data, &rlo, &first, &genotypes))
            continue;
        if(first2)
        {   first2 = 0;
#ifdef __TURBOC__
            printf("\nTime = %4ld million  Coreleft = %6lu\r",
                0L, coreleft());
#else  /* __TURBOC__ */
            printf("\nTime = %4d million\r", first2);
#endif /* __TURBOC__ */
            fflush(stdout);
            ouf = fopen(ofile, format ? "w" : "wb");
            if(ouf == NULL)
            {   printf("\n Output File %s could NOT be opened. \n", ofile);
                exit(1);
            }
            /* header: output format, then whether genotype data follows */
            if(format)
            {   if(genotypes)
                    fprintf(ouf,"11\n");
                else
                    fprintf(ouf,"10\n");
            }
            else
            {   c = (char) format;
                fwrite(&c,sizeof(char),1,ouf);
                c = (char) genotypes;
                fwrite(&c,sizeof(char),1,ouf);
            }
        }
        troot = AddTree(troot,&rlo);
        if(rlo.bd == 'd')
            totals.TotPop--;
        else
            totals.TotPop++;
        divdat.num_cells_s = 0;
        divdat.num_sizes = 0;
        divdat.size_div = 0;
        divdat.tot_size_age = 0.;
        if(genotypes)
        {   divdat.num_cells_g = 0;
            divdat.num_genos = 0;
            divdat.geno_div = 0;
            divdat.tot_geno_age = 0.;
        }

        divCountTree(troot,&divdat);

        /* cross-check the recounted figures against the running totals */
        if(divdat.num_cells_s != totals.TotPop)
            printf("divdat.num_cells_s = %ld  totals.TotPop = %ld\n",
                divdat.num_cells_s, totals.TotPop);
        if(divdat.num_sizes != totals.TotSizes)
            printf("divdat.num_sizes = %ld  totals.TotSizes = %ld\n",
                divdat.num_sizes, totals.TotSizes);
        if(genotypes)
        {   if(divdat.num_genos != totals.TotGenos)
                printf("divdat.num_genos = %ld  totals.TotGenos = %ld\n",
                    divdat.num_genos, totals.TotGenos);
            if(divdat.num_cells_g != divdat.num_cells_s)
                printf("divdat.num_cells_g = %ld  divdat.num_cells_s = %ld\n",
                    divdat.num_cells_g, divdat.num_cells_s);
        }
        /* with nothing alive the average is reported as 0 rather than 0/0 */
        if(totals.TotSizes > 0)
            ave_size_age = divdat.tot_size_age / (double) totals.TotSizes;
        else
            ave_size_age = 0.;
        if(genotypes)
        {   if(totals.TotGenos > 0)
                ave_geno_age = divdat.tot_geno_age / (double) totals.TotGenos;
            else
                ave_geno_age = 0.;
        }

        if(format)
        {   /* the extra 1 per line is part of the original size accounting */
            if(genotypes)
                fsize += 1 + fprintf(ouf,"%lx %ld %ld %g %lx %ld %g %lx\n",
                rlo.itime, totals.TotPop, divdat.num_sizes, divdat.size_div,
                (Ulong) ave_size_age, divdat.num_genos, divdat.geno_div,
                (Ulong) ave_geno_age);
            else fsize += 1 + fprintf(ouf,"%lx %ld %ld %g %lx\n",
                rlo.itime, totals.TotPop, divdat.num_sizes, divdat.size_div,
                (Ulong) ave_size_age);
        }
        else
        {   si.itime = rlo.itime;
            si.TotPop = totals.TotPop;
            si.num_sizes = divdat.num_sizes;
            si.siz_div = divdat.size_div;
            si.ave_size_age = (Ulong) ave_size_age;
            fwrite(&si, sizeof(struct siz), 1, ouf);
            fsize += sizeof(struct siz);
            if(genotypes)
            {   ge.num_genos = divdat.num_genos;
                ge.geno_div = divdat.geno_div;
                ge.ave_geno_age = (Ulong) ave_geno_age;
                fwrite(&ge, sizeof(struct gen), 1, ouf);
                fsize += sizeof(struct gen);
            }
        }
        if(fsize > size)
        {   /* break size exceeded: continue in the next numbered file */
            fsize = 0;
            bonum++;
            fclose(ouf);
            sprintf(ofile,"%s.%d", bofile, bonum);
            ouf = fopen(ofile, format ? "w" : "wb");
            if(ouf == NULL)
            {   printf("\n Output File %s could NOT be opened. \n", ofile);
                exit(1);
            }
        }

        totals.time += (double) rlo.itime;
        mtime = (long int) (totals.time / 1000000L);
        if (mtime > otime)
        {
#ifdef __TURBOC__
            printf("Time = %4ld million  Coreleft = %6lu\r",
                mtime, coreleft());
#else  /* __TURBOC__ */
            printf("Time = %4ld million\r", mtime);
#endif /* __TURBOC__ */
            fflush(stdout);
            otime = mtime;
        }
        minmax2((double) totals.time,&timer);
        minmax2((double) totals.TotPop,&ncr);
        minmax2((double) divdat.num_sizes,&nsr);
        minmax2((double) divdat.size_div,&sdivr);
        minmax2((double) ave_size_age,&asar);
        if(genotypes)
        {   minmax2((double) divdat.geno_div,&gdivr);
            minmax2((double) divdat.num_genos,&ngr);
            minmax2((double) ave_geno_age,&agar);
        }
    }
    if(first2)
    {   /* no record was ever read, so no output file was opened */
        printf("\n Input File %s contains no records. \n", argv[1]);
        exit(1);
    }
    doranges();
    /* the size tree is not freed; the process ends here */
}

/*
 * doranges: close the current output file (if one is open) and write the
 * min / max / count summary of every tracked quantity to the file
 * "divrange".  The genotype lines are written only when the input carried
 * genotype labels.  If "divrange" cannot be created a message is printed and
 * nothing is written.  On return `ouf` is NULL.
 */
void doranges()
{   if(ouf != NULL)   /* callers may arrive here before any output was opened */
        fclose(ouf);
    ouf = fopen("divrange","w");
    if(ouf == NULL)
    {   printf("\n Output File divrange could NOT be opened. \n");
        return;
    }
    fprintf(ouf,"Time    %10.0lf %12.0lf %ld\n", timer.n, timer.x, timer.m);
    fprintf(ouf,"NumCell %10.0lf %12.0lf %ld\n", ncr.n,   ncr.x,   ncr.m);
    fprintf(ouf,"NumSize %10.0lf %12.0lf %ld\n", nsr.n,   nsr.x,   nsr.m);
    fprintf(ouf,"SizeDiv %10lf %12lf %ld\n", sdivr.n, sdivr.x, sdivr.m);
    fprintf(ouf,"AgeSize %10.0lf %12.0lf %ld\n", asar.n,  asar.x,  asar.m);
    if(genotypes)
    {   fprintf(ouf,"NumGeno %10.0lf %12.0lf %ld\n", ngr.n,   ngr.x,   ngr.m);
        fprintf(ouf,"GenoDiv %10lf %12lf %ld\n", gdivr.n, gdivr.x, gdivr.m);
        fprintf(ouf,"AgeGeno %10.0lf %12.0lf %ld\n", agar.n,  agar.x,  agar.m);
    }
    fclose(ouf);
    ouf = NULL;
}

/*
 * GenoIndex: convert the record's genotype label to an index into a node's
 * genotype array.  A label that maps to a negative index (characters outside
 * the expected range) cannot be stored; the ranges are written and the
 * program exits.
 */
static int GenoIndex(lo)
struct last_out  *lo;
{   int  idx = Lbl2Int(lo->label);

    if(idx < 0)
    {   printf("invalid genotype label, exiting\n");
        doranges();
        exit(1);
    }
    return idx;
}

/*
 * AddTree: apply one birth/death record to the size-class tree.
 *
 *   p   root of the (sub)tree to update; NULL means "no node for this size"
 *   lo  the record: bd ('b' or 'd'), size, label and itime are read
 *
 * Returns the (possibly newly allocated) root of the sub-tree, so callers
 * write `root = AddTree(root, lo)`.
 *
 * Side effects: updates totals.TotSizes / totals.TotGenos.  A size class or
 * genotype that becomes alive has its age set to -itime, which the next
 * divCountTree pass brings to 0; one that dies out has its age reset to 0.
 * On allocation failure, or when the first record for a size is a death,
 * the ranges are written and the program exits.
 *
 * NOTE: for an existing node every event code other than 'b' is treated as
 * a death.
 */
struct tnode * AddTree(p, lo)
struct tnode  *p;
struct last_out  *lo;
{   int  i = 0, j, osize;
    struct pop_dat  *pd, *grown;

    if(p == NULL) /* this is a new size class */
    {   if(lo->bd == 'd')
        {   printf("new node is a death, exiting\n");
            doranges();
            exit(0);
        }
        totals.TotSizes++;
        p = (struct tnode  *) calloc(1,sizeof(struct tnode));
        if(p == NULL)
        {   printf("calloc failure, exiting\n");
            doranges();
            exit(0);
        }
        p->size = lo->size;
        p->sd.end = 1;
        p->sd.age =(-lo->itime);
        if(genotypes)
        {   i = GenoIndex(lo);
            totals.TotGenos++;
            p->gsize = i + 1;
            p->g = (struct pop_dat  *)
                calloc(p->gsize,sizeof(struct pop_dat));
            if(p->g == NULL)
            {   printf("calloc failure, exiting\n");
                doranges();
                exit(0);
            }
            (p->g + i)->age =(-lo->itime);
            (p->g + i)->end = 1;
        }
        p->l = p->r = NULL;
    }
    else if(lo->size < p->size)
        p->l = AddTree(p->l,lo);
    else if(lo->size > p->size)
        p->r = AddTree(p->r,lo);
    else
    {   if(genotypes)
        {   i = GenoIndex(lo);
            if(i >= p->gsize) /* grow the genotype array, with some slack */
            {   osize = p->gsize;
                p->gsize = i + 5;
                if(p->g == NULL)
                    grown = (struct pop_dat  *)
                        calloc(p->gsize, sizeof(struct pop_dat));
                else
                    grown = (struct pop_dat  *)
                        realloc(p->g, p->gsize * sizeof(struct pop_dat));
                if(grown == NULL)
                {   printf("realloc failure, exiting\n");
                    doranges();
                    exit(0);
                }
                p->g = grown;
                /* realloc does not clear the newly added entries */
                for(j = osize; j < p->gsize; j++)
                {   pd = p->g + j;
                    pd->end = pd->age = 0;
                }
            }
        }
        if(lo->bd == 'b')
        {   p->sd.end++;
            if(p->sd.end == 1) /* size class comes (back) to life */
            {   totals.TotSizes++;
                p->sd.age =(-lo->itime);
            }
            if(genotypes)
            {   pd = p->g + i;
                if(!pd->end) /* genotype comes (back) to life */
                {   totals.TotGenos++;
                    pd->age =(-lo->itime);
                }
                pd->end++;
            }
        }
        else
        {   p->sd.end--;
            if(!p->sd.end)
            {   totals.TotSizes--;
                p->sd.age = 0;
            }
            if(genotypes)
            {   pd = p->g + i;
                pd->end--;
                if(!pd->end)
                {   totals.TotGenos--;
                    pd->age = 0;
                }
            }
        }
    }
    return p;
}

/*
 * divCountTree: walk the size-class tree and accumulate the diversity
 * figures into *d.
 *
 *   t  root of the (sub)tree to visit; NULL is allowed
 *   d  accumulator; the caller zeroes the fields it wants recounted
 *
 * For every size class with a positive population this adds to *d the class
 * count, the population, the class age and the term -(p * log(p)) with
 * p = population / totals.TotPop; when genotypes are tracked the same is
 * done for every living genotype of the class.
 *
 * Side effect: the traversal also advances ages -- rlo.itime (the time of
 * the current record) is added to the age of every living size class and
 * genotype -- so it must be called exactly once per record.
 */
void divCountTree(t, d)
struct tnode  *t;
struct DivDat  *d;
{   struct pop_dat  *pd;
    float  frac;
    int  i;

    if(t == NULL)
        return;
    if(t->sd.end > 0)
    {   d->num_sizes++;
        t->sd.age += rlo.itime;
        d->tot_size_age += (double) t->sd.age;
        d->num_cells_s += t->sd.end;
        frac = (float) t->sd.end / (float) totals.TotPop;
        d->size_div -= frac * log(frac);
        if(genotypes)
        {   for(i = 0; i < t->gsize; i++)
            {   pd = t->g + i;
                if(pd->end <= 0)
                    continue;
                frac = (float) pd->end / (float) totals.TotPop;
                d->geno_div -= frac * log(frac);
                d->num_genos++;
                pd->age += rlo.itime;
                d->tot_geno_age += (double) pd->age;
                d->num_cells_g += pd->end;
            }
        }
    }
    divCountTree(t->l,d);
    divCountTree(t->r,d);
}

/*
 * minmax2: fold the value v into the range tracker *r.
 * The first value seen (r->f set) becomes both minimum and maximum; later
 * values lower the minimum or raise the maximum as needed.  The sample
 * count r->m is incremented on every call.
 */
void minmax2(v, r)
double  v;
struct range  *r;
{   if(r->f)
    {   /* first sample: seed both bounds */
        r->n = v;
        r->x = v;
        r->f = 0;
    }
    if(r->n > v)
        r->n = v;
    if(r->x < v)
        r->x = v;
    r->m += 1;
}

/*
 * Lbl2Int: map a genotype label to an array index.
 * A label starting with '-' maps to 0.  Otherwise the first three characters
 * are read as a base-26 number with 'a' = 0, most significant first, and 1
 * is added, so "aaa" -> 1, "aab" -> 2, ..., "zzz" -> 17576.
 * The characters are not validated.
 */
int Lbl2Int(s)
char  s[];
{   int  k, code;

    if(s[0] == '-')
        return 0;
    code = 0;
    for(k = 0; k < 3; k++)
        code = 26 * code + (s[k] - 'a');
    return code + 1;
}

/*
 * FreeTTree: release every node of the size-class tree rooted at p,
 * together with each node's genotype array.  A NULL tree is accepted.
 */
void FreeTTree(p)
struct tnode  *p;
{   struct tnode  *right;

    /* recurse into the left sub-tree, iterate down the right one */
    while(p != NULL)
    {   FreeTTree(p->l);
        right = p->r;
        free(p->g);
        free(p);
        p = right;
    }
}

/*
 * SetLabel: store a genotype label in lo->label, truncating it to the
 * capacity of that field and always terminating it.
 */
static void SetLabel(lo, s)
struct last_out  *lo;
char  s[];
{   strncpy(lo->label, s, sizeof(lo->label) - 1);
    lo->label[sizeof(lo->label) - 1] = '\0';
}

/*
 * t_read: parse one input line into the running record *lo.
 *
 *   data       the line: a hex time value followed by up to three
 *              whitespace-separated tokens (event code, size, label)
 *   lo         record to update; fields missing from the line keep their
 *              previous values
 *   first      in/out flag; non-zero on the first record, cleared here
 *   genotypes  out; set on the first record to 1 if it has four fields,
 *              else 0, and read on later records to choose the parsing rules
 *
 * Returns 1 when a record was parsed, 0 when the line carries no record
 * (a "num_sp" header line, a blank line, or a line that does not start with
 * a hex number); in that case *lo and *first are left untouched.
 *
 * Tokens are limited to 8 characters so they fit the local buffers, and
 * labels are truncated to fit lo->label.
 */
int t_read(data, lo, first, genotypes)
char  data[];
struct last_out  *lo;
int  *first;
int  *genotypes;
{   unsigned long  itime = 0;
    int    nargs;
    char   v2[9], v3[9], v4[9];

    v2[0] = v3[0] = v4[0] = '\0';
    if(sscanf(data,"%8s", v2) != 1) return 0;
    if(!strcmp(v2,"num_sp")) return 0;
    nargs = sscanf(data,"%lx%8s%8s%8s", &itime, v2, v3, v4);
    if(nargs < 1) return 0;

    lo->itime = itime;
    lo->time += itime;     /* assumes lo structure initialized to zero */
    if(lo->time >= 1000000L)
    {   /* carry whole millions into ctime */
        lo->ctime += lo->time / 1000000L;
        lo->time %= 1000000L;
    }

    if(*first)
    {   /* the first record is expected to be complete; its field count
           tells whether genotype labels are present */
        *first = 0;
        *genotypes = (nargs == 4) ? 1 : 0;
        lo->bd = v2[0];
        sscanf(v3,"%u", &lo->size);
        if(*genotypes) SetLabel(lo, v4);
        else lo->label[0] = '\0';
        return 1;
    }

    if(*genotypes)
    {   switch(nargs)
        {   case 2:  /* one of: size, event code, label */
                if(isdigit((unsigned char) v2[0]))
                    sscanf(v2,"%u", &lo->size);
                else if(strlen(v2) == 1)
                    lo->bd = v2[0];
                else
                    SetLabel(lo, v2);
                break;
            case 3:  /* size+label, code+size, or code+label */
                if(isdigit((unsigned char) v2[0]))
                {   sscanf(v2,"%u", &lo->size);
                    SetLabel(lo, v3);
                }
                else
                {   lo->bd = v2[0];
                    if(isdigit((unsigned char) v3[0]))
                        sscanf(v3,"%u", &lo->size);
                    else
                        SetLabel(lo, v3);
                }
                break;
            case 4:  /* code, size and label */
                lo->bd = v2[0];
                sscanf(v3,"%u", &lo->size);
                SetLabel(lo, v4);
                break;
            default: /* time only */
                break;
        }
    }
    else
    {   switch(nargs)
        {   case 2:  /* size or event code */
                if(isdigit((unsigned char) v2[0]))
                    sscanf(v2,"%u", &lo->size);
                else
                    lo->bd = v2[0];
                break;
            case 3:  /* code and size */
                lo->bd = v2[0];
                sscanf(v3,"%u", &lo->size);
                break;
            default: /* time only, or unexpected extra field */
                break;
        }
    }
    return 1;
}
