/* genebank.c   6-5-92  genebank manager for the Tierra Simulator */
/* Tierra Simulator V3.11: Copyright (c) 1991, 1992 Tom Ray & Virtual Life */

#ifndef lint
static char sccsid[] = "%W%        %G%";

#endif

#include "license.h"
#include "tierra.h"
#include "extern.h"
#include <errno.h>
#include <sys/types.h>
#ifdef unix
#include <dirent.h>
#endif               /* unix */
#ifdef __TURBOC__
#include <dir.h>
#include <dos.h>
#define d_name ff_name
#endif               /* __TURBOC__ */


#ifdef MEM_CHK
#include <memcheck.h>
#endif

/*
 * CheckGenotype(ce, flags)
 * Check if cell ce is a new genotype.  If it is a new genotype, it will be
 * assigned a unique label.  If this genotype is not present in the RAM
 * genebank, it will be placed there, but there will be no demographic
 * information updated (this is not assumed to be a birth or death).
 * 
 *  flags: bit 0  (1): check .gen files
 *  flags: bit 1  (2): check .tmp files
 *  flags: bit 2  (4): check files even if rambank does not indicate presence
 *      of genotype on disk (used for startup of old or new soups).
 *  flags: bit 3  (8): use all files in the genebank to assemble the list in
 *      the rambank.  Each time a new size is checked, all genomes of that
 *      size in the genebank will become listed in the rambank as being on
 *      the disk (used for startup of old soups and for cumulative genebanks).
 *      However, the genomes will remain on disk until they are actually
 *      accessed, at which time they will be placed in the genequeue in RAM.
 *  flags: bit 4 (16): when reading a file from the disk genebank,
 *      zero bit 1 of the tgl->bits field, otherwise preserve it.
 * 
 * NEW : if new soup_in gloabl CumGeneBnk is non zero, bit 3 is forced ON 
 */

void CheckGenotype(ce, flags)
    Pcells  ce;  /* cell to be checked */
    I32u     flags;
{
    I32s si;
    I16s gi;

    if(CumGeneBnk)
        SetBit(&flags,3,ONE);
    si = ce->mm.s;
    ce->d.gi = Lbl2Int(ce->d.gen.label);
    if (IsNewSize(si)) 
        NewSize(ce, flags);
    if ((gi = IsInGenQueue(ce)) >= 0)
    {   gq_movtop(sl[si]->g[gi]);
        return;
    }
    if (/* (flags & 12) && */ IsInGenBank(ce, flags)) /* TOM RAY */
        return;
    NewGenotype(ce);  /* register new genotype in the lists */
}

I8s IsNewSize(si)
    I32s si;
{
    if (si < siz_sl && sl[si])
        return 0;
    return 1;
}

void NewSize(ce, flags)
    Pcells  ce;
    I32s     flags;
{
    I32s i, j, si;
    I16s gi;
    FILE *afp;
    head_t head;
    indx_t *indx, *tindx, gindx;
    SList Fp Fp  tsl;
    GList Fp Fp  tgl;
    Pgl sgl = NULL;

    si = ce->mm.s;
    if (si >= siz_sl)
    {   tsl = (SList Fp Fp) trecalloc(sl,
            sizeof(SList Fp) * (si + 1),
            sizeof(SList Fp) * siz_sl);
        if (tsl)
            sl = tsl;
        else if (sl)
        {   tfree(sl);
            sl = NULL;
            FEError(-300,EXIT,WRITE, "Tierra NewSize() sl trecalloc error");
        }
#ifndef __TURBOC__
        for (i = siz_sl; i <= si; i++)
            sl[i] = NULL;
#endif /* __TURBOC__ */
        siz_sl = si + 1;
#ifdef ERROR
        sprintf(mes[0], "genebank: recalloc, siz_sl = %ld", siz_sl - 1);
        FEMessage(1,mes);
#endif
    }
    sl[si] = (SList *) tcalloc(1, sizeof(SList));
    sl[si]->a_num = 20;
    sl[si]->g = (GList **) tcalloc(20, sizeof(GList *));
    if (IsBit(flags, 3))  /* use for old soups, and cumulative genebanks */
    {
#ifdef IBM3090
        sprintf(Buff, "%04ld.gen.d", si);
#else
        sprintf(Buff, "%s%04ld.gen", GenebankPath, si);
#endif
        afp = fopen(Buff, "rb");
        if (!afp) return;
        head = read_head(afp);
#ifdef __TURBOC__
        indx = &gindx;
#else  /* __TURBOC__ */
        indx = read_indx(afp, &head);
#endif /* __TURBOC__ */
        for (i = head.n - 1; i >= 0; i--)
        {
#ifdef __TURBOC__
            find_gen(afp, indx, "---", i);
            tindx = indx;
#else  /* __TURBOC__ */
            tindx = &indx[i];
#endif /* __TURBOC__ */
            sgl = get_gen(afp, &head, tindx, i);
            gi = Lbl2Int(sgl->gen.label);
            if (gi >= sl[si]->a_num)
            {   tgl = (GList Fp Fp) trecalloc(sl[si]->g,
                    sizeof(GList *) * (gi + 1),
                    sizeof(GList *) * sl[si]->a_num);
                if (tgl)
                    sl[si]->g = tgl;
                else if (sl[si]->g)
                {   tfree(sl[si]->g);
                    sl[si]->g = NULL;
                    FEError(-301,EXIT,WRITE,
                        "Tierra NewSize() sl[si]->g trecalloc error");
                }
#ifndef __TURBOC__
                for (j = sl[si]->a_num; j <= gi; j++)
                    sl[si]->g[j] = NULL;
#endif /* __TURBOC__ */
                sl[si]->a_num = gi + 1;
            }
            sl[si]->g[gi] = (Pgl)1;    /* permanent genotype name */
            if (sgl)
            {   if (sgl->genome)
                {   tfree(sgl->genome);
                    sgl->genome = NULL;
                }
                if (sgl->gbits)
                {   tfree(sgl->gbits);
                    sgl->gbits = NULL;
                }
                tfree(sgl);
                sgl = NULL;
            }
        }
        fclose(afp);
#ifndef __TURBOC__
        if (indx)
        {   thfree(indx);
            indx = NULL;
        }
#endif /* __TURBOC__ */
    }
}

I16s IsInGenQueue(ce) /* returns the index of the genotype in the list */
    Pcells  ce;
{
    I32s si = ce->mm.s;
    I16s gi = ce->d.gi, i;
    GList  *tgl;

    if (gi >= 0)
    {   if (gi < sl[si]->a_num && (I32u) sl[si]->g[gi] > 4)
            return gi;
        return -1;
    }
    for (i = 0; i < sl[si]->a_num; i++)
    {   tgl = sl[si]->g[i];
        if ((I32u) tgl > 4 && IsSameGen(si, soup + ce->mm.p, tgl->genome))
        {   ce->d.gi = i;
            strcpy(ce->d.gen.label, Int2Lbl(i));
            return i;
        }
    }
    return -1;
}

/*
 * Check to see if ce is in the disk genebank.  This will require the reading
 * of successive genotypes of this size from the .gen files on disk.
 * Each genotype that is read will be placed in the genequeue and the complete
 * genome will be placed in the rambank.
 */

I8s IsInGenBank(ce, flags)
    Pcells  ce;
    I32s     flags;
{
    static char *ext[] = {"gen", "tmp"};
    I32s i, si = ce->mm.s;
    I32u t, j, n;
    I16s gi = ce->d.gi;
    Pgl g;
    FILE *afp;
    head_t head;
    indx_t *indx, *tindx, gindx;
    GList Fp Fp  tgl;

    /*
     * return 0 if we are looking for a specific geneotype, and it does not
     * appear in the genequeue list, and we are not starting up a soup
     */
    if (gi >= 0 && !sl[si]->g[gi] && !IsBit(flags, 2))
        return 0;
    for (i = 0; i < 2; i++) if (IsBit(flags, i))
    {
#ifdef IBM3090
        sprintf(Buff, "%04ld.%s.d", si, ext[i]);
#else
        sprintf(Buff, "%s%04ld.%s", GenebankPath, si, ext[i]);
#endif
        if (afp = fopen(Buff, "rb"))
        {   head = read_head(afp);
#ifdef __TURBOC__
            indx = &gindx;
#else  /* __TURBOC__ */
            indx = read_indx(afp, &head);
#endif /* __TURBOC__ */
        }
        else continue;
        if (gi >= 0)  /* if we know what genotype we are looking for */
        {   if (gi >= sl[si]->a_num)
            {   tgl = (GList Fp Fp) trecalloc(sl[si]->g,
                    sizeof(GList Fp) * (gi+1),
                    sizeof(GList Fp) * sl[si]->a_num);
                if (tgl)
                    sl[si]->g = tgl;
                else if (sl[si]->g)
                {   tfree(sl[si]->g);
                    sl[si]->g = NULL;
                    FEError(-302,EXIT,WRITE,
                        "Tierra IsInGenBank() sl[si]->g trecalloc error");
                }
#ifndef __TURBOC__
                for (j = sl[si]->a_num; j <= gi; j++)
                    sl[si]->g[j] = NULL;
#endif /* __TURBOC__ */
                sl[si]->a_num = gi + 1;
            }
            n = find_gen(afp, indx, Int2Lbl(gi), head.n);
            if (n < head.n)
            {
#ifdef __TURBOC__
                tindx = indx;
#else  /* __TURBOC__ */
                tindx = &indx[n];
#endif /* __TURBOC__ */
                sl[si]->g[gi] = g = get_gen(afp, &head, tindx, n);
                if (IsBit(flags, 4))
                    SetBit(&g->bits, 1, 0);
                gq_add(g);
                /* if disk genotype matches soup genotype */
                /* name cell and put genotype in genequeue */
                if (IsSameGen(si, (FpInst) (soup + ce->mm.p), g->genome))
                {   ce->d.gen = g->gen;
                    ce->d.gi = gi;
#ifndef __TURBOC__
                    if (indx)
                    {   thfree(indx);
                        indx = NULL;
                    }
#endif /* __TURBOC__ */
                    fclose(afp);
                    return 1;
                }
            }
        }
        else /* we don't know what genotype we are looking for */
        /*
         * check only genotypes that are listed in the rambank as on
         * disk, but whose genomes are not held in the rambank
         * (0 < sl[si]->g[j] <= 4); or which are not listed in the
         * rambank at all (!sl[si]->g[j]), if bit 2 is set, which
         * means we are starting a new or old soup and don't have
         * a list of what is on disk. 
         */
        for (j = 0; j < sl[si]->a_num; j++)
        {   if (!((I32s) sl[si]->g[j] > 0 && (I32s) sl[si]->g[j] <= 4
                    || !sl[si]->g[j] && IsBit(flags, 2)))
                continue;
            n = find_gen(afp, indx, Int2Lbl(j), head.n);
            if (n < head.n)
            {
#ifdef __TURBOC__
                tindx = indx;
#else  /* __TURBOC__ */
                tindx = &indx[n];
#endif /* __TURBOC__ */
                sl[si]->g[j] = g = get_gen(afp, &head, tindx, n);
                if (IsBit(flags, 4))
                    SetBit(&g->bits, 1, 0);
                gq_add(g);
                /* if disk genotype matches soup genotype */
                /* name cell and put genotype in genequeue */
                if (IsSameGen(si, soup + ce->mm.p, g->genome))
                {   ce->d.gen = g->gen;
                    ce->d.gi = j;
#ifndef __TURBOC__
                    if (indx)
                    {   thfree(indx);
                        indx = NULL;
                    }
#endif /* __TURBOC__ */
                    fclose(afp);
                    return 1;
                }
            }
        }
        if (afp)
        {
#ifndef __TURBOC__
            if (indx)
            {   thfree(indx);
                indx = NULL;
            }
#endif /* __TURBOC__ */
            fclose(afp);
        }
    }
    return 0;
}

/*
 * add a new genotype to the RAM list
 */

void NewGenotype(ce)
    Pcells  ce;
{
    GList  *tgl;
    I32s   i, j, size = ce->mm.s, gi;
    I8s    found = 0;
    GList Fp Fp  tglp;

    /* find a free name if there is one */
    for (i = 0; i < sl[size]->a_num; i++) if (!sl[size]->g[i])
    {   gi = i;
        found = 1;
        break;
    }
    if (!found)
    {   gi = sl[size]->a_num;
        tglp = (GList Fp Fp) trecalloc(sl[size]->g,
            sizeof(GList Fp) * (gi + 4),
            sizeof(GList Fp) * gi);
        if (tglp)
            sl[size]->g = tglp;
        else if (sl[size]->g)
        {   tfree(sl[size]->g);
            sl[size]->g = NULL;
            FEError(-303,EXIT,WRITE,
                "Tierra NewGenotype() sl[size]->g trecalloc error");
        }
#ifndef __TURBOC__
        for (i = gi; i < gi + 4; i++)
            sl[size]->g[i] = NULL;
#endif /* __TURBOC__ */
        sl[size]->a_num += 4;
    }
    sl[size]->g[gi] = tgl =
        (GList *) tcalloc(1, sizeof(GList));
    strcpy(ce->d.gen.label, strcpy(tgl->gen.label, Int2Lbl(gi)));
    tgl->gen.size = ce->d.gen.size = size;
    ce->d.gi = gi;
    tgl->genome = (FpInst) tcalloc(size, sizeof(Instruction));
    if (tgl->genome == NULL)
        FEError(-304,EXIT,WRITE, "Tierra NewGenotype() tcalloc error 1");
    tgl->gbits = (FpGenB) tcalloc(size, sizeof(GenBits));
    if (tgl->gbits == NULL)
        FEError(-305,EXIT,WRITE, "Tierra NewGenotype() tcalloc error 2");
    gq_add(tgl);
    for (i = 0; i < size; i++) 
#if PLOIDY == 1
        tgl->genome[i] = soup[ad(ce->mm.p + i)];
#else
        for (j = 0; j < PLOIDY; j++)
        tgl->genome[i][j] = soup[ad(ce->mm.p + i)][j];
#endif
    tgl->originC = time(NULL);
    tgl->originI = InstExe;
    tgl->parent = ce->d.parent;
    tgl->bits = 0;
    tgl->pop = 0;
    if (reaped)
    {   tgl->MaxPropPop = (float) 1 / (float) NumCells;
        tgl->MaxPropInst = (float) size / (float) SoupSize;
        tgl->mpp_time = InstExe;
    }
    tgl->ploidy = ce->d.ploidy;
    tgl->track = ce->c.tr;
}

I32u WhoIs(ce, a)
    Pcells  Fp ce;
    Ind a;
{
    I8s md;

    if (a >= (*ce)->mm.p && a < (*ce)->mm.p + (*ce)->mm.s)
        return 0;          /* same cell */
    if (a >= (*ce)->md.p && a < (*ce)->md.p + (*ce)->md.s)
        return 1;          /* daughter cell */
    if (IsFree(a))
        return 3;          /* is free memory */
    WhichCell(a, ce, &md);
    if (md == 'm')
        return 2;          /* is other cell */
    return 4;              /* is the daughter of another cell */
}

I8s IsSameGen(size, g1, g2)/* compare two genomes */
    I32s size;
    FpInst g1, g2;
{
    I32s i, j;

    for (i = 0; i < size; i++) 
#if PLOIDY == 1
        if ((g1 + i)->inst != (g2 + i)->inst)
#else /* PLOIDY > 1 */
    for (j = 0; j < PLOIDY; j++)
        if ((g1 + i)[j]->inst != (g2 + i)[j]->inst)
#endif /* PLOIDY > 1 */
            return 0;
    return 1;
}

void gq_add(p)
    GList  *p;
{
    if (!NumGenRQ++)
    {   gq_top = gq_bot = p->a = p->b = p;
        return;
    }
    /* NumGenotypes hasn't been updated yet so add 1 in test */
    while (NumGenRQ > RamBankSiz && NumGenRQ > NumGenotypes + 1)
        gq_swap();
    p->b = gq_top;
    gq_top = gq_top->a = p->a = p;
}

I8s gq_swap()
{   GList  *p;
    I8s saved = 0;
    FILE *fp;
    head_t head;
    indx_t *indx, gindx;

    p = gq_bot;
    while (p->pop > 0 && p != gq_top)
        p = p->a;
    if (p->pop > 0)
    {   if (gq_bot != gq_top)
        {   p = gq_bot;
            GoDown = 1;
            IMode = PLN_STATS;
            sprintf(mes[0], "gq_swap: NumGenRQ = %ld  NumGenotypes = %ld\n",
                NumGenRQ, NumGenotypes);
            sprintf(mes[1],
                "         all genotypes extant, living genome deleted\n");
            sprintf(mes[2],
  "system coming down, then bring back up, to defragment memory, or:\n");
            sprintf(mes[3],
"try higher SavThrMem & SavThrPop, lower SoupSize, or turn off genebanker\n");
            FEMessage(4,mes);
        }
        else
        {   sprintf(mes[0], "gq_swap: NumGenRQ = %ld  NumGenotypes = %ld\n",
                NumGenRQ, NumGenotypes);
            sprintf(mes[1],
                "         attempt to swap out last living genome\n");
            FEMessage(2,mes);
            FEError(-306,EXIT,NOWRITE,
"try higher SavThrMem & SavThrPop, lower SoupSize, or turn off genebanker\n");
            return 0;
        }
    }
    saved = IsBit(p->bits, 0);
    sprintf(Buff,
#ifdef IBM3090
        "%04ld.%s.d",
#else  /* IBM3090 */
        "%s%04ld.%s", GenebankPath,
#endif /* IBM3090 */
        p->gen.size, "gen");
    if (!(fp = open_ar(Buff, p->gen.size, GFormat, -1)))  
        FEError(-307,EXIT,WRITE,    
            "Tierra gq_swap() unable to open genome file %s",Buff);
    head = read_head(fp);
#ifdef __TURBOC__
    indx = &gindx;
#else  /* __TURBOC__ */
    indx = read_indx(fp, &head);
#endif /* __TURBOC__ */
    add_gen(fp, &head, &indx, p);
#ifndef __TURBOC__
    if (indx)
    {   thfree(indx);
        indx = NULL;
    }
#endif /* __TURBOC__ */
    fclose(fp);
    gq_rem(p);
    if (p)
    {   if (p->gbits)
        {   tfree(p->genome);
            p->genome = NULL;
        }
        if (p->gbits)
        {   tfree(p->gbits);
            p->gbits = NULL;
        }
        sl[p->gen.size]->g[Lbl2Int(p->gen.label)] = (Pgl) saved;
        tfree(p);
        p = NULL;
    }
    return 1;
}

void gq_rem(p)
    GList  *p;
{
    if (gq_top == gq_bot)
        gq_top = gq_bot = 0;
    else if (p == gq_top)
        gq_top = p->b->a = p->b;
    else if (p == gq_bot)
        gq_bot = p->a->b = p->a;
    else
    {   p->a->b = p->b;
        p->b->a = p->a;
    }
    NumGenRQ--;
}

void gq_movtop(p)
    GList  *p;
{
    if (p == gq_top)
        return;
    gq_rem(p);
    gq_add(p);
}

GList *gq_read(si, gi)
{   GList *p = sl[si]->g[gi];
    I16s n;
    FILE *fp;
    head_t head;
    indx_t *indx, *tindx, gindx;

    if ((I32u) p > 4)
        return p;
    sprintf(Buff,
#ifdef IBM3090
        "%04ld.%s.d",
#else
        "%s%04ld.%s", GenebankPath,
#endif
        si, (I32u) p == 1 ? "gen" : "mem");
    if (!(fp = open_ar(Buff, si, GFormat, 0)))
        FEError(-308,EXIT,WRITE,    
            "Tierra gq_read() unable to open genome file %s",Buff);
    head = read_head(fp);
#ifdef __TURBOC__
    indx = &gindx;
#else  /* __TURBOC__ */
    indx = read_indx(fp, &head);
#endif /* __TURBOC__ */
    n = find_gen(fp, indx, Int2Lbl(gi), head.n);
#ifdef __TURBOC__
    tindx = indx;
#else  /* __TURBOC__ */
    tindx = &indx[n];
#endif /* __TURBOC__ */
    p = get_gen(fp, &head, tindx, n);
    fclose(fp);
#ifndef __TURBOC__
    if (indx)
    {   thfree(indx);
        indx = NULL;
    }
#endif /* __TURBOC__ */
    gq_add(p);
    return p;
}

void printq()
{   GList  *p;
    int    i = 1;

    printf("%ld:B ", NumGenRQ);
    if (p = gq_bot)
    {   printf("%ld%s[%ld] ", p->gen.size, p->gen.label, p->pop);
        while (p != gq_top)
        {   p = p->a, i++;
            printf("%ld%s[%ld] ", p->gen.size, p->gen.label, p->pop);
        }
    }
    printf("%d:T\n", i);
}

I16s Lbl2Int(s)
    I8s *s;
{
    if (s[0] == '-')
    return -1;
    return (s[2] - 'a') + (26 * (s[1] - 'a')) + (676 * (s[0] - 'a'));
}

I8s *Int2Lbl(i)
    I32s i;
{
    static I8s s[4];

    if (i < 0) {
    strcpy(s, "---");
    return s;
    }
    s[0] = 'a' + (I16s) i / 676;
    i %= 676;
    s[1] = 'a' + (I16s) i / 26;
    i %= 26;
    s[2] = 'a' + (I16s) i;
    s[3] = 0;
    return s;
}

/* rationale for the functioning of the genebank:

The term ``rambank'' refers to a collection of genotypes maintained in RAM
The term ``diskbank'' refers to a collection of genotypes maintained on disk
The term ``genebank'' refers to both the rambank and the diskbank

Genotype names have two parts: size-label, for example 0080aaa, 0045eat,
6666god.

1) When a creature is born its genotype will be compared to that of its parent.
   A) if they are the same, the daughter will be given the same name as the
      mother.
   B) if they are not the same, the genebank will be searched.
      a) if the daughter genotype is found in the genebank, it will be given
         the same name as the genotype that it matches in the bank.
      b) if the daughter genotype does not match any genotype in the bank,
         a new name will be created for it, and it will be entered into the
         rambank.
2) For each birth and death a count of the population of both the genotype
   and the size class involved will be incremente or decremented, so that we
   have a count of the current population of each genotype and each size class.
   This information is maintained in rambank.
3) If a genotype frequency crosses a critical threshold, the genotype name
   will become permanent and the genotype will be saved to the diskbank.
   There may be several types of thresholds: proportion of population
   (e.g., 2%), proportion of soup, or just numbers of creatures.
4) When a genotype frequency drops to zero:
   A) If the genotype never crossed the thresholds, the genotype will be
      removed from the genebank, and its unique name will become available for
      reuse.
   B) If the genotype crossed the threshold, gaining a permanent name, it
      should be retained in the genebank.
5) Periodically, Tierra saves the complete state of the machine (e.g., every
   100 million instructions executed).  At that time, the complete rambank
   is saved to the diskbank.  For this reason, 4 A applies also to genotypes
   which never became permanent, and these must be removed from the diskbank
   as well.  The bitfield in the genotype structure tells us if a genotype is
   saved to the diskbank, and if it is permanent.
6) If the rambank becomes ``too full'', some relatively unused portion of it
   should be swapped to the diskbank.  In DOS, ``too full'' could be signaled
   by a malloc failure.  In unix, ``too full'' could be signaled by some
   specified limit on how big the rambank should get, if this seems wise.
   That portion of the rambank to be swapped to the diskbank might consist of
   the least recently accessed size class.  For this reason it might be
   worthwhile to maintain a queue of size classes, ordered by last use.
*/
