// what source (c) '96/97 Niklas Beisert, Nils Piepenbrink
// this header must stay at top of this file.
// you may only look at the source, you may not use it.
// you may not release anything containing this code or derivatives.
// you may modify for personal needs or experiments.
// you may use the algorithms in non commercial productions if you credit me.

#include <string.h>

#define M_PI 3.14159265358979323846

// x87 FPU helpers.  Each prototype below gets its body from a Watcom
// "#pragma aux" further down: arguments arrive on the FPU stack ([8087]),
// the result is returned on the FPU stack, and the body is the quoted
// instruction sequence.
long double fsin(long double);
long double fcos(long double);
long double fsqr(long double);
long double fpatan(long double, long double);
long double fsqrt(long double);
long double fprem(long double, long double);
long double ffabs(long double);
long double flog2(long double);
long double flog(long double);
long double flog10(long double);
long double fexp2(long double);
long double fexp(long double);
long double fexp10(long double);
long double fpow(long double, long double);
void finitmode();
// Loads 1F7Fh into the FPU control word.
// NOTE(review): by the x87 control-word layout this should mean all
// exceptions masked, 64-bit precision and round-toward-zero, which would
// make the float->int conversions in this file truncate -- confirm against
// the Intel manual.
// NOTE(review): the value is pushed but never popped here; confirm the
// caller's stack frame tolerates the 4 byte ESP shift.
#pragma aux finitmode = "push 1F7Fh" "fldcw word ptr [esp]"

// Single-instruction wrappers.
#pragma aux fsin parm [8087] value [8087] modify [8087] = "fsin"
#pragma aux fcos parm [8087] value [8087] modify [8087] = "fcos"
// NOTE(review): which argument ends up as y and which as x depends on the
// order Watcom pushes [8087] parameters -- confirm before relying on it.
#pragma aux fpatan parm [8087] [8087] value [8087] modify [8087] = "fpatan"
// fsqr is the square (st*st), not the square root.
#pragma aux fsqr parm [8087] value [8087] modify [8087] = "fmul st,st"
#pragma aux fsqrt parm [8087] value [8087] modify [8087] = "fsqrt"
// Partial remainder of st(0) by st(1); the xchg/pop drops the divisor.
#pragma aux fprem parm [8087] [8087] value [8087] modify [8087] = "fprem" "fxch st(1)" "fstp st(0)"
#pragma aux ffabs parm [8087] value [8087] modify [8087] = "fabs"
// Logarithms: fyl2x computes st(1)*log2(st(0)); the constant loaded first
// (1, ln 2, log10 2) selects base 2, e or 10.
#pragma aux flog2 parm [8087] value [8087] modify [8087] = "fld1" "fxch st(1)" "fyl2x"
#pragma aux flog parm [8087] value [8087] modify [8087] = "fldln2" "fxch st(1)" "fyl2x"
#pragma aux flog10 parm [8087] value [8087] modify [8087] = "fldlg2" "fxch st(1)" "fyl2x"
// Exponentials: 2^x is built as 2^(x rem 1) via f2xm1 (+1), then scaled by
// the integer part of x with fscale; the trailing xchg/pop drops x.
// fexp / fexp10 first multiply x by log2(e) / log2(10).
#pragma aux fexp2 parm [8087] value [8087] modify [8087] = "fld1" "fld st(1)" "fprem" "f2xm1" "faddp st(1),st" "fscale" "fxch st(1)" "fstp st(0)"
#pragma aux fexp parm [8087] value [8087] modify [8087] = "fldl2e" "fmulp st(1),st" "fld1" "fld st(1)" "fprem" "f2xm1" "faddp st(1),st" "fscale" "fxch st(1)" "fstp st(0)"
#pragma aux fexp10 parm [8087] value [8087] modify [8087] = "fldl2t" "fmulp st(1),st" "fld1" "fld st(1)" "fprem" "f2xm1" "faddp st(1),st" "fscale" "fxch st(1)" "fstp st(0)"
// Power: fyl2x forms st(1)*log2(st(0)), then the same 2^x sequence as above.
// NOTE(review): which parameter is base and which is exponent depends on the
// [8087] push order -- confirm.
#pragma aux fpow parm [8087] [8087] value [8087] modify [8087] = "fyl2x" "fld1" "fld st(1)" "fprem" "f2xm1" "faddp st(1),st" "fscale" "fxch st(1)" "fstp st(0)"

// Still-picture decoder, defined elsewhere.  demoproc() calls it with a
// 320*240 unsigned long buffer as pic and an embedded *_MPG blob as src.
int decodestillmpg(void *pic, const void *src);

// Routines and data with C linkage, defined outside this file (presumably
// in assembly -- each prototype carries a "#pragma aux" that fixes the
// registers its parameters are passed in and the registers it clobbers).
extern "C"
{
  // Called per frame as (dest, source A, source B, 320*240, table, table+256).
  void xfadetc32(void *dest, void *s1, void *s2, int len, void *mix, void *mix2);
    #pragma aux xfadetc32 parm [edi] [esi] [ebx] [eax] [edx] [ecx] modify [eax]
  // morf*: take a destination buffer, a source map and the mappos grid.
  // NOTE(review): the three-parameter variants list four parm registers.
  void morf3320hc(void *dst, const void *src, long (*map)[41][3]);
    #pragma aux morf3320hc parm [edi] [esi] [ebx] [ecx] modify [eax ebx ecx edx esi edi]
  void morfg320hc(void *dst, const void *src, long (*map)[41][3], const void *colmap);
    #pragma aux morfg320hc parm [edi] [esi] [ebx] [eax] [ecx] modify [eax ebx ecx edx esi edi]
  void morf3320tc(void *dst, const void *src, long (*map)[41][3]);
    #pragma aux morf3320tc parm [edi] [esi] [ebx] [ecx] modify [eax ebx ecx edx esi edi]
  // tx*: texture generators/filters; demoproc() uses them on 65536 byte maps.
  void txDecodeBWMap(unsigned char *dst, const unsigned char *src, int back, int fore);
    #pragma aux txDecodeBWMap parm [edi] [esi] [eax] [ecx] modify [edx]
  void txDecodeBWMapF(unsigned char *dst, const unsigned char *src, int fore);
    #pragma aux txDecodeBWMapF parm [edi] [esi] [eax] modify [ecx edx]
  void txHexagonMap(unsigned char *dest, int xn, int yn, int c1, int c2, int c3);
    #pragma aux txHexagonMap parm [edi] [eax] [ecx] [ebx] [edx] [esi]
  void txTwirlify(unsigned char *dst, const unsigned char *src, const unsigned char *xmap, const unsigned char *ymap, const signed char *xfn, const signed char *yfn);
    #pragma aux txTwirlify parm [edi] [ebx] [esi] [eax] [edx] [ecx]
  void txPlasmaMap(unsigned char *buf, int colbits, int p, long seed);
    #pragma aux txPlasmaMap parm [ebx] [ecx] [edi] [eax] modify [eax ebx ecx edx esi edi]
  void txSmoothMap(unsigned char *dest, const unsigned char *src);
    #pragma aux txSmoothMap parm [edi] [ebx] modify [eax ebx ecx edx esi edi]

  // Embedded data blobs linked in from elsewhere.
  extern unsigned char GGGG_RLE[];    // input to txDecodeBWMap / txDecodeBWMapF
  extern unsigned char TUNNEL_PAL[];  // read as 256 r,g,b byte triples
  extern unsigned char CT_MPG[];      // input to decodestillmpg
  extern unsigned char SEEN_MPG[];    // input to decodestillmpg
};


// Music interface, defined elsewhere.  demoproc() treats a zero return from
// musicstart() as failure and uses musictimer() as the clock for all effects.
int musicstart(void *song);
void musicstop();
double musictimer();

// Video/keyboard interface, defined elsewhere.  demoproc() treats a zero
// return from videoinit() as failure and a non-zero keyhit() as "abort".
// videogetcol16() is called with channel values produced by colconv().
unsigned short videogetcol16(int r, int g, int b);
int videoinit();
void videoclose();
void videoput16(void *buf);   // receives scrbuf (320*240 shorts)
void videoput32(void *buf);   // receives a 320*240 unsigned long buffer
int keyhit();

// Static work memory.  demoproc() carves aligned regions out of these, so
// the sizes include slack for the alignment step.

// Five 320*240 picture buffers; during precalculation the start of this
// array is also reused as 64K-aligned scratch texture maps (tmap).
static unsigned long mpgbufs[5][320*240];
// Backing store for two 256 byte fade tables, aligned to 256 bytes.
static char fadetabmem[0x300];
// Backing store for the colour maps and textures, aligned to 0x40000 bytes.
static char mapptr[22*0x10000];
// Backing store for four 256-entry signed tables, aligned to 256 bytes.
static char twmapptr[0x500];
// 31 rows x 41 columns of grid points, three longs each, rewritten per frame.
static long mappos[31][41][3];
// 16-bit output frame handed to videoput16().
static short scrbuf[320*240];


// Converts a hue / whiteness / intensity triple into r, g, b components.
//
//   h      hue; one unit is a full turn of the colour wheel.  Only the
//          position within the turn matters, so any value (including
//          negative ones) wraps onto the six sectors.
//   s      whitening: 0 keeps the pure hue, 1 pulls every channel to 256.
//   i      intensity: every channel is multiplied by it at the end.
//   r,g,b  outputs.  A fully lit channel is 256 (not 255) before the
//          multiplication by i; fractional results are converted to int.
static void colconv(float h, float s, float i, int &r, int &g, int &b)
{
  h*=6;

  // floor(h) without <math.h>: the conversion truncates toward zero, so
  // step one down when that rounded a negative value up.
  int t=(int)h;
  if (h<t)
    t--;
  h-=t;                       // position inside the sector, 0<=h<1

  // C's % keeps the sign of the dividend; fold the sector into 0..5 so a
  // negative hue can never fall through the switch with r,g,b unassigned.
  int sector=t%6;
  if (sector<0)
    sector+=6;

  switch (sector)
  {
  case 0: r=256;       g=256*h;     b=0;         break;
  case 1: r=256*(1-h); g=256;       b=0;         break;
  case 2: r=0;         g=256;       b=256*h;     break;
  case 3: r=0;         g=256*(1-h); b=256;       break;
  case 4: r=256*h;     g=0;         b=256;       break;
  case 5: r=256;       g=0;         b=256*(1-h); break;
  }

  // blend towards white by s
  r+=(256-r)*s;
  g+=(256-g)*s;
  b+=(256-b)*s;

  // scale by intensity
  r*=i;
  g*=i;
  b*=i;
}


// Smaller / larger of two values.  Each argument is evaluated twice, so
// only pass expressions without side effects.
#define min(a,b) (((a)<(b))?(a):(b))
#define max(a,b) (((a)>(b))?(a):(b))

// Runs the complete production.
//
// Phase 1 (precalculation): builds every colour map and texture into the
// static work buffers and decodes the two still pictures.  keyhit() is
// polled between steps; a key press aborts before video or music start.
//
// Phase 2 (playback): starts video and music, then runs four effect loops
// one after the other.  Each loop is driven by musictimer() and ends when
// its time window is over or a key is hit.  Video and music are shut down
// at the end.
//
// Takes no arguments and returns nothing; all state lives in the file's
// static buffers.
void demoproc()
{
  finitmode();

  int i,j;

  unsigned long *colmap1;
  unsigned long *colmap2;
  unsigned short *colmap3;
  unsigned short *colmap4;
  unsigned short *colmap5;
  unsigned char *texmap4;
  unsigned short *colmap6;
  unsigned char *texmap6;
  char *fadetabs;

  // Carve aligned regions out of the static buffers.  tmap (scratch
  // textures) lives in the picture buffers, which are only filled with
  // pictures after the last tmap use.
  unsigned char *map=(unsigned char*)(((unsigned long)mapptr+0x3FFFF)&~0x3FFFF);
  unsigned char (*tmap)[65536]=(unsigned char(*)[65536])((((unsigned long)mpgbufs)+0xFFFF)&~0xFFFF);
  signed char (*twmap)[256]=(signed char(*)[256])(((unsigned long)twmapptr+0xFF)&~0xFF);
  fadetabs=(char*)((((long)fadetabmem)+0x100)&~0xFF);

  // Layout inside the aligned map area.
  colmap1=(unsigned long*)(map+0x00000);
  colmap2=(unsigned long*)(map+0x40000);
  colmap3=(unsigned short*)(map+0x80000);
  colmap4=(unsigned short*)(map+0xa0000);
  colmap5=(unsigned short*)(map+0xc0000);
  colmap6=(unsigned short*)(map+0xe0000);
  texmap6=(unsigned char*)(map+0x100000);
  texmap4=(unsigned char*)(map+0x110000);

  // ---- colour map 1 (32-bit) ----
  txPlasmaMap(tmap[0], 8, 6000, 240897);
  txPlasmaMap(tmap[2], 8, 5000, 20234857);
  txPlasmaMap(tmap[4], 8, 3000, 20456857);
  for (i=0; i<256; i++)
  {
    twmap[0][i]=fsin(2*M_PI/256*i*3)*20*i/256;
    twmap[1][i]=fcos(2*M_PI/256*i*3)*20*i/256;
    twmap[2][i]=fsin(2*M_PI/256*i*3)*10*i/256;
    twmap[3][i]=fcos(2*M_PI/256*i*3)*10*i/256;
  }
  txTwirlify(tmap[1], tmap[2], tmap[0], tmap[4], twmap[0], twmap[1]);
  txTwirlify(tmap[4], tmap[0], tmap[1], tmap[0], twmap[2], twmap[3]);
  for (i=0; i<65536; i++)
  {
    int r,g,b;
    colconv(tmap[4][i]/400.0+0.9, tmap[1][i]/256.0, tmap[1][i]/256.0, r, g, b);
    colmap1[i]=(r<<16)|(g<<8)|b;
  }
  if (keyhit())
    return;

  // ---- colour maps 2 (32-bit) and 3 (16-bit), same picture ----
  txPlasmaMap(tmap[2], 8, 6000, 240897);
  txDecodeBWMap(tmap[3], GGGG_RLE, 208, 0);
  for (i=0; i<30; i++)
  {
    txSmoothMap(tmap[5], tmap[3]);
    txSmoothMap(tmap[3], tmap[5]);
    if (keyhit())
      return;
  }
  for (i=0; i<256; i++)
  {
    twmap[0][i]=fsin(2*M_PI/256*i)*100;
    twmap[1][i]=fcos(2*M_PI/256*i)*100;
    twmap[2][i]=fsin(2*M_PI/256*i*3)*20*i/256;
    twmap[3][i]=fcos(2*M_PI/256*i*3)*20*i/256;
  }

  txPlasmaMap(tmap[5], 6, 2000, 2423457);
  // add 64 to every other band of 32 rows
  for (j=0; j<256; j++)
    for (i=0; i<256; i++)
      if (j&32)
        tmap[5][j*256+i]+=64;

  txPlasmaMap(tmap[4], 8, 500, 623457);
  txTwirlify(tmap[4], tmap[5], tmap[4], tmap[4], twmap[0], twmap[1]);
  txDecodeBWMapF(tmap[4], GGGG_RLE, 255);
  for (i=0; i<20; i++)
  {
    txSmoothMap(tmap[5], tmap[4]);
    txSmoothMap(tmap[4], tmap[5]);
    if (keyhit())
      return;
  }
  txTwirlify(tmap[5], tmap[2], tmap[4], tmap[4], twmap[2], twmap[3]);
  memcpy(tmap[2], tmap[5], 0x10000);
  for (i=0; i<65536; i++)
  {
    int r,g,b;
    colconv(tmap[2][i]/400.0+0.5, tmap[3][i]/256.0, tmap[4][i]/256.0, r, g, b);
    colmap2[i]=(r<<16)|(g<<8)|b;
    colmap3[i]=videogetcol16(r,g,b);
  }
  if (keyhit())
    return;

  // ---- texture 4 (used by the "plasma 1" part) ----
  txHexagonMap(texmap4, 4,8,192, 0, 0);
  for (i=0; i<3; i++)
  {
    txSmoothMap(tmap[3], texmap4);
    txSmoothMap(texmap4, tmap[3]);
    if (keyhit())
      return;
  }
  txPlasmaMap (tmap[3], 7, 1342,234234);
  for (i=0; i<0x10000; i++)
    texmap4[i]+=tmap[3][i];
  for (i=0; i<8; i++)
  {
    txSmoothMap(tmap[3], texmap4);
    txSmoothMap(texmap4, tmap[3]);
    if (keyhit())
      return;
  }

  // ---- colour map 4 (16-bit) ----
  txPlasmaMap(tmap[0], 8, 5000, 240897);
  txPlasmaMap(tmap[2], 8, 2340, 20234857);
  txPlasmaMap(tmap[4], 8, 3000, 20456857);
  for (i=0; i<256; i++)
  {
    twmap[0][i]=fsin(2*M_PI/256*i*3)*10*i/256;
    twmap[1][i]=fcos(2*M_PI/256*i*3)*10*i/256;
    twmap[2][i]=fsin(2*M_PI/256*i*3)*7*i/256;
    twmap[3][i]=fcos(2*M_PI/256*i*3)*7*i/256;
  }
  txTwirlify(tmap[1], tmap[2], tmap[0], tmap[4], twmap[0], twmap[1]);
  txTwirlify(tmap[4], tmap[0], tmap[1], tmap[0], twmap[2], twmap[3]);
  for (i=0; i<65536; i++)
  {
    int r,g,b;
    colconv(tmap[4][i]/400.0+0.5, tmap[1][i]/256.0*0.7, tmap[1][i]/256.0, r, g, b);
    colmap4[i]=videogetcol16(r,g,b);
  }
  if (keyhit())
    return;

  // ---- colour map 5 (16-bit) ----
  txPlasmaMap(tmap[0], 8, 4546, 456797);
  txPlasmaMap(tmap[2], 8, 2645, 3234857);
  txPlasmaMap(tmap[4], 8, 2843, 26857);
  for (i=0; i<256; i++)
  {
    twmap[0][i]=fsin(2*M_PI/256*i*3)*13*i/256;
    twmap[1][i]=fcos(2*M_PI/256*i*3)*12*i/256;
    twmap[2][i]=fsin(2*M_PI/256*i*3)*7*i/256;
    twmap[3][i]=fcos(2*M_PI/256*i*3)*7*i/256;
  }
  txTwirlify(tmap[1], tmap[2], tmap[0], tmap[4], twmap[0], twmap[1]);
  txTwirlify(tmap[4], tmap[0], tmap[1], tmap[0], twmap[2], twmap[3]);
  for (i=0; i<65536; i++)
  {
    int r,g,b;
    colconv(tmap[4][i]/400.0+0.4, (tmap[1][i]-tmap[4][i])<0?0:(tmap[1][i]-tmap[4][i])/512.0*0.7, (tmap[1][i]+tmap[4][i])/512.0, r, g, b);
    // the computed green is not used; the mean of red and blue replaces it
    colmap5[i]=videogetcol16(r,(r+b)/2,b);
  }
  if (keyhit())
    return;

  // ---- colour map 6 (16-bit): the 256 palette entries in 256 variants ----
  // j selects the variant: low j is whitened and dark, high j is the plain
  // palette colour at higher brightness.
  for (j=0; j<256; j++)
    for (i=0; i<256; i++)
    {
      int r,g,b;
      r=TUNNEL_PAL[3*i+0]<<2;
      g=TUNNEL_PAL[3*i+1]<<2;
      b=TUNNEL_PAL[3*i+2]<<2;
      float s=(255-j)*(255-j)/256.0/256;
      if (s>1)
        s=1;
      if (s<0)
        s=0;
      r+=(256-r)*s;
      g+=(256-g)*s;
      b+=(256-b)*s;
      r*=(64+j)/320.0;
      g*=(64+j)/320.0;
      b*=(64+j)/320.0;
      colmap6[j*256+i]=videogetcol16(r,g,b);
    }

  // ---- texture 6 ----
  txPlasmaMap(tmap[2], 8, 3000, 20456857);
  txPlasmaMap(tmap[1], 8, 3000, 240897);
  txPlasmaMap(tmap[0], 8, 5023, 23418);
  for (i=0; i<256; i++)
  {
    twmap[0][i]=fsin(2*M_PI/256*i*3)*5*i/256;
    twmap[1][i]=fcos(2*M_PI/256*i*3)*5*i/256;
  }
  txTwirlify(texmap6, tmap[0], tmap[1], tmap[2], twmap[0], twmap[1]);
  if (keyhit())
    return;

  // ---- still pictures ----
  // From here on mpgbufs holds pictures; the tmap scratch maps are dead.
  // [1], [2]: decoded pictures, [3]: grey version of [1], [4]: black.
  decodestillmpg(mpgbufs[1], SEEN_MPG);
  decodestillmpg(mpgbufs[2], CT_MPG);
  for (i=0; i<320*240; i++)
  {
    // unsigned so that channel values >=128 stay positive even where plain
    // char is signed
    unsigned char r=mpgbufs[1][i]>>16;
    unsigned char g=mpgbufs[1][i]>>8;
    unsigned char b=mpgbufs[1][i]>>0;
    int in=r*0.299+g*0.587+b*0.114;
    mpgbufs[3][i]=(in<<16)|(in<<8)|in;
  }
  memset(mpgbufs[4],0,320*240*4);
  if (keyhit())
    return;

  // ---- start playback ----
  if (!videoinit())
    return;
  if (!musicstart(0))
  {
    // video was already initialised; shut it down again before bailing out
    videoclose();
    return;
  }

  // ---- part 1: picture fades, then the warped colour maps ----
  // Runs while musictimer() < 0.14*288.
  // NOTE(review): 0.14 looks like the length of one music time unit --
  // confirm against the player.
  while (!keyhit())
  {
    float t=musictimer();
    if (t>=0.14*288)
      break;
    float mfac=(t>26)?(65536*256*1.5):(65536*256.0);
    float frametime;
    if (t>26)
    {
      frametime=(t-26)*0.2;
      if (t>36)
        frametime+=(t-36)*(t-36)*0.04;
    }
    else
      frametime=(t-6)*0.09;

    // The warp grid is only needed once the warped picture is shown.
    // Only components [0] and [1] are written in this part.
    if (t>6)
      for (j=0; j<=30; j++)
        for (i=0; i<=40; i++)
        {
          float x,y;

          x=i*0.05;
          y=j*0.05;
          x+=y*fexp(fsin(x+frametime*0.7)*1)*0.52389;
          y+=(1-x)*fexp(fcos(y+frametime*4.2345)*1)*0.3;
          x+=(1-y)*fexp(fcos(y+frametime*2)*1)*0.24;
          y+=x*fexp(fsin(x+frametime*1.5234)*1)*0.3235;
          x+=y*(1-y)*fsin(x+y+frametime*2.54)*0.3235;

          mappos[j][i][0]=x*mfac;
          mappos[j][i][1]=y*mfac;
          // shift up by another 8 bits, dropping the top bits
          // (presumably the layout the morf* routines expect)
          mappos[j][i][0]<<=8;
          mappos[j][i][1]<<=8;
        }

    if (t>36)
    {
      // 16-bit path: warped colour map 3 straight to the screen
      morf3320hc(scrbuf, colmap3, mappos);
      videoput16(scrbuf);
    }
    else
    {
      // 32-bit path: cross-fade between two pictures
      int fadetime;
      if (t>6)
        fadetime=fcos(M_PI*(t-6)/10.0)*127+128;
      else
        fadetime=fcos(M_PI*t/3)*127+128;
      void *fade1=(t>6)?mpgbufs[4]:mpgbufs[3];
      void *fade2=(t>16)?mpgbufs[2]:(t>3)?mpgbufs[1]:mpgbufs[4];
      if (t>6)
        morf3320tc(mpgbufs[4], (t>26)?colmap2:colmap1, mappos);

      // two complementary 256-entry scaling tables
      for (i=0; i<256; i++)
      {
        fadetabs[i]=(i*fadetime)>>8;
        fadetabs[i+256]=(i*(255-fadetime))>>8;
      }
      xfadetc32(mpgbufs[0], fade2, fade1, 320*240, fadetabs, fadetabs+256);

      videoput32(mpgbufs[0]);
    }
  }

  // ---- part 2: texture 4 with colour map 4 / 5 ----
  // Lasts 128*0.14; the colour map and arm count switch every 0.14*32.
  while (!keyhit())
  {
    float frametime=musictimer()-0.14*288;
    if (frametime>=128*0.14)
      break;
    int arms=frametime/(0.14*32);
    arms=(arms&1)?2:1;

    float fak=(1-fexp(-frametime*0.3))*64;

    for (j=0; j<=30; j++)
      for (i=0; i<=40; i++)
      {
        // grid point relative to two moving centres
        float cj=(j-fsin(frametime*0.5)*13.0-15);
        float ci=i-fcos(frametime*1.3)*16.0-20.0;
        float cj2=(j-fsin(1.234+frametime*.1)*13.0-15);
        float ci2=i-fcos(2.342+frametime*0.443)*16.0-20.0;
        float at1=arms*fpatan(ci,cj);
        float at2=fpatan(ci2,cj2);

        mappos[j][i][0]=i*0.05*(30+fak+fak*(1+fsin((ci2-cj2)/10))*fsin(frametime+2*(0.5*(at1+at2))))*32768;
        mappos[j][i][1]=j*0.05*(30+fak+fak*(1+fcos((ci-cj)/10))*fcos(frametime+2*(0.5*(at1+at2))))*32768;
        mappos[j][i][2]=((frametime)+127+127.0*fsin(4*frametime+2*((at1+at2))))*32768;

        mappos[j][i][0]<<=8;
        mappos[j][i][1]<<=8;
        mappos[j][i][2]<<=8;
      }

    morfg320hc(scrbuf, texmap4, mappos, (arms==2)?colmap5:colmap4);
    videoput16(scrbuf);
  }

  // ---- part 3: warped colour map 4 ----
  // Lasts 128*0.14.
  while (!keyhit())
  {
    float frametime=musictimer()-0.14*416;
    if (frametime>=128*0.14)
      break;

    for (j=0; j<=30; j++)
      for (i=0; i<=40; i++)
      {
        float x,x2,y,y2;

        x2=x=i*0.05;
        y2=y=j*0.05;

        // first displacement: the larger of two wave terms per axis
        float f1=(1+fsin(fsin(x)+frametime*0.7)+fcos(fsin(y)+frametime*0.3));
        float f2=0.5+0.5*fsin(x+y+frametime*1.4);
        x+=max(f1,f2);

        f1=(1+fcos(fsin(y)+frametime*0.345)+fsin(fsin(x)+frametime*0.64));
        f2=0.5+0.5*fcos(y+x+frametime*1.3);
        y+=max(f1,f2);

        // second, independent displacement
        x2+=(1+fsin(fsin(x2)+frametime*0.25)+fcos(fsin(y2)+frametime*0.3));
        y2+=(1+fcos(fsin(y)+frametime*0.565)+fsin(fsin(x2)+frametime*0.64));

        mappos[j][i][0]=max(x,x2)*65536.0*128;
        mappos[j][i][1]=max(y,y2)*65536.0*128;
        mappos[j][i][2]=(min(x,x2)*min(y,y2))*65536.0*128;

        mappos[j][i][0]<<=8;
        mappos[j][i][1]<<=8;
        mappos[j][i][2]<<=8;
      }
    morf3320hc(scrbuf, colmap4, mappos);
    videoput16(scrbuf);
  }

  // ---- part 4: texture 6 with colour map 6 in polar coordinates ----
  // Lasts 320*0.14; individual distortions are switched on over time.
  while (!keyhit())
  {
    float frametime=musictimer()-0.14*544;
    if (frametime>320*0.14)
      break;

    float movex=-frametime*0.3;
    float movey=fsin(frametime*0.15)*2;
    float wobyf=7;
    float wobyi=0.25;
    float wobyp=frametime*0.52635;
    float twistf=0;
    float twisti=0;
    float twistp=frametime*0.9;
    float flat=(1+fsin(frametime*2))*0.5;
    float highfi;
    float lowfi;
    float intens;

    // brightness ramps up over the first 32*0.14 and down over the last
    intens=1;
    if (frametime<32*0.14)
      intens=frametime/(32*0.14);
    if (frametime>288*0.14)
      intens=1-(frametime-288*0.14)/(32*0.14);

    // each distortion is zero until its start time, then approaches its
    // final strength as 1-exp(-k*(frametime-start))
#define LOWFIIN 10
    lowfi=0;
    if (frametime>LOWFIIN)
      lowfi=4*(1-fexp((LOWFIIN-frametime)*0.3));

#define HIGHFIIN 15
    highfi=0;
    if (frametime>HIGHFIIN)
      highfi=0.5*(1-fexp((HIGHFIIN-frametime)*0.3));

#define WOBYIIN 5
    wobyi=0;
    if (frametime>WOBYIIN)
      wobyi=0.3*(1-fexp((WOBYIIN-frametime)*0.3));

#define TWISTIN 20
#define TWISTIN2 35
    twistf=0;
    twisti=0;
    if (frametime>TWISTIN)
    {
      twistf=3*(1-fexp((TWISTIN-frametime)*0.5));
      twisti=0.5*(1-fexp((TWISTIN-frametime)*0.5));
    }
    if (frametime>TWISTIN2)
      twistf+=6*(1-fexp((TWISTIN2-frametime)*0.8));

    for (j=0; j<=30; j++)
      for (i=0; i<=40; i++)
      {
        long *mp=mappos[j][i];

        float x,y,q;

        // grid point relative to the (moving) centre; the .5 offsets keep
        // every grid point off the exact centre before the motion is added
        x=(i-20.5);
        y=(j-15.5);

        x+=fcos(frametime*0.31234)*4;
        y+=fsin(frametime*0.40454)*2;

        x+=highfi*fsin(y*0.6+frametime*1.3-x*0.43);
        y+=highfi*fcos(x*0.6+frametime*0.6+y*0.43);
        x+=lowfi*fcos(y*0.1*(1+fcos(frametime*0.1))+frametime*1.4+x*0.1234);

        // to polar: q = angle in turns, x = radius
        q=fpatan(y,x)/(2*M_PI);
        x=fsqrt(x*x+y*y);

        x*=1+fsin(twistf*2*M_PI*q+twistp)*twisti;

        // y becomes the third component, derived from the radius
        y=intens*(fexp(-10*0.6/x)*1.3-0.3);

        // blend between a radius-proportional and a 1/radius mapping
        x=5*(0.002*x)*flat+(1-flat)*5/x+movex;
        q+=movey+wobyi*fsin(wobyf*x+wobyp);

        mp[0]=x*65536*256;
        mp[1]=q*65536*256;
        mp[2]=y*65536*256;
        // clamp the third component to 1..255 (in 16.16) before shifting
        if (mp[2]<1*65536)
          mp[2]=1*65536;
        if (mp[2]>255*65536)
          mp[2]=255*65536;
        mp[0]<<=8;
        mp[1]<<=8;
        mp[2]<<=8;
      }

    morfg320hc(scrbuf, texmap6, mappos, colmap6);
    videoput16(scrbuf);
  }

  musicstop();
  videoclose();
}
