/* (Not quite) Routines for doing pretty things ...  */

#include <stdio.h>
#include <math.h>
#include <stdlib.h>

#include "utils.h"
#include "linstats.h"
#include "distribs.h"

/* Prototypes: */

void GenAwk (double *beta, int K,
	     char **labels, int *maptable, int labelcount);
void GenEPP (char **labels, int K, double *beta, double *S, double sigma2);

/* ols(...) is the bottom line doer of all work, called by main() */

int 
ols (char **labels, float **data, int K, int ObsNum,
     int purebeta, int pscript, int epp,
     int *maptable, int labelcount)
     /* calls olsengine for computations and does display. */
{
  double *beta, *S;
  float *Yhat;
  double sigma2, R2, probv, F;
  int noinference, i, error, dof1, dof2;
  char *DASHLINE =
  "-------------+-----------------------------------------------";

  if ((beta = (double *) malloc (K * sizeof (double))) == NULL)
    {
      fprintf (stderr, "Out of memory when allocating beta.\n");
      return 1;
    }
  if ((S = (double *) malloc (K * sizeof (double))) == NULL)
    {
      fprintf (stderr, "Out of memory when allocating S.\n");
      return 1;
    }
  if ((Yhat = (float *) malloc (ObsNum * sizeof (float))) == NULL)
    {
      fprintf (stderr, "Out of memory when allocating Yhat.\n");
      return 1;
    }

  /* if pscript is on, you don't need to do inference. */
  noinference = FALSE;
  if (pscript)
    noinference = TRUE;
  if (1 == (error = olsengine (noinference, data, K, ObsNum,
			       beta, S, &sigma2, &R2, &F, Yhat)))
    {
      fprintf (stderr, "Errors in OLS calculations.\n");
      return 1;
    }

  if (purebeta)
    {
      for (i = 0; i < K; i++)
	printf (" %f ", beta[i]);
      printf ("%f\n", sqrt (sigma2));
      return 0;
    }
  if (pscript)
    {
      GenAwk (beta, K, labels, maptable, labelcount);
      return 0;
    }
  if (epp)
    {
      GenEPP (labels, K, beta, S, sigma2);
      return 0;
    }

  /* Now start printing results. */
  printf ("%d observations, %d rhs regressors.\n", ObsNum, K);
  printf ("%s\n", DASHLINE);
  printf ("%12s | %12s	%12s  %8s  %8s\n",
	  "Variable",
	  "Coefficient",
	  "Std.Error",
	  "t   ",
	  "Prob>|t|");
  printf ("%s\n", DASHLINE);

  for (i = 0; i < K; i++)
    {
      if (S[i] == 0.0)
	probv = 0.0;
      else
	{
	  if (1 == tprob ((double) -fabs (beta[i] / S[i]), ObsNum - K, &probv))
	    {
	      fprintf (stderr, "Fatal error in computing t probability.\n");
	      return 1;
	    }
	}
      printf ("%12s | %12.6f  %12.6f  %8.3f  %8.3f\n",
	      labels[i],
	      beta[i],
	      S[i],
	      (S[i] == 0.0 ? 0.0 : beta[i] / S[i]),
	      (probv * 2.0)
	);
    }
  printf ("%s\n", DASHLINE);

  dof1 = K - 1;
  dof2 = ObsNum - K;		/* F(dof1, dof2) */
  printf ("R2 = %5.3f, F(%d, %d) = %3.1f, Prob>F = %5.3f\n",
	  R2, dof1, dof2, F, pF (F, dof1, dof2));
  printf ("RMSE = %5.3f\n", sqrt (sigma2));
  return 0;
}

void 
GenAwk (double *beta, int K,
	char **labels, int *maptable, int labelcount)
     /* Recall layout of maptable:
	entries are defined from 1..K (not 1..labelcount),
	(maptable[i] == j) ==>
		i'th field of beta comes from j'th field of input line.
*/
{
  int i;
  char *ylabel;

  printf ("\n#\n# This awk program was generated by ols.\n");
  printf ("# It reads data from StdIn and prints predictions to StdOut.\n");
  printf ("#\n");
  printf ("# It is built for the data layout used when estimating.\n");
  printf ("# However, this should not be hard to modify.\n");
  printf ("#\n\n");

  printf ("{\n");
  for (i = 0; i < K; i++)
    printf ("\t%s = $%d;\n", labels[i], (1 + maptable[i]));
  ylabel = labels[K];
  printf ("\t%s = $%d;\n", ylabel, (1 + maptable[K]));

  printf ("\n\tpredicted = ");
  for (i = 0; i < K; i++)
    {
      printf ("(%f*%s)", beta[i], labels[i]);
      if (i != (K - 1))
	printf (" \\\n\t\t+ ");
      else
	printf (";\n");
    }

  /* Now to find y. */
  printf ("\terror = %s - predicted;\n", ylabel);
  printf ("\tprint predicted \" \" %s \" \" error;\n", ylabel);
  printf ("\tSSE += (error*error);\n");
  printf ("}\n");

  printf ("\nEND {\n");
  printf ("\tMSE = SSE/NR;\n");
  printf ("\tprint \"MSE = \" MSE \", SSE = \" SSE > \"/dev/tty\";\n");
  printf ("}\n\n");
  printf ("# End of generated script.\n\n");
}

/* Example of generated awk, when labels are unknown:
 * #
 * # This awk program was generated by ols.
 * # It reads data from StdIn and prints predictions to StdOut.
 * #
 * # It is built for the data layout used when estimating.
 * # However, this should not be hard to modify.
 * #
 *
 * {
 * 	$1 = $1;
 * 	$2 = $2;
 * 	$3 = $3;
 * 		# In out-of-sample prediction, $3 defaults to 0.0.
 *
 * 	predicted = (1.500000*$1) \
 * 		+ (0.700000*$2);
 * 	error = $3 - predicted;
 * 	print predicted " " $3 " " error;
 * 	SSE += (error*error);
 * }
 *
 * END {
 * 	MSE = SSE/NR;
 * 	print "MSE = " MSE ", SSE = " SSE > "/dev/tty";
 * }
 *
 * # End of script.
 */

void 
GenEPP (char **labels, int K, double *beta, double *S, double sigma2)
     /* Given estimation results, generate EPP.  This can be
post-processed into TeX for sure. */
{
  int i;

  printf ("caption Results of OLS Regression for %s\n", labels[K]);
  printf ("comment Regression Coefficients\n");
  for (i = 0; i < K; i++)
    printf ("estimate %s yes %12.6f %8.3f\n",
	    labels[i], beta[i], S[i]);
  printf ("blank\n");
  printf ("comment Regression Standard Error\n");
  printf ("estimate $\\sigma$ yes %5.3f not-estimated\n", sqrt (sigma2));
}

/* Example:
 * caption Typical display of results after OLS
 * comment Regression Coefficients
 * estimate $\beta$ yes -9.209 0.0286
 * estimate $\tau$ yes -6.000 0.234
 * estimate $\rho$ yes -1.078 0.0288
 * estimate $\delta$ yes -2.303 0.21
 * estimate $\theta$ yes -9.000 0.113
 * estimate $\xi$ yes -9.000 1.00
 * blank
 * comment Regression Standard Error
 * estimate $\sigma_v$ no 0.867
 */
