/*
	Simple linear regression

	Input : A reldb file of the form :
	
		xname	yname
		-----	-----
		x1	y1
		x2	y2
		etc
		
	Usage : regress < datafile
	
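	Output : to standard output: the number of observations, means,
		standard deviations, the correlation coefficient, the fitted
		regression line and an anova table.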

NOTE: regress in Gary Perlman's package is much better than this
and should be preferred

									*/

#include <math.h>
#include <stdio.h>
/*
	Read one header field from stdin into dst (capacity cap bytes,
	terminating NUL included), stopping at the character delim, which
	is consumed but not stored.  Characters that do not fit in dst are
	read and discarded, so an over-long name is truncated instead of
	overflowing the buffer.

	Returns 0 on success, -1 if end of file or a newline is met before
	delim (a newline can never be part of a name).
									*/
static int read_name(char *dst, size_t cap, int delim)
{
	size_t len = 0;
	int ch;

	while ((ch = getchar()) != delim) {
		if (ch == EOF || ch == '\n') {
			dst[len] = '\0';
			return -1;
		}
		if (len + 1 < cap)		/* keep room for the NUL */
			dst[len++] = (char)ch;
	}
	dst[len] = '\0';
	return 0;
}

/* Discard the rest of the current input line (or up to end of file). */
static void skip_line(void)
{
	int ch;

	while ((ch = getchar()) != EOF && ch != '\n')
		;
}

/*
	Simple linear regression of y on x.

	Reads from stdin a header line "xname<TAB>yname", a second line
	that is ignored, and then whitespace-separated x y pairs until end
	of file.  Prints means, standard deviations, the correlation, the
	fitted line and an anova table to stdout.

	Returns 0 on success.  Returns 1, with a message on stderr, if the
	header is malformed, a data row cannot be parsed, fewer than three
	observations are given, or all x-values are identical.
									*/
int main(void)
{
	char xname[30], yname[30];	/* variable names */
	double x, y;			/* current observation */
	int got;			/* number of fields scanf converted */

	/* running sums; they must start at zero */
	double n = 0., sumx = 0., sumy = 0.;
	double sumxx = 0., sumyy = 0., sumxy = 0.;

	double xbar, ybar;		/* means */
	double stdevx, stdevy;		/* sample standard deviations */
	double ssxx, ssyy, ssxy;	/* sums of squared deviations */
	double sstot, sse, ssreg;	/* the ss's in the anova table */
	double mstot, mse, msreg;	/* the ms's in the anova table */
	double F;			/* F-value for testing slope = 0 */
	double alpha, beta;		/* intercept and slope */
	double r, r2;			/* correlation & explained variation */

	if (read_name(xname, sizeof xname, '\t') != 0 ||
	    read_name(yname, sizeof yname, '\n') != 0) {
		fprintf(stderr, "regress: header line must be xname<TAB>yname\n");
		return 1;
	}
	skip_line();				/* skip 2nd reldb line */

	/* Accept a row only when both fields converted. */
	while ((got = scanf("%lf %lf", &x, &y)) == 2) {
		n += 1.;
		sumx += x;
		sumy += y;
		sumxx += x * x;
		sumyy += y * y;
		sumxy += x * y;
	}
	/* 0 means a non-numeric token, 1 means a row with x but no y. */
	if (got != EOF) {
		fprintf(stderr, "regress: malformed or incomplete data in row %.0f\n",
			n + 1.);
		return 1;
	}
	/* The error mean square divides by n-2, so at least 3 rows are needed. */
	if (n < 3.) {
		fprintf(stderr, "regress: need at least 3 observations, got %.0f\n", n);
		return 1;
	}

	ssyy = sumyy - sumy * sumy / n;	/* numerators for the variances */
	ssxx = sumxx - sumx * sumx / n;
	ssxy = sumxy - sumx * sumy / n;	/* and for the covariance */

	/* With no spread in x the slope ssxy/ssxx cannot be computed. */
	if (ssxx <= 0.) {
		fprintf(stderr, "regress: all %s-values are identical\n", xname);
		return 1;
	}

	xbar = sumx / n;
	ybar = sumy / n;
	stdevx = sqrt(ssxx / (n - 1.));
	stdevy = sqrt(ssyy / (n - 1.));

	beta = ssxy / ssxx;		/* slope estimate */
	alpha = ybar - beta * xbar;	/* intercept estimate */
	r = ssxy / sqrt(ssxx * ssyy);
	r2 = ssxy * ssxy / (ssxx * ssyy);

	sstot = ssyy;			/* now for the anova table */
	ssreg = beta * beta * ssxx;	/* the sums of squares, */
	sse = sstot - ssreg;
	mstot = sstot / (n - 1.);	/* the mean squares */
	mse = sse / (n - 2.);
	msreg = ssreg / 1.;
	F = msreg / mse;		/* and the F-value */

	printf("Number of observations %.0f\n",n);
	printf("Average of %s-values    %.2f,",xname,xbar);
	printf("\taverage of %s-values    %.2f .\n",yname,ybar);

	printf("Std. dev. in %s-values  %.2f, ",xname,stdevx);
	printf("\tstd. dev. in %s-values  %.2f .\n",yname,stdevy);

	printf("Correlation coefficient between %s and %s : %.4f",xname,yname,r);
	printf("\tExplained variation %.2f\n",r2);
	printf("Estimated regression line : %s = ",yname);
	printf(" %.2f + %.2f * %s \n\n",alpha,beta,xname);

	printf("Anova table \n\n");
	printf("Source\t\t\tSS\t\tdf\t\tMS\t\tF\n");
	printf("------------------------------------------");
	printf("--------------------------------\n");
	printf("Regression \t %10.2f \t %10.2f \t %10.2f \t %10.2f\n",ssreg,1.,msreg,F);
	printf("Error\t\t %10.2f \t %10.2f \t %10.2f\n",sse,n-2.,mse);
	printf("------------------------------------------");
	printf("--------------------------------\n");
	printf("Total \t\t %10.2f \t %10.2f \t %10.2f\n",sstot,n-1.,mstot);

	return 0;
}
