/******************************************************************************
* Numerical Recipes' SVD singular value decomposition code.		      *
* "Numerical Recipes in C", by William H. Press el al., University of	      *
* Cambridge, 1988, ISBN 0521-35465-X.					      *
*   Really ugly piece of code, that works quite well.			      *
******************************************************************************/

#include <math.h>
#include "irit_sm.h"
#include "imalloc.h"

#define SVD_TOL 1.0e-5

#define SIGN2(a, b)	((b) >= 0.0 ? fabs(a) : -fabs(a))
#define PYTHAG(a, b)	sqrt(SQR(a) + SQR(b))
#define SVD_VECTOR(n)	((RealType *) IritMalloc(sizeof(RealType) * (n + 1)))
#define SVD_FREE_VEC(p)	IritFree((VoidPtr) p)

static void svdcmp(RealType **a, int m, int n, RealType *w, RealType **v);

/*****************************************************************************
* DESCRIPTION:                                                               *
* Singular value decomposition of matrix A into A W V.			     *
*   Given A is m by n, m >= n.						     *
*   Output:								     *
*	A is m by n modified in place into U,				     *
*	W is a vector of length m,					     *
*	V is a square matrix of size n by n.				     *
*   Due to the Fortran style of this code all indices are from 1 to k, so    *
* an array from 1 to k actually have k + 1 elements in this C translation.   *
*                                                                            *
* PARAMETERS:                                                                *
*   a:    A m by m matrix.                                                   *
*   m, n: Dimensions of A, W and V					     *
*   w:    A vector of length m						     *
*   v:    A matrix of size n by n.					     *
*                                                                            *
* RETURN VALUE:                                                              *
*   void                                                                     *
*****************************************************************************/
static void svdcmp(RealType **a, int m, int n, RealType *w, RealType **v)
{
    int flag, i, its, j, jj, k, l, nm;
    RealType c, f, h, s, x, y, z, *rv1;
    RealType
	anorm = 0.0,
	g = 0.0,
	scale = 0.0;

    if (m < n)
	IritFatalError("SVDCMP: You must augment A with extra zero rows");

    rv1 = SVD_VECTOR(n);
    for (i = 1; i <= n; i++) {
	l = i + 1;
	rv1[i] = scale * g;
	g = s = scale = 0.0;
	if (i <= m) {
	    for (k = i; k <= m; k++)
		scale += fabs(a[k][i]);
	    if (scale) {
		for (k = i; k <= m; k++) {
		    a[k][i] /= scale;
		    s += a[k][i] * a[k][i];
		}
		f = a[i][i];
		g = -SIGN2(sqrt(s),f);
		h = f * g - s;
		a[i][i] = f - g;
		if (i != n) {
		    for (j = l; j <= n; j++) {
			for (s = 0.0, k = i; k <= m; k++)
			    s += a[k][i]*a[k][j];
			f = s / h;
			for (k = i; k <= m; k++)
			    a[k][j] += f*a[k][i];
		    }
		}
		for (k = i; k <= m; k++)
		    a[k][i] *= scale;
	    }
	}
	w[i] = scale * g;
	g = s = scale = 0.0;
	if (i <= m && i != n) {
	    for (k = l; k <= n; k++)
		scale += fabs(a[i][k]);
	    if (scale) {
		for (k = l; k <= n; k++) {
		    a[i][k] /= scale;
		    s += a[i][k]*a[i][k];
		}
		f = a[i][l];
		g = -SIGN2(sqrt(s), f);
		h = f * g - s;
		a[i][l] = f - g;
		for (k = l; k <= n; k++)
		    rv1[k]=a[i][k]/h;
		if (i != m) {
		    for (j = l;j <= m; j++) {
			for (s = 0.0, k=l; k <= n; k++)
			    s += a[j][k] * a[i][k];
			for (k = l; k <= n; k++)
			    a[j][k] += s * rv1[k];
		    }
		}
		for (k=l;k<=n;k++) a[i][k] *= scale;
	    }
	}
	s = fabs(w[i])+fabs(rv1[i]);
	anorm = MAX(anorm, s);
    }
    for (i = n; i >= 1; i--) {
	if (i < n) {
	    if (g) {
		for (j = l; j <= n; j++)
		    v[j][i] = (a[i][j] / a[i][l]) / g;
		for (j = l; j <= n; j++) {
		    for (s = 0.0, k = l; k <= n; k++)
			s += a[i][k]*v[k][j];
		    for (k = l; k <= n; k++)
			v[k][j] += s * v[k][i];
		}
	    }
	    for (j = l; j <= n; j++)
		v[i][j]=v[j][i]=0.0;
	}
	v[i][i] = 1.0;
	g = rv1[i];
	l = i;
    }
    for (i = n; i >= 1; i--) {
	l = i+1;
	g = w[i];
	if (i < n)
	    for (j = l;j <= n;j++)
		a[i][j]=0.0;
	if (g) {
	    g=1.0 / g;
	    if (i != n) {
		for (j = l; j <= n; j++) {
		    for (s = 0.0, k = l; k <= m; k++)
			s += a[k][i] * a[k][j];
		    f = (s / a[i][i]) * g;
		    for (k = i; k <= m; k++)
			a[k][j] += f * a[k][i];
		}
	    }
	    for (j = i; j <= m; j++)
		a[j][i] *= g;
	}
	else {
	    for (j = i; j <= m; j++)
		a[j][i] = 0.0;
	}
	++a[i][i];
    }
    for (k = n; k >= 1; k--) {
	for (its = 1; its <= 30; its++) {
	    flag = 1;
	    for (l = k; l >= 1; l--) {
		nm = l-1;
		if (fabs(rv1[l]) + anorm  ==  anorm) {
		    flag = 0;
		    break;
		}
		if (fabs(w[nm]) + anorm == anorm)
		    break;
	    }
	    if (flag) {
		c = 0.0;
		s = 1.0;
		for (i = l; i <= k; i++) {
		    f = s * rv1[i];
		    if (fabs(f) + anorm != anorm) {
			g = w[i];
			h = PYTHAG(f, g);
			w[i] = h;
			h = 1.0 / h;
			c = g * h;
			s = (-f * h);
			for (j = 1; j <= m; j++) {
			    y = a[j][nm];
			    z = a[j][i];
			    a[j][nm] = y * c + z * s;
			    a[j][i] = z * c - y * s;
			}
		    }
		}
	    }
	    z = w[k];
	    if (l == k) {
		if (z < 0.0) {
		    w[k] = -z;
		    for (j = 1; j <= n; j++)
			v[j][k] = (-v[j][k]);
		}
		break;
	    }
	    if (its == 30)
		IritFatalError("No convergence in 30 SVDCMP iterations");
	    x = w[l];
	    nm = k - 1;
	    y = w[nm];
	    g = rv1[nm];
	    h = rv1[k];
	    f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y);
	    g = PYTHAG(f, 1.0);
	    f = ((x - z) * (x + z) + h * ((y / (f + SIGN2(g, f))) - h)) / x;
	    c = s = 1.0;
	    for (j = l; j <= nm; j++) {
		i = j + 1;
		g = rv1[i];
		y = w[i];
		h = s * g;
		g = c * g;
		z = PYTHAG(f, h);
		rv1[j] = z;
		c = f / z;
		s = h / z;
		f = x * c + g * s;
		g = g * c - x * s;
		h = y * s;
		y = y * c;
		for (jj = 1; jj <= n; jj++) {
		    x = v[jj][j];
		    z = v[jj][i];
		    v[jj][j] = x * c + z * s;
		    v[jj][i] = z * c - x * s;
		}
		z = PYTHAG(f, h);
		w[j] = z;
		if (z) {
		    z = 1.0 / z;
		    c = f * z;
		    s = h * z;
		}
		f = (c * g) + (s * y);
		x = (c * y) - (s * g);
		for (jj = 1; jj <= m; jj++) {
		    y = a[jj][j];
		    z = a[jj][i];
		    a[jj][j] = y * c + z * s;
		    a[jj][i] = z * c - y * s;
		}
	    }
	    rv1[l] = 0.0;
	    rv1[k] = f;
	    w[k] = x;
	}
    }
    SVD_FREE_VEC(rv1);
}

/*****************************************************************************
* DESCRIPTION:                                                               M
* Least square solves A x = b.						     M
*   The vector X is of size Nx, vector b is of size NData and matrix A is    M
* of size Nx by NData.							     M
*   Uses singular value decomposition.					     M
*   If A != NULL is SVD decomposition is computed, otherwise (A == NULL) a   M
* solution is computed for the given b and is placed in x.		     M
*                                                                            *
* PARAMETERS:                                                                M
*   A:         The matrix of size Nx by NData.                               M
*   x:         The vector of sought solution of size Nx.                     M
*   b:         The vector of coefficients of size NData.                     M
*   NData, Nx:  Dimensions of input.                                         M
*                                                                            *
* RETURN VALUE:                                                              M
*   RealType:  The reciprocal of the condition number, if A != NULL, zero    M
*	       otherwise.						     M
*                                                                            *
* KEYWORDS:                                                                  M
*   SvdLeastSqr,  singular value decomposition, linear systems               M
*****************************************************************************/
RealType SvdLeastSqr(RealType *A, RealType *x, RealType *b, int NData, int Nx)
{
    static int
	AllocNData = 0,
	AllocNx = 0;
    static RealType
	**SVD_U = NULL,
	**SVD_V = NULL,
	*SVD_W = NULL;
    int i, j;

    if (A != NULL) {
	RealType Min, Max;

	if (SVD_U != NULL) {		 /* Free old instance of aux. data. */
	    for (i = 0; i <= AllocNData; i++)
		SVD_FREE_VEC(SVD_U[i]);
	    IritFree((VoidPtr) SVD_U);
	    for (i = 0; i <= AllocNx; i++)
		SVD_FREE_VEC(SVD_V[i]);
	    IritFree((VoidPtr) SVD_V);
	    SVD_FREE_VEC(SVD_W);
	}

	SVD_U = (RealType **) IritMalloc((NData + 1) * sizeof(RealType *));
	SVD_V = (RealType **) IritMalloc((Nx + 1) * sizeof(RealType *));
	SVD_W = SVD_VECTOR(1 + MAX(NData, Nx));
	for (i = 0; i <= NData; i++)
	    SVD_U[i] = SVD_VECTOR(Nx);
	for (i = 0; i <= Nx; i++)
	    SVD_V[i] = SVD_VECTOR(Nx);
	AllocNData = NData;
	AllocNx = Nx;

	for (i = 0; i < NData; i++) {
	    for (j = 0; j < Nx; j++) {
		SVD_U[i + 1][j + 1] = A[i * Nx + j];
	    }
	}
	svdcmp(SVD_U, NData, Nx, SVD_W, SVD_V);

	Min = Max = SVD_W[1];
	for (i = 1; i <= Nx; i++) {
	    if (Min > SVD_W[i])
	        Min = SVD_W[i];
	    if (Max < SVD_W[i])
	        Max = SVD_W[i];

#	    ifdef DEBUG_PRINT_SVD_W
		fprintf(stderr, "W[%d] = %f\n", i, SVD_W[i]);
#	    endif /* DEBUG_PRINT_SVD_W */
	}
	return Min / Max; /* The reciprocal of the codition number. */
    }
    else {
	RealType
	    *TVec = SVD_VECTOR(Nx);

	for (j = 1; j <= Nx; j++) {
	    RealType s = 0.0;

	    if (SVD_W[j]) {
		for (i = 1; i <= NData; i++)
		    s += SVD_U[i][j] * b[i - 1];
		s /= SVD_W[j];
	    }
	    TVec[j] = s;
	}
	for (j = 1; j <= Nx; j++) {
	    RealType s = 0.0;
	    
	    for (i = 1; i <= Nx; i++)
		s += SVD_V[j][i] * TVec[i];
	    x[j - 1] = s;
	}

	SVD_FREE_VEC(TVec);

	return 0.0;
    }
}
