/////////////////////////////////////////////////////////////////
// tstmat4.cpp: This program compares using static 2-d matrices
// with the more general SimpleMatrix and Matrix objects when
// it comes to doing matrix multiplies.
// Copyright(c) 1993 Azarona Software. All rights reserved.
//
// Usage: tstmat4 [num_iter]
//
// Meant to be used with a stop-watch to time the algorithms.
//
// Test 1: Uses statically allocated matrices -- fast!
// Test 2: Uses dynamically allocated SimpleMatrix objects,
//         with m[i][j] style of subscripting.
// Test 3: Uses fast vector pointer form for SimpleMatrix objects
// Test 4: Uses Matrix objects with m[i][j] subscripting. (Slow!)
// Test 5: Uses Matrix objects with m(i,j) subscripting.
// Test 6: Uses fastest form possible for Matrix objects, using
//         fast vector pointers.
/////////////////////////////////////////////////////////////////
#include <iostream.h>
#include <iomanip.h>
#include <stdlib.h>
#include <conio.h>
#include "simpmat.h"
#include "matrix.h"

typedef double Number;

INITNULLVEC(Number);

const int d1 = 30;
const int d2 = 30;

Number raw_a[d1][d2];
Number raw_b[d2][d1];
Number raw_c[d1][d1];

SimpleMatrix<Number> simp_a(d1, d2);
SimpleMatrix<Number> simp_b(d2, d1);
SimpleMatrix<Number> simp_c(d1, d1);

Matrix<Number> a(d1, d2);
Matrix<Number> b(d2, d1);
Matrix<Number> c(d1, d1);


void Test1(Number (*c)[d1], const Number (*a)[d2], const Number (*b)[d1])
// Fastest, cause it uses statically allocated matrices, with constant
// dimensions. However, very inflexible.
{
  unsigned i, j, k;
  for (i = 0; i < d1; i++) {
      for (j = 0; j < d1; j++) {
          Number sum = 0;
          for (k = 0; k < d2; k++) sum += a[i][k] * b[k][j];
          c[i][j] = sum;
      }
  }
}

template<class TYPE>
void Test2(SimpleMatrix<TYPE> &c, 
  const SimpleMatrix<TYPE> &a, const SimpleMatrix<TYPE> &b)
// Uses dynamically allocated SimpleMatrix objects with subscripting.
// Kind of slow.
{
  unsigned i, j, k;
  for (i = 0; i < a.NRows(); i++) {
      for (j = 0; j < b.NCols(); j++) {
          TYPE sum = 0;
          for (k = 0; k < b.NRows(); k++) sum += a[i][k] * b[k][j];
          c[i][j] = sum;
      }
  }
}

template<class TYPE>
void Test3(SimpleMatrix<TYPE> &c, 
   const SimpleMatrix<TYPE> &a, const SimpleMatrix<TYPE> &b)
// Fastest form for SimpleMatrices, by assuming row vectors 
// have a stride of 1, thus using ordinary pointers instead
{
  unsigned i, j, k;

#ifdef SUPPOSEDLYFASTEST
  const TYPE *ar        = a.RowPtr();
#else
  VecPtr<const TYPE> ar = a.RowPtr();
#endif
  VecPtr<const TYPE> ac = a.ColPtr();
#ifdef SUPPOSEDLYFASTEST
  const TYPE *br        = b.RowPtr();
#else
  VecPtr<const TYPE> br = b.RowPtr();
#endif
  VecPtr<const TYPE> bc = b.ColPtr();
#ifdef SUPPOSEDLYFASTEST
  TYPE *cr              = c.RowPtr();
#else
  VecPtr<TYPE> cr       = c.RowPtr();
#endif
  VecPtr<TYPE> cc       = c.ColPtr();

  const TYPE *bstart = br;

  for (i = 0; i < a.NRows(); i++) {
      cr = cc; // Point to start of row i
      br = bstart;
      for (j = 0; j < b.NCols(); j++) {
          TYPE sum = 0;
          ar = ac; // Point to start of row i.
          bc = br; // Point to start of col j.
          for (k = 0; k < b.NRows(); k++) {
               sum += *ar * *bc;
               ar++; // Next col
               bc++; // Next row
          }
          br++; // Next col
          *cr = sum;
          cr++; // Next col
      }
      ac++; // Next row
      cc++; // Next row
  }
}

template<class TYPE>
void Test4(Matrix<TYPE> &c, const Matrix<TYPE> &a, const Matrix<TYPE> &b)
// Uses dynamically allocated Matrix objects with double-subscripting.
// Really S--L--O--W.
{
  unsigned i, j, k;
  for (i = 0; i < a.NRows(); i++) {
      for (j = 0; j < b.NCols(); j++) {
          TYPE sum = 0;
          for (k = 0; k < b.NRows(); k++) sum += a[i][k] * b[k][j];
          c[i][j] = sum;
      }
  }
}

template<class TYPE>
void Test5(Matrix<TYPE> &c, const Matrix<TYPE> &a, const Matrix<TYPE> &b)
// Uses dynamically allocated Matrix objects with 2-d subscripting.
// Better than Test4, but still slow.
{
  unsigned i, j, k;

  for (i = 0; i < a.NRows(); i++) {
      for (j = 0; j < b.NCols(); j++) {
          TYPE sum = 0;
          for (k = 0; k < b.NRows(); k++) sum += a(i,k) * b(k,j);
          c(i,j) = sum;
      }
  }
}


template<class TYPE>
void Test6(Matrix<TYPE> &c, const Matrix<TYPE> &a, const Matrix<TYPE> &b)
// Fastest routine for Matrix objects, using vector pointers.
// Should be pretty decent.
{
  unsigned i, j, k;

  VecPtr<const TYPE> ar = a.RowPtr();
  VecPtr<const TYPE> ac = a.ColPtr();
  VecPtr<const TYPE> br = b.RowPtr();
  VecPtr<const TYPE> bc = b.ColPtr();
  VecPtr<TYPE> cr = c.RowPtr();
  VecPtr<TYPE> cc = c.ColPtr();
  
  const TYPE *bstart = b.PtrToAll();

  for (i = 0; i < a.NRows(); i++) {
      cr = cc; // Point to start of row i
      br = bstart;
      for (j = 0; j < b.NCols(); j++) {
          TYPE sum = 0;
          ar = ac; // Point to start of row i
          bc = br; // Point to start of col j
          for (k = 0; k < b.NRows(); k++) {
               sum += *ar * *bc;
               ar++; // Next col
               bc++; // Next row
          }
          br++; // Next col
          *cr = sum;
          cr++; // Next col
      }
      ac++; // Next row
      cc++; // Next row
  }
}


void testn(int n, unsigned unsigned num_iter)
{
  unsigned i;

  cout << "Press return to start test " << n;
  getch();
  cout << '\n';
  switch(n) {
    case 1: for(i = 0; i<num_iter; i++) Test1(raw_c, raw_a, raw_b);
    break;
    case 2: for(i = 0; i<num_iter; i++) Test2(simp_c, simp_a, simp_b);
    break;
    case 3: for(i = 0; i<num_iter; i++) Test3(simp_c, simp_a, simp_b);
    break;
    case 4: for(i = 0; i<num_iter; i++) Test4(c, a, b);
    break;
    case 5: for(i = 0; i<num_iter; i++) Test5(c, a, b);
    break;
    case 6: for(i = 0; i<num_iter; i++) Test6(c, a, b);
    break;
  }
  cout << "Finished\n";
}

main(int argc, char *argv[])
{
  unsigned n = 1;

  if (argc <= 1) {
     cout << "Usage: tstmat4 [num_iter]\n";
     return 1;
  }

  n = (unsigned)atoi(argv[1]);

  unsigned i, j, k;

  for(i = 0, k = 1; i<d1; i++)
     for(j = 0; j<d2; j++) raw_a[i][j] = k++;

  for(i = 0, k = 2; i<d2; i++)
     for(j = 0; j<d1; j++) raw_b[i][j] = k++;


  for(i = 0, k = 1; i<d1; i++)
     for(j = 0; j<d2; j++) simp_a[i][j] = k++;

  for(i = 0, k = 2; i<d2; i++)
     for(j = 0; j<d1; j++) simp_b[i][j] = k++;

  for(i = 0, k = 1; i<d1; i++)
     for(j = 0; j<d2; j++) a[i][j] = k++;

  for(i = 0, k = 2; i<d2; i++)
     for(j = 0; j<d1; j++) b[i][j] = k++;

  testn(1, n);
  testn(2, n);
  testn(3, n);
  testn(4, n);
  testn(5, n);
  testn(6, n);
  return 0;
}

