#include <stdio.h>
#include <math.h>

#define MAIN_PROG
#include "comm.h"
#include "ocean.h"
#include "CMMDlib.h"

char *progname;

main(argc, argv)
int argc;
char **argv;
 
   {void Ocean_Work (), Ocean_Idle (), Compute_My_Coordinates ();
    int num_iter;

    init_task(argv);

   CMMD_receive_bc_from_host ((char *) (& num_iter), sizeof (int));
    fprintf(stderr, "(task %d) received bcast. num_iter = %d\n", _my_taskid, 
	    num_iter);
    Compute_My_Coordinates ();

    if (my_node < TOT_PROCS)
       Ocean_Work (num_iter);
    else
       Ocean_Idle (num_iter);
    exit(0);
/*    CMMD_sync_with_host (); */
  }


void Ocean_Idle (num_iter)
int num_iter;

   {int inx, all_flag;

    do {all_flag = CMMD_reduce_int (0, CMMD_combiner_ior); 
/*        if (my_node == 100) printf ("one: %d\n", all_flag); */
       } while (all_flag == TRUE);

    (void) CMMD_reduce_double (0.0, CMMD_combiner_add);

    for (inx = 0;  inx < num_iter;  inx ++)
       {do {all_flag = CMMD_reduce_int (0, CMMD_combiner_ior); 
/*            if (my_node == 100) printf ("two: %d\n", all_flag); */
           } while (all_flag);

        (void) CMMD_reduce_double (0.0, CMMD_combiner_add);

        do {all_flag = CMMD_reduce_int (0, CMMD_combiner_ior);
/*            if (my_node == 100) printf ("three: %d\n", all_flag); */
           } while (all_flag);
       }
   }


void Ocean_Work (num_iter)
int num_iter;

   {void Compute_Constants (), Initial_Matrix (), Exchange ();
    void Compute_Tau (), Do_One_Iteration (), Compute_F ();
    void Solve_Helmholtz (), Print_Mat ();
    int inx, jnx, knx;
    double partial_sum, time, Compute_Sum ();

    only_on (OBSERVER) Second_Set (N_TIMER);
    only_on (OBSERVER) 
        {time = Second_Look (N_TIMER);
         CMMD_send (CMMD_host_node, TIME_TAG, &time, sizeof (double));
        }

    Compute_Constants ();

    Compute_F ();

    for (inx = 0;  inx < X_PTS_2;  inx ++)
        for (jnx = 0;  jnx < Y_PTS_2;  jnx ++)
            Psi_Um[inx][jnx] = 0.0;
/*    only_on (OBSERVERS) Print_Mat ("Psi_Um", Psi_Um);  */

    for (inx = 0;  inx < X_PTS_2;  inx ++)
        for (jnx = 0;  jnx < Y_PTS_2;  jnx ++)
            Psi_Lm[inx][jnx] = 0.0;
/*    only_on (OBSERVERS) Print_Mat ("Psi_Lm", Psi_Lm);  */

    Initial_Matrix (Psi_B,  1.0, 1.0, 0.0);
    Exchange (Psi_B);
/*    only_on (OBSERVERS) Print_Mat ("Psi_B", Psi_B);  */
  
    Solve_Helmholtz (eig_2, Psi_B, Psi_B);
/*    only_on (OBSERVERS) Print_Mat ("Psi_B", Psi_B);  */

    partial_sum = Compute_Sum (Psi_B, 0.25, 0.50, 1.00);
    psi_bi = CMMD_reduce_double (partial_sum, CMMD_combiner_add);
/*    only_on (OBSERVERS) printf ("psi_bi = %e\n", psi_bi);  */

    for (knx = 0;  knx < 2;  knx ++)
        for (inx = 0;  inx < X_PTS_2;  inx ++)
            for (jnx = 0;  jnx < Y_PTS_2;  jnx ++)
                Psi_M[knx][inx][jnx] = 0.0;
/*    only_on (OBSERVERS) Print_Mat ("Psi_M", Psi_M);  */
  
    for (knx = 0;  knx < 2;  knx ++)
        for (inx = 0;  inx < X_PTS_2;  inx ++)
            for (jnx = 0;  jnx < Y_PTS_2;  jnx ++)
                Psi[knx][inx][jnx] = 0.0;
/*    only_on (OBSERVERS) Print_Mat ("Psi", Psi);  */
  
    Compute_Tau ();
    Exchange (Tauz);
/*    only_on (OBSERVERS) Print_Mat ("Tauz", Tauz);  */

    for (inx = 0;  inx < num_iter;  inx ++)
        Do_One_Iteration ();

    for (inx = 1;  inx < X_PTS;  inx ++)
        for (jnx = 1;  jnx < Y_PTS;  jnx ++)
            Psi_Um[inx][jnx] += Psi_M[0][inx][jnx];
/*    only_on (OBSERVERS) Print_Mat ("Psi_Um", Psi_Um);  */

    for (inx = 1;  inx < X_PTS;  inx ++)
        for (jnx = 1;  jnx < Y_PTS;  jnx ++)
            Psi_Lm[inx][jnx] += Psi_M[1][inx][jnx];
/*    only_on (OBSERVERS) Print_Mat ("Psi_Lm", Psi_Lm);  */

    only_on (OBSERVER)
        {time = Second_Look (N_TIMER);
         CMMD_send (CMMD_host_node , TIME_TAG, &time, sizeof (double));
        }
   }


void Compute_My_Coordinates ()

   { int my_id;

#if 0
     TOT_PROCS =  numtasks();
     X_PROCS =  NPX(TOT_PROCS);
     Y_PROCS = TOT_PROCS/X_PROCS;
     X_PTS   = (X_TOT_PTS / X_PROCS);
     Y_PTS   = (Y_TOT_PTS / Y_PROCS);
     X_PTS_1 =  (X_PTS + 1);
     Y_PTS_1 = (Y_PTS + 1);
     X_PTS_2 = (X_PTS + 2);
     Y_PTS_2 = (Y_PTS + 2);
     ELEMS_Y2= (DB * Y_PTS_2);
#endif

     my_id = CMMD_self_address ();
     my_node = my_id - 1;

    my_x    = my_node % X_PROCS;
    my_y    = my_node / X_PROCS;

    my_west = my_x == 0 ?           UNDEFINED : my_id - 1;
    my_east = my_x == X_PROCS - 1 ? UNDEFINED : my_id + 1;
    my_sout = my_y == 0 ?           UNDEFINED : my_id - X_PROCS;
    my_nort = my_y == Y_PROCS - 1 ? UNDEFINED : my_id + X_PROCS;

    lower_x = my_x == 0           ? 2     : 1;
    upper_x = my_x == X_PROCS - 1 ? X_PTS : X_PTS_1;
    lower_y = my_y == 0           ? 2     : 1;
    upper_y = my_y == Y_PROCS - 1 ? Y_PTS : Y_PTS_1;
   }


void Compute_Constants ()

   {pi = 4.0 * atan (1.0);
    t0 = 0.5e-04;
    h_inv  = 1.0 / h;
    h1_inv = 1.0 / h1;
    hh1    = h1 / h;
    hh3    = h3 / h;
    timst  = 2.0 * dtau;
    psi_bi = 0.0;
    ysca   = (double) (X_TOT_PTS - 1) * res;
    ysca_1 = 0.5 * ysca;
    factor = - t0 * pi / ysca_1;
    eig_2  = - h * f0 * f0 / (h1 * h3 * gpr);
    del_sq_inv  = res * res;
    del_x_sq    =   1.0 / (res * res);
    del_y_sq    =   1.0 / (res * res);
    fact_jacob  = - 1.0 / (12.0 * res * res);
    fact_laplac =   1.0 / (res * res);
   }


void Compute_F ()

   {int inx;

    for (inx = lower_x - 1;  inx < upper_x + 1;  inx ++)
        F[inx] = f0 + beta * ((double) (my_x * X_PTS + inx - 1) * 
                 res - ysca_1);
   }


void Initial_Matrix (Matrix, corner_val, border_val, center_val)
double Matrix[][Y_PTS_2], corner_val, border_val, center_val;

   {int inx, jnx;

    if (my_x == 0)
       {if (my_y == 0)  Matrix[1][1] = corner_val;
        for (jnx = lower_y;  jnx < upper_y;  jnx ++)
            Matrix[1][jnx] = border_val;
       }

    if (my_x == X_PROCS - 1)
       {if (my_y == Y_PROCS - 1)  Matrix[X_PTS][Y_PTS] = corner_val;
        for (jnx = lower_y;  jnx < upper_y;  jnx ++)
            Matrix[X_PTS][jnx] = border_val;
       }

    if (my_y == 0)
       {if (my_x == X_PROCS - 1) Matrix[X_PTS][1] = corner_val;
        for (inx = lower_x;  inx < upper_x;  inx ++)
            Matrix[inx][1] = border_val; 
       }

    if (my_y == Y_PROCS - 1)
       {if (my_x == 0)  Matrix[1][Y_PTS] = corner_val;
        for (inx = lower_x;  inx < upper_x;  inx ++)
            Matrix[inx][Y_PTS] = border_val;
       }

    for (inx = lower_x;  inx < upper_x;  inx ++)
        for (jnx = lower_y;  jnx < upper_y;  jnx ++)
            Matrix[inx][jnx] = center_val;
   }


double Compute_Sum (Matrix, corner_coef, border_coef, center_coef)
double Matrix[][Y_PTS_2], corner_coef, border_coef, center_coef;

   {double sum;
    int inx, jnx;

    sum = 0.0;
    if (my_x == 0)
       {if (my_y == 0)  sum += corner_coef * Matrix[1][1];
        for (jnx = lower_y;  jnx < upper_y;  jnx ++)
            sum += border_coef * Matrix[1][jnx];
       }
/*    only_on (OBSERVERS) printf ("sum = %e\n", sum);  */

    if (my_x == X_PROCS - 1)
       {if (my_y == Y_PROCS - 1) sum += corner_coef * Matrix[X_PTS][Y_PTS];
        for (jnx = lower_y;  jnx < upper_y;  jnx ++)
            sum += border_coef * Matrix[X_PTS][jnx];
       }
/*    only_on (OBSERVERS) printf ("sum = %e\n", sum);  */

    if (my_y == 0)
       {if (my_x == X_PROCS - 1)  sum += corner_coef * Matrix[X_PTS][1];
        for (inx = lower_x;  inx < upper_x;  inx ++)
            sum += border_coef * Matrix[inx][1];
       }
/*    only_on (OBSERVERS) printf ("sum = %e\n", sum);  */

    if (my_y == Y_PROCS - 1)
       {if (my_x == 0)  sum += corner_coef * Matrix[1][Y_PTS];
        for (inx = lower_x;  inx < upper_x;  inx ++)
            sum += border_coef * Matrix[inx][Y_PTS];
       }
/*    only_on (OBSERVERS) printf ("sum = %e\n", sum);  */

    for (inx = lower_x;  inx < upper_x;  inx ++)
        for (jnx = lower_y;  jnx < upper_y;  jnx ++)
            sum += center_coef * Matrix[inx][jnx];
/*    only_on (OBSERVERS) printf ("sum = %e\n", sum);  */

    return (sum);
   }


void Compute_Tau ()

   {double curl, tmpd;
    int inx, jnx;

    for (inx = lower_x - 1;  inx < upper_x + 1;  inx ++)
      
       {tmpd = pi * (double) (my_x * X_PTS + inx - 1) * 
	       res / ysca_1;
	curl = factor * sin (0.0);
        for (jnx = lower_y - 1;  jnx < upper_y + 1;  jnx ++)
            Tauz[inx][jnx] = curl;
       }
   }


void Do_One_Iteration ()

   {int inx, jnx, knx;
    double f4, psi_ai, partial_sum, Compute_Sum ();
    void Laplacian (), Jacobian (), Exchange (), Solve_Helmholtz ();
    void Compute_Ga_and_Gb (), Print_Mat ();

    for (inx = 0;  inx < X_PTS_2;  inx ++)
        for (jnx = 0;  jnx < Y_PTS_2;  jnx ++)
            Ga[inx][jnx] = 0.0;
/*    only_on (OBSERVERS) Print_Mat ("Ga", Ga);  */

    for (inx = 0;  inx < X_PTS_2;  inx ++)
        for (jnx = 0;  jnx < Y_PTS_2;  jnx ++)
            Gb[inx][jnx] = 0.0;
/*    only_on (OBSERVERS) Print_Mat ("Gb", Gb);  */

    for (knx = 0;  knx < 2;  knx ++)
       {Laplacian (Psi[knx], Work_1[knx]);
	Exchange  (Work_1[knx]);
/*        only_on (OBSERVERS) Print_Mat ("Work_1[knx]", Work_1[knx]);  */
       }

    for (inx = 0;  inx < X_PTS_2;  inx ++)
        for (jnx = 0;  jnx < Y_PTS_2;  jnx ++)
            Work_2[inx][jnx] = Psi[0][inx][jnx] - Psi[1][inx][jnx];
/*    only_on (OBSERVERS) Print_Mat ("Work_2", Work_2);  */

    for (inx = 0;  inx < X_PTS_2;  inx ++)
        for (jnx = 0;  jnx < Y_PTS_2;  jnx ++)
            Work_3[inx][jnx] = hh3 * Psi[0][inx][jnx] + hh1 * Psi[1][inx][jnx];
/*    only_on (OBSERVERS) Print_Mat ("Work_3", Work_3);  */

    for (knx = 0;  knx < 2;  knx ++)
        for (inx = 0;  inx < X_PTS_2;  inx ++)
            for (jnx = 0;  jnx < Y_PTS_2;  jnx ++)
                Temp[knx][inx][jnx] = Psi[knx][inx][jnx];
/*    only_on (OBSERVERS) Print_Mat ("Temp", Temp);  */

    for (knx = 0;  knx < 2;  knx ++)
        for (inx = 0;  inx < X_PTS_2;  inx ++)
            for (jnx = 0;  jnx < Y_PTS_2;  jnx ++)
                Psi[knx][inx][jnx] = Psi_M[knx][inx][jnx];
/*    only_on (OBSERVERS) Print_Mat ("Psi[0]", Psi[0]);  */
/*    only_on (OBSERVERS) Print_Mat ("Psi[1]", Psi[1]);  */

    for (knx = 0;  knx < 2;  knx ++)
       {Laplacian (Psi_M[knx], Work_7[knx]);
	Exchange  (Work_7[knx]);
/*        only_on (OBSERVERS) Print_Mat ("Work_7[knx]", Work_7[knx]);  */
       }

    for (knx = 0;  knx < 2;  knx ++)
        for (inx = lower_x - 1;  inx < upper_x + 1;  inx ++)
            for (jnx = lower_y - 1;  jnx < upper_y + 1;  jnx ++)
                Work_1[knx][inx][jnx] += F[inx];
/*    only_on (OBSERVERS) Print_Mat ("Work_1[0]", Work_1[0]);  */
/*    only_on (OBSERVERS) Print_Mat ("Work_1[1]", Work_1[1]);  */

    for (knx = 0;  knx < 2;  knx ++)
       {Jacobian (Work_1[knx], Temp[knx], Work_5[knx]);
	Exchange (Work_5[knx]);
/*        only_on (OBSERVERS) Print_Mat ("Work_5[knx]", Work_5[knx]);  */
       }

    for (knx = 0;  knx < 2;  knx ++)
        for (inx = 0;  inx < X_PTS_2;  inx ++)
            for (jnx = 0;  jnx < Y_PTS_2;  jnx ++)
                Psi_M[knx][inx][jnx] = Temp[knx][inx][jnx];
/*    only_on (OBSERVERS) Print_Mat ("Psi_M[0]", Psi_M[0]);  */
/*    only_on (OBSERVERS) Print_Mat ("Psi_M[1]", Psi_M[1]);  */

    for (knx = 0;  knx < 2;  knx ++)
       {Laplacian (Work_7[knx], Work_4[knx]);
	Exchange  (Work_4[knx]);
/*        only_on (OBSERVERS) Print_Mat ("Work_4[knx]", Work_4[knx]);  */
       }

    Jacobian (Work_2, Work_3, Work_6);
    Exchange (Work_6);
/*    only_on (OBSERVERS) Print_Mat ("Work_6", Work_6);  */

    for (knx = 0;  knx < 2;  knx ++)
       {Laplacian (Work_4[knx], Work_7[knx]);
	Exchange  (Work_7[knx]);
/*        only_on (OBSERVERS) Print_Mat ("Work_7[knx]", Work_7[knx]);  */
       }

    Compute_Ga_and_Gb ();
    Exchange (Ga);
    Exchange (Gb);
/*    only_on (OBSERVERS) Print_Mat ("Ga", Ga);  */
/*    only_on (OBSERVERS) Print_Mat ("Gb", Gb);  */

    Solve_Helmholtz (eig_2, Ga, Old_Ga);
/*    only_on (OBSERVERS) Print_Mat ("Ga", Ga);  */

    for (inx = 0;  inx < X_PTS_2;  inx ++)
        for (jnx = 0;  jnx < Y_PTS_2;  jnx ++)
            Old_Ga[inx][jnx] = Ga[inx][jnx];
/*    only_on (OBSERVERS) Print_Mat ("Old_Ga", Old_Ga);  */

    partial_sum = Compute_Sum (Ga, 0.25, 0.50, 1.00);
    psi_ai = CMMD_reduce_double (partial_sum, CMMD_combiner_add);
/*    only_on (OBSERVERS) printf ("psi_ai = %e\n", psi_ai);  */

    f4 = - psi_ai / psi_bi;
    for (inx = 0;  inx < X_PTS_2;  inx ++)
        for (jnx = 0;  jnx < Y_PTS_2;  jnx ++)
            Ga[inx][jnx] += f4 * Psi_B[inx][jnx];
/*    only_on (OBSERVERS) Print_Mat ("Ga", Ga);  */

    Solve_Helmholtz (eig_2, Gb, Old_Gb);
/*    only_on (OBSERVERS) Print_Mat ("Gb", Gb);  */

    for (inx = 0;  inx < X_PTS_2;  inx ++)
        for (jnx = 0;  jnx < Y_PTS_2;  jnx ++)
            Old_Gb[inx][jnx] = Gb[inx][jnx];
/*    only_on (OBSERVERS) Print_Mat ("Old_Gb", Old_Gb);  */

    for (inx = 0;  inx < X_PTS_2;  inx ++)
        for (jnx = 0;  jnx < Y_PTS_2;  jnx ++)
            {Work_2[inx][jnx] = Gb[inx][jnx] - hh1 * Ga[inx][jnx];
             Work_3[inx][jnx] = Gb[inx][jnx] + hh3 * Ga[inx][jnx];
	    }
/*    only_on (OBSERVERS) Print_Mat ("Work_2", Work_2);  */
/*    only_on (OBSERVERS) Print_Mat ("Work_3", Work_3);  */

    for (inx = 0;  inx < X_PTS_2;  inx ++)
        for (jnx = 0;  jnx < Y_PTS_2;  jnx ++)
            Psi[0][inx][jnx] += timst * Work_3[inx][jnx];
/*    only_on (OBSERVERS) Print_Mat ("Psi[0]", Psi[0]);  */

    for (inx = 0;  inx < X_PTS_2;  inx ++)
        for (jnx = 0;  jnx < Y_PTS_2;  jnx ++)
            Psi[1][inx][jnx] += timst * Work_2[inx][jnx];
/*    only_on (OBSERVERS) Print_Mat ("Psi[1]", Psi[1]);  */
   }


void Exchange (Mat)
double Mat[][Y_PTS_2];

   {if (X_PROCS > 1)
        if (my_x != 0 && my_x != X_PROCS - 1)
           {Send_Receive (my_west, &Mat[0][0], my_east, &Mat[X_PTS][0]);
            Send_Receive (my_east, &Mat[X_PTS_1][0], my_west, &Mat[1][0]);
           }
        else
	    if (my_x == 0)
              {Send    (my_east, &Mat[X_PTS][0]);
               Receive (my_east, &Mat[X_PTS_1][0]);
	     }
	    else
              {Receive (my_west, &Mat[0][0]);
               Send    (my_west, &Mat[1][0]);
	     }

    if (Y_PROCS > 1)
        if (my_y != 0 && my_y != Y_PROCS - 1)
           {Send_Receive_V (my_sout, &Mat[0][0], my_nort, &Mat[0][Y_PTS]);
            Send_Receive_V (my_nort, &Mat[0][Y_PTS_1], my_sout, &Mat[0][1]);
           }
        else
           if (my_y == 0)
              {Send_V    (my_nort, &Mat[0][Y_PTS]);
               Receive_V (my_nort, &Mat[0][Y_PTS_1]);
	      }
           else
              {Receive_V (my_sout, &Mat[0][0]);
               Send_V    (my_sout, &Mat[0][1]);
	      }
   }


void Compute_Ga_and_Gb ()

   {int inx, jnx;

    if (my_x == 0)
        for (jnx = 1;  jnx < Y_PTS_1;  jnx ++)
           {Ga[1][jnx] = Work_5[0][1][jnx] + lf * Work_7[0][1][jnx] - lf *
                         Work_7[1][1][jnx];

            Gb[1][jnx] = hh1 * Work_5[0][1][jnx] + hh3 * 
                               Work_5[1][1][jnx] + lf * hh1 * 
                               Work_7[0][1][jnx] + lf * hh3 *
                               Work_7[1][1][jnx];
	   }

    if (my_x == X_PROCS - 1)
        for (jnx = 1;  jnx < Y_PTS_1;  jnx ++)
           {Ga[X_PTS][jnx] = Work_5[0][X_PTS][jnx] + lf * 
                             Work_7[0][X_PTS][jnx] - lf *
                             Work_7[1][X_PTS][jnx];

            Gb[X_PTS][jnx] = hh1 * Work_5[0][X_PTS][jnx] + hh3 * 
                                   Work_5[1][X_PTS][jnx] + lf * hh1 * 
                                   Work_7[0][X_PTS][jnx] + lf * hh3 *
                                   Work_7[1][X_PTS][jnx];
	   }

    if (my_y == 0)
        for (inx = 1;  inx < X_PTS_1;  inx ++)
           {Ga[inx][1] = Work_5[0][inx][1] + lf * Work_7[0][inx][1] - lf * 
                         Work_7[1][inx][1];

            Gb[inx][1] = hh1 * Work_5[0][inx][1] + hh3 * 
                               Work_5[1][inx][1] + lf * hh1 * 
                               Work_7[0][inx][1] + lf * hh3 * 
                               Work_7[1][inx][1];
	   }

    if (my_y == Y_PROCS - 1)
        for (inx = 1;  inx < X_PTS_1;  inx ++)
           {Ga[inx][Y_PTS] = Work_5[0][inx][Y_PTS] + lf * 
                             Work_7[0][inx][Y_PTS] - lf * 
                             Work_7[1][inx][Y_PTS];

            Gb[inx][Y_PTS] = hh1 * Work_5[0][inx][Y_PTS] + hh3 * 
                                   Work_5[1][inx][Y_PTS] + lf * hh1 * 
                                   Work_7[0][inx][Y_PTS] + lf * hh3 * 
                                   Work_7[1][inx][Y_PTS];
	   }

    for (inx = lower_x;  inx < upper_x;  inx ++)
        for (jnx = lower_y;  jnx < upper_y;  jnx ++)
           {Ga[inx][jnx] = Work_5[0][inx][jnx] - Work_5[1][inx][jnx] + eig_2 *
                           Work_6[inx][jnx] + h1_inv * Tauz[inx][jnx] + lf *
                           Work_7[0][inx][jnx] - lf * Work_7[1][inx][jnx];

            Gb[inx][jnx] = hh1 * Work_5[0][inx][jnx] + hh3 * 
                           Work_5[1][inx][jnx] + h_inv * Tauz[inx][jnx] + lf *
                           hh1 * Work_7[0][inx][jnx] + lf * hh3 *
                           Work_7[1][inx][jnx];
	   }
   }


void Solve_Helmholtz (elmbda, Mat_Out, Mat_Ini)
double Mat_Out[][Y_PTS_2], Mat_Ini[][Y_PTS_2];
double elmbda;

   {int inx, jnx, knx, lower_ch_y, lower_top, offset, my_flag, all_flag;
    double old_val, fac;
    void Print_Mat ();

    fac = 1.0 / (4.0 - del_sq_inv * elmbda);
    if (my_x == 0)
        for (jnx = 1;  jnx < Y_PTS_1;  jnx ++)
            Work_8[1][jnx] = Mat_Ini[1][jnx];

    if (my_x == X_PROCS - 1)
        for (jnx = 1;  jnx < Y_PTS_1;  jnx ++)
            Work_8[X_PTS][jnx] = Mat_Ini[X_PTS][jnx];

    if (my_y == 0)
        for (inx = 1;  inx < X_PTS_1;  inx ++)
            Work_8[inx][1] = Mat_Ini[inx][1];

    if (my_y == Y_PROCS - 1)
        for (inx = 1;  inx < X_PTS_1;  inx ++)
            Work_8[inx][Y_PTS] = Mat_Ini[inx][Y_PTS];

    for (inx = lower_x;  inx < upper_x;  inx ++)
        for (jnx = lower_y;  jnx < upper_y;  jnx ++)
            Work_8[inx][jnx] = omega * fac * (Mat_Ini[inx][jnx+1] +
                        Mat_Ini[inx][jnx-1] + Mat_Ini[inx+1][jnx] + 
                        Mat_Ini[inx-1][jnx] - del_sq_inv * Mat_Out[inx][jnx]) +
                        (1 - omega) * Mat_Ini[inx][jnx];
    Exchange (Work_8);
/*    only_on (OBSERVERS) Print_Mat ("Work_8", Work_8);  */

    do {my_flag = FALSE;
	for (knx = 0;  knx < 2;  knx ++)
	  {lower_ch_y = lower_y +  (lower_x + lower_y + knx) % 2;
           lower_top  = 1 -  2  * ((lower_x + lower_y + knx) % 2);
     	   for (inx = lower_x, offset = 0;  inx < upper_x;  
                inx ++, offset = lower_top - offset)
               for (jnx = lower_ch_y + offset;  jnx < upper_y;  jnx += 2)
                   {old_val = Work_8[inx][jnx];
                    Work_8[inx][jnx] = omega * fac * (Work_8[inx][jnx+1] +
                         Work_8[inx][jnx-1] + Work_8[inx+1][jnx] + 
                         Work_8[inx-1][jnx] - del_sq_inv * Mat_Out[inx][jnx]) +
                         (1 - omega) * Work_8[inx][jnx];
                    if ((old_val - Work_8[inx][jnx] >   tolerance) ||
                        (old_val - Work_8[inx][jnx] < - tolerance)) 
                        my_flag = TRUE;
	            }
            Exchange (Work_8);
           }

        all_flag = CMMD_reduce_int (my_flag, CMMD_combiner_ior);
/*        only_on (OBSERVERS) Print_Mat ("Work_8", Work_8);  */
       } while (all_flag == TRUE);

    for (inx = 0;  inx < X_PTS_2;  inx ++)
        for (jnx = 0;  jnx < Y_PTS_2;  jnx ++)
            Mat_Out[inx][jnx] = Work_8[inx][jnx];
/*    only_on (OBSERVERS) Print_Mat ("Mat_Out", Mat_Out);  */
   }


void Laplacian (In, Out)
double In[][Y_PTS_2], Out[][Y_PTS_2];

    {int inx, jnx;

    if (my_x == 0)       
        for (jnx = 1;  jnx < Y_PTS_1;  jnx ++) 
            Out[1][jnx]     = 0.0;

    if (my_x == X_PROCS - 1)       
        for (jnx = 1;  jnx < Y_PTS_1;  jnx ++) 
            Out[X_PTS][jnx] = 0.0;

    if (my_y == 0)       
        for (inx = 1;  inx < X_PTS_1;  inx ++) 
            Out[inx][1]     = 0.0;

    if (my_y == Y_PROCS - 1)       
        for (inx = 1;  inx < X_PTS_1;  inx ++)
            Out[inx][Y_PTS] = 0.0;

    for (inx = lower_x;  inx < upper_x;  inx ++)
        for (jnx = lower_y;  jnx < upper_y;  jnx ++)
            Out[inx][jnx] = fact_laplac * (In[inx][jnx+1] + In[inx][jnx-1] + 
                            In[inx+1][jnx] + In[inx-1][jnx] - 4.0 * 
                            In[inx][jnx]);
    }


void Jacobian (In_1, In_2, Out)
double In_1[][Y_PTS_2], In_2[][Y_PTS_2], Out[][Y_PTS_2];

   {double f_1, f_2, f_3, f_4, f_5, f_6, f_7, f_8;
    int inx, jnx;

    if (my_x == 0)       
        for (jnx = 1;  jnx < Y_PTS_1;  jnx ++) 
            Out[1][jnx]     = 0.0;

    if (my_x == X_PROCS - 1)       
        for (jnx = 1;  jnx < Y_PTS_1;  jnx ++) 
            Out[X_PTS][jnx] = 0.0;

    if (my_y == 0)       
        for (inx = 1;  inx < X_PTS_1;  inx ++) 
            Out[inx][1]     = 0.0;

    if (my_y == Y_PROCS - 1)       
        for (inx = 1;  inx < X_PTS_1;  inx ++)
            Out[inx][Y_PTS] = 0.0;

    for (inx = lower_x;  inx < upper_x;  inx ++)
        for (jnx = lower_y;  jnx < upper_y;  jnx ++)
           {f_1 = (In_2[inx-1][jnx] + In_2[inx-1][jnx+1] - In_2[inx+1][jnx] -
                   In_2[inx+1][jnx+1]) * (In_1[inx][jnx+1] - In_1[inx][jnx]);

            f_2 = (In_2[inx-1][jnx-1] + In_2[inx-1][jnx] - In_2[inx+1][jnx-1] -
                   In_2[inx+1][jnx]) * (In_1[inx][jnx] - In_1[inx][jnx-1]);

            f_3 = (In_2[inx][jnx+1] + In_2[inx+1][jnx+1] - In_2[inx][jnx-1] -
                   In_2[inx+1][jnx-1]) * (In_1[inx+1][jnx] - In_1[inx][jnx]);

            f_4 = (In_2[inx-1][jnx+1] + In_2[inx][jnx+1] - In_2[inx-1][jnx-1] -
                   In_2[inx][jnx-1]) * (In_1[inx][jnx] - In_1[inx-1][jnx]);

            f_5 = (In_2[inx][jnx+1]   - In_2[inx+1][jnx]) * 
                  (In_1[inx+1][jnx+1] - In_1[inx][jnx]);

            f_6 = (In_2[inx-1][jnx]   - In_2[inx][jnx-1]) * 
                  (In_1[inx][jnx]     - In_1[inx-1][jnx-1]);

            f_7 = (In_2[inx+1][jnx]   - In_2[inx][jnx-1]) * 
                  (In_1[inx+1][jnx-1] - In_1[inx][jnx]);

            f_8 = (In_2[inx][jnx+1]   - In_2[inx-1][jnx]) * 
                  (In_1[inx][jnx]     - In_1[inx-1][jnx+1]);

            Out[inx][jnx] = fact_jacob * (f_1 + f_2 + f_3 + f_4 + 
                                          f_5 + f_6 + f_7 + f_8);
           }
    }


void Print_Mat (name, Matrix)
char *name;
double Matrix[][Y_PTS_2];

   {int inx, jnx, dummy;

    if (TOT_PROCS > 1 && my_node != 0) 
        CMMD_receive (my_node - 1, ANY_TAG, &dummy, sizeof (int));

    printf ("\n");
    if (my_node == 0) printf ("            %s\n", name);
    if (my_node == 0) printf ("            --------\n");
    for (jnx = Y_PTS_1;  jnx >= 0;  jnx --)
       {printf ("[%02d]", jnx);
        for (inx = 0;  inx < X_PTS_2;  inx ++)
            printf (" %9.2e", Matrix[inx][jnx]);
        printf ("\n");
       }

    if (TOT_PROCS > 1)
        if (my_node != 0) 
            CMMD_send ((my_node+1)%TOT_PROCS, ANY_TAG, &dummy, sizeof (int));
        else
           {CMMD_send    (1,             ANY_TAG, &dummy, sizeof (int));
            CMMD_receive (TOT_PROCS - 1, ANY_TAG, &dummy, sizeof (int));
           }
   }
