/*
 * Copyright (c) 1992, 1993 by the University of Southern California
 *
 * For copying and distribution information, please see the files
 * <prm-copyr.h>.
 *
 * Written by srao: 6/92 - 3/93
 *
 */

#include <prm-copyr.h>

#include <stdio.h>

#ifndef MACH386
#include <stdlib.h>
#endif

#if defined(SUNOS)
#   include <vfork.h>
#endif

#include <sys/time.h>
#include <sgtty.h>   
#include <fcntl.h>

#define MAIN_PROG
#include <comm.h>

#ifdef HPUX
#   include <signal.h>
#endif

#include <jobmngr.h>


/*
 * File-scope state of the job manager.  These objects are written by main()
 * and creat_task_desc() and read by the helper routines further down in this
 * file.
 */
int             notty;            /* TRUE if jobmngr has no control tty      */
int             iodone;           /* TRUE after term_iotask terminates       */
int             num_sm_hosts;     /* Max no. of sysmngrs to contact for rscs */
int             tiopid;           /* Pid of term_iotask                      */
int             fiopid;           /* Pid of file_iotask                      */
int             tot_allocd;       /* Number of nodes allocated by sysmngr    */
int             *adj;             /* Adjacency matrix of communication graph */
u_int           authcode;         /* Authorization key furnished by sm       */
u_long          inaddr;           /* Internet address of nodemngr host       */
u_long          ntasks;           /* Total number of tasks in this job       */
u_long          ntasks_running;   /* Number of tasks currently running       */
char            buffer[256];      /* Temporary buffer                        */
char            *displayname;     /* Name of display for term_iotask         */
char            *sm_hostname;     /* Name of sysmngr host                    */
char            *iopath;          /* Directory name for term_iotask          */
char            *tioname;         /* Executable file name for term_iotask    */
char            *fioname;         /* Executable file name for file_iotask    */
char            *host_archname;   /* Host architecture type (def'd in prm.h) */
u_short         flags;            /* Flags (defined in jobmngr.h)            */
u_short         tio_port;	  /* UDP port of term_iotask                 */
u_short         fio_port;         /* UDP port of file_iotask                 */
TDES            *ttbl;            /* pointer to table of tasks               */
prm_node_addr   my_node_addr;     /* Address of jobmngr host                 */
prm_node_addr_t node_addr;        /* Array of tot_allocd node addresses copied
				     out of the sysmngr reply in main()      */
	        
char            *sm_hostlist[MAX_SM_HOSTS];  /* List of sysmngr host names   */


/* NOTE(review): this block is guarded by `#ifdef MULTIHOST`, whereas every
   use of n_allocd / h_type_reqd in this file is guarded by `#if MULTI_HOST`.
   Confirm which macro the build actually defines. */
#ifdef MULTIHOST
int             n_allocd[MAX_HOST_TYPES];    /* Array whose ith element is the
						number of nodes allocated for
						host type i */
u_short         h_type_reqd[MAX_HOST_TYPES]; /* Array whose ith element is the
						number of tasks for host type 
						i */
#endif	        



main(int argc, char *argv[], char *envp[])

{

  PTEXT   pkt, ttbl_pkt;
  PTEXT   rpkt;
  RREQ    current_req, newreq, *pending_reqs;
  int     i, j, k, scode=0, len, exit_code, st, task_cnt, start_ind;
  u_long  ithtask;      /* jobmngr assigns a unique task id to each task */

  u_long  tmp, tid, dlen; 
  u_short ncomm;
  u_short emsglen;
  int             ab_exit;  
  int     ntmp, app_debug=0, estatus=0, sigstatus=0, tnum;
  int     rsc_acqd;     /* Set to TRUE after resources are acquired from 
			   sysmngr */
  int     rsc_insuf;   /* Set to TRUE if allocd resources are insufficient */
  int     send_tmap;
  struct          hostent *hent;

  extern  RREQ create_request();
  extern  void sigint_handler(), dissoc_ctl_term();
  extern  PTEXT PRM_request_resources();

  char    *msg, *curposn, *jdfile, *msg_start; 
  struct  timeval tx, min_start, tstart, tend, max_end;
  struct  timezone tzx;

  FILE    *errfl;
  u_char  reply_op;

  int     options = 0;
  double  elapsed_time;
  prm_node_addr_t curnode_addr;

  ab_exit = FALSE;
  notty = FALSE;
  iodone = FALSE;
  flags = 0;
  ntasks = ntasks_running = 0;
  pfs_debug = prm_debug = 0;
  tioname = iopath = NULL;
  fioname = "file_iotask";

  _progname = argv[0]; /* _progname is used by lower level 
				     functions */

  host_archname = (char *)getenv("ARCH");
  if (host_archname == NULL) {
    host_archname = "sun4";
  }

  argc--; argv++;
  
  while (argc > 0 && **argv == '-') {
    switch (*(argv[0]+1)) {

    case 'A':
            sscanf(argv[0],"-A%d", &app_debug);
	    if (!app_debug)
	      app_debug = 1;   /* Default debug level */
            break;

    case 'D':
            prm_debug = 1;   /* Default debug level */
            options |= SO_DEBUG;
	    sscanf(argv[0],"-D%d", &prm_debug);
            break;

    case 'f':
	    argc--; argv++;
	    fioname = argv[0];
	    flags |= FIONFLAG;
	    break;

    case 'j':
	    argc--; argv++;
            jdfile = argv[0];
	    flags |= JDFILE;
	    break;

    case 'p':
	    argc--; argv++;
	    iopath = (char *)malloc(strlen(argv[0] + 11));
	    strcpy(iopath, argv[0]);
	    strcat(iopath, "/");
	    strcat(iopath, host_archname);
	    flags |= IOPFLAG;
	    break;

    case 's':
	    argc--; argv++;
            sm_hostlist[0] = sm_hostname = argv[0];
	    flags |= SMFLAG;
            break;

    case 't':
	    argc--; argv++;
	    flags |= TIONFLAG;
            tioname = argv[0];
	    break;

	    
    default:
            fprintf(stderr, "Usage: %s [ -A[appln_debug_level] ] [ -D[debug-level] ] [-f file-io-prog]\n         [-j jobfile] [-p iopath] [-s sysmngr-host] [-t term-io-prog]\n", _progname);
            exit(1);
	  }
    argc--; argv++;
  }
  if(prm_debug) pfs_debug = prm_debug;


  /* 1. Job initialization :
     Get my hostname and host address,
     assign a random jobid using timeofday and my internet address, 
     Initialize address translation cache,
     Install signal handlers, 
     */
  
  gettimeofday(&tx,&tzx);
  gethostname(_my_hostname, sizeof(_my_hostname));
  hent = gethostbyname(_my_hostname);
  bcopy(hent->h_addr, &inaddr, 4);
  _my_jobid = (inaddr%16) ^ (tx.tv_sec*1000000+tx.tv_usec);
  bzero((char *)&my_node_addr, PRM_AD_SZ);
  my_node_addr.sin_family = hent->h_addrtype;
  bcopy (hent->h_addr, (char *)&(my_node_addr.sin_addr), hent->h_length);
  
  trsln_cache = (struct cache *)calloc(ntasks + 1, sizeof(struct cache));
  bzero (trsln_cache, (ntasks + 1) * sizeof(struct cache));
  
  signal(SIGINT, sigint_handler); /* Handler for SIGINT. Informs nodemngr, 
				     sysmngr and iotasks. Nodemngrs
				     send the signal to all tasks. */
  

  /* 2. Get resource requirements. If a job description file has been 
     specified, read it, else, prompt user for a filename */

  if(!(flags & JDFILE) ) {
    printf("Enter the name of a job description file: ");
    jdfile = gets(buffer);
  }
  if (exit_code = creat_task_desc(jdfile))
    exit(exit_code);
  

  /* 3. request resource allocation from sysmngr(s) */
  /* Sysmngr host names have been read from the job desc. file into 
     sm_hostlist. Try to acquire nodes from them, in the order specified.
     NOTE that these are strictly 'alternate' sysmngrs in that the jobmngr
     cannot acquire a few resources from one and the rest from another. If
     one cannot allocate sufficent resources, it just ignores that sysmngr and
     goes to the next one. */

  rsc_acqd = FALSE;
  i = 0;
  
  while (!rsc_acqd && (i < num_sm_hosts)  ) {
    sm_hostname = sm_hostlist[i++];
    sprintf(buffer,"%s(%d)", sm_hostname, SYSMNGR_PORT);
    
    if(prm_debug)
      fprintf(stderr, "(%s) Requesting %d nodes from sysmngr %s .....\n", 
	      _progname, ntasks, sm_hostname);
    
    if ((rpkt = PRM_request_resources()) == NOPKT) {
      fprintf(stderr, "(%s) No response from sysmngr %s.\n", _progname, 
	      sm_hostname);
      exit_code = J_NO_SM;
    }
    else {
      msg = rpkt->start;
      if (*(msg + PRM_STATUS_OFF) == FAILURE) {
	fprintf (stderr, "(%s) sysmngr %s does not have free nodes.\n",
		 _progname, sm_hostname);
	exit_code = J_INSF_RSC;
      }
      else
	rsc_acqd = TRUE;
    }
  } /* while */

  if (!rsc_acqd) 
    exit(exit_code);

  bcopy(msg + PRM_DLEN_OFF, &dlen, LONG_SZ);
  dlen = ntohl(dlen);
  bcopy(msg + PRM_DATA_OFF + dlen, &authcode, LONG_SZ);
  /* authcode is retained in network byte order */ 

  bcopy(msg + PRM_TINFO_OFF, &tot_allocd, LONG_SZ);
  tot_allocd = ntohl(tot_allocd);
  curposn = msg + PRM_DATA_OFF;
  
#if MULTI_HOST
  for ( i = 0; i < MAX_HOST_TYPES; i++) {
    bcopy(curposn, &ntmp, LONG_SZ);
    n_allocd[i] = ntohl(ntmp);
    if (h_type_reqd[i] && !n_allocd[i]) {
      fprintf(stderr, "(%s) sysmngr %s could not allocate nodes of type %s.\n",
	      _progname, sm_hostname, htype_list[i]);
      inf_sysmngr(J_INSF_RSC);
      exit(J_INSF_RSC);
    }
    rsc_insuf |= (n_allocd[i] < h_type_reqd[i] );
    curposn += LONG_SZ;
  }
#else
  rsc_insuf = (tot_allocd < ntasks);
#endif
  
  if(prm_debug)
    fprintf(stderr, "(%s) Acquired %d nodes from sysmngr.\n", _progname,
	    tot_allocd);
  
  if (rsc_insuf) {
    fprintf(stderr, "(%s) Do you want to run your job on %d nodes [y/n]? ",
	    _progname, tot_allocd);
    gets(buffer);
    if(buffer[0] != 'y') {
      inf_sysmngr(J_INSF_RSC);
      exit(J_INSF_RSC);
    }
  }

  node_addr = (prm_node_addr_t) calloc(tot_allocd, PRM_AD_SZ);
  bcopy(curposn, node_addr, tot_allocd * PRM_AD_SZ);


  /* 4. Use an allocation algorithm to assign tasks to nodes to optimize costs.
     For now, we just assign ith task to node i */
  
#if MULTI_HOST
  task_cnt = 0;
  start_ind = 0;
  for (j = 0; j < MAX_HOST_TYPES; j++) {
    i = 0;
    while ((task_cnt < ntasks) && (ttbl[task_cnt]->h_type == j) )
      ttbl[task_cnt++]->nodeaddr = &(node_addr[start_ind+(i++)%n_allocd[j]]);
    start_ind += n_allocd[j];
  }
/*  fill_ngbrs(); */
#else
  for(j = 0; j < ntasks; j++) 
    ttbl[j]->nodeaddr = &node_addr[j%tot_allocd];
#endif
  


/* 5. Prepare a message containing mappings of tasks to nodes, to be sent to
   each nodemngr */

  ttbl_pkt = ardp_ptalloc();  
  msg_start = ttbl_pkt->start;
  prm_headers(ttbl_pkt, (u_char)PRM_THMAP_UPDT, 0, 0, _my_jobid);
  
  tmp = htonl(ntasks);
  bcopy(&tmp, msg_start + PRM_TINFO_OFF, LONG_SZ);
  tmp = htonl( (ntasks + 1) * (LONG_SZ + PRM_AD_SZ));
  bcopy(&tmp, msg_start + PRM_DLEN_OFF, LONG_SZ);
  
  curposn = msg_start + PRM_DATA_OFF;
  tid = (u_long)0;
  bcopy(&tid, curposn, LONG_SZ);
  curposn += LONG_SZ;
  bcopy(&my_node_addr, curposn, PRM_AD_SZ);     /* The host-address of task 0, 
						the iotask */
  curposn += PRM_AD_SZ;

  for(i = 0; i < ntasks; i++) {   /* fill in host-addresses of all tasks */
    tid = htonl((u_long) (i+1));
    bcopy (&tid, curposn, LONG_SZ);
    curposn += LONG_SZ;
    bcopy( ttbl[i]->nodeaddr, curposn, PRM_AD_SZ);
    curposn += PRM_AD_SZ;
  } 
  bcopy(&authcode, curposn, LONG_SZ);           /* authorization code */
  ttbl_pkt->length = (int) (curposn + LONG_SZ - msg_start);


  /* 6. Send a copy of the tid-hostaddr map to local nodemngr */
  
  send_tmap = JM_send_pkts_to_nm(&my_node_addr, ttbl_pkt, -1); 
  if (send_tmap != SUCCESS) {
    job_done((u_char)send_tmap, 0);
    exit(send_tmap);
  }


  /* 7. Request nodemngr to create a file I/O task */

  newreq = create_request((char *)0, fioname, 0);
  ardp_send(newreq, 0, &my_node_addr, -1);

  if ((rpkt = newreq->rcvd) == NOPKT) {
    fprintf(stderr, "(%s) Local nodemngr is not responding\n", _progname);
    job_done((u_char)J_NO_NM, 0);
    exit(J_NO_NM);
  }
  else {                          /* Check if authorization was successful  */
    reply_op = *(rpkt->start + PRM_STATUS_OFF);
    if (reply_op == FAILURE) {
      bcopy(rpkt->start + PRM_FLEN_OFF, &emsglen, 2);
      emsglen = ntohs(emsglen);
      if (emsglen)
	bcopy(rpkt->start + PRM_DATA_OFF,  p_err_string, emsglen);
      
      fprintf(stderr, "(%s) %s\n", _progname, p_err_string); 
      job_done((u_char)J_AUTH_FAIL, 0);
      exit(J_NO_LOCIO);
    }
    else {
      bcopy(rpkt->start + PRM_TINFO_OFF, &fiopid, 4);
      fiopid = ntohl(fiopid);
      bcopy(rpkt->start + PRM_DLEN_OFF, &fio_port, 2);
      _fio_host_addr = (prm_node_addr_t)malloc(PRM_AD_SZ);
      bcopy(&my_node_addr, _fio_host_addr, PRM_AD_SZ);
      _fio_host_addr->sin_port = fio_port;
      fio_port = ntohs(fio_port);
      tio_port = fio_port + 1;
    }
  }    
  ardp_rqfree(newreq);

  /* 8. Create an IOtask for terminal I/O */ 

  /* Checking for DISPLAY has been disabled. It hasn't been debugged yet */
  /*  displayname = (char *)getenv("DISPLAY"); */
  displayname = NULL;
  if( (tiopid = spawn_io(displayname, iopath, tioname, tio_port) ) <= 0 ) {
    fprintf(stderr, "(%s) Could not create terminal I/O task !\n", _progname); 
    iodone = TRUE;
    job_done((u_char)J_NO_LOCIO, 0);
    exit(J_NO_LOCIO);  
  }
  if (prm_debug)
    fprintf(stderr, "(%s) Created term_IOtask (pid = %d) and file_IOtask (pid = %d)\n", _progname, tiopid, fiopid);
  

  pending_reqs = (struct rreq **) calloc(ntasks, sizeof(RREQ) );


  /* 9. Send taskmap to each nodemngr asynchronously */
  for(i = 0; i < ntasks; i++) { 
    
    curnode_addr = ttbl[i]->nodeaddr;
    if(curnode_addr->sin_addr.s_addr == inaddr) {
      pending_reqs[i] = NOREQ;
      continue;
    }    
    /* send taskid -> machine_addr mapping table to node manager */
    
    pkt = ardp_ptalloc();
    bcopy(ttbl_pkt->start, pkt->start, ttbl_pkt->length);
    pkt->length = ttbl_pkt->length;
    
    newreq = ardp_rqalloc();
    newreq->outpkt = pkt;

    curnode_addr->sin_port = htons((u_short)NODEMNGR_PORT);
    if (ardp_send(newreq, 0, curnode_addr, 0) == ARDP_SUCCESS) {
      if (st = process_pending_req(newreq) != 0) {
	if (st == J_AUTH_FAIL)
	  fprintf(stderr, "(%s) Authorization on nodemngr %s failed!\n",
		  _progname, inet_ntoa(newreq->peer_addr) );
	job_done((u_char)st, i);
	exit(st);
      }
      ardp_rqfree(newreq);
      pending_reqs[i] = NOREQ;
    }
    else 
      pending_reqs[i] = newreq;
  } /*  for  */
  
  ardp_ptfree(ttbl_pkt);
  
  for (i = 0; i < tot_allocd; i++) {
    if ( pending_reqs[i] ) {
      if ( (st = process_pending_req(pending_reqs[i]) ) != 0) {
	if (st == J_AUTH_FAIL)
	  fprintf(stderr, "(%s) Authorization on nodemngr %s failed!\n",
		  _progname, inet_ntoa(pending_reqs[i]->peer_addr) );
	job_done((u_char)st, i);
	exit(st);
      }
      ardp_rqfree(pending_reqs[i]);
    }
  }


  /* 10. Finally, TASK INITIATION : Send a message to each nodemngr, requesting
     it to create the task. Message contains the pathname of executable file 
     and a flag to indicate whether the file can be directly loaded, or 
     loaded through the file_iotask.
     */

  ithtask = 1;
  p_err_string[0] = '\0';
  for(i = 0; i < ntasks; i++) {  
    
    curnode_addr = ttbl[i]->nodeaddr;
    
    /* fill in a (list of) packet(s) the information required by the node 
       mngr to create task i */
    newreq = create_request(ttbl[i]->dirname, ttbl[i]->fname, ithtask, 
			    ttbl[i]); 
    
    if(prm_debug) 
      fprintf(stderr, "(%s) Initiating task %d on node %s .....\n", 
	      _progname, ithtask, inet_ntoa(curnode_addr->sin_addr) );
    
    curnode_addr->sin_port = htons((u_short)NODEMNGR_PORT);
    if (ardp_send(newreq, 0, curnode_addr, 0) == ARDP_SUCCESS) {
      if (scode = process_init_req(newreq, i) != SUCCESS) {
	if (scode == J_TASK_INIT)
	  fprintf(stderr, "(%s) Could not create task %d on node %s: %s \n",
		  _progname, ithtask, inet_ntoa(curnode_addr->sin_addr), 
		  p_err_string);
	job_done((u_char)scode, i);
	exit(scode);
      }
      ardp_rqfree(newreq);
      pending_reqs[i] = NOREQ;
    }
    else 
      pending_reqs[i] = newreq;
    ++ithtask;
  } /*  for  */
  
  for (i = 0; i < ntasks; i++) {
    if ( pending_reqs[i] ) {
      if ( (scode = process_init_req(pending_reqs[i], i) ) != SUCCESS) {
	fprintf(stderr, "(%s) Could not create task %d on node %s: %s \n",
		_progname, i + 1, inet_ntoa(ttbl[i]->nodeaddr->sin_addr), 
		p_err_string);
	job_done((u_char)scode, i);
	exit(scode);
      }
      ardp_rqfree(pending_reqs[i]);
    }
  }

  cfree(pending_reqs, ntasks, sizeof(RREQ));
  
  if (ntasks_running == 0) {
    ab_exit = TRUE;
    goto done;
  }
  
  if (prm_debug)
    fprintf(stderr, "(%s) >>> All tasks started successfully.\n", _progname);

  /* jm uses taskid 0, port 0 */
  while ((_my_portnum = create_srv_socket(0, 0)) == 0);  

  /* Disassociate from controlling terminal */
  (void)dissoc_ctl_term();

  /* 12. Synchronize all tasks, including term_iotask */
  if(st = JM_sync_all_tasks()) {
    job_done((u_char)st, 0);
    exit(st);
  }    

  min_start.tv_sec = min_start.tv_usec = floor( (exp2(31.0)-1) );
  max_end.tv_sec = max_end.tv_usec = 0;
  
  
  /* 13. All tasks are now running, so I wait until they all finish. Alternate 
     jobmngrs may perform other kinds of work also (rather than just waiting)
     */
  
  while(ntasks_running) {  
    
    current_req = ardp_get_nxt();
    
    pkt = current_req->rcvd;
    msg = pkt->start;
    reply_op = *(msg + PRM_STATUS_OFF);
    
    switch (reply_op) {
      
    case PRM_APPL_ERR:
      job_done((u_char)J_APPL_ERR, 0);  /* define J_APPL_ERROR as 11 */
      exit(J_APPL_ERR);
      break;

    case PRM_TASK_STAT:
        
      bcopy(msg + PRM_TINFO_OFF, &tid, LONG_SZ);
      tid = ntohl(tid);
      
      estatus = *(msg + PRM_ADINF_OFF);

      if (estatus) {
	ab_exit = TRUE;
	bcopy(msg + PRM_FLEN_OFF, &emsglen, 2);
	emsglen = ntohs(emsglen);
	bcopy(msg + PRM_DATA_OFF, p_err_string, emsglen);
	if (emsglen) 
	  if (notty && !iodone)
	    io_printf ("%s\n", p_err_string, (char *)0);
	  else
	    printf ("%s\n", p_err_string );
      }
      else 
	emsglen = 0;

      sigstatus = (int)*(msg + PRM_DLEN_OFF);
      if (sigstatus) {
	io_printf("(%s) task %d died due to signal %d", _progname, tid, 
		(int)sigstatus);
	ab_exit = TRUE;
      }

      if (ttbl[tid-1]->status == T_RUNNING) {
	ttbl[tid-1]->status = T_DEAD;
	--ntasks_running;
      }
      curposn = msg + PRM_DATA_OFF + emsglen;
      bcopy(curposn, &tstart.tv_sec, LONG_SZ);
      tstart.tv_sec = ntohl(tstart.tv_sec);
      curposn += LONG_SZ;
      bcopy(curposn, &tstart.tv_usec, LONG_SZ);
      tstart.tv_usec = ntohl(tstart.tv_usec);
      if ( (tstart.tv_sec < min_start.tv_sec) || 
	  ( (tstart.tv_sec == min_start.tv_sec) && (tstart.tv_usec < 
						   min_start.tv_usec) ) ) {
	min_start.tv_sec  = tstart.tv_sec;
	min_start.tv_usec = tstart.tv_usec;
      }

      curposn += LONG_SZ;
      bcopy(curposn, &tend.tv_sec, LONG_SZ);
      tend.tv_sec = ntohl(tend.tv_sec);
      curposn += LONG_SZ;
      bcopy(curposn, &tend.tv_usec, LONG_SZ);
      tend.tv_usec = ntohl(tend.tv_usec);
      if ( (tend.tv_sec > max_end.tv_sec) || 
	  ( (tend.tv_sec == max_end.tv_sec) && (tend.tv_usec >
						   max_end.tv_usec) ) ) {
	max_end.tv_sec  = tend.tv_sec;
	max_end.tv_usec = tend.tv_usec;
      }

      current_req->outpkt = ardp_ptalloc();
      prm_headers(current_req->outpkt, (u_char)PRM_TSTAT_RESP, (u_char)SUCCESS,
		  (u_char)0, 0);
      current_req->outpkt->length = 3;
      ardp_respond(current_req, ARDP_R_COMPLETE);

	break;
	
      default:
	break;
    }
  }

  elapsed_time = (double)(max_end.tv_sec - min_start.tv_sec) + 
    ( (double) (max_end.tv_usec - min_start.tv_usec))/1000000.0;


  /* All tasks (and hence job) done, so print output/statistics etc and exit */

 done:
  /* 14. Synchronize with term-IOtask */
  JM_sync_with_tio();

  if(prm_debug)
    if(ab_exit) 
      io_printf("(%s) <<< All tasks terminated, job unsuccessful.\n", 
	      _progname,0);
    else
      io_printf("(%s) >>> All tasks terminated, job successful. Elapsed time = %7.3f sec.\n", 
	      _progname, elapsed_time, 0);

  /* 15. And finally the job is done */

  job_done((u_char)J_NORM_EXIT, 0);
  exit(J_NORM_EXIT);
}


/**************************************/

creat_task_desc(jflname)
char *jflname;
{
  FILE *fd;    /* job file descriptor */
  int n;  /* Number of tasks to create */
  int i, j, l, val, ncnt, hosttype;
  struct namelist *firstn, *curn;
  prm_node_addr_t ihostaddr;
  TDES tmtbl;
  char *infp;

  if( (fd = fopen(jflname, "r")) == NULL) {
    fprintf(stderr, "(%s) Could not open job description file %s!\n", 
	    _progname, jflname);
    return ERRORCODE;
  }
  fscanf(fd, "%d", &ntasks);
  if (ntasks > MAXNTASKS) {
    fprintf(stderr,"Too many tasks for job manager to handle. Giving up!\n");
    fclose(fd);
    return J_TOO_MANY;
  }
  _ntasks = ntasks;

  ttbl = (TDES *) calloc(ntasks, sizeof(TDES));
#if MULTI_HOST
  host_type = (u_long *)calloc(ntasks, sizeof(u_long));
#endif
  for(i=0; i<ntasks; i++) { /*for each task, create task description struct*/

    ttbl[i] = (TDES) malloc(sizeof (struct taskdes));

    fscanf(fd, "%s", buffer);
    ttbl[i]->fname = (char *)malloc(strlen(buffer) + 1);
    strcpy(ttbl[i]->fname, buffer);

    fscanf(fd, "%s", buffer);
#if MULTI_HOST
    l = strlen(buffer);
    do {
      if (l == 1) goto next_host;
      infp = ((buffer[0] == '(' ) ? buffer + 1, buffer);
      if ((hosttype = HOSTTYPE(infp)) == -1) {
	fprintf(stderr, "(%s) %s: Unknown host type for task %d.\n", 
		_progname, infp, i);
	return (J_HOST_TYPE);
      }
      h_type_reqd[hosttype]++;
      host_type[i] |= (0x0001 << hosttype);
    next_host:
      fscanf(fd, "%s", buffer);
    } while(buffer[strlen(buffer)] != ')' );
#endif    

    ttbl[i]->dirname = (char *)malloc(strlen(buffer)+1);
    strcpy(ttbl[i]->dirname, buffer);
    
    ttbl[i]->status = 0;
    
  } /* for */

#if 0
  sort_ttbl(); /* To cluster tasks intended for the same hosttype together */ 
#endif

  if(flags & CMATFLAG) {
    adj = (int *)calloc((ntasks+1)*(ntasks+1), sizeof(int));
    for(i=0; i<ntasks; i++) { /* Read comm. graph adjacency matrix */
      for (j=0; j<ntasks; j++) {
	fscanf(fd, "%d", &val);
	adj[i*(ntasks+1)+j] = val;
      }
    }
  }
  if (flags & SMFLAG) /* system mngr specified on command line */
    num_sm_hosts = 1;
  else
    num_sm_hosts = 0;
  
  
  if ( fscanf(fd, "%s", buffer) != EOF ) {
    if(!(flags & TIONFLAG) ) {   /* iotask filename not specified on 
				    command line */
      tioname = (char *)malloc(strlen(buffer)+1);
      sscanf(buffer, "%s", tioname);
    }
    if ( fscanf(fd, "%s", buffer) != EOF ) {
      if(!(flags & IOPFLAG)) {   /* iotask file dir not specified on 
				    command line */
	iopath = (char *)malloc(strlen(buffer)+11);
	strcpy(iopath, buffer);
	strcat(iopath, "/");
	strcat(iopath, host_archname);
      }
      
      /* Scan for sysmngr hostnames  */
      while((num_sm_hosts < MAX_SM_HOSTS) && 
	    (fscanf(fd, "%s", buffer) != EOF ) ) {
	sm_hostlist[num_sm_hosts] = (char *)malloc(strlen(buffer)+1);
	strcpy(sm_hostlist[num_sm_hosts++], buffer);
      }
    }
  }
  fclose(fd);
  return 0;
}

/*
 * fill_ngbrs - derive each task's neighbour list from the adjacency matrix.
 *
 * Does nothing unless CMATFLAG is set in flags.  For task `row`, every
 * non-zero entry adj[row*(ntasks+1)+col] contributes one namelist node
 * holding ttbl[col]->nodeaddr; the nodes are chained in column order.  The
 * list head goes into ttbl[row]->ngbr and its length into
 * ttbl[row]->ndegree.
 */
fill_ngbrs() 
{
  int row, col, degree;
  struct namelist *head, *node;
  struct namelist **link;     /* where the next node gets attached */

  if (flags & CMATFLAG) {
    for (row = 0; row < ntasks; row++) {
      head = NULL;
      link = &head;
      degree = 0;

      for (col = 0; col < ntasks; col++) {
        if (adj[row*(ntasks+1)+col] != 0) {
          node = (struct namelist *)malloc(sizeof (struct namelist));
          node->next = NULL;
          node->ngbaddr = ttbl[col]->nodeaddr;
          *link = node;            /* append at the tail */
          link = &node->next;
          degree++;
        }
      }

      ttbl[row]->ngbr = head;
      ttbl[row]->ndegree = degree;
    }
  }
}


/*
 * create_request - build a PRM_CREAT_TASK request for a nodemngr.
 *
 * Packet layout written here:
 *   PRM_TINFO_OFF  task id, network byte order
 *   PRM_DLEN_OFF   length of the directory string incl. NUL (0 if none),
 *                  network byte order
 *   PRM_FLEN_OFF   length of the file name incl. NUL, network byte order
 *   PRM_DATA_OFF   directory string, file name, then authcode (which is
 *                  already kept in network byte order)
 *
 * dir:    directory of the executable, or NULL for none
 * fl:     executable file name
 * taskid: id assigned to the task by the jobmngr
 *
 * Returns a newly allocated request whose outpkt list holds the packet.
 *
 * NOTE(review): nothing here bounds dsize + fsize against the capacity of
 * the packet returned by ardp_ptalloc() -- confirm the limit with ARDP.
 */

RREQ
  create_request(char *dir, char *fl, tid_t taskid)
{
  char *msg;
  u_short fsize, dsize;          /* string lengths, host byte order    */
  u_short net_fsize, net_dsize;  /* the same lengths, network order    */
  tid_t nt;
  PTEXT pkt;
  RREQ creat_req;

  pkt = ardp_ptalloc();
  msg = pkt->start;
  prm_headers(pkt, (u_char)PRM_CREAT_TASK, (u_char)0, (u_char)OS_UNIX, 
              _my_jobid);
  
  nt = htonl(taskid); 
  bcopy(&nt, msg + PRM_TINFO_OFF, LONG_SZ);
  
  if (dir) {
    dsize = (strlen(dir) + 1);
    bcopy(dir, msg + PRM_DATA_OFF, dsize);
  }
  else 
    dsize = 0;
  
  fsize = strlen(fl) + 1;
  bcopy(fl, msg + PRM_DATA_OFF + dsize, fsize);
  
  /* Only the copies stored in the header are byte-swapped; the offsets
     below must keep using the host-order values. */
  net_dsize = htons(dsize);
  bcopy(&net_dsize, msg + PRM_DLEN_OFF, 2);
  net_fsize = htons(fsize);
  bcopy(&net_fsize, msg + PRM_FLEN_OFF, 2);
  bcopy(&authcode, msg + PRM_DATA_OFF + dsize + fsize, LONG_SZ);
  pkt->length = PRM_DATA_OFF + dsize + fsize + LONG_SZ;  
  
  creat_req = ardp_rqalloc();
  APPEND_ITEM(pkt,creat_req->outpkt);
  return creat_req;
}




job_done(ecode, info)
u_char ecode;          /* Exit code of jobmngr */
int info;              /* Some additional info */
{
  int i, njid, nonode, notify_loc_nm_reqd = TRUE;
  PTEXT pkt, donepkt, rpkt;
  RREQ newreq;
  char *dst;
  prm_node_addr_t curnode_addr;

  donepkt = ardp_ptalloc();
  njid = htonl(_my_jobid);

  prm_headers(donepkt, (u_char)PRM_JOB_DONE, ecode, 0,_my_jobid);
  donepkt->length = PRM_JOBID_OFF + LONG_SZ;
  if (ecode == J_KBD_SIG)                     /* If a signal was delivered, */
    *(donepkt->start + PRM_ADINF_OFF) = (u_char)info; /* insert signal num here */
  
  for (i = 0; i < tot_allocd; i++) {
    if ((ecode == J_NO_NM) && (i == info))       continue;
    /* Jobmngr terminated because this node did not respond. So skip it */

    pkt = ardp_ptalloc();
    bcopy(donepkt->start, pkt->start, donepkt->length);
    pkt->length = donepkt->length;
    
    curnode_addr = ttbl[i]->nodeaddr;
    if (curnode_addr->sin_addr.s_addr == inaddr)
      notify_loc_nm_reqd = FALSE;
    
    newreq = ardp_rqalloc();
    newreq->outpkt = pkt;
    curnode_addr->sin_port = htons((u_short)NODEMNGR_PORT);
    ardp_send(newreq, 0, curnode_addr, -1);
    
    if( (rpkt = newreq->rcvd) == NOPKT) {  /* No ack recvd */
      if (notify_loc_nm_reqd && notty && !iodone )
	io_printf("(%s) nodemngr at %s not responding!!!\n", _progname,
		inet_ntoa(curnode_addr->sin_addr) ,0);
      else
	fprintf(stderr, "(%s) nodemngr at %s not responding!!!\n", _progname,
		inet_ntoa(curnode_addr->sin_addr));
      
    }
    ardp_rqfree(newreq);
  }
  if (notify_loc_nm_reqd) {     /* Inform nodemngr on local node */
    newreq = ardp_rqalloc();
    newreq->outpkt = pkt = ardp_ptalloc();
    bcopy(donepkt->start, pkt->start, donepkt->length);
    pkt->length = donepkt->length;
    my_node_addr.sin_port = htons( (u_short) NODEMNGR_PORT);
    ardp_send(newreq, 0, &my_node_addr, -1);
    ardp_rqfree(newreq);
  }
  inf_sysmngr(ecode);
  if (!iodone) {       /* Terminate the iotasks */
    /* For now, we just terminate the iotask<s using signals. In the long run,
       we may need to check whether they have migrated to other nodes and use
       messages to terminate them */
    kill(tiopid, SIGTERM); 
    kill(fiopid, SIGTERM);

    iodone = TRUE;
  }
  ardp_ptfree(donepkt);
}
  

/*
 * inf_sysmngr - tell the current sysmngr (sm_hostname) that the job is over.
 *
 * Sends a PRM_JOB_DONE packet carrying estat to "<sm_hostname>(SYSMNGR_PORT)"
 * and waits for the acknowledgement.  If none arrives, a message is printed
 * and the process exits with J_NO_SM.  When no sysmngr has been selected yet
 * (sm_hostname is NULL) there is nobody to inform and the call is a no-op.
 *
 * estat: exit status of the job
 *
 * Returns 0.
 */
int
inf_sysmngr(estat)
u_char estat;
{
  PTEXT rpkt, donepkt;
  RREQ newreq;

  if (sm_hostname == NULL)
    return 0;

  /* buffer must hold the host name plus "(<port>)" and the NUL. */
  if (strlen(sm_hostname) + 16 > sizeof(buffer)) {
    fprintf(stderr, "(%s) sysmngr host name is too long.\n", _progname);
    return 0;
  }

  donepkt = ardp_ptalloc();
  prm_headers(donepkt, (u_char)PRM_JOB_DONE, estat, 0,_my_jobid);
  donepkt->length = PRM_JOBID_OFF + LONG_SZ;
  
  sprintf(buffer, "%s(%d)", sm_hostname, SYSMNGR_PORT);

  newreq = ardp_rqalloc();
  newreq->outpkt = donepkt;
  ardp_send(newreq, buffer, 0, -1);
  
  if( (rpkt = newreq->rcvd) == NOPKT) {  /* No ack recvd */
    if (notty && !iodone )
      io_printf("(%s) sysmngr %s not responding!!!\n", 
                _progname, sm_hostname, 0);
    else
      fprintf(stderr, "(%s) sysmngr %s not responding!!!\n", _progname, 
              sm_hostname);
    exit(J_NO_SM);
  }
  /* NOTE(review): donepkt is attached to newreq->outpkt and then released a
     second time below; job_done() relies on ardp_rqfree() alone for the
     packets it attaches.  Confirm against ARDP that this is not a double
     free. */
  ardp_rqfree(newreq);
  ardp_ptfree(donepkt);
  return 0;
}

void 
sigint_handler(sig, code, scp, addr)
int sig, code;
struct sigcontext *scp;
char *addr;
{
  job_done((u_char)J_KBD_SIG, sig);
  printf("\n");
  exit(J_KBD_SIG);
}


/*
 * dissoc_ctl_term - detach the jobmngr from its controlling terminal.
 *
 * Closes descriptors 0, 1 and 2, then either calls setsid() (when SETSID is
 * defined) or opens /dev/tty and issues TIOCNOTTY on it (skipped on HPUX).
 * notty is set to TRUE in every case.
 */
void
dissoc_ctl_term()
{
  int fd = 0;

  /* Give up stdin, stdout and stderr. */
  while (fd < 3)
    (void) close(fd++);

#ifdef SETSID
  setsid();
#else
  fd = open("/dev/tty", 2, 0);

  if (fd < 0)
    perror("Could not open tty file: ");
  else {
#  ifndef HPUX
    /* A failing ioctl is deliberately ignored. */
    (void) ioctl(fd, TIOCNOTTY, (char *) 0);
#  endif
    (void) close(fd);
  }
#endif

  notty = TRUE;
}


/*
 * sort_ttbl - cluster the task table by host type.
 *
 * The whole body is compiled out with `#if 0`, so the function currently
 * does nothing; its only call site in creat_task_desc() is disabled the
 * same way.  The disabled code moves, for each host type i in increasing
 * order, every ttbl entry whose h_type equals i in front of the entries
 * with a larger h_type.
 */
sort_ttbl()
{
#if 0
  int swap_pos;
  int i, j;
  TDES tmp;

  swap_pos = 0;
  for (i = 0; i < MAX_HOST_TYPES; i++) {
    while (ttbl[swap_pos]->h_type <= i) {
      ++swap_pos;
      if (swap_pos >= ntasks) return;
    }
    for (j = swap_pos + 1; j < ntasks; j++) {
      if (ttbl[j]->h_type == i) {
	tmp = ttbl[j];
	ttbl[j] = ttbl[swap_pos];
	ttbl[swap_pos] = tmp;
	++swap_pos;
      }
    }
  }
#endif
}


JM_sync_all_tasks()
{
  int i;
  RREQ req;

  JM_sync_with_tio();

#if 0 /* Sync not reqd of fileio. It should already be running and performing
	 file transfers */

  req = ardp_rqalloc();
  req->outpkt = ardp_ptalloc();
  *(req->outpkt->start + PRM_OP_OFFSET) = (u_char)TASK_SYNC;
  req->outpkt->length = PRM_OP_OFFSET + 1;
  ardp_send(req, 0, _fio_host_addr, -1);
  if (req->rcvd == NOPKT) {
    fprintf(stderr, "(%s) File I/O task not responding.\n", _progname);
    ardp_rqfree(req);
    return J_NO_LOCIO;
  }
#endif

  for (i = 0; i < ntasks ; i++) {
    req = ardp_rqalloc();
    req->outpkt = ardp_ptalloc();
    *(req->outpkt->start + PRM_OPCODE_OFF) = (u_char)PRM_SYNC_TASKS;
    req->outpkt->length = PRM_OPCODE_OFF + 1;
    ttbl[i]->nodeaddr->sin_port =  ttbl[i]->port;
    ardp_send (req, 0, ttbl[i]->nodeaddr, -1);
    if (req->rcvd == NOPKT) {
      fprintf(stderr, "(%s) Task %d not responding.\n", _progname, i+1);
      ardp_rqfree(req);
      return J_TASK_INIT;
    }
    ardp_rqfree(req);
  }
  return 0;
}


/*
 * JM_sync_with_tio - exchange a PRM_SYNC_TASKS message with the terminal
 * I/O task.
 *
 * On first use the task's address is built from my_node_addr and tio_port
 * and cached in _tio_host_addr.
 *
 * Returns 0 when the I/O task replied, J_NO_LOCIO when it did not answer or
 * its address could not be allocated.  The request is released on every
 * path.
 */
int
JM_sync_with_tio()
{
  RREQ req;

  if (_tio_host_addr == NULL) {
    _tio_host_addr = (prm_node_addr_t)malloc(PRM_AD_SZ);
    if (_tio_host_addr == NULL) {
      fprintf(stderr, "(%s) Out of memory.\n", _progname);
      return J_NO_LOCIO;
    }
    bcopy(&my_node_addr, _tio_host_addr, PRM_AD_SZ);
    _tio_host_addr->sin_port = htons(tio_port);
  }

  req = ardp_rqalloc();
  req->outpkt = ardp_ptalloc();
  *(req->outpkt->start + PRM_OPCODE_OFF) = (u_char)PRM_SYNC_TASKS;
  req->outpkt->length = PRM_OPCODE_OFF + 1;
  ardp_send(req, 0, _tio_host_addr, -1);
  if (req->rcvd == NOPKT) {
    fprintf(stderr, "(%s) Terminal I/O task not responding.\n", _progname);
    ardp_rqfree(req);
    return J_NO_LOCIO;
  }
  ardp_rqfree(req);
  return 0;
}
