/* 
** Copyright 1986, 1987, 1988, 1989, 1990, 1991 by the Condor Design Team
** 
** Permission to use, copy, modify, and distribute this software and its
** documentation for any purpose and without fee is hereby granted,
** provided that the above copyright notice appear in all copies and that
** both that copyright notice and this permission notice appear in
** supporting documentation, and that the names of the University of
** Wisconsin and the Condor Design Team not be used in advertising or
** publicity pertaining to distribution of the software without specific,
** written prior permission.  The University of Wisconsin and the Condor
** Design Team make no representations about the suitability of this
** software for any purpose.  It is provided "as is" without express
** or implied warranty.
** 
** THE UNIVERSITY OF WISCONSIN AND THE CONDOR DESIGN TEAM DISCLAIM ALL
** WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
** OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE UNIVERSITY OF
** WISCONSIN OR THE CONDOR DESIGN TEAM BE LIABLE FOR ANY SPECIAL, INDIRECT
** OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
** OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
** OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
** OR PERFORMANCE OF THIS SOFTWARE.
** 
** Authors:  Allan Bricker and Michael J. Litzkow,
** 	         University of Wisconsin, Computer Sciences Dept.
** 
*/ 

#include <stdio.h>
#include <netdb.h>
#include <errno.h>
#include <pwd.h>

#if defined(IRIX405)
#define __EXTENSIONS__
#include <signal.h>
#define BADSIG SIG_ERR
#undef __EXTENSIONS__
#else
#include <signal.h>
#endif

#include <ctype.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/param.h>
#include <sys/file.h>
#include <sys/time.h>
#include <sys/resource.h>

#if defined(HPUX8)
#include "fake_flock.h"
#define _BSD
#endif
#include <sys/wait.h>


#include <sys/stat.h>
#include <netinet/in.h>
#include <rpc/types.h>
#include <rpc/xdr.h>

#include "condor_types.h"
#include "debug.h"
#include "trace.h"
#include "except.h"
#include "sched.h"
#include "expr.h"
#include "proc.h"
#include "clib.h"
#include "exit.h"

#ifdef NDBM
#include <ndbm.h>
#else NDBM
#include "ndbm_fake.h"
#endif NDBM

#if defined(BSD43) || defined(DYNIX)
#define WEXITSTATUS(x) ((x).w_retcode)
#define WTERMSIG(x) ((x).w_termsig)
#endif

static char *_FileName_ = __FILE__;		/* Used by EXCEPT (see except.h)     */

	/* Routines used below before (or without) a visible definition */
CONTEXT	*create_context();
int		reaper();
int		swap_space_exhausted();

extern int	errno;
extern int	HasSigchldHandler;	/* main() sets this TRUE once reaper() is installed */

char	*param();

	/* Context built by create_context() and filled in by config() */
CONTEXT	*MachineContext;

	/*
	** Settings.  Unless noted otherwise these are loaded by init_params()
	** from the configuration parameter named in quotes.
	*/
char	*Log;				/* "LOG"; main() chdir()s here */
int		SchedDInterval;		/* "SCHEDD_INTERVAL", default 120; seconds between timeout() calls in main() */
int		Foreground;			/* Non-zero: main() does not fork.  Set by -f or by SCHEDD_DEBUG */
int		Termlog;			/* Incremented by the -t command line flag */
char	*CollectorHost;		/* "COLLECTOR_HOST"; target of udp_connect() in main() */
char	*NegotiatorHost;	/* "NEGOTIATOR_HOST"; target of do_connect() in reschedule_negotiator() */
char	*Spool;				/* "SPOOL"; directory holding job_queue, history and checkpoint files */
char	*Shadow;			/* "SHADOW" */
int		MaxJobStarts;		/* "MAX_JOB_STARTS", default 5 */
int		MaxJobsRunning;		/* "MAX_JOBS_RUNNING", default 15 */
int		JobsRunning;		/* Not modified anywhere in the code visible in this file section */

int		On = 1;				/* Option value handed to setsockopt(SO_REUSEADDR) */

#ifdef vax
struct linger linger = { 0, 0 };	/* Don't linger */
#endif vax

int		ConnectionSock;		/* Listening TCP socket from init_connection_sock() */
int		UdpSock;			/* Result of udp_connect() to CollectorHost */
DBM		*Q, *OpenJobQueue();	/* Q: job queue handle, (re)opened by check_zombie() and mark_jobs_idle() */

extern int	Terse;				/* Defined elsewhere; main() sets it to 1 */
extern int	ReservedSwap;		/* Swap space to reserve in kbytes */
extern int	ShadowSizeEstimate;	/* Size of shadow process in kbytes */

char 	*MyName;			/* argv[0], passed to config() */
time_t	LastTimeout;		/* time() recorded right after the latest timeout() call in main()/do_command() */

	/* Defined unconditionally, so the CONDOR_HISTORY code below is always compiled in */
#define CONDOR_HISTORY
#ifdef CONDOR_HISTORY
char *History;				/* "HISTORY"; init_params() insists that it is set */

int ClientTimeout;			/* Seconds of alarm() that accept_connection() arms around do_command() */
int ClientSkt = -1;			/* Descriptor from the latest accept(); alarm_handler() sets it back to -1 */
#endif CONDOR_HISTORY

/*
** Log a one line usage summary at D_ALWAYS and terminate the process
** with exit status 1.  Does not return.
**
**	name	program name shown in the message (main() passes argv[0])
*/
usage( name )
char	*name;
{
	dprintf( D_ALWAYS, "Usage: %s [-f] [-t]\n", name );
	exit( 1 );
}

/*
** Entry point of the condor_schedd daemon.
**
** Reads the configuration, parses the command line ("-f" requests
** foreground operation, "-t" bumps Termlog), changes into the log
** directory, forks into the background unless Foreground is set, installs
** the signal handlers, opens the command and collector sockets, prepares
** the job queue, and then loops forever.  Each pass of the loop waits on
** ConnectionSock for at most what is left of the current SchedDInterval;
** timeout() is called when the interval runs out, accept_connection()
** when a client connects.
**
** Never returns.  Every setup failure is fatal through EXCEPT.
*/
main( argc, argv)
int		argc;
char	*argv[];
{
	int		count;
	int		child;
	fd_set	readfds;
	struct timeval	timer;
	char	**ptr;
	int		sigint_handler(), sighup_handler();
#ifdef CONDOR_HISTORY
	int		alarm_handler();
#endif /* CONDOR_HISTORY */

#ifdef NFSFIX
	/* Must be condor to write to log files. */
	set_condor_euid(__FILE__,__LINE__);
#endif /* NFSFIX */

	MachineContext = create_context();

	MyName = *argv;
	config( MyName, MachineContext );

	init_params();
	Terse = 1;


	if( argc > 3 ) {
		usage( argv[0] );
	}
	for( ptr=argv+1; *ptr; ptr++ ) {
		if( ptr[0][0] != '-' ) {
			usage( argv[0] );
		}
		switch( ptr[0][1] ) {
			case 'f':
				Foreground++;
				break;
			case 't':
				Termlog++;
				break;
			default:
				usage( argv[0] );
		}
	}

		/* This is so if we dump core it'll go in the log directory */
	if( chdir(Log) < 0 ) {
		EXCEPT( "chdir to log directory <%s>", Log );
	}

		/*
		** Arrange to run in background.  A failed fork() returns -1, which
		** must not be mistaken for "I am the parent": that would exit with
		** status 0 and leave no daemon running at all.
		*/
	if( !Foreground ) {
		child = fork();
		if( child < 0 ) {
			EXCEPT( "fork" );
		}
		if( child > 0 ) {
			exit( 0 );
		}
	}

		/* Set up logging */
	dprintf_config( "SCHEDD", 2 );

	dprintf( D_ALWAYS, "**************************************************\n" );
	dprintf( D_ALWAYS, "***          CONDOR_SCHEDD STARTING UP         ***\n" );
	dprintf( D_ALWAYS, "**************************************************\n" );
	dprintf( D_ALWAYS, "\n" );

	if( signal(SIGINT,sigint_handler) == BADSIG ) {
		EXCEPT( "signal(SIGINT,0x%x)", sigint_handler );
	}
	if( signal(SIGHUP,sighup_handler) == BADSIG ) {
		EXCEPT( "signal(SIGHUP,0x%x)", sighup_handler );
	}
	if( signal(SIGPIPE,SIG_IGN) == BADSIG ) {
		EXCEPT( "signal(SIGPIPE,SIG_IGN)" );
	}
#ifdef CONDOR_HISTORY
	if( signal(SIGALRM,alarm_handler) == BADSIG ) {
		EXCEPT( "signal(SIGALRM,0x%x)", alarm_handler );
	}

		/*
		** ClientTimeout has already been loaded by init_params() above
		** (CLIENT_TIMEOUT, default 120).  It must not be reassigned here,
		** or the configured value would be ignored until the next reconfig.
		*/
#endif /* CONDOR_HISTORY */

	ConnectionSock = init_connection_sock( "condor_schedd", SCHED_PORT );
	UdpSock = udp_connect( CollectorHost, COLLECTOR_UDP_PORT );

	if( signal(SIGCHLD,reaper) == BADSIG ) {
		EXCEPT( "signal(SIGCHLD,reaper)" );
	}
	HasSigchldHandler = TRUE;

	if( signal(SIGUSR1,swap_space_exhausted) == BADSIG ) {
		EXCEPT( "signal(SIGUSR1,swap_space_exhausted)" );
	}

	create_job_queue();
	mark_jobs_idle();

	timeout();
	LastTimeout = time( (time_t *)0 );
	timer.tv_usec = 0;
	FD_ZERO( &readfds );

	for(;;) {

			/* select() clears the bit on a timeout, so set it every pass */
		FD_SET( ConnectionSock, &readfds );

			/* Sleep only for what is left of the current interval */
		timer.tv_sec = SchedDInterval - ( time((time_t *)0) - LastTimeout );
		if( timer.tv_sec < 0 ) {
			timer.tv_sec = 0;
		}
#if defined(AIX31) || defined(AIX32)
		errno = EINTR;	/* Shouldn't have to do this... */
#endif
		count = select(FD_SETSIZE, (int *)&readfds, (int *)0, (int *)0,
												(struct timeval *)&timer );
		if( count < 0 ) {
			if( errno == EINTR ) {
				continue;
			} else {
					/*
					** An fd_set cannot be handed to a printf style
					** format, so only the timeout is reported.
					*/
				EXCEPT( "select(FD_SETSIZE,readfds,0,0,%d sec)",
												(int)timer.tv_sec );
			}
		}

		if( NFDS(count) == 0 ) {
			timeout();
			LastTimeout = time( (time_t *)0 );
		} else {
			if( !FD_ISSET(ConnectionSock,&readfds) ) {
				EXCEPT( "select returns %d, ConnectionSock (%d) not set",
												NFDS(count), ConnectionSock );
			}
			accept_connection();
		}
	}
}

/*
** Accept one pending connection on ConnectionSock and service the single
** command that arrives on it through do_command().
**
** An accept() interrupted by a signal (EINTR) is silently skipped; any
** other accept() failure is fatal.  When CONDOR_HISTORY is defined the
** connection lives in the global ClientSkt and is guarded by an alarm of
** ClientTimeout seconds, so that alarm_handler() can break a stalled
** client by closing the socket underneath the XDR stream.
*/
accept_connection()
{
	struct sockaddr_in	from;
	int		len;
	int		fd;
	XDR		xdr, *xdrs, *xdr_Init();

	len = sizeof from;
	bzero( (char *)&from, sizeof from );
#ifndef CONDOR_HISTORY
	fd = accept( ConnectionSock, (struct sockaddr *)&from, &len );

	if( fd < 0 && errno != EINTR ) {
		EXCEPT( "accept" );
	}

	if( fd >= 0 ) {
		xdrs = xdr_Init( &fd, &xdr );
		do_command( xdrs );
		xdr_destroy( xdrs );
		(void)close( fd );
	}
#else /* CONDOR_HISTORY */
	ClientSkt = accept( ConnectionSock, (struct sockaddr *)&from, &len);

	if( ClientSkt < 0 && errno != EINTR ) {
		EXCEPT( "accept" );
	}

	if( ClientSkt >= 0 ) {
		xdrs = xdr_Init( &ClientSkt, &xdr );
		(void) alarm( (unsigned) ClientTimeout );	/* Don't hang here forever*/
		do_command( xdrs );
		(void) alarm( (unsigned) 0 );				/* Cancel alarm */
		xdr_destroy( xdrs );

			/*
			** alarm_handler() may already have closed the socket and set
			** ClientSkt to -1.  Close only a descriptor that is still
			** open, and never leave a stale descriptor number behind in
			** the global where a later alarm could close an unrelated fd.
			*/
		if( ClientSkt >= 0 ) {
			(void)close( ClientSkt );
			ClientSkt = -1;
		}
	}
#endif /* CONDOR_HISTORY */
}

init_connection_sock( service, port )
char	*service;
int		port;
{
	struct sockaddr_in	sin;
	struct servent *servp;
	int		sock;

	bzero( (char *)&sin, sizeof sin );
	servp = getservbyname(service, "tcp");
	if( servp ) {
		sin.sin_port = htons( (u_short)servp->s_port );
	} else {
		sin.sin_port = htons( (u_short)port );
	}

	if( (sock=socket(AF_INET,SOCK_STREAM,0)) < 0 ) {
		EXCEPT( "socket" );
	}

	if( setsockopt(sock,SOL_SOCKET,SO_REUSEADDR,(caddr_t *)&On,sizeof(On)) <0) {
		EXCEPT( "setsockopt" );
	}

#ifdef vax
	if( setsockopt(sock,SOL_SOCKET,SO_LINGER,&linger,sizeof(linger)) < 0 ) {
		EXCEPT( "setsockopt" );
	}
#endif vax

	if( bind(sock,(struct sockaddr *)&sin,sizeof(sin)) < 0 ) {
		if( errno == EADDRINUSE ) {
			EXCEPT( "CONDOR_SCHEDD ALREADY RUNNING" );
		} else {
			EXCEPT( "bind" );
		}
	}

	if( listen(sock,5) < 0 ) {
		EXCEPT( "listen" );
	}

	return sock;
}


/*
** Somebody has connected to our socket with a request.  Read the request
** and handle it.
**
**	xdrs	XDR stream attached to the client connection; it is switched
**			to XDR_DECODE here and handed on to the command routines
**
** The command code comes straight off the network, so a value we do not
** recognize is logged and the request dropped (the caller then closes the
** connection).  It must not be treated as a fatal error: that would let
** any client terminate the schedd by sending one bad integer.
*/
do_command( xdrs )
XDR		*xdrs;
{
	int		cmd;

		/* Read the request */
	xdrs->x_op = XDR_DECODE;
	if( !xdr_int(xdrs,&cmd) ) {
		dprintf( D_ALWAYS, "Can't read command\n" );
		return;
	}

	switch( cmd ) {
		case NEGOTIATE:		/* Negotiate with central negotiator to run a job */
			negotiate( xdrs );
			break;
		case RESCHEDULE:	/* Reorder job queue and update collector now */
			timeout();
			LastTimeout = time( (time_t *)0 );
			reschedule_negotiator();
			break;
		case KILL_FRGN_JOB:
			abort_job( xdrs );
			break;
		case RECONFIG:		/* Same action as receiving a SIGHUP */
			sighup_handler();
			break;
#ifdef CONDOR_HISTORY
		case GET_HISTORY:
			getHistory(xdrs);
			break;
#endif /* CONDOR_HISTORY */
		case SEND_ALL_JOBS:
			send_all_jobs( xdrs );
			break;
		case SEND_ALL_JOBS_PRIO:
			send_all_jobs_prioritized( xdrs );
			break;
		default:
			dprintf( D_ALWAYS, "Got unknown command (%d)\n", cmd );
			break;
	}
}

/*
** Ask the negotiator to run a scheduling cycle now: connect to
** NegotiatorHost, send the single integer command RESCHEDULE as one XDR
** record, and close the connection.
**
** Best effort only.  A failed connect is logged and otherwise ignored,
** and the results of the XDR send calls are deliberately not checked.
*/
reschedule_negotiator()
{
	int		sock = -1;
	int		cmd;
		/*
		** xdr_Init() returns a pointer, so it has to be declared here just
		** as accept_connection() declares it.  Without a declaration in
		** scope it is taken to return int and the pointer can be mangled
		** on machines where int and pointers differ in size.
		*/
	XDR		xdr, *xdrs = NULL, *xdr_Init();

	dprintf( D_ALWAYS, "Called reschedule_negotiator()\n" );

		/* Connect to the negotiator */
	if( (sock=do_connect(NegotiatorHost,"condor_negotiator",NEGOTIATOR_PORT))
																	< 0 ) {
		dprintf( D_ALWAYS, "Can't connect to CONDOR negotiator\n" );
		return;
	}
	xdrs = xdr_Init( &sock, &xdr );
	xdrs->x_op = XDR_ENCODE;

		/* One integer, then flush it out as a complete record */
	cmd = RESCHEDULE;
	(void)xdr_int( xdrs, &cmd );
	(void)xdrrec_endofrecord( xdrs, TRUE );

	xdr_destroy( xdrs );
	(void)close( sock );
	return;
}

/*
** Empty stub: takes no arguments and does nothing.
** NOTE(review): presumably present only to satisfy a reference to
** SetSyscalls() from code linked into the schedd -- confirm before removing.
*/
SetSyscalls(){}

/*
** Look up the named configuration parameter and convert it with atoi().
** Returns "fallback" when param() has no value for the name.
*/
static int
int_param_or( name, fallback )
char	*name;
int		fallback;
{
	char	*text;

	text = param( name );
	if( text == NULL ) {
		return fallback;
	}
	return atoi( text );
}

/*
** Load every schedd setting from the configuration into its global.
**
** LOG, COLLECTOR_HOST, NEGOTIATOR_HOST, SCHEDD_DEBUG, SPOOL, SHADOW and
** (with CONDOR_HISTORY) HISTORY are mandatory: a missing one is fatal
** through EXCEPT.  The numeric settings fall back to built-in defaults.
** Called at startup from main() and again from sighup_handler().
*/
init_params()
{
		/* Places and peers we cannot run without */
	if( (Log = param("LOG")) == NULL )  {
		EXCEPT( "No log directory specified in config file\n" );
	}

	if( (CollectorHost = param("COLLECTOR_HOST")) == NULL ) {
		EXCEPT( "No Collector host specified in config file\n" );
	}

	if( (NegotiatorHost = param("NEGOTIATOR_HOST")) == NULL ) {
		EXCEPT( "No NegotiatorHost host specified in config file\n" );
	}

		/* Seconds between periodic timeout() calls */
	SchedDInterval = int_param_or( "SCHEDD_INTERVAL", 120 );

		/* The debug flag list must exist; it may also ask for foreground */
	if( param("SCHEDD_DEBUG" ) == NULL ) {
		EXCEPT( "\"SCHEDD_DEBUG\" not specified" );
	}
	if( boolean("SCHEDD_DEBUG","Foreground") ) {
		Foreground = 1;
	}

	if( (Spool = param("SPOOL")) == NULL ) {
		EXCEPT( "No Spool directory specified" );
	}

	if( (Shadow = param("SHADOW")) == NULL ) {
		EXCEPT( "SHADOW not specified in config file\n" );
	}

	MaxJobStarts = int_param_or( "MAX_JOB_STARTS", 5 );
	MaxJobsRunning = int_param_or( "MAX_JOBS_RUNNING", 15 );

#ifdef CONDOR_HISTORY
	if( (History = param("HISTORY")) == NULL ) {
		EXCEPT( "History file not specified" );
	}

	ClientTimeout = int_param_or( "CLIENT_TIMEOUT", 120 );
#endif CONDOR_HISTORY

		/* Given in megabytes (default 5), kept in kilobytes */
	ReservedSwap = int_param_or( "RESERVED_SWAP", 5 ) * 1024;

		/* Given and kept in kilobytes (default 128) */
	ShadowSizeEstimate = int_param_or( "SHADOW_SIZE_ESTIMATE", 128 );
}

/*
** Allow child processes to die a decent death, don't keep them
** hanging around as <defunct>.
**
** Collects every child that has already exited, without blocking, logs
** how each one ended, calls swap_space_exhausted() for a shadow that
** exited with JOB_NO_MEM, and drops the child's shadow record.
**
**	sig		signal number; 0 marks a direct (non-signal) call, which
**			brackets its output with "Extra Checking" banners
**	code	only logged
**	scp		only logged
**
** NOTE: This signal handler calls routines which will attempt to lock
** the job queue.  Be very careful it is not called when the lock is
** already held, or deadlock will occur!
**
** errno is saved on entry and restored on exit.  The wait3() loop ends
** by changing it (typically to ECHILD), and code interrupted by SIGCHLD
** -- for instance the "errno == EINTR" tests after select() and
** accept() -- would otherwise see the handler's value.
*/
union wait	ShadowStatus;
reaper( sig, code, scp )
int		sig, code;
struct sigcontext	*scp;
{
	int		pid;
	int		saved_errno = errno;

	if( sig == 0 ) {
		dprintf( D_ALWAYS, "***********  Begin Extra Checking ********\n" );
	} else {
		dprintf( D_ALWAYS, "Entered reaper( %d, %d, 0x%x )\n", sig, code, scp );
	}

	for(;;) {
			/* WNOHANG: stop as soon as no further child has exited */
		if( (pid = wait3( &ShadowStatus,WNOHANG,(struct rusage *)0 )) <= 0 ) {
			dprintf( D_FULLDEBUG, "wait3() returned %d, errno = %d\n",
															pid, errno );
			break;
		}
		if( WIFEXITED(ShadowStatus) ) {
			dprintf( D_FULLDEBUG, "Shadow pid %d exited with status %d\n",
											pid, WEXITSTATUS(ShadowStatus) );
			if( WEXITSTATUS(ShadowStatus) == JOB_NO_MEM ) {
				swap_space_exhausted();
			}
		} else if( WIFSIGNALED(ShadowStatus) ) {
			dprintf( D_FULLDEBUG, "Shadow pid %d died with signal %d\n",
											pid, WTERMSIG(ShadowStatus) );
		}
		delete_shadow_rec( pid );
	}
	if( sig == 0 ) {
		dprintf( D_ALWAYS, "***********  End Extra Checking ********\n" );
	}

	errno = saved_errno;
}


/*
** The shadow running this job has died.  If things went right, the job
** has been marked as idle, unexpanded, or completed as appropriate.
** However, if the shadow terminated abnormally, the job might still
** be marked as running (a zombie).  Here we check for that condition,
** and mark the job with the appropriate status.
**
**	pid		process id of the dead shadow
**	job_id	identifies the job that shadow was serving
**
** Opens the job queue into the global Q, holds the writer lock while it
** works, and closes the queue again before returning.  A job that can no
** longer be fetched from the queue is handled as if it were REMOVED.
*/
check_zombie( pid, job_id )
int			pid;
PROC_ID		*job_id;
{
	PROC	job;
	char	queue_path[MAXPATHLEN];

	dprintf( D_ALWAYS, "Entered check_zombie( %d, 0x%x )\n", pid, job_id );

	(void)sprintf( queue_path, "%s/job_queue", Spool );
	Q = OpenJobQueue( queue_path, O_RDWR, 0 );
	if( Q == NULL ) {
		EXCEPT( "OpenJobQueue(%s)", queue_path );
	}

	LOCK_JOB_QUEUE( Q, WRITER );

		/* A job missing from the queue gets the same cleanup as a removed one */
	job.id = *job_id;
	if( FetchProc(Q,&job) < 0 ) {
		job.status = REMOVED;
	}

	if( job.status == RUNNING ) {
			/* Still marked running although its shadow is gone */
		kill_zombie( pid, job_id, &job );
	} else if( job.status == REMOVED ) {
		cleanup_ckpt_files( pid, job_id );
	}

	CLOSE_JOB_QUEUE( Q );
	dprintf( D_ALWAYS, "Exited check_zombie( %d, 0x%x )\n", pid, job_id );
}

/*
** A shadow died and left its job marked RUNNING.  Log the fact and mark
** the job stopped in the currently open job queue Q.
**
**	pid		process id of the dead shadow (used only in the log message)
**	job_id	cluster/proc identifier of the job
**	proc	the job's queue record; accepted for the caller's benefit but
**			not used here
*/
kill_zombie( pid, job_id, proc )
int		pid;
PROC_ID	*job_id;
PROC	*proc;
{
	dprintf( D_ALWAYS,
		"Shadow %d died, and left job %d.%d marked RUNNING\n",
		pid, job_id->cluster, job_id->proc );

	mark_job_stopped( job_id, Q );
}

/*
** Delete whatever checkpoint files a job has left in the Spool directory:
** first the checkpoint itself, then the ".tmp" variant of the same name.
** Failures of unlink() (for instance because the file never existed) are
** ignored.
**
**	pid		not used
**	job_id	cluster/proc identifier that selects the file names
*/
cleanup_ckpt_files( pid, job_id )
int		pid;
PROC_ID	*job_id;
{
	char	victim[MAXPATHLEN];

		/* The checkpoint proper */
	(void)sprintf(
		victim, "%s/job%06d.ckpt.%d",
		Spool, job_id->cluster, job_id->proc
	);
	(void)unlink( victim );

		/* Its temporary sibling */
	(void)sprintf(
		victim, "%s/job%06d.ckpt.%d.tmp",
		Spool, job_id->cluster, job_id->proc
	);
	(void)unlink( victim );
}

/*
** SIGINT handler (installed by main()): note the reason in the log and
** terminate the schedd with exit status 0.  Does not return.
*/
sigint_handler()
{
	dprintf( D_ALWAYS, "Killed by SIGINT\n" );
	exit( 0 );
}

/*
** Re-read the configuration.  Installed by main() as the SIGHUP handler
** and also called directly by do_command() for a RECONFIG request.
**
** The old MachineContext is freed and a fresh one is created and filled
** in by config(); init_params() then reloads the settings and timeout()
** is called once.
*/
sighup_handler()
{
	dprintf( D_ALWAYS, "Re reading config file\n" );

		/* Throw the old context away before building its replacement */
	free_context( MachineContext );
	MachineContext = create_context();
	config( MyName, MachineContext );

	init_params();
	timeout();
}

#ifdef CONDOR_HISTORY
/*
** SIGALRM handler.  The client we are serving has been silent for
** ClientTimeout seconds, so close our end of the connection.  The next
** read()/write() on it then fails, and whichever routine was talking to
** the client cleans up.  ClientSkt is left at -1 in every case.
*/
alarm_handler()
{
	int		stalled;

	dprintf( D_ALWAYS,
		"No response in %d seconds, breaking connection\n", ClientTimeout);

	stalled = ClientSkt;
	if( stalled >= 0 ) {
		(void)close( stalled );
		dprintf( D_ALWAYS, "Closed %d at %d\n", stalled, __LINE__ );
	}

	ClientSkt = -1;
}
#endif CONDOR_HISTORY

/*
** Make sure one file below the Spool directory exists, creating it with
** mode 0660 when it does not.  "fmt" is a sprintf() format with a single
** %s that receives Spool.  Failure to open the file is fatal.
*/
static void
ensure_spool_file( fmt )
char	*fmt;
{
	char	path[MAXPATHLEN];
	int		fd;

	(void)sprintf( path, fmt, Spool );
	fd = open( path, O_RDWR|O_CREAT, 0660 );
	if( fd < 0 ) {
		EXCEPT( "open(%s,O_RDWR,0660)", path );
	}
	(void)close( fd );
}

/*
** On a machine where condor is newly installed, there may be no job queue.
** Here we make sure one is initialized and has condor as owner and group.
**
** The two job queue files and the history file are created while the
** effective uid is condor's and the umask is cleared; the root effective
** uid and the previous umask are put back afterwards.
*/
create_job_queue()
{
	int		saved_umask;

	set_condor_euid();
	saved_umask = umask( 0 );

	ensure_spool_file( "%s/job_queue.dir" );
	ensure_spool_file( "%s/job_queue.pag" );
	ensure_spool_file( "%s/history" );

	set_root_euid();
	(void)umask( saved_umask );
}


/*
** There should be no jobs running when we start up.  If any were killed
** when the last schedd died, they will still be listed as "running" in
** the job queue.  Here we go in and mark them as idle.
**
** Runs with condor's effective uid: the queue is opened into the global
** Q, write locked, and every record is passed to mark_idle().  The queue
** is closed and the root effective uid restored before returning.
*/
mark_jobs_idle()
{
	int		mark_idle();
	char	queue_path[MAXPATHLEN];

	set_condor_euid();

	(void)sprintf( queue_path, "%s/job_queue", Spool );
	Q = OpenJobQueue( queue_path, O_RDWR, 0 );
	if( Q == NULL ) {
		EXCEPT( "OpenJobQueue(%s)", queue_path );
	}

	LOCK_JOB_QUEUE( Q, WRITER );
	ScanJobQueue( Q, mark_idle );
	CLOSE_JOB_QUEUE( Q );

	set_root_euid();
}

/*
** ScanJobQueue() callback used by mark_jobs_idle().  Records that are not
** RUNNING are left alone.  A RUNNING record becomes IDLE when the job's
** checkpoint file exists in Spool and UNEXPANDED when it does not; the
** changed record is written back to the queue Q.  A failed write is
** fatal.
**
**	proc	the job record to examine; modified in place
*/
mark_idle( proc )
PROC	*proc;
{
	char	ckpt_path[MAXPATHLEN];

	if( proc->status == RUNNING ) {
		(void)sprintf( ckpt_path, "%s/job%06d.ckpt.%d",
								Spool, proc->id.cluster, proc->id.proc  );

			/* No checkpoint on disk means the job starts from scratch */
		proc->status = ( access(ckpt_path,F_OK) != 0 ) ? UNEXPANDED : IDLE;

		if( StoreProc(Q,proc) < 0 ) {
			EXCEPT( "StoreProc(0x%x,0x%x)", Q, proc );
		}
	}
}
