diff --new-file --recursive --context=3 linux/config.in linux.new/config.in
*** linux/config.in	Mon Dec 13 21:11:58 1993
--- linux.new/config.in	Mon Dec 13 21:11:15 1993
***************
*** 16,21 ****
--- 16,22 ----
  * Program binary formats
  *
  bool 'Elf executables' CONFIG_BINFMT_ELF y
+ bool 'Checkpoint restart support' CONFIG_CHKPNT n
  *
  * SCSI support
  *
diff --new-file --recursive --context=3 linux/fs/Makefile linux.new/fs/Makefile
*** linux/fs/Makefile	Mon Dec 13 21:12:32 1993
--- linux.new/fs/Makefile	Mon Dec 13 21:08:06 1993
***************
*** 41,46 ****
--- 41,50 ----
  BINFMTS := $(BINFMTS) binfmt_elf.o
  endif
  
+ ifdef CONFIG_CHKPNT
+ BINFMTS := $(BINFMTS) restart.o
+ endif
+ 
  .c.s:
  	$(CC) $(CFLAGS) -S $<
  .c.o:
diff --new-file --recursive --context=3 linux/fs/exec.c linux.new/fs/exec.c
*** linux/fs/exec.c	Mon Dec 13 21:12:26 1993
--- linux.new/fs/exec.c	Mon Dec 13 21:09:50 1993
***************
*** 727,737 ****
--- 727,743 ----
  			    struct pt_regs * regs);
  extern int load_elf_library(int fd);
  
+ extern int load_restart_binary(struct linux_binprm *,
+ 			       struct pt_regs * regs);
+ 
  /* Here are the actual binaries that will be accepted  */
  struct linux_binfmt formats[] = {
  	{load_aout_binary, load_aout_library},
  #ifdef CONFIG_BINFMT_ELF
  	{load_elf_binary, load_elf_library},
+ #endif
+ #ifdef CONFIG_CHKPNT
+ 	{load_restart_binary, NULL},
  #endif
  	{NULL, NULL}
  };
diff --new-file --recursive --context=3 linux/fs/restart.c linux.new/fs/restart.c
*** linux/fs/restart.c	Wed Dec 31 18:00:00 1969
--- linux.new/fs/restart.c	Mon Dec 13 21:08:16 1993
***************
*** 0 ****
--- 1,464 ----
+ /*
+  *  linux/fs/restart.c
+  *
+  *  Copyright (C) 1993  Steve Lord
+  */
+ 
+ #include <linux/fs.h>
+ #include <linux/sched.h>
+ #include <linux/kernel.h>
+ #include <linux/mm.h>
+ #include <linux/mman.h>
+ #include <linux/a.out.h>
+ #include <linux/errno.h>
+ #include <linux/signal.h>
+ #include <linux/string.h>
+ #include <linux/stat.h>
+ #include <linux/fcntl.h>
+ #include <linux/ptrace.h>
+ #include <linux/user.h>
+ #include <linux/segment.h>
+ #include <linux/malloc.h>
+ 
+ #include <asm/system.h>
+ 
+ #include <linux/binfmts.h>
+ #include <linux/restart.h>
+ 
+ #include <asm/segment.h>
+ #include <asm/system.h>
+ 
+ asmlinkage int sys_close(unsigned fd);
+ asmlinkage int sys_open(const char *, int, int);
+ asmlinkage int sys_brk(unsigned long);
+ 
+ asmlinkage void ret_from_restart(void) __asm__("ret_from_sys_call");
+ 
+ extern int tty_ioctl(struct inode *, struct file *, unsigned int, unsigned long);
+ extern struct super_block *get_super(dev_t);
+ 
+ /*
+  * Given a device number and an inode on the device rebuild
+  * the file it represents from scratch.
+  *
+  * Ideally we need some way of determining that an inode no longer
+  * represents the same file on the disk.
+  */
+ 
+ static  struct inode *recreate_inode(res_ino_t *inodes, int index)
+ {
+ 	struct super_block *sb;
+ 	struct inode *node;
+ 
+ 	/* Saved references are 1-based; 0 means there was no inode */
+ 	if (!index)
+ 		return(NULL);
+ 	index--;
+ 	if ((sb = get_super(inodes[index].i_dev)) == NULL) {
+ 		printk("could not get super block for %x\n",
+ 			inodes[index].i_dev);
+ 		return(NULL);
+ 	}
+ 	node = iget(sb, inodes[index].i_ino);
+ 	if (node == NULL) {
+ 		printk("could not get inode [%x]:%d\n",
+ 			inodes[index].i_dev, inodes[index].i_ino);
+ 		return(NULL);
+ 	}
+ 
+ 	/* If the inode has just been created then it cannot be the
+ 	 * same one we were looking for, so put it back and fail.
+ 	 * NOTE(review): this tests the saved ctime, not node->i_ctime.
+ 	 */
+ 	if (inodes[index].i_ctime == CURRENT_TIME) {
+ 		iput(node);
+ 		printk("inode [%x]:%d has been reused or freed\n",
+ 			inodes[index].i_dev, inodes[index].i_ino);
+ 		return(NULL);
+ 	}
+ 	return(node);
+ }
+ 
+ /*
+  * This is debatable - only really any use for things like
+  * editors such as vi, and there are usually other things
+  * like missing temporary files which stop those from restarting.
+  *
+  * Basically we restore the old tty state from the restart file.
+  */
+ static	int	restore_tty(restart_t *ex)
+ {
+ 	struct file *filp;
+ 	int	fd;
+ 
+ 	/* Skip the search entirely when current->tty is zero */
+ 	if (!current->tty)
+ 		return(0);
+ 
+ 	/* Apply the saved termios to the first open file whose minor
+ 	 * number matches the checkpointed task's tty, then stop. */
+ 	for (fd = 0; fd < NR_OPEN; fd++) {
+ 		filp = current->filp[fd];
+ 		if (filp && (MINOR(filp->f_rdev) == ex->t_task.tty))
+ 			return(tty_ioctl(filp->f_inode, filp, TCSETS,
+ 					 (u_long) &(ex->r_term)));
+ 	}
+ 	return(0);
+ }
+ 
+ 
+ /*
+  * Rebuild the task file table
+  */
+ static	int	restore_ftab(restart_t *ex, res_ino_t *inodes)
+ {
+ 	int	fd, error;
+ 	int	file_cnt, index, j;
+ 	struct file	*f, *file_tab;
+ 	struct task_struct *tsk;
+ 
+ 	file_tab = (struct file *)((char *)ex + ex->r_file_off);
+ 	file_cnt = (ex->r_map_off - ex->r_file_off) / sizeof(struct file);
+ 	tsk = &ex->t_task;
+ 
+ 	/* Saved filp[] slots hold 1-based file table indices, 0 = unused */
+ 	for (fd = 0; fd < NR_OPEN; fd++) {
+ 		if (tsk->filp[fd]) {
+ 			index = (int) tsk->filp[fd] - 1;
+ 			if (index < 0 || index >= file_cnt) {
+ 				printk("File index %d out of range\n", index);
+ 				return -EBADF;
+ 			}
+ 
+ 			f = get_empty_filp();
+ 			if (!f) {
+ 				printk("Couldn't get a file table entry\n");
+ 				return -EMFILE;
+ 			}
+ 			current->filp[fd] = f;
+ 
+ 			/* If this is a tty then use the current tty rather
+ 			 * than the one stored in the restart file. This
+ 			 * is so when we restart the terminal becomes the
+ 			 * one we restart from rather than the one the
+ 			 * process was originally running in.
+ 			 *
+ 			 * For all other files and devices go and get
+ 			 * the inode again.
+ 			 */
+ 			if ((MAJOR(file_tab[index].f_rdev) == 4) &&
+ 			    (current->tty)) {
+ 				error = open_namei("/dev/tty",
+ 						    file_tab[index].f_flags,
+ 						    file_tab[index].f_mode,
+ 						    &(f->f_inode), NULL);
+ 				if (error) {
+ 					printk("tty recreate failed\n");
+ 					goto fail;
+ 				}
+ 				f->f_rdev = current->tty | 0x400;
+ 			} else {
+ 				f->f_inode = recreate_inode(inodes,
+ 					(int)(file_tab[index].f_inode));
+ 				if (f->f_inode == NULL) {
+ 					printk("file inode recreate failed\n");
+ 					goto fail;
+ 				}
+ 				f->f_rdev = file_tab[index].f_rdev;
+ 			}
+ 			f->f_mode = file_tab[index].f_mode;
+ 			f->f_pos = file_tab[index].f_pos;
+ 			f->f_flags = file_tab[index].f_flags;
+ 			f->f_op = NULL;
+ 
+ 			/* Issue an open on the file, this is mainly
+ 			 * for devices so that we can initialise them
+ 			 * for our use. default_file_ops may be NULL.
+ 			 */
+ 			if (f->f_inode->i_op) {
+ 				f->f_op = f->f_inode->i_op->default_file_ops;
+ 				if (f->f_op && f->f_op->open &&
+ 				    (error = f->f_op->open(f->f_inode, f))) {
+ 					printk("file re-open failed %d\n",
+ 						error);
+ 					iput(f->f_inode);
+ 					goto fail;
+ 				}
+ 			}
+ 
+ 			/* Look for other descriptors which shared this
+ 			 * same file table entry and point them at it too.
+ 			 */
+ 			FD_CLR(fd, &current->close_on_exec);
+ 			for (j = fd+1; j < NR_OPEN; j++) {
+ 				if (tsk->filp[fd] == tsk->filp[j]) {
+ 					f->f_count++;
+ 					current->filp[j] = f;
+ 					FD_CLR(j, &current->close_on_exec);
+ 					tsk->filp[j] = NULL;
+ 				}
+ 			}
+ 		}
+ 	}
+ 
+ 	return(0);
+ 
+ fail:
+ 	/* Drop the half-built entry from both the fd slot and file table */
+ 	current->filp[fd] = NULL;
+ 	f->f_count--;
+ 	return -EBADF;
+ }
+ 
+ 
+ /*
+  * Binary-format loader for checkpoint files: rebuilds the saved process
+  * in place of the caller.  Returns -ENOEXEC for an unusable header, or
+  * -ENOMEM / a read error, while the old image is still intact.  Once it
+  * has been flushed, failures SIGKILL the task and 0 is returned.
+  */
+ int load_restart_binary(struct linux_binprm * bprm, struct pt_regs * regs)
+ {
+ 	restart_t	*b_ex, *ex;
+ 	unsigned long	old_fs;
+ 	struct task_struct *tsk;
+ 	struct vm_area_struct *map;
+ 	char		*page_map;
+ 	res_ino_t	*inodes;
+ 	int		i, j, fd, len, flags, perms;
+ 	int		f_offset, p_offset;
+ 	int		retval;
+ 
+ 	b_ex = (restart_t *) bprm->buf;		/* exec-header */
+ 	if ((b_ex->r_magic != RESTART_MAGIC) ||
+ 	    (b_ex->r_version != RESTART_VERSION)) {
+ 		return -ENOEXEC;
+ 	}
+ 
+ 	/* Recreate the process from the ground up, in the order the
+ 	 * data was written to the file to keep things simple.
+ 	 * 1. The task structure itself - get rid of whoever was here
+ 	 *    before and replace them with the saved task structure.
+ 	 */
+ 
+ 	/* The offsets below come straight from the file and are used to
+ 	 * index the buffer, so reject any that fall outside r_mem_off.
+ 	 */
+ 	len = b_ex->r_mem_off;
+ 	if (len < (int) FIXED_HEAD_LEN ||
+ 	    b_ex->r_inode_off < 0 || b_ex->r_inode_off > len ||
+ 	    b_ex->r_file_off < 0 || b_ex->r_file_off > b_ex->r_map_off ||
+ 	    b_ex->r_map_off > len || b_ex->r_vm_count < 0 ||
+ 	    b_ex->r_vm_count > (len - (int) FIXED_HEAD_LEN) /
+ 				(int) sizeof(struct vm_area_struct)) {
+ 		return -ENOEXEC;
+ 	}
+ 	ex = (restart_t *) vmalloc(len);
+ 
+ 	if (ex == NULL) {
+ 		return -ENOMEM;
+ 	}
+ 
+ 	tsk = &ex->t_task;
+ 	inodes = (res_ino_t *)((char *)ex + b_ex->r_inode_off);
+ 	page_map = (char *)ex + b_ex->r_map_off;
+ 
+ 	old_fs = get_fs();
+ 	set_fs(get_ds());
+ 	retval = read_exec(bprm->inode, 0, (char *)ex, len);
+ 	set_fs(old_fs);
+ 	/* This is our last chance to fail */
+ 	if (retval < 0) {
+ 		vfree((void *) ex);
+ 		return(retval);
+ 	}
+ 
+ 	flush_old_exec(bprm);
+ 
+ 	/* Copy field by field: probably safer than any other mechanism.
+ 	 * We should do more checks that the saved structure is actually
+ 	 * the same size as the current one - maybe store its size in
+ 	 * the checkpoint file and complain if it is different.
+ 	 */
+ 	current->counter = tsk->counter;
+ 	current->signal = tsk->signal;
+ 	current->blocked = tsk->blocked;
+ 	current->errno = tsk->errno;
+ 	memcpy(current->debugreg, tsk->debugreg, sizeof(current->debugreg));
+ 	memcpy(current->sigaction, tsk->sigaction, sizeof(current->sigaction));
+ 
+ 	current->exit_code = tsk->exit_code;
+ 	current->exit_signal = tsk->exit_signal;
+ 	current->elf_executable = tsk->elf_executable;
+ 	current->dumpable = tsk->dumpable;
+ 	current->swappable = tsk->swappable;
+ 	current->start_code = tsk->start_code;
+ 	current->end_code = tsk->end_code;
+ 	current->end_data = tsk->end_data;
+ 	current->start_brk = current->brk = current->end_data;
+ 	current->start_stack = tsk->start_stack;
+ 	current->start_mmap = tsk->start_mmap;
+ 	current->arg_start = tsk->arg_start;
+ 	current->arg_end = tsk->arg_end;
+ 	current->env_start = tsk->env_start;
+ 	current->env_end = tsk->env_end;
+ 	current->rss = 0;
+ 
+ 	/* Leave the pid, pgrp, session and leader as we found them;
+ 	 * trying to reuse a pid could be interesting. This means the
+ 	 * process comes back with a different pid than it had.
+ 	 */
+ 
+ 	/* NOTE(review): security hole - these ids are taken from the file
+ 	 * unchecked; we should control who can restart from an image.
+ 	 */
+ 	memcpy(current->groups, tsk->groups, sizeof(current->groups));
+ 	current->uid = tsk->uid;
+ 	current->euid = tsk->euid;
+ 	current->suid = tsk->suid;
+ 	current->gid = tsk->gid;
+ 
+ 	/* Timeouts - how do we deal with these...... is anything more
+ 	 * actually needed.
+ 	 */
+ 
+ 	current->utime = tsk->utime;
+ 	current->stime = tsk->stime;
+ 	current->cutime = tsk->cutime;
+ 	current->cstime = tsk->cstime;
+ 
+ 	/* Leave start time alone for now maybe we should put the old
+ 	 * one back again - who uses this apart from ps?
+ 	 */
+ 
+ 	current->min_flt = tsk->min_flt;
+ 	current->maj_flt = tsk->maj_flt;
+ 	current->cmin_flt = tsk->cmin_flt;
+ 	memcpy(current->rlim, tsk->rlim, sizeof(current->rlim));
+ 	memcpy(current->comm, tsk->comm, 16);
+ 
+ 	/* Saved 386 state and bitmaps - don't deal with this yet,
+ 	 * checkpointing the emulator? Sounds like fun!
+ 	 */
+ 
+ 	current->umask = tsk->umask;
+ 
+ 	/* Let's not even think about swap state - we had better not
+ 	 * be swapped!
+ 	 */
+ 
+ 	iput(current->pwd);
+ 	iput(current->root);
+ 	iput(current->executable);
+ 
+ 	/* Pull in the old processes inode references */
+ 	current->pwd = recreate_inode(inodes, (int) tsk->pwd);
+ 	current->root = recreate_inode(inodes, (int) tsk->root);
+ 	current->executable = recreate_inode(inodes, (int) tsk->executable);
+ 
+ 	if (((current->pwd == NULL) && tsk->pwd) ||
+ 	    ((current->root == NULL) && tsk->root) ||
+ 	    ((current->executable == NULL) && tsk->executable)) {
+ 		retval = -EBADF;
+ 		goto fatal_clean_up;
+ 	}
+ 
+ 	if ((retval = restore_ftab(ex, inodes))) {
+ 		goto fatal_clean_up;
+ 	}
+ 
+ 	sys_brk(tsk->brk);
+ 	/* Now do the memory map */
+ 	j = 0;	/* This is a count into the page map */
+ 	f_offset = ex->r_mem_off;
+ 	for (i = 0; i < ex->r_vm_count; i++) {
+ 		map = &ex->r_vm_map[i];
+ 		len = map->vm_end - map->vm_start;
+ 
+ 		/* If there is an inode to recreate this memory from
+ 		 * then open it and map it into memory.
+ 		 */
+ 		perms = 0;
+ 		flags = map->vm_page_prot & PAGE_COW ? MAP_PRIVATE : MAP_SHARED;
+ 		if ((map->vm_page_prot & PAGE_READONLY) == PAGE_READONLY)
+ 			perms |= PROT_READ | PROT_EXEC;
+ 		if (map->vm_page_prot & (PAGE_COW|PAGE_RW))
+ 			perms |= PROT_WRITE | PROT_READ;
+ 
+ 		if (map->vm_inode) {
+ 			map->vm_inode = recreate_inode(inodes,
+ 						       (int) map->vm_inode);
+ 			if (map->vm_inode == NULL) {
+ 				printk("mapped inode recreate failed\n");
+ 				retval = -EBADF;
+ 				goto fatal_clean_up;
+ 			}
+ 			fd = open_inode(map->vm_inode, O_RDONLY);
+ 			if (fd < 0) {
+ 				printk("open mapped inode failed: %d\n", fd);
+ 				iput(map->vm_inode);
+ 				retval = fd;
+ 				goto fatal_clean_up;
+ 			}
+ 			do_mmap(current->filp[fd], map->vm_start, len, perms,
+ 				MAP_FIXED | flags, map->vm_offset);
+ 			sys_close(fd);
+ 		} else {
+ 			do_mmap(NULL, map->vm_start, len, perms,
+ 				MAP_FIXED | flags, map->vm_offset);
+ 		}
+ 
+ 		/* Now see if we have any pages which belong to this
+ 		 * area in the restart file and read them in.
+ 		 */
+ 
+ 		if ((map->vm_page_prot & (PAGE_COW|PAGE_RW)) ||
+ 		    (map->vm_inode == NULL)) {
+ 			p_offset = map->vm_start;
+ 			while (len > 0) {
+ 				if (page_map[j++] == '+') {
+ 					if ((retval = read_exec(bprm->inode,
+ 						  f_offset, (void *)p_offset,
+ 						  PAGE_SIZE)) < 0) {
+ 						printk("Failed to read image at %x\n", p_offset);
+ 						goto fatal_clean_up;
+ 					}
+ 					f_offset += PAGE_SIZE;
+ 				}
+ 				len -= PAGE_SIZE;
+ 				p_offset += PAGE_SIZE;
+ 			}
+ 		}
+ 
+ 		/* decrement inode count for memory mapped files */
+ 		if (map->vm_inode) {
+ 			map->vm_inode->i_count--;
+ 		}
+ 	}
+ 
+ 	restore_tty(ex);
+ 
+ 	/* Plug the old stack back in again (last area, if there is one) */
+ 	map = current->mmap;
+ 	while (map && map->vm_next) {
+ 		map = map->vm_next;
+ 	}
+ 	current->stk_vma = map;
+ 
+ 	retval = 0;
+ 
+ fatal_clean_up:
+ 	if (retval) {
+ 		vfree((void *) ex);
+ 		printk("restart failure %d\n", retval);
+ 		send_sig(SIGKILL, current, 0);
+ 		return(0);
+ 	}
+ 
+ 	/* Go back where we were before and hope we have just put it back! */
+ 
+ 	*regs = ex->r_regs;
+ 	vfree((void *) ex);
+ 
+ 	return retval;
+ }
diff --new-file --recursive --context=3 linux/fs/super.c linux.new/fs/super.c
*** linux/fs/super.c	Mon Dec 13 21:12:22 1993
--- linux.new/fs/super.c	Mon Dec 13 21:07:09 1993
***************
*** 87,93 ****
  	}
  }
  
! static struct super_block * get_super(dev_t dev)
  {
  	struct super_block * s;
  
--- 87,93 ----
  	}
  }
  
! struct super_block * get_super(dev_t dev)
  {
  	struct super_block * s;
  
diff --new-file --recursive --context=3 linux/include/linux/restart.h linux.new/include/linux/restart.h
*** linux/include/linux/restart.h	Wed Dec 31 18:00:00 1969
--- linux.new/include/linux/restart.h	Mon Dec 13 21:10:18 1993
***************
*** 0 ****
--- 1,43 ----
+ #ifndef __restart_h__
+ #define __restart_h__
+ 
+ #include <linux/config.h>
+ #include <linux/signal.h>
+ #include <linux/sched.h>
+ #include <linux/page.h>
+ #include <linux/ptrace.h>
+ #include <linux/termios.h>
+ 
+ /* Restart file definitions */
+ 
+ #define RESTART_MAGIC	0x65527453
+ #define RESTART_VERSION 1
+ #define REL_LEN		16
+ 
+ typedef struct {			/* Header at the start of a restart file */
+ 	int	r_magic;		/* Magic number, RESTART_MAGIC */
+ 	int	r_version;		/* Restart file version, RESTART_VERSION */
+ 	char	r_kversion[REL_LEN];	/* Kernel version of checkpoint */
+ 	time_t	r_create;		/* Creation time */
+ 	int	r_inode_off;		/* Offset of res_ino_t inode table entries */
+ 	int	r_file_off;		/* Offset of saved struct file entries */
+ 	int	r_map_off;		/* Page map offset; '+' = page is in the file */
+ 	int	r_mem_off;		/* Offset of actual process memory (header length) */
+ 	int	r_vm_count;		/* Number of r_vm_map entries */
+ 	struct task_struct t_task;	/* The task header; filp[]/pwd/root/executable are 1-based indices */
+ 	struct pt_regs r_regs;		/* Saved registers from kernel stack */
+ 	struct termios r_term;		/* Saved terminal state */
+ 	struct vm_area_struct	r_vm_map[1];	/* Start of array of vm map entries; vm_inode is an inode table index */
+ } restart_t;
+ 
+ typedef struct {			/* One saved inode reference */
+ 	dev_t		i_dev;		/* Device number, passed to get_super() */
+ 	unsigned long	i_ino;		/* Inode number, passed to iget() */
+ 	time_t		i_ctime;	/* Saved ctime, tested by recreate_inode() */
+ } res_ino_t;
+ 
+ #define FIXED_HEAD_LEN	(sizeof(restart_t) - sizeof(struct vm_area_struct))
+ #define TSK_OFFSET	((u_long)&(((restart_t *) 0)->t_task))
+ #define MAP_OFFSET	((u_long)(((restart_t *) 0)->r_vm_map))
+ 
+ #endif
