/*
 * SmartDisk 1.3.
 *
 * Created by David Le Blanc 29/10/91 Absolutely no copywrite. But if you improve
 * it, please send me a new version (with source!) 
 * 
 * Current limitations.
 *   No command line options, device and cache size are hard-wired into the program.
 *   Only one unit is supported.
 *
 * Obvious enhacements.
 *   Make device and unit configurable, as well as cache size and shape.
 *
 * Some performance quotes: (Doesn't everyone make these??)
 * Background:  I have a directory called MAN: which has 355 manuals.
 *
 * WARNING: These are bad examples, since a 'dir' reads the disk, then sorts the
 * contents, then writes the data to the screen. These times include the sorting
 * and output of the directory. This sorting and output time is in the order
 * of 1.5 to 2 seconds.
 *
 * Normal DIR MAN:		12 seconds
 * Cache enabled but empty      9  seconds (prefetch does work!) 
 * Cache primed                 5  seconds.
 *
 * With a slower drive and/or faster machine these times can only improve.
 * I have a drive capable of 800k/sec on my unaccelerated A500. Those with
 * a 150K/sec A590 would notice a greater performance boost. Same for those
 * with 'bloody fast machines' (grumble :) If I was REALLY worried about making
 * the statistic look good, then I'd test it on an A590.
 *
 * A590 USERS: You MUST edit the 'scsi.device' to 'xt.device' or it will NOT WORK.
 * GVP  USERS: You MUST edit the 'scsi.device' to 'gvpscsi.device' or it will
 *             NOT WORK.
 *
 * SmartDisk: A SCSI.DEVICE disk cache.
 *
 * 1.3.1:       Removed some 2.0ism's that crept in, and added code to
 *              read the device and unit from the command line.
 * 1.3:		Added Prefetch and fixed 'CMD_WRITE' to update instead of
 *              flush cache entry.
 * 1.2:         Added many optimisation such as moving the semaphore locks to
 *              the update functions, and for a request of many blocks, the
 *              blocks are read in all at once into the users buffer, THEN
 *              copied to cache. (Instead of read into cache one at a time and
 *              then copied to the user buffer)
 * 1.1:		Found a major problem. The following union under Lattice 5.10a
 *              generates unexpected (probably correct) code. The key, line and
 *		offset fields are actually unioned together, whereas I though
 *		the union would be between the two int declarations. 
 *
 *              union sector {
 *                 int key:32-LINE_BITS-ITEM_BITS,
 *                     line:LINE_BITS,
 *                     offset:ITEM_BITS ;
 *                 int sector ;
 *                 } ;
 *
 *		I made the bit fields a separate structure, and union'ed the 
 * 	 	sector and the new structure together.
 *
 * 1.0:		My school assigment was to write a N-way set associative cache
 *		simulator and to plot hit-rate percentages. With a bit of
 *		work, it is not a Hard Disk cache! Vive la Amiga!
 *
 *
 * History:
 *    29/10/91 : Sat down an wrote a cache.
 *    30/10/91 : Problem? Works Great Now.
 *    31/10/91 : Fixed up the 'aging' of blocks to handle the
 *             : counter wrapping after 2^32 buffer creates/updates.
 *             : (How long does 2^32 DIFFERENT accesses take?? With a fast hard
 *                disk going continually, I estimate about two months.)
 *             : Cleaned up the code a tad.
 */

struct IOStdReq *IO;		/* All IO goes through here now. */
struct SignalSemaphore *ss ;	/* To force single threading	 */
				/* through the code responsible	 */
				/* for updating the cache	 */
ULONG counter ;

int allocnum = 0 ;

void printf(char *fmt, ...) ;

/*
 * Constants. Adjust for adjusting cache size and shape.
 */

#define ITEM_SIZE 512           /* No change - sector size */

#define LINE_SIZE   4           /* Increase for prefetch */
#define SETS        8           /* N-way set associative */
#define LINES      32           /* N lines of cache      */

/*
 * Bit fields. Adjust to match above constants.
 */

#define ITEM_BITS  2            /* 2^ITEM_BITS = LINE_SIZE */
#define LINE_BITS  5            /* 2^LINE_BITS = LINES     */

struct bitaddress {
   int key:32-LINE_BITS-ITEM_BITS,
       line:LINE_BITS,
       offset:ITEM_BITS ;
   } ;

union sector {
   int sector ;
   struct bitaddress s ;
   } ;

struct cache_line {
   int key ;                     /* Item key */
   int age ;                     /* AGE for LRU alg */
   int valid ;
   char *buffer ;                /* [LINE_SIZE][ITEM_SIZE] of DATA */
   } ;

struct cache_line cache[SETS][LINES] ;

/*
 * A buffer for scsidisk.device to go through.
 */
char __chip globbuffer[LINE_SIZE << 9] ;

#define DEV_BEGINIO (-30)
#define DEV_ABORTIO (-36)

void (*__asm oldbeginio)(register __a1 struct IOStdReq *,
                         register __a6 struct Device *dev) ;
/*
 * Scan for sector, and return the set it resides in.
 */
int
FindEntry(union sector *s) {

   int set ;

   for (set = 0; set < SETS; set++) 
      if (cache[set][s->s.line].valid) {
         if (cache[set][s->s.line].key == s->s.key) {
            cache[set][s->s.line].age = counter ++ ;
            return set ;
            }
         }
      else
         break ;

   return -1 ;
   }

/*
 * Pick a set from the associated cache, and return
 * the set number. The cache memory is also allocated
 * before returning. The cache entry is marked VALID. so if you
 * can't fill it, remember to clear it!
 */
int
AllocCache(union sector *s) {
   int set ;
   int oldest ;
   int oldset ;
   int found ;
   int age ;

   oldset = 0 ;
   oldest = 0 ;
   found  = 0 ;

   for (set = 0; set < SETS; set++) 
      if (cache[set][s->s.line].valid) {
         if (cache[set][s->s.line].key != s->s.key) {
            /*
             * This 'age' calculation is complicated since normally, 
             * AGE = COUNTER - CACHE.AGE, however, if counter has
             * wrapped to zero, such evaluation evaluates ages of < 0 and
             * these entries will never be reselected for reuse.
             *
             * If counter wraps to zero, then CACHE.AGE is generally larger
             * than COUNTER, so we evaluate age as the total of MAXINT-AGE 
             * plus the current value of the counter. IE, how much it took to
             * wrap and reach the current position.
             *
             * Hence, 
             * AGE = ~0 - CACHE.AGE + COUNTER
             */
            age = cache[set][s->s.line].age ;
            if (age > counter)
               age = ((ULONG) ~0 - age) + counter ;
            else
               age = counter - age ;

            if (age > oldest)
               oldest = age, oldset = set ;
            }
         else {
            found = 1 ;
            break ; /* key = s.key. Line already in cache */
            }
         }
      else
         break ; /* !valid. Found a free line */

   if (found) {
      return -1 ;
      }

   if (set == SETS)
      set = oldset ;

   cache[set][s->s.line].age = counter ++ ;
   cache[set][s->s.line].key = s->s.key ;

   /*
    * If no buffer, allocate one.
    */
   if (! cache[set][s->s.line].buffer )
      if (cache[set][s->s.line].buffer = AllocMem(LINE_SIZE << 9, MEMF_PUBLIC))
         allocnum ++ ;

   /*
    * If STILL no buffer, return failure. Otherwise set the VALID flag.
    */
   if (cache[set][s->s.line].buffer )
      cache[set][s->s.line].valid = 1 ;
   else {
      cache[set][s->s.line].valid = 0 ;         /* Allocation failed */
      return -1 ;
      }

   return set ;
   }

/*
 * Allocate a line of cache and read it from disk.
 */
int
ReadCache(union sector *s) {
   struct MsgPort *port ;
   char *dest ;
   int  set ;

   ObtainSemaphore(ss);

   port = CreatePort(0,0) ;

   if (!port) {
      ReleaseSemaphore(ss) ;
      return -1 ;
      }

   if (s->s.offset)
      s->s.offset = 0 ;

   IO->io_Message.mn_ReplyPort = port ;

   IO->io_Command = CMD_READ;
   IO->io_Offset = s->sector << 9 ;
   IO->io_Length = LINE_SIZE << 9 ;
   IO->io_Data = (APTR) globbuffer ;

   oldbeginio(IO,IO->io_Device) ;
   WaitIO(IO) ;

   DeletePort(port) ;

   if (IO->io_Error) {
      ReleaseSemaphore(ss) ;
      return -1 ;
      }

   set = AllocCache(s) ;

   if (set < 0) {
      ReleaseSemaphore(ss) ;
      return -1 ;
      }

   dest = cache[set][s->s.line].buffer ;

   CopyMemQuick(globbuffer, dest, LINE_SIZE << 9) ;

   ReleaseSemaphore(ss) ;
   return 0 ;
   }

int
ReadBufferToCache(int linestart,int unread,char *buffer) {

   union sector s ;
   struct MsgPort *port ;
   int set ;

   ObtainSemaphore(ss);
   s.sector = linestart ;

   port = CreatePort(0,0) ;

   if (!port) {
      ReleaseSemaphore(ss) ;
      return -1 ;
      }

   IO->io_Message.mn_ReplyPort = port ;

   /*
    * Read enough to fill the buffer.
    */

   IO->io_Command = CMD_READ;
   IO->io_Offset = linestart << 9 ;
   IO->io_Length = unread << 9 ;
   IO->io_Data = (APTR) buffer ;

   oldbeginio(IO,IO->io_Device) ;
   WaitIO(IO) ;

   DeletePort(port) ;

   if (IO->io_Error) {
      ReleaseSemaphore(ss) ;
      return -1 ;
      }

   while (unread) {
      set = AllocCache(&s) ;
      if (set < 0) {
         ReleaseSemaphore(ss) ;
         return -1 ;
         }
      else {
         CopyMemQuick(buffer,cache[set][s.s.line].buffer, LINE_SIZE << 9) ;
         }

      s.sector += LINE_SIZE ;
      unread -= LINE_SIZE ;
      buffer += (LINE_SIZE << 9) ;
      }

   ReleaseSemaphore(ss) ;
   return 0 ;
   }

/* 
 * This functions checks the cache. If the sector is there, it returns
 * the buffer, otherwise it returns NULL.
 */
char *
FindCache(union sector *s,int set) {
   return & (cache[set][s->s.line].buffer[s->s.offset << 9 ] ) ;
   }

/*
 * This function takes 'sector' and set, and decides if the next sector
 * is in the cache.
 */
int
NextEntry(union sector *s, int set) {
   int line ;

   line = s->s.line ;
   s->sector ++ ;

   if (line == s->s.line) {
      return set ;
      }
   else
      return FindEntry(s) ;
   }

/*
 * Search for sector, and mark it invalid.
 */
void
ClearEntry(union sector *s,int set) {
   cache[set][s->s.line].valid = 0 ;
   }

CacheUpdate(union sector *s, int seccount, char *buffer) {
   int set ;

   ObtainSemaphore(ss) ;

   while (seccount) {
      set = FindEntry(s) ;
      if (set >= 0) {
         CopyMemQuick(buffer,FindCache(s,set), ITEM_SIZE) ;
         cache[set][s->s.line].age = counter ++ ;
         }

      buffer += ( ITEM_SIZE ) ;
      seccount -- ;
      s->sector ++ ;
      }

   ReleaseSemaphore(ss) ;
   return 0 ;
   }

void __saveds __asm mybeginio(register __a1 struct IOStdReq *req,
                              register __a6 struct Device *dev) {

   union sector s ;
   int   set ;
   int   command ;
   int   secnum ;
   char  *source ;
   char  *buffer ;

   int   unread ;
   int   linestart ;

   s.sector = req->io_Offset >> 9 ;
   secnum   = req->io_Length >> 9 ;
   command  = req->io_Command ;
   buffer   = (char *) req->io_Data ;

   if (command == CMD_WRITE)
      CacheUpdate(&s,secnum,buffer) ;

   if (command == CMD_READ) {

      while (secnum) {
         set = FindEntry(&s) ;

         if (set < 0) {
            source = NULL ;
            }
         else {
            source = FindCache(&s,set) ;
            }

         /*
          * Scan copying buffers to the request.
          */

         while (secnum && source) {
            CopyMemQuick(source,buffer,ITEM_SIZE) ;
            buffer += ITEM_SIZE ;

            secnum -- ;
            if (secnum) {
               set = NextEntry(&s, set) ;
               if (set < 0) {
                  source = NULL ;
                  }
               else {
                  source = FindCache(&s,set) ;
                  }
               }
            }

         if (!secnum) {                                 /* Done ? */
            break ;
            }

         /*
          * If we are in the middle of a line, read it in.
          */
         if (s.s.offset) {
            int original = s.sector ;

            s.s.offset = 0 ;
            if (ReadCache(&s) < 0) {
               }

            s.sector = original ;
            }
         else {
            /*
             * Start scanning at next line.
             */
            unread = 0 ;

            linestart = s.sector ; 

            /*
             * Scan counting sectors that need reading.
             */
            while ((secnum>unread) && (set < 0)) {
               s.sector = linestart + unread ;
               set = FindEntry(&s) ;
               /*
                * For efficiency, if a sector is not found, advance to
                * the next line instead of the next sector.
                */
               if (set < 0) {
                  unread += LINE_SIZE ;
                  }
               }

            if (unread > secnum) {
               unread -= LINE_SIZE ;
               }

            if (unread) {
               /*
                * Read the cache into the supplied buffer, and copy
                * it to cache memory.
                */
               ReadBufferToCache(linestart,unread,buffer) ;
               buffer += ( unread << 9 ) ;
               }
             else {
               /*
                * If there are more sectors, call 'ReadCache()' to get them.
                */
               ReadCache( (union sector *)&linestart) ;
               }

            /*
             * Pick up where we left off.
             */
            s.sector = linestart + unread ;
            secnum -= unread ;
            if (secnum < 0)
               break ;
            }
         }
      /*
       * Done!!!
       */
      }

   if (command != CMD_READ)
      oldbeginio(req,dev) ;
   else {
      req->io_Actual = req->io_Length ;
      ReplyMsg((struct Message *) req) ;
      } ;

   }

int
main(int argc, char *argv[]) {

   int error ;
   int line,set ;
   int devopen ;	
   struct MsgPort  *port ;
   char *device ;
   int unit ;

   port = NULL;
   devopen = 0 ;

   if (argc != 3) {
      printf ("Usage: SmartDisk <yourhd.device> <unitnum>\n") ;
      goto fail ;
      }
   else {
      device = argv[1] ;
      unit = atoi(argv[2]) ;
      }

   port = CreatePort(0,0) ;
   IO = CreateExtIO(port,sizeof(struct IOStdReq)) ;

   error = OpenDevice(device,unit,(struct IORequest *)IO, 0) ;

   if (error) {
      printf ("Unable to open %s unit %d\n",device,unit) ;
      goto fail ;
      }
   else
      devopen = 1 ;

   ss = AllocMem(sizeof(struct SignalSemaphore), MEMF_PUBLIC) ;

   if (!ss)
      goto fail ;

   SumLibrary( (struct Library *) IO->io_Device) ; 

   InitSemaphore(ss) ;

   oldbeginio = SetFunction((struct Library *)IO->io_Device,DEV_BEGINIO,(APTR)mybeginio) ;
   Wait (SIGBREAKF_CTRL_F) ;

   ObtainSemaphore(ss) ;
   SetFunction((struct Library *)IO->io_Device,DEV_BEGINIO,(APTR)oldbeginio) ;
   ReleaseSemaphore(ss) ;

   for (line = 0; line < LINES; line ++)
      for (set = 0; set < SETS; set ++)
         if (cache[set][line].buffer) {
            FreeMem( cache[set][line].buffer,LINE_SIZE << 9 ) ;
            allocnum -- ;
            }

   if (allocnum)
      printf("Allocation mismatch. %d buffers left lying around\n",allocnum) ;

fail:
   if (ss)
      FreeMem(ss,sizeof(struct SignalSemaphore)) ;
   if (devopen) {
      IO->io_Message.mn_ReplyPort = port ;
      CloseDevice((struct IORequest *)IO) ;
      }
   if (IO)
      DeleteExtIO((struct IORequest *)IO) ;
   if (port)
      DeletePort(port) ;

   return 0 ;
   }
