'    Date: 06-10-97  23:01
'    From: Benjamin L Mcgee
'  e-mail: benjamin.l.mcgee@purgatorie.org
'NET-MAIL: Benjamin L McGee on 1:15/7
'      To: Isaac Grover
'
'On 06-06-97 Isaac Grover wrote to All...
'
' IG> My intent is to first eliminate duplicate site names in the file,
' IG> possibly by using a temporary file, then plugging each of those
' IG> sites into a unit of a string array called site$.  How do I
' IG> figure out how many units the array must contain, then how could
' IG> I eliminate duplicates without using a swap file if possible?
'
'I whipped up something that should do just that.  Tried it on a file with
'2669 file names, listed one per line.  I ran out of memory at line 2249,
'and by the time it had processed 2000 lines it was down to about one line
'per second.  Hope it helps.
 
' PUBLIC
CONST FALSE% = 0
CONST TRUE% = NOT FALSE%
DECLARE FUNCTION ss.outofmemory% ()
DECLARE SUB ss.dump (file%)

DECLARE FUNCTION ss.add% (site$)
 
' PRIVATE
DECLARE FUNCTION ss.test% (site$)
DECLARE FUNCTION ss.preserve% ()
DIM SHARED ss.count AS INTEGER          ' count of strings
DIM SHARED ss.memerror AS INTEGER       ' out of memory flag
REDIM SHARED ss(1) AS STRING            ' string array
 
ON ERROR GOTO ss.error:
 
InFile% = FREEFILE
OPEN "INPUT.DAT" FOR INPUT AS InFile%

DO
  INPUT #InFile%, in$
  IF LEN(in$) THEN
    TestCount% = TestCount% + 1
    IF ss.add(in$) = FALSE THEN
      IF ss.outofmemory = TRUE THEN EXIT DO
    END IF
  END IF
LOOP WHILE NOT EOF(InFile%)
CLOSE InFile%
 
OutFile% = FREEFILE
OPEN "CONS:" FOR OUTPUT AS OutFile%
ss.dump (OutFile%)
PRINT #OutFile%, STR$(ss.count) + " lines printed."
PRINT #OutFile%, STR$(TestCount%) + " lines processed."
CLOSE OutFile

END

ss.error:
  IF ERR = 14 AND ss.memerror = TRUE THEN
    RESUME NEXT
  ELSE
    ERROR ERR: END
  END IF

FUNCTION ss.add% (site$)
 
answer% = TRUE
IF ss.test(site$) = TRUE THEN
 
    ' REDIM PRESERVE ss(ss.count + 1) AS STRING
    ' IF ss.memerror = TRUE THEN
    '    answer% = FALSE
    ' END IF
 
    ' sorry REDIM PRESERVE isn't supported by all
    ' QB versions, but that's not MY fault :)
    ' if your QB supports REDIM PRESERVE use it
    ' instead of ss.preserve
 
    answer% = ss.preserve
    IF answer% = TRUE THEN
        ss.count = ss.count + 1
        ss(ss.count) = site$
    END IF
ELSE
 
    answer% = FALSE
END IF
 
ss.add = answer%
 
END FUNCTION

SUB ss.dump (file%)
 
FOR iter% = 1 TO ss.count
    PRINT #file%, ss(iter%)
NEXT iter%
 
END SUB

FUNCTION ss.outofmemory%
    ss.outofmemory = ss.memerror
END FUNCTION

FUNCTION ss.preserve%
 
REDIM temp(ss.count) AS STRING
IF ss.memerror = TRUE THEN
    ss.preserve% = FALSE
    EXIT FUNCTION
ELSE
    FOR iter% = 1 TO ss.count
        temp(iter%) = ss(iter%)
    NEXT iter%
END IF
 
 
REDIM ss(ss.count + 1) AS STRING
IF ss.memerror = TRUE THEN
    ss.preserve% = FALSE
    EXIT FUNCTION
ELSE
    FOR iter% = 1 TO ss.count
        ss(iter%) = temp(iter%)
    NEXT iter%
END IF
 
ss.preserve = TRUE
 
END FUNCTION

FUNCTION ss.test% (site$)
 
' this function IS case sensative!
 
answer% = TRUE
 
IF ss.count% > 0 THEN
    FOR iter% = 1 TO ss.count%
        IF site$ = ss(iter%) THEN
            answer% = FALSE
            EXIT FOR
        END IF
    NEXT iter%
END IF
 
ss.test = answer%
 
END FUNCTION

