*************************************************************************
* FIND_DUP.PRG 
*
* A small utility to build a database of all the files in a directory
* and its subdirectories. It then flags definite and probable duplicates.
*
* Requires AMERICAN DOS 5.0 or higher to work properly.
* If you have a non-American version of DOS 5.0, you must change some
* lines in the scan loop to accommodate the language of your DIR
* listing.
*
*************************************************************************
*
* Author : Stephane DESNAULT - CIS 100034,2657
*
* NOT industrial strength... simply useful !
*
*************************************************************************

* Get the path to search.... C:\ is the default
*
* Shows a small dialog asking for the directory and the file mask to
* search, with OK and Cancel push buttons. On exit, path and file_spec
* hold what the user typed (still space padded) and choice holds the
* caption of the button that ended the dialog.

SET TALK OFF

* The initial values are padded with blanks: the length of each string
* is the room the user gets to type in the matching GET field.
choice   =""
path     ="C:\                                                  "
file_spec="*.*         "

DEFINE WINDOW get_path FROM 10,10 TO 20,70
ACTIVATE WINDOW get_path
	@ 1,3 SAY "Path to search for duplicates :"
	&& The valid clause checks that the directory to search exists.
	&& It does so by testing for the NUL device inside that directory,
	&& adding the separating backslash only when the path lacks one.
	@ 3,3 GET path ;
		VALID FILE(ALLTRIM(path)+IIF(RIGHT(ALLTRIM(path),1)="\","NUL","\NUL")) ;
		ERROR ALLTRIM(path)+" is not a valid DOS path !"
	@ 5,3 SAY "File mask : "GET file_spec
	&& Horizontal push buttons: \! marks OK as the default button,
	&& \? marks Cancel as the escape button, \< sets the hot key.
	@ 7,3 GET choice PICTURE '@*H \!\<OK;\?\<Cancel' SIZE 1,8
	READ CYCLE
RELEASE WINDOW get_path

* Quit now if the user aborts

IF choice="Cancel"
	RETURN
ENDIF

* Combine the trimmed path and the file mask into the single argument
* handed to DIR. A backslash is inserted between the two only when the
* path does not already end with one.

path=ALLTRIM(path)
IF RIGHT(path,1)="\"
	file_spec=path+ALLTRIM(file_spec)
ELSE
	file_spec=path+"\"+ALLTRIM(file_spec)
ENDIF

* Pick a scratch file name for the listing (SYS(3) supplies the base
* name), then let DOS write the recursive listing into it.

temp_name=SYS(3)+".DIR"

? "Listing the directory.... Please wait"

RUN dir &file_spec /s > &temp_name

* Create the output DIR.DBF table and fill it from the directory
* listing. All fields are text, no conversion is done.

? "Building the archive file.... Please wait"

* Open the listing BEFORE creating the table. FOPEN() returns -1 when
* the file cannot be opened, which happens when the RUN command could
* not produce the listing. In that case there is nothing to archive,
* so stop here instead of replacing an existing DIR.DBF with an empty
* table.

input_id=FOPEN(temp_name)
IF input_id < 0
	? "Unable to read the directory listing "+temp_name
	RETURN
ENDIF

SET SAFETY ON
CREATE TABLE dir.dbf	(	;
	directory	C(80)		, ;
	name		C(08)		, ;
	extension   C(03)		, ;
	size		C(09)		, ;
	date		C(08)		, ;
	time		C(06)		, ;
	duplicate   L			, ;
	suspect		L		)

* Scan the directory listing and fill the output table.
*
* Two kinds of lines matter:
*   - "Directory of ..." headers, which name the directory that the
*     following file lines belong to
*   - file lines, recognised by the colon of the time in column 36.
*     Subdirectory entries (marked <DIR>) are skipped.
* Every other line (volume label, totals, blank lines) is ignored.

* Start with an empty directory so that a file line met before any
* "Directory of" header does not fail on an undefined variable.
curr_dir=""

DO WHILE ! FEOF(input_id)
	curr_line=FGETS(input_id)
	DO CASE 
		CASE LEFT(curr_line,12)='Directory of'
			&& Remember the directory for the file lines that follow
			curr_dir=SUBSTR(curr_line,13,LEN(curr_line))
		CASE (SUBSTR(curr_line,36,1)=":") .AND. !("<DIR>" $ curr_line)
			&& Cut the fixed-width columns of the listing into fields
			APPEND BLANK
			REPLACE directory WITH curr_dir 				, ;
					name	  WITH SUBSTR(curr_line, 1,8)	, ;
					extension WITH SUBSTR(curr_line,10,3)	, ;
					size	  WITH SUBSTR(curr_line,14,9)	, ;
					date	  WITH SUBSTR(curr_line,24,8)	, ;
					time	  WITH SUBSTR(curr_line,34,6)
	ENDCASE
ENDDO

* Index on name+extension+size+date+time
*
* In this order all the files sharing a name and extension follow each
* other, and among them the ones with identical size, date and time are
* adjacent. The scan below relies on that ordering.

SET TALK ON
INDEX ON name+extension+size+date+time TAG id
SET TALK OFF

* Scan for evident and probable duplicates. An evident duplicate is when
* all the columns except directory are alike. A probable duplicate is
* when name and extension are alike, so you might suspect successive 
* versions of the same file. A duplicate is also a suspect.

* Do not use SCAN...ENDSCAN, since we're navigating the file inside
* the loop.

GO TOP
curr_name=""	&& holds preceding file name and extension
curr_sdt =""	&& holds preceding file size date and time
is_dup	 =.F.	&& holds whether preceding file was already a duplicate
is_susp	 =.F.	&& holds whether preceding file was already a suspect

? "Scanning for duplicate or suspect files...."

DO WHILE ! EOF()
	&& If file is suspect...
	IF curr_name=name+extension
		&& preceding file is a suspect too, of course !
		&& Step back to flag it, unless it was flagged already.
		IF ! is_susp
			SKIP-1
			REPLACE suspect WITH .T.
			SKIP
		ENDIF
		REPLACE suspect WITH .T.
		is_susp=.T.
		&& It might also be a confirmed duplicate...
		IF curr_sdt=size+date+time
			&& preceding file is a duplicate too, of course !
			IF ! is_dup
				SKIP-1
				REPLACE duplicate WITH .T.
				SKIP
			ENDIF
			REPLACE duplicate WITH .T.
			is_dup=.T.
		ELSE
			is_dup=.F.
		ENDIF
	ELSE
		&& A different name starts a new group: forget BOTH flags.
		&& Leaving is_dup set from the previous group would make the
		&& next group believe its first file is already flagged, and
		&& that file would never be marked as a duplicate.
		is_susp=.F.
		is_dup =.F.
	ENDIF
	curr_name=name+extension 
	curr_sdt =size+date+time
	SKIP
ENDDO

* Release the listing file handle, then remove the scratch file from
* disk now that its content has been loaded into the table.

=FCLOSE(input_id)
DELETE FILE (temp_name)

* Tell the user where the result is

? "The file DIR.DBF now contains a listing of all the files on your"
? "hard disk, flagged for certain and probable duplicates."

RETURN