: '@(#) ngsizes 1.2 90/09/08 14:38:58'
#
# ngsizes - Generate disk usage summary for USENET newsgroups.
#
# Usage:
#
#   ngsizes [-D] [-b breakdown_list] [-t threshold]
#
#     -t  Specifies only groups using "threshold" or more disk blocks should
#         be reported.  The default is defined by the "threshold" parameter
#         below.
#
#     -b  Specifies how usage should be broken down versus age.  For example,
#         saying "-b 0,7,14" will report usage in three columns:  the total
#         usage, the usage by articles a week or older, and the usage by
#         articles two weeks or older.  The default is defined by the
#         "breakdown" parameter below.
#
#     -D  For debugging, the temporary files will be kept (as /tmp/ngsz.*) and
#         a disk usage scan left there by an earlier debug run will be reused.
#
# Site-Specific Definitions:
#
#   SPOOLDIR	Must point to your USENET spool directory.
#   ACTIVE	Must point to your list of active USENET newsgroups.
#   DU		Must point to the enhanced "du" command.
#
# Work Files:
#
#   $TMP.read	Readership statistics.
#   $TMP.ngs	List of all newsgroups to check.
#   $TMP.du	Disk usage for all directories in the news spool dir.
#
#
# Sat Sep  8 14:34:56 1990 - Chip Rosenthal <chip@chinacat.Unicom.COM>
#	Cleanup for distribution.
# Tue Apr 17 21:50:58 1990 - Chip Rosenthal <chip@chinacat.Unicom.COM>
#	Original composition.
#

# Site-specific definitions.
SPOOLDIR=/usenet/spool/news	# Points to the local news spool directory.
ACTIVE=/usenet/lib/news/active	# Points to the local list of active newsgroups.
DU=du				# Points to the enhanced "du" command.

# Default initializations.
debug=0			# set nonzero to keep temp files around
threshold=0		# show newsgroups greater than this (or '0' for all)
breakdown=0,1,3,5,7,15	# breakdown usage by age, one col per number days given

# Prefix shared by all work files, and the message printed on misuse.
TMP=/tmp/ngsz$$
USAGE="usage: $0 [-D] [-b breakdown_list] [-t threshold]"

# On hangup, interrupt or quit: disarm the exit trap, remove the work files
# and leave with a failure status.
trap 'trap "" 0 ; rm -f $TMP.* ; exit 1' 1 2 3

# On any exit: remove the work files.  There is deliberately no "exit" in
# this handler; an "exit 0" here would replace the status the script is
# exiting with, turning every "exit 1" error path into a success.
trap 'rm -f $TMP.*' 0

# Crack the command line options.

# parse_options - Set "debug", "breakdown" and "threshold" from the given
# argument list.
#
#   -D            sets debug to 1
#   -b list       stores "list" in breakdown
#   -t threshold  stores "threshold" in threshold
#
# An unknown option, an option lacking its argument, or any argument left
# over after the options prints $USAGE on standard error and exits the
# script with status 1.
#
# The "getopts" builtin is used instead of the external "getopt" program:
# the status of "set -- `getopt ...`" is that of "set", so a getopt failure
# went unnoticed, and the unquoted $* re-split every argument.
parse_options() {
    OPTIND=1	# start scanning at the first argument of this call
    while getopts 'Db:t:' opt ; do
	case "$opt" in
	    D)  debug=1 ;;
	    b)  breakdown="$OPTARG" ;;
	    t)  threshold="$OPTARG" ;;
	    *)  echo "$USAGE" 1>&2 ; exit 1 ;;
	esac
    done
    shift $((OPTIND - 1))

    # No operands are accepted once the options have been consumed.
    if [ $# -ne 0 ] ; then
	echo "$USAGE" 1>&2
	exit 1
    fi
}

parse_options "$@"

# If debug is enabled, setup to keep temporary files around.
if [ "$debug" -ne 0 ] ; then
    # Put the exit and signal traps back to their defaults so that no handler
    # removes the work files.  (Setting them to the empty string instead would
    # make the shell ignore hangup, interrupt and quit, leaving a debug run
    # impossible to interrupt.)
    trap - 0 1 2 3
    # Fixed prefix, so the files can be found after the run.
    TMP=/tmp/ngsz
fi

# Verify we can find the active file.
if [ ! -r "$ACTIVE" ] ; then
    echo "$0: file '$ACTIVE' not found or unreadable" 1>&2
    exit 1
fi

# subscribed_groups - Print the newsgroup names found in users' .newsrc files.
#
#   $1  password file; field 6 of each line is taken as a home directory
#
# For every distinct "<home>/.newsrc" that is a regular file, the text before
# the first ":" of each line containing a ":" is printed, one name per line.
# The paths are read line by line rather than word-split, so a home directory
# containing blanks or glob characters is handled as a single path.
subscribed_groups() {
    awk -F: '{ print $6 "/.newsrc" }' "$1" | sort -u |
    while IFS= read -r newsrc ; do
	if [ -f "$newsrc" ] ; then
	    sed -n -e 's/:.*//p' "$newsrc"
	fi
    done
}

# Get a count of the readers for each newsgroup.
# Output format will be "readership_count newsgroup_name"
subscribed_groups /etc/passwd | sort | uniq -c > "$TMP.read"

# Extract the newsgroup names from the active file.
# Output format will be "newsgroup_name"
sed -e 's/[ 	].*//' -e '/^$/d' "$ACTIVE" | sort -u > "$TMP.ngs"

# Measure disk usage under the spool directory.  On each line of "du" output
# the last tab-separated field (the pathname) is moved to the front, the spool
# directory prefix is removed, and every slash is turned into a dot so that
# the pathname reads as a newsgroup name.
# Output format will be "newsgroup_name usage usage ..."
# In debug mode a scan file that already exists is reused instead of redone.
if [ $debug -ne 0 ] && [ -f $TMP.du ] ; then
    : reuse the existing scan
else
    path_to_front='s/^\(.*\)	\([^	]*\)$/\2	\1/'
    $DU -ilr -c "$breakdown" $SPOOLDIR |
	sed -e "$path_to_front" -e "s!$SPOOLDIR/!!" -e 's!/!.!g' |
	sort -u > $TMP.du
fi


# Generate the report.

# format_report - Turn report records on standard input into the usage table
# on standard output.  The first field of a record selects its type:
#
#   BREAKDOWN n1 n2 ...     column ages in days; prints the heading line
#   THRESHOLD n             minimum first-column usage for a group to be shown
#   READERS n ng            newsgroup "ng" has "n" readers
#   NEWSGROUP ng n1 n2 ...  usage of newsgroup "ng", one value per column
#
# Any other record is reported on standard error as a bad line.  A BREAKDOWN
# record without any column is reported on standard error and makes awk exit
# with status 1.
format_report() {
    awk '

BEGIN {
    LINE_WIDTH = 79	# maximum length of a line
    NG_WIDTH = 26	# width of field to print newsgroup in
    READR_WIDTH = 4	# width of field to print number of readers in
    FRONT_FMT = "%-" NG_WIDTH "." NG_WIDTH "s" "%" READR_WIDTH "s"
}

# Record "BREAKDOWN n1 n2 ..."
#   Defines the format for the newsgroup usage lines.  Each "n" corresponds
#   to one column in the newsgroup usage line, and specifies the age of
#   articles which consume this amount of disk space.
$1 == "BREAKDOWN" {
    num_breakdn = NF - 1
    if ( num_breakdn < 1 ) {
	# Nothing to divide the line among; stop instead of dividing by zero.
	printf("ngsizes - empty breakdown list\n") | "cat 1>&2"
	exit 1
    }
    # The width must be a whole number.  A fraction spliced into the format
    # (3.5 giving "%3.5s") would be read as a precision and truncate values.
    FIELD_WIDTH = int( ( LINE_WIDTH - (NG_WIDTH+READR_WIDTH) ) / num_breakdn )
    if ( FIELD_WIDTH > 8 )
	FIELD_WIDTH = 8
    FIELD_FMT = "%" FIELD_WIDTH "s"
    printf(FRONT_FMT,"newsgroup","read")
    for ( i = 0 ; i < num_breakdn ; ++i )
	printf(FIELD_FMT,sprintf("%ddays",$(i+2)))
    printf("\n")
    next
}

# Record "THRESHOLD n"
#   Indicates we only want to see newsgroups using "n" or more blocks.
$1 == "THRESHOLD" {
    threshold = $2
    next
}

# Record "READERS n ng"
#   Indicates that newsgroup "ng" has "n" readers.
$1 == "READERS" {
    num_readers[$3] = $2
    next
}

# Record "NEWSGROUP ng n1 n2 ..."
#   Indicates the disk usage of newsgroup "ng".  Each "n" specifies the
#   diskspace used by articles "ndays" or older, where "ndays" is defined
#   by the BREAKDOWN record.
$1 == "NEWSGROUP" {
    if ( $3 >= threshold ) {
	if ( num_readers[$2] == "" )
	    num_readers[$2] = 0
	printf(FRONT_FMT,$2,num_readers[$2])
	for ( i = 0 ; i < num_breakdn ; ++i )
	    printf(FIELD_FMT,$(i+3))
	printf("\n")
    }
    next
}

# Anything else is malformed.  \047 is the single quote character, which
# cannot be written literally inside this shell-quoted program.
{ printf("ngsizes - bad line \047%s\047\n", $0) | "cat 1>&2" }

'
}

(
    echo "BREAKDOWN $breakdown" | sed -e 's/,/ /g'
    echo "THRESHOLD $threshold"
    sed -e 's/^/READERS /' "$TMP.read"
    # Largest first, by the first usage column.  "-k 2" replaces the obsolete
    # "+1" key syntax, which current sort commands take for a file name.
    join "$TMP.du" "$TMP.ngs" | sort -rn -k 2 | sed -e 's/^/NEWSGROUP /'
) | format_report

# The report has been written; finish with a success status regardless of
# how the report pipeline above ended.
exit 0

