#!/usr/bin/perl -w

# htmlhier.pl - create an HTML file describing a tree of HTML files

# Copyright (C) 1997 by John J. Chew, III <jjchew@math.utoronto.ca>
# All Rights Reserved.

# <title>htmlhier.pl</title> (for indexing purposes)

# This script outputs to stdout an HTML file that hierarchically
# describes one or more directories.  For an example of its output,
# see http://www.math.utoronto.ca/~jjchew/hier.html.  It will also
# optionally generate a reverse-chronological listing (HTML) file,
# see e.g. http://www.math.utoronto.ca/~jjchew/chron.html.  Typical
# Unix usage is:
#
#   % htmlhier.pl -c c.new . > h.new
#   % mv c.new chron.html
#   % mv h.new hier.html
#
# This script will not descend into a directory unless it is world-
# readable and -searchable, nor will it descend into a directory that
# contains a file called 'do-not-index'.  It will not list files 
# that are not world-readable.  If a file contains a line (as above)
# that contains "<title>", a string, and "</title>" (as should appear
# in an HTML file's head, but could appear in a comment in any other
# type of file), the enclosed string will be used to identify a file.
# If a directory has an index file, it must contain such a title,
# but directories do not have to contain index files for them to be
# listed.
#
# If this script does not behave the way you would like it to, please
# send me e-mail, and I'll see if I can further generalise its behaviour.

# Perl 4-style libraries.  &ctime (used to format modification times) and
# &Getopts (command-line parsing) are called but not defined in this file;
# presumably they are supplied by these two libraries.
require 'ctime.pl';
require 'getopts.pl';

# Version string embedded in the "generated by" line of each output page.
$gkVersion = '1.122';

# Usage() - report the command-line syntax and abort.
#
# Dies with a usage summary (so the text goes to stderr and the script
# exits with a failure status); never returns.  Every switch shown in
# the synopsis line is described, including -a, whose value is used in
# the <link rev=made href="mailto:..."> tag of the generated pages.
sub Usage {
  die "Usage: $0 [-a author] [-c chron] [-i ifn] root-directory...\n"
    . "  -a author e-mail address for the mailto: link "
    .   "[jjchew\@math.utoronto.ca]\n"
    . "  -c chron  name of an optional reverse-chronological listing file\n"
    . "  -i ifn    name of index file in each directory [index.html]\n";
  }

# Option defaults: author address for the mailto: link, no chronology
# file, and the per-directory index file name.  (Presumably &Getopts
# replaces these for whichever switches appear on the command line.)
($opt_a, $opt_c, $opt_i) = ('jjchew@math.utoronto.ca', undef, 'index.html');

# Bail out with the usage text when option parsing reports failure or
# when no root directory was named.
&Usage unless &Getopts('a:c:i:-:');
&Usage unless @ARGV;

# Marker file name: a directory containing a file of this name is not
# descended into.
$gDNI = 'do-not-index';

# The hierarchical page is always printed; the chronological page is
# written only when -c named a file for it.
&printHierarchy(@ARGV);
if ($opt_c) { &printChronology; }

# describe($file) - build the hyperlink used to list $file.
#
# Reads $file line by line and stops at the first line that contains
# both "<title>" and "</title>" (matched case-insensitively).  Returns
#   <a href="$file">TITLE</a>
# where TITLE is the text between the tags with surrounding white space
# removed.  Returns undef (in scalar context) when no line carries a
# title; callers treat that as "nothing to list".  Dies if $file cannot
# be opened.
sub describe { my ($file) = @_;
  # Three-argument open takes $file literally, so names with leading or
  # trailing white space are opened correctly.
  open(my $fh, '<', $file) || die "Can't open $file: $!\n";
  my $s;
  while (defined(my $line = <$fh>)) {
    # Only the first title line matters, so stop reading once it is found.
    if ($line =~ /<title>.*<\/title>/i) { $s = $line; last; }
    }
  close($fh);
  return unless defined $s;
  $s =~ s/.*<title>\s*//i;	# drop everything up to and including the tag
  $s =~ s/\s*<\/title>.*//i;	# drop the closing tag and what follows it
  $s =~ s/\n//;
  return "<a href=\"$file\">$s</a>";
  }

# _chronStamp($mtime) - format $mtime (seconds since the epoch) in local
# time as "YYYY-MM-DD HH:MM:SS", so that string order equals time order.
sub _chronStamp { my ($mtime) = @_;
  my @t = localtime($mtime);
  $t[5] += 1900 if $t[5] < 1900;	# localtime years are offsets from 1900
  $t[4]++;				# localtime months start at 0
  return sprintf("%4d-%02d-%02d %02d:%02d:%02d", @t[5,4,3,2,1,0]);
  }

# _timeLabel($mtime) - &ctime's rendering of $mtime with the newline
# removed and every blank turned into &nbsp; so the label cannot wrap.
sub _timeLabel { my ($mtime) = @_;
  (my $ct = &ctime($mtime)) =~ s/\n//;
  $ct =~ s/ /\&nbsp;/g;
  return $ct;
  }

# _listEntry($file, $s, $mtime) - print the <li> for one entry whose link
# text is $s and, when -c is in effect, record it in @gChron.
sub _listEntry { my ($file, $s, $mtime) = @_;
  print "<li>$s [" . _timeLabel($mtime) . "]\n";
  push(@gChron, _chronStamp($mtime) . " $file<br>$s") if $opt_c;
  }

# list(@files) - print an HTML <ul> describing @files.
#
# For each path:
#   - paths that cannot be lstat'ed are reported with warn and skipped;
#   - symbolic links are skipped;
#   - a directory is listed if it is world-readable and -searchable and
#     does not contain a file named $gDNI.  Its link and time stamp come
#     from its index file ($opt_i) when there is one (which must then
#     have a title, else the script dies), otherwise a "[directory]"
#     link is used.  Its contents are then listed recursively in a
#     nested <ul>, in sorted order;
#   - a plain file is listed if it is world-readable, is not itself an
#     index file, and &describe finds a title in it.
# The <ul> is opened lazily, so nothing at all is printed when no entry
# qualifies.  With -c, every listed entry is also appended to @gChron.
sub list { my (@files) = @_;
  return unless @files;
  my $ulopen = 0;
  for my $file (@files) {
    my ($mode, $mtime) = (lstat $file)[2, 9];
    if (!defined $mtime) {
      warn "Can't stat $file: $!\n";
      next;
      }
    next if -l _;
    if (-d _) {
      next unless ($mode & 5) == 5;	# other: read + search
      next if -f "$file/$gDNI";	# do not index
      my $index = "$file/$opt_i";
      my $s;
      unless ($ulopen) { print "<ul>\n"; $ulopen = 1; }
      if (-f $index) {
        # A directory is dated by its index file, not by itself.
        $mtime = (lstat $index)[9];
        $s = &describe($index);
        die "No title for $index!" unless defined $s && length($s);
        }
      else {
        $s = "<a href=\"$file/\">[directory]</a>";
        }
      _listEntry($file, $s, $mtime);
      my $dh;
      unless (opendir($dh, $file)) {
        warn "Can't opendir($file): $!\n";
        next;
        }
      # Collect the children in their own array: appending them to @files
      # would flatten them into this level instead of nesting them.
      my @subfiles = map { "$file/$_" }
        sort grep { $_ ne '.' && $_ ne '..' } readdir($dh);
      closedir($dh);
      &list(@subfiles);
      }
    elsif (-f _) {
      next unless ($mode & 4) == 4;	# other: read
      # Index files are represented by their directory's entry.  \Q..\E
      # keeps characters such as '.' in the name from acting as regex
      # metacharacters.
      next if $file =~ m!/\Q$opt_i\E$!;
      my $s = &describe($file);
      next unless defined $s && length($s);
      unless ($ulopen) { print "<ul>\n"; $ulopen = 1; }
      _listEntry($file, $s, $mtime);
      }
    }
  print "</ul>\n" if $ulopen;
  }

# printChronology() - write the reverse-chronological listing page.
#
# Does nothing unless $opt_c names an output file.  Otherwise writes a
# complete HTML page to that file: a header naming $opt_a as the author
# and $gkVersion as the generator version, followed by the entries of
# @gChron in descending string order, one paragraph each.  The entries
# are expected to start with a sortable time stamp, which is what makes
# descending string order reverse-chronological.
#
# The page is printed straight to its own file handle, so the currently
# selected output handle is left untouched.  Dies with the system error
# if the file cannot be created or cannot be completely written.
sub printChronology {
  return unless $opt_c;
  open(my $out, '>', $opt_c) || die "Can't create $opt_c: $!\n";
  print $out "<html>\n";
  print $out "<head>\n";
  print $out "<link rev=made href=\"mailto:$opt_a\">\n";
  print $out "<title>Reverse Chronological Listing</title>\n";
  print $out "</head>\n";
  print $out "<body>\n";
  print $out "This file was automatically generated by\n";
  print $out
    "<a href=\"http://www.math.utoronto.ca/~jjchew/software/htmlhier.pl\">"
    ."htmlhier.pl</a>\n";
  print $out "version $gkVersion.<p>\n";
  for my $line (sort { $b cmp $a } @gChron) {
    print $out "$line<p>\n";
    }
  print $out "</body>\n";
  print $out "</html>\n";
  # Buffered write errors (e.g. a full disk) only surface at close.
  close($out) || die "Can't write $opt_c: $!\n";
  }

# printHierarchy(@roots) - emit the hierarchical listing page.
#
# Prints the page header (author link from $opt_a, generator credit and
# $gkVersion) to the currently selected output handle, hands @roots to
# &list for the listing itself, then prints the closing tags.
sub printHierarchy { local(@roots) = @_;
  my @preamble = (
    "<html>\n",
    "<head>\n",
    "<link rev=made href=\"mailto:$opt_a\">\n",
    "<title>Hierarchical Listing</title>\n",
    "</head>\n",
    "<body>\n",
    "This file was automatically generated by\n",
    "<a href=\"http://www.math.utoronto.ca/~jjchew/software/htmlhier.pl\">"
      ."htmlhier.pl</a>\n",
    "version $gkVersion.<p>\n",
    );
  my @postamble = ("</body>\n", "</html>\n");

  print @preamble;
  &list(@roots);
  print @postamble;
  }

# Reached once the top-level calls near the start of the file have run;
# the sub definitions in between execute nothing by themselves.
exit 0;
