# dawg.pl 

# Copyright (C) 1996 by John J. Chew, III <jjchew@math.utoronto.ca>
# All Rights Reserved
#
# <title>dawg.pl</title>

# functions for working with dawg files as generated by the package
# posted to alt.sources, not yet checked for byte sex dependency

# assumed file format:
#
# four-byte records, the first the index of the last record in the
# file, the second (root) and all the rest:
#   0-7 character 8 end-of-word 9 end-of-list 10-31 pointer

package dawg;

$version = '1.1.2';

## public functions:
# $yesno = &dawg'check(*HANDLE, $word);
# $yesno = &dawg'check_suffix(*HANDLE, $index, $suffix);
# $status = &dawg'close(*HANDLE);
# @words = &dawg'list(*HANDLE, $letters, $qOnlyBingos);
# @words = &dawg'list2(*HANDLE, $letters, $qOnlyBingos);
# $status = &dawg'open(*HANDLE, $filename);
# ($char, $eow, $eol, $index) = &dawg'get_record(*HANDLE, $index);
# $index = &dawg'get_root(*HANDLE);

# $yesno = &dawg'check(*HANDLE, $word);
sub check { local(*HANDLE, $word) = @_;
  &check_suffix(*HANDLE, &get_root(*HANDLE), $word);
  }

# $yesno = &dawg'check_suffix(*HANDLE, $index, $suffix);
sub check_suffix { local(*HANDLE, $index, $suffix) = @_;
  die "dawg'check_suffix: null suffix" if $suffix eq '';
  local($char, @chars, $eow, $eol, $next);

  @chars = split('', $suffix);
  while (1) {
    ($char, $eow, $eol, $next) = &get_record(*HANDLE, $index);
    if ($chars[0] eq $char) {
      shift @chars;
      $#chars < $[ && return $eow ? 1 : 0;
      ($index = $next) || return 0;
      }
    else {
      $eol && return 0;
      $index++;
      }
    }  
  }

# $status = &dawg'close(*HANDLE);
sub close { local(*HANDLE) = @_; close HANDLE; }

# translated from C, needs optimizing
# @words = &dawg'list(*HANDLE, $letters, $qOnlyBingos);
sub list { local(*HANDLE, $letters, $qOnlyBingos) = @_;
  local($bingoLength, @blanks, $char, $edge, $eow, $eol, $i, $index, 
    @stemEdges, $u, @words);
  
  $bingoLength = length($letters);
  $edge = &get_root;
  $i = 0;
  $word = '';
  while (1) {
    ($u, $eow, $eol, $index) = &dawg'get_record(HANDLE, $edge);
# warn ".. $edge $u $eow $eol $index $letters-$word $i\n";

    # if we can follow this edge, adjust our rack accordingly 
    if ($letters =~ s/$u//) { }
    elsif ($letters =~ s/\?//) { $u = "\U$u"; }
    # if not, go on to the next edge 
    else { goto next_edge; }
    $word .= $u;

# warn ".... matched '$u'.\n";
    # if this edge takes to the end of a word
    if ($eow) {
      # print the word if appropriate
      if ((!$qOnlyBingos) || $bingoLength == $i+1) {
	# should use @blanks to mark blank tiles
	push(@words, $word);
# warn "matched $word.\n";
	}
      }
    
    # can we go deeper here?
    if ($index && $bingoLength != $i+1) {
# warn "dn $i $edge $index\n";
      $stemEdges[$i++] = $edge;                    # save this stem
      $edge = $index;   # walk down graph
      next;                                 # try again
      }
    else {
      $u = chop $word; $letters .= ($u =~ /[A-Z]/) ? '?' : $u;
      }

next_edge:
    # try the next edge in this node, or the next node
    while ((&get_record(HANDLE, $edge))[2]) {
      return @words if !$i--;
# warn "up $i $edge $stemEdges[$i]\n";
      $u = chop $word; $letters .= ($u =~ /[A-Z]/) ? '?' : $u;
      $edge = $stemEdges[$i];
      }
# warn "nx $i $edge ".(1+$edge)."\n";
    $edge++;
    }
  return @words;
  }

sub list2 (*$$) { # allows ?s in rack
  local(*HANDLE) = shift @_;
  my ($rack, $qBingosOnly) = @_;

  my $bingoLength = length($rack);
  my @blanks;
  my $edge = &dawg'get_root(*HANDLE);
  my $nBlanks = 0;
  my %nChars;
  my @edges;
  my $word = '';
  my @words = ();

  # parse and measure rack, counting blanks
  {
    my $c;
    for $c (split('', $rack)) { 
      $c eq '?' 
	? $nBlanks++
	: $nChars{$c}++;
      }
  }

# printf "%2s %5d %5d %s\n", 'st', 0, $edge, $word;

mainloop:while (1) {
    my ($u, $eow, $eol, $index) = &dawg'get_record(*HANDLE, $edge);

    # if we can follow this edge, adjust our rack accordingly 
    if ($nChars{$u}) { $nChars{$u}--; $word .= $u; }
    elsif ($nBlanks) { $nBlanks--; $word .= uc $u; }
    # if not, go on to the next edge 
    else { goto next_edge; }

    # found a word!
    if ($eow && (!$qBingosOnly || $bingoLength == length($word))) {
      push(@words, $word);
# print "ok $word\n";
      }
    
    # can we go deeper here? 
    if ($index && $bingoLength != length($word)) {
# printf "%2s %5d %5d %s+%s\n", 'dn', $edge, $index, substr($word, 0, -1), substr($word, -1);
      push(@edges, $edge); # save this edge
      $edge = $index;           # walk down graph       
      next;                 # try again             
      }
   { my $ch = chop $word; $ch =~ /[A-Z]/ ? $nBlanks++ : $nChars{$ch}++; }

next_edge:
    # try the next edge in this node, or the next node 
    while ($eol) {
      my $ch = chop $word;
# printf "%2s %5d %5d %s-%s\n", 'up', $edge, $edges[-1], $word, $ch;
      last mainloop unless length($ch);
      $ch =~ /[A-Z]/ ? $nBlanks++ : $nChars{$ch}++;
      ($u, $eow, $eol, $index) = &dawg'get_record(*HANDLE, $edge = pop(@edges));
      }
# printf "%2s %5d %5d %s?%s\n", 'nx', $edge, $edge+1, $word, $u;
    $edge++;
    }
  return @words;
  }

# $status = &dawg'open(*HANDLE, $filename);
sub open { local(*HANDLE, $filename) = @_;
  local($result) = open(HANDLE, "<$filename");
  if ($result) {
    local($actual_size) = (stat HANDLE)[7];
    local($declared_size, $record);

    $record = '';
    sysread(HANDLE, $record, 4);
    $declared_size = unpack('N', $record);
    if ($actual_size/4 != $declared_size + 1) {
      CORE::close HANDLE;
# should actually try other byte sexes here, will implement as needed
      printf STDERR "dawg: $filename: declared file size (%08x) != actual file size (%08x)\n", $declared_size, $actual_size/4 - 1;
      undef;
      }
    else { $result; }
    }
  }

# ($char, $eow, $eol, $index) = &get_record(*HANDLE, $index);
sub get_record { local(*HANDLE, $index) = @_;
  local($char, $eol, $eow, $record);
  sysseek(HANDLE, $index*4, 0) || die "sysseek() failed: $!";
# printf "get_record(HANDLE,0x%06x): ", $index;
  $record = '';
  sysread(HANDLE, $record, 4);
  $index = unpack('N', $record);
  $char = $index >> 24;
  $eow = $index & 0x800000;
  $eol = $index & 0x400000;
# printf "('%s'=0x%02x, %d, %d, 0x%06x)\n", pack('c', $char), $char, $eow, $eol, $index & 0x3fffff;
  (pack('c', $char), $eow, $eol, $index & 0x3fffff);
  }

# $index = &get_root(*HANDLE);
sub get_root { 1; }

1;
