# dawg.pl # Copyright (C) 1996 by John J. Chew, III # All Rights Reserved # # dawg.pl # functions for working with dawg files as generated by the package # posted to alt.sources, not yet checked for byte sex dependency # assumed file format: # # four-byte records, the first the index of the last record in the # file, the second (root) and all the rest: # 0-7 character 8 end-of-word 9 end-of-list 10-31 pointer package dawg; $version = '1.1.2'; ## public functions: # $yesno = &dawg'check(*HANDLE, $word); # $yesno = &dawg'check_suffix(*HANDLE, $index, $suffix); # $status = &dawg'close(*HANDLE); # @words = &dawg'list(*HANDLE, $letters, $qOnlyBingos); # @words = &dawg'list2(*HANDLE, $letters, $qOnlyBingos); # $status = &dawg'open(*HANDLE, $filename); # ($char, $eow, $eol, $index) = &dawg'get_record(*HANDLE, $index); # $index = &dawg'get_root(*HANDLE); # $yesno = &dawg'check(*HANDLE, $word); sub check { local(*HANDLE, $word) = @_; &check_suffix(*HANDLE, &get_root(*HANDLE), $word); } # $yesno = &dawg'check_suffix(*HANDLE, $index, $suffix); sub check_suffix { local(*HANDLE, $index, $suffix) = @_; die "dawg'check_suffix: null suffix" if $suffix eq ''; local($char, @chars, $eow, $eol, $next); @chars = split('', $suffix); while (1) { ($char, $eow, $eol, $next) = &get_record(*HANDLE, $index); if ($chars[0] eq $char) { shift @chars; $#chars < $[ && return $eow ? 1 : 0; ($index = $next) || return 0; } else { $eol && return 0; $index++; } } } # $status = &dawg'close(*HANDLE); sub close { local(*HANDLE) = @_; close HANDLE; } # translated from C, needs optimizing # @words = &dawg'list(*HANDLE, $letters, $qOnlyBingos); sub list { local(*HANDLE, $letters, $qOnlyBingos) = @_; local($bingoLength, @blanks, $char, $edge, $eow, $eol, $i, $index, @stemEdges, $u, @words); $bingoLength = length($letters); $edge = &get_root; $i = 0; $word = ''; while (1) { ($u, $eow, $eol, $index) = &dawg'get_record(HANDLE, $edge); # warn ".. $edge $u $eow $eol $index $letters-$word $i\n"; # if we can follow this edge, adjust our rack accordingly if ($letters =~ s/$u//) { } elsif ($letters =~ s/\?//) { $u = "\U$u"; } # if not, go on to the next edge else { goto next_edge; } $word .= $u; # warn ".... matched '$u'.\n"; # if this edge takes to the end of a word if ($eow) { # print the word if appropriate if ((!$qOnlyBingos) || $bingoLength == $i+1) { # should use @blanks to mark blank tiles push(@words, $word); # warn "matched $word.\n"; } } # can we go deeper here? if ($index && $bingoLength != $i+1) { # warn "dn $i $edge $index\n"; $stemEdges[$i++] = $edge; # save this stem $edge = $index; # walk down graph next; # try again } else { $u = chop $word; $letters .= ($u =~ /[A-Z]/) ? '?' : $u; } next_edge: # try the next edge in this node, or the next node while ((&get_record(HANDLE, $edge))[2]) { return @words if !$i--; # warn "up $i $edge $stemEdges[$i]\n"; $u = chop $word; $letters .= ($u =~ /[A-Z]/) ? '?' : $u; $edge = $stemEdges[$i]; } # warn "nx $i $edge ".(1+$edge)."\n"; $edge++; } return @words; } sub list2 (*$$) { # allows ?s in rack local(*HANDLE) = shift @_; my ($rack, $qBingosOnly) = @_; my $bingoLength = length($rack); my @blanks; my $edge = &dawg'get_root(*HANDLE); my $nBlanks = 0; my %nChars; my @edges; my $word = ''; my @words = (); # parse and measure rack, counting blanks { my $c; for $c (split('', $rack)) { $c eq '?' ? $nBlanks++ : $nChars{$c}++; } } # printf "%2s %5d %5d %s\n", 'st', 0, $edge, $word; mainloop:while (1) { my ($u, $eow, $eol, $index) = &dawg'get_record(*HANDLE, $edge); # if we can follow this edge, adjust our rack accordingly if ($nChars{$u}) { $nChars{$u}--; $word .= $u; } elsif ($nBlanks) { $nBlanks--; $word .= uc $u; } # if not, go on to the next edge else { goto next_edge; } # found a word! if ($eow && (!$qBingosOnly || $bingoLength == length($word))) { push(@words, $word); # print "ok $word\n"; } # can we go deeper here? if ($index && $bingoLength != length($word)) { # printf "%2s %5d %5d %s+%s\n", 'dn', $edge, $index, substr($word, 0, -1), substr($word, -1); push(@edges, $edge); # save this edge $edge = $index; # walk down graph next; # try again } { my $ch = chop $word; $ch =~ /[A-Z]/ ? $nBlanks++ : $nChars{$ch}++; } next_edge: # try the next edge in this node, or the next node while ($eol) { my $ch = chop $word; # printf "%2s %5d %5d %s-%s\n", 'up', $edge, $edges[-1], $word, $ch; last mainloop unless length($ch); $ch =~ /[A-Z]/ ? $nBlanks++ : $nChars{$ch}++; ($u, $eow, $eol, $index) = &dawg'get_record(*HANDLE, $edge = pop(@edges)); } # printf "%2s %5d %5d %s?%s\n", 'nx', $edge, $edge+1, $word, $u; $edge++; } return @words; } # $status = &dawg'open(*HANDLE, $filename); sub open { local(*HANDLE, $filename) = @_; local($result) = open(HANDLE, "<$filename"); if ($result) { local($actual_size) = (stat HANDLE)[7]; local($declared_size, $record); $record = ''; sysread(HANDLE, $record, 4); $declared_size = unpack('N', $record); if ($actual_size/4 != $declared_size + 1) { CORE::close HANDLE; # should actually try other byte sexes here, will implement as needed printf STDERR "dawg: $filename: declared file size (%08x) != actual file size (%08x)\n", $declared_size, $actual_size/4 - 1; undef; } else { $result; } } } # ($char, $eow, $eol, $index) = &get_record(*HANDLE, $index); sub get_record { local(*HANDLE, $index) = @_; local($char, $eol, $eow, $record); sysseek(HANDLE, $index*4, 0) || die "sysseek() failed: $!"; # printf "get_record(HANDLE,0x%06x): ", $index; $record = ''; sysread(HANDLE, $record, 4); $index = unpack('N', $record); $char = $index >> 24; $eow = $index & 0x800000; $eol = $index & 0x400000; # printf "('%s'=0x%02x, %d, %d, 0x%06x)\n", pack('c', $char), $char, $eow, $eol, $index & 0x3fffff; (pack('c', $char), $eow, $eol, $index & 0x3fffff); } # $index = &get_root(*HANDLE); sub get_root { 1; } 1;