#!/usr/bin/perl

# TODO - check to see children, spouses are listed in right order

# build.pl - build DBM relational databases from text input file

# Copyright (C) 1997 by John J. Chew, III <http://www.math.utoronto.ca/~jjchew>
# All Rights Reserved

# @gDBMSuffixes = ('.dir', '.pag'); # depends on operating system
@gDBMSuffixes = ('.db'); # depends on operating system

@dbs = ('child', 'marriage', 'parent', 'person');
for $db (@dbs) { 
  # this is going to fail for DOS users
  for $suffix (@gDBMSuffixes) { unlink $db . '.new' . $suffix; }
  eval("dbmopen($db, \$db . '.new', 0600)");
  }

$lastID = 0;
while (<>) { 
  # delete comments, skip blank lines
  next if /^\s*#/;
  s/\s+$//; next unless /\S/;

  # newline-separated records with |-separated fields
  @F = split(/\|/, $_, 20);

  # record type is determined by first field
  $type = shift @F;

  # 'p' denotes information about a person
  # p|id|surname|given|title|father|mother|bdate|bplace|ddate|dplace|notes
  if ($type eq 'p') {
    die "Duplicate p|$F[0] at line $.\n" if defined $person{$F[0]};
    die "Wrong number of fields (".($#F+1)." instead of 11):\n$_\n" unless $#F == 10;
    die "Disorder: $F[0] < $lastID\n" if $F[0] < $lastID;
    $lastID = $F[0];
    $person{$F[0]} = join('|', @F[1,2,3,6,7,8,9,10]);
    $parent{$F[0]} = "$F[4]|$F[5]";
    for $p (@F[4..5]) { next unless length($p);
      if (defined $child{$p}) { $child{$p} .= "|$F[0]"; }
      else { $child{$p} = $F[0]; }
      }
    }
  # 'm' denotes information about a marriage
  # m|id|husband|wife|mdate|mplace|ddate|dplace
  # (I suppose for same-sex marriages it's just m|spouse|spouse|...)
  elsif ($type eq 'm') {
    die "Wrong number of fields (".($#F+1)." instead of 6):\n$_\n" 
      unless $#F == 5;
    for $i (0..1) { $p = @F[$i]; next unless length($p);
      $s = $F[1-$i];
      if (defined $marriage{$p}) 
        { $marriage{$p} .= "\n" . join('|', $s, @F[2..$#F]); }
      else { $marriage{$p} = join('|', $s, @F[2..$#F]); }
      } 
    }
  else { die "Can't parse: $_\n"; }
  }

# rearrange children in birth order
while (my ($id, $kids) = each %child) {
  $child{$id} = join('|', 
    sort {
    my $aa = (split(/\|/, $person{$a}))[3];
    my $bb = (split(/\|/, $person{$b}))[3];
    $aa =~ s/^\D+//;
    $bb =~ s/^\D+//;
    $aa cmp $bb or $a cmp $b;
    } split(/\|/, $kids));
  }

# calculate ancestor depths
sub acalc { 
  local($p) = @_;
  local($count, $depth, $parent, @parent, $pcount, $pdepth);
  if (defined $adata{$p}) { 
    split(/\|/, $adata{$p}); 
    }
  else {
    $count = $depth = 0;
    @parent = split(/\|/, $parent{$p});
    $adata{$p} = '0|999999';
    for $parent (@parent) { 
      if ($parent) {
	($pcount, $pdepth) = &acalc($parent);
	$count += $pcount + 1;
	$depth = $pdepth + 1 if $depth <= $pdepth;
	}
      }
    $adata{$p} = "$count|$depth";
    ($count, $depth);
    }
  }
# first version breaks with some versions of db and perl
# %adata = (); while (($p) = each %parent) { &acalc($p); }
%adata = (); for my $p (keys %parent) { &acalc($p); }

# calculate descendant depths
sub dcalc { 
  my($p) = @_;
  my($count, $depth, $child, @child, $ccount, $cdepth);
# print "dcalc($p).\n" if $p > 70000;
  if (defined $ddata{$p}) { split(/\|/, $ddata{$p}); }
  else {
    $count = $depth = 0;
    @child = split(/\|/, $child{$p});
    $ddata{$p} = '0|999999';
    for $child (@child) { 
      ($ccount, $cdepth) = &dcalc($child);
      $count += $ccount + 1;
      $depth = $cdepth + 1 if $depth <= $cdepth;
      }
    $ddata{$p} = "$count|$depth";
    ($count, $depth);
    }
  }
# first version breaks with some versions of db and perl
# %ddata = (); while (($p) = each %child) { &dcalc($p); }
%ddata = (); for my $p (keys %child) { &dcalc($p); }

# store ancestor and descendant depths
for $p (keys %person) {
  $person{$p} = $person{$p} . '|' . ($adata{$p} || '0|0') 
    . '|' . ($ddata{$p} || '0|0');
  }

for $db (@dbs) {
  eval("dbmclose($db)");
  for $suffix (@gDBMSuffixes) { unlink $db . $suffix; }
  for $suffix (@gDBMSuffixes) { rename($db . '.new' . $suffix, $db . $suffix); }
  }