#!/usr/bin/perl -w
#
# SimpleDB torture.
# run "grader.pl -h" for help.
#
# Written by Thomer M. Gil
#
# {{{ uses and constants
use FileHandle;
use Carp;
use Getopt::Std;
use Data::Dumper;

use Config;
defined $Config{sig_name} or die "No signals defined?";

my $passed = 0;
my $failed = 0;
my $timedout = 0;
my $timeout_override = 0;
my $TIMEOUT = 45;
use constant MAX_VALUE => 2 ** 16;
use constant MAX_COLUMNS => 128;
our($opt_v, $opt_x, $opt_t, $opt_h, $opt_i, $opt_c);


# predicates
use constant EQUALS => 0;  
use constant GREATER_THAN => 1;
use constant LESS_THAN => 2;
use constant LESS_THAN_OR_EQ => 3;
use constant GREATER_THAN_OR_EQ => 4;
use constant MAX_PREDICATE => 4;
my %prednames = (
  0 => "==",
  1 => ">",
  2 => "<",
  3 => "<=",
  4 => ">=",
);

# aggregates
use constant MIN => 0;  
use constant MAX => 1;
use constant SUM => 2;
use constant AVG => 3;
use constant COUNT => 4;
use constant MAX_AGGREGATE => 4;
my %aggnames = (
  0 => "MIN",
  1 => "MAX",
  2 => "SUM",
  3 => "AVG",
  4 => "COUNT",
);

my %signo = ();
my %files = ();
# }}}

# {{{ create_table
#
# Creates a random table and writes it to a file called
# "f$columns.$which.txt". So, for example, if $columns == 10, and $which == 1,
# then the file will be called f10.1.txt.  If $which is not set, it defaults
# to 0.
#
# If colspecs is set, it should be a hash reference with keys/value
# combinations where a key is a column and the value is a value.  For example,
# if colspecs is { 0 => 80, 1 => 100 }, then all tuples in the table will
# always have value 80 in column 0 and value 100 in column 1.  If colspecs is
# not set, values will be random between 0 and MAX_VALUE.
#
# If $max_value is set, it overrides MAX_VALUE.
#
# Invokes SimpleDB's PageEncoder to convert the .txt to a .dat file, encoded
# such that HeapFile and HeapPage can read it.
#
sub create_table {
  my ($columns, $rows, $which, $colspecs, $max_value) = @_;

  $which ||= 0;
  $colspecs or %$colspecs = ();
  $max_value ||= MAX_VALUE;

  my %table = ();
  my $fname = "f" . ($columns-1) . ".$which.txt";
  $files{$fname}++;
  my $fh = new FileHandle(">$fname") or die "Couldn't create $fname: $!\n";
  my $x = 0;
  for(my $i=0; $i<$rows; $i++) {
    my $record = "";
    for(my $j=0; $j<$columns; $j++) {
      if(exists $colspecs->{$j}) {
        $record .= $colspecs->{$j};
      } else {
        $record .= int(rand($max_value || MAX_VALUE));
      }
      $record .= ",";
    }
    $record =~ s/,$//;
    $table{$record}++;
    print $fh "$record\n";
  }
  $fh->close();

  # convert to .dat file
  my $cmd = "java -Xmx64m  -jar dist/simpledb.jar convert $fname $columns";
  $opt_v and print "\n$cmd\n";
  system "$cmd";
  return \%table;
}
# }}}
# {{{ filter
# Filters rows in $table.
#
# For example, if $column == 8, and $predicate == EQUALS and $operand == 5,
# then the output will be all records from $table for which
# $row[8] == 5 is true.
#
# If $inverse is set, then we output all records for which the above is NOT
# true.  In other words, the remainder of the table.
#
sub filter {
  my ($table, $column, $predicate, $operand, $inverse) = @_;

  $inverse ||= 0;
  my %newtable = ();

  while(my ($k, $v) = each %$table) {
    my @fields = split /,/, $k;

    my $result = 0;
    $predicate == GREATER_THAN and $result = $fields[$column] > $operand;
    $predicate == GREATER_THAN_OR_EQ and $result = $fields[$column] >= $operand;
    $predicate == LESS_THAN and $result = $fields[$column] < $operand;
    $predicate == LESS_THAN_OR_EQ and $result = $fields[$column] <= $operand;
    $predicate == EQUALS and $result = $fields[$column] == $operand;
    (($result && !$inverse) || (!$result && $inverse)) and $newtable{$k} = $v;
  }

  return \%newtable;
}
# }}}
# {{{ merge
# Concatenates the contents of two tables.
sub merge {
  my ($table1, $table2) = @_;
  my %newtable = %$table1;
  while( my ($k, $v) = each %$table2) {
    $newtable{$k} += $v;
  }
  return \%newtable;
}
# }}}
# {{{ joinx
# Joins two tables based on a predicate.  For example, is $column1 == 2, and
# $column2 == 4 and $predicate == EQUALS, then the output is all records for
# which
#
# $row1[2] == $row2[4]
#
# is true.
sub joinx {
  my ($table1, $table2, $column1, $column2, $predicate) = @_;

  my %newtable = ();
  while (my ($k1, $v1) = each %$table1) {
    my @fields1 = split /,/, $k1;
    while (my ($k2, $v2) = each %$table2) {
      my @fields2 = split /,/, $k2;

      my $result = 0;
      $predicate == GREATER_THAN and $result = $fields1[$column1] > $fields2[$column2];
      $predicate == GREATER_THAN_OR_EQ and $result = $fields1[$column1] >= $fields2[$column2];
      $predicate == LESS_THAN and $result = $fields1[$column1] < $fields2[$column2];
      $predicate == LESS_THAN_OR_EQ and $result = $fields1[$column1] <= $fields2[$column2];
      $predicate == EQUALS and $result = $fields1[$column1] == $fields2[$column2];
      $result and $newtable{"$k1,$k2"}++;
    }
  }
  return \%newtable;
}
# }}}
# {{{ aggregate
#
# computes SELECT $aggregate($afield) GROUP BY $gfield
#
sub aggregate {
  my ($table, $aggregate, $afield, $gfield) = @_;

  my %xtable = ();
  while((my $k, $v) = each %$table) {
    my @fields = split /,/, $k;

    $xtable{ $fields[$gfield] } ||= {
      MIN => MAX_VALUE+1,
      MAX => -1,
      COUNT => 0,
      SUM => 0,
    };
    my $entry = $xtable{ $fields[$gfield] };

    $fields[$afield] < $entry->{MIN} and $entry->{MIN} = $fields[$afield];
    $fields[$afield] > $entry->{MAX} and $entry->{MAX} = $fields[$afield];
    $entry->{COUNT} += $v;
    $entry->{SUM} += $v * $fields[$afield];
  }

  my %newtable = ();
  while(my ($k, $v) = each %xtable) {
    $aggregate == MIN and $newtable{"$k," . $v->{MIN}}++;
    $aggregate == MAX and $newtable{"$k," . $v->{MAX}}++;
    $aggregate == COUNT and $newtable{"$k," . $v->{COUNT}}++;
    $aggregate == SUM and $newtable{"$k," . $v->{SUM}}++;
    $aggregate == AVG and $newtable{"$k," . sprintf("%d", $v->{SUM} / $v->{COUNT})}++;
  }
  return \%newtable;
}
# }}}

# {{{ match_tables
# Compares the output of a SimpleDB run with the contents of table.
# We expected SimpleDB to output records as follows
#
# column1\tcolumn2\tcolumn3\t...\tcolumnX
# column1\tcolumn2\tcolumn3\t...\tcolumnX
#
# Where \t is a tab character or other white space, but not a newline.
#
sub match_tables {
  my ($out, $table) = @_;
  $timedout and print " [timeout]" and return 0;

  my @outlines = split /\n/, $out;
  my $sum = 0;
  map { $sum += $_ } values %$table;
  print " [" . @outlines . " / $sum]";
  @outlines != $sum and return 0;

  foreach my $line (@outlines) {
    chomp($line);
    $line =~ s/\s+$//;
    (my $record = $line) =~ s/\s+/,/g;
    exists $$table{$record} or return 0;
    --$$table{$record} == 0 and delete $$table{$record};
  }
  keys %$table and return 0;

  return 1;
}
# }}}
# {{{ match_regexp
# Compares the ouput of a SimpleDB run against a regular expression.
sub match_regexp {
  my ($out, $regexp) = @_;

  $timedout and print " [timeout]" and return 0;
  return $out =~ m/$regexp/m;
}
# }}}

# {{{ execute
# runs the specified test with the specified arguments and returns standard
# output.
sub execute {
  my ($test, @args) = @_;
  my $args = join ' ', @args;
  my $cmd = "java -Xmx64m -jar dist/simpledb.jar simpledb.test.$test $args 2>&1";
  $opt_v and print "\n$cmd\n";

  # child process
  $timedout = 0;
  if(!(open(CHILD, "-|"))) {
    alarm $TIMEOUT;
    print `$cmd`;
    exit;
  }

  # parent collects output from child
  my $out = "";
  while(<CHILD>) { $out .= $_; }
  close CHILD;

  # did the child die from an alarm?
  ($? & 255) == $signo{ALRM} and $timedout = 1;

  return $out;
}
# }}}
# {{{ output
sub output {
  my ($result) = @_;

  if($result) {
    $passed++;
    print " ok.\n";
  } else {
    $failed++;
    print " failed.\n";
    $opt_x and exit;
  }
}
# }}}

# {{{ test1
sub test1 {
  print "test1 -- dumping contents of database\n";

  # 1-4 columns
  for($i=1; $i<=3; $i++) {
    foreach my $j (0, 1, 2, 511, 512, 513, 1023, 1024, 1025, 4096 + int(rand(4096))) {
      print "  [test1] testing $i column(s), $j rows...";
      my $table = &create_table($i, $j);
      &output(&match_tables(&execute('ScanTest', $i, 0), $table));
    }
  }

  # 126-128 columns
  for($i=MAX_COLUMNS-2; $i<=MAX_COLUMNS; $i++) {
    foreach my $j (0, 1, 2, 511, 512, 513, 1023, 1024, 1024 + int(rand(4096))) {
      print "  [test1] testing $i column(s), $j rows...";
      my $table = &create_table($i, $j);
      &output(&match_tables(&execute('ScanTest', $i, 0), $table));
    }
  }

  # some random combinations
  for($i=0; $i<8; $i++) {
    my $columns = int(rand(MAX_COLUMNS))+1;
    my $rows = int(rand(8192));

    print "  [test1] testing $columns column(s), $rows rows...";
    my $table = &create_table($columns, $rows);
    &output(&match_tables(&execute('ScanTest', $columns, 0), $table));
  }
}
# }}}
# {{{ test2
sub test2 {
  print "\ntest2 -- testing Filter...\n";
  &test2_and_3("test2", "FilterTest");
}
# }}}
# {{{ test3
sub test3 {
  print "\ntest3 -- testing Delete...\n";
  &test2_and_3("test3", "DeleteTest");
}
# }}}
# {{{ test2_and_3
sub test2_and_3 {
  my ($test, $class) = @_;

  my @tests = (
    {
      COLUMNS => 3,
      COLSPEC => { 0 => 1, },
      PREDICATE => EQUALS,
      OPERAND => 1,
      COLUMN => 0,
    },

    {
      COLUMNS => 3,
      ROWS => 5,
      COLSPEC => { 0 => 1, },
      PREDICATE => EQUALS,
      OPERAND => 2,
      COLUMN => 0,
    },

    {
      COLUMNS => 3,
      COLSPEC => { 1 => 1, },
      PREDICATE => LESS_THAN,
      OPERAND => 2,
      COLUMN => 1,
    },

    {
      COLUMNS => 3,
      COLSPEC => { 1 => 1, },
      PREDICATE => LESS_THAN_OR_EQ,
      OPERAND => 1,
      COLUMN => 1,
    },

    {
      COLUMNS => 3,
      COLSPEC => { 1 => 1, },
      PREDICATE => GREATER_THAN_OR_EQ,
      OPERAND => 0,
      COLUMN => 1,
    },

    {
      COLUMNS => 3,
      COLSPEC => { 1 => 1, },
      PREDICATE => GREATER_THAN,
      OPERAND => 5,
      COLUMN => 1,
    },
  );

  for($i=0; $i<8; $i++) {
    push @tests, {};
  }

  foreach my $testcase (@tests) {
    my $columns = exists $testcase->{COLUMNS} ? $testcase->{COLUMNS} : int(rand(MAX_COLUMNS))+1;
    my $rows = exists $testcase->{ROWS} ? $testcase->{ROWS} : 1024 + int(rand(1024));
    my $predicate = exists $testcase->{PREDICATE} ? $testcase->{PREDICATE} : int(rand(MAX_PREDICATE));
    my $max_value = exists $testcase->{MAXVAL} ? $testcase->{MAXVAL} : MAX_VALUE;
    my $column = exists $testcase->{COLUMN} ? $testcase->{COLUMN} : int(rand($columns));
    my $operand =  exists $testcase->{OPERAND} ? $testcase->{OPERAND} : int(rand(MAX_VALUE));
    my $table =  create_table($columns, $rows, 0, $testcase->{COLSPEC} || {}, $max_value);

    print "  [$test] table[$column] " . $prednames{$predicate} . " $operand on a $columns x $rows table...";

    if($class eq "DeleteTest") {
      my $deleted_table = &filter($table, $column, $predicate, $operand, 0);
      my $ndelrecords = 0;
      foreach (values %$deleted_table) {
        $ndelrecords += $_;
      }
      &output(&match_tables(&execute("DeleteTest", $columns, $column, $predicate, $operand, $ndelrecords),
                            &filter($table, $column, $predicate, $operand, 1)));
    }

    if($class eq "FilterTest") {
      &output(&match_tables(&execute("FilterTest", $columns, $column, $predicate, $operand),
                            &filter($table, $column, $predicate, $operand, 0)));
    }

    print "  [$test] after flush...";

    if($class eq "FilterTest") {
      &output(&match_tables(&execute("ScanTest", $columns, 0), $table));
    }

    # for DeleteTest, filter should be persistent after flush
    if($class eq "DeleteTest") {
      &output(&match_tables(&execute("ScanTest", $columns, 0), &filter($table, $column, $predicate, $operand, 1)));
    }
  }
}
# }}}
# {{{ test4
sub test4 {
  print "\ntest4 -- testing Insert...\n";
  my @tests = (
    {
      TABLE1 => {
        COLUMNS => 3,
        ROWS => 0,
      },

      TABLE2 => {
        COLUMNS => 3,
        ROWS => 0,
      },
    },

    {
      TABLE1 => {
        COLUMNS => 1,
        ROWS => 1,
      },

      TABLE2 => {
        COLUMNS => 1,
        ROWS => 1,
      },
    },

    {
      TABLE1 => {
        COLUMNS => 1,
        ROWS => 0,
      },

      TABLE2 => {
        COLUMNS => 1,
        ROWS => 1,
      },
    },


    {
      TABLE1 => {
        COLUMNS => 3,
        ROWS => 1,
      },

      TABLE2 => {
        COLUMNS => 3,
        ROWS => 0,
      },
    },

    {
      TABLE1 => {
        COLUMNS => 8,
        ROWS => 0,
      },

      TABLE2 => {
        COLUMNS => 8,
        ROWS => 1,
      },
    },
  );

  for($i=0; $i<8; $i++) {
    push @tests, {};
  }

  foreach my $testcase (@tests) {
    my $columns1 = exists $testcase->{TABLE1}->{COLUMNS} ? $testcase->{TABLE1}->{COLUMNS} : int(rand(MAX_COLUMNS))+1;
    my $columns2 = exists $testcase->{TABLE2}->{COLUMNS} ? $testcase->{TABLE2}->{COLUMNS} : $columns1;
    my $rows1 = exists $testcase->{TABLE1}->{ROWS} ? $testcase->{TABLE1}->{ROWS} : 1024 + int(rand(1024));
    my $rows2 = exists $testcase->{TABLE2}->{ROWS} ? $testcase->{TABLE2}->{ROWS} : 1024 + int(rand(1024));
    my $table1 = create_table($columns1, $rows1, 0);
    my $table2 = create_table($columns2, $rows2, 1);

    print "  [test4] insert a ${columns1}x${rows1} table into a ${columns2}x${rows2} table...";
    &output(&match_tables(&execute('InsertTest', $columns1, $columns2, $rows1), &merge($table1, $table2)));

    print "  [test4] after flush...";
    &output(&match_tables(&execute('ScanTest', $columns2, 1), &merge($table1, $table2)));
  }

  for(my $i=0; $i<3; $i++) {
    my $columns1 = int(rand(MAX_COLUMNS))+1;
    my $columns2 = $columns1;
    while($columns1 == $columns2) {
      $columns2 = int(rand(MAX_COLUMNS))+1;
    }
    my $table1 = create_table($columns1, 10, 0);
    my $table2 = create_table($columns2, 20, 1);
    print "  [test4] incompatible insert ($columns1 vs $columns2) throws simpledb.DbException...";
    &output(&match_regexp(&execute('InsertTest', $columns1, $columns2), 'simpledb.DbException'));
  }
}
# }}}
# {{{ test5
sub test5 {
  print "\ntest5 -- testing Join...\n";
  my @tests = (
    {
      TABLE1 => {
        COLUMNS => 2,
        COLUMN => 0,
        ROWS => 1,
        COLSPEC => {
          0 => 1,
        }
      },
      TABLE2 => {
        COLUMNS => 2,
        COLUMN => 0,
        ROWS => 1,
        COLSPEC => {
          0 => 1,
        }
      },
      PREDICATE => EQUALS,
    },

    {
      TABLE1 => {
        COLUMNS => 2,
        COLUMN => 0,
        ROWS => 3,
        COLSPEC => {
          0 => 1,
        }
      },
      TABLE2 => {
        COLUMNS => 2,
        COLUMN => 0,
        ROWS => 3,
        COLSPEC => {
          0 => 1,
        }
      },
      PREDICATE => EQUALS,
    },

    {
      TABLE1 => {
        COLUMNS => 2,
        COLUMN => 0,
        ROWS => 3,
        COLSPEC => {
          0 => 2,
        }
      },
      TABLE2 => {
        COLUMNS => 2,
        COLUMN => 0,
        ROWS => 3,
        COLSPEC => {
          0 => 1,
        }
      },
      PREDICATE => EQUALS,
    },
  );

  for($i=0; $i<16; $i++) {
    push @tests, {};
  }

  foreach my $testcase (@tests) {
    my $columns1 = exists $testcase->{TABLE1}->{COLUMNS} ? $testcase->{TABLE1}->{COLUMNS} : int(rand(MAX_COLUMNS))+1;
    my $columns2 = exists $testcase->{TABLE2}->{COLUMNS} ? $testcase->{TABLE2}->{COLUMNS} : int(rand(MAX_COLUMNS))+1;
    my $column1 = exists $testcase->{TABLE1}->{COLUMN} ? $testcase->{TABLE1}->{COLUMN} : int(rand($columns1));
    my $column2 = exists $testcase->{TABLE2}->{COLUMN} ? $testcase->{TABLE2}->{COLUMN} : int(rand($columns2));
    my $rows1 = exists $testcase->{TABLE1}->{ROWS} ? $testcase->{TABLE1}->{ROWS} : 32 + int(rand(32));
    my $rows2 = exists $testcase->{TABLE2}->{ROWS} ? $testcase->{TABLE2}->{ROWS} : 32 + int(rand(32));
    my $table1 = create_table($columns1, $rows1, 0, $testcase->{TABLE1}->{COLSPEC});
    my $table2 = create_table($columns2, $rows2, 1, $testcase->{TABLE2}->{COLSPEC});
    my $predicate = exists $testcase->{PREDICATE} ? $testcase->{PREDICATE} : int(rand(MAX_PREDICATE));

    print "  [test5] join ${columns1}x${rows1}[$column1] $prednames{$predicate} ${columns2}x${rows2}[$column2]...";
    &output(&match_tables(&execute("JoinTest", $columns1, $columns2, $column1, $predicate, $column2),
                          &joinx($table1, $table2, $column1, $column2, $predicate)));
  }
}
# }}}
# {{{ test6
sub test6 {
  print "\ntest6 -- testing Aggregates...\n";
  my @tests = (
    { ROWS => 0, },
    { ROWS => 1, },
    { ROWS => 1, },
    { ROWS => 1, },
    { ROWS => 1, },
    { COLUMNS => 1, ROWS => 1, },
    { COLUMNS => 1, ROWS => 2, },
    { COLUMNS => 1, },
    { COLUMNS => 1, },
    { COLUMNS => 2,
      COLSPEC => { 0 => 0, 1 => 0 },
    },
  );

  for($i=0; $i<16; $i++) {
    push @tests, {};
  }

  foreach my $testcase (@tests) {
    my $columns = exists $testcase->{COLUMNS} ? $testcase->{COLUMNS} : int(rand(MAX_COLUMNS))+1;
    my $afield = exists $testcase->{COLUMN1} ? $testcase->{COLUMN1} : int(rand($columns));
    my $gfield = exists $testcase->{COLUMN2} ? $testcase->{COLUMN2} : int(rand($columns));
    my $rows = exists $testcase->{ROWS} ? $testcase->{ROWS} : 1024 + int(rand(1024));
    my $aggregate = int(rand(MAX_AGGREGATE+1));
    my $table =  create_table($columns, $rows, 0, $testcase->{COLSPEC} || {}, 64 + int(rand(64)));
    print "  [test6] SELECT $aggnames{$aggregate}(field$afield) GROUP BY field$gfield in ${columns}x${rows} table...";
    &output(&match_tables(&execute("AggregateTest", $columns, $aggregate, $afield, $gfield),
                          &aggregate($table, $aggregate, $afield, $gfield)));
  }
}
# }}}
# {{{ test7
sub test7 {

  print "\ntest7 -- testing Transactions...\n";

  my $old_timeout = $TIMEOUT;
  $TIMEOUT = 600;
  for(my $i=0; $i<10; $i++) {
    my ($columns, $rows, $which, $colspecs, $max_value) = @_;
    my $table = &create_table(1, 1, 0, { 0 => 0 });
    my $nthreads = 16 + int(rand(50));
    print "  [test7] $nthreads concurrent threads...";
    &output(&match_tables(&execute("TransactionTest", $nthreads), {$nthreads => 1}));
  }
  $TIMEOUT = $old_timeout;
}
# }}}
# {{{ test8
sub test8 {
  print "\ntest8 -- testing Page Eviction...\n";

  print "  [test8] populating numerous pages with data...";
  my $columns = 50;
  my $rows = 1024*500;
  my $large_table = create_table($columns, $rows, 0);
  my $cmd = "java -Xmx8m -jar dist/simpledb.jar simpledb.test.ScanTest $columns 0 2>&1";
  $opt_v and print "\n$cmd\n";

  # child process
  if(!(open(CHILD, "-|"))) {
    alarm $TIMEOUT;
    print `$cmd`;
    exit;
  }

  # parent collects output from child
  my $out = "";
  while(<CHILD>) { $out .= $_; }
  close CHILD;

  &output($out !~ /OutOfMemoryError/ and $out !~ /Exception/);
}
# }}}

# {{{ clearfiles
sub clearfiles {
  # remove created .txt and .dat files
  $opt_c and return;
  foreach (keys %files) {
    unlink $_;
    $_ =~ s/\.txt$/.dat/;
    unlink $_;
  }
}
# }}}
# {{{ usage
sub usage {
  print <<EOF;
grader.pl -- SimpleDB torture
  
Usage: grader.pl [-c] [-h] [-t TEST] [-v] [-x]
  -c            : don't clear .txt and .dat files on exit, default off
  -h            : print this help
  -t TEST       : only run test TEST, default all tests
  -v            : increase verbosity, default off
  -x            : exit on failed test, default off
  -i TIMEOUT    : use timeout TIMEOUT, default $TIMEOUT

Tests
  1 : simple table dump
  2 : filters
  3 : delete
  4 : insert
  5 : join
  6 : aggregates
  7 : transactions
  8 : page eviction
EOF
}
# }}}
# {{{ main
sub main {
  &getopts('chvxt:i:');

  # print help
  $opt_h and &usage() and exit;

  # catch ctrl-c
  eval {
    $SIG{INT} = sub { &clearfiles(); exit; };
  };


  # flush standard output immediately
  $| = 1;

  my $i = 0;
  foreach $name (split ' ', $Config{sig_name}) {
    $signo{$name} = $i++;
  }

  # some silly random seed
  srand(time ^ $$);

  $opt_i and $TIMEOUT = $opt_i;

  # -t parameter will jump directly to that test
  # otherwise, run all tests
  if($opt_t) {
    eval "&test" . $opt_t;
  } else {
    &test1();
    &test2();
    &test3();
    &test4();
    &test5();
    &test6();
    &test7();
    &test8();
  }
  print "passed = $passed, failed = $failed.\n";

  &clearfiles();
}
# }}}

&main();
