#!/usr/bin/env perl

use warnings;
# vi: set ts=4 sw=4 :
#____________________________________________________________________________
#
#   MusicBrainz -- the open internet music database
#
#   Copyright (C) 1998 Robert Kaye
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
#   $Id$
#____________________________________________________________________________

use strict;

@ARGV == 2 or die "Bad args";
my ($pending, $pendingdata) = @ARGV;

# Join on SeqId, which is numeric and the first column.
open(my $p_fh, "sort -n < $pending |") or die $!;
open(my $d_fh, "sort -n < $pendingdata |") or die $!;

# A temporary file to accept the XML output
open(my $xmlfh, "+>", undef) or die $!;
my $last_xid = 0;
my @xml_frags;
my $is_sorted = 1;

# Which columns do we export from each table?
my %export_columns = (
    artist              => [qw( name sortname gid )],
    album               => [qw( name gid )],
    albumjoin   => [qw( album track sequence )],
    track               => [qw( name gid artist )],
);

# "Pending": SeqId is the primary key.
# "PendingData": SeqId, IsKey is the primary key.

# How many "PendingData" rows we expect depends on the "Op" of the "Pending" row
my %op_expect = (
    "i" => 1,
    "d" => 1,
    "u" => 2,
);

# Which tables are we interested in?
my %want_table = map { $_ => 1 } qw(
    "public"."artist"
    "public"."album"
    "public"."albumjoin"
    "public"."track"
);

while (my $p_line = shift_p())
{
    (my $seqid) = $p_line =~ /^(\d+)\t/ or die "No SeqID in $p_line";

    my @d_lines;
    while (my $d_line = shift_d())
    {
        unshift_d($d_line), last
                unless $d_line =~ /^$seqid\t/;
        push @d_lines, $d_line;
    }


    my $p_row = decode_row($p_line, qw( SeqId TableName Op XID ));
    my $op = $p_row->{"Op"} or die "No Op in $p_line";

    my $expect_d = $op_expect{$op} or die "Unknown op '$op'";
    die "Wrong number of PendingData lines ($p_line)" unless @d_lines == $expect_d;

    my $table = $p_row->{"TableName"};
    $want_table{$table} or next;

    my $XID = $p_row->{"XID"};

    if ($op eq "i")
    {
        my $ins_row = decode_row($d_lines[0], qw( SeqId IsKey Data ));
        die "Bad IsKey for Op" unless $ins_row->{"IsKey"} eq "f";
        my $values = extractData($ins_row->{"Data"});
        handle_insert($XID, $table, $values);
    }
    elsif ($op eq "d")
    {
        my $del_row = decode_row($d_lines[0], qw( SeqId IsKey Data ));
        die "Bad IsKey for Op" unless $del_row->{"IsKey"} eq "t";
        my $data = extractData($del_row->{"Data"});
        handle_delete($XID, $table, $data);
    }
    elsif ($op eq "u")
    {
        my @upd_rows = map { decode_row($_, qw( SeqId IsKey Data )) } @d_lines;
        (my $keys_row) = grep { $_->{"IsKey"} eq "t" } @upd_rows or die "Bad IsKey for Op";
        (my $values_row) = grep { $_->{"IsKey"} eq "f" } @upd_rows or die "Bad IsKey for Op";
        my $keys = extractData($keys_row->{"Data"});
        my $values = extractData($values_row->{"Data"});
        handle_update($XID, $table, $values, $keys);
    }
    else { die "Bad Op" }
}

die "leftover data" if defined shift_d();
output_xml();

################################################################################

# Read from the two files; support an "un-read" interface.
{ my @b; sub shift_p { @b ? shift(@b) : <$p_fh> } sub unshift_p { unshift @b, @_ } }
{ my @b; sub shift_d { @b ? shift(@b) : <$d_fh> } sub unshift_d { unshift @b, @_ } }

################################################################################

# Handle each insert/update/delete change

sub handle_insert
{
    my ($XID, $table, $values) = @_;
    handle_ins_upd($XID, $table, $values, "insert");
}

sub handle_update
{
    my ($XID, $table, $values, $keys) = @_;
    handle_ins_upd($XID, $table, $values, "update");
}

sub handle_ins_upd
{
    my ($XID, $table, $values, $opname) = @_;
    $table =~ s/^"public"\."(\w+)"$/$1/ or die "Bad table name '$table'";

    my $cols = $export_columns{$table} or die "Unexpected table '$table'";

    # All our tables have a single-column numeric primary key called "id"
    my $xml = qq[  <$opname relation="$table" xid="$XID" id="$values->{id}">\n];
    for my $col (@$cols)
    {
        my $v = $values->{$col};
        defined($v) or die "Can't export NULL values";
        $v =~ s/&/&amp;/g; $v =~ s/</&lt;/g; $v =~ s/>/&gt;/g;
        $xml .= qq[    <attribute name="$col">$v</attribute>\n];
    }
    $xml .= qq[  </$opname>\n];

    write_xml_fragment($XID, $xml);
}

sub handle_delete
{
    my ($XID, $table, $keys) = @_;
    $table =~ s/^"public"\."(\w+)"$/$1/ or die "Bad table name '$table'";

    # All our tables have a single-column numeric primary key called "id"
    my $xml = qq#  <delete relation="$table" xid="$XID" id="$keys->{id}" />\n#;

    write_xml_fragment($XID, $xml);
}

################################################################################

# Collect a bunch of XML fragments.  Ensure they are sorted by XID.  Output
# them with an appropriate DOCTYPE etc.

sub write_xml_fragment
{
    my ($XID, $xmlfragment) = @_;
    my $pos = tell $xmlfh;
    print $xmlfh $xmlfragment;
    push @xml_frags, [ $XID, $pos, length($xmlfragment) ];
    $is_sorted = 0 if $XID < $last_xid;
    $last_xid = $XID;
}

sub sort_xml_fragments
{
    open(my $newfh, "+>", undef) or die $!;

    @xml_frags = sort { $a->[0] <=> $b->[0] } @xml_frags;

    for (@xml_frags)
    {
        my ($XID, $pos, $length) = @$_;
        seek($xmlfh, $pos, 0) or die "seek to $pos: $!";
        read($xmlfh, my $data, $length) or die "read $length at $pos: $!";
        print $newfh $data or die "write: $!";
    }

    $xmlfh = $newfh;
}

sub output_xml
{
    print(STDERR "No data\n"), exit 1
        if not @xml_frags;
    $xmlfh = sort_xml_fragments() unless $is_sorted;

    my $first_xid = $xml_frags[0][0];

    print '<?xml version="1.0" encoding="UTF-8"?>' .
        "\n<changes xid_start=\"$first_xid\">\n";

    seek($xmlfh, 0, 0);
    while (<$xmlfh>) { print };

    print "</changes>\n";
}

################################################################################

# See http://developer.postgresql.org/docs/postgres/sql-copy.html
my %copy_esc;
BEGIN {
    %copy_esc = (
        'b'             => "\b",
        'f'             => "\f",
        'n'             => "\n",
        'r'             => "\r",
        't'             => "\t",
        'v'             => chr(11),
        '\\'    => chr(92),
    );
}

# Decode a line of text in Postgres "COPY" format into a hash reference
sub decode_row
{
    my ($line, @columns) = @_;
    chomp $line;

    my %h;
    @h{@columns} = map {
        $_ eq '\N' ? undef : do {
                s[\\(?:([0-7]{1,3})|(.))][
                        defined($1)
                        ? chr(oct($1))
                        : ($copy_esc{$2} || $2)
                ]eg;
                $_;
        }
    } split /\t/, $line, -1;

    \%h;
}

# Decode a value in dbmirror "data" format into a hash reference
sub extractData
{
  my $dataField = shift;
  my %valuesHash;

  while (length($dataField)>0)
  {
    # Extract the field name that is surronded by double quotes
    $dataField =~ m/(\".*?\")/s;
    my $fieldName = $1;
    $dataField = substr $dataField ,length($fieldName);
    $fieldName =~ s/\"//g; #Remove the surronding " signs.

    if ($dataField =~ m/(^= )/s)
    {
      #Matched null
      $dataField = substr $dataField , length($1);
      $valuesHash{$fieldName}=undef;
    }
    elsif ($dataField =~ m/(^=\')/s)
    {
      #Has data.
      my $value;
      $dataField = substr $dataField ,2; #Skip the ='

      LOOP:
      {  #This is to allow us to use last from a do loop.
         #Recommended in perlsyn manpage.
          do
          {
              my $matchString;

              #Find the substring ending with the first ' or first \
              $dataField =~ m/(.*?[\'\\])?/s;
              $matchString = $1;
              $value .= substr $matchString,0,length($matchString)-1;

              if ($matchString =~ m/(\'$)/s)
              {
                   # $1 runs to the end of the field value.
                  $dataField = substr $dataField,length($matchString)+1;
                  last;
              }
              else
              {
                  #deal with the escape character.
                  #It The character following the escape gets appended.
                  $dataField = substr $dataField,length($matchString);
                  $dataField =~ s/(^.)//s;
                  $value .=  $1;
              }
          } until(length($dataField)==0);
      }
      $valuesHash{$fieldName} = $value;
    }
    else
    {
      print STDERR "Error in PendingData";
      die;
    }
  }

  return \%valuesHash;
}

# eof ConvertReplicationPacketToXML
