#!/usr/bin/env perl

use warnings;
# vi: set ts=4 sw=4 :
#____________________________________________________________________________
#
#   MusicBrainz -- the open internet music database
#
#   Copyright (C) 2012 Ian McEwen
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
#   $Id$
#____________________________________________________________________________

use FindBin;
use lib "$FindBin::Bin/../../lib";

use strict;

use Getopt::Long;
use File::Temp qw( tempdir );
use File::Path qw( rmtree );
use File::Copy qw( copy );

my $fHelp;
my $tmpdir = "/tmp";
my $period = my $defperiod = 'daily';
my $outputdir;
my $start;
my $limit;
my $require_previous_packet = 0;

GetOptions(
    "help"                              => \$fHelp,
    "require-previous"                  => \$require_previous_packet,
    "period=s"                          => \$period,
    "start=s"                           => \$start,
    "limit=s"                           => \$limit,
    "output=s"                           => \$outputdir,
) or exit 2;

sub usage
{
    print <<EOF;
Usage: BundleReplicationPackets root-directory [options]

    BundleReplicationPackets takes hourly packets since the
    previous packet for a given period and bundles them
    together into a new packet. It will not bundle across
    schema sequence boundaries, however.

    It's the caller's responsibility to ensure the period
    names accurately describe their contents.

    root-directory is the base directory where packets reside

    --help              show this help
    --period            name for the period,
                        default '$defperiod'
    --start             where to start (default: inspect
                        previous packet)
    --require-previous  when using no specific start, require
                        a previous packet, rather than starting
                        at the earliest available
    --limit             how many packets to include (default:
                        include all packets until present
    --output            output directory for finished packet
                        (default: root-directory/period)
EOF
}

usage(), exit if $fHelp;
usage(), exit 2 if !@ARGV;

my $rootdir = shift @ARGV;
$outputdir ||= "$rootdir/$period";
my $num = shift @ARGV;

my @sequences = <$rootdir/replication-*.tar.bz2>;
@sequences = reverse sort
    map { s%$rootdir/replication-(\d+)\.tar\.bz2%$1%; $_; }
    grep { $_ =~ /replication-(\d+)\.tar\.bz2/ } @sequences;

# Don't look at the directory if we've been given a start sequence
if (!$start) {
    my @period_sequences = <$outputdir/replication-$period-*.tar.bz2>;
    @period_sequences = reverse sort map { s%$outputdir/replication-$period-(\d+).tar.bz2%$1%; $_; } @period_sequences;

    if (scalar @period_sequences) {
        # Open up the most recent one for this period to determine where to start
        my $period_mostrecent = $period_sequences[0];

        print localtime() . " : Inspecting most recent '$period' packet ($period_mostrecent)\n";
        my $localfile = "$outputdir/replication-$period-$period_mostrecent.tar.bz2";
        my $periodcheckdir = tempdir('periodcheck-XXXXXX', DIR => $tmpdir);

        extract_tar($localfile, $periodcheckdir);

        my $last_val = read_file("LAST_REPLICATION_SEQUENCE", $periodcheckdir);
        if ($last_val =~ /\A(\d+)\n\z/) {
            $last_val = $1;
        }

        $start = $last_val + 1;

        print localtime() . " : Removing $periodcheckdir\n";
        rmtree($periodcheckdir);

        print localtime() . " : Previous '$period' packet ended $last_val, starting $start\n";
    } else {
        # Otherwise, assume they want all the packets
        if ($require_previous_packet) {
            die("Previous packet required due to --require-previous, but none exists for '$period'");
        }
        $start = $sequences[scalar @sequences - 1];
        print localtime() . " : No previous '$period' packet, using all packets (starting $start)\n";
    }
}

$start or die("No start value found");

my @seq_to_use = reverse grep { $_ >= $start } @sequences;
if ($limit) {
    die("Not enough existing packets to reach limit $limit") if scalar @seq_to_use < $limit;
    print localtime() . " : Using a maximum of $limit packets\n";
    @seq_to_use = @seq_to_use[0..$limit - 1];
}

die "No packets available for this period" unless scalar @seq_to_use;

my $newpacketdir = tempdir("mb-bundle-XXXXXX", DIR => $tmpdir);
mkdir "$newpacketdir/mbdump" or die $!;

my $schema_sequence;
my $last_replication_sequence = $start - 1;
for my $sequence (@seq_to_use) {
    die("Replication sequence $sequence is too far from $last_replication_sequence")
        if $last_replication_sequence + 1 != $sequence;
    my $individual_packet_dir = tempdir("replication-bundle-XXXXXX", DIR => $tmpdir);

    # untar the packet
    my $localfile = "$rootdir/replication-$sequence.tar.bz2";
    extract_tar($localfile, $individual_packet_dir);

    # Set schema sequence, or check it matches previous packets
    my $this_schema_sequence = read_file('SCHEMA_SEQUENCE', $individual_packet_dir);
    if ($schema_sequence) {
        die("Schema sequence mismatch: $this_schema_sequence <> $schema_sequence")
            unless $schema_sequence eq $this_schema_sequence;
    } else {
        $schema_sequence = $this_schema_sequence;
        for my $file (qw(SCHEMA_SEQUENCE REPLICATION_SEQUENCE COPYING README TIMESTAMP)) {
            copy("$individual_packet_dir/$file", "$newpacketdir/$file") or die $!;
        }
    }

    # concatenate dbmirror_pending/dbmirror_pendingdata files
    open(my $dbmirror_pending, '<', "$individual_packet_dir/mbdump/dbmirror_pending") or die $!;
    open(my $new_dbmirror_pending, '>>', "$newpacketdir/mbdump/dbmirror_pending") or die $!;
    while (<$dbmirror_pending>) {
        print { $new_dbmirror_pending } $_;
    }
    close $dbmirror_pending;
    close $new_dbmirror_pending;

    open(my $dbmirror_pendingdata, '<', "$individual_packet_dir/mbdump/dbmirror_pendingdata") or die $!;
    open(my $new_dbmirror_pendingdata, '>>', "$newpacketdir/mbdump/dbmirror_pendingdata") or die $!;
    while (<$dbmirror_pendingdata>) {
        print { $new_dbmirror_pendingdata } $_;
    }
    close $dbmirror_pendingdata;
    close $new_dbmirror_pendingdata;

    # copy REPLICATION_SEQUENCE to LAST_REPLICATION_SEQUENCE (also TIMESTAMP)
    copy("$individual_packet_dir/REPLICATION_SEQUENCE", "$newpacketdir/LAST_REPLICATION_SEQUENCE") or die $!;
    copy("$individual_packet_dir/TIMESTAMP", "$newpacketdir/LAST_TIMESTAMP") or die $!;

    print localtime() . " : Removing $individual_packet_dir\n";
    rmtree($individual_packet_dir);

    $last_replication_sequence = $sequence;
}

# make tarfile

my $tar_filename = "$outputdir/replication-$period-$start.tar.bz2";
my @tar_files = (qw(
    TIMESTAMP LAST_TIMESTAMP COPYING README
    REPLICATION_SEQUENCE LAST_REPLICATION_SEQUENCE
    SCHEMA_SEQUENCE
    mbdump/dbmirror_pending mbdump/dbmirror_pendingdata
));

print localtime() . " : Creating packet $tar_filename\n";

system { "/bin/tar" } "tar",
    "-C", $newpacketdir,
    "--bzip2", "--create",
    "--verbose",
    "--file", $tar_filename,
    "--", @tar_files;

$? == 0 or die "Tar returned $?";

print localtime() . " : Removing $newpacketdir\n";
rmtree($newpacketdir);

################################################################################

sub read_file
{
    my ($file, $base) = @_;
    open(my $fh, "<$base/$file")
        or return undef;
    local $/;
    <$fh>;
}

sub write_file
{
    my ($file, $contents, $base) = @_;
    open(my $fh, ">$base/$file") or die $!;
    print $fh $contents or die $!;
    close $fh or die $!;
}

sub extract_tar
{
    my ($localfile, $dir) = @_;
    print localtime() . " : Decompressing $localfile to $dir\n";
    system "/bin/tar",
        "-C", $dir,
        "--bzip2",
        "-xvf",
        $localfile,
        ;
    exit $? if $?;
}
