#!/usr/bin/perl

use strict;
use warnings;

use Benchmark qw(:all);

use FindBin qw($Bin);
use lib "$Bin/../lib";
use File::Spec;
use IO::Uncompress::AnyUncompress qw($AnyUncompressError);

# Load the apt-cacher code. read_config(), private_config(), get_namespace()
# and get_original_url() are not defined in this script, so they presumably
# come from here -- TODO confirm.
require 'apt-cacher.pl';

# Package global (not a lexical) so that code loaded above can see it.
our $cfg = read_config('/etc/apt-cacher/apt-cacher.conf');
private_config();

# Build the lookup table and the two regexps used by the 'hash_algorithms'
# benchmark from the configured algorithm list. Each element of
# @{$cfg->{_algorithms}} is used as a [name, hex digest length] pair.
my (%hash_length, @digest_patterns, @field_patterns);
foreach my $alg (@{$cfg->{_algorithms}}) {
    my ($alg_name, $hex_len) = @{$alg};

    # Hex digest length => algorithm name
    $hash_length{$hex_len} = $alg_name;

    # Bare hex digest of exactly this length (diff_Index/Release/Sources)
    push @digest_patterns, "[0-9a-f]{$hex_len}";

    # "<ALG>: <hash>" field as found in Packages; the md5 field is spelt
    # "MD5sum" rather than "MD5".
    my $suffix = $alg_name eq 'md5' ? 'sum' : '';
    push @field_patterns,
      "(?'alg'\U$alg_name)" . $suffix . ":\\s+(?'hash'[a-z0-9]{$hex_len})";
}

# Compile each alternation once, up front.
my $algorithms_regexp = do {
    my $alternation = join('|', @digest_patterns);
    qr/$alternation/;
};
my $packages_regexp = do {
    my $alternation = join('|', @field_patterns);
    qr/$alternation/;
};


# The single command-line argument is the path of the index file to parse.
# $name and $fh are shared by both benchmarked closures below.
my ($name) = @ARGV;

# Fail with a clear usage message instead of passing undef to open(), which
# would emit an "uninitialized value" warning and a confusing error.
die "Usage: $0 <index file>\n" unless defined $name && length $name;

open(my $fh, '<', $name) or die "Open $name failed: $!";

# Benchmark two implementations of the index-parsing loop against the same
# input file. A negative count asks Benchmark to run each sub for at least
# that many CPU seconds (here 10) and then print a comparison chart.
#
# Both subs rewind the shared $fh, wrap it in a fresh decompressor and parse
# every line into %data. The parsed data is thrown away at the end of each
# record, so only the parsing cost is measured.
#
# NOTE: both subs overwrite the file-level $name with its basename, so after
# the very first call $name no longer contains the directory part.
cmpthese(-10, {
	      # Reference implementation: hard-coded digest lengths and one
	      # regexp branch per algorithm.
	      'v1.7-devel' =>
	      sub {

		  seek($fh,0,0) || die "Seek failed: $!";

		  my $raw = IO::Uncompress::AnyUncompress->new($fh)
		    or die "Decompression failed: $AnyUncompressError\n";

		  # Name is just the cached filename without path
		  $name = (File::Spec->splitpath($name))[2];

		  # Determine namespace
		  my $namespace;
		  if ($namespace = get_namespace(get_original_url($name)) || '') { # Default empty, not undef
		      $namespace .= '/';
		  }

		  my ($indexbase) = ($name =~ /([^\/]+_)(?:Index|(?:In)?Release)$/);
		  $indexbase = '' unless $indexbase; # Empty by default (for Sources)

		  # Hex digest length => algorithm name. Shadows the file-level
		  # %hash_length inside this sub.
		  my %hash_length = (32 => 'md5', 40 => 'sha1', 64 => 'sha256');
		  my ($skip,%data);
		  while (<$raw>) {
		      # Stop reading if the decompressor has reported an error
		      last if $AnyUncompressError;
		      chomp;
		      if (/^SHA\d+-Patches:/) {
			  $skip = 0;
		      } elsif (/^SHA\d+-[a-zA-Z]+:/) {
			  # This flag prevents us bothering with unnecessary sections
			  # (History|Current|Download) of diff_Index files
			  $skip = 1;
		      } elsif (/^\s+([a-z0-9]{32,64})\s+(\d+)\s(\S+)$/) { # diff_Index/Release/Sources
			  # 'next' still runs the continue block below
			  next if $skip;
			  my $hexdigest=$1;
			  my $size=$2;
			  my $file=$indexbase.$3;

			  $file=~s!/!_!g; # substitute any separators in indexed filename

			  if ($name =~ /Index$/) {
			      $file.=".gz";
			  } elsif ($name =~ /_Sources(?:\.(?:x|g)z|\.bz2)?$/) {
			      # Prepend namespace, if set
			      $file = $namespace . $file;
			  }
			  $data{$file}{size} = $size;
			  {	# Select algorithm based on hex length
			      my $len = length($hexdigest);
			      if (exists $hash_length{$len}) {
				  $data{$file}{$hash_length{$len}}=$hexdigest;
			      } else {
				  warn "Unrecognised algorithm length: $len. Ignoring.";
			      }
			  }
		      } elsif (/^MD5sum:\s+([a-z0-9]{32})$/) { # Packages
			  $data{md5}=$1;
		      } elsif (/^SHA1:\s+([a-z0-9]{40})$/) {
			  $data{sha1}=$1;
		      } elsif (/^SHA256:\s+([a-z0-9]{64})$/) {
			  $data{sha256}=$1;
		      } elsif (/^Size:\s+([0-9]+)$/) {
			  $data{size}=$1;
		      } elsif (/^Filename:\s+.*?([^\/]+)$/) { # Non-greedy quantifier essential
			  # Prepend namespace, if set
			  $data{file} = $namespace . $1;
		      }
		  } continue {
		      # diff_Index and Release files have no empty line at the end, so also
		      # test eof() for them
		      # (!length tests the chomped $_, i.e. an empty line)
		      if (!length || $raw->eof()) { # End of record/file
			  if (exists $data{file}) {
			      # From Packages. Convert to hash of hashes with filename as key
			      # All four keys are copied unconditionally, so a
			      # field missing from the record is stored as undef.
			      foreach (qw(size md5 sha1 sha256)) {
				  $data{$data{file}}{$_} = $data{$_};
				  delete $data{$_};
			      }
			      delete $data{file};
			  }

			  # Benchmark only: the parsed record is discarded
			  undef %data; # Reset
		      }
		  };
	      },
	      # Candidate implementation: digest lengths and regexps are taken
	      # from the file-level %hash_length, $algorithms_regexp and
	      # $packages_regexp, which are built from $cfg->{_algorithms}.
	      'hash_algorithms' =>
	      sub {

		  seek($fh,0,0) || die "Seek failed: $!";

		  my $raw = IO::Uncompress::AnyUncompress->new($fh)
		    or die "Decompression failed: $AnyUncompressError\n";

		  # Name is just the cached filename without path
		  $name = (File::Spec->splitpath($name))[2];

		  # Determine namespace
		  my $namespace;
		  if ($namespace = get_namespace(get_original_url($name)) || '') { # Default empty, not undef
		      $namespace .= '/';
		  }

		  my ($indexbase) = ($name =~ /([^\/]+_)(?:Index|(?:In)?Release)$/);
		  $indexbase = '' unless $indexbase; # Empty by default (for Sources)

		  my ($skip,%data);
		  while (<$raw>) {
		      # Stop reading if the decompressor has reported an error
		      last if $AnyUncompressError;
		      chomp;
		      if (/^SHA\d+-Patches:/) {
			  $skip = 0;
		      } elsif (/^SHA\d+-[a-zA-Z]+:/) {
			  # This flag prevents us bothering with unnecessary sections
			  # (History|Current|Download) of diff_Index files
			  $skip = 1;
		      } elsif (/^\s+($algorithms_regexp)\s+(\d+)\s(\S+)$/o) { # diff_Index/Release/Sources
			  # 'next' still runs the continue block below
			  next if $skip;
			  my $hexdigest=$1;
			  my $size=$2;
			  my $file=$indexbase.$3;

			  $file=~s!/!_!g; # substitute any separators in indexed filename

			  if ($name =~ /Index$/) {
			      $file.=".gz";
			  } elsif ($name =~ /_Sources(?:\.(?:x|g)z|\.bz2)?$/) {
			      # Prepend namespace, if set
			      $file = $namespace . $file;
			  }
			  $data{$file}{size} = $size;
			  {	# Select algorithm based on hex length
			      my $len = length($hexdigest);
			      if (exists $hash_length{$len}) {
				  $data{$file}{$hash_length{$len}}=$hexdigest;
			      } else {
				  warn "Unrecognised algorithm length: $len. Ignoring.";
			      }
			  }
		      } elsif (/^$packages_regexp$/o) { # Packages
			  # Named captures: 'alg' is the upper-case algorithm name
			  # as it appears in the file, 'hash' is the hex digest
			  $data{lc $+{alg}}=$+{hash};
		      } elsif (/^Size:\s+([0-9]+)$/) {
			  $data{size}=$1;
		      } elsif (/^Filename:\s+.*?([^\/]+)$/) { # Non-greedy quantifier essential
			  # Prepend namespace, if set
			  $data{file} = $namespace . $1;
		      }
		  } continue {
		      # diff_Index and Release files have no empty line at the end, so also
		      # test eof() for them
		      # (!length tests the chomped $_, i.e. an empty line)
		      if (!length || $raw->eof()) { # End of record/file
			  if (exists $data{file}) {
			      # From Packages. Convert to hash of hashes with filename as key
			      # Unlike 'v1.7-devel', only keys actually present
			      # in the record are copied.
			      foreach (qw(size), map {$_->[0]} @{$cfg->{_algorithms}}) {
				  next unless exists $data{$_};
				  $data{$data{file}}{$_} = $data{$_};
				  delete $data{$_};
			      }
			      delete $data{file};
			  }

			  # Benchmark only: the parsed record is discarded
			  undef %data; # Reset
		      }
		  } ;
	      }
	     });
