#!/usr/bin/perl -w
#
# Recover lost cddb files using discid files created by Gronk
#
# Correct said files from info stored in the directory structure, if
# possible. Some people are utter monkeys when it comes to putting
# data into CDDB.
#
# CDDB HTTP Query Format:
# http://www.freedb.org/~cddb/cddb.cgi?cmd=cddb+read+rock+12345678&hello=joe+my.host.com+xmcd+2.1&proto=1
use CDDB;
use File::Find;
use strict;

# Autoflush the currently selected output handle so that progress
# messages printed without a trailing newline show up right away.
$| = 1;

# freedb mirrors to spread the lookups across (list as of January 2003).
my @mirrorsites = (
  'at.freedb.org',
  'freedb.freedb.de',
  'au.freedb.org',
  'bg.freedb.org',
  'ca.freedb.org',
  'de.freedb.org',
  'es.freedb.org',
  'fi.freedb.org',
  'lu.freedb.org',
  'no.freedb.org',
  'uk.freedb.org',
  'us.freedb.org',
);

# Filename-mangling settings, taken from Gronk's config.pl.
my $force_lowercase  = 0;
my $force_underscore = 0;

# Body of the character class listing everything string_to_file keeps.
my $safechars = join( '',
  "a-z0-9",   # alphanumerics
  "áàâäãå",   # accented vowels
  "éèêë",
  "íìîï",
  "óòôöõð",
  "úùûü",
  "çøñß",     # other European characters
  "_()",      # punctuation
);

# The word separator in use must itself count as safe, otherwise
# string_to_file would strip it straight back out again.
$safechars .= $force_underscore ? "_" : " ";

# Open a connection to each mirror, keeping only the ones that answer.
# Mirrors that cannot be reached are reported on STDERR and skipped.
my @mirrors;
for my $mirror ( @mirrorsites ) {
  my $conn = CDDB->new( Host => $mirror, Port => 888 );
  if ( defined( $conn )) {
    push @mirrors, $conn;
  } else {
    print STDERR "Failed to connect to $mirror\n";
  }
}

# Everything below needs at least one live connection; stop here with a
# readable message instead of failing on an undefined object later on.
die "Could not connect to any freedb mirror, giving up\n" unless @mirrors;

# The genre list is fetched once, from the first mirror that answered.
my $cddbp = $mirrors[ 0 ];

print "Getting genres...";
my @genres = $cddbp->get_genres();
print "done.\n";

# Walk the music tree; wanted() handles every discid file it comes across.
find( \&wanted, '/export/share/mp3' );

# File::Find callback: for every file named "discid" under the music tree,
# try to fetch the matching CDDB record and store it in $HOME/.cddb.
#
# The album name is the directory that holds the discid file. Each genre is
# queried in turn, rotating through the open mirror connections. A record
# whose mangled title contains the album name (literal, case-insensitive)
# is written to $HOME/.cddb/<id> and ends the search for that disc; any
# other hit is kept as $HOME/.cddb/<id>.<genre>.inexact.
#
# Takes no arguments (reads $File::Find::name); the return value is not
# meaningful. Dies if a record cannot be written.
sub wanted {
  my $file = $File::Find::name;
  return if $file !~ /\/discid$/;

  # Parse the album out of .../<artist>/<album>/discid. A discid file at an
  # unexpected depth gives no album at all; treat that as an empty name.
  my ( $album ) = $file =~ m|^/export/share/mp3/.*/(.*)/discid$|;
  $album = '' unless defined( $album );
  $album =~ s/^\s+//;
  $album =~ s/\s+$//;

  my $cddb_dir = "$ENV{'HOME'}/.cddb";

  # Three-argument open keeps odd characters in the path from being read as
  # an open mode, and the lexical handle is closed as soon as we are done.
  my $in;
  if ( !open( $in, '<', $file )) {
    warn "  Can't open $file: $!\n";
    return;
  }
  my $id = <$in>;
  close( $in );

  # An empty discid file gives nothing to look up.
  chomp( $id ) if defined( $id );
  if ( !defined( $id ) || $id eq '' ) {
    warn "  Skipping $file: no disc id in it\n";
    return;
  }

  if ( -f "$cddb_dir/$id" ) {
    print "Skipping $album, you already have it\n";
    return;
  }
  print "Locating $id ( $album )\n";

  for my $g ( @genres ) {
    if ( -f "$cddb_dir/$id.$g.inexact" ) {
      print "  Skipping $g, already found one\n";
      next;
    }

    # Be nice, rotate the load
    my $site = shift @mirrors;
    push @mirrors, $site;

    # Do the lookup
    my $disc_info = $site->get_disc_details( $g, $id );
    next unless defined( $disc_info );

    # Possible match... check
    my $dtitle = $disc_info->{'dtitle'};
    my $title  = string_to_file( defined( $dtitle ) ? $dtitle : '' );

    # The title holds both artist and album, so look for the album as a
    # substring. \Q...\E makes the album match literally (directory names
    # may contain parentheses) and /i does the case-insensitive bit. An
    # empty album name would match anything, so it never counts as exact.
    my $match = ( $album ne '' && $title =~ /\Q$album\E/i ) ? 1 : 0;

    # Note that this allows complete unmatches to work, which may have to
    # be managed heuristically later.
    print "  Found it: $g, " . ( $match? "exact" : "maybe ($title)" ) . "\n";

    my $target = "$cddb_dir/$id" . ( $match ? "" : ".$g.inexact" );
    open( my $out, '>', $target ) or die "$id: $!";
    print $out $disc_info->{'xmcd_record'};
    # Buffered write errors only show up at close time.
    close( $out ) or die "$id: $!";

    next unless $match;

    # Nuke any inexacts for this disc. Done without a shell so that the id
    # read from the discid file is never interpreted as a command.
    if ( opendir( my $dh, $cddb_dir )) {
      my @stale = grep { /\A\Q$id\E\..*\.inexact\z/s } readdir( $dh );
      closedir( $dh );
      for my $name ( @stale ) {
        unlink( "$cddb_dir/$name" )
          or warn "    Can't remove $cddb_dir/$name: $!\n";
      }
    } else {
      warn "    Can't read $cddb_dir: $!\n";
    }
    print "    Removed any inexacts\n";
    last;
  }
}

# Borrowed from Gronk's cddb.pl.
# Turns a piece of text (a song title or band name) into something usable
# as a file name, using the same algorithm as Grip: downcase it when
# $force_lowercase is set, map spaces to underscores when $force_underscore
# is set, then drop every character that is not listed in $safechars.
# Returns the converted string; the caller's value is left untouched.
#
sub string_to_file {
  my $name = shift;   # private copy of the argument

  # note: do not localize this: Grip doesn't.

  if ( $force_lowercase ) {
    $name =~ tr/A-Z/a-z/;         # downcase
  }
  if ( $force_underscore ) {
    $name =~ s@ @_@g;             # now map space to underscore
  }
  $name =~ s/[^$safechars]//gi;   # delete unsafe chars

  return $name;
}
