#!/usr/bin/perl

# updatemarc.pl - hack MARC records harvested from Internet Archive

# Eric Lease Morgan <emorgan@nd.edu>
# June 1, 2009 - first cut
# June 2, 2009 - looped past bogus records; tweaked debugging


# define - filesystem locations and URL pieces used by the script
use constant {
	DATADIR => '/var/www/html/etexts',             # directory tree searched for MARC files
	PREPEND => '/var/www/html/etexts',             # path prefix swapped for SERVER . ROOT in PDF links
	ROOT    => '/etexts',                          # URL path that takes the place of PREPEND
	SERVER  => 'http://zoia.library.nd.edu',       # scheme and host of the local PDF links
	ARCHIVE => 'http://www.archive.org/details/',  # base URL; the record key is appended to it
};

# require
use strict;
use File::Find;
use MARC::Batch;

# configure - copy the constants into lexicals so they can be
# interpolated into strings and patterns
my ( $root, $prepend, $server ) = ( ROOT, PREPEND, SERVER );

# get a list of all the MARC records; get_file_names is called once for
# every entry found below DATADIR and fills @files
my @files;
find( { wanted => \&get_file_names }, DATADIR );

# process each MARC record
#
# For every harvested *_meta.mrc file: read its first MARC record, add a
# 001 field holding the item key plus two 856 fields (local PDF and the
# Internet Archive page), and write the result next to the input as
# <name>.marc. Files whose record cannot be read, or is read with
# warnings, are skipped and not counted.
my $count = 0;
foreach my $mrc ( @files ) {
	
	# Only *_meta.mrc files yield an output name that differs from the
	# input name; anything else would be overwritten in place, so skip it.
	if ( $mrc !~ /_meta\.mrc$/ ) {
		warn "Skipping $mrc: not a *_meta.mrc file\n";
		next;
	}
	
	# marc - output file name, same directory as the input
	my $marc =  $mrc;
	$marc    =~ s/_meta\.mrc$/.marc/;
	
	# pdf - URL of the local PDF; \Q...\E keeps the path prefix literal
	my $pdf =  $mrc;
	$pdf    =~ s/\Q$prepend\E/$server$root/;
	$pdf    =~ s/_meta\.mrc$/.pdf/;

	# key - name of the directory that directly contains the file
	my $key = substr $mrc, 0, ( rindex $mrc, '/' );
	$key    = substr $key, ( rindex $key, '/' ) + 1;
	
	# internet archive page
	my $archive = ARCHIVE . "$key/";
	
	# echo/debug
	#print "      MRC: $mrc\n";
	#print "     MARC: $marc\n";
	print "      key: $key\n";
	#print "      PDF: $pdf\n";
	#print "  Archive: $archive\n";
	
	# get the next record; skip bogus ones
	my $batch  = MARC::Batch->new( 'USMARC', $mrc );
	my $record = $batch->next;
	next if ( ! $record );
	next if ( $batch->warnings );

	# echo/debug
	#print "   author: " . $record->author, "\n";
	#print "    title: " . $record->title, "\n";
	#print "  edition: " . $record->edition, "\n";
	#print "     date: " . $record->publication_date, "\n";
	#print "\n";
	
	# add control and 856
	# NOTE(review): append_fields adds to the end of the record and does
	# not replace an existing 001 - confirm harvested records never carry
	# their own 001.
	$record->append_fields( MARC::Field->new( '001', $key ));
	$record->append_fields( MARC::Field->new( '856', '', '', u => $pdf, z => 'Local PDF' ));
	$record->append_fields( MARC::Field->new( '856', '', '', u => $archive, z => 'Remote/original version' ));
	
	# save - three-argument open so the file name is never parsed as a
	# mode; close is checked because buffered write errors surface there
	open( my $out, '>', $marc ) or die "Can't open MARC $marc: $!\n";
	print {$out} $record->as_usmarc;
	close( $out ) or die "Can't close MARC $marc: $!\n";
	
	# increment
	$count++;
	
}

# done
print "$count records processed. Done.\n";
exit;


# get_file_names - File::Find "wanted" callback.
#
# Takes no arguments. Reads $File::Find::name (the complete path of the
# entry File::Find is currently visiting) and, when that path ends in
# ".mrc", appends it to the file-level @files list. Returns nothing useful.
sub get_file_names {

	# get the full path
	my $file = $File::Find::name;
	
	# skip anything that is not a .mrc file. Leave with return, not next:
	# next inside a sub only works by jumping into the caller's loop and
	# fails outright when there is no enclosing loop.
	return if ( $file !~ /\.mrc$/ );
	
	# remember the MARC file
	push @files, $file;
	return;
			
}