View file File name : marcdump Content :#!/usr/bin/perl -w # vi:et:sw=4 ts=4 =head1 NAME marcdump - MARC record dump utility =head1 SYNOPSIS B<marcdump> [options] file(s) =cut use strict; use integer; use Encode; use MARC::File; use MARC::File::USMARC; use MARC::Record; use Getopt::Long; ## flag STDOUT for UTF8 my $opt_print = 1; my $opt_hex = 0; my $opt_quiet = 0; my $opt_stats = 1; my @opt_field = (); my $opt_help = 0; my $opt_lif = 0; my $rc = GetOptions( "version" => sub { print "$0, using MARC::Record v$MARC::Record::VERSION\n"; exit 1; }, "print!" => \$opt_print, "hex!" => \$opt_hex, "lif!" => \$opt_lif, "quiet!" => \$opt_quiet, "stats!" => \$opt_stats, "field=s" => \@opt_field, "debug!" => \$MARC::Record::DEBUG, "help" => \$opt_help, ); my @files = @ARGV; if ( $opt_help || !@files || !$rc ) { print <DATA>; exit 1; } my $wants_leader = grep { /LDR/ } @opt_field; my $class = $opt_lif ? "MARC::File::MicroLIF" : "MARC::File::USMARC"; eval "require $class"; # Must be quoted to get path searching my %counts; my %errors; for my $filename ( @files ) { $counts{$filename} = 0; $errors{$filename} = 0; warn "$filename\n" unless $opt_quiet; my $file = $class->in( $filename ) or die $MARC::File::ERROR; while ( my $marc = $file->next() ) { ++$counts{$filename}; warn "$counts{$filename} records\n" if ( !$opt_quiet && ($counts{$filename} % 1000 == 0) ); if ( @opt_field ) { $marc = $marc->clone( @opt_field ); $marc->leader('') unless $wants_leader; } if ( $opt_print ) { if ( $opt_hex ) { print_hex( $marc ); } else { # stifle warnings here in case there's utf8 data being printed no warnings; print $marc->as_formatted, "\n\n"; } } if ( my @warnings = $marc->warnings() ) { ++$errors{$filename}; print join( "\n", @warnings, "" ); } } # while $file->close(); } # for if ( $opt_stats ) { print " Recs Errs Filename\n"; print "----- ----- --------\n"; for my $key ( sort keys %counts ) { printf( "%5d %5d %s\n", $counts{$key}, $errors{$key}, $key ); } # for } # if stats sub print_hex { my $marc = shift; my $raw = $marc->as_usmarc(); print "\n"; my $offset = 0; # dump the leader my $leader = bytes::substr( $raw, 0, MARC::Record::LEADER_LEN ); my $part1 = bytes::substr( $leader, 0, 12 ); my $part2 = bytes::substr( $leader, 12 ); _hex_line_output( $offset, _to_hex($part1), _to_ascii($part1), 48 ); _hex_line_output( $offset+12, _to_hex($part2), _to_ascii($part2), 48 ); $offset += MARC::Record::LEADER_LEN; # dump the directory. If we can't find end-of-field character that # follows the directory, everything following the leader (which we # have already dumped) will be dumped as part of the data section. my $dir_end = bytes::index( $raw, MARC::File::USMARC::END_OF_FIELD, MARC::Record::LEADER_LEN ); if ( $dir_end >= 0 ) { for ( my $n = $offset; $n < $dir_end; $n += MARC::File::USMARC::DIRECTORY_ENTRY_LEN ) { my $dir_entry = bytes::substr( $raw, $n, MARC::File::USMARC::DIRECTORY_ENTRY_LEN ); my $hex = _to_hex( $dir_entry ); my $ascii = bytes::substr( $dir_entry, 0, 3 ) . ' ' . bytes::substr( $dir_entry, 3, 4 ) . ' ' . bytes::substr( $dir_entry, 7, 5 ) ; _hex_line_output( $offset, $hex, $ascii, 48 ); $offset += MARC::File::USMARC::DIRECTORY_ENTRY_LEN; } # dump the end-of-field character that follows the directory _hex_line_output( $offset, _to_hex(MARC::File::USMARC::END_OF_FIELD), '.', 48 ); ++$offset; } # dump the data my $data_offset = 0; while ( $offset < bytes::length($raw) ) { my $chunk = bytes::substr( $raw, $offset, 16 ); _hex_line_output( $data_offset, _to_hex($chunk), _to_ascii($chunk), 48 ); $offset += 16; $data_offset += 16; } } sub _to_ascii { my $raw = shift; if ( defined $raw ) { $raw =~ s/[\x00-\x1f\x7f-\xff]/./g; } return $raw; } sub _to_hex { my $raw = shift; my $result = ''; if ( defined $raw ) { for ( my $n = 0; $n < bytes::length($raw); $n++ ) { $result .= sprintf( '%02x ', ord(bytes::substr($raw, $n, 1)) ); } } $result =~ s/ $//; return $result; } sub _hex_line_output { my $offset = shift; my $hex = shift; my $ascii = shift; my $width = shift; printf( "%05d: %-$width.${width}s %s\n", $offset, $hex, $ascii ); } __END__ Usage: marcdump [options] file(s) Options: --[no]print Print a MicroLIF-style dump of each record --[no]hex If --print active, make the output hexadecimal --lif Input files are MicroLIF, not USMARC --field=spec Specify a field spec to include. There may be many. Examples: --field=245 --field=1XX --[no]quiet Print status messages --[no]stats Print a statistical summary by file at the end --version Print version information --help Print this summary