/[MARC-Fast]/trunk/lib/MARC/Fast.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/MARC/Fast.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1 - (hide annotations)
Tue Jan 4 10:26:07 2005 UTC (19 years, 2 months ago) by dpavlin
Original Path: trunk/Fast.pm
File size: 5476 byte(s)
initital import of 0.01 into subversion

1 dpavlin 1
2     package MARC::Fast;
3     use strict;
4     use Carp;
5     use Data::Dumper;
6    
7     BEGIN {
8     use Exporter ();
9     use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
10     $VERSION = 0.01;
11     @ISA = qw (Exporter);
12     #Give a hoot don't pollute, do not export more than needed by default
13     @EXPORT = qw ();
14     @EXPORT_OK = qw ();
15     %EXPORT_TAGS = ();
16     }
17    
18     =head1 NAME
19    
20     MARC::Fast - Very fast implementation of MARC database reader
21    
22     =head1 SYNOPSIS
23    
24     use MARC::Fast;
25    
26    
27     =head1 DESCRIPTION
28    
29     This is very fast alternative to C<MARC> and C<MARC::Record> modules.
30    
31     It's is also very sutable for random access to MARC records (as opposed to
32     sequential one).
33    
34     =head1 METHODS
35    
36     =head2 new
37    
38     Read MARC database
39    
40     my $marc = new MARC::Fast(
41     marcdb => 'unimarc.iso',
42     quiet => 0,
43     debug => 0,
44     assert => 0,
45     );
46    
47     =cut
48    
49     ################################################## subroutine header end ##
50    
51    
52     sub new {
53     my $class = shift;
54     my $self = {@_};
55     bless ($self, $class);
56    
57     croak "need marcdb parametar" unless ($self->{marcdb});
58    
59     print STDERR "# opening ",$self->{marcdb},"\n" if ($self->{debug});
60    
61     open($self->{fh}, $self->{marcdb}) || croak "can't open ",$self->{marcdb},": $!";
62    
63     $self->{count} = 0;
64    
65     while (! eof($self->{fh})) {
66     $self->{count}++;
67    
68     # save record position
69     push @{$self->{fh_offset}}, tell($self->{fh});
70    
71     my $leader;
72     read($self->{fh}, $leader, 24);
73    
74     # Byte Name
75     # ---- ----
76     # 0-4 Record Length
77     # 5 Status (n=new, c=corrected and d=deleted)
78     # 6 Type of Record (a=printed material)
79     # 7 Bibliographic Level (m=monograph)
80     # 8-9 Blanks
81     # 10 Indictator count (2 for monographs)
82     # 11 Subfield code count (2 - 0x1F+subfield code itself)
83     # 12-16 Base address of data
84     # 17 Encoding level (blank=full level, 1=sublevel 1, 2=sublevel 2,
85     # 3=sublevel 3)
86     # 18 Descriptive Cataloguing Form (blank=record is full ISBD,
87     # n=record is in non-ISBD format, i=record is in
88     # an incomplete ISBD format)
89     # 19 Blank
90     # 20 Length of length field in directory (always 4 in UNIMARC)
91     # 21 Length of Starting Character Position in directory (always
92     # 5 in UNIMARC)
93     # 22 Length of implementation defined portion in directory (always
94     # 0 in UNIMARC)
95     # 23 Blank
96     #
97     # |0 45 89 |12 16|1n 450 |
98     # |xxxxxnam 22(.....) 45 <---
99    
100     print STDERR "REC ",$self->{count},": $leader\n" if ($self->{debug});
101    
102     # store leader for later
103     push @{$self->{leaders}}, $leader;
104    
105     # skip to next record
106     seek($self->{fh},substr($leader,0,5)-24,1);
107    
108     }
109    
110     return $self;
111     }
112    
113     =head2 count
114    
115     Return number of records in database
116    
117     print $marc->count;
118    
119     =cut
120    
121     sub count {
122     my $self = shift;
123     return $self->{count};
124     }
125    
126     =head2 fetch
127    
128     Fetch record from database
129    
130     my $hash = $marc->fetch(42);
131    
132     =cut
133    
134     sub fetch {
135     my $self = shift;
136    
137     my $rec_nr = shift || return;
138    
139     my $leader = $self->{leaders}->[$rec_nr - 1];
140     unless ($leader) {
141     carp "can't find record $rec_nr";
142     return;
143     };
144     my $offset = $self->{fh_offset}->[$rec_nr - 1];
145     unless (defined($offset)) {
146     carp "can't find offset for record $rec_nr";
147     return;
148     };
149    
150     my $reclen = substr($leader,0,5);
151     my $base_addr = substr($leader,12,5);
152    
153     print STDERR "# $rec_nr leader: '$leader' reclen: $reclen base addr: $base_addr [dir: ",$base_addr - 24,"]\n" if ($self->{debug});
154    
155     my $skip = 0;
156    
157     print STDERR "# seeking to $offset + 24\n" if ($self->{debug});
158    
159     if ( ! seek($self->{fh}, $offset+24, 0) ) {
160     carp "can't seek to $offset: $!";
161     return;
162     }
163    
164     print STDERR "# reading ",$base_addr-24," bytes of dictionary\n" if ($self->{debug});
165    
166     my $directory;
167     if( ! read($self->{fh},$directory,$base_addr-24) ) {
168     carp "can't read directory: $!";
169     $skip = 1;
170     } else {
171     print STDERR "# $rec_nr directory: [",length($directory),"] '$directory'\n" if ($self->{debug});
172     }
173    
174     print STDERR "# reading ",$reclen-$base_addr," bytes of fields\n" if ($self->{debug});
175    
176     my $fields;
177     if( ! read($self->{fh},$fields,$reclen-$base_addr) ) {
178     carp "can't read fields: $!";
179     $skip = 1;
180     } else {
181     print STDERR "# $rec_nr fields: '$fields'\n" if ($self->{debug});
182     }
183    
184     my $row;
185    
186     while (!$skip && $directory =~ s/(\d{3})(\d{4})(\d{5})//) {
187     my ($tag,$len,$addr) = ($1,$2,$3);
188    
189     if (($addr+$len) > length($fields)) {
190     print STDERR "WARNING: error in dictionary on record $rec_nr skipping...\n" if (! $self->{quiet});
191     $skip = 1;
192     next;
193     }
194    
195     # take field
196     my $f = substr($fields,$addr,$len);
197     print STDERR "tag/len/addr $tag [$len] $addr: '$f'\n" if ($self->{debug});
198    
199     $row->{$tag} = $f;
200    
201     my $del = substr($fields,$addr+$len-1,1);
202    
203     # check field delimiters...
204     if ($self->{assert} && $del ne chr(30)) {
205     print STDERR "WARNING: skipping record $rec_nr, can't find delimiter 30 got: '$del'\n" if (! $self->{quiet});
206     $skip = 1;
207     next;
208     }
209    
210     if ($self->{assert} && length($f) < 2) {
211     print STDERR "WARNING: skipping field $tag from record $rec_nr because it's too short!\n" if (! $self->{quiet});
212     next;
213     }
214    
215     }
216    
217     return $row;
218     }
219    
220     1;
221     __END__
222    
223     =head1 BUGS
224    
225    
226    
227     =head1 SUPPORT
228    
229    
230    
231     =head1 AUTHOR
232    
233     Dobrica Pavlinusic
234     CPAN ID: DPAVLIN
235     dpavlin@rot13.org
236     http://www.rot13.org/~dpavlin/
237    
238     =head1 COPYRIGHT
239    
240     This program is free software; you can redistribute
241     it and/or modify it under the same terms as Perl itself.
242    
243     The full text of the license can be found in the
244     LICENSE file included with this module.
245    
246    
247     =head1 SEE ALSO
248    
249     perl(1).
250    
251     =cut

  ViewVC Help
Powered by ViewVC 1.1.26