/[MARC-Fast]/trunk/Fast.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/Fast.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5 - (hide annotations)
Sat Oct 8 16:33:09 2005 UTC (18 years, 5 months ago) by dpavlin
File size: 5537 byte(s)
convert repeatable fileds into repeatable subfields

1 dpavlin 1
2     package MARC::Fast;
3     use strict;
4     use Carp;
5     use Data::Dumper;
6    
7     BEGIN {
8     use Exporter ();
9     use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
10 dpavlin 5 $VERSION = 0.02;
11 dpavlin 1 @ISA = qw (Exporter);
12     #Give a hoot don't pollute, do not export more than needed by default
13     @EXPORT = qw ();
14     @EXPORT_OK = qw ();
15     %EXPORT_TAGS = ();
16     }
17    
18     =head1 NAME
19    
20     MARC::Fast - Very fast implementation of MARC database reader
21    
22     =head1 SYNOPSIS
23    
24     use MARC::Fast;
25    
26    
27     =head1 DESCRIPTION
28    
29     This is very fast alternative to C<MARC> and C<MARC::Record> modules.
30    
31     It's is also very sutable for random access to MARC records (as opposed to
32     sequential one).
33    
34     =head1 METHODS
35    
36     =head2 new
37    
38     Read MARC database
39    
40     my $marc = new MARC::Fast(
41     marcdb => 'unimarc.iso',
42     quiet => 0,
43     debug => 0,
44     assert => 0,
45     );
46    
47     =cut
48    
49     ################################################## subroutine header end ##
50    
51    
52     sub new {
53     my $class = shift;
54     my $self = {@_};
55     bless ($self, $class);
56    
57     croak "need marcdb parametar" unless ($self->{marcdb});
58    
59     print STDERR "# opening ",$self->{marcdb},"\n" if ($self->{debug});
60    
61     open($self->{fh}, $self->{marcdb}) || croak "can't open ",$self->{marcdb},": $!";
62    
63     $self->{count} = 0;
64    
65     while (! eof($self->{fh})) {
66     $self->{count}++;
67    
68     # save record position
69     push @{$self->{fh_offset}}, tell($self->{fh});
70    
71     my $leader;
72     read($self->{fh}, $leader, 24);
73    
74     # Byte Name
75     # ---- ----
76     # 0-4 Record Length
77     # 5 Status (n=new, c=corrected and d=deleted)
78     # 6 Type of Record (a=printed material)
79     # 7 Bibliographic Level (m=monograph)
80     # 8-9 Blanks
81     # 10 Indictator count (2 for monographs)
82     # 11 Subfield code count (2 - 0x1F+subfield code itself)
83     # 12-16 Base address of data
84     # 17 Encoding level (blank=full level, 1=sublevel 1, 2=sublevel 2,
85     # 3=sublevel 3)
86     # 18 Descriptive Cataloguing Form (blank=record is full ISBD,
87     # n=record is in non-ISBD format, i=record is in
88     # an incomplete ISBD format)
89     # 19 Blank
90     # 20 Length of length field in directory (always 4 in UNIMARC)
91     # 21 Length of Starting Character Position in directory (always
92     # 5 in UNIMARC)
93     # 22 Length of implementation defined portion in directory (always
94     # 0 in UNIMARC)
95     # 23 Blank
96     #
97     # |0 45 89 |12 16|1n 450 |
98     # |xxxxxnam 22(.....) 45 <---
99    
100     print STDERR "REC ",$self->{count},": $leader\n" if ($self->{debug});
101    
102     # store leader for later
103     push @{$self->{leaders}}, $leader;
104    
105     # skip to next record
106     seek($self->{fh},substr($leader,0,5)-24,1);
107    
108     }
109    
110     return $self;
111     }
112    
113     =head2 count
114    
115     Return number of records in database
116    
117     print $marc->count;
118    
119     =cut
120    
121     sub count {
122     my $self = shift;
123     return $self->{count};
124     }
125    
126     =head2 fetch
127    
128     Fetch record from database
129    
130     my $hash = $marc->fetch(42);
131    
132     =cut
133    
134     sub fetch {
135     my $self = shift;
136    
137     my $rec_nr = shift || return;
138    
139     my $leader = $self->{leaders}->[$rec_nr - 1];
140     unless ($leader) {
141     carp "can't find record $rec_nr";
142     return;
143     };
144     my $offset = $self->{fh_offset}->[$rec_nr - 1];
145     unless (defined($offset)) {
146     carp "can't find offset for record $rec_nr";
147     return;
148     };
149    
150     my $reclen = substr($leader,0,5);
151     my $base_addr = substr($leader,12,5);
152    
153     print STDERR "# $rec_nr leader: '$leader' reclen: $reclen base addr: $base_addr [dir: ",$base_addr - 24,"]\n" if ($self->{debug});
154    
155     my $skip = 0;
156    
157     print STDERR "# seeking to $offset + 24\n" if ($self->{debug});
158    
159     if ( ! seek($self->{fh}, $offset+24, 0) ) {
160     carp "can't seek to $offset: $!";
161     return;
162     }
163    
164     print STDERR "# reading ",$base_addr-24," bytes of dictionary\n" if ($self->{debug});
165    
166     my $directory;
167     if( ! read($self->{fh},$directory,$base_addr-24) ) {
168     carp "can't read directory: $!";
169     $skip = 1;
170     } else {
171     print STDERR "# $rec_nr directory: [",length($directory),"] '$directory'\n" if ($self->{debug});
172     }
173    
174     print STDERR "# reading ",$reclen-$base_addr," bytes of fields\n" if ($self->{debug});
175    
176     my $fields;
177     if( ! read($self->{fh},$fields,$reclen-$base_addr) ) {
178     carp "can't read fields: $!";
179     $skip = 1;
180     } else {
181     print STDERR "# $rec_nr fields: '$fields'\n" if ($self->{debug});
182     }
183    
184     my $row;
185    
186     while (!$skip && $directory =~ s/(\d{3})(\d{4})(\d{5})//) {
187     my ($tag,$len,$addr) = ($1,$2,$3);
188    
189     if (($addr+$len) > length($fields)) {
190     print STDERR "WARNING: error in dictionary on record $rec_nr skipping...\n" if (! $self->{quiet});
191     $skip = 1;
192     next;
193     }
194    
195     # take field
196     my $f = substr($fields,$addr,$len);
197     print STDERR "tag/len/addr $tag [$len] $addr: '$f'\n" if ($self->{debug});
198    
199 dpavlin 5 if ($row->{$tag}) {
200     $row->{$tag} .= $f;
201     } else {
202     $row->{$tag} = $f;
203     }
204 dpavlin 1
205     my $del = substr($fields,$addr+$len-1,1);
206    
207     # check field delimiters...
208     if ($self->{assert} && $del ne chr(30)) {
209     print STDERR "WARNING: skipping record $rec_nr, can't find delimiter 30 got: '$del'\n" if (! $self->{quiet});
210     $skip = 1;
211     next;
212     }
213    
214     if ($self->{assert} && length($f) < 2) {
215     print STDERR "WARNING: skipping field $tag from record $rec_nr because it's too short!\n" if (! $self->{quiet});
216     next;
217     }
218    
219     }
220    
221     return $row;
222     }
223    
224     1;
225     __END__
226    
227     =head1 BUGS
228    
229    
230    
231     =head1 SUPPORT
232    
233    
234    
235     =head1 AUTHOR
236    
237     Dobrica Pavlinusic
238     CPAN ID: DPAVLIN
239     dpavlin@rot13.org
240     http://www.rot13.org/~dpavlin/
241    
242     =head1 COPYRIGHT
243    
244     This program is free software; you can redistribute
245     it and/or modify it under the same terms as Perl itself.
246    
247     The full text of the license can be found in the
248     LICENSE file included with this module.
249    
250    
251     =head1 SEE ALSO
252    
253     perl(1).
254    
255     =cut

  ViewVC Help
Powered by ViewVC 1.1.26