/[MARC-Fast]/trunk/Fast.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/Fast.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5 - (show annotations)
Sat Oct 8 16:33:09 2005 UTC (18 years, 5 months ago) by dpavlin
File size: 5537 byte(s)
convert repeatable fileds into repeatable subfields

1
2 package MARC::Fast;
3 use strict;
4 use Carp;
5 use Data::Dumper;
6
7 BEGIN {
8 use Exporter ();
9 use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
10 $VERSION = 0.02;
11 @ISA = qw (Exporter);
12 #Give a hoot don't pollute, do not export more than needed by default
13 @EXPORT = qw ();
14 @EXPORT_OK = qw ();
15 %EXPORT_TAGS = ();
16 }
17
18 =head1 NAME
19
20 MARC::Fast - Very fast implementation of MARC database reader
21
22 =head1 SYNOPSIS
23
24 use MARC::Fast;
25
26
27 =head1 DESCRIPTION
28
29 This is very fast alternative to C<MARC> and C<MARC::Record> modules.
30
31 It's is also very sutable for random access to MARC records (as opposed to
32 sequential one).
33
34 =head1 METHODS
35
36 =head2 new
37
38 Read MARC database
39
40 my $marc = new MARC::Fast(
41 marcdb => 'unimarc.iso',
42 quiet => 0,
43 debug => 0,
44 assert => 0,
45 );
46
47 =cut
48
49 ################################################## subroutine header end ##
50
51
52 sub new {
53 my $class = shift;
54 my $self = {@_};
55 bless ($self, $class);
56
57 croak "need marcdb parametar" unless ($self->{marcdb});
58
59 print STDERR "# opening ",$self->{marcdb},"\n" if ($self->{debug});
60
61 open($self->{fh}, $self->{marcdb}) || croak "can't open ",$self->{marcdb},": $!";
62
63 $self->{count} = 0;
64
65 while (! eof($self->{fh})) {
66 $self->{count}++;
67
68 # save record position
69 push @{$self->{fh_offset}}, tell($self->{fh});
70
71 my $leader;
72 read($self->{fh}, $leader, 24);
73
74 # Byte Name
75 # ---- ----
76 # 0-4 Record Length
77 # 5 Status (n=new, c=corrected and d=deleted)
78 # 6 Type of Record (a=printed material)
79 # 7 Bibliographic Level (m=monograph)
80 # 8-9 Blanks
81 # 10 Indictator count (2 for monographs)
82 # 11 Subfield code count (2 - 0x1F+subfield code itself)
83 # 12-16 Base address of data
84 # 17 Encoding level (blank=full level, 1=sublevel 1, 2=sublevel 2,
85 # 3=sublevel 3)
86 # 18 Descriptive Cataloguing Form (blank=record is full ISBD,
87 # n=record is in non-ISBD format, i=record is in
88 # an incomplete ISBD format)
89 # 19 Blank
90 # 20 Length of length field in directory (always 4 in UNIMARC)
91 # 21 Length of Starting Character Position in directory (always
92 # 5 in UNIMARC)
93 # 22 Length of implementation defined portion in directory (always
94 # 0 in UNIMARC)
95 # 23 Blank
96 #
97 # |0 45 89 |12 16|1n 450 |
98 # |xxxxxnam 22(.....) 45 <---
99
100 print STDERR "REC ",$self->{count},": $leader\n" if ($self->{debug});
101
102 # store leader for later
103 push @{$self->{leaders}}, $leader;
104
105 # skip to next record
106 seek($self->{fh},substr($leader,0,5)-24,1);
107
108 }
109
110 return $self;
111 }
112
113 =head2 count
114
115 Return number of records in database
116
117 print $marc->count;
118
119 =cut
120
121 sub count {
122 my $self = shift;
123 return $self->{count};
124 }
125
126 =head2 fetch
127
128 Fetch record from database
129
130 my $hash = $marc->fetch(42);
131
132 =cut
133
134 sub fetch {
135 my $self = shift;
136
137 my $rec_nr = shift || return;
138
139 my $leader = $self->{leaders}->[$rec_nr - 1];
140 unless ($leader) {
141 carp "can't find record $rec_nr";
142 return;
143 };
144 my $offset = $self->{fh_offset}->[$rec_nr - 1];
145 unless (defined($offset)) {
146 carp "can't find offset for record $rec_nr";
147 return;
148 };
149
150 my $reclen = substr($leader,0,5);
151 my $base_addr = substr($leader,12,5);
152
153 print STDERR "# $rec_nr leader: '$leader' reclen: $reclen base addr: $base_addr [dir: ",$base_addr - 24,"]\n" if ($self->{debug});
154
155 my $skip = 0;
156
157 print STDERR "# seeking to $offset + 24\n" if ($self->{debug});
158
159 if ( ! seek($self->{fh}, $offset+24, 0) ) {
160 carp "can't seek to $offset: $!";
161 return;
162 }
163
164 print STDERR "# reading ",$base_addr-24," bytes of dictionary\n" if ($self->{debug});
165
166 my $directory;
167 if( ! read($self->{fh},$directory,$base_addr-24) ) {
168 carp "can't read directory: $!";
169 $skip = 1;
170 } else {
171 print STDERR "# $rec_nr directory: [",length($directory),"] '$directory'\n" if ($self->{debug});
172 }
173
174 print STDERR "# reading ",$reclen-$base_addr," bytes of fields\n" if ($self->{debug});
175
176 my $fields;
177 if( ! read($self->{fh},$fields,$reclen-$base_addr) ) {
178 carp "can't read fields: $!";
179 $skip = 1;
180 } else {
181 print STDERR "# $rec_nr fields: '$fields'\n" if ($self->{debug});
182 }
183
184 my $row;
185
186 while (!$skip && $directory =~ s/(\d{3})(\d{4})(\d{5})//) {
187 my ($tag,$len,$addr) = ($1,$2,$3);
188
189 if (($addr+$len) > length($fields)) {
190 print STDERR "WARNING: error in dictionary on record $rec_nr skipping...\n" if (! $self->{quiet});
191 $skip = 1;
192 next;
193 }
194
195 # take field
196 my $f = substr($fields,$addr,$len);
197 print STDERR "tag/len/addr $tag [$len] $addr: '$f'\n" if ($self->{debug});
198
199 if ($row->{$tag}) {
200 $row->{$tag} .= $f;
201 } else {
202 $row->{$tag} = $f;
203 }
204
205 my $del = substr($fields,$addr+$len-1,1);
206
207 # check field delimiters...
208 if ($self->{assert} && $del ne chr(30)) {
209 print STDERR "WARNING: skipping record $rec_nr, can't find delimiter 30 got: '$del'\n" if (! $self->{quiet});
210 $skip = 1;
211 next;
212 }
213
214 if ($self->{assert} && length($f) < 2) {
215 print STDERR "WARNING: skipping field $tag from record $rec_nr because it's too short!\n" if (! $self->{quiet});
216 next;
217 }
218
219 }
220
221 return $row;
222 }
223
224 1;
225 __END__
226
227 =head1 BUGS
228
229
230
231 =head1 SUPPORT
232
233
234
235 =head1 AUTHOR
236
237 Dobrica Pavlinusic
238 CPAN ID: DPAVLIN
239 dpavlin@rot13.org
240 http://www.rot13.org/~dpavlin/
241
242 =head1 COPYRIGHT
243
244 This program is free software; you can redistribute
245 it and/or modify it under the same terms as Perl itself.
246
247 The full text of the license can be found in the
248 LICENSE file included with this module.
249
250
251 =head1 SEE ALSO
252
253 perl(1).
254
255 =cut

  ViewVC Help
Powered by ViewVC 1.1.26