/[MARC-Fast]/trunk/Fast.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/Fast.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1 - (show annotations)
Tue Jan 4 10:26:07 2005 UTC (19 years, 2 months ago) by dpavlin
File size: 5476 byte(s)
initital import of 0.01 into subversion

1
2 package MARC::Fast;
3 use strict;
4 use Carp;
5 use Data::Dumper;
6
7 BEGIN {
8 use Exporter ();
9 use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
10 $VERSION = 0.01;
11 @ISA = qw (Exporter);
12 #Give a hoot don't pollute, do not export more than needed by default
13 @EXPORT = qw ();
14 @EXPORT_OK = qw ();
15 %EXPORT_TAGS = ();
16 }
17
18 =head1 NAME
19
20 MARC::Fast - Very fast implementation of MARC database reader
21
22 =head1 SYNOPSIS
23
24 use MARC::Fast;
25
26
27 =head1 DESCRIPTION
28
29 This is very fast alternative to C<MARC> and C<MARC::Record> modules.
30
31 It's is also very sutable for random access to MARC records (as opposed to
32 sequential one).
33
34 =head1 METHODS
35
36 =head2 new
37
38 Read MARC database
39
40 my $marc = new MARC::Fast(
41 marcdb => 'unimarc.iso',
42 quiet => 0,
43 debug => 0,
44 assert => 0,
45 );
46
47 =cut
48
49 ################################################## subroutine header end ##
50
51
52 sub new {
53 my $class = shift;
54 my $self = {@_};
55 bless ($self, $class);
56
57 croak "need marcdb parametar" unless ($self->{marcdb});
58
59 print STDERR "# opening ",$self->{marcdb},"\n" if ($self->{debug});
60
61 open($self->{fh}, $self->{marcdb}) || croak "can't open ",$self->{marcdb},": $!";
62
63 $self->{count} = 0;
64
65 while (! eof($self->{fh})) {
66 $self->{count}++;
67
68 # save record position
69 push @{$self->{fh_offset}}, tell($self->{fh});
70
71 my $leader;
72 read($self->{fh}, $leader, 24);
73
74 # Byte Name
75 # ---- ----
76 # 0-4 Record Length
77 # 5 Status (n=new, c=corrected and d=deleted)
78 # 6 Type of Record (a=printed material)
79 # 7 Bibliographic Level (m=monograph)
80 # 8-9 Blanks
81 # 10 Indictator count (2 for monographs)
82 # 11 Subfield code count (2 - 0x1F+subfield code itself)
83 # 12-16 Base address of data
84 # 17 Encoding level (blank=full level, 1=sublevel 1, 2=sublevel 2,
85 # 3=sublevel 3)
86 # 18 Descriptive Cataloguing Form (blank=record is full ISBD,
87 # n=record is in non-ISBD format, i=record is in
88 # an incomplete ISBD format)
89 # 19 Blank
90 # 20 Length of length field in directory (always 4 in UNIMARC)
91 # 21 Length of Starting Character Position in directory (always
92 # 5 in UNIMARC)
93 # 22 Length of implementation defined portion in directory (always
94 # 0 in UNIMARC)
95 # 23 Blank
96 #
97 # |0 45 89 |12 16|1n 450 |
98 # |xxxxxnam 22(.....) 45 <---
99
100 print STDERR "REC ",$self->{count},": $leader\n" if ($self->{debug});
101
102 # store leader for later
103 push @{$self->{leaders}}, $leader;
104
105 # skip to next record
106 seek($self->{fh},substr($leader,0,5)-24,1);
107
108 }
109
110 return $self;
111 }
112
113 =head2 count
114
115 Return number of records in database
116
117 print $marc->count;
118
119 =cut
120
121 sub count {
122 my $self = shift;
123 return $self->{count};
124 }
125
126 =head2 fetch
127
128 Fetch record from database
129
130 my $hash = $marc->fetch(42);
131
132 =cut
133
134 sub fetch {
135 my $self = shift;
136
137 my $rec_nr = shift || return;
138
139 my $leader = $self->{leaders}->[$rec_nr - 1];
140 unless ($leader) {
141 carp "can't find record $rec_nr";
142 return;
143 };
144 my $offset = $self->{fh_offset}->[$rec_nr - 1];
145 unless (defined($offset)) {
146 carp "can't find offset for record $rec_nr";
147 return;
148 };
149
150 my $reclen = substr($leader,0,5);
151 my $base_addr = substr($leader,12,5);
152
153 print STDERR "# $rec_nr leader: '$leader' reclen: $reclen base addr: $base_addr [dir: ",$base_addr - 24,"]\n" if ($self->{debug});
154
155 my $skip = 0;
156
157 print STDERR "# seeking to $offset + 24\n" if ($self->{debug});
158
159 if ( ! seek($self->{fh}, $offset+24, 0) ) {
160 carp "can't seek to $offset: $!";
161 return;
162 }
163
164 print STDERR "# reading ",$base_addr-24," bytes of dictionary\n" if ($self->{debug});
165
166 my $directory;
167 if( ! read($self->{fh},$directory,$base_addr-24) ) {
168 carp "can't read directory: $!";
169 $skip = 1;
170 } else {
171 print STDERR "# $rec_nr directory: [",length($directory),"] '$directory'\n" if ($self->{debug});
172 }
173
174 print STDERR "# reading ",$reclen-$base_addr," bytes of fields\n" if ($self->{debug});
175
176 my $fields;
177 if( ! read($self->{fh},$fields,$reclen-$base_addr) ) {
178 carp "can't read fields: $!";
179 $skip = 1;
180 } else {
181 print STDERR "# $rec_nr fields: '$fields'\n" if ($self->{debug});
182 }
183
184 my $row;
185
186 while (!$skip && $directory =~ s/(\d{3})(\d{4})(\d{5})//) {
187 my ($tag,$len,$addr) = ($1,$2,$3);
188
189 if (($addr+$len) > length($fields)) {
190 print STDERR "WARNING: error in dictionary on record $rec_nr skipping...\n" if (! $self->{quiet});
191 $skip = 1;
192 next;
193 }
194
195 # take field
196 my $f = substr($fields,$addr,$len);
197 print STDERR "tag/len/addr $tag [$len] $addr: '$f'\n" if ($self->{debug});
198
199 $row->{$tag} = $f;
200
201 my $del = substr($fields,$addr+$len-1,1);
202
203 # check field delimiters...
204 if ($self->{assert} && $del ne chr(30)) {
205 print STDERR "WARNING: skipping record $rec_nr, can't find delimiter 30 got: '$del'\n" if (! $self->{quiet});
206 $skip = 1;
207 next;
208 }
209
210 if ($self->{assert} && length($f) < 2) {
211 print STDERR "WARNING: skipping field $tag from record $rec_nr because it's too short!\n" if (! $self->{quiet});
212 next;
213 }
214
215 }
216
217 return $row;
218 }
219
220 1;
221 __END__
222
223 =head1 BUGS
224
225
226
227 =head1 SUPPORT
228
229
230
231 =head1 AUTHOR
232
233 Dobrica Pavlinusic
234 CPAN ID: DPAVLIN
235 dpavlin@rot13.org
236 http://www.rot13.org/~dpavlin/
237
238 =head1 COPYRIGHT
239
240 This program is free software; you can redistribute
241 it and/or modify it under the same terms as Perl itself.
242
243 The full text of the license can be found in the
244 LICENSE file included with this module.
245
246
247 =head1 SEE ALSO
248
249 perl(1).
250
251 =cut

  ViewVC Help
Powered by ViewVC 1.1.26