/[webpac2]/trunk/lib/WebPAC/Input.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WebPAC/Input.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 11 by dpavlin, Sat Jul 16 20:54:28 2005 UTC revision 285 by dpavlin, Sun Dec 18 21:06:39 2005 UTC
# Line 3  package WebPAC::Input; Line 3  package WebPAC::Input;
3  use warnings;  use warnings;
4  use strict;  use strict;
5    
6    use blib;
7    
8    use WebPAC::Common;
9    use base qw/WebPAC::Common/;
10    use Text::Iconv;
11    
12  =head1 NAME  =head1 NAME
13    
14  WebPAC::Input - core module for input file format  WebPAC::Input - core module for input file format
15    
16  =head1 VERSION  =head1 VERSION
17    
18  Version 0.01  Version 0.02
19    
20  =cut  =cut
21    
22  our $VERSION = '0.01';  our $VERSION = '0.02';
23    
24  =head1 SYNOPSIS  =head1 SYNOPSIS
25    
26  This module will load particular loader module and execute it's functions.  This module is used as base class for all database specific modules
27    (basically, files which have one handle, fixed size while indexing and some
28    kind of numeric idefinirier which goes from 1 to filesize).
29    
30  Perhaps a little code snippet.  Perhaps a little code snippet.
31    
# Line 54  default, it C<ISO-8859-2>. Line 62  default, it C<ISO-8859-2>.
62    
63  Default is not to use C<low_mem> options (see L<MEMORY USAGE> below).  Default is not to use C<low_mem> options (see L<MEMORY USAGE> below).
64    
65    This function will also call low-level C<init> if it exists with same
66    parametars.
67    
68  =cut  =cut
69    
70  sub new {  sub new {
71          my $class = shift;          my $class = shift;
72          my $self = {@_};          my $self = {@_};
73          bless($self, $class);          bless($self, $class);
74    
         $self->{'code_page'} ||= 'ISO-8859-2';  
   
75          my $log = $self->_get_logger;          my $log = $self->_get_logger;
76    
77            # check if required subclasses are implemented
78            foreach my $subclass (qw/open_db fetch_rec/) {
79                    $log->logdie("missing implementation of $subclass") unless ($self->SUPER::can($subclass));
80            }
81    
82            if ($self->can('init')) {
83                    $log->debug("calling init");
84                    $self->init(@_);
85            }
86    
87            $self->{'code_page'} ||= 'ISO-8859-2';
88    
89          # running with low_mem flag? well, use DBM::Deep then.          # running with low_mem flag? well, use DBM::Deep then.
90          if ($self->{'low_mem'}) {          if ($self->{'low_mem'}) {
91                  $log->info("running with low_mem which impacts performance (<32 Mb memory usage)");                  $log->info("running with low_mem which impacts performance (<32 Mb memory usage)");
# Line 94  sub new { Line 115  sub new {
115          $self ? return $self : return undef;          $self ? return $self : return undef;
116  }  }
117    
118    =head2 open
119    
120    This function will read whole database in memory and produce lookups.
121    
122     $isis->open(
123            path => '/path/to/database/file',
124            code_page => '852',
125            limit_mfn => 500,
126            start_mfn => 6000,
127            lookup => $lookup_obj,
128     );
129    
130    By default, C<code_page> is assumed to be C<852>.
131    
132    If optional parametar C<start_mfn> is set, this will be first MFN to read
133    from database (so you can skip beginning of your database if you need to).
134    
135    If optional parametar C<limit_mfn> is set, it will read just 500 records
136    from database in example above.
137    
138    Returns size of database, regardless of C<start_mfn> and C<limit_mfn>
139    parametars, see also C<$isis->size>.
140    
141    =cut
142    
143    sub open {
144            my $self = shift;
145            my $arg = {@_};
146    
147            my $log = $self->_get_logger();
148    
149            $log->logcroak("need path") if (! $arg->{'path'});
150            my $code_page = $arg->{'code_page'} || '852';
151    
152            # store data in object
153            $self->{'code_page'} = $code_page;
154            foreach my $v (qw/path start_mfn limit_mfn/) {
155                    $self->{$v} = $arg->{$v} if ($arg->{$v});
156            }
157    
158            # create Text::Iconv object
159            $self->{iconv} = Text::Iconv->new($code_page,$self->{'code_page'});
160    
161            my ($db, $size) = $self->open_db(
162                    path => $arg->{path},
163            );
164    
165            unless ($db) {
166                    $log->logwarn("can't open database $arg->{path}, skipping...");
167                    return;
168            }
169    
170            unless ($size) {
171                    $log->logwarn("no records in database $arg->{path}, skipping...");
172                    return;
173            }
174    
175            my $startmfn = 1;
176            my $maxmfn = $size;
177    
178            if (my $s = $self->{start_mfn}) {
179                    $log->info("skipping to MFN $s");
180                    $startmfn = $s;
181            } else {
182                    $self->{start_mfn} = $startmfn;
183            }
184    
185            if ($self->{limit_mfn}) {
186                    $log->info("limiting to ",$self->{limit_mfn}," records");
187                    $maxmfn = $startmfn + $self->{limit_mfn} - 1;
188                    $maxmfn = $size if ($maxmfn > $size);
189            }
190    
191            # store size for later
192            $self->{size} = ($maxmfn - $startmfn) ? ($maxmfn - $startmfn + 1) : 0;
193    
194            $log->info("processing $self->{size} records in $code_page, convert to $self->{code_page}");
195    
196            # read database
197            for (my $mfn = $startmfn; $mfn <= $maxmfn; $mfn++) {
198    
199                    $log->debug("mfn: $mfn\n");
200    
201                    my $rec = $self->fetch_rec( $db, $mfn );
202    
203                    if (! $rec) {
204                            $log->warn("record $mfn empty? skipping...");
205                            next;
206                    }
207    
208                    # store
209                    if ($self->{'low_mem'}) {
210                            $self->{'db'}->put($mfn, $rec);
211                    } else {
212                            $self->{'data'}->{$mfn} = $rec;
213                    }
214    
215                    # create lookup
216                    $self->{'lookup'}->add( $rec ) if ($rec && $self->{'lookup'});
217    
218                    $self->progress_bar($mfn,$maxmfn);
219    
220            }
221    
222            $self->{'current_mfn'} = -1;
223            $self->{'last_pcnt'} = 0;
224    
225            $log->debug("max mfn: $maxmfn");
226    
227            # store max mfn and return it.
228            $self->{'max_mfn'} = $maxmfn;
229    
230            return $size;
231    }
232    
233    =head2 fetch
234    
235    Fetch next record from database. It will also displays progress bar.
236    
237     my $rec = $isis->fetch;
238    
239    Record from this function should probably go to C<data_structure> for
240    normalisation.
241    
242    =cut
243    
244    sub fetch {
245            my $self = shift;
246    
247            my $log = $self->_get_logger();
248    
249            $log->logconfess("it seems that you didn't load database!") unless ($self->{'current_mfn'});
250    
251            if ($self->{'current_mfn'} == -1) {
252                    $self->{'current_mfn'} = $self->{'start_mfn'};
253            } else {
254                    $self->{'current_mfn'}++;
255            }
256    
257            my $mfn = $self->{'current_mfn'};
258    
259            if ($mfn > $self->{'max_mfn'}) {
260                    $self->{'current_mfn'} = $self->{'max_mfn'};
261                    $log->debug("at EOF");
262                    return;
263            }
264    
265            $self->progress_bar($mfn,$self->{'max_mfn'});
266    
267            my $rec;
268    
269            if ($self->{'low_mem'}) {
270                    $rec = $self->{'db'}->get($mfn);
271            } else {
272                    $rec = $self->{'data'}->{$mfn};
273            }
274    
275            $rec ||= 0E0;
276    }
277    
278    =head2 pos
279    
280    Returns current record number (MFN).
281    
282     print $isis->pos;
283    
284    First record in database has position 1.
285    
286    =cut
287    
288    sub pos {
289            my $self = shift;
290            return $self->{'current_mfn'};
291    }
292    
293    
294    =head2 size
295    
296    Returns number of records in database
297    
298     print $isis->size;
299    
300    Result from this function can be used to loop through all records
301    
302     foreach my $mfn ( 1 ... $isis->size ) { ... }
303    
304    because it takes into account C<start_mfn> and C<limit_mfn>.
305    
306    =cut
307    
308    sub size {
309            my $self = shift;
310            return $self->{'size'};
311    }
312    
313    =head2 seek
314    
315    Seek to specified MFN in file.
316    
317     $isis->seek(42);
318    
319    First record in database has position 1.
320    
321    =cut
322    
323    sub seek {
324            my $self = shift;
325            my $pos = shift || return;
326    
327            my $log = $self->_get_logger();
328    
329            if ($pos < 1) {
330                    $log->warn("seek before first record");
331                    $pos = 1;
332            } elsif ($pos > $self->{'max_mfn'}) {
333                    $log->warn("seek beyond last record");
334                    $pos = $self->{'max_mfn'};
335            }
336    
337            return $self->{'current_mfn'} = (($pos - 1) || -1);
338    }
339    
340    
341  =head1 MEMORY USAGE  =head1 MEMORY USAGE
342    
343  C<low_mem> options is double-edged sword. If enabled, WebPAC  C<low_mem> options is double-edged sword. If enabled, WebPAC

Legend:
Removed from v.11  
changed lines
  Added in v.285

  ViewVC Help
Powered by ViewVC 1.1.26