/[webpac2]/trunk/lib/WebPAC/Input.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WebPAC/Input.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 285 by dpavlin, Sun Dec 18 21:06:39 2005 UTC revision 308 by dpavlin, Tue Dec 20 19:01:22 2005 UTC
# Line 3  package WebPAC::Input; Line 3  package WebPAC::Input;
3  use warnings;  use warnings;
4  use strict;  use strict;
5    
 use blib;  
   
6  use WebPAC::Common;  use WebPAC::Common;
7  use base qw/WebPAC::Common/;  use base qw/WebPAC::Common/;
8  use Text::Iconv;  use Text::Iconv;
9    use Data::Dumper;
10    
11  =head1 NAME  =head1 NAME
12    
13  WebPAC::Input - core module for input file format  WebPAC::Input - read different file formats into WebPAC
14    
15  =head1 VERSION  =head1 VERSION
16    
17  Version 0.02  Version 0.03
18    
19  =cut  =cut
20    
21  our $VERSION = '0.02';  our $VERSION = '0.03';
22    
23  =head1 SYNOPSIS  =head1 SYNOPSIS
24    
25  This module is used as base class for all database specific modules  This module implements input as database which have fixed and known
26  (basically, files which have one handle, fixed size while indexing and some  I<size> while indexing and single unique numeric identifier for database
27  kind of numeric idefinirier which goes from 1 to filesize).  position ranging from 1 to I<size>.
28    
29    Simply, something that is indexed by unmber from 1 .. I<size>.
30    
31    Examples of such databases are CDS/ISIS files, MARC files, lines in
32    text file, and so on.
33    
34    Specific file formats are implemented using low-level interface modules,
35    located in C<WebPAC::Input::*> namespace which export C<open_db>,
36    C<fetch_rec> and optional C<init> functions.
37    
38  Perhaps a little code snippet.  Perhaps a little code snippet.
39    
40      use WebPAC::Input;      use WebPAC::Input;
41    
42      my $db = WebPAC::Input->new(      my $db = WebPAC::Input->new(
43          format => 'NULL',          module => 'WebPAC::Input::ISIS',
44          config => $config,                  config => $config,
45          lookup => $lookup_obj,                  lookup => $lookup_obj,
46          low_mem => 1,                  low_mem => 1,
47      );      );
48    
49      $db->open('/path/to/database');      $db->open('/path/to/database');
50      print "database size: ",$db->size,"\n";      print "database size: ",$db->size,"\n";
51      while (my $row = $db->fetch) {      while (my $rec = $db->fetch) {
         ...  
52      }      }
53    
54    
55    
56  =head1 FUNCTIONS  =head1 FUNCTIONS
57    
58  =head2 new  =head2 new
# Line 51  Perhaps a little code snippet. Line 60  Perhaps a little code snippet.
60  Create new input database object.  Create new input database object.
61    
62    my $db = new WebPAC::Input(    my $db = new WebPAC::Input(
63          format => 'NULL'          module => 'WebPAC::Input::MARC',
64          code_page => 'ISO-8859-2',          code_page => 'ISO-8859-2',
65          low_mem => 1,          low_mem => 1,
66    );    );
67    
68    C<module> is low-level file format module. See L<WebPAC::Input::Isis> and
69    L<WebPAC::Input::MARC>.
70    
71  Optional parametar C<code_page> specify application code page (which will be  Optional parametar C<code_page> specify application code page (which will be
72  used internally). This should probably be your terminal encoding, and by  used internally). This should probably be your terminal encoding, and by
73  default, it C<ISO-8859-2>.  default, it C<ISO-8859-2>.
# Line 74  sub new { Line 86  sub new {
86    
87          my $log = $self->_get_logger;          my $log = $self->_get_logger;
88    
89            $log->logconfess("specify low-level file format module") unless ($self->{module});
90            my $module = $self->{module};
91            $module =~ s#::#/#g;
92            $module .= '.pm';
93            $log->debug("require low-level module $self->{module} from $module");
94    
95            require $module;
96            #eval $self->{module} .'->import';
97    
98          # check if required subclasses are implemented          # check if required subclasses are implemented
99          foreach my $subclass (qw/open_db fetch_rec/) {          foreach my $subclass (qw/open_db fetch_rec init/) {
100                  $log->logdie("missing implementation of $subclass") unless ($self->SUPER::can($subclass));                  my $n = $self->{module} . '::' . $subclass;
101                    if (! defined &{ $n }) {
102                            my $missing = "missing $subclass in $self->{module}";
103                            $self->{$subclass} = sub { $log->logwarn($missing) };
104                    } else {
105                            $self->{$subclass} = \&{ $n };
106                    }
107          }          }
108    
109          if ($self->can('init')) {          if ($self->{init}) {
110                  $log->debug("calling init");                  $log->debug("calling init");
111                  $self->init(@_);                  $self->{init}->($self, @_);
112          }          }
113    
114          $self->{'code_page'} ||= 'ISO-8859-2';          $self->{'code_page'} ||= 'ISO-8859-2';
# Line 119  sub new { Line 146  sub new {
146    
147  This function will read whole database in memory and produce lookups.  This function will read whole database in memory and produce lookups.
148    
149   $isis->open(   $input->open(
150          path => '/path/to/database/file',          path => '/path/to/database/file',
151          code_page => '852',          code_page => '852',
152          limit_mfn => 500,          limit => 500,
153          start_mfn => 6000,          offset => 6000,
154          lookup => $lookup_obj,          lookup => $lookup_obj,
155   );   );
156    
157  By default, C<code_page> is assumed to be C<852>.  By default, C<code_page> is assumed to be C<852>.
158    
159  If optional parametar C<start_mfn> is set, this will be first MFN to read  C<offset> is optional parametar to position at some offset before reading from database.
 from database (so you can skip beginning of your database if you need to).  
160    
161  If optional parametar C<limit_mfn> is set, it will read just 500 records  C<limit> is optional parametar to read just C<limit> records from database
 from database in example above.  
162    
163  Returns size of database, regardless of C<start_mfn> and C<limit_mfn>  Returns size of database, regardless of C<offset> and C<limit>
164  parametars, see also C<$isis->size>.  parametars, see also C<size>.
165    
166  =cut  =cut
167    
# Line 150  sub open { Line 175  sub open {
175          my $code_page = $arg->{'code_page'} || '852';          my $code_page = $arg->{'code_page'} || '852';
176    
177          # store data in object          # store data in object
178          $self->{'code_page'} = $code_page;          $self->{'input_code_page'} = $code_page;
179          foreach my $v (qw/path start_mfn limit_mfn/) {          foreach my $v (qw/path offset limit/) {
180                  $self->{$v} = $arg->{$v} if ($arg->{$v});                  $self->{$v} = $arg->{$v} if ($arg->{$v});
181          }          }
182    
183          # create Text::Iconv object          # create Text::Iconv object
184          $self->{iconv} = Text::Iconv->new($code_page,$self->{'code_page'});          $self->{iconv} = Text::Iconv->new($code_page,$self->{'code_page'});
185    
186          my ($db, $size) = $self->open_db(          my ($db, $size) = $self->{open_db}->( $self,
187                  path => $arg->{path},                  path => $arg->{path},
188          );          );
189    
# Line 172  sub open { Line 197  sub open {
197                  return;                  return;
198          }          }
199    
200          my $startmfn = 1;          my $offset = 1;
201          my $maxmfn = $size;          my $limit = $size;
202    
203          if (my $s = $self->{start_mfn}) {          if (my $s = $self->{offset}) {
204                  $log->info("skipping to MFN $s");                  $log->info("skipping to MFN $s");
205                  $startmfn = $s;                  $offset = $s;
206          } else {          } else {
207                  $self->{start_mfn} = $startmfn;                  $self->{offset} = $offset;
208          }          }
209    
210          if ($self->{limit_mfn}) {          if ($self->{limit}) {
211                  $log->info("limiting to ",$self->{limit_mfn}," records");                  $log->debug("limiting to ",$self->{limit}," records");
212                  $maxmfn = $startmfn + $self->{limit_mfn} - 1;                  $limit = $offset + $self->{limit} - 1;
213                  $maxmfn = $size if ($maxmfn > $size);                  $limit = $size if ($limit > $size);
214          }          }
215    
216          # store size for later          # store size for later
217          $self->{size} = ($maxmfn - $startmfn) ? ($maxmfn - $startmfn + 1) : 0;          $self->{size} = ($limit - $offset) ? ($limit - $offset + 1) : 0;
218    
219          $log->info("processing $self->{size} records in $code_page, convert to $self->{code_page}");          $log->info("processing $self->{size} records in $code_page, convert to $self->{code_page}");
220    
221          # read database          # read database
222          for (my $mfn = $startmfn; $mfn <= $maxmfn; $mfn++) {          for (my $pos = $offset; $pos <= $limit; $pos++) {
223    
224                  $log->debug("mfn: $mfn\n");                  $log->debug("position: $pos\n");
225    
226                  my $rec = $self->fetch_rec( $db, $mfn );                  my $rec = $self->{fetch_rec}->($self, $db, $pos );
227    
228                    $log->debug(sub { Dumper($rec) });
229    
230                  if (! $rec) {                  if (! $rec) {
231                          $log->warn("record $mfn empty? skipping...");                          $log->warn("record $pos empty? skipping...");
232                          next;                          next;
233                  }                  }
234    
235                  # store                  # store
236                  if ($self->{'low_mem'}) {                  if ($self->{low_mem}) {
237                          $self->{'db'}->put($mfn, $rec);                          $self->{db}->put($pos, $rec);
238                  } else {                  } else {
239                          $self->{'data'}->{$mfn} = $rec;                          $self->{data}->{$pos} = $rec;
240                  }                  }
241    
242                  # create lookup                  # create lookup
243                  $self->{'lookup'}->add( $rec ) if ($rec && $self->{'lookup'});                  $self->{'lookup'}->add( $rec ) if ($rec && $self->{'lookup'});
244    
245                  $self->progress_bar($mfn,$maxmfn);                  $self->progress_bar($pos,$limit);
246    
247          }          }
248    
249          $self->{'current_mfn'} = -1;          $self->{pos} = -1;
250          $self->{'last_pcnt'} = 0;          $self->{last_pcnt} = 0;
   
         $log->debug("max mfn: $maxmfn");  
251    
252          # store max mfn and return it.          # store max mfn and return it.
253          $self->{'max_mfn'} = $maxmfn;          $self->{max_pos} = $limit;
254            $log->debug("max_pos: $limit");
255    
256          return $size;          return $size;
257  }  }
# Line 246  sub fetch { Line 272  sub fetch {
272    
273          my $log = $self->_get_logger();          my $log = $self->_get_logger();
274    
275          $log->logconfess("it seems that you didn't load database!") unless ($self->{'current_mfn'});          $log->logconfess("it seems that you didn't load database!") unless ($self->{pos});
276    
277          if ($self->{'current_mfn'} == -1) {          if ($self->{pos} == -1) {
278                  $self->{'current_mfn'} = $self->{'start_mfn'};                  $self->{pos} = $self->{offset};
279          } else {          } else {
280                  $self->{'current_mfn'}++;                  $self->{pos}++;
281          }          }
282    
283          my $mfn = $self->{'current_mfn'};          my $mfn = $self->{pos};
284    
285          if ($mfn > $self->{'max_mfn'}) {          if ($mfn > $self->{max_pos}) {
286                  $self->{'current_mfn'} = $self->{'max_mfn'};                  $self->{pos} = $self->{max_pos};
287                  $log->debug("at EOF");                  $log->debug("at EOF");
288                  return;                  return;
289          }          }
290    
291          $self->progress_bar($mfn,$self->{'max_mfn'});          $self->progress_bar($mfn,$self->{max_pos});
292    
293          my $rec;          my $rec;
294    
295          if ($self->{'low_mem'}) {          if ($self->{low_mem}) {
296                  $rec = $self->{'db'}->get($mfn);                  $rec = $self->{db}->get($mfn);
297          } else {          } else {
298                  $rec = $self->{'data'}->{$mfn};                  $rec = $self->{data}->{$mfn};
299          }          }
300    
301          $rec ||= 0E0;          $rec ||= 0E0;
# Line 287  First record in database has position 1. Line 313  First record in database has position 1.
313    
314  sub pos {  sub pos {
315          my $self = shift;          my $self = shift;
316          return $self->{'current_mfn'};          return $self->{pos};
317  }  }
318    
319    
# Line 301  Result from this function can be used to Line 327  Result from this function can be used to
327    
328   foreach my $mfn ( 1 ... $isis->size ) { ... }   foreach my $mfn ( 1 ... $isis->size ) { ... }
329    
330  because it takes into account C<start_mfn> and C<limit_mfn>.  because it takes into account C<offset> and C<limit>.
331    
332  =cut  =cut
333    
334  sub size {  sub size {
335          my $self = shift;          my $self = shift;
336          return $self->{'size'};          return $self->{size};
337  }  }
338    
339  =head2 seek  =head2 seek
# Line 329  sub seek { Line 355  sub seek {
355          if ($pos < 1) {          if ($pos < 1) {
356                  $log->warn("seek before first record");                  $log->warn("seek before first record");
357                  $pos = 1;                  $pos = 1;
358          } elsif ($pos > $self->{'max_mfn'}) {          } elsif ($pos > $self->{max_pos}) {
359                  $log->warn("seek beyond last record");                  $log->warn("seek beyond last record");
360                  $pos = $self->{'max_mfn'};                  $pos = $self->{max_pos};
361          }          }
362    
363          return $self->{'current_mfn'} = (($pos - 1) || -1);          return $self->{pos} = (($pos - 1) || -1);
364  }  }
365    
366    

Legend:
Removed from v.285  
changed lines
  Added in v.308

  ViewVC Help
Powered by ViewVC 1.1.26