/[webpac2]/trunk/lib/WebPAC/Input.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WebPAC/Input.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 285 by dpavlin, Sun Dec 18 21:06:39 2005 UTC revision 286 by dpavlin, Sun Dec 18 21:06:46 2005 UTC
# Line 11  use Text::Iconv; Line 11  use Text::Iconv;
11    
12  =head1 NAME  =head1 NAME
13    
14  WebPAC::Input - core module for input file format  WebPAC::Input - read different file formats into WebPAC
15    
16  =head1 VERSION  =head1 VERSION
17    
18  Version 0.02  Version 0.03
19    
20  =cut  =cut
21    
22  our $VERSION = '0.02';  our $VERSION = '0.03';
23    
24  =head1 SYNOPSIS  =head1 SYNOPSIS
25    
26  This module is used as base class for all database specific modules  This module implements input as database which have fixed and known
27  (basically, files which have one handle, fixed size while indexing and some  I<size> while indexing and single unique numeric identifier for database
28  kind of numeric idefinirier which goes from 1 to filesize).  position ranging from 1 to I<size>.
29    
30    Simply, something that is indexed by unmber from 1 .. I<size>.
31    
32    Examples of such databases are CDS/ISIS files, MARC files, lines in
33    text file, and so on.
34    
35    Specific file formats are implemented using low-level interface modules,
36    located in C<WebPAC::Input::*> namespace which export C<open_db>,
37    C<fetch_rec> and optional C<init> functions.
38    
39  Perhaps a little code snippet.  Perhaps a little code snippet.
40    
41      use WebPAC::Input;      use WebPAC::Input;
42    
43      my $db = WebPAC::Input->new(      my $db = WebPAC::Input->new(
44          format => 'NULL',          module => 'WebPAC::Input::ISIS',
45          config => $config,                  config => $config,
46          lookup => $lookup_obj,                  lookup => $lookup_obj,
47          low_mem => 1,                  low_mem => 1,
48      );      );
49    
50      $db->open('/path/to/database');      $db->open('/path/to/database');
51      print "database size: ",$db->size,"\n";      print "database size: ",$db->size,"\n";
52      while (my $row = $db->fetch) {      while (my $rec = $db->fetch) {
         ...  
53      }      }
54    
55    
56    
57  =head1 FUNCTIONS  =head1 FUNCTIONS
58    
59  =head2 new  =head2 new
# Line 51  Perhaps a little code snippet. Line 61  Perhaps a little code snippet.
61  Create new input database object.  Create new input database object.
62    
63    my $db = new WebPAC::Input(    my $db = new WebPAC::Input(
64          format => 'NULL'          module => 'WebPAC::Input::MARC',
65          code_page => 'ISO-8859-2',          code_page => 'ISO-8859-2',
66          low_mem => 1,          low_mem => 1,
67    );    );
68    
69    C<module> is low-level file format module. See L<WebPAC::Input::Isis> and
70    L<WebPAC::Input::MARC>.
71    
72  Optional parametar C<code_page> specify application code page (which will be  Optional parametar C<code_page> specify application code page (which will be
73  used internally). This should probably be your terminal encoding, and by  used internally). This should probably be your terminal encoding, and by
74  default, it C<ISO-8859-2>.  default, it C<ISO-8859-2>.
# Line 74  sub new { Line 87  sub new {
87    
88          my $log = $self->_get_logger;          my $log = $self->_get_logger;
89    
90            $log->logconfess("specify low-level file format module") unless ($self->{module});
91            my $module = $self->{module};
92            $module =~ s#::#/#g;
93            $module .= '.pm';
94            $log->debug("require low-level module $self->{module} from $module");
95            require $module;
96            eval $self->{module} .'->import';
97    
98          # check if required subclasses are implemented          # check if required subclasses are implemented
99          foreach my $subclass (qw/open_db fetch_rec/) {          foreach my $subclass (qw/open_db fetch_rec/) {
100                  $log->logdie("missing implementation of $subclass") unless ($self->SUPER::can($subclass));                  if ( $self->can($subclass) ) {
101                            $log->debug("imported $subclass");
102                    } else {
103                            $log->warn("missing $subclass in $self->{module}");
104                    }
105          }          }
106    
107          if ($self->can('init')) {          if ($self->can('init')) {
# Line 119  sub new { Line 144  sub new {
144    
145  This function will read whole database in memory and produce lookups.  This function will read whole database in memory and produce lookups.
146    
147   $isis->open(   $input->open(
148          path => '/path/to/database/file',          path => '/path/to/database/file',
149          code_page => '852',          code_page => '852',
150          limit_mfn => 500,          limit => 500,
151          start_mfn => 6000,          offset => 6000,
152          lookup => $lookup_obj,          lookup => $lookup_obj,
153   );   );
154    
155  By default, C<code_page> is assumed to be C<852>.  By default, C<code_page> is assumed to be C<852>.
156    
157  If optional parametar C<start_mfn> is set, this will be first MFN to read  C<offset> is optional parametar to position at some offset before reading from database.
 from database (so you can skip beginning of your database if you need to).  
158    
159  If optional parametar C<limit_mfn> is set, it will read just 500 records  C<limit> is optional parametar to read just C<limit> records from database
 from database in example above.  
160    
161  Returns size of database, regardless of C<start_mfn> and C<limit_mfn>  Returns size of database, regardless of C<offset> and C<limit>
162  parametars, see also C<$isis->size>.  parametars, see also C<size>.
163    
164  =cut  =cut
165    
# Line 151  sub open { Line 174  sub open {
174    
175          # store data in object          # store data in object
176          $self->{'code_page'} = $code_page;          $self->{'code_page'} = $code_page;
177          foreach my $v (qw/path start_mfn limit_mfn/) {          foreach my $v (qw/path offset limit/) {
178                  $self->{$v} = $arg->{$v} if ($arg->{$v});                  $self->{$v} = $arg->{$v} if ($arg->{$v});
179          }          }
180    
# Line 172  sub open { Line 195  sub open {
195                  return;                  return;
196          }          }
197    
198          my $startmfn = 1;          my $offset = 1;
199          my $maxmfn = $size;          my $limit = $size;
200    
201          if (my $s = $self->{start_mfn}) {          if (my $s = $self->{offset}) {
202                  $log->info("skipping to MFN $s");                  $log->info("skipping to MFN $s");
203                  $startmfn = $s;                  $offset = $s;
204          } else {          } else {
205                  $self->{start_mfn} = $startmfn;                  $self->{offset} = $offset;
206          }          }
207    
208          if ($self->{limit_mfn}) {          if ($self->{limit}) {
209                  $log->info("limiting to ",$self->{limit_mfn}," records");                  $log->info("limiting to ",$self->{limit}," records");
210                  $maxmfn = $startmfn + $self->{limit_mfn} - 1;                  $limit = $offset + $self->{limit} - 1;
211                  $maxmfn = $size if ($maxmfn > $size);                  $limit = $size if ($limit > $size);
212          }          }
213    
214          # store size for later          # store size for later
215          $self->{size} = ($maxmfn - $startmfn) ? ($maxmfn - $startmfn + 1) : 0;          $self->{size} = ($limit - $offset) ? ($limit - $offset + 1) : 0;
216    
217          $log->info("processing $self->{size} records in $code_page, convert to $self->{code_page}");          $log->info("processing $self->{size} records in $code_page, convert to $self->{code_page}");
218    
219          # read database          # read database
220          for (my $mfn = $startmfn; $mfn <= $maxmfn; $mfn++) {          for (my $pos = $offset; $pos <= $limit; $mfn++) {
221    
222                  $log->debug("mfn: $mfn\n");                  $log->debug("position: $pos\n");
223    
224                  my $rec = $self->fetch_rec( $db, $mfn );                  my $rec = $self->fetch_rec( $db, $pos );
225    
226                  if (! $rec) {                  if (! $rec) {
227                          $log->warn("record $mfn empty? skipping...");                          $log->warn("record $pos empty? skipping...");
228                          next;                          next;
229                  }                  }
230    
231                  # store                  # store
232                  if ($self->{'low_mem'}) {                  if ($self->{low_mem}) {
233                          $self->{'db'}->put($mfn, $rec);                          $self->{db}->put($pos, $rec);
234                  } else {                  } else {
235                          $self->{'data'}->{$mfn} = $rec;                          $self->{data}->{$pos} = $rec;
236                  }                  }
237    
238                  # create lookup                  # create lookup
239                  $self->{'lookup'}->add( $rec ) if ($rec && $self->{'lookup'});                  $self->{'lookup'}->add( $rec ) if ($rec && $self->{'lookup'});
240    
241                  $self->progress_bar($mfn,$maxmfn);                  $self->progress_bar($pos,$limit);
242    
243          }          }
244    
245          $self->{'current_mfn'} = -1;          $self->{pos} = -1;
246          $self->{'last_pcnt'} = 0;          $self->{last_pcnt} = 0;
   
         $log->debug("max mfn: $maxmfn");  
247    
248          # store max mfn and return it.          # store max mfn and return it.
249          $self->{'max_mfn'} = $maxmfn;          $self->{max_pos} = $limit;
250            $log->debug("max_pos: $limit");
251    
252          return $size;          return $size;
253  }  }
# Line 246  sub fetch { Line 268  sub fetch {
268    
269          my $log = $self->_get_logger();          my $log = $self->_get_logger();
270    
271          $log->logconfess("it seems that you didn't load database!") unless ($self->{'current_mfn'});          $log->logconfess("it seems that you didn't load database!") unless ($self->{pos});
272    
273          if ($self->{'current_mfn'} == -1) {          if ($self->{pos} == -1) {
274                  $self->{'current_mfn'} = $self->{'start_mfn'};                  $self->{pos} = $self->{offset};
275          } else {          } else {
276                  $self->{'current_mfn'}++;                  $self->{pos}++;
277          }          }
278    
279          my $mfn = $self->{'current_mfn'};          my $mfn = $self->{pos};
280    
281          if ($mfn > $self->{'max_mfn'}) {          if ($mfn > $self->{max_pos}) {
282                  $self->{'current_mfn'} = $self->{'max_mfn'};                  $self->{pos} = $self->{max_pos};
283                  $log->debug("at EOF");                  $log->debug("at EOF");
284                  return;                  return;
285          }          }
286    
287          $self->progress_bar($mfn,$self->{'max_mfn'});          $self->progress_bar($mfn,$self->{max_pos});
288    
289          my $rec;          my $rec;
290    
291          if ($self->{'low_mem'}) {          if ($self->{low_mem}) {
292                  $rec = $self->{'db'}->get($mfn);                  $rec = $self->{db}->get($mfn);
293          } else {          } else {
294                  $rec = $self->{'data'}->{$mfn};                  $rec = $self->{data}->{$mfn};
295          }          }
296    
297          $rec ||= 0E0;          $rec ||= 0E0;
# Line 287  First record in database has position 1. Line 309  First record in database has position 1.
309    
310  sub pos {  sub pos {
311          my $self = shift;          my $self = shift;
312          return $self->{'current_mfn'};          return $self->{pos};
313  }  }
314    
315    
# Line 301  Result from this function can be used to Line 323  Result from this function can be used to
323    
324   foreach my $mfn ( 1 ... $isis->size ) { ... }   foreach my $mfn ( 1 ... $isis->size ) { ... }
325    
326  because it takes into account C<start_mfn> and C<limit_mfn>.  because it takes into account C<offset> and C<limit>.
327    
328  =cut  =cut
329    
330  sub size {  sub size {
331          my $self = shift;          my $self = shift;
332          return $self->{'size'};          return $self->{size};
333  }  }
334    
335  =head2 seek  =head2 seek
# Line 329  sub seek { Line 351  sub seek {
351          if ($pos < 1) {          if ($pos < 1) {
352                  $log->warn("seek before first record");                  $log->warn("seek before first record");
353                  $pos = 1;                  $pos = 1;
354          } elsif ($pos > $self->{'max_mfn'}) {          } elsif ($pos > $self->{max_pos}) {
355                  $log->warn("seek beyond last record");                  $log->warn("seek beyond last record");
356                  $pos = $self->{'max_mfn'};                  $pos = $self->{max_pos};
357          }          }
358    
359          return $self->{'current_mfn'} = (($pos - 1) || -1);          return $self->{pos} = (($pos - 1) || -1);
360  }  }
361    
362    

Legend:
Removed from v.285  
changed lines
  Added in v.286

  ViewVC Help
Powered by ViewVC 1.1.26