/[webpac2]/trunk/lib/WebPAC/Input.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WebPAC/Input.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 285 by dpavlin, Sun Dec 18 21:06:39 2005 UTC revision 307 by dpavlin, Tue Dec 20 00:03:04 2005 UTC
# Line 3  package WebPAC::Input; Line 3  package WebPAC::Input;
3  use warnings;  use warnings;
4  use strict;  use strict;
5    
 use blib;  
   
6  use WebPAC::Common;  use WebPAC::Common;
7  use base qw/WebPAC::Common/;  use base qw/WebPAC::Common/;
8  use Text::Iconv;  use Text::Iconv;
9    
10  =head1 NAME  =head1 NAME
11    
12  WebPAC::Input - core module for input file format  WebPAC::Input - read different file formats into WebPAC
13    
14  =head1 VERSION  =head1 VERSION
15    
16  Version 0.02  Version 0.03
17    
18  =cut  =cut
19    
20  our $VERSION = '0.02';  our $VERSION = '0.03';
21    
22  =head1 SYNOPSIS  =head1 SYNOPSIS
23    
24  This module is used as base class for all database specific modules  This module implements input as database which have fixed and known
25  (basically, files which have one handle, fixed size while indexing and some  I<size> while indexing and single unique numeric identifier for database
26  kind of numeric idefinirier which goes from 1 to filesize).  position ranging from 1 to I<size>.
27    
28    Simply, something that is indexed by unmber from 1 .. I<size>.
29    
30    Examples of such databases are CDS/ISIS files, MARC files, lines in
31    text file, and so on.
32    
33    Specific file formats are implemented using low-level interface modules,
34    located in C<WebPAC::Input::*> namespace which export C<open_db>,
35    C<fetch_rec> and optional C<init> functions.
36    
37  Perhaps a little code snippet.  Perhaps a little code snippet.
38    
39      use WebPAC::Input;      use WebPAC::Input;
40    
41      my $db = WebPAC::Input->new(      my $db = WebPAC::Input->new(
42          format => 'NULL',          module => 'WebPAC::Input::ISIS',
43          config => $config,                  config => $config,
44          lookup => $lookup_obj,                  lookup => $lookup_obj,
45          low_mem => 1,                  low_mem => 1,
46      );      );
47    
48      $db->open('/path/to/database');      $db->open('/path/to/database');
49      print "database size: ",$db->size,"\n";      print "database size: ",$db->size,"\n";
50      while (my $row = $db->fetch) {      while (my $rec = $db->fetch) {
         ...  
51      }      }
52    
53    
54    
55  =head1 FUNCTIONS  =head1 FUNCTIONS
56    
57  =head2 new  =head2 new
# Line 51  Perhaps a little code snippet. Line 59  Perhaps a little code snippet.
59  Create new input database object.  Create new input database object.
60    
61    my $db = new WebPAC::Input(    my $db = new WebPAC::Input(
62          format => 'NULL'          module => 'WebPAC::Input::MARC',
63          code_page => 'ISO-8859-2',          code_page => 'ISO-8859-2',
64          low_mem => 1,          low_mem => 1,
65    );    );
66    
67    C<module> is low-level file format module. See L<WebPAC::Input::Isis> and
68    L<WebPAC::Input::MARC>.
69    
70  Optional parametar C<code_page> specify application code page (which will be  Optional parametar C<code_page> specify application code page (which will be
71  used internally). This should probably be your terminal encoding, and by  used internally). This should probably be your terminal encoding, and by
72  default, it C<ISO-8859-2>.  default, it C<ISO-8859-2>.
# Line 74  sub new { Line 85  sub new {
85    
86          my $log = $self->_get_logger;          my $log = $self->_get_logger;
87    
88            $log->logconfess("specify low-level file format module") unless ($self->{module});
89            my $module = $self->{module};
90            $module =~ s#::#/#g;
91            $module .= '.pm';
92            $log->debug("require low-level module $self->{module} from $module");
93    
94            require $module;
95            #eval $self->{module} .'->import';
96    
97          # check if required subclasses are implemented          # check if required subclasses are implemented
98          foreach my $subclass (qw/open_db fetch_rec/) {          foreach my $subclass (qw/open_db fetch_rec init/) {
99                  $log->logdie("missing implementation of $subclass") unless ($self->SUPER::can($subclass));                  my $n = $self->{module} . '::' . $subclass;
100                    if (! defined &{ $n }) {
101                            my $missing = "missing $subclass in $self->{module}";
102                            $self->{$subclass} = sub { $log->logwarn($missing) };
103                    } else {
104                            $self->{$subclass} = \&{ $n };
105                    }
106          }          }
107    
108          if ($self->can('init')) {          if ($self->{init}) {
109                  $log->debug("calling init");                  $log->debug("calling init");
110                  $self->init(@_);                  $self->{init}->($self, @_);
111          }          }
112    
113          $self->{'code_page'} ||= 'ISO-8859-2';          $self->{'code_page'} ||= 'ISO-8859-2';
# Line 119  sub new { Line 145  sub new {
145    
146  This function will read whole database in memory and produce lookups.  This function will read whole database in memory and produce lookups.
147    
148   $isis->open(   $input->open(
149          path => '/path/to/database/file',          path => '/path/to/database/file',
150          code_page => '852',          code_page => '852',
151          limit_mfn => 500,          limit => 500,
152          start_mfn => 6000,          offset => 6000,
153          lookup => $lookup_obj,          lookup => $lookup_obj,
154   );   );
155    
156  By default, C<code_page> is assumed to be C<852>.  By default, C<code_page> is assumed to be C<852>.
157    
158  If optional parametar C<start_mfn> is set, this will be first MFN to read  C<offset> is optional parametar to position at some offset before reading from database.
 from database (so you can skip beginning of your database if you need to).  
159    
160  If optional parametar C<limit_mfn> is set, it will read just 500 records  C<limit> is optional parametar to read just C<limit> records from database
 from database in example above.  
161    
162  Returns size of database, regardless of C<start_mfn> and C<limit_mfn>  Returns size of database, regardless of C<offset> and C<limit>
163  parametars, see also C<$isis->size>.  parametars, see also C<size>.
164    
165  =cut  =cut
166    
# Line 150  sub open { Line 174  sub open {
174          my $code_page = $arg->{'code_page'} || '852';          my $code_page = $arg->{'code_page'} || '852';
175    
176          # store data in object          # store data in object
177          $self->{'code_page'} = $code_page;          $self->{'input_code_page'} = $code_page;
178          foreach my $v (qw/path start_mfn limit_mfn/) {          foreach my $v (qw/path offset limit/) {
179                  $self->{$v} = $arg->{$v} if ($arg->{$v});                  $self->{$v} = $arg->{$v} if ($arg->{$v});
180          }          }
181    
182          # create Text::Iconv object          # create Text::Iconv object
183          $self->{iconv} = Text::Iconv->new($code_page,$self->{'code_page'});          $self->{iconv} = Text::Iconv->new($code_page,$self->{'code_page'});
184    
185          my ($db, $size) = $self->open_db(          my ($db, $size) = $self->{open_db}->( $self,
186                  path => $arg->{path},                  path => $arg->{path},
187          );          );
188    
# Line 172  sub open { Line 196  sub open {
196                  return;                  return;
197          }          }
198    
199          my $startmfn = 1;          my $offset = 1;
200          my $maxmfn = $size;          my $limit = $size;
201    
202          if (my $s = $self->{start_mfn}) {          if (my $s = $self->{offset}) {
203                  $log->info("skipping to MFN $s");                  $log->info("skipping to MFN $s");
204                  $startmfn = $s;                  $offset = $s;
205          } else {          } else {
206                  $self->{start_mfn} = $startmfn;                  $self->{offset} = $offset;
207          }          }
208    
209          if ($self->{limit_mfn}) {          if ($self->{limit}) {
210                  $log->info("limiting to ",$self->{limit_mfn}," records");                  $log->debug("limiting to ",$self->{limit}," records");
211                  $maxmfn = $startmfn + $self->{limit_mfn} - 1;                  $limit = $offset + $self->{limit} - 1;
212                  $maxmfn = $size if ($maxmfn > $size);                  $limit = $size if ($limit > $size);
213          }          }
214    
215          # store size for later          # store size for later
216          $self->{size} = ($maxmfn - $startmfn) ? ($maxmfn - $startmfn + 1) : 0;          $self->{size} = ($limit - $offset) ? ($limit - $offset + 1) : 0;
217    
218          $log->info("processing $self->{size} records in $code_page, convert to $self->{code_page}");          $log->info("processing $self->{size} records in $code_page, convert to $self->{code_page}");
219    
220          # read database          # read database
221          for (my $mfn = $startmfn; $mfn <= $maxmfn; $mfn++) {          for (my $pos = $offset; $pos <= $limit; $pos++) {
222    
223                  $log->debug("mfn: $mfn\n");                  $log->debug("position: $pos\n");
224    
225                  my $rec = $self->fetch_rec( $db, $mfn );                  my $rec = $self->{fetch_rec}->($self, $db, $pos );
226    
227                  if (! $rec) {                  if (! $rec) {
228                          $log->warn("record $mfn empty? skipping...");                          $log->warn("record $pos empty? skipping...");
229                          next;                          next;
230                  }                  }
231    
232                  # store                  # store
233                  if ($self->{'low_mem'}) {                  if ($self->{low_mem}) {
234                          $self->{'db'}->put($mfn, $rec);                          $self->{db}->put($pos, $rec);
235                  } else {                  } else {
236                          $self->{'data'}->{$mfn} = $rec;                          $self->{data}->{$pos} = $rec;
237                  }                  }
238    
239                  # create lookup                  # create lookup
240                  $self->{'lookup'}->add( $rec ) if ($rec && $self->{'lookup'});                  $self->{'lookup'}->add( $rec ) if ($rec && $self->{'lookup'});
241    
242                  $self->progress_bar($mfn,$maxmfn);                  $self->progress_bar($pos,$limit);
243    
244          }          }
245    
246          $self->{'current_mfn'} = -1;          $self->{pos} = -1;
247          $self->{'last_pcnt'} = 0;          $self->{last_pcnt} = 0;
   
         $log->debug("max mfn: $maxmfn");  
248    
249          # store max mfn and return it.          # store max mfn and return it.
250          $self->{'max_mfn'} = $maxmfn;          $self->{max_pos} = $limit;
251            $log->debug("max_pos: $limit");
252    
253          return $size;          return $size;
254  }  }
# Line 246  sub fetch { Line 269  sub fetch {
269    
270          my $log = $self->_get_logger();          my $log = $self->_get_logger();
271    
272          $log->logconfess("it seems that you didn't load database!") unless ($self->{'current_mfn'});          $log->logconfess("it seems that you didn't load database!") unless ($self->{pos});
273    
274          if ($self->{'current_mfn'} == -1) {          if ($self->{pos} == -1) {
275                  $self->{'current_mfn'} = $self->{'start_mfn'};                  $self->{pos} = $self->{offset};
276          } else {          } else {
277                  $self->{'current_mfn'}++;                  $self->{pos}++;
278          }          }
279    
280          my $mfn = $self->{'current_mfn'};          my $mfn = $self->{pos};
281    
282          if ($mfn > $self->{'max_mfn'}) {          if ($mfn > $self->{max_pos}) {
283                  $self->{'current_mfn'} = $self->{'max_mfn'};                  $self->{pos} = $self->{max_pos};
284                  $log->debug("at EOF");                  $log->debug("at EOF");
285                  return;                  return;
286          }          }
287    
288          $self->progress_bar($mfn,$self->{'max_mfn'});          $self->progress_bar($mfn,$self->{max_pos});
289    
290          my $rec;          my $rec;
291    
292          if ($self->{'low_mem'}) {          if ($self->{low_mem}) {
293                  $rec = $self->{'db'}->get($mfn);                  $rec = $self->{db}->get($mfn);
294          } else {          } else {
295                  $rec = $self->{'data'}->{$mfn};                  $rec = $self->{data}->{$mfn};
296          }          }
297    
298          $rec ||= 0E0;          $rec ||= 0E0;
# Line 287  First record in database has position 1. Line 310  First record in database has position 1.
310    
311  sub pos {  sub pos {
312          my $self = shift;          my $self = shift;
313          return $self->{'current_mfn'};          return $self->{pos};
314  }  }
315    
316    
# Line 301  Result from this function can be used to Line 324  Result from this function can be used to
324    
325   foreach my $mfn ( 1 ... $isis->size ) { ... }   foreach my $mfn ( 1 ... $isis->size ) { ... }
326    
327  because it takes into account C<start_mfn> and C<limit_mfn>.  because it takes into account C<offset> and C<limit>.
328    
329  =cut  =cut
330    
331  sub size {  sub size {
332          my $self = shift;          my $self = shift;
333          return $self->{'size'};          return $self->{size};
334  }  }
335    
336  =head2 seek  =head2 seek
# Line 329  sub seek { Line 352  sub seek {
352          if ($pos < 1) {          if ($pos < 1) {
353                  $log->warn("seek before first record");                  $log->warn("seek before first record");
354                  $pos = 1;                  $pos = 1;
355          } elsif ($pos > $self->{'max_mfn'}) {          } elsif ($pos > $self->{max_pos}) {
356                  $log->warn("seek beyond last record");                  $log->warn("seek beyond last record");
357                  $pos = $self->{'max_mfn'};                  $pos = $self->{max_pos};
358          }          }
359    
360          return $self->{'current_mfn'} = (($pos - 1) || -1);          return $self->{pos} = (($pos - 1) || -1);
361  }  }
362    
363    

Legend:
Removed from v.285  
changed lines
  Added in v.307

  ViewVC Help
Powered by ViewVC 1.1.26