/[webpac2]/trunk/lib/WebPAC/Input.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WebPAC/Input.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 285 by dpavlin, Sun Dec 18 21:06:39 2005 UTC revision 301 by dpavlin, Mon Dec 19 21:26:04 2005 UTC
# Line 11  use Text::Iconv; Line 11  use Text::Iconv;
11    
12  =head1 NAME  =head1 NAME
13    
14  WebPAC::Input - core module for input file format  WebPAC::Input - read different file formats into WebPAC
15    
16  =head1 VERSION  =head1 VERSION
17    
18  Version 0.02  Version 0.03
19    
20  =cut  =cut
21    
22  our $VERSION = '0.02';  our $VERSION = '0.03';
23    
24  =head1 SYNOPSIS  =head1 SYNOPSIS
25    
26  This module is used as base class for all database specific modules  This module implements input as database which have fixed and known
27  (basically, files which have one handle, fixed size while indexing and some  I<size> while indexing and single unique numeric identifier for database
28  kind of numeric idefinirier which goes from 1 to filesize).  position ranging from 1 to I<size>.
29    
30    Simply, something that is indexed by unmber from 1 .. I<size>.
31    
32    Examples of such databases are CDS/ISIS files, MARC files, lines in
33    text file, and so on.
34    
35    Specific file formats are implemented using low-level interface modules,
36    located in C<WebPAC::Input::*> namespace which export C<open_db>,
37    C<fetch_rec> and optional C<init> functions.
38    
39  Perhaps a little code snippet.  Perhaps a little code snippet.
40    
41      use WebPAC::Input;      use WebPAC::Input;
42    
43      my $db = WebPAC::Input->new(      my $db = WebPAC::Input->new(
44          format => 'NULL',          module => 'WebPAC::Input::ISIS',
45          config => $config,                  config => $config,
46          lookup => $lookup_obj,                  lookup => $lookup_obj,
47          low_mem => 1,                  low_mem => 1,
48      );      );
49    
50      $db->open('/path/to/database');      $db->open('/path/to/database');
51      print "database size: ",$db->size,"\n";      print "database size: ",$db->size,"\n";
52      while (my $row = $db->fetch) {      while (my $rec = $db->fetch) {
         ...  
53      }      }
54    
55    
56    
57  =head1 FUNCTIONS  =head1 FUNCTIONS
58    
59  =head2 new  =head2 new
# Line 51  Perhaps a little code snippet. Line 61  Perhaps a little code snippet.
61  Create new input database object.  Create new input database object.
62    
63    my $db = new WebPAC::Input(    my $db = new WebPAC::Input(
64          format => 'NULL'          module => 'WebPAC::Input::MARC',
65          code_page => 'ISO-8859-2',          code_page => 'ISO-8859-2',
66          low_mem => 1,          low_mem => 1,
67    );    );
68    
69    C<module> is low-level file format module. See L<WebPAC::Input::Isis> and
70    L<WebPAC::Input::MARC>.
71    
72  Optional parametar C<code_page> specify application code page (which will be  Optional parametar C<code_page> specify application code page (which will be
73  used internally). This should probably be your terminal encoding, and by  used internally). This should probably be your terminal encoding, and by
74  default, it C<ISO-8859-2>.  default, it C<ISO-8859-2>.
# Line 74  sub new { Line 87  sub new {
87    
88          my $log = $self->_get_logger;          my $log = $self->_get_logger;
89    
90            $log->logconfess("specify low-level file format module") unless ($self->{module});
91            my $module = $self->{module};
92            $module =~ s#::#/#g;
93            $module .= '.pm';
94            $log->debug("require low-level module $self->{module} from $module");
95    
96            require $module;
97            #eval $self->{module} .'->import';
98    
99          # check if required subclasses are implemented          # check if required subclasses are implemented
100          foreach my $subclass (qw/open_db fetch_rec/) {          foreach my $subclass (qw/open_db fetch_rec init/) {
101                  $log->logdie("missing implementation of $subclass") unless ($self->SUPER::can($subclass));                  my $n = $self->{module} . '::' . $subclass;
102                    if (! defined &{ $n }) {
103                            my $missing = "missing $subclass in $self->{module}";
104                            $self->{$subclass} = sub { $log->logwarn($missing) };
105                    } else {
106                            $self->{$subclass} = \&{ $n };
107                    }
108          }          }
109    
110          if ($self->can('init')) {          if ($self->{init}) {
111                  $log->debug("calling init");                  $log->debug("calling init");
112                  $self->init(@_);                  $self->{init}->($self, @_);
113          }          }
114    
115          $self->{'code_page'} ||= 'ISO-8859-2';          $self->{'code_page'} ||= 'ISO-8859-2';
# Line 119  sub new { Line 147  sub new {
147    
148  This function will read whole database in memory and produce lookups.  This function will read whole database in memory and produce lookups.
149    
150   $isis->open(   $input->open(
151          path => '/path/to/database/file',          path => '/path/to/database/file',
152          code_page => '852',          code_page => '852',
153          limit_mfn => 500,          limit => 500,
154          start_mfn => 6000,          offset => 6000,
155          lookup => $lookup_obj,          lookup => $lookup_obj,
156   );   );
157    
158  By default, C<code_page> is assumed to be C<852>.  By default, C<code_page> is assumed to be C<852>.
159    
160  If optional parametar C<start_mfn> is set, this will be first MFN to read  C<offset> is optional parametar to position at some offset before reading from database.
 from database (so you can skip beginning of your database if you need to).  
161    
162  If optional parametar C<limit_mfn> is set, it will read just 500 records  C<limit> is optional parametar to read just C<limit> records from database
 from database in example above.  
163    
164  Returns size of database, regardless of C<start_mfn> and C<limit_mfn>  Returns size of database, regardless of C<offset> and C<limit>
165  parametars, see also C<$isis->size>.  parametars, see also C<size>.
166    
167  =cut  =cut
168    
# Line 150  sub open { Line 176  sub open {
176          my $code_page = $arg->{'code_page'} || '852';          my $code_page = $arg->{'code_page'} || '852';
177    
178          # store data in object          # store data in object
179          $self->{'code_page'} = $code_page;          $self->{'input_code_page'} = $code_page;
180          foreach my $v (qw/path start_mfn limit_mfn/) {          foreach my $v (qw/path offset limit/) {
181                  $self->{$v} = $arg->{$v} if ($arg->{$v});                  $self->{$v} = $arg->{$v} if ($arg->{$v});
182          }          }
183    
184          # create Text::Iconv object          # create Text::Iconv object
185          $self->{iconv} = Text::Iconv->new($code_page,$self->{'code_page'});          $self->{iconv} = Text::Iconv->new($code_page,$self->{'code_page'});
186    
187          my ($db, $size) = $self->open_db(          my ($db, $size) = $self->{open_db}->( $self,
188                  path => $arg->{path},                  path => $arg->{path},
189          );          );
190    
# Line 172  sub open { Line 198  sub open {
198                  return;                  return;
199          }          }
200    
201          my $startmfn = 1;          my $offset = 1;
202          my $maxmfn = $size;          my $limit = $size;
203    
204          if (my $s = $self->{start_mfn}) {          if (my $s = $self->{offset}) {
205                  $log->info("skipping to MFN $s");                  $log->info("skipping to MFN $s");
206                  $startmfn = $s;                  $offset = $s;
207          } else {          } else {
208                  $self->{start_mfn} = $startmfn;                  $self->{offset} = $offset;
209          }          }
210    
211          if ($self->{limit_mfn}) {          if ($self->{limit}) {
212                  $log->info("limiting to ",$self->{limit_mfn}," records");                  $log->debug("limiting to ",$self->{limit}," records");
213                  $maxmfn = $startmfn + $self->{limit_mfn} - 1;                  $limit = $offset + $self->{limit} - 1;
214                  $maxmfn = $size if ($maxmfn > $size);                  $limit = $size if ($limit > $size);
215          }          }
216    
217          # store size for later          # store size for later
218          $self->{size} = ($maxmfn - $startmfn) ? ($maxmfn - $startmfn + 1) : 0;          $self->{size} = ($limit - $offset) ? ($limit - $offset + 1) : 0;
219    
220          $log->info("processing $self->{size} records in $code_page, convert to $self->{code_page}");          $log->info("processing $self->{size} records in $code_page, convert to $self->{code_page}");
221    
222          # read database          # read database
223          for (my $mfn = $startmfn; $mfn <= $maxmfn; $mfn++) {          for (my $pos = $offset; $pos <= $limit; $pos++) {
224    
225                  $log->debug("mfn: $mfn\n");                  $log->debug("position: $pos\n");
226    
227                  my $rec = $self->fetch_rec( $db, $mfn );                  my $rec = $self->{fetch_rec}->($self, $db, $pos );
228    
229                  if (! $rec) {                  if (! $rec) {
230                          $log->warn("record $mfn empty? skipping...");                          $log->warn("record $pos empty? skipping...");
231                          next;                          next;
232                  }                  }
233    
234                  # store                  # store
235                  if ($self->{'low_mem'}) {                  if ($self->{low_mem}) {
236                          $self->{'db'}->put($mfn, $rec);                          $self->{db}->put($pos, $rec);
237                  } else {                  } else {
238                          $self->{'data'}->{$mfn} = $rec;                          $self->{data}->{$pos} = $rec;
239                  }                  }
240    
241                  # create lookup                  # create lookup
242                  $self->{'lookup'}->add( $rec ) if ($rec && $self->{'lookup'});                  $self->{'lookup'}->add( $rec ) if ($rec && $self->{'lookup'});
243    
244                  $self->progress_bar($mfn,$maxmfn);                  $self->progress_bar($pos,$limit);
245    
246          }          }
247    
248          $self->{'current_mfn'} = -1;          $self->{pos} = -1;
249          $self->{'last_pcnt'} = 0;          $self->{last_pcnt} = 0;
   
         $log->debug("max mfn: $maxmfn");  
250    
251          # store max mfn and return it.          # store max mfn and return it.
252          $self->{'max_mfn'} = $maxmfn;          $self->{max_pos} = $limit;
253            $log->debug("max_pos: $limit");
254    
255          return $size;          return $size;
256  }  }
# Line 246  sub fetch { Line 271  sub fetch {
271    
272          my $log = $self->_get_logger();          my $log = $self->_get_logger();
273    
274          $log->logconfess("it seems that you didn't load database!") unless ($self->{'current_mfn'});          $log->logconfess("it seems that you didn't load database!") unless ($self->{pos});
275    
276          if ($self->{'current_mfn'} == -1) {          if ($self->{pos} == -1) {
277                  $self->{'current_mfn'} = $self->{'start_mfn'};                  $self->{pos} = $self->{offset};
278          } else {          } else {
279                  $self->{'current_mfn'}++;                  $self->{pos}++;
280          }          }
281    
282          my $mfn = $self->{'current_mfn'};          my $mfn = $self->{pos};
283    
284          if ($mfn > $self->{'max_mfn'}) {          if ($mfn > $self->{max_pos}) {
285                  $self->{'current_mfn'} = $self->{'max_mfn'};                  $self->{pos} = $self->{max_pos};
286                  $log->debug("at EOF");                  $log->debug("at EOF");
287                  return;                  return;
288          }          }
289    
290          $self->progress_bar($mfn,$self->{'max_mfn'});          $self->progress_bar($mfn,$self->{max_pos});
291    
292          my $rec;          my $rec;
293    
294          if ($self->{'low_mem'}) {          if ($self->{low_mem}) {
295                  $rec = $self->{'db'}->get($mfn);                  $rec = $self->{db}->get($mfn);
296          } else {          } else {
297                  $rec = $self->{'data'}->{$mfn};                  $rec = $self->{data}->{$mfn};
298          }          }
299    
300          $rec ||= 0E0;          $rec ||= 0E0;
# Line 287  First record in database has position 1. Line 312  First record in database has position 1.
312    
313  sub pos {  sub pos {
314          my $self = shift;          my $self = shift;
315          return $self->{'current_mfn'};          return $self->{pos};
316  }  }
317    
318    
# Line 301  Result from this function can be used to Line 326  Result from this function can be used to
326    
327   foreach my $mfn ( 1 ... $isis->size ) { ... }   foreach my $mfn ( 1 ... $isis->size ) { ... }
328    
329  because it takes into account C<start_mfn> and C<limit_mfn>.  because it takes into account C<offset> and C<limit>.
330    
331  =cut  =cut
332    
333  sub size {  sub size {
334          my $self = shift;          my $self = shift;
335          return $self->{'size'};          return $self->{size};
336  }  }
337    
338  =head2 seek  =head2 seek
# Line 329  sub seek { Line 354  sub seek {
354          if ($pos < 1) {          if ($pos < 1) {
355                  $log->warn("seek before first record");                  $log->warn("seek before first record");
356                  $pos = 1;                  $pos = 1;
357          } elsif ($pos > $self->{'max_mfn'}) {          } elsif ($pos > $self->{max_pos}) {
358                  $log->warn("seek beyond last record");                  $log->warn("seek beyond last record");
359                  $pos = $self->{'max_mfn'};                  $pos = $self->{max_pos};
360          }          }
361    
362          return $self->{'current_mfn'} = (($pos - 1) || -1);          return $self->{pos} = (($pos - 1) || -1);
363  }  }
364    
365    

Legend:
Removed from v.285  
changed lines
  Added in v.301

  ViewVC Help
Powered by ViewVC 1.1.26