/[webpac2]/trunk/lib/WebPAC/Input.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WebPAC/Input.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 307 by dpavlin, Tue Dec 20 00:03:04 2005 UTC revision 483 by dpavlin, Sun May 14 09:34:05 2006 UTC
# Line 3  package WebPAC::Input; Line 3  package WebPAC::Input;
3  use warnings;  use warnings;
4  use strict;  use strict;
5    
6  use WebPAC::Common;  use WebPAC::Common 0.03;
7  use base qw/WebPAC::Common/;  use base qw/WebPAC::Common/;
8  use Text::Iconv;  use Text::Iconv;
9    use Data::Dumper;
10    
11  =head1 NAME  =head1 NAME
12    
# Line 13  WebPAC::Input - read different file form Line 14  WebPAC::Input - read different file form
14    
15  =head1 VERSION  =head1 VERSION
16    
17  Version 0.03  Version 0.04
18    
19  =cut  =cut
20    
21  our $VERSION = '0.03';  our $VERSION = '0.04';
22    
23  =head1 SYNOPSIS  =head1 SYNOPSIS
24    
# Line 46  Perhaps a little code snippet. Line 47  Perhaps a little code snippet.
47      );      );
48    
49      $db->open('/path/to/database');      $db->open('/path/to/database');
50      print "database size: ",$db->size,"\n";          print "database size: ",$db->size,"\n";
51      while (my $rec = $db->fetch) {          while (my $rec = $db->fetch) {
52      }                  # do something with $rec
53            }
54    
55    
56    
# Line 62  Create new input database object. Line 64  Create new input database object.
64          module => 'WebPAC::Input::MARC',          module => 'WebPAC::Input::MARC',
65          code_page => 'ISO-8859-2',          code_page => 'ISO-8859-2',
66          low_mem => 1,          low_mem => 1,
67            recode => 'char pairs',
68            no_progress_bar => 1,
69    );    );
70    
71  C<module> is low-level file format module. See L<WebPAC::Input::Isis> and  C<module> is low-level file format module. See L<WebPAC::Input::Isis> and
# Line 73  default, it C<ISO-8859-2>. Line 77  default, it C<ISO-8859-2>.
77    
78  Default is not to use C<low_mem> options (see L<MEMORY USAGE> below).  Default is not to use C<low_mem> options (see L<MEMORY USAGE> below).
79    
80    C<recode> is optional string constisting of character or words pairs that
81    should be replaced in input stream.
82    
83    C<no_progress_bar> disables progress bar output on C<STDOUT>
84    
85  This function will also call low-level C<init> if it exists with same  This function will also call low-level C<init> if it exists with same
86  parametars.  parametars.
87    
# Line 182  sub open { Line 191  sub open {
191          # create Text::Iconv object          # create Text::Iconv object
192          $self->{iconv} = Text::Iconv->new($code_page,$self->{'code_page'});          $self->{iconv} = Text::Iconv->new($code_page,$self->{'code_page'});
193    
194            my $filter_ref;
195    
196            if ($self->{recode}) {
197                    my @r = split(/\s/, $self->{recode});
198                    if ($#r % 2 != 1) {
199                            $log->logwarn("recode needs even number of elements (some number of valid pairs)");
200                    } else {
201                            my $recode;
202                            while (@r) {
203                                    my $from = shift @r;
204                                    my $to = shift @r;
205                                    $recode->{$from} = $to;
206                            }
207    
208                            my $regex = join '|' => keys %{ $recode };
209    
210                            $log->debug("using recode regex: $regex");
211                            
212                            $filter_ref = sub {
213                                    my $t = shift;
214                                    $t =~ s/($regex)/$recode->{$1}/g;
215                                    return $t;
216                            };
217    
218                    }
219    
220            }
221    
222          my ($db, $size) = $self->{open_db}->( $self,          my ($db, $size) = $self->{open_db}->( $self,
223                  path => $arg->{path},                  path => $arg->{path},
224                    filter => $filter_ref,
225          );          );
226    
227          unless ($db) {          unless ($db) {
# Line 196  sub open { Line 234  sub open {
234                  return;                  return;
235          }          }
236    
237          my $offset = 1;          my $from_rec = 1;
238          my $limit = $size;          my $to_rec = $size;
239    
240          if (my $s = $self->{offset}) {          if (my $s = $self->{offset}) {
241                  $log->info("skipping to MFN $s");                  $log->info("skipping to MFN $s");
242                  $offset = $s;                  $from_rec = $s;
243          } else {          } else {
244                  $self->{offset} = $offset;                  $self->{offset} = $from_rec;
245          }          }
246    
247          if ($self->{limit}) {          if ($self->{limit}) {
248                  $log->debug("limiting to ",$self->{limit}," records");                  $log->debug("limiting to ",$self->{limit}," records");
249                  $limit = $offset + $self->{limit} - 1;                  $to_rec = $from_rec + $self->{limit} - 1;
250                  $limit = $size if ($limit > $size);                  $to_rec = $size if ($to_rec > $size);
251          }          }
252    
253          # store size for later          # store size for later
254          $self->{size} = ($limit - $offset) ? ($limit - $offset + 1) : 0;          $self->{size} = ($to_rec - $from_rec) ? ($to_rec - $from_rec + 1) : 0;
255    
256          $log->info("processing $self->{size} records in $code_page, convert to $self->{code_page}");          $log->info("processing $self->{size}/$size records [$from_rec-$to_rec] convert $code_page -> $self->{code_page}");
257    
258          # read database          # read database
259          for (my $pos = $offset; $pos <= $limit; $pos++) {          for (my $pos = $from_rec; $pos <= $to_rec; $pos++) {
260    
261                  $log->debug("position: $pos\n");                  $log->debug("position: $pos\n");
262    
263                  my $rec = $self->{fetch_rec}->($self, $db, $pos );                  my $rec = $self->{fetch_rec}->($self, $db, $pos );
264    
265                    $log->debug(sub { Dumper($rec) });
266    
267                  if (! $rec) {                  if (! $rec) {
268                          $log->warn("record $pos empty? skipping...");                          $log->warn("record $pos empty? skipping...");
269                          next;                          next;
# Line 239  sub open { Line 279  sub open {
279                  # create lookup                  # create lookup
280                  $self->{'lookup'}->add( $rec ) if ($rec && $self->{'lookup'});                  $self->{'lookup'}->add( $rec ) if ($rec && $self->{'lookup'});
281    
282                  $self->progress_bar($pos,$limit);                  $self->progress_bar($pos,$to_rec) unless ($self->{no_progress_bar});
283    
284          }          }
285    
# Line 247  sub open { Line 287  sub open {
287          $self->{last_pcnt} = 0;          $self->{last_pcnt} = 0;
288    
289          # store max mfn and return it.          # store max mfn and return it.
290          $self->{max_pos} = $limit;          $self->{max_pos} = $to_rec;
291          $log->debug("max_pos: $limit");          $log->debug("max_pos: $to_rec");
292    
293          return $size;          return $size;
294  }  }
# Line 285  sub fetch { Line 325  sub fetch {
325                  return;                  return;
326          }          }
327    
328          $self->progress_bar($mfn,$self->{max_pos});          $self->progress_bar($mfn,$self->{max_pos}) unless ($self->{no_progress_bar});
329    
330          my $rec;          my $rec;
331    

Legend:
Removed from v.307  
changed lines
  Added in v.483

  ViewVC Help
Powered by ViewVC 1.1.26