/[webpac2]/trunk/lib/WebPAC/Input.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WebPAC/Input.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 487 by dpavlin, Sun May 14 12:34:50 2006 UTC revision 585 by dpavlin, Wed Jul 5 19:52:45 2006 UTC
# Line 3  package WebPAC::Input; Line 3  package WebPAC::Input;
3  use warnings;  use warnings;
4  use strict;  use strict;
5    
6    use blib;
7    
8  use WebPAC::Common;  use WebPAC::Common;
9  use base qw/WebPAC::Common/;  use base qw/WebPAC::Common/;
10  use Text::Iconv;  use Text::Iconv;
# Line 14  WebPAC::Input - read different file form Line 16  WebPAC::Input - read different file form
16    
17  =head1 VERSION  =head1 VERSION
18    
19  Version 0.04  Version 0.07
20    
21  =cut  =cut
22    
23  our $VERSION = '0.04';  our $VERSION = '0.07';
24    
25  =head1 SYNOPSIS  =head1 SYNOPSIS
26    
# Line 42  Perhaps a little code snippet. Line 44  Perhaps a little code snippet.
44      my $db = WebPAC::Input->new(      my $db = WebPAC::Input->new(
45          module => 'WebPAC::Input::ISIS',          module => 'WebPAC::Input::ISIS',
46                  config => $config,                  config => $config,
                 lookup => $lookup_obj,  
47                  low_mem => 1,                  low_mem => 1,
48      );      );
49    
# Line 62  Create new input database object. Line 63  Create new input database object.
63    
64    my $db = new WebPAC::Input(    my $db = new WebPAC::Input(
65          module => 'WebPAC::Input::MARC',          module => 'WebPAC::Input::MARC',
66          code_page => 'ISO-8859-2',          encoding => 'ISO-8859-2',
67          low_mem => 1,          low_mem => 1,
68          recode => 'char pairs',          recode => 'char pairs',
69          no_progress_bar => 1,          no_progress_bar => 1,
# Line 71  Create new input database object. Line 72  Create new input database object.
72  C<module> is low-level file format module. See L<WebPAC::Input::Isis> and  C<module> is low-level file format module. See L<WebPAC::Input::Isis> and
73  L<WebPAC::Input::MARC>.  L<WebPAC::Input::MARC>.
74    
75  Optional parametar C<code_page> specify application code page (which will be  Optional parametar C<encoding> specify application code page (which will be
76  used internally). This should probably be your terminal encoding, and by  used internally). This should probably be your terminal encoding, and by
77  default, it C<ISO-8859-2>.  default, it C<ISO-8859-2>.
78    
# Line 94  sub new { Line 95  sub new {
95    
96          my $log = $self->_get_logger;          my $log = $self->_get_logger;
97    
98            $log->logconfess("code_page argument is not suppored any more. change it to encoding") if ($self->{lookup});
99            $log->logconfess("lookup argument is not suppored any more. rewrite call to lookup_ref") if ($self->{lookup});
100    
101          $log->logconfess("specify low-level file format module") unless ($self->{module});          $log->logconfess("specify low-level file format module") unless ($self->{module});
102          my $module = $self->{module};          my $module = $self->{module};
103          $module =~ s#::#/#g;          $module =~ s#::#/#g;
# Line 119  sub new { Line 123  sub new {
123                  $self->{init}->($self, @_);                  $self->{init}->($self, @_);
124          }          }
125    
126          $self->{'code_page'} ||= 'ISO-8859-2';          $self->{'encoding'} ||= 'ISO-8859-2';
127    
128          # running with low_mem flag? well, use DBM::Deep then.          # running with low_mem flag? well, use DBM::Deep then.
129          if ($self->{'low_mem'}) {          if ($self->{'low_mem'}) {
# Line 160  This function will read whole database i Line 164  This function will read whole database i
164          limit => 500,          limit => 500,
165          offset => 6000,          offset => 6000,
166          lookup => $lookup_obj,          lookup => $lookup_obj,
167            stats => 1,
168            lookup_ref => sub {
169                    my ($k,$v) = @_;
170                    # store lookup $k => $v
171            },
172   );   );
173    
174  By default, C<code_page> is assumed to be C<852>.  By default, C<code_page> is assumed to be C<852>.
# Line 168  C<offset> is optional parametar to posit Line 177  C<offset> is optional parametar to posit
177    
178  C<limit> is optional parametar to read just C<limit> records from database  C<limit> is optional parametar to read just C<limit> records from database
179    
180    C<stats> create optional report about usage of fields and subfields
181    
182    C<lookup_coderef> is closure to call when adding key => $value combinations to
183    lookup.
184    
185  Returns size of database, regardless of C<offset> and C<limit>  Returns size of database, regardless of C<offset> and C<limit>
186  parametars, see also C<size>.  parametars, see also C<size>.
187    
# Line 179  sub open { Line 193  sub open {
193    
194          my $log = $self->_get_logger();          my $log = $self->_get_logger();
195    
196            $log->logconfess("lookup argument is not suppored any more. rewrite call to lookup_coderef") if ($arg->{lookup});
197            $log->logconfess("lookup_coderef must be CODE, not ",ref($arg->{lookup_coderef}))
198                    if ($arg->{lookup_coderef} && ref($arg->{lookup_coderef}) ne 'CODE');
199    
200          $log->logcroak("need path") if (! $arg->{'path'});          $log->logcroak("need path") if (! $arg->{'path'});
201          my $code_page = $arg->{'code_page'} || '852';          my $code_page = $arg->{'code_page'} || '852';
202    
# Line 189  sub open { Line 207  sub open {
207          }          }
208    
209          # create Text::Iconv object          # create Text::Iconv object
210          $self->{iconv} = Text::Iconv->new($code_page,$self->{'code_page'});          $self->{iconv} = Text::Iconv->new($code_page,$self->{'encoding'});      ## FIXME remove!
211    
212          my $filter_ref;          my $filter_ref;
213    
# Line 222  sub open { Line 240  sub open {
240          my ($db, $size) = $self->{open_db}->( $self,          my ($db, $size) = $self->{open_db}->( $self,
241                  path => $arg->{path},                  path => $arg->{path},
242                  filter => $filter_ref,                  filter => $filter_ref,
243                    %{ $arg },
244          );          );
245    
246          unless ($db) {          unless (defined($db)) {
247                  $log->logwarn("can't open database $arg->{path}, skipping...");                  $log->logwarn("can't open database $arg->{path}, skipping...");
248                  return;                  return;
249          }          }
# Line 238  sub open { Line 257  sub open {
257          my $to_rec = $size;          my $to_rec = $size;
258    
259          if (my $s = $self->{offset}) {          if (my $s = $self->{offset}) {
260                  $log->info("skipping to MFN $s");                  $log->debug("skipping to MFN $s");
261                  $from_rec = $s;                  $from_rec = $s;
262          } else {          } else {
263                  $self->{offset} = $from_rec;                  $self->{offset} = $from_rec;
# Line 253  sub open { Line 272  sub open {
272          # store size for later          # store size for later
273          $self->{size} = ($to_rec - $from_rec) ? ($to_rec - $from_rec + 1) : 0;          $self->{size} = ($to_rec - $from_rec) ? ($to_rec - $from_rec + 1) : 0;
274    
275          $log->info("processing $self->{size}/$size records [$from_rec-$to_rec] convert $code_page -> $self->{code_page}");          $log->info("processing $self->{size}/$size records [$from_rec-$to_rec] convert $code_page -> $self->{encoding}", $self->{stats} ? ' [stats]' : '');
276    
277          # read database          # read database
278          for (my $pos = $from_rec; $pos <= $to_rec; $pos++) {          for (my $pos = $from_rec; $pos <= $to_rec; $pos++) {
# Line 277  sub open { Line 296  sub open {
296                  }                  }
297    
298                  # create lookup                  # create lookup
299                  $self->{'lookup'}->add( $rec ) if ($rec && $self->{'lookup'});                  $arg->{'lookup_coderef'}->( $rec ) if ($rec && $arg->{'lookup_coderef'});
300    
301                    # update counters for statistics
302                    if ($self->{stats}) {
303                            map {
304                                    my $fld = $_;
305                                    $self->{_stats}->{fld}->{ $fld }++;
306                                    if (ref($rec->{ $fld }) eq 'ARRAY') {
307                                            map {
308                                                    if (ref($_) eq 'HASH') {
309                                                            map {
310                                                                    $self->{_stats}->{sf}->{ $fld }->{ $_ }++;
311                                                            } keys %{ $_ };
312                                                    } else {
313                                                            $self->{_stats}->{repeatable}->{ $fld }++;
314                                                    }
315                                            } @{ $rec->{$fld} };
316                                    }
317                            } keys %{ $rec };
318                    }
319    
320                  $self->progress_bar($pos,$to_rec) unless ($self->{no_progress_bar});                  $self->progress_bar($pos,$to_rec) unless ($self->{no_progress_bar});
321    
# Line 400  sub seek { Line 438  sub seek {
438          return $self->{pos} = (($pos - 1) || -1);          return $self->{pos} = (($pos - 1) || -1);
439  }  }
440    
441    =head2 stats
442    
443    Dump statistics about field and subfield usage
444    
445      print $input->stats;
446    
447    =cut
448    
449    sub stats {
450            my $self = shift;
451    
452            my $log = $self->_get_logger();
453    
454            my $s = $self->{_stats};
455            if (! $s) {
456                    $log->warn("called stats, but there is no statistics collected");
457                    return;
458            }
459    
460            my $max_fld = 0;
461    
462            my $out = join("\n",
463                    map {
464                            my $f = $_ || die "no field";
465                            my $v = $s->{fld}->{$f} || die "no s->{fld}->{$f}";
466                            $max_fld = $v if ($v > $max_fld);
467    
468                            my $o = sprintf("%4s %d ~", $f, $v);
469    
470                            if (defined($s->{sf}->{$f})) {
471                                    map {
472                                            $o .= sprintf(" %s:%d", $_, $s->{sf}->{$f}->{$_});
473                                    } sort keys %{ $s->{sf}->{$f} };
474                            }
475    
476                            if (my $v_r = $s->{repeatable}->{$f}) {
477                                    $o .= " ($v_r)" if ($v_r != $v);
478                            }
479    
480                            $o;
481                    } sort { $a cmp $b } keys %{ $s->{fld} }
482            );
483    
484            $log->debug( sub { Dumper($s) } );
485    
486            return $out;
487    }
488    
489  =head1 MEMORY USAGE  =head1 MEMORY USAGE
490    

Legend:
Removed from v.487  
changed lines
  Added in v.585

  ViewVC Help
Powered by ViewVC 1.1.26