/[webpac2]/trunk/lib/WebPAC/Input.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WebPAC/Input.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 416 by dpavlin, Sun Feb 26 23:21:50 2006 UTC revision 506 by dpavlin, Mon May 15 09:59:05 2006 UTC
# Line 14  WebPAC::Input - read different file form Line 14  WebPAC::Input - read different file form
14    
15  =head1 VERSION  =head1 VERSION
16    
17  Version 0.04  Version 0.05
18    
19  =cut  =cut
20    
21  our $VERSION = '0.04';  our $VERSION = '0.05';
22    
23  =head1 SYNOPSIS  =head1 SYNOPSIS
24    
# Line 65  Create new input database object. Line 65  Create new input database object.
65          code_page => 'ISO-8859-2',          code_page => 'ISO-8859-2',
66          low_mem => 1,          low_mem => 1,
67          recode => 'char pairs',          recode => 'char pairs',
68            no_progress_bar => 1,
69    );    );
70    
71  C<module> is low-level file format module. See L<WebPAC::Input::Isis> and  C<module> is low-level file format module. See L<WebPAC::Input::Isis> and
# Line 76  default, it C<ISO-8859-2>. Line 77  default, it C<ISO-8859-2>.
77    
78  Default is not to use C<low_mem> options (see L<MEMORY USAGE> below).  Default is not to use C<low_mem> options (see L<MEMORY USAGE> below).
79    
80    C<recode> is an optional string consisting of character or word pairs that
81    should be replaced in the input stream.
82    
83    C<no_progress_bar> disables progress bar output on C<STDOUT>
84    
85  This function will also call low-level C<init> if it exists with same  This function will also call low-level C<init> if it exists with same
86  parameters.  parameters.
87    
# Line 154  This function will read whole database i Line 160  This function will read whole database i
160          limit => 500,          limit => 500,
161          offset => 6000,          offset => 6000,
162          lookup => $lookup_obj,          lookup => $lookup_obj,
163            stats => 1,
164   );   );
165    
166  By default, C<code_page> is assumed to be C<852>.  By default, C<code_page> is assumed to be C<852>.
# Line 162  C<offset> is optional parametar to posit Line 169  C<offset> is optional parametar to posit
169    
170  C<limit> is an optional parameter to read just C<limit> records from the database  C<limit> is an optional parameter to read just C<limit> records from the database
171    
172    C<stats> creates an optional report about usage of fields and subfields
173    
174  Returns size of database, regardless of C<offset> and C<limit>  Returns size of database, regardless of C<offset> and C<limit>
175  parameters, see also C<size>.  parameters, see also C<size>.
176    
# Line 218  sub open { Line 227  sub open {
227                  filter => $filter_ref,                  filter => $filter_ref,
228          );          );
229    
230          unless ($db) {          unless (defined($db)) {
231                  $log->logwarn("can't open database $arg->{path}, skipping...");                  $log->logwarn("can't open database $arg->{path}, skipping...");
232                  return;                  return;
233          }          }
# Line 247  sub open { Line 256  sub open {
256          # store size for later          # store size for later
257          $self->{size} = ($to_rec - $from_rec) ? ($to_rec - $from_rec + 1) : 0;          $self->{size} = ($to_rec - $from_rec) ? ($to_rec - $from_rec + 1) : 0;
258    
259          $log->info("processing $self->{size}/$size records [$from_rec-$to_rec] convert $code_page -> $self->{code_page}");          $log->info("processing $self->{size}/$size records [$from_rec-$to_rec] convert $code_page -> $self->{code_page}", $self->{stats} ? ' [stats]' : '');
260    
261          # read database          # read database
262          for (my $pos = $from_rec; $pos <= $to_rec; $pos++) {          for (my $pos = $from_rec; $pos <= $to_rec; $pos++) {
# Line 273  sub open { Line 282  sub open {
282                  # create lookup                  # create lookup
283                  $self->{'lookup'}->add( $rec ) if ($rec && $self->{'lookup'});                  $self->{'lookup'}->add( $rec ) if ($rec && $self->{'lookup'});
284    
285                  $self->progress_bar($pos,$to_rec);                  # update counters for statistics
286                    if ($self->{stats}) {
287                            map {
288                                    my $fld = $_;
289                                    $self->{_stats}->{fld}->{ $fld }++;
290                                    if (ref($rec->{ $fld }) eq 'ARRAY') {
291                                            map {
292                                                    if (ref($_) eq 'HASH') {
293                                                            map {
294                                                                    $self->{_stats}->{sf}->{ $fld }->{ $_ }++;
295                                                            } keys %{ $_ };
296                                                    } else {
297                                                            $self->{_stats}->{repeatable}->{ $fld }++;
298                                                    }
299                                            } @{ $rec->{$fld} };
300                                    }
301                            } keys %{ $rec };
302                    }
303    
304                    $self->progress_bar($pos,$to_rec) unless ($self->{no_progress_bar});
305    
306          }          }
307    
# Line 319  sub fetch { Line 347  sub fetch {
347                  return;                  return;
348          }          }
349    
350          $self->progress_bar($mfn,$self->{max_pos});          $self->progress_bar($mfn,$self->{max_pos}) unless ($self->{no_progress_bar});
351    
352          my $rec;          my $rec;
353    
# Line 394  sub seek { Line 422  sub seek {
422          return $self->{pos} = (($pos - 1) || -1);          return $self->{pos} = (($pos - 1) || -1);
423  }  }
424    
425    =head2 stats
426    
427    Dump statistics about field and subfield usage
428    
429      print Dumper( $input->stats );
430    
431    =cut
432    
433    sub stats {
434            my $self = shift;
435            return $self->{_stats};
436    }
437    
438  =head1 MEMORY USAGE  =head1 MEMORY USAGE
439    

Legend:
Removed from v.416  
changed lines
  Added in v.506

  ViewVC Help
Powered by ViewVC 1.1.26