/[webpac2]/trunk/lib/WebPAC/Input.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WebPAC/Input.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 697 by dpavlin, Mon Sep 25 09:49:28 2006 UTC revision 760 by dpavlin, Wed Oct 25 15:56:44 2006 UTC
# Line 16  WebPAC::Input - read different file form Line 16  WebPAC::Input - read different file form
16    
17  =head1 VERSION  =head1 VERSION
18    
19  Version 0.12  Version 0.13
20    
21  =cut  =cut
22    
23  our $VERSION = '0.12';  our $VERSION = '0.13';
24    
25  =head1 SYNOPSIS  =head1 SYNOPSIS
26    
# Line 98  sub new { Line 98  sub new {
98          $log->logconfess("lookup argument is not suppored any more. rewrite call to lookup_ref") if ($self->{lookup});          $log->logconfess("lookup argument is not suppored any more. rewrite call to lookup_ref") if ($self->{lookup});
99    
100          $log->logconfess("specify low-level file format module") unless ($self->{module});          $log->logconfess("specify low-level file format module") unless ($self->{module});
101          my $module = $self->{module};          my $module_path = $self->{module};
102          $module =~ s#::#/#g;          $module_path =~ s#::#/#g;
103          $module .= '.pm';          $module_path .= '.pm';
104          $log->debug("require low-level module $self->{module} from $module");          $log->debug("require low-level module $self->{module} from $module_path");
105    
106          require $module;          require $module_path;
         #eval $self->{module} .'->import';  
107    
108          # check if required subclasses are implemented          # check if required subclasses are implemented
109          foreach my $subclass (qw/open_db fetch_rec init dump_rec/) {          foreach my $subclass (qw/open_db fetch_rec init dump_rec/) {
110                  my $n = $self->{module} . '::' . $subclass;                  # FIXME
                 if (! defined &{ $n }) {  
                         my $missing = "missing $subclass in $self->{module}";  
                         $self->{$subclass} = sub { $log->logwarn($missing) };  
                 } else {  
                         $self->{$subclass} = \&{ $n };  
                 }  
         }  
   
         if ($self->{init}) {  
                 $log->debug("calling init");  
                 $self->{init}->($self, @_);  
111          }          }
112    
113          $self->{'encoding'} ||= 'ISO-8859-2';          $self->{'encoding'} ||= 'ISO-8859-2';
114    
         # running with low_mem flag? well, use DBM::Deep then.  
         if ($self->{'low_mem'}) {  
                 $log->info("running with low_mem which impacts performance (<32 Mb memory usage)");  
   
                 my $db_file = "data.db";  
   
                 if (-e $db_file) {  
                         unlink $db_file or $log->logdie("can't remove '$db_file' from last run");  
                         $log->debug("removed '$db_file' from last run");  
                 }  
   
                 require DBM::Deep;  
   
                 my $db = new DBM::Deep $db_file;  
   
                 $log->logdie("DBM::Deep error: $!") unless ($db);  
   
                 if ($db->error()) {  
                         $log->logdie("can't open '$db_file' under low_mem: ",$db->error());  
                 } else {  
                         $log->debug("using file '$db_file' for DBM::Deep");  
                 }  
   
                 $self->{'db'} = $db;  
         }  
   
115          $self ? return $self : return undef;          $self ? return $self : return undef;
116  }  }
117    
# Line 162  This function will read whole database i Line 124  This function will read whole database i
124          code_page => 'cp852',          code_page => 'cp852',
125          limit => 500,          limit => 500,
126          offset => 6000,          offset => 6000,
         lookup => $lookup_obj,  
127          stats => 1,          stats => 1,
128          lookup_ref => sub {          lookup_coderef => sub {
129                  my ($k,$v) = @_;                  my $rec = shift;
130                  # store lookup $k => $v                  # store lookups
131          },          },
132          modify_records => {          modify_records => {
133                  900 => { '^a' => { ' : ' => '^b' } },                  900 => { '^a' => { ' : ' => '^b' } },
# Line 183  C<limit> is optional parametar to read j Line 144  C<limit> is optional parametar to read j
144    
145  C<stats> create optional report about usage of fields and subfields  C<stats> create optional report about usage of fields and subfields
146    
147  C<lookup_coderef> is closure to call when adding C<< key => 'value' >> combinations to  C<lookup_coderef> is closure to called to save data into lookups
 lookup.  
148    
149  C<modify_records> specify mapping from subfields to delimiters or from  C<modify_records> specify mapping from subfields to delimiters or from
150  delimiters to subfields, as well as oprations on fields (if subfield is  delimiters to subfields, as well as oprations on fields (if subfield is
# Line 209  sub open { Line 169  sub open {
169          $log->logconfess("lookup_coderef must be CODE, not ",ref($arg->{lookup_coderef}))          $log->logconfess("lookup_coderef must be CODE, not ",ref($arg->{lookup_coderef}))
170                  if ($arg->{lookup_coderef} && ref($arg->{lookup_coderef}) ne 'CODE');                  if ($arg->{lookup_coderef} && ref($arg->{lookup_coderef}) ne 'CODE');
171    
172            $log->debug( $arg->{lookup_coderef} ? '' : 'not ', "using lookup_coderef");
173    
174          $log->logcroak("need path") if (! $arg->{'path'});          $log->logcroak("need path") if (! $arg->{'path'});
175          my $code_page = $arg->{'code_page'} || 'cp852';          my $code_page = $arg->{'code_page'} || 'cp852';
176    
# Line 250  sub open { Line 212  sub open {
212          }          }
213          $log->debug("rec_regex: ", Dumper($rec_regex)) if ($rec_regex);          $log->debug("rec_regex: ", Dumper($rec_regex)) if ($rec_regex);
214    
215          my ($db, $size) = $self->{open_db}->( $self,          my $class = $self->{module} || $log->logconfess("can't get low-level module name!");
216    
217            my $ll_db = $class->new(
218                  path => $arg->{path},                  path => $arg->{path},
219  #               filter => sub {  #               filter => sub {
220  #                       my ($l,$f_nr) = @_;  #                       my ($l,$f_nr) = @_;
# Line 262  sub open { Line 226  sub open {
226                  %{ $arg },                  %{ $arg },
227          );          );
228    
229          unless (defined($db)) {          unless (defined($ll_db)) {
230                  $log->logwarn("can't open database $arg->{path}, skipping...");                  $log->logwarn("can't open database $arg->{path}, skipping...");
231                  return;                  return;
232          }          }
233    
234            my $size = $ll_db->size;
235    
236          unless ($size) {          unless ($size) {
237                  $log->logwarn("no records in database $arg->{path}, skipping...");                  $log->logwarn("no records in database $arg->{path}, skipping...");
238                  return;                  return;
# Line 293  sub open { Line 259  sub open {
259    
260          $log->info("processing $self->{size}/$size records [$from_rec-$to_rec] convert $code_page -> $self->{encoding}", $self->{stats} ? ' [stats]' : '');          $log->info("processing $self->{size}/$size records [$from_rec-$to_rec] convert $code_page -> $self->{encoding}", $self->{stats} ? ' [stats]' : '');
261    
262            # turn on low_mem for databases with more than 100000 records!
263            if (! $self->{low_mem} && $size > 100000) {
264                    $log->warn("Using on-disk storage instead of memory for input data. This will affect performance.");
265                    $self->{low_mem}++;
266            }
267    
268            # running with low_mem flag? well, use DBM::Deep then.
269            if ($self->{'low_mem'}) {
270                    $log->info("running with low_mem which impacts performance (<32 Mb memory usage)");
271    
272                    my $db_file = "data.db";
273    
274                    if (-e $db_file) {
275                            unlink $db_file or $log->logdie("can't remove '$db_file' from last run");
276                            $log->debug("removed '$db_file' from last run");
277                    }
278    
279                    require DBM::Deep;
280    
281                    my $db = new DBM::Deep $db_file;
282    
283                    $log->logdie("DBM::Deep error: $!") unless ($db);
284    
285                    if ($db->error()) {
286                            $log->logdie("can't open '$db_file' under low_mem: ",$db->error());
287                    } else {
288                            $log->debug("using file '$db_file' for DBM::Deep");
289                    }
290    
291                    $self->{'db'} = $db;
292            }
293    
294          # read database          # read database
295          for (my $pos = $from_rec; $pos <= $to_rec; $pos++) {          for (my $pos = $from_rec; $pos <= $to_rec; $pos++) {
296    
297                  $log->debug("position: $pos\n");                  $log->debug("position: $pos\n");
298    
299                  my $rec = $self->{fetch_rec}->($self, $pos, sub {                  my $rec = $ll_db->fetch_rec($pos, sub {
300                                  my ($l,$f_nr) = @_;                                  my ($l,$f_nr) = @_;
301  #                               return unless defined($l);  #                               return unless defined($l);
302  #                               return $l unless ($rec_regex && $f_nr);  #                               return $l unless ($rec_regex && $f_nr);
# Line 348  sub open { Line 346  sub open {
346                  if ($self->{stats}) {                  if ($self->{stats}) {
347    
348                          # fetch clean record with regexpes applied for statistics                          # fetch clean record with regexpes applied for statistics
349                          my $rec = $self->{fetch_rec}->($self, $pos);                          my $rec = $ll_db->fetch_rec($pos);
350    
351                          foreach my $fld (keys %{ $rec }) {                          foreach my $fld (keys %{ $rec }) {
352                                  $self->{_stats}->{fld}->{ $fld }++;                                  $self->{_stats}->{fld}->{ $fld }++;
# Line 385  sub open { Line 383  sub open {
383          $self->{max_pos} = $to_rec;          $self->{max_pos} = $to_rec;
384          $log->debug("max_pos: $to_rec");          $log->debug("max_pos: $to_rec");
385    
386            # save for dump
387            $self->{ll_db} = $ll_db;
388    
389          return $size;          return $size;
390  }  }
391    
# Line 555  Display humanly readable dump of record Line 556  Display humanly readable dump of record
556  sub dump {  sub dump {
557          my $self = shift;          my $self = shift;
558    
559          return $self->{dump_rec}->($self, $self->{pos});          return $self->{ll_db}->dump_rec( $self->{pos} );
560    
561  }  }
562    

Legend:
Removed from v.697  
changed lines
  Added in v.760

  ViewVC Help
Powered by ViewVC 1.1.26