/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 368 by dpavlin, Sun Jan 8 20:32:06 2006 UTC revision 372 by dpavlin, Sun Jan 8 21:50:34 2006 UTC
# Line 198  sub data_structure { Line 198  sub data_structure {
198    
199                          my @v;                          my @v;
200                          if ($self->{'lookup_regex'} && $format =~ $self->{'lookup_regex'}) {                          if ($self->{'lookup_regex'} && $format =~ $self->{'lookup_regex'}) {
201                                  @v = $self->fill_in_to_arr($rec,$format);                                  @v = $self->_rec_to_arr($rec,$format,'fill_in');
202                          } else {                          } else {
203                                  @v = $self->parse_to_arr($rec,$format);                                  @v = $self->_rec_to_arr($rec,$format,'parse');
204                          }                          }
205                          if (! @v) {                          if (! @v) {
206                                  $log->debug("$field <",$self->{tag},"> format: $format no values");                                  $log->debug("$field <",$self->{tag},"> format: $format no values");
# Line 308  my $warn_once; Line 308  my $warn_once;
308  sub parse {  sub parse {
309          my $self = shift;          my $self = shift;
310    
311          my ($rec, $format_utf8, $i) = @_;          my ($rec, $format_utf8, $i, $rec_size) = @_;
312    
313          return if (! $format_utf8);          return if (! $format_utf8);
314    
# Line 357  sub parse { Line 357  sub parse {
357                  }                  }
358    
359                  my $found = 0;                  my $found = 0;
360                  my $tmp = $self->get_data(\$rec,$3,$4,$r,\$found);                  my $tmp = $self->get_data(\$rec,$3,$4,$r,\$found,$rec_size);
361    
362                  if ($found) {                  if ($found) {
363                          $found_any->{$fld_type} += $found;                          $found_any->{$fld_type} += $found;
# Line 410  sub parse { Line 410  sub parse {
410          return $out;          return $out;
411  }  }
412    
 =head2 parse_to_arr  
   
 Similar to C<parse>, but returns array of all repeatable fields  
   
  my @arr = $webpac->parse_to_arr($rec,'v250^a');  
   
 =cut  
   
 sub parse_to_arr {  
         my $self = shift;  
   
         my ($rec, $format_utf8) = @_;  
   
         my $log = $self->_get_logger();  
   
         $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);  
         return if (! $format_utf8);  
   
         my $i = 0;  
         my @arr;  
   
         while (my $v = $self->parse($rec,$format_utf8,$i++)) {  
                 push @arr, $v;  
         }  
   
         $log->debug("format '$format_utf8' returned ",--$i," elements: ", sub { join(" | ",@arr) }) if (@arr);  
   
         return @arr;  
 }  
   
   
413  =head2 fill_in  =head2 fill_in
414    
415  Workhorse of all: takes record from in-memory structure of database and  Workhorse of all: takes record from in-memory structure of database and
# Line 462  delimiters before fields which aren't us Line 431  delimiters before fields which aren't us
431  This method will automatically decode UTF-8 string to local code page  This method will automatically decode UTF-8 string to local code page
432  if needed.  if needed.
433    
434    There is an optional parameter C<$rec_size> which can be used to get sizes of
435    all C<field^subfield> combinations in this format.
436    
437     my $text = $webpac->fill_in($rec,'got: v900^a v900^x',0,\$rec_size);
438    
439  =cut  =cut
440    
441  sub fill_in {  sub fill_in {
# Line 469  sub fill_in { Line 443  sub fill_in {
443    
444          my $log = $self->_get_logger();          my $log = $self->_get_logger();
445    
446          my $rec = shift || $log->logconfess("need data record");          my ($rec,$format,$i,$rec_size) = @_;
447          my $format = shift || $log->logconfess("need format to parse");  
448            $log->logconfess("need data record") unless ($rec);
449            $log->logconfess("need format to parse") unless($format);
450    
451          # iteration (for repeatable fields)          # iteration (for repeatable fields)
452          my $i = shift || 0;          $i ||= 0;
453    
454          $log->logdie("infitite loop in format $format") if ($i > ($self->{'max_mfn'} || 9999));          $log->logdie("infitite loop in format $format") if ($i > ($self->{'max_mfn'} || 9999));
455    
# Line 496  sub fill_in { Line 473  sub fill_in {
473    
474          # do actual replacement of placeholders          # do actual replacement of placeholders
475          # repeatable fields          # repeatable fields
476          if ($format =~ s/v(\d+)(?:\^(\w))?/$self->get_data(\$rec,$1,$2,$i,\$found)/ges) {          if ($format =~ s/v(\d+)(?:\^(\w))?/$self->get_data(\$rec,$1,$2,$i,\$found,$rec_size)/ges) {
477                  $just_single = 0;                  $just_single = 0;
478          }          }
479    
480          # non-repeatable fields          # non-repeatable fields
481          if ($format =~ s/s(\d+)(?:\^(\w))?/$self->get_data(\$rec,$1,$2,0,\$found)/ges) {          if ($format =~ s/s(\d+)(?:\^(\w))?/$self->get_data(\$rec,$1,$2,0,\$found,$rec_size)/ges) {
482                  return if ($i > 0 && $just_single);                  return if ($i > 0 && $just_single);
483          }          }
484    
# Line 535  sub fill_in { Line 512  sub fill_in {
512  }  }
513    
514    
515  =head2 fill_in_to_arr  =head2 _rec_to_arr
516    
517  Similar to C<fill_in>, but returns array of all repeatable fields. Usable  Similar to C<parse> and C<fill_in>, but returns array of all repeatable fields. Usable
518  for fields which have lookups, so they shouldn't be parsed but rather  for fields which have lookups, so they shouldn't be parsed but rather
519  C<fill_in>ed.  C<parse>d or C<fill_in>ed. Last argument is name of operation: C<parse> or C<fill_in>.
520    
521   my @arr = $webpac->fill_in_to_arr($rec,'[v900];;[v250^a]');   my @arr = $webpac->_rec_to_arr($rec,'[v900];;[v250^a]','fill_in');
522    
523  =cut  =cut
524    
525  sub fill_in_to_arr {  sub _rec_to_arr {
526          my $self = shift;          my $self = shift;
527    
528          my ($rec, $format_utf8) = @_;          my ($rec, $format_utf8, $code) = @_;
529    
530          my $log = $self->_get_logger();          my $log = $self->_get_logger();
531    
# Line 556  sub fill_in_to_arr { Line 533  sub fill_in_to_arr {
533          return if (! $format_utf8);          return if (! $format_utf8);
534    
535          my $i = 0;          my $i = 0;
536            my $max = 0;
537          my @arr;          my @arr;
538            my $rec_size = {};
539    
540          while (my $v = $self->fill_in($rec,$format_utf8,$i++)) {          while ($i <= $max) {
541                    my $v = $self->$code($rec,$format_utf8,$i++,\$rec_size) || next;
542                  push @arr, $v;                  push @arr, $v;
543                    if ($rec_size) {
544                            foreach my $f (keys %{ $rec_size }) {
545                                    $max = $rec_size->{$f} if ($rec_size->{$f} > $max);
546                            }
547                            warn "max set to $max, rec_size = ", Dumper($rec_size);
548                            undef $rec_size;
549                    }
550          }          }
551    
552          $log->debug("format '$format_utf8' returned ",--$i," elements: ", sub { join(" | ",@arr) }) if (@arr);          $log->debug("format '$format_utf8' returned ",--$i," elements: ", sub { join(" | ",@arr) }) if (@arr);
# Line 572  sub fill_in_to_arr { Line 559  sub fill_in_to_arr {
559    
560  Returns value from record.  Returns value from record.
561    
562   my $text = $self->get_data(\$rec,$f,$sf,$i,\$found,\$fld_occurances);   my $text = $self->get_data(\$rec,$f,$sf,$i,\$found,\$rec_size);
563    
564  Required arguments are:  Required arguments are:
565    
# Line 592  optional subfield Line 579  optional subfield
579    
580  =item C<$i>  =item C<$i>
581    
582  index offset for repeatable values ( 0 ... $#occurances )  index offset for repeatable values ( 0 ... $rec_size->{'400^a'} )
583    
584  =item C<$found>  =item C<$found>
585    
586  optional variable that will be incremented if present  optional variable that will be incremented if present
587    
588  =item C<$fld_occurances>  =item C<$rec_size>
589    
590  hash to hold maximum occurrences of C<field\tsubfield> combinations  hash to hold maximum occurrences of C<field^subfield> combinations
591  (which can be accessed using keys in same format)  (which can be accessed using keys in same format)
592    
593  =back  =back
594    
595  Returns value or empty string, updates C<$found> and C<fld_occurences>  Returns value or empty string, updates C<$found> and C<rec_size>
596  if present.  if present.
597    
598  =cut  =cut
# Line 618  sub get_data { Line 605  sub get_data {
605          return '' unless ($$rec->{$f} && ref($$rec->{$f}) eq 'ARRAY');          return '' unless ($$rec->{$f} && ref($$rec->{$f}) eq 'ARRAY');
606    
607          if (defined($$cache)) {          if (defined($$cache)) {
608                  $$cache->{"$f\t$sf"} ||= $$#rec->{$f};                  $$cache->{ $f . ( $sf ? '^' . $sf : '' ) } ||= scalar @{ $$rec->{$f} };
609          }          }
610    
611          return '' unless ($$rec->{$f}->[$i]);          return '' unless ($$rec->{$f}->[$i]);

Legend:
Removed from v.368  
changed lines
  Added in v.372

  ViewVC Help
Powered by ViewVC 1.1.26