/[webpac2]/trunk/lib/WebPAC/Input.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WebPAC/Input.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1076 by dpavlin, Wed Nov 28 22:51:43 2007 UTC revision 1100 by dpavlin, Sat Aug 2 23:46:41 2008 UTC
# Line 8  use blib; Line 8  use blib;
8  use WebPAC::Common;  use WebPAC::Common;
9  use base qw/WebPAC::Common/;  use base qw/WebPAC::Common/;
10  use Data::Dump qw/dump/;  use Data::Dump qw/dump/;
11  use Encode qw/from_to/;  use Encode qw/decode from_to/;
12    
13  =head1 NAME  =head1 NAME
14    
15  WebPAC::Input - read different file formats into WebPAC  WebPAC::Input - read different file formats into WebPAC
16    
 =head1 VERSION  
   
 Version 0.18  
   
17  =cut  =cut
18    
19  our $VERSION = '0.18';  our $VERSION = '0.19';
20    
21  =head1 SYNOPSIS  =head1 SYNOPSIS
22    
# Line 61  Create new input database object. Line 57  Create new input database object.
57    
58    my $db = new WebPAC::Input(    my $db = new WebPAC::Input(
59          module => 'WebPAC::Input::MARC',          module => 'WebPAC::Input::MARC',
         encoding => 'ISO-8859-2',  
60          recode => 'char pairs',          recode => 'char pairs',
61          no_progress_bar => 1,          no_progress_bar => 1,
62          input_config => {          input_config => {
# Line 72  Create new input database object. Line 67  Create new input database object.
67  C<module> is low-level file format module. See L<WebPAC::Input::ISIS> and  C<module> is low-level file format module. See L<WebPAC::Input::ISIS> and
68  L<WebPAC::Input::MARC>.  L<WebPAC::Input::MARC>.
69    
 Optional parametar C<encoding> specify application code page (which will be  
 used internally). This should probably be your terminal encoding, and by  
 default, it C<ISO-8859-2>.  
   
70  C<recode> is optional string constisting of character or words pairs that  C<recode> is optional string constisting of character or words pairs that
71  should be replaced in input stream.  should be replaced in input stream.
72    
# Line 93  sub new { Line 84  sub new {
84    
85          my $log = $self->_get_logger;          my $log = $self->_get_logger;
86    
87          $log->logconfess("code_page argument is not suppored any more. change it to encoding") if ($self->{lookup});          $log->logconfess("code_page argument is not suppored any more.") if $self->{code_page};
88          $log->logconfess("lookup argument is not suppored any more. rewrite call to lookup_ref") if ($self->{lookup});          $log->logconfess("encoding argument is not suppored any more.") if $self->{encoding};
89          $log->logconfess("low_mem argument is not suppored any more. rewrite it to load_row and save_row") if ($self->{low_mem});          $log->logconfess("lookup argument is not suppored any more. rewrite call to lookup_ref") if $self->{lookup};
90            $log->logconfess("low_mem argument is not suppored any more. rewrite it to load_row and save_row") if $self->{low_mem};
91    
92          $log->logconfess("specify low-level file format module") unless ($self->{module});          $log->logconfess("specify low-level file format module") unless ($self->{module});
93          my $module_path = $self->{module};          my $module_path = $self->{module};
# Line 105  sub new { Line 97  sub new {
97    
98          require $module_path;          require $module_path;
99    
         $self->{'encoding'} ||= 'ISO-8859-2';  
   
100          $self ? return $self : return undef;          $self ? return $self : return undef;
101  }  }
102    
# Line 118  This function will read whole database i Line 108  This function will read whole database i
108    
109   $input->open(   $input->open(
110          path => '/path/to/database/file',          path => '/path/to/database/file',
111          code_page => 'cp852',          input_encoding => 'cp852',
112            strict_encoding => 0,
113          limit => 500,          limit => 500,
114          offset => 6000,          offset => 6000,
115          stats => 1,          stats => 1,
# Line 143  This function will read whole database i Line 134  This function will read whole database i
134    
135   );   );
136    
137  By default, C<code_page> is assumed to be C<cp852>.  By default, C<input_encoding> is assumed to be C<cp852>.
138    
139  C<offset> is optional parametar to position at some offset before reading from database.  C<offset> is optional parametar to position at some offset before reading from database.
140    
# Line 164  overrides C<modify_records> if both exis Line 155  overrides C<modify_records> if both exis
155  C<save_row> and C<load_row> are low-level implementation of store engine. Calling convention  C<save_row> and C<load_row> are low-level implementation of store engine. Calling convention
156  is documented in example above.  is documented in example above.
157    
158    C<strict_encoding> should really default to 1, but it doesn't for now.
159    
160  Returns size of database, regardless of C<offset> and C<limit>  Returns size of database, regardless of C<offset> and C<limit>
161  parametars, see also C<size>.  parametars, see also C<size>.
162    
# Line 174  sub open { Line 167  sub open {
167          my $arg = {@_};          my $arg = {@_};
168    
169          my $log = $self->_get_logger();          my $log = $self->_get_logger();
170            $log->debug( "arguments: ",dump( $arg ));
171    
172            $log->logconfess("encoding argument is not suppored any more.") if $self->{encoding};
173            $log->logconfess("code_page argument is not suppored any more.") if $self->{code_page};
174          $log->logconfess("lookup argument is not suppored any more. rewrite call to lookup_coderef") if ($arg->{lookup});          $log->logconfess("lookup argument is not suppored any more. rewrite call to lookup_coderef") if ($arg->{lookup});
175          $log->logconfess("lookup_coderef must be CODE, not ",ref($arg->{lookup_coderef}))          $log->logconfess("lookup_coderef must be CODE, not ",ref($arg->{lookup_coderef}))
176                  if ($arg->{lookup_coderef} && ref($arg->{lookup_coderef}) ne 'CODE');                  if ($arg->{lookup_coderef} && ref($arg->{lookup_coderef}) ne 'CODE');
# Line 182  sub open { Line 178  sub open {
178          $log->debug( $arg->{lookup_coderef} ? '' : 'not ', "using lookup_coderef");          $log->debug( $arg->{lookup_coderef} ? '' : 'not ', "using lookup_coderef");
179    
180          $log->logcroak("need path") if (! $arg->{'path'});          $log->logcroak("need path") if (! $arg->{'path'});
181          my $code_page = $arg->{'code_page'} || 'cp852';          my $input_encoding = $arg->{'input_encoding'} || $self->{'input_encoding'} || 'cp852';
182    
183          # store data in object          # store data in object
         $self->{'input_code_page'} = $code_page;  
184          foreach my $v (qw/path offset limit/) {          foreach my $v (qw/path offset limit/) {
185                  $self->{$v} = $arg->{$v} if ($arg->{$v});                  $self->{$v} = $arg->{$v} if ($arg->{$v});
186          }          }
# Line 240  sub open { Line 235  sub open {
235  #               filter => sub {  #               filter => sub {
236  #                       my ($l,$f_nr) = @_;  #                       my ($l,$f_nr) = @_;
237  #                       return unless defined($l);  #                       return unless defined($l);
238  #                       from_to($l, $code_page, $self->{'encoding'});  #                       $l = decode($input_encoding, $l);
239  #                       $l =~ s/($recode_regex)/$recode_map->{$1}/g if ($recode_regex && $recode_map);  #                       $l =~ s/($recode_regex)/$recode_map->{$1}/g if ($recode_regex && $recode_map);
240  #                       return $l;  #                       return $l;
241  #               },  #               },
# Line 278  sub open { Line 273  sub open {
273          # store size for later          # store size for later
274          $self->{size} = ($to_rec - $from_rec) ? ($to_rec - $from_rec + 1) : 0;          $self->{size} = ($to_rec - $from_rec) ? ($to_rec - $from_rec + 1) : 0;
275    
276          $log->info("processing $self->{size}/$size records [$from_rec-$to_rec] convert $code_page -> $self->{encoding}", $self->{stats} ? ' [stats]' : '');          my $strict_encoding = $arg->{strict_encoding} || $self->{strict_encoding}; ## FIXME should be 1 really
277    
278            $log->info("processing $self->{size}/$size records [$from_rec-$to_rec]",
279                    " encoding $input_encoding ", $strict_encoding ? ' [strict]' : '',
280                    $self->{stats} ? ' [stats]' : '',
281            );
282    
283          # read database          # read database
284          for (my $pos = $from_rec; $pos <= $to_rec; $pos++) {          for (my $pos = $from_rec; $pos <= $to_rec; $pos++) {
# Line 296  sub open { Line 296  sub open {
296                                  $log->debug("-=> $f_nr ## $l");                                  $log->debug("-=> $f_nr ## $l");
297    
298                                  # codepage conversion and recode_regex                                  # codepage conversion and recode_regex
299                                  from_to($l, $code_page, $self->{'encoding'});  #                               $l = decode($input_encoding, $l, 1);
300                                    from_to( $l, $input_encoding, 'utf-8', 1 );
301                                  $l =~ s/($recode_regex)/$recode_map->{$1}/g if ($recode_regex && $recode_map);                                  $l =~ s/($recode_regex)/$recode_map->{$1}/g if ($recode_regex && $recode_map);
302    
303                                  # apply regexps                                  # apply regexps

Legend:
Removed from v.1076  
changed lines
  Added in v.1100

  ViewVC Help
Powered by ViewVC 1.1.26