/[webpac2]/trunk/lib/WebPAC/Output/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WebPAC/Output/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 374 by dpavlin, Sun Jan 8 22:21:19 2006 UTC revision 401 by dpavlin, Sun Feb 19 16:36:42 2006 UTC
# Line 6  use strict; Line 6  use strict;
6  use base qw/WebPAC::Common/;  use base qw/WebPAC::Common/;
7    
8  use Search::Estraier;  use Search::Estraier;
9  use Text::Iconv;  use Encode qw/from_to/;
10  use Data::Dumper;  use Data::Dumper;
11  use LWP;  use LWP;
12  use URI::Escape;  use URI::Escape;
# Line 17  WebPAC::Output::Estraier - Create Hyper Line 17  WebPAC::Output::Estraier - Create Hyper
17    
18  =head1 VERSION  =head1 VERSION
19    
20  Version 0.09  Version 0.10
21    
22  =cut  =cut
23    
24  our $VERSION = '0.09';  our $VERSION = '0.10';
25    
26  =head1 SYNOPSIS  =head1 SYNOPSIS
27    
# Line 39  Connect to Hyper Estraier index using HT Line 39  Connect to Hyper Estraier index using HT
39          user => 'admin',          user => 'admin',
40          passwd => 'admin',          passwd => 'admin',
41          database => 'demo',          database => 'demo',
42            label => 'node label',
43          encoding => 'iso-8859-2',          encoding => 'iso-8859-2',
44          clean => 1,          clean => 1,
45   );   );
# Line 63  password for user Line 64  password for user
64    
65  name of database from which data comes  name of database from which data comes
66    
67    =item label
68    
69    label for node (optional)
70    
71  =item encoding  =item encoding
72    
73  character encoding of C<data_structure> if it's differenet than C<ISO-8859-2>  character encoding of C<data_structure> if it's differenet than C<ISO-8859-2>
# Line 88  sub new { Line 93  sub new {
93                  $log->logdie("need $p") unless ($self->{$p});                  $log->logdie("need $p") unless ($self->{$p});
94          }          }
95    
96            $self->{encoding} ||= 'ISO-8859-2';
97    
98          my $url = $self->{masterurl} . '/node/' . $self->{database};          my $url = $self->{masterurl} . '/node/' . $self->{database};
99          $self->{url} = $url;          $self->{url} = $url;
100    
# Line 103  sub new { Line 110  sub new {
110          $log->debug("nodes found: $nodes");          $log->debug("nodes found: $nodes");
111    
112          if ($nodes !~ m/^$self->{database}\t/sm) {          if ($nodes !~ m/^$self->{database}\t/sm) {
113                  $log->warn("creating index $url");                  my $label = $self->{label} || 'WebPAC ' . $self->{database};
114                    $log->warn("creating index $url ($label)");
115                  $self->master(                  $self->master(
116                          action => 'nodeadd',                          action => 'nodeadd',
117                          name => $self->{database},                          name => $self->{database},
118                          label => "WebPAC $self->{database}",                          label => $self->convert( $label ),
119                  ) || $log->logdie("can't create Hyper Estraier node $self->{database}");                  ) || $log->logdie("can't create Hyper Estraier node $self->{database}");
120          }          }
121    
# Line 115  sub new { Line 123  sub new {
123          $self->{db}->set_url($self->{url});          $self->{db}->set_url($self->{url});
124          $self->{db}->set_auth($self->{user}, $self->{passwd});          $self->{db}->set_auth($self->{user}, $self->{passwd});
125    
126          my $encoding = $self->{encoding} || 'ISO-8859-2';          $log->info("using index $self->{url} with encoding $self->{encoding}");
         $log->info("using index $self->{url} with encoding $encoding");  
   
         $self->{iconv} = new Text::Iconv($encoding, 'UTF-8') or  
                 $log->logdie("can't create conversion from $encoding to UTF-8");  
127    
128          $self ? return $self : return undef;          $self ? return $self : return undef;
129  }  }
# Line 166  sub add { Line 170  sub add {
170          $log->debug("creating $uri");          $log->debug("creating $uri");
171    
172          my $doc = Search::Estraier::Document->new;          my $doc = Search::Estraier::Document->new;
173          $doc->add_attr('@uri', $self->{'iconv'}->convert($uri) );          $doc->add_attr('@uri', $self->convert($uri) );
174    
175          $log->debug("ds = ", sub { Dumper($args->{'ds'}) } );          $log->debug("ds = ", sub { Dumper($args->{'ds'}) } );
176    
# Line 185  sub add { Line 189  sub add {
189    
190                  next if (! $vals);                  next if (! $vals);
191    
192                  $vals = $self->{'iconv'}->convert( $vals ) or                  $vals = $self->convert( $vals ) or
193                          $log->logdie("can't convert '$vals' to UTF-8");                          $log->logdie("can't convert '$vals' to UTF-8");
194    
195                  $doc->add_attr( $tag, $vals );                  $doc->add_attr( $tag, $vals );
# Line 194  sub add { Line 198  sub add {
198    
199          my $text = $args->{'text'};          my $text = $args->{'text'};
200          if ( $text ) {          if ( $text ) {
201                  $text = $self->{'iconv'}->convert( $text ) or                  $text = $self->convert( $text ) or
202                          $log->logdie("can't convert '$text' to UTF-8");                          $log->logdie("can't convert '$text' to UTF-8");
203                  $doc->add_text( $text );                  $doc->add_text( $text );
204          }          }
# Line 392  sub est_ua { Line 396  sub est_ua {
396          return $self->{_master_ua};          return $self->{_master_ua};
397  }  }
398    
399    =head2 convert
400    
401     my $utf8_string = $self->convert('string in codepage');
402    
403    =cut
404    
405    sub convert {
406            my $self = shift;
407    
408            my $text = shift || return;
409            from_to($text, $self->{encoding}, 'UTF-8');
410            return $text;
411    }
412    
413  =head1 AUTHOR  =head1 AUTHOR
414    
415  Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>  Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>

Legend:
Removed from v.374  
changed lines
  Added in v.401

  ViewVC Help
Powered by ViewVC 1.1.26