/[webpac2]/trunk/lib/WebPAC/Output/SWISH.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WebPAC/Output/SWISH.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1145 by dpavlin, Thu Apr 23 11:14:24 2009 UTC revision 1156 by dpavlin, Sat Apr 25 11:43:46 2009 UTC
# Line 3  package WebPAC::Output::SWISH; Line 3  package WebPAC::Output::SWISH;
3  use warnings;  use warnings;
4  use strict;  use strict;
5    
6  use base qw/WebPAC::Common/;  use lib 'lib';
7    
8  use File::Path qw/make_path/;  use base qw/WebPAC::Common Class::Accessor/;
9    __PACKAGE__->mk_accessors(qw(
10            database
11            input
12            type
13    
14            index_path
15    ));
16    
17    use File::Path qw/mkpath/;
18  use Data::Dump qw/dump/;  use Data::Dump qw/dump/;
19  use YAML;  use YAML;
20    use JSON;
21    
22    
23  =head1 NAME  =head1 NAME
24    
# Line 26  type C<search>. Line 37  type C<search>.
37    
38  =head2 new  =head2 new
39    
40   my $out = new WebPAC::Output::SWISH(   my $out = new WebPAC::Output::SWISH({
41          database => 'demo',          database => 'demo',
42   );   });
43    
44  Options are:  Options are:
45    
# Line 46  Name of database will be used to form UR Line 57  Name of database will be used to form UR
57    
58  our $dir = 'var/swish';  our $dir = 'var/swish';
59    
60  sub new {  sub init {
61          my $class = shift;          my $self = shift;
         my $self = {@_};  
         bless($self, $class);  
62    
63          my $log = $self->_get_logger;          my $log = $self->_get_logger;
64    
65          #$log->debug("self: ", sub { dump($self) });          my $database = $self->database || $log->logdie("need database");
66    
67          my $database = $self->{database} || $log->logdie("need database");          mkpath $dir if ! -e $dir;
68    
69          my $path = "$dir/$database.conf";          my $path = "$dir/$database.conf";
70    
# Line 80  UndefinedXMLAttributes auto Line 89  UndefinedXMLAttributes auto
89  IndexFile $dir/$database  IndexFile $dir/$database
90    
91  # Croatian ISO-8859-2 characters to unaccented equivalents  # Croatian ISO-8859-2 characters to unaccented equivalents
92  TranslateCharacters ¹©ðÐèÈæƾ® ssddcccczz  #TranslateCharacters ¹©ðÐèÈæƾ® ssddcccczz
93    
94    # store data into index
95    PropertyNames data
96    
97  # disable output  # disable output
98  ParserWarnLevel 0  ParserWarnLevel 0
# Line 91  DEFAULT_SWISH_CONF Line 102  DEFAULT_SWISH_CONF
102    
103          close($conf) || die "can't write config $path: $!";          close($conf) || die "can't write config $path: $!";
104    
105          $self->{_swish_index_path} = "$dir/$database";          $self->index_path( "$dir/$database" );
106    
107          my $swish = "swish-e -S prog -c $path";          my $swish = "swish-e -S prog -c $path";
108          open( $self->{_swish_fh}, '-|', $swish ) || die "can't open pipe to $swish: $!";          open( $self->{_swish_fh}, '|-', $swish ) || die "can't open pipe to $swish: $!";
109    
110            $log->info( "created $path ", -s $path, " bytes for ", $self->index_path );
111    
112          $self->{stats} = {};          $self->{stats} = {};
113    
# Line 105  DEFAULT_SWISH_CONF Line 118  DEFAULT_SWISH_CONF
118    
119    my $path = $out->index_path;    my $path = $out->index_path;
120    
 =cut  
   
 sub index_path { $_[0]->{_swish_index_path} };  
   
121  =head2 add  =head2 add
122    
123  Adds one entry to database.    $out->add( 42, $ds );
   
   $out->add(  
         id => 42,  
         ds => $ds,  
         type => 'display',  
         text => 'optional text from which snippet is created',  
   );  
   
 This function will create  entries in index using following URI format:  
   
   C<file:///type/database%20name/000>  
   
 Each tag in C<data_structure> with specified C<type> will create one  
 attribute and corresponding hidden text (used for search).  
124    
125  =cut  =cut
126    
# Line 133  my %escape = ('<'=>'&lt;', '>'=>'&gt;', Line 128  my %escape = ('<'=>'&lt;', '>'=>'&gt;',
128  my $escape_re  = join '|' => keys %escape;  my $escape_re  = join '|' => keys %escape;
129    
130  sub add {  sub add {
131          my $self = shift;          my ($self,$id,$ds) = @_;
   
         my $args = {@_};  
132    
133          my $log = $self->_get_logger;          my $log = $self->_get_logger;
134            $log->debug("id: $id ds = ",sub { dump($ds) });
135    
136          my $database = $self->{'database'} || $log->logconfess('no database in $self');          my $database = $self->database || $log->logconfess('no database in $self');
         $log->logconfess('need db in object') unless ($self->{'db'});  
   
         foreach my $p (qw/id ds type/) {  
                 $log->logdie("need $p") unless ($args->{$p});  
         }  
   
         my $type = $args->{'type'};  
         my $id = $args->{'id'};  
137    
138          my $uri = "$database/$id";          my $uri = $self->database . '/' . $self->input . "/$id";
139          $log->debug("creating $uri");          $log->debug("creating $uri");
140    
141          # filter all tags which have type defined          # filter all tags which have type defined
142            my $type = $self->type || 'search';
143          my @tags = grep {          my @tags = grep {
144                  ref($args->{'ds'}->{$_}) eq 'HASH' && defined( $args->{'ds'}->{$_}->{$type} )                  ref($ds->{$_}) eq 'HASH' && defined( $ds->{$_}->{$type} )
145          } keys %{ $args->{'ds'} };          } keys %{ $ds };
146    
147          $log->debug("tags = ", join(",", @tags));          $log->debug("tags = ", join(",", @tags));
148    
# Line 163  sub add { Line 150  sub add {
150    
151          my $xml = qq{<xml>};          my $xml = qq{<xml>};
152    
153            $xml .= "<$_>" . $self->$_ . "</$_>" foreach ( 'database', 'input' );
154    
155            my $data;
156    
157          foreach my $tag (@tags) {          foreach my $tag (@tags) {
158    
159                  my $vals = join(" ", @{ $args->{'ds'}->{$tag}->{$type} });                  my $r = ref $ds->{$tag}->{$type};
160                    die "tag $tag type $type not ARRAY but '$r' = ",dump( $ds->{$tag}->{$type} ) unless $r eq 'ARRAY';
161    
162                    my $vals = join(" ", @{ $ds->{$tag}->{$type} });
163    
164                  next if ! $vals;                  next if ! $vals;
165    
166                  $vals =~ s/($escape_re)/$escape{$1}/gs;                  $vals =~ s/($escape_re)/$escape{$1}/gs;
167                  $xml .= qq{<$tag><![CDATA[$vals]]></$tag>};                  # BW & EW are our markers for tag boundry
168                    $xml .= qq{<$tag><![CDATA[BW $vals EW]]></$tag>};
169    
170                  $self->{stats}->{attr}->{$tag}++;                  $self->{stats}->{attr}->{$tag}++;
171    
172                    $data->{$tag} = $vals;
173          }          }
174    
175            # serialize to JSON instead of YAML because we will loose whitespace
176            $data = to_json($data);
177            $xml .= qq{<data><![CDATA[$data]]></data>};
178    
179          $xml .= qq{</xml>\n};          $xml .= qq{</xml>\n};
180    
181          my $len = length($xml);          my $len = length($xml);
182    
183          my $fh = $self->{_swish_fh};          my $fh = $self->{_swish_fh} || die "_swish_fh missing";
184    
185            print $fh "Path-Name: $uri\nContent-Length: $len\nDocument-Type: XML\n\n$xml" or
186                    die "can't add $uri: $@\n$xml";
187    
188          print $fh "Path-Name: $uri\nContent-Length: $len\nDocument-Type: XML\n\n$xml";          $log->debug( $xml );
 #               || die "can't add $uri: $@\n$xml\n";  
189    
190          return 1;          return 1;
191  }  }
# Line 201  sub finish { Line 204  sub finish {
204          YAML::DumpFile( $path, $self->{stats} );          YAML::DumpFile( $path, $self->{stats} );
205          $log->info("created  $path ", -s $path, " bytes");          $log->info("created  $path ", -s $path, " bytes");
206          $log->debug( dump( $self->{stats} ) );          $log->debug( dump( $self->{stats} ) );
207    
208            close( $self->{_swish_fh} ) || die "can't close index ", $self->index_path, ": $!";
209  }  }
210    
211  =head1 AUTHOR  =head1 AUTHOR

Legend:
Removed from v.1145  
changed lines
  Added in v.1156

  ViewVC Help
Powered by ViewVC 1.1.26