--- trunk/Split.pm 2004/08/08 19:22:56 4 +++ trunk/Split.pm 2004/12/17 18:32:34 7 @@ -4,7 +4,7 @@ use strict; use warnings; -our $VERSION = '0.00'; +our $VERSION = '0.01'; use SWISH::API; use Text::Iconv; @@ -17,6 +17,11 @@ use Data::Dumper; +use constant { + ADDED => 1, + DELETED => 2, +}; + =head1 NAME SWISH::Split - Perl interface to split index variant of Swish-e @@ -37,7 +42,7 @@ engines) easier. In the background, it will fork swish-e binaries (one for each index slice) -and produce UTF-8 encoded XML files for it. So, if your imput charset isn't +and produce UTF-8 encoded XML files for it. So, if your input charset isn't C you will have to specify it. =head1 Methods used for indexing @@ -144,14 +149,14 @@ my $swishpath = shift || return; my $data = shift || return; - my ($out,$err) = $self->put_slice($swishpath, $self->to_xml($data)); + my $slice = $self->put_slice($swishpath, $self->to_xml($data)); - if ($err) { - carp "$swishpath: $err"; - return 0; - } +# if ($err) { +# carp "$swishpath: $err"; +# return undef; +# } - return 1; + return $slice; } =head2 delete @@ -167,29 +172,37 @@ - my @paths = @_ || return; + my @paths = @_ or return; + foreach my $path (@paths) { + $self->{'paths'}->{$path} = DELETED; + } + return 42; } -=head2 finish +=head2 done Finish indexing and close index file(s). - $i->finish; + $i->done; This is most time-consuming operation. When it's called, it will re-index all entries which haven't changed in all slices. Returns number of slices updated. +This method should really be called close or finish, but both of those are +already used. + =cut -sub finish { +sub done { my $self = shift; my $ret = 0; foreach my $s (keys %{$self->{'slice'}}) { + print STDERR "closing slice $s\n"; $ret += $self->close_slice($s); } @@ -212,6 +225,11 @@ sub swishpaths { my $self = shift; + + my $s = shift || return; + return if (! 
exists($self->{'slice'}->{$s})); + + return keys %{$self->{'slice'}->{$s}}; } =head2 swishpaths_updated @@ -313,7 +331,6 @@ sub find_paths { my $self = shift; - my $s = shift || return; } @@ -330,9 +347,7 @@ If you want to see what is allready defined for swish-e in configuration take a look at source code for C. -It uses C utility to comunicate with C. Path is provided -by C. Do Windows users have to change that to C -or something similar? +It uses C as C to communicate with C. =cut @@ -351,8 +366,7 @@ print $tmp_fh <<"DEFAULT_SWISH_CONF"; # swish-e config file -IndexDir cat -#SwishProgParameters - +IndexDir stdin # input file definition DefaultContents XML* @@ -410,7 +424,7 @@ print STDERR "creating slice $s\n"; # FIXME - my @swish = qw(swish-e -S prog -c); + my @swish = qw(swish-e -u -S prog -c); push @swish, $swish_config; ## Build the harness, open all pipes, and launch the subprocesses @@ -430,9 +444,11 @@ =head2 put_slice -Pass XML data to swish and receive output and errors. +Pass XML data to swish. - my ($out,$err) = $i->put_slice('/swish/path', 'data'); + my $slice = $i->put_slice('/swish/path', 'data'); + +Returns slice in which XML ended up. =cut @@ -456,6 +472,7 @@ $self->{'slice'}->{$s}->{'in'} .= "Path-Name: $path\n". "Content-Length: ".(length($xml)+1)."\n". + "Update-Mode: Index\n". "Document-Type: XML\n\n$xml\n"; # do I/O @@ -463,6 +480,8 @@ $self->slice_output($s); + $self->{'paths'}->{$path} = ADDED; + return $s; } @@ -470,7 +489,7 @@ Prints to STDERR output and errors from C. - $i->slice_output($s); + my $slice = $i->slice_output($s); Normally, you don't need to call it. 
@@ -488,18 +507,18 @@ if (length $self->{'slice'}->{$s}->{'out'} > $self->{'slice'}->{$s}->{'out_len'}) { #print STDERR "swish-e OUT: ",$self->{'slice'}->{$s}->{'out'},"\n" if ($self->{'slice'}->{$s}->{'out'}); $self->{'slice'}->{$s}->{'out_len'} = length $self->{'slice'}->{$s}->{'out'}; - return 1; + return $s; } elsif (length $self->{'slice'}->{$s}->{'err'} > $self->{'slice'}->{$s}->{'err_len'}) { print STDERR "swish-e ERR: ",$self->{'slice'}->{$s}->{'err'},"\n" if ($self->{'slice'}->{$s}->{'err'}); $self->{'slice'}->{$s}->{'err_len'} = length $self->{'slice'}->{$s}->{'err'}; # this is fatal - return 0; + return undef; } - return 1; + return $s; } -=head2 close_slice { +=head2 close_slice Close slice (terminates swish-e process for that slice). @@ -523,7 +542,7 @@ $self->slice_output($s); # clean up - $self->{'slice'}->{$s}->{'h'}->finish or confess "finish on slice $s returned: $?"; + $self->{'slice'}->{$s}->{'h'}->finish or confess "finish on slice $s returned $?: $! -- ",$self->{'slice'}->{$s}->{'err'}; delete($self->{'slice'}->{$s}) && return 1; return 0; @@ -567,7 +586,7 @@ __END__ -=head2 Searching +=head1 Searching Searching is still conducted using L, but you have to glob index names. @@ -582,11 +601,11 @@ That would also benefit performance, but it increases indexing time because merged indexes must be re-created on each indexing run. -=head2 EXPORT +=head1 EXPORT Nothing by default. -=head2 EXAMPLES +=head1 EXAMPLES Test script for this module uses all parts of API. It's also nice example how to use C.