/[Search-Estraier]/trunk/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 14 by dpavlin, Wed Jan 4 21:51:01 2006 UTC revision 24 by dpavlin, Thu Jan 5 14:33:05 2006 UTC
# Line 4  use 5.008; Line 4  use 5.008;
4  use strict;  use strict;
5  use warnings;  use warnings;
6    
 require Exporter;  
   
 our @ISA = qw(Exporter);  
   
 our %EXPORT_TAGS = ( 'all' => [ qw(  
 ) ] );  
   
 our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );  
   
 our @EXPORT = qw(  
 );  
   
7  our $VERSION = '0.00';  our $VERSION = '0.00';
8    
 use Carp;  
   
9  =head1 NAME  =head1 NAME
10    
11  Search::Estraier - pure perl module to use Hyper Estraier search engine  Search::Estraier - pure perl module to use Hyper Estraier search engine
# Line 41  implementation. It also includes methods Line 27  implementation. It also includes methods
27    
28  =cut  =cut
29    
30    =head2 _s
31    
32    Remove multiple whitespaces from string, as well as whitespaces at beginning or end
33    
34     my $text = $self->_s(" this  is a text  ");
35     $text = 'this is a text';
36    
37    =cut
38    
# Normalise whitespace in a string. Called as a method, so the text to clean
# is the second element of @_ (the invocant is ignored).
#
# Runs of two or more whitespace characters collapse to a single space, then
# leading and trailing whitespace is stripped.
#
# Returns the cleaned string, or nothing (undef in scalar context, empty list
# in list context) when the text is undef or the empty string. The string "0"
# is a valid value and is returned unchanged; a plain truth test would have
# discarded it.
sub _s {
        my $text = $_[1];
        return unless defined($text) && length($text);
        $text =~ s/\s\s+/ /g;
        $text =~ s/^\s+//;
        $text =~ s/\s+$//;
        return $text;
}
46    
47  package Search::Estraier::Document;  package Search::Estraier::Document;
48    
49  use Carp qw/croak confess/;  use Carp qw/croak confess/;
50    
51    use Search::Estraier;
52    our @ISA = qw/Search::Estraier/;
53    
54  =head1 Search::Estraier::Document  =head1 Search::Estraier::Document
55    
56  This class implements Document which is collection of attributes  This class implements Document which is collection of attributes
57  (key=value), vectors (also key value) display text and hidden text.  (key=value), vectors (also key value) display text and hidden text.
58    
 Document for HyperEstraier  
   
59  =head2 new  =head2 new
60    
61  Create new document, empty or from draft.  Create new document, empty or from draft.
# Line 128  sub add_attr { Line 132  sub add_attr {
132    
133          while (my ($name, $value) = each %{ $attrs }) {          while (my ($name, $value) = each %{ $attrs }) {
134                  if (! defined($value)) {                  if (! defined($value)) {
135                          delete( $self->{attrs}->{_s($name)} );                          delete( $self->{attrs}->{ $self->_s($name) } );
136                  } else {                  } else {
137                          $self->{attrs}->{_s($name)} = _s($value);                          $self->{attrs}->{ $self->_s($name) } = $self->_s($value);
138                  }                  }
139          }          }
140    
# Line 151  sub add_text { Line 155  sub add_text {
155          my $text = shift;          my $text = shift;
156          return unless defined($text);          return unless defined($text);
157    
158          push @{ $self->{dtexts} }, _s($text);          push @{ $self->{dtexts} }, $self->_s($text);
159  }  }
160    
161    
# Line 168  sub add_hidden_text { Line 172  sub add_hidden_text {
172          my $text = shift;          my $text = shift;
173          return unless defined($text);          return unless defined($text);
174    
175          push @{ $self->{htexts} }, _s($text);          push @{ $self->{htexts} }, $self->_s($text);
176  }  }
177    
178  =head2 id  =head2 id
# Line 280  Empty document object Line 284  Empty document object
284    
285    $doc->delete;    $doc->delete;
286    
287    This function is an addition to the original Ruby API; since it was included in the C wrappers, it's here as a
288    convenience. Document objects which go out of scope will be destroyed
289    automatically.
290    
291  =cut  =cut
292    
293  sub delete {  sub delete {
# Line 295  sub delete { Line 303  sub delete {
303  }  }
304    
305    
 =head2 _s  
306    
307  Remove multiple whitespaces from string, as well as whitespaces at beginning or end  package Search::Estraier::Condition;
308    
309   my $text = _s(" this  is a text  ");  use Carp qw/confess croak/;
310   $text = 'this is a text';  
311    use Search::Estraier;
312    our @ISA = qw/Search::Estraier/;
313    
314    =head1 Search::Estraier::Condition
315    
316    =head2 new
317    
318      my $cond = new Search::Estraier::Condition;
319    
320  =cut  =cut
321    
322  sub _s {  sub new {
323          my $text = shift || return;          my $class = shift;
324          $text =~ s/\s\s+/ /gs;          my $self = {};
325          $text =~ s/^\s+//;          bless($self, $class);
326          $text =~ s/\s+$//;  
327          return $text;          $self->{max} = -1;
328            $self->{options} = 0;
329    
330            $self ? return $self : return undef;
331  }  }
332    
333    =head2 set_phrase
334    
335      $cond->set_phrase('search phrase');
336    
337    =cut
338    
# Store the search phrase on the condition object after passing it through
# the inherited _s whitespace normaliser.
sub set_phrase {
        my ($self, $phrase) = @_;
        $self->{phrase} = $self->_s($phrase);
}
343    
344    =head2 add_attr
345    
346      $cond->add_attr('@URI STRINC /~dpavlin/');
347    
348    =cut
349    
# Append one attribute search expression to the condition's attrs list.
# The expression is cleaned with the inherited _s helper first. A false
# argument (undef, '', 0) is ignored and the method returns immediately.
sub add_attr {
        my ($self, $attr) = @_;
        return unless $attr;
        push @{ $self->{attrs} }, $self->_s($attr);
}
355    
356    =head2 set_order
357    
358      $cond->set_order('@mdate NUMD');
359    
360    =cut
361    
# Remember the requested result ordering expression exactly as given
# (no whitespace normalisation is applied here).
sub set_order {
        my ($self, $order) = @_;
        $self->{order} = $order;
}
366    
367    =head2 set_max
368    
369      $cond->set_max(42);
370    
371    =cut
372    
# Set the maximum number of results for this condition.
#
# The argument must be a non-negative integer written with digits only;
# anything else (including undef, or a number followed by a newline)
# croaks with "set_max needs number".
sub set_max {
        my $self = shift;
        my $max = shift;
        # defined() check avoids an uninitialized-value warning before the croak;
        # \A...\z rejects a trailing newline that ^...$ would silently accept.
        croak "set_max needs number" unless (defined($max) && $max =~ m/\A\d+\z/);
        $self->{max} = $max;
}
379    
380    =head2 set_options
381    
382      $cond->set_options( SURE => 1 );
383    
384    =cut
385    
# Search option bit flags, keyed by option name. Each flag is a distinct
# bit, so several options can be combined in one integer.
my $options = {
        # check every N-gram key
        SURE => 1 << 0,
        # check N-gram keys skipping by one
        USUAL => 1 << 1,
        # check N-gram keys skipping by two
        FAST => 1 << 2,
        # check N-gram keys skipping by three
        AGITO => 1 << 3,
        # without TF-IDF tuning
        NOIDF => 1 << 4,
        # with the simplified phrase
        SIMPLE => 1 << 10,
};

# Turn on one search option by name (first argument; any further arguments
# are ignored). The option's bit is OR-ed into the condition's numeric
# options mask, so repeated calls accumulate flags.
#
# Dies with a stack trace ("unknown option") if the name is missing or is
# not a key of the table above.
sub set_options {
        my $self = shift;
        my $option = shift;
        confess "unknown option" unless (defined($option) && $options->{$option});
        # Bitwise OR-assign: a logical ||= would keep only the first flag ever set.
        $self->{options} |= $options->{$option};
}
407    
408    =head2 phrase
409    
410    Return search phrase.
411    
412      print $cond->phrase;
413    
414    =cut
415    
# Accessor: hands back whatever is stored under the object's `phrase` key.
sub phrase {
        my ($self) = @_;
        return $self->{phrase};
}
420    
421    =head2 order
422    
423    Return search result order.
424    
425      print $cond->order;
426    
427    =cut
428    
# Accessor: hands back whatever is stored under the object's `order` key.
sub order {
        my ($self) = @_;
        return $self->{order};
}
433    
434    =head2 attrs
435    
436    Return search result attrs.
437    
438      my @cond_attrs = $cond->attrs;
439    
440    =cut
441    
# Return the attribute search expressions collected so far.
#
# In list context this is the list of expressions; in scalar context it is
# their count. When nothing has been added yet the `attrs` slot is still
# undef, so fall back to an empty array reference instead of dereferencing
# undef (which is fatal under `use strict`).
sub attrs {
        my $self = shift;
        #croak "attrs return array, not scalar" if (! wantarray);
        return @{ $self->{attrs} || [] };
}
447    
448    =head2 max
449    
450    Return maximum number of results.
451    
452      print $cond->max;
453    
454    C<-1> is returned for uninitialized value, C<0> is unlimited.
455    
456    =cut
457    
# Accessor: hands back whatever is stored under the object's `max` key.
sub max {
        my ($self) = @_;
        return $self->{max};
}
462    
463    =head2 options
464    
465    Return options for this condition.
466    
467      print $cond->options;
468    
469    Options are returned in numerical form.
470    
471    =cut
472    
# Accessor: hands back the numeric options mask stored under `options`.
sub options {
        my ($self) = @_;
        return $self->{options};
}
477    
478    
479    package Search::Estraier::ResultDocument;
480    
481    use Carp qw/croak/;
482    
483    #use Search::Estraier;
484    #our @ISA = qw/Search::Estraier/;
485    
486    =head1 Search::Estraier::ResultDocument
487    
488    =head2 new
489    
490      my $rdoc = new Search::Estraier::ResultDocument(
491            uri => 'http://localhost/document/uri/42',
492            attrs => {
493                    foo => 1,
494                    bar => 2,
495            },
496            snippet => 'this is a text of snippet',
497            keywords => "this\tare\tkeywords"
498      );
499    
500    =cut
501    
# Constructor. Takes key => value pairs, which become the object's fields.
# Croaks if any of uri, attrs, snippet or keywords is missing or undef;
# otherwise returns the blessed object.
sub new {
        my $class = shift;
        my $self = bless {@_}, $class;

        for my $required (qw/uri attrs snippet keywords/) {
                next if defined $self->{$required};
                croak "missing $required for ResultDocument";
        }

        # A blessed reference is always true, so the object is returned as-is.
        return $self;
}
513    
514    =head2 uri
515    
516    Return URI of result document
517    
518      print $rdoc->uri;
519    
520    =cut
521    
# Accessor: hands back whatever is stored under the object's `uri` key.
sub uri {
        my ($self) = @_;
        return $self->{uri};
}
526    
527    
528    =head2 attr_names
529    
530    Returns array with attribute names from result document object.
531    
532      my @attrs = $rdoc->attr_names;
533    
534    =cut
535    
# Return the attribute names of this result document, sorted as strings.
# Must be called in list context; any other context croaks.
sub attr_names {
        my ($self) = @_;
        croak "attr_names return array, not scalar" unless wantarray;
        my @names = sort keys %{ $self->{attrs} };
        return @names;
}
541    
542    =head2 attr
543    
544    Returns value of an attribute.
545    
546      my $value = $rdoc->attr( 'attribute' );
547    
548    =cut
549    
# Look up a single attribute value by name in the object's attrs hash.
# A false name (undef, '', 0) returns nothing without touching the hash.
sub attr {
        my ($self, $name) = @_;
        return unless $name;
        return $self->{attrs}->{$name};
}
555    
556    =head2 snippet
557    
558    Return snippet from result document
559    
560      print $rdoc->snippet;
561    
562    =cut
563    
# Accessor: hands back whatever is stored under the object's `snippet` key.
sub snippet {
        my ($self) = @_;
        return $self->{snippet};
}
568    
569    =head2 keywords
570    
571    Return keywords from result document
572    
573      print $rdoc->keywords;
574    
575    =cut
576    
# Accessor: hands back whatever is stored under the object's `keywords` key.
sub keywords {
        my ($self) = @_;
        return $self->{keywords};
}
581    
582    
583  package Search::Estraier::Master;  package Search::Estraier::Master;
# Line 326  Controll node master. This requires user Line 592  Controll node master. This requires user
592    
593  {  {
594          package RequestAgent;          package RequestAgent;
595          @ISA = qw(LWP::UserAgent);          our @ISA = qw(LWP::UserAgent);
596    
597          sub new {          sub new {
598                  my $self = LWP::UserAgent::new(@_);                  my $self = LWP::UserAgent::new(@_);
# Line 387  Dobrica Pavlinusic, E<lt>dpavlin@rot13.o Line 653  Dobrica Pavlinusic, E<lt>dpavlin@rot13.o
653    
654  =head1 COPYRIGHT AND LICENSE  =head1 COPYRIGHT AND LICENSE
655    
656  Copyright (C) 2005 by Dobrica Pavlinusic  Copyright (C) 2005-2006 by Dobrica Pavlinusic
657    
658  This library is free software; you can redistribute it and/or modify  This library is free software; you can redistribute it and/or modify
659  it under the GPL v2 or later.  it under the GPL v2 or later.

Legend:
Removed from v.14  
changed lines
  Added in v.24

  ViewVC Help
Powered by ViewVC 1.1.26