4 |
use strict; |
use strict; |
5 |
use warnings; |
use warnings; |
6 |
|
|
7 |
our $VERSION = '0.06'; |
our $VERSION = '0.07_3'; |
8 |
|
|
9 |
=head1 NAME |
=head1 NAME |
10 |
|
|
120 |
|
|
121 |
=head1 Search::Estraier::Document |
=head1 Search::Estraier::Document |
122 |
|
|
123 |
This class implements Document which is collection of attributes |
This class implements Document which is single item in Hyper Estraier. |
|
(key=value), vectors (also key value) display text and hidden text. |
|
124 |
|
|
125 |
|
It's is collection of: |
126 |
|
|
127 |
|
=over 4 |
128 |
|
|
129 |
|
=item attributes |
130 |
|
|
131 |
|
C<< 'key' => 'value' >> pairs which can later be used for filtering of results |
132 |
|
|
133 |
|
You can add common filters to C<attrindex> in estmaster's C<_conf> |
134 |
|
file for better performance. See C<attrindex> in |
135 |
|
L<Hyper Estraier P2P Guide|http://hyperestraier.sourceforge.net/nguide-en.html>. |
136 |
|
|
137 |
|
=item vectors |
138 |
|
|
139 |
|
also C<< 'key' => 'value' >> pairs |
140 |
|
|
141 |
|
=item display text |
142 |
|
|
143 |
|
Text which will be used to create searchable corpus of your index and |
144 |
|
included in snippet output. |
145 |
|
|
146 |
|
=item hidden text |
147 |
|
|
148 |
|
Text which will be searchable, but will not be included in snippet. |
149 |
|
|
150 |
|
=back |
151 |
|
|
152 |
=head2 new |
=head2 new |
153 |
|
|
182 |
|
|
183 |
if ($line =~ m/^%VECTOR\t(.+)$/) { |
if ($line =~ m/^%VECTOR\t(.+)$/) { |
184 |
my @fields = split(/\t/, $1); |
my @fields = split(/\t/, $1); |
185 |
for my $i ( 0 .. ($#fields - 1) ) { |
if ($#fields % 2 == 1) { |
186 |
$self->{kwords}->{ $fields[ $i ] } = $fields[ $i + 1 ]; |
$self->{kwords} = { @fields }; |
187 |
$i++; |
} else { |
188 |
|
warn "can't decode $line\n"; |
189 |
} |
} |
190 |
next; |
next; |
191 |
} elsif ($line =~ m/^%/) { |
} elsif ($line =~ m/^%/) { |
269 |
push @{ $self->{htexts} }, $self->_s($text); |
push @{ $self->{htexts} }, $self->_s($text); |
270 |
} |
} |
271 |
|
|
272 |
|
=head2 add_vectors |
273 |
|
|
274 |
|
Add a vectors |
275 |
|
|
276 |
|
$doc->add_vector( |
277 |
|
'vector_name' => 42, |
278 |
|
'another' => 12345, |
279 |
|
); |
280 |
|
|
281 |
|
=cut |
282 |
|
|
283 |
|
sub add_vectors { |
284 |
|
my $self = shift; |
285 |
|
return unless (@_); |
286 |
|
|
287 |
|
# this is ugly, but works |
288 |
|
die "add_vector needs HASH as argument" unless ($#_ % 2 == 1); |
289 |
|
|
290 |
|
$self->{kwords} = {@_}; |
291 |
|
} |
292 |
|
|
293 |
|
|
294 |
=head2 id |
=head2 id |
295 |
|
|
384 |
} |
} |
385 |
|
|
386 |
if ($self->{kwords}) { |
if ($self->{kwords}) { |
387 |
$draft .= '%%VECTOR'; |
$draft .= '%VECTOR'; |
388 |
while (my ($key, $value) = each %{ $self->{kwords} }) { |
while (my ($key, $value) = each %{ $self->{kwords} }) { |
389 |
$draft .= "\t$key\t$value"; |
$draft .= "\t$key\t$value"; |
390 |
} |
} |
1100 |
|
|
1101 |
$node->put_doc( $document_draft ) or die "can't add document"; |
$node->put_doc( $document_draft ) or die "can't add document"; |
1102 |
|
|
1103 |
Return true on success or false on failture. |
Return true on success or false on failure. |
1104 |
|
|
1105 |
=cut |
=cut |
1106 |
|
|
1108 |
my $self = shift; |
my $self = shift; |
1109 |
my $doc = shift || return; |
my $doc = shift || return; |
1110 |
return unless ($self->{url} && $doc->isa('Search::Estraier::Document')); |
return unless ($self->{url} && $doc->isa('Search::Estraier::Document')); |
1111 |
$self->shuttle_url( $self->{url} . '/put_doc', |
if ($self->shuttle_url( $self->{url} . '/put_doc', |
1112 |
'text/x-estraier-draft', |
'text/x-estraier-draft', |
1113 |
$doc->dump_draft, |
$doc->dump_draft, |
1114 |
undef |
undef |
1115 |
) == 200; |
) == 200) { |
1116 |
|
$self->_clear_info; |
1117 |
|
return 1; |
1118 |
|
} |
1119 |
|
return undef; |
1120 |
} |
} |
1121 |
|
|
1122 |
|
|
1135 |
my $id = shift || return; |
my $id = shift || return; |
1136 |
return unless ($self->{url}); |
return unless ($self->{url}); |
1137 |
croak "id must be number, not '$id'" unless ($id =~ m/^\d+$/); |
croak "id must be number, not '$id'" unless ($id =~ m/^\d+$/); |
1138 |
$self->shuttle_url( $self->{url} . '/out_doc', |
if ($self->shuttle_url( $self->{url} . '/out_doc', |
1139 |
'application/x-www-form-urlencoded', |
'application/x-www-form-urlencoded', |
1140 |
"id=$id", |
"id=$id", |
1141 |
undef |
undef |
1142 |
) == 200; |
) == 200) { |
1143 |
|
$self->_clear_info; |
1144 |
|
return 1; |
1145 |
|
} |
1146 |
|
return undef; |
1147 |
} |
} |
1148 |
|
|
1149 |
|
|
1161 |
my $self = shift; |
my $self = shift; |
1162 |
my $uri = shift || return; |
my $uri = shift || return; |
1163 |
return unless ($self->{url}); |
return unless ($self->{url}); |
1164 |
$self->shuttle_url( $self->{url} . '/out_doc', |
if ($self->shuttle_url( $self->{url} . '/out_doc', |
1165 |
'application/x-www-form-urlencoded', |
'application/x-www-form-urlencoded', |
1166 |
"uri=" . uri_escape($uri), |
"uri=" . uri_escape($uri), |
1167 |
undef |
undef |
1168 |
) == 200; |
) == 200) { |
1169 |
|
$self->_clear_info; |
1170 |
|
return 1; |
1171 |
|
} |
1172 |
|
return undef; |
1173 |
} |
} |
1174 |
|
|
1175 |
|
|
1187 |
my $self = shift; |
my $self = shift; |
1188 |
my $doc = shift || return; |
my $doc = shift || return; |
1189 |
return unless ($self->{url} && $doc->isa('Search::Estraier::Document')); |
return unless ($self->{url} && $doc->isa('Search::Estraier::Document')); |
1190 |
$self->shuttle_url( $self->{url} . '/edit_doc', |
if ($self->shuttle_url( $self->{url} . '/edit_doc', |
1191 |
'text/x-estraier-draft', |
'text/x-estraier-draft', |
1192 |
$doc->dump_draft, |
$doc->dump_draft, |
1193 |
undef |
undef |
1194 |
) == 200; |
) == 200) { |
1195 |
|
$self->_clear_info; |
1196 |
|
return 1; |
1197 |
|
} |
1198 |
|
return undef; |
1199 |
} |
} |
1200 |
|
|
1201 |
|
|
1562 |
push @args, 'wwidth=' . $self->{wwidth}; |
push @args, 'wwidth=' . $self->{wwidth}; |
1563 |
push @args, 'hwidth=' . $self->{hwidth}; |
push @args, 'hwidth=' . $self->{hwidth}; |
1564 |
push @args, 'awidth=' . $self->{awidth}; |
push @args, 'awidth=' . $self->{awidth}; |
1565 |
push @args, 'skip=' . $self->{skip} if ($self->{skip}); |
push @args, 'skip=' . $cond->{skip} if ($cond->{skip}); |
1566 |
|
|
1567 |
return join('&', @args); |
return join('&', @args); |
1568 |
} |
} |
1709 |
croak "mode must be number, not '$mode'" unless ($mode =~ m/^\d+$/); |
croak "mode must be number, not '$mode'" unless ($mode =~ m/^\d+$/); |
1710 |
|
|
1711 |
$self->shuttle_url( $self->{url} . '/_set_user', |
$self->shuttle_url( $self->{url} . '/_set_user', |
1712 |
'text/plain', |
'application/x-www-form-urlencoded', |
1713 |
'name=' . uri_escape($name) . '&mode=' . $mode, |
'name=' . uri_escape($name) . '&mode=' . $mode, |
1714 |
undef |
undef |
1715 |
) == 200; |
) == 200; |
1742 |
undef |
undef |
1743 |
) == 200) { |
) == 200) { |
1744 |
# refresh node info after adding link |
# refresh node info after adding link |
1745 |
$self->_set_info; |
$self->_clear_info; |
1746 |
return 1; |
return 1; |
1747 |
} |
} |
1748 |
|
return undef; |
1749 |
} |
} |
1750 |
|
|
1751 |
=head2 admins |
=head2 admins |
1790 |
return $self->{inform}->{links}; |
return $self->{inform}->{links}; |
1791 |
} |
} |
1792 |
|
|
1793 |
|
=head2 cacheusage |
1794 |
|
|
1795 |
|
Return cache usage for a node |
1796 |
|
|
1797 |
|
my $cache = $node->cacheusage; |
1798 |
|
|
1799 |
|
=cut |
1800 |
|
|
1801 |
|
sub cacheusage { |
1802 |
|
my $self = shift; |
1803 |
|
|
1804 |
|
return unless ($self->{url}); |
1805 |
|
|
1806 |
|
my $resbody; |
1807 |
|
my $rv = $self->shuttle_url( $self->{url} . '/cacheusage', |
1808 |
|
'text/plain', |
1809 |
|
undef, |
1810 |
|
\$resbody, |
1811 |
|
); |
1812 |
|
|
1813 |
|
return if ($rv != 200 || !$resbody); |
1814 |
|
|
1815 |
|
return $resbody; |
1816 |
|
} |
1817 |
|
|
1818 |
=head2 master |
=head2 master |
1819 |
|
|
1820 |
Set actions on Hyper Estraier node master (C<estmaster> process) |
Set actions on Hyper Estraier node master (C<estmaster> process) |
1917 |
) or confess "shuttle_url failed"; |
) or confess "shuttle_url failed"; |
1918 |
|
|
1919 |
if ($status == $rest->{status}) { |
if ($status == $rest->{status}) { |
1920 |
|
|
1921 |
|
# refresh node info after sync |
1922 |
|
$self->_clear_info if ($action eq 'sync' || $action =~ m/^node(?:add|del|clr)$/); |
1923 |
|
|
1924 |
if ($rest->{returns} && wantarray) { |
if ($rest->{returns} && wantarray) { |
1925 |
|
|
1926 |
my @results; |
my @results; |
1978 |
|
|
1979 |
my @lines = split(/[\r\n]/,$resbody); |
my @lines = split(/[\r\n]/,$resbody); |
1980 |
|
|
1981 |
$self->{inform} = {}; |
$self->_clear_info; |
1982 |
|
|
1983 |
( $self->{inform}->{name}, $self->{inform}->{label}, $self->{inform}->{dnum}, |
( $self->{inform}->{name}, $self->{inform}->{label}, $self->{inform}->{dnum}, |
1984 |
$self->{inform}->{wnum}, $self->{inform}->{size} ) = split(/\t/, shift @lines, 5); |
$self->{inform}->{wnum}, $self->{inform}->{size} ) = split(/\t/, shift @lines, 5); |
2003 |
|
|
2004 |
} |
} |
2005 |
|
|
2006 |
|
=head2 _clear_info |
2007 |
|
|
2008 |
|
Clear information for node |
2009 |
|
|
2010 |
|
$node->_clear_info; |
2011 |
|
|
2012 |
|
On next call to C<name>, C<label>, C<doc_num>, C<word_num> or C<size> node |
2013 |
|
info will be fetch again from Hyper Estraier. |
2014 |
|
|
2015 |
|
=cut |
2016 |
|
sub _clear_info { |
2017 |
|
my $self = shift; |
2018 |
|
$self->{inform} = { |
2019 |
|
dnum => -1, |
2020 |
|
wnum => -1, |
2021 |
|
size => -1.0, |
2022 |
|
}; |
2023 |
|
} |
2024 |
|
|
2025 |
### |
### |
2026 |
|
|
2027 |
=head1 EXPORT |
=head1 EXPORT |
2034 |
|
|
2035 |
Hyper Estraier Ruby interface on which this module is based. |
Hyper Estraier Ruby interface on which this module is based. |
2036 |
|
|
2037 |
|
Hyper Estraier now also has pure-perl binding included in distribution. It's |
2038 |
|
a faster way to access databases directly if you are not running |
2039 |
|
C<estmaster> P2P server. |
2040 |
|
|
2041 |
=head1 AUTHOR |
=head1 AUTHOR |
2042 |
|
|
2043 |
Dobrica Pavlinusic, E<lt>dpavlin@rot13.orgE<gt> |
Dobrica Pavlinusic, E<lt>dpavlin@rot13.orgE<gt> |