4 |
use strict; |
use strict; |
5 |
use warnings; |
use warnings; |
6 |
|
|
7 |
our $VERSION = '0.05_1'; |
our $VERSION = '0.07_2'; |
8 |
|
|
9 |
=head1 NAME |
=head1 NAME |
10 |
|
|
20 |
my $node = new Search::Estraier::Node( |
my $node = new Search::Estraier::Node( |
21 |
url => 'http://localhost:1978/node/test', |
url => 'http://localhost:1978/node/test', |
22 |
user => 'admin', |
user => 'admin', |
23 |
passwd => 'admin' |
passwd => 'admin', |
24 |
|
create => 1, |
25 |
|
label => 'Label for node', |
26 |
|
croak_on_error => 1, |
27 |
); |
); |
28 |
|
|
29 |
# create document |
# create document |
120 |
|
|
121 |
=head1 Search::Estraier::Document |
=head1 Search::Estraier::Document |
122 |
|
|
123 |
This class implements Document which is collection of attributes |
This class implements Document which is single item in Hyper Estraier. |
124 |
(key=value), vectors (also key value) display text and hidden text. |
|
125 |
|
It's is collection of: |
126 |
|
|
127 |
|
=over 4 |
128 |
|
|
129 |
|
=item attributes |
130 |
|
|
131 |
|
C<< 'key' => 'value' >> pairs which can later be used for filtering of results |
132 |
|
|
133 |
|
You can add common filters to C<attrindex> in estmaster's C<_conf> |
134 |
|
file for better performance. See C<attrindex> in |
135 |
|
L<Hyper Estraier P2P Guide|http://hyperestraier.sourceforge.net/nguide-en.html>. |
136 |
|
|
137 |
|
=item vectors |
138 |
|
|
139 |
|
also C<< 'key' => 'value' >> pairs |
140 |
|
|
141 |
|
=item display text |
142 |
|
|
143 |
|
Text which will be used to create searchable corpus of your index and |
144 |
|
included in snippet output. |
145 |
|
|
146 |
|
=item hidden text |
147 |
|
|
148 |
|
Text which will be searchable, but will not be included in snippet. |
149 |
|
|
150 |
|
=back |
151 |
|
|
152 |
=head2 new |
=head2 new |
153 |
|
|
900 |
|
|
901 |
my $node = new Search::HyperEstraier::Node( |
my $node = new Search::HyperEstraier::Node( |
902 |
url => 'http://localhost:1978/node/test', |
url => 'http://localhost:1978/node/test', |
903 |
|
user => 'admin', |
904 |
|
passwd => 'admin' |
905 |
|
create => 1, |
906 |
|
label => 'optional node label', |
907 |
debug => 1, |
debug => 1, |
908 |
croak_on_error => 1 |
croak_on_error => 1 |
909 |
); |
); |
916 |
|
|
917 |
URL to node |
URL to node |
918 |
|
|
919 |
|
=item user |
920 |
|
|
921 |
|
specify username for node server authentication |
922 |
|
|
923 |
|
=item passwd |
924 |
|
|
925 |
|
password for authentication |
926 |
|
|
927 |
|
=item create |
928 |
|
|
929 |
|
create node if it doesn't exists |
930 |
|
|
931 |
|
=item label |
932 |
|
|
933 |
|
optional label for new node if C<create> is used |
934 |
|
|
935 |
=item debug |
=item debug |
936 |
|
|
937 |
dumps a B<lot> of debugging output |
dumps a B<lot> of debugging output |
962 |
if ($#_ == 0) { |
if ($#_ == 0) { |
963 |
$self->{url} = shift; |
$self->{url} = shift; |
964 |
} else { |
} else { |
|
my $args = {@_}; |
|
|
|
|
965 |
%$self = ( %$self, @_ ); |
%$self = ( %$self, @_ ); |
966 |
|
|
967 |
|
$self->set_auth( $self->{user}, $self->{passwd} ) if ($self->{user}); |
968 |
|
|
969 |
warn "## Node debug on\n" if ($self->{debug}); |
warn "## Node debug on\n" if ($self->{debug}); |
970 |
} |
} |
971 |
|
|
975 |
size => -1.0, |
size => -1.0, |
976 |
}; |
}; |
977 |
|
|
978 |
|
if ($self->{create}) { |
979 |
|
if (! eval { $self->name } || $@) { |
980 |
|
my $name = $1 if ($self->{url} =~ m#/node/([^/]+)/*#); |
981 |
|
croak "can't find node name in '$self->{url}'" unless ($name); |
982 |
|
my $label = $self->{label} || $name; |
983 |
|
$self->master( |
984 |
|
action => 'nodeadd', |
985 |
|
name => $name, |
986 |
|
label => $label, |
987 |
|
) || croak "can't create node $name ($label)"; |
988 |
|
} |
989 |
|
} |
990 |
|
|
991 |
$self ? return $self : return undef; |
$self ? return $self : return undef; |
992 |
} |
} |
993 |
|
|
1078 |
|
|
1079 |
$node->put_doc( $document_draft ) or die "can't add document"; |
$node->put_doc( $document_draft ) or die "can't add document"; |
1080 |
|
|
1081 |
Return true on success or false on failture. |
Return true on success or false on failure. |
1082 |
|
|
1083 |
=cut |
=cut |
1084 |
|
|
1086 |
my $self = shift; |
my $self = shift; |
1087 |
my $doc = shift || return; |
my $doc = shift || return; |
1088 |
return unless ($self->{url} && $doc->isa('Search::Estraier::Document')); |
return unless ($self->{url} && $doc->isa('Search::Estraier::Document')); |
1089 |
$self->shuttle_url( $self->{url} . '/put_doc', |
if ($self->shuttle_url( $self->{url} . '/put_doc', |
1090 |
'text/x-estraier-draft', |
'text/x-estraier-draft', |
1091 |
$doc->dump_draft, |
$doc->dump_draft, |
1092 |
undef |
undef |
1093 |
) == 200; |
) == 200) { |
1094 |
|
$self->_clear_info; |
1095 |
|
return 1; |
1096 |
|
} |
1097 |
|
return undef; |
1098 |
} |
} |
1099 |
|
|
1100 |
|
|
1113 |
my $id = shift || return; |
my $id = shift || return; |
1114 |
return unless ($self->{url}); |
return unless ($self->{url}); |
1115 |
croak "id must be number, not '$id'" unless ($id =~ m/^\d+$/); |
croak "id must be number, not '$id'" unless ($id =~ m/^\d+$/); |
1116 |
$self->shuttle_url( $self->{url} . '/out_doc', |
if ($self->shuttle_url( $self->{url} . '/out_doc', |
1117 |
'application/x-www-form-urlencoded', |
'application/x-www-form-urlencoded', |
1118 |
"id=$id", |
"id=$id", |
1119 |
undef |
undef |
1120 |
) == 200; |
) == 200) { |
1121 |
|
$self->_clear_info; |
1122 |
|
return 1; |
1123 |
|
} |
1124 |
|
return undef; |
1125 |
} |
} |
1126 |
|
|
1127 |
|
|
1139 |
my $self = shift; |
my $self = shift; |
1140 |
my $uri = shift || return; |
my $uri = shift || return; |
1141 |
return unless ($self->{url}); |
return unless ($self->{url}); |
1142 |
$self->shuttle_url( $self->{url} . '/out_doc', |
if ($self->shuttle_url( $self->{url} . '/out_doc', |
1143 |
'application/x-www-form-urlencoded', |
'application/x-www-form-urlencoded', |
1144 |
"uri=" . uri_escape($uri), |
"uri=" . uri_escape($uri), |
1145 |
undef |
undef |
1146 |
) == 200; |
) == 200) { |
1147 |
|
$self->_clear_info; |
1148 |
|
return 1; |
1149 |
|
} |
1150 |
|
return undef; |
1151 |
} |
} |
1152 |
|
|
1153 |
|
|
1165 |
my $self = shift; |
my $self = shift; |
1166 |
my $doc = shift || return; |
my $doc = shift || return; |
1167 |
return unless ($self->{url} && $doc->isa('Search::Estraier::Document')); |
return unless ($self->{url} && $doc->isa('Search::Estraier::Document')); |
1168 |
$self->shuttle_url( $self->{url} . '/edit_doc', |
if ($self->shuttle_url( $self->{url} . '/edit_doc', |
1169 |
'text/x-estraier-draft', |
'text/x-estraier-draft', |
1170 |
$doc->dump_draft, |
$doc->dump_draft, |
1171 |
undef |
undef |
1172 |
) == 200; |
) == 200) { |
1173 |
|
$self->_clear_info; |
1174 |
|
return 1; |
1175 |
|
} |
1176 |
|
return undef; |
1177 |
} |
} |
1178 |
|
|
1179 |
|
|
1466 |
); |
); |
1467 |
return if ($rv != 200); |
return if ($rv != 200); |
1468 |
|
|
1469 |
my (@docs, $hints); |
my @records = split /--------\[.*?\]--------(?::END)?\r?\n/, $resbody; |
1470 |
|
my $hintsText = splice @records, 0, 2; # starts with empty record |
1471 |
my @lines = split(/\n/, $resbody); |
my $hints = { $hintsText =~ m/^(.*?)\t(.*?)$/gsm }; |
1472 |
return unless (@lines); |
|
1473 |
|
# process records |
1474 |
my $border = $lines[0]; |
my $docs = []; |
1475 |
my $isend = 0; |
foreach my $record (@records) |
1476 |
my $lnum = 1; |
{ |
1477 |
|
# split into keys and snippets |
1478 |
while ( $lnum <= $#lines ) { |
my ($keys, $snippet) = $record =~ m/^(.*?)\n\n(.*?)$/s; |
1479 |
my $line = $lines[$lnum]; |
|
1480 |
$lnum++; |
# create document hash |
1481 |
|
my $doc = { $keys =~ m/^(.*?)=(.*?)$/gsm }; |
1482 |
#warn "## $line\n"; |
$doc->{'@keywords'} = $doc->{keywords}; |
1483 |
if ($line && $line =~ m/^\Q$border\E(:END)*$/) { |
($doc->{keywords}) = $keys =~ m/^%VECTOR\t(.*?)$/gm; |
1484 |
$isend = $1; |
$doc->{snippet} = $snippet; |
1485 |
last; |
|
1486 |
} |
push @$docs, new Search::Estraier::ResultDocument( |
1487 |
|
attrs => $doc, |
1488 |
if ($line =~ /\t/) { |
uri => $doc->{'@uri'}, |
1489 |
my ($k,$v) = split(/\t/, $line, 2); |
snippet => $snippet, |
1490 |
$hints->{$k} = $v; |
keywords => $doc->{'keywords'}, |
1491 |
} |
); |
1492 |
} |
} |
1493 |
|
|
1494 |
my $snum = $lnum; |
return new Search::Estraier::NodeResult( docs => $docs, hints => $hints ); |
|
|
|
|
while( ! $isend && $lnum <= $#lines ) { |
|
|
my $line = $lines[$lnum]; |
|
|
#warn "# $lnum: $line\n"; |
|
|
$lnum++; |
|
|
|
|
|
if ($line && $line =~ m/^\Q$border\E/) { |
|
|
if ($lnum > $snum) { |
|
|
my $rdattrs; |
|
|
my $rdvector; |
|
|
my $rdsnippet; |
|
|
|
|
|
my $rlnum = $snum; |
|
|
while ($rlnum < $lnum - 1 ) { |
|
|
#my $rdline = $self->_s($lines[$rlnum]); |
|
|
my $rdline = $lines[$rlnum]; |
|
|
$rlnum++; |
|
|
last unless ($rdline); |
|
|
if ($rdline =~ /^%/) { |
|
|
$rdvector = $1 if ($rdline =~ /^%VECTOR\t(.+)$/); |
|
|
} elsif($rdline =~ /=/) { |
|
|
$rdattrs->{$1} = $2 if ($rdline =~ /^(.+)=(.+)$/); |
|
|
} else { |
|
|
confess "invalid format of response"; |
|
|
} |
|
|
} |
|
|
while($rlnum < $lnum - 1) { |
|
|
my $rdline = $lines[$rlnum]; |
|
|
$rlnum++; |
|
|
$rdsnippet .= "$rdline\n"; |
|
|
} |
|
|
#warn Dumper($rdvector, $rdattrs, $rdsnippet); |
|
|
if (my $rduri = $rdattrs->{'@uri'}) { |
|
|
push @docs, new Search::Estraier::ResultDocument( |
|
|
uri => $rduri, |
|
|
attrs => $rdattrs, |
|
|
snippet => $rdsnippet, |
|
|
keywords => $rdvector, |
|
|
); |
|
|
} |
|
|
} |
|
|
$snum = $lnum; |
|
|
#warn "### $line\n"; |
|
|
$isend = 1 if ($line =~ /:END$/); |
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
if (! $isend) { |
|
|
warn "received result doesn't have :END\n$resbody"; |
|
|
return; |
|
|
} |
|
|
|
|
|
#warn Dumper(\@docs, $hints); |
|
|
|
|
|
return new Search::Estraier::NodeResult( docs => \@docs, hints => $hints ); |
|
1495 |
} |
} |
1496 |
|
|
1497 |
|
|
1540 |
push @args, 'wwidth=' . $self->{wwidth}; |
push @args, 'wwidth=' . $self->{wwidth}; |
1541 |
push @args, 'hwidth=' . $self->{hwidth}; |
push @args, 'hwidth=' . $self->{hwidth}; |
1542 |
push @args, 'awidth=' . $self->{awidth}; |
push @args, 'awidth=' . $self->{awidth}; |
1543 |
push @args, 'skip=' . $self->{skip} if ($cond->{skip}); |
push @args, 'skip=' . $cond->{skip} if ($cond->{skip}); |
1544 |
|
|
1545 |
return join('&', @args); |
return join('&', @args); |
1546 |
} |
} |
1687 |
croak "mode must be number, not '$mode'" unless ($mode =~ m/^\d+$/); |
croak "mode must be number, not '$mode'" unless ($mode =~ m/^\d+$/); |
1688 |
|
|
1689 |
$self->shuttle_url( $self->{url} . '/_set_user', |
$self->shuttle_url( $self->{url} . '/_set_user', |
1690 |
'text/plain', |
'application/x-www-form-urlencoded', |
1691 |
'name=' . uri_escape($name) . '&mode=' . $mode, |
'name=' . uri_escape($name) . '&mode=' . $mode, |
1692 |
undef |
undef |
1693 |
) == 200; |
) == 200; |
1720 |
undef |
undef |
1721 |
) == 200) { |
) == 200) { |
1722 |
# refresh node info after adding link |
# refresh node info after adding link |
1723 |
$self->_set_info; |
$self->_clear_info; |
1724 |
return 1; |
return 1; |
1725 |
} |
} |
1726 |
|
return undef; |
1727 |
} |
} |
1728 |
|
|
1729 |
=head2 admins |
=head2 admins |
1768 |
return $self->{inform}->{links}; |
return $self->{inform}->{links}; |
1769 |
} |
} |
1770 |
|
|
1771 |
|
=head2 cacheusage |
1772 |
|
|
1773 |
|
Return cache usage for a node |
1774 |
|
|
1775 |
|
my $cache = $node->cacheusage; |
1776 |
|
|
1777 |
|
=cut |
1778 |
|
|
1779 |
|
sub cacheusage { |
1780 |
|
my $self = shift; |
1781 |
|
|
1782 |
|
return unless ($self->{url}); |
1783 |
|
|
1784 |
|
my $resbody; |
1785 |
|
my $rv = $self->shuttle_url( $self->{url} . '/cacheusage', |
1786 |
|
'text/plain', |
1787 |
|
undef, |
1788 |
|
\$resbody, |
1789 |
|
); |
1790 |
|
|
1791 |
|
return if ($rv != 200 || !$resbody); |
1792 |
|
|
1793 |
|
return $resbody; |
1794 |
|
} |
1795 |
|
|
1796 |
|
=head2 master |
1797 |
|
|
1798 |
|
Set actions on Hyper Estraier node master (C<estmaster> process) |
1799 |
|
|
1800 |
|
$node->master( |
1801 |
|
action => 'sync' |
1802 |
|
); |
1803 |
|
|
1804 |
|
All available actions are documented in |
1805 |
|
L<http://hyperestraier.sourceforge.net/nguide-en.html#protocol> |
1806 |
|
|
1807 |
|
=cut |
1808 |
|
|
1809 |
|
my $estmaster_rest = { |
1810 |
|
shutdown => { |
1811 |
|
status => 202, |
1812 |
|
}, |
1813 |
|
sync => { |
1814 |
|
status => 202, |
1815 |
|
}, |
1816 |
|
backup => { |
1817 |
|
status => 202, |
1818 |
|
}, |
1819 |
|
userlist => { |
1820 |
|
status => 200, |
1821 |
|
returns => [ qw/name passwd flags fname misc/ ], |
1822 |
|
}, |
1823 |
|
useradd => { |
1824 |
|
required => [ qw/name passwd flags/ ], |
1825 |
|
optional => [ qw/fname misc/ ], |
1826 |
|
status => 200, |
1827 |
|
}, |
1828 |
|
userdel => { |
1829 |
|
required => [ qw/name/ ], |
1830 |
|
status => 200, |
1831 |
|
}, |
1832 |
|
nodelist => { |
1833 |
|
status => 200, |
1834 |
|
returns => [ qw/name label doc_num word_num size/ ], |
1835 |
|
}, |
1836 |
|
nodeadd => { |
1837 |
|
required => [ qw/name/ ], |
1838 |
|
optional => [ qw/label/ ], |
1839 |
|
status => 200, |
1840 |
|
}, |
1841 |
|
nodedel => { |
1842 |
|
required => [ qw/name/ ], |
1843 |
|
status => 200, |
1844 |
|
}, |
1845 |
|
nodeclr => { |
1846 |
|
required => [ qw/name/ ], |
1847 |
|
status => 200, |
1848 |
|
}, |
1849 |
|
nodertt => { |
1850 |
|
status => 200, |
1851 |
|
}, |
1852 |
|
}; |
1853 |
|
|
1854 |
|
sub master { |
1855 |
|
my $self = shift; |
1856 |
|
|
1857 |
|
my $args = {@_}; |
1858 |
|
|
1859 |
|
# have action? |
1860 |
|
my $action = $args->{action} || croak "need action, available: ", |
1861 |
|
join(", ",keys %{ $estmaster_rest }); |
1862 |
|
|
1863 |
|
# check if action is valid |
1864 |
|
my $rest = $estmaster_rest->{$action}; |
1865 |
|
croak "action '$action' is not supported, available actions: ", |
1866 |
|
join(", ",keys %{ $estmaster_rest }) unless ($rest); |
1867 |
|
|
1868 |
|
croak "BUG: action '$action' needs return status" unless ($rest->{status}); |
1869 |
|
|
1870 |
|
my @args; |
1871 |
|
|
1872 |
|
if ($rest->{required} || $rest->{optional}) { |
1873 |
|
|
1874 |
|
map { |
1875 |
|
croak "need parametar '$_' for action '$action'" unless ($args->{$_}); |
1876 |
|
push @args, $_ . '=' . uri_escape( $args->{$_} ); |
1877 |
|
} ( @{ $rest->{required} } ); |
1878 |
|
|
1879 |
|
map { |
1880 |
|
push @args, $_ . '=' . uri_escape( $args->{$_} ) if ($args->{$_}); |
1881 |
|
} ( @{ $rest->{optional} } ); |
1882 |
|
|
1883 |
|
} |
1884 |
|
|
1885 |
|
my $uri = new URI( $self->{url} ); |
1886 |
|
|
1887 |
|
my $resbody; |
1888 |
|
|
1889 |
|
my $status = $self->shuttle_url( |
1890 |
|
'http://' . $uri->host_port . '/master?action=' . $action , |
1891 |
|
'application/x-www-form-urlencoded', |
1892 |
|
join('&', @args), |
1893 |
|
\$resbody, |
1894 |
|
1, |
1895 |
|
) or confess "shuttle_url failed"; |
1896 |
|
|
1897 |
|
if ($status == $rest->{status}) { |
1898 |
|
|
1899 |
|
# refresh node info after sync |
1900 |
|
$self->_clear_info if ($action eq 'sync' || $action =~ m/^node(?:add|del|clr)$/); |
1901 |
|
|
1902 |
|
if ($rest->{returns} && wantarray) { |
1903 |
|
|
1904 |
|
my @results; |
1905 |
|
my $fields = $#{$rest->{returns}}; |
1906 |
|
|
1907 |
|
foreach my $line ( split(/[\r\n]/,$resbody) ) { |
1908 |
|
my @e = split(/\t/, $line, $fields + 1); |
1909 |
|
my $row; |
1910 |
|
foreach my $i ( 0 .. $fields) { |
1911 |
|
$row->{ $rest->{returns}->[$i] } = $e[ $i ]; |
1912 |
|
} |
1913 |
|
push @results, $row; |
1914 |
|
} |
1915 |
|
|
1916 |
|
return @results; |
1917 |
|
|
1918 |
|
} elsif ($resbody) { |
1919 |
|
chomp $resbody; |
1920 |
|
return $resbody; |
1921 |
|
} else { |
1922 |
|
return 0E0; |
1923 |
|
} |
1924 |
|
} |
1925 |
|
|
1926 |
|
carp "expected status $rest->{status}, but got $status"; |
1927 |
|
return undef; |
1928 |
|
} |
1929 |
|
|
1930 |
=head1 PRIVATE METHODS |
=head1 PRIVATE METHODS |
1931 |
|
|
1956 |
|
|
1957 |
my @lines = split(/[\r\n]/,$resbody); |
my @lines = split(/[\r\n]/,$resbody); |
1958 |
|
|
1959 |
$self->{inform} = {}; |
$self->_clear_info; |
1960 |
|
|
1961 |
( $self->{inform}->{name}, $self->{inform}->{label}, $self->{inform}->{dnum}, |
( $self->{inform}->{name}, $self->{inform}->{label}, $self->{inform}->{dnum}, |
1962 |
$self->{inform}->{wnum}, $self->{inform}->{size} ) = split(/\t/, shift @lines, 5); |
$self->{inform}->{wnum}, $self->{inform}->{size} ) = split(/\t/, shift @lines, 5); |
1981 |
|
|
1982 |
} |
} |
1983 |
|
|
1984 |
|
=head2 _clear_info |
1985 |
|
|
1986 |
|
Clear information for node |
1987 |
|
|
1988 |
|
$node->_clear_info; |
1989 |
|
|
1990 |
|
On next call to C<name>, C<label>, C<doc_num>, C<word_num> or C<size> node |
1991 |
|
info will be fetch again from Hyper Estraier. |
1992 |
|
|
1993 |
|
=cut |
1994 |
|
sub _clear_info { |
1995 |
|
my $self = shift; |
1996 |
|
$self->{inform} = { |
1997 |
|
dnum => -1, |
1998 |
|
wnum => -1, |
1999 |
|
size => -1.0, |
2000 |
|
}; |
2001 |
|
} |
2002 |
|
|
2003 |
### |
### |
2004 |
|
|
2005 |
=head1 EXPORT |
=head1 EXPORT |
2012 |
|
|
2013 |
Hyper Estraier Ruby interface on which this module is based. |
Hyper Estraier Ruby interface on which this module is based. |
2014 |
|
|
2015 |
|
Hyper Estraier now also has pure-perl binding included in distribution. It's |
2016 |
|
a faster way to access databases directly if you are not running |
2017 |
|
C<estmaster> P2P server. |
2018 |
|
|
2019 |
=head1 AUTHOR |
=head1 AUTHOR |
2020 |
|
|
2021 |
Dobrica Pavlinusic, E<lt>dpavlin@rot13.orgE<gt> |
Dobrica Pavlinusic, E<lt>dpavlin@rot13.orgE<gt> |
2022 |
|
|
2023 |
|
Robert Klep E<lt>robert@klep.nameE<gt> contributed refactored search code |
2024 |
|
|
2025 |
=head1 COPYRIGHT AND LICENSE |
=head1 COPYRIGHT AND LICENSE |
2026 |
|
|