21 |
|
|
22 |
my $debug = 1; |
my $debug = 1; |
23 |
|
|
24 |
my $abs_path = abs_path( $0 ); |
my $abs_path; |
25 |
$abs_path =~ s!/[^/]*$!/!; #!fix-vim |
|
26 |
|
BEGIN { |
27 |
|
use CGI::Carp qw(carpout); |
28 |
|
|
29 |
|
$abs_path = abs_path( $0 ); |
30 |
|
$abs_path =~ s!/[^/]*$!/!; #!fix-vim |
31 |
|
|
32 |
|
open(my $log_fh, '>>', "$abs_path/log") or |
33 |
|
die("Unable to open $abs_path/log: $!\n"); |
34 |
|
carpout($log_fh); |
35 |
|
} |
36 |
|
|
37 |
|
|
38 |
############################################################# |
############################################################# |
39 |
my $COLLECTION = 'EPrints'; |
my $COLLECTION = 'EPrints'; |
51 |
my $similar = $cgi->param( 'similar' ) || ''; |
my $similar = $cgi->param( 'similar' ) || ''; |
52 |
my $slogovi = $cgi->param( 'slogovi' ) || ''; |
my $slogovi = $cgi->param( 'slogovi' ) || ''; |
53 |
my $stem = $cgi->param( 'stem' ) || ''; |
my $stem = $cgi->param( 'stem' ) || ''; |
54 |
my $kino = $cgi->param( 'kino' ); |
my $similar_to_kino = $cgi->param( 'similar_to_kino' ); |
55 |
|
|
56 |
my $charset='iso-8859-2'; |
my $charset='iso-8859-2'; |
57 |
|
|
69 |
<html xmlns="http://www.w3.org/1999/xhtml"> |
<html xmlns="http://www.w3.org/1999/xhtml"> |
70 |
<head> |
<head> |
71 |
<meta http-equiv="content-type" content="text/html; charset=$charset" /> |
<meta http-equiv="content-type" content="text/html; charset=$charset" /> |
72 |
<title>Search Engine</title> |
<title>Semantic Search Engine</title> |
73 |
</head> |
</head> |
74 |
<body> |
<body> |
75 |
<form method="get" action=""> |
<form method="get" action=""> |
77 |
Enter bunch of related terms to documents you are trying to find: |
Enter bunch of related terms to documents you are trying to find: |
78 |
<br/><input type="text" name="query" value="$query" size="80"> |
<br/><input type="text" name="query" value="$query" size="80"> |
79 |
<br/><input type="submit" /> |
<br/><input type="submit" /> |
80 |
|, $cgi->checkbox( -name => 'stem' ), $cgi->checkbox( -name => 'slogovi' ), qq| |
|, |
81 |
|
$cgi->checkbox( -name => 'stem' ), |
82 |
|
$cgi->checkbox( -name => 'slogovi' ), |
83 |
|
$cgi->checkbox( -name => 'similar_to_kino', -label=>'similar to fulltext' ), |
84 |
|
qq| |
85 |
</p>\n|; |
</p>\n|; |
86 |
|
|
87 |
|
|
100 |
|
|
101 |
my ($results, $terms); |
my ($results, $terms); |
102 |
if ( $query ) { |
if ( $query ) { |
|
($results, $terms) = $semantic->semantic_search( $full_query ); |
|
|
|
|
103 |
my $kino = KinoSearch::Simple->new( |
my $kino = KinoSearch::Simple->new( |
104 |
path => 'kinoindex/', |
path => 'kinoindex/', |
105 |
language => 'ru', |
language => 'ru', |
106 |
); |
); |
107 |
|
|
108 |
|
my $kino_query = join(" AND ", split(/\s+/, $query ) ); |
109 |
|
|
110 |
my $total_hits = $kino->search( |
my $total_hits = $kino->search( |
111 |
query => $query, |
query => $kino_query, |
112 |
offset => 0, |
offset => 0, |
113 |
num_wanted => 10, |
num_wanted => 10, |
114 |
); |
); |
115 |
|
|
116 |
print qq|<div style="width:20%; float:right;">KinoSearch hits $total_hits for $query\n<ol>|; |
my @similar_ids; |
117 |
|
my ( $max, $min ) = ( 0,0 ); |
118 |
|
|
119 |
|
print qq|<div style="width:20%; float:right;">KinoSearch hits $total_hits for $kino_query\n<ol>|; |
120 |
while ( my $hit = $kino->fetch_hit_hashref ) { |
while ( my $hit = $kino->fetch_hit_hashref ) { |
121 |
print qq|<li><a href="#id_$hit->{id}">$hit->{title}</a> $hit->{score}</li>\n|; |
print qq|<li><a href="#id_$hit->{id}">$hit->{title}</a> $hit->{score}</li>\n|; |
122 |
$score_ponder->{ $hit->{id} } = $hit->{score}; |
if ( $similar_to_kino ) { |
123 |
|
push @similar_ids, $hit->{id}; |
124 |
|
} else { |
125 |
|
$score_ponder->{ $hit->{id} } = $hit->{score}; |
126 |
|
$min = $hit->{score} if ( $hit->{score} < $min ); |
127 |
|
$max = $hit->{score} if ( $hit->{score} > $max ); |
128 |
|
} |
129 |
|
} |
130 |
|
|
131 |
|
my $d = $max - $min; |
132 |
|
|
133 |
|
map { |
134 |
|
$score_ponder->{ $_ } -= $min; |
135 |
|
$score_ponder->{ $_ } /= $d; |
136 |
|
warn "score_ponder $_ = ", $score_ponder->{$_} if $debug; |
137 |
|
} keys %$score_ponder; |
138 |
|
|
139 |
|
if ( $similar_to_kino ) { |
140 |
|
print qq|</ol></div>|; |
141 |
|
($results, $terms) = $semantic->find_similar( @similar_ids ); |
142 |
|
} else { |
143 |
|
print qq|</ol>score range: $min - $max</div>|; |
144 |
|
($results, $terms) = $semantic->semantic_search( $full_query ); |
145 |
} |
} |
|
print qq|</ol></div>|; |
|
146 |
|
|
147 |
} else { |
} else { |
148 |
($results, $terms) = $semantic->find_similar( $similar ); |
($results, $terms) = $semantic->find_similar( $similar ); |
154 |
################################## |
################################## |
155 |
# TERM BASED CALCULATIONS |
# TERM BASED CALCULATIONS |
156 |
################################## |
################################## |
157 |
my @sorted_terms = sort { |
my @sorted_terms = sort { $terms->{$b} <=> $terms->{$a} } keys %$terms; |
|
my ( $sb, $sa ) = ( $terms->{$b}, $terms->{$a} ); |
|
|
$sb *= $score_ponder->{$b} if $score_ponder->{$b}; |
|
|
$sa *= $score_ponder->{$a} if $score_ponder->{$a}; |
|
|
$sb <=> $sa; |
|
|
} keys %$terms; |
|
158 |
my @top_terms = splice( @sorted_terms, $start, $TERMS_TO_DISPLAY ); |
my @top_terms = splice( @sorted_terms, $start, $TERMS_TO_DISPLAY ); |
159 |
|
|
160 |
warn "top_terms = ", dump( @top_terms ) if $debug; |
warn "top_terms = ", dump( @top_terms ) if $debug; |
170 |
|
|
171 |
print "<p>Result Count: ".(scalar keys %$results)."</p>\n"; |
print "<p>Result Count: ".(scalar keys %$results)."</p>\n"; |
172 |
|
|
173 |
my @sorted_results = sort { $results->{$b} <=> $results->{$a} } keys %$results; |
my @sorted_results = sort { |
174 |
|
$results->{$b} <=> $results->{$a} |
175 |
|
} map { |
176 |
|
$results->{$_} *= ( $score_ponder->{$_} || 0.1 ); $_; |
177 |
|
} keys %$results; |
178 |
my @display_results = splice( @sorted_results, $start, $RESULTS_TO_DISPLAY ); |
my @display_results = splice( @sorted_results, $start, $RESULTS_TO_DISPLAY ); |
179 |
|
|
180 |
warn "display results = ", dump( @display_results ) if $debug; |
warn "display results = ", dump( @display_results ) if $debug; |