1 |
dpavlin |
1 |
#!/usr/bin/perl |
2 |
|
|
|
3 |
|
|
use warnings; |
4 |
|
|
use strict; |
5 |
|
|
|
6 |
|
|
use Time::HiRes qw(time); |
7 |
|
|
use Data::Dump qw(dump); |
8 |
|
|
use File::Slurp; |
9 |
dpavlin |
4 |
use Getopt::Long; |
10 |
dpavlin |
11 |
use IO::Socket::INET; |
11 |
dpavlin |
1 |
|
12 |
dpavlin |
4 |
|
13 |
|
|
# Run-time settings; each one can be overridden from the command line.
my $path   = '/data/isi/full.txt';    # input file, appended to $prefix when opened
my $limit  = 5000;                    # handed to the input reader as "limit"
my $offset = 0;                       # handed to the input reader as "offset"
my @views;                            # --view (repeatable); when empty, views/*.pl is used
my $listen;                           # --listen / --port: TCP port on which code is accepted
my @nodes;                            # --connect (repeatable): peers that receive every view

# GetOptions reports bad options itself and signals failure only through a
# false return value; it does not set $!, so dying with $! would print an
# unrelated (or empty) system error. Show a usage line instead.
GetOptions(
    'path=s'        => \$path,
    'offset=i'      => \$offset,
    'limit=i'       => \$limit,
    'view=s'        => \@views,
    'listen|port=i' => \$listen,
    'connect=s'     => \@nodes,
) or die "usage: $0 [--path FILE] [--offset N] [--limit N]"
       . " [--view FILE ...] [--listen PORT] [--connect HOST:PORT ...]\n";

# Start of the interval that the next report() call measures.
my $t = time;
31 |
|
|
|
32 |
dpavlin |
12 |
|
33 |
|
|
# Installation root. It is computed inside BEGIN because the "use lib" line
# further down is evaluated at compile time and interpolates $prefix.
our $prefix;

BEGIN {
    $prefix = $0;

    # Remove a literal leading "./". The dot must be escaped, otherwise any
    # single character followed by a slash would be stripped as well.
    $prefix =~ s{^\./}{};

    # Anchor every relative script path (not only "./...") at the current
    # working directory so the substitution below can find /srv/Sack/bin.
    if ( $prefix !~ m{^/} ) {
        chomp( my $pwd = `pwd` );
        $prefix = "$pwd/$prefix";
    }

    # Keep only what precedes /srv/Sack/bin...; the value is left as it is
    # when the script path does not contain that directory.
    $prefix =~ s{^(.+)/srv/Sack/bin.+$}{$1};

    warn "# prefix $prefix";
}
43 |
|
|
|
44 |
|
|
|
45 |
|
|
use lib "$prefix/srv/webpac2/lib/"; |
46 |
dpavlin |
1 |
use WebPAC::Input::ISI; |
47 |
|
|
# Build the record source: the ISI file found under the installation prefix,
# together with the offset and limit values taken from the command line.
my $input = do {
    my @ctor_args = (
        path   => "$prefix/$path",
        offset => $offset,
        limit  => $limit,
    );
    WebPAC::Input::ISI->new(@ctor_args);
};
52 |
|
|
|
53 |
|
|
|
54 |
|
|
# report( $description )
#
# Prints "<description> in <seconds>s <rate>/s" on STDOUT. <seconds> is the
# time elapsed since $t was last set and <rate> is $input->size divided by
# that time. Afterwards $t is reset to the current time, so consecutive
# calls measure consecutive intervals.
sub report {
    my $description = shift;

    my $dt = time - $t;

    # A zero interval would abort the program with "Illegal division by
    # zero"; print a rate of 0 for that case instead.
    my $rate = $dt ? $input->size / $dt : 0;

    printf "%s in %1.4fs %.2f/s\n", $description, $dt, $rate;

    $t = time;
}
60 |
|
|
|
61 |
|
|
|
62 |
|
|
report( $input->size . ' records loaded' );

# run_views writes its dumps below out/. Test for a directory (not just for
# any existing path) and say so when it cannot be created, instead of
# failing later at the first write.
unless ( -d 'out' ) {
    mkdir 'out' or warn "can't create directory out: $!";
}

# Result of the most recent view; run_code clears it before evaluating the
# view code and run_views dumps it afterwards (presumably the view code is
# what assigns it).
our $out;

# Records already fetched from $input, keyed by position (filled by run_code).
our $cache;
69 |
|
|
|
70 |
dpavlin |
11 |
# send_nodes( @header_words, $content )
#
# Sends $content to every peer listed in @nodes, opening a new TCP
# connection per peer. The last argument is the payload; any preceding
# arguments become header words. What goes over the wire is
#
#     <length of content> <header words...>\n<content>
#
# Dies when a peer cannot be reached; a failed send or close only warns.
sub send_nodes {
    my $content = pop @_;
    my $header  = join ' ', length($content), @_;

    foreach my $node (@nodes) {

        my $sock = IO::Socket::INET->new(
            PeerAddr => $node,
            Proto    => 'tcp',
        ) or die "can't connect to $node - $!";

        print ">>>> $node $header\n";

        # Low-precedence "or" is required here: with "||" the warning was
        # attached to the (always true) string argument, not to print's
        # return value, and could never fire.
        print {$sock} "$header\n$content"
            or warn "can't send $header to $node: $!";

        # Output is buffered, so a write error may only surface on close.
        close $sock
            or warn "can't close connection to $node: $!";
    }

    return;
}
88 |
|
|
|
89 |
|
|
# run_code( $view, $code )
#
# Forwards $code to all connected peers through send_nodes, then evaluates
# it once for every record position from $offset + 1 to
# $offset + $input->size. Records are fetched with $input->fetch_rec and
# kept in $cache; the loop stops at the first position without a record.
# Evaluation errors are warned about per record and do not stop the run.
# Finally the number of records evaluated without error is reported and a
# warning is issued if $out is still undefined.
#
# The string eval is compiled inside the loop body, so the evaluated code
# can see the lexicals $rec, $pos, $view, $code and $affected as well as
# the package variables $out and $cache. Do not rename them without
# checking the view files.
#
# SECURITY: in --listen mode $code is whatever a TCP client sent, so this
# executes arbitrary Perl received from the network with the privileges of
# this process. Only expose the listening port to trusted hosts.
sub run_code {
    my ( $view, $code ) = @_;

    warn "\n#### CODE $view START ####\n$code\n#### CODE $view END ####\n";

    send_nodes( view => $view => $code );

    undef $out;

    my $affected = 0;
    $t = time;    # restart the timer used by report()

    foreach my $pos ( $offset + 1 .. $offset + $input->size ) {
        my $rec = $cache->{$pos} ||= $input->fetch_rec($pos);
        if ( !$rec ) {
            warn "END at $pos";
            last;
        }

        # $code is already a string; quoting it again adds nothing.
        eval $code;
        if ($@) {
            warn "ERROR [$pos] $@\n";
        } else {
            $affected++;
        }
    }

    report("$affected affected records $view");

    warn "WARN no \$out defined!" unless defined $out;
}
120 |
|
|
|
121 |
dpavlin |
1 |
# run_views()
#
# Runs every view in @views (all of views/*.pl, sorted, when none were
# given). A view is skipped when "perl -c" rejects it. After run_code has
# processed a view and left something in $out, that value is dumped,
# summarised on STDOUT (the dump itself is printed only when shorter than
# 10000 characters) and written to the matching file under out/; the
# previous file, if any, is kept with a ".last" suffix.
#
# Dies when a view path does not contain "view/" or "views/".
sub run_views {
    @views = sort glob 'views/*.pl' unless @views;
    warn "# views ", dump @views;

    foreach my $view (@views) {

        # List form bypasses the shell, so spaces or metacharacters in a
        # --view argument cannot be interpreted as shell syntax.
        next if system( 'perl', '-c', $view ) != 0;

        my $code = read_file($view);

        run_code( $view => $code );

        next unless defined $out;

        my $dump = dump($out);
        my $len  = length $dump;

        # Named $out_path so it does not shadow the file-level $path.
        ( my $out_path = $view ) =~ s{views?/}{out/}
            or die "no view in $view";

        # Strip the extension only at the end of the name; an unanchored
        # pattern would also cut ".pl" out of the middle of a file name.
        $out_path =~ s{\.pl\z}{};

        print "OUT $view $offset/$limit $len bytes $out_path",
            ( $len < 10000 ? " \$out = $dump" : ' SAVED ONLY' ),
            "\n";

        # Rotate the previous result; there is nothing to rotate on the
        # first run for a view.
        if ( -e $out_path ) {
            unlink "$out_path.last" if -e "$out_path.last";
            rename $out_path, "$out_path.last"
                or warn "can't rename $out_path to $out_path.last: $!";
        }

        write_file( $out_path, $dump );
        report("SAVE $out_path");
    }
}
155 |
|
|
|
156 |
|
|
# Node mode: with --listen the process accepts connections forever and never
# reaches the code below this block. Each connection carries one request:
#
#     <size> <word> <word...>\n<size bytes of content>
#
# Only "view <name>" requests are understood; their content is handed to
# run_code.
#
# SECURITY: run_code evaluates the received content as Perl, so any host
# that can reach this port can execute code here.
if ($listen) {
    my $sock = IO::Socket::INET->new(
        Listen    => SOMAXCONN,
        # LocalAddr => '0.0.0.0',
        LocalPort => $listen,
        Proto     => 'tcp',
        Reuse     => 1,
    ) or die "can't listen on port $listen: $!";

    while (1) {

        warn "NODE listen on $listen\n";

        my $client = $sock->accept();
        unless ($client) {
            # An interrupted accept is harmless; anything else is fatal,
            # but with a message that says what failed.
            next if $!{EINTR};
            die "accept on port $listen failed: $!";
        }

        warn "<<<< connect from ", $client->peerhost, $/;

        # The client may disconnect without sending anything.
        my $line = <$client>;
        unless ( defined $line ) {
            warn "WARN no header received\n";
            next;
        }

        my @header = split /\s/, $line;
        warn "# header ", dump @header;

        my $size = shift @header;
        unless ( defined $size && $size =~ /^\d+$/ ) {
            warn "WARN invalid size in header\n";
            next;
        }

        # read returns the number of bytes actually read, 0 at end of file
        # and undef on error; never evaluate truncated content.
        my $content;
        my $got = read $client, $content, $size;
        unless ( defined $got && $got == $size ) {
            warn "WARN expected $size bytes, got ", ( defined $got ? $got : 'read error' ), "\n";
            next;
        }

        if ( @header && $header[0] eq 'view' ) {
            run_code( $header[1] => $content );
        } else {
            warn "WARN unknown";
        }

    }
}
189 |
|
|
|
190 |
dpavlin |
3 |
# Driver mode: run all views once, then offer a small prompt.
#   v, vi, \e, o, out   open the files under out/ in vi
#   i, info             print the list of connected nodes
#   anything else       run the views again
run_views();

while (1) {

    print "sack> ";
    my $cmd = <STDIN>;

    # End of input (Ctrl-D or a closed pipe) yields undef. Without this
    # check the loop would match undef against the patterns below, fall
    # through to the default branch and re-run the views forever.
    last unless defined $cmd;

    if ( $cmd =~ m{^(vi?|\\e|o(?:ut)?)}i ) {
        system "vi out/*";
    } elsif ( $cmd =~ m{^i(nfo)?}i ) {
        # Parentheses keep $/ out of dump's argument list, so the newline
        # is printed after the dump instead of being dumped as data.
        print "nodes: ", dump(@nodes), $/;
    } else {
        run_views();
    }

}
206 |
|
|
|