1 |
dpavlin |
1 |
#!/usr/bin/perl |
2 |
|
|
|
3 |
|
|
use warnings; |
4 |
|
|
use strict; |
5 |
|
|
|
6 |
|
|
use Time::HiRes qw(time); |
7 |
|
|
use Data::Dump qw(dump); |
8 |
|
|
use File::Slurp; |
9 |
dpavlin |
4 |
use Getopt::Long; |
10 |
dpavlin |
1 |
|
11 |
dpavlin |
4 |
|
12 |
|
|
my $path = '/data/isi/full.txt'; |
13 |
|
|
my $limit = 10000; |
14 |
|
|
my $offset = 0; |
15 |
dpavlin |
6 |
my @views; |
16 |
dpavlin |
4 |
|
17 |
|
|
|
18 |
|
|
GetOptions( |
19 |
|
|
'path=s' => \$path, |
20 |
|
|
'offset=i' => \$offset, |
21 |
|
|
'limit=i' => \$limit, |
22 |
dpavlin |
6 |
'view=s' => \@views, |
23 |
dpavlin |
5 |
) or die $!; |
24 |
dpavlin |
4 |
|
25 |
dpavlin |
1 |
my $t = time; |
26 |
|
|
|
27 |
|
|
use lib '/srv/webpac2/lib/'; |
28 |
|
|
use WebPAC::Input::ISI; |
29 |
|
|
my $input = WebPAC::Input::ISI->new( |
30 |
dpavlin |
4 |
path => $path, |
31 |
|
|
offset => $offset, |
32 |
|
|
limit => $limit, |
33 |
dpavlin |
1 |
); |
34 |
|
|
|
35 |
|
|
|
36 |
|
|
sub report { |
37 |
|
|
my $description = shift; |
38 |
|
|
my $dt = time - $t; |
39 |
|
|
printf "%s in %1.4fs %.2f/s\n", $description, $dt, $input->size / $dt; |
40 |
|
|
} |
41 |
|
|
|
42 |
|
|
|
43 |
|
|
report $input->size . ' records loaded'; |
44 |
|
|
|
45 |
|
|
mkdir 'out' unless -e 'out'; |
46 |
|
|
|
47 |
dpavlin |
5 |
our $out; |
48 |
|
|
|
49 |
dpavlin |
8 |
our $cache; |
50 |
|
|
|
51 |
dpavlin |
1 |
sub run_views { |
52 |
dpavlin |
6 |
@views = sort glob 'views/*.pl' unless @views; |
53 |
dpavlin |
1 |
warn "# views ", dump @views; |
54 |
|
|
|
55 |
|
|
foreach my $view ( @views ) { |
56 |
|
|
|
57 |
|
|
my ( $nr, $package ) = ( $1, $2 ) |
58 |
|
|
if $view =~ m{/(\d+)\.([^/]+(\.pl)?$)}; |
59 |
|
|
|
60 |
dpavlin |
5 |
undef $out; |
61 |
dpavlin |
1 |
|
62 |
|
|
next if system("perl -c $view") != 0; |
63 |
|
|
|
64 |
|
|
my $code = read_file $view; |
65 |
dpavlin |
5 |
warn "## CODE\n$code\n## CODE\n"; |
66 |
dpavlin |
1 |
|
67 |
dpavlin |
5 |
my $affected = 0; |
68 |
dpavlin |
1 |
$t = time; |
69 |
|
|
|
70 |
dpavlin |
9 |
foreach my $pos ( $offset + 1 .. $offset + $input->size ) { |
71 |
|
|
my $rec = $cache->{$pos} ||= $input->fetch_rec( $pos ); |
72 |
dpavlin |
5 |
if ( ! $rec ) { |
73 |
|
|
warn "END at $pos"; |
74 |
|
|
last; |
75 |
|
|
} |
76 |
dpavlin |
1 |
|
77 |
dpavlin |
5 |
eval "$code"; |
78 |
|
|
if ( $@ ) { |
79 |
|
|
warn "ERROR [$pos] $@\n"; |
80 |
|
|
} else { |
81 |
|
|
$affected++; |
82 |
|
|
} |
83 |
dpavlin |
1 |
}; |
84 |
|
|
|
85 |
dpavlin |
5 |
report "$affected affected records $view"; |
86 |
dpavlin |
1 |
|
87 |
|
|
if ( defined $out ) { |
88 |
|
|
my $dump = dump $out; |
89 |
|
|
my $len = length $dump; |
90 |
|
|
my $path = "out/$nr.$package"; |
91 |
dpavlin |
5 |
print "out $view $offset/$limit $len bytes $path" |
92 |
|
|
, ( $len < 10000 ? " \$out = $dump" : ' SAVED ONLY' ) |
93 |
dpavlin |
1 |
, "\n" |
94 |
|
|
; |
95 |
dpavlin |
5 |
|
96 |
dpavlin |
9 |
unlink "$path.last" if -e "$path.last"; |
97 |
|
|
rename $path, "$path.last"; |
98 |
dpavlin |
1 |
write_file $path, $dump; |
99 |
|
|
report "save $path"; |
100 |
dpavlin |
5 |
} else { |
101 |
|
|
warn "W: no \$out defined!"; |
102 |
dpavlin |
1 |
} |
103 |
|
|
|
104 |
|
|
} |
105 |
|
|
} |
106 |
|
|
|
107 |
dpavlin |
3 |
run_views; |
108 |
|
|
|
109 |
dpavlin |
1 |
while ( 1 ) { |
110 |
|
|
|
111 |
|
|
print "sack> "; |
112 |
|
|
my $cmd = <STDIN>; |
113 |
|
|
|
114 |
dpavlin |
3 |
if ( $cmd =~ m{(vi|\\e|out)}i ) { |
115 |
|
|
system "vi out/*"; |
116 |
|
|
} else { |
117 |
|
|
run_views; |
118 |
|
|
} |
119 |
|
|
|
120 |
dpavlin |
1 |
} |
121 |
|
|
|