1 |
dpavlin |
1.1 |
#!/usr/bin/perl -w |
2 |
|
|
|
3 |
|
|
# parse file.alert mon logs and report (up|down)time of services |
4 |
|
|
# |
5 |
|
|
# 2003-09-03 Dobrica Pavlinusic <dpavlin@rot13.org> |
6 |
|
|
# 2003-10-05 converted to CGI script |
7 |
|
|
# |
8 |
|
|
|
9 |
|
|
use strict; |
10 |
|
|
use POSIX qw(strftime); |
11 |
|
|
use CGI qw/:standard *table/; |
12 |
|
|
use CGI::Carp qw(fatalsToBrowser); |
13 |
|
|
use Data::Sorting qw(:arrays); |
14 |
dpavlin |
1.2 |
use Time::ParseDate; |
15 |
|
|
|
16 |
dpavlin |
1.1 |
use Data::Dumper; |
17 |
|
|
|
18 |
dpavlin |
1.2 |
# strftime() formats: date only, and date with time of day
my $date_fmt      = "%Y-%m-%d";
my $date_time_fmt = "%Y-%m-%d %H:%M:%S";

# default report interval, pre-filled into the date fields of the form
my $from_date = "now - 6 months";
my $to_date   = "now";

# working days definition (1-7; mon=1)
my $wday_start = 1;
my $wday_end   = 5;
# working hours
my $whours_start = "7:00";
my $whours_end   = "17:00";

# debug level starts at 1; it is raised by one if any argument contains
# "-v" and by one more if any argument contains "-d"
my $debug = 1;
for my $switch (qr/-v/, qr/-d/) {
    $debug++ if grep { $_ =~ $switch } @ARGV;
}

my $q = CGI->new;

# report options taken from the request
my $print_orphans = $q->param('print_orphans') || 0;
my $rep_reset     = $q->param('rep_reset')     || 0;
my @sg_selected   = $q->param('sg_filter');

my @sort;           # sort rule list, later handed to sorted_array()
my $order;
my %sort_param;     # 'usort' / 'dsort' => column name requested
my ($usort, $dsort);

# 'usort' requests an ascending and 'dsort' a descending numeric sort on the
# named column.  Each parameter is removed from $q once it has been read.
# When both are present the descending one wins because it is handled last.
for my $spec ( [ 'usort' ], [ 'dsort', -order => 'reverse' ] ) {
    my ($name, @extra) = @$spec;
    my $column = $q->param($name);
    next unless $column;

    $sort_param{$name} = $column;
    $q->delete($name);
    @sort = ( -compare => 'numeric', @extra, $column );
}
55 |
|
|
|
56 |
|
|
# |
57 |
|
|
# This option (the rep_reset form parameter; formerly the -r command switch)
# will reset the failure duration if a repeated failure on the same
# group/service happened.
# If you want honest reporting (or grouped only by group and service),
# you shouldn't turn it on :-) However, if you have just failure events in
# your log, this will produce output which shows the duration BETWEEN two
# failures.
62 |
|
|
# |
63 |
|
|
|
64 |
|
|
# Format a unix timestamp as local time using $date_time_fmt.
#
#   d($utime)
#
# Returns "?" when the timestamp is missing or otherwise false (undef, 0, "").
sub d {
    my ($epoch) = @_;
    return "?" unless $epoch;

    my @broken_down = localtime($epoch);
    return strftime($date_time_fmt, @broken_down);
}
69 |
|
|
# Format a duration given in seconds as "[<days>d ]HH:MM:SS".
#
#   dur($seconds)
#
# The day prefix is present only when the duration spans at least one day.
# While $debug is true the raw number of seconds is appended as " [<n>]".
# Returns "?" when the duration is missing or otherwise false (undef, 0, "").
sub dur {
    my ($total) = @_;
    return "?" unless $total;

    # peel off days, hours and minutes; what is left are the seconds
    my $rest = $total;
    my $days = int($rest / (24*60*60));
    $rest %= (24*60*60);
    my $hours = int($rest / (60*60));
    $rest %= (60*60);
    my $minutes = int($rest / 60);
    $rest %= 60;

    my @pieces;
    push @pieces, $days . "d " if ($days > 0);
    push @pieces, sprintf("%02d:%02d:%02d", $hours, $minutes, $rest);
    push @pieces, sprintf(" [%d]", $total) if ($debug);

    return join("", @pieces);
}
91 |
|
|
|
92 |
|
|
# read log and calculate
#
# Every log line that is used has the form
#   (failure|up) <group> <service> <unix time> (<date text>) <description>
# Lines that do not match are ignored.  A "failure" opens an outage for
# "<group>/<service>"; the next "up" for the same id closes it and produces
# one report row in @data.

my %fail;       # group/service => unix time of the failure that opened the outage
my %downtime;   # total downtime
my %sg_filter;  # filter for service/group
my %sg_count;   # count number of downtimes

my $log_file="/home/dpavlin/mon-log/sap.log";

my @data;

# Selection is an exact match on "<group>/<service>".  A regex match would
# also accept ids that merely contain a selected id as a substring and would
# interpret regex metacharacters occurring in group or service names.
my %sg_is_selected = map { $_ => 1 } @sg_selected;

open(my $log_fh, '<', $log_file) || die "$log_file: $!";

while (my $line = <$log_fh>) {
    chomp $line;
    next unless ($line =~ /^(failure|up)\s+(\S+)\s+(\S+)\s+(\d+)\s+\(([^)]+)\)\s+(.+)$/);

    my ($status,$group,$service,$utime,$date,$desc) = ($1,$2,$3,$4,$5,$6);
    my $id = "$group/$service";
    my $selected = $sg_is_selected{$id};

    if ($status eq "up" && defined($fail{$id})) {
        # service came back: close the open outage
        if ($selected) {
            my $duration = $utime - $fail{$id};
            push @data, {
                'sg'=>$id,
                'from_time'=>$fail{$id},
                'to_time'=>$utime,
                'dur_time'=>$duration,
                'from'=>d($fail{$id}),
                'to'=>d($utime),
                'dur'=>dur($duration),
                'desc'=>$desc };
            $downtime{$id} += $duration;
            $sg_count{$id}++;
        }
        $sg_filter{$id}++;
        delete $fail{$id};
    } elsif ($status eq "up") {
        # "up" without a preceding failure in this log: start is unknown
        if ($print_orphans && $selected) {
            push @data, {
                'sg'=>$id,
                'to_time'=>$utime,
                'from'=>'unknown',
                'to'=>d($utime),
                'dur'=>'unknown',
                'desc'=>$desc };
            $sg_count{$id}++;
        }
        delete $fail{$id};
        $sg_filter{$id}++;
    } elsif (defined($fail{$id})) {
        # repeated failure while the outage is still open; with rep_reset
        # the span since the previous failure becomes a row of its own and
        # the outage start is moved to this failure
        if ($rep_reset && $selected) {
            my $duration = $utime - $fail{$id};
            push @data, {
                'sg'=>$id,
                'from_time'=>$fail{$id},
                'to_time'=>$utime,
                'dur_time'=>$duration,
                'from'=>d($fail{$id}),
                'to'=>d($utime),
                'dur'=>dur($duration),
                'desc'=>'[failure again]'};
            $downtime{$id} += $duration;
            $fail{$id} = $utime;
            $sg_count{$id}++;
        }
        $sg_filter{$id}++;
    } else {
        # first failure: remember when the outage started
        $fail{$id} = $utime;
    }
}
close($log_fh);
162 |
|
|
|
163 |
|
|
# generate output
#
print header,start_html("mon availability report");

# make some filters
#
# The form offers one checkbox per group/service id collected in %sg_filter
# (all ticked by default), the two report options and the date limit.
# The ids are sorted so that the checkboxes appear in the same order on
# every request; the order of hash keys is not stable.

print start_form,
    start_table({-border=>0,-cellspacing=>0,-cellpadding=>0}),
    Tr(td(
        em("Show just service/group:"),br,
        checkbox_group(-name=>'sg_filter',
            -values=>[sort keys %sg_filter],
            -default=>[sort keys %sg_filter],
            -linebreak=>'true',
        ),
    ),td(
        em("Other options:"),br,
        $q->checkbox(-name=>'rep_reset',-checked=>0,
            -label=>"show repeated failures on same service as individual failures"),
        br,
        $q->checkbox(-name=>'print_orphans',-checked=>0,
            -label=>"show records which are not complete in this interval"),
        br,
        $q->checkbox(-name=>'use_date_limit',-checked=>1,
            -label=>"use date limit from:"),
        $q->textfield(-name=>'from_date',-size=>20,-default=>$from_date),
        " to: ",
        $q->textfield(-name=>'to_date',-size=>20,-default=>$to_date),
        small('Using <a href="http://search.cpan.org/search?mode=module&query=Time::ParseDate">Time::ParseDate</a>'),
        br,
        $q->submit(-name=>'show',-value=>'Show report'),
    )),end_table;
196 |
|
|
|
197 |
|
|
# dump report |
198 |
|
|
# |
199 |
|
|
|
200 |
|
|
# Build the HTML for one sort arrow in a column header.
#
#   sort_link($q, $col, $dir)
#
#   $q    query object; its url(-query=>1) is used as the base of the link
#   $col  name of the column to sort on
#   $dir  'u' or 'd' (case-insensitive); it selects both the request
#         parameter ('usort' / 'dsort') and the arrow entity (&uArr; / &dArr;)
#
# Returns the bare arrow entity when %sort_param says the report is already
# sorted this way, otherwise the arrow wrapped in a link that requests the
# sort.  Returns nothing when any argument is missing.
sub sort_link {
    my $q = shift || return;
    my $col = shift || return;
    my $dir = lc(shift) || return;

    my $arrow = '&'.$dir.'Arr;';
    my $active = $sort_param{$dir.'sort'};
    return $arrow if ($active && $active eq $col);

    # A URL without a query string needs '?' before the first parameter;
    # appending '&' unconditionally would produce a broken link.
    my $url = $q->url(-query=>1);
    my $sep = ($url =~ /\?/) ? '&' : '?';

    return '<a href="'.$url.$sep.$dir.'sort='.$col.'">'.$arrow.'</a>';
}
210 |
|
|
|
211 |
dpavlin |
1.2 |
|
212 |
|
|
# Optional date limit.  A bound stays undef (meaning "no limit on this side")
# when the limit is switched off or when the submitted text is empty or
# cannot be parsed.
my ($from_time,$to_time);
my ($from_html,$to_html) = ('','');
my $use_date_limit = $q->param('use_date_limit');
if ($use_date_limit) {
    # read the parameters in scalar context: inside an argument list
    # param() would return a list and could shift parsedate()'s arguments
    my $from_text = $q->param('from_date');
    my $to_text = $q->param('to_date');
    $from_time = parsedate($from_text, UK=>1) if (defined($from_text) && $from_text ne '');
    $to_time = parsedate($to_text, UK=>1) if (defined($to_text) && $to_text ne '');

    $from_html = defined($from_time) ? strftime($date_fmt,localtime($from_time)) : '(no limit)';
    $to_html = defined($to_time) ? strftime($date_fmt,localtime($to_time)) : '(no limit)';
    $from_html .= " [$from_time] " if ($debug && defined($from_time));
    $to_html .= " [$to_time] " if ($debug && defined($to_time));
}

# sort data
#
my @sorted = sorted_array(@data, @sort);
#my @sorted = @data;

print "-- sort: ",Dumper(@sort)," (data: ".@data." sorted: ".@sorted.") --\n",br if ($debug);

print start_table({-border=>1,-cellspacing=>0,-cellpadding=>2,-width=>'100%'});

# column headers with sort arrows; the date limit is shown under from/to
print Tr(
    th("group/service"),
    th({-bgcolor=>'#f0f0f0'},
        sort_link($q,'from_time','u').' from '.
        sort_link($q,'from_time','d'),
        br,$from_html
    ),
    th(
        sort_link($q,'to_time','u').' to '.
        sort_link($q,'to_time','d'),
        br,$to_html
    ),
    th({-bgcolor=>'#e0e0e0'},
        sort_link($q,'dur_time','u').' duration '.
        sort_link($q,'dur_time','d')
    ),
    th("description")
) if (scalar @sorted > 0);

# Totals are accumulated from the rows that are actually printed, so that
# they agree with the table when the date limit hides some rows.
my %shown_downtime;     # group/service => summed dur_time of printed rows
my %shown_count;        # group/service => number of printed rows

foreach my $row (@sorted) {
    if ($use_date_limit) {
        # rows without a known start (from is 'unknown') are placed by
        # their end time instead of being compared as undef
        my $start = defined($row->{from_time}) ? $row->{from_time} : $row->{to_time};
        next if (defined($from_time) && $start < $from_time);
        next if (defined($to_time) && $row->{to_time} > $to_time);
    }
    print Tr(
        td({-align=>'left',-valign=>'center'},$row->{sg}),
        td({-align=>'right',-bgcolor=>'#f0f0f0'},$row->{from}),
        td({-align=>'right'},$row->{to}),
        td({-align=>'center',-bgcolor=>'#e0e0e0'},$row->{dur}),
        td({-align=>'left'},$row->{desc}),
    ),"\n";

    $shown_count{$row->{sg}}++;
    $shown_downtime{$row->{sg}} += $row->{dur_time} if (defined($row->{dur_time}));
}

# dump totals
#
# one line per group/service that has at least one printed row with a
# known duration

foreach my $sg (sort keys %shown_downtime) {
    print Tr(td({-colspan=>3,-align=>'right'},"total for $sg:"),
        td({-bgcolor=>'#e0e0e0',-align=>'right'},dur($shown_downtime{$sg})),
        td(small("in ".$shown_count{$sg}." failures"))),"\n";
}

print end_table,
    end_form;
272 |
|
|
|