1 |
#!/usr/bin/perl |
2 |
# |
3 |
# Return a list of hosts which are not reachable via ICMP echo
4 |
# |
5 |
# Jim Trocki, trockij@transmeta.com |
6 |
# |
7 |
# $Id: fping.monitor 1.7 Mon, 27 Aug 2001 14:22:45 -0400 trockij $ |
8 |
# |
9 |
# Copyright (C) 1998, Jim Trocki |
10 |
# |
11 |
# This program is free software; you can redistribute it and/or modify |
12 |
# it under the terms of the GNU General Public License as published by |
13 |
# the Free Software Foundation; either version 2 of the License, or |
14 |
# (at your option) any later version. |
15 |
# |
16 |
# This program is distributed in the hope that it will be useful, |
17 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
18 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
19 |
# GNU General Public License for more details. |
20 |
# |
21 |
# You should have received a copy of the GNU General Public License |
22 |
# along with this program; if not, write to the Free Software |
23 |
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
24 |
# |
25 |
use strict; |
26 |
|
27 |
use Getopt::Std; |
28 |
|
29 |
# Command-line switches keyed by option letter.  -a, -h and -T are flags;
# -r, -s and -t each take a value.  getopts removes the switches it
# recognises from @ARGV.
my %opt;
getopts ('ahr:s:t:T', \%opt);
31 |
|
32 |
# Print the command-line synopsis on standard output and end the program.
# Takes no arguments and does not return: exit is called without an
# explicit status.
sub usage
{
    my $synopsis = <<'EOF';
usage: fping.monitor [-a] [-r num] [-s num] [-t num] [-T] host [host...]

-a only report failure if all hosts are unreachable
-r num retry "num" times for each host before reporting failure
-s num consider hosts which respond in over "num" msecs failures
-t num wait "num" msecs before sending retries
-T traceroute to each failed host. CAUTION: this may cause
this monitor to hang for a very long time

EOF

    print $synopsis;
    exit;
}
48 |
|
49 |
# -h: show the synopsis and stop before doing any work.
usage() if $opt{"h"};

# Value handed to "fping -t".  Falls back to 2000 when -t is absent,
# empty or 0.
my $TIMEOUT = $opt{"t"} || 2000;

# Value handed to "fping -r".  Only a missing or empty -r falls back to the
# default of 3; an explicit "-r 0" (no retries) is honoured instead of being
# silently replaced by the default.
my $RETRIES = (defined $opt{"r"} && length $opt{"r"}) ? $opt{"r"} : 3;

# fping invocation without the target list; -e makes fping report the
# elapsed time that the output parser reads.
my $CMD = "fping -e -r $RETRIES -t $TIMEOUT";

# Wall-clock bounds of the run, printed in the report.  $END_TIME is set
# once fping's output has been read.
my $START_TIME = time;
my $END_TIME;

# Nothing to check: succeed silently.
exit 0 unless @ARGV;
58 |
|
59 |
# Target names handed to fping, one per command-line argument.
#
# An argument that contains an "@" preceded by at least one character
# (shapes such as "prefix@host" or "prefix:params@host", optionally followed
# by "/anything") is reduced to the text between that "@" and the next "/".
# Every other argument is used unchanged.
my @hosts;

for my $arg (@ARGV) {
    push @hosts, $arg =~ m/^[^:]+:?[^\@]*\@([^\/]+)\/?.*/ ? $1 : $arg;
}
70 |
|
71 |
# Start fping and make its combined stdout/stderr readable through IN.
#
# A forking open is used instead of a shell command line: the child exec's
# fping with an argument list, so host names and option values are passed
# verbatim and are never interpreted by a shell.
my $fping_pid = open (IN, "-|");
defined $fping_pid ||
    die "could not open pipe to fping: $!\n";

if ($fping_pid == 0)
{
    # Child: stdout is already the pipe; send stderr down the same pipe so
    # the parent sees fping's diagnostics as ordinary output lines.
    open (STDERR, ">&STDOUT");

    # $CMD holds the program name and its switches separated by blanks.
    my @fping_argv = (split (' ', $CMD), @hosts);

    # The indirect-object form of exec never falls back to a shell.
    exec { $fping_argv[0] } @fping_argv;

    # Only reached when exec failed.  Report it through the pipe and use
    # the status a shell would give for a command it could not run.
    print "could not exec fping: $!\n";
    exit 127;
}
73 |
|
74 |
# Hosts sorted by outcome.  @unreachable holds plain host names; @alive and
# @slow hold [host, round-trip time] pairs.
my (@unreachable, @alive, @slow);

my @other_prob;     # details for other per-host problems
my @error;          # other errors which cause a non-zero exit
my @icmp;           # ICMP messages output by fping
my %addr_unknown;   # hosts for which fping reported "address not found"

# Hosts fping has not reported on yet; entries are removed as output lines
# are matched to them.
my %want_host;
@want_host{@hosts} = (1) x @hosts;
83 |
|
84 |
# Classify every line read from fping.  Each per-host line removes that host
# from %want_host; a line about a host that is not (or no longer) in
# %want_host is recorded as an error.
while (defined (my $line = <IN>))
{
    chomp $line;

    if ($line =~ /^(\S+).*unreachable/)
    {
        my $host = $1;
        push @unreachable, $host;
        push @error, "unreachable host `${host}' wasn't asked for"
            unless delete $want_host{$host};
    }

    elsif ($line =~ /^(\S+) is alive \((\S+)/)
    {
        my ($host, $rtt) = ($1, $2);
        push @error, "reachable host `${host}' wasn't asked for"
            unless delete $want_host{$host};

        # With -s, a host answering slower than the limit is listed as
        # slow rather than alive.
        my $bucket = ($opt{"s"} && $rtt > $opt{"s"}) ? \@slow : \@alive;
        push @{$bucket}, [$host, $rtt];
    }

    elsif ($line =~ /^(\S+)\s+address\s+not\s+found/)
    {
        my $host = $1;
        $addr_unknown{$host} = 1;
        push @other_prob, "${host} address not found";
        push @unreachable, $host;
        push @error, "unknown host `${host}' wasn't asked for"
            unless delete $want_host{$host};
    }

    # e.g. "ICMP Host Unreachable from 1.2.3.4 for ICMP Echo sent to 2.4.6.8"
    # (among others); kept verbatim for the report.
    elsif ($line =~ /^ICMP (.*) for ICMP Echo sent to (\S+)/)
    {
        push @icmp, $line;
    }

    else
    {
        push @error, "unidentified output from fping: [$line]";
    }
}

# Whatever is still wanted was never mentioned by fping: count it as
# unreachable and note why.
foreach my $missing (keys %want_host)
{
    push @other_prob, "$missing not listed in fping's output";
    push @unreachable, $missing;
}
137 |
|
138 |
# Closing the pipe waits for fping and stores its wait status in $?.
# Capture it immediately, before anything else can overwrite it.
close (IN);
my $wait_status = $?;

$END_TIME = time;

# High byte of the wait status: fping's own exit code.
my $retval = $wait_status >> 8;

if ($wait_status == -1)
{
    # No status could be collected at all.
    push @error, "could not collect exit status of fping";
}

elsif ($wait_status & 127)
{
    # Low seven bits: fping was killed by a signal.  The exit code is
    # meaningless then and the output is probably incomplete, so report it
    # rather than treating the run as a clean exit 0.
    push @error, "fping terminated by signal " . ($wait_status & 127);
}

elsif ($retval < 3)
{
    # Codes 0-2 are not treated as errors here; $retval is used as this
    # monitor's own exit status when nothing else overrides it.
}

elsif ($retval == 3)
{
    # Show the target list that was actually passed to fping (@hosts), not
    # the raw command-line arguments.
    push @error, "fping: invalid cmdline arguments [$CMD @hosts]";
}

elsif ($retval == 4)
{
    push @error, "fping: system call failure";
}

else
{
    push @error, "unknown return code ($retval) from fping";
}
163 |
|
164 |
# First line of output: either the fixed text "unusual errors" or the sorted
# names of all failed hosts (unreachable plus slow), separated by blanks.
# The line is intentionally empty when there are no failures.
my $summary = "unusual errors";

unless (@error)
{
    my @fail = sort (@unreachable, map { $_->[0] } @slow);
    $summary = "@fail";
}

print "$summary\n";
172 |
|
173 |
# Blank separator line, then when the check started and ended and how long
# it took.  Scalar assignment makes localtime return its string form.
my $started_at = localtime ($START_TIME);
my $ended_at   = localtime ($END_TIME);
my $elapsed    = $END_TIME - $START_TIME;

print "\n",
      "start time: $started_at\n",
      "end time : $ended_at\n",
      "duration : $elapsed seconds\n";
177 |
|
178 |
# Report section: one line per collected error, under its own banner.
if (@error)
{
    print "\n";
    print <<EOF;
------------------------------------------------------------------------------
unusual errors
------------------------------------------------------------------------------
EOF
    print "$_\n" foreach @error;
}
188 |
|
189 |
# Report section: every unreachable host, followed by the per-host problem
# notes when there are any.
if (@unreachable)
{
    print "\n";
    print <<EOF;
------------------------------------------------------------------------------
unreachable hosts
------------------------------------------------------------------------------
EOF
    print "$_\n" foreach @unreachable;

    if (@other_prob)
    {
        print "\nother problems:\n";
        print "$_\n" foreach @other_prob;
    }
}
202 |
|
203 |
# Report section: the ICMP messages fping printed, verbatim.
if (@icmp)
{
    print "\n";
    print <<EOF;
------------------------------------------------------------------------------
ICMP messages
------------------------------------------------------------------------------
EOF
    print "$_\n" foreach @icmp;
}
213 |
|
214 |
|
215 |
# Report section: hosts that answered, but slower than the -s limit, with
# their round-trip times.
if (@slow)
{
    my $limit = $opt{s};

    print "\n";
    print <<EOF;
------------------------------------------------------------------------------
slow hosts (response time which exceeds ${limit}ms)
------------------------------------------------------------------------------
EOF

    # Each entry is a [host, rtt] pair.
    printf ("%-40s %.2f ms\n", @{$_}) foreach @slow;
}
229 |
|
230 |
|
231 |
|
232 |
# Report section: hosts that answered in time, with their round-trip times.
if (@alive)
{
    print "\n";
    print <<EOF;
------------------------------------------------------------------------------
reachable hosts rtt
------------------------------------------------------------------------------
EOF

    # Each entry is a [host, rtt] pair.
    foreach my $entry (@alive)
    {
        printf ("%-40s %.2f ms\n", @{$entry});
    }
}
246 |
|
247 |
# |
248 |
# traceroute |
249 |
# |
250 |
# With -T, run traceroute for every unreachable host whose address is known.
# The banner is printed before the first traceroute only; later runs are
# separated by a blank line.
if ($opt{"T"} && @unreachable)
{
    my $header_output = 0;
    foreach my $host (@unreachable)
    {
        # No point tracing a name that could not be resolved.
        next if $addr_unknown{$host};

        print $header_output++ ? "\n" : <<EOF;

------------------------------------------------------------------------------
traceroute to unreachable hosts
------------------------------------------------------------------------------
EOF
        # The shell is still used for the "2>&1" redirection, but the host
        # name is handed over as positional parameter $1 instead of being
        # pasted into the command text, so it cannot be interpreted as
        # shell syntax.  The single quotes keep Perl from expanding "$1".
        system ("/bin/sh", "-c", 'exec traceroute -w 3 "$1" 2>&1',
                "sh", $host);
    }
}
265 |
|
266 |
# Choose the exit status.  Any unusual error fails the check outright.
if (@error)
{
    exit 1;
}

# With -a the check fails only if no host ended up in the alive list.
if ($opt{"a"})
{
    exit (@alive ? 0 : 1);
}

# Otherwise a slow host is a failure; if there is none, pass on fping's own
# exit code.
exit (@slow ? 1 : $retval);