适用场景:当服务器某一内网网卡经常有问题,另一张公网网卡正常,或者通过某一台正常的服务器去监控另一台不稳定的服务器。以此脚本为基础,可以在多线路网关或VPN节点上,修改为自动判断网络状态并切换路由;或者在监控到有丢包率严重时重启网卡。
基本原理:通过参数控制ping的结果,统计比较丢包率,通过sendmail命令发邮件通知,所以必须启用系统的邮件服务,一般是默认就启用的。
邮件报警实现的功能:有问题则报警,问题持续则间隔一定时间再报警,该间隔时间可在变量中设定。网卡从问题中恢复也通知。
使用:修改相应的变量,脚本保存为/root/sh/mon-eth.sh
chmod u+x /root/sh/mon-eth.sh
执行:nohup /bin/bash /root/sh/mon-eth.sh >> /var/log/mon-eth.log 2>&1 &
脚本内容如下:
#!/bin/bash
#############################################
# author zhao yanan
# date   2012/09/14
# update 2012/09/22 Improve function
# update 2012/09/24 Improve function
# update 2012/12/06 Increased variable settings and log output
#############################################
# Purpose:
#   Periodically ping $remoteip through interface $eth, compare the reported
#   packet loss with a threshold, and send mail through /usr/sbin/sendmail:
#     - once when packet loss is first detected,
#     - again after every $repeat_alarm_time consecutive bad cycles,
#     - once when the interface recovers.
#   Progress is logged to stdout/stderr; the two state counters are mirrored
#   to /tmp/mon-<eth>-m and /tmp/mon-<eth>-n.
#
# Execution:
#   nohup /bin/bash /root/sh/mon-eth.sh >> /var/log/mon-eth.log 2>&1 &

# Fail loudly on typos in variable names. "set -e" is deliberately NOT used:
# a failing ping is the normal condition this script exists to observe.
set -u

## env ############
localip=192.168.0.2
remoteip=192.168.0.1
servername="dbserver"
eth=eth0
packet_loss_percentage=60 # Packet loss percentage, alarm threshold
repeat_alarm_time=75      # Repeat alarm interval, in detection cycles
                          # (one cycle is about 24 seconds, so a value of
                          # 75 is about half an hour)
interval=20               # Detection interval (seconds)
mailfromadd='server1<server1@domain.com>'
mailtoadd='user1<user1@domain.com>'
mailccadd='user2<user2@domain.com>'

# Force the C locale so the ping summary text and date format are predictable.
export LANG=C
export LC_ALL=C
export PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin

# Last ping summary line; read by the mail functions below.
ping_summary=""

###### check #################################
echo "Start monitoring."

#######################################
# Send one notification mail via sendmail.
# Globals:   mailfromadd, mailtoadd, mailccadd, ping_summary (read)
# Arguments: $1 - subject line
#            $2 - first line of the mail body
# Returns:   exit status of sendmail
#######################################
send_mail() {
  local subject=$1
  local headline=$2

  # The empty line after "Subject:" separates the headers from the body.
  /usr/sbin/sendmail -t <<EOF
From: $mailfromadd
To: $mailtoadd
Cc: $mailccadd
Subject: $subject

---------------------------------------------------
$headline
$ping_summary
---------------------------------------------------
EOF
}

# First alarm: packet loss has just been detected.
mailto() {
  send_mail "$servername $eth $localip packet loss" \
    "$eth $localip packet loss, $(date)"
}

# Repeated alarm: packet loss has persisted for $repeat_alarm_time cycles.
mailto2() {
  send_mail "$servername $eth $localip packet loss (too many times)" \
    "$eth $localip packet loss (too many times) $(date)"
}

# Recovery notice: the interface is healthy again after an alarm.
mailto3() {
  send_mail "$servername $eth $localip ok" \
    "$eth $localip ok $(date)"
}

#######################################
# Extract the integer packet-loss percentage from a ping summary line.
# Arguments: $1 - ping summary line (may be empty)
# Outputs:   the integer part of the percentage; 100 when no percentage can
#            be found, so that a ping which could not run at all counts as
#            a failure instead of silently passing the threshold test.
#######################################
loss_percent() {
  local summary=$1
  # Fractional values such as "33.3333%" are truncated to their integer part.
  local re='([0-9]+)(\.[0-9]+)?% packet loss'

  if [[ "$summary" =~ $re ]]; then
    printf '%s\n' "${BASH_REMATCH[1]}"
  else
    printf '100\n'
  fi
}

# Mirror the counters to the state files.
save_fail_count() { echo "$fail_count" > /tmp/mon-"$eth"-m; }
save_alarm_flag() { echo "$alarm_active" > /tmp/mon-"$eth"-n; }

fail_count=0   # consecutive cycles at or above the threshold (file "-m")
alarm_active=0 # 1 while an alarm mail is outstanding (file "-n")
save_fail_count
save_alarm_flag

while true; do
  # 5 probes, 1 s apart, 1 s reply timeout, 5 s overall deadline, quiet output.
  ping_summary=$(ping -I "$eth" -i 1 -c 5 -W 1 -w 5 -q "$remoteip" \
    | grep "packet loss")
  if [[ -z "$ping_summary" ]]; then
    ping_summary="ping printed no packet loss summary (treated as 100% packet loss)"
  fi
  loss=$(loss_percent "$ping_summary")

  if [ "$loss" -ge "$packet_loss_percentage" ]; then
    echo "$(date) $ping_summary"
    fail_count=$((fail_count + 1))
  else
    fail_count=0
    echo "$(date) $eth ok."
  fi
  save_fail_count

  if [ "$fail_count" -eq 0 ] && [ "$alarm_active" -eq 1 ]; then
    # Recovered after an alarm.
    echo "$(date) $ping_summary"
    if mailto3; then
      echo "$eth ok, mail notification has been sent."
    else
      echo "$eth ok, but sending the mail notification failed." >&2
    fi
    alarm_active=0
    save_alarm_flag
  elif [ "$fail_count" -eq 1 ] && [ "$alarm_active" -eq 0 ]; then
    # First bad cycle: raise the alarm.
    if mailto; then
      echo "$eth packet loss, mail notification has been sent."
    else
      echo "$eth packet loss, but sending the mail notification failed." >&2
    fi
    alarm_active=1
    save_alarm_flag
  fi

  if [ "$fail_count" -ge "$repeat_alarm_time" ] && [ "$alarm_active" -eq 1 ]; then
    # Problem persists: remind, then restart the count. It restarts at 2
    # (as in the original logic) so the "first bad cycle" branch above can
    # never match while the alarm is still active.
    if mailto2; then
      echo "$eth packet loss, mail notification is sent again."
    else
      echo "$eth packet loss, but re-sending the mail notification failed." >&2
    fi
    fail_count=2
    save_fail_count
  fi

  sleep "$interval"
done
发表回复