Add the following checks:

- process count
- uptime
- process open ports
- load
- raid
- ntp status
- apt
- disk writable
This commit is contained in:
Albert Cervera i Areny 2015-02-02 01:54:02 +01:00
parent ded01235db
commit 361f015bb7
2 changed files with 352 additions and 9 deletions

View file

@ -1,12 +1,39 @@
# The COPYRIGHT file at the top level of this repository contains the full # The COPYRIGHT file at the top level of this repository contains the full
# copyright notices and license terms. # copyright notices and license terms.
from trytond.pool import PoolMeta import os
import psutil import psutil
import json
import tempfile
import socket
import subprocess
from datetime import datetime
from trytond.pool import PoolMeta
__all__ = ['CheckPlan'] __all__ = ['CheckPlan']
__metaclass__ = PoolMeta __metaclass__ = PoolMeta
def check_output(*args):
    """Run a command and return everything it wrote to standard output.

    ``args`` is the program followed by its arguments; no shell is involved.
    Standard error is captured and discarded and the exit status is ignored.
    """
    process = subprocess.Popen(args, stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)
    # communicate() drains both pipes while it waits for the child. Calling
    # wait() first deadlocks as soon as the child fills a pipe buffer.
    data, _ = process.communicate()
    return data
def to_float(text):
    """Convert ``text`` to a float, or return None when that is not possible.

    None and other objects that float() rejects yield None as well, instead
    of letting a TypeError escape.
    """
    try:
        return float(text)
    except (TypeError, ValueError):
        return None
# Protocol label for each socket type inspected by check_process_open_ports.
PROTOCOLS = {socket.SOCK_STREAM: 'TCP', socket.SOCK_DGRAM: 'UDP'}
class CheckPlan: class CheckPlan:
__name__ = 'monitoring.check.plan' __name__ = 'monitoring.check.plan'
@ -86,10 +113,24 @@ class CheckPlan:
}) })
return res return res
def check_process_count(self):
    """Report how many process ids psutil currently lists."""
    pids = psutil.pids()
    count = {
        'result': 'process_count',
        'float_value': len(pids),
        }
    return [count]
def check_uptime(self):
    """Report the number of seconds elapsed since the system booted."""
    # Both instants are taken in UTC. With naive local times the difference
    # is off by the DST shift whenever the clocks changed since boot.
    boot_time = datetime.utcfromtimestamp(psutil.boot_time())
    uptime = (datetime.utcnow() - boot_time).total_seconds()
    return [{
        'result': 'uptime',
        'float_value': uptime,
        }]
def check_process_cpu_percent(self): def check_process_cpu_percent(self):
processes = self.get_attribute('processes') processes = self.get_attribute('processes')
if processes: if processes:
processes = processes.split(',') processes = [x.strip() for x in processes.split(',')]
res = [] res = []
for process in psutil.process_iter(): for process in psutil.process_iter():
try: try:
@ -97,7 +138,7 @@ class CheckPlan:
if processes and name not in processes: if processes and name not in processes:
continue continue
cpu = process.cpu_percent() cpu = process.cpu_percent()
except psutil.NoSuchProcess: except (psutil.NoSuchProcess, psutil.AccessDenied):
continue continue
res.append({ res.append({
'result': 'process_cpu_percent', 'result': 'process_cpu_percent',
@ -109,7 +150,7 @@ class CheckPlan:
def check_process_open_files_count(self): def check_process_open_files_count(self):
processes = self.get_attribute('processes') processes = self.get_attribute('processes')
if processes: if processes:
processes = processes.split(',') processes = [x.strip() for x in processes.split(',')]
res = [] res = []
for process in psutil.process_iter(): for process in psutil.process_iter():
try: try:
@ -117,7 +158,7 @@ class CheckPlan:
if processes and name not in processes: if processes and name not in processes:
continue continue
files = process.num_fds() files = process.num_fds()
except psutil.NoSuchProcess: except (psutil.NoSuchProcess, psutil.AccessDenied):
continue continue
res.append({ res.append({
'result': 'process_open_files_count', 'result': 'process_open_files_count',
@ -129,7 +170,7 @@ class CheckPlan:
def check_process_memory_percent(self): def check_process_memory_percent(self):
processes = self.get_attribute('processes') processes = self.get_attribute('processes')
if processes: if processes:
processes = processes.split(',') processes = [x.strip() for x in processes.split(',')]
res = [] res = []
for process in psutil.process_iter(): for process in psutil.process_iter():
try: try:
@ -137,7 +178,7 @@ class CheckPlan:
if processes and name not in processes: if processes and name not in processes:
continue continue
memory = process.memory_percent() memory = process.memory_percent()
except psutil.NoSuchProcess: except (psutil.NoSuchProcess, psutil.AccessDenied):
continue continue
res.append({ res.append({
'result': 'process_memory_percent', 'result': 'process_memory_percent',
@ -149,7 +190,7 @@ class CheckPlan:
def check_process_io_counters(self): def check_process_io_counters(self):
processes = self.get_attribute('processes') processes = self.get_attribute('processes')
if processes: if processes:
processes = processes.split(',') processes = [x.strip() for x in processes.split(',')]
res = [] res = []
for process in psutil.process_iter(): for process in psutil.process_iter():
try: try:
@ -157,7 +198,7 @@ class CheckPlan:
if processes and name not in processes: if processes and name not in processes:
continue continue
counters = process.io_counters() counters = process.io_counters()
except psutil.NoSuchProcess: except (psutil.NoSuchProcess, psutil.AccessDenied):
continue continue
for name in ('read_count', 'write_count', 'read_bytes', for name in ('read_count', 'write_count', 'read_bytes',
'write_bytes'): 'write_bytes'):
@ -167,3 +208,186 @@ class CheckPlan:
'float_value': getattr(counters, name), 'float_value': getattr(counters, name),
}) })
return res return res
def check_process_open_ports(self):
    """Verify that every listening socket is on the list of allowed ports.

    The ``process_open_ports`` attribute holds whitespace separated entries
    with the structure ``protocol:ip:port``. ``*`` as ip stands for both
    ``0.0.0.0`` and ``::``. Example::

        TCP:*:22
        TCP:*:8000

    Returns a single ``process_open_ports_status`` result. Its value is
    'OK' when every listening socket matches an entry and 'Error'
    otherwise; the payload lists the unexpected (protocol, ip, port)
    entries as JSON.
    """
    valid_entries = set()
    for entry in (self.get_attribute('process_open_ports') or '').split():
        # Split from both ends so that IPv6 addresses, which contain colons
        # themselves, stay in one piece.
        protocol, _, rest = entry.partition(':')
        ip, _, port = rest.rpartition(':')
        if not protocol or not ip:
            continue
        try:
            # Ports must be integers to compare equal to what psutil reports.
            port = int(port)
        except ValueError:
            continue
        protocol = protocol.upper()
        if ip == '*':
            valid_entries.add((protocol, '0.0.0.0', port))
            valid_entries.add((protocol, '::', port))
        else:
            valid_entries.add((protocol, ip, port))

    invalids = []
    for process in psutil.process_iter():
        try:
            # connections() is the psutil 2.x name, like pids() and
            # boot_time() used elsewhere in this file.
            connections = process.connections()
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue
        for connection in connections:
            if connection.status != 'LISTEN':
                continue
            if connection.type not in PROTOCOLS:
                continue
            entry = (PROTOCOLS[connection.type], connection.laddr[0],
                connection.laddr[1])
            if entry not in valid_entries:
                invalids.append(entry)
    return [{
        'result': 'process_open_ports_status',
        'char_value': 'Error' if invalids else 'OK',
        'payload': json.dumps({
            'invalid_ports': invalids,
            }),
        }]
def check_load(self):
    """Report the three system load averages returned by os.getloadavg().

    The values are emitted, in order, as the ``load_1``, ``load_5`` and
    ``load_15`` results.
    """
    names = ('load_1', 'load_5', 'load_15')
    return [{'result': name, 'float_value': average}
        for name, average in zip(names, os.getloadavg())]
def check_raid(self):
    """Report the state of the arrays listed in /proc/mdstat.

    The optional ``raid_devices`` attribute is a comma separated list of
    the arrays to check, for example ``md0, md1``. When it is empty every
    array found in the file is checked.

    Returns one ``raid_status`` result per array, labelled with the device
    name. The state is read from the line that follows the array's ``md``
    line: 'OK' when its status field holds only ``U`` flags (``[UU]``,
    ``[UUU]``, ...) and 'Error' otherwise. The payload carries both lines
    as JSON.
    """
    devices = self.get_attribute('raid_devices')
    if devices:
        devices = [x.strip() for x in devices.split(',')]
    # Close the file as soon as it has been read instead of leaking it.
    with open('/proc/mdstat', 'r') as mdstat:
        lines = mdstat.readlines()
    current_device = None
    current_payload = ''
    res = []
    for line in lines:
        if line.startswith('md'):
            current_device = line.split()[0]
            current_payload = line
            continue
        if not current_device:
            continue
        if not devices or current_device in devices:
            # Bracketed fields look like [2/2] [UU]; keep their contents.
            fields = [token[1:-1] for token in line.split()
                if token.startswith('[') and token.endswith(']')]
            # Healthy means a field made only of 'U', whatever its length,
            # so arrays with more than two members are not misreported.
            healthy = any(flags and not flags.strip('U') for flags in fields)
            res.append({
                'result': 'raid_status',
                'label': current_device,
                'char_value': 'OK' if healthy else 'Error',
                'payload': json.dumps({
                    'output': current_payload + line,
                    }),
                })
        # Only the first line after the md line is inspected.
        current_device = None
        current_payload = ''
    return res
def check_ntp_status(self):
    """Report the clock offset printed by ``ntpdate -q pool.ntp.org``.

    The offset is taken from the last output line when that line ends in
    ``offset <number> sec``. Returns a single ``ntp_offset`` result; its
    value is 999999 when no offset could be read.
    """
    output = check_output('/usr/sbin/ntpdate', '-q', 'pool.ntp.org')
    lines = output.splitlines()
    # ntpdate may print nothing on stdout (for example on failure), and the
    # last line may be shorter than expected; neither case should raise.
    fields = lines[-1].split() if lines else []
    value = 999999
    if len(fields) >= 3 and fields[-3] == 'offset' and fields[-1] == 'sec':
        try:
            value = float(fields[-2])
        except ValueError:
            pass
    return [{
        'result': 'ntp_offset',
        'float_value': value,
        }]
def check_apt(self):
    """Count the packages a simulated ``apt-get upgrade`` would install.

    Every output line starting with ``Inst`` counts as one upgrade. Lines
    with exactly five whitespace separated fields whose fourth field
    contains 'security' (case insensitive) also count as security upgrades.
    ``apt_status`` is 'Error' when any ``Inst`` line has a different number
    of fields.

    Returns the ``apt_status``, ``apt_upgrades`` and
    ``apt_security_upgrades`` results, in that order.
    """
    simulation = check_output('apt-get', '-s', 'upgrade')
    pending = [line.split() for line in simulation.splitlines()
        if line.startswith('Inst')]
    well_formed = [fields for fields in pending if len(fields) == 5]
    security = [fields for fields in well_formed
        if 'security' in fields[3].lower()]
    status = 'OK' if len(well_formed) == len(pending) else 'Error'
    return [
        {'result': 'apt_status', 'char_value': status},
        {'result': 'apt_upgrades', 'float_value': len(pending)},
        {'result': 'apt_security_upgrades', 'float_value': len(security)},
        ]
def check_disk_writable(self):
    """Check that a temporary file can be created inside a directory.

    The directory is read from the ``writable_path`` attribute. Returns a
    single ``disk_writable`` result labelled with the path, always with a
    trailing slash. Its value is 'OK' on success; on failure it is 'Error'
    and the payload holds the exception message.
    """
    path = self.get_attribute('writable_path')
    path = path.strip()
    if not path.endswith('/'):
        path += '/'
    result = {
        'result': 'disk_writable',
        'label': path,
        'char_value': 'OK',
        }
    try:
        # dir= creates the file inside path. Passing the path as prefix only
        # ends up there by accident, and only for absolute paths.
        with tempfile.TemporaryFile(dir=path):
            pass
    except Exception as e:
        # Any failure means the path cannot be written to, so it is reported
        # rather than raised.
        result['char_value'] = 'Error'
        result['payload'] = str(e)
    return [result]

View file

@ -293,6 +293,28 @@
<field name="uom" ref="product.uom_unit"/> <field name="uom" ref="product.uom_unit"/>
</record> </record>
<!-- Check type whose internal_name is the name of the
CheckPlan.check_process_count method. -->
<record model="monitoring.check.type" id="check_process_count">
<field name="name">Process Count</field>
<field name="internal_name">check_process_count</field>
</record>
<!-- Result type for the 'process_count' result returned by that method. -->
<record model="monitoring.result.type" id="result_process_count">
<field name="name">Process Count</field>
<field name="internal_name">process_count</field>
<field name="type">float</field>
<field name="uom" ref="product.uom_unit"/>
</record>
<!-- Check type whose internal_name is the name of the
CheckPlan.check_uptime method. -->
<record model="monitoring.check.type" id="check_uptime">
<field name="name">Uptime</field>
<field name="internal_name">check_uptime</field>
</record>
<!-- Result type for the 'uptime' result returned by that method; the
value is a number of seconds. -->
<record model="monitoring.result.type" id="result_uptime">
<field name="name">Uptime</field>
<field name="internal_name">uptime</field>
<field name="type">float</field>
<field name="uom" ref="product.uom_second"/>
</record>
<record model="monitoring.check.type" id="check_process_cpu_percent"> <record model="monitoring.check.type" id="check_process_cpu_percent">
<field name="name">Process CPU Percent</field> <field name="name">Process CPU Percent</field>
<field name="internal_name">check_process_cpu_percent</field> <field name="internal_name">check_process_cpu_percent</field>
@ -354,5 +376,102 @@
<field name="type">float</field> <field name="type">float</field>
<field name="uom" ref="monitoring.uom_byte"/> <field name="uom" ref="monitoring.uom_byte"/>
</record> </record>
<!-- Check type whose internal_name is the name of the
CheckPlan.check_process_open_ports method. -->
<record model="monitoring.check.type" id="check_process_open_ports">
<field name="name">Process Open Ports</field>
<field name="internal_name">check_process_open_ports</field>
</record>
<!-- Result type for the 'process_open_ports_status' result returned by
that method; the value is the text 'OK' or 'Error'. -->
<record model="monitoring.result.type" id="result_process_open_ports_status">
<field name="name">Process Open Ports Status</field>
<field name="internal_name">process_open_ports_status</field>
<field name="type">char</field>
</record>
<!-- Unit category and unit used only by the load results below. The unit
symbol is a single space. -->
<record model="product.uom.category" id="uom_cat_load">
<field name="name">Load</field>
</record>
<record model="product.uom" id="uom_load">
<field name="name">Load</field>
<field name="symbol"> </field>
<field name="category" ref="uom_cat_load"/>
<field name="factor" eval="1"/>
<field name="rate" eval="1"/>
</record>
<!-- Check type whose internal_name is the name of the
CheckPlan.check_load method. -->
<record model="monitoring.check.type" id="check_load">
<field name="name">Load</field>
<field name="internal_name">check_load</field>
</record>
<!-- One result type for each of the 'load_1', 'load_5' and 'load_15'
results returned by that method. -->
<record model="monitoring.result.type" id="result_load_1">
<field name="name">Load 1</field>
<field name="internal_name">load_1</field>
<field name="type">float</field>
<field name="uom" ref="uom_load"/>
</record>
<record model="monitoring.result.type" id="result_load_5">
<field name="name">Load 5</field>
<field name="internal_name">load_5</field>
<field name="type">float</field>
<field name="uom" ref="uom_load"/>
</record>
<record model="monitoring.result.type" id="result_load_15">
<field name="name">Load 15</field>
<field name="internal_name">load_15</field>
<field name="type">float</field>
<field name="uom" ref="uom_load"/>
</record>
<!-- Check type whose internal_name is the name of the
CheckPlan.check_raid method. -->
<record model="monitoring.check.type" id="check_raid">
<field name="name">RAID Status</field>
<field name="internal_name">check_raid</field>
</record>
<!-- Result type for the 'raid_status' results returned by that method,
one per array; the value is the text 'OK' or 'Error'. -->
<record model="monitoring.result.type" id="result_raid_status">
<field name="name">RAID Status</field>
<field name="internal_name">raid_status</field>
<field name="type">char</field>
</record>
<!-- Check type whose internal_name is the name of the
CheckPlan.check_ntp_status method. -->
<record model="monitoring.check.type" id="check_ntp_status">
<field name="name">NTP Status</field>
<field name="internal_name">check_ntp_status</field>
</record>
<!-- Result type for the 'ntp_offset' result returned by that method. Note
that the record id and name say "status" while the internal_name, which is
what the method emits, is ntp_offset. -->
<record model="monitoring.result.type" id="result_ntp_status">
<field name="name">NTP Status</field>
<field name="internal_name">ntp_offset</field>
<field name="type">float</field>
<field name="uom" ref="product.uom_second"/>
</record>
<!-- Check type whose internal_name is the name of the
CheckPlan.check_apt method. -->
<record model="monitoring.check.type" id="check_apt">
<field name="name">APT</field>
<field name="internal_name">check_apt</field>
</record>
<!-- Result types for the three results returned by that method:
'apt_status' (text 'OK' or 'Error'), 'apt_upgrades' and
'apt_security_upgrades' (package counts). -->
<record model="monitoring.result.type" id="result_apt_status">
<field name="name">APT Status</field>
<field name="internal_name">apt_status</field>
<field name="type">char</field>
</record>
<record model="monitoring.result.type" id="result_apt_upgrades">
<field name="name">APT Upgrades</field>
<field name="internal_name">apt_upgrades</field>
<field name="type">float</field>
<field name="uom" ref="product.uom_unit"/>
</record>
<record model="monitoring.result.type" id="result_apt_security_upgrades">
<field name="name">APT Security Upgrades</field>
<field name="internal_name">apt_security_upgrades</field>
<field name="type">float</field>
<field name="uom" ref="product.uom_unit"/>
</record>
<!-- Check type whose internal_name is the name of the
CheckPlan.check_disk_writable method. -->
<record model="monitoring.check.type" id="check_disk_writable">
<field name="name">Disk Writable</field>
<field name="internal_name">check_disk_writable</field>
</record>
<!-- Result type for the 'disk_writable' result returned by that method;
the value is the text 'OK' or 'Error'. -->
<record model="monitoring.result.type" id="result_disk_writable">
<field name="name">Disk Writable</field>
<field name="internal_name">disk_writable</field>
<field name="type">char</field>
</record>
</data> </data>
</tryton> </tryton>