mirror of
https://github.com/NaN-tic/trytond-monitoring_system.git
synced 2023-12-14 03:42:57 +01:00
Add the following checks:
- process count - uptime - process open ports - load - raid - ntp status - apt - disk writable
This commit is contained in:
parent
ded01235db
commit
361f015bb7
2 changed files with 352 additions and 9 deletions
242
monitoring.py
242
monitoring.py
|
@ -1,12 +1,39 @@
|
|||
# The COPYRIGHT file at the top level of this repository contains the full
|
||||
# copyright notices and license terms.
|
||||
from trytond.pool import PoolMeta
|
||||
import os
|
||||
import psutil
|
||||
import json
|
||||
import tempfile
|
||||
import socket
|
||||
import subprocess
|
||||
from datetime import datetime
|
||||
from trytond.pool import PoolMeta
|
||||
|
||||
__all__ = ['CheckPlan']
|
||||
__metaclass__ = PoolMeta
|
||||
|
||||
|
||||
def check_output(*args):
|
||||
process = subprocess.Popen(args, stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE)
|
||||
process.wait()
|
||||
data = process.stdout.read()
|
||||
return data
|
||||
|
||||
|
||||
def to_float(text):
|
||||
try:
|
||||
return float(text)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
|
||||
PROTOCOLS = {
|
||||
socket.SOCK_STREAM: 'TCP',
|
||||
socket.SOCK_DGRAM: 'UDP',
|
||||
}
|
||||
|
||||
|
||||
class CheckPlan:
|
||||
__name__ = 'monitoring.check.plan'
|
||||
|
||||
|
@ -86,10 +113,24 @@ class CheckPlan:
|
|||
})
|
||||
return res
|
||||
|
||||
def check_process_count(self):
|
||||
return [{
|
||||
'result': 'process_count',
|
||||
'float_value': len(psutil.pids()),
|
||||
}]
|
||||
|
||||
def check_uptime(self):
|
||||
boot_time = datetime.fromtimestamp(psutil.boot_time())
|
||||
uptime = (datetime.now() - boot_time).total_seconds()
|
||||
return [{
|
||||
'result': 'uptime',
|
||||
'float_value': uptime,
|
||||
}]
|
||||
|
||||
def check_process_cpu_percent(self):
|
||||
processes = self.get_attribute('processes')
|
||||
if processes:
|
||||
processes = processes.split(',')
|
||||
processes = [x.strip() for x in processes.split(',')]
|
||||
res = []
|
||||
for process in psutil.process_iter():
|
||||
try:
|
||||
|
@ -97,7 +138,7 @@ class CheckPlan:
|
|||
if processes and name not in processes:
|
||||
continue
|
||||
cpu = process.cpu_percent()
|
||||
except psutil.NoSuchProcess:
|
||||
except (psutil.NoSuchProcess, psutil.AccessDenied):
|
||||
continue
|
||||
res.append({
|
||||
'result': 'process_cpu_percent',
|
||||
|
@ -109,7 +150,7 @@ class CheckPlan:
|
|||
def check_process_open_files_count(self):
|
||||
processes = self.get_attribute('processes')
|
||||
if processes:
|
||||
processes = processes.split(',')
|
||||
processes = [x.strip() for x in processes.split(',')]
|
||||
res = []
|
||||
for process in psutil.process_iter():
|
||||
try:
|
||||
|
@ -117,7 +158,7 @@ class CheckPlan:
|
|||
if processes and name not in processes:
|
||||
continue
|
||||
files = process.num_fds()
|
||||
except psutil.NoSuchProcess:
|
||||
except (psutil.NoSuchProcess, psutil.AccessDenied):
|
||||
continue
|
||||
res.append({
|
||||
'result': 'process_open_files_count',
|
||||
|
@ -129,7 +170,7 @@ class CheckPlan:
|
|||
def check_process_memory_percent(self):
|
||||
processes = self.get_attribute('processes')
|
||||
if processes:
|
||||
processes = processes.split(',')
|
||||
processes = [x.strip() for x in processes.split(',')]
|
||||
res = []
|
||||
for process in psutil.process_iter():
|
||||
try:
|
||||
|
@ -137,7 +178,7 @@ class CheckPlan:
|
|||
if processes and name not in processes:
|
||||
continue
|
||||
memory = process.memory_percent()
|
||||
except psutil.NoSuchProcess:
|
||||
except (psutil.NoSuchProcess, psutil.AccessDenied):
|
||||
continue
|
||||
res.append({
|
||||
'result': 'process_memory_percent',
|
||||
|
@ -149,7 +190,7 @@ class CheckPlan:
|
|||
def check_process_io_counters(self):
|
||||
processes = self.get_attribute('processes')
|
||||
if processes:
|
||||
processes = processes.split(',')
|
||||
processes = [x.strip() for x in processes.split(',')]
|
||||
res = []
|
||||
for process in psutil.process_iter():
|
||||
try:
|
||||
|
@ -157,7 +198,7 @@ class CheckPlan:
|
|||
if processes and name not in processes:
|
||||
continue
|
||||
counters = process.io_counters()
|
||||
except psutil.NoSuchProcess:
|
||||
except (psutil.NoSuchProcess, psutil.AccessDenied):
|
||||
continue
|
||||
for name in ('read_count', 'write_count', 'read_bytes',
|
||||
'write_bytes'):
|
||||
|
@ -167,3 +208,186 @@ class CheckPlan:
|
|||
'float_value': getattr(counters, name),
|
||||
})
|
||||
return res
|
||||
|
||||
def check_process_open_ports(self):
|
||||
'''
|
||||
Expected structure in ports attribute:
|
||||
|
||||
protocol:ip:port
|
||||
|
||||
Example:
|
||||
|
||||
TCP:*:22
|
||||
TCP:*:8000
|
||||
'''
|
||||
valid_entries = set()
|
||||
entries = [x.strip() for x in
|
||||
self.get_attribute('process_open_ports').split()]
|
||||
for entry in entries:
|
||||
if len(entry.split(':')) != 3:
|
||||
continue
|
||||
protocol, ip, port = entry.split(':')
|
||||
if '*' in entry:
|
||||
valid_entries.add((protocol, entry.replace('*', '0.0.0.0'),
|
||||
port))
|
||||
valid_entries.add((protocol, entry.replace('*', '::'), port))
|
||||
else:
|
||||
valid_entries.add((protocol, ip, port))
|
||||
invalids = []
|
||||
value = 'OK'
|
||||
for process in psutil.process_iter():
|
||||
try:
|
||||
connections = process.get_connections()
|
||||
except (psutil.NoSuchProcess, psutil.AccessDenied):
|
||||
continue
|
||||
for connection in connections:
|
||||
if connection.status != 'LISTEN':
|
||||
continue
|
||||
if connection.type not in PROTOCOLS:
|
||||
continue
|
||||
protocol = PROTOCOLS[connection.type]
|
||||
ip = connection.laddr[0]
|
||||
port = connection.laddr[1]
|
||||
entry = (protocol, ip, port)
|
||||
if entry not in valid_entries:
|
||||
invalids.append(entry)
|
||||
value = 'Error'
|
||||
continue
|
||||
return [{
|
||||
'result': 'process_open_ports_status',
|
||||
'char_value': value,
|
||||
'payload': json.dumps({
|
||||
'invalid_ports': invalids,
|
||||
}),
|
||||
}]
|
||||
|
||||
def check_load(self):
|
||||
one, five, fifteen = os.getloadavg()
|
||||
res = []
|
||||
res.append({
|
||||
'result': 'load_1',
|
||||
'float_value': one,
|
||||
})
|
||||
res.append({
|
||||
'result': 'load_5',
|
||||
'float_value': five,
|
||||
})
|
||||
res.append({
|
||||
'result': 'load_15',
|
||||
'float_value': fifteen,
|
||||
})
|
||||
return res
|
||||
|
||||
def check_raid(self):
|
||||
"""
|
||||
Expected values in raid_devices:
|
||||
md0, md1
|
||||
"""
|
||||
devices = self.get_attribute('raid_devices')
|
||||
if devices:
|
||||
devices = [x.strip() for x in devices.split(',')]
|
||||
lines = open('/proc/mdstat', 'r').readlines()
|
||||
current_device = None
|
||||
current_payload = ''
|
||||
res = []
|
||||
for line in lines:
|
||||
if line.startswith('md'):
|
||||
current_device = line.split()[0]
|
||||
current_payload += line
|
||||
continue
|
||||
if current_device:
|
||||
if devices and current_device not in devices:
|
||||
current_device = None
|
||||
current_payload = ''
|
||||
continue
|
||||
current_payload += line
|
||||
if '[UU]' in line:
|
||||
state = 'OK'
|
||||
else:
|
||||
state = 'Error'
|
||||
res.append({
|
||||
'result': 'raid_status',
|
||||
'label': current_device,
|
||||
'char_value': state,
|
||||
'payload': json.dumps({
|
||||
'output': current_payload,
|
||||
}),
|
||||
})
|
||||
current_device = None
|
||||
current_payload = ''
|
||||
return res
|
||||
|
||||
def check_ntp_status(self):
|
||||
output = check_output('/usr/sbin/ntpdate', '-q', 'pool.ntp.org')
|
||||
line = output.splitlines()[-1]
|
||||
sec = line.split()[-1]
|
||||
text = line.split()[-2]
|
||||
offset = line.split()[-3]
|
||||
res = []
|
||||
value = 999999
|
||||
if sec == 'sec' and offset == 'offset':
|
||||
try:
|
||||
value = float(text)
|
||||
except ValueError:
|
||||
pass
|
||||
res.append({
|
||||
'result': 'ntp_offset',
|
||||
'float_value': value,
|
||||
})
|
||||
return res
|
||||
|
||||
def check_apt(self):
|
||||
output = check_output('apt-get', '-s', 'upgrade')
|
||||
upgrades = 0
|
||||
security_upgrades = 0
|
||||
errors = False
|
||||
for line in output.splitlines():
|
||||
if not line.startswith('Inst'):
|
||||
continue
|
||||
|
||||
upgrades += 1
|
||||
|
||||
items = line.split()
|
||||
if len(items) != 5:
|
||||
errors = True
|
||||
continue
|
||||
|
||||
release = items[3]
|
||||
if 'security' in release.lower():
|
||||
security_upgrades += 1
|
||||
|
||||
res = []
|
||||
res.append({
|
||||
'result': 'apt_status',
|
||||
'char_value': 'Error' if errors else 'OK',
|
||||
})
|
||||
res.append({
|
||||
'result': 'apt_upgrades',
|
||||
'float_value': upgrades,
|
||||
})
|
||||
res.append({
|
||||
'result': 'apt_security_upgrades',
|
||||
'float_value': security_upgrades,
|
||||
})
|
||||
return res
|
||||
|
||||
def check_disk_writable(self):
|
||||
path = self.get_attribute('writable_path')
|
||||
path = path.strip()
|
||||
if not path.endswith('/'):
|
||||
path += '/'
|
||||
try:
|
||||
with tempfile.TemporaryFile(prefix=path):
|
||||
pass
|
||||
except Exception, e:
|
||||
return [{
|
||||
'result': 'disk_writable',
|
||||
'label': path,
|
||||
'char_value': 'Error',
|
||||
'payload': str(e),
|
||||
}]
|
||||
return [{
|
||||
'result': 'disk_writable',
|
||||
'label': path,
|
||||
'char_value': 'OK',
|
||||
}]
|
||||
|
|
119
monitoring.xml
119
monitoring.xml
|
@ -293,6 +293,28 @@
|
|||
<field name="uom" ref="product.uom_unit"/>
|
||||
</record>
|
||||
|
||||
<record model="monitoring.check.type" id="check_process_count">
|
||||
<field name="name">Process Count</field>
|
||||
<field name="internal_name">check_process_count</field>
|
||||
</record>
|
||||
<record model="monitoring.result.type" id="result_process_count">
|
||||
<field name="name">Process Count</field>
|
||||
<field name="internal_name">process_count</field>
|
||||
<field name="type">float</field>
|
||||
<field name="uom" ref="product.uom_unit"/>
|
||||
</record>
|
||||
|
||||
<record model="monitoring.check.type" id="check_uptime">
|
||||
<field name="name">Uptime</field>
|
||||
<field name="internal_name">check_uptime</field>
|
||||
</record>
|
||||
<record model="monitoring.result.type" id="result_uptime">
|
||||
<field name="name">Uptime</field>
|
||||
<field name="internal_name">uptime</field>
|
||||
<field name="type">float</field>
|
||||
<field name="uom" ref="product.uom_second"/>
|
||||
</record>
|
||||
|
||||
<record model="monitoring.check.type" id="check_process_cpu_percent">
|
||||
<field name="name">Process CPU Percent</field>
|
||||
<field name="internal_name">check_process_cpu_percent</field>
|
||||
|
@ -354,5 +376,102 @@
|
|||
<field name="type">float</field>
|
||||
<field name="uom" ref="monitoring.uom_byte"/>
|
||||
</record>
|
||||
|
||||
<record model="monitoring.check.type" id="check_process_open_ports">
|
||||
<field name="name">Process Open Ports</field>
|
||||
<field name="internal_name">check_process_open_ports</field>
|
||||
</record>
|
||||
<record model="monitoring.result.type" id="result_process_open_ports_status">
|
||||
<field name="name">Process Open Ports Status</field>
|
||||
<field name="internal_name">process_open_ports_status</field>
|
||||
<field name="type">char</field>
|
||||
</record>
|
||||
|
||||
<record model="product.uom.category" id="uom_cat_load">
|
||||
<field name="name">Load</field>
|
||||
</record>
|
||||
<record model="product.uom" id="uom_load">
|
||||
<field name="name">Load</field>
|
||||
<field name="symbol"> </field>
|
||||
<field name="category" ref="uom_cat_load"/>
|
||||
<field name="factor" eval="1"/>
|
||||
<field name="rate" eval="1"/>
|
||||
</record>
|
||||
<record model="monitoring.check.type" id="check_load">
|
||||
<field name="name">Load</field>
|
||||
<field name="internal_name">check_load</field>
|
||||
</record>
|
||||
<record model="monitoring.result.type" id="result_load_1">
|
||||
<field name="name">Load 1</field>
|
||||
<field name="internal_name">load_1</field>
|
||||
<field name="type">float</field>
|
||||
<field name="uom" ref="uom_load"/>
|
||||
</record>
|
||||
<record model="monitoring.result.type" id="result_load_5">
|
||||
<field name="name">Load 5</field>
|
||||
<field name="internal_name">load_5</field>
|
||||
<field name="type">float</field>
|
||||
<field name="uom" ref="uom_load"/>
|
||||
</record>
|
||||
<record model="monitoring.result.type" id="result_load_15">
|
||||
<field name="name">Load 15</field>
|
||||
<field name="internal_name">load_15</field>
|
||||
<field name="type">float</field>
|
||||
<field name="uom" ref="uom_load"/>
|
||||
</record>
|
||||
|
||||
<record model="monitoring.check.type" id="check_raid">
|
||||
<field name="name">RAID Status</field>
|
||||
<field name="internal_name">check_raid</field>
|
||||
</record>
|
||||
<record model="monitoring.result.type" id="result_raid_status">
|
||||
<field name="name">RAID Status</field>
|
||||
<field name="internal_name">raid_status</field>
|
||||
<field name="type">char</field>
|
||||
</record>
|
||||
|
||||
<record model="monitoring.check.type" id="check_ntp_status">
|
||||
<field name="name">NTP Status</field>
|
||||
<field name="internal_name">check_ntp_status</field>
|
||||
</record>
|
||||
<record model="monitoring.result.type" id="result_ntp_status">
|
||||
<field name="name">NTP Status</field>
|
||||
<field name="internal_name">ntp_offset</field>
|
||||
<field name="type">float</field>
|
||||
<field name="uom" ref="product.uom_second"/>
|
||||
</record>
|
||||
|
||||
<record model="monitoring.check.type" id="check_apt">
|
||||
<field name="name">APT</field>
|
||||
<field name="internal_name">check_apt</field>
|
||||
</record>
|
||||
<record model="monitoring.result.type" id="result_apt_status">
|
||||
<field name="name">APT Status</field>
|
||||
<field name="internal_name">apt_status</field>
|
||||
<field name="type">char</field>
|
||||
</record>
|
||||
<record model="monitoring.result.type" id="result_apt_upgrades">
|
||||
<field name="name">APT Upgrades</field>
|
||||
<field name="internal_name">apt_upgrades</field>
|
||||
<field name="type">float</field>
|
||||
<field name="uom" ref="product.uom_unit"/>
|
||||
</record>
|
||||
<record model="monitoring.result.type" id="result_apt_security_upgrades">
|
||||
<field name="name">APT Security Upgrades</field>
|
||||
<field name="internal_name">apt_security_upgrades</field>
|
||||
<field name="type">float</field>
|
||||
<field name="uom" ref="product.uom_unit"/>
|
||||
</record>
|
||||
|
||||
<record model="monitoring.check.type" id="check_disk_writable">
|
||||
<field name="name">Disk Writable</field>
|
||||
<field name="internal_name">check_disk_writable</field>
|
||||
</record>
|
||||
<record model="monitoring.result.type" id="result_disk_writable">
|
||||
<field name="name">Disk Writable</field>
|
||||
<field name="internal_name">disk_writable</field>
|
||||
<field name="type">char</field>
|
||||
</record>
|
||||
|
||||
</data>
|
||||
</tryton>
|
||||
|
|
Loading…
Reference in a new issue