diff --git a/monitoring.py b/monitoring.py
index d755d9c..dabf245 100644
--- a/monitoring.py
+++ b/monitoring.py
@@ -1,12 +1,39 @@
# The COPYRIGHT file at the top level of this repository contains the full
# copyright notices and license terms.
-from trytond.pool import PoolMeta
+import os
import psutil
+import json
+import tempfile
+import socket
+import subprocess
+from datetime import datetime
+from trytond.pool import PoolMeta
__all__ = ['CheckPlan']
__metaclass__ = PoolMeta
+def check_output(*args):
+ process = subprocess.Popen(args, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ process.wait()
+ data = process.stdout.read()
+ return data
+
+
+def to_float(text):
+ try:
+ return float(text)
+ except ValueError:
+ return None
+
+
+PROTOCOLS = {
+ socket.SOCK_STREAM: 'TCP',
+ socket.SOCK_DGRAM: 'UDP',
+ }
+
+
class CheckPlan:
__name__ = 'monitoring.check.plan'
@@ -86,10 +113,24 @@ class CheckPlan:
})
return res
+ def check_process_count(self):
+ return [{
+ 'result': 'process_count',
+ 'float_value': len(psutil.pids()),
+ }]
+
+ def check_uptime(self):
+ boot_time = datetime.fromtimestamp(psutil.boot_time())
+ uptime = (datetime.now() - boot_time).total_seconds()
+ return [{
+ 'result': 'uptime',
+ 'float_value': uptime,
+ }]
+
def check_process_cpu_percent(self):
processes = self.get_attribute('processes')
if processes:
- processes = processes.split(',')
+ processes = [x.strip() for x in processes.split(',')]
res = []
for process in psutil.process_iter():
try:
@@ -97,7 +138,7 @@ class CheckPlan:
if processes and name not in processes:
continue
cpu = process.cpu_percent()
- except psutil.NoSuchProcess:
+ except (psutil.NoSuchProcess, psutil.AccessDenied):
continue
res.append({
'result': 'process_cpu_percent',
@@ -109,7 +150,7 @@ class CheckPlan:
def check_process_open_files_count(self):
processes = self.get_attribute('processes')
if processes:
- processes = processes.split(',')
+ processes = [x.strip() for x in processes.split(',')]
res = []
for process in psutil.process_iter():
try:
@@ -117,7 +158,7 @@ class CheckPlan:
if processes and name not in processes:
continue
files = process.num_fds()
- except psutil.NoSuchProcess:
+ except (psutil.NoSuchProcess, psutil.AccessDenied):
continue
res.append({
'result': 'process_open_files_count',
@@ -129,7 +170,7 @@ class CheckPlan:
def check_process_memory_percent(self):
processes = self.get_attribute('processes')
if processes:
- processes = processes.split(',')
+ processes = [x.strip() for x in processes.split(',')]
res = []
for process in psutil.process_iter():
try:
@@ -137,7 +178,7 @@ class CheckPlan:
if processes and name not in processes:
continue
memory = process.memory_percent()
- except psutil.NoSuchProcess:
+ except (psutil.NoSuchProcess, psutil.AccessDenied):
continue
res.append({
'result': 'process_memory_percent',
@@ -149,7 +190,7 @@ class CheckPlan:
def check_process_io_counters(self):
processes = self.get_attribute('processes')
if processes:
- processes = processes.split(',')
+ processes = [x.strip() for x in processes.split(',')]
res = []
for process in psutil.process_iter():
try:
@@ -157,7 +198,7 @@ class CheckPlan:
if processes and name not in processes:
continue
counters = process.io_counters()
- except psutil.NoSuchProcess:
+ except (psutil.NoSuchProcess, psutil.AccessDenied):
continue
for name in ('read_count', 'write_count', 'read_bytes',
'write_bytes'):
@@ -167,3 +208,186 @@ class CheckPlan:
'float_value': getattr(counters, name),
})
return res
+
+ def check_process_open_ports(self):
+ '''
+ Expected structure in ports attribute:
+
+ protocol:ip:port
+
+ Example:
+
+ TCP:*:22
+ TCP:*:8000
+ '''
+ valid_entries = set()
+ entries = [x.strip() for x in
+ self.get_attribute('process_open_ports').split()]
+ for entry in entries:
+ if len(entry.split(':')) != 3:
+ continue
+ protocol, ip, port = entry.split(':')
+ if '*' in entry:
+ valid_entries.add((protocol, entry.replace('*', '0.0.0.0'),
+ port))
+ valid_entries.add((protocol, entry.replace('*', '::'), port))
+ else:
+ valid_entries.add((protocol, ip, port))
+ invalids = []
+ value = 'OK'
+ for process in psutil.process_iter():
+ try:
+ connections = process.get_connections()
+ except (psutil.NoSuchProcess, psutil.AccessDenied):
+ continue
+ for connection in connections:
+ if connection.status != 'LISTEN':
+ continue
+ if connection.type not in PROTOCOLS:
+ continue
+ protocol = PROTOCOLS[connection.type]
+ ip = connection.laddr[0]
+ port = connection.laddr[1]
+ entry = (protocol, ip, port)
+ if entry not in valid_entries:
+ invalids.append(entry)
+ value = 'Error'
+ continue
+ return [{
+ 'result': 'process_open_ports_status',
+ 'char_value': value,
+ 'payload': json.dumps({
+ 'invalid_ports': invalids,
+ }),
+ }]
+
+ def check_load(self):
+ one, five, fifteen = os.getloadavg()
+ res = []
+ res.append({
+ 'result': 'load_1',
+ 'float_value': one,
+ })
+ res.append({
+ 'result': 'load_5',
+ 'float_value': five,
+ })
+ res.append({
+ 'result': 'load_15',
+ 'float_value': fifteen,
+ })
+ return res
+
+ def check_raid(self):
+ """
+ Expected values in raid_devices:
+ md0, md1
+ """
+ devices = self.get_attribute('raid_devices')
+ if devices:
+ devices = [x.strip() for x in devices.split(',')]
+ lines = open('/proc/mdstat', 'r').readlines()
+ current_device = None
+ current_payload = ''
+ res = []
+ for line in lines:
+ if line.startswith('md'):
+ current_device = line.split()[0]
+ current_payload += line
+ continue
+ if current_device:
+ if devices and current_device not in devices:
+ current_device = None
+ current_payload = ''
+ continue
+ current_payload += line
+ if '[UU]' in line:
+ state = 'OK'
+ else:
+ state = 'Error'
+ res.append({
+ 'result': 'raid_status',
+ 'label': current_device,
+ 'char_value': state,
+ 'payload': json.dumps({
+ 'output': current_payload,
+ }),
+ })
+ current_device = None
+ current_payload = ''
+ return res
+
+ def check_ntp_status(self):
+ output = check_output('/usr/sbin/ntpdate', '-q', 'pool.ntp.org')
+ line = output.splitlines()[-1]
+ sec = line.split()[-1]
+ text = line.split()[-2]
+ offset = line.split()[-3]
+ res = []
+ value = 999999
+ if sec == 'sec' and offset == 'offset':
+ try:
+ value = float(text)
+ except ValueError:
+ pass
+ res.append({
+ 'result': 'ntp_offset',
+ 'float_value': value,
+ })
+ return res
+
+ def check_apt(self):
+ output = check_output('apt-get', '-s', 'upgrade')
+ upgrades = 0
+ security_upgrades = 0
+ errors = False
+ for line in output.splitlines():
+ if not line.startswith('Inst'):
+ continue
+
+ upgrades += 1
+
+ items = line.split()
+ if len(items) != 5:
+ errors = True
+ continue
+
+ release = items[3]
+ if 'security' in release.lower():
+ security_upgrades += 1
+
+ res = []
+ res.append({
+ 'result': 'apt_status',
+ 'char_value': 'Error' if errors else 'OK',
+ })
+ res.append({
+ 'result': 'apt_upgrades',
+ 'float_value': upgrades,
+ })
+ res.append({
+ 'result': 'apt_security_upgrades',
+ 'float_value': security_upgrades,
+ })
+ return res
+
+ def check_disk_writable(self):
+ path = self.get_attribute('writable_path')
+ path = path.strip()
+ if not path.endswith('/'):
+ path += '/'
+ try:
+ with tempfile.TemporaryFile(prefix=path):
+ pass
+ except Exception, e:
+ return [{
+ 'result': 'disk_writable',
+ 'label': path,
+ 'char_value': 'Error',
+ 'payload': str(e),
+ }]
+ return [{
+ 'result': 'disk_writable',
+ 'label': path,
+ 'char_value': 'OK',
+ }]
diff --git a/monitoring.xml b/monitoring.xml
index e5b4e19..36ca149 100644
--- a/monitoring.xml
+++ b/monitoring.xml
@@ -293,6 +293,28 @@
+
+ Process Count
+ check_process_count
+
+
+ Process Count
+ process_count
+ float
+
+
+
+
+ Uptime
+ check_uptime
+
+
+ Uptime
+ uptime
+ float
+
+
+
Process CPU Percent
check_process_cpu_percent
@@ -354,5 +376,102 @@
float
+
+
+ Process Open Ports
+ check_process_open_ports
+
+
+ Process Open Ports Status
+ process_open_ports_status
+ char
+
+
+
+ Load
+
+
+ Load
+
+
+
+
+
+
+ Load
+ check_load
+
+
+ Load 1
+ load_1
+ float
+
+
+
+ Load 5
+ load_5
+ float
+
+
+
+ Load 15
+ load_15
+ float
+
+
+
+
+ RAID Status
+ check_raid
+
+
+ RAID Status
+ raid_status
+ char
+
+
+
+ NTP Status
+ check_ntp_status
+
+
+ NTP Status
+ ntp_offset
+ float
+
+
+
+
+ APT
+ check_apt
+
+
+ APT Status
+ apt_status
+ char
+
+
+ APT Upgrades
+ apt_upgrades
+ float
+
+
+
+ APT Security Upgrades
+ apt_security_upgrades
+ float
+
+
+
+
+ Disk Writable
+ check_disk_writable
+
+
+ Disk Writable
+ disk_writable
+ char
+
+