Import ganglia-monitor-core-3.1.9 as wip/ganglia-monitor-core319.

Ganglia is a scalable distributed monitoring system for high-performance
computing systems such as clusters and Grids. It is based on a hierarchical
design targeted at federations of clusters. It relies on a multicast-based
listen/announce protocol to monitor state within clusters and uses a tree of
point-to-point connections amongst representative cluster nodes to federate
clusters and aggregate their state. It leverages widely used technologies such
as XML for data representation, XDR for compact, portable data transport, and
RRDtool for data storage and visualization. It uses carefully engineered data
structures and algorithms to achieve very low per-node overheads and high
concurrency. The implementation is robust, has been ported to an extensive set
of operating systems and processor architectures, and is currently in use on
over 500 clusters around the world. It has been used to link clusters across
university campuses and around the world and can scale to handle clusters with
2000 nodes.

You may wish to install www/ganglia-webfrontend to monitor your systems via
the web.
This commit is contained in:
Jason Bacon 2014-11-21 14:56:16 +00:00 committed by Thomas Klausner
parent 80a432f4ed
commit 91acf876b9
10 changed files with 481 additions and 0 deletions

View file

@ -0,0 +1,17 @@
Ganglia is a scalable distributed monitoring system for high-performance
computing systems such as clusters and Grids. It is based on a hierarchical
design targeted at federations of clusters. It relies on a multicast-based
listen/announce protocol to monitor state within clusters and uses a tree of
point-to-point connections amongst representative cluster nodes to federate
clusters and aggregate their state. It leverages widely used technologies such
as XML for data representation, XDR for compact, portable data transport, and
RRDtool for data storage and visualization. It uses carefully engineered data
structures and algorithms to achieve very low per-node overheads and high
concurrency. The implementation is robust, has been ported to an extensive set
of operating systems and processor architectures, and is currently in use on
over 500 clusters around the world. It has been used to link clusters across
university campuses and around the world and can scale to handle clusters with
2000 nodes.
You may wish to install www/ganglia-webfrontend to monitor your systems via
the web.

View file

@ -0,0 +1,93 @@
# $NetBSD: Makefile,v 1.1 2014/11/21 14:56:16 outpaddling Exp $
#
# pkgsrc package Makefile for the Ganglia monitoring core (gmond, gmetad,
# gmetric, gstat and libganglia), built from the ganglia-3.1.9 distfile.

DISTNAME=	ganglia-3.1.9
PKGNAME=	ganglia-monitor-core-3.1.9
CATEGORIES=	net parallel
MASTER_SITES=	${MASTER_SITE_SOURCEFORGE:=ganglia/}

MAINTAINER=	pkgsrc-users@NetBSD.org
HOMEPAGE=	http://ganglia.sourceforge.net/
COMMENT=	Ganglia cluster monitor, monitoring daemon

GNU_CONFIGURE=		yes
SET_LIBDIR=		yes
CONFIGURE_ARGS+=	--with-gmetad
USE_LIBTOOL=		yes
USE_LANGUAGES+=		c c++ c99

PLIST_VARS+=		python

# Package options.  "python" enables gmond support for python modules.
PKG_OPTIONS_VAR=	PKG_OPTIONS.ganglia
PKG_SUPPORTED_OPTIONS=	python

.include "../../mk/bsd.options.mk"

# NOTE(review): configure is pointed at ${PREFIX}/etc while CONF_FILES below
# targets ${PKG_SYSCONFDIR}; confirm the two are meant to differ.
CONFIGURE_ARGS+=	--sysconfdir=${PREFIX}/etc

.if !empty(PKG_OPTIONS:Mpython)
CONFIGURE_ARGS+=	--with-python=${PYTHONBIN}
PLIST.python=		yes
.include "../../lang/python/application.mk"
.else
CONFIGURE_ARGS+=	--disable-python
.endif

# Rewrite the hard-coded /etc/ganglia and /usr/lib/ganglia paths in the
# sources, generated command-line parsers and manual pages.
SUBST_CLASSES+=			pkg_sysconfdir
SUBST_STAGE.pkg_sysconfdir=	pre-configure
SUBST_MESSAGE.pkg_sysconfdir=	Fixing PKG_SYSCONFDIR
SUBST_FILES.pkg_sysconfdir=	ganglia.html \
				gmetad/cmdline.c gmetad/cmdline.h \
				gmetric/cmdline.c gmetric/cmdline.h \
				gmond/cmdline.c gmond/cmdline.h \
				gmond/g25_config.h gmond/gmond.conf.5 \
				gmond/modules/conf.d/modpython.conf \
				gmond/modules/conf.d/modpython.conf.in \
				gmond/modules/python/README.in \
				mans/gmetad.1 mans/gmetric.1 mans/gmond.1
SUBST_SED.pkg_sysconfdir+=	-e 's,/etc/ganglia,${PKG_SYSCONFDIR},g'
SUBST_SED.pkg_sysconfdir+=	-e 's,/usr/lib/ganglia,${PREFIX}/lib/ganglia,g'

# The upstream SysV init scripts hard-code /usr/sbin; point them at the
# installed daemons instead.  Only done on Linux, where they are installed.
.if ${OPSYS} == "Linux"
SUBST_CLASSES+=			linux_init
SUBST_STAGE.linux_init=		post-patch
SUBST_FILES.linux_init=		gmetad/gmetad.init gmond/gmond.init
SUBST_SED.linux_init=		-e 's|/usr/sbin|${PREFIX}/sbin|g'
.endif

DOCDIR=		${PREFIX}/share/doc/ganglia
EGDIR=		${PREFIX}/share/examples/ganglia

CONF_FILES=	${EGDIR}/gmond.conf ${PKG_SYSCONFDIR}/gmond.conf
CONF_FILES+=	${EGDIR}/gmetad.conf ${PKG_SYSCONFDIR}/gmetad.conf
RCD_SCRIPTS=	gmond gmetad

INSTALLATION_DIRS=	${DOCDIR} ${EGDIR} ${PKGMANDIR}/man1 ${PKGMANDIR}/man5

# Install manual pages, example configuration files and the python module
# README; on Linux also install the upstream init scripts as examples.
post-install:
	${INSTALL_MAN} ${WRKSRC}/mans/gmetad.1 ${DESTDIR}${PREFIX}/${PKGMANDIR}/man1/
	${INSTALL_MAN} ${WRKSRC}/mans/gmetric.1 ${DESTDIR}${PREFIX}/${PKGMANDIR}/man1/
	${INSTALL_MAN} ${WRKSRC}/mans/gmond.1 ${DESTDIR}${PREFIX}/${PKGMANDIR}/man1/
	${INSTALL_MAN} ${WRKSRC}/mans/gstat.1 ${DESTDIR}${PREFIX}/${PKGMANDIR}/man1/
	${INSTALL_MAN} ${WRKSRC}/gmond/gmond.conf.5 ${DESTDIR}${PREFIX}/${PKGMANDIR}/man5/
	${INSTALL_DATA} ${WRKSRC}/gmetad/gmetad.conf ${DESTDIR}${EGDIR}/
	# The example gmond.conf is produced by running the freshly built gmond
	# with -t and capturing its output.
	${WRKSRC}/gmond/gmond -t > ${DESTDIR}${EGDIR}/gmond.conf
	${INSTALL_DATA} ${WRKSRC}/gmond/modules/python/README ${DESTDIR}${DOCDIR}/
.if ${OPSYS} == "Linux"
	${INSTALL_SCRIPT} ${WRKSRC}/gmetad/gmetad.init \
		${DESTDIR}${PREFIX}/share/examples/rc.d
	${INSTALL_SCRIPT} ${WRKSRC}/gmond/gmond.init \
		${DESTDIR}${PREFIX}/share/examples/rc.d
.endif

# Hint for Linux users.  The message is quoted so the shell does not
# glob-expand it, and it names the directory the *.init scripts are
# actually installed into by post-install above.
# NOTE(review): confirm that the pkgsrc framework invokes a target with
# this name; nothing in this file references it.
post-package-install:
	@echo "Copy ${PREFIX}/share/examples/rc.d/*.init to /etc/init.d/ to enable services on Linux"

.include "../../databases/rrdtool/buildlink3.mk"
.include "../../devel/apr/buildlink3.mk"
.include "../../devel/confuse/buildlink3.mk"
.include "../../mk/pthread.buildlink3.mk"
.include "../../textproc/expat/buildlink3.mk"
.include "../../mk/bsd.pkg.mk"

View file

@ -0,0 +1,35 @@
@comment $NetBSD: PLIST,v 1.1 2014/11/21 14:56:16 outpaddling Exp $
@comment gmetad.init and gmond.init are installed only on Linux and are
@comment listed in the Linux-specific packing list, not here.
bin/ganglia-config
bin/gmetric
bin/gstat
etc/gmetad.conf
include/ganglia_gexec.h
include/ganglia.h
include/gm_metric.h
include/gm_mmn.h
include/gm_msg.h
include/gm_protocol.h
include/gm_value.h
lib/ganglia/modcpu.so
lib/ganglia/moddisk.so
lib/ganglia/modload.so
lib/ganglia/modmem.so
lib/ganglia/modmulticpu.so
lib/ganglia/modnet.so
lib/ganglia/modproc.so
lib/ganglia/modsys.so
lib/libganglia.la
man/man1/gmetad.1
man/man1/gmetric.1
man/man1/gmond.1
man/man1/gstat.1
man/man5/gmond.conf.5
sbin/gmetad
sbin/gmond
share/doc/ganglia/README
share/examples/ganglia/gmetad.conf
share/examples/ganglia/gmond.conf
share/examples/rc.d/gmetad
share/examples/rc.d/gmond

View file

@ -0,0 +1,2 @@
@comment Upstream init scripts; the package Makefile installs these only when OPSYS is Linux.
share/examples/rc.d/gmetad.init
share/examples/rc.d/gmond.init

View file

@ -0,0 +1,9 @@
$NetBSD: distinfo,v 1.1 2014/11/21 14:56:16 outpaddling Exp $
SHA1 (ganglia-3.1.9.tar.gz) = 13c38b8e703507a0d177e369a90cf0b74a4362e1
RMD160 (ganglia-3.1.9.tar.gz) = eefb10bd07d738dac7e939bc198929f37452dea2
Size (ganglia-3.1.9.tar.gz) = 1120483 bytes
SHA1 (patch-aa) = e258e4efb1ae012db51901de87e614cede78812c
SHA1 (patch-ab) = 260a78711c2c7f1111ce8c4b0d54d2edd60dde08
SHA1 (patch-ac) = a7d4251eed047cb6ffb89c23d87b0db1dbe7b5f4
SHA1 (patch-lib__gm_protocol_dr.c) = 9d17d79bb1a6426a2547c00006819483aff47363

View file

@ -0,0 +1,45 @@
#!@RCD_SCRIPTS_SHELL@
#
# $NetBSD: gmetad.sh,v 1.1 2014/11/21 14:56:16 outpaddling Exp $
#
# PROVIDE: gmetad
# REQUIRE: DAEMON
# KEYWORD: shutdown
#
# You will need to set some variables in /etc/rc.conf to start gmetad:
#
# gmetad=YES
#
# Control script for the Ganglia meta daemon.  When the rc.subr framework
# is present the action is handed to run_rc_command; otherwise the script
# handles start, stop and restart itself.

if [ -f /etc/rc.subr ]
then
	. /etc/rc.subr
fi

name="gmetad"
rcvar=$name
command="@PREFIX@/sbin/gmetad"
required_files="@PKG_SYSCONFDIR@/gmetad.conf"
start_cmd="gmetad_start"
stop_cmd="gmetad_stop"
restart_cmd="gmetad_stop ; gmetad_start"

# Start the daemon.
gmetad_start ()
{
	# gmetad/process_xml.c 1060 re-releases a lock, claiming it's
	# ``required under certain errors''
	export PTHREAD_DIAGASSERT=A
	${command}
}

# Stop the daemon by signalling every process matching ${name}.
gmetad_stop()
{
	pkill ${name} 2>/dev/null
}

if [ -f /etc/rc.subr -a -f /etc/rc.conf -a -d /etc/rc.d -a -f /etc/rc.d/DAEMON ]
then
	load_rc_config $name
	run_rc_command "$1"
else
	# No rc.subr framework: dispatch on the requested action ourselves so
	# that "stop" and "restart" no longer start another daemon.  With no
	# argument the script still starts the daemon, as it always did.
	# The restart pattern must precede the start pattern because the
	# word "restart" also matches *start.
	case "${1:-start}" in
	*restart)
		gmetad_stop
		gmetad_start
		;;
	*start)
		gmetad_start
		;;
	*stop)
		gmetad_stop
		;;
	*)
		echo "Usage: $0 {start|stop|restart}" >&2
		exit 1
		;;
	esac
fi

View file

@ -0,0 +1,42 @@
#!@RCD_SCRIPTS_SHELL@
#
# $NetBSD: gmond.sh,v 1.1 2014/11/21 14:56:16 outpaddling Exp $
#
# PROVIDE: gmond
# REQUIRE: DAEMON
# KEYWORD: shutdown
#
# You will need to set some variables in /etc/rc.conf to start gmond:
#
# gmond=YES
#
# Control script for the Ganglia monitoring daemon.  When the rc.subr
# framework is present the action is handed to run_rc_command; otherwise
# the script handles start, stop and restart itself.

if [ -f /etc/rc.subr ]
then
	. /etc/rc.subr
fi

name="gmond"
rcvar=$name
command="@PREFIX@/sbin/gmond"
required_files="@PKG_SYSCONFDIR@/gmond.conf"
start_cmd="gmond_start"
stop_cmd="gmond_stop"
restart_cmd="gmond_stop ; gmond_start"

# Start the daemon.
gmond_start ()
{
	${command}
}

# Stop the daemon by signalling every process matching ${name}.
gmond_stop()
{
	pkill ${name} 2>/dev/null
}

if [ -f /etc/rc.subr -a -f /etc/rc.conf -a -d /etc/rc.d -a -f /etc/rc.d/DAEMON ]
then
	load_rc_config $name
	run_rc_command "$1"
else
	# No rc.subr framework: dispatch on the requested action ourselves so
	# that "stop" and "restart" no longer start another daemon.  With no
	# argument the script still starts the daemon, as it always did.
	# The restart pattern must precede the start pattern because the
	# word "restart" also matches *start.
	case "${1:-start}" in
	*restart)
		gmond_stop
		gmond_start
		;;
	*start)
		gmond_start
		;;
	*stop)
		gmond_stop
		;;
	*)
		echo "Usage: $0 {start|stop|restart}" >&2
		exit 1
		;;
	esac
fi

View file

@ -0,0 +1,6 @@
Linux:
cp ${PREFIX}/share/examples/rc.d/gmond.init /etc/init.d/gmond
chkconfig --add gmond

View file

@ -0,0 +1,201 @@
$NetBSD: patch-ac,v 1.1 2014/11/21 14:56:16 outpaddling Exp $
First chunk: update sysctl name for changes in NetBSD.
Rest:
From Manuel Tobias Schiller <mala@hinterbergen.de>:
I managed to trace things to the file libmetrics/netbsd/metrics.c in
the get_netbw function. Apparently, the code in get_netbw violates
alignment constraints for sparc64. I attached a patch against the result
of a "make patch" in parallel/ganglia-monitor-core. While I was at it, I
also changed proc_run_func somewhat to only count actually running
processes (having a look at NetBSD's ps(1) implementation) - without the
change, I got around 30 running processes on an idle machine.
--- libmetrics/netbsd/metrics.c.orig 2009-01-28 23:23:20.000000000 +0000
+++ libmetrics/netbsd/metrics.c
@@ -9,10 +9,15 @@
* Tested on NetBSD 2.0.2 (i386)
*/
+#define _KMEMUSER
+
#include <kvm.h>
#include <sys/param.h>
+#include <sys/proc.h>
#include <sys/mount.h>
+#include <sys/vmmeter.h>
+#include <uvm/uvm_extern.h>
#if __NetBSD_Version__ > 299000000
#include <sys/statvfs.h>
#define statfs statvfs
@@ -134,7 +139,10 @@ cpu_speed_func ( void )
cpu_speed = 0;
-#if (__NetBSD_Version__ > 299000000)
+#if (__NetBSD_Version__ > 500000000)
+ if (sysctlbyname("machdep.tsc_freq", &cpu_speed, &len, NULL, 0) == -1)
+ val.uint32 = 0;
+#elif (__NetBSD_Version__ > 299000000)
if (sysctlbyname("machdep.est.frequency.target", &cpu_speed, &len, NULL, 0) == -1)
val.uint32 = 0;
#endif
@@ -559,44 +567,40 @@ proc_total_func ( void )
g_val_t
proc_run_func( void )
{
- struct kinfo_proc *kp;
- int i;
- int state;
- int nentries;
- int what = KERN_PROC_ALL;
g_val_t val;
+ struct kinfo_proc2 *kp;
+ int cnt, i, j;
+ unsigned int count = 0;
val.uint32 = 0;
if (kd == NULL)
goto output;
-#ifdef KERN_PROC_NOTHREADS
- what |= KERN_PROC_NOTHREADS
-#endif
- if ((kp = kvm_getprocs(kd, what, 0, &nentries)) == 0 || nentries < 0)
+ kp = kvm_getproc2(kd, KERN_PROC_ALL, 0, sizeof(struct kinfo_proc2), &cnt);
+ if (0 == kp || cnt < 0)
goto output;
-
- for (i = 0; i < nentries; kp++, i++) {
-#ifdef KINFO_PROC_SIZE
- state = kp->ki_stat;
-#else
- state = kp->kp_proc.p_stat;
-#endif
- switch(state) {
-#if (__NetBSD_Version__ >= 200000000)
- case SACTIVE:
-#else
- case SRUN:
- case SONPROC:
-#endif
- case SIDL:
- val.uint32++;
- break;
+ for (i = 0; i < cnt; i++) {
+ struct kinfo_lwp* kl;
+ int nlwps;
+ if (((kp + i) -> p_realstat != SACTIVE))
+ continue;
+ kl = kvm_getlwps(kd, (kp + i)->p_pid, (kp + i)->p_paddr,
+ sizeof(struct kinfo_lwp), &nlwps);
+ if (kl == 0)
+ nlwps = 0;
+ if (0 == nlwps) count ++;
+ else {
+ for (j = 0; j < nlwps; j++) {
+ switch (kl[j].l_stat) {
+ case LSRUN:
+ ++count;
+ break;
+ }
+ }
}
}
- if (val.uint32 > 0)
- val.uint32--;
+ val.uint32 = count;
output:
return val;
@@ -1135,36 +1139,41 @@ get_netbw(double *in_bytes, double *out_
next = buf;
while (next < lim) {
+ struct if_msghdr tmp;
ifm = (struct if_msghdr *)next;
+ memcpy(&tmp, ifm, sizeof(tmp));
- if (ifm->ifm_type == RTM_IFINFO) {
+ if (tmp.ifm_type == RTM_IFINFO) {
sdl = (struct sockaddr_dl *)(ifm + 1);
} else {
fprintf(stderr, "out of sync parsing NET_RT_IFLIST\n");
fprintf(stderr, "expected %d, got %d\n", RTM_IFINFO,
- ifm->ifm_type);
- fprintf(stderr, "msglen = %d\n", ifm->ifm_msglen);
+ tmp.ifm_type);
+ fprintf(stderr, "msglen = %d\n", tmp.ifm_msglen);
fprintf(stderr, "buf:%p, next:%p, lim:%p\n", buf, next,
lim);
exit (1);
}
- next += ifm->ifm_msglen;
+ next += tmp.ifm_msglen;
while (next < lim) {
+ unsigned short msglen;
nextifm = (struct if_msghdr *)next;
if (nextifm->ifm_type != RTM_NEWADDR)
break;
- next += nextifm->ifm_msglen;
+ memcpy(&msglen, &nextifm->ifm_msglen,
+ sizeof(nextifm->ifm_msglen));
+ next += msglen;
}
- if ((ifm->ifm_flags & IFF_LOOPBACK) ||
- !(ifm->ifm_flags & IFF_UP))
+ if ((tmp.ifm_flags & IFF_LOOPBACK) ||
+ !(tmp.ifm_flags & IFF_UP))
continue;
- index = ifm->ifm_index;
+ index = tmp.ifm_index;
/* If we don't have a previous value yet, make a slot. */
if (index >= indexes) {
@@ -1187,25 +1196,25 @@ get_netbw(double *in_bytes, double *out_
*/
if (!seen[index]) {
seen[index] = 1;
- lastcount[index].in_bytes = ifm->ifm_data.ifi_ibytes;
- lastcount[index].out_bytes = ifm->ifm_data.ifi_obytes;
- lastcount[index].in_pkts = ifm->ifm_data.ifi_ipackets;
- lastcount[index].out_pkts = ifm->ifm_data.ifi_opackets;
+ lastcount[index].in_bytes = tmp.ifm_data.ifi_ibytes;
+ lastcount[index].out_bytes = tmp.ifm_data.ifi_obytes;
+ lastcount[index].in_pkts = tmp.ifm_data.ifi_ipackets;
+ lastcount[index].out_pkts = tmp.ifm_data.ifi_opackets;
}
traffic.in_bytes = counterdiff(lastcount[index].in_bytes,
- ifm->ifm_data.ifi_ibytes, ULONG_MAX, 0);
+ tmp.ifm_data.ifi_ibytes, ULONG_MAX, 0);
traffic.out_bytes = counterdiff(lastcount[index].out_bytes,
- ifm->ifm_data.ifi_obytes, ULONG_MAX, 0);
+ tmp.ifm_data.ifi_obytes, ULONG_MAX, 0);
traffic.in_pkts = counterdiff(lastcount[index].in_pkts,
- ifm->ifm_data.ifi_ipackets, ULONG_MAX, 0);
+ tmp.ifm_data.ifi_ipackets, ULONG_MAX, 0);
traffic.out_pkts = counterdiff(lastcount[index].out_pkts,
- ifm->ifm_data.ifi_opackets, ULONG_MAX, 0);
+ tmp.ifm_data.ifi_opackets, ULONG_MAX, 0);
- lastcount[index].in_bytes = ifm->ifm_data.ifi_ibytes;
- lastcount[index].out_bytes = ifm->ifm_data.ifi_obytes;
- lastcount[index].in_pkts = ifm->ifm_data.ifi_ipackets;
- lastcount[index].out_pkts = ifm->ifm_data.ifi_opackets;
+ lastcount[index].in_bytes = tmp.ifm_data.ifi_ibytes;
+ lastcount[index].out_bytes = tmp.ifm_data.ifi_obytes;
+ lastcount[index].in_pkts = tmp.ifm_data.ifi_ipackets;
+ lastcount[index].out_pkts = tmp.ifm_data.ifi_opackets;
#ifdef NETBW_DEBUG
if_indextoname(index, name);

View file

@ -0,0 +1,31 @@
$NetBSD: patch-lib__gm_protocol_dr.c,v 1.1 2014/11/21 14:56:16 outpaddling Exp $
Use INT32 macros, LONG ones were taken out of LP64 on Solaris.
--- lib/gm_protocol_xdr.c.orig 2009-01-28 23:23:20.000000000 +0000
+++ lib/gm_protocol_xdr.c
@@ -53,9 +53,9 @@ xdr_Ganglia_metadata_message (XDR *xdrs,
return FALSE;
} else {
- IXDR_PUT_U_LONG(buf, objp->slope);
- IXDR_PUT_U_LONG(buf, objp->tmax);
- IXDR_PUT_U_LONG(buf, objp->dmax);
+ IXDR_PUT_U_INT32(buf, objp->slope);
+ IXDR_PUT_U_INT32(buf, objp->tmax);
+ IXDR_PUT_U_INT32(buf, objp->dmax);
}
if (!xdr_array (xdrs, (char **)&objp->metadata.metadata_val, (u_int *) &objp->metadata.metadata_len, ~0,
sizeof (Ganglia_extra_data), (xdrproc_t) xdr_Ganglia_extra_data))
@@ -78,9 +78,9 @@ xdr_Ganglia_metadata_message (XDR *xdrs,
return FALSE;
} else {
- objp->slope = IXDR_GET_U_LONG(buf);
- objp->tmax = IXDR_GET_U_LONG(buf);
- objp->dmax = IXDR_GET_U_LONG(buf);
+ objp->slope = IXDR_GET_U_INT32(buf);
+ objp->tmax = IXDR_GET_U_INT32(buf);
+ objp->dmax = IXDR_GET_U_INT32(buf);
}
if (!xdr_array (xdrs, (char **)&objp->metadata.metadata_val, (u_int *) &objp->metadata.metadata_len, ~0,
sizeof (Ganglia_extra_data), (xdrproc_t) xdr_Ganglia_extra_data))