/*
* Copyright (C), 2000-2007 by the monit project group.
* All Rights Reserved.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include
#ifdef HAVE_STDIO_H
#include
#endif
#ifdef HAVE_STDARG_H
#include
#endif
#ifdef HAVE_ERRNO_H
#include
#endif
#ifdef HAVE_STDLIB_H
#include
#endif
#ifdef HAVE_SIGNAL_H
#include
#endif
#ifdef HAVE_SETJMP_H
#include
#endif
#ifdef HAVE_SYS_SOCKET_H
#include
#endif
#ifdef HAVE_STRING_H
#include
#endif
#ifdef HAVE_UNISTD_H
#include
#endif
#ifdef HAVE_SYS_TIME_H
#include
#endif
#ifdef HAVE_TIME_H
#include
#endif
#ifndef HAVE_SOL_IP
#include
#include
#include
#endif
#ifdef HAVE_NETINET_IP_ICMP_H
#include
#endif
#include "monitor.h"
#include "alert.h"
#include "event.h"
#include "socket.h"
#include "net.h"
#include "device.h"
#include "process.h"
#include "protocol.h"
/**
* Implementation of validation engine
*
* @author Jan-Henrik Haukeland
* @author Olivier Beyssac (check_skip)
* @author Martin Pala
* @author Christian Hopp
*
* @version \$Id: validate.c,v 1.167 2007/10/06 20:33:05 martinp Exp $
* @file
*/
/* -------------------------------------------------------------- Prototypes */
/* Forward declarations of the file-private test helpers defined further
 * down in this file */
static void check_uid(Service_T);
static void check_gid(Service_T);
static void check_size(Service_T);
static void check_perm(Service_T);
static void check_match(Service_T);
static int check_match_ignore(Service_T, char *);
static void check_match_if(Service_T, char *);
static int check_skip(Service_T);
static int check_timeout(Service_T);
static void check_checksum(Service_T);
static void check_timestamp(Service_T);
static void check_process_state(Service_T);
static void check_process_pid(Service_T);
static void check_process_ppid(Service_T);
static void check_connection(Service_T, Port_T);
static void check_device_flags(Service_T);
static void check_device_resources(Service_T, Device_T);
static void check_process_resources(Service_T, Resource_T);
/* Process tree handed to initprocesstree() at the start of validate() and
 * afterwards passed to update_system_load() and update_process_data() */
ProcessTree_T *ptree=NULL;
/* Size value that accompanies ptree in the calls above */
int ptreesize=0;
/* Second tree handed to initprocesstree(); it is released with
 * delprocesstree() at the end of validate().
 * NOTE(review): presumably the tree of the previous cycle -- confirm */
ProcessTree_T *oldptree=NULL;
/* Size value that accompanies oldptree */
int oldptreesize=0;
/* Size in bytes of the line buffer used by check_match() */
#define MATCH_LINE_LENGTH 512
/* ---------------------------------------------------------------- Public */
/**
* This function contains the main check machinery for monit. The
* validate function checks the services in the service list to see if
* they pass all defined tests.
*
* One call performs one cycle: queued events are processed, process
* data is refreshed (only if Run.doprocess is set), pending actions
* are executed and every monitored service that is neither skipped
* nor timed out is tested through its s->check callback.
*/
void validate() {
Service_T s;
sigset_t ns, os;
/* Reset the handler flag and process the queued events before testing */
Run.handler_flag = HANDLER_PASSED;
Event_queue_process(Run.eventlist);
/* Refresh the process tree and system load only when process
 * monitoring is enabled */
if(Run.doprocess) {
initprocesstree(&ptree, &ptreesize, &oldptree, &oldptreesize);
update_system_load(ptree, ptreesize);
}
/* Record the time of this data collection */
time(&systeminfo.collected);
/* In the case that at least one action is pending, perform quick
* loop to handle the actions ASAP */
if(Run.doaction) {
Run.doaction = 0;
for(s= servicelist; s; s= s->next) {
/* Each service is handled under its own mutex with signals blocked;
 * LOCK/END_LOCK are macros that bracket the protected statements */
LOCK(s->mutex)
set_signal_block(&ns, &os);
if(s->doaction != ACTION_IGNORE) {
control_service(s->name, s->doaction);
s->doaction = ACTION_IGNORE;
}
unset_signal_block(&os);
END_LOCK;
}
}
/* Check the services */
for(s= servicelist; s; s= s->next) {
LOCK(s->mutex)
set_signal_block(&ns, &os);
if(s->doaction != ACTION_IGNORE) {
/* Check for a pending action which may have occurred after the previous check */
control_service(s->name, s->doaction);
s->doaction = ACTION_IGNORE;
} else if(s->monitor && !check_skip(s) && !check_timeout(s)) {
/* Run the type specific test function of the service */
s->check(s);
/* The monitoring may be disabled by some matching rule in s->check
* so we have to check again before setting to MONITOR_YES */
if(s->monitor != MONITOR_NOT) s->monitor= MONITOR_YES;
}
/* The collection time is updated even if the service was not tested */
time(&s->collected);
unset_signal_block(&os);
END_LOCK;
}
/* Release the second process tree once all services were handled */
if(Run.doprocess)
delprocesstree(&oldptree, oldptreesize);
/* End of cycle housekeeping */
reset_depend();
handle_collector(NULL);
}
/**
 * Run the configured tests against the process service s and post the
 * resulting events.
 *
 * @param s The process service to validate
 * @return FALSE if the process is not running, otherwise TRUE
 */
int check_process(Service_T s) {
  pid_t running_pid;
  Port_T port;
  Resource_T res;

  ASSERT(s);

  /* Existence test: a zero pid means the process is not running */
  running_pid= Util_isProcessRunning(s);
  if(!running_pid) {
    /* Reset the service info object to prevent false data in the first run */
    Util_resetInfo(s);
    Event_post(s, EVENT_NONEXIST, STATE_FAILED, s->action_NONEXIST,
               "'%s' process is not running", s->name);
    return FALSE;
  }
  Event_post(s, EVENT_NONEXIST, STATE_PASSED, s->action_NONEXIST,
             "'%s' process is running with pid %d", s->name, (int)running_pid);

  s->inf->uptime= Util_getProcessUptime(s->path);

  /* State, pid, ppid and resource tests need fresh process data */
  if(Run.doprocess) {
    if(!update_process_data(s, ptree, ptreesize, running_pid)) {
      LogError("'%s' failed to get service data\n", s->name);
    } else {
      check_process_state(s);
      check_process_pid(s);
      check_process_ppid(s);
      res= s->resourcelist;
      while(res) {
        check_process_resources(s, res);
        res= res->next;
      }
    }
  }

  /* Test each host:port and protocol in the service's portlist */
  for(port= s->portlist; port; port= port->next)
    check_connection(s, port);

  return TRUE;
}
/**
 * Validate a given device service s. Events are posted according to
 * its configuration.
 *
 * The usage figures are kept in tenths of a percent (factor 1000.0).
 * If the filesystem reports no inodes or no blocks, the corresponding
 * percentage is set to 0 instead of dividing by zero.
 *
 * @param s The device service to validate
 * @return FALSE if the path cannot be stat'ed or the device usage
 *         cannot be read, otherwise TRUE
 */
int check_device(Service_T s) {
  Device_T td;
  struct stat stat_buf;

  ASSERT(s);

  if(stat(s->path, &stat_buf) != 0) {
    Event_post(s, EVENT_NONEXIST, STATE_FAILED, s->action_NONEXIST,
               "'%s' device doesn't exist", s->name);
    return FALSE;
  }
  s->inf->st_mode= stat_buf.st_mode;
  s->inf->st_uid= stat_buf.st_uid;
  s->inf->st_gid= stat_buf.st_gid;
  Event_post(s, EVENT_NONEXIST, STATE_PASSED, s->action_NONEXIST,
             "'%s' device exist", s->name);

  if(!device_usage(s->inf, s->path)) {
    Event_post(s, EVENT_DATA, STATE_FAILED, s->action_DATA,
               "'%s' unable to read device %s state", s->name, s->path);
    return FALSE;
  }

  /* Guard the divisions: check_device_resources() already treats
   * f_files <= 0 as "no inode support", and converting the result of
   * a division by zero to int is undefined */
  s->inf->inode_percent= s->inf->f_files > 0 ?
    (int)((1000.0 * (s->inf->f_files - s->inf->f_filesfree)) /
          (float)s->inf->f_files) : 0;
  s->inf->space_percent= s->inf->f_blocks > 0 ?
    (int)((1000.0 * (s->inf->f_blocks - s->inf->f_blocksfree)) /
          (float)s->inf->f_blocks) : 0;
  s->inf->inode_total= s->inf->f_files - s->inf->f_filesfree;
  s->inf->space_total= s->inf->f_blocks - s->inf->f_blocksfreetotal;
  Event_post(s, EVENT_DATA, STATE_PASSED, s->action_DATA,
             "'%s' succeeded getting device statistic for %s", s->name, s->path);

  if(s->perm)
    check_perm(s);
  if(s->uid)
    check_uid(s);
  if(s->gid)
    check_gid(s);

  check_device_flags(s);

  for(td= s->devicelist; td; td= td->next)
    check_device_resources(s, td);

  return TRUE;
}
/**
 * Run the configured tests against the file service s and post the
 * resulting events.
 *
 * @param s The file service to validate
 * @return FALSE if the path cannot be stat'ed or is not a regular
 *         file, otherwise TRUE
 */
int check_file(Service_T s) {
  struct stat sb;

  ASSERT(s);

  if(stat(s->path, &sb) != 0) {
    Event_post(s, EVENT_NONEXIST, STATE_FAILED, s->action_NONEXIST,
               "'%s' file doesn't exist", s->name);
    return FALSE;
  }

  s->inf->st_mode= sb.st_mode;
  if(s->inf->st_ino != 0) {
    /* Remember the inode seen in the previous cycle */
    s->inf->st_ino_prev= s->inf->st_ino;
  } else {
    /* No inode recorded yet: take the current one as the previous
     * inode and place the read position at the current end of file */
    s->inf->st_ino_prev= sb.st_ino;
    s->inf->readpos= sb.st_size;
  }
  s->inf->st_ino= sb.st_ino;
  s->inf->st_uid= sb.st_uid;
  s->inf->st_gid= sb.st_gid;
  s->inf->st_size= sb.st_size;
  s->inf->timestamp= MAX(sb.st_mtime, sb.st_ctime);
  DEBUG("'%s' file existence check passed\n", s->name);
  Event_post(s, EVENT_NONEXIST, STATE_PASSED, s->action_NONEXIST,
             "'%s' file exist", s->name);

  if(!S_ISREG(s->inf->st_mode)) {
    Event_post(s, EVENT_INVALID, STATE_FAILED, s->action_INVALID,
               "'%s' is not regular file", s->name);
    return FALSE;
  }
  DEBUG("'%s' is regular file\n", s->name);
  Event_post(s, EVENT_INVALID, STATE_PASSED, s->action_INVALID,
             "'%s' is regular file", s->name);

  /* Optional tests, run only when configured for this service */
  if(s->checksum)
    check_checksum(s);
  if(s->perm)
    check_perm(s);
  if(s->uid)
    check_uid(s);
  if(s->gid)
    check_gid(s);
  if(s->sizelist)
    check_size(s);
  if(s->timestamplist)
    check_timestamp(s);
  if(s->matchlist)
    check_match(s);

  return TRUE;
}
/**
 * Run the configured tests against the directory service s and post
 * the resulting events.
 *
 * @param s The directory service to validate
 * @return FALSE if the path cannot be stat'ed or is not a directory,
 *         otherwise TRUE
 */
int check_directory(Service_T s) {
  struct stat sb;

  ASSERT(s);

  if(stat(s->path, &sb) != 0) {
    Event_post(s, EVENT_NONEXIST, STATE_FAILED, s->action_NONEXIST,
               "'%s' directory doesn't exist", s->name);
    return FALSE;
  }

  s->inf->st_mode= sb.st_mode;
  s->inf->st_uid= sb.st_uid;
  s->inf->st_gid= sb.st_gid;
  /* The newer of modification and status change time */
  s->inf->timestamp= MAX(sb.st_mtime, sb.st_ctime);
  DEBUG("'%s' directory existence check passed\n", s->name);
  Event_post(s, EVENT_NONEXIST, STATE_PASSED, s->action_NONEXIST,
             "'%s' directory exist", s->name);

  if(!S_ISDIR(s->inf->st_mode)) {
    Event_post(s, EVENT_INVALID, STATE_FAILED, s->action_INVALID,
               "'%s' is not directory", s->name);
    return FALSE;
  }
  DEBUG("'%s' is directory\n", s->name);
  Event_post(s, EVENT_INVALID, STATE_PASSED, s->action_INVALID,
             "'%s' is directory", s->name);

  /* Optional tests, run only when configured for this service */
  if(s->perm)
    check_perm(s);
  if(s->uid)
    check_uid(s);
  if(s->gid)
    check_gid(s);
  if(s->timestamplist)
    check_timestamp(s);

  return TRUE;
}
/**
 * Run the configured tests against the fifo service s and post the
 * resulting events.
 *
 * @param s The fifo service to validate
 * @return FALSE if the path cannot be stat'ed or is not a fifo,
 *         otherwise TRUE
 */
int check_fifo(Service_T s) {
  struct stat sb;

  ASSERT(s);

  if(stat(s->path, &sb) != 0) {
    Event_post(s, EVENT_NONEXIST, STATE_FAILED, s->action_NONEXIST,
               "'%s' fifo doesn't exist", s->name);
    return FALSE;
  }

  s->inf->st_mode= sb.st_mode;
  s->inf->st_uid= sb.st_uid;
  s->inf->st_gid= sb.st_gid;
  /* The newer of modification and status change time */
  s->inf->timestamp= MAX(sb.st_mtime, sb.st_ctime);
  DEBUG("'%s' fifo existence check passed\n", s->name);
  Event_post(s, EVENT_NONEXIST, STATE_PASSED, s->action_NONEXIST,
             "'%s' fifo exist", s->name);

  if(!S_ISFIFO(s->inf->st_mode)) {
    Event_post(s, EVENT_INVALID, STATE_FAILED, s->action_INVALID,
               "'%s' is not fifo", s->name);
    return FALSE;
  }
  DEBUG("'%s' is fifo\n", s->name);
  Event_post(s, EVENT_INVALID, STATE_PASSED, s->action_INVALID,
             "'%s' is fifo", s->name);

  /* Optional tests, run only when configured for this service */
  if(s->perm)
    check_perm(s);
  if(s->uid)
    check_uid(s);
  if(s->gid)
    check_gid(s);
  if(s->timestamplist)
    check_timestamp(s);

  return TRUE;
}
/**
 * Validate a remote host service: first the ICMP tests, then the
 * connection tests for every configured port.
 *
 * @param s The remote service to validate
 * @return FALSE if an unknown ICMP type was found or the most recently
 *         executed ping failed, otherwise TRUE
 */
int check_remote_host(Service_T s) {
  Port_T port;
  Icmp_T ping;
  Icmp_T latest= NULL;

  ASSERT(s);

  /* Test each icmp type in the service's icmplist */
  for(ping= s->icmplist; ping; ping= ping->next) {
    if(ping->type != ICMP_ECHO) {
      LogError("'%s' error -- unknown ICMP type: [%d]\n",
               s->name, ping->type);
      return FALSE;
    }
    ping->response= icmp_echo(s->path, ping->timeout, ping->count);
    /* A negative response marks a failed echo request */
    if(ping->response < 0) {
      ping->is_available= FALSE;
      DEBUG("'%s' icmp ping failed\n", s->name);
      Event_post(s, EVENT_ICMP, STATE_FAILED, ping->action,
                 "'%s' failed ICMP test [%s]", s->name, icmpnames[ping->type]);
    } else {
      ping->is_available= TRUE;
      DEBUG("'%s' icmp ping succeeded [response time %.3fs]\n",
            s->name, ping->response);
      Event_post(s, EVENT_ICMP, STATE_PASSED, ping->action,
                 "'%s' passed ICMP test [%s]", s->name, icmpnames[ping->type]);
    }
    latest= ping;
  }

  /* If we could not ping the host we assume it's down and do not
   * continue to check any port connections */
  if(latest && !latest->is_available) {
    DEBUG("'%s' icmp ping failed, skipping any port connection tests\n",
          s->name);
    return FALSE;
  }

  /* Test each host:port and protocol in the service's portlist */
  for(port= s->portlist; port; port= port->next)
    check_connection(s, port);

  return TRUE;
}
/**
 * Validate the general system indicators by running every resource
 * test in the service's resource list.
 *
 * @param s The system service to validate
 * @return Always TRUE
 */
int check_system(Service_T s) {
  Resource_T res;

  ASSERT(s);

  res= s->resourcelist;
  while(res) {
    check_process_resources(s, res);
    res= res->next;
  }

  return TRUE;
}
/* --------------------------------------------------------------- Private */
/**
 * Test the connection and protocol of port p belonging to service s.
 *
 * The test opens a socket, optionally waits for it to become ready,
 * and runs the protocol check. On success p->response holds the
 * elapsed time in seconds, otherwise it stays at -1. In both cases
 * p->is_available is updated and an EVENT_CONNECTION event is posted.
 *
 * @param s The service the port belongs to
 * @param p The port to test
 */
static void check_connection(Service_T s, Port_T p) {
  Socket_T socket;
  volatile int rv= TRUE;
  char report[STRLEN]={0};
  struct timeval t1;
  struct timeval t2;

  ASSERT(s && p);

  /* Stays at -1 unless every step below succeeds */
  p->response = -1;

  /* Get time of connection attempt beginning */
  gettimeofday(&t1, NULL);

  /* Open a socket to the destination INET[hostname:port] or UNIX[pathname] */
  socket= socket_create(p);
  if(!socket) {
    snprintf(report, STRLEN,
             "'%s' failed, cannot open a connection to %s%s%s",
             s->name, p->address,
             p->family==AF_INET?" via ":"",
             p->family==AF_INET?Util_portTypeDescription(p):"");
    rv= FALSE;
    goto error;
  } else {
    DEBUG("'%s' succeeded connecting to %s%s%s\n",
          s->name, p->address,
          p->family==AF_INET?" via ":"",
          p->family==AF_INET?Util_portTypeDescription(p):"");
  }

  /* Verify that the socket is ready for i|o. TCP sockets are checked anytime, UDP
   * sockets just when there is no specific protocol test used since the socket_is_ready()
   * adds 2s delay when used with UDP socket. When there is specific protocol used, we
   * don't need it for UDP, since the protocol test is sufficient */
  if((socket_get_type(socket) != SOCK_DGRAM || p->protocol->check == check_default)
     &&
     !socket_is_ready(socket))
  {
    snprintf(report, STRLEN,
             "'%s' failed, the socket at %s%s%s is not ready for i|o -- %s",
             s->name, p->address,
             p->family==AF_INET?" via ":"",
             p->family==AF_INET?Util_portTypeDescription(p):"",
             STRERROR);
    rv= FALSE;
    goto error;
  }

  /* Run the protocol verification routine through the socket */
  if(! p->protocol->check(socket)) {
    snprintf(report, STRLEN,
             "'%s' failed protocol test [%s] at %s%s%s",
             s->name, p->protocol->name, p->address,
             p->family==AF_INET?" via ":"",
             p->family==AF_INET?Util_portTypeDescription(p):"");
    rv= FALSE;
    goto error;
  } else {
    DEBUG("'%s' succeeded testing protocol [%s] at %s%s%s\n",
          s->name, p->protocol->name, p->address,
          p->family==AF_INET?" via ":"",
          p->family==AF_INET?Util_portTypeDescription(p):"");
  }

  /* Get time of connection attempt finish */
  gettimeofday(&t2, NULL);

  /* Get the response time */
  p->response= (double)(t2.tv_sec - t1.tv_sec) +
               (double)(t2.tv_usec - t1.tv_usec)/1000000;

  /* Common exit for the success and the failure path */
 error:
  if(socket) socket_free(&socket);

  if(!rv) {
    p->is_available= FALSE;
    /* The report embeds the service name, the address and an error
     * string, so it must be passed as data and never as the format */
    Event_post(s, EVENT_CONNECTION, STATE_FAILED, p->action, "%s", report);
  } else {
    p->is_available= TRUE;
    Event_post(s, EVENT_CONNECTION, STATE_PASSED, p->action,
               "'%s' connection passed to %s%s%s", s->name,
               p->address,
               p->family==AF_INET?" via ":"",
               p->family==AF_INET?Util_portTypeDescription(p):"");
  }
}
/**
 * Test the process state: a failed data event is posted if the
 * PROCESS_ZOMBIE bit is set in the status flag of service s,
 * otherwise a passed event is posted.
 */
static void check_process_state(Service_T s) {
  ASSERT(s);

  if(!(s->inf->status_flag & PROCESS_ZOMBIE)) {
    DEBUG("'%s' zombie check passed [status_flag=%04x]\n",
          s->name, s->inf->status_flag);
    Event_post(s, EVENT_DATA, STATE_PASSED, s->action_DATA,
               "'%s' check process state passed", s->name);
    return;
  }

  Event_post(s, EVENT_DATA, STATE_FAILED, s->action_DATA,
             "'%s' process with pid %d is a zombie", s->name, s->inf->pid);
}
/**
 * Compare the current pid of service s with the stored _pid value and
 * post a changed event if they differ. Nothing is done while _pid
 * still holds -1, i.e. the pid was not initialized yet.
 */
static void check_process_pid(Service_T s) {
  ASSERT(s && s->inf);

  /* process pid was not initialized yet */
  if(s->inf->_pid == -1)
    return;

  if(s->inf->_pid == s->inf->pid) {
    DEBUG("'%s' PID has not changed since last cycle\n", s->name);
    Event_post(s, EVENT_CHANGED, STATE_PASSED, s->action_PID,
               "'%s' PID has not changed", s->name);
    return;
  }

  Event_post(s, EVENT_CHANGED, STATE_FAILED, s->action_PID,
             "'%s' process PID changed to %d", s->name, s->inf->pid);
}
/**
 * Compare the current ppid of service s with the stored _ppid value
 * and post a changed event if they differ. Nothing is done while
 * _ppid still holds -1, i.e. the ppid was not initialized yet.
 */
static void check_process_ppid(Service_T s) {
  ASSERT(s && s->inf);

  /* process ppid was not initialized yet */
  if(s->inf->_ppid == -1)
    return;

  if(s->inf->_ppid == s->inf->ppid) {
    DEBUG("'%s' PPID has not changed since last cycle\n", s->name);
    Event_post(s, EVENT_CHANGED, STATE_PASSED, s->action_PPID,
               "'%s' PPID has not changed", s->name);
    return;
  }

  Event_post(s, EVENT_CHANGED, STATE_FAILED, s->action_PPID,
             "'%s' process PPID changed to %d", s->name, s->inf->ppid);
}
/**
* Check one resource rule r of service s.
*
* The current value selected by r->resource_id is compared with
* r->limit through Util_evalQExpression(). If the expression matches,
* a report is formatted and an EVENT_RESOURCE failed event is posted,
* otherwise a passed event is posted. An unknown resource id is logged
* and no event is posted.
*
* Percent values and limits are divided by 10.0 when printed, so they
* appear to be stored in tenths of a percent.
*
* @param s The service the rule belongs to; for the memory rules the
*          system wide values are used if s->type is TYPE_SYSTEM
* @param r The resource rule to evaluate
*/
static void check_process_resources(Service_T s, Resource_T r) {
int okay= TRUE;
char report[STRLEN]={0};
ASSERT(s && r);
switch(r->resource_id) {
/* CPU usage of the service itself */
case RESOURCE_ID_CPU_PERCENT:
if(Util_evalQExpression(r->operator, s->inf->cpu_percent, r->limit)) {
snprintf(report, STRLEN,
"'%s' cpu usage of %.1f%% matches resource limit [cpu usage%s%.1f%%]",
s->name, s->inf->cpu_percent/10.0, operatorshortnames[r->operator],
r->limit/10.0);
okay= FALSE;
} else {
DEBUG("'%s' cpu usage check passed [current cpu usage=%.1f%%]\n",
s->name, s->inf->cpu_percent/10.0);
}
break;
/* System wide CPU time spent in user mode, read from systeminfo */
case RESOURCE_ID_CPUUSER:
if(Util_evalQExpression(r->operator, systeminfo.total_cpu_user_percent, r->limit)) {
snprintf(report, STRLEN,
"'%s' cpu user usage of %.1f%% matches resource limit [cpu user usage%s%.1f%%]",
s->name, systeminfo.total_cpu_user_percent/10.0, operatorshortnames[r->operator],
r->limit/10.0);
okay= FALSE;
} else {
DEBUG("'%s' cpu user usage check passed [current cpu user usage=%.1f%%]\n",
s->name, systeminfo.total_cpu_user_percent/10.0);
}
break;
/* System wide CPU time spent in system mode, read from systeminfo */
case RESOURCE_ID_CPUSYSTEM:
if(Util_evalQExpression(r->operator, systeminfo.total_cpu_syst_percent, r->limit)) {
snprintf(report, STRLEN,
"'%s' cpu system usage of %.1f%% matches resource limit [cpu system usage%s%.1f%%]",
s->name, systeminfo.total_cpu_syst_percent/10.0, operatorshortnames[r->operator],
r->limit/10.0);
okay= FALSE;
} else {
DEBUG("'%s' cpu system usage check passed [current cpu system usage=%.1f%%]\n",
s->name, systeminfo.total_cpu_syst_percent/10.0);
}
break;
/* System wide CPU wait time, read from systeminfo */
case RESOURCE_ID_CPUWAIT:
if(Util_evalQExpression(r->operator, systeminfo.total_cpu_wait_percent, r->limit)) {
snprintf(report, STRLEN,
"'%s' cpu wait usage of %.1f%% matches resource limit [cpu wait usage%s%.1f%%]",
s->name, systeminfo.total_cpu_wait_percent/10.0, operatorshortnames[r->operator],
r->limit/10.0);
okay= FALSE;
} else {
DEBUG("'%s' cpu wait usage check passed [current cpu wait usage=%.1f%%]\n",
s->name, systeminfo.total_cpu_wait_percent/10.0);
}
break;
/* Memory usage in percent: system wide value for a system service,
 * otherwise the value of the service itself */
case RESOURCE_ID_MEM_PERCENT:
if(s->type == TYPE_SYSTEM) {
if(Util_evalQExpression(r->operator, systeminfo.total_mem_percent, r->limit)) {
snprintf(report, STRLEN,
"'%s' mem usage of %.1f%% matches resource limit [mem usage%s%.1f%%]",
s->name, systeminfo.total_mem_percent/10.0, operatorshortnames[r->operator],
r->limit/10.0);
okay= FALSE;
} else {
DEBUG("'%s' mem usage check passed [current mem usage=%.1f%%]\n",
s->name, systeminfo.total_mem_percent/10.0);
}
} else {
if(Util_evalQExpression(r->operator, s->inf->mem_percent, r->limit)) {
snprintf(report, STRLEN,
"'%s' mem usage of %.1f%% matches resource limit [mem usage%s%.1f%%]",
s->name, s->inf->mem_percent/10.0, operatorshortnames[r->operator],
r->limit/10.0);
okay= FALSE;
} else {
DEBUG("'%s' mem usage check passed [current mem usage=%.1f%%]\n",
s->name, s->inf->mem_percent/10.0);
}
}
break;
/* Memory usage in kB: system wide value for a system service,
 * otherwise the value of the service itself */
case RESOURCE_ID_MEM_KBYTE:
if(s->type == TYPE_SYSTEM) {
if(Util_evalQExpression(r->operator, systeminfo.total_mem_kbyte, r->limit)) {
snprintf(report, STRLEN,
"'%s' mem amount of %ldkB matches resource limit [mem amount%s%ldkB]",
s->name, systeminfo.total_mem_kbyte, operatorshortnames[r->operator], r->limit);
okay= FALSE;
} else {
DEBUG("'%s' mem amount check passed [current mem amount=%ldkB]\n",
s->name, systeminfo.total_mem_kbyte);
}
} else {
if(Util_evalQExpression(r->operator, s->inf->mem_kbyte, r->limit)) {
snprintf(report, STRLEN,
"'%s' mem amount of %ldkB matches resource limit [mem amount%s%ldkB]",
s->name, s->inf->mem_kbyte, operatorshortnames[r->operator], r->limit);
okay= FALSE;
} else {
DEBUG("'%s' mem amount check passed [current mem amount=%ldkB]\n",
s->name, s->inf->mem_kbyte);
}
}
break;
/* Load averages: the value is multiplied by 10 and truncated to int
 * before it is compared with the limit */
case RESOURCE_ID_LOAD1:
if(Util_evalQExpression(r->operator,
(int)(systeminfo.loadavg[0]*10.0), r->limit)) {
snprintf(report, STRLEN,
"'%s' loadavg(1min) of %.1f matches resource limit "
"[loadavg(1min)%s%.1f]",
s->name, systeminfo.loadavg[0], operatorshortnames[r->operator],
r->limit/10.0);
okay= FALSE;
} else {
DEBUG("'%s' loadavg(1min) check passed [current loadavg(1min)=%.1f]\n",
s->name, systeminfo.loadavg[0]);
}
break;
case RESOURCE_ID_LOAD5:
if(Util_evalQExpression(r->operator,
(int)(systeminfo.loadavg[1]*10.0), r->limit)) {
snprintf(report, STRLEN,
"'%s' loadavg(5min) of %.1f matches resource limit "
"[loadavg(5min)%s%.1f]",
s->name, systeminfo.loadavg[1], operatorshortnames[r->operator],
r->limit/10.0);
okay= FALSE;
} else {
DEBUG("'%s' loadavg(5min) check passed [current loadavg(5min)=%.1f]\n",
s->name, systeminfo.loadavg[1]);
}
break;
case RESOURCE_ID_LOAD15:
if(Util_evalQExpression(r->operator,
(int)(systeminfo.loadavg[2]*10.0), r->limit)) {
snprintf(report, STRLEN,
"'%s' loadavg(15min) of %.1f matches resource limit "
"[loadavg(15min)%s%.1f]",
s->name, systeminfo.loadavg[2], operatorshortnames[r->operator],
r->limit/10.0);
okay= FALSE;
} else {
DEBUG("'%s' loadavg(15min) check passed [current loadavg(15min)=%.1f]\n",
s->name, systeminfo.loadavg[2]);
}
break;
/* Number of children of the service */
case RESOURCE_ID_CHILDREN:
if(Util_evalQExpression(r->operator, s->inf->children, r->limit)) {
snprintf(report, STRLEN,
"'%s' children of %i matches resource limit [children%s%ld]",
s->name, s->inf->children, operatorshortnames[r->operator], r->limit);
okay= FALSE;
} else {
DEBUG("'%s' children check passed [current children=%i]\n",
s->name, s->inf->children);
}
break;
/* Total memory amount in kB recorded for the service
 * NOTE(review): presumably the service including its children -- confirm */
case RESOURCE_ID_TOTAL_MEM_KBYTE:
if(Util_evalQExpression(r->operator, s->inf->total_mem_kbyte, r->limit)) {
snprintf(report, STRLEN,
"'%s' total mem amount of %ldkB matches resource limit"
" [total mem amount%s%ldkB]",
s->name, s->inf->total_mem_kbyte, operatorshortnames[r->operator],
r->limit);
okay= FALSE;
} else {
DEBUG("'%s' total mem amount check passed "
"[current total mem amount=%ldkB]\n", s->name, s->inf->total_mem_kbyte);
}
break;
/* Total memory amount in percent recorded for the service */
case RESOURCE_ID_TOTAL_MEM_PERCENT:
if(Util_evalQExpression(r->operator, s->inf->total_mem_percent, r->limit)) {
snprintf(report, STRLEN,
"'%s' total mem amount of %.1f%% matches resource limit"
" [total mem amount%s%.1f%%]",
s->name, (float)s->inf->total_mem_percent/10.0,
operatorshortnames[r->operator], (float)r->limit/10.0);
okay= FALSE;
} else {
DEBUG("'%s' total mem amount check passed "
"[current total mem amount=%.1f%%]\n", s->name,
s->inf->total_mem_percent/10.0);
}
break;
default:
/* Unknown rule: log it and leave without posting any event */
LogError("'%s' error -- unknown resource ID: [%d]\n",
s->name, r->resource_id);
return;
}
/* The report is passed as an argument to "%s", never as the format */
if(! okay) {
Event_post(s, EVENT_RESOURCE, STATE_FAILED, r->action, "%s", report);
} else {
Event_post(s, EVENT_RESOURCE, STATE_PASSED, r->action,
"'%s' resource passed", s->name);
}
}
/**
 * Compute the checksum of the service path and compare it with the
 * expected hash (32 characters for MD5, 40 for SHA1).
 *
 * In "test changes" mode a difference posts a changed event and the
 * new checksum becomes the expected value for the next cycle. In
 * constant mode a difference posts a checksum failed event.
 */
static void check_checksum(Service_T s) {
  Checksum_T expected;
  int differs;

  ASSERT(s && s->path && s->checksum && s->checksum->hash);

  expected= s->checksum;

  /* Drop the checksum of the previous cycle before computing a new one */
  if(s->inf->cs_sum)
    FREE(s->inf->cs_sum);
  s->inf->cs_sum= Util_getChecksum(s->path, expected->type);

  if(!s->inf->cs_sum) {
    Event_post(s, EVENT_DATA, STATE_FAILED, s->action_DATA,
               "'%s' cannot compute checksum for %s", s->name, s->path);
    return;
  }
  Event_post(s, EVENT_DATA, STATE_PASSED, s->action_DATA,
             "'%s' checksum computed for %s", s->name, s->path);

  /* The number of compared characters depends on the hash type */
  if(expected->type == HASH_MD5) {
    differs= strncmp(expected->hash, s->inf->cs_sum, 32);
  } else if(expected->type == HASH_SHA1) {
    differs= strncmp(expected->hash, s->inf->cs_sum, 40);
  } else {
    LogError("'%s' unknown hash type\n", s->name);
    FREE(s->inf->cs_sum);
    return;
  }

  if(!differs) {
    if(expected->test_changes) {
      DEBUG("'%s' checksum has not changed\n", s->name);
      Event_post(s, EVENT_CHANGED, STATE_PASSED, expected->action,
                 "'%s' checksum has not changed", s->name);
    } else {
      DEBUG("'%s' has valid checksums\n", s->name);
      Event_post(s, EVENT_CHECKSUM, STATE_PASSED, expected->action,
                 "'%s' checksum passed", s->name);
    }
    return;
  }

  if(expected->test_changes) {
    /* if we are testing for changes only, the value is variable */
    Event_post(s, EVENT_CHANGED, STATE_FAILED, expected->action,
               "'%s' checksum was changed for %s", s->name, s->path);
    /* reset expected value for next cycle */
    FREE(expected->hash);
    expected->hash= xstrdup(s->inf->cs_sum);
  } else {
    /* we are testing constant value for failed or passed state */
    Event_post(s, EVENT_CHECKSUM, STATE_FAILED, expected->action,
               "'%s' checksum test failed for %s", s->name, s->path);
  }
}
/**
 * Compare the permission bits (mode & 07777) recorded for the service
 * path with the expected permission and post the matching event.
 */
static void check_perm(Service_T s) {
  ASSERT(s && s->perm);

  if((s->inf->st_mode & 07777) == s->perm->perm) {
    DEBUG("'%s' permission check passed [current permission=%04o]\n",
          s->name, s->inf->st_mode&07777);
    Event_post(s, EVENT_PERMISSION, STATE_PASSED, s->perm->action,
               "'%s' permission passed", s->name);
    return;
  }

  Event_post(s, EVENT_PERMISSION, STATE_FAILED, s->perm->action,
             "'%s' permission test failed for %s -- current permission is %04o",
             s->name, s->path, s->inf->st_mode&07777);
}
/**
 * Compare the uid recorded for the service path with the expected uid
 * and post the matching event.
 */
static void check_uid(Service_T s) {
  ASSERT(s && s->uid);

  if(s->inf->st_uid == s->uid->uid) {
    DEBUG("'%s' uid check passed [current uid=%d]\n", s->name,
          (int)s->inf->st_uid);
    Event_post(s, EVENT_UID, STATE_PASSED, s->uid->action, "'%s' uid passed", s->name);
    return;
  }

  Event_post(s, EVENT_UID, STATE_FAILED, s->uid->action,
             "'%s' uid test failed for %s -- current uid is %d",
             s->name, s->path, (int)s->inf->st_uid);
}
/**
 * Compare the gid recorded for the service path with the expected gid
 * and post the matching event.
 */
static void check_gid(Service_T s) {
  ASSERT(s && s->gid);

  if(s->inf->st_gid == s->gid->gid) {
    DEBUG("'%s' gid check passed [current gid=%d]\n", s->name,
          (int)s->inf->st_gid);
    Event_post(s, EVENT_GID, STATE_PASSED, s->gid->action, "'%s' gid passed", s->name);
    return;
  }

  Event_post(s, EVENT_GID, STATE_FAILED, s->gid->action,
             "'%s' gid test failed for %s -- current gid is %d",
             s->name, s->path, (int)s->inf->st_gid);
}
/**
 * Validate the timestamp rules of service s.
 *
 * Rules are evaluated in list order. A constant rule compares the age
 * of the recorded timestamp (now - timestamp) with the rule's time. A
 * "test changes" rule compares the recorded timestamp with the value
 * stored in the rule and ends the evaluation of the list.
 */
static void check_timestamp(Service_T s) {
  Timestamp_T rule;
  time_t now;

  ASSERT(s && s->timestamplist);

  if((int)time(&now) == -1) {
    Event_post(s, EVENT_DATA, STATE_FAILED, s->action_DATA,
               "'%s' can't obtain actual system time", s->name);
    return;
  }
  Event_post(s, EVENT_DATA, STATE_PASSED, s->action_DATA,
             "'%s' actual system time obtained", s->name);

  rule= s->timestamplist;
  while(rule) {
    if(rule->test_changes) {
      /* if we are testing for changes only, the value is variable */
      if(rule->timestamp == s->inf->timestamp) {
        DEBUG("'%s' timestamp was not changed for %s\n", s->name, s->path);
        Event_post(s, EVENT_CHANGED, STATE_PASSED, rule->action,
                   "'%s' timestamp was not changed for %s", s->name, s->path);
      } else {
        /* reset expected value for next cycle */
        rule->timestamp= s->inf->timestamp;
        Event_post(s, EVENT_CHANGED, STATE_FAILED, rule->action,
                   "'%s' timestamp was changed for %s", s->name, s->path);
      }
      /* No rule after a change test is evaluated */
      return;
    }

    /* we are testing constant value for failed or passed state */
    if(Util_evalQExpression(rule->operator, (int)(now - s->inf->timestamp), rule->time)) {
      Event_post(s, EVENT_TIMESTAMP, STATE_FAILED, rule->action,
                 "'%s' timestamp test failed for %s", s->name, s->path);
    } else {
      DEBUG("'%s' timestamp test passed for %s\n", s->name, s->path);
      Event_post(s, EVENT_TIMESTAMP, STATE_PASSED, rule->action,
                 "'%s' timestamp passed", s->name);
    }
    rule= rule->next;
  }
}
/**
 * Validate the size rules of service s.
 *
 * Rules are evaluated in list order. A constant rule compares the
 * recorded file size with the rule's size. A "test changes" rule
 * compares the recorded size with the value stored in the rule and
 * ends the evaluation of the list.
 */
static void check_size(Service_T s) {
  Size_T rule;

  ASSERT(s && s->sizelist);

  rule= s->sizelist;
  while(rule) {
    if(rule->test_changes) {
      /* if we are testing for changes only, the value is variable */
      if(rule->size == s->inf->st_size) {
        DEBUG("'%s' size has not changed [current size=%llu B]\n", s->name,
              s->inf->st_size);
        Event_post(s, EVENT_CHANGED, STATE_PASSED, rule->action,
                   "'%s' size was not changed", s->name, s->path);
      } else {
        Event_post(s, EVENT_CHANGED, STATE_FAILED, rule->action,
                   "'%s' size was changed for %s", s->name, s->path);
        /* reset expected value for next cycle */
        rule->size= s->inf->st_size;
      }
      /* No rule after a change test is evaluated */
      return;
    }

    /* we are testing constant value for failed or passed state */
    if(Util_evalQExpression(rule->operator, s->inf->st_size, rule->size)) {
      Event_post(s, EVENT_SIZE, STATE_FAILED, rule->action,
                 "'%s' size test failed for %s -- current size is %llu B",
                 s->name, s->path, s->inf->st_size);
    } else {
      DEBUG("'%s' file size check passed [current size=%llu B]\n", s->name,
            s->inf->st_size);
      Event_post(s, EVENT_SIZE, STATE_PASSED, rule->action, "'%s' size passed", s->name);
    }
    rule= rule->next;
  }
}
/**
 * Match the content appended to the service file since the last cycle.
 *
 * Reading starts at s->inf->readpos. The position is reset to 0 if the
 * inode changed or the file is now smaller than the position. Every
 * complete line is handed to check_match_if() with the trailing
 * newline removed, and the read position is advanced past it. A line
 * without a trailing newline that does not fill the buffer is left
 * for a later cycle. Of a line longer than the buffer only the first
 * MATCH_LINE_LENGTH-1 characters are matched; the rest is skipped.
 *
 * @param s The file service whose match list should be tested
 */
static void check_match(Service_T s) {
  char line[MATCH_LINE_LENGTH];
  FILE *file;
  size_t length;
  long advance;
  int c;

  ASSERT(s && s->matchlist);

  /* did inode change -> read position = 0 */
  if(s->inf->st_ino != s->inf->st_ino_prev) {
    s->inf->readpos= 0;
  }

  /* did file decrease (readpos > file_size) -> read position = 0 */
  if(s->inf->readpos > s->inf->st_size) {
    s->inf->readpos= 0;
  }

  /* Do we need to match? (readpos < file_size) */
  if(!(s->inf->readpos < s->inf->st_size)) {
    return;
  }

  file= fopen(s->path, "r");
  if(file == NULL) {
    /* We can't open the file */
    DEBUG("FILE: cannot open file %s: %s!\n", s->path, strerror(errno));
    return;
  }

  while(TRUE) {
    /* Seek to the read position; required on every pass because the
     * skip loop below may have consumed data that was not accepted */
    if(fseek(file, s->inf->readpos, SEEK_SET) != 0) {
      DEBUG("FILE: cannot seek file %s: %s!\n", s->path, strerror(errno));
      break;
    }

    if(fgets(line, MATCH_LINE_LENGTH, file) == NULL) {
      /* End of file is the normal way out; anything else is an error */
      if(!feof(file)) {
        DEBUG("FILE: cannot read file %s: %s!\n", s->path, strerror(errno));
      }
      break;
    }

    length= strlen(line);

    /* Empty line? Should not happen... but who knows */
    if(length == 0) {
      break;
    }

    advance= (long)length;

    if(line[length - 1] != '\n') {
      /* Incomplete line which does not fill the buffer: it is read
       * again once it is complete */
      if(length < (size_t)(MATCH_LINE_LENGTH - 1)) {
        break;
      }
      /* The buffer is full and the line continues: skip the rest of
       * the line. This is done only if the buffer does NOT already end
       * with a newline, otherwise the following line would be
       * swallowed as well */
      do {
        c= fgetc(file);
        advance++;
      } while(c != '\n' && c != EOF);
      if(c == EOF) {
        /* The long line is not complete yet, retry in a later cycle */
        break;
      }
    }

    /* Set read position to the end of last read */
    s->inf->readpos+= advance;

    /* Remove appending newline */
    if(line[length - 1] == '\n') {
      line[length - 1]= 0;
    }

    check_match_if(s, line);
  }

  fclose(file);
}
/**
 * Test a content line against the "ignore" rules of service s.
 *
 * The first matching ignore rule, unless it already is the head of the
 * list, is moved to the front of s->matchlist so that recent matches
 * are found sooner.
 *
 * @param s The service whose match list is searched
 * @param line The content line to test
 * @return TRUE if an ignore rule matches the line, otherwise FALSE
 */
static int check_match_ignore(Service_T s, char *line) {
  Match_T rule;
  Match_T before= NULL;
  int status;

  for(rule= s->matchlist; rule; before= rule, rule= rule->next) {
    /* Only ignore rules are of interest here */
    if(!rule->ignore)
      continue;

    /* status is 0 if the pattern was found in the line */
#ifdef HAVE_REGEX_H
    status= regexec(rule->regex_comp, line, 0, NULL, 0);
#else
    status= (strstr(line, rule->match_string) == NULL) ? -1 : 0;
#endif

    /* The "not" flag inverts the result of the pattern test */
    if(!((status == 0) ^ (rule->not)))
      continue;

    /* We match! -> line is ignored! */
    DEBUG("FILE: Regular expression %s\"%s\" "
          "ignore match on content line\n",
          rule->not?"not ":"",
          rule->match_string);

    /* Optimize match list => put recent match in front */
    if(before != NULL) {
      before->next= rule->next;
      rule->next= s->matchlist;
      s->matchlist= rule;
    }
    return TRUE;
  }

  return FALSE;
}
/**
 * Test a content line against the non-ignore ("if") rules of service s
 * and post an EVENT_MATCH failed event for every rule that matches.
 *
 * Before the first event is posted the line is tested once against the
 * ignore rules; if one of them matches, the line is dropped and no
 * event is posted at all. The outcome of that test is remembered so
 * the ignore rules are not scanned again for further matching rules
 * on the same line.
 *
 * @param s The service whose match list is searched
 * @param line The content line to test
 */
static void check_match_if(Service_T s, char *line) {
  Match_T ml;
  int match_return;
  int ignore_tested= FALSE;

  /* Check non ignores */
  for(ml= s->matchlist; ml; ml= ml->next) {
    if(ml->ignore)
      continue;

    /* match_return is 0 if the pattern was found in the line */
#ifdef HAVE_REGEX_H
    match_return= regexec(ml->regex_comp, line, 0, NULL, 0);
#else
    if(strstr(line, ml->match_string) == NULL) {
      match_return= -1;
    } else {
      match_return= 0;
    }
#endif

    /* The "not" flag inverts the result of the pattern test */
    if((match_return==0) ^ (ml->not)) {
      /* Check if we have to test for ignores! The result cannot differ
       * between rules since the line is the same, so test only once */
      if(!ignore_tested) {
        if(check_match_ignore(s, line)) {
          return;
        }
        ignore_tested= TRUE;
      }
      Event_post(s, EVENT_MATCH, STATE_FAILED, ml->action,
                 "'%s' content match "
                 "[%s]",
                 s->name, line);
      DEBUG("FILE: Regular expression %s\"%s\" "
            "DOES match on content line\n",
            ml->not?"not ":"",
            ml->match_string);
    } else {
      DEBUG("FILE: Regular expression %s\"%s\" "
            "does not match on content line\n",
            ml->not?"not ":"",
            ml->match_string);
    }
  }
}
/**
 * Compare the current filesystem flags of service s with the stored
 * _flags value and post a changed event if they differ. Nothing is
 * done while _flags still holds -1, i.e. the flags were not
 * initialized yet.
 */
static void check_device_flags(Service_T s) {
  ASSERT(s && s->inf);

  /* filesystem flags were not initialized yet */
  if(s->inf->_flags == -1)
    return;

  if(s->inf->_flags == s->inf->flags) {
    DEBUG("'%s' filesystem flags has not changed since last cycle\n", s->name);
    Event_post(s, EVENT_CHANGED, STATE_PASSED, s->action_FSFLAG,
               "'%s' filesytem flags has not changed", s->name);
    return;
  }

  Event_post(s, EVENT_CHANGED, STATE_FAILED, s->action_FSFLAG,
             "'%s' filesytem flags changed to %#lx", s->name, s->inf->flags);
}
/**
 * Evaluate one device resource rule td (inode or space usage) of
 * service s.
 *
 * If a percent limit is set (>= 0) the usage in percent is tested,
 * otherwise the absolute value is tested against the absolute limit.
 * The inode test is silently left if the filesystem reports no inodes.
 *
 * @param s The device service the rule belongs to
 * @param td The device resource rule to evaluate
 */
static void check_device_resources(Service_T s, Device_T td) {
  ASSERT(s && td);

  if((td->limit_percent < 0) && (td->limit_absolute < 0)) {
    LogError("'%s' error: device limit not set\n", s->name);
    return;
  }

  if(td->resource == RESOURCE_ID_INODE) {

    if(s->inf->f_files <= 0) {
      DEBUG("'%s' filesystem doesn't support inodes\n", s->name);
      return;
    }

    if(td->limit_percent >= 0) {
      if(Util_evalQExpression( td->operator, s->inf->inode_percent, td->limit_percent)) {
        Event_post(s, EVENT_RESOURCE, STATE_FAILED, td->action,
                   "'%s' inode usage %.1f%% matches resource limit [inode usage%s%.1f%%]",
                   s->name,
                   s->inf->inode_percent/10.,
                   operatorshortnames[td->operator],
                   td->limit_percent/10.);
        return;
      }
    } else if(Util_evalQExpression(td->operator, s->inf->inode_total, td->limit_absolute)) {
      Event_post(s, EVENT_RESOURCE, STATE_FAILED, td->action,
                 "'%s' inode usage %ld matches resource limit [inode usage%s%ld]",
                 s->name,
                 s->inf->inode_total,
                 operatorshortnames[td->operator],
                 td->limit_absolute);
      return;
    }

    /* Neither limit matched */
    DEBUG("'%s' inode usage check passed [current inode usage=%.1f%%]\n",
          s->name, s->inf->inode_percent/10.);
    Event_post(s, EVENT_RESOURCE, STATE_PASSED, td->action,
               "'%s' device resources passed", s->name);

  } else if(td->resource == RESOURCE_ID_SPACE) {

    if(td->limit_percent >= 0) {
      if(Util_evalQExpression( td->operator, s->inf->space_percent, td->limit_percent)) {
        Event_post(s, EVENT_RESOURCE, STATE_FAILED, td->action,
                   "'%s' space usage %.1f%% matches resource limit [space usage%s%.1f%%]",
                   s->name,
                   s->inf->space_percent/10.,
                   operatorshortnames[td->operator],
                   td->limit_percent/10.);
        return;
      }
    } else if(Util_evalQExpression(td->operator, s->inf->space_total, td->limit_absolute)) {
      Event_post(s, EVENT_RESOURCE, STATE_FAILED, td->action,
                 "'%s' space usage %ld blocks matches resource limit "
                 "[space usage%s%ld blocks]",
                 s->name,
                 s->inf->space_total,
                 operatorshortnames[td->operator],
                 td->limit_absolute);
      return;
    }

    /* Neither limit matched */
    DEBUG("'%s' space usage check passed [current space usage=%.1f%%]\n",
          s->name, s->inf->space_percent/10.);
    Event_post(s, EVENT_RESOURCE, STATE_PASSED, td->action,
               "'%s' device resources passed", s->name);

  } else {
    LogError("'%s' error -- unknown resource type: [%d]\n", s->name,
             td->resource);
  }
}
/**
 * Test the timeout rule of service s, if one is defined.
 *
 * The cycle counter is increased while the start counter is above
 * zero. A timeout event is posted if the start counter reached
 * to_start while the cycle counter is still within to_cycle. Both
 * counters are reset once the cycle counter exceeds to_cycle.
 *
 * @return TRUE if the service timed out, otherwise FALSE
 */
static int check_timeout(Service_T s) {
  ASSERT(s);

  if(s->def_timeout) {

    /* Start counting cycles */
    if(s->nstart > 0)
      s->ncycle++;

    /* Check timeout */
    if(s->nstart >= s->to_start && s->ncycle <= s->to_cycle) {
      Event_post(s, EVENT_TIMEOUT, STATE_FAILED, s->action_TIMEOUT,
                 "'%s' service timed out and will not be checked anymore",
                 s->name);
      return TRUE;
    }

    /* Stop counting and reset if the cycle interval is passed */
    if(s->ncycle > s->to_cycle) {
      s->ncycle= 0;
      s->nstart= 0;
    }
  }

  return FALSE;
}
/**
 * Decide whether the validation of service s is skipped in this cycle.
 *
 * A service is skipped if it was already visited, or if an "every"
 * rule is defined and the incremented cycle counter is still below
 * s->every. The counter is reset when the service is due.
 *
 * @return TRUE if validation should be skipped, otherwise FALSE
 */
static int check_skip(Service_T s) {
  ASSERT(s);

  if(s->visited) {
    DEBUG("'%s' check skipped -- service already handled "
          "in a dependency chain\n", s->name);
    return TRUE;
  }

  if(s->def_every) {
    s->nevery++;
    if(s->nevery < s->every)
      return TRUE;
    /* The service is due in this cycle: start counting anew */
    s->nevery= 0;
  }

  return FALSE;
}