/*
* Copyright (C), 2000-2007 by the monit project group.
* All Rights Reserved.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "config.h"
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#ifdef HAVE_STRINGS_H
#include <strings.h>
#endif
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_DIRENT_H
#include <dirent.h>
#endif
#include "monitor.h"
#include "alert.h"
#include "event.h"
#include "process.h"
/**
* Implementation of the event interface.
*
* @author Jan-Henrik Haukeland, <hauk@tildeslash.com>
* @author Martin Pala <martinp@tildeslash.com>
* @version $Id: event.c,v 1.66 2007/07/25 12:54:28 hauk Exp $
* @file
*/
/* ------------------------------------------------------------- Definitions */
/*
 * Lookup table with the texts describing each event id. Judging by the
 * strings, every row holds the event id, the text for the failed state and
 * the text for the passed state, in that order. Event_get_description()
 * walks the table until it meets a row whose id is zero, so the closing
 * EVENT_NULL row serves as the end marker (presumably EVENT_NULL is 0 --
 * confirm against its definition).
 */
EventTable_T Event_Table[]= {
{EVENT_CHANGED, "Changed", "Changed not"},
{EVENT_CHECKSUM, "Checksum failed", "Checksum passed"},
{EVENT_CONNECTION, "Connection failed", "Connection passed"},
{EVENT_DATA, "Data access error", "Data access succeeded"},
{EVENT_EXEC, "Execution failed", "Execution succeeded"},
{EVENT_GID, "GID failed", "GID passed"},
{EVENT_ICMP, "ICMP failed", "ICMP passed"},
{EVENT_INSTANCE, "Monit instance changed", "Monit instance changed not"},
{EVENT_INVALID, "Invalid type", "Type passed"},
{EVENT_MATCH, "Regex match", "No regex match"},
{EVENT_NONEXIST, "Does not exist", "Exists"},
{EVENT_PERMISSION, "Permission failed", "Permission passed"},
{EVENT_RESOURCE, "Resource limit matched", "Resource limit passed"},
{EVENT_SIZE, "Size failed", "Size passed"},
{EVENT_TIMEOUT, "Timeout", "Timeout recovery"},
{EVENT_TIMESTAMP, "Timestamp failed", "Timestamp passed"},
{EVENT_UID, "UID failed", "UID passed"},
/* Virtual events */
{EVENT_NULL, "No Event", "No Event"},
};
/* -------------------------------------------------------------- Prototypes */
/* Logs the event message and runs the action matching the event state */
static void handle_event(Event_T);
/* Runs the alert and collector handlers and then the action itself */
static void handle_action(Event_T, Action_T);
/* Saves a partially handled event to a file in the event queue directory */
static void Event_queue_add(Event_T);
/* ------------------------------------------------------------------ Public */
/**
* Post a new Event
* @param service The Service the event belongs to
* @param id The event identification
* @param state The event state
* @param action Description of the event action
* @param s Optional message describing the event
*/
void Event_post(Service_T service, long id, short state, EventAction_T action,
char *s, ...) {
Event_T e = service->eventlist;
ASSERT(service);
ASSERT(action);
ASSERT(state == STATE_FAILED || state == STATE_PASSED);
if(e == NULL)
{
/* Only first failed event can initialize the queue for given event type,
* thus passed events are ignored until first error. However, in the case
* that the error flag is set for the passed event, we will allow it (i.e.
* event queue was flushed during monit reload and the service was in
* failed state before reload) */
if(state != STATE_FAILED && !(service->error & id))
return;
/* Initialize event list and add first event. The manadatory informations
* are cloned so the event is as standalone as possible and may be saved
* to the queue without the dependency on the original service, thus
* persistent and managable across monit restarts */
NEW(e);
e->id = id;
e->collected = time(NULL);
e->source = xstrdup(service->name);
e->group = service->group?xstrdup(service->group):xstrdup("");
e->mode = service->mode;
e->type = service->type;
e->state = STATE_INIT;
e->state_map = state;
e->action = action;
if(s)
{
long l;
va_list ap;
va_start(ap, s);
e->message = Util_formatString(s, ap, &l);
va_end(ap);
}
pthread_mutex_init(&e->mutex, NULL);
service->eventlist = e;
}
else
{
/* Try to find the event with the same origin and type identification.
* Each service and each test have its own custom actions object, so
* we share actions object address to identify event source. */
do
{
if(e->action == action && e->id == id)
{
LOCK(e->mutex)
e->collected = time(NULL);
/* Shift the existing event flags to the left
* and set the first bit based on actual state */
e->state_map <<= 1;
e->state_map |= state;
/* Update the message */
if(s)
{
long l;
va_list ap;
FREE(e->message);
va_start(ap, s);
e->message = Util_formatString(s, ap, &l);
va_end(ap);
}
END_LOCK;
break;
}
e = e->next;
}
while(e);
if(!e)
{
/* Only first failed event can initialize the queue for given event type,
* thus passed events are ignored until first error */
if(state != STATE_FAILED)
return;
/* Event was not found in the pending events list, we will add it.
* The manadatory informations are cloned so the event is as standalone
* as possible and may be saved to the queue without the dependency on
* the original service, thus persistent and managable across monit
* restarts */
NEW(e);
e->id = id;
e->collected = time(NULL);
e->source = xstrdup(service->name);
e->group = service->group?xstrdup(service->group):xstrdup("");
e->mode = service->mode;
e->type = service->type;
e->state = STATE_INIT;
e->state_map = state;
e->action = action;
if(s)
{
long l;
va_list ap;
va_start(ap, s);
e->message = Util_formatString(s, ap, &l);
va_end(ap);
}
pthread_mutex_init(&e->mutex, NULL);
e->next = service->eventlist;
service->eventlist = e;
}
}
e->state_changed = Event_check_state(e, state);
/* In the case that the state changed, update it and reset the counter */
if(e->state_changed)
{
e->state = state;
e->count = 1;
}
else
{
e->count++;
}
LOCK(e->mutex)
handle_event(e);
END_LOCK;
}
/* -------------------------------------------------------------- Properties */
/**
 * Look up the Service where the event originated, using the service name
 * stored in the event. An error is logged if no such service is found.
 * @param E An event object
 * @return The Service where the event originated or NULL if not found
 */
Service_T Event_get_source(Event_T E) {
  Service_T found;

  ASSERT(E);

  found = Util_getService(E->source);
  if(found == NULL)
  {
    LogError("Service %s not found in monit configuration\n", E->source);
  }

  return found;
}
/**
 * Get the name of the Service where the event originated
 * @param E An event object
 * @return The service name stored in the event
 */
char *Event_get_source_name(Event_T E) {
  ASSERT(E);

  return E->source;
}
/**
 * Get the group name of the Service where the event originated
 * @param E An event object
 * @return The group name stored in the event
 */
char *Event_get_source_group(Event_T E) {
  ASSERT(E);

  return E->group;
}
/**
 * Get the service type of the Service where the event originated
 * @param E An event object
 * @return The service type stored in the event
 */
int Event_get_source_type(Event_T E) {
  ASSERT(E);

  return E->type;
}
/**
 * Get the time the event was last collected
 * @param E An event object
 * @return The Event timestamp
 */
time_t Event_get_collected(Event_T E) {
  time_t collected;

  ASSERT(E);

  collected = E->collected;
  return collected;
}
/**
 * Get the raw state stored in the event
 * @param E An event object
 * @return The Event raw state
 */
short Event_get_state(Event_T E) {
  short raw_state;

  ASSERT(E);

  raw_state = E->state;
  return raw_state;
}
/**
 * Decide whether the posted state changes the event state. The newest bits
 * of the event state bitmap, as many as the action for the posted state has
 * cycles, are compared with the posted state and the number of matches is
 * checked against the count required by that action.
 * @param E An event object
 * @param S Actual posted state
 * @return TRUE if the event state should change to S (TRUE is also returned
 * when the source service cannot be found), otherwise FALSE
 */
short Event_check_state(Event_T E, short S) {
  int cycle;
  int matches = 0;
  Action_T rule;
  Service_T source;

  ASSERT(E);

  source = Event_get_source(E);
  if(!source)
    return TRUE;

  /* Only the true failed state condition can change the initial state */
  if(S == STATE_PASSED && E->state == STATE_INIT && !(source->error & E->id))
    return FALSE;

  /* Pick the action which belongs to the posted state */
  if(S == STATE_PASSED)
    rule = E->action->passed;
  else
    rule = E->action->failed;

  /* Compare as many bits as cycles able to trigger the action */
  for(cycle = 0; cycle < rule->cycles; cycle++)
  {
    /* The bit position selects the state recorded for that cycle */
    long long bit = (E->state_map >> cycle) & 0x1;

    /* Count occurrences of the posted state */
    if(bit == S)
      matches++;
  }

  /* Enough matches and a state which differs from the current one */
  return (matches >= rule->count && S != E->state) ? TRUE : FALSE;
}
/**
 * Get the Event type, i.e. the event identification stored in the event
 * @param E An event object
 * @return The Event type
 */
int Event_get_id(Event_T E) {
  int event_id;

  ASSERT(E);

  event_id = E->id;
  return event_id;
}
/**
 * Get the optional Event message describing why the event was fired.
 * @param E An event object
 * @return The Event message. May be NULL
 */
const char *Event_get_message(Event_T E) {
  const char *text;

  ASSERT(E);

  text = E->message;
  return text;
}
/**
 * Get a textual description of the actual event type, taken from the
 * Event_Table. For instance if the event id is EVENT_TIMESTAMP and the
 * event state is non-zero, the description is "Timestamp failed". Likewise
 * if the event id is EVENT_CHECKSUM and the event state is zero, the
 * description is "Checksum passed" and so on.
 * @param E An event object
 * @return A string describing the event type in clear text. If the
 * event type is not found NULL is returned.
 */
const char *Event_get_description(Event_T E) {
  EventTable_T *entry;

  ASSERT(E);

  /* The table is scanned up to the first entry with a zero id */
  for(entry = Event_Table; entry->id; entry++)
  {
    if(entry->id != E->id)
      continue;

    if(E->state)
      return entry->description_failed;
    return entry->description_passed;
  }

  return NULL;
}
/**
 * Get an event action id. The failed action is used when the event state
 * is non-zero, the passed action otherwise.
 * @param E An event object
 * @return An action id
 */
short Event_get_action(Event_T E) {
  Action_T A;

  ASSERT(E);

  if(E->state)
    A = E->action->failed;
  else
    A = E->action->passed;

  /* In the case of passive mode we replace the description of start, stop
   * or restart action for alert action, because these actions are passive in
   * this mode */
  if(E->mode == MODE_PASSIVE &&
     (A->id == ACTION_START ||
      A->id == ACTION_STOP ||
      A->id == ACTION_RESTART))
  {
    return ACTION_ALERT;
  }

  return A->id;
}
/**
 * Get a textual description of the actual event action. The action id
 * returned by Event_get_action() is used as the index into the actionnames
 * table, so in passive mode the start, stop and restart actions are
 * described as the alert action.
 * @param E An event object
 * @return The actionnames entry for the event action
 */
const char *Event_get_action_description(Event_T E) {
  short action_id;

  ASSERT(E);

  action_id = Event_get_action(E);
  return actionnames[action_id];
}
/**
 * Reprocess the partially handled event queue. Every regular file in the
 * event queue directory is read back into an event and the handlers still
 * flagged in the event are retried. An event with no handler flag left is
 * removed from the queue; other events stay in the directory for the next
 * pass. Files which cannot be read or have an unexpected format are skipped.
 */
void Event_queue_process() {
  DIR *dir = NULL;
  FILE *file = NULL;
  struct dirent *de = NULL;
  EventAction_T ea = NULL;
  Action_T a = NULL;

  /* return in the case that the eventqueue is not enabled or empty */
  if( !Run.eventlist_dir ||
      (
        !Run.handler_init &&
        !Run.handler_queue[HANDLER_ALERT] &&
        !Run.handler_queue[HANDLER_COLLECTOR]
      )
    )
  {
    return;
  }

  if(! (dir = opendir(Run.eventlist_dir)) )
  {
    if(errno != ENOENT) {
      LogError("%s: cannot open the directory %s -- %s\n",
               prog, Run.eventlist_dir, STRERROR);
    }
    return;
  }

  if((de = readdir(dir)))
  {
    DEBUG("Processing postponed events queue\n");
  }

  /* Scratch action objects shared by all events read from the queue */
  NEW(ea);
  NEW(a);

  while(de)
  {
    int size;
    int *version = NULL;
    short *action = NULL;
    Event_T e = NULL;
    struct stat st;
    char file_name[STRLEN];

    /* In the case that all handlers failed, skip the further processing in
     * this cycle. Alert handler is currently defined anytime (either
     * explicitly or localhost by default).
     * NOTE(review): as written the expression reduces to
     * FLAG(Run.handler_flag, HANDLER_ALERT) alone -- confirm the intent. */
    if( (Run.collectors
         &&
         FLAG(Run.handler_flag, HANDLER_COLLECTOR)
         &&
         FLAG(Run.handler_flag, HANDLER_ALERT)
        )
        ||
        FLAG(Run.handler_flag, HANDLER_ALERT))
    {
      break;
    }

    snprintf(file_name, STRLEN,
             "%s/%s",
             Run.eventlist_dir, de->d_name);

    if(!stat(file_name, &st) && S_ISREG(st.st_mode))
    {
      DEBUG("%s: processing queued event %s\n", prog, file_name);

      if(! (file = fopen(file_name, "r")) )
      {
        LogError("%s: Processing failed - cannot open the event file %s -- %s\n",
                 prog, file_name, STRERROR);
        goto error1;
      }

      /* read event structure version; version may be NULL here so it must
       * not be dereferenced for the log message */
      if(!(version = File_readQueue(file, &size)) || size != sizeof(int))
      {
        LogError("skipping %s - unknown data format\n",
                 file_name);
        goto error2;
      }
      if(*version != EVENT_VERSION)
      {
        LogError("Aborting event %s - incompatible data format version %d\n",
                 file_name, *version);
        goto error2;
      }

      /* read event structure */
      if(!(e = File_readQueue(file, &size)))
        goto error2;
      if(size != sizeof(*e))
      {
        /* The record is not an event structure: release it here, its
         * members cannot be trusted by the cleanup below */
        FREE(e);
        goto error2;
      }

      /* The structure was saved as raw bytes, so its string pointers belong
       * to the process which wrote the file. Clear them before they are
       * replaced, otherwise a failed read below would free stale addresses */
      e->source = NULL;
      e->group = NULL;
      e->message = NULL;

      /* read source */
      if(!(e->source = File_readQueue(file, &size)))
        goto error3;

      /* read group */
      if(!(e->group = File_readQueue(file, &size)))
        goto error3;

      /* read message
       * NOTE(review): Event_queue_add() stores a zero length record for an
       * event without message; if File_readQueue() returns NULL for such a
       * record the event is skipped on every pass -- confirm. */
      if(!(e->message = File_readQueue(file, &size)))
        goto error3;

      /* read event action */
      if(!(action = File_readQueue(file, &size)) || size != sizeof(short))
        goto error3;

      /* Rebuild the action for the state the event was saved in */
      a->id = *action;
      if(e->state == STATE_FAILED)
      {
        ea->failed = a;
      }
      else
      {
        ea->passed = a;
      }
      e->action = ea;

      /* Retry all remaining handlers */

      /* alert */
      if(e->flag & HANDLER_ALERT)
      {
        if(Run.handler_init)
        {
          Run.handler_queue[HANDLER_ALERT]++;
        }
        if((Run.handler_flag & HANDLER_ALERT) != HANDLER_ALERT)
        {
          if( handle_alert(e) != HANDLER_ALERT )
          {
            e->flag &= ~HANDLER_ALERT;
            Run.handler_queue[HANDLER_ALERT]--;
          }
          else
          {
            LogError("Alert handler failed, retry scheduled for next cycle\n");
            Run.handler_flag |= HANDLER_ALERT;
          }
        }
      }

      /* collector */
      if(e->flag & HANDLER_COLLECTOR)
      {
        if(Run.handler_init)
        {
          Run.handler_queue[HANDLER_COLLECTOR]++;
        }
        if((Run.handler_flag & HANDLER_COLLECTOR) != HANDLER_COLLECTOR)
        {
          if( handle_collector(e) != HANDLER_COLLECTOR )
          {
            e->flag &= ~HANDLER_COLLECTOR;
            Run.handler_queue[HANDLER_COLLECTOR]--;
          }
          else
          {
            LogError("Collector handler failed, retry scheduled for next cycle\n");
            Run.handler_flag |= HANDLER_COLLECTOR;
          }
        }
      }

      /* If no error persists, remove it from the queue */
      if(e->flag == HANDLER_PASSED)
      {
        DEBUG("Removing event %s from the queue for later external delivery\n",
              file_name);
        unlink(file_name);
      }

      error3:
      /* e is a complete event here; members not read yet are NULL */
      FREE(e->source);
      FREE(e->group);
      FREE(e->message);
      FREE(e);
      FREE(action);

      error2:
      FREE(version);
      fclose(file);
    }

    error1:
    de = readdir(dir);
  }

  Run.handler_init = FALSE;
  closedir(dir);
  FREE(a);
  FREE(ea);
  return;
}
/* ----------------------------------------------------------------- Private */
/*
 * Handle the event: log its message, update the error flags of the source
 * service and run the action which belongs to the event state.
 * @param E An event
 */
static void handle_event(Event_T E) {
  Service_T S;
  int last_failed;

  ASSERT(E);
  ASSERT(E->action);
  ASSERT(E->action->failed);
  ASSERT(E->action->passed);

  /* The first bit of the state map holds the most recently posted state */
  last_failed = (E->state_map & 0x1) != 0;

  /* We will handle only first passed event, recurrent passed events
   * or insufficient passed events during failed service state are
   * ignored. Failed events are handled each time. */
  if(!E->state_changed && (E->state == STATE_PASSED || !last_failed))
  {
    return;
  }

  if(E->message)
  {
    /* In the case that the service state is yet initializing and error
     * occurred, log it and exit. Passed events in init state are not
     * logged. */
    if(E->state != STATE_INIT || last_failed)
    {
      if(E->id != EVENT_INSTANCE && E->state != STATE_PASSED)
      {
        LogError("%s\n", E->message);
      }
      else
      {
        LogInfo("%s\n", E->message);
      }
    }
    if(E->state == STATE_INIT)
    {
      return;
    }
  }

  S = Event_get_source(E);
  if(S == NULL)
  {
    LogError("Event handling aborted\n");
    return;
  }

  /* Keep the service error flags in step with the event state */
  if(E->state != STATE_FAILED)
  {
    S->error &= ~E->id;
    handle_action(E, E->action->passed);
  }
  else
  {
    S->error |= E->id;
    handle_action(E, E->action->failed);
  }

  /* Possible event state change was handled so we will reset the flag. */
  E->state_changed = FALSE;
}
/*
 * Run the action for the event. Unless the action is to ignore the event,
 * the alert and collector handlers are called first and the event is added
 * to the event queue if any of them reports a failure. Then the action
 * itself is performed on the source service.
 * @param E An event
 * @param A The action to perform
 */
static void handle_action(Event_T E, Action_T A) {
  Service_T s;

  ASSERT(E);
  ASSERT(A);

  E->flag = HANDLER_PASSED;

  if(A->id == ACTION_IGNORE)
    return;

  /* Alert and collector event notification are common actions */
  E->flag |= handle_alert(E);
  E->flag |= handle_collector(E);

  /* In the case that some subhandler failed, enqueue the event for
   * partial reprocessing */
  if(E->flag != HANDLER_PASSED)
  {
    if(!Run.eventlist_dir)
    {
      LogError("Aborting event\n");
    }
    else
    {
      Event_queue_add(E);
    }
  }

  s = Event_get_source(E);
  if(s == NULL)
  {
    LogError("Event action handling aborted\n");
    return;
  }

  /* Already handled by the alert handler above */
  if(A->id == ACTION_ALERT)
    return;

  if(A->id == ACTION_EXEC)
  {
    spawn(s, A->exec, Event_get_description(E));
    return;
  }

  /* Count the start attempts of services which have a timeout defined */
  if(s->def_timeout &&
     (A->id == ACTION_START || A->id == ACTION_RESTART))
  {
    s->nstart++;
  }

  /* In passive mode the service is not started, stopped or restarted */
  if(s->mode == MODE_PASSIVE &&
     (A->id == ACTION_START ||
      A->id == ACTION_STOP ||
      A->id == ACTION_RESTART))
  {
    return;
  }

  control_service(s->name, A->id);
}
/**
 * Add the partially handled event to the global queue. The event is saved
 * to a file in the event queue directory, named after the current time and
 * the service name, as a sequence of records: structure version, the event
 * structure, source, group, message and the event action id. Signals are
 * blocked while the file is written. If the event cannot be saved completely
 * the file is removed again; otherwise the queue counters of the handlers
 * still flagged in the event are incremented (unless Run.handler_init is
 * set).
 * @param E An event object
 */
static void Event_queue_add(Event_T E) {
  FILE *file = NULL;
  char file_name[STRLEN];
  int version = EVENT_VERSION;
  short action;
  int rv = FALSE;
  mode_t mask;
  sigset_t ns;
  sigset_t os;

  ASSERT(E);
  ASSERT(E->flag != HANDLER_PASSED);

  /* Read the action only after the event pointer has been validated */
  action = Event_get_action(E);

  if(!File_checkQueueDirectory(Run.eventlist_dir, 0700))
  {
    LogError("%s: Aborting event - cannot access the directory %s\n",
             prog, Run.eventlist_dir);
    return;
  }

  if(!File_checkQueueLimit(Run.eventlist_dir, Run.eventlist_slots))
  {
    LogError("%s: Aborting event - queue over quota\n", prog);
    return;
  }

  set_signal_block(&ns, &os);

  /* compose the file name of actual timestamp and service name */
  snprintf(file_name, STRLEN,
           "%s/%ld_%s",
           Run.eventlist_dir, (long int)time(NULL), E->source);

  DEBUG("%s: Adding event to the queue file %s for later delivery\n",
        prog, file_name);

  mask = umask(QUEUEMASK);
  file = fopen(file_name, "w");
  umask(mask);
  if(! file)
  {
    LogError("%s: Aborting event - cannot open the event file %s -- %s\n",
             prog, file_name, STRERROR);
    /* Leave through the common exit so the signal mask is restored */
    goto done;
  }

  /* Write the records in the order Event_queue_process() reads them back;
   * the chain stops at the first record which cannot be written */
  rv = File_writeQueue(file, &version, sizeof(int)) &&
       File_writeQueue(file, E, sizeof(*E)) &&
       File_writeQueue(file, E->source, E->source?strlen(E->source)+1:0) &&
       File_writeQueue(file, E->group, E->group?strlen(E->group)+1:0) &&
       File_writeQueue(file, E->message, E->message?strlen(E->message)+1:0) &&
       File_writeQueue(file, &action, sizeof(short));

  /* Close the stream on success and on failure alike. A failed close means
   * buffered data may not have reached the file, so treat it as a write
   * error too */
  if(fclose(file) != 0)
  {
    rv = FALSE;
  }

  if(!rv)
  {
    LogError("%s: Aborting event - unable to save event information to %s\n",
             prog, file_name);
    unlink(file_name);
  }
  else
  {
    if(!Run.handler_init && E->flag & HANDLER_ALERT)
    {
      Run.handler_queue[HANDLER_ALERT]++;
    }
    if(!Run.handler_init && E->flag & HANDLER_COLLECTOR)
    {
      Run.handler_queue[HANDLER_COLLECTOR]++;
    }
  }

  done:
  unset_signal_block(&os);
  return;
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */