/*
 * Copyright (C), 2000-2007 by the monit project group.
 * All Rights Reserved.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "config.h"

#ifdef HAVE_STRING_H
#include <string.h>
#endif

#ifdef HAVE_STRINGS_H
#include <strings.h>
#endif

#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif

#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif

#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

#ifdef HAVE_DIRENT_H
#include <dirent.h>
#endif

#include "monitor.h"
#include "alert.h"
#include "event.h"
#include "process.h"


/**
 * Implementation of the event interface.
 *
 * @author Jan-Henrik Haukeland, <hauk@tildeslash.com>
 * @author Martin Pala <martinp@tildeslash.com>
 * @version \$Id: event.c,v 1.66 2007/07/25 12:54:28 hauk Exp $
 * @file
 */


/* ------------------------------------------------------------- Definitions */

/*
 * Lookup table used by Event_get_description() to translate an event id
 * into human readable text. Each row holds the event id followed by two
 * descriptions; judging by their wording the first one describes the
 * failed state and the second one the passed state (the field order of
 * EventTable_T is declared elsewhere -- TODO confirm).
 * Event_get_description() stops scanning at the first row whose id is
 * zero, so the last row presumably doubles as the table terminator
 * (assumes EVENT_NULL == 0 -- TODO confirm).
 */
EventTable_T Event_Table[]= {
  {EVENT_CHANGED,         "Changed",                "Changed not"},
  {EVENT_CHECKSUM,        "Checksum failed",        "Checksum passed"},
  {EVENT_CONNECTION,      "Connection failed",      "Connection passed"},
  {EVENT_DATA,            "Data access error",      "Data access succeeded"},
  {EVENT_EXEC,            "Execution failed",       "Execution succeeded"},
  {EVENT_GID,             "GID failed",             "GID passed"},
  {EVENT_ICMP,            "ICMP failed",            "ICMP passed"},
  {EVENT_INSTANCE,        "Monit instance changed", "Monit instance changed not"},
  {EVENT_INVALID,         "Invalid type",           "Type passed"},
  {EVENT_MATCH,           "Regex match",            "No regex match"},
  {EVENT_NONEXIST,        "Does not exist",         "Exists"},
  {EVENT_PERMISSION,      "Permission failed",      "Permission passed"},
  {EVENT_RESOURCE,        "Resource limit matched", "Resource limit passed"},
  {EVENT_SIZE,            "Size failed",            "Size passed"},
  {EVENT_TIMEOUT,         "Timeout",                "Timeout recovery"},
  {EVENT_TIMESTAMP,       "Timestamp failed",       "Timestamp passed"},
  {EVENT_UID,             "UID failed",             "UID passed"},
  /* Virtual events */
  {EVENT_NULL,            "No Event",               "No Event"},
};


/* -------------------------------------------------------------- Prototypes */


static void handle_event(Event_T);
static void handle_action(Event_T, Action_T);
static void Event_queue_add(Event_T);


/* ------------------------------------------------------------------ Public */


/**
 * Post a new Event
 * @param service The Service the event belongs to
 * @param id The event identification
 * @param state The event state
 * @param action Description of the event action
 * @param s Optional message describing the event
 */
void Event_post(Service_T service, long id, short state, EventAction_T action,
  char *s, ...) {

  Event_T e = service->eventlist;

  ASSERT(service);
  ASSERT(action);
  ASSERT(state == STATE_FAILED || state == STATE_PASSED);

  if(e == NULL)
  {
    /* Only first failed event can initialize the queue for given event type,
     * thus passed events are ignored until first error. However, in the case
     * that the error flag is set for the passed event, we will allow it (i.e.
     * event queue was flushed during monit reload and the service was in
     * failed state before reload) */
    if(state != STATE_FAILED && !(service->error & id))
      return;

    /* Initialize event list and add first event. The manadatory informations
     * are cloned so the event is as standalone as possible and may be saved
     * to the queue without the dependency on the original service, thus
     * persistent and managable across monit restarts */
    NEW(e);
    e->id = id;
    e->collected = time(NULL);
    e->source = xstrdup(service->name);
    e->group = service->group?xstrdup(service->group):xstrdup("");
    e->mode = service->mode;
    e->type = service->type;
    e->state = STATE_INIT;
    e->state_map = state;
    e->action = action;
    if(s)
    {
      long l;
      va_list ap;

      va_start(ap, s);
      e->message = Util_formatString(s, ap, &l);
      va_end(ap);
    }
    pthread_mutex_init(&e->mutex, NULL);
    service->eventlist = e;
  }
  else
  {
    /* Try to find the event with the same origin and type identification.
     * Each service and each test have its own custom actions object, so
     * we share actions object address to identify event source. */
    do
    {
      if(e->action == action && e->id == id)
      {
        LOCK(e->mutex)
          e->collected = time(NULL);

          /* Shift the existing event flags to the left
           * and set the first bit based on actual state */
          e->state_map <<= 1;
          e->state_map |= state;

          /* Update the message */
          if(s)
          {
            long l;
            va_list ap;

            FREE(e->message);
            va_start(ap, s);
            e->message = Util_formatString(s, ap, &l);
            va_end(ap);
          }

        END_LOCK;
	break;
      }
      e = e->next;
    }
    while(e);

    if(!e)
    {
      /* Only first failed event can initialize the queue for given event type,
       * thus passed events are ignored until first error */
      if(state != STATE_FAILED)
        return;

      /* Event was not found in the pending events list, we will add it.
       * The manadatory informations are cloned so the event is as standalone
       * as possible and may be saved to the queue without the dependency on
       * the original service, thus persistent and managable across monit
       * restarts */
      NEW(e);
      e->id = id;
      e->collected = time(NULL);
      e->source = xstrdup(service->name);
      e->group = service->group?xstrdup(service->group):xstrdup("");
      e->mode = service->mode;
      e->type = service->type;
      e->state = STATE_INIT;
      e->state_map = state;
      e->action = action;
      if(s)
      {
        long l;
        va_list ap;

        va_start(ap, s);
        e->message = Util_formatString(s, ap, &l);
        va_end(ap);
      }
      pthread_mutex_init(&e->mutex, NULL);
      e->next = service->eventlist;
      service->eventlist = e;
    }
  }

  e->state_changed = Event_check_state(e, state);

  /* In the case that the state changed, update it and reset the counter */
  if(e->state_changed)
  {
    e->state = state;
    e->count = 1;
  }
  else
  {
    e->count++;
  }

  LOCK(e->mutex)
    handle_event(e);
  END_LOCK;

}


/* -------------------------------------------------------------- Properties */


/**
 * Look up the Service where the event originated, using the service
 * name stored in the event. An error is logged if the lookup fails.
 * @param E An event object
 * @return The Service where the event originated, or NULL if no service
 * with that name was found
 */
Service_T Event_get_source(Event_T E) {

  Service_T service;

  ASSERT(E);

  service = Util_getService(E->source);
  if(service == NULL)
    LogError("Service %s not found in monit configuration\n", E->source);

  return service;

}


/**
 * Get the name of the Service where the event originated
 * @param E An event object
 * @return The service name stored in the event (not a copy)
 */
char *Event_get_source_name(Event_T E) {

  char *name;

  ASSERT(E);

  name = E->source;

  return name;

}


/**
 * Get the group name of the Service where the event originated
 * @param E An event object
 * @return The group name stored in the event (not a copy)
 */
char *Event_get_source_group(Event_T E) {

  char *group;

  ASSERT(E);

  group = E->group;

  return group;

}


/**
 * Get the type of the Service where the event originated
 * @param E An event object
 * @return The service type stored in the event
 */
int Event_get_source_type(Event_T E) {

  int type;

  ASSERT(E);

  type = E->type;

  return type;

}


/**
 * Get the time the event was last collected
 * @param E An event object
 * @return The Event timestamp
 */
time_t Event_get_collected(Event_T E) {

  time_t collected;

  ASSERT(E);

  collected = E->collected;

  return collected;

}


/**
 * Get the raw state of the event
 * @param E An event object
 * @return The Event raw state
 */
short Event_get_state(Event_T E) {

  short state;

  ASSERT(E);

  state = E->state;

  return state;

}


/**
 * Decide whether posting state S changes the state of the event. The
 * decision is based on the event state bitmap and on the cycles/count
 * ratio of the action belonging to the posted state.
 * @param E An event object
 * @param S Actual posted state
 * @return TRUE if the event state should change to S, otherwise FALSE.
 * TRUE is also returned when the event's service cannot be found.
 */
short Event_check_state(Event_T E, short S) {

  int       bit;
  int       matches = 0;
  Action_T  action;
  Service_T service;

  ASSERT(E);

  service = Event_get_source(E);
  if(service == NULL)
    return TRUE;

  /* A passed state cannot change the initial state unless the service
   * already carries the error flag of this event */
  if(S == STATE_PASSED && E->state == STATE_INIT && !(service->error & E->id))
    return FALSE;

  if(S == STATE_PASSED)
    action = E->action->passed;
  else
    action = E->action->failed;

  /* Walk as many state map bits as there are cycles able to trigger the
   * action and count those that equal the posted state */
  for(bit = 0; bit < action->cycles; bit++)
  {
    if(((E->state_map >> bit) & 0x1) == S)
      matches++;
  }

  /* Enough occurrences and an actual difference are both required */
  if(S == E->state || matches < action->count)
    return FALSE;

  return TRUE;

}


/**
 * Get the identification (type) of the event
 * @param E An event object
 * @return The Event type
 */
int Event_get_id(Event_T E) {

  int id;

  ASSERT(E);

  id = E->id;

  return id;

}


/**
 * Get the optional message describing why the event was fired
 * @param E An event object
 * @return The Event message. May be NULL
 */
const char *Event_get_message(Event_T E) {

  const char *message;

  ASSERT(E);

  message = E->message;

  return message;

}


/**
 * Get a textual description of the actual event type, looked up in
 * Event_Table. When the event state is non-zero the "failed" text of the
 * matching row is returned (e.g. "Timestamp failed" for EVENT_TIMESTAMP),
 * otherwise the "passed" text (e.g. "Checksum passed" for EVENT_CHECKSUM).
 * @param E An event object
 * @return A string describing the event type in clear text. If the
 * event type is not found NULL is returned.
 */
const char *Event_get_description(Event_T E) {

  EventTable_T *row;

  ASSERT(E);

  /* The scan ends at the first row with a zero id */
  for(row = Event_Table; row->id; row++)
  {
    if(row->id != E->id)
      continue;

    if(E->state)
      return row->description_failed;

    return row->description_passed;
  }

  return NULL;

}


/**
 * Get the id of the action which belongs to the current event state:
 * the failed action when the state is non-zero, else the passed action.
 * @param E An event object
 * @return An action id; ACTION_ALERT is reported in place of start, stop
 * and restart when the event mode is MODE_PASSIVE
 */
short Event_get_action(Event_T E) {

  Action_T A;
  short    id;

  ASSERT(E);

  if(E->state)
    A = E->action->failed;
  else
    A = E->action->passed;

  id = A->id;

  /* In passive mode the start, stop and restart actions are not performed,
   * so they are presented as a plain alert */
  if(E->mode == MODE_PASSIVE)
  {
    if(A->id == ACTION_START || A->id == ACTION_STOP || A->id == ACTION_RESTART)
      id = ACTION_ALERT;
  }

  return id;

}


/**
 * Get a textual description of the actual event action, i.e. the name
 * registered in actionnames for the id returned by Event_get_action().
 * @param E An event object
 * @return A string naming the action in clear text
 */
const char *Event_get_action_description(Event_T E) {

  short action_id;

  ASSERT(E);

  action_id = Event_get_action(E);

  return actionnames[action_id];

}


/**
 * Reprocess the partially handled event queue. Every regular file found
 * in Run.eventlist_dir is read back into an event, the handlers still
 * flagged in the event are retried, and the file is removed once no
 * handler flag remains set. Files which cannot be read are skipped and
 * left in place.
 */
void Event_queue_process() {

  DIR           *dir = NULL;
  FILE          *file = NULL;
  struct dirent *de = NULL;
  EventAction_T  ea = NULL;
  Action_T       a = NULL;

  /* return in the case that the eventqueue is not enabled or empty */
  if( !Run.eventlist_dir ||
     (
      !Run.handler_init                     &&
      !Run.handler_queue[HANDLER_ALERT]     &&
      !Run.handler_queue[HANDLER_COLLECTOR]
     )
    )
  {
    return;
  }

  if(! (dir = opendir(Run.eventlist_dir)) )
  {
    /* A missing queue directory just means there is nothing to process */
    if(errno != ENOENT) {
      LogError("%s: cannot open the directory %s -- %s\n",
        prog, Run.eventlist_dir, STRERROR);
    }
    return;
  }

  if((de = readdir(dir)))
  {
    DEBUG("Processing postponed events queue\n");
  }

  /* Scratch action objects shared by all events restored in this run */
  NEW(ea);
  NEW(a);

  while(de)
  {
    int            size;
    int           *version = NULL;
    short         *action = NULL;
    Event_T        e = NULL;
    struct stat    st;
    char           file_name[STRLEN];

    /* In the case that all handlers failed, skip the further processing in
     * this cycle. Alert handler is currently defined anytime (either
     * explicitly or localhost by default).
     * NOTE(review): as written the expression reduces to the alert flag
     * alone; confirm whether a working collector should keep the loop
     * running. */
    if( (Run.collectors
           &&
         FLAG(Run.handler_flag, HANDLER_COLLECTOR)
           &&
         FLAG(Run.handler_flag, HANDLER_ALERT)
        )
          ||
        FLAG(Run.handler_flag, HANDLER_ALERT))
    {
      break;
    }

    snprintf(file_name, STRLEN,
      "%s/%s",
      Run.eventlist_dir, de->d_name);

    if(!stat(file_name, &st) && S_ISREG(st.st_mode))
    {

      DEBUG("%s: processing queued event %s\n", prog, file_name);

      if(! (file = fopen(file_name, "r")) )
      {
        LogError("%s: Processing failed - cannot open the event file %s -- %s\n",
          prog, file_name, STRERROR);
        goto error1;
      }

      /* read event structure version; version may be NULL here so it must
       * not be dereferenced for the log message */
      if(!(version = File_readQueue(file, &size)) || size != sizeof(int)) {
        LogError("skipping %s - unknown data format\n", file_name);
        goto error2;
      }
      if(*version != EVENT_VERSION)
      {
        LogError("Aborting event %s - incompatible data format version %d\n",
          file_name, *version);
        goto error2;
      }

      /* read event structure */
      if(!(e = File_readQueue(file, &size)))
        goto error2;
      if(size != sizeof(*e))
      {
        /* Wrong record size: release the buffer without touching fields */
        FREE(e);
        goto error2;
      }

      /* The pointers saved inside the structure belong to the process
       * which wrote the file. Reset those released by the cleanup below so
       * a failed read never frees a stale address. */
      e->source = NULL;
      e->group = NULL;
      e->message = NULL;

      /* read source */
      if(!(e->source = File_readQueue(file, &size)))
        goto error3;

      /* read group */
      if(!(e->group = File_readQueue(file, &size)))
        goto error3;

      /* read message */
      if(!(e->message = File_readQueue(file, &size)))
        goto error3;

      /* read event action */
      if(!(action = File_readQueue(file, &size)) || size != sizeof(short))
        goto error3;
      a->id = *action;
      if(e->state == STATE_FAILED)
      {
        ea->failed = a;
      }
      else
      {
        ea->passed = a;
      }
      e->action = ea;

      /* Retry all remaining handlers */

      /* alert */
      if(e->flag & HANDLER_ALERT)
      {
        if(Run.handler_init)
        {
          Run.handler_queue[HANDLER_ALERT]++;
        }
        if((Run.handler_flag & HANDLER_ALERT) != HANDLER_ALERT)
        {
          if( handle_alert(e) != HANDLER_ALERT )
          {
            e->flag &= ~HANDLER_ALERT;
            Run.handler_queue[HANDLER_ALERT]--;
          }
          else
          {
            LogError("Alert handler failed, retry scheduled for next cycle\n");
            Run.handler_flag |= HANDLER_ALERT;
          }
        }
      }

      /* collector */
      if(e->flag & HANDLER_COLLECTOR)
      {
        if(Run.handler_init)
        {
          Run.handler_queue[HANDLER_COLLECTOR]++;
        }
        if((Run.handler_flag & HANDLER_COLLECTOR) != HANDLER_COLLECTOR)
        {
          if( handle_collector(e) != HANDLER_COLLECTOR )
          {
            e->flag &= ~HANDLER_COLLECTOR;
            Run.handler_queue[HANDLER_COLLECTOR]--;
          }
          else
          {
            LogError("Collector handler failed, retry scheduled for next cycle\n");
            Run.handler_flag |= HANDLER_COLLECTOR;
          }
        }
      }

      /* If no error persists, remove it from the queue */
      if(e->flag == HANDLER_PASSED)
      {
        DEBUG("Removing event %s from the queue for later external delivery\n",
          file_name);
        unlink(file_name);
      }

      /* Cleanup: each label releases what was acquired before the jump */
      error3:
      FREE(e->source);
      FREE(e->group);
      FREE(e->message);
      FREE(e);
      FREE(action);
      error2:
      FREE(version);
      fclose(file);
    }
    error1:
    de = readdir(dir);
  }
  Run.handler_init = FALSE;
  closedir(dir);
  FREE(a);
  FREE(ea);
  return;
}


/* ----------------------------------------------------------------- Private */


/*
 * Handle the event: log its message, update the error flags of the
 * originating service and run the action configured for the event state.
 * @param E An event
 */
static void handle_event(Event_T E) {

  Service_T service;
  int       newest_bit;

  ASSERT(E);
  ASSERT(E->action);
  ASSERT(E->action->failed);
  ASSERT(E->action->passed);

  /* Lowest state map bit, i.e. the most recently posted state */
  newest_bit = (E->state_map & 0x1) ? TRUE : FALSE;

  /* Without a state change only events whose state is not passed and whose
   * newest state bit is set are handled again; recurrent passed events
   * and insufficient passed events during failed service state are
   * ignored. */
  if(!E->state_changed)
  {
    if(E->state == STATE_PASSED || !newest_bit)
      return;
  }

  if(E->message)
  {
    /* While the service state is still initializing only events whose
     * newest state bit is set are logged; otherwise the message is always
     * logged. */
    if(E->state != STATE_INIT || newest_bit)
    {
      if(E->id != EVENT_INSTANCE && E->state != STATE_PASSED)
        LogError("%s\n", E->message);
      else
        LogInfo("%s\n", E->message);
    }

    /* No action is taken in the init state.
     * NOTE(review): this early exit applies only when a message is set;
     * confirm that is intended. */
    if(E->state == STATE_INIT)
      return;
  }

  service = Event_get_source(E);
  if(service == NULL)
  {
    LogError("Event handling aborted\n");
    return;
  }

  /* Keep the service error bitmap in sync, then dispatch the action */
  if(E->state != STATE_FAILED)
  {
    service->error &= ~E->id;
    handle_action(E, E->action->passed);
  }
  else
  {
    service->error |= E->id;
    handle_action(E, E->action->failed);
  }

  /* Possible event state change was handled so we will reset the flag. */
  E->state_changed = FALSE;

}


/*
 * Execute the action for the event. Unless the action is ACTION_IGNORE,
 * the alert and collector handlers are notified first; when one of them
 * reports a failure the event is saved to the event queue (if enabled).
 * Afterwards the action itself is carried out on the originating service.
 * @param E An event
 * @param A The action to perform
 */
static void handle_action(Event_T E, Action_T A) {

  Service_T service;
  int       is_start;

  ASSERT(E);
  ASSERT(A);

  E->flag = HANDLER_PASSED;

  if(A->id == ACTION_IGNORE)
    return;

  /* Alert and collector event notification are common actions */
  E->flag |= handle_alert(E);
  E->flag |= handle_collector(E);

  /* In the case that some subhandler failed, enqueue the event for
   * partial reprocessing */
  if(E->flag != HANDLER_PASSED)
  {
    if(!Run.eventlist_dir)
      LogError("Aborting event\n");
    else
      Event_queue_add(E);
  }

  service = Event_get_source(E);
  if(service == NULL)
  {
    LogError("Event action handling aborted\n");
    return;
  }

  /* The alert was already delivered by the notification above */
  if(A->id == ACTION_ALERT)
    return;

  if(A->id == ACTION_EXEC)
  {
    spawn(service, A->exec, Event_get_description(E));
    return;
  }

  is_start = (A->id == ACTION_START || A->id == ACTION_RESTART);

  /* Count start attempts when a timeout is defined for the service */
  if(service->def_timeout && is_start)
    service->nstart++;

  /* Start, stop and restart are not performed in passive mode */
  if(service->mode == MODE_PASSIVE && (is_start || A->id == ACTION_STOP))
    return;

  control_service(service->name, A->id);
}


/**
 * Add the partially handled event to the global queue. The event is
 * written to a file named "<timestamp>_<service name>" inside
 * Run.eventlist_dir as a sequence of records: structure version, the raw
 * event structure, source, group, message and the action id. Signals are
 * blocked while the file is written and restored on every exit path taken
 * after the block was set. If the file cannot be written completely it is
 * removed again.
 * @param E An event object
 */
static void Event_queue_add(Event_T E) {

  FILE        *file = NULL;
  char         file_name[STRLEN];
  int          version = EVENT_VERSION;
  short        action;
  int          rv = FALSE;
  mode_t       mask;
  sigset_t     ns;
  sigset_t     os;

  ASSERT(E);
  ASSERT(E->flag != HANDLER_PASSED);

  /* Resolved only after E has been validated */
  action = Event_get_action(E);

  if(!File_checkQueueDirectory(Run.eventlist_dir, 0700))
  {
    LogError("%s: Aborting event - cannot access the directory %s\n",
      prog, Run.eventlist_dir);
    return;
  }

  if(!File_checkQueueLimit(Run.eventlist_dir, Run.eventlist_slots))
  {
    LogError("%s: Aborting event - queue over quota\n", prog);
    return;
  }

  set_signal_block(&ns, &os);

  /* compose the file name of actual timestamp and service name */
  snprintf(file_name, STRLEN,
    "%s/%ld_%s",
    Run.eventlist_dir, (long int)time(NULL), E->source);

  DEBUG("%s: Adding event to the queue file %s for later delivery\n",
    prog, file_name);

  mask = umask(QUEUEMASK);
  file = fopen(file_name, "w");
  umask(mask);
  if(! file)
  {
    LogError("%s: Aborting event - cannot open the event file %s -- %s\n",
      prog, file_name, STRERROR);
    /* Do not leave the signals blocked on this early exit */
    unset_signal_block(&os);
    return;
  }

  /* Write the records in order, stopping at the first failure: structure
   * version, event structure, source, group, message, event action */
  rv = File_writeQueue(file, &version, sizeof(int)) &&
       File_writeQueue(file, E, sizeof(*E)) &&
       File_writeQueue(file, E->source, E->source?strlen(E->source)+1:0) &&
       File_writeQueue(file, E->group, E->group?strlen(E->group)+1:0) &&
       File_writeQueue(file, E->message, E->message?strlen(E->message)+1:0) &&
       File_writeQueue(file, &action, sizeof(short));

  /* Always close the stream; a failed close means buffered data may not
   * have reached the file, so it counts as a write failure */
  if(fclose(file) != 0)
  {
    rv = FALSE;
  }

  if(!rv)
  {
    LogError("%s: Aborting event - unable to save event information to %s\n",
      prog, file_name);
    unlink(file_name);
  }
  else
  {
    if(!Run.handler_init && E->flag & HANDLER_ALERT)
    {
      Run.handler_queue[HANDLER_ALERT]++;
    }
    if(!Run.handler_init && E->flag & HANDLER_COLLECTOR)
    {
      Run.handler_queue[HANDLER_COLLECTOR]++;
    }
  }

  unset_signal_block(&os);
  return;
}



/* syntax highlighted by Code2HTML, v. 0.9.1 */