/***************************************************************************** * * CHECKS.C - Service and host check functions for NetSaint * * Copyright (c) 1999-2001 Ethan Galstad (netsaint@netsaint.org) * Last Modified: 12-02-2001 * * License: * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * *****************************************************************************/ #include "../common/config.h" #include "../common/common.h" #include "../common/statusdata.h" #include "netsaint.h" #ifdef EMBEDDEDPERL #include #include #include #endif extern char temp_file[MAX_FILENAME_LENGTH]; extern int interval_length; extern int command_check_interval; extern int ipc_pipe[2]; extern int log_initial_states; extern int service_check_timeout; extern int host_check_timeout; extern int service_check_reaper_interval; extern int use_aggressive_host_checking; extern int soft_state_dependencies; extern int currently_running_service_checks; extern int non_parallelized_check_running; extern int accept_passive_service_checks; extern int obsess_over_services; extern timed_event *event_list_low; extern service *service_list; extern host *host_list; extern servicedependency *servicedependency_list; extern service_message svc_msg; /******************************************************************/ /****************** SERVICE MONITORING FUNCTIONS ******************/ /******************************************************************/ /* forks a child process to run a service check, but does not wait for the service check result */ void run_service_check(service *svc){ char raw_command[MAX_INPUT_BUFFER]; char processed_command[MAX_INPUT_BUFFER]; char plugin_output[MAX_PLUGINOUTPUT_LENGTH]; char temp_buffer[MAX_INPUT_BUFFER]; int check_service=TRUE; time_t current_time; time_t preferred_time=0L; time_t next_valid_time; pid_t pid; int fork_error=FALSE; int wait_result=0; host *temp_host=NULL; FILE *fp; int pclose_result=0; int time_is_valid=TRUE; #ifdef EMBEDDEDPERL char fname[512]; char tmpfname[32]; char *args[5] = {"",DO_CLEAN, "", "", NULL }; int isperl; int tmpfd; #ifdef THREADEDPERL dTHX; #endif dSP; #endif /* get the current time */ time(¤t_time); /* if the service check is currently disabled... */ if(svc->checks_enabled==FALSE){ /* don't check the service if we're not forcing it through */ if(!(svc->check_options & CHECK_OPTION_FORCE_EXECUTION)) check_service=FALSE; /* reschedule the service check */ preferred_time=current_time+(svc->check_interval*interval_length); } /* make sure this is a valid time to check the service */ if(check_time_against_period((unsigned long)current_time,svc->check_period)==ERROR){ /* don't check the service if we're not forcing it through */ if(!(svc->check_options & CHECK_OPTION_FORCE_EXECUTION)) check_service=FALSE; /* get the next valid time we can run the check */ preferred_time=current_time; /* set the invalid time flag */ time_is_valid=FALSE; } /* check service dependencies for execution */ if(check_service_dependencies(svc,EXECUTION_DEPENDENCY)==DEPENDENCIES_FAILED){ /* don't check the service if we're not forcing it through */ if(!(svc->check_options & CHECK_OPTION_FORCE_EXECUTION)) check_service=FALSE; /* reschedule the service check */ preferred_time=current_time+(svc->check_interval*interval_length); } /* clear the force execution flag */ if(svc->check_options & CHECK_OPTION_FORCE_EXECUTION) svc->check_options-=CHECK_OPTION_FORCE_EXECUTION; /* find the host associated with this service */ temp_host=find_host(svc->host_name,NULL); /* don't check the service if we couldn't find the associated host */ if(temp_host==NULL) check_service=FALSE; /* if we shouldn't check the service, just reschedule it and leave... */ if(check_service==FALSE){ /* make sure we rescheduled the next service check at a valid time */ get_next_valid_time(preferred_time,&next_valid_time,svc->check_period); /* the service could not be rescheduled properly - set the next check time for next year, but don't actually reschedule it */ if(time_is_valid==FALSE && next_valid_time==preferred_time){ svc->next_check=(time_t)(next_valid_time+(60*60*24*365)); svc->should_be_scheduled=FALSE; #ifdef DEBUG1 printf("Warning: Could not find any valid times to reschedule a check of service '%s' on host '%s'!\n",svc->description,svc->host_name); #endif } /* this service could be rescheduled... */ else{ svc->next_check=next_valid_time; svc->should_be_scheduled=TRUE; } /* update the status log with the current service */ update_service_status(svc,FALSE); /* reschedule the next service check - unless we couldn't find a valid next check time */ if(svc->should_be_scheduled==TRUE) schedule_service_check(svc,svc->next_check,FALSE); return; } /**** ELSE RUN THE SERVICE CHECK ****/ #ifdef DEBUG3 printf("\tChecking service '%s' on host '%s'...\n",svc->description,svc->host_name); #endif /* increment number of parallel service checks currently out there... */ currently_running_service_checks++; /* set a flag if this service check shouldn't be parallelized with others... */ if(svc->parallelize==FALSE) non_parallelized_check_running=TRUE; /* set the execution flag */ svc->is_executing=TRUE; /* grab the host and service macro variables */ clear_volatile_macros(); grab_host_macros(temp_host); grab_service_macros(svc); /* get the raw command line */ get_raw_command_line(svc->service_check_command,raw_command,sizeof(raw_command)); strip(raw_command); /* process any macros contained in the argument */ process_macros(raw_command,processed_command,sizeof(processed_command)); strip(processed_command); /* save service info */ strncpy(svc_msg.host_name,svc->host_name,sizeof(svc_msg.host_name)-1); svc_msg.host_name[sizeof(svc_msg.host_name)-1]='\x0'; strncpy(svc_msg.description,svc->description,sizeof(svc_msg.description)-1); svc_msg.description[sizeof(svc_msg.description)-1]='\x0'; svc_msg.parallelized=svc->parallelize; svc_msg.check_time=current_time; svc_msg.finish_time=current_time; #ifdef EMBEDDEDPERL strncpy(fname,processed_command,strcspn(processed_command," ")); fname[strcspn(processed_command," ")] = '\x0'; /* have "filename" component of command. Check for PERL */ fp=fopen(fname, "r"); if(fp==NULL) strcpy(raw_command,""); else{ fgets(raw_command,80,fp); fclose(fp); } isperl=0; if(strstr(raw_command,"/bin/perl")!=NULL){ isperl = 1; args[0] = fname; args[2] = tmpfname; if(strchr(processed_command,' ')==NULL) args[3]=""; else args[3]=processed_command+strlen(fname)+1; /* call our perl interpreter to compile and optionally cache the command */ perl_call_argv("Embed::Persistent::eval_file", G_DISCARD | G_EVAL, args); } #endif /* fork a child process */ pid=fork(); /* an error occurred while trying to fork */ if(pid==-1) fork_error=TRUE; /* if we are in the child process... */ else if(pid==0){ /* free allocated memory */ free_memory(); /* fork again... */ pid=fork(); /* an error occurred while trying to fork again */ if(pid==-1) exit(STATE_UNKNOWN); /* the grandchild process should run the service check... */ if(pid==0){ /* reset signal handling */ reset_sighandler(); /* become the process group leader */ setpgid(0,0); /* close read end of IPC pipe */ close(ipc_pipe[0]); /* catch plugins that don't finish in a timely manner */ signal(SIGALRM,service_check_sighandler); alarm(service_check_timeout); /******** BEGIN EMBEDDED PERL INTERPRETER EXECUTION ********/ #ifdef EMBEDDEDPERL if(isperl){ /* generate a temporary filename to which stdout can be redirected. */ snprintf(tmpfname,sizeof(tmpfname)-1,"/tmp/embeddedXXXXXX"); if((tmpfd=mkstemp(tmpfname))==-1) _exit(STATE_UNKNOWN); /* execute our previously compiled script */ ENTER; SAVETMPS; PUSHMARK(SP); XPUSHs(sv_2mortal(newSVpv(args[0],0))); XPUSHs(sv_2mortal(newSVpv(args[1],0))); XPUSHs(sv_2mortal(newSVpv(args[2],0))); XPUSHs(sv_2mortal(newSVpv(args[3],0))); PUTBACK; perl_call_pv("Embed::Persistent::run_package", G_EVAL); SPAGAIN; pclose_result=POPi; PUTBACK; FREETMPS; LEAVE; /* check return status */ if(SvTRUE(ERRSV)){ pclose_result=-2; #ifdef DEBUG1 printf("embedded perl ran %s with error %s\n",fname,SvPV(ERRSV,PL_na)); #endif } /* read back stdout from script */ fp=fopen(tmpfname, "r"); /* default return string in case nothing was returned */ strcpy(plugin_output,"(No output!)"); /* read output from plugin (which was redirected to temp file) */ fgets(plugin_output,sizeof(plugin_output)-1,fp); plugin_output[sizeof(plugin_output)-1]='\x0'; strip(plugin_output); /* close and delete temp file */ fclose(fp); close(tmpfd); unlink(tmpfname); #ifdef DEBUG1 printf("embedded perl plugin output was %d,%s\n",pclose_result, plugin_output); #endif /* reset the alarm */ alarm(0); /* test for execution error */ if(pclose_result==-2){ strncpy(svc_msg.output,"(Error returned by call to perl script)",sizeof(svc_msg.output)-1); svc_msg.output[sizeof(svc_msg.output)-1]='\x0'; svc_msg.return_code=STATE_CRITICAL; svc_msg.exited_ok=FALSE; svc_msg.finish_time=time(NULL); write_svc_message(&svc_msg); /* close write end of IPC pipe */ close(ipc_pipe[1]); _exit(STATE_UNKNOWN); } /* else write plugin check results to message queue */ else{ strncpy(svc_msg.output,plugin_output,sizeof(svc_msg.output)-1); svc_msg.output[sizeof(svc_msg.output)-1]='\x0'; svc_msg.return_code=pclose_result; svc_msg.exited_ok=TRUE; svc_msg.check_type=SERVICE_CHECK_ACTIVE; svc_msg.finish_time=time(NULL); write_svc_message(&svc_msg); } /* close write end of IPC pipe */ close(ipc_pipe[1]); /* return with plugin exit status - not really necessary... */ _exit(pclose_result); } /* Not a perl command. Use popen... */ #endif /******** END EMBEDDED PERL INTERPRETER EXECUTION ********/ /* run the plugin check command */ fp=popen(processed_command,"r"); if(fp==NULL) _exit(STATE_UNKNOWN); /* default return string in case nothing was returned */ strcpy(plugin_output,"(No output!)"); /* grab the plugin output and clean it */ fgets(plugin_output,sizeof(plugin_output)-1,fp); strip(plugin_output); /* close the process */ pclose_result=pclose(fp); /* reset the alarm */ alarm(0); /* test for execution error */ if(pclose_result==-1){ strncpy(svc_msg.output,"(Error returned by call to pclose() function)",sizeof(svc_msg.output)-1); svc_msg.output[sizeof(svc_msg.output)-1]='\x0'; svc_msg.return_code=STATE_CRITICAL; svc_msg.exited_ok=FALSE; svc_msg.finish_time=time(NULL); write_svc_message(&svc_msg); /* close write end of IPC pipe */ close(ipc_pipe[1]); _exit(STATE_UNKNOWN); } /* else write plugin check results to message queue */ else{ strncpy(svc_msg.output,plugin_output,sizeof(svc_msg.output)-1); svc_msg.output[sizeof(svc_msg.output)-1]='\x0'; svc_msg.return_code=(int)WEXITSTATUS(pclose_result); svc_msg.exited_ok=TRUE; svc_msg.check_type=SERVICE_CHECK_ACTIVE; svc_msg.finish_time=time(NULL); write_svc_message(&svc_msg); } /* close write end of IPC pipe */ close(ipc_pipe[1]); /* return with plugin exit status - not really necessary... */ _exit(pclose_result); } /* close write end of IPC pipe */ close(ipc_pipe[1]); /* parent exits immediately - grandchild process is inherited by the INIT process, so we have no zombie problem... */ _exit(STATE_OK); } /* else the parent should wait for the first child to return... */ else if(pid>0){ wait_result=waitpid(pid,NULL,0); /* removed 06/28/2000 - caused problems under AIX */ /* result=WEXITSTATUS(wait_result); if(result==STATE_UNKNOWN) fork_error=TRUE; */ } /* see if we could run the check... */ if(fork_error==TRUE){ /* log an error */ snprintf(temp_buffer,sizeof(temp_buffer),"Warning: The check of service '%s' on host '%s' could not be performed due to a fork() error. The check will be rescheduled.\n",svc_msg.description,svc_msg.host_name); temp_buffer[sizeof(temp_buffer)-1]='\x0'; write_to_logs_and_console(temp_buffer,NSLOG_RUNTIME_WARNING,TRUE); /* make sure we rescheduled the next service check at a valid time */ get_next_valid_time(preferred_time,&next_valid_time,svc->check_period); /* the service could not be rescheduled properly - set the next check time for next year, but don't actually reschedule it */ if(time_is_valid==FALSE && next_valid_time==preferred_time){ svc->next_check=(time_t)(next_valid_time+(60*60*24*365)); svc->should_be_scheduled=FALSE; #ifdef DEBUG1 printf("Warning: Could not find any valid times to reschedule a check of service '%s' on host '%s'!\n",svc->description,svc->host_name); #endif } /* this service could be rescheduled... */ else{ svc->next_check=next_valid_time; svc->should_be_scheduled=TRUE; } /* update the status log with the current service */ update_service_status(svc,FALSE); /* reschedule the next service check - unless we couldn't find a valid next check time */ if(svc->should_be_scheduled==TRUE) schedule_service_check(svc,svc->next_check,FALSE); } return; } /* reaps service check results */ void reap_service_checks(void){ service_message queued_svc_msg; service *temp_service=NULL; host *temp_host=NULL; time_t preferred_time; time_t next_valid_time; char temp_buffer[MAX_INPUT_BUFFER]; int state_change=FALSE; int hard_state_change=FALSE; int route_result=HOST_UP; int dependency_result=DEPENDENCIES_OK; time_t current_time; int first_check=FALSE; int state_was_logged=FALSE; char old_plugin_output[MAX_PLUGINOUTPUT_LENGTH]=""; char temp_plugin_output[MAX_PLUGINOUTPUT_LENGTH]=""; char *temp_ptr; #ifdef DEBUG0 printf("reap_service_checks() start\n"); #endif #ifdef DEBUG3 printf("Starting to reap service check results...\n"); #endif /* read all service checks results that have come in... */ while(read_svc_message(&queued_svc_msg)!=-1){ /* make sure we really have something... */ if(!strcmp(queued_svc_msg.description,"") && !strcmp(queued_svc_msg.host_name,"")){ #ifdef DEBUG1 printf("Found an empty message in service result pipe!\n"); #endif continue; } /* get the current time */ time(¤t_time); /* skip this service check results if its passive and we aren't accepting passive check results */ if(accept_passive_service_checks==FALSE && queued_svc_msg.check_type==SERVICE_CHECK_PASSIVE) continue; /* because of my idiotic idea of having UNKNOWN states be equivalent to -1, I must hack things a bit... */ if(queued_svc_msg.return_code==255 || queued_svc_msg.return_code==-1) queued_svc_msg.return_code=STATE_UNKNOWN; /* find the service */ temp_service=find_service(queued_svc_msg.host_name,queued_svc_msg.description,NULL); if(temp_service==NULL){ snprintf(temp_buffer,sizeof(temp_buffer),"Warning: Message queue contained results for service '%s' on host '%s'. The service could not be found!\n",queued_svc_msg.description,queued_svc_msg.host_name); temp_buffer[sizeof(temp_buffer)-1]='\x0'; write_to_logs_and_console(temp_buffer,NSLOG_RUNTIME_WARNING,TRUE); continue; } /* update the execution time for this check */ if(queued_svc_msg.check_time>current_time || queued_svc_msg.finish_time>current_time || (queued_svc_msg.finish_timeexecution_time=0; else temp_service->execution_time=(int)(queued_svc_msg.finish_time-queued_svc_msg.check_time); /* ignore passive service check results if we're not accepting them for this service */ if(temp_service->accept_passive_service_checks==FALSE && queued_svc_msg.check_type==SERVICE_CHECK_PASSIVE) continue; #ifdef DEBUG3 printf("\n\tFound check result for service '%s' on host '%s'\n",temp_service->description,temp_service->host_name); printf("\t\tCheck Type: %s\n",(queued_svc_msg.check_type==SERVICE_CHECK_ACTIVE)?"ACTIVE":"PASSIVE"); printf("\t\tParallelized?: %s\n",(queued_svc_msg.parallelized==TRUE)?"Yes":"No"); printf("\t\tExited OK?: %s\n",(queued_svc_msg.exited_ok==TRUE)?"Yes":"No"); printf("\t\tReturn Status: %d\n",queued_svc_msg.return_code); printf("\t\tPlugin Output: '%s'\n",queued_svc_msg.output); #endif /* decrement the number of service checks still out there... */ if(queued_svc_msg.check_type==SERVICE_CHECK_ACTIVE && currently_running_service_checks>0) currently_running_service_checks--; /* if this check was not parallelized, clear the flag */ if(queued_svc_msg.parallelized==FALSE && queued_svc_msg.check_type==SERVICE_CHECK_ACTIVE) non_parallelized_check_running=FALSE; /* clear the execution flag if this was an active check */ if(queued_svc_msg.check_type==SERVICE_CHECK_ACTIVE) temp_service->is_executing=FALSE; /* get the last check time */ temp_service->last_check=queued_svc_msg.check_time; /* was this check passive or active? */ temp_service->check_type=(queued_svc_msg.check_type==SERVICE_CHECK_ACTIVE)?SERVICE_CHECK_ACTIVE:SERVICE_CHECK_PASSIVE; /* INITIALIZE VARIABLES FOR THIS SERVICE */ state_change=FALSE; hard_state_change=FALSE; route_result=HOST_UP; dependency_result=DEPENDENCIES_OK; first_check=FALSE; state_was_logged=FALSE; strcpy(old_plugin_output,""); strcpy(temp_plugin_output,""); /* save the old service status info */ temp_service->last_state=temp_service->current_state; /* save old plugin output */ strncpy(old_plugin_output,(temp_service->plugin_output==NULL)?"":temp_service->plugin_output,sizeof(old_plugin_output)-1); old_plugin_output[sizeof(old_plugin_output)-1]='\x0'; /* clear the old plugin output and perf data buffers */ strcpy(temp_service->plugin_output,""); strcpy(temp_service->perf_data,""); /* check for empty plugin output */ if(!strcmp(temp_plugin_output,"")) strcpy(temp_plugin_output,"(No output returned from plugin)"); /* get performance data (if it exists) */ strncpy(temp_plugin_output,queued_svc_msg.output,sizeof(temp_plugin_output)-1); temp_plugin_output[sizeof(temp_plugin_output)-1]='\x0'; temp_ptr=strtok(temp_plugin_output,"|\n"); temp_ptr=strtok(NULL,"\n"); if(temp_ptr!=NULL){ strip(temp_ptr); strncpy(temp_service->perf_data,temp_ptr,MAX_PLUGINOUTPUT_LENGTH-1); temp_service->perf_data[MAX_PLUGINOUTPUT_LENGTH-1]='\x0'; } /* get status data - everything before pipe sign */ strncpy(temp_plugin_output,queued_svc_msg.output,sizeof(temp_plugin_output)-1); temp_plugin_output[sizeof(temp_plugin_output)-1]='\x0'; temp_ptr=strtok(temp_plugin_output,"|\n"); /* if there was some error running the command, just skip it (this shouldn't be happening) */ if(queued_svc_msg.exited_ok==FALSE){ snprintf(temp_buffer,sizeof(temp_buffer),"Warning: Check of service '%s' on host '%s' did not exit properly!\n",temp_service->description,temp_service->host_name); temp_buffer[sizeof(temp_buffer)-1]='\x0'; write_to_logs_and_console(temp_buffer,NSLOG_RUNTIME_WARNING,TRUE); snprintf(temp_service->plugin_output,MAX_PLUGINOUTPUT_LENGTH-1,"(Service check did not exit properly)"); temp_service->plugin_output[MAX_PLUGINOUTPUT_LENGTH-1]='\x0'; temp_service->current_state=STATE_CRITICAL; } /* make sure the return code is within bounds */ else if(queued_svc_msg.return_code<0 || queued_svc_msg.return_code>3){ snprintf(temp_buffer,sizeof(temp_buffer),"Warning: Return code of %d for check of service '%s' on host '%s' was out of bounds.\n",queued_svc_msg.return_code,temp_service->description,temp_service->host_name); temp_buffer[sizeof(temp_buffer)-1]='\x0'; write_to_logs_and_console(temp_buffer,NSLOG_RUNTIME_WARNING,TRUE); snprintf(temp_service->plugin_output,MAX_PLUGINOUTPUT_LENGTH-1,"(Return code of %d is out of bounds)",queued_svc_msg.return_code); temp_service->plugin_output[MAX_PLUGINOUTPUT_LENGTH-1]='\x0'; temp_service->current_state=STATE_CRITICAL; } /* else the return code is okay... */ else{ /* make sure the plugin output isn't null */ if(temp_ptr==NULL){ strncpy(temp_service->plugin_output,"(No output returned from plugin)",MAX_PLUGINOUTPUT_LENGTH-1); temp_service->plugin_output[MAX_PLUGINOUTPUT_LENGTH-1]='\x0'; } /* grab the plugin output */ else{ strip(temp_ptr); if(!strcmp(temp_ptr,"")) strncpy(temp_service->plugin_output,"(No output returned from plugin)",MAX_PLUGINOUTPUT_LENGTH-1); else strncpy(temp_service->plugin_output,temp_ptr,MAX_PLUGINOUTPUT_LENGTH-1); temp_service->plugin_output[MAX_PLUGINOUTPUT_LENGTH-1]='\x0'; } /* replace semicolons in plugin output (but not performance data) with colons */ temp_ptr=temp_service->plugin_output; while((temp_ptr=strchr(temp_ptr,';'))) *temp_ptr=':'; /* grab the return code */ temp_service->current_state=queued_svc_msg.return_code; } /* get the host that this service runs on */ temp_host=find_host(temp_service->host_name,NULL); /* if the service check was okay... */ if(temp_service->current_state==STATE_OK){ /* if the host has never been checked before... */ if(temp_host->has_been_checked==FALSE){ /* set the checked flag */ temp_host->has_been_checked=TRUE; /* update the last check time */ temp_host->last_check=temp_service->last_check; /* assume the host is up */ temp_host->status=HOST_UP; /* plugin output should reflect our guess at the current host state */ strncpy(temp_host->plugin_output,"(Host assumed to be up)",MAX_PLUGINOUTPUT_LENGTH); temp_host->plugin_output[MAX_PLUGINOUTPUT_LENGTH-1]='\x0'; /* update the status log with the host status */ update_host_status(temp_host,FALSE); /* log the initial state if the user wants */ if(log_initial_states==TRUE) log_host_event(temp_host,HOST_UP,HARD_STATE); } /* log the initial state if the user wants */ if(temp_service->has_been_checked==FALSE && log_initial_states==TRUE){ log_service_event(temp_service,HARD_STATE); state_was_logged=TRUE; } } /* check for a state change (either soft or hard) */ if(temp_service->current_state!=temp_service->last_state){ #ifdef DEBUG3 printf("\t\tService '%s' on host '%s' has changed state since last check!\n",temp_service->description,temp_service->host_name); #endif /* set the state change flag */ state_change=TRUE; } /* checks for a hard state change where host was down or dependency failed at last service check */ if((temp_service->host_problem_at_last_check==TRUE || temp_service->dependency_failure_at_last_check==TRUE) && temp_service->current_state==STATE_OK){ #ifdef DEBUG3 printf("\t\tService '%s' on host '%s' has had a HARD STATE CHANGE!!\n",temp_service->description,temp_service->host_name); #endif hard_state_change=TRUE; } /* check for a "normal" hard state change where max check attempts is reached */ else if(temp_service->current_attempt>=temp_service->max_attempts){ if(temp_service->current_state!=temp_service->last_hard_state){ #ifdef DEBUG3 printf("\t\tService '%s' on host '%s' has had a HARD STATE CHANGE!!\n",temp_service->description,temp_service->host_name); #endif hard_state_change=TRUE; } } /* reset last and next notification times and acknowledgement flag if necessary */ if(state_change==TRUE || hard_state_change==TRUE){ temp_service->last_notification=(time_t)0; temp_service->next_notification=(time_t)0; temp_service->problem_has_been_acknowledged=FALSE; /* do NOT reset current notification number!!! */ /*temp_service->current_notification_number=0;*/ } /* initialize the last state change times if necessary */ if(temp_service->last_state_change==(time_t)0) temp_service->last_state_change=temp_service->last_check; if(temp_host->last_state_change==(time_t)0) temp_host->last_state_change=temp_service->last_check; /**************************************/ /******* SERVICE CHECK OK LOGIC *******/ /**************************************/ /* if the service is up and running OK... */ if(temp_service->current_state==STATE_OK){ /* clear the next notification time (this is not used, since we are now in an OK state) */ temp_service->next_notification=(time_t)0; /* reset the acknowledgement flag (this should already have been done, but just in case...) */ temp_service->problem_has_been_acknowledged=FALSE; /* the service check was okay, so the associated host must be up... */ if(temp_host->status!=HOST_UP){ /* verify the route to the host and send out host recovery notifications */ verify_route_to_host(temp_host); /* set the host problem flag (i.e. don't notify about recoveries for this service) */ temp_service->host_problem_at_last_check=TRUE; } /* if a hard service recovery has occurred... */ if(hard_state_change==TRUE){ /* set the state type macro */ temp_service->state_type=HARD_STATE; /* update service state times and last state change time */ update_service_state_times(temp_service); /* log the service recovery */ log_service_event(temp_service,HARD_STATE); state_was_logged=TRUE; /* notify contacts about the service recovery */ service_notification(temp_service,NULL); /* run the service event handler to handle the hard state change */ handle_service_event(temp_service,HARD_STATE); } /* else if a soft service recovery has occurred... */ else if(state_change==TRUE){ /* this is a soft recovery */ temp_service->state_type=SOFT_STATE; /* update service state times and last state change time */ update_service_state_times(temp_service); /* log the soft recovery */ log_service_event(temp_service,SOFT_STATE); state_was_logged=TRUE; /* run the service event handler to handle the soft state change */ handle_service_event(temp_service,SOFT_STATE); } /* else no service state change has occured... */ /* should we obsessive over service checks? */ if(obsess_over_services==TRUE) obsessive_compulsive_service_check_processor(temp_service,temp_service->state_type); /* reset all service variables because its okay now... */ temp_service->host_problem_at_last_check=FALSE; temp_service->dependency_failure_at_last_check=FALSE; temp_service->no_recovery_notification=FALSE; temp_service->current_attempt=1; temp_service->state_type=HARD_STATE; temp_service->last_hard_state=STATE_OK; temp_service->last_notification=(time_t)0; temp_service->next_notification=(time_t)0; temp_service->current_notification_number=0; temp_service->problem_has_been_acknowledged=FALSE; temp_service->has_been_unknown=FALSE; temp_service->has_been_warning=FALSE; temp_service->has_been_critical=FALSE; temp_service->no_more_notifications=FALSE; if(temp_service->check_type==SERVICE_CHECK_ACTIVE) temp_service->next_check=(time_t)(temp_service->last_check+(temp_service->check_interval*interval_length)); } /*******************************************/ /******* SERVICE CHECK PROBLEM LOGIC *******/ /*******************************************/ /* hey, something's not working quite like it should... */ else{ /* reset the recovery notification flag (it may get set again though) */ temp_service->no_recovery_notification=FALSE; /* check the route to the host if its supposed to be up right now... */ if(temp_host->status==HOST_UP) route_result=verify_route_to_host(temp_host); /* else the host is either down or unreachable, so recheck it if necessary */ else{ /* we're using agressive host checking, so really do recheck the host... */ if(use_aggressive_host_checking==TRUE) route_result=verify_route_to_host(temp_host); /* the service wobbled between non-OK states, so check the host... */ else if(state_change==TRUE && temp_service->last_hard_state!=STATE_OK) route_result=verify_route_to_host(temp_host); /* else fake the host check, but (possibly) resend host notifications to contacts... */ else{ /* if the host has never been checked beforem, set the checked flag */ if(temp_host->has_been_checked==FALSE) temp_host->has_been_checked=TRUE; /* update the last host check time */ temp_host->last_check=temp_service->last_check; /* fake the route check result */ route_result=temp_host->status; /* log the initial state if the user wants to and this host hasn't been checked yet */ if(log_initial_states==TRUE && temp_service->has_been_checked==FALSE) log_host_event(temp_host,temp_host->status,HARD_STATE); /* possibly re-send host notifications... */ host_notification(temp_host,temp_host->status,NULL); } } /* if the host is down or unreachable ... */ if(route_result!=HOST_UP){ /* "fake" a hard state change for the service - well, its not really fake, but it didn't get caught earler... */ if(temp_service->last_hard_state!=temp_service->current_state) hard_state_change=TRUE; /* update service state times if necessary and last state change time */ if(hard_state_change==TRUE) update_service_state_times(temp_service); /* put service into a hard state without attempting check retries and don't send out notifications about it */ temp_service->host_problem_at_last_check=TRUE; temp_service->state_type=HARD_STATE; temp_service->last_hard_state=temp_service->current_state; temp_service->current_attempt=1; } /* else the host is up.. */ else{ /* check service notification dependencies */ dependency_result=check_service_dependencies(temp_service,NOTIFICATION_DEPENDENCY); /* the service notification dependencies failed... */ if(dependency_result==DEPENDENCIES_FAILED){ /* set the dependency problem */ temp_service->dependency_failure_at_last_check=TRUE; /* don't send a recovery notification if the service recovers at the next check */ temp_service->no_recovery_notification=TRUE; } /* the service notification dependencies are okay */ else temp_service->dependency_failure_at_last_check=FALSE; /* the host recovered since the last time the service was checked... */ if(temp_service->host_problem_at_last_check==TRUE){ /* next time the service is checked we shouldn't get into this same case... */ temp_service->host_problem_at_last_check=FALSE; /* reset the current check counter, so we can mabye avoid a false recovery alarm - added 07/28/99 */ temp_service->current_attempt=1; /* don't send a recovery notification if the service recovers at the next check */ temp_service->no_recovery_notification=TRUE; } } /* if we should retry the service check, do so (except it the host is down or unreachable!) */ if(temp_service->current_attempt < temp_service->max_attempts){ /* the host is down or unreachable, so don't attempt to retry the service check */ if(route_result!=HOST_UP){ /* the host is not up, so reschedule the next service check at regular interval */ if(temp_service->check_type==SERVICE_CHECK_ACTIVE) temp_service->next_check=(time_t)(temp_service->last_check+(temp_service->check_interval*interval_length)); /* log the problem as a hard state if the host just went down (new in 0.0.5) */ if(hard_state_change==TRUE){ log_service_event(temp_service,HARD_STATE); state_was_logged=TRUE; } } /* the host is up, so continue to retry the service check */ else{ /* this is a soft state */ temp_service->state_type=SOFT_STATE; /* update service state times if necessary and last state change time */ if(state_change==TRUE) update_service_state_times(temp_service); /* log the service check retry */ log_service_event(temp_service,SOFT_STATE); state_was_logged=TRUE; /* run the service event handler to handle the soft state */ handle_service_event(temp_service,SOFT_STATE); /* increment the current attempt number */ temp_service->current_attempt=temp_service->current_attempt+1; if(temp_service->check_type==SERVICE_CHECK_ACTIVE) temp_service->next_check=(time_t)(temp_service->last_check+(temp_service->retry_interval*interval_length)); } } /* we've reached the maximum number of service rechecks, so handle the error */ else{ /* this is a hard state */ temp_service->state_type=HARD_STATE; /* keep track of this state, in case we "float" amongst other non-OK states before recovering */ if(temp_service->current_state==STATE_UNKNOWN) temp_service->has_been_unknown=TRUE; else if(temp_service->current_state==STATE_WARNING) temp_service->has_been_warning=TRUE; else if(temp_service->current_state==STATE_CRITICAL) temp_service->has_been_critical=TRUE; /* if we've hard a hard state change... */ if(hard_state_change==TRUE){ /* update service state times and last state change time */ update_service_state_times(temp_service); /* log the service problem (even if host is not up, which is new in 0.0.5) */ log_service_event(temp_service,HARD_STATE); state_was_logged=TRUE; } /* else log the problem (again) if this service is flagged as being volatile */ else if(temp_service->is_volatile==TRUE){ log_service_event(temp_service,HARD_STATE); state_was_logged=TRUE; } /* (re)send notifications out about this service problem if the host is up (and was at last check also) and the dependencies were okay... */ service_notification(temp_service,NULL); /* run the service event handler if we changed state from the last hard state or if this service is flagged as being volatile */ if(hard_state_change==TRUE || temp_service->is_volatile==TRUE) handle_service_event(temp_service,HARD_STATE); /* save the last hard state */ temp_service->last_hard_state=temp_service->current_state; /* reschedule the next check at the regular interval */ if(temp_service->check_type==SERVICE_CHECK_ACTIVE) temp_service->next_check=(time_t)(temp_service->last_check+(temp_service->check_interval*interval_length)); } /* should we obsessive over service checks? */ if(obsess_over_services==TRUE) obsessive_compulsive_service_check_processor(temp_service,temp_service->state_type); } /* reschedule the next service check ONLY for active checks */ if(temp_service->check_type==SERVICE_CHECK_ACTIVE){ /* make sure we don't get ourselves into too much trouble... */ if(current_time>temp_service->next_check) temp_service->next_check=current_time; /* make sure we rescheduled the next service check at a valid time */ preferred_time=temp_service->next_check; get_next_valid_time(preferred_time,&next_valid_time,temp_service->check_period); temp_service->next_check=next_valid_time; /* schedule a non-forced check */ schedule_service_check(temp_service,temp_service->next_check,FALSE); } /* if we're stalking this state type and state was not already logged AND the plugin output changed since last check, log it now.. */ if(temp_service->state_type==HARD_STATE && state_change==FALSE && state_was_logged==FALSE && strcmp(old_plugin_output,temp_service->plugin_output)){ if((temp_service->current_state==STATE_OK && temp_service->stalk_on_ok==TRUE)) log_service_event(temp_service,HARD_STATE); else if((temp_service->current_state==STATE_WARNING && temp_service->stalk_on_warning==TRUE)) log_service_event(temp_service,HARD_STATE); else if((temp_service->current_state==STATE_UNKNOWN && temp_service->stalk_on_unknown==TRUE)) log_service_event(temp_service,HARD_STATE); else if((temp_service->current_state==STATE_CRITICAL && temp_service->stalk_on_critical==TRUE)) log_service_event(temp_service,HARD_STATE); } /* set the checked flag */ temp_service->has_been_checked=TRUE; /* update the current service status log */ update_service_status(temp_service,FALSE); /* check to see if the service is flapping */ check_for_service_flapping(temp_service); /* check to see if the associated host is flapping */ check_for_host_flapping(temp_host); /* update service performance info */ update_service_performance_data(temp_service); /* check for external commands if we're doing so as often as possible */ if(command_check_interval==-1) check_for_external_commands(); } #ifdef DEBUG3 printf("Finished reaping service check results.\n"); #endif #ifdef DEBUG0 printf("reap_service_checks() end\n"); #endif return; } /* checks service dependencies */ int check_service_dependencies(service *svc,int dependency_type){ servicedependency *temp_dependency; service *temp_service; int state; #ifdef DEBUG0 printf("check_service_dependencies() start\n"); #endif /* check all dependencies... */ for(temp_dependency=servicedependency_list;temp_dependency!=NULL;temp_dependency=temp_dependency->next){ /* only check dependencies of the desired type (notification or execution) */ if(temp_dependency->dependency_type!=dependency_type) continue; /* if we have the right service... */ if(!strcmp(svc->host_name,temp_dependency->dependent_host_name) && !strcmp(svc->description,temp_dependency->dependent_service_description)){ /* find the service we depend on... */ temp_service=find_service(temp_dependency->host_name,temp_dependency->service_description,NULL); if(temp_service==NULL) continue; /* get the status to use (use last hard state if its currently in a soft state) */ if(temp_service->state_type==SOFT_STATE && soft_state_dependencies==FALSE) state=temp_service->last_hard_state; else state=temp_service->current_state; /* is the service we depend on in state that fails the dependency tests? */ if(state==STATE_OK && temp_dependency->fail_on_ok==TRUE) return DEPENDENCIES_FAILED; if(state==STATE_WARNING && temp_dependency->fail_on_warning==TRUE) return DEPENDENCIES_FAILED; if(state==STATE_UNKNOWN && temp_dependency->fail_on_unknown==TRUE) return DEPENDENCIES_FAILED; if(state==STATE_CRITICAL && temp_dependency->fail_on_critical==TRUE) return DEPENDENCIES_FAILED; } } #ifdef DEBUG0 printf("check_service_dependencies() end\n"); #endif return DEPENDENCIES_OK; } /* check for services that never returned from a check... */ void check_for_orphaned_services(void){ service *temp_service; time_t current_time; time_t expected_time; char buffer[MAX_INPUT_BUFFER]; #ifdef DEBUG0 printf("check_for_orphaned_services() start\n"); #endif time(¤t_time); /* check all services... */ for(temp_service=service_list;temp_service!=NULL;temp_service=temp_service->next){ /* skip services that are not currently executing */ if(temp_service->is_executing==FALSE) continue; /* determine the time at which the check results should have come in (allow 10 minutes slack time) */ expected_time=(time_t)(temp_service->next_check+temp_service->latency+service_check_timeout+service_check_reaper_interval+600); /* this service was supposed to have executed a while ago, but for some reason the results haven't come back in... */ if(expected_timedescription,temp_service->host_name); buffer[sizeof(buffer)-1]='\x0'; write_to_logs_and_console(buffer,NSLOG_RUNTIME_WARNING,TRUE); /* decremement the number of running service checks */ if(currently_running_service_checks>0) currently_running_service_checks--; /* disable the executing flag */ temp_service->is_executing=FALSE; /* schedule an immediate check of the service */ schedule_service_check(temp_service,current_time,FALSE); } } #ifdef DEBUG0 printf("check_for_orphaned_services() end\n"); #endif return; } /******************************************************************/ /******************* ROUTE/HOST CHECK FUNCTIONS *******************/ /******************************************************************/ /* check to see if we can reach the host */ int verify_route_to_host(host *hst){ int result; #ifdef DEBUG0 printf("verify_route_to_host() start\n"); #endif /* check route to the host (propagate problems and recoveries both up and down the tree) */ result=check_host(hst,PROPAGATE_TO_PARENT_HOSTS | PROPAGATE_TO_CHILD_HOSTS); #ifdef DEBUG0 printf("verify_route_to_host() end\n"); #endif return result; } /* see if the remote host is alive at all */ int check_host(host *hst,int propagation_options){ int result=HOST_UP; int parent_result=HOST_UP; host *parent_host=NULL; hostsmember *temp_hostsmember=NULL; host *child_host=NULL; int return_result=HOST_UP; int max_check_attempts=1; int route_blocked=TRUE; int old_state=HOST_UP; char old_plugin_output[MAX_PLUGINOUTPUT_LENGTH]=""; #ifdef DEBUG0 printf("check_host() start\n"); #endif /* make sure we return the original host state unless it changes... */ return_result=hst->status; /* set the checked flag */ hst->has_been_checked=TRUE; /* save the old plugin output and host state for use with state stalking routines */ old_state=hst->status; strncpy(old_plugin_output,(hst->plugin_output==NULL)?"":hst->plugin_output,sizeof(old_plugin_output)-1); old_plugin_output[sizeof(old_plugin_output)-1]='\x0'; /* if the host is already down or unreachable... */ if(hst->status!=HOST_UP){ /* how many times should we retry checks for this host? */ if(use_aggressive_host_checking==FALSE) max_check_attempts=1; else max_check_attempts=hst->max_attempts; /* retry the host check as many times as necessary or allowed... */ for(hst->current_attempt=1;hst->current_attempt<=max_check_attempts;hst->current_attempt++){ /* check the host */ result=run_host_check(hst); /* update performance data */ update_host_performance_data(hst,result,HARD_STATE); /* the host recovered from a hard problem... */ if(result==HOST_UP){ return_result=HOST_UP; /* handle the hard host recovery */ handle_host_state(hst,HOST_UP,HARD_STATE); /* propagate the host recovery upwards (at least one parent should be up now) */ if(propagation_options & PROPAGATE_TO_PARENT_HOSTS){ /* propagate to all parent hosts */ for(temp_hostsmember=hst->parent_hosts;temp_hostsmember!=NULL;temp_hostsmember=temp_hostsmember->next){ /* find the parent host */ parent_host=find_host(temp_hostsmember->host_name,NULL); /* check the parent host (and propagate upwards) if its not up */ if(parent_host!=NULL && parent_host->status!=HOST_UP) check_host(parent_host,PROPAGATE_TO_PARENT_HOSTS); } } /* propagate the host recovery downwards (children may or may not be down) */ if(propagation_options & PROPAGATE_TO_CHILD_HOSTS){ /* check all child hosts... */ for(child_host=host_list;child_host!=NULL;child_host=child_host->next){ /* if this is a child of the host, check it if it is not marked as UP */ if(is_host_immediate_child_of_host(hst,child_host)==TRUE && child_host->status!=HOST_UP) check_host(child_host,PROPAGATE_TO_CHILD_HOSTS); } } break; } } /* if the host isn't up and its currently marked as being unreachable, make absolutely sure it isn't down now. */ /* to do this we have to check the (saved) status of all parent hosts. this situation can occur if a host is */ /* unreachable, its parent recovers, but it does not return to an UP state. Even though it is not up, the host */ /* has changed from an unreachable to a down state */ if(hst->status==HOST_UNREACHABLE && result!=HOST_UP){ /* check all parent hosts */ for(temp_hostsmember=hst->parent_hosts;temp_hostsmember!=NULL;temp_hostsmember=temp_hostsmember->next){ /* find the parent host */ parent_host=find_host(temp_hostsmember->host_name,NULL); /* if at least one parent host is up, this host is no longer unreachable - it is now down instead */ if(parent_host->status==HOST_UP){ /* handle the hard host state change */ handle_host_state(hst,HOST_DOWN,HARD_STATE); break; } } } } /* else the host is supposed to be up right now... */ else{ for(hst->current_attempt=1;hst->current_attempt<=hst->max_attempts;hst->current_attempt++){ /* run the host check */ result=run_host_check(hst); /* update performance data */ if((result==HOST_UP && hst->current_attempt==1) || (result!=HOST_UP && hst->current_attempt>=hst->max_attempts)) update_host_performance_data(hst,result,HARD_STATE); else update_host_performance_data(hst,result,SOFT_STATE); /* if this is the last check and we still haven't had a recovery, check the parents and children and then handle the hard host state */ if(result!=HOST_UP && (hst->current_attempt==hst->max_attempts)){ /* check all parent hosts */ for(temp_hostsmember=hst->parent_hosts;temp_hostsmember!=NULL;temp_hostsmember=temp_hostsmember->next){ /* find the parent host */ parent_host=find_host(temp_hostsmember->host_name,NULL); /* check the parent host, assume its up if we can't find it, use the parent host's "old" status if we shouldn't propagate */ if(parent_host==NULL) parent_result=HOST_UP; else if(propagation_options & PROPAGATE_TO_PARENT_HOSTS) parent_result=check_host(parent_host,PROPAGATE_TO_PARENT_HOSTS); else parent_result=parent_host->status; /* if this parent host was up, the route is okay */ if(parent_result==HOST_UP) route_blocked=FALSE; /* we could break out of this loop once we've found one parent host that is up, but we won't because we want immediate notification of state changes (i.e. recoveries) for parent hosts */ } /* if this host has at least one parent host and the route to this host is blocked, it is unreachable */ if(route_blocked==TRUE && hst->parent_hosts!=NULL) return_result=HOST_UNREACHABLE; /* else the parent host is up (or there isn't a parent host), so this host must be down */ else return_result=HOST_DOWN; /* handle the hard host state (whether it is DOWN or UNREACHABLE) */ handle_host_state(hst,return_result,HARD_STATE); /* propagate the host problem to all child hosts (they should be unreachable now unless they have multiple parents) */ if(propagation_options & PROPAGATE_TO_CHILD_HOSTS){ /* check all child hosts... */ for(child_host=host_list;child_host!=NULL;child_host=child_host->next){ /* if this is a child of the host, check it if it is not marked as UP */ if(is_host_immediate_child_of_host(hst,child_host)==TRUE && child_host->status!=HOST_UP) check_host(child_host,PROPAGATE_TO_CHILD_HOSTS); } } } /* handle any soft states (during host check retries that return a non-ok state) */ else if(result!=HOST_UP || (result==HOST_UP && hst->current_attempt!=1)) handle_host_state(hst,result,SOFT_STATE); /* the host recovered (or it was never down), so break out of the check loop */ if(result==HOST_UP){ /* this is the first check of the host */ if(hst->has_been_checked==FALSE){ /* set the checked flag */ hst->has_been_checked=TRUE; /* update the status log with the current host info */ update_host_status(hst,FALSE); } break; } } } /* adjust the current check number if we exceeded the max count */ if(hst->current_attempt>hst->max_attempts) hst->current_attempt=hst->max_attempts; /* if the host didn't change state and the plugin output differs from the last time it was checked, log the current state/output if state stalking is enabled */ if(old_state==return_result && strcmp(old_plugin_output,hst->plugin_output)){ if(return_result==HOST_UP && hst->stalk_on_up==TRUE) log_host_event(hst,HOST_UP,HARD_STATE); if(return_result==HOST_DOWN && hst->stalk_on_down==TRUE) log_host_event(hst,HOST_DOWN,HARD_STATE); if(return_result==HOST_UNREACHABLE && hst->stalk_on_unreachable==TRUE) log_host_event(hst,HOST_UNREACHABLE,HARD_STATE); } /* check to see if the associated host is flapping */ check_for_host_flapping(hst); /* check for external commands if we're doing so as often as possible */ if(command_check_interval==-1) check_for_external_commands(); #ifdef DEBUG3 printf("\tHost Check Result: Host '%s' is ",hst->name); if(return_result==HOST_UP) printf("UP\n"); else if(return_result==HOST_DOWN) printf("DOWN\n"); else if(return_result==HOST_UNREACHABLE) printf("UNREACHABLE\n"); #endif #ifdef DEBUG0 printf("check_host() end\n"); #endif return return_result; } /* run an "alive" check on a host */ int run_host_check(host *hst){ int result=STATE_OK; int return_result=HOST_UP; char processed_check_command[MAX_INPUT_BUFFER]; char raw_check_command[MAX_INPUT_BUFFER]; char temp_buffer[MAX_INPUT_BUFFER]; command *temp_command; time_t start_time; time_t finish_time; char *temp_ptr; int early_timeout=FALSE; char temp_plugin_output[MAX_PLUGINOUTPUT_LENGTH]; #ifdef DEBUG0 printf("run_host_check() start\n"); #endif /* if checks are disabled, just return the last host state */ if(hst->checks_enabled==FALSE) return hst->status; /* if there is no host check command, just return with no error */ if(hst->host_check_command==NULL){ #ifdef DEBUG3 printf("\tNo host check command specified, so no check will be done (host assumed to be up)!\n"); #endif return HOST_UP; } /* find the command we use to check the host */ temp_command=find_command(hst->host_check_command,NULL); /* if we couldn't find the command, return with an error */ if(temp_command==NULL){ #ifdef DEBUG3 printf("\tCouldn't find the host check command!\n"); #endif return HOST_UP; } /* grab the host macros */ clear_volatile_macros(); grab_host_macros(hst); /* get the last host check time */ time(&start_time); hst->last_check=start_time; /* get the raw command line */ strncpy(raw_check_command,temp_command->command_line,sizeof(raw_check_command)); raw_check_command[sizeof(raw_check_command)-1]='\x0'; /* process any macros in the check command */ process_macros(raw_check_command,&processed_check_command[0],(int)sizeof(processed_check_command)); #ifdef DEBUG3 printf("\t\tRaw Command: %s\n",hst->host_check_command); printf("\t\tProcessed Command: %s\n",processed_check_command); #endif /* clear plugin output and performance data buffers */ strcpy(hst->plugin_output,""); strcpy(hst->perf_data,""); /* run the host check command */ result=my_system(processed_check_command,host_check_timeout,&early_timeout,temp_plugin_output,MAX_PLUGINOUTPUT_LENGTH-1); /* if the check timed out, report an error */ if(early_timeout==TRUE){ snprintf(hst->plugin_output,MAX_PLUGINOUTPUT_LENGTH-1,"Host check timed out after %d seconds\n",host_check_timeout); hst->plugin_output[MAX_PLUGINOUTPUT_LENGTH-1]='\x0'; /* log the timeout */ snprintf(temp_buffer,sizeof(temp_buffer)-1,"Warning: Host check command '%s' for host '%s' timed out after %d seconds\n",hst->host_check_command,hst->name,host_check_timeout); temp_buffer[sizeof(temp_buffer)-1]='\x0'; write_to_logs_and_console(temp_buffer,NSLOG_RUNTIME_WARNING,TRUE); } /* calculate total execution time */ time(&finish_time); hst->execution_time=(int)(finish_time-start_time); /* check for empty plugin output */ if(!strcmp(temp_plugin_output,"")) strcpy(temp_plugin_output,"(No Information Returned From Host Check)"); /* first part of plugin output (up to pipe) is status info */ temp_ptr=strtok(temp_plugin_output,"|\n"); /* make sure the plugin output isn't NULL */ if(temp_ptr==NULL){ strncpy(hst->plugin_output,"(No output returned from host check)",MAX_PLUGINOUTPUT_LENGTH-1); hst->plugin_output[MAX_PLUGINOUTPUT_LENGTH-1]='\x0'; } else{ strip(temp_ptr); if(!strcmp(temp_ptr,"")){ strncpy(hst->plugin_output,"(No output returned from host check)",MAX_PLUGINOUTPUT_LENGTH-1); hst->plugin_output[MAX_PLUGINOUTPUT_LENGTH-1]='\x0'; } else{ strncpy(hst->plugin_output,temp_ptr,MAX_PLUGINOUTPUT_LENGTH-1); hst->plugin_output[MAX_PLUGINOUTPUT_LENGTH-1]='\x0'; } } /* second part of plugin output (after pipe) is performance data (which may or may not exist) */ temp_ptr=strtok(NULL,"\n"); /* grab performance data if we found it available */ if(temp_ptr!=NULL){ strip(temp_ptr); strncpy(hst->perf_data,temp_ptr,MAX_PLUGINOUTPUT_LENGTH-1); hst->perf_data[MAX_PLUGINOUTPUT_LENGTH-1]='\x0'; } /* replace semicolons in plugin output (but not performance data) with colons */ temp_ptr=hst->plugin_output; while((temp_ptr=strchr(temp_ptr,';'))) *temp_ptr=':'; /* if we're not doing agressive host checking, let WARNING states indicate the host is up (fake the result to be STATE_OK) */ if(use_aggressive_host_checking==FALSE && result==STATE_WARNING) result=STATE_OK; if(result==STATE_OK) return_result=HOST_UP; else return_result=HOST_DOWN; #ifdef DEBUG3 printf("\tHost Check Result: Host '%s' is ",hst->name); if(return_result==HOST_UP) printf("UP\n"); else printf("DOWN\n"); #endif #ifdef DEBUG0 printf("run_host_check() end\n"); #endif return return_result; }