// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Sanjay Ghemawat
//         Chris Demetriou (refactoring)
//
// Profile current program by sampling stack-trace every so often
//
// TODO: Detect whether or not setitimer() applies to all threads in
// the process.  If so, instead of starting and stopping by changing
// the signal handler, start and stop by calling setitimer() and
// do nothing in the per-thread registration code.

#include "config.h"
#include "getpc.h"      // should be first to get the _GNU_SOURCE dfn
#include <signal.h>
#include <assert.h>
#include <stdio.h>
#include <errno.h>
#include <ucontext.h>
#include <string.h>
#include <sys/time.h>
#include <string>
#include <google/profiler.h>
#include <google/stacktrace.h>
#include "base/commandlineflags.h"
#include "base/logging.h"
#include "base/googleinit.h"
#include "base/mutex.h"
#include "base/spinlock.h"
#include "base/sysinfo.h"
#include "profiledata.h"
#ifdef HAVE_CONFLICT_SIGNAL_H
#include "conflict-signal.h"          /* used on msvc machines */
#endif

using std::string;

// Command-line flag naming the CPU profile output file.  When it is
// non-empty, the module initializer at the bottom of this file passes its
// value to ProfilerStart().
DEFINE_string(cpu_profile, "",
              "Profile file name (used if CPUPROFILE env var not specified)");

// Resolves the profile output path named by the environment variable
// env_name (for example CPUPROFILE).
//
// On success the path is stored in *path and true is returned.  If the
// variable is unset or empty, *path is left untouched and false is
// returned.
//
// The path is "munged" so that a parent process and its children do not
// all write to the same file:
//
//   * The first process to call this gets the path verbatim, and then
//     sets the high bit of the first character of the environment value
//     in place, as a marker for any process that inherits the environment.
//   * A process that finds the marker already set treats itself as a
//     child: it clears the marker in its own copy of the path and appends
//     "_<pid>" to it.
//
// Why not simply set a separate environment variable?  Because -- and
// this is seemingly a bug in gcc -- a setenv() done from a global
// constructor in a shared library is lost by the time main() runs, so
// modifying the existing value is the only safe option.
//
// This assumes profile filenames do not normally have the high bit set
// in their first character.  If that assumption is violated we still get
// a profile, just one with an unexpected name.
// TODO(csilvers): set an envvar instead when we can do it reliably.
static bool GetUniquePathFromEnv(const char* env_name, string* path) {
  char* const raw_value = getenv(env_name);
  if (raw_value == NULL || raw_value[0] == '\0') {
    return false;
  }

  const bool marked_by_parent = (raw_value[0] & 128) != 0;
  if (!marked_by_parent) {
    // First user of this path: take it as-is, then leave the marker
    // behind for any children to see.
    path->assign(raw_value);
    raw_value[0] |= 128;
    return true;
  }

  // The marker is set, so build "<path>_<pid>" with the marker stripped.
  char pid_text[64];                // pids are smaller than this!
  snprintf(pid_text, sizeof(pid_text), "%u",
           static_cast<unsigned int>(getpid()));
  path->assign(raw_value);
  path->append("_");
  path->append(pid_text);
  (*path)[0] &= 127;
  return true;
}


// Collects up all profile data.  This is a singleton (see instance_),
// which is initialized by a constructor at startup.  Samples are taken
// from a SIGPROF handler (prof_handler) and stored in a ProfileData
// collector.
class CpuProfiler {
 public:
  // The constructor reads CPUPROFILE_FREQUENCY, arms the interval timer
  // for the constructing thread, and starts profiling if CPUPROFILE is set.
  // The destructor calls Stop().
  CpuProfiler();
  ~CpuProfiler();

  // Start profiler to write profile info into fname.  filter may be NULL
  // (meaning every sample is recorded); otherwise it is called with
  // filter_arg from the signal handler, and a sample is recorded only
  // when it returns true.  Returns false if profiling is already enabled
  // or the collector could not be started.
  bool Start(const char* fname, bool (*filter)(void*), void* filter_arg);

  // Stop profiling and write the data to disk.  Does nothing if
  // profiling is not enabled.
  void Stop();

  // Write the data to disk (and continue profiling).  Does nothing if
  // profiling is not enabled.
  void FlushTable();

  // Returns whether the collector is currently enabled.
  bool Enabled();

  // Fills *state with the collector's current state (enabled flag, start
  // time, number of samples gathered, and profile file name).
  void GetCurrentState(ProfilerState* state);

  // Start interval timer for the current thread.  We do this for
  // every known thread.  If profiling is off, the generated signals
  // are ignored, otherwise they are captured by prof_handler().
  void RegisterThread();

  // The one and only profiler object.
  static CpuProfiler instance_;

 private:
  // Frequencies are in timer interrupts per second.
  static const int kMaxFrequency = 4000;        // Largest allowed frequency
  static const int kDefaultFrequency = 100;     // Default frequency

  // Sample frequency, read-only after construction.
  int           frequency_;

  // These locks implement the locking requirements described in the
  // ProfileData documentation, specifically:
  //
  // control_lock_ is held over all collector_ method calls except for
  // the 'Add' call made from the signal handler, to protect against
  // concurrent use of collector_'s control routines.
  //
  // signal_lock_ is held over calls to 'Start', 'Stop', 'Flush', and
  // 'Add', to protect against concurrent use of data collection and
  // writing routines.  Code other than the signal handler must disable
  // the timer signal while holding signal_lock_, to prevent deadlock.
  //
  // Locking order is control_lock_ first, and then signal_lock_.
  // signal_lock_ is acquired by the prof_handler without first
  // acquiring control_lock_.
  Mutex         control_lock_;
  SpinLock      signal_lock_;
  ProfileData   collector_;

  // Filter function and its argument, if any.  (NULL means include
  // all samples).  Set at start, read-only while running.  Written
  // while holding both control_lock_ and signal_lock_, read and
  // executed under signal_lock_.
  bool          (*filter_)(void*);
  void*         filter_arg_;

  // Sets the timer interrupt signal handler to one that stores the pc.
  static void EnableHandler();

  // Disables (ignores) the timer interrupt signal.
  static void DisableHandler();

  // Signal handler that records the interrupted pc in the profile data.
  static void prof_handler(int sig, siginfo_t*, void* signal_ucontext);
};

// Definition of the profile data structure singleton.  Its constructor
// checks whether profiling should be enabled (via the CPUPROFILE
// environment variable); its destructor calls Stop(), which writes any
// collected profile data out to disk.
CpuProfiler CpuProfiler::instance_;

// Constructs the singleton profiler.  Picks the sampling frequency from
// CPUPROFILE_FREQUENCY, arms the interval timer for the constructing
// thread, and starts profiling immediately if getenv("CPUPROFILE") names
// an output file.
CpuProfiler::CpuProfiler() {
  // Use the default unless the environment supplies a clean, positive
  // integer.  The trailing %c makes sscanf report 2 fields when anything
  // follows the number, which we treat as malformed.
  frequency_ = kDefaultFrequency;
  const char* const freq_env = getenv("CPUPROFILE_FREQUENCY");
  if (freq_env != NULL) {
    int requested = 0;
    char trailing;
    const int fields = sscanf(freq_env, "%d%c", &requested, &trailing);
    if (fields == 1 && requested > 0) {
      // Limit to kMaxFrequency.
      frequency_ = (requested > kMaxFrequency) ? kMaxFrequency : requested;
    }
  }

  // Ignore signals until we decide to turn profiling on.  (Paranoia;
  // should already be ignored.)
  DisableHandler();

  RegisterThread();

  // Profiling is enabled automatically only when CPUPROFILE yields a path.
  string profile_path;
  const bool have_path = GetUniquePathFromEnv("CPUPROFILE", &profile_path);
  if (!have_path) {
    return;
  }

#ifdef HAVE_GETEUID
  // We don't enable profiling if setuid -- it's a security risk.
  if (getuid() != geteuid()) {
    return;
  }
#endif

  const bool started = Start(profile_path.c_str(), NULL, NULL);
  if (!started) {
    RAW_LOG(FATAL, "Can't turn on cpu profiling for '%s': %s\n",
            profile_path.c_str(), strerror(errno));
  }
}

// Begins a profiling run that writes to fname.  filter (which may be NULL)
// and filter_arg are stored for the signal handler to consult on every
// sample.  Returns false if a run is already in progress or the collector
// could not be started; returns true once the SIGPROF handler is in place.
bool CpuProfiler::Start(const char* fname,
                        bool (*filter)(void*), void* filter_arg) {
  MutexLock control_guard(&control_lock_);

  const bool already_running = collector_.enabled();
  if (already_running) {
    return false;
  }

  {
    // The spin lock really is needed here: a prof_handler invocation left
    // over from a previous run may still be in flight (it could have been
    // switched out before grabbing the spin lock and resume right now).
    // Such a late sample is simply charged to the new profile, which is
    // harmless.
    SpinLockHolder signal_guard(&signal_lock_);

    const bool collector_started = collector_.Start(fname, frequency_);
    if (!collector_started) {
      return false;
    }

    filter_arg_ = filter_arg;
    filter_ = filter;

    // signal_guard is released at the end of this scope, i.e. before
    // prof_handler is installed; otherwise a signal delivered to this
    // thread could deadlock on the spin lock.
  }

  // Set up the handler for SIGPROF interrupts.
  EnableHandler();
  return true;
}

// Tears down the profiler by calling Stop(), which ends any profiling run
// that is still active and returns immediately if none is.
CpuProfiler::~CpuProfiler() {
  this->Stop();
}

// Ends the current profiling run, if there is one, and has the collector
// write out whatever profile data was gathered.
void CpuProfiler::Stop() {
  MutexLock control_guard(&control_lock_);

  const bool running = collector_.enabled();
  if (!running) {
    return;
  }

  // Start ignoring timer signals.  A handler invocation may already have
  // begun without yet holding signal_lock_; taking signal_lock_ below,
  // combined with collector_.Add() doing nothing once collection is
  // disabled, keeps such a late sample from causing a problem.
  DisableHandler();

  {
    SpinLockHolder signal_guard(&signal_lock_);
    collector_.Stop();
  }
}

// Has the collector flush its data while leaving profiling running.
// Does nothing when profiling is not enabled.
void CpuProfiler::FlushTable() {
  MutexLock control_guard(&control_lock_);

  const bool running = collector_.enabled();
  if (!running) {
    return;
  }

  // The timer signal is ignored for as long as we hold signal_lock_;
  // taking a timer signal on this thread in the middle of the flush
  // would otherwise deadlock on the spin lock.
  DisableHandler();
  {
    SpinLockHolder signal_guard(&signal_lock_);
    collector_.FlushTable();
  }

  // Flush finished and lock released: resume sampling.
  EnableHandler();
}

// Reports whether the collector is currently enabled.  The query is made
// while holding control_lock_.
bool CpuProfiler::Enabled() {
  MutexLock control_guard(&control_lock_);
  const bool is_enabled = collector_.enabled();
  return is_enabled;
}

// Copies the collector's current state into *state.  The snapshot is
// taken while holding control_lock_; the copy into the caller's structure
// happens after the lock has been released.
void CpuProfiler::GetCurrentState(ProfilerState* state) {
  ProfileData::State snapshot;
  {
    MutexLock control_guard(&control_lock_);
    collector_.GetCurrentState(&snapshot);
  }

  state->enabled = snapshot.enabled;
  state->start_time = static_cast<time_t>(snapshot.start_time);
  state->samples_gathered = snapshot.samples_gathered;

  // strncpy does not terminate the destination when the source fills it,
  // so the final byte is always forced to NUL.
  const int name_capacity = sizeof(state->profile_name);
  strncpy(state->profile_name, snapshot.profile_name, name_capacity);
  state->profile_name[name_capacity - 1] = '\0';
}

// Arms the ITIMER_PROF interval timer so that it fires frequency_ times
// per second.  If profiling is off the resulting signals are ignored;
// otherwise they are handled by prof_handler().
//
// The period is split into whole seconds and microseconds because
// setitimer() requires tv_usec to be in the range [0, 999999].  A
// frequency of 1 (which the constructor accepts) gives a period of exactly
// 1000000 microseconds; storing that in tv_usec alone would make
// setitimer() fail with EINVAL and leave the timer unarmed.
//
// The return value of setitimer() is not checked; arming the timer
// remains best-effort.
void CpuProfiler::RegisterThread() {
  // TODO: Randomize the initial interrupt value?
  // TODO: Randomize the inter-interrupt period on every interrupt?
  const int kUsecPerSec = 1000000;
  const int period_usec = kUsecPerSec / frequency_;

  struct itimerval timer;
  timer.it_interval.tv_sec = period_usec / kUsecPerSec;
  timer.it_interval.tv_usec = period_usec % kUsecPerSec;
  timer.it_value = timer.it_interval;
  setitimer(ITIMER_PROF, &timer, 0);
}

// Installs prof_handler as the SIGPROF handler.  SA_SIGINFO selects the
// three-argument handler form, SA_RESTART is requested as well, and no
// additional signals are blocked while the handler runs.  A sigaction()
// failure trips the RAW_CHECK.
void CpuProfiler::EnableHandler() {
  struct sigaction sa;
  sigemptyset(&sa.sa_mask);
  sa.sa_flags = SA_RESTART | SA_SIGINFO;
  sa.sa_sigaction = prof_handler;
  RAW_CHECK(sigaction(SIGPROF, &sa, NULL) == 0, "sigaction failed");
}

// Sets the SIGPROF disposition to SIG_IGN, so timer signals are ignored
// until EnableHandler() is called.  A sigaction() failure trips the
// RAW_CHECK.
void CpuProfiler::DisableHandler() {
  struct sigaction sa;
  sigemptyset(&sa.sa_mask);
  sa.sa_flags = SA_RESTART;
  sa.sa_handler = SIG_IGN;
  RAW_CHECK(sigaction(SIGPROF, &sa, NULL) == 0, "sigaction failed");
}

// Signal handler that records the pc in the profile-data structure
//
// NOTE: it is possible for profiling to be disabled just as this
// signal handler starts, before signal_lock_ is acquired.  Therefore,
// collector_.Add must check whether profiling is enabled before
// trying to record any data.  (See also comments in Start and Stop.)
void CpuProfiler::prof_handler(int sig, siginfo_t*, void* signal_ucontext) {
  int saved_errno = errno;

  // Hold the spin lock while we're gathering the trace because there's
  // no real harm in holding it and there's little point in releasing
  // and re-acquiring it.  (We'll only be blocking Start, Stop, and
  // Flush.)  We make sure to release it before restoring errno.
  {
    SpinLockHolder sl(&instance_.signal_lock_);

    if (instance_.filter_ == NULL ||
        (*instance_.filter_)(instance_.filter_arg_)) {
      void* stack[ProfileData::kMaxStackDepth];

      // The top-most active routine doesn't show up as a normal
      // frame, but as the "pc" value in the signal handler context.
      stack[0] = GetPC(*reinterpret_cast<ucontext_t*>(signal_ucontext));

      // We skip the top two stack trace entries (this function and one
      // signal handler frame) since they are artifacts of profiling and
      // should not be measured.  Other profiling related frames may be
      // removed by "pprof" at analysis time.  Instead of skipping the top
      // frames, we could skip nothing, but that would increase the
      // profile size unnecessarily.
      int depth = GetStackTrace(stack + 1, arraysize(stack) - 1, 2);
      depth++;              // To account for pc value in stack[0];

      instance_.collector_.Add(depth, stack);
    }
  }

  errno = saved_errno;
}

extern "C" void ProfilerRegisterThread() {
  CpuProfiler::instance_.RegisterThread();
}

// DEPRECATED routines.  Both are intentionally empty: calling them has
// no effect.
extern "C" void ProfilerEnable() {
}

extern "C" void ProfilerDisable() {
}

extern "C" void ProfilerFlush() {
  CpuProfiler::instance_.FlushTable();
}

extern "C" bool ProfilingIsEnabledForAllThreads() {
  return CpuProfiler::instance_.Enabled();
}

extern "C" bool ProfilerStart(const char* fname) {
  return CpuProfiler::instance_.Start(fname, NULL, NULL);
}

extern "C" bool ProfilerStartFiltered(const char* fname,
                                      bool (*filter_in_thread)(void* arg),
                                      void *filter_in_thread_arg) {
  return CpuProfiler::instance_.Start(fname, filter_in_thread,
                                      filter_in_thread_arg);
}

extern "C" void ProfilerStop() {
  CpuProfiler::instance_.Stop();
}

// C entry point: fills *state with the profiler's current state by
// forwarding to the singleton's GetCurrentState().
extern "C" void ProfilerGetCurrentState(ProfilerState* state) {
  CpuProfiler& profiler = CpuProfiler::instance_;
  profiler.GetCurrentState(state);
}


// Module initializer for the profiler: if the cpu_profile flag is
// non-empty, start profiling into the file it names.  The result of
// ProfilerStart() is not checked here.
// NOTE(review): presumably this body runs once during program startup --
// confirm against the REGISTER_MODULE_INITIALIZER definition in
// base/googleinit.h.
REGISTER_MODULE_INITIALIZER(profiler, {
  if (!FLAGS_cpu_profile.empty()) {
    ProfilerStart(FLAGS_cpu_profile.c_str());
  }
});


// syntax highlighted by Code2HTML, v. 0.9.1