///###////////////////////////////////////////////////////////////////////////
//
// Burton Computer Corporation
// http://www.burton-computer.com
// http://www.cooldevtools.com
// $Id: FrequencyDBImpl_bdb.cc 272 2007-01-06 19:37:27Z brian $
//
// Copyright (C) 2007 Burton Computer Corporation
// ALL RIGHTS RESERVED
//
// This program is open source software; you can redistribute it
// and/or modify it under the terms of the Q Public License (QPL)
// version 1.0. Use of this software in whole or in part, including
// linking it (modified or unmodified) into other programs is
// subject to the terms of the QPL.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// Q Public License for more details.
//
// You should have received a copy of the Q Public License
// along with this program; see the file LICENSE.txt.  If not, visit
// the Burton Computer Corporation or CoolDevTools web site
// QPL pages at:
//
//    http://www.burton-computer.com/qpl.html
//    http://www.cooldevtools.com/qpl.html
//

#ifdef USE_DB

#include <unistd.h>
#include <stdexcept>
#include <strstream>
#include "CleanupManager.h"
#include "LockFile.h"
#include "WordData.h"
#include "FrequencyDBImpl_bdb.h"

struct MyDBT : public DBT
{
  MyDBT()
  {
    memset(this, 0, sizeof(*this));
  }

  MyDBT(const string &word)
  {
    memset(this, 0, sizeof(*this));
    size = word.length() + 1;
    data = (char *)word.c_str();
  }

  MyDBT(const WordData &counts)
  {
    memset(this, 0, sizeof(*this));
    size = sizeof(WordData);
    data = (char *)&counts;
  }
};

inline int throw_on_error(const char *function_name,
                          int rc)
{
  if (rc == DB_NOTFOUND) {
    return rc;
  }
  if (rc != 0) {
    static char buffer[4096];
    ostrstream msg(buffer, sizeof(buffer));
    msg << function_name << ": " << db_strerror(rc) << " (" << rc << ")" << ends;
    throw runtime_error(buffer);
  }
  return rc;
}

inline int warn_on_error(const char *function_name,
                         int rc)
{
  if (rc == DB_NOTFOUND) {
    return rc;
  }
  if (rc != 0) {
    cerr << "warning: berkeley db reported error: "
         << function_name
         << ": "
         << db_strerror(rc)
         << " (" << rc << ")"
         << endl;
  }
  return rc;
}

FrequencyDBImpl *FrequencyDBImpl_bdb::factory(const DatabaseConfig *config)
{
  return new FrequencyDBImpl_bdb();
}

FrequencyDBImpl_bdb::FrequencyDBImpl_bdb()
  : m_env(0),
    m_file(0),
    m_cursor(0)
{
}

FrequencyDBImpl_bdb::~FrequencyDBImpl_bdb()
{
  close();
}

bool FrequencyDBImpl_bdb::open(const string &arg_filename,
                               bool read_only,
                               int create_mode)
{
  close();

  m_isReadOnly = read_only;

  File db_file(arg_filename);
  if (!openEnvironment(db_file, read_only, create_mode)) {
    return false;
  }

  if (!openDatabase(db_file, read_only, create_mode)) {
    closeEnvironment();
    return false;
  }

  if (is_debug) {
    cerr << "DATABASE OPENED " << db_file.getPath() << endl;
  }

  return true;
}

bool FrequencyDBImpl_bdb::openDatabase(const File &db_file,
                                       bool read_only,
                                       int create_mode)
{
  if (is_debug) {
    cerr << "OPENING DATABASE " << db_file.getPath() << endl;
  }

  int ret = db_create(&m_file, m_env, 0);
  if (ret != 0) {
    cerr << "error: unable to create database " << db_file.getPath() << ": " << db_strerror(ret) << endl;
    return false;
  }

  // SleepyCat in their infinite wisdom decided to change the open
  // function's signature in the 4.1 release.  Gee thanks for breaking
  // my code guys. That was real smart and so much better than
  // introducing a second open function rather than force me to embed
  // hideous ifdefs into my code.
  string filename(m_env ? db_file.getName() : db_file.getPath());
  int flags = read_only ? DB_RDONLY : DB_CREATE;
#if DB_VERSION_MAJOR >= 4 && DB_VERSION_MINOR >= 1
  ret = m_file->open(m_file, NULL, filename.c_str(), NULL, DB_BTREE, flags, create_mode);
#else
  ret = m_file->open(m_file, filename.c_str(), NULL, DB_BTREE, flags, create_mode);
#endif
  if (ret != 0) {
    cerr << "error: unable to open database " << db_file.getPath() << ": " << db_strerror(ret) << endl;
    m_file = 0;
    return false;
  }

  if (is_debug) {
    cerr << "OPENED DATABASE " << db_file.getPath() << endl;
  }

  return true;
}

bool FrequencyDBImpl_bdb::openEnvironment(const File &db_file,
                                          bool read_only,
                                          int create_mode)
{
#if USE_CDB
  File env_dir(db_file.parent());

  if (is_debug) {
    cerr << "OPENING ENVIRONMENT " << env_dir.getPath() << endl;
  }

  int ret = db_env_create(&m_env, 0);
  if (ret != 0) {
    cerr << "error: unable to create environment " << db_file.getPath() << ": " << db_strerror(ret) << endl;
    m_env = 0;
    return false;
  }

  int env_flags = DB_INIT_CDB | DB_INIT_MPOOL | DB_CREATE;
  ret = m_env->open(m_env, db_file.parent().getPath().c_str(), env_flags, create_mode);
  if (ret != 0) {
    if (read_only) {
      // we can still operate without the environment if we're in read-only mode
      m_env = 0;
    } else {
      cerr << "error: unable to open environment " << env_dir.getPath() << ": " << db_strerror(ret) << endl;
      m_env = 0;
      return false;
    }
  }
#else
  m_env = 0;
#endif

  return true;
}

void FrequencyDBImpl_bdb::closeCursor()
{
  if (m_cursor) {
    warn_on_error("c_close", m_cursor->c_close(m_cursor));
    m_cursor = 0;
  }
}

void FrequencyDBImpl_bdb::closeDatabase()
{
  if (m_file) {
    warn_on_error("db sync", m_file->sync(m_file, 0));
    warn_on_error("db close", m_file->close(m_file, 0));
    m_file = 0;
  }
}

void FrequencyDBImpl_bdb::closeEnvironment()
{
#ifdef USE_CDB
  if (m_env) {
    warn_on_error("env close", m_env->close(m_env, 0));
    m_env = 0;
  }
#endif
}

void FrequencyDBImpl_bdb::close()
{
  closeCursor();
  closeDatabase();
  closeEnvironment();
}

void FrequencyDBImpl_bdb::flush()
{
  if (is_debug) {
    cerr << "flushing database..." << endl;
  }
  throw_on_error("sync", m_file->sync(m_file, 0));
}

void FrequencyDBImpl_bdb::writeWord(const string &word,
                                    const WordData &counts)
{
  assert(m_file);

  MyDBT key(word);
  bool delete_word = counts.totalCount() <= 0;

  if (is_debug) {
    WordData old_counts;
    bool exists = loadKey(key, old_counts);
    if (delete_word) {
      cerr << "writeWord: deleting '" << word << "'"
           << endl;
    } else if (exists) {
      cerr << "writeWord: updating '" << word << "'"
           << " old (" << old_counts.goodCount() << "," << old_counts.spamCount() << ")"
           << " new (" << counts.goodCount() << "," << counts.spamCount() << ")"
           << endl;
    } else {
      cerr << "writeWord: inserting '" << word << "'"
           << " new (" << counts.goodCount() << "," << counts.spamCount() << ")"
           << endl;
    }
  }

  if (delete_word) {
    throw_on_error("del", m_file->del(m_file, NULL, &key, 0));
  } else {
    MyDBT value(counts);
    throw_on_error("put", m_file->put(m_file, NULL, &key, &value, 0));
  }
}

bool FrequencyDBImpl_bdb::readWord(const string &word,
                                   WordData &counts)
{
  assert(m_file);

  MyDBT key(word);
  return loadKey(key, counts);
}

bool FrequencyDBImpl_bdb::firstWord(string &word,
                                    WordData &counts)
{
  return firstWord(word, counts, true);
}

bool FrequencyDBImpl_bdb::firstWord(string &word,
                                    WordData &counts,
                                    bool read_only)
{
  closeCursor();

  assert(read_only || !m_isReadOnly);

  int cursor_flags = 0;
#ifdef USE_CDB
  if (!read_only) {
    cursor_flags |= DB_WRITECURSOR;
  }
#endif

  int ret = warn_on_error("cursor", m_file->cursor(m_file, NULL, &m_cursor, cursor_flags));
  if (ret != 0) {
    return false;
  }
  assert(m_cursor);

  return nextWord(word, counts);
}

bool FrequencyDBImpl_bdb::nextWord(string &word,
                                   WordData &counts)
{
  if (!m_cursor) {
    return false;
  }

  MyDBT key;
  MyDBT value;
  int ret = warn_on_error("c_get", m_cursor->c_get(m_cursor, &key, &value, DB_NEXT));
  if (ret != 0) {
    word.erase();
    counts.clear();
    closeCursor();
    return false;
  }

  if (!value.data || value.size != sizeof(WordData)) {
    word.erase();
    counts.clear();
    return false;
  }

  word.assign((const char *)key.data, max((u_int32_t)0, key.size - 1));
  counts = *((WordData *)value.data);
  return true;
}

bool FrequencyDBImpl_bdb::loadKey(DBT &key,
                                  string &word,
                                  WordData &counts) const
{
  if (key.data == NULL) {
    word.erase();
    counts.clear();
    return false;
  }

  if (key.size == 0) {
    word.erase();
  } else {
    word.assign((const char *)key.data, key.size - 1);
  }
  return loadKey(key, counts);
}

bool FrequencyDBImpl_bdb::loadKey(DBT &key,
                                  WordData &counts) const
{
  if (key.data == NULL) {
    counts.clear();
    return false;
  }

  MyDBT value;
  int ret = throw_on_error("get", m_file->get(m_file, NULL, &key, &value, 0));
  if (ret != 0) {
    counts.clear();
    return false;
  }

  if (!value.data || value.size != sizeof(WordData)) {
    counts.clear();
    return false;
  }

  counts = *((WordData *)value.data);
  return true;
}

string FrequencyDBImpl_bdb::getDatabaseType() const
{
  return "BerkeleyDB-btree";
}

void FrequencyDBImpl_bdb::sweepOutOldTerms(const CleanupManager &cleanman)
{
  string word;
  WordData counts;

  assert(!m_isReadOnly);

  bool again = firstWord(word, counts, false);
  while (again) {
    bool delete_word = false;
    if (word.length() >= 3 && word[0] == '_' && word[1] == '_') {
      if (starts_with(word, "__MD5") && counts.totalCount() == 0) {
        // go ahead and remove digests that have a count of zero
        delete_word = true;
      } else {
        // ignore special words like __MD5 and __COUNT__
      }
    } else {
      delete_word = cleanman.shouldDelete(counts);
    }
    if (delete_word) {
      if (is_debug) {
        cerr << "sweepOutJunk: removing term " << word
             << " with total count " << counts.totalCount()
             << " and age " << counts.age()
             << endl;
      }
      warn_on_error("c_del", m_cursor->c_del(m_cursor, 0));
    }
    again = nextWord(word, counts);
  }

  flush();
}

#endif // USE_DBM


syntax highlighted by Code2HTML, v. 0.9.1