///###////////////////////////////////////////////////////////////////////////
//
// Burton Computer Corporation
// http://www.burton-computer.com
// http://www.cooldevtools.com
// $Id: FrequencyDBImpl_pbl.cc 272 2007-01-06 19:37:27Z brian $
//
// Copyright (C) 2007 Burton Computer Corporation
// ALL RIGHTS RESERVED
//
// This program is open source software; you can redistribute it
// and/or modify it under the terms of the Q Public License (QPL)
// version 1.0. Use of this software in whole or in part, including
// linking it (modified or unmodified) into other programs is
// subject to the terms of the QPL.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// Q Public License for more details.
//
// You should have received a copy of the Q Public License
// along with this program; see the file LICENSE.txt.  If not, visit
// the Burton Computer Corporation or CoolDevTools web site
// QPL pages at:
//
//    http://www.burton-computer.com/qpl.html
//    http://www.cooldevtools.com/qpl.html
//

#ifdef USE_PBL

#include <unistd.h>
#include <fcntl.h>
#include <strstream>
#include <stdexcept>
#include "CleanupManager.h"
#include "LockFile.h"
#include "WordData.h"
#include "FrequencyDBImpl_pbl.h"

static const int BUFFER_SIZE = 1024;
static const int CLEANUP_RECORDS_PER_TRANSACTION = 10;

const char *FrequencyDBImpl_pbl::SEARCH_SUFFIX("pkey");

inline int throw_on_error(const char *function_name,
                          int rc)
{
  if (rc >= 0) {
    return rc;
  }

  if (pbl_errno == PBL_ERROR_NOT_FOUND) {
    return PBL_ERROR_NOT_FOUND;
  }

  static char buffer[4096];
  ostrstream msg(buffer, sizeof(buffer));
  msg << function_name << ": " << pbl_errstr << " (" << pbl_errno << ")" << ends;
  throw runtime_error(buffer);
}

inline int warn_on_error(const char *function_name,
                         int rc)
{
  if (rc >= 0) {
    return rc;
  }

  if (pbl_errno == PBL_ERROR_NOT_FOUND) {
    return PBL_ERROR_NOT_FOUND;
  }

  cerr << "warning: pbl reported error: "
       << function_name
       << ": "
       << pbl_errstr
       << " (" << pbl_errno << ")"
       << endl;
  return rc;
}

FrequencyDBImpl *FrequencyDBImpl_pbl::factory(const DatabaseConfig *config)
{
  return new FrequencyDBImpl_pbl();
}

FrequencyDBImpl_pbl::FrequencyDBImpl_pbl()
  : m_kf(0),
    m_isReadOnly(false),
    m_inTransaction(false)
{
}

FrequencyDBImpl_pbl::~FrequencyDBImpl_pbl()
{
  close();
}

inline int copy_key(const string &str,
                    unsigned char *ptr)
{
  int len = str.size();
  if (len >= 254) {
    len = 254;
  }
  memcpy(ptr, str.c_str(), len);
  ptr[len] = (unsigned char)0;
  return len + 1;
}

bool FrequencyDBImpl_pbl::open(const string &arg_filename,
                               bool read_only,
                               int open_mode)
{
  close();
  assert(m_kf == 0);

  File db_file(arg_filename);
  db_file.setSuffix(SEARCH_SUFFIX);
  string filename(db_file.getPath());
  char *filename_ptr = const_cast<char *>(filename.c_str());

  if (!read_only) {
    m_kf = pblKfCreate(filename_ptr, 0);
  }
  if (!m_kf) {
    m_kf = pblKfOpen(filename_ptr, read_only ? 0 : 1, 0);
  }

  m_isReadOnly = read_only;
  m_inTransaction = false;

  if (is_debug) {
    cerr << "DATABASE OPENED " << db_file.getPath() << endl;
  }

  return m_kf != 0;
}

void FrequencyDBImpl_pbl::close()
{
  if (m_kf) {
    quietAbortTransaction();
    warn_on_error("pblKfFlush", pblKfFlush(m_kf));
    warn_on_error("pblKfClose", pblKfClose(m_kf));
    m_kf = 0;
  }
}

void FrequencyDBImpl_pbl::flush()
{
  if (m_kf) {
    throw_on_error("pblKfFlush", pblKfFlush(m_kf));
  }
}

void FrequencyDBImpl_pbl::quietAbortTransaction()
{
  assert(m_kf);

  if (m_inTransaction) {
    warn_on_error("pblKfCommit", pblKfCommit(m_kf, 1));
    m_inTransaction = false;
  }
}

void FrequencyDBImpl_pbl::beginTransaction()
{
  assert(m_kf);
  assert(!m_inTransaction);

  if (!m_inTransaction) {
    if (is_debug) {
      cerr << "starting transaction" << endl;
    }
    throw_on_error("pblKfStartTransaction", pblKfStartTransaction(m_kf));
    m_inTransaction = true;
  }
}

void FrequencyDBImpl_pbl::endTransaction(bool commit)
{
  assert(m_kf);
  assert(m_inTransaction);

  if (m_inTransaction) {
    if (is_debug) {
      cerr << "ending transaction: " << commit << endl;
    }
    throw_on_error("pblKfCommit", pblKfCommit(m_kf, commit ? 0 : 1));
    m_inTransaction = false;
  }
}

void FrequencyDBImpl_pbl::writeWord(const string &word,
                                    const WordData &counts)
{
  assert(m_kf);
  assert(!m_isReadOnly);

  bool delete_word = counts.totalCount() <= 0;
  unsigned char key[BUFFER_SIZE];
  int keylen = copy_key(word, key);

  WordData db_counts(counts);
  db_counts.toDatabaseOrder();

  int rc = throw_on_error("pblKfFind", pblKfFind(m_kf, PBLEQ, key, keylen, 0, 0));
  if (rc == PBL_ERROR_NOT_FOUND) {
    if (!delete_word) {
      if (is_debug) {
        cerr << "inserting word " << word << ": keylen " << keylen << endl;
      }
      throw_on_error("pblKfInsert", pblKfInsert(m_kf, key, keylen, (unsigned char *)&db_counts, sizeof(db_counts)));
    } else {
      if (is_debug) {
        cerr << "ignoring zero count word " << word << ": keylen " << keylen << endl;
      }
    }
  } else {
    assert(rc >= 0);
    if (delete_word) {
      if (is_debug) {
        cerr << "deleting word " << word << ": keylen " << keylen << endl;
      }
      throw_on_error("pblKfDelete", pblKfDelete(m_kf));
    } else {
      if (is_debug) {
        cerr << "updating word " << word << ": keylen " << keylen << endl;
      }
      throw_on_error("pblKfUpdateData", pblKfUpdate(m_kf, (unsigned char *)&db_counts, sizeof(db_counts)));
    }
  }
}

bool FrequencyDBImpl_pbl::readWord(const string &word,
                                   WordData &counts)
{
  assert(m_kf);

  unsigned char key[BUFFER_SIZE];
  int keylen = copy_key(word, key);

  int rc = throw_on_error("pblKfFind", pblKfFind(m_kf, PBLEQ, key, keylen, 0, 0));
  if (rc == PBL_ERROR_NOT_FOUND) {
    return false;
  }

  assert(rc == sizeof(counts));
  throw_on_error("pblKfRead", pblKfRead(m_kf, (unsigned char *)&counts, sizeof(counts)));

  counts.toHostOrder();
  return true;
}

bool FrequencyDBImpl_pbl::getWord(int pbl_code,
                                  string &word,
                                  WordData &counts)
{
  char key[BUFFER_SIZE];
  int keylen = 0;
  int rc = 0;

  switch (pbl_code) {
  case PBLFIRST:
    rc = throw_on_error("pblKfGetAbs", pblKfGetAbs(m_kf, 0, key, &keylen));
    break;

  case PBLTHIS:
    rc = throw_on_error("pblKfThis", pblKfThis(m_kf, key, &keylen));
    break;

  default:
    assert(pbl_code == PBLNEXT);
    rc = throw_on_error("pblKfNext", pblKfNext(m_kf, key, &keylen));
    break;
  }

  if (rc == PBL_ERROR_NOT_FOUND) {
    return false;
  }

  word.assign(key);
  throw_on_error("pblKfRead", pblKfRead(m_kf, (unsigned char *)&counts, sizeof(counts)));

  counts.toHostOrder();
  return true;
}

bool FrequencyDBImpl_pbl::firstWord(string &word,
                                    WordData &counts)
{
  return getWord(PBLFIRST, word, counts);
}

bool FrequencyDBImpl_pbl::nextWord(string &word,
                                   WordData &counts)
{
  return getWord(PBLNEXT, word, counts);
}

string FrequencyDBImpl_pbl::getDatabaseType() const
{
  return "DBM-pbl";
}

void FrequencyDBImpl_pbl::sweepOutOldTerms(const CleanupManager &cleanman)
{
  string word;
  WordData counts;

  assert(!m_isReadOnly);
  assert(!m_inTransaction);

  beginTransaction();
  try {
    int records_processed = 0;
    bool again = firstWord(word, counts);
    while (again) {
      bool delete_word = false;
      if (word.length() >= 3 && word[0] == '_' && word[1] == '_') {
        if (starts_with(word, "__MD5") && counts.totalCount() == 0) {
          // go ahead and remove digests that have a count of zero
          delete_word = true;
        } else {
          // ignore special words like __MD5 and __COUNT__
        }
      } else {
        delete_word = cleanman.shouldDelete(counts);
      }
      if (delete_word) {
        if (is_debug) {
          cerr << "sweepOutJunk: removing term " << word
               << " with total count " << counts.totalCount()
               << " and age " << counts.age()
               << endl;
        }
        throw_on_error("pblKfDelete", pblKfDelete(m_kf));
        again = getWord(PBLTHIS, word, counts);
        ++records_processed;
      } else {
        again = nextWord(word, counts);
      }
      if (records_processed >= CLEANUP_RECORDS_PER_TRANSACTION) {
        endTransaction(true);
        beginTransaction();
        records_processed = 0;
      }
    }
  } catch (runtime_error &ex) {
    quietAbortTransaction();
    throw;
  }
  endTransaction(true);

  flush();
}

#endif // USE_PBL


syntax highlighted by Code2HTML, v. 0.9.1