/*
$Id: dbsearch.c 2300 2006-10-06 16:22:47Z aaron $
Copyright (C) 1999-2004 IC & S dbmail@ic-s.nl
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later
version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**
* \file dbsearch.c
* \brief functions implementing searching for messages
* the functions in this file used to be located in the
* dbpgsql.c (PostgreSQL) and dbmysql (MySQL), but have
* been made backend-independent, so they can be used
* by any SQL database.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "dbsearch.h"
#include "db.h"
#include "dbmailtypes.h"
#include "rfcmsg.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <limits.h>
/**
* abbreviated names of the months
*/
const char *month_desc[] = {
"Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
/* for issuing queries to the backend */
char query[DEF_QUERYSIZE];
/* used only locally */
/** \brief performs a binary search on an array to find key. Array should
* be ascending in values.
* \param array array to be searched through
* \param arraysize
* \param key key to be found in array
* \return
* - -1 if not found
* - index of key in array if found
*/
static int db_binary_search(const u64_t * array, int arraysize, u64_t key);
/**
* \brief perform search on on the body of a message
* \param msg mime_message_t struct of message
* \param sk search key
* \param msg_idnr
* \return
* - 0 if no match
* - 1 if match
*/
static int db_exec_search(mime_message_t * msg, search_key_t * sk,
u64_t msg_idnr);
/**
* \brief search the specified range of a message for a key
* \param start of range
* \param end of range
* \param key key to search for
* \param msg_idnr
* \return
* - 0 if not found
* - 1 if found
*/
static int db_search_range(db_pos_t start, db_pos_t end, const char *key,
u64_t msg_idnr);
/**
* \brief converts an IMAP date to a number (strictly ascending in date)
* valid IMAP dates:
* - d-mon-yyyy
* - dd-mon-yyyy ('-' may be a space)
* \param date the IMAP date
* \return integer representation of the date
*/
static int num_from_imapdate(const char *date);
int db_search(unsigned int *rset, int setlen, const char *key,
mailbox_t * mb, int type)
{
u64_t uid;
int msn;
unsigned i;
if (!key)
return -2;
memset(rset, 0, setlen * sizeof(int));
if (type == IST_IDATE) {
/** \todo this next solution (pms.%s) is really dirty. If anything,
the IMAP search algorithm is dirty, and should be fixed */
snprintf(query, DEF_QUERYSIZE,
"SELECT msg.message_idnr FROM dbmail_messages msg, dbmail_physmessage pms "
"WHERE msg.mailbox_idnr = '%llu' "
"AND msg.physmessage_id = pms.id "
"AND msg.status < '%d' "
"AND pms.%s", mb->uid, MESSAGE_STATUS_DELETE,
key);
} else if (type == IST_SORT) {
snprintf(query, DEF_QUERYSIZE,
"SELECT msg.message_idnr FROM dbmail_messages msg, dbmail_physmessage pms "
"WHERE msg.mailbox_idnr = '%llu' "
"AND msg.physmessage_id = pms.id "
"AND msg.status < 2 " "%s", mb->uid, key);
} else {
snprintf(query, DEF_QUERYSIZE,
"SELECT message_idnr FROM dbmail_messages "
"WHERE mailbox_idnr = '%llu' "
"AND status < '%d' AND %s", mb->uid,
MESSAGE_STATUS_DELETE, key);
}
if (db_query(query) == -1) {
trace(TRACE_ERROR, "%s,%s: could not execute query",
__FILE__, __func__);
return (-1);
}
for (i = 0; i < db_num_rows(); i++) {
uid = db_get_result_u64(i, 0);
if (type != IST_SORT) {
msn =
db_binary_search(mb->seq_list, mb->exists,
uid);
if (msn == -1 || msn >= setlen) {
db_free_result();
return 1;
}
rset[msn] = 1;
} else {
rset[i] = (i + 1);
}
}
db_free_result();
return 0;
}
void addto_btree_curr(sortitems_t ** root, char *str, int mid)
{
sortitems_t *curr = (sortitems_t *) dm_malloc(sizeof(sortitems_t));
curr->left = curr->right = NULL;
curr->mid = mid;
curr->ustr = (char *) dm_malloc(sizeof(char) * (strlen(str) + 8));
memset(curr->ustr, '\0', sizeof(char) * (strlen(str) + 8));
sprintf(curr->ustr, "%s%06d", str, mid);
list_btree_insert(root, curr);
}
int db_sort_parsed(unsigned int *rset, unsigned int setlen,
search_key_t * sk, mailbox_t * mb, int condition)
{
unsigned int i;
int result, idx = 0;
mime_message_t msg;
struct mime_record *mr;
sortitems_t *root = NULL;
if (!sk->search)
return 0;
if (mb->exists != setlen)
return 1;
if (condition != IST_SUBSEARCH_AND
&& condition != IST_SUBSEARCH_NOT)
memset(rset, 0, sizeof(int) * setlen);
/*create a btree for all messages hdrfld */
for (i = 0; i < setlen; i++) {
if (condition == IST_SUBSEARCH_AND && rset[i] == 0)
continue;
if (condition == IST_SUBSEARCH_NOT && rset[i] == 1)
continue;
memset(&msg, 0, sizeof(msg));
result = db_fetch_headers(mb->seq_list[i], &msg);
if (result != 0)
continue; /* ignore parse errors */
if (list_getstart(&msg.mimeheader)) {
mime_findfield(sk->hdrfld, &msg.mimeheader, &mr);
if (mr)
addto_btree_curr(&root, (char *) mr->value,
(i + 1));
}
if (list_getstart(&msg.rfcheader)) {
mime_findfield(sk->hdrfld, &msg.rfcheader, &mr);
if (mr)
addto_btree_curr(&root, (char *) mr->value,
(i + 1));
}
db_free_msg(&msg);
}
list_btree_traverse(root, &idx, rset); /* fill in the rset array with mid's */
list_btree_free(root);
return 0;
}
int db_search_parsed(unsigned int *rset, unsigned int setlen,
search_key_t * sk, mailbox_t * mb, int condition)
{
unsigned i;
int result;
mime_message_t msg;
if (mb->exists != setlen)
return 1;
if ((condition != IST_SUBSEARCH_AND
&& condition != IST_SUBSEARCH_NOT))
memset(rset, 0, sizeof(int) * setlen);
for (i = 0; i < setlen; i++) {
if (condition == IST_SUBSEARCH_AND && rset[i] == 0)
continue;
if (condition == IST_SUBSEARCH_NOT && rset[i] == 1)
continue;
memset(&msg, 0, sizeof(msg));
switch (sk->type) {
case IST_HDR:
case IST_HDRDATE_BEFORE:
case IST_HDRDATE_ON:
case IST_HDRDATE_SINCE:
result =
db_get_main_header(mb->seq_list[i],
&msg.rfcheader);
break;
default:
result = db_fetch_headers(mb->seq_list[i], &msg);
}
if (result != 0)
continue; /* ignore parse errors */
if (sk->type == IST_SIZE_LARGER) {
rset[i] =
((msg.rfcheadersize + msg.bodylines +
msg.bodysize) > sk->size)
? 1 : 0;
} else if (sk->type == IST_SIZE_SMALLER) {
rset[i] =
((msg.rfcheadersize + msg.bodylines +
msg.bodysize) < sk->size)
? 1 : 0;
} else {
rset[i] =
db_exec_search(&msg, sk, mb->seq_list[i]);
}
db_free_msg(&msg);
}
return 0;
}
int db_binary_search(const u64_t * array, int arraysize, u64_t key)
{
int low, high, mid;
low = 0;
high = arraysize - 1;
while (low <= high) {
mid = (high + low) / 2;
if (array[mid] < key)
low = mid + 1;
else if (array[mid] > key)
high = mid - 1;
else
return mid;
}
return -1; /* not found */
}
int db_exec_search(mime_message_t * msg, search_key_t * sk, u64_t msg_idnr)
{
struct element *el;
struct mime_record *mr;
int i, givendate, sentdate;
if (!sk->search)
return 0;
switch (sk->type) {
case IST_HDR:
if (list_getstart(&msg->mimeheader)) {
mime_findfield(sk->hdrfld, &msg->mimeheader, &mr);
if (mr) {
for (i = 0; mr->value[i]; i++)
if (strncasecmp
(&mr->value[i], sk->search,
strlen(sk->search)) == 0)
return 1;
}
}
if (list_getstart(&msg->rfcheader)) {
mime_findfield(sk->hdrfld, &msg->rfcheader, &mr);
if (mr) {
for (i = 0; mr->value[i]; i++)
if (strncasecmp
(&mr->value[i], sk->search,
strlen(sk->search)) == 0)
return 1;
}
}
break;
case IST_HDRDATE_BEFORE:
case IST_HDRDATE_ON:
case IST_HDRDATE_SINCE:
/* do not check children */
if (list_getstart(&msg->rfcheader)) {
mime_findfield("date", &msg->rfcheader, &mr);
if (mr
&& strlen(mr->value) >=
strlen("Day, d mon yyyy "))
/* 01234567890123456 */
{
givendate = num_from_imapdate(sk->search);
if (mr->value[6] == ' ')
mr->value[15] = 0;
else
mr->value[16] = 0;
sentdate =
num_from_imapdate(&mr->value[5]);
switch (sk->type) {
case IST_HDRDATE_BEFORE:
return sentdate < givendate;
case IST_HDRDATE_ON:
return sentdate == givendate;
case IST_HDRDATE_SINCE:
return sentdate > givendate;
}
}
}
return 0;
case IST_DATA_TEXT:
el = list_getstart(&msg->rfcheader);
while (el) {
mr = (struct mime_record *) el->data;
for (i = 0; mr->field[i]; i++)
if (strncasecmp(&mr->field[i], sk->search,
strlen(sk->search)) == 0)
return 1;
for (i = 0; mr->value[i]; i++)
if (strncasecmp(&mr->value[i], sk->search,
strlen(sk->search)) == 0)
return 1;
el = el->nextnode;
}
el = list_getstart(&msg->mimeheader);
while (el) {
mr = (struct mime_record *) el->data;
for (i = 0; mr->field[i]; i++)
if (strncasecmp(&mr->field[i], sk->search,
strlen(sk->search)) == 0)
return 1;
for (i = 0; mr->value[i]; i++)
if (strncasecmp(&mr->value[i], sk->search,
strlen(sk->search)) == 0)
return 1;
el = el->nextnode;
}
return 0;
case IST_DATA_BODY:
/* only check body if there are no children */
if (list_getstart(&msg->children))
break;
/* only check text bodies */
mime_findfield("content-type", &msg->mimeheader, &mr);
if (mr && strncasecmp(mr->value, "text", 4) != 0)
break;
mime_findfield("content-type", &msg->rfcheader, &mr);
if (mr && strncasecmp(mr->value, "text", 4) != 0)
break;
return db_search_range(msg->bodystart, msg->bodyend,
sk->search, msg_idnr);
}
/* no match found yet, try the children */
el = list_getstart(&msg->children);
while (el) {
if (db_exec_search
((mime_message_t *) el->data, sk, msg_idnr) == 1)
return 1;
el = el->nextnode;
}
return 0;
}
int db_search_range(db_pos_t start, db_pos_t end,
const char *key, u64_t msg_idnr)
{
unsigned i, j;
unsigned startpos, endpos;
int distance;
const char *query_result;
if (start.block > end.block) {
trace(TRACE_ERROR, "%s,%s: bad range specified",
__FILE__, __func__);
return 0;
}
if (start.block == end.block && start.pos > end.pos) {
trace(TRACE_ERROR, "%s,%s: bad range specified",
__FILE__, __func__);
return 0;
}
snprintf(query, DEF_QUERYSIZE,
"SELECT block.messageblk "
"FROM dbmail_messageblks block, dbmail_messages msg "
"WHERE block.physmessage_id = msg.physmessage_id "
"AND msg.message_idnr = '%llu' "
"ORDER BY block.messageblk_idnr", msg_idnr);
if (db_query(query) == -1) {
trace(TRACE_ERROR, "%s,%s: could not get message",
__FILE__, __func__);
return 0;
}
if (db_num_rows() == 0) {
trace(TRACE_ERROR, "%s,%s: bad range specified",
__FILE__, __func__);
db_free_result();
return 0;
}
query_result = db_get_result(start.block, 0);
if (!query_result) {
trace(TRACE_ERROR, "%s,%s: bad range specified",
__FILE__, __func__);
db_free_result();
return 0;
}
/* just one block? */
if (start.block == end.block) {
unsigned e = (end.pos > strlen(key)) ? end.pos - strlen(key) : 0;
for (i = start.pos; i <= e; i++) {
if (strncasecmp(&query_result[i], key, strlen(key))
== 0) {
db_free_result();
return 1;
}
}
db_free_result();
return 0;
}
/*
* multiple block range specified
*/
for (i = start.block; i <= end.block; i++) {
if (!query_result) {
trace(TRACE_ERROR, "%s,%s: bad range specified",
__FILE__, __func__);
db_free_result();
return 0;
}
startpos = (i == start.block) ? start.pos : 0;
endpos =
(i == end.block) ? end.pos + 1 : db_get_length(i, 0);
distance = endpos - startpos;
for (j = 0; j < distance - strlen(key); j++) {
if (strncasecmp(&query_result[j], key, strlen(key))
== 0) {
db_free_result();
return 1;
}
}
query_result = db_get_result(i, 0); /* fetch next row */
}
db_free_result();
return 0;
}
int num_from_imapdate(const char *date)
{
int j = 0, i;
char datenum[] = "YYYYMMDD";
char sub[4];
if (date[1] == ' ' || date[1] == '-')
j = 1;
strncpy(datenum, &date[7 - j], 4);
strncpy(sub, &date[3 - j], 3);
sub[3] = 0;
for (i = 0; i < 12; i++) {
if (strcasecmp(sub, month_desc[i]) == 0)
break;
}
i++;
if (i > 12)
i = 12;
sprintf(&datenum[4], "%02d", i);
if (j) {
datenum[6] = '0';
datenum[7] = date[0];
} else {
datenum[6] = date[0];
datenum[7] = date[1];
}
return atoi(datenum);
}
syntax highlighted by Code2HTML, v. 0.9.1