/* * Copyright (C) 2005 Network Applied Communication Laboratory Co., Ltd. * * This file is part of Rast. * See the file COPYING for redistribution information. * */ #include #include #include #include "rast/config.h" #include "rast/query.h" #include "rast/string.h" static rast_error_t * default_query_optimize(rast_query_t *query, rast_query_t **optimized_query, apr_pool_t *pool) { *optimized_query = query; return RAST_OK; } typedef struct { rast_query_t base; } null_query_t; static char * null_query_inspect(rast_query_t *base, apr_pool_t *pool) { return "(null)"; } static rast_error_t * null_query_exec(rast_query_t *query, rast_local_db_t *db, rast_query_option_t *options, rast_query_result_t **result, apr_pool_t *pool) { return rast_error(RAST_ERROR_EMPTY_QUERY, NULL); } static rast_query_type_t null_query_type = { null_query_inspect, null_query_exec, default_query_optimize, }; static rast_query_t * null_query_create(apr_pool_t *pool) { null_query_t *query; query = (null_query_t *) apr_palloc(pool, sizeof(null_query_t)); query->base.type = &null_query_type; return (rast_query_t *) query; } int rast_query_is_null(rast_query_t *query) { return query->type == &null_query_type; } typedef struct { rast_query_t base; const char *term; } term_query_t; static char * term_query_inspect(rast_query_t *base, apr_pool_t *pool) { term_query_t *query = (term_query_t *) base; return apr_psprintf(pool, "\"%s\"", query->term); } static rast_error_t * term_query_exec(rast_query_t *base, rast_local_db_t *db, rast_query_option_t *options, rast_query_result_t **result, apr_pool_t *pool) { term_query_t *query = (term_query_t *) base; int need_tf = options->score_method != RAST_SCORE_METHOD_NONE; return rast_text_index_search(db->text_index, query->term, need_tf, result, pool); } static rast_query_type_t term_query_type = { term_query_inspect, term_query_exec, default_query_optimize, }; static rast_query_t * term_query_create(apr_pool_t *pool, const char *term) { term_query_t *query; query = (term_query_t *) apr_palloc(pool, sizeof(term_query_t)); query->base.type = &term_query_type; query->term = term; return (rast_query_t *) query; } typedef struct { rast_query_t base; const char *name; const char *value; } property_query_t; static char * property_query_inspect(rast_query_t *base, const char *op, apr_pool_t *pool) { property_query_t *query = (property_query_t *) base; return apr_psprintf(pool, "(%s%s \"%s\")", query->name, op, query->value); } static rast_query_t * property_query_create(apr_pool_t *pool, rast_query_type_t *type, const char *name, const char *value) { property_query_t *query; query = (property_query_t *) apr_palloc(pool, sizeof(property_query_t)); query->base.type = type; query->name = name; query->value = value; return (rast_query_t *) query; } static char * property_pe_query_inspect(rast_query_t *base, apr_pool_t *pool) { return property_query_inspect(base, ":", pool); } static rast_error_t * get_property(rast_local_db_t *db, const char *name, rast_property_t **property, rast_property_index_t **property_index) { int i; for (i = 0; i < db->num_properties; i++) { if (strcmp(db->properties[i].name, name) == 0) { *property = db->properties + i; *property_index = db->property_indices + i; return RAST_OK; } } return rast_error(RAST_ERROR_INVALID_QUERY, "unknown property: %s", name); } static rast_error_t * property_pe_query_exec(rast_query_t *base, rast_local_db_t *db, rast_query_option_t *options, rast_query_result_t **result, apr_pool_t *pool) { property_query_t *query = (property_query_t *) base; rast_property_t *property; rast_property_index_t *property_index; rast_error_t *error; error = get_property(db, query->name, &property, &property_index); if (error != RAST_OK) { return error; } if (!(property->flags & RAST_PROPERTY_FLAG_TEXT_SEARCH)) { return rast_error(RAST_ERROR_INVALID_QUERY, ": not supported for %s", query->name); } return rast_text_index_search(property_index->text, query->value, 0, result, pool); } static rast_query_type_t property_pe_query_type = { property_pe_query_inspect, property_pe_query_exec, default_query_optimize, }; rast_query_result_t * rast_query_result_create(apr_pool_t *pool) { rast_query_result_t *result; result = (rast_query_result_t *) apr_palloc(pool, sizeof(rast_query_result_t)); APR_RING_INIT(&result->terms, rast_term_t, link); APR_RING_INIT(&result->candidates, rast_candidate_t, link); return result; } typedef struct { rast_query_t base; const char *name; const char *min_value; const char *max_value; int include_min; int include_max; } property_range_query_t; static char format_bool_value(int n) { return n ? 't' : 'f'; } static const char * format_string_value(apr_pool_t *pool, const char *s) { return s == NULL ? "nil" : apr_psprintf(pool, "\"%s\"", s); } static char * property_range_query_inspect(rast_query_t *base, apr_pool_t *pool) { property_range_query_t *query = (property_range_query_t *) base; return apr_psprintf(pool, "(range \"%s\" %s %c %s %c)", query->name, format_string_value(pool, query->min_value), format_bool_value(query->include_min), format_string_value(pool, query->max_value), format_bool_value(query->include_max)); } typedef struct { apr_pool_t *pool; rast_local_db_t *db; property_range_query_t *query; rast_type_e property_type; DB *inv_db; DBT db_key; DBT db_value; DBT min_key; DBT max_key; DBC *cursor; int found; } range_cursor_t; static rast_error_t * pack_property_key(apr_pool_t *pool, DBT *key, rast_type_e type, const char *value, int is_native) { char *invalid; rast_uint_t n, *fixed_number; if (value == NULL) { return RAST_OK; } memset(key, 0, sizeof(DBT)); switch (type) { case RAST_TYPE_STRING: case RAST_TYPE_DATE: case RAST_TYPE_DATETIME: key->data = (void *) value; key->size = strlen(value); break; case RAST_TYPE_UINT: n = strtol(value, &invalid, 10); if (*invalid != '\0') { return rast_error(RAST_ERROR_INVALID_QUERY, "invalid number: %s", value); } fixed_number = (rast_uint_t *) apr_palloc(pool, sizeof(rast_uint_t)); *fixed_number = rast_fix_byte_order(n, is_native); key->data = fixed_number; key->size = sizeof(rast_uint_t); break; } return RAST_OK; } static rast_error_t * range_cursor_create(apr_pool_t *pool, rast_local_db_t *db, property_range_query_t *query, range_cursor_t **result) { int dberr; rast_error_t *error; range_cursor_t *cursor; rast_property_t *property; rast_property_index_t *property_index; cursor = (range_cursor_t *) apr_palloc(pool, sizeof(range_cursor_t)); cursor->pool = pool; cursor->db = db; cursor->query = query; error = get_property(db, query->name, &property, &property_index); if (error != RAST_OK) { return error; } if (!(property->flags & RAST_PROPERTY_FLAG_SEARCH)) { return rast_error(RAST_ERROR_INVALID_QUERY, ": not supported for `%s'", query->name); } cursor->inv_db = property_index->inv; memset(&cursor->db_key, 0, sizeof(DBT)); memset(&cursor->db_value, 0, sizeof(DBT)); error = pack_property_key(pool, &cursor->min_key, property->type, query->min_value, db->is_native); if (error != RAST_OK) { return error; } error = pack_property_key(pool, &cursor->max_key, property->type, query->max_value, db->is_native); if (error != RAST_OK) { return error; } cursor->property_type = property->type; dberr = property_index->inv->cursor(property_index->inv, db->bdb_txn, &cursor->cursor, 0); if (dberr != 0) { return db_error_to_rast_error(dberr); } cursor->found = 0; *result = cursor; return RAST_OK; } static void range_cursor_destroy(range_cursor_t *cursor) { cursor->cursor->c_close(cursor->cursor); } static rast_error_t * check_db_result(int dberr, int *found) { if (dberr != 0 && dberr != DB_NOTFOUND) { return db_error_to_rast_error(dberr); } *found = (dberr != DB_NOTFOUND); return RAST_OK; } static rast_error_t * range_cursor_skip_min_value(range_cursor_t *cursor) { int dberr; if (cursor->query->min_value != NULL && !cursor->query->include_min) { while (rast_compare_keys(cursor->property_type, cursor->inv_db, &cursor->db_key, &cursor->min_key) == 0) { dberr = cursor->cursor->c_get(cursor->cursor, &cursor->db_key, &cursor->db_value, DB_NEXT); if (dberr != 0) { return check_db_result(dberr, &cursor->found); } } } return RAST_OK; } static rast_error_t * range_cursor_get_first(range_cursor_t *cursor) { int dberr; rast_error_t *error; if (cursor->query->min_value == NULL) { dberr = cursor->cursor->c_get(cursor->cursor, &cursor->db_key, &cursor->db_value, DB_FIRST); } else { cursor->db_key.data = cursor->min_key.data; cursor->db_key.size = cursor->min_key.size; dberr = cursor->cursor->c_get(cursor->cursor, &cursor->db_key, &cursor->db_value, DB_SET_RANGE); } error = check_db_result(dberr, &cursor->found); if (error != RAST_OK) { return error; } if (!cursor->found) { return RAST_OK; } return range_cursor_skip_min_value(cursor); } static rast_error_t * range_cursor_get_next(range_cursor_t *cursor) { int dberr; dberr = cursor->cursor->c_get(cursor->cursor, &cursor->db_key, &cursor->db_value, DB_NEXT); return check_db_result(dberr, &cursor->found); } static int range_cursor_is_done(range_cursor_t *cursor) { int n; if (!cursor->found) { return 1; } if (cursor->query->max_value == NULL) { return 0; } n = rast_compare_keys(cursor->property_type, cursor->inv_db, &cursor->db_key, &cursor->max_key); if (cursor->query->include_max) { return n > 0; } else { return n >= 0; } } static void range_query_add_candidate(apr_pool_t *pool, apr_array_header_t *candidates, range_cursor_t *cursor) { rast_candidate_t *candidate; candidate = (rast_candidate_t *) apr_palloc(pool, sizeof(rast_candidate_t)); candidate->doc_id = rast_fix_byte_order(*(rast_size_t *) cursor->db_value.data, cursor->db->is_native); APR_RING_INIT(&candidate->terms, rast_term_frequency_t, link); *(rast_candidate_t **) apr_array_push(candidates) = candidate; } static int compare_candidates(const void *v1, const void *v2) { const rast_candidate_t *c1 = *(const rast_candidate_t **) v1; const rast_candidate_t *c2 = *(const rast_candidate_t **) v2; return c1->doc_id - c2->doc_id; } static rast_error_t * range_query_result_create(apr_pool_t *pool, apr_array_header_t *candidates, rast_query_result_t **result) { int i; qsort(candidates->elts, candidates->nelts, sizeof(rast_candidate_t *), compare_candidates); *result = rast_query_result_create(pool); for (i = 0; i < candidates->nelts; i++) { APR_RING_INSERT_TAIL(&(*result)->candidates, ((rast_candidate_t **) candidates->elts)[i], rast_candidate_t, link); } return RAST_OK; } static rast_error_t * property_range_query_exec(rast_query_t *base, rast_local_db_t *db, rast_query_option_t *options, rast_query_result_t **result, apr_pool_t *pool) { property_range_query_t *query = (property_range_query_t *) base; range_cursor_t *cursor; apr_pool_t *sub_pool; rast_error_t *error; apr_array_header_t *candidates; apr_pool_create(&sub_pool, pool); error = range_cursor_create(sub_pool, db, query, &cursor); if (error != RAST_OK) { apr_pool_destroy(sub_pool); return error; } error = range_cursor_get_first(cursor); if (error != RAST_OK) { return error; } candidates = apr_array_make(sub_pool, 10, sizeof(rast_candidate_t *)); while (!range_cursor_is_done(cursor)) { range_query_add_candidate(pool, candidates, cursor); error = range_cursor_get_next(cursor); if (error != RAST_OK) { range_cursor_destroy(cursor); apr_pool_destroy(sub_pool); return error; } } error = range_query_result_create(pool, candidates, result); range_cursor_destroy(cursor); apr_pool_destroy(sub_pool); return error; } static rast_query_type_t property_range_query_type = { property_range_query_inspect, property_range_query_exec, default_query_optimize, }; static rast_query_t * property_range_query_create(apr_pool_t *pool, const char *name, const char *min_value, int include_min, const char *max_value, int include_max) { property_range_query_t *query; query = (property_range_query_t *) apr_palloc(pool, sizeof(property_range_query_t)); query->base.type = &property_range_query_type; query->name = name; query->min_value = min_value; query->include_min = include_min; query->max_value = max_value; query->include_max = include_max; return (rast_query_t *) query; } typedef struct { rast_query_type_t base; const char *operator; rast_candidate_t *(*merge_candidates)(rast_candidate_t *c1, rast_candidate_t *c2); void (*process_rest_candidates)(rast_candidate_ring_t *candidates1, rast_query_result_t *r1, rast_candidate_t *c1, rast_query_result_t *r2, rast_candidate_t *c2); } composite_query_type_t; typedef struct { rast_query_t base; rast_query_ring_t operands; } composite_query_t; static rast_error_t *composite_query_exec(rast_query_t *base, rast_local_db_t *db, rast_query_option_t *options, rast_query_result_t **result, apr_pool_t *pool); static char *composite_query_inspect(rast_query_t *base, apr_pool_t *pool); static rast_candidate_t * and_query_merge_candidates(rast_candidate_t *c1, rast_candidate_t *c2) { if (c2->doc_id > c1->doc_id) { APR_RING_REMOVE(c1, link); return NULL; } if (c1->doc_id == c2->doc_id) { APR_RING_CONCAT(&c1->terms, &c2->terms, rast_term_frequency_t, link); c2 = APR_RING_NEXT(c2, link); return NULL; } return APR_RING_NEXT(c2, link); } static void and_query_process_rest_candidates(rast_candidate_ring_t *candidates1, rast_query_result_t *r1, rast_candidate_t *c1, rast_query_result_t *r2, rast_candidate_t *c2) { if (c1 != APR_RING_SENTINEL(candidates1, rast_candidate_t, link)) { rast_candidate_t *last_c = APR_RING_LAST(candidates1); APR_RING_UNSPLICE(c1, last_c, link); } } static void merge_property_range_queries(property_range_query_t *rq, property_range_query_t *rq2) { if (rq->min_value == NULL) { rq->min_value = rq2->min_value; rq->include_min = rq2->include_min; } if (rq->max_value == NULL) { rq->max_value = rq2->max_value; rq->include_max = rq2->include_max; } } static rast_error_t * and_query_optimize(rast_query_t *base, rast_query_t **optimized_query, apr_pool_t *pool) { composite_query_t *query = (composite_query_t *) base; rast_query_t *q, *q2, *next_q2; property_range_query_t *rq, *rq2; q = APR_RING_FIRST(&query->operands); while (q != APR_RING_SENTINEL(&query->operands, rast_query_t, link)) { if (q->type == &property_range_query_type) { rq = (property_range_query_t *) q; q2 = APR_RING_NEXT(q, link); if (q2 != APR_RING_SENTINEL(&query->operands, rast_query_t, link) && q2->type == &property_range_query_type) { rq2 = (property_range_query_t *) q2; if (strcmp(rq->name, rq2->name) != 0) { break; } merge_property_range_queries(rq, rq2); next_q2 = APR_RING_NEXT(q2, link); APR_RING_REMOVE(q2, link); q2 = next_q2; } } q = APR_RING_NEXT(q, link); } q = APR_RING_FIRST(&query->operands); if (APR_RING_NEXT(q, link) == APR_RING_SENTINEL(&query->operands, rast_query_t, link)) { *optimized_query = q; } else { *optimized_query = base; } return RAST_OK; } static composite_query_type_t and_query_type = { { composite_query_inspect, composite_query_exec, and_query_optimize, }, "&", and_query_merge_candidates, and_query_process_rest_candidates, }; static rast_candidate_t * or_query_merge_candidates(rast_candidate_t *c1, rast_candidate_t *c2) { rast_candidate_t *next; if (c1->doc_id == c2->doc_id) { APR_RING_CONCAT(&c1->terms, &c2->terms, rast_term_frequency_t, link); c2 = APR_RING_NEXT(c2, link); return c2; } if (c2->doc_id >= c1->doc_id) { return NULL; } next = APR_RING_NEXT(c2, link); APR_RING_INSERT_BEFORE(c1, c2, link); return next; } static void or_query_process_rest_candidates(rast_candidate_ring_t *candidates1, rast_query_result_t *r1, rast_candidate_t *c1, rast_query_result_t *r2, rast_candidate_t *c2) { if (c2 != APR_RING_SENTINEL(&r2->candidates, rast_candidate_t, link)) { APR_RING_SPLICE_BEFORE(APR_RING_SENTINEL(&r1->candidates, rast_candidate_t, link), c2, APR_RING_LAST(&r2->candidates), link); } } static composite_query_type_t or_query_type = { { composite_query_inspect, composite_query_exec, default_query_optimize, }, "|", or_query_merge_candidates, or_query_process_rest_candidates }; static rast_candidate_t * not_query_merge_candidates(rast_candidate_t *c1, rast_candidate_t *c2) { if (c2->doc_id >= c1->doc_id) { if (c2->doc_id == c1->doc_id) { APR_RING_REMOVE(c1, link); } return NULL; } else { return APR_RING_NEXT(c2, link); } } static void not_query_process_rest_candidates(rast_candidate_ring_t *candidates1, rast_query_result_t *r1, rast_candidate_t *c1, rast_query_result_t *r2, rast_candidate_t *c2) { } static composite_query_type_t not_query_type = { { composite_query_inspect, composite_query_exec, default_query_optimize, }, "!", not_query_merge_candidates, not_query_process_rest_candidates }; static char * composite_query_inspect(rast_query_t *base, apr_pool_t *pool) { composite_query_type_t *query_type = (composite_query_type_t *) base->type; composite_query_t *query = (composite_query_t *) base; char *s; rast_query_t *q; s = apr_pstrcat(pool, "(", query_type->operator, NULL); for (q = APR_RING_FIRST(&query->operands); q != APR_RING_SENTINEL(&query->operands, rast_query_t, link); q = APR_RING_NEXT(q, link)) { s = apr_pstrcat(pool, s, " ", rast_query_inspect(q, pool), NULL); } s = apr_pstrcat(pool, s, ")", NULL); return s; } static rast_error_t * composite_query_exec(rast_query_t *base, rast_local_db_t *db, rast_query_option_t *options, rast_query_result_t **result, apr_pool_t *pool) { composite_query_type_t *query_type = (composite_query_type_t *) base->type; composite_query_t *query = (composite_query_t *) base; rast_query_t *q; rast_query_result_t *r, *r2; rast_candidate_t *c, *c2; rast_score_method_e save = options->score_method; rast_error_t *error; q = APR_RING_FIRST(&query->operands); if ((error = q->type->exec(q, db, options, &r, pool)) != RAST_OK) { return error; } if (query_type == ¬_query_type) { options->score_method = RAST_SCORE_METHOD_NONE; } q = APR_RING_NEXT(q, link); do { if ((error = q->type->exec(q, db, options, &r2, pool)) != RAST_OK) { return error; } APR_RING_CONCAT(&r->terms, &r2->terms, rast_term_t, link); c = APR_RING_FIRST(&r->candidates); c2 = APR_RING_FIRST(&r2->candidates); while (c != APR_RING_SENTINEL(&r->candidates, rast_candidate_t, link)) { rast_candidate_t *next_c = APR_RING_NEXT(c, link); while (c2 != APR_RING_SENTINEL(&r2->candidates, rast_candidate_t, link)) { rast_candidate_t *next_c2; next_c2 = query_type->merge_candidates(c, c2); if (next_c2 == NULL) { break; } c2 = next_c2; } if (c2 == APR_RING_SENTINEL(&r2->candidates, rast_candidate_t, link)) { break; } c = next_c; } query_type->process_rest_candidates(&r->candidates, r, c, r2, c2); q = APR_RING_NEXT(q, link); } while (q != APR_RING_SENTINEL(&query->operands, rast_query_t, link)); if (query_type == ¬_query_type) { options->score_method = save; } *result = r; return RAST_OK; } static void composite_query_add_operand(composite_query_t *query, rast_query_t *operand) { APR_RING_INSERT_TAIL(&query->operands, operand, rast_query_t, link); } static rast_query_t * composite_query_create(apr_pool_t *pool, composite_query_type_t *type, rast_query_t *q1, rast_query_t *q2) { composite_query_t *query; query = (composite_query_t *) apr_palloc(pool, sizeof(composite_query_t)); query->base.type = (rast_query_type_t *) type; APR_RING_INIT(&query->operands, rast_query_t, link); composite_query_add_operand(query, q1); composite_query_add_operand(query, q2); return (rast_query_t *) query; } char * rast_query_inspect(rast_query_t *query, apr_pool_t *pool) { return query->type->inspect(query, pool); } rast_error_t * rast_query_exec(rast_query_t *query, rast_local_db_t *db, rast_query_option_t *options, rast_query_result_t **result, apr_pool_t *pool) { return query->type->exec(query, db, options, result, pool); } rast_error_t * rast_query_optimize(rast_query_t *query, rast_query_t **optimized_query, apr_pool_t *pool) { return query->type->optimize(query, optimized_query, pool); } typedef enum { TOKEN_NONE, TOKEN_ERROR, TOKEN_EOF, TOKEN_TERM, TOKEN_AND, TOKEN_OR, TOKEN_NOT, TOKEN_LPAREN, TOKEN_RPAREN, TOKEN_COLON, TOKEN_EQ, TOKEN_LT, TOKEN_GT, TOKEN_LE, TOKEN_GE, } token_e; static const char * token_name(token_e token) { switch (token) { case TOKEN_NONE: return "NONE"; case TOKEN_ERROR: return "ERROR"; case TOKEN_EOF: return "EOF"; case TOKEN_TERM: return "TERM"; case TOKEN_AND: return "AND"; case TOKEN_OR: return "OR"; case TOKEN_NOT: return "NOT"; case TOKEN_LPAREN: return "LPAREN"; case TOKEN_RPAREN: return "RPAREN"; case TOKEN_COLON: return "COLON"; case TOKEN_EQ: return "EQ"; case TOKEN_LT: return "LT"; case TOKEN_GT: return "GT"; case TOKEN_LE: return "LE"; case TOKEN_GE: return "GE"; default: return "UNKNOWN"; } } typedef struct { apr_pool_t *pool; rast_tokenizer_t *tokenizer; const char *value; rast_error_t *error; } lexer_t; static lexer_t * lexer_create(apr_pool_t *pool, apr_pool_t *result_pool, rast_encoding_module_t *encoding_module, const char *s) { lexer_t *lexer; lexer = (lexer_t *) apr_palloc(pool, sizeof(lexer_t)); lexer->pool = result_pool; lexer->tokenizer = rast_char_tokenizer_create(pool, encoding_module, s, strlen(s)); lexer->value = NULL; lexer->error = RAST_OK; return lexer; } static token_e get_quoted_term(lexer_t *lexer, rast_char_t *ch) { rast_string_t *string; string = rast_string_create(lexer->pool, "", 0, 8); while (1) { if (rast_char_tokenizer_is_done(lexer->tokenizer)) { lexer->error = rast_error(RAST_ERROR_INVALID_QUERY, "unterminated string"); return TOKEN_ERROR; } rast_char_tokenizer_get_current(lexer->tokenizer, ch); switch (*ch->ptr) { case '"': rast_char_tokenizer_next(lexer->tokenizer); lexer->value = string->ptr; return TOKEN_TERM; case '\\': rast_char_tokenizer_next(lexer->tokenizer); if (rast_char_tokenizer_is_done(lexer->tokenizer)) { lexer->error = rast_error(RAST_ERROR_INVALID_QUERY, "unterminated string"); return TOKEN_ERROR; } rast_char_tokenizer_get_current(lexer->tokenizer, ch); rast_string_append(string, ch->ptr, ch->nbytes); break; default: rast_string_append(string, ch->ptr, ch->nbytes); break; } rast_char_tokenizer_next(lexer->tokenizer); } } static token_e get_term(lexer_t *lexer, rast_char_t *ch) { const char *start; start = ch->ptr; while (1) { if (rast_char_tokenizer_is_done(lexer->tokenizer)) { ch->ptr += ch->nbytes; break; } rast_char_tokenizer_get_current(lexer->tokenizer, ch); if (rast_char_is_space(ch) || *ch->ptr == ')') { goto end; } rast_char_tokenizer_next(lexer->tokenizer); } end: lexer->value = apr_pstrndup(lexer->pool, start, ch->ptr - start); return TOKEN_TERM; } static token_e lexer_get_token(lexer_t *lexer) { rast_char_t ch; while (!rast_char_tokenizer_is_done(lexer->tokenizer)) { rast_char_tokenizer_get_current(lexer->tokenizer, &ch); if (!rast_char_is_space(&ch)) { break; } rast_char_tokenizer_next(lexer->tokenizer); } if (rast_char_tokenizer_is_done(lexer->tokenizer)) { return TOKEN_EOF; } rast_char_tokenizer_get_current(lexer->tokenizer, &ch); switch (*ch.ptr) { case '&': rast_char_tokenizer_next(lexer->tokenizer); return TOKEN_AND; case '|': rast_char_tokenizer_next(lexer->tokenizer); return TOKEN_OR; case '!': case '-': rast_char_tokenizer_next(lexer->tokenizer); return TOKEN_NOT; case '(': rast_char_tokenizer_next(lexer->tokenizer); return TOKEN_LPAREN; case ')': rast_char_tokenizer_next(lexer->tokenizer); return TOKEN_RPAREN; case ':': rast_char_tokenizer_next(lexer->tokenizer); return TOKEN_COLON; case '=': rast_char_tokenizer_next(lexer->tokenizer); return TOKEN_EQ; case '<': rast_char_tokenizer_next(lexer->tokenizer); if (!rast_char_tokenizer_is_done(lexer->tokenizer)) { rast_char_tokenizer_get_current(lexer->tokenizer, &ch); if (*ch.ptr == '=') { rast_char_tokenizer_next(lexer->tokenizer); return TOKEN_LE; } } return TOKEN_LT; case '>': rast_char_tokenizer_next(lexer->tokenizer); if (!rast_char_tokenizer_is_done(lexer->tokenizer)) { rast_char_tokenizer_get_current(lexer->tokenizer, &ch); if (*ch.ptr == '=') { rast_char_tokenizer_next(lexer->tokenizer); return TOKEN_GE; } } return TOKEN_GT; case '"': rast_char_tokenizer_next(lexer->tokenizer); return get_quoted_term(lexer, &ch); default: return get_term(lexer, &ch); } } typedef struct { rast_encoding_module_t *encoding_module; const char *query_string; lexer_t *lexer; token_e token; } parser_t; static token_e lookahead(parser_t *parser) { if (parser->token == TOKEN_NONE) { parser->token = lexer_get_token(parser->lexer); } return parser->token; } static token_e shift_token(parser_t *parser) { token_e token; token = lookahead(parser); parser->token = TOKEN_NONE; return token; } static rast_error_t * token_error(parser_t *parser, token_e token, token_e expected) { if (token == TOKEN_ERROR) { return parser->lexer->error; } if (expected == TOKEN_NONE) { return rast_error(RAST_ERROR_INVALID_QUERY, "unexpected token %s: `%s'", token_name(token), parser->query_string); } else { return rast_error(RAST_ERROR_INVALID_QUERY, "unexpected token %s (expected %s): `%s'", token_name(token), token_name(expected), parser->query_string); } } static rast_error_t * match_token(parser_t *parser, token_e expected, apr_pool_t *pool) { token_e token; token = shift_token(parser); if (token != expected) { return token_error(parser, token, expected); } return RAST_OK; } static rast_error_t * parse_property_pe_query(parser_t *parser, const char *name, rast_query_t **result, rast_query_type_t *type, apr_pool_t *pool) { const char *value; rast_error_t *error; shift_token(parser); if ((error = match_token(parser, TOKEN_TERM, pool)) != RAST_OK) { return error; } value = parser->lexer->value; value = rast_normalize_text(parser->encoding_module, value, strlen(value), NULL, pool); *result = property_query_create(pool, &property_pe_query_type, name, value); return RAST_OK; } static rast_error_t * parse_optional_range(parser_t *parser, rast_query_t *query, apr_pool_t *pool) { property_range_query_t *range_query = (property_range_query_t *) query; token_e token; const char *p; rast_error_t *error; token = lookahead(parser); if (range_query->min_value == NULL && (token == TOKEN_LT || token == TOKEN_LE)) { shift_token(parser); if ((error = match_token(parser, TOKEN_TERM, pool)) != RAST_OK) { return error; } range_query->min_value = range_query->name; range_query->include_min = range_query->include_max; range_query->name = range_query->max_value; range_query->max_value = parser->lexer->value; range_query->include_max = (token == TOKEN_LE); } else if (range_query->max_value == NULL && (token == TOKEN_GT || token == TOKEN_GE)) { shift_token(parser); if ((error = match_token(parser, TOKEN_TERM, pool)) != RAST_OK) { return error; } range_query->max_value = range_query->name; range_query->include_max = range_query->include_min; range_query->name = range_query->min_value; range_query->min_value = parser->lexer->value; range_query->include_min = (token == TOKEN_GE); } for (p = range_query->name; *p != '\0'; p++) { if (strchr("abcdefghijklmnlopqrstuvwxyzABCDEFGHIJKLMNLOPQRSTUVWXYZ" "0123456789_-", *p) == NULL) { return rast_error(RAST_ERROR_GENERAL, "invalid property name: %s", range_query->name); } } return RAST_OK; } static rast_error_t * parse_property_range_query(parser_t *parser, token_e token, const char *name, rast_query_t **result, apr_pool_t *pool) { const char *value; rast_error_t *error; shift_token(parser); if ((error = match_token(parser, TOKEN_TERM, pool)) != RAST_OK) { return error; } value = parser->lexer->value; switch (token) { case TOKEN_EQ: *result = property_range_query_create(pool, name, value, 1, value, 1); break; case TOKEN_LT: *result = property_range_query_create(pool, name, NULL, 0, value, 0); break; case TOKEN_GT: *result = property_range_query_create(pool, name, value, 0, NULL, 0); break; case TOKEN_LE: *result = property_range_query_create(pool, name, NULL, 0, value, 1); break; case TOKEN_GE: *result = property_range_query_create(pool, name, value, 1, NULL, 0); break; default: return token_error(parser, token, TOKEN_NONE); } return parse_optional_range(parser, *result, pool); } static rast_error_t *parse_query(parser_t *parser, rast_query_t **result, apr_pool_t *pool); static rast_error_t * parse_term_or_property_query(parser_t *parser, rast_query_t **result, apr_pool_t *pool) { token_e token; const char *term; term = parser->lexer->value; shift_token(parser); token = lookahead(parser); switch (token) { case TOKEN_COLON: return parse_property_pe_query(parser, term, result, &property_pe_query_type, pool); case TOKEN_EQ: case TOKEN_LT: case TOKEN_GT: case TOKEN_LE: case TOKEN_GE: return parse_property_range_query(parser, token, term, result, pool); default: term = rast_normalize_text(parser->encoding_module, term, strlen(term), NULL, pool); *result = term_query_create(pool, term); return RAST_OK; } } static rast_error_t * parse_paren_query(parser_t *parser, rast_query_t **result, apr_pool_t *pool) { rast_error_t *error; shift_token(parser); if ((error = parse_query(parser, result, pool)) != RAST_OK) { return error; } if ((error = match_token(parser, TOKEN_RPAREN, pool)) != RAST_OK) { return error; } return RAST_OK; } static rast_error_t * parse_primary_query(parser_t *parser, rast_query_t **result, apr_pool_t *pool) { token_e token; token = lookahead(parser); switch (token) { case TOKEN_TERM: return parse_term_or_property_query(parser, result, pool); case TOKEN_LPAREN: return parse_paren_query(parser, result, pool); default: return token_error(parser, token, TOKEN_NONE); } } static rast_query_t * merge_queries(apr_pool_t *pool, rast_query_t *q1, rast_query_t *q2, composite_query_type_t *type) { if (q1 == NULL) { return q2; } else if (q1->type == (rast_query_type_t *) type) { composite_query_add_operand((composite_query_t *) q1, q2); return q1; } else { return composite_query_create(pool, type, q1, q2); } } static composite_query_type_t * get_composite_query_type(token_e token) { switch (token) { case TOKEN_AND: return &and_query_type; case TOKEN_OR: return &or_query_type; case TOKEN_NOT: return ¬_query_type; default: return NULL; } } static rast_error_t * parse_composite_query(parser_t *parser, rast_query_t **result, apr_pool_t *pool) { rast_query_t *query; composite_query_type_t *type; rast_error_t *error; if ((error = parse_primary_query(parser, &query, pool)) != RAST_OK) { return error; } while ((type = get_composite_query_type(lookahead(parser))) != NULL) { rast_query_t *q; shift_token(parser); if ((error = parse_primary_query(parser, &q, pool)) != RAST_OK) { return error; } query = merge_queries(pool, query, q, type); } *result = query; return RAST_OK; } static rast_error_t * parse_query(parser_t *parser, rast_query_t **result, apr_pool_t *pool) { rast_query_t *query = NULL; token_e token; rast_error_t *error; while ((token = lookahead(parser)) != TOKEN_EOF && token != TOKEN_RPAREN) { rast_query_t *q; if ((error = parse_composite_query(parser, &q, pool)) != RAST_OK) { return error; } query = merge_queries(pool, query, q, &and_query_type); } if (query == NULL) { *result = null_query_create(pool); } else { *result = query; } return RAST_OK; } rast_error_t * rast_parse_query(apr_pool_t *pool, rast_encoding_module_t *encoding_module, const char *s, rast_query_t **result) { apr_pool_t *sub_pool; rast_error_t *error; parser_t *parser; apr_pool_create(&sub_pool, pool); parser = (parser_t *) apr_palloc(sub_pool, sizeof(parser_t)); parser->encoding_module = encoding_module; parser->query_string = s; parser->lexer = lexer_create(sub_pool, pool, encoding_module, s); parser->token = TOKEN_NONE; error = parse_query(parser, result, pool); apr_pool_destroy(sub_pool); return error; } /* vim: set filetype=c sw=4 expandtab : */