/****************************************************************************** * $Id: mlex.h,v 1.2 2005/09/25 11:55:53 gareuselesinge Exp $ * This file is part of liberopops (http://liberopops.sf.net) * * This file is distributed under the terms of GNU GPL license. * ******************************************************************************/ /******************************************************************************/ /*! * \file mlex.h * \brief Markup Language EXpression * \author Enrico Tassi */ /******************************************************************************/ #ifndef MLEX_H #define MLEX_H #include "list.h" //! the struct that is used to identify a piece of string struct chunk_t { int start,stop; }; //! \brief Finds Markup Language chuks matching exp /*! * What is an ml-expression?
* Simply a regular expression with some * more information about markup.
* Grammar: * * Example: * * Limitation: * *
* What is an ml-get-expression?
* It is the counterpart of a ml-expression. * It selects what is important and what not.
* Grammar: *
    *
  • MLGEX := REGGEX TAGGEX | MLGEX MLGEX | ''
  • *
  • TAGGEX := '<'EX'>' | '{'EX'}'
  • *
  • REGGEX := EX | '['EX']'
  • *
  • EX := 'X' | 'O'
  • *
* Example: *
    *
  • If the ml-expression is * ".*.*<.*img.*src.*>.*"
    * and the ml-get-expression is * "OOX"
    * and data is * "hello"
    * mlmatch returns a list of length 2 * (read: the number of "X") the first defining * "img src="nice.jpg"" and the second defining * "hello".
  • *
* Remember that if an optional string/tag is used in the ml-expression, * the corresponding optional string/tag signature must be used in * the ml-get-expression.
*
* A short explanation of how the engine works * (considering the previous example):
*
    *
  1. * tokenize the strings: *
      *
    • "hello" becomes * "","","","","","","hello",""
    • *
    • ".*.*<.*img.*src.*>.*" becomes * ".*","",".*","<.*img.*src.*>",".*",""
    • *
    • "OOX" becomes * "O","","O","","X",""
    • *
    *
  2. *
  3. * The ml-expression matches the data perfectly starting from the third token, * since each regexp matches the corresponding token. So we obtain this * sub-list of tokens * "","","","","hello","" *
  4. *
  5. * The sublist has the same length as the ret expression, and selecting only * the tokens with a corresponding X we obtain * {"img src="nice.jpg"","hello"}.
  6. * *
* Notes: *
    *
  • data, exp and ret MUST be modifiable. * They will not be altered, but * during processing they may be written to.
  • *
* * \param data is a Markup Language file like an html page (must be modifyable) * \param exp is the ml-expression (must be modifyable) * \param ret is the ml-get-expression (must be modifyable) * \return a list of list of chunk_t */ list_t* mlmatch(char* data, char* exp, char*ret); //! debug functions that prints the resul matrix void mlmatch_print_results(list_t*res,char* str); //! free the list of lists returned by mlmatch void mlmatch_free_results(list_t*res); //! \brief gets a cell from the result matrix /*! mlmatch returns a list of lists. this is a matrix. * each line is the list of X fields. * Example: *
    *
  • * src := "hello bad guys"
    * exp := "<.*>.*
    "
    * ret := "X"
    * calling
    * rc = mlmatch(src,exp,ret);
    * will return
    * {{"b","hello"},
    *  {"i","guys"} }
    * and the respective coordinates are from 0,0 to 1,1. * For example "hello" is 1,0. *
  • * The returned pointer must be freed by the caller. *
* * \param x column * \param y row * \param res returned by mlmatch * \param s the src string * \return a strdup of s chunked in the right position or NULL * */ char* mlmatch_get_result(int x,int y, list_t* res,char* s); #endif