/*
 * FILE:        scan_c.l
 *
 * DESCRIPTION: This is the lex file used to parse C source code.
 */

%{
/*
 * freescope - Free source browser
 * Copyright (C) 2001  Olivier Deme
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 *
 */


#ifdef HAVE_CONFIG_H
#include <config.h>
#endif /* HAVE_CONFIG_H */

#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <limits.h>
#include <unistd.h>
#include <sys/mman.h>

#include "parser.h"

#ifndef MAP_FILE
#define MAP_FILE 0      /* for systems other than 4.3+BSD */
#endif /* MAP_FILE */

#define FILE_MODE       (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)

    /* Entry point of the scanner; defined in the user-code section below. */
    void scan_c (char*, size_t);
    /* Helpers private to this file; defined in the user-code section below. */
    static int strip_comments (register char*, register char*, off_t);
    static int is_native_type (register char*, size_t);

    /* State shared between scan_c and the rule actions. */
    size_t       scope_length;        /* number of characters stored in scope */
    char         scope [255];         /* current scope: GLOBAL_SCOPE or the name of the function being defined */
    char         file [255];          /* name of the file being scanned, copied by scan_c */
    size_t       file_length;         /* length of the file name, as given to scan_c */
    int          scope_level  = 0;    /* brace nesting depth; 0 means outside every brace */
    int          call_level   = 0;    /* number of function calls whose ')' is still pending */
    line_nbr_t   lineno;              /* current line number; scan_c starts it at 1 */

%}


/* Character classes, keywords and identifiers. */
WS              [ \t\n]
WS_PTR          [ \t\n\*]
TYPE            char|double|float|int|long|short|signed|void|unsigned
RESERVED        auto|break|case|const|continue|default|do|else|enum|extern|for|goto|if|register|return|sizeof|static|struct|switch|typedef|union|volatile|while
KWORD           {TYPE}|{RESERVED}
LETTER          [a-zA-Z_]
DIGIT           [0-9]
HEXDIGIT        [a-fA-F0-9]
SYMBOL          {LETTER}({LETTER}|{DIGIT})*
CHAR            '.'
VAR             {SYMBOL}{WS}*
ARRAY           {SYMBOL}{WS}*(\[.*\]{WS}*)+
PREPROCESS      #{SYMBOL}
STRUCT          struct{WS}+
/* Declarations without initialiser: "TYPE ** name" followed by ';' or ','. */
VAR_DECL        {SYMBOL}{WS_PTR}+{VAR}{WS}*;
ARRAY_DECL      {SYMBOL}{WS_PTR}+{ARRAY}{WS}*;
VAR_MULTI_DECL  {SYMBOL}{WS_PTR}+{VAR}{WS}*,
VAR_MIDDLE_DECL {WS_PTR}*{SYMBOL}{WS}*,
ARRAY_MULTI_DECL {SYMBOL}{WS_PTR}+{ARRAY}{WS}*,
/* Continuation of a comma separated declaration (MULTI_DECL state). */
NEW_MULTI_DECL  {WS_PTR}*{VAR}{WS}*,
NEW_ARRAY_MULTI_DECL  {WS_PTR}*{ARRAY}{WS}*,
LAST_MULTI_DECL {WS_PTR}*{VAR}{WS}*\}?;
LAST_ARRAY_MULTI_DECL {WS_PTR}*{ARRAY}{WS}*\}?;
/* Declarations with initialiser. */
VAR_DECL_ASSIGN {SYMBOL}{WS_PTR}+{VAR}{WS}*=[^=]?
ARRAY_DECL_ASSIGN {SYMBOL}{WS_PTR}+{ARRAY}=[^=]?
/*
 * NOTE(review): in flex "\+" is a literal '+' character, not a repetition
 * operator, so MULTI_DECL_ASSIGN only matches text that contains "+,".
 * Confirm what was intended before changing it: "[^{]+," would also match
 * across lines.
 */
MULTI_DECL_ASSIGN {SYMBOL}{WS_PTR}+{SYMBOL}{WS}*=[^=]?[^{]\+,
NEW_MULTI_DECL_ASSIGN {WS_PTR}*{SYMBOL}{WS}*=[^=]?.*,
LAST_MULTI_DECL_ASSIGN {WS_PTR}*{SYMBOL}{WS}*=[^=]?[^,]*\}?;
/* Function definitions and calls; the FALSE_ forms start with a keyword. */
FALSE_FUNC_DEF_1 {KWORD}{WS}*\([^\)]*\){WS}*\{
FALSE_FUNC_DEF_2 ({SYMBOL}|{KWORD}){WS_PTR}+{KWORD}{WS}*\([^\)]*\){WS}*\{
STRUCT_DECL     struct{WS}+{SYMBOL}{WS}*\{
ANSI_FUNC_DEF_1 {SYMBOL}{WS}*\([^\)]*\){WS}*\{
ANSI_FUNC_DEF_2 {SYMBOL}{WS_PTR}+{SYMBOL}{WS}*\([^\)]*\){WS}*\{
KR_FUNC_DEF_1 {SYMBOL}{WS}*\([^\)]*\){WS}*([^\n]+;{WS}*)+\{
KR_FUNC_DEF_2 {SYMBOL}{WS_PTR}+{SYMBOL}{WS}*\([^\)]*\){WS}*([^\n]+;{WS}*)+\{
FUNC_CALL       {SYMBOL}{WS}*\(
FALSE_FUNC_CALL {KWORD}{WS}*\(
/* Statements that look like declarations but are not. */
FALSE_DECL1     {RESERVED}{WS}+{SYMBOL}{WS}*;
FALSE_DECL2     {PREPROCESS}{WS}+{SYMBOL}{WS}*;
/* Macro, union, enum, extern and struct/union variable declarations. */
DEFINITION      #\ *define[ \t]+{SYMBOL}
UNION_DECL      union{WS}+{SYMBOL}{WS}*\{
ENUM_DECL       enum{WS}+{SYMBOL}{WS}*\{
EXTERN_DECL     extern\ +
FALSE_STRUCT_UNION_DECL \}{WS_PTR}*{KWORD}{WS}*;
STRUCT_UNION_DECL \}{WS_PTR}*(({VAR}|{ARRAY}),{WS_PTR}*)*({SYMBOL}|{ARRAY}){WS}*;
/*
 * Exclusive start conditions, entered by the rule actions below:
 *   FC          after a function call name, until its ')'
 *   MULTI_DECL  inside a comma separated declaration or an enum body
 *   FUNC_ARG    after a function definition name or "extern"
 */
%x FC
%x MULTI_DECL
%x FUNC_ARG
%option stack

%%

<INITIAL,FC,MULTI_DECL,FUNC_ARG>{CHAR} {
    /* A quoted single character: consumed and ignored in every state. */
}

<INITIAL,FC,MULTI_DECL,FUNC_ARG>\n {
    /* Each newline consumed on its own advances the line counter. */
    lineno += 1;
}

<INITIAL,FC,MULTI_DECL,FUNC_ARG>\{ {
    /* An opening brace adds one level of nesting. */
    scope_level += 1;
}

<INITIAL,FC,MULTI_DECL,FUNC_ARG>\} {
    /*
     * A closing brace removes one level of nesting.  When the outermost
     * level is left, the current scope becomes the global scope again.
     */
    if (scope_level != 0)
    {
        scope_level -= 1;

        if (scope_level == 0)
        {
            scope_length = GLOBAL_SCOPE_LENGTH;
            strcpy(scope, GLOBAL_SCOPE);
        }
    }

    /* It also ends the start condition that was entered last, if any. */
    if (YYSTATE != INITIAL)
    {
        yy_pop_state();
    }
}

<INITIAL,FC,MULTI_DECL,FUNC_ARG>; {
    /* End of statement: unwind every start condition still on the stack. */
    for (;;)
    {
        if (YYSTATE == INITIAL)
            break;

        yy_pop_state();
    }
}

{EXTERN_DECL} {
    /*
     * "extern" followed by blanks: only the six characters of the keyword
     * stay consumed, the blanks are handed back, and scanning continues in
     * the FUNC_ARG state.
     */
    yy_push_state(FUNC_ARG);
    yyless(sizeof("extern") - 1);
}

<INITIAL,FC,MULTI_DECL,FUNC_ARG>{RESERVED} {
    /* Reserved words are consumed and never recorded. */
}

<INITIAL,FC,MULTI_DECL,FUNC_ARG>{TYPE} {
    /* Built-in type names are consumed and never recorded. */
}

<FUNC_ARG>\) {
    /* A closing parenthesis ends the FUNC_ARG state. */
    yy_pop_state();
}

<INITIAL,FC,MULTI_DECL,FUNC_ARG>{SYMBOL} {
    /* An identifier matched by no longer rule is recorded with kind ANY. */
    add2db(ANY, yytext, yyleng, scope, scope_length, lineno);
}

<INITIAL,FC,MULTI_DECL,FUNC_ARG>0x{HEXDIGIT}+ {
    /*
     * Hexadecimal constant: consumed here so that its letters are not
     * picked up by the identifier rule.
     */
}

{FALSE_FUNC_DEF_1} {
    /*
     * A keyword followed by a parenthesised expression and an opening
     * brace (an "if" or "while" statement, for instance) is not a function
     * definition.  Only the text in front of the opening parenthesis stays
     * consumed; everything from the parenthesis on is handed back because
     * it may contain function calls or other symbols.
     */
    register size_t paren;

    for (paren = 0; yytext[paren] != '('; ++paren)
    {
        /* Newlines that stay consumed still have to be counted */
        if (yytext[paren] == '\n')
            lineno += 1;
    }

    yyless(paren);
}

{FALSE_FUNC_DEF_2} {
    /*
     * Same as above with a leading word, as in "else if" followed by a
     * parenthesised expression and an opening brace.  The text in front of
     * the opening parenthesis stays consumed and the rest is scanned again.
     */
    register size_t paren;

    for (paren = 0; yytext[paren] != '('; ++paren)
    {
        /* Newlines that stay consumed still have to be counted */
        if (yytext[paren] == '\n')
            lineno += 1;
    }

    yyless(paren);
}

{ANSI_FUNC_DEF_1} |
{KR_FUNC_DEF_1} {
    /*
     * func (type1 arg1, type2 arg2, etc...) followed by the function body.
     *
     * The function name is recorded as a DEFINITION in the scope that was
     * current so far, then becomes the current scope.  Only the name stays
     * consumed: the argument list is scanned again in the FUNC_ARG state.
     */

    register size_t length = 0;
    register size_t kept;

    /* Get symbol length */
    while ((yytext[length] != ' ') &&
           (yytext[length] != '(') &&
           (yytext[length] != '\n'))
        ++length;

    add2db(DEFINITION,
           yytext,
           length,
           scope,
           scope_length,
           lineno);

    /* scope is a fixed-size buffer: never store more than it can hold */
    kept = (length < sizeof(scope)) ? length : sizeof(scope) - 1;

    memcpy(scope, yytext, kept);
    scope[kept] = '\0';
    scope_length = kept;

    yyless(length);
    yy_push_state(FUNC_ARG);
}

{ANSI_FUNC_DEF_2} |
{KR_FUNC_DEF_2} {
    /*
     * symbol func (type1 arg1, type2 arg2, etc...) followed by the body
     * symbol ** func (type1 arg1, type2 arg2, etc...) followed by the body
     *
     * The return type is recorded as ANY unless it is a native type.  The
     * function name is recorded as a DEFINITION in the scope that was
     * current so far, then becomes the current scope.  The argument list is
     * handed back and scanned again in the FUNC_ARG state.
     */

    register size_t length = 0;
    register size_t kept;
    register char*  p      = yytext;

    /* Get return type */
    while ((*p != ' ') && (*p != '*') && (*p != '\n'))
        ++p;

    if (!is_native_type(yytext, p - yytext))
    {
        add2db(ANY,
               yytext,
               p - yytext,
               scope,
               scope_length,
               lineno);
    }

    /* Skip '*' and spaces */
    while ((*p == ' ') || (*p == '*') || (*p == '\n'))
    {
        if (*p == '\n')
            ++lineno;
        ++p;
    }

    /* Get function name */
    while ((p[length] != ' ') && (p[length] != '(') && (p[length] != '\n'))
        ++length;

    add2db(DEFINITION,
           p,
           length,
           scope,
           scope_length,
           lineno);

    /* scope is a fixed-size buffer: never store more than it can hold */
    kept = (length < sizeof(scope)) ? length : sizeof(scope) - 1;

    memcpy(scope, p, kept);
    scope[kept] = '\0';
    scope_length = kept;

    yyless(length + (p - yytext));
    yy_push_state(FUNC_ARG);
}

<INITIAL,FC>{FALSE_FUNC_CALL} {
    /*
     * A keyword followed by an opening parenthesis, as in "if (...)", is
     * not a function call.  The keyword and the blanks after it stay
     * consumed; the parenthesis and what follows are scanned again.
     */
    register size_t paren;

    for (paren = 0; yytext[paren] != '('; ++paren)
    {
        /* Newlines that stay consumed still have to be counted */
        if (yytext[paren] == '\n')
            lineno += 1;
    }

    yyless(paren);
}

<INITIAL,FC>{FUNC_CALL} {
    /*
     * symbol followed by an opening parenthesis.  Inside a function this is
     * recorded as a FUNC_CALL; in the global scope it is recorded as ANY.
     */
    register size_t name_len = 0;

    /* The name ends at the first blank or at the parenthesis */
    while (!((yytext[name_len] == ' ') ||
             (yytext[name_len] == '(') ||
             (yytext[name_len] == '\n')))
    {
        ++name_len;
    }

    if (strcmp(scope, GLOBAL_SCOPE) == 0)
    {
        add2db(ANY, yytext, name_len, scope, scope_length, lineno);
    }
    else
    {
        add2db(FUNC_CALL, yytext, name_len, scope, scope_length, lineno);
    }

    /* The FC state is entered once, by the outermost call only */
    if (call_level++ == 0)
    {
        yy_push_state(FC);
    }

    /* Hand back everything after the name */
    yyless(name_len);
}

<FC>\) {
    /* One pending call is closed; the FC state ends with the last one. */
    call_level -= 1;

    if (call_level == 0)
    {
        yy_pop_state();
    }
}

{FALSE_DECL1} |         /* "return a;", etc...  are not var decl */
{FALSE_DECL2} {         /* "#endif SYMBOL;", etc...  are not var decl */
    register size_t      first_len;

    /*
     * Keep only the leading word (it ends at the first blank or '*') and
     * hand the rest back to the scanner.
     */
    for (first_len = 0;
         (yytext[first_len] != ' ') &&
         (yytext[first_len] != '\n') &&
         (yytext[first_len] != '*');
         ++first_len)
        ;

    yyless(first_len);
}

{DEFINITION} {
    /* A define directive: the macro name is recorded as a DEFINITION. */
    register char* name = strstr(yytext, "define") + 6;

    /* Move over the blanks between "define" and the macro name */
    for (; (*name == ' ') || (*name == '\n'); ++name)
    {
        if (*name == '\n')
            ++lineno;
    }

    /* The macro name runs up to the end of the matched text */
    add2db(DEFINITION, name, yytext + yyleng - name, scope, scope_length, lineno);
}

{VAR_DECL} {
    /* TYPE ** var ; */

    register int   n;
    register char* cur;

    /* The type name ends at the first blank or '*' */
    for (n = 0;
         (yytext[n] != ' ') && (yytext[n] != '\n') && (yytext[n] != '*');
         ++n)
        ;

    /* Native types are not recorded */
    if (!is_native_type(yytext, n))
        add2db(ANY, yytext, n, scope, scope_length, lineno);

    /* Move over the blanks and '*' between the type and the variable */
    for (cur = yytext + n;
         (*cur == ' ') || (*cur == '\n') || (*cur == '*');
         ++cur)
    {
        if (*cur == '\n')
            ++lineno;
    }

    /* The variable name runs up to a blank or the ';' */
    for (n = 0;
         (cur[n] != ' ') && (cur[n] != ';') && (cur[n] != '\n');
         ++n)
        ;

    add2db(DEFINITION, cur, n, scope, scope_length, lineno);
}

{ARRAY_DECL} {
    /* TYPE ** var [arg1] [arg2] ... ; */
    register int   n;
    register char* cur;

    /* The type name ends at the first blank or '*' */
    for (n = 0;
         (yytext[n] != ' ') && (yytext[n] != '\n') && (yytext[n] != '*');
         ++n)
        ;

    /* Native types are not recorded */
    if (!is_native_type(yytext, n))
        add2db(ANY, yytext, n, scope, scope_length, lineno);

    /* Move over the blanks and '*' between the type and the variable */
    for (cur = yytext + n;
         (*cur == ' ') || (*cur == '\n') || (*cur == '*');
         ++cur)
    {
        if (*cur == '\n')
            ++lineno;
    }

    /* The array name runs up to a blank or the first '[' */
    for (n = 0;
         (cur[n] != ' ') && (cur[n] != '\n') && (cur[n] != '[');
         ++n)
        ;

    add2db(DEFINITION, cur, n, scope, scope_length, lineno);

    /* The dimensions are handed back and scanned again */
    yyless(n + (cur - yytext));
}

{VAR_MULTI_DECL} {
    /* TYPE ** var1, */
    register int   n;
    register char* cur;

    /* The type name ends at the first blank or '*' */
    for (n = 0;
         (yytext[n] != ' ') && (yytext[n] != '\n') && (yytext[n] != '*');
         ++n)
        ;

    /* Native types are not recorded */
    if (!is_native_type(yytext, n))
        add2db(ANY, yytext, n, scope, scope_length, lineno);

    /* Move over the blanks and '*' between the type and the variable */
    for (cur = yytext + n;
         (*cur == ' ') || (*cur == '\n') || (*cur == '*');
         ++cur)
    {
        if (*cur == '\n')
            ++lineno;
    }

    /* The variable name runs up to a blank or the ',' */
    for (n = 0;
         (cur[n] != ' ') && (cur[n] != '\n') && (cur[n] != ',');
         ++n)
        ;

    add2db(DEFINITION, cur, n, scope, scope_length, lineno);

    /* The remaining variables are scanned in the MULTI_DECL state */
    yy_push_state(MULTI_DECL);

    yyless(n + (cur - yytext));
}

<MULTI_DECL>{NEW_MULTI_DECL} {
    /* ** var , */
    register int   n;
    register char* cur;

    /* Move over leading blanks and '*' */
    for (cur = yytext;
         (*cur == ' ') || (*cur == '\n') || (*cur == '*');
         ++cur)
    {
        if (*cur == '\n')
            ++lineno;
    }

    /* The variable name runs up to a blank or the ',' */
    for (n = 0;
         (cur[n] != ' ') && (cur[n] != '\n') && (cur[n] != ',');
         ++n)
        ;

    add2db(DEFINITION, cur, n, scope, scope_length, lineno);

    /* Everything after the name is handed back */
    yyless(n + (cur - yytext));
}

<MULTI_DECL>{LAST_MULTI_DECL} {
    /*
     * ** var ;
     *
     * Last name of a comma separated list.  The pattern also accepts a
     * closing brace right before the ';' (last enumerator of an enum, for
     * instance), so the name must end at that brace as well: the brace is
     * then handed back and matched by the closing-brace rule, which keeps
     * scope_level balanced.
     */
    register int   length = 0;
    register char* p = yytext;

    /* Skip blanks and pointers */
    while ((*p == ' ') || (*p == '\n') || (*p == '*'))
    {
        if (*p == '\n')
            ++lineno;
        ++p;
    }

    /* Get variable */
    while ((p[length] != ' ') &&
           (p[length] != '\n') &&
           (p[length] != '}') &&
           (p[length] != ';'))
    {
        ++length;
    }
    add2db(DEFINITION,
           p,
           length,
           scope,
           scope_length,
           lineno);

    /* Everything after the name is handed back; the list is finished */
    yyless(length + (p - yytext));
    yy_pop_state();
}

{ARRAY_MULTI_DECL} {
    /* TYPE ** var [arg1] [arg2] ... , */
    register int   n;
    register char* cur;

    /* The type name ends at the first blank or '*' */
    for (n = 0;
         (yytext[n] != ' ') && (yytext[n] != '\n') && (yytext[n] != '*');
         ++n)
        ;

    /* Native types are not recorded */
    if (!is_native_type(yytext, n))
        add2db(ANY, yytext, n, scope, scope_length, lineno);

    /* Move over the blanks and '*' between the type and the variable */
    for (cur = yytext + n;
         (*cur == ' ') || (*cur == '\n') || (*cur == '*');
         ++cur)
    {
        if (*cur == '\n')
            ++lineno;
    }

    /* The array name runs up to a blank or the first '[' */
    for (n = 0;
         (cur[n] != ' ') && (cur[n] != '\n') && (cur[n] != '[');
         ++n)
        ;

    add2db(DEFINITION, cur, n, scope, scope_length, lineno);

    /* Hand back what follows the name; the rest of the list is scanned in
       the MULTI_DECL state */
    yyless(n + (cur - yytext));
    yy_push_state(MULTI_DECL);
}

<MULTI_DECL>{NEW_ARRAY_MULTI_DECL} {
    /* ** var [arg1] [arg2] ... , */
    register int   n;
    register char* cur;

    /* Move over leading blanks and '*' */
    for (cur = yytext;
         (*cur == ' ') || (*cur == '\n') || (*cur == '*');
         ++cur)
    {
        if (*cur == '\n')
            ++lineno;
    }

    /* The array name runs up to a blank or the first '[' */
    for (n = 0;
         (cur[n] != ' ') && (cur[n] != '\n') && (cur[n] != '[');
         ++n)
        ;

    add2db(DEFINITION, cur, n, scope, scope_length, lineno);

    /* Everything after the name is handed back */
    yyless(n + (cur - yytext));
}

<MULTI_DECL>{LAST_ARRAY_MULTI_DECL} {
    /*
     * ** var [arg1] [arg2] ... ;
     *
     * Last array of a comma separated list: the name is recorded, the
     * dimensions are handed back and the MULTI_DECL state ends.
     */
    register int   length = 0;
    register char* p = yytext;

    /* Skip blanks and pointers */
    while ((*p == ' ') || (*p == '\n') || (*p == '*'))
    {
        if (*p == '\n')
            ++lineno;
        ++p;
    }

    /* Get variable */
    while ((p[length] != ' ') &&
           (p[length] != '\n') &&
           (p[length] != '['))
    {
        ++length;
    }

    add2db(DEFINITION,
           p,
           length,
           scope,
           scope_length,
           lineno);

    /*
     * p still points at the first character of the name, which is never a
     * blank, so there is nothing more to skip before handing the rest back.
     */
    yyless(length + (p - yytext));
    yy_pop_state();
}

{VAR_DECL_ASSIGN}   |
{ARRAY_DECL_ASSIGN} {
    /*
     * TYPE ** var =
     * TYPE ** var [arg1] [arg2] ... =
     */
    register int   n;
    register char* cur;

    /* The type name ends at the first blank or '*' */
    for (n = 0;
         (yytext[n] != ' ') && (yytext[n] != '\n') && (yytext[n] != '*');
         ++n)
        ;

    /* Native types are not recorded */
    if (!is_native_type(yytext, n))
        add2db(ANY, yytext, n, scope, scope_length, lineno);

    /* Move over the blanks and '*' between the type and the variable */
    for (cur = yytext + n;
         (*cur == ' ') || (*cur == '\n') || (*cur == '*');
         ++cur)
    {
        if (*cur == '\n')
            ++lineno;
    }

    /* The name runs up to a blank, a '[' or the '=' */
    for (n = 0;
         (cur[n] != ' ') && (cur[n] != '\n') &&
         (cur[n] != '[') && (cur[n] != '=');
         ++n)
        ;

    add2db(DEFINITION, cur, n, scope, scope_length, lineno);

    /* Dimensions and initialiser are handed back and scanned again */
    yyless(n + (cur - yytext));
}

{MULTI_DECL_ASSIGN} {
    /*
     * TYPE ** var = ... ,
     *
     * The type (unless native) is recorded as ANY and the variable as a
     * DEFINITION.  An initialiser that starts like an identifier is
     * recorded as ANY; any other initialiser (string, character, brace or
     * number) is handed back to the scanner.  The rest of the list is
     * scanned in the MULTI_DECL state.
     */
    register int   length = 0;
    register int   length2 = 0;
    register char* p;

    /* Add type in database */
    while ((yytext[length] != ' ') &&
           (yytext[length] != '\n') &&
           (yytext[length] != '*'))
    {
        ++length;
    }

    if (!is_native_type(yytext, length))
    {
        add2db(ANY,
               yytext,
               length,
               scope,
               scope_length,
               lineno);
    }

    p = yytext + length;

    /* Skip blanks and pointers */
    while ((*p == ' ') || (*p == '\n') || (*p == '*'))
    {
        if (*p == '\n')
            ++lineno;
        ++p;
    }

    /* Get variable: it may be followed directly by the '=' */
    length = 0;

    while ((p[length] != ' ') &&
           (p[length] != '\n') &&
           (p[length] != '=') &&
           (p[length] != ','))
    {
        ++length;
    }

    add2db(DEFINITION,
           p,
           length,
           scope,
           scope_length,
           lineno);

    p += length;

    /* Skip blanks and '=' */
    while ((*p == ' ') || (*p == '\n') || (*p == '='))
    {
        if (*p == '\n')
            ++lineno;
        ++p;
    }

    /*
     * Get initialiser.  The digit test used to be written with the decimal
     * values 60 and 71, which are the octal codes of '0' and '9'.
     */
    if ((*p != '"') &&
        (*p != '\'') &&
        (*p != '{') &&
        ((*p < '0') || (*p > '9')))
    {
        while ((p[length2] != ' ') && (p[length2] != '\n') &&
               (p[length2] != ','))
        {
            ++length2;
        }

        add2db(ANY,
               p,
               length2,
               scope,
               scope_length,
               lineno);
    }
    else
    {
        /* Scan the initialiser again from its first character */
        yyless(p - yytext);
    }
    yy_push_state(MULTI_DECL);
}

<MULTI_DECL>{NEW_MULTI_DECL_ASSIGN} {
    /*
     * ** arg = ... ,
     *
     * The variable is recorded as a DEFINITION.  An initialiser that starts
     * like an identifier is recorded as ANY; any other initialiser (string,
     * character, brace or number) is handed back to the scanner.
     */
    register int   length = 0;
    register int   length2 = 0;
    register char* p      = yytext;

    /* Skip blanks and pointers */
    while ((*p == ' ') || (*p == '\n') || (*p == '*'))
    {
        if (*p == '\n')
            ++lineno;
        ++p;
    }

    /* Get variable */
    while ((p[length] != ' ') &&
           (p[length] != '\n') &&
           (p[length] != '='))
    {
        ++length;
    }

    add2db(DEFINITION,
           p,
           length,
           scope,
           scope_length,
           lineno);

    p += length;

    /* Skip blanks and '=' */
    while ((*p == ' ') || (*p == '\n') || (*p == '='))
    {
        if (*p == '\n')
            ++lineno;
        ++p;
    }

    /*
     * Get initialiser.  The digit test used to be written with the decimal
     * values 60 and 71, which are the octal codes of '0' and '9'.
     */
    if ((*p != '"') &&
        (*p != '\'') &&
        (*p != '{') &&
        ((*p < '0') || (*p > '9')))
    {
        while ((p[length2] != ' ') && (p[length2] != '\n') &&
               (p[length2] != ','))
        {
            ++length2;
        }

        add2db(ANY,
               p,
               length2,
               scope,
               scope_length,
               lineno);
    }
    else
    {
        /*
         * Scan the initialiser again from its first character.  p already
         * points past the variable, so its length must not be added again:
         * doing so could move the scanner beyond the matched text.
         */
        yyless(p - yytext);
    }
}

<MULTI_DECL>{LAST_MULTI_DECL_ASSIGN} {
    /*
     * ** arg = ... ;
     *
     * Last variable of a comma separated list, with an initialiser.  The
     * matched text holds no type name: it starts with optional blanks and
     * '*' followed by the variable, so the variable is looked for directly.
     */
    register int   length = 0;
    register char* p      = yytext;

    /* Skip blanks and pointers */
    while ((*p == ' ') || (*p == '\n') || (*p == '*'))
    {
        if (*p == '\n')
            ++lineno;
        ++p;
    }

    /* Get variable */
    while ((p[length] != ' ') &&
           (p[length] != '\n') &&
           (p[length] != '='))
    {
        ++length;
    }

    add2db(DEFINITION,
           p,
           length,
           scope,
           scope_length,
           lineno);

    /* The initialiser is handed back and scanned again */
    yyless(length + (p - yytext));

    if (YYSTATE != INITIAL)
        yy_pop_state();
}

{STRUCT_DECL} |
{UNION_DECL} {
    /*
     * "struct" or "union", a tag and an opening brace: the tag is recorded
     * as a DEFINITION and the brace is handed back to the scanner.
     */

    register int   tag_len;
    register char* tag;

    /* Move over the keyword */
    for (tag = yytext; (*tag != ' ') && (*tag != '\n'); ++tag)
        ;

    /* Move over the blanks between the keyword and the tag */
    for (; (*tag == ' ') || (*tag == '\n'); ++tag)
    {
        if (*tag == '\n')
            ++lineno;
    }

    /* The tag runs up to a blank or the opening brace */
    for (tag_len = 0;
         (tag[tag_len] != ' ') && (tag[tag_len] != '\n') && (tag[tag_len] != '{');
         ++tag_len)
        ;

    add2db(DEFINITION, tag, tag_len, scope, scope_length, lineno);

    yyless(tag_len + (tag - yytext));
}

{FALSE_STRUCT_UNION_DECL} {
    /*
     * A closing brace followed by a keyword and ';' (break, else, ...)
     * declares nothing.  The brace is accounted for here and stays
     * consumed; the keyword is handed back.
     */
    if (scope_level != 0)
    {
        scope_level -= 1;

        if (scope_level == 0)
        {
            scope_length = GLOBAL_SCOPE_LENGTH;
            strcpy(scope, GLOBAL_SCOPE);
        }
    }

    yyless(1);
}

{STRUCT_UNION_DECL} {
    /*
     * A closing brace followed by a list of names: var1, var2[arg1], ... ;
     * The brace is accounted for here and stays consumed; the names are
     * handed back and scanned in the MULTI_DECL state.
     */
    if (scope_level != 0)
    {
        scope_level -= 1;

        if (scope_level == 0)
        {
            scope_length = GLOBAL_SCOPE_LENGTH;
            strcpy(scope, GLOBAL_SCOPE);
        }
    }

    yyless(1);
    yy_push_state(MULTI_DECL);
}

{ENUM_DECL} {
    /*
     * "enum", a tag and an opening brace: the tag is recorded as a
     * DEFINITION.  The brace and the enumerators are handed back and
     * scanned in the MULTI_DECL state.
     */

    register int   length = 0;
    /* Skip the keyword and the first blank that always follows it */
    register char* p      = yytext + 5;

    /* Go to var */
    while ((*p == ' ') || (*p == '\n'))
    {
        if (*p == '\n')
            ++lineno;
        ++p;
    }

    /*
     * Add type in database.  The tag may be followed directly by the
     * opening brace, so the brace has to end the tag as well; otherwise the
     * search would run past the matched text.
     */
    while ((p[length] != ' ') && (p[length] != '\n') && (p[length] != '{'))
        ++length;

    add2db(DEFINITION,
           p,
           length,
           scope,
           scope_length,
           lineno);

    yyless(length + (p - yytext));

    yy_push_state(MULTI_DECL);
}

\ *if             |
\ *ifdef          |
\ *ifndef         |
\ *else           |
\ *elif           |
\ *endif          |
\ *pragma         {
    /*
     * Conditional-compilation and pragma keywords, with any leading
     * spaces, are consumed and ignored.
     * NOTE(review): these patterns carry no leading '#'; confirm whether
     * the '#' is meant to be left to the default rule.
     */
}

\ *include\ *\<[^\ ]+\> {
    /*
     * include directive with a file name between angle brackets: the name
     * is recorded as an INCLUDE and queued with parse_later.
     */

    register char*  first = strchr(yytext, '<') + 1;
    register char*  last;

    /* Move over blanks that may follow the opening bracket */
    for (; (*first == ' ') || (*first == '\n'); ++first)
    {
        if (*first == '\n')
            ++lineno;
    }

    /* Last character of the name, right before the closing bracket */
    last = strchr(yytext, '>') - 1;

    add2db(INCLUDE, first, last - first + 1, scope, scope_length, lineno);

    parse_later(first, (size_t)(last - first) + 1);
}

\ *include\ *\"[^\ ]+\" {
    /*
     * include directive with a quoted file name: the name between the
     * quotes is recorded as an INCLUDE.
     */

    register char*  open_q = strchr(yytext, '"');
    register char*  close_q;

    /* Look for the closing quote, counting the newlines on the way */
    for (close_q = open_q + 1; *close_q != '"'; ++close_q)
    {
        if (*close_q == '\n')
            ++lineno;
    }

    add2db(INCLUDE, open_q + 1, close_q - open_q - 1, scope, scope_length, lineno);
}

%%

/*
 * FUNCTION:    scan_c
 *
 * DESCRIPTION: This is the entry point for the C file lexer.
 *              The file is memory mapped and copied, without its comments
 *              and string contents, into a buffer that is kept and resized
 *              from one call to the next.  The lexer is then run on that
 *              buffer.  Whatever the default rule echoes is sent to
 *              /dev/null.
 *              Files that cannot be opened, examined or mapped, that are
 *              empty, or that are too large for an int are silently skipped.
 *
 * IN:          filename        The file to scan (NUL terminated)
 *              filename_length The number of characters in the file name
 * IN-OUT:
 * RETURN CODE: None.  The process exits if memory cannot be allocated.
 */

void scan_c (char* filename, size_t filename_length)
{
    char*        src;
    static char* buf = NULL;
    char*        newbuf;
    int          fdin;
    int          length;
    struct stat  statbuf;
    YY_BUFFER_STATE yybuf;

    /*
     * Start from a clean scanner state: a previous file may have ended in
     * the middle of a block, a call or a declaration.  The start condition
     * stack is empty exactly when the state is INITIAL, because states are
     * only ever entered with yy_push_state.
     */
    scope_level = 0;
    call_level  = 0;

    while (YYSTATE != INITIAL)
        yy_pop_state();

    /* Remember the file name, truncated to what the buffer can hold */
    if (filename_length >= sizeof(file))
        filename_length = sizeof(file) - 1;

    strncpy(file, filename, filename_length);
    file[filename_length] = '\0';
    file_length = filename_length;

    strcpy(scope, GLOBAL_SCOPE);
    scope_length = GLOBAL_SCOPE_LENGTH;

    /* Open input file */
    if ((fdin = open(filename, O_RDONLY | O_NONBLOCK)) < 0)
        return;

    /* Get size; skip empty files and files whose size does not fit an int */
    if ((fstat(fdin, &statbuf) < 0) ||
        (statbuf.st_size <= 0) ||
        (statbuf.st_size > INT_MAX))
    {
        close(fdin);
        return;
    }

    /*
     * Allocate memory for stripped file.  The stripped text is never longer
     * than the file itself.  realloc behaves like malloc while buf is NULL.
     */
    newbuf = (char*)realloc(buf, statbuf.st_size);

    if (newbuf == NULL)
    {
        fprintf(stderr, "Out of memory!\n");
        exit(-1);
    }

    buf = newbuf;

    /* Create a memory mapped region; it stays valid once the file is closed */
    src = mmap(0,
               statbuf.st_size,
               PROT_READ,
               MAP_FILE | MAP_SHARED,
               fdin,
               0);

    close(fdin);

    /* mmap reports failure with MAP_FAILED, not with a null pointer */
    if ((void*)src == MAP_FAILED)
        return;

    /*
     * Remove all comments.  The buffer is filled from the mapped region
     * only, so the file does not need to be read a second time.
     */
    length = strip_comments(src, buf, statbuf.st_size);

    munmap(src, statbuf.st_size);

    /* Unmatched characters are echoed to yyout: discard them */
    if ((yyout = fopen("/dev/null", "w")) == NULL)
        return;

    lineno = 1;

    /* Scan file */
    yybuf = yy_scan_bytes(buf, length);
    yylex();
    yy_delete_buffer(yybuf);

    fclose(yyout);
}

/*
 * FUNCTION:    strip_comments
 *
 * DESCRIPTION: This function copies one memory region to another, replacing
 *              every C comment (block comments and "//" comments) with a
 *              single space and dropping the contents of quoted strings
 *              (the two quotes are kept).  Newlines found inside comments
 *              and strings are copied so that line numbers do not change,
 *              and tabs are turned into spaces.
 *              The quoted file name of an include directive is copied
 *              unchanged.  Escaped characters inside a string and the
 *              character constant made of a double quote do not start or
 *              end a string.
 *              Neither region is accessed outside of its first size bytes:
 *              the output is never longer than the input.
 *
 * IN:          src             The source region
 *              size            The size of the src region
 * IN-OUT:      dst             The destination region, at least size bytes
 * OUT:
 * RETURN CODE: The number of bytes written in the destination region
 */

/* Tells whether the len bytes written so far end with "include" and blanks */
static int sc_after_include (const char* dst, int len)
{
    while ((len > 0) && (dst[len - 1] == ' '))
        --len;

    return (len >= 7) && (memcmp(dst + len - 7, "include", 7) == 0);
}

/* Tells whether the double quote at src[i] is a character constant */
static int sc_quote_is_char (const char* src, off_t i, off_t size)
{
    if ((i + 1 >= size) || (src[i + 1] != '\''))
        return 0;

    /* Plain form: quote, double quote, quote */
    if ((i >= 1) && (src[i - 1] == '\''))
        return 1;

    /* Escaped form: quote, backslash, double quote, quote */
    return (i >= 2) && (src[i - 1] == '\\') && (src[i - 2] == '\'');
}

static int strip_comments (register char* src,
                           register char* dst,
                                    off_t size)
{
    off_t i = 0;
    int   j = 0;

    while (i < size)
    {
        /* Block comment: one space, plus the newlines it contains */
        if ((src[i] == '/') && (i + 1 < size) && (src[i + 1] == '*'))
        {
            dst[j++] = ' ';
            i += 2;

            while ((i < size) &&
                   !((src[i] == '*') && (i + 1 < size) && (src[i + 1] == '/')))
            {
                if (src[i] == '\n')
                    dst[j++] = '\n';
                ++i;
            }

            /* Skip the end marker, unless the comment is not terminated */
            if (i < size)
                i += 2;

            continue;
        }

        /* One line comment: one space; the newline is copied afterwards */
        if ((src[i] == '/') && (i + 1 < size) && (src[i + 1] == '/'))
        {
            dst[j++] = ' ';
            i += 2;

            while ((i < size) && (src[i] != '\n'))
                ++i;

            continue;
        }

        if ((src[i] == '"') && !sc_quote_is_char(src, i, size))
        {
            if (sc_after_include(dst, j))
            {
                /* Include directive: copy the file name and both quotes */
                dst[j++] = src[i++];

                while (i < size)
                {
                    dst[j++] = src[i++];

                    if (dst[j - 1] == '"')
                        break;
                }

                continue;
            }

            /* Ordinary string: keep the quotes, drop what is in between */
            dst[j++] = '"';
            ++i;

            while ((i < size) && (src[i] != '"'))
            {
                /* An escaped character never ends the string */
                if ((src[i] == '\\') && (i + 1 < size))
                    ++i;

                if (src[i] == '\n')
                    dst[j++] = '\n';
                ++i;
            }

            /* Copy the closing quote, unless the string is not terminated */
            if (i < size)
            {
                dst[j++] = '"';
                ++i;
            }

            continue;
        }

        /* Anything else is copied, tabs being turned into spaces */
        if (src[i] != '\t')
            dst[j++] = src[i++];
        else
        {
            dst[j++] = ' ';
            ++i;
        }
    }

    return j;
}

/*
 * FUNCTION:    is_native_type
 *
 * DESCRIPTION: This function checks if a symbol is a native data type
 *              (or the "const" qualifier).  The symbol does not need to be
 *              NUL terminated: exactly length characters are compared, and
 *              they must match a whole keyword, not just the start of one.
 *
 * IN:          symbol          The symbol
 *              length          The symbol length
 * IN-OUT:
 * OUT:
 * RETURN CODE: 1 if the symbol is a native type. 0 otherwise.
 *              An empty symbol (length 0) is reported as native, as it
 *              always was, so that callers never record it.
 */

int is_native_type (register char* symbol, size_t length)
{
    static const char* const native_types[] =
    {
        "char", "int", "unsigned", "long", "short",
        "void", "float", "double", "const", "signed"
    };
    size_t i;

    if (length == 0)
        return 1;

    for (i = 0; i < sizeof(native_types) / sizeof(native_types[0]); ++i)
    {
        if ((strlen(native_types[i]) == length) &&
            (memcmp(symbol, native_types[i], length) == 0))
        {
            return 1;
        }
    }

    return 0;
}




/* syntax highlighted by Code2HTML, v. 0.9.1 */