/** * @file d.tokenize.cpp * This file gets included into tokenize.cpp. * This is specific to the D language. * * @author Ben Gardner * @license GPL v2+ * * $Id: d.tokenize.cpp 858 2007-08-10 23:45:33Z bengardner $ */ /** * Parses all legal D string constants. * * Quoted strings: * r"Wysiwyg" # WYSIWYG string * x"hexstring" # Hexadecimal array * `Wysiwyg` # WYSIWYG string * 'char' # single character * "reg_string" # regular string * * Non-quoted strings: * \x12 # 1-byte hex constant * \u1234 # 2-byte hex constant * \U12345678 # 4-byte hex constant * \123 # octal constant * \& # named entity * \n # single character * * @param pc The structure to update, str is an input. * @return Whether a string was parsed */ static bool d_parse_string(chunk_t *pc) { if (pc->str[0] == '"') { return(parse_string(pc, 0, true)); } else if ((pc->str[0] == '\'') || (pc->str[0] == '`')) { return(parse_string(pc, 0, true)); } else if (pc->str[0] == '\\') { pc->len = 0; while (pc->str[pc->len] == '\\') { pc->len++; /* Check for end of file */ switch (pc->str[pc->len]) { case 'x': /* \x HexDigit HexDigit */ pc->len += 3; break; case 'u': /* \u HexDigit HexDigit HexDigit HexDigit */ pc->len += 5; break; case 'U': /* \U HexDigit (x8) */ pc->len += 9; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': /* handle up to 3 octal digits */ pc->len++; if ((pc->str[pc->len] >= '0') && (pc->str[pc->len] <= '7')) { pc->len++; if ((pc->str[pc->len] >= '0') && (pc->str[pc->len] <= '7')) { pc->len++; } } break; case '&': /* \& NamedCharacterEntity ; */ pc->len++; while (isalpha(pc->str[pc->len])) { pc->len++; } if (pc->str[pc->len] == ';') { pc->len++; } break; default: /* Everything else is a single character */ pc->len++; break; } } if (pc->len > 1) { pc->type = CT_STRING; cpd.column += pc->len; return(true); } } else if (pc->str[1] == '"') { if ((pc->str[0] == 'r') || (pc->str[0] == 'x')) { return(parse_string(pc, 1, false)); } } return(false); }