1 /*=========================================================================
3 Program: Visualization Toolkit
4 Module: vtkParseString.c
6 Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen
8 See Copyright.txt or http://www.kitware.com/Copyright.htm for details.
10 This software is distributed WITHOUT ANY WARRANTY; without even
11 the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
12 PURPOSE. See the above copyright notice for more information.
14 =========================================================================*/
15 /*-------------------------------------------------------------------------
16 Copyright (c) 2012 David Gobbi.
18 Contributed to the VisualizationToolkit by the author in April 2012
19 under the terms of the Visualization Toolkit 2008 copyright.
20 -------------------------------------------------------------------------*/
22 #include "vtkParseString.h"
27 /*----------------------------------------------------------------
28 * String tokenization methods
30 * Strings must be broken into C++ tokens.
31 * A hash is computed for ids, but not for other tokens.
32 * Comments are generally considered to be whitespace, but
33 * WS_COMMENT can be used to consider comments as tokens.
36 /** Array for quick lookup of char types */
37 unsigned char parse_charbits[256] = {
38 0, 0, 0, 0, 0, 0, 0, 0, 0,
39 CPRE_HSPACE, /* tab */
40 CPRE_VSPACE, CPRE_VSPACE, CPRE_VSPACE, /* newline, vtab, form feed */
41 CPRE_HSPACE, /* carriage return */
42 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
43 CPRE_HSPACE, /* ' ' */
44 0, CPRE_QUOTE, 0, 0, 0, 0, CPRE_QUOTE, 0, 0, /* !"#$%&'() */
45 0, CPRE_SIGN, 0, CPRE_SIGN, 0, 0, /* *+,-./ */
46 CPRE_DIGIT|CPRE_HEX, /* 0 */
47 CPRE_DIGIT|CPRE_HEX, CPRE_DIGIT|CPRE_HEX,
48 CPRE_DIGIT|CPRE_HEX, CPRE_DIGIT|CPRE_HEX,
49 CPRE_DIGIT|CPRE_HEX, CPRE_DIGIT|CPRE_HEX,
50 CPRE_DIGIT|CPRE_HEX, CPRE_DIGIT|CPRE_HEX,
51 CPRE_DIGIT|CPRE_HEX, /* 9 */
52 0, 0, 0, 0, 0, 0, 0, /* :;<=>?@ */
53 CPRE_ID|CPRE_HEX, /* A */
54 CPRE_ID|CPRE_HEX, CPRE_ID|CPRE_HEX, CPRE_ID|CPRE_HEX, /* BCD */
55 CPRE_ID|CPRE_HEX|CPRE_EXP, /* E */
56 CPRE_ID|CPRE_HEX, CPRE_ID, CPRE_ID, CPRE_ID, /* FGHI */
57 CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, /* JKLM */
58 CPRE_ID, CPRE_ID, CPRE_ID|CPRE_EXP, CPRE_ID, /* NOPQ */
59 CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, /* RSTU */
60 CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, /* VWXY */
62 0, 0, 0, 0, /* [\\]^ */
65 CPRE_ID|CPRE_HEX, /* a */
66 CPRE_ID|CPRE_HEX, CPRE_ID|CPRE_HEX, CPRE_ID|CPRE_HEX, /* bcd */
67 CPRE_ID|CPRE_HEX|CPRE_EXP, /* e */
68 CPRE_ID|CPRE_HEX, CPRE_ID, CPRE_ID, CPRE_ID, /* fghi */
69 CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, /* jklm */
70 CPRE_ID, CPRE_ID, CPRE_ID|CPRE_EXP, CPRE_ID, /* nopq */
71 CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, /* rstu */
72 CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, /* vwxy */
74 0, 0, 0, 0, /* {|}~ */
76 CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID,
77 CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID,
78 CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID,
79 CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID,
82 #define parse_chartype(c, bits) \
83 ((parse_charbits[(unsigned char)(c)] & (bits)) != 0)
85 /** Skip over a comment. */
86 size_t vtkParse_SkipComment(const char *text)
88 const char *cp = text;
95 while (*cp != '\n' && *cp != '\0')
99 if (cp[1] == '\n') { cp++; }
100 else if (cp[1] == '\r' && cp[2] == '\n') { cp += 2; }
105 else if (cp[1] == '*')
110 if (cp[0] == '*' && cp[1] == '/') { cp += 2; break; }
119 /** Skip over whitespace. */
120 size_t vtkParse_SkipWhitespace(const char *text, parse_space_t spacetype)
122 const char *cp = text;
126 if (parse_chartype(*cp, spacetype))
132 while (parse_chartype(*cp, spacetype));
140 else if (cp[1] == '\r' && cp[2] == '\n')
149 else if (cp[0] == '/' && (spacetype & WS_COMMENT) != WS_COMMENT)
151 if (cp[1] == '/' || cp[1] == '*')
153 cp += vtkParse_SkipComment(cp);
169 /** Skip over string and char literals. */
170 size_t vtkParse_SkipQuotes(const char *text)
172 const char *cp = text;
175 if (parse_chartype(*cp, CPRE_QUOTE))
178 while (*cp != qc && *cp != '\n' && *cp != '\0')
182 if (cp[0] == '\r' && cp[1] == '\n') { cp += 2; }
183 else if (*cp != '\0') { cp++; }
195 /** Skip over a number. */
196 size_t vtkParse_SkipNumber(const char *text)
198 const char *cp = text;
200 if (parse_chartype(cp[0], CPRE_DIGIT) ||
201 (cp[0] == '.' && parse_chartype(cp[1], CPRE_DIGIT)))
206 if (parse_chartype(c, CPRE_EXP) &&
207 parse_chartype(*cp, CPRE_SIGN))
212 while (parse_chartype(*cp, CPRE_IDGIT) || *cp == '.');
218 /** Skip over a name. */
219 size_t vtkParse_SkipId(const char *text)
221 const char *cp = text;
223 if (parse_chartype(*cp, CPRE_ID))
229 while (parse_chartype(*cp, CPRE_IDGIT));
235 /** A simple 32-bit hash function based on "djb2". */
236 #define parse_hash_name(cp, h) \
238 do { h = (h << 5) + h + (unsigned char)*cp++; } \
239 while (parse_chartype(*cp, CPRE_IDGIT));
241 unsigned int vtkParse_HashId(const char *cp)
245 if (parse_chartype(*cp, CPRE_ID))
247 parse_hash_name(cp, h);
253 /** Skip a string or */
254 size_t parse_skip_quotes_with_suffix(const char *cp)
256 size_t l = vtkParse_SkipQuotes(cp);
257 if (l && cp[l] == '_')
259 l += vtkParse_SkipId(cp + l);
264 /** Return the next token, or 0 if none left. */
265 int vtkParse_NextToken(StringTokenizer *tokens)
267 const char *cp = tokens->text + tokens->len;
269 /* avoid extra function call for simple whitespace */
270 if (parse_chartype(*cp, tokens->ws))
272 do { cp++; } while (parse_chartype(*cp, tokens->ws));
274 /* function call is necessary if slash encountered */
275 if (*cp == '/' || *cp == '\\')
277 cp += vtkParse_SkipWhitespace(cp, tokens->ws);
280 if (parse_chartype(*cp, CPRE_ID))
285 /* use a macro to compute the hash */
286 parse_hash_name(ep, h);
288 tokens->tok = TOK_ID;
291 tokens->len = ep - cp;
293 /* check if this is a prefixed string */
294 if (parse_chartype(*ep, CPRE_QUOTE) &&
295 ((*ep == '\'' && tokens->len == 1 &&
296 (*cp == 'u' || *cp == 'U' || *cp == 'L')) ||
297 (*ep == '\"' && tokens->len == 1 &&
298 (*cp == 'U' || *cp == 'u' || *cp == 'L')) ||
299 (*ep == '\"' && tokens->len == 2 && cp[0] == 'u' && cp[1] == '8')))
301 tokens->tok = (*ep == '\"' ? TOK_STRING : TOK_CHAR);
303 tokens->len += parse_skip_quotes_with_suffix(ep);
307 /* check if this ID is a named operator */
308 static const char *op_str_array[32] = {
309 "compl", 0, 0, 0, 0, "bitor", "or", 0, 0, 0, 0, "not_eq",
310 0, "and_eq", 0, 0, 0, 0, 0, "xor_eq", 0, 0, "not", "bitand",
311 "and", 0, 0, "or_eq", 0, 0, "xor", 0 };
312 static unsigned char op_len_array[32] = {
313 5, 0, 0, 0, 0, 5, 2, 0, 0, 0, 0, 6,
314 0, 6, 0, 0, 0, 0, 0, 6, 0, 0, 3, 6,
315 3, 0, 0, 5, 0, 0, 3, 0 };
316 static int op_tok_array[32] = {
317 '~', 0, 0, 0, 0, '|', TOK_OR, 0, 0, 0, 0, TOK_NE,
318 0, TOK_AND_EQ, 0, 0, 0, 0, 0, TOK_XOR_EQ, 0, 0, '!', '&',
319 TOK_AND, 0, 0, TOK_OR_EQ, 0, 0, '^', 0 };
322 ep = op_str_array[h];
323 if (ep && tokens->len == op_len_array[h] &&
324 strncmp(cp, ep, tokens->len) == 0)
326 tokens->tok = op_tok_array[h];
331 else if (parse_chartype(*cp, CPRE_QUOTE))
333 tokens->tok = (*cp == '\"' ? TOK_STRING : TOK_CHAR);
336 tokens->len = parse_skip_quotes_with_suffix(cp);
338 else if (parse_chartype(*cp, CPRE_DIGIT) ||
339 (cp[0] == '.' && parse_chartype(cp[1], CPRE_DIGIT)))
341 tokens->tok = TOK_NUMBER;
344 tokens->len = vtkParse_SkipNumber(cp);
346 else if (cp[0] == '/' && (cp[1] == '/' || cp[1] == '*'))
348 tokens->tok = TOK_COMMENT;
351 tokens->len = vtkParse_SkipComment(cp);
361 if (cp[1] == ':') { l = 2; t = TOK_SCOPE; }
362 else if (cp[1] == '>') { l = 2; t = ']'; }
365 if (cp[1] == '.' && cp[2] == '.') { l = 3; t = TOK_ELLIPSIS; }
366 else if (cp[1] == '*') { l = 2; t = TOK_DOT_STAR; }
369 if (cp[1] == '=') { l = 2; t = TOK_EQ; }
372 if (cp[1] == '=') { l = 2; t = TOK_NE; }
375 if (cp[1] == '<' && cp[2] == '=') { l = 3; t = TOK_LSHIFT_EQ; }
376 else if (cp[1] == '<') { l = 2; t = TOK_LSHIFT; }
377 else if (cp[1] == '=') { l = 2; t = TOK_LE; }
378 else if (cp[1] == '%') { l = 2; t = '{'; }
379 else if (cp[1] == ':') { l = 2; t = '['; }
382 if (cp[1] == '>' && cp[2] == '=') { l = 3; t = TOK_RSHIFT_EQ; }
383 else if (cp[1] == '>') { l = 2; t = TOK_RSHIFT; }
384 else if (cp[1] == '=') { l = 2; t = TOK_GE; }
387 if (cp[1] == '=') { l = 2; t = TOK_AND_EQ; }
388 else if (cp[1] == '&') { l = 2; t = TOK_AND; }
391 if (cp[1] == '=') { l = 2; t = TOK_OR_EQ; }
392 else if (cp[1] == '|') { l = 2; t = TOK_OR; }
395 if (cp[1] == '=') { l = 2; t = TOK_XOR_EQ; }
398 if (cp[1] == '=') { l = 2; t = TOK_MUL_EQ; }
401 if (cp[1] == '=') { l = 2; t = TOK_DIV_EQ; }
404 if (cp[1] == '=') { l = 2; t = TOK_MOD_EQ; }
405 else if (cp[1] == '>') { l = 2; t = '}'; }
406 else if (cp[1] == ':') {
407 if (cp[2] == '%' && cp[3] == ':') { l = 4; t = TOK_DBLHASH; }
408 else { l = 2; t = '#'; } }
411 if (cp[1] == '+') { l = 2; t = TOK_INCR; }
412 else if (cp[1] == '=') { l = 2; t = TOK_ADD_EQ; }
415 if (cp[1] == '>' && cp[2] == '*') { l = 3; t = TOK_ARROW_STAR; }
416 else if (cp[1] == '>') { l = 2; t = TOK_ARROW; }
417 else if (cp[1] == '-') { l = 2; t = TOK_DECR; }
418 else if (cp[1] == '=') { l = 2; t = TOK_SUB_EQ; }
421 if (cp[1] == '#') { l = 2; t = TOK_DBLHASH; }
438 /** Initialize the tokenizer. */
439 void vtkParse_InitTokenizer(
440 StringTokenizer *tokens, const char *text, parse_space_t wstype)
447 vtkParse_NextToken(tokens);
451 /*----------------------------------------------------------------
452 * String allocation methods
454 * Strings are centrally allocated and are const. They should not
455 * be freed until the parse is complete and all the data structures
456 * generated by the parse have been freed.
459 /* allocate a string of n+1 bytes */
460 void vtkParse_InitStringCache(StringCache *cache)
462 cache->NumberOfChunks = 0;
463 cache->Chunks = NULL;
464 cache->ChunkSize = 0;
468 /* allocate a string of n+1 bytes */
469 char *vtkParse_NewString(StringCache *cache, size_t n)
474 if (cache->ChunkSize == 0)
476 cache->ChunkSize = 8176;
479 // align next start position on an 8-byte boundary
480 nextPosition = (((cache->Position + n + 8) | 7 ) - 7);
482 if (cache->NumberOfChunks == 0 || nextPosition > cache->ChunkSize)
484 if (n + 1 > cache->ChunkSize)
486 cache->ChunkSize = n + 1;
488 cp = (char *)malloc(cache->ChunkSize);
490 /* if empty, alloc for the first time */
491 if (cache->NumberOfChunks == 0)
493 cache->Chunks = (char **)malloc(sizeof(char *));
495 /* if count is power of two, reallocate with double size */
496 else if ((cache->NumberOfChunks & (cache->NumberOfChunks-1)) == 0)
498 cache->Chunks = (char **)realloc(
499 cache->Chunks, (2*cache->NumberOfChunks)*sizeof(char *));
502 cache->Chunks[cache->NumberOfChunks++] = cp;
505 nextPosition = (((n + 8) | 7) - 7);
508 cp = &cache->Chunks[cache->NumberOfChunks-1][cache->Position];
511 cache->Position = nextPosition;
516 /* free all allocated strings */
517 void vtkParse_FreeStringCache(StringCache *cache)
521 for (i = 0; i < cache->NumberOfChunks; i++)
523 free(cache->Chunks[i]);
530 cache->Chunks = NULL;
531 cache->NumberOfChunks = 0;
534 /* duplicate the first n bytes of a string and terminate it */
535 const char *vtkParse_CacheString(StringCache *cache, const char *in, size_t n)
539 res = vtkParse_NewString(cache, n);