1 /*=========================================================================
3 Program: Visualization Toolkit
4 Module: vtkParseString.h
6 Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen
8 See Copyright.txt or http://www.kitware.com/Copyright.htm for details.
10 This software is distributed WITHOUT ANY WARRANTY; without even
11 the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
12 PURPOSE. See the above copyright notice for more information.
14 =========================================================================*/
15 /*-------------------------------------------------------------------------
16 Copyright (c) 2012 David Gobbi.
18 Contributed to the VisualizationToolkit by the author in April 2012
19 under the terms of the Visualization Toolkit 2008 copyright.
20 -------------------------------------------------------------------------*/
23 This file provides string handling routines.
25 The two important jobs done by these routines are string tokenization and string caching.
28 Tokenization is done as per the rules of a C++ preprocessor, and
29 breaks the strings into ids, literals, and operators. Any string
30 is a valid input for the tokenizer, and it is up to the parser to
31 decide if the resulting tokens are valid within the grammar. The
32 two primary tokenization functions are vtkParse_InitTokenizer()
33 and vtkParse_NextToken().
35 Caching refers to how string memory management is done. The
36 parser uses "const char *" for all strings, and expects all strings
37 to be persistent and constant. These conditions are automatically
38 met by static strings, but dynamically-generated strings must be
39 cached until the parse is complete. The primary caching functions
40 are vtkParse_CacheString() and vtkParse_FreeStringCache().
43 #ifndef VTK_PARSE_STRING_H
44 #define VTK_PARSE_STRING_H
53 * Various important char types for tokenization
55 typedef enum _parse_char_type
57 CPRE_ID = 0x01, /* A-Z a-z and _ */
58 CPRE_DIGIT = 0x02, /* 0-9 */
59 CPRE_IDGIT = 0x03, /* 0-9 A-Z a-z and _ */
60 CPRE_HEX = 0x04, /* 0-9A-Fa-f */
61 CPRE_EXP = 0x08, /* EPep (exponents for floats) */
62 CPRE_SIGN = 0x10, /* +- (sign for floats) */
63 CPRE_QUOTE = 0x20, /* " and ' */
64 CPRE_HSPACE = 0x40, /* space, tab, carriage return */
65 CPRE_VSPACE = 0x80, /* newline, vertical tab, form feed */
66 CPRE_WHITE = 0xC0, /* all whitespace characters */
70 * Character type lookup table
72 extern unsigned char parse_charbits[256];
75 * Macro to check if a char is of a certain type
77 #define vtkParse_CharType(c, bits) \
78 ((parse_charbits[(unsigned char)(c)] & (bits)) != 0)
81 * Whitespace types that can be used with the tokenizer.
82 * - WS_DEFAULT treats newlines and formfeeds as regular whitespace.
83 * - WS_PREPROC treats newline as end-of-line, not as whitespace.
84 * - WS_COMMENT treats comments as tokens, not as whitespace.
86 typedef enum _parse_space_t
88 WS_DEFAULT = CPRE_WHITE, /* skip all whitespace */
89 WS_PREPROC = CPRE_HSPACE, /* skip horizontal whitespace only */
90 WS_COMMENT = (CPRE_WHITE | 0x100), /* comments as tokens */
94 * Preprocessor tokens for C++.
96 typedef enum _preproc_token_t
100 TOK_CHAR, /* char literal */
101 TOK_STRING, /* string literal */
102 TOK_NUMBER, /* any numeric literal */
103 TOK_COMMENT, /* C or C++ comment */
104 TOK_DBLHASH, /* ## */
125 TOK_DOT_STAR, /* .* */
126 TOK_ARROW_STAR,/* ->* */
127 TOK_RSHIFT_EQ, /* >>= */
128 TOK_LSHIFT_EQ, /* <<= */
129 TOK_ELLIPSIS, /* ... */
133 * A struct for going through a string one token at a time.
134 * If ws is set to WS_PREPROC, then tokenization stops when a
135 * newline or null is encountered. If ws is set to WS_DEFAULT,
136 * then tokenization only stops when a null is encountered. If
137 * ws is set to WS_COMMENT, then tokenization stops only when
138 * a null is encountered, and comments are returned as tokens
139 * instead of being skipped as whitespace.
141 typedef struct _StringTokenizer
143 int tok; /* the current token */
144 unsigned int hash; /* the hash of the current token, if it is an id */
145 const char *text; /* the text for the current token, not null-terminated */
146 size_t len; /* the length of the current token */
147 parse_space_t ws; /* controls what to consider as whitespace */
151 * Initialize the tokenizer and get the first token.
153 void vtkParse_InitTokenizer(
154 StringTokenizer *tokens, const char *text, parse_space_t wstype);
157 * Return the next preprocessor token, or 0 if there are none left.
159 int vtkParse_NextToken(StringTokenizer *tokens);
162 * Skip over whitespace.
163 * Return the number of chars until the first non-whitespace token.
164 * Set spacetype to WS_DEFAULT, WS_PREPROC, or WS_COMMENT.
166 size_t vtkParse_SkipWhitespace(
167 const char *cp, parse_space_t spacetype);
170 * Skip over a comment, C style or C++ style.
171 * Return the number of chars until the end of the comment.
173 size_t vtkParse_SkipComment(const char *cp);
176 * Skip over a string in double or single quotes.
177 * Return the number of chars until the end of the quotes.
179 size_t vtkParse_SkipQuotes(const char *cp);
182 * Skip over a number. Uses preprocessor semantics.
183 * Return the number of chars until the end of the number.
185 size_t vtkParse_SkipNumber(const char *cp);
188 * Skip over an identifier.
189 * Return the number of chars until the end of the identifier.
191 size_t vtkParse_SkipId(const char *cp);
194 * Compute the hash for an id, for use in hash table lookups.
195 * This stops at the first non-Id character, so it is safe to use
196 * on a string that is not null-terminated as long as there is either
197 * whitespace or an operator character before the end of the string.
198 * It can be used on null-terminated strings as well, of course.
200 unsigned int vtkParse_HashId(const char *cp);
204 * StringCache provides a simple way of allocating strings centrally.
205 * It eliminates the need to allocate and free each individual string,
206 * which makes the code simpler and more efficient.
208 typedef struct _StringCache
210 unsigned long NumberOfChunks;
217 * Initialize the string cache.
219 void vtkParse_InitStringCache(StringCache *cache);
222 * Allocate a new string from the cache.
223 * A total of n+1 bytes will be allocated, to leave room for null.
225 char *vtkParse_NewString(StringCache *cache, size_t n);
228 * Cache a string so that it can then be used in the vtkParse data
229 * structures. The string remains valid until vtkParse_FreeStringCache() is called.
230 * At most 'n' chars will be copied, and the string will be terminated.
231 * If a null pointer is provided, then a null pointer will be returned.
233 const char *vtkParse_CacheString(
234 StringCache *cache, const char *cp, size_t n);
237 * Free all strings that were created with vtkParse_NewString() or
238 * with vtkParse_CacheString().
240 void vtkParse_FreeStringCache(StringCache *cache);