src/VTKWrapping/ParaView/vtkParseString.h

   1 /*=========================================================================
   2
   3   Program:   Visualization Toolkit
   4   Module:    vtkParseString.h
   5
   6   Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen
   7   All rights reserved.
   8   See Copyright.txt or http://www.kitware.com/Copyright.htm for details.
   9
  10      This software is distributed WITHOUT ANY WARRANTY; without even
  11      the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  12      PURPOSE.  See the above copyright notice for more information.
  13
  14 =========================================================================*/
  15 /*-------------------------------------------------------------------------
  16   Copyright (c) 2012 David Gobbi.
  17
  18   Contributed to the VisualizationToolkit by the author in April 2012
  19   under the terms of the Visualization Toolkit 2008 copyright.
  20 -------------------------------------------------------------------------*/
  21
  22 /**
  23   This file provides string handling routines.
  24
  25   The two important jobs done by these routines are string tokenization
  26   and string caching.
  27
  28   Tokenization is done as per the rules of a C++ preprocessor, and
  29   breaks the strings into ids, literals, and operators.  Any string
  30   is a valid input for the tokenizer, and it is up to the parser to
  31   decide if the resulting tokens are valid within the grammar.  The
  32   two primary tokenization functions are vtkParse_InitTokenizer()
  33   and vtkParse_NextToken().
  34
  35   Caching refers to how string memory management is done.  The
  36   parser uses "const char *" for all strings, and expects all strings
  37   to be persistent and constant.  These conditions are automatically
  38   met by static strings, but dynamically-generated strings must be
  39   cached until the parse is complete.  The primary caching functions
  40   are vtkParse_CacheString() and vtkParse_FreeStringCache().
  41 */
  42
  43 #ifndef VTK_PARSE_STRING_H
  44 #define VTK_PARSE_STRING_H
  45
  46 #include <stddef.h>
  47
  48 #ifdef __cplusplus
  49 extern "C" {
  50 #endif
  51
  52 /**
  53  * Character classes used for tokenization, expressed as bit masks.
  54  */
  55 typedef enum _parse_char_type
  56 {
  57   CPRE_ID       = 0x01,  /* A-Z a-z and _ */
  58   CPRE_DIGIT    = 0x02,  /* 0-9 */
  59   CPRE_IDGIT    = 0x03,  /* 0-9 A-Z a-z and _ (CPRE_ID | CPRE_DIGIT) */
  60   CPRE_HEX      = 0x04,  /* 0-9 A-F a-f (hexadecimal digits) */
  61   CPRE_EXP      = 0x08,  /* EPep (exponents for floats) */
  62   CPRE_SIGN     = 0x10,  /* +- (sign for floats) */
  63   CPRE_QUOTE    = 0x20,  /* " and ' */
  64   CPRE_HSPACE   = 0x40,  /* space, tab, carriage return */
  65   CPRE_VSPACE   = 0x80,  /* newline, vertical tab, form feed */
  66   CPRE_WHITE    = 0xC0,  /* all whitespace (CPRE_HSPACE | CPRE_VSPACE) */
  67 } parse_char_type;
  68
  69 /**
  70  * Character type lookup table, indexed by unsigned char value.
  71  */
  72 extern unsigned char parse_charbits[256];
  73
  74 /**
  75  * Evaluates to 1 if any of 'bits' is set in parse_charbits for char c, else 0.
  76  */
  77 #define vtkParse_CharType(c, bits) \
  78   ((parse_charbits[(unsigned char)(c)] & (bits)) != 0)
  79
  80 /**
  81  * Whitespace modes for the tokenizer, built from the char type masks.
  82  * - WS_DEFAULT treats newlines and formfeeds as regular whitespace.
  83  * - WS_PREPROC treats newline as end-of-line, not as whitespace.
  84  * - WS_COMMENT treats comments as tokens, not as whitespace.
  85  */
  86 typedef enum _parse_space_t
  87 {
  88   WS_DEFAULT = CPRE_WHITE,  /* skip all whitespace */
  89   WS_PREPROC = CPRE_HSPACE, /* skip horizontal whitespace only */
  90   WS_COMMENT = (CPRE_WHITE | 0x100), /* comments as tokens; 0x100 is above the 8-bit char masks */
  91 } parse_space_t;
  92
  93 /**
  94  * Token types for C++ preprocessor tokens, as stored in StringTokenizer.tok.
  95  */
  96 typedef enum _preproc_token_t
  97 {
  98   TOK_OTHER = 257, /* above char range; presumably single chars are their own token (confirm) */
  99   TOK_ID,        /* any id */
 100   TOK_CHAR,      /* char literal */
 101   TOK_STRING,    /* string literal */
 102   TOK_NUMBER,    /* any numeric literal */
 103   TOK_COMMENT,   /* C or C++ comment */
 104   TOK_DBLHASH,   /* ## */
 105   TOK_SCOPE,     /* :: */
 106   TOK_INCR,      /* ++ */
 107   TOK_DECR,      /* -- */
 108   TOK_RSHIFT,    /* >> */
 109   TOK_LSHIFT,    /* << */
 110   TOK_AND,       /* && */
 111   TOK_OR,        /* || */
 112   TOK_EQ,        /* == */
 113   TOK_NE,        /* != */
 114   TOK_GE,        /* >= */
 115   TOK_LE,        /* <= */
 116   TOK_ADD_EQ,    /* += */
 117   TOK_SUB_EQ,    /* -= */
 118   TOK_MUL_EQ,    /* *= */
 119   TOK_DIV_EQ,    /* /= */
 120   TOK_MOD_EQ,    /* %= */
 121   TOK_AND_EQ,    /* &= */
 122   TOK_OR_EQ,     /* |= */
 123   TOK_XOR_EQ,    /* ^= */
 124   TOK_ARROW,     /* -> */
 125   TOK_DOT_STAR,  /* .* */
 126   TOK_ARROW_STAR,/* ->* */
 127   TOK_RSHIFT_EQ, /* >>= */
 128   TOK_LSHIFT_EQ, /* <<= */
 129   TOK_ELLIPSIS,  /* ... */
 130 } preproc_token_t;
 131
 132 /**
 133  * A struct for going through a string one token at a time.
 134  * If ws is set to WS_PREPROC, then tokenization stops when a
 135  * newline or null is encountered.  If ws is set to WS_DEFAULT,
 136  * then tokenization only stops when a null is encountered.  If
 137  * ws is set to WS_COMMENT, then tokenization stops only when
 138  * a null is encountered, and comments are returned as tokens
 139  * instead of being skipped as whitespace.
 140  */
 141 typedef struct _StringTokenizer
 142 {
 143   int tok;           /* the current token */
 144   unsigned int hash; /* the hash of the current token, if it is an id */
 145   const char *text;  /* the text for the current token, not null-terminated */
 146   size_t len;        /* the length of the current token */
 147   parse_space_t ws;  /* controls what to consider as whitespace */
 148 } StringTokenizer;
 149
 150 /**
 151  * Initialize the tokenizer for 'text' with whitespace mode 'wstype', and get the first token.
 152  */
 153 void vtkParse_InitTokenizer(
 154   StringTokenizer *tokens, const char *text, parse_space_t wstype);
 155
 156 /**
 157  * Return the next preprocessor token, or 0 (zero) if none are left.
 158  */
 159 int vtkParse_NextToken(StringTokenizer *tokens);
 160
 161 /**
 162  * Skip over whitespace.
 163  * Return the number of chars until the first non-whitespace token.
 164  * The spacetype (WS_DEFAULT, WS_PREPROC, or WS_COMMENT) selects what is skipped.
 165  */
 166 size_t vtkParse_SkipWhitespace(
 167   const char *cp, parse_space_t spacetype);
 168
 169 /**
 170  * Skip over a comment, either C style or C++ style.
 171  * Return the number of chars from cp until the end of the comment.
 172  */
 173 size_t vtkParse_SkipComment(const char *cp);
 174
 175 /**
 176  * Skip over a quoted string, in either double or single quotes.
 177  * Return the number of chars from cp until the end of the quotes.
 178  */
 179 size_t vtkParse_SkipQuotes(const char *cp);
 180
 181 /**
 182  * Skip over a number, as defined by preprocessor semantics.
 183  * Return the number of chars from cp until the end of the number.
 184  */
 185 size_t vtkParse_SkipNumber(const char *cp);
 186
 187 /**
 188  * Skip over an identifier.
 189  * Return the number of chars from cp until the end of the identifier.
 190  */
 191 size_t vtkParse_SkipId(const char *cp);
 192
 193 /**
 194  * Compute the hash for an id, for use in hash table lookups.
 195  * This stops at the first non-id character, so it is safe to use
 196  * on a string that is not null-terminated as long as there is either
 197  * whitespace or an operator character before the end of the string.
 198  * It can be used on null-terminated strings as well, of course.
 199  */
 200 unsigned int vtkParse_HashId(const char *cp);
 201
 202
 203 /**
 204  * StringCache provides a simple way of allocating strings centrally.
 205  * It eliminates the need to allocate and free each individual string,
 206  * which makes the code simpler and more efficient.
 207  */
 208 typedef struct _StringCache
 209 {
 210   unsigned long  NumberOfChunks; /* presumably the number of entries in Chunks (confirm) */
 211   char         **Chunks;         /* presumably the allocated chunks of string memory (confirm) */
 212   size_t         ChunkSize;      /* NOTE(review): likely the size of the current chunk; confirm */
 213   size_t         Position;       /* NOTE(review): likely the next free offset in the chunk; confirm */
 214 } StringCache;
 215
 216 /**
 217  * Initialize the string cache (presumably required before any other use of 'cache').
 218  */
 219 void vtkParse_InitStringCache(StringCache *cache);
 220
 221 /**
 222  * Allocate a new string from the cache.
 223  * A total of n+1 bytes will be allocated, to leave room for the null terminator.
 224  */
 225 char *vtkParse_NewString(StringCache *cache, size_t n);
 226
 227 /**
 228  * Cache a string so that it can then be used in the vtkParse data
 229  * structures.  The string will last until the cache is freed with
 230  * vtkParse_FreeStringCache().  At most 'n' chars will be copied, and the
 231  * string will be terminated.  A null 'cp' results in a null return value.
 232  */
 233 const char *vtkParse_CacheString(
 234   StringCache *cache, const char *cp, size_t n);
 235
 236 /**
 237  * Free all strings that were created from 'cache' with vtkParse_NewString()
 238  * or vtkParse_CacheString(); pointers to those strings become invalid.
 239  */
 240 void vtkParse_FreeStringCache(StringCache *cache);
 241
 242 #ifdef __cplusplus
 243 } /* extern "C" */
 244 #endif
 245
 246 #endif