Rocstar  1.0
Rocstar multiphysics simulation application
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
inks/FileTokenizer.cpp
Go to the documentation of this file.
1 /* *****************************************************************
2  MESQUITE -- The Mesh Quality Improvement Toolkit
3 
4  Copyright 2004 Lawrence Livermore National Laboratory. Under
5  the terms of Contract B545069 with the University of Wisconsin --
6  Madison, Lawrence Livermore National Laboratory retains certain
7  rights in this software.
8 
9  This library is free software; you can redistribute it and/or
10  modify it under the terms of the GNU Lesser General Public
11  License as published by the Free Software Foundation; either
12  version 2.1 of the License, or (at your option) any later version.
13 
14  This library is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  Lesser General Public License for more details.
18 
19  You should have received a copy of the GNU Lesser General Public License
20  (lgpl.txt) along with this library; if not, write to the Free Software
21  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 
23  kraftche@cae.wisc.edu
24 
25  ***************************************************************** */
26 
#include "FileTokenizer.hpp"
#include "MsqError.hpp"
#ifdef MSQ_USE_OLD_C_HEADERS
#  include <string.h>
#  include <ctype.h>
#  include <errno.h>
#  include <float.h>
#else
#  include <cstring>
#  include <cctype>
#  include <cstdlib>
#  include <cerrno>
#  include <cfloat>
   using namespace std;
#endif
38 
39 namespace Mesquite
40 {
41 
42 FileTokenizer::FileTokenizer( FILE* file_ptr )
43  : filePtr( file_ptr ),
44  nextToken( buffer ),
45  bufferEnd( buffer ),
46  lineNumber( 1 ),
47  lastChar( '\0' )
48  {}
49 
51  { fclose( filePtr ); }
52 
53 bool FileTokenizer::eof() const
54  { return nextToken == bufferEnd && feof(filePtr); }
55 
56 const char* FileTokenizer::get_string( MsqError& err )
57 {
58  // If the whitepsace character marking the end of the
59  // last token was a newline, increment the line count.
60  if (lastChar == '\n')
61  ++lineNumber;
62 
63  // Loop until either found the start of a token to return or have
64  // reached the end of the file.
65  for (;;)
66  {
67  // If the buffer is empty, read more.
68  if (nextToken == bufferEnd)
69  {
70  size_t count = fread( buffer, 1, sizeof(buffer) - 1, filePtr );
71  if (!count)
72  {
73  if (feof(filePtr))
74  MSQ_SETERR(err)( "File truncated.\n", MsqError::PARSE_ERROR );
75  else
77  return NULL;
78  }
79 
80  nextToken = buffer;
81  bufferEnd = buffer + count;
82  }
83 
84  // If the current character is not a space, we've found a token.
85  if (!isspace(*nextToken))
86  break;
87 
88  // If the current space character is a newline,
89  // increment the line number count.
90  if (*nextToken == '\n')
91  ++lineNumber;
92  ++nextToken;
93  }
94 
95  // Store the start of the token in "result" and
96  // advance "nextToken" to one past the end of the
97  // token.
98  char* result = nextToken;
99  while (nextToken != bufferEnd && !isspace(*nextToken))
100  ++nextToken;
101 
102  // If we have reached the end of the buffer without finding
103  // a whitespace character terminating the token, we need to
104  // read more from the file. Only try once. If the token is
105  // too large to fit in the buffer, give up.
106  if (nextToken == bufferEnd)
107  {
108  // Shift the (possibly) partial token to the start of the buffer.
109  size_t remaining = bufferEnd - result;
110  memmove( buffer, result, remaining );
111  result = buffer;
112  nextToken = result + remaining;
113 
114  // Fill the remainder of the buffer after the token.
115  size_t count = fread( nextToken, 1, sizeof(buffer) - remaining - 1, filePtr );
116  if (!count && !feof(filePtr))
117  {
118  MSQ_SETERR(err)( "I/O error.\n", MsqError::IO_ERROR );
119  return NULL;
120  }
121  bufferEnd = nextToken + count;
122 
123  // Continue to advance nextToken until we find the space
124  // terminating the token.
125  while (nextToken != bufferEnd && !isspace(*nextToken))
126  ++nextToken;
127 
128  if (nextToken == bufferEnd) // EOF
129  {
130  *bufferEnd = '\0';
131  ++bufferEnd;
132  }
133  }
134 
135  // Save terminating whitespace character (or NULL char if EOF).
136  lastChar = *nextToken;
137  // Put null in buffer to mark end of current token.
138  *nextToken = '\0';
139  // Advance nextToken to the next character to search next time.
140  ++nextToken;
141  return result;
142 }
143 
144 bool FileTokenizer::get_double_internal( double& result, MsqError& err )
145 {
146  // Get a token
147  const char *token_end, *token = get_string( err );
148  if (MSQ_CHKERR(err))
149  return false;
150 
151  // Check for hex value -- on some platforms (e.g. Linux), strtod
152  // will accept hex values, on others (e.g. Sun) it wil not. Force
153  // failure on hex numbers for consistancy.
154  if (token[0] && token[1] && token[0] == '0' && toupper(token[1]) == 'X')
155  {
157  "Syntax error at line %d: expected number, got \"%s\"",
158  line_number(), token );
159  return false;
160  }
161 
162 
163  // Parse token as double
164  result = strtod( token, (char**)&token_end );
165 
166  // If the one past the last char read by strtod is
167  // not the NULL character terminating the string,
168  // then parse failed.
169  if (*token_end)
170  {
172  "Syntax error at line %d: expected number, got \"%s\"",
173  line_number(), token );
174  return false;
175  }
176 
177  return true;
178 }
179 
180 bool FileTokenizer::get_float_internal( float& result, MsqError& err )
181 {
182  double d;
183  get_double_internal( d, err );
184  if (MSQ_CHKERR(err))
185  return false;
186 
187  result = (float)d;
188  if (d != (double)result)
189  {
190  MSQ_SETERR(err)( MsqError::PARSE_ERROR, "Numberic overflow at line %d.", line_number() );
191  return false;
192  }
193 
194  return true;
195 }
196 
197 bool FileTokenizer::get_long_int_internal( long& result, MsqError& err )
198 {
199  // Get a token
200  const char *token_end, *token = get_string( err );
201  if (MSQ_CHKERR(err))
202  return false;
203 
204  // Parse token as long
205  result = strtol( token, (char**)&token_end, 0 );
206 
207  // If the one past the last char read by strtol is
208  // not the NULL character terminating the string,
209  // then parse failed.
210  if (*token_end)
211  {
213  "Syntax error at line %d: expected integer, got \"%s\"",
214  line_number(), token );
215  return false;
216  }
217 
218  return true;
219 }
220 
221 bool FileTokenizer::get_byte_internal( unsigned char& result, MsqError& err )
222 {
223  long i;
224  get_long_int_internal( i, err );
225  if (MSQ_CHKERR(err))
226  return false;
227 
228  result = (unsigned char)i;
229  if (i != (long)result)
230  {
231  MSQ_SETERR(err)( MsqError::PARSE_ERROR, "Numberic overflow at line %d.", line_number() );
232  return false;
233  }
234 
235  return true;
236 }
237 
238 bool FileTokenizer::get_short_int_internal( short& result, MsqError& err )
239 {
240  long i;
241  get_long_int_internal( i, err );
242  if (MSQ_CHKERR(err))
243  return false;
244 
245  result = (short)i;
246  if (i != (long)result)
247  {
248  MSQ_SETERR(err)( MsqError::PARSE_ERROR, "Numberic overflow at line %d.", line_number() );
249  return false;
250  }
251 
252  return true;
253 }
254 
255 bool FileTokenizer::get_integer_internal( int& result, MsqError& err )
256 {
257  long i;
258  get_long_int_internal( i, err );
259  if (MSQ_CHKERR(err))
260  return false;
261 
262  result = (int)i;
263  if (i != (long)result)
264  {
265  MSQ_SETERR(err)( MsqError::PARSE_ERROR, "Numberic overflow at line %d.", line_number() );
266  return false;
267  }
268 
269  return true;
270 }
271 
272 bool FileTokenizer::get_boolean_internal( bool& result, MsqError& err )
273 {
274  // Get a token
275  const char *token = get_string( err );
276  if (MSQ_CHKERR(err))
277  return false;
278 
279  if (token[1] || (token[0] != '0' && token[0] != '1'))
280  {
282  "Syntax error at line %d: expected 0 or 1, got \"%s\"",
283  line_number(), token );
284  return false;
285  }
286 
287  result = token[0] == '1';
288  return true;
289 }
290 
291 bool FileTokenizer::get_floats( size_t count, float* array, MsqError& err )
292 {
293  for (size_t i = 0; i < count; ++i)
294  {
295  if (!get_float_internal( *array, err ))
296  return false;
297  ++array;
298  }
299  return true;
300 }
301 
302 bool FileTokenizer::get_doubles( size_t count, double* array, MsqError& err )
303 {
304  for (size_t i = 0; i < count; ++i)
305  {
306  get_double_internal( *array, err );
307  if (MSQ_CHKERR(err))
308  return false;
309  ++array;
310  }
311  return true;
312 }
313 
314 bool FileTokenizer::get_bytes( size_t count, unsigned char* array, MsqError& err )
315 {
316  for (size_t i = 0; i < count; ++i)
317  {
318  get_byte_internal( *array, err );
319  if (MSQ_CHKERR(err))
320  return false;
321  ++array;
322  }
323  return true;
324 }
325 
326 bool FileTokenizer::get_short_ints( size_t count, short* array, MsqError& err )
327 {
328  for (size_t i = 0; i < count; ++i)
329  {
330  get_short_int_internal( *array, err );
331  if (MSQ_CHKERR(err))
332  return false;
333  ++array;
334  }
335  return true;
336 }
337 
338 
339 bool FileTokenizer::get_integers( size_t count, int* array, MsqError& err )
340 {
341  for (size_t i = 0; i < count; ++i)
342  {
343  get_integer_internal( *array, err );
344  if (MSQ_CHKERR(err))
345  return false;
346  ++array;
347  }
348  return true;
349 }
350 
351 bool FileTokenizer::get_long_ints( size_t count, long* array, MsqError& err )
352 {
353  for (size_t i = 0; i < count; ++i)
354  {
355  get_long_int_internal( *array, err );
356  if (MSQ_CHKERR(err))
357  return false;
358  ++array;
359  }
360  return true;
361 }
362 
363 bool FileTokenizer::get_booleans( size_t count, bool* array, MsqError& err )
364 {
365  for (size_t i = 0; i < count; ++i)
366  {
367  get_boolean_internal( *array, err );
368  if (MSQ_CHKERR(err))
369  return false;
370  ++array;
371  }
372  return true;
373 }
374 
376 {
377  if (nextToken - buffer < 2)
378  return;
379 
380  --nextToken;
381  *nextToken = lastChar;
382  --nextToken;
383  while (nextToken > buffer && *nextToken)
384  --nextToken;
385 
386  if (!*nextToken)
387  ++nextToken;
388 
389  lastChar = '\0';
390 }
391 
392 bool FileTokenizer::match_token( const char* str, MsqError& err )
393 {
394  // Get a token
395  const char *token = get_string( err );
396  if (MSQ_CHKERR(err))
397  return false;
398 
399  // Check if it matches
400  if (0 == strcmp( token, str ))
401  return true;
402 
403  // Construct error message
405  "Syntax error at line %d: expected \"%s\", got \"%s\"",
406  line_number(), str, token );
407  return false;
408 } // namespace Mesquite
409 
410 
411 int FileTokenizer::match_token( const char* const* list, MsqError& err )
412 {
413  // Get a token
414  const char *token = get_string( err );
415  if (MSQ_CHKERR(err))
416  return false;
417 
418  // Check if it matches any input string
419  const char* const* ptr;
420  for (ptr = list; *ptr; ++ptr)
421  if (0 == strcmp( token, *ptr ))
422  return ptr - list + 1;
423 
424  // No match, constuct error message
425  msq_std::string message( "Parsing error at line " );
426  char lineno[16];
427  sprintf( lineno, "%d", line_number() );
428  message += lineno;
429  message += ": expected one of {";
430  for (ptr = list; *ptr; ++ptr)
431  {
432  message += " ";
433  message += *ptr;
434  }
435  message += " } got \"";
436  message += token;
437  message += "\"";
438  MSQ_SETERR(err)( message, MsqError::PARSE_ERROR );
439  return false;
440 }
441 
442 bool FileTokenizer::get_newline( MsqError& err )
443 {
444  if (lastChar == '\n')
445  {
446  lastChar = ' ';
447  ++lineNumber;
448  return true;
449  }
450 
451  // Loop until either we a) find a newline, b) find a non-whitespace
452  // character or c) reach the end of the file.
453  for (;;)
454  {
455  // If the buffer is empty, read more.
456  if (nextToken == bufferEnd)
457  {
458  size_t count = fread( buffer, 1, sizeof(buffer), filePtr );
459  if (!count)
460  {
461  if (eof())
462  MSQ_SETERR(err)( "File truncated.", MsqError::PARSE_ERROR );
463  else
465  return false;
466  }
467 
468  nextToken = buffer;
469  bufferEnd = buffer + count;
470  }
471 
472  // If the current character is not a space, the we've failed.
473  if (!isspace(*nextToken))
474  {
475  MSQ_SETERR(err)( MsqError::PARSE_ERROR, "Expected newline at line %d.", line_number() );
476  return false;
477  }
478 
479  // If the current space character is a newline,
480  // increment the line number count.
481  if (*nextToken == '\n')
482  {
483  ++lineNumber;
484  ++nextToken;
485  lastChar = ' ';
486  return true;
487  }
488  ++nextToken;
489  }
490 
491  // should never reach this
492  return false;
493 }
494 
495 
496 } // namespace Mesquite
497 
An I/O error occurred (e.g.
bool get_boolean_internal(bool &result, MsqError &err)
Internal implementation of get_Booleans.
int line_number() const
Get the line number the last token was read from.
void unget_token()
Put current token back in buffer.
int fread(T *const ptr, const unsigned int nmemb, std::FILE *stream)
Read file data, and check for possible errors.
Definition: CImg.h:5569
~FileTokenizer()
destructor : closes file.
bool get_float_internal(float &result, MsqError &err)
Internal implementation of get_floats.
const NT & d
bool get_long_int_internal(long &result, MsqError &err)
Internal implementation of get_long_ints.
bool match_token(const char *string, MsqError &err)
Match current token to passed string.
bool get_newline(MsqError &err)
check for newline
int fclose(std::FILE *file)
Close a file, and check for possible errors.
Definition: CImg.h:5507
bool get_floats(size_t count, float *array, MsqError &err)
Parse a sequence of float values.
bool get_short_ints(size_t count, short *array, MsqError &err)
Parse a sequence of integer values.
bool get_doubles(size_t count, double *array, MsqError &err)
Parse a sequence of double values.
const char * get_string(MsqError &err)
get next token
bool get_bytes(size_t count, unsigned char *array, MsqError &err)
Parse a sequence of integer values.
#define MSQ_CHKERR(err)
Mesquite's Error Checking macro.
#define MSQ_SETERR(err)
Macro to set error - use err.clear() to clear.
bool get_long_ints(size_t count, long *array, MsqError &err)
Parse a sequence of integer values.
blockLoc i
Definition: read.cpp:79
bool get_short_int_internal(short &result, MsqError &err)
Internal implementation of get_short_ints.
char * bufferEnd
One past the last used byte of the buffer.
char * nextToken
One past the end of the last token returned.
bool get_double_internal(double &result, MsqError &err)
Internal implementation of get_doubles.
bool get_integer_internal(int &result, MsqError &err)
Internal implementation of get_integers.
bool get_booleans(size_t count, bool *array, MsqError &err)
Parse a sequence of bit or boolean values.
bool get_byte_internal(unsigned char &result, MsqError &err)
Internal implementation of get_bytes.
msq_stdc::FILE * filePtr
Pointer to standard C FILE struct.
bool get_integers(size_t count, int *array, MsqError &err)
Parse a sequence of integer values.
Error parsing input (or input file)
int lineNumber
Line number of last returned token.
char lastChar
The whitespace character marking the end of the last returned token.
bool eof() const
Check for end-of-file condition.