2010-05-29 02:47:57 +00:00

914 lines
28 KiB

* Token.h *
* *
* The Token class encapsulates a lexical analyzer for C++-like syntax. *
* A token instance is associated with one or more text files, and *
* grabs C++ tokens from them sequentially. There are many member *
* functions designed to make parsing easy, such as "==" operators for *
* strings and characters, and automatic conversion of numeric tokens *
* into numeric values. *
* *
* Files can be nested via #include directives, and both styles of C++ *
* comments are supported. *
* *
* Name Date Description *
* *
* arvo 10/05/99 Fixed bug in TokFrame string allocation. *
* arvo 01/15/95 Added ifdef, ifndef, else, and endif. *
* arvo 02/13/94 Added Debug() member function. *
* arvo 01/22/94 Several sections rewritten. *
* arvo 06/19/93 Converted to C++ *
* arvo 07/15/89 Rewritten for scene description parser. *
* arvo 01/22/89 Initial coding. *
* *
* Copyright (C) 1999, James Arvo *
* *
* This program is free software; you can redistribute it and/or modify it *
* under the terms of the GNU General Public License as published by the *
* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html *
* *
* This program is distributed in the hope that it will be useful, but *
* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for *
* any particular purpose. See the GNU General Public License for more *
* details. *
* *
#include <stdlib.h>
#include <string.h>
#include "Token.h"
#include "Char.h"
namespace ArvoMath {
// Stream that receives the debugging copy of the token stream. It is set by
// Token::Debug; while it is NULL no debug output is written.
FILE* Token::debug = NULL; // Static data member of Token class.
// Command-line arguments recorded by Token::Args and scanned by
// Token::SearchArgs for "-macro" definitions.
int Token::argc = 0;
char** Token::argv = NULL;
// Element type of the macro hash table allocated in Token::AddMacro.
typedef TokMacro *TokMacroPtr;
// Integer truth values returned by the "==" operators.
static const int True = 1;
static const int False = 0;
static const int HashConst = 217; // Size of hash-table for macros.
// NOTE(review): the next five statements have no enclosing function header in
// this text. They look like the body of the TokFrame default constructor
// (every field reset to an empty state) -- confirm against the full file.
next = NULL;
source = NULL;
fname = NULL;
line = 0;
column = 0;
// NOTE(review): the two statements below likewise look like the body of the
// TokFrame destructor -- confirm. Everywhere else in this file "fname" is
// assigned from strdup(), i.e. it is malloc-allocated, so it should be
// released with free() rather than delete[].
if( fname != NULL ) delete[] fname;
if( source != NULL ) fclose( source );
void TokFrame::operator=( const TokFrame &frame )
next = frame.next;
source = frame.source;
fname = strdup( frame.fname );
line = frame.line;
column = frame.column;
static int HashName( const char *str )
static int prime[5] = { 7, 11, 17, 23, 3 };
int k = 0;
int h = 0;
while( *str != NullChar )
h += (*str++) * prime[k++];
if( k == 5 ) k = 0;
if( h < 0 ) h = 0; // Check for overflow.
return h % HashConst;
// Finds the macro whose name is exactly "str".
//
// Returns a pointer to the matching entry, or NULL when no macro table has
// been created yet, when "str" is NULL, or when the name is not defined.
TokMacro *Token::MacroLookup( const char *str ) const
{
    if( table == NULL || str == NULL ) return NULL;

    // Walk the chain of entries that hash to the same bucket.
    for( TokMacro *m = table[ HashName( str ) ]; m != NULL; m = m->next )
    {
        if( strcmp( str, m->macro ) == 0 ) return m;
    }
    return NULL;
}
int Token::MacroReplace( char *str, int &length, TokType &type ) const
TokMacro *m = MacroLookup( str );
if( m == NULL ) return 0;
strcpy( str, m->repl );
length = strlen( str );
type = m->type;
return 1;
* D e b u g P r i n t *
* *
* This routine is used to record the entire token stream in a file to *
* use as a debugging aid. It does not affect the action of the lexer; *
* it merely records a "shadow" copy of all the tokens that are read by *
* ANY Token instance. The data that is written to the file is *
* *
* <Line number> <Column number> <File name> <Token> *
* *
static void DebugPrint( const Token &tok, FILE *fp )
fprintf( fp, "%3d %3d ", tok.Line(), tok.Column() );
fprintf( fp, "%s " , tok.FileName() );
fprintf( fp, "%s\n" , tok.Spelling() );
fflush ( fp );
* T o k e n (Constructors) *
* *
// Creates a lexer that reads from the file called "file_name".
Token::Token( const char *file_name )
{
    Init();             // Put every member into a known state first.
    Open( file_name );
}
// Creates a lexer that reads from the already-open stream "fp".
Token::Token( FILE *fp )
{
    Init();      // Put every member into a known state first.
    Open( fp );
}
// Creates a lexer with no input attached; a source is supplied later
// through Open().
Token::Token( )
{
    Init();  // Put every member into a known state.
}
* T o k e n (Destructor) *
* *
* Closes all files and deletes all frames and paths. *
* *
// Deletes every frame that was pushed for a nested file.
Token::~Token( )
{
    // Don't try to delete "frame" as it's a member of this class, not
    // something that we've allocated.
    for( TokFrame *f = frame.next; f != NULL; )
    {
        TokFrame *n = f->next;  // Remember the successor before deleting.
        delete f;
        f = n;
    }
}
* O p e n *
* *
* Establish a new file to read from, either by name, or by pointer. *
* *
void Token::Open( const char *file_name )
FILE *fp = fopen( file_name, "r" );
if( fp == NULL ) return;
Open( fp );
frame.fname = strdup( file_name );
void Token::Open( FILE *fp )
frame.source = fp;
frame.line = 1;
frame.column = 0;
pushed = NullChar;
* O p e r a t o r == *
* *
* A token can be compared with a string, a single character, or a type. *
* *
int Token::operator==( const char *s ) const
const char *t = spelling;
if( case_sensitive )
do { if( *s != *t ) return False; }
while( *s++ && *t++ );
do { if( ToUpper(*s) != ToUpper(*t) ) return False; }
while( *s++ && *t++ );
return True;
int Token::operator==( char c ) const
if( length != 1 ) return False;
if( case_sensitive ) return spelling[0] == c;
else return ToUpper(spelling[0]) == ToUpper(c);
// Tests whether the current token belongs to the category "_type_".
//
// Two categories are derived rather than stored: T_char matches a string
// token of length one, and T_numeric matches both integer and float tokens.
// Every other value must equal the stored token type.
int Token::operator==( TokType _type_ ) const
{
    switch( _type_ )
    {
        case T_char    : return ( type == T_string && Len() == 1 );
        case T_numeric : return ( type == T_integer || type == T_float );
        default        : return ( type == _type_ );
    }
}
* O p e r a t o r != *
* *
* Define negations of the three types of "==" tests. *
* *
// Negation of the string comparison: 1 when the spelling differs from "s".
int Token::operator!=( const char *s ) const
{
    return ( *this == s ) ? 0 : 1;
}
// Negation of the character comparison: 1 when the token is not "c".
int Token::operator!=( char c ) const
{
    return ( *this == c ) ? 0 : 1;
}
// Negation of the type comparison: 1 when the token is not of category "t".
int Token::operator!=( TokType t ) const
{
    return ( *this == t ) ? 0 : 1;
}
* E r r o r *
* *
* Print error message to "stderr" followed by optional "name". *
* *
// Reports a lexical error on stderr and terminates the program.
//
// The message consists of the current line and column of the frame being
// read, a fixed description selected by "error", and then "name" when it is
// not NULL.  The function does not return: it ends with exit( 1 ).
void Token::Error( TokError error, const char *name )
{
    const char *s;  // String literals are const; they must not bind to char*.
    switch( error )
    {
        case T_malformed_float   : s = "malformed real number ";   break;
        case T_unterm_string     : s = "unterminated string ";     break;
        case T_unterm_comment    : s = "unterminated comment ";    break;
        case T_file_not_found    : s = "include file not found: "; break;
        case T_unknown_directive : s = "unknown # directive ";     break;
        case T_string_expected   : s = "string expected ";         break;
        case T_putback_error     : s = "putback overflow ";        break;
        case T_name_too_long     : s = "file name is too long ";   break;
        case T_no_endif          : s = "#endif directive missing"; break;
        case T_extra_endif       : s = "#endif with no #ifdef ";   break;
        case T_extra_else        : s = "#else with no #ifdef ";    break;
        default                  : s = "unknown error type ";      break;
    }

    fprintf( stderr, "LEXICAL ERROR, line %d, column %d: %s",
             frame.line, frame.column, s );
    if( name == NULL )
    {
        fprintf( stderr, " \n" );
    }
    else
    {
        fprintf( stderr, "%s\n", name );
    }
    exit( 1 );
}
* G e t c *
* *
* This routine fetches one character at a time from the current file *
* being read. It is responsible for keeping track of the column number *
* and for handling single characters that have been "put back". *
* *
// Fetches the next input character, stores it in "c" and also returns it.
// A character that was put back earlier (held in "pushed") is delivered
// first and the slot is cleared; otherwise a character is read from the
// stream of the current frame.
// NOTE(review): the description above this routine says it keeps track of
// the column number, but no such update is present here, and the if/else
// bodies carry no braces in this text -- statements appear to be missing;
// confirm against the full file.
int Token::Getc( int &c )
if( pushed != NullChar ) // Return the pushed character.
c = pushed;
pushed = NullChar;
else // Get a new character from the source file.
c = getc( frame.source );
return c;
* N o n W h i t e *
* *
* This routine implements a simple finite state machine that skips *
* white space and recognizes the two styles of comments used in C++. *
* It returns the first non-white character not part of a comment. *
* *
// Skips white space and comments; stores the first significant character in
// "c", records the column where the token starts in "Tcolumn", and returns
// the character (EOF at end of input).
// NOTE(review): the labels that the goto statements jump to (start_state,
// start_comment, in_comment1, end_comment1, in_comment2, unterm_comment,
// start_new_line, newline_in_comment, return_char) are not present in this
// text.  The state names in the comments below are inferred from the order
// of the statement groups -- confirm against the full file.
int Token::NonWhite( int &c )
// Presumably start_state: skip blanks, tabs and newlines; a slash may open
// a comment; anything else is returned.
Getc( c );
if( c == Space ) goto start_state;
if( c == Tab ) goto start_state;
if( c == NewLine ) goto start_new_line;
if( c == Slash ) goto start_comment;
goto return_char;
// Presumably start_comment: a slash has been seen.  A following star or
// slash opens a comment; otherwise the character is put back and the slash
// itself is returned.
Getc( c );
if( c == Star ) goto in_comment1;
if( c == Slash ) goto in_comment2;
Unget( c );
c = Slash;
goto return_char;
// Presumably in_comment1: inside a slash-star comment.
Getc( c );
if( c == Star ) goto end_comment1;
if( c == NewLine ) goto newline_in_comment;
if( c == EOF ) goto return_char;
goto in_comment1;
// Presumably end_comment1: a star has been seen inside the comment; a slash
// closes the comment.
Getc( c );
if( c == Slash ) goto start_state;
if( c == NewLine ) goto newline_in_comment;
if( c == EOF ) goto unterm_comment;
goto in_comment1;
// Presumably in_comment2: inside a double-slash comment, which runs to the
// end of the line.
Getc( c );
if( c == NewLine ) goto start_new_line;
if( c == EOF ) goto return_char;
goto in_comment2;
// Presumably unterm_comment: report the error and return EOF.
Error( T_unterm_comment );
c = EOF;
goto return_char;
// Presumably start_new_line: reset the column and keep skipping.
frame.column = 0;
goto start_state;
// Presumably newline_in_comment: reset the column and stay in the comment.
frame.column = 0;
goto in_comment1;
// Presumably return_char: common exit.
Tcolumn = frame.column; // This is where the token starts.
return c;
* N e x t R a w T o k *
* *
// Reads the next token from the current source without macro substitution.
// The characters are collected in "spelling", "type" is set to the token
// category, and "length" is returned.  At end of input the type stays T_null.
// NOTE(review): braces, and apparently the header of the loop closed by
// "} // for", are not present in this text; the comments describe only the
// statements that are visible.
int Token::NextRawTok( )
// Transition tables of the number recognizer: each is indexed by the current
// state and yields the next state for one class of input character.  The
// switch on "state" further down treats 5 and 6 as accepting (the character
// is put back and the token ends) and 7 as a malformed number.
static int Trans0[] = { 0, 1, 3, 3, 3 }; // Found a digit.
static int Trans1[] = { 5, 6, 4, 6, 7 }; // Found a sign.
static int Trans2[] = { 1, 6, 7, 6, 7 }; // Found decimal point.
static int Trans3[] = { 2, 2, 7, 6, 7 }; // Found an exponent.
static int Trans4[] = { 5, 6, 7, 6, 7 }; // Found something else.
char *tok = spelling;
int state;
int c;
length = 0;
type = T_null;
// Skip comments and whitespace.
if( NonWhite( c ) == EOF ) goto endtok;
// Is this the beginning of an identifier? If so, get the rest.
if( isAlpha( c ) )
type = T_ident;
do {
*tok++ = c;
if( Getc( c ) == EOF ) goto endtok;
while( isAlpha( c ) || isDigit( c ) || c == Underscore );
// The character that ended the identifier belongs to the next token.
Unget( c );
goto endtok;
// Is this the beginning of a number?
else if( isDigit( c ) || c == Minus || c == Period )
char c1 = c; // First character; decides integer vs. float in state 5.
state = 0;
*tok++ = c;
// Classify the next character and advance the recognizer.
switch( Getc( c ) )
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9': state = Trans0[ state ]; break;
case '+':
case '-': state = Trans1[ state ]; break;
case '.': state = Trans2[ state ]; break;
case 'e':
case 'E': state = Trans3[ state ]; break;
default : state = Trans4[ state ]; break;
switch( state )
case 5 : Unget( c );
type = ( c1 == Period ) ? T_float : T_integer;
goto endtok;
case 6 : Unget( c ); type = T_float ; goto endtok;
case 7 : Error( T_malformed_float ) ; break;
default: continue;
} // for
} // if numeric
// Is this the beginning of an operator?
if( c == '*' || c == '>' || c == '<' || c == '+' || c == '-' || c == '!' )
char oldc = c;
type = T_other;
*tok++ = c;
if( Getc( c ) == EOF ) goto endtok;
// A doubled operator character, or one followed by '=', forms a
// two-character token.
if( c == oldc || c == EqualSign )
*tok++ = c;
else Unget( c );
goto endtok;
// Is this the beginning of a string?
else if( c == DoubleQuote )
type = T_string;
// NOTE(review): "length" is not visibly updated inside this loop, so the
// bound on MaxTokenLen may not take effect as written -- confirm.
while( Getc( c ) != EOF && length < MaxTokenLen )
if( c == DoubleQuote ) goto endtok;
*tok++ = c;
Error( T_unterm_string );
// Is this the beginning of a "#" directive?
else if( c == Hash )
type = T_directive;
NonWhite( c );
while( isAlpha( c ) )
*tok++ = c;
Getc( c );
Unget( c );
goto endtok;
// This must be a one-character token.
*tok++ = c;
length = 1;
type = T_other;
endtok: // Jump to here when token is completed.
*tok = NullChar; // Terminate the string.
if( debug != NULL ) DebugPrint( *this, debug );
return length;
* N e x t T o k *
* *
// Applies macro substitution to the current token and returns its length.
// An identifier that names a macro has its spelling, length and type
// replaced by the macro definition; the substituted token is also written
// to the debug stream when one is set.
// NOTE(review): no call that fetches a raw token is present in this text
// before the test on "type" -- a statement appears to be missing; confirm
// against the full file.
int Token::NextTok( )
// If the token is an identifier, see if it's a macro.
// If the macro substitution is null, get another token.
if( type == T_ident )
if( table != NULL )
if( MacroReplace( spelling, length, type ) && debug != NULL )
DebugPrint( *this, debug );
if( type == T_nullmacro ) NextTok();
return length;
* O p e r a t o r - - *
* *
* Puts back the last token found. Only one token can be put back. *
* *
// Puts the last token back, so that the next "++" delivers it again.
// Only one token can be pending: a second put-back in a row is reported
// through Error( T_putback_error ).
Token & Token::operator--( ) // Put the last token back.
{
    if( put_back ) Error( T_putback_error ); // Can only handle one putback.
    put_back = 1;
    return *this;
}
// Postfix decrement is deliberately unsupported: it only prints a notice on
// stderr and leaves the token unchanged.
Token & Token::operator--( int ) // Postfix decrement.
{
    fprintf( stderr, "Postfix decrement is not implemented for the Token class.\n" );
    return *this;
}
* H a n d l e D i r e c t i v e *
* *
* Directives beginning with "#" must be handled by the lexer, as they *
* determine the current source file via "#include", etc. *
* *
* Returns 1 if, after handling this directive, we now have the next *
* token. *
* *
// Carries out the "#" directive whose name is the current token: define,
// include, ifdef, ifndef, else, endif or error.  Returns 1 when the token
// following the directive has already been read, 0 otherwise.
// NOTE(review): braces and, apparently, the statements that advance to the
// next token (for example inside the skip loops, which otherwise could not
// make progress) are not present in this text.  The comments describe only
// what is visible; confirm against the full file.
int Token::HandleDirective( )
FILE *fp;
char name[128]; // Receives the file name of an include written with "<".
if( *this == "define" )
strcpy( tempbuff, Spelling() ); // This is the macro name.
int line = Line();
// A replacement on the same line as the directive defines the macro body.
if( Line() == line )
AddMacro( tempbuff, Spelling(), Type() );
// If next token is on a different line; we went too far.
AddMacro( tempbuff, "", T_nullmacro );
return 1; // Signal that we already have the next token.
else if( *this == "include" )
// A name opened with "<" is looked up along the search paths.
if( *this == "<" )
GetName( name, sizeof(name) );
PushFrame( ResolveName( name ), name );
// A quoted name is opened exactly as written.
else if( type == T_string )
fp = fopen( spelling, "r" );
if( fp == NULL ) Error( T_file_not_found, spelling );
else PushFrame( fp, spelling );
else Error( T_string_expected );
else if( *this == "ifdef" )
TokMacro *m = MacroLookup( Spelling() );
if( m == NULL ) // Skip until else or endif.
while( *this != T_null )
if( *this != T_directive ) continue;
if( *this == "endif" ) break;
if( *this == "else" ) { if_nesting++; break; } // Like m != NULL.
if( *this == T_null ) Error( T_no_endif );
return 0; // Ready to get the next token.
else if_nesting++;
else if( *this == "ifndef" )
TokMacro *m = MacroLookup( Spelling() );
if( m != NULL ) // Skip until else or endif.
while( *this != T_null )
if( *this != T_directive ) continue;
if( *this == "endif" ) break;
if( *this == "else" ) { if_nesting++; break; } // Like m == NULL.
if( *this == T_null ) Error( T_no_endif );
return 0; // Ready to get the next token.
else if_nesting++;
else if( *this == "else" ) // Skip until #endif.
if( if_nesting == 0 ) Error( T_extra_else );
while( *this != T_null )
if( *this == T_directive && *this == "endif" ) break;
if( *this == T_null ) Error( T_no_endif );
return 0; // Ready to get next token.
else if( *this == "endif" )
if( if_nesting == 0 ) Error( T_extra_endif );
return 0; // Ready to get next token.
else if( *this == "error" )
int line = Line();
NextTok(); // Allow macro substitution.
// A message on the same line as the directive is echoed to stderr.
if( Line() == line )
fprintf( stderr, "(preprocessor, line %d) %s\n", line, Spelling() );
return 0; // Ready to get next token.
// If next token is on a different line; we went too far.
fprintf( stderr, "(null preprocessor message, line %d)\n", line );
return 1; // Signal that we already have the next token.
return 0;
* O p e r a t o r + + *
* *
* Grab the next token from the current source file. If at end of file, *
* pick up where we left off in the previous file. If there is no *
* previous file, return "T_null". *
* *
// Advances to the next token and returns *this.  A token that was put back
// with "--" is delivered again without reading anything.  Directives are
// handled here, and when a source is exhausted it is closed and reading
// resumes in the enclosing file, if there is one.  For numeric tokens the
// value fields are filled in from the spelling.
// NOTE(review): the "break" statements below have no enclosing loop in this
// text, and no call that fetches a token is visible -- a loop header and a
// fetch appear to be missing; confirm against the full file.
Token & Token::operator++( )
if( put_back )
put_back = 0;
return *this;
// If we've reached the end of an include file, pop the stack.
if( type == T_directive )
if( HandleDirective() ) break;
else if( type == T_null )
fclose( frame.source );
if( !PopFrame() ) break;
else break; // We have a real token.
// Now fill in the value fields if the token is a number.
switch( type )
case T_integer : ivalue = atoi( spelling ); break;
case T_float : fvalue = atof( spelling ); break;
// End of all input while a conditional block is still open is an error.
case T_null : if( if_nesting > 0 ) Error( T_no_endif ); break;
default : break;
return *this;
// Postfix increment is deliberately unsupported: it only prints a notice on
// stderr and leaves the token unchanged.
Token & Token::operator++( int )
{
    fprintf( stderr, "Postfix increment is not implemented for the Token class.\n" );
    return *this;
}
* T o k e n Push & Pop Frame *
* *
* These functions are used to create and destroy the context "frames" *
* that are used to handle nested files (via "include"). *
* *
void Token::PushFrame( FILE *fp, char *fname )
// Create a copy of the current (top-level) frame.
TokFrame *n = new TokFrame;
*n = frame;
// Now overwrite the top-level frame with the new state.
frame.next = n;
frame.source = fp;
frame.line = 1;
frame.column = 0;
frame.fname = strdup( fname );
pushed = NullChar;
int Token::PopFrame()
if( frame.next == NULL ) return 0;
TokFrame *old = frame.next;
frame = *old;
delete old; // Delete the frame that we just copied from.
return 1;
* Miscellaneous Functions *
* *
void Token::Init()
case_sensitive = 1;
put_back = 0;
pushed = NullChar;
if_nesting = 0;
frame.source = NULL;
frame.next = NULL;
frame.fname = NULL;
first = NULL;
last = NULL;
table = NULL;
pushed = NullChar;
SearchArgs(); // Search command-line args for macro definitions.
const char* Token::Spelling() const
return spelling;
char Token::Char() const
return spelling[0];
const char* Token::FileName() const
static char *null_string = "";
if( frame.fname == NULL ) return null_string;
else return frame.fname;
float Token::Fvalue() const
float val = 0.0;
if( type == T_float ) val = fvalue;
if( type == T_integer ) val = ivalue;
return val;
void Token::GetName( char *name, int max )
int c;
for( int i = 1; i < max; i++ )
if( NonWhite(c) == '>' )
*name = NullChar;
*name++ = c;
Error( T_name_too_long );
void Token::AddPath( const char *new_path )
char *name = strdup( new_path );
if( name == NULL ) return;
TokPath *p = new TokPath;
p->next = NULL;
p->path = name;
if( first == NULL ) first = p;
else last->next = p;
last = p;
void Token::ClearPaths()
TokPath *p = first;
while( p != NULL )
TokPath *q = p->next;
delete[] p->path; // delete the string.
delete p; // delete the path structure.
p = q;
first = NULL;
last = NULL;
FILE *Token::ResolveName( const char *name )
char resolved[128];
for( const TokPath *p = first; p != NULL; p = p->next )
strcpy( resolved, p->path );
strcat( resolved, "/" );
strcat( resolved, name );
FILE *fp = fopen( resolved, "r" );
if( fp != NULL ) return fp;
Error( T_file_not_found, name );
return NULL;
void Token::CaseSensitive( int on_off = 1 )
case_sensitive = on_off;
void Token::Debug( FILE *fp )
debug = fp;
void Token::AddMacro( const char *macro, const char *repl, TokType t )
if( table == NULL ) // Create and initialize the table.
table = new TokMacroPtr[ HashConst ];
for( int j = 0; j < HashConst; j++ ) table[j] = NULL;
int i = HashName( macro );
TokMacro *m = new TokMacro;
m->next = table[i];
m->macro = strdup( macro );
m->repl = strdup( repl );
m->type = t;
table[i] = m;
void Token::Args( int argc_, char *argv_[] )
argc = argc_; // Set the static variables.
argv = argv_;
void Token::SearchArgs( )
TokType type = T_null;
for( int i = 1; i < argc; i++ )
if( strcmp( argv[i], "-macro" ) == 0 )
if( i+2 >= argc )
fprintf( stderr, "(Token) ERROR macro argument(s) missing\n" );
char *macro = argv[i+1];
char *repl = argv[i+2];
if( isAlpha ( repl[0] ) ) type = T_ident ; else
if( isInteger( repl ) ) type = T_integer; else
type = T_float ;
AddMacro( macro, repl, type );
i += 2;