Lexer.cpp

Go to the documentation of this file.
00001 //===-- Lexer.cpp ---------------------------------------------------------===//
00002 //
00003 //                     The KLEE Symbolic Virtual Machine
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 
00010 #include "expr/Lexer.h"
00011 
00012 #include "llvm/Support/MemoryBuffer.h"
00013 #include "llvm/Support/Streams.h"
00014 
00015 #include <iomanip>
00016 #include <iostream>
00017 #include <string.h>
00018 
00019 using namespace llvm;
00020 using namespace klee;
00021 using namespace klee::expr;
00022 
00024 
00025 const char *Token::getKindName() const {
00026   switch (kind) {
00027   default:
00028   case Unknown:    return "Unknown";
00029   case Arrow:      return "Arrow";
00030   case At:         return "At";
00031   case Colon:      return "Colon";
00032   case Comma:      return "Comma";
00033   case Comment:    return "Comment";
00034   case EndOfFile:  return "EndOfFile";
00035   case Equals:     return "Equals";
00036   case Identifier: return "Identifier";
00037   case KWFalse:    return "KWFalse";
00038   case KWQuery:    return "KWQuery";
00039   case KWReserved: return "KWReserved";
00040   case KWTrue:     return "KWTrue";
00041   case KWWidth:    return "KWWidth";
00042   case LBrace:     return "LBrace";
00043   case LParen:     return "LParen";
00044   case LSquare:    return "LSquare";
00045   case Number:     return "Number";
00046   case RBrace:     return "RBrace";
00047   case RParen:     return "RParen";
00048   case RSquare:    return "RSquare";
00049   case Semicolon:  return "Semicolon";
00050   }
00051 }
00052 
00053 void Token::dump() {
00054   llvm::cerr << "(Token \"" << getKindName() << "\" "
00055              << (void*) start << " " << length << " "
00056              << line << " " << column << ")";
00057 }
00058 
00060 
/// Return true if \p Char may appear inside an identifier after its first
/// character: an alphanumeric character, an underscore, or a period.
static inline bool isInternalIdentifierChar(int Char) {
  if (Char == '_' || Char == '.')
    return true;
  return isalnum(Char) != 0;
}
00064 
00065 Lexer::Lexer(const llvm::MemoryBuffer *MB) 
00066   : BufferPos(MB->getBufferStart()), BufferEnd(MB->getBufferEnd()), 
00067     LineNumber(1), ColumnNumber(0) {
00068 }
00069 
00070 Lexer::~Lexer() {
00071 }
00072 
00073 int Lexer::PeekNextChar() {
00074   if (BufferPos == BufferEnd)
00075     return -1;
00076   return *BufferPos;
00077 }
00078 
00079 int Lexer::GetNextChar() {
00080   if (BufferPos == BufferEnd)
00081     return -1;
00082 
00083   // Handle DOS/Mac newlines here, by stripping duplicates and by
00084   // returning '\n' for both.
00085   char Result = *BufferPos++;
00086   if (Result == '\n' || Result == '\r') {
00087     if (BufferPos != BufferEnd && *BufferPos == ('\n' + '\r' - Result))
00088       ++BufferPos;
00089     Result = '\n';
00090   }
00091 
00092   if (Result == '\n') {
00093     ++LineNumber;
00094     ColumnNumber = 0;
00095   } else {
00096     ++ColumnNumber;
00097   }
00098 
00099   return Result;
00100 }
00101 
00102 Token &Lexer::SetTokenKind(Token &Result, Token::Kind k) {
00103   Result.kind = k;
00104   Result.length = BufferPos - Result.start;
00105   return Result;
00106 }
00107 
/// Return true if the \p N characters starting at \p Str spell a reserved
/// type-like keyword.
///
/// Two patterns are recognized:
///   - i[0-9]+            e.g. "i1", "i32"
///   - fp[0-9]+([.].*)?   e.g. "fp8", "fp80", "fp80.x86"
///
/// \param Str Start of the identifier text; need not be NUL-terminated.
/// \param N   Number of characters in the identifier.
static bool isReservedKW(const char *Str, unsigned N) {
  unsigned i;

  // Check for i[0-9]+
  if (N > 1 && Str[0] == 'i') {
    // Characters go through unsigned char because isdigit() is only defined
    // for values representable as unsigned char (or EOF).
    for (i = 1; i < N; ++i)
      if (!isdigit(static_cast<unsigned char>(Str[i])))
        break;
    if (i == N)
      return true;
  }

  // Check for fp[0-9]+([.].*)?$
  //
  // A single digit after "fp" is enough to satisfy the pattern, so the
  // minimum length is 3 (e.g. "fp8").
  if (N > 2 && Str[0] == 'f' && Str[1] == 'p' &&
      isdigit(static_cast<unsigned char>(Str[2]))) {
    for (i = 3; i < N; ++i)
      if (!isdigit(static_cast<unsigned char>(Str[i])))
        break;
    // Either the digits run to the end, or they are followed by a '.'
    // introducing an arbitrary suffix.
    if (i == N || Str[i] == '.')
      return true;
  }

  return false;
}
/// Return true if the \p N characters starting at \p Str spell a width
/// keyword, i.e. match w[0-9]+ (for example "w8" or "w32").
///
/// \param Str Start of the identifier text; need not be NUL-terminated.
/// \param N   Number of characters in the identifier.
static bool isWidthKW(const char *Str, unsigned N) {
  // Need the leading 'w' plus at least one digit.
  if (N < 2 || Str[0] != 'w')
    return false;

  // Characters go through unsigned char because isdigit() is only defined
  // for values representable as unsigned char (or EOF); a plain char holding
  // a byte >= 0x80 may be negative.
  for (unsigned i = 1; i < N; ++i)
    if (!isdigit(static_cast<unsigned char>(Str[i])))
      return false;

  return true;
}
00139 Token &Lexer::SetIdentifierTokenKind(Token &Result) {
00140   unsigned Length = BufferPos - Result.start;
00141   switch (Length) {
00142   case 3:
00143     if (memcmp("def", Result.start, 3) == 0)
00144       return SetTokenKind(Result, Token::KWReserved);
00145     if (memcmp("var", Result.start, 3) == 0)
00146       return SetTokenKind(Result, Token::KWReserved);
00147     break;
00148 
00149   case 4:
00150     if (memcmp("true", Result.start, 4) == 0)
00151       return SetTokenKind(Result, Token::KWTrue);
00152     break;
00153 
00154   case 5:
00155     if (memcmp("array", Result.start, 5) == 0)
00156       return SetTokenKind(Result, Token::KWReserved);
00157     if (memcmp("false", Result.start, 5) == 0)
00158       return SetTokenKind(Result, Token::KWFalse);
00159     if (memcmp("query", Result.start, 5) == 0)
00160       return SetTokenKind(Result, Token::KWQuery);
00161     break;      
00162     
00163   case 6:
00164     if (memcmp("define", Result.start, 6) == 0)
00165       return SetTokenKind(Result, Token::KWReserved);
00166     break;
00167 
00168   case 7:
00169     if (memcmp("declare", Result.start, 7) == 0)
00170       return SetTokenKind(Result, Token::KWReserved);
00171     break;
00172   }
00173 
00174   if (isReservedKW(Result.start, Length))
00175     return SetTokenKind(Result, Token::KWReserved);
00176   if (isWidthKW(Result.start, Length))
00177     return SetTokenKind(Result, Token::KWWidth);
00178 
00179   return SetTokenKind(Result, Token::Identifier);
00180 }
00181 
00182 void Lexer::SkipToEndOfLine() {
00183   for (;;) {
00184     int Char = GetNextChar();
00185     if (Char == -1 || Char =='\n')
00186       break;
00187   }
00188 }
00189 
00190 Token &Lexer::LexNumber(Token &Result) {
00191   while (isalnum(PeekNextChar()) || PeekNextChar()=='_')
00192     GetNextChar();
00193   return SetTokenKind(Result, Token::Number);
00194 }
00195 
00196 Token &Lexer::LexIdentifier(Token &Result) {
00197   while (isInternalIdentifierChar(PeekNextChar()))
00198     GetNextChar();
00199 
00200   // Recognize keywords specially.
00201   return SetIdentifierTokenKind(Result);
00202 }
00203 
00204 Token &Lexer::Lex(Token &Result) {
00205   Result.kind = Token::Unknown;
00206   Result.length = 0;
00207   Result.start = BufferPos;
00208   
00209   // Skip whitespace.
00210   while (isspace(PeekNextChar()))
00211     GetNextChar();
00212 
00213   Result.start = BufferPos;
00214   Result.line = LineNumber;
00215   Result.column = ColumnNumber;
00216   int Char = GetNextChar();
00217   switch (Char) {
00218   case -1:  return SetTokenKind(Result, Token::EndOfFile);
00219     
00220   case '(': return SetTokenKind(Result, Token::LParen);
00221   case ')': return SetTokenKind(Result, Token::RParen);
00222   case ',': return SetTokenKind(Result, Token::Comma);
00223   case ':': return SetTokenKind(Result, Token::Colon);
00224   case ';': return SetTokenKind(Result, Token::Semicolon);
00225   case '=': return SetTokenKind(Result, Token::Equals);
00226   case '@': return SetTokenKind(Result, Token::At);
00227   case '[': return SetTokenKind(Result, Token::LSquare);
00228   case ']': return SetTokenKind(Result, Token::RSquare);
00229   case '{': return SetTokenKind(Result, Token::LBrace);
00230   case '}': return SetTokenKind(Result, Token::RBrace);
00231 
00232   case '#':
00233     SkipToEndOfLine();
00234     return SetTokenKind(Result, Token::Comment);
00235 
00236   case '+': {
00237     if (isdigit(PeekNextChar()))
00238       return LexNumber(Result);
00239     else
00240       return SetTokenKind(Result, Token::Unknown);
00241   }
00242 
00243   case '-': {
00244     int Next = PeekNextChar();
00245     if (Next == '>')
00246       return GetNextChar(), SetTokenKind(Result, Token::Arrow);
00247     else if (isdigit(Next))
00248       return LexNumber(Result);
00249     else
00250       return SetTokenKind(Result, Token::Unknown);
00251     break;
00252   }
00253 
00254   default:
00255     if (isdigit(Char))
00256       return LexNumber(Result);
00257     else if (isalpha(Char) || Char == '_')
00258       return LexIdentifier(Result);
00259     return SetTokenKind(Result, Token::Unknown);
00260   }
00261 }

Generated on Fri Jun 5 03:31:32 2009 for klee by  doxygen 1.5.8