00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include "expr/Lexer.h"
00011
00012 #include "llvm/Support/MemoryBuffer.h"
00013 #include "llvm/Support/Streams.h"
00014
00015 #include <iomanip>
00016 #include <iostream>
00017 #include <string.h>
00018
00019 using namespace llvm;
00020 using namespace klee;
00021 using namespace klee::expr;
00022
00024
00025 const char *Token::getKindName() const {
00026 switch (kind) {
00027 default:
00028 case Unknown: return "Unknown";
00029 case Arrow: return "Arrow";
00030 case At: return "At";
00031 case Colon: return "Colon";
00032 case Comma: return "Comma";
00033 case Comment: return "Comment";
00034 case EndOfFile: return "EndOfFile";
00035 case Equals: return "Equals";
00036 case Identifier: return "Identifier";
00037 case KWFalse: return "KWFalse";
00038 case KWQuery: return "KWQuery";
00039 case KWReserved: return "KWReserved";
00040 case KWTrue: return "KWTrue";
00041 case KWWidth: return "KWWidth";
00042 case LBrace: return "LBrace";
00043 case LParen: return "LParen";
00044 case LSquare: return "LSquare";
00045 case Number: return "Number";
00046 case RBrace: return "RBrace";
00047 case RParen: return "RParen";
00048 case RSquare: return "RSquare";
00049 case Semicolon: return "Semicolon";
00050 }
00051 }
00052
00053 void Token::dump() {
00054 llvm::cerr << "(Token \"" << getKindName() << "\" "
00055 << (void*) start << " " << length << " "
00056 << line << " " << column << ")";
00057 }
00058
00060
/// Return true if \p Char may appear inside an identifier after its first
/// character: an alphanumeric character, '_' or '.'.
///
/// \param Char  A character value as produced by the lexer's peek/get
///              routines, or a negative value (e.g. the -1 end-of-input
///              sentinel).
static inline bool isInternalIdentifierChar(int Char) {
  // The <ctype.h> classifiers are only defined for values representable as
  // unsigned char (or EOF).  A negative value is either the end-of-input
  // sentinel or a sign-extended high byte; neither is an identifier
  // character, so reject it before calling isalnum().
  if (Char < 0)
    return false;

  return isalnum(Char) || Char == '_' || Char == '.';
}
00064
00065 Lexer::Lexer(const llvm::MemoryBuffer *MB)
00066 : BufferPos(MB->getBufferStart()), BufferEnd(MB->getBufferEnd()),
00067 LineNumber(1), ColumnNumber(0) {
00068 }
00069
00070 Lexer::~Lexer() {
00071 }
00072
00073 int Lexer::PeekNextChar() {
00074 if (BufferPos == BufferEnd)
00075 return -1;
00076 return *BufferPos;
00077 }
00078
00079 int Lexer::GetNextChar() {
00080 if (BufferPos == BufferEnd)
00081 return -1;
00082
00083
00084
00085 char Result = *BufferPos++;
00086 if (Result == '\n' || Result == '\r') {
00087 if (BufferPos != BufferEnd && *BufferPos == ('\n' + '\r' - Result))
00088 ++BufferPos;
00089 Result = '\n';
00090 }
00091
00092 if (Result == '\n') {
00093 ++LineNumber;
00094 ColumnNumber = 0;
00095 } else {
00096 ++ColumnNumber;
00097 }
00098
00099 return Result;
00100 }
00101
00102 Token &Lexer::SetTokenKind(Token &Result, Token::Kind k) {
00103 Result.kind = k;
00104 Result.length = BufferPos - Result.start;
00105 return Result;
00106 }
00107
/// Return true if the \p N characters at \p Str spell one of the
/// pattern-based reserved names:
///
///  - 'i' followed by one or more digits and nothing else (e.g. "i32");
///  - "fp", a digit, then any further digits, optionally followed by '.'
///    and arbitrary trailing characters (e.g. "fp80", "fp80.x").
///
/// NOTE(review): because of the N > 3 guard, three-character names such as
/// "fp8" are not matched by the second pattern; confirm this is intended.
static bool isReservedKW(const char *Str, unsigned N) {
  // Pattern 1: i<digits>
  if (N > 1 && Str[0] == 'i') {
    unsigned Pos = 1;
    while (Pos < N && isdigit(Str[Pos]))
      ++Pos;
    // Only a match when the digits run all the way to the end.
    if (Pos == N)
      return true;
  }

  // Pattern 2: fp<digits>[.<anything>]
  if (N > 3 && Str[0] == 'f' && Str[1] == 'p' && isdigit(Str[2])) {
    unsigned Pos = 3;
    while (Pos < N && isdigit(Str[Pos]))
      ++Pos;
    // Either the digits reach the end, or they are cut short by a '.'.
    if (Pos == N || Str[Pos] == '.')
      return true;
  }

  return false;
}
/// Return true if the \p N characters at \p Str spell a width name: the
/// letter 'w' followed by one or more digits and nothing else (e.g. "w32").
static bool isWidthKW(const char *Str, unsigned N) {
  const bool HasPrefix = N >= 2 && Str[0] == 'w';
  if (!HasPrefix)
    return false;

  // Scan the digit run; the name matches only if it reaches the end.
  unsigned Pos = 1;
  while (Pos != N && isdigit(Str[Pos]))
    ++Pos;
  return Pos == N;
}
00139 Token &Lexer::SetIdentifierTokenKind(Token &Result) {
00140 unsigned Length = BufferPos - Result.start;
00141 switch (Length) {
00142 case 3:
00143 if (memcmp("def", Result.start, 3) == 0)
00144 return SetTokenKind(Result, Token::KWReserved);
00145 if (memcmp("var", Result.start, 3) == 0)
00146 return SetTokenKind(Result, Token::KWReserved);
00147 break;
00148
00149 case 4:
00150 if (memcmp("true", Result.start, 4) == 0)
00151 return SetTokenKind(Result, Token::KWTrue);
00152 break;
00153
00154 case 5:
00155 if (memcmp("array", Result.start, 5) == 0)
00156 return SetTokenKind(Result, Token::KWReserved);
00157 if (memcmp("false", Result.start, 5) == 0)
00158 return SetTokenKind(Result, Token::KWFalse);
00159 if (memcmp("query", Result.start, 5) == 0)
00160 return SetTokenKind(Result, Token::KWQuery);
00161 break;
00162
00163 case 6:
00164 if (memcmp("define", Result.start, 6) == 0)
00165 return SetTokenKind(Result, Token::KWReserved);
00166 break;
00167
00168 case 7:
00169 if (memcmp("declare", Result.start, 7) == 0)
00170 return SetTokenKind(Result, Token::KWReserved);
00171 break;
00172 }
00173
00174 if (isReservedKW(Result.start, Length))
00175 return SetTokenKind(Result, Token::KWReserved);
00176 if (isWidthKW(Result.start, Length))
00177 return SetTokenKind(Result, Token::KWWidth);
00178
00179 return SetTokenKind(Result, Token::Identifier);
00180 }
00181
00182 void Lexer::SkipToEndOfLine() {
00183 for (;;) {
00184 int Char = GetNextChar();
00185 if (Char == -1 || Char =='\n')
00186 break;
00187 }
00188 }
00189
00190 Token &Lexer::LexNumber(Token &Result) {
00191 while (isalnum(PeekNextChar()) || PeekNextChar()=='_')
00192 GetNextChar();
00193 return SetTokenKind(Result, Token::Number);
00194 }
00195
00196 Token &Lexer::LexIdentifier(Token &Result) {
00197 while (isInternalIdentifierChar(PeekNextChar()))
00198 GetNextChar();
00199
00200
00201 return SetIdentifierTokenKind(Result);
00202 }
00203
00204 Token &Lexer::Lex(Token &Result) {
00205 Result.kind = Token::Unknown;
00206 Result.length = 0;
00207 Result.start = BufferPos;
00208
00209
00210 while (isspace(PeekNextChar()))
00211 GetNextChar();
00212
00213 Result.start = BufferPos;
00214 Result.line = LineNumber;
00215 Result.column = ColumnNumber;
00216 int Char = GetNextChar();
00217 switch (Char) {
00218 case -1: return SetTokenKind(Result, Token::EndOfFile);
00219
00220 case '(': return SetTokenKind(Result, Token::LParen);
00221 case ')': return SetTokenKind(Result, Token::RParen);
00222 case ',': return SetTokenKind(Result, Token::Comma);
00223 case ':': return SetTokenKind(Result, Token::Colon);
00224 case ';': return SetTokenKind(Result, Token::Semicolon);
00225 case '=': return SetTokenKind(Result, Token::Equals);
00226 case '@': return SetTokenKind(Result, Token::At);
00227 case '[': return SetTokenKind(Result, Token::LSquare);
00228 case ']': return SetTokenKind(Result, Token::RSquare);
00229 case '{': return SetTokenKind(Result, Token::LBrace);
00230 case '}': return SetTokenKind(Result, Token::RBrace);
00231
00232 case '#':
00233 SkipToEndOfLine();
00234 return SetTokenKind(Result, Token::Comment);
00235
00236 case '+': {
00237 if (isdigit(PeekNextChar()))
00238 return LexNumber(Result);
00239 else
00240 return SetTokenKind(Result, Token::Unknown);
00241 }
00242
00243 case '-': {
00244 int Next = PeekNextChar();
00245 if (Next == '>')
00246 return GetNextChar(), SetTokenKind(Result, Token::Arrow);
00247 else if (isdigit(Next))
00248 return LexNumber(Result);
00249 else
00250 return SetTokenKind(Result, Token::Unknown);
00251 break;
00252 }
00253
00254 default:
00255 if (isdigit(Char))
00256 return LexNumber(Result);
00257 else if (isalpha(Char) || Char == '_')
00258 return LexIdentifier(Result);
00259 return SetTokenKind(Result, Token::Unknown);
00260 }
00261 }