 |
|
 |
|
| Files: |
1 |
|
Branches Taken: |
65.3% |
378 / 579 |
| Generated: |
2010-02-10 01:31 |
|
Branches Executed: |
63.6% |
368 / 579 |
| |
|
Line Coverage: |
88.4% |
374 / 423 |
| |
 |
|
 |
1 : //===--- LiteralSupport.cpp - Code to parse and process literals ----------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : // This file implements the NumericLiteralParser, CharLiteralParser, and
11 : // StringLiteralParser interfaces.
12 : //
13 : //===----------------------------------------------------------------------===//
14 :
15 : #include "clang/Lex/LiteralSupport.h"
16 : #include "clang/Lex/Preprocessor.h"
17 : #include "clang/Lex/LexDiagnostic.h"
18 : #include "clang/Basic/TargetInfo.h"
19 : #include "llvm/ADT/StringRef.h"
20 : #include "llvm/ADT/StringExtras.h"
21 : using namespace clang;
22 :
23 : /// HexDigitValue - Return the value of the specified hex digit, or -1 if it's
24 : /// not valid.
25 19681: static int HexDigitValue(char C) {
19676: branch 0 taken
5: branch 1 taken
17314: branch 2 taken
2362: branch 3 taken
26 19681: if (C >= '0' && C <= '9') return C-'0';
912: branch 0 taken
1455: branch 1 taken
912: branch 2 taken
0: branch 3 not taken
27 2367: if (C >= 'a' && C <= 'f') return C-'a'+10;
1450: branch 0 taken
5: branch 1 taken
1448: branch 2 taken
2: branch 3 taken
28 1455: if (C >= 'A' && C <= 'F') return C-'A'+10;
29 7: return -1;
30 : }
31 :
32 : /// ProcessCharEscape - Parse a standard C escape sequence, which can occur in
33 : /// either a character or a string literal.
34 : static unsigned ProcessCharEscape(const char *&ThisTokBuf,
35 : const char *ThisTokEnd, bool &HadError,
36 : SourceLocation Loc, bool IsWide,
37 658: Preprocessor &PP) {
38 : // Skip the '\' char.
39 658: ++ThisTokBuf;
40 :
41 : // We know that this character can't be off the end of the buffer, because
42 : // that would have been \", which would not have been the end of string.
43 658: unsigned ResultChar = *ThisTokBuf++;
26: branch 0 taken
4: branch 1 taken
3: branch 2 taken
3: branch 3 taken
1: branch 4 taken
3: branch 5 taken
574: branch 6 taken
3: branch 7 taken
6: branch 8 taken
3: branch 9 taken
7: branch 10 taken
21: branch 11 taken
4: branch 12 taken
0: branch 13 not taken
44 658: switch (ResultChar) {
45 : // These map to themselves.
46 26: case '\\': case '\'': case '"': case '?': break;
47 :
48 : // These have fixed mappings.
49 : case 'a':
50 : // TODO: K&R: the meaning of '\\a' is different in traditional C
51 4: ResultChar = 7;
52 4: break;
53 : case 'b':
54 3: ResultChar = 8;
55 3: break;
56 : case 'e':
57 3: PP.Diag(Loc, diag::ext_nonstandard_escape) << "e";
58 3: ResultChar = 27;
59 3: break;
60 : case 'E':
61 1: PP.Diag(Loc, diag::ext_nonstandard_escape) << "E";
62 1: ResultChar = 27;
63 1: break;
64 : case 'f':
65 3: ResultChar = 12;
66 3: break;
67 : case 'n':
68 574: ResultChar = 10;
69 574: break;
70 : case 'r':
71 3: ResultChar = 13;
72 3: break;
73 : case 't':
74 6: ResultChar = 9;
75 6: break;
76 : case 'v':
77 3: ResultChar = 11;
78 3: break;
79 : case 'x': { // Hex escape.
80 7: ResultChar = 0;
7: branch 0 taken
0: branch 1 not taken
0: branch 3 not taken
7: branch 4 taken
81 7: if (ThisTokBuf == ThisTokEnd || !isxdigit(*ThisTokBuf)) {
82 0: PP.Diag(Loc, diag::err_hex_escape_no_digits);
83 0: HadError = 1;
84 0: break;
85 : }
86 :
87 : // Hex escapes are a maximal series of hex digits.
88 7: bool Overflow = false;
19: branch 0 taken
0: branch 1 not taken
89 19: for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) {
90 19: int CharVal = HexDigitValue(ThisTokBuf[0]);
12: branch 0 taken
7: branch 1 taken
91 19: if (CharVal == -1) break;
92 : // About to shift out a digit?
93 12: Overflow |= (ResultChar & 0xF0000000) ? true : false;
94 12: ResultChar <<= 4;
95 12: ResultChar |= CharVal;
96 : }
97 :
98 : // See if any bits will be truncated when evaluated as a character.
99 : unsigned CharWidth = IsWide
100 : ? PP.getTargetInfo().getWCharWidth()
2: branch 0 taken
5: branch 1 taken
101 7: : PP.getTargetInfo().getCharWidth();
102 :
5: branch 0 taken
2: branch 1 taken
0: branch 2 not taken
5: branch 3 taken
103 7: if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
104 0: Overflow = true;
105 0: ResultChar &= ~0U >> (32-CharWidth);
106 : }
107 :
108 : // Check for overflow.
0: branch 0 not taken
7: branch 1 taken
109 7: if (Overflow) // Too many digits to fit in
110 0: PP.Diag(Loc, diag::warn_hex_escape_too_large);
111 7: break;
112 : }
113 : case '0': case '1': case '2': case '3':
114 : case '4': case '5': case '6': case '7': {
115 : // Octal escapes.
116 21: --ThisTokBuf;
117 21: ResultChar = 0;
118 :
119 : // Octal escapes are a series of octal digits with maximum length 3.
120 : // "\0123" is a two-character sequence equal to "\012" "3".
121 21: unsigned NumDigits = 0;
14: branch 0 taken
9: branch 1 taken
14: branch 2 taken
0: branch 3 not taken
8: branch 4 taken
6: branch 5 taken
2: branch 6 taken
6: branch 7 taken
122 23: do {
123 23: ResultChar <<= 3;
124 23: ResultChar |= *ThisTokBuf++ - '0';
125 23: ++NumDigits;
126 : } while (ThisTokBuf != ThisTokEnd && NumDigits < 3 &&
127 : ThisTokBuf[0] >= '0' && ThisTokBuf[0] <= '7');
128 :
129 : // Check for overflow. Reject '\777', but not L'\777'.
130 : unsigned CharWidth = IsWide
131 : ? PP.getTargetInfo().getWCharWidth()
2: branch 0 taken
19: branch 1 taken
132 21: : PP.getTargetInfo().getCharWidth();
133 :
19: branch 0 taken
2: branch 1 taken
0: branch 2 not taken
19: branch 3 taken
134 21: if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
135 0: PP.Diag(Loc, diag::warn_octal_escape_too_large);
136 0: ResultChar &= ~0U >> (32-CharWidth);
137 : }
138 21: break;
139 : }
140 :
141 : // Otherwise, these are not valid escapes.
142 : case '(': case '{': case '[': case '%':
143 : // GCC accepts these as extensions. We warn about them as such though.
144 : PP.Diag(Loc, diag::ext_nonstandard_escape)
145 4: << std::string()+(char)ResultChar;
146 4: break;
147 : default:
0: branch 1 not taken
0: branch 2 not taken
148 0: if (isgraph(ThisTokBuf[0]))
149 0: PP.Diag(Loc, diag::ext_unknown_escape) << std::string()+(char)ResultChar;
150 : else
151 0: PP.Diag(Loc, diag::ext_unknown_escape) << "x"+llvm::utohexstr(ResultChar);
152 : break;
153 : }
154 :
155 658: return ResultChar;
156 : }
157 :
158 : /// ProcessUCNEscape - Read the Universal Character Name, check constraints and
159 : /// convert the UTF32 to UTF8. This is a subroutine of StringLiteralParser.
160 : /// When we decide to implement UCN's for character constants and identifiers,
161 : /// we will likely rework our support for UCN's.
162 : static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
163 : char *&ResultBuf, bool &HadError,
164 28: SourceLocation Loc, bool IsWide, Preprocessor &PP)
165 : {
166 : // FIXME: Add a warning - UCN's are only valid in C++ & C99.
167 : // FIXME: Handle wide strings.
168 :
169 : // Save the beginning of the string (for error diagnostics).
170 28: const char *ThisTokBegin = ThisTokBuf;
171 :
172 : // Skip the '\u' char's.
173 28: ThisTokBuf += 2;
174 :
27: branch 0 taken
1: branch 1 taken
0: branch 3 not taken
27: branch 4 taken
175 28: if (ThisTokBuf == ThisTokEnd || !isxdigit(*ThisTokBuf)) {
176 1: PP.Diag(Loc, diag::err_ucn_escape_no_digits);
177 1: HadError = 1;
178 1: return;
179 : }
180 : typedef uint32_t UTF32;
181 :
182 27: UTF32 UcnVal = 0;
19: branch 0 taken
8: branch 1 taken
183 27: unsigned short UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8);
151: branch 0 taken
6: branch 1 taken
130: branch 2 taken
21: branch 3 taken
184 157: for (; ThisTokBuf != ThisTokEnd && UcnLen; ++ThisTokBuf, UcnLen--) {
185 130: int CharVal = HexDigitValue(ThisTokBuf[0]);
130: branch 0 taken
0: branch 1 not taken
186 130: if (CharVal == -1) break;
187 130: UcnVal <<= 4;
188 130: UcnVal |= CharVal;
189 : }
190 : // If we didn't consume the proper number of digits, there is a problem.
2: branch 0 taken
25: branch 1 taken
191 27: if (UcnLen) {
192 : PP.Diag(PP.AdvanceToTokenCharacter(Loc, ThisTokBuf-ThisTokBegin),
193 2: diag::err_ucn_escape_incomplete);
194 2: HadError = 1;
195 2: return;
196 : }
197 : // Check UCN constraints (C99 6.4.3p2).
1: branch 0 taken
24: branch 1 taken
1: branch 2 taken
0: branch 3 not taken
1: branch 4 taken
0: branch 5 not taken
0: branch 6 not taken
1: branch 7 taken
6: branch 8 taken
18: branch 9 taken
6: branch 10 taken
0: branch 11 not taken
0: branch 12 not taken
24: branch 13 taken
198 25: if ((UcnVal < 0xa0 &&
199 : (UcnVal != 0x24 && UcnVal != 0x40 && UcnVal != 0x60 )) // $, @, `
200 : || (UcnVal >= 0xD800 && UcnVal <= 0xDFFF)
201 : || (UcnVal > 0x10FFFF)) /* the maximum legal UTF32 value */ {
202 1: PP.Diag(Loc, diag::err_ucn_escape_invalid);
203 1: HadError = 1;
204 1: return;
205 : }
206 : // Now that we've parsed/checked the UCN, we convert from UTF32->UTF8.
207 : // The conversion below was inspired by:
208 : // http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
209 : // First, we determine how many bytes the result will require.
210 : typedef uint8_t UTF8;
211 :
212 24: unsigned short bytesToWrite = 0;
0: branch 0 not taken
24: branch 1 taken
213 24: if (UcnVal < (UTF32)0x80)
214 0: bytesToWrite = 1;
0: branch 0 not taken
24: branch 1 taken
215 24: else if (UcnVal < (UTF32)0x800)
216 0: bytesToWrite = 2;
18: branch 0 taken
6: branch 1 taken
217 24: else if (UcnVal < (UTF32)0x10000)
218 18: bytesToWrite = 3;
219 : else
220 6: bytesToWrite = 4;
221 :
222 24: const unsigned byteMask = 0xBF;
223 24: const unsigned byteMark = 0x80;
224 :
225 : // Once the bits are split out into bytes of UTF8, this is a mask OR-ed
226 : // into the first byte, depending on how many bytes follow.
227 : static const UTF8 firstByteMark[5] = {
228 : 0x00, 0x00, 0xC0, 0xE0, 0xF0
229 : };
230 : // Finally, we write the bytes into ResultBuf.
231 24: ResultBuf += bytesToWrite;
6: branch 0 taken
18: branch 1 taken
0: branch 2 not taken
0: branch 3 not taken
0: branch 4 not taken
232 24: switch (bytesToWrite) { // note: everything falls through.
233 6: case 4: *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
234 24: case 3: *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
235 24: case 2: *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
236 24: case 1: *--ResultBuf = (UTF8) (UcnVal | firstByteMark[bytesToWrite]);
237 : }
238 : // Update the buffer.
239 24: ResultBuf += bytesToWrite;
240 : }
241 :
242 :
243 : /// integer-constant: [C99 6.4.4.1]
244 : /// decimal-constant integer-suffix
245 : /// octal-constant integer-suffix
246 : /// hexadecimal-constant integer-suffix
247 : /// decimal-constant:
248 : /// nonzero-digit
249 : /// decimal-constant digit
250 : /// octal-constant:
251 : /// 0
252 : /// octal-constant octal-digit
253 : /// hexadecimal-constant:
254 : /// hexadecimal-prefix hexadecimal-digit
255 : /// hexadecimal-constant hexadecimal-digit
256 : /// hexadecimal-prefix: one of
257 : /// 0x 0X
258 : /// integer-suffix:
259 : /// unsigned-suffix [long-suffix]
260 : /// unsigned-suffix [long-long-suffix]
261 : /// long-suffix [unsigned-suffix]
262 : /// long-long-suffix [unsigned-suffix]
263 : /// nonzero-digit:
264 : /// 1 2 3 4 5 6 7 8 9
265 : /// octal-digit:
266 : /// 0 1 2 3 4 5 6 7
267 : /// hexadecimal-digit:
268 : /// 0 1 2 3 4 5 6 7 8 9
269 : /// a b c d e f
270 : /// A B C D E F
271 : /// unsigned-suffix: one of
272 : /// u U
273 : /// long-suffix: one of
274 : /// l L
275 : /// long-long-suffix: one of
276 : /// ll LL
277 : ///
278 : /// floating-constant: [C99 6.4.4.2]
279 : /// TODO: add rules...
280 : ///
281 : NumericLiteralParser::
282 : NumericLiteralParser(const char *begin, const char *end,
283 9451: SourceLocation TokLoc, Preprocessor &pp)
284 9451: : PP(pp), ThisTokBegin(begin), ThisTokEnd(end) {
285 :
286 : // This routine assumes that the range begin/end matches the regex for integer
287 : // and FP constants (specifically, the 'pp-number' regex), and assumes that
288 : // the byte at "*end" is both valid and not part of the regex. Because of
289 : // this, it doesn't have to check for 'overscan' in various places.
290 : assert(!isalnum(*end) && *end != '.' && *end != '_' &&
9451: branch 1 taken
0: branch 2 not taken
9451: branch 3 taken
0: branch 4 not taken
0: branch 5 not taken
9451: branch 6 taken
0: branch 9 not taken
0: branch 10 not taken
0: branch 11 not taken
0: branch 12 not taken
0: branch 13 not taken
0: branch 14 not taken
291 9451: "Lexer didn't maximally munch?");
292 :
293 9451: s = DigitsBegin = begin;
294 9451: saw_exponent = false;
295 9451: saw_period = false;
296 9451: isLong = false;
297 9451: isUnsigned = false;
298 9451: isLongLong = false;
299 9451: isFloat = false;
300 9451: isImaginary = false;
301 9451: isMicrosoftInteger = false;
302 9451: hadError = false;
303 :
1528: branch 0 taken
7923: branch 1 taken
7923: branch 2 taken
7923: branch 3 taken
304 9451: if (*s == '0') { // parse radix
305 1528: ParseNumberStartingWithZero(TokLoc);
2: branch 0 taken
1526: branch 1 taken
1526: branch 2 taken
1526: branch 3 taken
306 1528: if (hadError)
307 2: return;
308 : } else { // the first digit is non-zero
309 7923: radix = 10;
310 7923: s = SkipDigits(s);
1148: branch 0 taken
6775: branch 1 taken
6775: branch 2 taken
6775: branch 3 taken
311 7923: if (s == ThisTokEnd) {
312 : // Done.
5: branch 1 taken
1143: branch 2 taken
0: branch 3 not taken
5: branch 4 taken
5: branch 5 taken
5: branch 6 taken
0: branch 8 not taken
0: branch 9 not taken
0: branch 10 not taken
0: branch 11 not taken
0: branch 12 not taken
0: branch 13 not taken
313 1148: } else if (isxdigit(*s) && !(*s == 'e' || *s == 'E')) {
314 : PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
315 0: diag::err_invalid_decimal_digit) << std::string(s, s+1);
316 0: hadError = true;
317 0: return;
648: branch 0 taken
500: branch 1 taken
500: branch 2 taken
500: branch 3 taken
318 1148: } else if (*s == '.') {
319 648: s++;
320 648: saw_period = true;
321 648: s = SkipDigits(s);
322 : }
7900: branch 0 taken
23: branch 1 taken
0: branch 2 not taken
7900: branch 3 taken
7900: branch 4 taken
7900: branch 5 taken
7900: branch 6 taken
7900: branch 7 taken
323 7923: if ((*s == 'e' || *s == 'E')) { // exponent
324 23: const char *Exponent = s;
325 23: s++;
326 23: saw_exponent = true;
23: branch 0 taken
0: branch 1 not taken
11: branch 2 taken
12: branch 3 taken
12: branch 4 taken
12: branch 5 taken
12: branch 6 taken
12: branch 7 taken
327 23: if (*s == '+' || *s == '-') s++; // sign
328 23: const char *first_non_digit = SkipDigits(s);
21: branch 0 taken
2: branch 1 taken
2: branch 2 taken
2: branch 3 taken
329 23: if (first_non_digit != s) {
330 21: s = first_non_digit;
331 : } else {
332 : PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-begin),
333 2: diag::err_exponent_has_no_digits);
334 2: hadError = true;
335 2: return;
336 : }
337 : }
338 : }
339 :
340 9447: SuffixBegin = s;
341 :
342 : // Parse the suffix. At this point we can classify whether we have an FP or
343 : // integer constant.
344 9447: bool isFPConstant = isFloatingLiteral();
345 :
346 : // Loop over all of the characters of the suffix. If we see something bad,
347 : // we break out of the loop.
814: branch 0 taken
9434: branch 1 taken
9434: branch 2 taken
9434: branch 3 taken
348 10248: for (; s != ThisTokEnd; ++s) {
153: branch 0 taken
56: branch 1 taken
537: branch 2 taken
67: branch 3 taken
1: branch 4 taken
0: branch 5 not taken
0: branch 6 not taken
0: branch 7 not taken
0: branch 8 not taken
0: branch 9 not taken
0: branch 10 not taken
0: branch 11 not taken
349 814: switch (*s) {
350 : case 'f': // FP Suffix for "float"
351 : case 'F':
153: branch 0 taken
0: branch 1 not taken
0: branch 2 not taken
0: branch 3 not taken
352 153: if (!isFPConstant) break; // Error for integer constant.
153: branch 0 taken
0: branch 1 not taken
0: branch 2 not taken
153: branch 3 taken
153: branch 4 taken
153: branch 5 taken
153: branch 6 taken
153: branch 7 taken
353 153: if (isFloat || isLong) break; // FF, LF invalid.
354 153: isFloat = true;
355 153: continue; // Success.
356 : case 'u':
357 : case 'U':
56: branch 0 taken
0: branch 1 not taken
0: branch 2 not taken
0: branch 3 not taken
358 56: if (isFPConstant) break; // Error for floating constant.
56: branch 0 taken
0: branch 1 not taken
0: branch 2 not taken
0: branch 3 not taken
359 56: if (isUnsigned) break; // Cannot be repeated.
360 56: isUnsigned = true;
361 56: continue; // Success.
362 : case 'l':
363 : case 'L':
537: branch 0 taken
0: branch 1 not taken
0: branch 2 not taken
537: branch 3 taken
537: branch 4 taken
537: branch 5 taken
537: branch 6 taken
537: branch 7 taken
364 537: if (isLong || isLongLong) break; // Cannot be repeated.
537: branch 0 taken
0: branch 1 not taken
0: branch 2 not taken
0: branch 3 not taken
365 537: if (isFloat) break; // LF invalid.
366 :
367 : // Check for long long. The L's need to be adjacent and the same case.
48: branch 0 taken
489: branch 1 taken
47: branch 2 taken
1: branch 3 taken
1: branch 4 taken
1: branch 5 taken
1: branch 6 taken
1: branch 7 taken
368 584: if (s+1 != ThisTokEnd && s[1] == s[0]) {
47: branch 0 taken
0: branch 1 not taken
0: branch 2 not taken
0: branch 3 not taken
369 47: if (isFPConstant) break; // long long invalid for floats.
370 47: isLongLong = true;
371 47: ++s; // Eat both of them.
372 : } else {
373 490: isLong = true;
374 : }
375 537: continue; // Success.
376 : case 'i':
13: branch 1 taken
54: branch 2 taken
0: branch 4 not taken
0: branch 5 not taken
377 67: if (PP.getLangOptions().Microsoft) {
13: branch 0 taken
0: branch 1 not taken
12: branch 2 taken
1: branch 3 taken
1: branch 4 taken
11: branch 5 taken
11: branch 6 taken
11: branch 7 taken
11: branch 8 taken
11: branch 9 taken
11: branch 10 taken
11: branch 11 taken
378 13: if (isFPConstant || isLong || isLongLong) break;
379 :
380 : // Allow i8, i16, i32, i64, and i128.
11: branch 0 taken
0: branch 1 not taken
0: branch 2 not taken
0: branch 3 not taken
381 11: if (s + 1 != ThisTokEnd) {
2: branch 0 taken
3: branch 1 taken
2: branch 2 taken
4: branch 3 taken
0: branch 4 not taken
0: branch 5 not taken
0: branch 6 not taken
0: branch 7 not taken
0: branch 8 not taken
0: branch 9 not taken
382 11: switch (s[1]) {
383 : case '8':
384 2: s += 2; // i8 suffix
385 2: isMicrosoftInteger = true;
386 2: break;
387 : case '1':
3: branch 0 taken
0: branch 1 not taken
0: branch 2 not taken
0: branch 3 not taken
388 3: if (s + 2 == ThisTokEnd) break;
2: branch 0 taken
1: branch 1 taken
1: branch 2 taken
1: branch 3 taken
389 3: if (s[2] == '6') s += 3; // i16 suffix
1: branch 0 taken
0: branch 1 not taken
0: branch 2 not taken
0: branch 3 not taken
390 1: else if (s[2] == '2') {
1: branch 0 taken
0: branch 1 not taken
0: branch 2 not taken
0: branch 3 not taken
391 1: if (s + 3 == ThisTokEnd) break;
1: branch 0 taken
0: branch 1 not taken
0: branch 2 not taken
0: branch 3 not taken
392 1: if (s[3] == '8') s += 4; // i128 suffix
393 : }
394 3: isMicrosoftInteger = true;
395 3: break;
396 : case '3':
2: branch 0 taken
0: branch 1 not taken
0: branch 2 not taken
0: branch 3 not taken
397 2: if (s + 2 == ThisTokEnd) break;
2: branch 0 taken
0: branch 1 not taken
0: branch 2 not taken
0: branch 3 not taken
398 2: if (s[2] == '2') s += 3; // i32 suffix
399 2: isMicrosoftInteger = true;
400 2: break;
401 : case '6':
4: branch 0 taken
0: branch 1 not taken
0: branch 2 not taken
0: branch 3 not taken
402 4: if (s + 2 == ThisTokEnd) break;
4: branch 0 taken
0: branch 1 not taken
0: branch 2 not taken
0: branch 3 not taken
403 4: if (s[2] == '4') s += 3; // i64 suffix
404 4: isMicrosoftInteger = true;
405 : break;
406 : default:
407 : break;
408 : }
409 11: break;
410 : }
411 : }
412 : // fall through.
413 : case 'I':
414 : case 'j':
415 : case 'J':
55: branch 0 taken
0: branch 1 not taken
0: branch 2 not taken
0: branch 3 not taken
416 55: if (isImaginary) break; // Cannot be repeated.
417 : PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
418 55: diag::ext_imaginary_constant);
419 55: isImaginary = true;
420 55: continue; // Success.
421 : }
422 : // If we reached here, there was an error.
423 13: break;
424 : }
425 :
426 : // Report an error if there are any.
3: branch 0 taken
9444: branch 1 taken
9444: branch 2 taken
9444: branch 3 taken
427 9447: if (s != ThisTokEnd) {
428 : PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
429 : isFPConstant ? diag::err_invalid_suffix_float_constant :
430 : diag::err_invalid_suffix_integer_constant)
0: branch 3 not taken
3: branch 4 taken
0: branch 14 not taken
0: branch 15 not taken
431 3: << std::string(SuffixBegin, ThisTokEnd);
432 3: hadError = true;
433 3: return;
434 : }
435 : }
436 :
437 : /// ParseNumberStartingWithZero - This method is called when the first character
438 : /// of the number is found to be a zero. This means it is either an octal
439 : /// number (like '04'), a hex number ('0x123a'), a binary number ('0b1010'), or
440 : /// a floating point number (01239.123e4). Eat the prefix, determining the
441 : /// radix etc.
442 1528: void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
0: branch 0 not taken
1528: branch 1 taken
443 1528: assert(s[0] == '0' && "Invalid method call");
444 1528: s++;
445 :
446 : // Handle a hex number like 0x1234.
544: branch 0 taken
984: branch 1 taken
0: branch 2 not taken
544: branch 3 taken
0: branch 5 not taken
984: branch 6 taken
984: branch 7 taken
984: branch 8 taken
447 1528: if ((*s == 'x' || *s == 'X') && (isxdigit(s[1]) || s[1] == '.')) {
448 984: s++;
449 984: radix = 16;
450 984: DigitsBegin = s;
451 984: s = SkipHexDigits(s);
72: branch 0 taken
912: branch 1 taken
452 984: if (s == ThisTokEnd) {
453 : // Done.
3: branch 0 taken
69: branch 1 taken
454 72: } else if (*s == '.') {
455 3: s++;
456 3: saw_period = true;
457 3: s = SkipHexDigits(s);
458 : }
459 : // A binary exponent can appear with or without a '.'. If dotted, the
460 : // binary exponent is required.
980: branch 0 taken
4: branch 1 taken
0: branch 2 not taken
980: branch 3 taken
4: branch 5 taken
0: branch 6 not taken
4: branch 7 taken
980: branch 8 taken
461 984: if ((*s == 'p' || *s == 'P') && !PP.getLangOptions().CPlusPlus0x) {
462 4: const char *Exponent = s;
463 4: s++;
464 4: saw_exponent = true;
4: branch 0 taken
0: branch 1 not taken
4: branch 2 taken
0: branch 3 not taken
465 4: if (*s == '+' || *s == '-') s++; // sign
466 4: const char *first_non_digit = SkipDigits(s);
0: branch 0 not taken
4: branch 1 taken
467 4: if (first_non_digit == s) {
468 : PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
469 0: diag::err_exponent_has_no_digits);
470 0: hadError = true;
471 0: return;
472 : }
473 4: s = first_non_digit;
474 :
475 : // In C++0x, we cannot support hexadecimal floating literals because
476 : // they conflict with user-defined literals, so we warn in previous
477 : // versions of C++ by default.
0: branch 1 not taken
4: branch 2 taken
478 4: if (PP.getLangOptions().CPlusPlus)
479 0: PP.Diag(TokLoc, diag::ext_hexconstant_cplusplus);
3: branch 1 taken
1: branch 2 taken
480 4: else if (!PP.getLangOptions().HexFloats)
481 3: PP.Diag(TokLoc, diag::ext_hexconstant_invalid);
0: branch 0 not taken
980: branch 1 taken
482 980: } else if (saw_period) {
483 : PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
484 0: diag::err_hexconstant_requires_exponent);
485 0: hadError = true;
486 : }
487 984: return;
488 : }
489 :
490 : // Handle simple binary numbers 0b01010
544: branch 0 taken
0: branch 1 not taken
0: branch 2 not taken
544: branch 3 taken
491 544: if (*s == 'b' || *s == 'B') {
492 : // 0b101010 is a GCC extension.
493 0: PP.Diag(TokLoc, diag::ext_binary_literal);
494 0: ++s;
495 0: radix = 2;
496 0: DigitsBegin = s;
497 0: s = SkipBinaryDigits(s);
0: branch 0 not taken
0: branch 1 not taken
498 0: if (s == ThisTokEnd) {
499 : // Done.
0: branch 1 not taken
0: branch 2 not taken
500 0: } else if (isxdigit(*s)) {
501 : PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
502 0: diag::err_invalid_binary_digit) << std::string(s, s+1);
503 0: hadError = true;
504 : }
505 : // Other suffixes will be diagnosed by the caller.
506 0: return;
507 : }
508 :
509 : // For now, the radix is set to 8. If we discover that we have a
510 : // floating point constant, the radix will change to 10. Octal floating
511 : // point constants are not permitted (only decimal and hexadecimal).
512 544: radix = 8;
513 544: DigitsBegin = s;
514 544: s = SkipOctalDigits(s);
395: branch 0 taken
149: branch 1 taken
515 544: if (s == ThisTokEnd)
516 395: return; // Done, simple octal number like 01234
517 :
518 : // If we have some other non-octal digit that *is* a decimal digit, see if
519 : // this is part of a floating point number like 094.123 or 09e1.
3: branch 0 taken
146: branch 1 taken
520 149: if (isdigit(*s)) {
521 3: const char *EndDecimal = SkipDigits(s);
2: branch 0 taken
1: branch 1 taken
2: branch 2 taken
0: branch 3 not taken
0: branch 4 not taken
2: branch 5 taken
522 3: if (EndDecimal[0] == '.' || EndDecimal[0] == 'e' || EndDecimal[0] == 'E') {
523 1: s = EndDecimal;
524 1: radix = 10;
525 : }
526 : }
527 :
528 : // If we have a hex digit other than 'e' (which denotes a FP exponent) then
529 : // the code is using an incorrect base.
4: branch 1 taken
145: branch 2 taken
2: branch 3 taken
2: branch 4 taken
2: branch 5 taken
0: branch 6 not taken
530 149: if (isxdigit(*s) && *s != 'e' && *s != 'E') {
531 : PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
532 2: diag::err_invalid_octal_digit) << std::string(s, s+1);
533 2: hadError = true;
534 2: return;
535 : }
536 :
114: branch 0 taken
33: branch 1 taken
537 147: if (*s == '.') {
538 114: s++;
539 114: radix = 10;
540 114: saw_period = true;
541 114: s = SkipDigits(s); // Skip suffix.
542 : }
145: branch 0 taken
2: branch 1 taken
0: branch 2 not taken
145: branch 3 taken
543 147: if (*s == 'e' || *s == 'E') { // exponent
544 2: const char *Exponent = s;
545 2: s++;
546 2: radix = 10;
547 2: saw_exponent = true;
2: branch 0 taken
0: branch 1 not taken
1: branch 2 taken
1: branch 3 taken
548 2: if (*s == '+' || *s == '-') s++; // sign
549 2: const char *first_non_digit = SkipDigits(s);
2: branch 0 taken
0: branch 1 not taken
550 2: if (first_non_digit != s) {
551 2: s = first_non_digit;
552 : } else {
553 : PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
554 0: diag::err_exponent_has_no_digits);
555 0: hadError = true;
556 0: return;
557 : }
558 : }
559 : }
560 :
561 :
562 : /// GetIntegerValue - Convert this numeric literal value to an APInt that
563 : /// matches Val's input width. If there is an overflow, set Val to the low bits
564 : /// of the result and return true. Otherwise, return false.
565 8673: bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) {
566 : // Fast path: Compute a conservative bound on the maximum number of
567 : // bits per digit in this radix. If we can't possibly overflow a
568 : // uint64 based on that bound then do the simple conversion to
569 : // integer. This avoids the expensive overflow checking below, and
570 : // handles the common cases that matter (small decimal integers and
571 : // hex/octal values which don't overflow).
572 8673: unsigned MaxBitsPerDigit = 1;
25593: branch 0 taken
8673: branch 1 taken
573 42939: while ((1U << MaxBitsPerDigit) < radix)
574 25593: MaxBitsPerDigit += 1;
8668: branch 0 taken
5: branch 1 taken
575 8673: if ((SuffixBegin - DigitsBegin) * MaxBitsPerDigit <= 64) {
576 8668: uint64_t N = 0;
19417: branch 0 taken
8668: branch 1 taken
577 28085: for (s = DigitsBegin; s != SuffixBegin; ++s)
578 19417: N = N*radix + HexDigitValue(*s);
579 :
580 : // This will truncate the value to Val's input width. Simply check
581 : // for overflow by comparing.
582 8668: Val = N;
583 8668: return Val.getZExtValue() != N;
584 : }
585 :
586 5: Val = 0;
587 5: s = DigitsBegin;
588 :
589 5: llvm::APInt RadixVal(Val.getBitWidth(), radix);
590 5: llvm::APInt CharVal(Val.getBitWidth(), 0);
591 5: llvm::APInt OldVal = Val;
592 :
593 5: bool OverflowOccurred = false;
115: branch 0 taken
5: branch 1 taken
594 125: while (s < SuffixBegin) {
595 115: unsigned C = HexDigitValue(*s++);
596 :
597 : // If this letter is out of bound for this radix, reject it.
0: branch 0 not taken
115: branch 1 taken
598 115: assert(C < radix && "NumericLiteralParser ctor should have rejected this");
599 :
600 115: CharVal = C;
601 :
602 : // Add the digit to the value in the appropriate radix. If adding in digits
603 : // made the value smaller, then this overflowed.
604 115: OldVal = Val;
605 :
606 : // Multiply by radix, did overflow occur on the multiply?
607 115: Val *= RadixVal;
608 115: OverflowOccurred |= Val.udiv(RadixVal) != OldVal;
609 :
610 : // Add value, did overflow occur on the value?
611 : // (a + b) ult b <=> overflow
612 115: Val += CharVal;
613 115: OverflowOccurred |= Val.ult(CharVal);
614 : }
615 5: return OverflowOccurred;
616 : }
617 :
618 : llvm::APFloat::opStatus
619 771: NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
620 : using llvm::APFloat;
621 : using llvm::StringRef;
622 :
623 771: unsigned n = std::min(SuffixBegin - ThisTokBegin, ThisTokEnd - ThisTokBegin);
624 : return Result.convertFromString(StringRef(ThisTokBegin, n),
625 771: APFloat::rmNearestTiesToEven);
626 : }
627 :
628 :
629 : CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
630 192: SourceLocation Loc, Preprocessor &PP) {
631 : // At this point we know that the character matches the regex "L?'.*'".
632 192: HadError = false;
633 :
634 : // Determine if this is a wide character.
635 192: IsWide = begin[0] == 'L';
19: branch 0 taken
173: branch 1 taken
173: branch 2 taken
173: branch 3 taken
636 192: if (IsWide) ++begin;
637 :
638 : // Skip over the entry quote.
0: branch 0 not taken
192: branch 1 taken
0: branch 3 not taken
0: branch 4 not taken
639 192: assert(begin[0] == '\'' && "Invalid token lexed");
640 192: ++begin;
641 :
642 : // FIXME: The "Value" is a uint64_t so we can handle char literals of
643 : // up to 64 bits.
644 : // FIXME: This extensively assumes that 'char' is 8-bits.
645 : assert(PP.getTargetInfo().getCharWidth() == 8 &&
192: branch 2 taken
0: branch 3 not taken
0: branch 7 not taken
0: branch 8 not taken
646 192: "Assumes char is 8 bits");
647 : assert(PP.getTargetInfo().getIntWidth() <= 64 &&
648 : (PP.getTargetInfo().getIntWidth() & 7) == 0 &&
192: branch 2 taken
0: branch 3 not taken
192: branch 6 taken
0: branch 7 not taken
0: branch 11 not taken
0: branch 12 not taken
0: branch 15 not taken
0: branch 16 not taken
649 384: "Assumes sizeof(int) on target is <= 64 and a multiple of char");
650 : assert(PP.getTargetInfo().getWCharWidth() <= 64 &&
192: branch 2 taken
0: branch 3 not taken
0: branch 7 not taken
0: branch 8 not taken
651 192: "Assumes sizeof(wchar) on target is <= 64");
652 :
653 : // This is what we will use for overflow detection
654 192: llvm::APInt LitVal(PP.getTargetInfo().getIntWidth(), 0);
655 :
656 192: unsigned NumCharsSoFar = 0;
212: branch 0 taken
192: branch 1 taken
192: branch 2 taken
192: branch 3 taken
657 596: while (begin[0] != '\'') {
658 : uint64_t ResultChar;
163: branch 0 taken
49: branch 1 taken
49: branch 2 taken
49: branch 3 taken
659 212: if (begin[0] != '\\') // If this is a normal character, consume it.
660 163: ResultChar = *begin++;
661 : else // Otherwise, this is an escape character.
662 49: ResultChar = ProcessCharEscape(begin, end, HadError, Loc, IsWide, PP);
663 :
664 : // If this is a multi-character constant (e.g. 'abc'), handle it. These are
665 : // implementation defined (C99 6.4.4.4p10).
20: branch 0 taken
192: branch 1 taken
192: branch 2 taken
192: branch 3 taken
666 212: if (NumCharsSoFar) {
0: branch 0 not taken
20: branch 1 taken
20: branch 2 taken
20: branch 3 taken
667 20: if (IsWide) {
668 : // Emulate GCC's (unintentional?) behavior: L'ab' -> L'b'.
669 0: LitVal = 0;
670 : } else {
671 : // Narrow character literals act as though their value is concatenated
672 : // in this implementation, but warn on overflow.
0: branch 1 not taken
20: branch 2 taken
0: branch 4 not taken
0: branch 5 not taken
673 20: if (LitVal.countLeadingZeros() < 8)
674 0: PP.Diag(Loc, diag::warn_char_constant_too_large);
675 20: LitVal <<= 8;
676 : }
677 : }
678 :
679 212: LitVal = LitVal + ResultChar;
680 212: ++NumCharsSoFar;
681 : }
682 :
683 : // If more than one character was processed, diagnose the multi-char constant.
10: branch 0 taken
182: branch 1 taken
182: branch 2 taken
182: branch 3 taken
684 192: if (NumCharsSoFar > 1) {
685 : // Warn about discarding the top bits for multi-char wide-character
686 : // constants (L'abcd').
0: branch 0 not taken
10: branch 1 taken
10: branch 2 taken
10: branch 3 taken
687 10: if (IsWide)
688 0: PP.Diag(Loc, diag::warn_extraneous_wide_char_constant);
5: branch 0 taken
5: branch 1 taken
5: branch 2 taken
5: branch 3 taken
689 10: else if (NumCharsSoFar != 4)
690 5: PP.Diag(Loc, diag::ext_multichar_character_literal);
691 : else
692 5: PP.Diag(Loc, diag::ext_four_char_character_literal);
693 10: IsMultiChar = true;
694 : } else
695 182: IsMultiChar = false;
696 :
697 : // Transfer the value from APInt to uint64_t
698 192: Value = LitVal.getZExtValue();
699 :
700 : // If this is a single narrow character, sign extend it (e.g. '\xFF' is "-1")
701 : // if 'char' is signed for this target (C99 6.4.4.4p10). Note that multiple
702 : // character constants are not sign extended in this implementation:
703 : // '\xFF\xFF' = 65535 and '\x0\xFF' = 255, which matches GCC.
173: branch 0 taken
19: branch 1 taken
163: branch 2 taken
10: branch 3 taken
1: branch 4 taken
162: branch 5 taken
1: branch 7 taken
0: branch 8 not taken
1: branch 9 taken
191: branch 10 taken
191: branch 11 taken
191: branch 12 taken
191: branch 13 taken
191: branch 14 taken
191: branch 15 taken
191: branch 16 taken
0: branch 18 not taken
0: branch 19 not taken
0: branch 20 not taken
0: branch 21 not taken
704 192: if (!IsWide && NumCharsSoFar == 1 && (Value & 128) &&
705 : PP.getLangOptions().CharIsSigned)
706 1: Value = (signed char)Value;
707 192: }
708 :
709 :
710 : /// string-literal: [C99 6.4.5]
711 : /// " [s-char-sequence] "
712 : /// L" [s-char-sequence] "
713 : /// s-char-sequence:
714 : /// s-char
715 : /// s-char-sequence s-char
716 : /// s-char:
717 : /// any source character except the double quote ",
718 : /// backslash \, or newline character
719 : /// escape-character
720 : /// universal-character-name
721 : /// escape-character: [C99 6.4.4.4]
722 : /// \ escape-code
723 : /// universal-character-name
724 : /// escape-code:
725 : /// character-escape-code
726 : /// octal-escape-code
727 : /// hex-escape-code
728 : /// character-escape-code: one of
729 : /// n t b r f v a
730 : /// \ ' " ?
731 : /// octal-escape-code:
732 : /// octal-digit
733 : /// octal-digit octal-digit
734 : /// octal-digit octal-digit octal-digit
735 : /// hex-escape-code:
736 : /// x hex-digit
737 : /// hex-escape-code hex-digit
738 : /// universal-character-name:
739 : /// \u hex-quad
740 : /// \U hex-quad hex-quad
741 : /// hex-quad:
742 : /// hex-digit hex-digit hex-digit hex-digit
743 : ///
744 : StringLiteralParser::
745 : StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
746 9530: Preprocessor &pp) : PP(pp) {
747 : // Scan all of the string portions, remember the max individual token length,
748 : // computing a bound on the concatenated string length, and see whether any
749 : // piece is a wide-string. If any of the string portions is a wide-string
750 : // literal, the result is a wide-string literal [C99 6.4.5p4].
751 9530: MaxTokenLength = StringToks[0].getLength();
752 9530: SizeBound = StringToks[0].getLength()-2; // -2 for "".
753 9530: AnyWide = StringToks[0].is(tok::wide_string_literal);
754 :
755 9530: hadError = false;
756 :
757 : // Implement Translation Phase #6: concatenation of string literals
758 : // (C99 5.1.1.2p1). The common case is only one string fragment.
180: branch 0 taken
9530: branch 1 taken
9530: branch 2 taken
9530: branch 3 taken
759 9710: for (unsigned i = 1; i != NumStringToks; ++i) {
760 : // The string could be shorter than this if it needs cleaning, but this is a
761 : // reasonable bound, which is all we need.
762 180: SizeBound += StringToks[i].getLength()-2; // -2 for "".
763 :
764 : // Remember maximum string piece length.
79: branch 1 taken
101: branch 2 taken
0: branch 4 not taken
0: branch 5 not taken
765 180: if (StringToks[i].getLength() > MaxTokenLength)
766 79: MaxTokenLength = StringToks[i].getLength();
767 :
768 : // Remember if we see any wide strings.
769 180: AnyWide |= StringToks[i].is(tok::wide_string_literal);
770 : }
771 :
772 : // Include space for the null terminator.
773 9530: ++SizeBound;
774 :
775 : // TODO: K&R warning: "traditional C rejects string constant concatenation"
776 :
777 : // Get the width in bytes of wchar_t. If no wchar_t strings are used, do not
778 : // query the target. As such, wchar_tByteWidth is only valid if AnyWide=true.
779 9530: wchar_tByteWidth = ~0U;
30: branch 0 taken
9500: branch 1 taken
9500: branch 2 taken
9500: branch 3 taken
780 9530: if (AnyWide) {
781 30: wchar_tByteWidth = PP.getTargetInfo().getWCharWidth();
0: branch 0 not taken
30: branch 1 taken
0: branch 3 not taken
0: branch 4 not taken
782 30: assert((wchar_tByteWidth & 7) == 0 && "Assumes wchar_t is byte multiple!");
783 30: wchar_tByteWidth /= 8;
784 : }
785 :
786 : // The output buffer size needs to be large enough to hold wide characters.
787 : // This is a worst-case assumption which basically corresponds to L"" "long".
30: branch 0 taken
9500: branch 1 taken
9500: branch 2 taken
9500: branch 3 taken
788 9530: if (AnyWide)
789 30: SizeBound *= wchar_tByteWidth;
790 :
791 : // Size the temporary buffer to hold the result string data.
792 9530: ResultBuf.resize(SizeBound);
793 :
794 : // Likewise, but for each string piece.
795 9530: llvm::SmallString<512> TokenBuf;
796 9530: TokenBuf.resize(MaxTokenLength);
797 :
798 : // Loop over all the strings, getting their spelling, and expanding them to
799 : // wide strings as appropriate.
800 9530: ResultPtr = &ResultBuf[0]; // Next byte to fill in.
801 :
802 9530: Pascal = false;
803 :
9710: branch 0 taken
9530: branch 1 taken
9530: branch 2 taken
9530: branch 3 taken
804 19240: for (unsigned i = 0, e = NumStringToks; i != e; ++i) {
805 9710: const char *ThisTokBuf = &TokenBuf[0];
806 : // Get the spelling of the token, which eliminates trigraphs, etc. We know
807 : // that ThisTokBuf points to a buffer that is big enough for the whole token
808 : // and 'spelled' tokens can only shrink.
809 9710: unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf);
810 9710: const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote.
811 :
812 : // TODO: Input character set mapping support.
813 :
814 : // Skip L marker for wide strings.
815 9710: bool ThisIsWide = false;
30: branch 0 taken
9680: branch 1 taken
9680: branch 2 taken
9680: branch 3 taken
816 9710: if (ThisTokBuf[0] == 'L') {
817 30: ++ThisTokBuf;
818 30: ThisIsWide = true;
819 : }
820 :
0: branch 0 not taken
9710: branch 1 taken
0: branch 3 not taken
0: branch 4 not taken
821 9710: assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
822 9710: ++ThisTokBuf;
823 :
824 : // Check if this is a pascal string
13: branch 1 taken
9697: branch 2 taken
13: branch 3 taken
0: branch 4 not taken
2: branch 5 taken
11: branch 6 taken
2: branch 7 taken
0: branch 8 not taken
2: branch 9 taken
9708: branch 10 taken
0: branch 12 not taken
0: branch 13 not taken
0: branch 14 not taken
0: branch 15 not taken
0: branch 16 not taken
0: branch 17 not taken
0: branch 18 not taken
0: branch 19 not taken
0: branch 20 not taken
0: branch 21 not taken
825 9710: if (pp.getLangOptions().PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&
826 : ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') {
827 :
828 : // If the \p sequence is found in the first token, we have a pascal string.
829 : // Otherwise, if we already have a pascal string, skip this token's leading \p.
2: branch 0 taken
0: branch 1 not taken
0: branch 2 not taken
0: branch 3 not taken
830 2: if (i == 0) {
831 2: ++ThisTokBuf;
832 2: Pascal = true;
0: branch 0 not taken
0: branch 1 not taken
0: branch 2 not taken
0: branch 3 not taken
833 0: } else if (Pascal)
834 0: ThisTokBuf += 2;
835 : }
836 :
10218: branch 0 taken
9710: branch 1 taken
9710: branch 2 taken
9710: branch 3 taken
837 29638: while (ThisTokBuf != ThisTokEnd) {
838 : // Is this a span of non-escape characters?
9581: branch 0 taken
637: branch 1 taken
637: branch 2 taken
637: branch 3 taken
839 10218: if (ThisTokBuf[0] != '\\') {
840 9581: const char *InStart = ThisTokBuf;
97578: branch 0 taken
9016: branch 1 taken
97013: branch 2 taken
565: branch 3 taken
565: branch 4 taken
565: branch 5 taken
565: branch 6 taken
565: branch 7 taken
841 106594: do {
842 106594: ++ThisTokBuf;
843 : } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
844 :
845 : // Copy the character span over.
846 9581: unsigned Len = ThisTokBuf-InStart;
9566: branch 0 taken
15: branch 1 taken
15: branch 2 taken
15: branch 3 taken
847 9581: if (!AnyWide) {
848 9566: memcpy(ResultPtr, InStart, Len);
849 9566: ResultPtr += Len;
850 : } else {
851 : // Note: our internal rep of wide char tokens is always little-endian.
48: branch 0 taken
15: branch 1 taken
15: branch 2 taken
15: branch 3 taken
852 63: for (; Len; --Len, ++InStart) {
853 48: *ResultPtr++ = InStart[0];
854 : // Add zeros at the end.
138: branch 0 taken
48: branch 1 taken
48: branch 2 taken
48: branch 3 taken
855 186: for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
856 138: *ResultPtr++ = 0;
857 : }
858 : }
859 9581: continue;
860 : }
861 : // Is this a Universal Character Name escape?
618: branch 0 taken
19: branch 1 taken
9: branch 2 taken
609: branch 3 taken
609: branch 4 taken
609: branch 5 taken
609: branch 6 taken
609: branch 7 taken
862 637: if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') {
863 : ProcessUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr,
864 28: hadError, StringToks[i].getLocation(), ThisIsWide, PP);
865 28: continue;
866 : }
867 : // Otherwise, this is a non-UCN escape character. Process it.
868 : unsigned ResultChar = ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError,
869 : StringToks[i].getLocation(),
870 609: ThisIsWide, PP);
871 :
872 : // Note: our internal rep of wide char tokens is always little-endian.
873 609: *ResultPtr++ = ResultChar & 0xFF;
874 :
0: branch 0 not taken
609: branch 1 taken
609: branch 2 taken
609: branch 3 taken
875 609: if (AnyWide) {
0: branch 0 not taken
0: branch 1 not taken
0: branch 2 not taken
0: branch 3 not taken
876 0: for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
877 0: *ResultPtr++ = ResultChar >> i*8;
878 : }
879 : }
880 : }
881 :
2: branch 0 taken
9528: branch 1 taken
9528: branch 2 taken
9528: branch 3 taken
882 9530: if (Pascal) {
883 2: ResultBuf[0] = ResultPtr-&ResultBuf[0]-1;
884 :
885 : // Verify that pascal strings aren't too large.
0: branch 1 not taken
2: branch 2 taken
0: branch 4 not taken
0: branch 5 not taken
886 2: if (GetStringLength() > 256) {
887 : PP.Diag(StringToks[0].getLocation(), diag::err_pascal_string_too_long)
888 : << SourceRange(StringToks[0].getLocation(),
889 0: StringToks[NumStringToks-1].getLocation());
890 0: hadError = 1;
891 0: return;
892 : }
9530: branch 1 taken
0: branch 2 not taken
0: branch 4 not taken
0: branch 5 not taken
893 9530: }
894 : }
895 :
896 :
897 : /// getOffsetOfStringByte - This function returns the offset of the
898 : /// specified byte of the string data represented by Token. This handles
899 : /// advancing over escape sequences in the string.
900 : unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok,
901 : unsigned ByteNo,
902 111: Preprocessor &PP) {
903 : // Get the spelling of the token.
904 111: llvm::SmallString<16> SpellingBuffer;
905 111: SpellingBuffer.resize(Tok.getLength());
906 :
907 111: const char *SpellingPtr = &SpellingBuffer[0];
908 111: unsigned TokLen = PP.getSpelling(Tok, SpellingPtr);
909 :
0: branch 0 not taken
111: branch 1 taken
910 111: assert(SpellingPtr[0] != 'L' && "Doesn't handle wide strings yet");
911 :
912 :
913 111: const char *SpellingStart = SpellingPtr;
914 111: const char *SpellingEnd = SpellingPtr+TokLen;
915 :
916 : // Skip over the leading quote.
0: branch 0 not taken
111: branch 1 taken
917 111: assert(SpellingPtr[0] == '"' && "Should be a string literal!");
918 111: ++SpellingPtr;
919 :
920 : // Skip over bytes until we find the offset we're looking for.
259: branch 0 taken
111: branch 1 taken
921 481: while (ByteNo) {
0: branch 0 not taken
259: branch 1 taken
922 259: assert(SpellingPtr < SpellingEnd && "Didn't find byte offset!");
923 :
924 : // Step over non-escapes simply.
259: branch 0 taken
0: branch 1 not taken
925 259: if (*SpellingPtr != '\\') {
926 259: ++SpellingPtr;
927 259: --ByteNo;
928 259: continue;
929 : }
930 :
931 : // Otherwise, this is an escape character. Advance over it.
932 0: bool HadError = false;
933 : ProcessCharEscape(SpellingPtr, SpellingEnd, HadError,
934 0: Tok.getLocation(), false, PP);
0: branch 0 not taken
0: branch 1 not taken
935 0: assert(!HadError && "This method isn't valid on erroneous strings");
936 0: --ByteNo;
937 : }
938 :
939 111: return SpellingPtr-SpellingStart;
940 : }
Generated: 2010-02-10 01:31 by zcov