zcov: / lib/Lex/LiteralSupport.cpp


Files: 1 Branches Taken: 65.3% 378 / 579
Generated: 2010-02-10 01:31 Branches Executed: 63.6% 368 / 579
Line Coverage: 88.4% 374 / 423


Programs: 2 Runs 3018


       1                 : //===--- LiteralSupport.cpp - Code to parse and process literals ----------===//
       2                 : //
       3                 : //                     The LLVM Compiler Infrastructure
       4                 : //
       5                 : // This file is distributed under the University of Illinois Open Source
       6                 : // License. See LICENSE.TXT for details.
       7                 : //
       8                 : //===----------------------------------------------------------------------===//
       9                 : //
      10                 : // This file implements the NumericLiteralParser, CharLiteralParser, and
      11                 : // StringLiteralParser interfaces.
      12                 : //
      13                 : //===----------------------------------------------------------------------===//
      14                 : 
      15                 : #include "clang/Lex/LiteralSupport.h"
      16                 : #include "clang/Lex/Preprocessor.h"
      17                 : #include "clang/Lex/LexDiagnostic.h"
      18                 : #include "clang/Basic/TargetInfo.h"
      19                 : #include "llvm/ADT/StringRef.h"
      20                 : #include "llvm/ADT/StringExtras.h"
      21                 : using namespace clang;
      22                 : 
      23                 : /// HexDigitValue - Return the value of the specified hex digit, or -1 if it's
      24                 : /// not valid.
      25            19681: static int HexDigitValue(char C) {
                    19676: branch 0 taken
                        5: branch 1 taken
                    17314: branch 2 taken
                     2362: branch 3 taken
      26            19681:   if (C >= '0' && C <= '9') return C-'0';
                      912: branch 0 taken
                     1455: branch 1 taken
                      912: branch 2 taken
                        0: branch 3 not taken
      27             2367:   if (C >= 'a' && C <= 'f') return C-'a'+10;
                     1450: branch 0 taken
                        5: branch 1 taken
                     1448: branch 2 taken
                        2: branch 3 taken
      28             1455:   if (C >= 'A' && C <= 'F') return C-'A'+10;
      29                7:   return -1;
      30                 : }
      31                 : 
      32                 : /// ProcessCharEscape - Parse a standard C escape sequence, which can occur in
      33                 : /// either a character or a string literal.
      34                 : static unsigned ProcessCharEscape(const char *&ThisTokBuf,
      35                 :                                   const char *ThisTokEnd, bool &HadError,
      36                 :                                   SourceLocation Loc, bool IsWide,
      37              658:                                   Preprocessor &PP) {
      38                 :   // Skip the '\' char.
      39              658:   ++ThisTokBuf;
      40                 : 
      41                 :   // We know that this character can't be off the end of the buffer, because
      42                 :   // that would have been \", which would not have been the end of string.
      43              658:   unsigned ResultChar = *ThisTokBuf++;
                       26: branch 0 taken
                        4: branch 1 taken
                        3: branch 2 taken
                        3: branch 3 taken
                        1: branch 4 taken
                        3: branch 5 taken
                      574: branch 6 taken
                        3: branch 7 taken
                        6: branch 8 taken
                        3: branch 9 taken
                        7: branch 10 taken
                       21: branch 11 taken
                        4: branch 12 taken
                        0: branch 13 not taken
      44              658:   switch (ResultChar) {
      45                 :   // These map to themselves.
      46               26:   case '\\': case '\'': case '"': case '?': break;
      47                 : 
      48                 :     // These have fixed mappings.
      49                 :   case 'a':
      50                 :     // TODO: K&R: the meaning of '\\a' is different in traditional C
      51                4:     ResultChar = 7;
      52                4:     break;
      53                 :   case 'b':
      54                3:     ResultChar = 8;
      55                3:     break;
      56                 :   case 'e':
      57                3:     PP.Diag(Loc, diag::ext_nonstandard_escape) << "e";
      58                3:     ResultChar = 27;
      59                3:     break;
      60                 :   case 'E':
      61                1:     PP.Diag(Loc, diag::ext_nonstandard_escape) << "E";
      62                1:     ResultChar = 27;
      63                1:     break;
      64                 :   case 'f':
      65                3:     ResultChar = 12;
      66                3:     break;
      67                 :   case 'n':
      68              574:     ResultChar = 10;
      69              574:     break;
      70                 :   case 'r':
      71                3:     ResultChar = 13;
      72                3:     break;
      73                 :   case 't':
      74                6:     ResultChar = 9;
      75                6:     break;
      76                 :   case 'v':
      77                3:     ResultChar = 11;
      78                3:     break;
      79                 :   case 'x': { // Hex escape.
      80                7:     ResultChar = 0;
                        7: branch 0 taken
                        0: branch 1 not taken
                        0: branch 3 not taken
                        7: branch 4 taken
      81                7:     if (ThisTokBuf == ThisTokEnd || !isxdigit(*ThisTokBuf)) {
      82                0:       PP.Diag(Loc, diag::err_hex_escape_no_digits);
      83                0:       HadError = 1;
      84                0:       break;
      85                 :     }
      86                 : 
      87                 :     // Hex escapes are a maximal series of hex digits.
      88                7:     bool Overflow = false;
                       19: branch 0 taken
                        0: branch 1 not taken
      89               19:     for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) {
      90               19:       int CharVal = HexDigitValue(ThisTokBuf[0]);
                       12: branch 0 taken
                        7: branch 1 taken
      91               19:       if (CharVal == -1) break;
      92                 :       // About to shift out a digit?
      93               12:       Overflow |= (ResultChar & 0xF0000000) ? true : false;
      94               12:       ResultChar <<= 4;
      95               12:       ResultChar |= CharVal;
      96                 :     }
      97                 : 
      98                 :     // See if any bits will be truncated when evaluated as a character.
      99                 :     unsigned CharWidth = IsWide
     100                 :                        ? PP.getTargetInfo().getWCharWidth()
                        2: branch 0 taken
                        5: branch 1 taken
     101                7:                        : PP.getTargetInfo().getCharWidth();
     102                 : 
                        5: branch 0 taken
                        2: branch 1 taken
                        0: branch 2 not taken
                        5: branch 3 taken
     103                7:     if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
     104                0:       Overflow = true;
     105                0:       ResultChar &= ~0U >> (32-CharWidth);
     106                 :     }
     107                 : 
     108                 :     // Check for overflow.
                        0: branch 0 not taken
                        7: branch 1 taken
     109                7:     if (Overflow)   // Too many digits to fit in
     110                0:       PP.Diag(Loc, diag::warn_hex_escape_too_large);
     111                7:     break;
     112                 :   }
     113                 :   case '0': case '1': case '2': case '3':
     114                 :   case '4': case '5': case '6': case '7': {
     115                 :     // Octal escapes.
     116               21:     --ThisTokBuf;
     117               21:     ResultChar = 0;
     118                 : 
     119                 :     // Octal escapes are a series of octal digits with maximum length 3.
     120                 :     // "\0123" is a two digit sequence equal to "\012" "3".
     121               21:     unsigned NumDigits = 0;
                       14: branch 0 taken
                        9: branch 1 taken
                       14: branch 2 taken
                        0: branch 3 not taken
                        8: branch 4 taken
                        6: branch 5 taken
                        2: branch 6 taken
                        6: branch 7 taken
     122               23:     do {
     123               23:       ResultChar <<= 3;
     124               23:       ResultChar |= *ThisTokBuf++ - '0';
     125               23:       ++NumDigits;
     126                 :     } while (ThisTokBuf != ThisTokEnd && NumDigits < 3 &&
     127                 :              ThisTokBuf[0] >= '0' && ThisTokBuf[0] <= '7');
     128                 : 
     129                 :     // Check for overflow.  Reject '\777', but not L'\777'.
     130                 :     unsigned CharWidth = IsWide
     131                 :                        ? PP.getTargetInfo().getWCharWidth()
                        2: branch 0 taken
                       19: branch 1 taken
     132               21:                        : PP.getTargetInfo().getCharWidth();
     133                 : 
                       19: branch 0 taken
                        2: branch 1 taken
                        0: branch 2 not taken
                       19: branch 3 taken
     134               21:     if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
     135                0:       PP.Diag(Loc, diag::warn_octal_escape_too_large);
     136                0:       ResultChar &= ~0U >> (32-CharWidth);
     137                 :     }
     138               21:     break;
     139                 :   }
     140                 : 
     141                 :     // Otherwise, these are not valid escapes.
     142                 :   case '(': case '{': case '[': case '%':
     143                 :     // GCC accepts these as extensions.  We warn about them as such though.
     144                 :     PP.Diag(Loc, diag::ext_nonstandard_escape)
     145                4:       << std::string()+(char)ResultChar;
     146                4:     break;
     147                 :   default:
                        0: branch 1 not taken
                        0: branch 2 not taken
     148                0:     if (isgraph(ThisTokBuf[0]))
     149                0:       PP.Diag(Loc, diag::ext_unknown_escape) << std::string()+(char)ResultChar;
     150                 :     else
     151                0:       PP.Diag(Loc, diag::ext_unknown_escape) << "x"+llvm::utohexstr(ResultChar);
     152                 :     break;
     153                 :   }
     154                 : 
     155              658:   return ResultChar;
     156                 : }
     157                 : 
     158                 : /// ProcessUCNEscape - Read the Universal Character Name, check constraints and
     159                 : /// convert the UTF32 to UTF8. This is a subroutine of StringLiteralParser.
     160                 : /// When we decide to implement UCN's for character constants and identifiers,
     161                 : /// we will likely rework our support for UCN's.
     162                 : static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
     163                 :                              char *&ResultBuf, bool &HadError,
     164               28:                              SourceLocation Loc, bool IsWide, Preprocessor &PP)
     165                 : {
     166                 :   // FIXME: Add a warning - UCN's are only valid in C++ & C99.
     167                 :   // FIXME: Handle wide strings.
     168                 : 
     169                 :   // Save the beginning of the string (for error diagnostics).
     170               28:   const char *ThisTokBegin = ThisTokBuf;
     171                 : 
     172                 :   // Skip the '\u' char's.
     173               28:   ThisTokBuf += 2;
     174                 : 
                       27: branch 0 taken
                        1: branch 1 taken
                        0: branch 3 not taken
                       27: branch 4 taken
     175               28:   if (ThisTokBuf == ThisTokEnd || !isxdigit(*ThisTokBuf)) {
     176                1:     PP.Diag(Loc, diag::err_ucn_escape_no_digits);
     177                1:     HadError = 1;
     178                1:     return;
     179                 :   }
     180                 :   typedef uint32_t UTF32;
     181                 : 
     182               27:   UTF32 UcnVal = 0;
                       19: branch 0 taken
                        8: branch 1 taken
     183               27:   unsigned short UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8);
                      151: branch 0 taken
                        6: branch 1 taken
                      130: branch 2 taken
                       21: branch 3 taken
     184              157:   for (; ThisTokBuf != ThisTokEnd && UcnLen; ++ThisTokBuf, UcnLen--) {
     185              130:     int CharVal = HexDigitValue(ThisTokBuf[0]);
                      130: branch 0 taken
                        0: branch 1 not taken
     186              130:     if (CharVal == -1) break;
     187              130:     UcnVal <<= 4;
     188              130:     UcnVal |= CharVal;
     189                 :   }
     190                 :   // If we didn't consume the proper number of digits, there is a problem.
                        2: branch 0 taken
                       25: branch 1 taken
     191               27:   if (UcnLen) {
     192                 :     PP.Diag(PP.AdvanceToTokenCharacter(Loc, ThisTokBuf-ThisTokBegin),
     193                2:             diag::err_ucn_escape_incomplete);
     194                2:     HadError = 1;
     195                2:     return;
     196                 :   }
     197                 :   // Check UCN constraints (C99 6.4.3p2).
                        1: branch 0 taken
                       24: branch 1 taken
                        1: branch 2 taken
                        0: branch 3 not taken
                        1: branch 4 taken
                        0: branch 5 not taken
                        0: branch 6 not taken
                        1: branch 7 taken
                        6: branch 8 taken
                       18: branch 9 taken
                        6: branch 10 taken
                        0: branch 11 not taken
                        0: branch 12 not taken
                       24: branch 13 taken
     198               25:   if ((UcnVal < 0xa0 &&
     199                 :       (UcnVal != 0x24 && UcnVal != 0x40 && UcnVal != 0x60 )) // $, @, `
     200                 :       || (UcnVal >= 0xD800 && UcnVal <= 0xDFFF)
     201                 :       || (UcnVal > 0x10FFFF)) /* the maximum legal UTF32 value */ {
     202                1:     PP.Diag(Loc, diag::err_ucn_escape_invalid);
     203                1:     HadError = 1;
     204                1:     return;
     205                 :   }
     206                 :   // Now that we've parsed/checked the UCN, we convert from UTF32->UTF8.
     207                 :   // The conversion below was inspired by:
     208                 :   //   http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
     209                 :   // First, we determine how many bytes the result will require.
     210                 :   typedef uint8_t UTF8;
     211                 : 
     212               24:   unsigned short bytesToWrite = 0;
                        0: branch 0 not taken
                       24: branch 1 taken
     213               24:   if (UcnVal < (UTF32)0x80)
     214                0:     bytesToWrite = 1;
                        0: branch 0 not taken
                       24: branch 1 taken
     215               24:   else if (UcnVal < (UTF32)0x800)
     216                0:     bytesToWrite = 2;
                       18: branch 0 taken
                        6: branch 1 taken
     217               24:   else if (UcnVal < (UTF32)0x10000)
     218               18:     bytesToWrite = 3;
     219                 :   else
     220                6:     bytesToWrite = 4;
     221                 : 
     222               24:   const unsigned byteMask = 0xBF;
     223               24:   const unsigned byteMark = 0x80;
     224                 : 
     225                 :   // Once the bits are split out into bytes of UTF8, this is a mask OR-ed
     226                 :   // into the first byte, depending on how many bytes follow.
     227                 :   static const UTF8 firstByteMark[5] = {
     228                 :     0x00, 0x00, 0xC0, 0xE0, 0xF0
     229                 :   };
     230                 :   // Finally, we write the bytes into ResultBuf.
     231               24:   ResultBuf += bytesToWrite;
                        6: branch 0 taken
                       18: branch 1 taken
                        0: branch 2 not taken
                        0: branch 3 not taken
                        0: branch 4 not taken
     232               24:   switch (bytesToWrite) { // note: everything falls through.
     233                6:     case 4: *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
     234               24:     case 3: *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
     235               24:     case 2: *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
     236               24:     case 1: *--ResultBuf = (UTF8) (UcnVal | firstByteMark[bytesToWrite]);
     237                 :   }
     238                 :   // Update the buffer.
     239               24:   ResultBuf += bytesToWrite;
     240                 : }
     241                 : 
     242                 : 
     243                 : ///       integer-constant: [C99 6.4.4.1]
     244                 : ///         decimal-constant integer-suffix
     245                 : ///         octal-constant integer-suffix
     246                 : ///         hexadecimal-constant integer-suffix
     247                 : ///       decimal-constant:
     248                 : ///         nonzero-digit
     249                 : ///         decimal-constant digit
     250                 : ///       octal-constant:
     251                 : ///         0
     252                 : ///         octal-constant octal-digit
     253                 : ///       hexadecimal-constant:
     254                 : ///         hexadecimal-prefix hexadecimal-digit
     255                 : ///         hexadecimal-constant hexadecimal-digit
     256                 : ///       hexadecimal-prefix: one of
     257                 : ///         0x 0X
     258                 : ///       integer-suffix:
     259                 : ///         unsigned-suffix [long-suffix]
     260                 : ///         unsigned-suffix [long-long-suffix]
     261                 : ///         long-suffix [unsigned-suffix]
     262                 : ///         long-long-suffix [unsigned-suffix]
     263                 : ///       nonzero-digit:
     264                 : ///         1 2 3 4 5 6 7 8 9
     265                 : ///       octal-digit:
     266                 : ///         0 1 2 3 4 5 6 7
     267                 : ///       hexadecimal-digit:
     268                 : ///         0 1 2 3 4 5 6 7 8 9
     269                 : ///         a b c d e f
     270                 : ///         A B C D E F
     271                 : ///       unsigned-suffix: one of
     272                 : ///         u U
     273                 : ///       long-suffix: one of
     274                 : ///         l L
     275                 : ///       long-long-suffix: one of
     276                 : ///         ll LL
     277                 : ///
     278                 : ///       floating-constant: [C99 6.4.4.2]
     279                 : ///         TODO: add rules...
     280                 : ///
     281                 : NumericLiteralParser::
     282                 : NumericLiteralParser(const char *begin, const char *end,
     283             9451:                      SourceLocation TokLoc, Preprocessor &pp)
     284             9451:   : PP(pp), ThisTokBegin(begin), ThisTokEnd(end) {
     285                 : 
     286                 :   // This routine assumes that the range begin/end matches the regex for integer
     287                 :   // and FP constants (specifically, the 'pp-number' regex), and assumes that
     288                 :   // the byte at "*end" is both valid and not part of the regex.  Because of
     289                 :   // this, it doesn't have to check for 'overscan' in various places.
     290                 :   assert(!isalnum(*end) && *end != '.' && *end != '_' &&
                     9451: branch 1 taken
                        0: branch 2 not taken
                     9451: branch 3 taken
                        0: branch 4 not taken
                        0: branch 5 not taken
                     9451: branch 6 taken
                        0: branch 9 not taken
                        0: branch 10 not taken
                        0: branch 11 not taken
                        0: branch 12 not taken
                        0: branch 13 not taken
                        0: branch 14 not taken
     291             9451:          "Lexer didn't maximally munch?");
     292                 : 
     293             9451:   s = DigitsBegin = begin;
     294             9451:   saw_exponent = false;
     295             9451:   saw_period = false;
     296             9451:   isLong = false;
     297             9451:   isUnsigned = false;
     298             9451:   isLongLong = false;
     299             9451:   isFloat = false;
     300             9451:   isImaginary = false;
     301             9451:   isMicrosoftInteger = false;
     302             9451:   hadError = false;
     303                 : 
                     1528: branch 0 taken
                     7923: branch 1 taken
                     7923: branch 2 taken
                     7923: branch 3 taken
     304             9451:   if (*s == '0') { // parse radix
     305             1528:     ParseNumberStartingWithZero(TokLoc);
                        2: branch 0 taken
                     1526: branch 1 taken
                     1526: branch 2 taken
                     1526: branch 3 taken
     306             1528:     if (hadError)
     307                2:       return;
     308                 :   } else { // the first digit is non-zero
     309             7923:     radix = 10;
     310             7923:     s = SkipDigits(s);
                     1148: branch 0 taken
                     6775: branch 1 taken
                     6775: branch 2 taken
                     6775: branch 3 taken
     311             7923:     if (s == ThisTokEnd) {
     312                 :       // Done.
                        5: branch 1 taken
                     1143: branch 2 taken
                        0: branch 3 not taken
                        5: branch 4 taken
                        5: branch 5 taken
                        5: branch 6 taken
                        0: branch 8 not taken
                        0: branch 9 not taken
                        0: branch 10 not taken
                        0: branch 11 not taken
                        0: branch 12 not taken
                        0: branch 13 not taken
     313             1148:     } else if (isxdigit(*s) && !(*s == 'e' || *s == 'E')) {
     314                 :       PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
     315                0:               diag::err_invalid_decimal_digit) << std::string(s, s+1);
     316                0:       hadError = true;
     317                0:       return;
                      648: branch 0 taken
                      500: branch 1 taken
                      500: branch 2 taken
                      500: branch 3 taken
     318             1148:     } else if (*s == '.') {
     319              648:       s++;
     320              648:       saw_period = true;
     321              648:       s = SkipDigits(s);
     322                 :     }
                     7900: branch 0 taken
                       23: branch 1 taken
                        0: branch 2 not taken
                     7900: branch 3 taken
                     7900: branch 4 taken
                     7900: branch 5 taken
                     7900: branch 6 taken
                     7900: branch 7 taken
     323             7923:     if ((*s == 'e' || *s == 'E')) { // exponent
     324               23:       const char *Exponent = s;
     325               23:       s++;
     326               23:       saw_exponent = true;
                       23: branch 0 taken
                        0: branch 1 not taken
                       11: branch 2 taken
                       12: branch 3 taken
                       12: branch 4 taken
                       12: branch 5 taken
                       12: branch 6 taken
                       12: branch 7 taken
     327               23:       if (*s == '+' || *s == '-')  s++; // sign
     328               23:       const char *first_non_digit = SkipDigits(s);
                       21: branch 0 taken
                        2: branch 1 taken
                        2: branch 2 taken
                        2: branch 3 taken
     329               23:       if (first_non_digit != s) {
     330               21:         s = first_non_digit;
     331                 :       } else {
     332                 :         PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-begin),
     333                2:                 diag::err_exponent_has_no_digits);
     334                2:         hadError = true;
     335                2:         return;
     336                 :       }
     337                 :     }
     338                 :   }
     339                 : 
     340             9447:   SuffixBegin = s;
     341                 : 
     342                 :   // Parse the suffix.  At this point we can classify whether we have an FP or
     343                 :   // integer constant.
     344             9447:   bool isFPConstant = isFloatingLiteral();
     345                 : 
     346                 :   // Loop over all of the characters of the suffix.  If we see something bad,
     347                 :   // we break out of the loop.
                      814: branch 0 taken
                     9434: branch 1 taken
                     9434: branch 2 taken
                     9434: branch 3 taken
     348            10248:   for (; s != ThisTokEnd; ++s) {
                      153: branch 0 taken
                       56: branch 1 taken
                      537: branch 2 taken
                       67: branch 3 taken
                        1: branch 4 taken
                        0: branch 5 not taken
                        0: branch 6 not taken
                        0: branch 7 not taken
                        0: branch 8 not taken
                        0: branch 9 not taken
                        0: branch 10 not taken
                        0: branch 11 not taken
     349              814:     switch (*s) {
     350                 :     case 'f':      // FP Suffix for "float"
     351                 :     case 'F':
                      153: branch 0 taken
                        0: branch 1 not taken
                        0: branch 2 not taken
                        0: branch 3 not taken
     352              153:       if (!isFPConstant) break;  // Error for integer constant.
                      153: branch 0 taken
                        0: branch 1 not taken
                        0: branch 2 not taken
                      153: branch 3 taken
                      153: branch 4 taken
                      153: branch 5 taken
                      153: branch 6 taken
                      153: branch 7 taken
     353              153:       if (isFloat || isLong) break; // FF, LF invalid.
     354              153:       isFloat = true;
     355              153:       continue;  // Success.
     356                 :     case 'u':
     357                 :     case 'U':
                       56: branch 0 taken
                        0: branch 1 not taken
                        0: branch 2 not taken
                        0: branch 3 not taken
     358               56:       if (isFPConstant) break;  // Error for floating constant.
                       56: branch 0 taken
                        0: branch 1 not taken
                        0: branch 2 not taken
                        0: branch 3 not taken
     359               56:       if (isUnsigned) break;    // Cannot be repeated.
     360               56:       isUnsigned = true;
     361               56:       continue;  // Success.
     362                 :     case 'l':
     363                 :     case 'L':
                      537: branch 0 taken
                        0: branch 1 not taken
                        0: branch 2 not taken
                      537: branch 3 taken
                      537: branch 4 taken
                      537: branch 5 taken
                      537: branch 6 taken
                      537: branch 7 taken
     364              537:       if (isLong || isLongLong) break;  // Cannot be repeated.
                      537: branch 0 taken
                        0: branch 1 not taken
                        0: branch 2 not taken
                        0: branch 3 not taken
     365              537:       if (isFloat) break;               // LF invalid.
     366                 : 
     367                 :       // Check for long long.  The L's need to be adjacent and the same case.
                       48: branch 0 taken
                      489: branch 1 taken
                       47: branch 2 taken
                        1: branch 3 taken
                        1: branch 4 taken
                        1: branch 5 taken
                        1: branch 6 taken
                        1: branch 7 taken
     368              584:       if (s+1 != ThisTokEnd && s[1] == s[0]) {
                       47: branch 0 taken
                        0: branch 1 not taken
                        0: branch 2 not taken
                        0: branch 3 not taken
     369               47:         if (isFPConstant) break;        // long long invalid for floats.
     370               47:         isLongLong = true;
     371               47:         ++s;  // Eat both of them.
     372                 :       } else {
     373              490:         isLong = true;
     374                 :       }
     375              537:       continue;  // Success.
     376                 :     case 'i':
                       13: branch 1 taken
                       54: branch 2 taken
                        0: branch 4 not taken
                        0: branch 5 not taken
     377               67:       if (PP.getLangOptions().Microsoft) {
                       13: branch 0 taken
                        0: branch 1 not taken
                       12: branch 2 taken
                        1: branch 3 taken
                        1: branch 4 taken
                       11: branch 5 taken
                       11: branch 6 taken
                       11: branch 7 taken
                       11: branch 8 taken
                       11: branch 9 taken
                       11: branch 10 taken
                       11: branch 11 taken
     378               13:         if (isFPConstant || isLong || isLongLong) break;
     379                 : 
     380                 :         // Allow i8, i16, i32, i64, and i128.
                       11: branch 0 taken
                        0: branch 1 not taken
                        0: branch 2 not taken
                        0: branch 3 not taken
     381               11:         if (s + 1 != ThisTokEnd) {
                        2: branch 0 taken
                        3: branch 1 taken
                        2: branch 2 taken
                        4: branch 3 taken
                        0: branch 4 not taken
                        0: branch 5 not taken
                        0: branch 6 not taken
                        0: branch 7 not taken
                        0: branch 8 not taken
                        0: branch 9 not taken
     382               11:           switch (s[1]) {
     383                 :             case '8':
     384                2:               s += 2; // i8 suffix
     385                2:               isMicrosoftInteger = true;
     386                2:               break;
     387                 :             case '1':
                        3: branch 0 taken
                        0: branch 1 not taken
                        0: branch 2 not taken
                        0: branch 3 not taken
     388                3:               if (s + 2 == ThisTokEnd) break;
                        2: branch 0 taken
                        1: branch 1 taken
                        1: branch 2 taken
                        1: branch 3 taken
     389                3:               if (s[2] == '6') s += 3; // i16 suffix
                        1: branch 0 taken
                        0: branch 1 not taken
                        0: branch 2 not taken
                        0: branch 3 not taken
     390                1:               else if (s[2] == '2') {
                        1: branch 0 taken
                        0: branch 1 not taken
                        0: branch 2 not taken
                        0: branch 3 not taken
     391                1:                 if (s + 3 == ThisTokEnd) break;
                        1: branch 0 taken
                        0: branch 1 not taken
                        0: branch 2 not taken
                        0: branch 3 not taken
     392                1:                 if (s[3] == '8') s += 4; // i128 suffix
     393                 :               }
     394                3:               isMicrosoftInteger = true;
     395                3:               break;
     396                 :             case '3':
                        2: branch 0 taken
                        0: branch 1 not taken
                        0: branch 2 not taken
                        0: branch 3 not taken
     397                2:               if (s + 2 == ThisTokEnd) break;
                        2: branch 0 taken
                        0: branch 1 not taken
                        0: branch 2 not taken
                        0: branch 3 not taken
     398                2:               if (s[2] == '2') s += 3; // i32 suffix
     399                2:               isMicrosoftInteger = true;
     400                2:               break;
     401                 :             case '6':
                        4: branch 0 taken
                        0: branch 1 not taken
                        0: branch 2 not taken
                        0: branch 3 not taken
     402                4:               if (s + 2 == ThisTokEnd) break;
                        4: branch 0 taken
                        0: branch 1 not taken
                        0: branch 2 not taken
                        0: branch 3 not taken
     403                4:               if (s[2] == '4') s += 3; // i64 suffix
     404                4:               isMicrosoftInteger = true;
     405                 :               break;
     406                 :             default:
     407                 :               break;
     408                 :           }
     409               11:           break;
     410                 :         }
     411                 :       }
     412                 :       // fall through.
     413                 :     case 'I':
     414                 :     case 'j':
     415                 :     case 'J':
                       55: branch 0 taken
                        0: branch 1 not taken
                        0: branch 2 not taken
                        0: branch 3 not taken
     416               55:       if (isImaginary) break;   // Cannot be repeated.
     417                 :       PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
     418               55:               diag::ext_imaginary_constant);
     419               55:       isImaginary = true;
     420               55:       continue;  // Success.
     421                 :     }
     422                 :     // If we reached here, there was an error.
     423               13:     break;
     424                 :   }
     425                 : 
     426                 :   // Report an error if there are any.
                        3: branch 0 taken
                     9444: branch 1 taken
                     9444: branch 2 taken
                     9444: branch 3 taken
     427             9447:   if (s != ThisTokEnd) {
     428                 :     PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
     429                 :             isFPConstant ? diag::err_invalid_suffix_float_constant :
     430                 :                            diag::err_invalid_suffix_integer_constant)
                        0: branch 3 not taken
                        3: branch 4 taken
                        0: branch 14 not taken
                        0: branch 15 not taken
     431                3:       << std::string(SuffixBegin, ThisTokEnd);
     432                3:     hadError = true;
     433                3:     return;
     434                 :   }
     435                 : }
     436                 : 
     437                 : /// ParseNumberStartingWithZero - This method is called when the first character
     438                 : /// of the number is found to be a zero.  This means it is either an octal
     439                 : /// number (like '04') or a hex number ('0x123a') a binary number ('0b1010') or
     440                 : /// a floating point number (01239.123e4).  Eat the prefix, determining the
     441                 : /// radix etc.
     442             1528: void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
                        0: branch 0 not taken
                     1528: branch 1 taken
     443             1528:   assert(s[0] == '0' && "Invalid method call");
     444             1528:   s++;
     445                 : 
     446                 :   // Handle a hex number like 0x1234.
                      544: branch 0 taken
                      984: branch 1 taken
                        0: branch 2 not taken
                      544: branch 3 taken
                        0: branch 5 not taken
                      984: branch 6 taken
                      984: branch 7 taken
                      984: branch 8 taken
     447             1528:   if ((*s == 'x' || *s == 'X') && (isxdigit(s[1]) || s[1] == '.')) {
     448              984:     s++;
     449              984:     radix = 16;
     450              984:     DigitsBegin = s;
     451              984:     s = SkipHexDigits(s);
                       72: branch 0 taken
                      912: branch 1 taken
     452              984:     if (s == ThisTokEnd) {
     453                 :       // Done.
                        3: branch 0 taken
                       69: branch 1 taken
     454               72:     } else if (*s == '.') {
     455                3:       s++;
     456                3:       saw_period = true;
     457                3:       s = SkipHexDigits(s);
     458                 :     }
     459                 :     // A binary exponent can appear with or with a '.'. If dotted, the
     460                 :     // binary exponent is required.
                      980: branch 0 taken
                        4: branch 1 taken
                        0: branch 2 not taken
                      980: branch 3 taken
                        4: branch 5 taken
                        0: branch 6 not taken
                        4: branch 7 taken
                      980: branch 8 taken
     461              984:     if ((*s == 'p' || *s == 'P') && !PP.getLangOptions().CPlusPlus0x) {
     462                4:       const char *Exponent = s;
     463                4:       s++;
     464                4:       saw_exponent = true;
                        4: branch 0 taken
                        0: branch 1 not taken
                        4: branch 2 taken
                        0: branch 3 not taken
     465                4:       if (*s == '+' || *s == '-')  s++; // sign
     466                4:       const char *first_non_digit = SkipDigits(s);
                        0: branch 0 not taken
                        4: branch 1 taken
     467                4:       if (first_non_digit == s) {
     468                 :         PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
     469                0:                 diag::err_exponent_has_no_digits);
     470                0:         hadError = true;
     471                0:         return;
     472                 :       }
     473                4:       s = first_non_digit;
     474                 : 
     475                 :       // In C++0x, we cannot support hexadecmial floating literals because
     476                 :       // they conflict with user-defined literals, so we warn in previous
     477                 :       // versions of C++ by default.
                        0: branch 1 not taken
                        4: branch 2 taken
     478                4:       if (PP.getLangOptions().CPlusPlus)
     479                0:         PP.Diag(TokLoc, diag::ext_hexconstant_cplusplus);
                        3: branch 1 taken
                        1: branch 2 taken
     480                4:       else if (!PP.getLangOptions().HexFloats)
     481                3:         PP.Diag(TokLoc, diag::ext_hexconstant_invalid);
                        0: branch 0 not taken
                      980: branch 1 taken
     482              980:     } else if (saw_period) {
     483                 :       PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
     484                0:               diag::err_hexconstant_requires_exponent);
     485                0:       hadError = true;
     486                 :     }
     487              984:     return;
     488                 :   }
     489                 : 
     490                 :   // Handle simple binary numbers 0b01010
                      544: branch 0 taken
                        0: branch 1 not taken
                        0: branch 2 not taken
                      544: branch 3 taken
     491              544:   if (*s == 'b' || *s == 'B') {
     492                 :     // 0b101010 is a GCC extension.
     493                0:     PP.Diag(TokLoc, diag::ext_binary_literal);
     494                0:     ++s;
     495                0:     radix = 2;
     496                0:     DigitsBegin = s;
     497                0:     s = SkipBinaryDigits(s);
                        0: branch 0 not taken
                        0: branch 1 not taken
     498                0:     if (s == ThisTokEnd) {
     499                 :       // Done.
                        0: branch 1 not taken
                        0: branch 2 not taken
     500                0:     } else if (isxdigit(*s)) {
     501                 :       PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
     502                0:               diag::err_invalid_binary_digit) << std::string(s, s+1);
     503                0:       hadError = true;
     504                 :     }
     505                 :     // Other suffixes will be diagnosed by the caller.
     506                0:     return;
     507                 :   }
     508                 : 
     509                 :   // For now, the radix is set to 8. If we discover that we have a
     510                 :   // floating point constant, the radix will change to 10. Octal floating
     511                 :   // point constants are not permitted (only decimal and hexadecimal).
     512              544:   radix = 8;
     513              544:   DigitsBegin = s;
     514              544:   s = SkipOctalDigits(s);
                      395: branch 0 taken
                      149: branch 1 taken
     515              544:   if (s == ThisTokEnd)
     516              395:     return; // Done, simple octal number like 01234
     517                 : 
     518                 :   // If we have some other non-octal digit that *is* a decimal digit, see if
     519                 :   // this is part of a floating point number like 094.123 or 09e1.
                        3: branch 0 taken
                      146: branch 1 taken
     520              149:   if (isdigit(*s)) {
     521                3:     const char *EndDecimal = SkipDigits(s);
                        2: branch 0 taken
                        1: branch 1 taken
                        2: branch 2 taken
                        0: branch 3 not taken
                        0: branch 4 not taken
                        2: branch 5 taken
     522                3:     if (EndDecimal[0] == '.' || EndDecimal[0] == 'e' || EndDecimal[0] == 'E') {
     523                1:       s = EndDecimal;
     524                1:       radix = 10;
     525                 :     }
     526                 :   }
     527                 : 
     528                 :   // If we have a hex digit other than 'e' (which denotes a FP exponent) then
     529                 :   // the code is using an incorrect base.
                        4: branch 1 taken
                      145: branch 2 taken
                        2: branch 3 taken
                        2: branch 4 taken
                        2: branch 5 taken
                        0: branch 6 not taken
     530              149:   if (isxdigit(*s) && *s != 'e' && *s != 'E') {
     531                 :     PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
     532                2:             diag::err_invalid_octal_digit) << std::string(s, s+1);
     533                2:     hadError = true;
     534                2:     return;
     535                 :   }
     536                 : 
                      114: branch 0 taken
                       33: branch 1 taken
     537              147:   if (*s == '.') {
     538              114:     s++;
     539              114:     radix = 10;
     540              114:     saw_period = true;
     541              114:     s = SkipDigits(s); // Skip suffix.
     542                 :   }
                      145: branch 0 taken
                        2: branch 1 taken
                        0: branch 2 not taken
                      145: branch 3 taken
     543              147:   if (*s == 'e' || *s == 'E') { // exponent
     544                2:     const char *Exponent = s;
     545                2:     s++;
     546                2:     radix = 10;
     547                2:     saw_exponent = true;
                        2: branch 0 taken
                        0: branch 1 not taken
                        1: branch 2 taken
                        1: branch 3 taken
     548                2:     if (*s == '+' || *s == '-')  s++; // sign
     549                2:     const char *first_non_digit = SkipDigits(s);
                        2: branch 0 taken
                        0: branch 1 not taken
     550                2:     if (first_non_digit != s) {
     551                2:       s = first_non_digit;
     552                 :     } else {
     553                 :       PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
     554                0:               diag::err_exponent_has_no_digits);
     555                0:       hadError = true;
     556                0:       return;
     557                 :     }
     558                 :   }
     559                 : }
     560                 : 
     561                 : 
     562                 : /// GetIntegerValue - Convert this numeric literal value to an APInt that
     563                 : /// matches Val's input width.  If there is an overflow, set Val to the low bits
     564                 : /// of the result and return true.  Otherwise, return false.
     565             8673: bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) {
     566                 :   // Fast path: Compute a conservative bound on the maximum number of
     567                 :   // bits per digit in this radix. If we can't possibly overflow a
     568                 :   // uint64 based on that bound then do the simple conversion to
     569                 :   // integer. This avoids the expensive overflow checking below, and
     570                 :   // handles the common cases that matter (small decimal integers and
     571                 :   // hex/octal values which don't overflow).
     572             8673:   unsigned MaxBitsPerDigit = 1;
                    25593: branch 0 taken
                     8673: branch 1 taken
     573            42939:   while ((1U << MaxBitsPerDigit) < radix)
     574            25593:     MaxBitsPerDigit += 1;
                     8668: branch 0 taken
                        5: branch 1 taken
     575             8673:   if ((SuffixBegin - DigitsBegin) * MaxBitsPerDigit <= 64) {
     576             8668:     uint64_t N = 0;
                    19417: branch 0 taken
                     8668: branch 1 taken
     577            28085:     for (s = DigitsBegin; s != SuffixBegin; ++s)
     578            19417:       N = N*radix + HexDigitValue(*s);
     579                 : 
     580                 :     // This will truncate the value to Val's input width. Simply check
     581                 :     // for overflow by comparing.
     582             8668:     Val = N;
     583             8668:     return Val.getZExtValue() != N;
     584                 :   }
     585                 : 
     586                5:   Val = 0;
     587                5:   s = DigitsBegin;
     588                 : 
     589                5:   llvm::APInt RadixVal(Val.getBitWidth(), radix);
     590                5:   llvm::APInt CharVal(Val.getBitWidth(), 0);
     591                5:   llvm::APInt OldVal = Val;
     592                 : 
     593                5:   bool OverflowOccurred = false;
                      115: branch 0 taken
                        5: branch 1 taken
     594              125:   while (s < SuffixBegin) {
     595              115:     unsigned C = HexDigitValue(*s++);
     596                 : 
     597                 :     // If this letter is out of bound for this radix, reject it.
                        0: branch 0 not taken
                      115: branch 1 taken
     598              115:     assert(C < radix && "NumericLiteralParser ctor should have rejected this");
     599                 : 
     600              115:     CharVal = C;
     601                 : 
     602                 :     // Add the digit to the value in the appropriate radix.  If adding in digits
     603                 :     // made the value smaller, then this overflowed.
     604              115:     OldVal = Val;
     605                 : 
     606                 :     // Multiply by radix, did overflow occur on the multiply?
     607              115:     Val *= RadixVal;
     608              115:     OverflowOccurred |= Val.udiv(RadixVal) != OldVal;
     609                 : 
     610                 :     // Add value, did overflow occur on the value?
     611                 :     //   (a + b) ult b  <=> overflow
     612              115:     Val += CharVal;
     613              115:     OverflowOccurred |= Val.ult(CharVal);
     614                 :   }
     615                5:   return OverflowOccurred;
     616                 : }
     617                 : 
     618                 : llvm::APFloat::opStatus
     619              771: NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
     620                 :   using llvm::APFloat;
     621                 :   using llvm::StringRef;
     622                 : 
     623              771:   unsigned n = std::min(SuffixBegin - ThisTokBegin, ThisTokEnd - ThisTokBegin);
     624                 :   return Result.convertFromString(StringRef(ThisTokBegin, n),
     625              771:                                   APFloat::rmNearestTiesToEven);
     626                 : }
     627                 : 
     628                 : 
     629                 : CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
     630              192:                                      SourceLocation Loc, Preprocessor &PP) {
     631                 :   // At this point we know that the character matches the regex "L?'.*'".
     632              192:   HadError = false;
     633                 : 
     634                 :   // Determine if this is a wide character.
     635              192:   IsWide = begin[0] == 'L';
                       19: branch 0 taken
                      173: branch 1 taken
                      173: branch 2 taken
                      173: branch 3 taken
     636              192:   if (IsWide) ++begin;
     637                 : 
     638                 :   // Skip over the entry quote.
                        0: branch 0 not taken
                      192: branch 1 taken
                        0: branch 3 not taken
                        0: branch 4 not taken
     639              192:   assert(begin[0] == '\'' && "Invalid token lexed");
     640              192:   ++begin;
     641                 : 
     642                 :   // FIXME: The "Value" is an uint64_t so we can handle char literals of
     643                 :   // upto 64-bits.
     644                 :   // FIXME: This extensively assumes that 'char' is 8-bits.
     645                 :   assert(PP.getTargetInfo().getCharWidth() == 8 &&
                      192: branch 2 taken
                        0: branch 3 not taken
                        0: branch 7 not taken
                        0: branch 8 not taken
     646              192:          "Assumes char is 8 bits");
     647                 :   assert(PP.getTargetInfo().getIntWidth() <= 64 &&
     648                 :          (PP.getTargetInfo().getIntWidth() & 7) == 0 &&
                      192: branch 2 taken
                        0: branch 3 not taken
                      192: branch 6 taken
                        0: branch 7 not taken
                        0: branch 11 not taken
                        0: branch 12 not taken
                        0: branch 15 not taken
                        0: branch 16 not taken
     649              384:          "Assumes sizeof(int) on target is <= 64 and a multiple of char");
     650                 :   assert(PP.getTargetInfo().getWCharWidth() <= 64 &&
                      192: branch 2 taken
                        0: branch 3 not taken
                        0: branch 7 not taken
                        0: branch 8 not taken
     651              192:          "Assumes sizeof(wchar) on target is <= 64");
     652                 : 
     653                 :   // This is what we will use for overflow detection
     654              192:   llvm::APInt LitVal(PP.getTargetInfo().getIntWidth(), 0);
     655                 : 
     656              192:   unsigned NumCharsSoFar = 0;
                      212: branch 0 taken
                      192: branch 1 taken
                      192: branch 2 taken
                      192: branch 3 taken
     657              596:   while (begin[0] != '\'') {
     658                 :     uint64_t ResultChar;
                      163: branch 0 taken
                       49: branch 1 taken
                       49: branch 2 taken
                       49: branch 3 taken
     659              212:     if (begin[0] != '\\')     // If this is a normal character, consume it.
     660              163:       ResultChar = *begin++;
     661                 :     else                      // Otherwise, this is an escape character.
     662               49:       ResultChar = ProcessCharEscape(begin, end, HadError, Loc, IsWide, PP);
     663                 : 
     664                 :     // If this is a multi-character constant (e.g. 'abc'), handle it.  These are
     665                 :     // implementation defined (C99 6.4.4.4p10).
                       20: branch 0 taken
                      192: branch 1 taken
                      192: branch 2 taken
                      192: branch 3 taken
     666              212:     if (NumCharsSoFar) {
                        0: branch 0 not taken
                       20: branch 1 taken
                       20: branch 2 taken
                       20: branch 3 taken
     667               20:       if (IsWide) {
     668                 :         // Emulate GCC's (unintentional?) behavior: L'ab' -> L'b'.
     669                0:         LitVal = 0;
     670                 :       } else {
     671                 :         // Narrow character literals act as though their value is concatenated
     672                 :         // in this implementation, but warn on overflow.
                        0: branch 1 not taken
                       20: branch 2 taken
                        0: branch 4 not taken
                        0: branch 5 not taken
     673               20:         if (LitVal.countLeadingZeros() < 8)
     674                0:           PP.Diag(Loc, diag::warn_char_constant_too_large);
     675               20:         LitVal <<= 8;
     676                 :       }
     677                 :     }
     678                 : 
     679              212:     LitVal = LitVal + ResultChar;
     680              212:     ++NumCharsSoFar;
     681                 :   }
     682                 : 
     683                 :   // If this is the second character being processed, do special handling.
                       10: branch 0 taken
                      182: branch 1 taken
                      182: branch 2 taken
                      182: branch 3 taken
     684              192:   if (NumCharsSoFar > 1) {
     685                 :     // Warn about discarding the top bits for multi-char wide-character
     686                 :     // constants (L'abcd').
                        0: branch 0 not taken
                       10: branch 1 taken
                       10: branch 2 taken
                       10: branch 3 taken
     687               10:     if (IsWide)
     688                0:       PP.Diag(Loc, diag::warn_extraneous_wide_char_constant);
                        5: branch 0 taken
                        5: branch 1 taken
                        5: branch 2 taken
                        5: branch 3 taken
     689               10:     else if (NumCharsSoFar != 4)
     690                5:       PP.Diag(Loc, diag::ext_multichar_character_literal);
     691                 :     else
     692                5:       PP.Diag(Loc, diag::ext_four_char_character_literal);
     693               10:     IsMultiChar = true;
     694                 :   } else
     695              182:     IsMultiChar = false;
     696                 : 
     697                 :   // Transfer the value from APInt to uint64_t
     698              192:   Value = LitVal.getZExtValue();
     699                 : 
     700                 :   // If this is a single narrow character, sign extend it (e.g. '\xFF' is "-1")
     701                 :   // if 'char' is signed for this target (C99 6.4.4.4p10).  Note that multiple
     702                 :   // character constants are not sign extended in the this implementation:
     703                 :   // '\xFF\xFF' = 65536 and '\x0\xFF' = 255, which matches GCC.
                      173: branch 0 taken
                       19: branch 1 taken
                      163: branch 2 taken
                       10: branch 3 taken
                        1: branch 4 taken
                      162: branch 5 taken
                        1: branch 7 taken
                        0: branch 8 not taken
                        1: branch 9 taken
                      191: branch 10 taken
                      191: branch 11 taken
                      191: branch 12 taken
                      191: branch 13 taken
                      191: branch 14 taken
                      191: branch 15 taken
                      191: branch 16 taken
                        0: branch 18 not taken
                        0: branch 19 not taken
                        0: branch 20 not taken
                        0: branch 21 not taken
     704              192:   if (!IsWide && NumCharsSoFar == 1 && (Value & 128) &&
     705                 :       PP.getLangOptions().CharIsSigned)
     706                1:     Value = (signed char)Value;
     707              192: }
     708                 : 
     709                 : 
     710                 : ///       string-literal: [C99 6.4.5]
     711                 : ///          " [s-char-sequence] "
     712                 : ///         L" [s-char-sequence] "
     713                 : ///       s-char-sequence:
     714                 : ///         s-char
     715                 : ///         s-char-sequence s-char
     716                 : ///       s-char:
     717                 : ///         any source character except the double quote ",
     718                 : ///           backslash \, or newline character
     719                 : ///         escape-character
     720                 : ///         universal-character-name
     721                 : ///       escape-character: [C99 6.4.4.4]
     722                 : ///         \ escape-code
     723                 : ///         universal-character-name
     724                 : ///       escape-code:
     725                 : ///         character-escape-code
     726                 : ///         octal-escape-code
     727                 : ///         hex-escape-code
     728                 : ///       character-escape-code: one of
     729                 : ///         n t b r f v a
     730                 : ///         \ ' " ?
     731                 : ///       octal-escape-code:
     732                 : ///         octal-digit
     733                 : ///         octal-digit octal-digit
     734                 : ///         octal-digit octal-digit octal-digit
     735                 : ///       hex-escape-code:
     736                 : ///         x hex-digit
     737                 : ///         hex-escape-code hex-digit
     738                 : ///       universal-character-name:
     739                 : ///         \u hex-quad
     740                 : ///         \U hex-quad hex-quad
     741                 : ///       hex-quad:
     742                 : ///         hex-digit hex-digit hex-digit hex-digit
     743                 : ///
     744                 : StringLiteralParser::
     745                 : StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
     746             9530:                     Preprocessor &pp) : PP(pp) {
     747                 :   // Scan all of the string portions, remember the max individual token length,
     748                 :   // computing a bound on the concatenated string length, and see whether any
     749                 :   // piece is a wide-string.  If any of the string portions is a wide-string
     750                 :   // literal, the result is a wide-string literal [C99 6.4.5p4].
     751             9530:   MaxTokenLength = StringToks[0].getLength();
     752             9530:   SizeBound = StringToks[0].getLength()-2;  // -2 for "".
     753             9530:   AnyWide = StringToks[0].is(tok::wide_string_literal);
     754                 : 
     755             9530:   hadError = false;
     756                 : 
     757                 :   // Implement Translation Phase #6: concatenation of string literals
     758                 :   /// (C99 5.1.1.2p1).  The common case is only one string fragment.
                      180: branch 0 taken
                     9530: branch 1 taken
                     9530: branch 2 taken
                     9530: branch 3 taken
     759             9710:   for (unsigned i = 1; i != NumStringToks; ++i) {
     760                 :     // The string could be shorter than this if it needs cleaning, but this is a
     761                 :     // reasonable bound, which is all we need.
     762              180:     SizeBound += StringToks[i].getLength()-2;  // -2 for "".
     763                 : 
     764                 :     // Remember maximum string piece length.
                       79: branch 1 taken
                      101: branch 2 taken
                        0: branch 4 not taken
                        0: branch 5 not taken
     765              180:     if (StringToks[i].getLength() > MaxTokenLength)
     766               79:       MaxTokenLength = StringToks[i].getLength();
     767                 : 
     768                 :     // Remember if we see any wide strings.
     769              180:     AnyWide |= StringToks[i].is(tok::wide_string_literal);
     770                 :   }
     771                 : 
     772                 :   // Include space for the null terminator.
     773             9530:   ++SizeBound;
     774                 : 
     775                 :   // TODO: K&R warning: "traditional C rejects string constant concatenation"
     776                 : 
     777                 :   // Get the width in bytes of wchar_t.  If no wchar_t strings are used, do not
     778                 :   // query the target.  As such, wchar_tByteWidth is only valid if AnyWide=true.
     779             9530:   wchar_tByteWidth = ~0U;
                       30: branch 0 taken
                     9500: branch 1 taken
                     9500: branch 2 taken
                     9500: branch 3 taken
     780             9530:   if (AnyWide) {
     781               30:     wchar_tByteWidth = PP.getTargetInfo().getWCharWidth();
                        0: branch 0 not taken
                       30: branch 1 taken
                        0: branch 3 not taken
                        0: branch 4 not taken
     782               30:     assert((wchar_tByteWidth & 7) == 0 && "Assumes wchar_t is byte multiple!");
     783               30:     wchar_tByteWidth /= 8;
     784                 :   }
     785                 : 
     786                 :   // The output buffer size needs to be large enough to hold wide characters.
     787                 :   // This is a worst-case assumption which basically corresponds to L"" "long".
                       30: branch 0 taken
                     9500: branch 1 taken
                     9500: branch 2 taken
                     9500: branch 3 taken
     788             9530:   if (AnyWide)
     789               30:     SizeBound *= wchar_tByteWidth;
     790                 : 
     791                 :   // Size the temporary buffer to hold the result string data.
     792             9530:   ResultBuf.resize(SizeBound);
     793                 : 
     794                 :   // Likewise, but for each string piece.
     795             9530:   llvm::SmallString<512> TokenBuf;
     796             9530:   TokenBuf.resize(MaxTokenLength);
     797                 : 
     798                 :   // Loop over all the strings, getting their spelling, and expanding them to
     799                 :   // wide strings as appropriate.
     800             9530:   ResultPtr = &ResultBuf[0];   // Next byte to fill in.
     801                 : 
     802             9530:   Pascal = false;
     803                 : 
                     9710: branch 0 taken
                     9530: branch 1 taken
                     9530: branch 2 taken
                     9530: branch 3 taken
     804            19240:   for (unsigned i = 0, e = NumStringToks; i != e; ++i) {
     805             9710:     const char *ThisTokBuf = &TokenBuf[0];
     806                 :     // Get the spelling of the token, which eliminates trigraphs, etc.  We know
     807                 :     // that ThisTokBuf points to a buffer that is big enough for the whole token
     808                 :     // and 'spelled' tokens can only shrink.
     809             9710:     unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf);
     810             9710:     const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1;  // Skip end quote.
     811                 : 
     812                 :     // TODO: Input character set mapping support.
     813                 : 
     814                 :     // Skip L marker for wide strings.
     815             9710:     bool ThisIsWide = false;
                       30: branch 0 taken
                     9680: branch 1 taken
                     9680: branch 2 taken
                     9680: branch 3 taken
     816             9710:     if (ThisTokBuf[0] == 'L') {
     817               30:       ++ThisTokBuf;
     818               30:       ThisIsWide = true;
     819                 :     }
     820                 : 
                        0: branch 0 not taken
                     9710: branch 1 taken
                        0: branch 3 not taken
                        0: branch 4 not taken
     821             9710:     assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
     822             9710:     ++ThisTokBuf;
     823                 : 
     824                 :     // Check if this is a pascal string
                       13: branch 1 taken
                     9697: branch 2 taken
                       13: branch 3 taken
                        0: branch 4 not taken
                        2: branch 5 taken
                       11: branch 6 taken
                        2: branch 7 taken
                        0: branch 8 not taken
                        2: branch 9 taken
                     9708: branch 10 taken
                        0: branch 12 not taken
                        0: branch 13 not taken
                        0: branch 14 not taken
                        0: branch 15 not taken
                        0: branch 16 not taken
                        0: branch 17 not taken
                        0: branch 18 not taken
                        0: branch 19 not taken
                        0: branch 20 not taken
                        0: branch 21 not taken
     825             9710:     if (pp.getLangOptions().PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&
     826                 :         ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') {
     827                 : 
     828                 :       // If the \p sequence is found in the first token, we have a pascal string
     829                 :       // Otherwise, if we already have a pascal string, ignore the first \p
                        2: branch 0 taken
                        0: branch 1 not taken
                        0: branch 2 not taken
                        0: branch 3 not taken
     830                2:       if (i == 0) {
     831                2:         ++ThisTokBuf;
     832                2:         Pascal = true;
                        0: branch 0 not taken
                        0: branch 1 not taken
                        0: branch 2 not taken
                        0: branch 3 not taken
     833                0:       } else if (Pascal)
     834                0:         ThisTokBuf += 2;
     835                 :     }
     836                 : 
                    10218: branch 0 taken
                     9710: branch 1 taken
                     9710: branch 2 taken
                     9710: branch 3 taken
     837            29638:     while (ThisTokBuf != ThisTokEnd) {
     838                 :       // Is this a span of non-escape characters?
                     9581: branch 0 taken
                      637: branch 1 taken
                      637: branch 2 taken
                      637: branch 3 taken
     839            10218:       if (ThisTokBuf[0] != '\\') {
     840             9581:         const char *InStart = ThisTokBuf;
                    97578: branch 0 taken
                     9016: branch 1 taken
                    97013: branch 2 taken
                      565: branch 3 taken
                      565: branch 4 taken
                      565: branch 5 taken
                      565: branch 6 taken
                      565: branch 7 taken
     841           106594:         do {
     842           106594:           ++ThisTokBuf;
     843                 :         } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
     844                 : 
     845                 :         // Copy the character span over.
     846             9581:         unsigned Len = ThisTokBuf-InStart;
                     9566: branch 0 taken
                       15: branch 1 taken
                       15: branch 2 taken
                       15: branch 3 taken
     847             9581:         if (!AnyWide) {
     848             9566:           memcpy(ResultPtr, InStart, Len);
     849             9566:           ResultPtr += Len;
     850                 :         } else {
     851                 :           // Note: our internal rep of wide char tokens is always little-endian.
                       48: branch 0 taken
                       15: branch 1 taken
                       15: branch 2 taken
                       15: branch 3 taken
     852               63:           for (; Len; --Len, ++InStart) {
     853               48:             *ResultPtr++ = InStart[0];
     854                 :             // Add zeros at the end.
                      138: branch 0 taken
                       48: branch 1 taken
                       48: branch 2 taken
                       48: branch 3 taken
     855              186:             for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
     856              138:               *ResultPtr++ = 0;
     857                 :           }
     858                 :         }
     859             9581:         continue;
     860                 :       }
     861                 :       // Is this a Universal Character Name escape?
                      618: branch 0 taken
                       19: branch 1 taken
                        9: branch 2 taken
                      609: branch 3 taken
                      609: branch 4 taken
                      609: branch 5 taken
                      609: branch 6 taken
                      609: branch 7 taken
     862              637:       if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') {
     863                 :         ProcessUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr,
     864               28:                          hadError, StringToks[i].getLocation(), ThisIsWide, PP);
     865               28:         continue;
     866                 :       }
     867                 :       // Otherwise, this is a non-UCN escape character.  Process it.
     868                 :       unsigned ResultChar = ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError,
     869                 :                                               StringToks[i].getLocation(),
     870              609:                                               ThisIsWide, PP);
     871                 : 
     872                 :       // Note: our internal rep of wide char tokens is always little-endian.
     873              609:       *ResultPtr++ = ResultChar & 0xFF;
     874                 : 
                        0: branch 0 not taken
                      609: branch 1 taken
                      609: branch 2 taken
                      609: branch 3 taken
     875              609:       if (AnyWide) {
                        0: branch 0 not taken
                        0: branch 1 not taken
                        0: branch 2 not taken
                        0: branch 3 not taken
     876                0:         for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
     877                0:           *ResultPtr++ = ResultChar >> i*8;
     878                 :       }
     879                 :     }
     880                 :   }
     881                 : 
                        2: branch 0 taken
                     9528: branch 1 taken
                     9528: branch 2 taken
                     9528: branch 3 taken
     882             9530:   if (Pascal) {
     883                2:     ResultBuf[0] = ResultPtr-&ResultBuf[0]-1;
     884                 : 
     885                 :     // Verify that pascal strings aren't too large.
                        0: branch 1 not taken
                        2: branch 2 taken
                        0: branch 4 not taken
                        0: branch 5 not taken
     886                2:     if (GetStringLength() > 256) {
     887                 :       PP.Diag(StringToks[0].getLocation(), diag::err_pascal_string_too_long)
     888                 :         << SourceRange(StringToks[0].getLocation(),
     889                0:                        StringToks[NumStringToks-1].getLocation());
     890                0:       hadError = 1;
     891                0:       return;
     892                 :     }
                     9530: branch 1 taken
                        0: branch 2 not taken
                        0: branch 4 not taken
                        0: branch 5 not taken
     893             9530:   }
     894                 : }
     895                 : 
     896                 : 
     897                 : /// getOffsetOfStringByte - This function returns the offset of the
     898                 : /// specified byte of the string data represented by Token.  This handles
     899                 : /// advancing over escape sequences in the string.
     900                 : unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok,
     901                 :                                                     unsigned ByteNo,
     902              111:                                                     Preprocessor &PP) {
     903                 :   // Get the spelling of the token.
     904              111:   llvm::SmallString<16> SpellingBuffer;
     905              111:   SpellingBuffer.resize(Tok.getLength());
     906                 : 
     907              111:   const char *SpellingPtr = &SpellingBuffer[0];
     908              111:   unsigned TokLen = PP.getSpelling(Tok, SpellingPtr);
     909                 : 
                        0: branch 0 not taken
                      111: branch 1 taken
     910              111:   assert(SpellingPtr[0] != 'L' && "Doesn't handle wide strings yet");
     911                 : 
     912                 : 
     913              111:   const char *SpellingStart = SpellingPtr;
     914              111:   const char *SpellingEnd = SpellingPtr+TokLen;
     915                 : 
     916                 :   // Skip over the leading quote.
                        0: branch 0 not taken
                      111: branch 1 taken
     917              111:   assert(SpellingPtr[0] == '"' && "Should be a string literal!");
     918              111:   ++SpellingPtr;
     919                 : 
     920                 :   // Skip over bytes until we find the offset we're looking for.
                      259: branch 0 taken
                      111: branch 1 taken
     921              481:   while (ByteNo) {
                        0: branch 0 not taken
                      259: branch 1 taken
     922              259:     assert(SpellingPtr < SpellingEnd && "Didn't find byte offset!");
     923                 : 
     924                 :     // Step over non-escapes simply.
                      259: branch 0 taken
                        0: branch 1 not taken
     925              259:     if (*SpellingPtr != '\\') {
     926              259:       ++SpellingPtr;
     927              259:       --ByteNo;
     928              259:       continue;
     929                 :     }
     930                 : 
     931                 :     // Otherwise, this is an escape character.  Advance over it.
     932                0:     bool HadError = false;
     933                 :     ProcessCharEscape(SpellingPtr, SpellingEnd, HadError,
     934                0:                       Tok.getLocation(), false, PP);
                        0: branch 0 not taken
                        0: branch 1 not taken
     935                0:     assert(!HadError && "This method isn't valid on erroneous strings");
     936                0:     --ByteNo;
     937                 :   }
     938                 : 
     939              111:   return SpellingPtr-SpellingStart;
     940                 : }

Generated: 2010-02-10 01:31 by zcov