zcov: / lib/Lex/Lexer.cpp


Files: 1 Branches Taken: 79.4% 683 / 860
Generated: 2010-02-10 01:31 Branches Executed: 95.3% 820 / 860
Line Coverage: 90.4% 786 / 869


Programs: 2 Runs 3018


       1                 : //===--- Lexer.cpp - C Language Family Lexer ------------------------------===//
       2                 : //
       3                 : //                     The LLVM Compiler Infrastructure
       4                 : //
       5                 : // This file is distributed under the University of Illinois Open Source
       6                 : // License. See LICENSE.TXT for details.
       7                 : //
       8                 : //===----------------------------------------------------------------------===//
       9                 : //
      10                 : //  This file implements the Lexer and Token interfaces.
      11                 : //
      12                 : //===----------------------------------------------------------------------===//
      13                 : //
      14                 : // TODO: GCC Diagnostics emitted by the lexer:
      15                 : // PEDWARN: (form feed|vertical tab) in preprocessing directive
      16                 : //
      17                 : // Universal characters, unicode, char mapping:
      18                 : // WARNING: `%.*s' is not in NFKC
      19                 : // WARNING: `%.*s' is not in NFC
      20                 : //
      21                 : // Other:
      22                 : // TODO: Options to support:
      23                 : //    -fexec-charset,-fwide-exec-charset
      24                 : //
      25                 : //===----------------------------------------------------------------------===//
      26                 : 
      27                 : #include "clang/Lex/Lexer.h"
      28                 : #include "clang/Lex/Preprocessor.h"
      29                 : #include "clang/Lex/LexDiagnostic.h"
      30                 : #include "clang/Basic/SourceManager.h"
      31                 : #include "llvm/Support/Compiler.h"
      32                 : #include "llvm/Support/MemoryBuffer.h"
      33                 : #include <cctype>
      34                 : using namespace clang;
      35                 : 
      36                 : static void InitCharacterInfo();
      37                 : 
      38                 : //===----------------------------------------------------------------------===//
      39                 : // Token Class Implementation
      40                 : //===----------------------------------------------------------------------===//
      41                 : 
      42                 : /// isObjCAtKeyword - Return true if we have an ObjC keyword identifier.
      43             6961: bool Token::isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const {
                     6932: branch 1 taken
                       29: branch 2 taken
      44             6961:   if (IdentifierInfo *II = getIdentifierInfo())
      45             6932:     return II->getObjCKeywordID() == objcKey;
      46               29:   return false;
      47                 : }
      48                 : 
      49                 : /// getObjCKeywordID - Return the ObjC keyword kind.
      50             7480: tok::ObjCKeywordKind Token::getObjCKeywordID() const {
      51             7480:   IdentifierInfo *specId = getIdentifierInfo();
                     7477: branch 0 taken
                        3: branch 1 taken
      52             7480:   return specId ? specId->getObjCKeywordID() : tok::objc_not_keyword;
      53                 : }
      54                 : 
      55                 : 
      56                 : //===----------------------------------------------------------------------===//
      57                 : // Lexer Class Implementation
      58                 : //===----------------------------------------------------------------------===//
      59                 : 
      60                 : void Lexer::InitLexer(const char *BufStart, const char *BufPtr,
      61           144587:                       const char *BufEnd) {
      62           144587:   InitCharacterInfo();
      63                 : 
      64           144587:   BufferStart = BufStart;
      65           144587:   BufferPtr = BufPtr;
      66           144587:   BufferEnd = BufEnd;
      67                 : 
      68                 :   assert(BufEnd[0] == 0 &&
      69                 :          "We assume that the input buffer has a null character at the end"
                        0: branch 0 not taken
                   144587: branch 1 taken
      70           144587:          " to simplify lexing!");
      71                 : 
      72           144587:   Is_PragmaLexer = false;
      73           144587:   IsInConflictMarker = false;
      74                 :   
      75                 :   // Start of the file is a start of line.
      76           144587:   IsAtStartOfLine = true;
      77                 : 
      78                 :   // We are not after parsing a #.
      79           144587:   ParsingPreprocessorDirective = false;
      80                 : 
      81                 :   // We are not after parsing #include.
      82           144587:   ParsingFilename = false;
      83                 : 
      84                 :   // We are not in raw mode.  Raw mode disables diagnostics and interpretation
      85                 :   // of tokens (e.g. identifiers, thus disabling macro expansion).  It is used
      86                 :   // to quickly lex the tokens of the buffer, e.g. when handling a "#if 0" block
      87                 :   // or otherwise skipping over tokens.
      88           144587:   LexingRawMode = false;
      89                 : 
      90                 :   // Default to not keeping comments.
      91           144587:   ExtendedTokenMode = 0;
      92           144587: }
      93                 : 
      94                 : /// Lexer constructor - Create a new lexer object for the specified buffer
      95                 : /// with the specified preprocessor managing the lexing process.  This lexer
      96                 : /// assumes that the associated file buffer and Preprocessor objects will
      97                 : /// outlive it, so it doesn't take ownership of either of them.
      98             5618: Lexer::Lexer(FileID FID, const llvm::MemoryBuffer *InputFile, Preprocessor &PP)
      99                 :   : PreprocessorLexer(&PP, FID),
     100                 :     FileLoc(PP.getSourceManager().getLocForStartOfFile(FID)),
     101             5618:     Features(PP.getLangOptions()) {
     102                 : 
     103                 :   InitLexer(InputFile->getBufferStart(), InputFile->getBufferStart(),
     104             5618:             InputFile->getBufferEnd());
     105                 : 
     106                 :   // Default to keeping comments if the preprocessor wants them.
     107             5618:   SetCommentRetentionState(PP.getCommentRetentionState());
     108             5618: }
     109                 : 
     110                 : /// Lexer constructor - Create a new raw lexer object.  This object is only
     111                 : /// suitable for calls to 'LexFromRawLexer'.  This lexer assumes that the text
     112                 : /// range will outlive it, so it doesn't take ownership of it.
     113                 : Lexer::Lexer(SourceLocation fileloc, const LangOptions &features,
     114           137646:              const char *BufStart, const char *BufPtr, const char *BufEnd)
     115           137646:   : FileLoc(fileloc), Features(features) {
     116                 : 
     117           137646:   InitLexer(BufStart, BufPtr, BufEnd);
     118                 : 
     119                 :   // We *are* in raw mode.
     120           137646:   LexingRawMode = true;
     121           137646: }
     122                 : 
     123                 : /// Lexer constructor - Create a new raw lexer object.  This object is only
     124                 : /// suitable for calls to 'LexFromRawLexer'.  This lexer assumes that the text
     125                 : /// range will outlive it, so it doesn't take ownership of it.
     126                 : Lexer::Lexer(FileID FID, const llvm::MemoryBuffer *FromFile,
     127             1323:              const SourceManager &SM, const LangOptions &features)
     128             1323:   : FileLoc(SM.getLocForStartOfFile(FID)), Features(features) {
     129                 : 
     130                 :   InitLexer(FromFile->getBufferStart(), FromFile->getBufferStart(),
     131             1323:             FromFile->getBufferEnd());
     132                 : 
     133                 :   // We *are* in raw mode.
     134             1323:   LexingRawMode = true;
     135             1323: }
     136                 : 
     137                 : /// Create_PragmaLexer: Lexer constructor - Create a new lexer object for
     138                 : /// _Pragma expansion.  This has a variety of magic semantics that this method
     139                 : /// sets up.  It returns a new'd Lexer that must be delete'd when done.
     140                 : ///
     141                 : /// On entrance to this routine, TokStartLoc is a macro location which has a
     142                 : /// spelling loc that indicates the bytes to be lexed for the token and an
     143                 : /// instantiation location that indicates where all lexed tokens should be
     144                 : /// "expanded from".
     145                 : ///
     146                 : /// FIXME: It would really be nice to make _Pragma just be a wrapper around a
     147                 : /// normal lexer that remaps tokens as they fly by.  This would require making
     148                 : /// Preprocessor::Lex virtual.  Given that, we could just dump in a magic lexer
     149                 : /// interface that could handle this stuff.  This would pull GetMappedTokenLoc
     150                 : /// out of the critical path of the lexer!
     151                 : ///
     152                 : Lexer *Lexer::Create_PragmaLexer(SourceLocation SpellingLoc,
     153                 :                                  SourceLocation InstantiationLocStart,
     154                 :                                  SourceLocation InstantiationLocEnd,
     155               16:                                  unsigned TokLen, Preprocessor &PP) {
     156               16:   SourceManager &SM = PP.getSourceManager();
     157                 : 
     158                 :   // Create the lexer as if we were going to lex the file normally.
     159               16:   FileID SpellingFID = SM.getFileID(SpellingLoc);
     160               16:   const llvm::MemoryBuffer *InputFile = SM.getBuffer(SpellingFID);
     161               16:   Lexer *L = new Lexer(SpellingFID, InputFile, PP);
     162                 : 
     163                 :   // Now that the lexer is created, change the start/end locations so that we
     164                 :   // just lex the subsection of the file that we want.  This is lexing from a
     165                 :   // scratch buffer.
     166               16:   const char *StrData = SM.getCharacterData(SpellingLoc);
     167                 : 
     168               16:   L->BufferPtr = StrData;
     169               16:   L->BufferEnd = StrData+TokLen;
                        0: branch 0 not taken
                       16: branch 1 taken
     170               16:   assert(L->BufferEnd[0] == 0 && "Buffer is not nul terminated!");
     171                 : 
     172                 :   // Set the SourceLocation with the remapping information.  This ensures that
     173                 :   // GetMappedTokenLoc will remap the tokens as they are lexed.
     174                 :   L->FileLoc = SM.createInstantiationLoc(SM.getLocForStartOfFile(SpellingFID),
     175                 :                                          InstantiationLocStart,
     176               16:                                          InstantiationLocEnd, TokLen);
     177                 : 
     178                 :   // Ensure that the lexer thinks it is inside a directive, so that end \n will
     179                 :   // return an EOM token.
     180               16:   L->ParsingPreprocessorDirective = true;
     181                 : 
     182                 :   // This lexer really is for _Pragma.
     183               16:   L->Is_PragmaLexer = true;
     184               16:   return L;
     185                 : }
     186                 : 
     187                 : 
     188                 : /// Stringify - Convert the specified string into a C string by escaping '\'
     189                 : /// and the quote character (' if Charify is set, else ").  Adds no quotes.
     190              111: std::string Lexer::Stringify(const std::string &Str, bool Charify) {
     191              111:   std::string Result = Str;
                        0: branch 0 not taken
                      111: branch 1 taken
     192              111:   char Quote = Charify ? '\'' : '"';
                     6056: branch 1 taken
                      111: branch 2 taken
     193             6167:   for (unsigned i = 0, e = Result.size(); i != e; ++i) {
                     6054: branch 1 taken
                        2: branch 2 taken
                       20: branch 4 taken
                     6034: branch 5 taken
                       22: branch 6 taken
                     6034: branch 7 taken
     194             6056:     if (Result[i] == '\\' || Result[i] == Quote) {
     195               22:       Result.insert(Result.begin()+i, '\\');
     196               22:       ++i; ++e;
     197                 :     }
     198                 :   }
     199                 :   return Result;
     200                 : }
     201                 : 
     202                 : /// Stringify - Convert the specified string into a C string by escaping '\'
     203                 : /// and " characters.  This does not add surrounding ""'s to the string.
     204             1372: void Lexer::Stringify(llvm::SmallVectorImpl<char> &Str) {
                    44314: branch 1 taken
                     1372: branch 2 taken
     205            45686:   for (unsigned i = 0, e = Str.size(); i != e; ++i) {
                    44314: branch 1 taken
                        0: branch 2 not taken
                        0: branch 4 not taken
                    44314: branch 5 taken
                        0: branch 6 not taken
                    44314: branch 7 taken
     206            44314:     if (Str[i] == '\\' || Str[i] == '"') {
     207                0:       Str.insert(Str.begin()+i, '\\');
     208                0:       ++i; ++e;
     209                 :     }
     210                 :   }
     211             1372: }
     212                 : 
     213                 : static bool isWhitespace(unsigned char c);
     214                 : 
     215                 : /// MeasureTokenLength - Relex the token at the specified location and return
     216                 : /// its length in bytes in the input file.  If the token needs cleaning (e.g.
     217                 : /// includes a trigraph or an escaped newline) then this count includes bytes
     218                 : /// that are part of that.
     219                 : unsigned Lexer::MeasureTokenLength(SourceLocation Loc,
     220                 :                                    const SourceManager &SM,
     221           137653:                                    const LangOptions &LangOpts) {
     222                 :   // TODO: this could be special cased for common tokens like identifiers, ')',
     223                 :   // etc to make this faster, if it mattered.  Just look at StrData[0] to handle
     224                 :   // all obviously single-char tokens.  This could use
     225                 :   // Lexer::isObviouslySimpleCharacter for example to handle identifiers or
     226                 :   // something.
     227                 : 
     228                 :   // If this comes from a macro expansion, we really do want the macro name, not
     229                 :   // the token this macro expanded to.
     230           137653:   Loc = SM.getInstantiationLoc(Loc);
     231           137653:   std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);
     232           137653:   std::pair<const char *,const char *> Buffer = SM.getBufferData(LocInfo.first);
     233           137653:   const char *StrData = Buffer.first+LocInfo.second;
     234                 : 
                     1256: branch 1 taken
                   136397: branch 2 taken
     235           137653:   if (isWhitespace(StrData[0]))
     236             1256:     return 0;
     237                 : 
     238                 :   // Create a lexer starting at the beginning of this token.
     239           136397:   Lexer TheLexer(Loc, LangOpts, Buffer.first, StrData, Buffer.second);
     240           136397:   TheLexer.SetCommentRetentionState(true);
     241           136397:   Token TheTok;
     242           136397:   TheLexer.LexFromRawLexer(TheTok);
     243           136397:   return TheTok.getLength();
     244                 : }
     245                 : 
     246                 : //===----------------------------------------------------------------------===//
     247                 : // Character information.
     248                 : //===----------------------------------------------------------------------===//
     249                 : 
     250                 : enum {
     251                 :   CHAR_HORZ_WS  = 0x01,  // ' ', '\t', '\f', '\v'.  Note, no '\0'
     252                 :   CHAR_VERT_WS  = 0x02,  // '\r', '\n'
     253                 :   CHAR_LETTER   = 0x04,  // a-z,A-Z
     254                 :   CHAR_NUMBER   = 0x08,  // 0-9
     255                 :   CHAR_UNDER    = 0x10,  // _
     256                 :   CHAR_PERIOD   = 0x20   // .
     257                 : };
     258                 : 
     259                 : // Statically initialize CharInfo table based on ASCII character set
     260                 : // Reference: FreeBSD 7.2 /usr/share/misc/ascii
     261                 : static const unsigned char CharInfo[256] =
     262                 : {
     263                 : // 0 NUL         1 SOH         2 STX         3 ETX
     264                 : // 4 EOT         5 ENQ         6 ACK         7 BEL
     265                 :    0           , 0           , 0           , 0           ,
     266                 :    0           , 0           , 0           , 0           ,
     267                 : // 8 BS          9 HT         10 NL         11 VT
     268                 : //12 NP         13 CR         14 SO         15 SI
     269                 :    0           , CHAR_HORZ_WS, CHAR_VERT_WS, CHAR_HORZ_WS,
     270                 :    CHAR_HORZ_WS, CHAR_VERT_WS, 0           , 0           ,
     271                 : //16 DLE        17 DC1        18 DC2        19 DC3
     272                 : //20 DC4        21 NAK        22 SYN        23 ETB
     273                 :    0           , 0           , 0           , 0           ,
     274                 :    0           , 0           , 0           , 0           ,
     275                 : //24 CAN        25 EM         26 SUB        27 ESC
     276                 : //28 FS         29 GS         30 RS         31 US
     277                 :    0           , 0           , 0           , 0           ,
     278                 :    0           , 0           , 0           , 0           ,
     279                 : //32 SP         33  !         34  "         35  #
     280                 : //36  $         37  %         38  &         39  '
     281                 :    CHAR_HORZ_WS, 0           , 0           , 0           ,
     282                 :    0           , 0           , 0           , 0           ,
     283                 : //40  (         41  )         42  *         43  +
     284                 : //44  ,         45  -         46  .         47  /
     285                 :    0           , 0           , 0           , 0           ,
     286                 :    0           , 0           , CHAR_PERIOD , 0           ,
     287                 : //48  0         49  1         50  2         51  3
     288                 : //52  4         53  5         54  6         55  7
     289                 :    CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER ,
     290                 :    CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER ,
     291                 : //56  8         57  9         58  :         59  ;
     292                 : //60  <         61  =         62  >         63  ?
     293                 :    CHAR_NUMBER , CHAR_NUMBER , 0           , 0           ,
     294                 :    0           , 0           , 0           , 0           ,
     295                 : //64  @         65  A         66  B         67  C
     296                 : //68  D         69  E         70  F         71  G
     297                 :    0           , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
     298                 :    CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
     299                 : //72  H         73  I         74  J         75  K
     300                 : //76  L         77  M         78  N         79  O
     301                 :    CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
     302                 :    CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
     303                 : //80  P         81  Q         82  R         83  S
     304                 : //84  T         85  U         86  V         87  W
     305                 :    CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
     306                 :    CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
     307                 : //88  X         89  Y         90  Z         91  [
     308                 : //92  \         93  ]         94  ^         95  _
     309                 :    CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , 0           ,
     310                 :    0           , 0           , 0           , CHAR_UNDER  ,
     311                 : //96  `         97  a         98  b         99  c
     312                 : //100  d       101  e        102  f        103  g
     313                 :    0           , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
     314                 :    CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
     315                 : //104  h       105  i        106  j        107  k
     316                 : //108  l       109  m        110  n        111  o
     317                 :    CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
     318                 :    CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
     319                 : //112  p       113  q        114  r        115  s
     320                 : //116  t       117  u        118  v        119  w
     321                 :    CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
     322                 :    CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
     323                 : //120  x       121  y        122  z        123  {
     324                 : //124  |        125  }        126  ~        127 DEL
     325                 :    CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , 0           ,
     326                 :    0           , 0           , 0           , 0
     327                 : };
     328                 : 
     329           144587: static void InitCharacterInfo() {
     330                 :   static bool isInited = false;
                     2538: branch 0 taken
                   142049: branch 1 taken
     331           144587:   if (isInited) return;
     332                 :   // check the statically-initialized CharInfo table
                        0: branch 0 not taken
                     2538: branch 1 taken
     333             2538:   assert(CHAR_HORZ_WS == CharInfo[(int)' ']);
                        0: branch 0 not taken
                     2538: branch 1 taken
     334             2538:   assert(CHAR_HORZ_WS == CharInfo[(int)'\t']);
                        0: branch 0 not taken
                     2538: branch 1 taken
     335             2538:   assert(CHAR_HORZ_WS == CharInfo[(int)'\f']);
                        0: branch 0 not taken
                     2538: branch 1 taken
     336             2538:   assert(CHAR_HORZ_WS == CharInfo[(int)'\v']);
                        0: branch 0 not taken
                     2538: branch 1 taken
     337             2538:   assert(CHAR_VERT_WS == CharInfo[(int)'\n']);
                        0: branch 0 not taken
                     2538: branch 1 taken
     338             2538:   assert(CHAR_VERT_WS == CharInfo[(int)'\r']);
                        0: branch 0 not taken
                     2538: branch 1 taken
     339             2538:   assert(CHAR_UNDER   == CharInfo[(int)'_']);
                        0: branch 0 not taken
                     2538: branch 1 taken
     340             2538:   assert(CHAR_PERIOD  == CharInfo[(int)'.']);
                    65988: branch 0 taken
                     2538: branch 1 taken
     341            68526:   for (unsigned i = 'a'; i <= 'z'; ++i) {
                        0: branch 0 not taken
                    65988: branch 1 taken
     342            65988:     assert(CHAR_LETTER == CharInfo[i]);
                        0: branch 0 not taken
                    65988: branch 1 taken
     343            65988:     assert(CHAR_LETTER == CharInfo[i+'A'-'a']);
     344                 :   }
                    25380: branch 0 taken
                     2538: branch 1 taken
     345            27918:   for (unsigned i = '0'; i <= '9'; ++i)
                        0: branch 0 not taken
                    25380: branch 1 taken
     346            25380:     assert(CHAR_NUMBER == CharInfo[i]);
     347                 :     
     348             2538:   isInited = true;
     349                 : }
     350                 : 
     351                 : 
     352                 : /// isIdentifierBody - Return true if this is the body character of an
     353                 : /// identifier, which is [a-zA-Z0-9_].
     354          9347222: static inline bool isIdentifierBody(unsigned char c) {
     355          9347222:   return (CharInfo[c] & (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER)) ? true : false;
     356                 : }
     357                 : 
     358                 : /// isHorizontalWhitespace - Return true if this character is horizontal
     359                 : /// whitespace: ' ', '\t', '\f', '\v'.  Note that this returns false for '\0'.
     360           454532: static inline bool isHorizontalWhitespace(unsigned char c) {
     361           454532:   return (CharInfo[c] & CHAR_HORZ_WS) ? true : false;
     362                 : }
     363                 : 
     364                 : /// isWhitespace - Return true if this character is horizontal or vertical
     365                 : /// whitespace: ' ', '\t', '\f', '\v', '\n', '\r'.  Note that this returns false
     366                 : /// for '\0'.
     367           145162: static inline bool isWhitespace(unsigned char c) {
     368           145162:   return (CharInfo[c] & (CHAR_HORZ_WS|CHAR_VERT_WS)) ? true : false;
     369                 : }
     370                 : 
     371                 : /// isNumberBody - Return true if this is the body character of a
     372                 : /// preprocessing number, which is [a-zA-Z0-9_.].
     373          1265555: static inline bool isNumberBody(unsigned char c) {
     374                 :   return (CharInfo[c] & (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER|CHAR_PERIOD)) ?
     375          1265555:     true : false;
     376                 : }
     377                 : 
     378                 : 
     379                 : //===----------------------------------------------------------------------===//
     380                 : // Diagnostics forwarding code.
     381                 : //===----------------------------------------------------------------------===//
     382                 : 
     383                 : /// GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the
     384                 : /// lexer buffer was all instantiated at a single point, perform the mapping.
     385                 : /// This is currently only used for _Pragma implementation, so it is the slow
     386                 : /// path of the hot getSourceLocation method.  Do not allow it to be inlined.
     387                 : static DISABLE_INLINE SourceLocation GetMappedTokenLoc(Preprocessor &PP,
     388                 :                                                        SourceLocation FileLoc,
     389                 :                                                        unsigned CharNo,
     390                 :                                                        unsigned TokLen);
     391                 : static SourceLocation GetMappedTokenLoc(Preprocessor &PP,
     392                 :                                         SourceLocation FileLoc,
     393               61:                                         unsigned CharNo, unsigned TokLen) {
                       61: branch 1 taken
                        0: branch 2 not taken
     394               61:   assert(FileLoc.isMacroID() && "Must be an instantiation");
     395                 : 
     396                 :   // Otherwise, we're lexing "mapped tokens".  This is used for things like
     397                 :   // _Pragma handling.  Combine the instantiation location of FileLoc with the
     398                 :   // spelling location.
     399               61:   SourceManager &SM = PP.getSourceManager();
     400                 : 
     401                 :   // Create a new SLoc which is expanded from Instantiation(FileLoc) but whose
     402                 :   // characters come from spelling(FileLoc)+Offset.
     403               61:   SourceLocation SpellingLoc = SM.getSpellingLoc(FileLoc);
     404               61:   SpellingLoc = SpellingLoc.getFileLocWithOffset(CharNo);
     405                 : 
     406                 :   // Figure out the expansion loc range, which is the range covered by the
     407                 :   // original _Pragma(...) sequence.
     408                 :   std::pair<SourceLocation,SourceLocation> II =
     409               61:     SM.getImmediateInstantiationRange(FileLoc);
     410                 : 
     411               61:   return SM.createInstantiationLoc(SpellingLoc, II.first, II.second, TokLen);
     412                 : }
     413                 : 
     414                 : /// getSourceLocation - Return a source location identifier for the specified
     415                 : /// offset in the current file.
     416                 : SourceLocation Lexer::getSourceLocation(const char *Loc,
     417          2792135:                                         unsigned TokLen) const {
     418                 :   assert(Loc >= BufferStart && Loc <= BufferEnd &&
                  2792135: branch 0 taken
                        0: branch 1 not taken
                        0: branch 2 not taken
                  2792135: branch 3 taken
     419          2792135:          "Location out of range for this buffer!");
     420                 : 
     421                 :   // In the normal case, we're just lexing from a simple file buffer, return
     422                 :   // the file id from FileLoc with the offset specified.
     423          2792135:   unsigned CharNo = Loc-BufferStart;
                  2792074: branch 1 taken
                       61: branch 2 taken
     424          2792135:   if (FileLoc.isFileID())
     425          2792074:     return FileLoc.getFileLocWithOffset(CharNo);
     426                 : 
     427                 :   // Otherwise, this is the _Pragma lexer case, which pretends that all of the
     428                 :   // tokens are lexed from where the _Pragma was defined.
                        0: branch 0 not taken
                       61: branch 1 taken
     429               61:   assert(PP && "This doesn't work on raw lexers");
     430               61:   return GetMappedTokenLoc(*PP, FileLoc, CharNo, TokLen);
     431                 : }
     432                 : 
     433                 : /// Diag - Forwarding function for diagnostics.  This translate a source
     434                 : /// position in the current buffer into a SourceLocation object for rendering.
     435              215: DiagnosticBuilder Lexer::Diag(const char *Loc, unsigned DiagID) const {
     436              215:   return PP->Diag(getSourceLocation(Loc), DiagID);
     437                 : }
     438                 : 
     439                 : //===----------------------------------------------------------------------===//
     440                 : // Trigraph and Escaped Newline Handling Code.
     441                 : //===----------------------------------------------------------------------===//
     442                 : 
     443                 : /// GetTrigraphCharForLetter - Given a character that occurs after a ?? pair,
     444                 : /// return the decoded trigraph letter it corresponds to, or '\0' if nothing.
     445              313: static char GetTrigraphCharForLetter(char Letter) {
                      218: branch 0 taken
                        4: branch 1 taken
                       24: branch 2 taken
                       22: branch 3 taken
                       11: branch 4 taken
                        9: branch 5 taken
                        2: branch 6 taken
                       19: branch 7 taken
                        2: branch 8 taken
                        2: branch 9 taken
     446              313:   switch (Letter) {
     447              218:   default:   return 0;
     448                4:   case '=':  return '#';
     449               24:   case ')':  return ']';
     450               22:   case '(':  return '[';
     451               11:   case '!':  return '|';
     452                9:   case '\'': return '^';
     453                2:   case '>':  return '}';
     454               19:   case '/':  return '\\';
     455                2:   case '<':  return '{';
     456                2:   case '-':  return '~';
     457                 :   }
     458                 : }
     459                 : 
     460                 : /// DecodeTrigraphChar - If the specified character is a legal trigraph when
     461                 : /// prefixed with ??, emit a trigraph warning.  If trigraphs are enabled,
     462                 : /// return the result character.  Finally, emit a warning about trigraph use
     463                 : /// whether trigraphs are enabled or not.
     464              301: static char DecodeTrigraphChar(const char *CP, Lexer *L) {
     465              301:   char Res = GetTrigraphCharForLetter(*CP);
                       85: branch 0 taken
                      216: branch 1 taken
                       15: branch 2 taken
                       70: branch 3 taken
     466              301:   if (!Res || !L) return Res;
     467                 : 
                        8: branch 1 taken
                       62: branch 2 taken
     468               70:   if (!L->getFeatures().Trigraphs) {
                        2: branch 1 taken
                        6: branch 2 taken
     469                8:     if (!L->isLexingRawMode())
     470                2:       L->Diag(CP-2, diag::trigraph_ignored);
     471                8:     return 0;
     472                 :   }
     473                 : 
                       24: branch 1 taken
                       38: branch 2 taken
     474               62:   if (!L->isLexingRawMode())
     475               24:     L->Diag(CP-2, diag::trigraph_converted) << std::string()+Res;
     476               62:   return Res;
     477                 : }
     478                 : 
     479                 : /// getEscapedNewLineSize - Return the size of the specified escaped newline,
     480                 : /// or 0 if it is not an escaped newline. P[-1] is known to be a "\" or a
     481                 : /// trigraph equivalent on entry to this function.
     482             2957: unsigned Lexer::getEscapedNewLineSize(const char *Ptr) {
     483             2957:   unsigned Size = 0;
                     3049: branch 1 taken
                        8: branch 2 taken
     484             6014:   while (isWhitespace(Ptr[Size])) {
     485             3049:     ++Size;
     486                 : 
                      100: branch 0 taken
                     2949: branch 1 taken
                        0: branch 2 not taken
                      100: branch 3 taken
     487             3049:     if (Ptr[Size-1] != '\n' && Ptr[Size-1] != '\r')
     488              100:       continue;
     489                 : 
     490                 :     // If this is a \r\n or \n\r, skip the other half.
                     2949: branch 0 taken
                        0: branch 1 not taken
                        1: branch 2 taken
                     2948: branch 3 taken
                        0: branch 4 not taken
                        1: branch 5 taken
     491             2949:     if ((Ptr[Size] == '\r' || Ptr[Size] == '\n') &&
     492                 :         Ptr[Size-1] != Ptr[Size])
     493                0:       ++Size;
     494                 : 
     495             2949:     return Size;
     496                 :   }
     497                 : 
     498                 :   // Not an escaped newline, must be a \t or something else.
     499                8:   return 0;
     500                 : }
     501                 : 
     502                 : /// SkipEscapedNewLines - If P points to an escaped newline (or a series of
     503                 : /// them), skip over them and return the first non-escaped-newline found,
     504                 : /// otherwise return P.
     505               15: const char *Lexer::SkipEscapedNewLines(const char *P) {
     506                3:   while (1) {
     507                 :     const char *AfterEscape;
                       11: branch 0 taken
                        4: branch 1 taken
     508               15:     if (*P == '\\') {
     509               11:       AfterEscape = P+1;
                        1: branch 0 taken
                        3: branch 1 taken
     510                4:     } else if (*P == '?') {
     511                 :       // If not a trigraph for escape, bail out.
                        0: branch 0 not taken
                        1: branch 1 taken
                        1: branch 2 taken
                        1: branch 3 taken
     512                1:       if (P[1] != '?' || P[2] != '/')
     513                1:         return P;
     514                0:       AfterEscape = P+3;
     515                 :     } else {
     516                3:       return P;
     517                 :     }
     518                 : 
     519               11:     unsigned NewLineSize = Lexer::getEscapedNewLineSize(AfterEscape);
                        8: branch 0 taken
                        3: branch 1 taken
     520               11:     if (NewLineSize == 0) return P;
     521                3:     P = AfterEscape+NewLineSize;
     522                 :   }
     523                 : }
     524                 : 
     525                 : 
     526                 : /// getCharAndSizeSlow - Peek a single 'character' from the specified buffer,
     527                 : /// get its size, and return it.  This is tricky in several cases:
     528                 : ///   1. If currently at the start of a trigraph, we warn about the trigraph,
     529                 : ///      then either return the trigraph (skipping 3 chars) or the '?',
     530                 : ///      depending on whether trigraphs are enabled or not.
     531                 : ///   2. If this is an escaped newline (potentially with whitespace between
     532                 : ///      the backslash and newline), implicitly skip the newline and return
     533                 : ///      the char after it.
     534                 : ///   3. If this is a UCN, return it.  FIXME: C++ UCN's?
     535                 : ///
     536                 : /// This handles the slow/uncommon case of the getCharAndSize method.  Here we
     537                 : /// know that we can accumulate into Size, and that we have already incremented
     538                 : /// Ptr by Size bytes.
     539                 : ///
     540                 : /// NOTE: When this method is updated, getCharAndSizeSlowNoWarn (below) should
     541                 : /// be updated to match.
     542                 : ///
     543                 : char Lexer::getCharAndSizeSlow(const char *Ptr, unsigned &Size,
     544             9346:                                Token *Tok) {
     545                 :   // If we have a slash, look for an escaped newline.
                     4114: branch 0 taken
                     5232: branch 1 taken
     546             9346:   if (Ptr[0] == '\\') {
     547             4114:     ++Size;
     548             4114:     ++Ptr;
     549             4129: Slash:
     550                 :     // Common case, backslash-char where the char is not whitespace.
                     1482: branch 1 taken
                     2647: branch 2 taken
     551             4129:     if (!isWhitespace(Ptr[0])) return '\\';
     552                 : 
     553                 :     // See if we have optional whitespace characters between the slash and
     554                 :     // newline.
                     2647: branch 1 taken
                        0: branch 2 not taken
     555             2647:     if (unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
     556                 :       // Remember that this token needs to be cleaned.
                     2636: branch 0 taken
                       11: branch 1 taken
     557             2647:       if (Tok) Tok->setFlag(Token::NeedsCleaning);
     558                 : 
     559                 :       // Warn if there was whitespace between the backslash and newline.
                       20: branch 0 taken
                     2627: branch 1 taken
                       20: branch 2 taken
                        0: branch 3 not taken
                       17: branch 4 taken
                        3: branch 5 taken
                        4: branch 7 taken
                       13: branch 8 taken
                        4: branch 9 taken
                     2643: branch 10 taken
     560             2647:       if (Ptr[0] != '\n' && Ptr[0] != '\r' && Tok && !isLexingRawMode())
     561                4:         Diag(Ptr, diag::backslash_newline_space);
     562                 : 
     563                 :       // Found backslash<whitespace><newline>.  Parse the char after it.
     564             2647:       Size += EscapedNewLineSize;
     565             2647:       Ptr  += EscapedNewLineSize;
     566                 :       // Use slow version to accumulate a correct size field.
     567             2647:       return getCharAndSizeSlow(Ptr, Size, Tok);
     568                 :     }
     569                 : 
     570                 :     // Otherwise, this is not an escaped newline, just return the slash.
     571                0:     return '\\';
     572                 :   }
     573                 : 
     574                 :   // If this is a trigraph, process it.
                     2585: branch 0 taken
                     2647: branch 1 taken
                      301: branch 2 taken
                     2284: branch 3 taken
     575             5232:   if (Ptr[0] == '?' && Ptr[1] == '?') {
     576                 :     // If this is actually a legal trigraph (not something like "??x"), emit
     577                 :     // a trigraph warning.  If so, and if trigraphs are enabled, return it.
                      286: branch 0 taken
                       15: branch 1 taken
                       77: branch 3 taken
                      224: branch 4 taken
     578              301:     if (char C = DecodeTrigraphChar(Ptr+2, Tok ? this : 0)) {
     579                 :       // Remember that this token needs to be cleaned.
                       62: branch 0 taken
                       15: branch 1 taken
     580               77:       if (Tok) Tok->setFlag(Token::NeedsCleaning);
     581                 : 
     582               77:       Ptr += 3;
     583               77:       Size += 3;
                       15: branch 0 taken
                       62: branch 1 taken
     584               77:       if (C == '\\') goto Slash;
     585               62:       return C;
     586                 :     }
     587                 :   }
     588                 : 
     589                 :   // If this is neither, return a single character.
     590             5155:   ++Size;
     591             5155:   return *Ptr;
     592                 : }
     593                 : 
     594                 : 
     595                 : /// getCharAndSizeSlowNoWarn - Handle the slow/uncommon case of the
     596                 : /// getCharAndSizeNoWarn method.  Here we know that we can accumulate into Size,
     597                 : /// and that we have already incremented Ptr by Size bytes.
     598                 : ///
     599                 : /// NOTE: When this method is updated, getCharAndSizeSlow (above) should
     600                 : /// be updated to match.
     601                 : char Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size,
     602              632:                                      const LangOptions &Features) {
     603                 :   // If we have a slash, look for an escaped newline.
                      319: branch 0 taken
                      313: branch 1 taken
     604              632:   if (Ptr[0] == '\\') {
     605              319:     ++Size;
     606              319:     ++Ptr;
     607              323: Slash:
     608                 :     // Common case, backslash-char where the char is not whitespace.
                       24: branch 1 taken
                      299: branch 2 taken
     609              323:     if (!isWhitespace(Ptr[0])) return '\\';
     610                 : 
     611                 :     // See if we have optional whitespace characters followed by a newline.
                      299: branch 1 taken
                        0: branch 2 not taken
     612              299:     if (unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
     613                 :       // Found backslash<whitespace><newline>.  Parse the char after it.
     614              299:       Size += EscapedNewLineSize;
     615              299:       Ptr  += EscapedNewLineSize;
     616                 : 
     617                 :       // Use slow version to accumulate a correct size field.
     618              299:       return getCharAndSizeSlowNoWarn(Ptr, Size, Features);
     619                 :     }
     620                 : 
     621                 :     // Otherwise, this is not an escaped newline, just return the slash.
     622                0:     return '\\';
     623                 :   }
     624                 : 
     625                 :   // If this is a trigraph, process it.
                       40: branch 0 taken
                      273: branch 1 taken
                       12: branch 2 taken
                       28: branch 3 taken
                       12: branch 4 taken
                        0: branch 5 not taken
     626              313:   if (Features.Trigraphs && Ptr[0] == '?' && Ptr[1] == '?') {
     627                 :     // If this is actually a legal trigraph (not something like "??x"), return
     628                 :     // it.
                       10: branch 1 taken
                        2: branch 2 taken
     629               12:     if (char C = GetTrigraphCharForLetter(Ptr[2])) {
     630               10:       Ptr += 3;
     631               10:       Size += 3;
                        4: branch 0 taken
                        6: branch 1 taken
     632               10:       if (C == '\\') goto Slash;
     633                6:       return C;
     634                 :     }
     635                 :   }
     636                 : 
     637                 :   // If this is neither, return a single character.
     638              303:   ++Size;
     639              303:   return *Ptr;
     640                 : }
     641                 : 
     642                 : //===----------------------------------------------------------------------===//
     643                 : // Helper methods for lexing.
     644                 : //===----------------------------------------------------------------------===//
     645                 : 
     646          1170750: void Lexer::LexIdentifier(Token &Result, const char *CurPtr) {
     647                 :   // Match [_A-Za-z0-9]*, we have already matched [_A-Za-z$]
     648                 :   unsigned Size;
     649          1170750:   unsigned char C = *CurPtr++;
                  8176257: branch 1 taken
                  1170750: branch 2 taken
     650         10517757:   while (isIdentifierBody(C))
     651          8176257:     C = *CurPtr++;
     652                 : 
     653          1170750:   --CurPtr;   // Back up over the skipped character.
     654                 : 
     655                 :   // Fast path, no $,\,? in identifier found.  '\' might be an escaped newline
     656                 :   // or UCN, and ? might be a trigraph for '\', an escaped newline or UCN.
     657                 :   // FIXME: UCNs.
     658                 :   //
     659                 :   // TODO: Could merge these checks into a CharInfo flag to make the comparison
     660                 :   // cheaper
                  1170738: branch 0 taken
                       12: branch 1 taken
                  1170577: branch 2 taken
                      161: branch 3 taken
                        4: branch 4 taken
                  1170573: branch 5 taken
                        2: branch 6 taken
                        2: branch 7 taken
     661          1170750:   if (C != '\\' && C != '?' && (C != '$' || !Features.DollarIdents)) {
     662          1170750: FinishIdentifier:
     663          1170750:     const char *IdStart = BufferPtr;
     664          1170750:     FormTokenWithChars(Result, CurPtr, tok::identifier);
     665                 : 
     666                 :     // If we are in raw mode, return this identifier raw.  There is no need to
     667                 :     // look up identifier information or attempt to macro expand it.
                   863259: branch 0 taken
                   307491: branch 1 taken
     668          1170750:     if (LexingRawMode) return;
     669                 : 
     670                 :     // Fill in Result.IdentifierInfo, looking up the identifier in the
     671                 :     // identifier table.
     672           863259:     IdentifierInfo *II = PP->LookUpIdentifierInfo(Result, IdStart);
     673                 : 
     674                 :     // Change the kind of this identifier to the appropriate token kind, e.g.
     675                 :     // turning "for" into a keyword.
     676           863259:     Result.setKind(II->getTokenID());
     677                 : 
     678                 :     // Finally, now that we know we have an identifier, pass this off to the
     679                 :     // preprocessor, which may macro expand it or something.
                    13007: branch 1 taken
                   850252: branch 2 taken
     680           863259:     if (II->isHandleIdentifierCase())
     681            13007:       PP->HandleIdentifier(Result);
     682           863259:     return;
     683                 :   }
     684                 : 
     685                 :   // Otherwise, $,\,? in identifier found.  Enter slower path.
     686                 : 
     687              175:   C = getCharAndSize(CurPtr, Size);
     688                8:   while (1) {
                        2: branch 0 taken
                      181: branch 1 taken
     689              183:     if (C == '$') {
     690                 :       // If we hit a $ and they are not supported in identifiers, we are done.
                        0: branch 0 not taken
                        2: branch 1 taken
     691                2:       if (!Features.DollarIdents) goto FinishIdentifier;
     692                 : 
     693                 :       // Otherwise, emit a diagnostic and continue.
                        1: branch 1 taken
                        1: branch 2 taken
     694                2:       if (!isLexingRawMode())
     695                1:         Diag(CurPtr, diag::ext_dollar_in_identifier);
     696                2:       CurPtr = ConsumeChar(CurPtr, Size, Result);
     697                2:       C = getCharAndSize(CurPtr, Size);
     698                2:       continue;
                      175: branch 1 taken
                        6: branch 2 taken
     699              181:     } else if (!isIdentifierBody(C)) { // FIXME: UCNs.
     700                 :       // Found end of identifier.
     701              175:       goto FinishIdentifier;
     702                 :     }
     703                 : 
     704                 :     // Otherwise, this character is good, consume it.
     705                6:     CurPtr = ConsumeChar(CurPtr, Size, Result);
     706                 : 
     707                6:     C = getCharAndSize(CurPtr, Size);
                       28: branch 1 taken
                        6: branch 2 taken
     708               40:     while (isIdentifierBody(C)) { // FIXME: UCNs.
     709               28:       CurPtr = ConsumeChar(CurPtr, Size, Result);
     710               28:       C = getCharAndSize(CurPtr, Size);
     711                 :     }
     712                 :   }
     713                 : }
     714                 : 
     715                 : 
     716                 : /// LexNumericConstant - Lex the remainder of a integer or floating point
     717                 : /// constant. From[-1] is the first character lexed.  Return the end of the
     718                 : /// constant.
     719           308194: void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
     720                 :   unsigned Size;
     721           308194:   char C = getCharAndSize(CurPtr, Size);
     722           308194:   char PrevCh = 0;
                   957361: branch 1 taken
                   308194: branch 2 taken
     723          1573749:   while (isNumberBody(C)) { // FIXME: UCNs?
     724           957361:     CurPtr = ConsumeChar(CurPtr, Size, Result);
     725           957361:     PrevCh = C;
     726           957361:     C = getCharAndSize(CurPtr, Size);
     727                 :   }
     728                 : 
     729                 :   // If we fell out, check for a sign, due to 1e+12.  If we have one, continue.
                   285618: branch 0 taken
                    22576: branch 1 taken
                     7761: branch 2 taken
                   277857: branch 3 taken
                    30337: branch 4 taken
                        0: branch 5 not taken
                    29818: branch 6 taken
                      519: branch 7 taken
     730           308194:   if ((C == '-' || C == '+') && (PrevCh == 'E' || PrevCh == 'e'))
     731            29818:     return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
     732                 : 
     733                 :   // If we have a hex FP constant, continue.
                   278169: branch 0 taken
                      207: branch 1 taken
                      312: branch 2 taken
                   277857: branch 3 taken
                      519: branch 4 taken
                        0: branch 5 not taken
                        8: branch 6 taken
                      511: branch 7 taken
                        4: branch 8 taken
                        4: branch 9 taken
                        4: branch 11 taken
                        0: branch 12 not taken
                        8: branch 13 taken
                   278368: branch 14 taken
     734           278376:   if ((C == '-' || C == '+') && (PrevCh == 'P' || PrevCh == 'p') &&
     735                 :       (!PP || !PP->getLangOptions().CPlusPlus0x))
     736                8:     return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
     737                 : 
     738                 :   // Update the location of token as well as BufferPtr.
     739           278368:   const char *TokStart = BufferPtr;
     740           278368:   FormTokenWithChars(Result, CurPtr, tok::numeric_constant);
     741           278368:   Result.setLiteralData(TokStart);
     742                 : }
     743                 : 
     744                 : /// LexStringLiteral - Lex the remainder of a string literal, after having lexed
     745                 : /// either " or L".
     746            13497: void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, bool Wide) {
     747            13497:   const char *NulCharacter = 0; // Does this string contain the \0 character?
     748                 : 
     749            13497:   char C = getAndAdvanceChar(CurPtr, Result);
                   199279: branch 0 taken
                    13494: branch 1 taken
     750           226270:   while (C != '"') {
     751                 :     // Skip escaped characters.
                      737: branch 0 taken
                   198542: branch 1 taken
     752           199279:     if (C == '\\') {
     753                 :       // Skip the escaped character.
     754              737:       C = getAndAdvanceChar(CurPtr, Result);
                   198539: branch 0 taken
                        3: branch 1 taken
                   198539: branch 2 taken
                        0: branch 3 not taken
                        0: branch 4 not taken
                   198539: branch 5 taken
                   198539: branch 6 taken
                   198539: branch 7 taken
     755           198542:     } else if (C == '\n' || C == '\r' ||             // Newline.
     756                 :                (C == 0 && CurPtr-1 == BufferEnd)) {  // End of file.
                        2: branch 1 taken
                        1: branch 2 taken
                        0: branch 3 not taken
                        2: branch 4 taken
                        0: branch 5 not taken
                        3: branch 6 taken
     757                3:       if (!isLexingRawMode() && !Features.AsmPreprocessor)
     758                0:         Diag(BufferPtr, diag::err_unterminated_string);
     759                3:       FormTokenWithChars(Result, CurPtr-1, tok::unknown);
     760                3:       return;
                        0: branch 0 not taken
                   198539: branch 1 taken
     761           198539:     } else if (C == 0) {
     762                0:       NulCharacter = CurPtr-1;
     763                 :     }
     764           199276:     C = getAndAdvanceChar(CurPtr, Result);
     765                 :   }
     766                 : 
     767                 :   // If a nul character existed in the string, warn about it.
                        0: branch 0 not taken
                    13494: branch 1 taken
                        0: branch 3 not taken
                        0: branch 4 not taken
                        0: branch 5 not taken
                    13494: branch 6 taken
     768            13494:   if (NulCharacter && !isLexingRawMode())
     769                0:     Diag(NulCharacter, diag::null_in_string);
     770                 : 
     771                 :   // Update the location of the token as well as the BufferPtr instance var.
     772            13494:   const char *TokStart = BufferPtr;
     773                 :   FormTokenWithChars(Result, CurPtr,
                       61: branch 0 taken
                    13433: branch 1 taken
     774            13494:                      Wide ? tok::wide_string_literal : tok::string_literal);
     775            13494:   Result.setLiteralData(TokStart);
     776                 : }
     777                 : 
     778                 : /// LexAngledStringLiteral - Lex the remainder of an angled string literal,
     779                 : /// after having lexed the '<' character.  This is used for #include filenames.
     780              632: void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
     781              632:   const char *NulCharacter = 0; // Does this string contain the \0 character?
     782              632:   const char *AfterLessPos = CurPtr;
     783              632:   char C = getAndAdvanceChar(CurPtr, Result);
                     7111: branch 0 taken
                      631: branch 1 taken
     784             8374:   while (C != '>') {
     785                 :     // Skip escaped characters.
                        0: branch 0 not taken
                     7111: branch 1 taken
     786             7111:     if (C == '\\') {
     787                 :       // Skip the escaped character.
     788                0:       C = getAndAdvanceChar(CurPtr, Result);
                     7110: branch 0 taken
                        1: branch 1 taken
                     7110: branch 2 taken
                        0: branch 3 not taken
                        0: branch 4 not taken
                     7110: branch 5 taken
                     7110: branch 6 taken
                     7110: branch 7 taken
     789             7111:     } else if (C == '\n' || C == '\r' ||             // Newline.
     790                 :                (C == 0 && CurPtr-1 == BufferEnd)) {  // End of file.
     791                 :       // If the filename is unterminated, then it must just be a lone <
     792                 :       // character.  Return this as such.
     793                1:       FormTokenWithChars(Result, AfterLessPos, tok::less);
     794                1:       return;
                        0: branch 0 not taken
                     7110: branch 1 taken
     795             7110:     } else if (C == 0) {
     796                0:       NulCharacter = CurPtr-1;
     797                 :     }
     798             7110:     C = getAndAdvanceChar(CurPtr, Result);
     799                 :   }
     800                 : 
     801                 :   // If a nul character existed in the string, warn about it.
                        0: branch 0 not taken
                      631: branch 1 taken
                        0: branch 3 not taken
                        0: branch 4 not taken
                        0: branch 5 not taken
                      631: branch 6 taken
     802              631:   if (NulCharacter && !isLexingRawMode())
     803                0:     Diag(NulCharacter, diag::null_in_string);
     804                 : 
     805                 :   // Update the location of token as well as BufferPtr.
     806              631:   const char *TokStart = BufferPtr;
     807              631:   FormTokenWithChars(Result, CurPtr, tok::angle_string_literal);
     808              631:   Result.setLiteralData(TokStart);
     809                 : }
     810                 : 
     811                 : 
     812                 : /// LexCharConstant - Lex the remainder of a character constant, after having
     813                 : /// lexed either ' or L'.
     814              377: void Lexer::LexCharConstant(Token &Result, const char *CurPtr) {
     815              377:   const char *NulCharacter = 0; // Does this character contain the \0 character?
     816                 : 
     817                 :   // Handle the common case of 'x' and '\y' efficiently.
     818              377:   char C = getAndAdvanceChar(CurPtr, Result);
                        2: branch 0 taken
                      375: branch 1 taken
     819              377:   if (C == '\'') {
                        2: branch 1 taken
                        0: branch 2 not taken
                        0: branch 3 not taken
                        2: branch 4 taken
                        0: branch 5 not taken
                        2: branch 6 taken
     820                2:     if (!isLexingRawMode() && !Features.AsmPreprocessor)
     821                0:       Diag(BufferPtr, diag::err_empty_character);
     822                2:     FormTokenWithChars(Result, CurPtr, tok::unknown);
     823                2:     return;
                       87: branch 0 taken
                      288: branch 1 taken
     824              375:   } else if (C == '\\') {
     825                 :     // Skip the escaped character.
     826                 :     // FIXME: UCN's.
     827               87:     C = getAndAdvanceChar(CurPtr, Result);
     828                 :   }
     829                 : 
                      375: branch 0 taken
                        0: branch 1 not taken
                      372: branch 2 taken
                        3: branch 3 taken
                      372: branch 4 taken
                        0: branch 5 not taken
                      338: branch 6 taken
                       34: branch 7 taken
     830              713:   if (C && C != '\n' && C != '\r' && CurPtr[0] == '\'') {
     831              338:     ++CurPtr;
     832                 :   } else {
     833                 :     // Fall back on generic code for embedded nulls, newlines, wide chars.
                       70: branch 0 taken
                       33: branch 1 taken
     834              103:     do {
     835                 :       // Skip escaped characters.
                        0: branch 0 not taken
                      107: branch 1 taken
     836              107:       if (C == '\\') {
     837                 :         // Skip the escaped character.
     838                0:         C = getAndAdvanceChar(CurPtr, Result);
                      103: branch 0 taken
                        4: branch 1 taken
                      103: branch 2 taken
                        0: branch 3 not taken
                        0: branch 4 not taken
                      103: branch 5 taken
                      103: branch 6 taken
                      103: branch 7 taken
     839              107:       } else if (C == '\n' || C == '\r' ||               // Newline.
     840                 :                  (C == 0 && CurPtr-1 == BufferEnd)) {    // End of file.
                        2: branch 1 taken
                        2: branch 2 taken
                        0: branch 3 not taken
                        2: branch 4 taken
                        0: branch 5 not taken
                        4: branch 6 taken
     841                4:         if (!isLexingRawMode() && !Features.AsmPreprocessor)
     842                0:           Diag(BufferPtr, diag::err_unterminated_char);
     843                4:         FormTokenWithChars(Result, CurPtr-1, tok::unknown);
     844                4:         return;
                        0: branch 0 not taken
                      103: branch 1 taken
     845              103:       } else if (C == 0) {
     846                0:         NulCharacter = CurPtr-1;
     847                 :       }
     848              103:       C = getAndAdvanceChar(CurPtr, Result);
     849                 :     } while (C != '\'');
     850                 :   }
     851                 : 
                        0: branch 0 not taken
                      371: branch 1 taken
                        0: branch 3 not taken
                        0: branch 4 not taken
                        0: branch 5 not taken
                      371: branch 6 taken
     852              371:   if (NulCharacter && !isLexingRawMode())
     853                0:     Diag(NulCharacter, diag::null_in_char);
     854                 : 
     855                 :   // Update the location of token as well as BufferPtr.
     856              371:   const char *TokStart = BufferPtr;
     857              371:   FormTokenWithChars(Result, CurPtr, tok::char_constant);
     858              371:   Result.setLiteralData(TokStart);
     859                 : }
     860                 : 
     861                 : /// SkipWhitespace - Efficiently skip over a series of whitespace characters.
     862                 : /// Update BufferPtr to point to the next non-whitespace character and return.
     863                 : ///
     864                 : /// This method forms a token and returns true if KeepWhitespaceMode is enabled.
     865                 : ///
     866           178792: bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
     867                 :   // Whitespace - Skip it, then return the token after the whitespace.
     868           178792:   unsigned char Char = *CurPtr;  // Skip consecutive spaces efficiently.
     869            36464:   while (1) {
     870                 :     // Skip horizontal whitespace very aggressively.
                   206019: branch 1 taken
                   215256: branch 2 taken
     871           636531:     while (isHorizontalWhitespace(Char))
     872           206019:       Char = *++CurPtr;
     873                 : 
     874                 :     // Otherwise if we have something other than whitespace, we're done.
                   178796: branch 0 taken
                    36460: branch 1 taken
                        4: branch 2 taken
                   178792: branch 3 taken
     875           215256:     if (Char != '\n' && Char != '\r')
     876           178792:       break;
     877                 : 
                        0: branch 0 not taken
                    36464: branch 1 taken
     878            36464:     if (ParsingPreprocessorDirective) {
     879                 :       // End of preprocessor directive line, let LexTokenInternal handle this.
     880                0:       BufferPtr = CurPtr;
     881                0:       return false;
     882                 :     }
     883                 : 
     884                 :     // ok, but handle newline.
     885                 :     // The returned token is at the start of the line.
     886            36464:     Result.setFlag(Token::StartOfLine);
     887                 :     // No leading whitespace seen so far.
     888            36464:     Result.clearFlag(Token::LeadingSpace);
     889            36464:     Char = *++CurPtr;
     890                 :   }
     891                 : 
     892                 :   // If this isn't immediately after a newline, there is leading space.
     893           178792:   char PrevChar = CurPtr[-1];
                    65709: branch 0 taken
                   113083: branch 1 taken
                    65709: branch 2 taken
                        0: branch 3 not taken
     894           178792:   if (PrevChar != '\n' && PrevChar != '\r')
     895            65709:     Result.setFlag(Token::LeadingSpace);
     896                 : 
     897                 :   // If the client wants us to return whitespace, return it now.
                        0: branch 1 not taken
                   178792: branch 2 taken
     898           178792:   if (isKeepWhitespaceMode()) {
     899                0:     FormTokenWithChars(Result, CurPtr, tok::unknown);
     900                0:     return true;
     901                 :   }
     902                 : 
     903           178792:   BufferPtr = CurPtr;
     904           178792:   return false;
     905                 : }
     906                 : 
     907                 : // SkipBCPLComment - We have just read the // characters from input.  Skip until
     908                 : // we find the newline character that terminates the comment.  Then update
     909                 : /// BufferPtr and return.
     910                 : ///
     911                 : /// If we're in KeepCommentMode or any CommentHandler has inserted
     912                 : /// some tokens, this will store the first token and return true.
     913            53903: bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) {
     914                 :   // If BCPL comments aren't explicitly enabled for this language, emit an
     915                 :   // extension warning.
                       26: branch 0 taken
                    53877: branch 1 taken
                       11: branch 3 taken
                       15: branch 4 taken
                       11: branch 5 taken
                    53892: branch 6 taken
     916            53903:   if (!Features.BCPLComment && !isLexingRawMode()) {
     917               11:     Diag(BufferPtr, diag::ext_bcpl_comment);
     918                 : 
     919                 :     // Mark them enabled so we only emit one warning for this translation
     920                 :     // unit.
     921               11:     Features.BCPLComment = true;
     922                 :   }
     923                 : 
     924                 :   // Scan over the body of the comment.  The common case, when scanning, is that
     925                 :   // the comment contains normal ascii characters with nothing interesting in
     926                 :   // them.  As such, optimize for this case with the inner loop.
     927                 :   char C;
                     1571: branch 0 taken
                        0: branch 1 not taken
                     1571: branch 2 taken
                        0: branch 3 not taken
     928             1571:   do {
     929            55474:     C = *CurPtr;
     930                 :     // FIXME: Speedup BCPL comment lexing.  Just scan for a \n or \r character.
     931                 :     // If we find a \n character, scan backwards, checking to see if it's an
     932                 :     // escaped newline, like we do for block comments.
     933                 : 
     934                 :     // Skip over characters in the fast loop.
                  2637593: branch 0 taken
                        3: branch 1 taken
                  2636376: branch 2 taken
                     1217: branch 3 taken
                  2636022: branch 4 taken
                      354: branch 5 taken
                  2582124: branch 6 taken
                    53898: branch 7 taken
                  2582122: branch 8 taken
                        2: branch 9 taken
     935          2693070:     while (C != 0 &&                // Potentially EOF.
     936                 :            C != '\\' &&             // Potentially escaped newline.
     937                 :            C != '?' &&              // Potentially trigraph.
     938                 :            C != '\n' && C != '\r')  // Newline or DOS-style newline.
     939          2582122:       C = *++CurPtr;
     940                 : 
     941                 :     // If this is a newline, we're done.
                     1576: branch 0 taken
                    53898: branch 1 taken
                        2: branch 2 taken
                     1574: branch 3 taken
     942            55474:     if (C == '\n' || C == '\r')
     943            53900:       break;  // Found the newline? Break out!
     944                 : 
     945                 :     // Otherwise, this is a hard case.  Fall back on getAndAdvanceChar to
     946                 :     // properly decode the character.  Read it in raw mode to avoid emitting
     947                 :     // diagnostics about things like trigraphs.  If we see an escaped newline,
     948                 :     // we'll handle it below.
     949             1574:     const char *OldPtr = CurPtr;
     950             1574:     bool OldRawMode = isLexingRawMode();
     951             1574:     LexingRawMode = true;
     952             1574:     C = getAndAdvanceChar(CurPtr, Result);
     953             1574:     LexingRawMode = OldRawMode;
     954                 : 
     955                 :     // If the char that we finally got was a \n, then we must have had something
     956                 :     // like \<newline><newline>.  We don't want to have consumed the second
     957                 :     // newline, we want CurPtr, to end up pointing to it down below.
                     1573: branch 0 taken
                        1: branch 1 taken
                        0: branch 2 not taken
                     1573: branch 3 taken
     958             1574:     if (C == '\n' || C == '\r') {
     959                1:       --CurPtr;
     960                1:       C = 'x'; // doesn't matter what this is.
     961                 :     }
     962                 : 
     963                 :     // If we read multiple characters, and one of those characters was a \r or
     964                 :     // \n, then we had an escaped newline within the comment.  Emit diagnostic
     965                 :     // unless the next line is also a // comment.
                      584: branch 0 taken
                      990: branch 1 taken
                      465: branch 2 taken
                      119: branch 3 taken
                      451: branch 4 taken
                       14: branch 5 taken
     966             1574:     if (CurPtr != OldPtr+1 && C != '/' && CurPtr[0] != '/') {
                      914: branch 0 taken
                       10: branch 1 taken
     967              924:       for (; OldPtr != CurPtr; ++OldPtr)
                      473: branch 0 taken
                      441: branch 1 taken
                        0: branch 2 not taken
                      473: branch 3 taken
     968              914:         if (OldPtr[0] == '\n' || OldPtr[0] == '\r') {
     969                 :           // Okay, we found a // comment that ends in a newline, if the next
     970                 :           // line is also a // comment, but has spaces, don't emit a diagnostic.
                      424: branch 1 taken
                       17: branch 2 taken
     971              441:           if (isspace(C)) {
     972              424:             const char *ForwardPtr = CurPtr;
                     6908: branch 1 taken
                      424: branch 2 taken
     973             7756:             while (isspace(*ForwardPtr))  // Skip whitespace.
     974             6908:               ++ForwardPtr;
                      332: branch 0 taken
                       92: branch 1 taken
                      332: branch 2 taken
                        0: branch 3 not taken
     975              424:             if (ForwardPtr[0] == '/' && ForwardPtr[1] == '/')
     976              332:               break;
     977                 :           }
     978                 : 
                       55: branch 1 taken
                       54: branch 2 taken
     979              109:           if (!isLexingRawMode())
     980               55:             Diag(OldPtr-1, diag::ext_multi_line_bcpl_comment);
     981              109:           break;
     982                 :         }
     983                 :     }
     984                 : 
                        3: branch 0 taken
                     1571: branch 1 taken
     985             1574:     if (CurPtr == BufferEnd+1) { --CurPtr; break; }
     986                 :   } while (C != '\n' && C != '\r');
     987                 : 
     988                 :   // Found but did not consume the newline.  Notify comment handlers about the
     989                 :   // comment unless we're in a #if 0 block.
                    38464: branch 0 taken
                    15439: branch 1 taken
                    38436: branch 3 taken
                       28: branch 4 taken
                        0: branch 9 not taken
                    38436: branch 10 taken
                        0: branch 11 not taken
                    53903: branch 12 taken
     990            53903:   if (PP && !isLexingRawMode() &&
     991                 :       PP->HandleComment(Result, SourceRange(getSourceLocation(BufferPtr),
     992                 :                                             getSourceLocation(CurPtr)))) {
     993                0:     BufferPtr = CurPtr;
     994                0:     return true; // A token has to be returned.
     995                 :   }
     996                 : 
     997                 :   // If we are returning comments as tokens, return this comment as a token.
                    15458: branch 1 taken
                    38445: branch 2 taken
     998            53903:   if (inKeepCommentMode())
     999            15458:     return SaveBCPLComment(Result, CurPtr);
    1000                 : 
    1001                 :   // If we are inside a preprocessor directive and we see the end of line,
    1002                 :   // return immediately, so that the lexer can return this as an EOM token.
                    38261: branch 0 taken
                      184: branch 1 taken
                        3: branch 2 taken
                    38258: branch 3 taken
    1003            38445:   if (ParsingPreprocessorDirective || CurPtr == BufferEnd) {
    1004              187:     BufferPtr = CurPtr;
    1005              187:     return false;
    1006                 :   }
    1007                 : 
    1008                 :   // Otherwise, eat the \n character.  We don't care if this is a \n\r or
    1009                 :   // \r\n sequence.  This is an efficiency hack (because we know the \n can't
    1010                 :   // contribute to another token), it isn't needed for correctness.  Note that
    1011                 :   // this is ok even in KeepWhitespaceMode, because we would have returned the
    1012                 :   // comment above in that mode.
    1013            38258:   ++CurPtr;
    1014                 : 
    1015                 :   // The next returned token is at the start of the line.
    1016            38258:   Result.setFlag(Token::StartOfLine);
    1017                 :   // No leading whitespace seen so far.
    1018            38258:   Result.clearFlag(Token::LeadingSpace);
    1019            38258:   BufferPtr = CurPtr;
    1020            38258:   return false;
    1021                 : }
    1022                 : 
    1023                 : /// SaveBCPLComment - If in save-comment mode, package up this BCPL comment in
    1024                 : /// an appropriate way and return it.
    1025            15458: bool Lexer::SaveBCPLComment(Token &Result, const char *CurPtr) {
    1026                 :   // If we're not in a preprocessor directive, just return the // comment
    1027                 :   // directly.
    1028            15458:   FormTokenWithChars(Result, CurPtr, tok::comment);
    1029                 : 
                    15456: branch 0 taken
                        2: branch 1 taken
    1030            15458:   if (!ParsingPreprocessorDirective)
    1031            15456:     return true;
    1032                 : 
    1033                 :   // If this BCPL-style comment is in a macro definition, transmogrify it into
    1034                 :   // a C-style block comment.
    1035                2:   std::string Spelling = PP->getSpelling(Result);
                        2: branch 1 taken
                        0: branch 2 not taken
                        2: branch 4 taken
                        0: branch 5 not taken
    1036                2:   assert(Spelling[0] == '/' && Spelling[1] == '/' && "Not bcpl comment?");
    1037                2:   Spelling[1] = '*';   // Change prefix to "/*".
    1038                2:   Spelling += "*/";    // add suffix.
    1039                 : 
    1040                2:   Result.setKind(tok::comment);
    1041                 :   PP->CreateString(&Spelling[0], Spelling.size(), Result,
    1042                2:                    Result.getLocation());
    1043                2:   return true;
    1044                 : }
    1045                 : 
    1046                 : /// isEndOfBlockCommentWithEscapedNewLine - Return true if the given newline
    1047                 : /// character (either \n or \r) is part of an escaped newline sequence.  Issue a
    1048                 : /// diagnostic if so.  We know that the newline is inside of a block comment.
    1049                 : static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr,
    1050               24:                                                   Lexer *L) {
                        0: branch 0 not taken
                       24: branch 1 taken
                       24: branch 2 taken
                       24: branch 3 taken
    1051               24:   assert(CurPtr[0] == '\n' || CurPtr[0] == '\r');
    1052                 : 
    1053                 :   // Back up off the newline.
    1054               24:   --CurPtr;
    1055                 : 
    1056                 :   // If this is a two-character newline sequence, skip the other character.
                       18: branch 0 taken
                        6: branch 1 taken
                        0: branch 2 not taken
                       18: branch 3 taken
    1057               24:   if (CurPtr[0] == '\n' || CurPtr[0] == '\r') {
    1058                 :     // \n\n or \r\r -> not escaped newline.
                        6: branch 0 taken
                        0: branch 1 not taken
    1059                6:     if (CurPtr[0] == CurPtr[1])
    1060                6:       return false;
    1061                 :     // \n\r or \r\n -> skip the newline.
    1062                0:     --CurPtr;
    1063                 :   }
    1064                 : 
    1065                 :   // If we have horizontal whitespace, skip over it.  We allow whitespace
    1066                 :   // between the backslash and newline.
    1067               18:   bool HasSpace = false;
                       18: branch 1 taken
                       36: branch 2 taken
                        0: branch 3 not taken
                       18: branch 4 taken
                       36: branch 5 taken
                       18: branch 6 taken
    1068               72:   while (isHorizontalWhitespace(*CurPtr) || *CurPtr == 0) {
    1069               36:     --CurPtr;
    1070               36:     HasSpace = true;
    1071                 :   }
    1072                 : 
    1073                 :   // If we have a backslash, we know this is an escaped newline.
                        6: branch 0 taken
                       12: branch 1 taken
    1074               18:   if (*CurPtr == '\\') {
                        0: branch 0 not taken
                        6: branch 1 taken
    1075                6:     if (CurPtr[-1] != '*') return false;
    1076                 :   } else {
    1077                 :     // It isn't a backslash, is it the ?? / trigraph?
                        6: branch 0 taken
                        6: branch 1 taken
                        6: branch 2 taken
                        0: branch 3 not taken
                        6: branch 4 taken
                        0: branch 5 not taken
                        0: branch 6 not taken
                        6: branch 7 taken
    1078               12:     if (CurPtr[0] != '/' || CurPtr[-1] != '?' || CurPtr[-2] != '?' ||
    1079                 :         CurPtr[-3] != '*')
    1080                6:       return false;
    1081                 : 
    1082                 :     // This is the trigraph ending the comment.  Emit a stern warning!
    1083                6:     CurPtr -= 2;
    1084                 : 
    1085                 :     // If no trigraphs are enabled, warn that we ignored this trigraph and
    1086                 :     // ignore this * character.
                        0: branch 1 not taken
                        6: branch 2 taken
    1087                6:     if (!L->getFeatures().Trigraphs) {
                        0: branch 1 not taken
                        0: branch 2 not taken
    1088                0:       if (!L->isLexingRawMode())
    1089                0:         L->Diag(CurPtr, diag::trigraph_ignored_block_comment);
    1090                0:       return false;
    1091                 :     }
                        5: branch 1 taken
                        1: branch 2 taken
    1092                6:     if (!L->isLexingRawMode())
    1093                5:       L->Diag(CurPtr, diag::trigraph_ends_block_comment);
    1094                 :   }
    1095                 : 
    1096                 :   // Warn about having an escaped newline between the */ characters.
                       10: branch 1 taken
                        2: branch 2 taken
    1097               12:   if (!L->isLexingRawMode())
    1098               10:     L->Diag(CurPtr, diag::escaped_newline_block_comment_end);
    1099                 : 
    1100                 :   // If there was space between the backslash and newline, warn about it.
                       12: branch 0 taken
                        0: branch 1 not taken
                       10: branch 3 taken
                        2: branch 4 taken
                       10: branch 5 taken
                        2: branch 6 taken
    1101               12:   if (HasSpace && !L->isLexingRawMode())
    1102               10:     L->Diag(CurPtr, diag::backslash_newline_space);
    1103                 : 
    1104               12:   return true;
    1105                 : }
    1106                 : 
    1107                 : #ifdef __SSE2__
    1108                 : #include <emmintrin.h>
    1109                 : #elif __ALTIVEC__
    1110                 : #include <altivec.h>
    1111                 : #undef bool
    1112                 : #endif
    1113                 : 
    1114                 : /// SkipBlockComment - We have just read the /* characters from input.  Read
    1115                 : /// until we find the */ characters that terminate the comment.  Note that we
    1116                 : /// don't bother decoding trigraphs or escaped newlines in block comments,
    1117                 : /// because they cannot cause the comment to end.  The only thing that can
    1118                 : /// happen is the comment could end with an escaped newline between the */ end
    1119                 : /// of comment.
    1120                 : ///
    1121                 : /// If we're in KeepCommentMode or any CommentHandler has inserted
    1122                 : /// some tokens, this will store the first token and return true.
    1123            10873: bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) {
    1124                 :   // Scan one character past where we should, looking for a '/' character.  Once
    1125                 :   // we find it, check to see if it was preceded by a *.  This common
    1126                 :   // optimization helps people who like to put a lot of * characters in their
    1127                 :   // comments.
    1128                 : 
    1129                 :   // The first character we get with newlines and trigraphs skipped to handle
    1130                 :   // the degenerate /*/ case below correctly if the * has an escaped newline
    1131                 :   // after it.
    1132                 :   unsigned CharSize;
    1133            10873:   unsigned char C = getCharAndSize(CurPtr, CharSize);
    1134            10873:   CurPtr += CharSize;
                        3: branch 0 taken
                    10870: branch 1 taken
                        3: branch 2 taken
                        0: branch 3 not taken
    1135            10873:   if (C == 0 && CurPtr == BufferEnd+1) {
                        0: branch 1 not taken
                        3: branch 2 taken
    1136                3:     if (!isLexingRawMode())
    1137                0:       Diag(BufferPtr, diag::err_unterminated_block_comment);
    1138                3:     --CurPtr;
    1139                 : 
    1140                 :     // KeepWhitespaceMode should return this broken comment as a token.  Since
    1141                 :     // it isn't a well formed comment, just return it as an 'unknown' token.
                        0: branch 1 not taken
                        3: branch 2 taken
    1142                3:     if (isKeepWhitespaceMode()) {
    1143                0:       FormTokenWithChars(Result, CurPtr, tok::unknown);
    1144                0:       return true;
    1145                 :     }
    1146                 : 
    1147                3:     BufferPtr = CurPtr;
    1148                3:     return false;
    1149                 :   }
    1150                 : 
    1151                 :   // Check to see if the first character after the '/*' is another /.  If so,
    1152                 :   // then this slash does not end the block comment, it is part of it.
                        6: branch 0 taken
                    10864: branch 1 taken
    1153            10870:   if (C == '/')
    1154                6:     C = *CurPtr++;
    1155                 : 
    1156             1227:   while (1) {
    1157                 :     // Skip over all non-interesting characters until we find end of buffer or a
    1158                 :     // (probably ending) '/' character.
                    11704: branch 0 taken
                      393: branch 1 taken
    1159            12097:     if (CurPtr + 24 < BufferEnd) {
    1160                 :       // While not aligned to a 16-byte boundary.
                    97331: branch 0 taken
                      675: branch 1 taken
                    86302: branch 2 taken
                    11029: branch 3 taken
    1161           109710:       while (C != '/' && ((intptr_t)CurPtr & 0x0F) != 0)
    1162            86302:         C = *CurPtr++;
    1163                 : 
                    11029: branch 0 taken
                      675: branch 1 taken
    1164            11704:       if (C == '/') goto FoundSlash;
    1165                 : 
    1166                 : #ifdef __SSE2__
    1167                 :       __m128i Slashes = _mm_set_epi8('/', '/', '/', '/', '/', '/', '/', '/',
    1168                 :                                      '/', '/', '/', '/', '/', '/', '/', '/');
    1169                 :       while (CurPtr+16 <= BufferEnd &&
    1170                 :              _mm_movemask_epi8(_mm_cmpeq_epi8(*(__m128i*)CurPtr, Slashes)) == 0)
    1171                 :         CurPtr += 16;
    1172                 : #elif __ALTIVEC__
    1173                 :       __vector unsigned char Slashes = {
    1174                 :         '/', '/', '/', '/',  '/', '/', '/', '/',
    1175                 :         '/', '/', '/', '/',  '/', '/', '/', '/'
    1176                 :       };
    1177                 :       while (CurPtr+16 <= BufferEnd &&
    1178                 :              !vec_any_eq(*(vector unsigned char*)CurPtr, Slashes))
    1179                 :         CurPtr += 16;
    1180                 : #else
    1181                 :       // Scan for '/' quickly.  Many block comments are very large.
                   274050: branch 0 taken
                     2801: branch 1 taken
                   271065: branch 2 taken
                     2985: branch 3 taken
                   268523: branch 4 taken
                     2542: branch 5 taken
                   265822: branch 6 taken
                     2701: branch 7 taken
                   265822: branch 8 taken
                        0: branch 9 not taken
    1182           287880:       while (CurPtr[0] != '/' &&
    1183                 :              CurPtr[1] != '/' &&
    1184                 :              CurPtr[2] != '/' &&
    1185                 :              CurPtr[3] != '/' &&
    1186                 :              CurPtr+4 < BufferEnd) {
    1187           265822:         CurPtr += 4;
    1188                 :       }
    1189                 : #endif
    1190                 : 
    1191                 :       // It has to be one of the bytes scanned, increment to it and read one.
    1192            11029:       C = *CurPtr++;
    1193                 :     }
    1194                 : 
    1195                 :     // Loop to scan the remainder.
                    20458: branch 0 taken
                    11422: branch 1 taken
                    20458: branch 2 taken
                        0: branch 3 not taken
    1196            43302:     while (C != '/' && C != '\0')
    1197            20458:       C = *CurPtr++;
    1198                 : 
    1199            12097:   FoundSlash:
                    12097: branch 0 taken
                        0: branch 1 not taken
    1200            12097:     if (C == '/') {
                     1239: branch 0 taken
                    10858: branch 1 taken
    1201            12097:       if (CurPtr[-2] == '*')  // We found the final */.  We're done!
    1202            10858:         break;
    1203                 : 
                     1215: branch 0 taken
                       24: branch 1 taken
                        0: branch 2 not taken
                     1215: branch 3 taken
    1204             1239:       if ((CurPtr[-2] == '\n' || CurPtr[-2] == '\r')) {
                       12: branch 1 taken
                       12: branch 2 taken
    1205               24:         if (isEndOfBlockCommentWithEscapedNewLine(CurPtr-2, this)) {
    1206                 :           // We found the final */, though it had an escaped newline between the
    1207                 :           // * and /.  We're done!
    1208               12:           break;
    1209                 :         }
    1210                 :       }
                        2: branch 0 taken
                     1225: branch 1 taken
                        2: branch 2 taken
                        0: branch 3 not taken
    1211             1227:       if (CurPtr[0] == '*' && CurPtr[1] != '/') {
    1212                 :         // If this is a /* inside of the comment, emit a warning.  Don't do this
    1213                 :         // if this is a /*/, which will end the comment.  This misses cases with
    1214                 :         // embedded escaped newlines, but oh well.
                        0: branch 1 not taken
                        2: branch 2 taken
    1215                2:         if (!isLexingRawMode())
    1216                0:           Diag(CurPtr-1, diag::warn_nested_block_comment);
    1217                 :       }
                        0: branch 0 not taken
                        0: branch 1 not taken
                        0: branch 2 not taken
                        0: branch 3 not taken
    1218                0:     } else if (C == 0 && CurPtr == BufferEnd+1) {
                        0: branch 1 not taken
                        0: branch 2 not taken
    1219                0:       if (!isLexingRawMode())
    1220                0:         Diag(BufferPtr, diag::err_unterminated_block_comment);
    1221                 :       // Note: the user probably forgot a */.  We could continue immediately
    1222                 :       // after the /*, but this would involve lexing a lot of what really is the
    1223                 :       // comment, which surely would confuse the parser.
    1224                0:       --CurPtr;
    1225                 : 
    1226                 :       // KeepWhitespaceMode should return this broken comment as a token.  Since
    1227                 :       // it isn't a well formed comment, just return it as an 'unknown' token.
                        0: branch 1 not taken
                        0: branch 2 not taken
    1228                0:       if (isKeepWhitespaceMode()) {
    1229                0:         FormTokenWithChars(Result, CurPtr, tok::unknown);
    1230                0:         return true;
    1231                 :       }
    1232                 : 
    1233                0:       BufferPtr = CurPtr;
    1234                0:       return false;
    1235                 :     }
    1236             1227:     C = *CurPtr++;
    1237                 :   }
    1238                 : 
    1239                 :   // Notify comment handlers about the comment unless we're in a #if 0 block.
                    10525: branch 0 taken
                      345: branch 1 taken
                     7085: branch 3 taken
                     3440: branch 4 taken
                        0: branch 9 not taken
                     7085: branch 10 taken
                        0: branch 11 not taken
                    10870: branch 12 taken
    1240            10870:   if (PP && !isLexingRawMode() &&
    1241                 :       PP->HandleComment(Result, SourceRange(getSourceLocation(BufferPtr),
    1242                 :                                             getSourceLocation(CurPtr)))) {
    1243                0:     BufferPtr = CurPtr;
    1244                0:     return true; // A token has to be returned.
    1245                 :   }
    1246                 : 
    1247                 :   // If we are returning comments as tokens, return this comment as a token.
                      349: branch 1 taken
                    10521: branch 2 taken
    1248            10870:   if (inKeepCommentMode()) {
    1249              349:     FormTokenWithChars(Result, CurPtr, tok::comment);
    1250              349:     return true;
    1251                 :   }
    1252                 : 
    1253                 :   // It is common for the tokens immediately after a /**/ comment to be
    1254                 :   // whitespace.  Instead of going through the big switch, handle it
    1255                 :   // efficiently now.  This is safe even in KeepWhitespaceMode because we would
    1256                 :   // have already returned above with the comment as a token.
                      161: branch 1 taken
                    10360: branch 2 taken
    1257            10521:   if (isHorizontalWhitespace(*CurPtr)) {
    1258              161:     Result.setFlag(Token::LeadingSpace);
    1259              161:     SkipWhitespace(Result, CurPtr+1);
    1260              161:     return false;
    1261                 :   }
    1262                 : 
    1263                 :   // Otherwise, just return so that the next character will be lexed as a token.
    1264            10360:   BufferPtr = CurPtr;
    1265            10360:   Result.setFlag(Token::LeadingSpace);
    1266            10360:   return false;
    1267                 : }
    1268                 : 
    1269                 : //===----------------------------------------------------------------------===//
    1270                 : // Primary Lexing Entry Points
    1271                 : //===----------------------------------------------------------------------===//
    1272                 : 
    1273                 : /// ReadToEndOfLine - Read the rest of the current preprocessor line as an
    1274                 : /// uninterpreted string.  This switches the lexer out of directive mode.
    1275               24: std::string Lexer::ReadToEndOfLine() {
    1276                 :   assert(ParsingPreprocessorDirective && ParsingFilename == false &&
                       24: branch 0 taken
                        0: branch 1 not taken
                        0: branch 2 not taken
                       24: branch 3 taken
    1277               24:          "Must be in a preprocessing directive!");
    1278               24:   std::string Result;
    1279               24:   Token Tmp;
    1280                 : 
    1281                 :   // CurPtr - Cache BufferPtr in an automatic variable.
    1282               24:   const char *CurPtr = BufferPtr;
    1283              433:   while (1) {
    1284              457:     char Char = getAndAdvanceChar(CurPtr, Tmp);
                      433: branch 0 taken
                        0: branch 1 not taken
                       24: branch 2 taken
    1285              457:     switch (Char) {
    1286                 :     default:
    1287              433:       Result += Char;
    1288              433:       break;
    1289                 :     case 0:  // Null.
    1290                 :       // Found end of file?
                        0: branch 0 not taken
                        0: branch 1 not taken
    1291                0:       if (CurPtr-1 != BufferEnd) {
    1292                 :         // Nope, normal character, continue.
    1293                0:         Result += Char;
    1294                0:         break;
    1295                 :       }
    1296                 :       // FALL THROUGH.
    1297                 :     case '\r':
    1298                 :     case '\n':
    1299                 :       // Okay, we found the end of the line. First, back up past the \0, \r, \n.
                        0: branch 0 not taken
                       24: branch 1 taken
    1300               24:       assert(CurPtr[-1] == Char && "Trigraphs for newline?");
    1301               24:       BufferPtr = CurPtr-1;
    1302                 : 
    1303                 :       // Next, lex the character, which should handle the EOM transition.
    1304               24:       Lex(Tmp);
                       24: branch 1 taken
                        0: branch 2 not taken
    1305               24:       assert(Tmp.is(tok::eom) && "Unexpected token!");
    1306                 : 
    1307                 :       // Finally, we're done, return the string we found.
    1308                 :       return Result;
    1309                 :     }
    1310                 :   }
    1311                 : }
    1312                 : 
    1313                 : /// LexEndOfFile - CurPtr points to the end of this file.  Handle this
    1314                 : /// condition, reporting diagnostics and handling other edge cases as required.
    1315                 : /// This returns true if Result contains a token, false if PP.Lex should be
    1316                 : /// called again.
    1317             7042: bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
    1318                 :   // If we hit the end of the file while parsing a preprocessor directive,
    1319                 :   // end the preprocessor directive first.  The next token returned will
    1320                 :   // then be the end of file.
                        0: branch 0 not taken
                     7042: branch 1 taken
    1321             7042:   if (ParsingPreprocessorDirective) {
    1322                 :     // Done parsing the "line".
    1323                0:     ParsingPreprocessorDirective = false;
    1324                 :     // Update the location of token as well as BufferPtr.
    1325                0:     FormTokenWithChars(Result, CurPtr, tok::eom);
    1326                 : 
    1327                 :     // Restore comment saving mode, in case it was disabled for directive.
    1328                0:     SetCommentRetentionState(PP->getCommentRetentionState());
    1329                0:     return true;  // Have a token.
    1330                 :   }
    1331                 :  
    1332                 :   // If we are in raw mode, return this event as an EOF token.  Let the caller
    1333                 :   // that put us in raw mode handle the event.
                     1341: branch 1 taken
                     5701: branch 2 taken
    1334             7042:   if (isLexingRawMode()) {
    1335             1341:     Result.startToken();
    1336             1341:     BufferPtr = BufferEnd;
    1337             1341:     FormTokenWithChars(Result, BufferEnd, tok::eof);
    1338             1341:     return true;
    1339                 :   }
    1340                 : 
    1341                 :   // Otherwise, check if we are code-completing, then issue diagnostics for 
    1342                 :   // unterminated #if and missing newline.
    1343                 : 
                     5701: branch 0 taken
                        0: branch 1 not taken
                       87: branch 3 taken
                     5614: branch 4 taken
                       87: branch 5 taken
                     5614: branch 6 taken
    1344             5701:   if (PP && PP->isCodeCompletionFile(FileLoc)) {
    1345                 :     // We're at the end of the file, but we've been asked to consider the
    1346                 :     // end of the file to be a code-completion token. Return the
    1347                 :     // code-completion token.
    1348               87:     Result.startToken();
    1349               87:     FormTokenWithChars(Result, CurPtr, tok::code_completion);
    1350                 :     
    1351                 :     // Only do the eof -> code_completion translation once.
    1352               87:     PP->SetCodeCompletionPoint(0, 0, 0);
    1353               87:     return true;
    1354                 :   }
    1355                 :   
    1356                 :   // If we are in a #if directive, emit an error.
                        0: branch 1 not taken
                     5614: branch 2 taken
    1357            11228:   while (!ConditionalStack.empty()) {
    1358                 :     PP->Diag(ConditionalStack.back().IfLoc,
    1359                0:              diag::err_pp_unterminated_conditional);
    1360                0:     ConditionalStack.pop_back();
    1361                 :   }
    1362                 : 
    1363                 :   // C99 5.1.1.2p2: If the file is non-empty and didn't end in a newline, issue
    1364                 :   // a pedwarn.
                     5536: branch 0 taken
                       78: branch 1 taken
                       87: branch 2 taken
                     5449: branch 3 taken
                       87: branch 4 taken
                        0: branch 5 not taken
    1365             5614:   if (CurPtr != BufferStart && (CurPtr[-1] != '\n' && CurPtr[-1] != '\r'))
    1366                 :     Diag(BufferEnd, diag::ext_no_newline_eof)
    1367                 :       << CodeModificationHint::CreateInsertion(getSourceLocation(BufferEnd),
    1368               87:                                                "\n");
    1369                 : 
    1370             5614:   BufferPtr = CurPtr;
    1371                 : 
    1372                 :   // Finally, let the preprocessor handle this.
    1373             5614:   return PP->HandleEndOfFile(Result);
    1374                 : }
    1375                 : 
    1376                 : /// isNextPPTokenLParen - Return 1 if the next unexpanded token lexed from
    1377                 : /// the specified lexer will return a tok::l_paren token, 0 if it is something
    1378                 : /// else and 2 if there are no more tokens in the buffer controlled by the
    1379                 : /// lexer.
    1380             2126: unsigned Lexer::isNextPPTokenLParen() {
                        0: branch 0 not taken
                     2126: branch 1 taken
    1381             2126:   assert(!LexingRawMode && "How can we expand a macro from a skipping buffer?");
    1382                 : 
    1383                 :   // Switch to 'skipping' mode.  This will ensure that we can lex a token
    1384                 :   // without emitting diagnostics, disables macro expansion, and will cause EOF
    1385                 :   // to return an EOF token instead of popping the include stack.
    1386             2126:   LexingRawMode = true;
    1387                 : 
    1388                 :   // Save state that can be changed while lexing so that we can restore it.
    1389             2126:   const char *TmpBufferPtr = BufferPtr;
    1390             2126:   bool inPPDirectiveMode = ParsingPreprocessorDirective;
    1391                 : 
    1392             2126:   Token Tok;
    1393             2126:   Tok.startToken();
    1394             2126:   LexTokenInternal(Tok);
    1395                 : 
    1396                 :   // Restore state that may have changed.
    1397             2126:   BufferPtr = TmpBufferPtr;
    1398             2126:   ParsingPreprocessorDirective = inPPDirectiveMode;
    1399                 : 
    1400                 :   // Restore the lexer back to non-skipping mode.
    1401             2126:   LexingRawMode = false;
    1402                 : 
                        1: branch 1 taken
                     2125: branch 2 taken
    1403             2126:   if (Tok.is(tok::eof))
    1404                1:     return 2;
    1405             2125:   return Tok.is(tok::l_paren);
    1406                 : }
    1407                 : 
    1408                 : /// FindConflictEnd - Find the end of a version control conflict marker.
    1409                4: static const char *FindConflictEnd(const char *CurPtr, const char *BufferEnd) {
    1410                4:   llvm::StringRef RestOfBuffer(CurPtr+7, BufferEnd-CurPtr-7);
    1411                4:   size_t Pos = RestOfBuffer.find(">>>>>>>");
                        4: branch 0 taken
                        0: branch 1 not taken
    1412                8:   while (Pos != llvm::StringRef::npos) {
    1413                 :     // Must occur at start of line.
                        4: branch 1 taken
                        0: branch 2 not taken
                        0: branch 4 not taken
                        4: branch 5 taken
                        0: branch 6 not taken
                        4: branch 7 taken
    1414                4:     if (RestOfBuffer[Pos-1] != '\r' &&
    1415                 :         RestOfBuffer[Pos-1] != '\n') {
    1416                0:       RestOfBuffer = RestOfBuffer.substr(Pos+7);
    1417                0:       continue;
    1418                 :     }
    1419                4:     return RestOfBuffer.data()+Pos;
    1420                 :   }
    1421                0:   return 0;
    1422                 : }
    1423                 : 
    1424                 : /// IsStartOfConflictMarker - If the specified pointer is the start of a version
    1425                 : /// control conflict marker like '<<<<<<<', recognize it as such, emit an error
    1426                 : /// and recover nicely.  This returns true if it is a conflict marker and false
    1427                 : /// if not.
    1428                8: bool Lexer::IsStartOfConflictMarker(const char *CurPtr) {
    1429                 :   // Only a conflict marker if it starts at the beginning of a line.
                        8: branch 0 taken
                        0: branch 1 not taken
                        4: branch 2 taken
                        4: branch 3 taken
                        4: branch 4 taken
                        0: branch 5 not taken
    1430                8:   if (CurPtr != BufferStart &&
    1431                 :       CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
    1432                4:     return false;
    1433                 :   
    1434                 :   // Check to see if we have <<<<<<<.
                        4: branch 0 taken
                        0: branch 1 not taken
                        0: branch 5 not taken
                        4: branch 6 taken
                        0: branch 7 not taken
                        4: branch 8 taken
    1435                4:   if (BufferEnd-CurPtr < 8 ||
    1436                 :       llvm::StringRef(CurPtr, 7) != "<<<<<<<")
    1437                0:     return false;
    1438                 : 
    1439                 :   // If we have a situation where we don't care about conflict markers, ignore
    1440                 :   // it.
                        4: branch 0 taken
                        0: branch 1 not taken
                        2: branch 3 taken
                        2: branch 4 taken
                        2: branch 5 taken
                        2: branch 6 taken
    1441                4:   if (IsInConflictMarker || isLexingRawMode())
    1442                2:     return false;
    1443                 :   
    1444                 :   // Check to see if there is a >>>>>>> somewhere in the buffer at the start of
    1445                 :   // a line to terminate this conflict marker.
                        2: branch 1 taken
                        0: branch 2 not taken
    1446                2:   if (FindConflictEnd(CurPtr+7, BufferEnd)) {
    1447                 :     // We found a match.  We are really in a conflict marker.
    1448                 :     // Diagnose this, and ignore to the end of line.
    1449                2:     Diag(CurPtr, diag::err_conflict_marker);
    1450                2:     IsInConflictMarker = true;
    1451                 :     
    1452                 :     // Skip ahead to the end of line.  We know this exists because the
    1453                 :     // end-of-conflict marker starts with \r or \n.
                      176: branch 0 taken
                        0: branch 1 not taken
                      174: branch 2 taken
                        2: branch 3 taken
    1454              178:     while (*CurPtr != '\r' && *CurPtr != '\n') {
                        0: branch 0 not taken
                      174: branch 1 taken
    1455              174:       assert(CurPtr != BufferEnd && "Didn't find end of line");
    1456              174:       ++CurPtr;
    1457                 :     }
    1458                2:     BufferPtr = CurPtr;
    1459                2:     return true;
    1460                 :   }
    1461                 :   
    1462                 :   // No end of conflict marker found.
    1463                0:   return false;
    1464                 : }
    1465                 : 
    1466                 : 
    1467                 : /// HandleEndOfConflictMarker - If this is a '=======' or '|||||||' or '>>>>>>>'
    1468                 : /// marker, then it is the end of a conflict marker.  Handle it by ignoring up
    1469                 : /// until the end of the line.  This returns true if it is a conflict marker and
    1470                 : /// false if not.
    1471               23: bool Lexer::HandleEndOfConflictMarker(const char *CurPtr) {
    1472                 :   // Only a conflict marker if it starts at the beginning of a line.
                       23: branch 0 taken
                        0: branch 1 not taken
                       16: branch 2 taken
                        7: branch 3 taken
                       16: branch 4 taken
                        0: branch 5 not taken
    1473               23:   if (CurPtr != BufferStart &&
    1474                 :       CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
    1475               16:     return false;
    1476                 :   
    1477                 :   // If we have a situation where we don't care about conflict markers, ignore
    1478                 :   // it.
                        2: branch 0 taken
                        5: branch 1 taken
                        0: branch 3 not taken
                        2: branch 4 taken
                        5: branch 5 taken
                        2: branch 6 taken
    1479                7:   if (!IsInConflictMarker || isLexingRawMode())
    1480                5:     return false;
    1481                 :   
    1482                 :   // Check to see if we have the marker (7 characters in a row).
                       12: branch 0 taken
                        2: branch 1 taken
    1483               14:   for (unsigned i = 1; i != 7; ++i)
                        0: branch 0 not taken
                       12: branch 1 taken
    1484               12:     if (CurPtr[i] != CurPtr[0])
    1485                0:       return false;
    1486                 :   
    1487                 :   // If we do have it, search for the end of the conflict marker.  This could
    1488                 :   // fail if it got skipped with a '#if 0' or something.  Note that CurPtr might
    1489                 :   // be the end of conflict marker.
                        2: branch 1 taken
                        0: branch 2 not taken
    1490                2:   if (const char *End = FindConflictEnd(CurPtr, BufferEnd)) {
    1491                2:     CurPtr = End;
    1492                 :     
    1493                 :     // Skip ahead to the end of line.
                       32: branch 0 taken
                        0: branch 1 not taken
                       32: branch 2 taken
                        0: branch 3 not taken
                       30: branch 4 taken
                        2: branch 5 taken
    1494               34:     while (CurPtr != BufferEnd && *CurPtr != '\r' && *CurPtr != '\n')
    1495               30:       ++CurPtr;
    1496                 :     
    1497                2:     BufferPtr = CurPtr;
    1498                 :     
    1499                 :     // No longer in the conflict marker.
    1500                2:     IsInConflictMarker = false;
    1501                2:     return true;
    1502                 :   }
    1503                 :   
    1504                0:   return false;
    1505                 : }
    1506                 : 
    1507                 : 
    1508                 : /// LexTokenInternal - This implements a simple C family lexer.  It is an
    1509                 : /// extremely performance critical piece of code.  This assumes that the buffer
    1510                 : /// has a null character at the end of the file.  This returns a preprocessing
    1511                 : /// token, not a normal token, as such, it is an internal interface.  It assumes
    1512                 : /// that the Flags of result have been cleared before calling this.
    1513          2903142: void Lexer::LexTokenInternal(Token &Result) {
    1514          2903142: LexNextToken:
    1515                 :   // New token, can't need cleaning yet.
    1516          2903142:   Result.clearFlag(Token::NeedsCleaning);
    1517          2903142:   Result.setIdentifierInfo(0);
    1518                 : 
    1519                 :   // CurPtr - Cache BufferPtr in an automatic variable.
    1520          2903142:   const char *CurPtr = BufferPtr;
    1521                 : 
    1522                 :   // Small amounts of horizontal whitespace is very common between tokens.
                  1917746: branch 0 taken
                   985396: branch 1 taken
                    10748: branch 2 taken
                  1906998: branch 3 taken
    1523          2903142:   if ((*CurPtr == ' ') || (*CurPtr == '\t')) {
    1524           996144:     ++CurPtr;
                    49282: branch 0 taken
                  1000639: branch 1 taken
                     4495: branch 2 taken
                   996144: branch 3 taken
    1525          2046065:     while ((*CurPtr == ' ') || (*CurPtr == '\t'))
    1526            53777:       ++CurPtr;
    1527                 : 
    1528                 :     // If we are keeping whitespace and other tokens, just return what we just
    1529                 :     // skipped.  The next lexer invocation will return the token after the
    1530                 :     // whitespace.
                        0: branch 1 not taken
                   996144: branch 2 taken
    1531           996144:     if (isKeepWhitespaceMode()) {
    1532                0:       FormTokenWithChars(Result, CurPtr, tok::unknown);
    1533                0:       return;
    1534                 :     }
    1535                 : 
    1536           996144:     BufferPtr = CurPtr;
    1537           996144:     Result.setFlag(Token::LeadingSpace);
    1538                 :   }
    1539                 : 
    1540                 :   unsigned SizeTmp, SizeTmp2;   // Temporaries for use in cases below.
    1541                 : 
    1542                 :   // Read a character, advancing over it.
    1543          2903142:   char Char = getAndAdvanceChar(CurPtr, Result);
    1544                 :   tok::TokenKind Kind;
    1545                 : 
                     7040: branch 0 taken
                        2: branch 1 taken
                   466324: branch 2 taken
                     2016: branch 3 taken
                   278349: branch 4 taken
                     3266: branch 5 taken
                  1167573: branch 6 taken
                        6: branch 7 taken
                      346: branch 8 taken
                    13436: branch 9 taken
                     1570: branch 10 taken
                     8974: branch 11 taken
                     9500: branch 12 taken
                   114875: branch 13 taken
                   112925: branch 14 taken
                    30808: branch 15 taken
                    64523: branch 16 taken
                     4011: branch 17 taken
                     7574: branch 18 taken
                    28571: branch 19 taken
                     4910: branch 20 taken
                    23561: branch 21 taken
                      458: branch 22 taken
                     2172: branch 23 taken
                    42756: branch 24 taken
                      153: branch 25 taken
                    12950: branch 26 taken
                    11871: branch 27 taken
                      940: branch 28 taken
                     1863: branch 29 taken
                    15980: branch 30 taken
                    86303: branch 31 taken
                    21704: branch 32 taken
                    35587: branch 33 taken
                   306726: branch 34 taken
                    13508: branch 35 taken
                       11: branch 36 taken
    1546          2903142:   switch (Char) {
    1547                 :   case 0:  // Null.
    1548                 :     // Found end of file?
                     7040: branch 0 taken
                        0: branch 1 not taken
    1549             7040:     if (CurPtr-1 == BufferEnd) {
    1550                 :       // Read the PP instance variable into an automatic variable, because
    1551                 :       // LexEndOfFile will often delete 'this'.
    1552             7040:       Preprocessor *PPCache = PP;
                     3940: branch 1 taken
                     3100: branch 2 taken
    1553             7040:       if (LexEndOfFile(Result, CurPtr-1))  // Retreat back into the file.
    1554             3940:         return;   // Got a token to return.
                        0: branch 0 not taken
                     3100: branch 1 taken
    1555             3100:       assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
    1556             3100:       return PPCache->Lex(Result);
    1557                 :     }
    1558                 : 
                        0: branch 1 not taken
                        0: branch 2 not taken
    1559                0:     if (!isLexingRawMode())
    1560                0:       Diag(CurPtr-1, diag::null_in_file);
    1561                0:     Result.setFlag(Token::LeadingSpace);
                        0: branch 1 not taken
                        0: branch 2 not taken
    1562                0:     if (SkipWhitespace(Result, CurPtr))
    1563                0:       return; // KeepWhitespaceMode
    1564                 : 
    1565                0:     goto LexNextToken;   // GCC isn't tail call eliminating.
    1566                 :       
    1567                 :   case 26:  // DOS & CP/M EOF: "^Z".
    1568                 :     // If we're in Microsoft extensions mode, treat this as end of file.
                        2: branch 0 taken
                        0: branch 1 not taken
    1569                2:     if (Features.Microsoft) {
    1570                 :       // Read the PP instance variable into an automatic variable, because
    1571                 :       // LexEndOfFile will often delete 'this'.
    1572                2:       Preprocessor *PPCache = PP;
                        2: branch 1 taken
                        0: branch 2 not taken
    1573                2:       if (LexEndOfFile(Result, CurPtr-1))  // Retreat back into the file.
    1574                2:         return;   // Got a token to return.
                        0: branch 0 not taken
                        0: branch 1 not taken
    1575                0:       assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
    1576                0:       return PPCache->Lex(Result);
    1577                 :     }
    1578                 :     // If Microsoft extensions are disabled, this is just random garbage.
    1579                0:     Kind = tok::unknown;
    1580                0:     break;
    1581                 :       
    1582                 :   case '\n':
    1583                 :   case '\r':
    1584                 :     // If we are inside a preprocessor directive and we see the end of line,
    1585                 :     // we know we are done with the directive, so return an EOM token.
                   294714: branch 0 taken
                   171610: branch 1 taken
    1586           466324:     if (ParsingPreprocessorDirective) {
    1587                 :       // Done parsing the "line".
    1588           294714:       ParsingPreprocessorDirective = false;
    1589                 : 
    1590                 :       // Restore comment saving mode, in case it was disabled for directive.
    1591           294714:       SetCommentRetentionState(PP->getCommentRetentionState());
    1592                 : 
    1593                 :       // Since we consumed a newline, we are back at the start of a line.
    1594           294714:       IsAtStartOfLine = true;
    1595                 : 
    1596           294714:       Kind = tok::eom;
    1597           294714:       break;
    1598                 :     }
    1599                 :     // The returned token is at the start of the line.
    1600           171610:     Result.setFlag(Token::StartOfLine);
    1601                 :     // No leading whitespace seen so far.
    1602           171610:     Result.clearFlag(Token::LeadingSpace);
    1603                 : 
                        0: branch 1 not taken
                   171610: branch 2 taken
    1604           171610:     if (SkipWhitespace(Result, CurPtr))
    1605                0:       return; // KeepWhitespaceMode
    1606           171610:     goto LexNextToken;   // GCC isn't tail call eliminating.
    1607                 :   case ' ':
    1608                 :   case '\t':
    1609                 :   case '\f':
    1610                 :   case '\v':
    1611             7021:   SkipHorizontalWhitespace:
    1612             7021:     Result.setFlag(Token::LeadingSpace);
                        0: branch 1 not taken
                     7021: branch 2 taken
    1613             7021:     if (SkipWhitespace(Result, CurPtr))
    1614                0:       return; // KeepWhitespaceMode
    1615                 : 
    1616            45478:   SkipIgnoredUnits:
    1617            45478:     CurPtr = BufferPtr;
    1618                 : 
    1619                 :     // If the next token is obviously a // or /* */ comment, skip it efficiently
    1620                 :     // too (without going through the big switch stmt).
                    22796: branch 0 taken
                    22682: branch 1 taken
                    22784: branch 2 taken
                       12: branch 3 taken
                    22784: branch 5 taken
                        0: branch 6 not taken
                    22784: branch 7 taken
                        0: branch 8 not taken
                    22784: branch 9 taken
                    22694: branch 10 taken
    1621            45478:     if (CurPtr[0] == '/' && CurPtr[1] == '/' && !inKeepCommentMode() &&
    1622                 :         Features.BCPLComment) {
                        0: branch 1 not taken
                    22784: branch 2 taken
    1623            22784:       if (SkipBCPLComment(Result, CurPtr+2))
    1624                0:         return; // There is a token to return.
    1625            22784:       goto SkipIgnoredUnits;
                       12: branch 0 taken
                    22682: branch 1 taken
                       12: branch 2 taken
                        0: branch 3 not taken
                       12: branch 5 taken
                        0: branch 6 not taken
                       12: branch 7 taken
                    22682: branch 8 taken
    1626            22694:     } else if (CurPtr[0] == '/' && CurPtr[1] == '*' && !inKeepCommentMode()) {
                        0: branch 1 not taken
                       12: branch 2 taken
    1627               12:       if (SkipBlockComment(Result, CurPtr+2))
    1628                0:         return; // There is a token to return.
    1629               12:       goto SkipIgnoredUnits;
                     5005: branch 1 taken
                    17677: branch 2 taken
    1630            22682:     } else if (isHorizontalWhitespace(*CurPtr)) {
    1631             5005:       goto SkipHorizontalWhitespace;
    1632                 :     }
    1633            17677:     goto LexNextToken;   // GCC isn't tail call eliminating.
    1634                 :       
    1635                 :   // C99 6.4.4.1: Integer Constants.
    1636                 :   // C99 6.4.4.2: Floating Constants.
    1637                 :   case '0': case '1': case '2': case '3': case '4':
    1638                 :   case '5': case '6': case '7': case '8': case '9':
    1639                 :     // Notify MIOpt that we read a non-whitespace/non-comment token.
    1640           278349:     MIOpt.ReadToken();
    1641           278349:     return LexNumericConstant(Result, CurPtr);
    1642                 : 
    1643                 :   case 'L':   // Identifier (Loony) or wide literal (L'x' or L"xyz").
    1644                 :     // Notify MIOpt that we read a non-whitespace/non-comment token.
    1645             3266:     MIOpt.ReadToken();
    1646             3266:     Char = getCharAndSize(CurPtr, SizeTmp);
    1647                 : 
    1648                 :     // Wide string literal.
                       61: branch 0 taken
                     3205: branch 1 taken
    1649             3266:     if (Char == '"')
    1650                 :       return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
    1651               61:                               true);
    1652                 : 
    1653                 :     // Wide character constant.
                       31: branch 0 taken
                     3174: branch 1 taken
    1654             3205:     if (Char == '\'')
    1655               31:       return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
    1656                 :     // FALL THROUGH, treating L like the start of an identifier.
    1657                 : 
    1658                 :   // C99 6.4.2: Identifiers.
    1659                 :   case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
    1660                 :   case 'H': case 'I': case 'J': case 'K':    /*'L'*/case 'M': case 'N':
    1661                 :   case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
    1662                 :   case 'V': case 'W': case 'X': case 'Y': case 'Z':
    1663                 :   case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
    1664                 :   case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
    1665                 :   case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
    1666                 :   case 'v': case 'w': case 'x': case 'y': case 'z':
    1667                 :   case '_':
    1668                 :     // Notify MIOpt that we read a non-whitespace/non-comment token.
    1669          1170747:     MIOpt.ReadToken();
    1670          1170747:     return LexIdentifier(Result, CurPtr);
    1671                 : 
    1672                 :   case '$':   // $ in identifiers.
                        3: branch 0 taken
                        3: branch 1 taken
    1673                6:     if (Features.DollarIdents) {
                        3: branch 1 taken
                        0: branch 2 not taken
    1674                3:       if (!isLexingRawMode())
    1675                3:         Diag(CurPtr-1, diag::ext_dollar_in_identifier);
    1676                 :       // Notify MIOpt that we read a non-whitespace/non-comment token.
    1677                3:       MIOpt.ReadToken();
    1678                3:       return LexIdentifier(Result, CurPtr);
    1679                 :     }
    1680                 : 
    1681                3:     Kind = tok::unknown;
    1682                3:     break;
    1683                 : 
    1684                 :   // C99 6.4.4: Character Constants.
    1685                 :   case '\'':
    1686                 :     // Notify MIOpt that we read a non-whitespace/non-comment token.
    1687              346:     MIOpt.ReadToken();
    1688              346:     return LexCharConstant(Result, CurPtr);
    1689                 : 
    1690                 :   // C99 6.4.5: String Literals.
    1691                 :   case '"':
    1692                 :     // Notify MIOpt that we read a non-whitespace/non-comment token.
    1693            13436:     MIOpt.ReadToken();
    1694            13436:     return LexStringLiteral(Result, CurPtr, false);
    1695                 : 
    1696                 :   // C99 6.4.6: Punctuators.
    1697                 :   case '?':
    1698             1570:     Kind = tok::question;
    1699             1570:     break;
    1700                 :   case '[':
    1701             8974:     Kind = tok::l_square;
    1702             8974:     break;
    1703                 :   case ']':
    1704             9500:     Kind = tok::r_square;
    1705             9500:     break;
    1706                 :   case '(':
    1707           114875:     Kind = tok::l_paren;
    1708           114875:     break;
    1709                 :   case ')':
    1710           112925:     Kind = tok::r_paren;
    1711           112925:     break;
    1712                 :   case '{':
    1713            30808:     Kind = tok::l_brace;
    1714            30808:     break;
    1715                 :   case '}':
    1716            64523:     Kind = tok::r_brace;
    1717            64523:     break;
    1718                 :   case '.':
    1719             4011:     Char = getCharAndSize(CurPtr, SizeTmp);
                     2986: branch 0 taken
                     1025: branch 1 taken
                       19: branch 2 taken
                     2967: branch 3 taken
    1720             4011:     if (Char >= '0' && Char <= '9') {
    1721                 :       // Notify MIOpt that we read a non-whitespace/non-comment token.
    1722               19:       MIOpt.ReadToken();
    1723                 : 
    1724               19:       return LexNumericConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
                     1291: branch 0 taken
                     2701: branch 1 taken
                       55: branch 2 taken
                     1236: branch 3 taken
    1725             4047:     } else if (Features.CPlusPlus && Char == '*') {
    1726               55:       Kind = tok::periodstar;
    1727               55:       CurPtr += SizeTmp;
                      935: branch 0 taken
                     3002: branch 1 taken
                      932: branch 3 taken
                        3: branch 4 taken
                      932: branch 5 taken
                     3005: branch 6 taken
    1728             3937:     } else if (Char == '.' &&
    1729                 :                getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '.') {
    1730              932:       Kind = tok::ellipsis;
    1731                 :       CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
    1732              932:                            SizeTmp2, Result);
    1733                 :     } else {
    1734             3005:       Kind = tok::period;
    1735                 :     }
    1736             3992:     break;
    1737                 :   case '&':
    1738             7574:     Char = getCharAndSize(CurPtr, SizeTmp);
                     1744: branch 0 taken
                     5830: branch 1 taken
    1739             7574:     if (Char == '&') {
    1740             1744:       Kind = tok::ampamp;
    1741             1744:       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
                       37: branch 0 taken
                     5793: branch 1 taken
    1742             5830:     } else if (Char == '=') {
    1743               37:       Kind = tok::ampequal;
    1744               37:       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    1745                 :     } else {
    1746             5793:       Kind = tok::amp;
    1747                 :     }
    1748             7574:     break;
    1749                 :   case '*':
                       66: branch 1 taken
                    28505: branch 2 taken
    1750            28571:     if (getCharAndSize(CurPtr, SizeTmp) == '=') {
    1751               66:       Kind = tok::starequal;
    1752               66:       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    1753                 :     } else {
    1754            28505:       Kind = tok::star;
    1755                 :     }
    1756            28571:     break;
    1757                 :   case '+':
    1758             4910:     Char = getCharAndSize(CurPtr, SizeTmp);
                     1244: branch 0 taken
                     3666: branch 1 taken
    1759             4910:     if (Char == '+') {
    1760             1244:       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    1761             1244:       Kind = tok::plusplus;
                      240: branch 0 taken
                     3426: branch 1 taken
    1762             3666:     } else if (Char == '=') {
    1763              240:       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    1764              240:       Kind = tok::plusequal;
    1765                 :     } else {
    1766             3426:       Kind = tok::plus;
    1767                 :     }
    1768             4910:     break;
    1769                 :   case '-':
    1770            23561:     Char = getCharAndSize(CurPtr, SizeTmp);
                      170: branch 0 taken
                    23391: branch 1 taken
    1771            23561:     if (Char == '-') {      // --
    1772              170:       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    1773              170:       Kind = tok::minusminus;
                     1333: branch 0 taken
                    22058: branch 1 taken
                      594: branch 2 taken
                      739: branch 3 taken
                       80: branch 5 taken
                      514: branch 6 taken
                       80: branch 7 taken
                    23311: branch 8 taken
    1774            23391:     } else if (Char == '>' && Features.CPlusPlus &&
    1775                 :                getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '*') {  // C++ ->*
    1776                 :       CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
    1777               80:                            SizeTmp2, Result);
    1778               80:       Kind = tok::arrowstar;
                     1253: branch 0 taken
                    22058: branch 1 taken
    1779            23311:     } else if (Char == '>') {   // ->
    1780             1253:       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    1781             1253:       Kind = tok::arrow;
                       33: branch 0 taken
                    22025: branch 1 taken
    1782            22058:     } else if (Char == '=') {   // -=
    1783               33:       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    1784               33:       Kind = tok::minusequal;
    1785                 :     } else {
    1786            22025:       Kind = tok::minus;
    1787                 :     }
    1788            23561:     break;
    1789                 :   case '~':
    1790              458:     Kind = tok::tilde;
    1791              458:     break;
    1792                 :   case '!':
                      556: branch 1 taken
                     1616: branch 2 taken
    1793             2172:     if (getCharAndSize(CurPtr, SizeTmp) == '=') {
    1794              556:       Kind = tok::exclaimequal;
    1795              556:       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    1796                 :     } else {
    1797             1616:       Kind = tok::exclaim;
    1798                 :     }
    1799             2172:     break;
    1800                 :   case '/':
    1801                 :     // 6.4.9: Comments
    1802            42756:     Char = getCharAndSize(CurPtr, SizeTmp);
                    31121: branch 0 taken
                    11635: branch 1 taken
    1803            42756:     if (Char == '/') {         // BCPL comment.
    1804                 :       // Even if BCPL comments are disabled (e.g. in C89 mode), we generally
    1805                 :       // want to lex this as a comment.  There is one problem with this though,
    1806                 :       // that in one particular corner case, this can change the behavior of the
    1807                 :       // resultant program.  For example, in "foo //**/ bar", C89 would lex
    1808                 :       // this as "foo / bar" and languages with BCPL comments would lex it as
    1809                 :       // "foo".  Check to see if the character after the second slash is a '*'.
    1810                 :       // If so, we will lex that as a "/" instead of the start of a comment.
                       28: branch 0 taken
                    31093: branch 1 taken
                       26: branch 3 taken
                        2: branch 4 taken
                    31119: branch 5 taken
                        2: branch 6 taken
    1811            31121:       if (Features.BCPLComment ||
    1812                 :           getCharAndSize(CurPtr+SizeTmp, SizeTmp2) != '*') {
                    15458: branch 2 taken
                    15661: branch 3 taken
    1813            31119:         if (SkipBCPLComment(Result, ConsumeChar(CurPtr, SizeTmp, Result)))
    1814            15458:           return; // There is a token to return.
    1815                 : 
    1816                 :         // It is common for the tokens immediately after a // comment to be
    1817                 :         // whitespace (indentation for the next line).  Instead of going through
    1818                 :         // the big switch, handle it efficiently now.
    1819            15661:         goto SkipIgnoredUnits;
    1820                 :       }
    1821                 :     }
    1822                 : 
                    10861: branch 0 taken
                      776: branch 1 taken
    1823            11637:     if (Char == '*') {  // /**/ comment.
                      349: branch 2 taken
                    10512: branch 3 taken
    1824            10861:       if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result)))
    1825              349:         return; // There is a token to return.
    1826            10512:       goto LexNextToken;   // GCC isn't tail call eliminating.
    1827                 :     }
    1828                 : 
                      102: branch 0 taken
                      674: branch 1 taken
    1829              776:     if (Char == '=') {
    1830              102:       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    1831              102:       Kind = tok::slashequal;
    1832                 :     } else {
    1833              674:       Kind = tok::slash;
    1834                 :     }
    1835              776:     break;
    1836                 :   case '%':
    1837              153:     Char = getCharAndSize(CurPtr, SizeTmp);
                       10: branch 0 taken
                      143: branch 1 taken
    1838              153:     if (Char == '=') {
    1839               10:       Kind = tok::percentequal;
    1840               10:       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
                      143: branch 0 taken
                        0: branch 1 not taken
                        4: branch 2 taken
                      139: branch 3 taken
    1841              147:     } else if (Features.Digraphs && Char == '>') {
    1842                4:       Kind = tok::r_brace;                             // '%>' -> '}'
    1843                4:       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
                      139: branch 0 taken
                        0: branch 1 not taken
                        8: branch 2 taken
                      131: branch 3 taken
    1844              143:     } else if (Features.Digraphs && Char == ':') {
    1845                8:       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    1846                8:       Char = getCharAndSize(CurPtr, SizeTmp);
                        0: branch 0 not taken
                        8: branch 1 taken
                        0: branch 3 not taken
                        0: branch 4 not taken
                        0: branch 5 not taken
                        8: branch 6 taken
    1847                8:       if (Char == '%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == ':') {
    1848                0:         Kind = tok::hashhash;                          // '%:%:' -> '##'
    1849                 :         CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
    1850                0:                              SizeTmp2, Result);
                        0: branch 0 not taken
                        8: branch 1 taken
                        8: branch 2 taken
                        8: branch 3 taken
    1851                8:       } else if (Char == '@' && Features.Microsoft) {  // %:@ -> #@ -> Charize
    1852                0:         CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
                        0: branch 1 not taken
                        0: branch 2 not taken
    1853                0:         if (!isLexingRawMode())
    1854                0:           Diag(BufferPtr, diag::charize_microsoft_ext);
    1855                0:         Kind = tok::hashat;
    1856                 :       } else {                                         // '%:' -> '#'
    1857                 :         // We parsed a # character.  If this occurs at the start of the line,
    1858                 :         // it's actually the start of a preprocessing directive.  Callback to
    1859                 :         // the preprocessor to handle it.
    1860                 :         // FIXME: -fpreprocessed mode??
                        8: branch 1 taken
                        0: branch 2 not taken
                        4: branch 3 taken
                        4: branch 4 taken
                        4: branch 5 taken
                        0: branch 6 not taken
                        4: branch 7 taken
                        4: branch 8 taken
    1861                8:         if (Result.isAtStartOfLine() && !LexingRawMode && !Is_PragmaLexer) {
    1862                4:           FormTokenWithChars(Result, CurPtr, tok::hash);
    1863                4:           PP->HandleDirective(Result);
    1864                 : 
    1865                 :           // As an optimization, if the preprocessor didn't switch lexers, tail
    1866                 :           // recurse.
                        3: branch 1 taken
                        1: branch 2 taken
    1867                4:           if (PP->isCurrentLexer(this)) {
    1868                 :             // Start a new token. If this is a #include or something, the PP may
    1869                 :             // want us starting at the beginning of the line again.  If so, set
    1870                 :             // the StartOfLine flag.
                        3: branch 0 taken
                        0: branch 1 not taken
    1871                3:             if (IsAtStartOfLine) {
    1872                3:               Result.setFlag(Token::StartOfLine);
    1873                3:               IsAtStartOfLine = false;
    1874                 :             }
    1875                3:             goto LexNextToken;   // GCC isn't tail call eliminating.
    1876                 :           }
    1877                 : 
    1878                1:           return PP->Lex(Result);
    1879                 :         }
    1880                 : 
    1881                4:         Kind = tok::hash;
    1882                 :       }
    1883                 :     } else {
    1884              131:       Kind = tok::percent;
    1885                 :     }
    1886              149:     break;
    1887                 :   case '<':
    1888            12950:     Char = getCharAndSize(CurPtr, SizeTmp);
                      632: branch 0 taken
                    12318: branch 1 taken
    1889            12950:     if (ParsingFilename) {
    1890              632:       return LexAngledStringLiteral(Result, CurPtr);
                      814: branch 0 taken
                    11504: branch 1 taken
    1891            12318:     } else if (Char == '<') {
    1892              814:       char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
                       31: branch 0 taken
                      783: branch 1 taken
    1893              814:       if (After == '=') {
    1894               31:         Kind = tok::lesslessequal;
    1895                 :         CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
    1896               31:                              SizeTmp2, Result);
                        8: branch 0 taken
                      775: branch 1 taken
                        2: branch 3 taken
                        6: branch 4 taken
                        2: branch 5 taken
                      781: branch 6 taken
    1897              783:       } else if (After == '<' && IsStartOfConflictMarker(CurPtr-1)) {
    1898                 :         // If this is actually a '<<<<<<<' version control conflict marker,
    1899                 :         // recognize it as such and recover nicely.
    1900                2:         goto LexNextToken;
    1901                 :       } else {
    1902              781:         CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    1903              781:         Kind = tok::lessless;
    1904                 :       }
                       56: branch 0 taken
                    11448: branch 1 taken
    1905            11504:     } else if (Char == '=') {
    1906               56:       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    1907               56:       Kind = tok::lessequal;
                    11442: branch 0 taken
                        6: branch 1 taken
                        8: branch 2 taken
                    11434: branch 3 taken
    1908            11456:     } else if (Features.Digraphs && Char == ':') {     // '<:' -> '['
    1909                8:       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    1910                8:       Kind = tok::l_square;
                    11434: branch 0 taken
                        6: branch 1 taken
                        4: branch 2 taken
                    11430: branch 3 taken
    1911            11444:     } else if (Features.Digraphs && Char == '%') {     // '<%' -> '{'
    1912                4:       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    1913                4:       Kind = tok::l_brace;
    1914                 :     } else {
    1915            11436:       Kind = tok::less;
    1916                 :     }
    1917            12316:     break;
    1918                 :   case '>':
    1919            11871:     Char = getCharAndSize(CurPtr, SizeTmp);
                      576: branch 0 taken
                    11295: branch 1 taken
    1920            11871:     if (Char == '=') {
    1921              576:       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    1922              576:       Kind = tok::greaterequal;
                      220: branch 0 taken
                    11075: branch 1 taken
    1923            11295:     } else if (Char == '>') {
    1924              220:       char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
                       12: branch 0 taken
                      208: branch 1 taken
    1925              220:       if (After == '=') {
    1926                 :         CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
    1927               12:                              SizeTmp2, Result);
    1928               12:         Kind = tok::greatergreaterequal;
                       12: branch 0 taken
                      196: branch 1 taken
                        0: branch 3 not taken
                       12: branch 4 taken
                        0: branch 5 not taken
                      208: branch 6 taken
    1929              208:       } else if (After == '>' && HandleEndOfConflictMarker(CurPtr-1)) {
    1930                 :         // If this is '>>>>>>>' and we're in a conflict marker, ignore it.
    1931                0:         goto LexNextToken;
    1932                 :       } else {
    1933              208:         CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    1934              208:         Kind = tok::greatergreater;
    1935                 :       }
    1936                 :       
    1937                 :     } else {
    1938            11075:       Kind = tok::greater;
    1939                 :     }
    1940            11871:     break;
    1941                 :   case '^':
    1942              940:     Char = getCharAndSize(CurPtr, SizeTmp);
                        0: branch 0 not taken
                      940: branch 1 taken
    1943              940:     if (Char == '=') {
    1944                0:       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    1945                0:       Kind = tok::caretequal;
    1946                 :     } else {
    1947              940:       Kind = tok::caret;
    1948                 :     }
    1949              940:     break;
    1950                 :   case '|':
    1951             1863:     Char = getCharAndSize(CurPtr, SizeTmp);
                       33: branch 0 taken
                     1830: branch 1 taken
    1952             1863:     if (Char == '=') {
    1953               33:       Kind = tok::pipeequal;
    1954               33:       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
                     1316: branch 0 taken
                      514: branch 1 taken
    1955             1830:     } else if (Char == '|') {
    1956                 :       // If this is '|||||||' and we're in a conflict marker, ignore it.
                        4: branch 0 taken
                     1312: branch 1 taken
                        1: branch 3 taken
                        3: branch 4 taken
                        1: branch 5 taken
                     1315: branch 6 taken
    1957             1316:       if (CurPtr[1] == '|' && HandleEndOfConflictMarker(CurPtr-1))
    1958                1:         goto LexNextToken;
    1959             1315:       Kind = tok::pipepipe;
    1960             1315:       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    1961                 :     } else {
    1962              514:       Kind = tok::pipe;
    1963                 :     }
    1964             1862:     break;
    1965                 :   case ':':
    1966            15980:     Char = getCharAndSize(CurPtr, SizeTmp);
                    15962: branch 0 taken
                       18: branch 1 taken
                        8: branch 2 taken
                    15954: branch 3 taken
    1967            15988:     if (Features.Digraphs && Char == '>') {
    1968                8:       Kind = tok::r_square; // ':>' -> ']'
    1969                8:       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
                     7207: branch 0 taken
                     8765: branch 1 taken
                     4533: branch 2 taken
                     2674: branch 3 taken
    1970            20505:     } else if (Features.CPlusPlus && Char == ':') {
    1971             4533:       Kind = tok::coloncolon;
    1972             4533:       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    1973                 :     } else {
    1974            11439:       Kind = tok::colon;
    1975                 :     }
    1976            15980:     break;
    1977                 :   case ';':
    1978            86303:     Kind = tok::semi;
    1979            86303:     break;
    1980                 :   case '=':
    1981            21704:     Char = getCharAndSize(CurPtr, SizeTmp);
                     2688: branch 0 taken
                    19016: branch 1 taken
    1982            21704:     if (Char == '=') {
    1983                 :       // If this is '=======' and we're in a conflict marker, ignore it.
                        7: branch 0 taken
                     2681: branch 1 taken
                        1: branch 3 taken
                        6: branch 4 taken
                        1: branch 5 taken
                     2687: branch 6 taken
    1984             2688:       if (CurPtr[1] == '=' && HandleEndOfConflictMarker(CurPtr-1))
    1985                1:         goto LexNextToken;
    1986                 :       
    1987             2687:       Kind = tok::equalequal;
    1988             2687:       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    1989                 :     } else {
    1990            19016:       Kind = tok::equal;
    1991                 :     }
    1992            21703:     break;
    1993                 :   case ',':
    1994            35587:     Kind = tok::comma;
    1995            35587:     break;
    1996                 :   case '#':
    1997           306726:     Char = getCharAndSize(CurPtr, SizeTmp);
                      810: branch 0 taken
                   305916: branch 1 taken
    1998           306726:     if (Char == '#') {
    1999              810:       Kind = tok::hashhash;
    2000              810:       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
                        2: branch 0 taken
                   305914: branch 1 taken
                        2: branch 2 taken
                        0: branch 3 not taken
    2001           305918:     } else if (Char == '@' && Features.Microsoft) {  // #@ -> Charize
    2002                2:       Kind = tok::hashat;
                        1: branch 1 taken
                        1: branch 2 taken
    2003                2:       if (!isLexingRawMode())
    2004                1:         Diag(BufferPtr, diag::charize_microsoft_ext);
    2005                2:       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    2006                 :     } else {
    2007                 :       // We parsed a # character.  If this occurs at the start of the line,
    2008                 :       // it's actually the start of a preprocessing directive.  Callback to
    2009                 :       // the preprocessor to handle it.
    2010                 :       // FIXME: -fpreprocessed mode??
                   305613: branch 1 taken
                      301: branch 2 taken
                   288089: branch 3 taken
                    17524: branch 4 taken
                   288088: branch 5 taken
                        1: branch 6 taken
                   288088: branch 7 taken
                    17826: branch 8 taken
    2011           305914:       if (Result.isAtStartOfLine() && !LexingRawMode && !Is_PragmaLexer) {
    2012           288088:         FormTokenWithChars(Result, CurPtr, tok::hash);
    2013           288088:         PP->HandleDirective(Result);
    2014                 : 
    2015                 :         // As an optimization, if the preprocessor didn't switch lexers, tail
    2016                 :         // recurse.
                   287506: branch 1 taken
                      582: branch 2 taken
    2017           288088:         if (PP->isCurrentLexer(this)) {
    2018                 :           // Start a new token.  If this is a #include or something, the PP may
    2019                 :           // want us starting at the beginning of the line again.  If so, set
    2020                 :           // the StartOfLine flag.
                   287504: branch 0 taken
                        2: branch 1 taken
    2021           287506:           if (IsAtStartOfLine) {
    2022           287504:             Result.setFlag(Token::StartOfLine);
    2023           287504:             IsAtStartOfLine = false;
    2024                 :           }
    2025           287506:           goto LexNextToken;   // GCC isn't tail call eliminating.
    2026                 :         }
    2027              582:         return PP->Lex(Result);
    2028                 :       }
    2029                 : 
    2030            17826:       Kind = tok::hash;
    2031                 :     }
    2032            18638:     break;
    2033                 : 
    2034                 :   case '@':
    2035                 :     // Objective C support.
                    13508: branch 0 taken
                        0: branch 1 not taken
                    13507: branch 2 taken
                        1: branch 3 taken
    2036            27015:     if (CurPtr[-1] == '@' && Features.ObjC1)
    2037            13507:       Kind = tok::at;
    2038                 :     else
    2039                1:       Kind = tok::unknown;
    2040            13508:     break;
    2041                 : 
    2042                 :   case '\\':
    2043                 :     // FIXME: UCN's.
    2044                 :     // FALL THROUGH.
    2045                 :   default:
    2046               11:     Kind = tok::unknown;
    2047                 :     break;
    2048                 :   }
    2049                 : 
    2050                 :   // Notify MIOpt that we read a non-whitespace/non-comment token.
    2051           928774:   MIOpt.ReadToken();
    2052                 : 
    2053                 :   // Update the location of token as well as BufferPtr.
    2054           928774:   FormTokenWithChars(Result, CurPtr, Kind);
    2055                 : }

Generated: 2010-02-10 01:31 by zcov