Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
clang 20.0.0git
UnwrappedLineParser.cpp
Go to the documentation of this file.
//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the implementation of the UnwrappedLineParser,
/// which turns a stream of tokens into UnwrappedLines.
///
//===----------------------------------------------------------------------===//
14
15#include "UnwrappedLineParser.h"
16#include "FormatToken.h"
17#include "FormatTokenLexer.h"
18#include "FormatTokenSource.h"
19#include "Macros.h"
20#include "TokenAnnotator.h"
22#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/StringRef.h"
24#include "llvm/Support/Debug.h"
25#include "llvm/Support/raw_os_ostream.h"
26#include "llvm/Support/raw_ostream.h"
27
28#include <algorithm>
29#include <utility>
30
31#define DEBUG_TYPE "format-parser"
32
33namespace clang {
34namespace format {
35
36namespace {
37
38void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
39 StringRef Prefix = "", bool PrintText = false) {
40 OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
41 << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
42 bool NewLine = false;
43 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
44 E = Line.Tokens.end();
45 I != E; ++I) {
46 if (NewLine) {
47 OS << Prefix;
48 NewLine = false;
49 }
50 OS << I->Tok->Tok.getName() << "["
51 << "T=" << (unsigned)I->Tok->getType()
52 << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
53 << "\"] ";
54 for (const auto *CI = I->Children.begin(), *CE = I->Children.end();
55 CI != CE; ++CI) {
56 OS << "\n";
57 printLine(OS, *CI, (Prefix + " ").str());
58 NewLine = true;
59 }
60 }
61 if (!NewLine)
62 OS << "\n";
63}
64
65LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
66 printLine(llvm::dbgs(), Line);
67}
68
69class ScopedDeclarationState {
70public:
71 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
72 bool MustBeDeclaration)
73 : Line(Line), Stack(Stack) {
74 Line.MustBeDeclaration = MustBeDeclaration;
75 Stack.push_back(MustBeDeclaration);
76 }
77 ~ScopedDeclarationState() {
78 Stack.pop_back();
79 if (!Stack.empty())
80 Line.MustBeDeclaration = Stack.back();
81 else
82 Line.MustBeDeclaration = true;
83 }
84
85private:
86 UnwrappedLine &Line;
87 llvm::BitVector &Stack;
88};
89
90} // end anonymous namespace
91
92std::ostream &operator<<(std::ostream &Stream, const UnwrappedLine &Line) {
93 llvm::raw_os_ostream OS(Stream);
94 printLine(OS, Line);
95 return Stream;
96}
97
99public:
101 bool SwitchToPreprocessorLines = false)
102 : Parser(Parser), OriginalLines(Parser.CurrentLines) {
103 if (SwitchToPreprocessorLines)
104 Parser.CurrentLines = &Parser.PreprocessorDirectives;
105 else if (!Parser.Line->Tokens.empty())
106 Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
107 PreBlockLine = std::move(Parser.Line);
108 Parser.Line = std::make_unique<UnwrappedLine>();
109 Parser.Line->Level = PreBlockLine->Level;
110 Parser.Line->PPLevel = PreBlockLine->PPLevel;
111 Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
112 Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
113 Parser.Line->UnbracedBodyLevel = PreBlockLine->UnbracedBodyLevel;
114 }
115
117 if (!Parser.Line->Tokens.empty())
118 Parser.addUnwrappedLine();
119 assert(Parser.Line->Tokens.empty());
120 Parser.Line = std::move(PreBlockLine);
121 if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
122 Parser.MustBreakBeforeNextToken = true;
123 Parser.CurrentLines = OriginalLines;
124 }
125
126private:
128
129 std::unique_ptr<UnwrappedLine> PreBlockLine;
130 SmallVectorImpl<UnwrappedLine> *OriginalLines;
131};
132
134public:
136 const FormatStyle &Style, unsigned &LineLevel)
138 Style.BraceWrapping.AfterControlStatement,
139 Style.BraceWrapping.IndentBraces) {}
141 bool WrapBrace, bool IndentBrace)
142 : LineLevel(LineLevel), OldLineLevel(LineLevel) {
143 if (WrapBrace)
144 Parser->addUnwrappedLine();
145 if (IndentBrace)
146 ++LineLevel;
147 }
148 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
149
150private:
151 unsigned &LineLevel;
152 unsigned OldLineLevel;
153};
154
// NOTE(review): the first line of this constructor's signature and one
// parameter line (between FirstStartColumn and Allocator) are not present in
// this view. The initializer list below reads `Tokens` and `Callback`, which
// are presumably the missing parameters -- confirm against the class header.
//
// Constructor: starts with a fresh UnwrappedLine, directs new lines to `Lines`,
// and sets the branch level to -1. Include-guard detection starts as
// IG_Rejected when Style.IndentPPDirectives is PPDIS_None and as IG_Inited
// otherwise. Note that `Tokens(nullptr)` names the member while the argument of
// `AllTokens(Tokens)` names the constructor parameter.
156 SourceManager &SourceMgr, const FormatStyle &Style,
157 const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
159 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
160 IdentifierTable &IdentTable)
161 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
162 CurrentLines(&Lines), Style(Style), IsCpp(Style.isCpp()),
163 LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords),
164 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
165 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
166 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
167 ? IG_Rejected
168 : IG_Inited),
169 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
170 Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {
// Sanity check: the cached IsCpp flag must agree with the language options.
171 assert(IsCpp == LangOpts.CXXOperatorNames);
172}
173
174void UnwrappedLineParser::reset() {
175 PPBranchLevel = -1;
176 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
177 ? IG_Rejected
178 : IG_Inited;
179 IncludeGuardToken = nullptr;
180 Line.reset(new UnwrappedLine);
181 CommentsBeforeNextToken.clear();
182 FormatTok = nullptr;
183 MustBreakBeforeNextToken = false;
184 IsDecltypeAutoFunction = false;
185 PreprocessorDirectives.clear();
186 CurrentLines = &Lines;
187 DeclarationScopeStack.clear();
188 NestedTooDeep.clear();
189 NestedLambdas.clear();
190 PPStack.clear();
191 Line->FirstStartColumn = FirstStartColumn;
192
193 if (!Unexpanded.empty())
194 for (FormatToken *Token : AllTokens)
195 Token->MacroCtx.reset();
196 CurrentExpandedLines.clear();
197 ExpandedLines.clear();
198 Unexpanded.clear();
199 InExpansion = false;
200 Reconstruct.reset();
201}
202
// NOTE(review): the line carrying this function's signature is missing from
// this view, so the body below starts mid-definition; presumably this is the
// parser's top-level entry point -- confirm against the class header.
//
// Each iteration of the outer loop resets the parser, rewinds the token source,
// parses the whole file and hands the resulting lines to Callback. The loop
// repeats as long as PPLevelBranchIndex is non-empty after the exhausted
// trailing entries have been dropped and the last remaining index advanced.
204 IndexedTokenSource TokenSource(AllTokens);
205 Line->FirstStartColumn = FirstStartColumn;
206 do {
207 LLVM_DEBUG(llvm::dbgs() << "----\n");
208 reset();
209 Tokens = &TokenSource;
210 TokenSource.reset();
211
212 readToken();
213 parseFile();
214
215 // If we found an include guard then all preprocessor directives (other than
216 // the guard) are over-indented by one.
217 if (IncludeGuard == IG_Found) {
218 for (auto &Line : Lines)
219 if (Line.InPPDirective && Line.Level > 0)
220 --Line.Level;
221 }
222
223 // Create line with eof token.
224 assert(eof());
225 pushToken(FormatTok);
226 addUnwrappedLine();
227
228 // In a first run, format everything with the lines containing macro calls
229 // replaced by the expansion.
230 if (!ExpandedLines.empty()) {
231 LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
232 for (const auto &Line : Lines) {
233 if (!Line.Tokens.empty()) {
// Expanded lines are looked up by the first token of the unexpanded line.
234 auto it = ExpandedLines.find(Line.Tokens.begin()->Tok);
235 if (it != ExpandedLines.end()) {
236 for (const auto &Expanded : it->second) {
237 LLVM_DEBUG(printDebugInfo(Expanded));
238 Callback.consumeUnwrappedLine(Expanded);
239 }
240 continue;
241 }
242 }
243 LLVM_DEBUG(printDebugInfo(Line));
244 Callback.consumeUnwrappedLine(Line);
245 }
246 Callback.finishRun();
247 }
248
249 LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
250 for (const UnwrappedLine &Line : Lines) {
251 LLVM_DEBUG(printDebugInfo(Line));
252 Callback.consumeUnwrappedLine(Line);
253 }
254 Callback.finishRun();
255 Lines.clear();
// Drop trailing entries whose branches have all been visited, then advance
// the innermost remaining one for the next run.
256 while (!PPLevelBranchIndex.empty() &&
257 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
258 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
259 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
260 }
261 if (!PPLevelBranchIndex.empty()) {
262 ++PPLevelBranchIndex.back();
263 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
264 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
265 }
266 } while (!PPLevelBranchIndex.empty());
267}
268
269void UnwrappedLineParser::parseFile() {
270 // The top-level context in a file always has declarations, except for pre-
271 // processor directives and JavaScript files.
272 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
273 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
274 MustBeDeclaration);
276 parseBracedList();
277 else
278 parseLevel();
279 // Make sure to format the remaining tokens.
280 //
281 // LK_TextProto is special since its top-level is parsed as the body of a
282 // braced list, which does not necessarily have natural line separators such
283 // as a semicolon. Comments after the last entry that have been determined to
284 // not belong to that line, as in:
285 // key: value
286 // // endfile comment
287 // do not have a chance to be put on a line of their own until this point.
288 // Here we add this newline before end-of-file comments.
289 if (Style.Language == FormatStyle::LK_TextProto &&
290 !CommentsBeforeNextToken.empty()) {
291 addUnwrappedLine();
292 }
293 flushComments(true);
294 addUnwrappedLine();
295}
296
297void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
298 do {
299 switch (FormatTok->Tok.getKind()) {
300 case tok::l_brace:
301 return;
302 default:
303 if (FormatTok->is(Keywords.kw_where)) {
304 addUnwrappedLine();
305 nextToken();
306 parseCSharpGenericTypeConstraint();
307 break;
308 }
309 nextToken();
310 break;
311 }
312 } while (!eof());
313}
314
315void UnwrappedLineParser::parseCSharpAttribute() {
316 int UnpairedSquareBrackets = 1;
317 do {
318 switch (FormatTok->Tok.getKind()) {
319 case tok::r_square:
320 nextToken();
321 --UnpairedSquareBrackets;
322 if (UnpairedSquareBrackets == 0) {
323 addUnwrappedLine();
324 return;
325 }
326 break;
327 case tok::l_square:
328 ++UnpairedSquareBrackets;
329 nextToken();
330 break;
331 default:
332 nextToken();
333 break;
334 }
335 } while (!eof());
336}
337
338bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
339 if (!Lines.empty() && Lines.back().InPPDirective)
340 return true;
341
342 const FormatToken *Previous = Tokens->getPreviousToken();
343 return Previous && Previous->is(tok::comment) &&
344 (Previous->IsMultiline || Previous->NewlinesBefore > 0);
345}
346
347/// \brief Parses a level, that is ???.
348/// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
349/// \param IfKind The \p if statement kind in the level.
350/// \param IfLeftBrace The left brace of the \p if block in the level.
351/// \returns true if a simple block of if/else/for/while, or false otherwise.
352/// (A simple block has a single statement.)
353bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
354 IfStmtKind *IfKind,
355 FormatToken **IfLeftBrace) {
356 const bool InRequiresExpression =
357 OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
358 const bool IsPrecededByCommentOrPPDirective =
359 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
360 FormatToken *IfLBrace = nullptr;
361 bool HasDoWhile = false;
362 bool HasLabel = false;
363 unsigned StatementCount = 0;
364 bool SwitchLabelEncountered = false;
365
366 do {
367 if (FormatTok->isAttribute()) {
368 nextToken();
369 if (FormatTok->is(tok::l_paren))
370 parseParens();
371 continue;
372 }
373 tok::TokenKind Kind = FormatTok->Tok.getKind();
374 if (FormatTok->is(TT_MacroBlockBegin))
375 Kind = tok::l_brace;
376 else if (FormatTok->is(TT_MacroBlockEnd))
377 Kind = tok::r_brace;
378
379 auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile,
380 &HasLabel, &StatementCount] {
381 parseStructuralElement(OpeningBrace, IfKind, &IfLBrace,
382 HasDoWhile ? nullptr : &HasDoWhile,
383 HasLabel ? nullptr : &HasLabel);
384 ++StatementCount;
385 assert(StatementCount > 0 && "StatementCount overflow!");
386 };
387
388 switch (Kind) {
389 case tok::comment:
390 nextToken();
391 addUnwrappedLine();
392 break;
393 case tok::l_brace:
394 if (InRequiresExpression) {
395 FormatTok->setFinalizedType(TT_CompoundRequirementLBrace);
396 } else if (FormatTok->Previous &&
397 FormatTok->Previous->ClosesRequiresClause) {
398 // We need the 'default' case here to correctly parse a function
399 // l_brace.
400 ParseDefault();
401 continue;
402 }
403 if (!InRequiresExpression && FormatTok->isNot(TT_MacroBlockBegin)) {
404 if (tryToParseBracedList())
405 continue;
406 FormatTok->setFinalizedType(TT_BlockLBrace);
407 }
408 parseBlock();
409 ++StatementCount;
410 assert(StatementCount > 0 && "StatementCount overflow!");
411 addUnwrappedLine();
412 break;
413 case tok::r_brace:
414 if (OpeningBrace) {
415 if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
416 !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
417 return false;
418 }
419 if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
420 HasDoWhile || IsPrecededByCommentOrPPDirective ||
421 precededByCommentOrPPDirective()) {
422 return false;
423 }
424 const FormatToken *Next = Tokens->peekNextToken();
425 if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
426 return false;
427 if (IfLeftBrace)
428 *IfLeftBrace = IfLBrace;
429 return true;
430 }
431 nextToken();
432 addUnwrappedLine();
433 break;
434 case tok::kw_default: {
435 unsigned StoredPosition = Tokens->getPosition();
436 auto *Next = Tokens->getNextNonComment();
437 FormatTok = Tokens->setPosition(StoredPosition);
438 if (!Next->isOneOf(tok::colon, tok::arrow)) {
439 // default not followed by `:` or `->` is not a case label; treat it
440 // like an identifier.
441 parseStructuralElement();
442 break;
443 }
444 // Else, if it is 'default:', fall through to the case handling.
445 [[fallthrough]];
446 }
447 case tok::kw_case:
448 if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() ||
449 (Style.isJavaScript() && Line->MustBeDeclaration)) {
450 // Proto: there are no switch/case statements
451 // Verilog: Case labels don't have this word. We handle case
452 // labels including default in TokenAnnotator.
453 // JavaScript: A 'case: string' style field declaration.
454 ParseDefault();
455 break;
456 }
457 if (!SwitchLabelEncountered &&
458 (Style.IndentCaseLabels ||
459 (OpeningBrace && OpeningBrace->is(TT_SwitchExpressionLBrace)) ||
460 (Line->InPPDirective && Line->Level == 1))) {
461 ++Line->Level;
462 }
463 SwitchLabelEncountered = true;
464 parseStructuralElement();
465 break;
466 case tok::l_square:
467 if (Style.isCSharp()) {
468 nextToken();
469 parseCSharpAttribute();
470 break;
471 }
472 if (handleCppAttributes())
473 break;
474 [[fallthrough]];
475 default:
476 ParseDefault();
477 break;
478 }
479 } while (!eof());
480
481 return false;
482}
483
484void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
485 // We'll parse forward through the tokens until we hit
486 // a closing brace or eof - note that getNextToken() will
487 // parse macros, so this will magically work inside macro
488 // definitions, too.
489 unsigned StoredPosition = Tokens->getPosition();
490 FormatToken *Tok = FormatTok;
491 const FormatToken *PrevTok = Tok->Previous;
492 // Keep a stack of positions of lbrace tokens. We will
493 // update information about whether an lbrace starts a
494 // braced init list or a different block during the loop.
495 struct StackEntry {
496 FormatToken *Tok;
497 const FormatToken *PrevTok;
498 };
499 SmallVector<StackEntry, 8> LBraceStack;
500 assert(Tok->is(tok::l_brace));
501
502 do {
503 auto *NextTok = Tokens->getNextNonComment();
504
505 if (!Line->InMacroBody && !Style.isTableGen()) {
506 // Skip PPDirective lines (except macro definitions) and comments.
507 while (NextTok->is(tok::hash)) {
508 NextTok = Tokens->getNextToken();
509 if (NextTok->isOneOf(tok::pp_not_keyword, tok::pp_define))
510 break;
511 do {
512 NextTok = Tokens->getNextToken();
513 } while (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof));
514
515 while (NextTok->is(tok::comment))
516 NextTok = Tokens->getNextToken();
517 }
518 }
519
520 switch (Tok->Tok.getKind()) {
521 case tok::l_brace:
522 if (Style.isJavaScript() && PrevTok) {
523 if (PrevTok->isOneOf(tok::colon, tok::less)) {
524 // A ':' indicates this code is in a type, or a braced list
525 // following a label in an object literal ({a: {b: 1}}).
526 // A '<' could be an object used in a comparison, but that is nonsense
527 // code (can never return true), so more likely it is a generic type
528 // argument (`X<{a: string; b: number}>`).
529 // The code below could be confused by semicolons between the
530 // individual members in a type member list, which would normally
531 // trigger BK_Block. In both cases, this must be parsed as an inline
532 // braced init.
534 } else if (PrevTok->is(tok::r_paren)) {
535 // `) { }` can only occur in function or method declarations in JS.
536 Tok->setBlockKind(BK_Block);
537 }
538 } else {
539 Tok->setBlockKind(BK_Unknown);
540 }
541 LBraceStack.push_back({Tok, PrevTok});
542 break;
543 case tok::r_brace:
544 if (LBraceStack.empty())
545 break;
546 if (auto *LBrace = LBraceStack.back().Tok; LBrace->is(BK_Unknown)) {
547 bool ProbablyBracedList = false;
548 if (Style.Language == FormatStyle::LK_Proto) {
549 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
550 } else if (LBrace->isNot(TT_EnumLBrace)) {
551 // Using OriginalColumn to distinguish between ObjC methods and
552 // binary operators is a bit hacky.
553 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
554 NextTok->OriginalColumn == 0;
555
556 // Try to detect a braced list. Note that regardless how we mark inner
557 // braces here, we will overwrite the BlockKind later if we parse a
558 // braced list (where all blocks inside are by default braced lists),
559 // or when we explicitly detect blocks (for example while parsing
560 // lambdas).
561
562 // If we already marked the opening brace as braced list, the closing
563 // must also be part of it.
564 ProbablyBracedList = LBrace->is(TT_BracedListLBrace);
565
566 ProbablyBracedList = ProbablyBracedList ||
567 (Style.isJavaScript() &&
568 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
569 Keywords.kw_as));
570 ProbablyBracedList =
571 ProbablyBracedList ||
572 (IsCpp && (PrevTok->Tok.isLiteral() ||
573 NextTok->isOneOf(tok::l_paren, tok::arrow)));
574
575 // If there is a comma, semicolon or right paren after the closing
576 // brace, we assume this is a braced initializer list.
577 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
578 // braced list in JS.
579 ProbablyBracedList =
580 ProbablyBracedList ||
581 NextTok->isOneOf(tok::comma, tok::period, tok::colon,
582 tok::r_paren, tok::r_square, tok::ellipsis);
583
584 // Distinguish between braced list in a constructor initializer list
585 // followed by constructor body, or just adjacent blocks.
586 ProbablyBracedList =
587 ProbablyBracedList ||
588 (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok &&
589 LBraceStack.back().PrevTok->isOneOf(tok::identifier,
590 tok::greater));
591
592 ProbablyBracedList =
593 ProbablyBracedList ||
594 (NextTok->is(tok::identifier) &&
595 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));
596
597 ProbablyBracedList = ProbablyBracedList ||
598 (NextTok->is(tok::semi) &&
599 (!ExpectClassBody || LBraceStack.size() != 1));
600
601 ProbablyBracedList =
602 ProbablyBracedList ||
603 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
604
605 if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
606 // We can have an array subscript after a braced init
607 // list, but C++11 attributes are expected after blocks.
608 NextTok = Tokens->getNextToken();
609 ProbablyBracedList = NextTok->isNot(tok::l_square);
610 }
611
612 // Cpp macro definition body that is a nonempty braced list or block:
613 if (IsCpp && Line->InMacroBody && PrevTok != FormatTok &&
614 !FormatTok->Previous && NextTok->is(tok::eof) &&
615 // A statement can end with only `;` (simple statement), a block
616 // closing brace (compound statement), or `:` (label statement).
617 // If PrevTok is a block opening brace, Tok ends an empty block.
618 !PrevTok->isOneOf(tok::semi, BK_Block, tok::colon)) {
619 ProbablyBracedList = true;
620 }
621 }
622 const auto BlockKind = ProbablyBracedList ? BK_BracedInit : BK_Block;
623 Tok->setBlockKind(BlockKind);
624 LBrace->setBlockKind(BlockKind);
625 }
626 LBraceStack.pop_back();
627 break;
628 case tok::identifier:
629 if (Tok->isNot(TT_StatementMacro))
630 break;
631 [[fallthrough]];
632 case tok::at:
633 case tok::semi:
634 case tok::kw_if:
635 case tok::kw_while:
636 case tok::kw_for:
637 case tok::kw_switch:
638 case tok::kw_try:
639 case tok::kw___try:
640 if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown))
641 LBraceStack.back().Tok->setBlockKind(BK_Block);
642 break;
643 default:
644 break;
645 }
646
647 PrevTok = Tok;
648 Tok = NextTok;
649 } while (Tok->isNot(tok::eof) && !LBraceStack.empty());
650
651 // Assume other blocks for all unclosed opening braces.
652 for (const auto &Entry : LBraceStack)
653 if (Entry.Tok->is(BK_Unknown))
654 Entry.Tok->setBlockKind(BK_Block);
655
656 FormatTok = Tokens->setPosition(StoredPosition);
657}
658
659// Sets the token type of the directly previous right brace.
660void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) {
661 if (auto Prev = FormatTok->getPreviousNonComment();
662 Prev && Prev->is(tok::r_brace)) {
663 Prev->setFinalizedType(Type);
664 }
665}
666
/// Mixes the hash of \p v into \p seed.
///
/// The value is hashed with \c std::hash<T>; the result is offset by the
/// constant 0x9e3779b9 and by two shifted copies of the current seed, then
/// XORed into the seed, so the outcome depends on the order of combination.
///
/// \param seed Accumulated hash, updated in place.
/// \param v Value to mix in.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  std::hash<T> hasher;
  seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}
672
673size_t UnwrappedLineParser::computePPHash() const {
674 size_t h = 0;
675 for (const auto &i : PPStack) {
676 hash_combine(h, size_t(i.Kind));
677 hash_combine(h, i.Line);
678 }
679 return h;
680}
681
682// Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
683// is not null, subtracts its length (plus the preceding space) when computing
684// the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
685// running the token annotator on it so that we can restore them afterward.
686bool UnwrappedLineParser::mightFitOnOneLine(
687 UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
688 const auto ColumnLimit = Style.ColumnLimit;
689 if (ColumnLimit == 0)
690 return true;
691
692 auto &Tokens = ParsedLine.Tokens;
693 assert(!Tokens.empty());
694
695 const auto *LastToken = Tokens.back().Tok;
696 assert(LastToken);
697
698 SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
699
700 int Index = 0;
701 for (const auto &Token : Tokens) {
702 assert(Token.Tok);
703 auto &SavedToken = SavedTokens[Index++];
704 SavedToken.Tok = new FormatToken;
705 SavedToken.Tok->copyFrom(*Token.Tok);
706 SavedToken.Children = std::move(Token.Children);
707 }
708
709 AnnotatedLine Line(ParsedLine);
710 assert(Line.Last == LastToken);
711
712 TokenAnnotator Annotator(Style, Keywords);
713 Annotator.annotate(Line);
714 Annotator.calculateFormattingInformation(Line);
715
716 auto Length = LastToken->TotalLength;
717 if (OpeningBrace) {
718 assert(OpeningBrace != Tokens.front().Tok);
719 if (auto Prev = OpeningBrace->Previous;
720 Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
721 Length -= ColumnLimit;
722 }
723 Length -= OpeningBrace->TokenText.size() + 1;
724 }
725
726 if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) {
727 assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
728 Length -= FirstToken->TokenText.size() + 1;
729 }
730
731 Index = 0;
732 for (auto &Token : Tokens) {
733 const auto &SavedToken = SavedTokens[Index++];
734 Token.Tok->copyFrom(*SavedToken.Tok);
735 Token.Children = std::move(SavedToken.Children);
736 delete SavedToken.Tok;
737 }
738
739 // If these change PPLevel needs to be used for get correct indentation.
740 assert(!Line.InMacroBody);
741 assert(!Line.InPPDirective);
742 return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
743}
744
745FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
746 unsigned AddLevels, bool MunchSemi,
747 bool KeepBraces,
748 IfStmtKind *IfKind,
749 bool UnindentWhitesmithsBraces) {
750 auto HandleVerilogBlockLabel = [this]() {
751 // ":" name
752 if (Style.isVerilog() && FormatTok->is(tok::colon)) {
753 nextToken();
754 if (Keywords.isVerilogIdentifier(*FormatTok))
755 nextToken();
756 }
757 };
758
759 // Whether this is a Verilog-specific block that has a special header like a
760 // module.
761 const bool VerilogHierarchy =
762 Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok);
763 assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
764 (Style.isVerilog() &&
765 (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
766 "'{' or macro block token expected");
767 FormatToken *Tok = FormatTok;
768 const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
769 auto Index = CurrentLines->size();
770 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
771 FormatTok->setBlockKind(BK_Block);
772
773 // For Whitesmiths mode, jump to the next level prior to skipping over the
774 // braces.
775 if (!VerilogHierarchy && AddLevels > 0 &&
777 ++Line->Level;
778 }
779
780 size_t PPStartHash = computePPHash();
781
782 const unsigned InitialLevel = Line->Level;
783 if (VerilogHierarchy) {
784 AddLevels += parseVerilogHierarchyHeader();
785 } else {
786 nextToken(/*LevelDifference=*/AddLevels);
787 HandleVerilogBlockLabel();
788 }
789
790 // Bail out if there are too many levels. Otherwise, the stack might overflow.
791 if (Line->Level > 300)
792 return nullptr;
793
794 if (MacroBlock && FormatTok->is(tok::l_paren))
795 parseParens();
796
797 size_t NbPreprocessorDirectives =
798 !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
799 addUnwrappedLine();
800 size_t OpeningLineIndex =
801 CurrentLines->empty()
803 : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
804
805 // Whitesmiths is weird here. The brace needs to be indented for the namespace
806 // block, but the block itself may not be indented depending on the style
807 // settings. This allows the format to back up one level in those cases.
808 if (UnindentWhitesmithsBraces)
809 --Line->Level;
810
811 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
812 MustBeDeclaration);
813 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
814 Line->Level += AddLevels;
815
816 FormatToken *IfLBrace = nullptr;
817 const bool SimpleBlock = parseLevel(Tok, IfKind, &IfLBrace);
818
819 if (eof())
820 return IfLBrace;
821
822 if (MacroBlock ? FormatTok->isNot(TT_MacroBlockEnd)
823 : FormatTok->isNot(tok::r_brace)) {
824 Line->Level = InitialLevel;
825 FormatTok->setBlockKind(BK_Block);
826 return IfLBrace;
827 }
828
829 if (FormatTok->is(tok::r_brace)) {
830 FormatTok->setBlockKind(BK_Block);
831 if (Tok->is(TT_NamespaceLBrace))
832 FormatTok->setFinalizedType(TT_NamespaceRBrace);
833 }
834
835 const bool IsFunctionRBrace =
836 FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace);
837
838 auto RemoveBraces = [=]() mutable {
839 if (!SimpleBlock)
840 return false;
841 assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
842 assert(FormatTok->is(tok::r_brace));
843 const bool WrappedOpeningBrace = !Tok->Previous;
844 if (WrappedOpeningBrace && FollowedByComment)
845 return false;
846 const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
847 if (KeepBraces && !HasRequiredIfBraces)
848 return false;
849 if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
850 const FormatToken *Previous = Tokens->getPreviousToken();
851 assert(Previous);
852 if (Previous->is(tok::r_brace) && !Previous->Optional)
853 return false;
854 }
855 assert(!CurrentLines->empty());
856 auto &LastLine = CurrentLines->back();
857 if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
858 return false;
859 if (Tok->is(TT_ElseLBrace))
860 return true;
861 if (WrappedOpeningBrace) {
862 assert(Index > 0);
863 --Index; // The line above the wrapped l_brace.
864 Tok = nullptr;
865 }
866 return mightFitOnOneLine((*CurrentLines)[Index], Tok);
867 };
868 if (RemoveBraces()) {
869 Tok->MatchingParen = FormatTok;
870 FormatTok->MatchingParen = Tok;
871 }
872
873 size_t PPEndHash = computePPHash();
874
875 // Munch the closing brace.
876 nextToken(/*LevelDifference=*/-AddLevels);
877
878 // When this is a function block and there is an unnecessary semicolon
879 // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
880 // it later).
881 if (Style.RemoveSemicolon && IsFunctionRBrace) {
882 while (FormatTok->is(tok::semi)) {
883 FormatTok->Optional = true;
884 nextToken();
885 }
886 }
887
888 HandleVerilogBlockLabel();
889
890 if (MacroBlock && FormatTok->is(tok::l_paren))
891 parseParens();
892
893 Line->Level = InitialLevel;
894
895 if (FormatTok->is(tok::kw_noexcept)) {
896 // A noexcept in a requires expression.
897 nextToken();
898 }
899
900 if (FormatTok->is(tok::arrow)) {
901 // Following the } or noexcept we can find a trailing return type arrow
902 // as part of an implicit conversion constraint.
903 nextToken();
904 parseStructuralElement();
905 }
906
907 if (MunchSemi && FormatTok->is(tok::semi))
908 nextToken();
909
910 if (PPStartHash == PPEndHash) {
911 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
912 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
913 // Update the opening line to add the forward reference as well
914 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
915 CurrentLines->size() - 1;
916 }
917 }
918
919 return IfLBrace;
920}
921
922static bool isGoogScope(const UnwrappedLine &Line) {
923 // FIXME: Closure-library specific stuff should not be hard-coded but be
924 // configurable.
925 if (Line.Tokens.size() < 4)
926 return false;
927 auto I = Line.Tokens.begin();
928 if (I->Tok->TokenText != "goog")
929 return false;
930 ++I;
931 if (I->Tok->isNot(tok::period))
932 return false;
933 ++I;
934 if (I->Tok->TokenText != "scope")
935 return false;
936 ++I;
937 return I->Tok->is(tok::l_paren);
938}
939
940static bool isIIFE(const UnwrappedLine &Line,
941 const AdditionalKeywords &Keywords) {
942 // Look for the start of an immediately invoked anonymous function.
943 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
944 // This is commonly done in JavaScript to create a new, anonymous scope.
945 // Example: (function() { ... })()
946 if (Line.Tokens.size() < 3)
947 return false;
948 auto I = Line.Tokens.begin();
949 if (I->Tok->isNot(tok::l_paren))
950 return false;
951 ++I;
952 if (I->Tok->isNot(Keywords.kw_function))
953 return false;
954 ++I;
955 return I->Tok->is(tok::l_paren);
956}
957
958static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
959 const FormatToken &InitialToken) {
960 tok::TokenKind Kind = InitialToken.Tok.getKind();
961 if (InitialToken.is(TT_NamespaceMacro))
962 Kind = tok::kw_namespace;
963
964 switch (Kind) {
965 case tok::kw_namespace:
966 return Style.BraceWrapping.AfterNamespace;
967 case tok::kw_class:
968 return Style.BraceWrapping.AfterClass;
969 case tok::kw_union:
970 return Style.BraceWrapping.AfterUnion;
971 case tok::kw_struct:
972 return Style.BraceWrapping.AfterStruct;
973 case tok::kw_enum:
974 return Style.BraceWrapping.AfterEnum;
975 default:
976 return false;
977 }
978}
979
980void UnwrappedLineParser::parseChildBlock() {
981 assert(FormatTok->is(tok::l_brace));
982 FormatTok->setBlockKind(BK_Block);
983 const FormatToken *OpeningBrace = FormatTok;
984 nextToken();
985 {
986 bool SkipIndent = (Style.isJavaScript() &&
987 (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
988 ScopedLineState LineState(*this);
989 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
990 /*MustBeDeclaration=*/false);
991 Line->Level += SkipIndent ? 0 : 1;
992 parseLevel(OpeningBrace);
993 flushComments(isOnNewLine(*FormatTok));
994 Line->Level -= SkipIndent ? 0 : 1;
995 }
996 nextToken();
997}
998
999void UnwrappedLineParser::parsePPDirective() {
1000 assert(FormatTok->is(tok::hash) && "'#' expected");
1001 ScopedMacroState MacroState(*Line, Tokens, FormatTok);
1002
1003 nextToken();
1004
1005 if (!FormatTok->Tok.getIdentifierInfo()) {
1006 parsePPUnknown();
1007 return;
1008 }
1009
1010 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
1011 case tok::pp_define:
1012 parsePPDefine();
1013 return;
1014 case tok::pp_if:
1015 parsePPIf(/*IfDef=*/false);
1016 break;
1017 case tok::pp_ifdef:
1018 case tok::pp_ifndef:
1019 parsePPIf(/*IfDef=*/true);
1020 break;
1021 case tok::pp_else:
1022 case tok::pp_elifdef:
1023 case tok::pp_elifndef:
1024 case tok::pp_elif:
1025 parsePPElse();
1026 break;
1027 case tok::pp_endif:
1028 parsePPEndIf();
1029 break;
1030 case tok::pp_pragma:
1031 parsePPPragma();
1032 break;
1033 case tok::pp_error:
1034 case tok::pp_warning:
1035 nextToken();
1036 if (!eof() && Style.isCpp())
1037 FormatTok->setFinalizedType(TT_AfterPPDirective);
1038 [[fallthrough]];
1039 default:
1040 parsePPUnknown();
1041 break;
1042 }
1043}
1044
1045void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1046 size_t Line = CurrentLines->size();
1047 if (CurrentLines == &PreprocessorDirectives)
1048 Line += Lines.size();
1049
1050 if (Unreachable ||
1051 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1052 PPStack.push_back({PP_Unreachable, Line});
1053 } else {
1054 PPStack.push_back({PP_Conditional, Line});
1055 }
1056}
1057
1058void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1059 ++PPBranchLevel;
1060 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1061 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1062 PPLevelBranchIndex.push_back(0);
1063 PPLevelBranchCount.push_back(0);
1064 }
1065 PPChainBranchIndex.push(Unreachable ? -1 : 0);
1066 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1067 conditionalCompilationCondition(Unreachable || Skip);
1068}
1069
1070void UnwrappedLineParser::conditionalCompilationAlternative() {
1071 if (!PPStack.empty())
1072 PPStack.pop_back();
1073 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1074 if (!PPChainBranchIndex.empty())
1075 ++PPChainBranchIndex.top();
1076 conditionalCompilationCondition(
1077 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1078 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1079}
1080
1081void UnwrappedLineParser::conditionalCompilationEnd() {
1082 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1083 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1084 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1085 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1086 }
1087 // Guard against #endif's without #if.
1088 if (PPBranchLevel > -1)
1089 --PPBranchLevel;
1090 if (!PPChainBranchIndex.empty())
1091 PPChainBranchIndex.pop();
1092 if (!PPStack.empty())
1093 PPStack.pop_back();
1094}
1095
1096void UnwrappedLineParser::parsePPIf(bool IfDef) {
1097 bool IfNDef = FormatTok->is(tok::pp_ifndef);
1098 nextToken();
1099 bool Unreachable = false;
1100 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1101 Unreachable = true;
1102 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1103 Unreachable = true;
1104 conditionalCompilationStart(Unreachable);
1105 FormatToken *IfCondition = FormatTok;
1106 // If there's a #ifndef on the first line, and the only lines before it are
1107 // comments, it could be an include guard.
1108 bool MaybeIncludeGuard = IfNDef;
1109 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1110 for (auto &Line : Lines) {
1111 if (Line.Tokens.front().Tok->isNot(tok::comment)) {
1112 MaybeIncludeGuard = false;
1113 IncludeGuard = IG_Rejected;
1114 break;
1115 }
1116 }
1117 }
1118 --PPBranchLevel;
1119 parsePPUnknown();
1120 ++PPBranchLevel;
1121 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1122 IncludeGuard = IG_IfNdefed;
1123 IncludeGuardToken = IfCondition;
1124 }
1125}
1126
1127void UnwrappedLineParser::parsePPElse() {
1128 // If a potential include guard has an #else, it's not an include guard.
1129 if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1130 IncludeGuard = IG_Rejected;
1131 // Don't crash when there is an #else without an #if.
1132 assert(PPBranchLevel >= -1);
1133 if (PPBranchLevel == -1)
1134 conditionalCompilationStart(/*Unreachable=*/true);
1135 conditionalCompilationAlternative();
1136 --PPBranchLevel;
1137 parsePPUnknown();
1138 ++PPBranchLevel;
1139}
1140
1141void UnwrappedLineParser::parsePPEndIf() {
1142 conditionalCompilationEnd();
1143 parsePPUnknown();
1144 // If the #endif of a potential include guard is the last thing in the file,
1145 // then we found an include guard.
1146 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1148 IncludeGuard = IG_Found;
1149 }
1150}
1151
1152void UnwrappedLineParser::parsePPDefine() {
1153 nextToken();
1154
1155 if (!FormatTok->Tok.getIdentifierInfo()) {
1156 IncludeGuard = IG_Rejected;
1157 IncludeGuardToken = nullptr;
1158 parsePPUnknown();
1159 return;
1160 }
1161
1162 if (IncludeGuard == IG_IfNdefed &&
1163 IncludeGuardToken->TokenText == FormatTok->TokenText) {
1164 IncludeGuard = IG_Defined;
1165 IncludeGuardToken = nullptr;
1166 for (auto &Line : Lines) {
1167 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1168 IncludeGuard = IG_Rejected;
1169 break;
1170 }
1171 }
1172 }
1173
1174 // In the context of a define, even keywords should be treated as normal
1175 // identifiers. Setting the kind to identifier is not enough, because we need
1176 // to treat additional keywords like __except as well, which are already
1177 // identifiers. Setting the identifier info to null interferes with include
1178 // guard processing above, and changes preprocessing nesting.
1179 FormatTok->Tok.setKind(tok::identifier);
1181 nextToken();
1182 if (FormatTok->Tok.getKind() == tok::l_paren &&
1183 !FormatTok->hasWhitespaceBefore()) {
1184 parseParens();
1185 }
1187 Line->Level += PPBranchLevel + 1;
1188 addUnwrappedLine();
1189 ++Line->Level;
1190
1191 Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
1192 assert((int)Line->PPLevel >= 0);
1193 Line->InMacroBody = true;
1194
1195 if (Style.SkipMacroDefinitionBody) {
1196 while (!eof()) {
1197 FormatTok->Finalized = true;
1198 FormatTok = Tokens->getNextToken();
1199 }
1200 addUnwrappedLine();
1201 return;
1202 }
1203
1204 // Errors during a preprocessor directive can only affect the layout of the
1205 // preprocessor directive, and thus we ignore them. An alternative approach
1206 // would be to use the same approach we use on the file level (no
1207 // re-indentation if there was a structural error) within the macro
1208 // definition.
1209 parseFile();
1210}
1211
1212void UnwrappedLineParser::parsePPPragma() {
1213 Line->InPragmaDirective = true;
1214 parsePPUnknown();
1215}
1216
1217void UnwrappedLineParser::parsePPUnknown() {
1218 while (!eof())
1219 nextToken();
1221 Line->Level += PPBranchLevel + 1;
1222 addUnwrappedLine();
1223}
1224
1225// Here we exclude certain tokens that are not usually the first token in an
1226// unwrapped line. This is used in attempt to distinguish macro calls without
1227// trailing semicolons from other constructs split to several lines.
1228static bool tokenCanStartNewLine(const FormatToken &Tok) {
1229 // Semicolon can be a null-statement, l_square can be a start of a macro or
1230 // a C++11 attribute, but this doesn't seem to be common.
1231 return !Tok.isOneOf(tok::semi, tok::l_brace,
1232 // Tokens that can only be used as binary operators and a
1233 // part of overloaded operator names.
1234 tok::period, tok::periodstar, tok::arrow, tok::arrowstar,
1235 tok::less, tok::greater, tok::slash, tok::percent,
1236 tok::lessless, tok::greatergreater, tok::equal,
1237 tok::plusequal, tok::minusequal, tok::starequal,
1238 tok::slashequal, tok::percentequal, tok::ampequal,
1239 tok::pipeequal, tok::caretequal, tok::greatergreaterequal,
1240 tok::lesslessequal,
1241 // Colon is used in labels, base class lists, initializer
1242 // lists, range-based for loops, ternary operator, but
1243 // should never be the first token in an unwrapped line.
1244 tok::colon,
1245 // 'noexcept' is a trailing annotation.
1246 tok::kw_noexcept);
1247}
1248
1249static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1250 const FormatToken *FormatTok) {
1251 // FIXME: This returns true for C/C++ keywords like 'struct'.
1252 return FormatTok->is(tok::identifier) &&
1253 (!FormatTok->Tok.getIdentifierInfo() ||
1254 !FormatTok->isOneOf(
1255 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1256 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1257 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1258 Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1259 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1260 Keywords.kw_instanceof, Keywords.kw_interface,
1261 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1262}
1263
1264static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1265 const FormatToken *FormatTok) {
1266 return FormatTok->Tok.isLiteral() ||
1267 FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1268 mustBeJSIdent(Keywords, FormatTok);
1269}
1270
1271// isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1272// when encountered after a value (see mustBeJSIdentOrValue).
1273static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1274 const FormatToken *FormatTok) {
1275 return FormatTok->isOneOf(
1276 tok::kw_return, Keywords.kw_yield,
1277 // conditionals
1278 tok::kw_if, tok::kw_else,
1279 // loops
1280 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1281 // switch/case
1282 tok::kw_switch, tok::kw_case,
1283 // exceptions
1284 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1285 // declaration
1286 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1287 Keywords.kw_async, Keywords.kw_function,
1288 // import/export
1289 Keywords.kw_import, tok::kw_export);
1290}
1291
1292// Checks whether a token is a type in K&R C (aka C78).
1293static bool isC78Type(const FormatToken &Tok) {
1294 return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1295 tok::kw_unsigned, tok::kw_float, tok::kw_double,
1296 tok::identifier);
1297}
1298
1299// This function checks whether a token starts the first parameter declaration
1300// in a K&R C (aka C78) function definition, e.g.:
1301// int f(a, b)
1302// short a, b;
1303// {
1304// return a + b;
1305// }
1306static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1307 const FormatToken *FuncName) {
1308 assert(Tok);
1309 assert(Next);
1310 assert(FuncName);
1311
1312 if (FuncName->isNot(tok::identifier))
1313 return false;
1314
1315 const FormatToken *Prev = FuncName->Previous;
1316 if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1317 return false;
1318
1319 if (!isC78Type(*Tok) &&
1320 !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
1321 return false;
1322 }
1323
1324 if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1325 return false;
1326
1327 Tok = Tok->Previous;
1328 if (!Tok || Tok->isNot(tok::r_paren))
1329 return false;
1330
1331 Tok = Tok->Previous;
1332 if (!Tok || Tok->isNot(tok::identifier))
1333 return false;
1334
1335 return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1336}
1337
1338bool UnwrappedLineParser::parseModuleImport() {
1339 assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
1340
1341 if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
1342 !Token->Tok.getIdentifierInfo() &&
1343 !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) {
1344 return false;
1345 }
1346
1347 nextToken();
1348 while (!eof()) {
1349 if (FormatTok->is(tok::colon)) {
1350 FormatTok->setFinalizedType(TT_ModulePartitionColon);
1351 }
1352 // Handle import <foo/bar.h> as we would an include statement.
1353 else if (FormatTok->is(tok::less)) {
1354 nextToken();
1355 while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1356 // Mark tokens up to the trailing line comments as implicit string
1357 // literals.
1358 if (FormatTok->isNot(tok::comment) &&
1359 !FormatTok->TokenText.starts_with("//")) {
1360 FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1361 }
1362 nextToken();
1363 }
1364 }
1365 if (FormatTok->is(tok::semi)) {
1366 nextToken();
1367 break;
1368 }
1369 nextToken();
1370 }
1371
1372 addUnwrappedLine();
1373 return true;
1374}
1375
1376// readTokenWithJavaScriptASI reads the next token and terminates the current
1377// line if JavaScript Automatic Semicolon Insertion must
1378// happen between the current token and the next token.
1379//
1380// This method is conservative - it cannot cover all edge cases of JavaScript,
1381// but only aims to correctly handle certain well known cases. It *must not*
1382// return true in speculative cases.
1383void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1384 FormatToken *Previous = FormatTok;
1385 readToken();
1386 FormatToken *Next = FormatTok;
1387
1388 bool IsOnSameLine =
1389 CommentsBeforeNextToken.empty()
1390 ? Next->NewlinesBefore == 0
1391 : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1392 if (IsOnSameLine)
1393 return;
1394
1395 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1396 bool PreviousStartsTemplateExpr =
1397 Previous->is(TT_TemplateString) && Previous->TokenText.ends_with("${");
1398 if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1399 // If the line contains an '@' sign, the previous token might be an
1400 // annotation, which can precede another identifier/value.
1401 bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1402 return LineNode.Tok->is(tok::at);
1403 });
1404 if (HasAt)
1405 return;
1406 }
1407 if (Next->is(tok::exclaim) && PreviousMustBeValue)
1408 return addUnwrappedLine();
1409 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1410 bool NextEndsTemplateExpr =
1411 Next->is(TT_TemplateString) && Next->TokenText.starts_with("}");
1412 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1413 (PreviousMustBeValue ||
1414 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1415 tok::minusminus))) {
1416 return addUnwrappedLine();
1417 }
1418 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1419 isJSDeclOrStmt(Keywords, Next)) {
1420 return addUnwrappedLine();
1421 }
1422}
1423
1424void UnwrappedLineParser::parseStructuralElement(
1425 const FormatToken *OpeningBrace, IfStmtKind *IfKind,
1426 FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1427 if (Style.Language == FormatStyle::LK_TableGen &&
1428 FormatTok->is(tok::pp_include)) {
1429 nextToken();
1430 if (FormatTok->is(tok::string_literal))
1431 nextToken();
1432 addUnwrappedLine();
1433 return;
1434 }
1435
1436 if (IsCpp) {
1437 while (FormatTok->is(tok::l_square) && handleCppAttributes()) {
1438 }
1439 } else if (Style.isVerilog()) {
1440 if (Keywords.isVerilogStructuredProcedure(*FormatTok)) {
1441 parseForOrWhileLoop(/*HasParens=*/false);
1442 return;
1443 }
1444 if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) {
1445 parseForOrWhileLoop();
1446 return;
1447 }
1448 if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
1449 Keywords.kw_assume, Keywords.kw_cover)) {
1450 parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
1451 return;
1452 }
1453
1454 // Skip things that can exist before keywords like 'if' and 'case'.
1455 while (true) {
1456 if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique,
1457 Keywords.kw_unique0)) {
1458 nextToken();
1459 } else if (FormatTok->is(tok::l_paren) &&
1460 Tokens->peekNextToken()->is(tok::star)) {
1461 parseParens();
1462 } else {
1463 break;
1464 }
1465 }
1466 }
1467
1468 // Tokens that only make sense at the beginning of a line.
1469 if (FormatTok->isAccessSpecifierKeyword()) {
1470 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1471 Style.isCSharp()) {
1472 nextToken();
1473 } else {
1474 parseAccessSpecifier();
1475 }
1476 return;
1477 }
1478 switch (FormatTok->Tok.getKind()) {
1479 case tok::kw_asm:
1480 nextToken();
1481 if (FormatTok->is(tok::l_brace)) {
1482 FormatTok->setFinalizedType(TT_InlineASMBrace);
1483 nextToken();
1484 while (FormatTok && !eof()) {
1485 if (FormatTok->is(tok::r_brace)) {
1486 FormatTok->setFinalizedType(TT_InlineASMBrace);
1487 nextToken();
1488 addUnwrappedLine();
1489 break;
1490 }
1491 FormatTok->Finalized = true;
1492 nextToken();
1493 }
1494 }
1495 break;
1496 case tok::kw_namespace:
1497 parseNamespace();
1498 return;
1499 case tok::kw_if: {
1500 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1501 // field/method declaration.
1502 break;
1503 }
1504 FormatToken *Tok = parseIfThenElse(IfKind);
1505 if (IfLeftBrace)
1506 *IfLeftBrace = Tok;
1507 return;
1508 }
1509 case tok::kw_for:
1510 case tok::kw_while:
1511 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1512 // field/method declaration.
1513 break;
1514 }
1515 parseForOrWhileLoop();
1516 return;
1517 case tok::kw_do:
1518 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1519 // field/method declaration.
1520 break;
1521 }
1522 parseDoWhile();
1523 if (HasDoWhile)
1524 *HasDoWhile = true;
1525 return;
1526 case tok::kw_switch:
1527 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1528 // 'switch: string' field declaration.
1529 break;
1530 }
1531 parseSwitch(/*IsExpr=*/false);
1532 return;
1533 case tok::kw_default: {
1534 // In Verilog default along with other labels are handled in the next loop.
1535 if (Style.isVerilog())
1536 break;
1537 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1538 // 'default: string' field declaration.
1539 break;
1540 }
1541 auto *Default = FormatTok;
1542 nextToken();
1543 if (FormatTok->is(tok::colon)) {
1544 FormatTok->setFinalizedType(TT_CaseLabelColon);
1545 parseLabel();
1546 return;
1547 }
1548 if (FormatTok->is(tok::arrow)) {
1549 FormatTok->setFinalizedType(TT_CaseLabelArrow);
1550 Default->setFinalizedType(TT_SwitchExpressionLabel);
1551 parseLabel();
1552 return;
1553 }
1554 // e.g. "default void f() {}" in a Java interface.
1555 break;
1556 }
1557 case tok::kw_case:
1558 // Proto: there are no switch/case statements.
1559 if (Style.Language == FormatStyle::LK_Proto) {
1560 nextToken();
1561 return;
1562 }
1563 if (Style.isVerilog()) {
1564 parseBlock();
1565 addUnwrappedLine();
1566 return;
1567 }
1568 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1569 // 'case: string' field declaration.
1570 nextToken();
1571 break;
1572 }
1573 parseCaseLabel();
1574 return;
1575 case tok::kw_goto:
1576 nextToken();
1577 if (FormatTok->is(tok::kw_case))
1578 nextToken();
1579 break;
1580 case tok::kw_try:
1581 case tok::kw___try:
1582 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1583 // field/method declaration.
1584 break;
1585 }
1586 parseTryCatch();
1587 return;
1588 case tok::kw_extern:
1589 nextToken();
1590 if (Style.isVerilog()) {
1591 // In Verilog an extern module declaration looks like a start of module.
1592 // But there is no body and endmodule. So we handle it separately.
1593 if (Keywords.isVerilogHierarchy(*FormatTok)) {
1594 parseVerilogHierarchyHeader();
1595 return;
1596 }
1597 } else if (FormatTok->is(tok::string_literal)) {
1598 nextToken();
1599 if (FormatTok->is(tok::l_brace)) {
1601 addUnwrappedLine();
1602 // Either we indent or for backwards compatibility we follow the
1603 // AfterExternBlock style.
1604 unsigned AddLevels =
1607 Style.IndentExternBlock ==
1609 ? 1u
1610 : 0u;
1611 parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1612 addUnwrappedLine();
1613 return;
1614 }
1615 }
1616 break;
1617 case tok::kw_export:
1618 if (Style.isJavaScript()) {
1619 parseJavaScriptEs6ImportExport();
1620 return;
1621 }
1622 if (IsCpp) {
1623 nextToken();
1624 if (FormatTok->is(tok::kw_namespace)) {
1625 parseNamespace();
1626 return;
1627 }
1628 if (FormatTok->is(tok::l_brace)) {
1629 parseCppExportBlock();
1630 return;
1631 }
1632 if (FormatTok->is(Keywords.kw_import) && parseModuleImport())
1633 return;
1634 }
1635 break;
1636 case tok::kw_inline:
1637 nextToken();
1638 if (FormatTok->is(tok::kw_namespace)) {
1639 parseNamespace();
1640 return;
1641 }
1642 break;
1643 case tok::identifier:
1644 if (FormatTok->is(TT_ForEachMacro)) {
1645 parseForOrWhileLoop();
1646 return;
1647 }
1648 if (FormatTok->is(TT_MacroBlockBegin)) {
1649 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1650 /*MunchSemi=*/false);
1651 return;
1652 }
1653 if (FormatTok->is(Keywords.kw_import)) {
1654 if (Style.isJavaScript()) {
1655 parseJavaScriptEs6ImportExport();
1656 return;
1657 }
1658 if (Style.Language == FormatStyle::LK_Proto) {
1659 nextToken();
1660 if (FormatTok->is(tok::kw_public))
1661 nextToken();
1662 if (FormatTok->isNot(tok::string_literal))
1663 return;
1664 nextToken();
1665 if (FormatTok->is(tok::semi))
1666 nextToken();
1667 addUnwrappedLine();
1668 return;
1669 }
1670 if (IsCpp && parseModuleImport())
1671 return;
1672 }
1673 if (IsCpp && FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1674 Keywords.kw_slots, Keywords.kw_qslots)) {
1675 nextToken();
1676 if (FormatTok->is(tok::colon)) {
1677 nextToken();
1678 addUnwrappedLine();
1679 return;
1680 }
1681 }
1682 if (IsCpp && FormatTok->is(TT_StatementMacro)) {
1683 parseStatementMacro();
1684 return;
1685 }
1686 if (IsCpp && FormatTok->is(TT_NamespaceMacro)) {
1687 parseNamespace();
1688 return;
1689 }
1690 // In Verilog labels can be any expression, so we don't do them here.
1691 // JS doesn't have macros, and within classes colons indicate fields, not
1692 // labels.
1693 // TableGen doesn't have labels.
1694 if (!Style.isJavaScript() && !Style.isVerilog() && !Style.isTableGen() &&
1695 Tokens->peekNextToken()->is(tok::colon) && !Line->MustBeDeclaration) {
1696 nextToken();
1697 if (!Line->InMacroBody || CurrentLines->size() > 1)
1698 Line->Tokens.begin()->Tok->MustBreakBefore = true;
1699 FormatTok->setFinalizedType(TT_GotoLabelColon);
1700 parseLabel(!Style.IndentGotoLabels);
1701 if (HasLabel)
1702 *HasLabel = true;
1703 return;
1704 }
1705 // In all other cases, parse the declaration.
1706 break;
1707 default:
1708 break;
1709 }
1710
1711 for (const bool InRequiresExpression =
1712 OpeningBrace && OpeningBrace->isOneOf(TT_RequiresExpressionLBrace,
1713 TT_CompoundRequirementLBrace);
1714 !eof();) {
1715 if (IsCpp && FormatTok->isCppAlternativeOperatorKeyword()) {
1716 if (auto *Next = Tokens->peekNextToken(/*SkipComment=*/true);
1717 Next && Next->isBinaryOperator()) {
1718 FormatTok->Tok.setKind(tok::identifier);
1719 }
1720 }
1721 const FormatToken *Previous = FormatTok->Previous;
1722 switch (FormatTok->Tok.getKind()) {
1723 case tok::at:
1724 nextToken();
1725 if (FormatTok->is(tok::l_brace)) {
1726 nextToken();
1727 parseBracedList();
1728 break;
1729 } else if (Style.Language == FormatStyle::LK_Java &&
1730 FormatTok->is(Keywords.kw_interface)) {
1731 nextToken();
1732 break;
1733 }
1734 switch (FormatTok->Tok.getObjCKeywordID()) {
1735 case tok::objc_public:
1736 case tok::objc_protected:
1737 case tok::objc_package:
1738 case tok::objc_private:
1739 return parseAccessSpecifier();
1740 case tok::objc_interface:
1741 case tok::objc_implementation:
1742 return parseObjCInterfaceOrImplementation();
1743 case tok::objc_protocol:
1744 if (parseObjCProtocol())
1745 return;
1746 break;
1747 case tok::objc_end:
1748 return; // Handled by the caller.
1749 case tok::objc_optional:
1750 case tok::objc_required:
1751 nextToken();
1752 addUnwrappedLine();
1753 return;
1754 case tok::objc_autoreleasepool:
1755 nextToken();
1756 if (FormatTok->is(tok::l_brace)) {
1759 addUnwrappedLine();
1760 }
1761 parseBlock();
1762 }
1763 addUnwrappedLine();
1764 return;
1765 case tok::objc_synchronized:
1766 nextToken();
1767 if (FormatTok->is(tok::l_paren)) {
1768 // Skip synchronization object
1769 parseParens();
1770 }
1771 if (FormatTok->is(tok::l_brace)) {
1774 addUnwrappedLine();
1775 }
1776 parseBlock();
1777 }
1778 addUnwrappedLine();
1779 return;
1780 case tok::objc_try:
1781 // This branch isn't strictly necessary (the kw_try case below would
1782 // do this too after the tok::at is parsed above). But be explicit.
1783 parseTryCatch();
1784 return;
1785 default:
1786 break;
1787 }
1788 break;
1789 case tok::kw_requires: {
1790 if (IsCpp) {
1791 bool ParsedClause = parseRequires();
1792 if (ParsedClause)
1793 return;
1794 } else {
1795 nextToken();
1796 }
1797 break;
1798 }
1799 case tok::kw_enum:
1800 // Ignore if this is part of "template <enum ..." or "... -> enum" or
1801 // "template <..., enum ...>".
1802 if (Previous && Previous->isOneOf(tok::less, tok::arrow, tok::comma)) {
1803 nextToken();
1804 break;
1805 }
1806
1807 // parseEnum falls through and does not yet add an unwrapped line as an
1808 // enum definition can start a structural element.
1809 if (!parseEnum())
1810 break;
1811 // This only applies to C++ and Verilog.
1812 if (!IsCpp && !Style.isVerilog()) {
1813 addUnwrappedLine();
1814 return;
1815 }
1816 break;
1817 case tok::kw_typedef:
1818 nextToken();
1819 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1820 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1821 Keywords.kw_CF_CLOSED_ENUM,
1822 Keywords.kw_NS_CLOSED_ENUM)) {
1823 parseEnum();
1824 }
1825 break;
1826 case tok::kw_class:
1827 if (Style.isVerilog()) {
1828 parseBlock();
1829 addUnwrappedLine();
1830 return;
1831 }
1832 if (Style.isTableGen()) {
1833 // Do nothing special. In this case the l_brace becomes FunctionLBrace.
1834 // This is same as def and so on.
1835 nextToken();
1836 break;
1837 }
1838 [[fallthrough]];
1839 case tok::kw_struct:
1840 case tok::kw_union:
1841 if (parseStructLike())
1842 return;
1843 break;
1844 case tok::kw_decltype:
1845 nextToken();
1846 if (FormatTok->is(tok::l_paren)) {
1847 parseParens();
1848 assert(FormatTok->Previous);
1849 if (FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto,
1850 tok::l_paren)) {
1851 Line->SeenDecltypeAuto = true;
1852 }
1853 }
1854 break;
1855 case tok::period:
1856 nextToken();
1857 // In Java, classes have an implicit static member "class".
1858 if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1859 FormatTok->is(tok::kw_class)) {
1860 nextToken();
1861 }
1862 if (Style.isJavaScript() && FormatTok &&
1863 FormatTok->Tok.getIdentifierInfo()) {
1864 // JavaScript only has pseudo keywords, all keywords are allowed to
1865 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1866 nextToken();
1867 }
1868 break;
1869 case tok::semi:
1870 nextToken();
1871 addUnwrappedLine();
1872 return;
1873 case tok::r_brace:
1874 addUnwrappedLine();
1875 return;
1876 case tok::l_paren: {
1877 parseParens();
1878 // Break the unwrapped line if a K&R C function definition has a parameter
1879 // declaration.
1880 if (OpeningBrace || !IsCpp || !Previous || eof())
1881 break;
1882 if (isC78ParameterDecl(FormatTok,
1883 Tokens->peekNextToken(/*SkipComment=*/true),
1884 Previous)) {
1885 addUnwrappedLine();
1886 return;
1887 }
1888 break;
1889 }
1890 case tok::kw_operator:
1891 nextToken();
1892 if (FormatTok->isBinaryOperator())
1893 nextToken();
1894 break;
1895 case tok::caret:
1896 nextToken();
1897 // Block return type.
1898 if (FormatTok->Tok.isAnyIdentifier() || FormatTok->isTypeName(LangOpts)) {
1899 nextToken();
1900 // Return types: pointers are ok too.
1901 while (FormatTok->is(tok::star))
1902 nextToken();
1903 }
1904 // Block argument list.
1905 if (FormatTok->is(tok::l_paren))
1906 parseParens();
1907 // Block body.
1908 if (FormatTok->is(tok::l_brace))
1909 parseChildBlock();
1910 break;
1911 case tok::l_brace:
1912 if (InRequiresExpression)
1913 FormatTok->setFinalizedType(TT_BracedListLBrace);
1914 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1915 IsDecltypeAutoFunction = Line->SeenDecltypeAuto;
1916 // A block outside of parentheses must be the last part of a
1917 // structural element.
1918 // FIXME: Figure out cases where this is not true, and add projections
1919 // for them (the one we know is missing are lambdas).
1920 if (Style.Language == FormatStyle::LK_Java &&
1921 Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1922 // If necessary, we could set the type to something different than
1923 // TT_FunctionLBrace.
1926 addUnwrappedLine();
1927 }
1928 } else if (Style.BraceWrapping.AfterFunction) {
1929 addUnwrappedLine();
1930 }
1931 if (!Previous || Previous->isNot(TT_TypeDeclarationParen))
1932 FormatTok->setFinalizedType(TT_FunctionLBrace);
1933 parseBlock();
1934 IsDecltypeAutoFunction = false;
1935 addUnwrappedLine();
1936 return;
1937 }
1938 // Otherwise this was a braced init list, and the structural
1939 // element continues.
1940 break;
1941 case tok::kw_try:
1942 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1943 // field/method declaration.
1944 nextToken();
1945 break;
1946 }
1947 // We arrive here when parsing function-try blocks.
1948 if (Style.BraceWrapping.AfterFunction)
1949 addUnwrappedLine();
1950 parseTryCatch();
1951 return;
1952 case tok::identifier: {
1953 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1954 Line->MustBeDeclaration) {
1955 addUnwrappedLine();
1956 parseCSharpGenericTypeConstraint();
1957 break;
1958 }
1959 if (FormatTok->is(TT_MacroBlockEnd)) {
1960 addUnwrappedLine();
1961 return;
1962 }
1963
1964 // Function declarations (as opposed to function expressions) are parsed
1965 // on their own unwrapped line by continuing this loop. Function
1966 // expressions (functions that are not on their own line) must not create
1967 // a new unwrapped line, so they are special cased below.
1968 size_t TokenCount = Line->Tokens.size();
1969 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1970 (TokenCount > 1 ||
1971 (TokenCount == 1 &&
1972 Line->Tokens.front().Tok->isNot(Keywords.kw_async)))) {
1973 tryToParseJSFunction();
1974 break;
1975 }
1976 if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1977 FormatTok->is(Keywords.kw_interface)) {
1978 if (Style.isJavaScript()) {
1979 // In JavaScript/TypeScript, "interface" can be used as a standalone
1980 // identifier, e.g. in `var interface = 1;`. If "interface" is
1981 // followed by another identifier, it is very likely to be an actual
1982 // interface declaration.
1983 unsigned StoredPosition = Tokens->getPosition();
1984 FormatToken *Next = Tokens->getNextToken();
1985 FormatTok = Tokens->setPosition(StoredPosition);
1986 if (!mustBeJSIdent(Keywords, Next)) {
1987 nextToken();
1988 break;
1989 }
1990 }
1991 parseRecord();
1992 addUnwrappedLine();
1993 return;
1994 }
1995
1996 if (Style.isVerilog()) {
1997 if (FormatTok->is(Keywords.kw_table)) {
1998 parseVerilogTable();
1999 return;
2000 }
2001 if (Keywords.isVerilogBegin(*FormatTok) ||
2002 Keywords.isVerilogHierarchy(*FormatTok)) {
2003 parseBlock();
2004 addUnwrappedLine();
2005 return;
2006 }
2007 }
2008
2009 if (!IsCpp && FormatTok->is(Keywords.kw_interface)) {
2010 if (parseStructLike())
2011 return;
2012 break;
2013 }
2014
2015 if (IsCpp && FormatTok->is(TT_StatementMacro)) {
2016 parseStatementMacro();
2017 return;
2018 }
2019
2020 // See if the following token should start a new unwrapped line.
2021 StringRef Text = FormatTok->TokenText;
2022
2023 FormatToken *PreviousToken = FormatTok;
2024 nextToken();
2025
2026 // JS doesn't have macros, and within classes colons indicate fields, not
2027 // labels.
2028 if (Style.isJavaScript())
2029 break;
2030
2031 auto OneTokenSoFar = [&]() {
2032 auto I = Line->Tokens.begin(), E = Line->Tokens.end();
2033 while (I != E && I->Tok->is(tok::comment))
2034 ++I;
2035 if (Style.isVerilog())
2036 while (I != E && I->Tok->is(tok::hash))
2037 ++I;
2038 return I != E && (++I == E);
2039 };
2040 if (OneTokenSoFar()) {
2041 // Recognize function-like macro usages without trailing semicolon as
2042 // well as free-standing macros like Q_OBJECT.
2043 bool FunctionLike = FormatTok->is(tok::l_paren);
2044 if (FunctionLike)
2045 parseParens();
2046
2047 bool FollowedByNewline =
2048 CommentsBeforeNextToken.empty()
2049 ? FormatTok->NewlinesBefore > 0
2050 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
2051
2052 if (FollowedByNewline &&
2053 (Text.size() >= 5 ||
2054 (FunctionLike && FormatTok->isNot(tok::l_paren))) &&
2055 tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
2056 if (PreviousToken->isNot(TT_UntouchableMacroFunc))
2057 PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
2058 addUnwrappedLine();
2059 return;
2060 }
2061 }
2062 break;
2063 }
2064 case tok::equal:
2065 if ((Style.isJavaScript() || Style.isCSharp()) &&
2066 FormatTok->is(TT_FatArrow)) {
2067 tryToParseChildBlock();
2068 break;
2069 }
2070
2071 nextToken();
2072 if (FormatTok->is(tok::l_brace)) {
2073 // Block kind should probably be set to BK_BracedInit for any language.
2074 // C# needs this change to ensure that array initialisers and object
2075 // initialisers are indented the same way.
2076 if (Style.isCSharp())
2077 FormatTok->setBlockKind(BK_BracedInit);
2078 // TableGen's defset statement has syntax of the form,
2079 // `defset <type> <name> = { <statement>... }`
2080 if (Style.isTableGen() &&
2081 Line->Tokens.begin()->Tok->is(Keywords.kw_defset)) {
2082 FormatTok->setFinalizedType(TT_FunctionLBrace);
2083 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2084 /*MunchSemi=*/false);
2085 addUnwrappedLine();
2086 break;
2087 }
2088 nextToken();
2089 parseBracedList();
2090 } else if (Style.Language == FormatStyle::LK_Proto &&
2091 FormatTok->is(tok::less)) {
2092 nextToken();
2093 parseBracedList(/*IsAngleBracket=*/true);
2094 }
2095 break;
2096 case tok::l_square:
2097 parseSquare();
2098 break;
2099 case tok::kw_new:
2100 parseNew();
2101 break;
2102 case tok::kw_switch:
2103 if (Style.Language == FormatStyle::LK_Java)
2104 parseSwitch(/*IsExpr=*/true);
2105 else
2106 nextToken();
2107 break;
2108 case tok::kw_case:
2109 // Proto: there are no switch/case statements.
2110 if (Style.Language == FormatStyle::LK_Proto) {
2111 nextToken();
2112 return;
2113 }
2114 // In Verilog switch is called case.
2115 if (Style.isVerilog()) {
2116 parseBlock();
2117 addUnwrappedLine();
2118 return;
2119 }
2120 if (Style.isJavaScript() && Line->MustBeDeclaration) {
2121 // 'case: string' field declaration.
2122 nextToken();
2123 break;
2124 }
2125 parseCaseLabel();
2126 break;
2127 case tok::kw_default:
2128 nextToken();
2129 if (Style.isVerilog()) {
2130 if (FormatTok->is(tok::colon)) {
2131 // The label will be handled in the next iteration.
2132 break;
2133 }
2134 if (FormatTok->is(Keywords.kw_clocking)) {
2135 // A default clocking block.
2136 parseBlock();
2137 addUnwrappedLine();
2138 return;
2139 }
2140 parseVerilogCaseLabel();
2141 return;
2142 }
2143 break;
2144 case tok::colon:
2145 nextToken();
2146 if (Style.isVerilog()) {
2147 parseVerilogCaseLabel();
2148 return;
2149 }
2150 break;
2151 case tok::greater:
2152 nextToken();
2153 if (FormatTok->is(tok::l_brace))
2154 FormatTok->Previous->setFinalizedType(TT_TemplateCloser);
2155 break;
2156 default:
2157 nextToken();
2158 break;
2159 }
2160 }
2161}
2162
2163bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2164 assert(FormatTok->is(tok::l_brace));
2165 if (!Style.isCSharp())
2166 return false;
2167 // See if it's a property accessor.
2168 if (!FormatTok->Previous || FormatTok->Previous->isNot(tok::identifier))
2169 return false;
2170
2171 // See if we are inside a property accessor.
2172 //
2173 // Record the current tokenPosition so that we can advance and
2174 // reset the current token. `Next` is not set yet so we need
2175 // another way to advance along the token stream.
2176 unsigned int StoredPosition = Tokens->getPosition();
2177 FormatToken *Tok = Tokens->getNextToken();
2178
2179 // A trivial property accessor is of the form:
2180 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2181 // Track these as they do not require line breaks to be introduced.
2182 bool HasSpecialAccessor = false;
2183 bool IsTrivialPropertyAccessor = true;
2184 bool HasAttribute = false;
2185 while (!eof()) {
2186 if (const bool IsAccessorKeyword =
2187 Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set);
2188 IsAccessorKeyword || Tok->isAccessSpecifierKeyword() ||
2189 Tok->isOneOf(tok::l_square, tok::semi, Keywords.kw_internal)) {
2190 if (IsAccessorKeyword)
2191 HasSpecialAccessor = true;
2192 else if (Tok->is(tok::l_square))
2193 HasAttribute = true;
2194 Tok = Tokens->getNextToken();
2195 continue;
2196 }
2197 if (Tok->isNot(tok::r_brace))
2198 IsTrivialPropertyAccessor = false;
2199 break;
2200 }
2201
2202 if (!HasSpecialAccessor || HasAttribute) {
2203 Tokens->setPosition(StoredPosition);
2204 return false;
2205 }
2206
2207 // Try to parse the property accessor:
2208 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2209 Tokens->setPosition(StoredPosition);
2210 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2211 addUnwrappedLine();
2212 nextToken();
2213 do {
2214 switch (FormatTok->Tok.getKind()) {
2215 case tok::r_brace:
2216 nextToken();
2217 if (FormatTok->is(tok::equal)) {
2218 while (!eof() && FormatTok->isNot(tok::semi))
2219 nextToken();
2220 nextToken();
2221 }
2222 addUnwrappedLine();
2223 return true;
2224 case tok::l_brace:
2225 ++Line->Level;
2226 parseBlock(/*MustBeDeclaration=*/true);
2227 addUnwrappedLine();
2228 --Line->Level;
2229 break;
2230 case tok::equal:
2231 if (FormatTok->is(TT_FatArrow)) {
2232 ++Line->Level;
2233 do {
2234 nextToken();
2235 } while (!eof() && FormatTok->isNot(tok::semi));
2236 nextToken();
2237 addUnwrappedLine();
2238 --Line->Level;
2239 break;
2240 }
2241 nextToken();
2242 break;
2243 default:
2244 if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
2245 Keywords.kw_set) &&
2246 !IsTrivialPropertyAccessor) {
2247 // Non-trivial get/set needs to be on its own line.
2248 addUnwrappedLine();
2249 }
2250 nextToken();
2251 }
2252 } while (!eof());
2253
2254 // Unreachable for well-formed code (paired '{' and '}').
2255 return true;
2256}
2257
2258bool UnwrappedLineParser::tryToParseLambda() {
2259 assert(FormatTok->is(tok::l_square));
2260 if (!IsCpp) {
2261 nextToken();
2262 return false;
2263 }
2264 FormatToken &LSquare = *FormatTok;
2265 if (!tryToParseLambdaIntroducer())
2266 return false;
2267
2268 bool SeenArrow = false;
2269 bool InTemplateParameterList = false;
2270
2271 while (FormatTok->isNot(tok::l_brace)) {
2272 if (FormatTok->isTypeName(LangOpts) || FormatTok->isAttribute()) {
2273 nextToken();
2274 continue;
2275 }
2276 switch (FormatTok->Tok.getKind()) {
2277 case tok::l_brace:
2278 break;
2279 case tok::l_paren:
2280 parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
2281 break;
2282 case tok::l_square:
2283 parseSquare();
2284 break;
2285 case tok::less:
2286 assert(FormatTok->Previous);
2287 if (FormatTok->Previous->is(tok::r_square))
2288 InTemplateParameterList = true;
2289 nextToken();
2290 break;
2291 case tok::kw_auto:
2292 case tok::kw_class:
2293 case tok::kw_struct:
2294 case tok::kw_union:
2295 case tok::kw_template:
2296 case tok::kw_typename:
2297 case tok::amp:
2298 case tok::star:
2299 case tok::kw_const:
2300 case tok::kw_constexpr:
2301 case tok::kw_consteval:
2302 case tok::comma:
2303 case tok::greater:
2304 case tok::identifier:
2305 case tok::numeric_constant:
2306 case tok::coloncolon:
2307 case tok::kw_mutable:
2308 case tok::kw_noexcept:
2309 case tok::kw_static:
2310 nextToken();
2311 break;
2312 // Specialization of a template with an integer parameter can contain
2313 // arithmetic, logical, comparison and ternary operators.
2314 //
2315 // FIXME: This also accepts sequences of operators that are not in the scope
2316 // of a template argument list.
2317 //
2318 // In a C++ lambda a template type can only occur after an arrow. We use
2319 // this as an heuristic to distinguish between Objective-C expressions
2320 // followed by an `a->b` expression, such as:
2321 // ([obj func:arg] + a->b)
2322 // Otherwise the code below would parse as a lambda.
2323 case tok::plus:
2324 case tok::minus:
2325 case tok::exclaim:
2326 case tok::tilde:
2327 case tok::slash:
2328 case tok::percent:
2329 case tok::lessless:
2330 case tok::pipe:
2331 case tok::pipepipe:
2332 case tok::ampamp:
2333 case tok::caret:
2334 case tok::equalequal:
2335 case tok::exclaimequal:
2336 case tok::greaterequal:
2337 case tok::lessequal:
2338 case tok::question:
2339 case tok::colon:
2340 case tok::ellipsis:
2341 case tok::kw_true:
2342 case tok::kw_false:
2343 if (SeenArrow || InTemplateParameterList) {
2344 nextToken();
2345 break;
2346 }
2347 return true;
2348 case tok::arrow:
2349 // This might or might not actually be a lambda arrow (this could be an
2350 // ObjC method invocation followed by a dereferencing arrow). We might
2351 // reset this back to TT_Unknown in TokenAnnotator.
2352 FormatTok->setFinalizedType(TT_LambdaArrow);
2353 SeenArrow = true;
2354 nextToken();
2355 break;
2356 case tok::kw_requires: {
2357 auto *RequiresToken = FormatTok;
2358 nextToken();
2359 parseRequiresClause(RequiresToken);
2360 break;
2361 }
2362 case tok::equal:
2363 if (!InTemplateParameterList)
2364 return true;
2365 nextToken();
2366 break;
2367 default:
2368 return true;
2369 }
2370 }
2371
2372 FormatTok->setFinalizedType(TT_LambdaLBrace);
2373 LSquare.setFinalizedType(TT_LambdaLSquare);
2374
2375 NestedLambdas.push_back(Line->SeenDecltypeAuto);
2376 parseChildBlock();
2377 assert(!NestedLambdas.empty());
2378 NestedLambdas.pop_back();
2379
2380 return true;
2381}
2382
2383bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2384 const FormatToken *Previous = FormatTok->Previous;
2385 const FormatToken *LeftSquare = FormatTok;
2386 nextToken();
2387 if ((Previous && ((Previous->Tok.getIdentifierInfo() &&
2388 !Previous->isOneOf(tok::kw_return, tok::kw_co_await,
2389 tok::kw_co_yield, tok::kw_co_return)) ||
2390 Previous->closesScope())) ||
2391 LeftSquare->isCppStructuredBinding(IsCpp)) {
2392 return false;
2393 }
2394 if (FormatTok->is(tok::l_square) || tok::isLiteral(FormatTok->Tok.getKind()))
2395 return false;
2396 if (FormatTok->is(tok::r_square)) {
2397 const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
2398 if (Next->is(tok::greater))
2399 return false;
2400 }
2401 parseSquare(/*LambdaIntroducer=*/true);
2402 return true;
2403}
2404
2405void UnwrappedLineParser::tryToParseJSFunction() {
2406 assert(FormatTok->is(Keywords.kw_function));
2407 if (FormatTok->is(Keywords.kw_async))
2408 nextToken();
2409 // Consume "function".
2410 nextToken();
2411
2412 // Consume * (generator function). Treat it like C++'s overloaded operators.
2413 if (FormatTok->is(tok::star)) {
2414 FormatTok->setFinalizedType(TT_OverloadedOperator);
2415 nextToken();
2416 }
2417
2418 // Consume function name.
2419 if (FormatTok->is(tok::identifier))
2420 nextToken();
2421
2422 if (FormatTok->isNot(tok::l_paren))
2423 return;
2424
2425 // Parse formal parameter list.
2426 parseParens();
2427
2428 if (FormatTok->is(tok::colon)) {
2429 // Parse a type definition.
2430 nextToken();
2431
2432 // Eat the type declaration. For braced inline object types, balance braces,
2433 // otherwise just parse until finding an l_brace for the function body.
2434 if (FormatTok->is(tok::l_brace))
2435 tryToParseBracedList();
2436 else
2437 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2438 nextToken();
2439 }
2440
2441 if (FormatTok->is(tok::semi))
2442 return;
2443
2444 parseChildBlock();
2445}
2446
2447bool UnwrappedLineParser::tryToParseBracedList() {
2448 if (FormatTok->is(BK_Unknown))
2449 calculateBraceTypes();
2450 assert(FormatTok->isNot(BK_Unknown));
2451 if (FormatTok->is(BK_Block))
2452 return false;
2453 nextToken();
2454 parseBracedList();
2455 return true;
2456}
2457
2458bool UnwrappedLineParser::tryToParseChildBlock() {
2459 assert(Style.isJavaScript() || Style.isCSharp());
2460 assert(FormatTok->is(TT_FatArrow));
2461 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2462 // They always start an expression or a child block if followed by a curly
2463 // brace.
2464 nextToken();
2465 if (FormatTok->isNot(tok::l_brace))
2466 return false;
2467 parseChildBlock();
2468 return true;
2469}
2470
2471bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) {
2472 assert(!IsAngleBracket || !IsEnum);
2473 bool HasError = false;
2474
2475 // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2476 // replace this by using parseAssignmentExpression() inside.
2477 do {
2478 if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2479 tryToParseChildBlock()) {
2480 continue;
2481 }
2482 if (Style.isJavaScript()) {
2483 if (FormatTok->is(Keywords.kw_function)) {
2484 tryToParseJSFunction();
2485 continue;
2486 }
2487 if (FormatTok->is(tok::l_brace)) {
2488 // Could be a method inside of a braced list `{a() { return 1; }}`.
2489 if (tryToParseBracedList())
2490 continue;
2491 parseChildBlock();
2492 }
2493 }
2494 if (FormatTok->is(IsAngleBracket ? tok::greater : tok::r_brace)) {
2495 if (IsEnum) {
2496 FormatTok->setBlockKind(BK_Block);
2498 addUnwrappedLine();
2499 }
2500 nextToken();
2501 return !HasError;
2502 }
2503 switch (FormatTok->Tok.getKind()) {
2504 case tok::l_square:
2505 if (Style.isCSharp())
2506 parseSquare();
2507 else
2508 tryToParseLambda();
2509 break;
2510 case tok::l_paren:
2511 parseParens();
2512 // JavaScript can just have free standing methods and getters/setters in
2513 // object literals. Detect them by a "{" following ")".
2514 if (Style.isJavaScript()) {
2515 if (FormatTok->is(tok::l_brace))
2516 parseChildBlock();
2517 break;
2518 }
2519 break;
2520 case tok::l_brace:
2521 // Assume there are no blocks inside a braced init list apart
2522 // from the ones we explicitly parse out (like lambdas).
2523 FormatTok->setBlockKind(BK_BracedInit);
2524 if (!IsAngleBracket) {
2525 auto *Prev = FormatTok->Previous;
2526 if (Prev && Prev->is(tok::greater))
2527 Prev->setFinalizedType(TT_TemplateCloser);
2528 }
2529 nextToken();
2530 parseBracedList();
2531 break;
2532 case tok::less:
2533 nextToken();
2534 if (IsAngleBracket)
2535 parseBracedList(/*IsAngleBracket=*/true);
2536 break;
2537 case tok::semi:
2538 // JavaScript (or more precisely TypeScript) can have semicolons in braced
2539 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2540 // used for error recovery if we have otherwise determined that this is
2541 // a braced list.
2542 if (Style.isJavaScript()) {
2543 nextToken();
2544 break;
2545 }
2546 HasError = true;
2547 if (!IsEnum)
2548 return false;
2549 nextToken();
2550 break;
2551 case tok::comma:
2552 nextToken();
2553 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2554 addUnwrappedLine();
2555 break;
2556 default:
2557 nextToken();
2558 break;
2559 }
2560 } while (!eof());
2561 return false;
2562}
2563
2564/// \brief Parses a pair of parentheses (and everything between them).
2565/// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2566/// double ampersands. This applies for all nested scopes as well.
2567///
2568/// Returns whether there is a `=` token between the parentheses.
2569bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2570 assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2571 auto *LeftParen = FormatTok;
2572 bool SeenComma = false;
2573 bool SeenEqual = false;
2574 bool MightBeFoldExpr = false;
2575 const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace);
2576 nextToken();
2577 do {
2578 switch (FormatTok->Tok.getKind()) {
2579 case tok::l_paren:
2580 if (parseParens(AmpAmpTokenType))
2581 SeenEqual = true;
2582 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2583 parseChildBlock();
2584 break;
2585 case tok::r_paren: {
2586 auto *Prev = LeftParen->Previous;
2587 if (!MightBeStmtExpr && !MightBeFoldExpr && !Line->InMacroBody &&
2589 const auto *Next = Tokens->peekNextToken();
2590 const bool DoubleParens =
2591 Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren);
2592 const bool CommaSeparated =
2593 !DoubleParens && Prev && Prev->isOneOf(tok::l_paren, tok::comma) &&
2594 Next && Next->isOneOf(tok::comma, tok::r_paren);
2595 const auto *PrevPrev = Prev ? Prev->getPreviousNonComment() : nullptr;
2596 const bool Excluded =
2597 PrevPrev &&
2598 (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) ||
2599 SeenComma ||
2600 (SeenEqual &&
2601 (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) ||
2602 PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if))));
2603 const bool ReturnParens =
2605 ((NestedLambdas.empty() && !IsDecltypeAutoFunction) ||
2606 (!NestedLambdas.empty() && !NestedLambdas.back())) &&
2607 Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next &&
2608 Next->is(tok::semi);
2609 if ((DoubleParens && !Excluded) || (CommaSeparated && !SeenComma) ||
2610 ReturnParens) {
2611 LeftParen->Optional = true;
2612 FormatTok->Optional = true;
2613 }
2614 }
2615 if (Prev) {
2616 if (Prev->is(TT_TypenameMacro)) {
2617 LeftParen->setFinalizedType(TT_TypeDeclarationParen);
2618 FormatTok->setFinalizedType(TT_TypeDeclarationParen);
2619 } else if (Prev->is(tok::greater) && FormatTok->Previous == LeftParen) {
2620 Prev->setFinalizedType(TT_TemplateCloser);
2621 }
2622 }
2623 nextToken();
2624 return SeenEqual;
2625 }
2626 case tok::r_brace:
2627 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2628 return SeenEqual;
2629 case tok::l_square:
2630 tryToParseLambda();
2631 break;
2632 case tok::l_brace:
2633 if (!tryToParseBracedList())
2634 parseChildBlock();
2635 break;
2636 case tok::at:
2637 nextToken();
2638 if (FormatTok->is(tok::l_brace)) {
2639 nextToken();
2640 parseBracedList();
2641 }
2642 break;
2643 case tok::comma:
2644 SeenComma = true;
2645 nextToken();
2646 break;
2647 case tok::ellipsis:
2648 MightBeFoldExpr = true;
2649 nextToken();
2650 break;
2651 case tok::equal:
2652 SeenEqual = true;
2653 if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2654 tryToParseChildBlock();
2655 else
2656 nextToken();
2657 break;
2658 case tok::kw_class:
2659 if (Style.isJavaScript())
2660 parseRecord(/*ParseAsExpr=*/true);
2661 else
2662 nextToken();
2663 break;
2664 case tok::identifier:
2665 if (Style.isJavaScript() && (FormatTok->is(Keywords.kw_function)))
2666 tryToParseJSFunction();
2667 else
2668 nextToken();
2669 break;
2670 case tok::kw_switch:
2671 if (Style.Language == FormatStyle::LK_Java)
2672 parseSwitch(/*IsExpr=*/true);
2673 else
2674 nextToken();
2675 break;
2676 case tok::kw_requires: {
2677 auto RequiresToken = FormatTok;
2678 nextToken();
2679 parseRequiresExpression(RequiresToken);
2680 break;
2681 }
2682 case tok::ampamp:
2683 if (AmpAmpTokenType != TT_Unknown)
2684 FormatTok->setFinalizedType(AmpAmpTokenType);
2685 [[fallthrough]];
2686 default:
2687 nextToken();
2688 break;
2689 }
2690 } while (!eof());
2691 return SeenEqual;
2692}
2693
2694void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2695 if (!LambdaIntroducer) {
2696 assert(FormatTok->is(tok::l_square) && "'[' expected.");
2697 if (tryToParseLambda())
2698 return;
2699 }
2700 do {
2701 switch (FormatTok->Tok.getKind()) {
2702 case tok::l_paren:
2703 parseParens();
2704 break;
2705 case tok::r_square:
2706 nextToken();
2707 return;
2708 case tok::r_brace:
2709 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2710 return;
2711 case tok::l_square:
2712 parseSquare();
2713 break;
2714 case tok::l_brace: {
2715 if (!tryToParseBracedList())
2716 parseChildBlock();
2717 break;
2718 }
2719 case tok::at:
2720 case tok::colon:
2721 nextToken();
2722 if (FormatTok->is(tok::l_brace)) {
2723 nextToken();
2724 parseBracedList();
2725 }
2726 break;
2727 default:
2728 nextToken();
2729 break;
2730 }
2731 } while (!eof());
2732}
2733
2734void UnwrappedLineParser::keepAncestorBraces() {
2735 if (!Style.RemoveBracesLLVM)
2736 return;
2737
2738 const int MaxNestingLevels = 2;
2739 const int Size = NestedTooDeep.size();
2740 if (Size >= MaxNestingLevels)
2741 NestedTooDeep[Size - MaxNestingLevels] = true;
2742 NestedTooDeep.push_back(false);
2743}
2744
2746 for (const auto &Token : llvm::reverse(Line.Tokens))
2747 if (Token.Tok->isNot(tok::comment))
2748 return Token.Tok;
2749
2750 return nullptr;
2751}
2752
2753void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2754 FormatToken *Tok = nullptr;
2755
2756 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2757 PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) {
2759 ? getLastNonComment(*Line)
2760 : Line->Tokens.back().Tok;
2761 assert(Tok);
2762 if (Tok->BraceCount < 0) {
2763 assert(Tok->BraceCount == -1);
2764 Tok = nullptr;
2765 } else {
2766 Tok->BraceCount = -1;
2767 }
2768 }
2769
2770 addUnwrappedLine();
2771 ++Line->Level;
2772 ++Line->UnbracedBodyLevel;
2773 parseStructuralElement();
2774 --Line->UnbracedBodyLevel;
2775
2776 if (Tok) {
2777 assert(!Line->InPPDirective);
2778 Tok = nullptr;
2779 for (const auto &L : llvm::reverse(*CurrentLines)) {
2780 if (!L.InPPDirective && getLastNonComment(L)) {
2781 Tok = L.Tokens.back().Tok;
2782 break;
2783 }
2784 }
2785 assert(Tok);
2786 ++Tok->BraceCount;
2787 }
2788
2789 if (CheckEOF && eof())
2790 addUnwrappedLine();
2791
2792 --Line->Level;
2793}
2794
2795static void markOptionalBraces(FormatToken *LeftBrace) {
2796 if (!LeftBrace)
2797 return;
2798
2799 assert(LeftBrace->is(tok::l_brace));
2800
2801 FormatToken *RightBrace = LeftBrace->MatchingParen;
2802 if (!RightBrace) {
2803 assert(!LeftBrace->Optional);
2804 return;
2805 }
2806
2807 assert(RightBrace->is(tok::r_brace));
2808 assert(RightBrace->MatchingParen == LeftBrace);
2809 assert(LeftBrace->Optional == RightBrace->Optional);
2810
2811 LeftBrace->Optional = true;
2812 RightBrace->Optional = true;
2813}
2814
2815void UnwrappedLineParser::handleAttributes() {
2816 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2817 if (FormatTok->isAttribute())
2818 nextToken();
2819 else if (FormatTok->is(tok::l_square))
2820 handleCppAttributes();
2821}
2822
2823bool UnwrappedLineParser::handleCppAttributes() {
2824 // Handle [[likely]] / [[unlikely]] attributes.
2825 assert(FormatTok->is(tok::l_square));
2826 if (!tryToParseSimpleAttribute())
2827 return false;
2828 parseSquare();
2829 return true;
2830}
2831
2832/// Returns whether \c Tok begins a block.
2833bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
2834 // FIXME: rename the function or make
2835 // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2836 return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
2837 : Tok.is(tok::l_brace);
2838}
2839
/// Parses an `if` statement, including any `else` / `else if` chain. For
/// Verilog the same routine also handles the assert-like statements
/// (`restrict`, `assert`, `assume`, `cover`).
///
/// \param IfKind If non-null and Style.RemoveBracesLLVM is set, receives the
///        kind of statement that was parsed (IfOnly, IfElse or IfElseIf).
/// \param KeepBraces With Style.RemoveBracesLLVM set, forces the braces of the
///        `if` body to be kept.
/// \param IsVerilogAssert The statement is a Verilog assert-like statement,
///        whose "then" action is optional.
/// \returns nullptr unless Style.RemoveBracesLLVM is set (and always nullptr
///          for a Verilog assert without an action); otherwise the token that
///          opened the braced `if` body, or nullptr if the body had no braces.
2840FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2841 bool KeepBraces,
2842 bool IsVerilogAssert) {
2843 assert((FormatTok->is(tok::kw_if) ||
2844 (Style.isVerilog() &&
2845 FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
2846 Keywords.kw_assume, Keywords.kw_cover))) &&
2847 "'if' expected");
2848 nextToken();
2849
2850 if (IsVerilogAssert) {
2851 // Handle `assert #0` and `assert final`.
2852 if (FormatTok->is(Keywords.kw_verilogHash)) {
2853 nextToken();
2854 if (FormatTok->is(tok::numeric_constant))
2855 nextToken();
2856 } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property,
2857 Keywords.kw_sequence)) {
2858 nextToken();
2859 }
2860 }
2861
2862 // TableGen's if statement has the form of `if <cond> then { ... }`.
2863 if (Style.isTableGen()) {
2864 while (!eof() && FormatTok->isNot(Keywords.kw_then)) {
2865 // Simply skip until then. This range only contains a value.
2866 nextToken();
2867 }
2868 }
2869
2870 // Handle `if !consteval`.
2871 if (FormatTok->is(tok::exclaim))
2872 nextToken();
2873
 // The braces of an `if consteval` are always kept; otherwise they are kept
 // unless brace removal is enabled and the caller did not force them.
2874 bool KeepIfBraces = true;
2875 if (FormatTok->is(tok::kw_consteval)) {
2876 nextToken();
2877 } else {
2878 KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
 // Skip `constexpr` or a single identifier in front of the condition.
2879 if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
2880 nextToken();
2881 if (FormatTok->is(tok::l_paren)) {
2882 FormatTok->setFinalizedType(TT_ConditionLParen);
2883 parseParens();
2884 }
2885 }
2886 handleAttributes();
2887 // The then action is optional in Verilog assert statements.
2888 if (IsVerilogAssert && FormatTok->is(tok::semi)) {
2889 nextToken();
2890 addUnwrappedLine();
2891 return nullptr;
2892 }
2893
2894 bool NeedsUnwrappedLine = false;
 // Pushes this statement's entry onto NestedTooDeep (only when brace removal
 // is enabled); the entry is popped again near the end of this function.
2895 keepAncestorBraces();
2896
2897 FormatToken *IfLeftBrace = nullptr;
2898 IfStmtKind IfBlockKind = IfStmtKind::NotIf;
2899
 // Parse the `if` body: a braced block, nothing at all (Verilog assert that
 // goes straight to `else`), or a single unbraced statement.
2900 if (isBlockBegin(*FormatTok)) {
2901 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
2902 IfLeftBrace = FormatTok;
2903 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2904 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2905 /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
2906 setPreviousRBraceType(TT_ControlStatementRBrace);
2907 if (Style.BraceWrapping.BeforeElse)
2908 addUnwrappedLine();
2909 else
2910 NeedsUnwrappedLine = true;
2911 } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) {
2912 addUnwrappedLine();
2913 } else {
2914 parseUnbracedBody();
2915 }
2916
2917 if (Style.RemoveBracesLLVM) {
2918 assert(!NestedTooDeep.empty());
 // Also keep the braces if the `if` brace has no recorded match, the
 // statement was flagged as too deeply nested, or the block was reported
 // as an `if` without `else` or an `if`/`else if` chain.
2919 KeepIfBraces = KeepIfBraces ||
2920 (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
2921 NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
2922 IfBlockKind == IfStmtKind::IfElseIf;
2923 }
2924
2925 bool KeepElseBraces = KeepIfBraces;
2926 FormatToken *ElseLeftBrace = nullptr;
2927 IfStmtKind Kind = IfStmtKind::IfOnly;
2928
2929 if (FormatTok->is(tok::kw_else)) {
2930 if (Style.RemoveBracesLLVM) {
2931 NestedTooDeep.back() = false;
2932 Kind = IfStmtKind::IfElse;
2933 }
2934 nextToken();
2935 handleAttributes();
2936 if (isBlockBegin(*FormatTok)) {
 // `else { ... }`
2937 const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
2938 FormatTok->setFinalizedType(TT_ElseLBrace);
2939 ElseLeftBrace = FormatTok;
2940 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2941 IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
2942 FormatToken *IfLBrace =
2943 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2944 /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
2945 setPreviousRBraceType(TT_ElseRBrace);
2946 if (FormatTok->is(tok::kw_else)) {
2947 KeepElseBraces = KeepElseBraces ||
2948 ElseBlockKind == IfStmtKind::IfOnly ||
2949 ElseBlockKind == IfStmtKind::IfElseIf;
2950 } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
 // `else { if (...) { ... } }` whose inner braces are kept: mark the
 // braces of the `else` block itself as optional.
2951 KeepElseBraces = true;
2952 assert(ElseLeftBrace->MatchingParen);
2953 markOptionalBraces(ElseLeftBrace);
2954 }
2955 addUnwrappedLine();
2956 } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) {
 // `else if`: parse the nested `if` recursively.
2957 const FormatToken *Previous = Tokens->getPreviousToken();
2958 assert(Previous);
2959 const bool IsPrecededByComment = Previous->is(tok::comment);
2960 if (IsPrecededByComment) {
 // A comment between `else` and `if` puts the `if` on its own line, one
 // level deeper.
2961 addUnwrappedLine();
2962 ++Line->Level;
2963 }
2964 bool TooDeep = true;
2965 if (Style.RemoveBracesLLVM) {
2966 Kind = IfStmtKind::IfElseIf;
 // Take this statement's entry off the stack for the duration of the
 // recursive call and put it back afterwards.
2967 TooDeep = NestedTooDeep.pop_back_val();
2968 }
2969 ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
2970 if (Style.RemoveBracesLLVM)
2971 NestedTooDeep.push_back(TooDeep);
2972 if (IsPrecededByComment)
2973 --Line->Level;
2974 } else {
2975 parseUnbracedBody(/*CheckEOF=*/true);
2976 }
2977 } else {
 // No `else` branch.
2978 KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
2979 if (NeedsUnwrappedLine)
2980 addUnwrappedLine();
2981 }
2982
 // Everything below is bookkeeping for brace removal only.
2983 if (!Style.RemoveBracesLLVM)
2984 return nullptr;
2985
2986 assert(!NestedTooDeep.empty());
2987 KeepElseBraces = KeepElseBraces ||
2988 (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
2989 NestedTooDeep.back();
2990
2991 NestedTooDeep.pop_back();
2992
2993 if (!KeepIfBraces && !KeepElseBraces) {
2994 markOptionalBraces(IfLeftBrace);
2995 markOptionalBraces(ElseLeftBrace);
2996 } else if (IfLeftBrace) {
 // The braces are kept: clear the pairing on both braces.
 // NOTE(review): presumably so that later `!MatchingParen` checks treat
 // this pair as not removable - confirm.
2997 FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
2998 if (IfRightBrace) {
2999 assert(IfRightBrace->MatchingParen == IfLeftBrace);
3000 assert(!IfLeftBrace->Optional);
3001 assert(!IfRightBrace->Optional);
3002 IfLeftBrace->MatchingParen = nullptr;
3003 IfRightBrace->MatchingParen = nullptr;
3004 }
3005 }
3006
3007 if (IfKind)
3008 *IfKind = Kind;
3009
3010 return IfLeftBrace;
3011}
3012
3013void UnwrappedLineParser::parseTryCatch() {
3014 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
3015 nextToken();
3016 bool NeedsUnwrappedLine = false;
3017 bool HasCtorInitializer = false;
3018 if (FormatTok->is(tok::colon)) {
3019 auto *Colon = FormatTok;
3020 // We are in a function try block, what comes is an initializer list.
3021 nextToken();
3022 if (FormatTok->is(tok::identifier)) {
3023 HasCtorInitializer = true;
3024 Colon->setFinalizedType(TT_CtorInitializerColon);
3025 }
3026
3027 // In case identifiers were removed by clang-tidy, what might follow is
3028 // multiple commas in sequence - before the first identifier.
3029 while (FormatTok->is(tok::comma))
3030 nextToken();
3031
3032 while (FormatTok->is(tok::identifier)) {
3033 nextToken();
3034 if (FormatTok->is(tok::l_paren)) {
3035 parseParens();
3036 } else if (FormatTok->is(tok::l_brace)) {
3037 nextToken();
3038 parseBracedList();
3039 }
3040
3041 // In case identifiers were removed by clang-tidy, what might follow is
3042 // multiple commas in sequence - after the first identifier.
3043 while (FormatTok->is(tok::comma))
3044 nextToken();
3045 }
3046 }
3047 // Parse try with resource.
3048 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
3049 parseParens();
3050
3051 keepAncestorBraces();
3052
3053 if (FormatTok->is(tok::l_brace)) {
3054 if (HasCtorInitializer)
3055 FormatTok->setFinalizedType(TT_FunctionLBrace);
3056 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3057 parseBlock();
3058 if (Style.BraceWrapping.BeforeCatch)
3059 addUnwrappedLine();
3060 else
3061 NeedsUnwrappedLine = true;
3062 } else if (FormatTok->isNot(tok::kw_catch)) {
3063 // The C++ standard requires a compound-statement after a try.
3064 // If there's none, we try to assume there's a structuralElement
3065 // and try to continue.
3066 addUnwrappedLine();
3067 ++Line->Level;
3068 parseStructuralElement();
3069 --Line->Level;
3070 }
3071 while (true) {
3072 if (FormatTok->is(tok::at))
3073 nextToken();
3074 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
3075 tok::kw___finally) ||
3076 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3077 FormatTok->is(Keywords.kw_finally)) ||
3078 (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
3079 FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
3080 break;
3081 }
3082 nextToken();
3083 while (FormatTok->isNot(tok::l_brace)) {
3084 if (FormatTok->is(tok::l_paren)) {
3085 parseParens();
3086 continue;
3087 }
3088 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
3089 if (Style.RemoveBracesLLVM)
3090 NestedTooDeep.pop_back();
3091 return;
3092 }
3093 nextToken();
3094 }
3095 NeedsUnwrappedLine = false;
3096 Line->MustBeDeclaration = false;
3097 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3098 parseBlock();
3099 if (Style.BraceWrapping.BeforeCatch)
3100 addUnwrappedLine();
3101 else
3102 NeedsUnwrappedLine = true;
3103 }
3104
3105 if (Style.RemoveBracesLLVM)
3106 NestedTooDeep.pop_back();
3107
3108 if (NeedsUnwrappedLine)
3109 addUnwrappedLine();
3110}
3111
3112void UnwrappedLineParser::parseNamespaceOrExportBlock(unsigned AddLevels) {
3113 bool ManageWhitesmithsBraces =
3114 AddLevels == 0u && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3115
3116 // If we're in Whitesmiths mode, indent the brace if we're not indenting
3117 // the whole block.
3118 if (ManageWhitesmithsBraces)
3119 ++Line->Level;
3120
3121 // Munch the semicolon after the block. This is more common than one would
3122 // think. Putting the semicolon into its own line is very ugly.
3123 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
3124 /*KeepBraces=*/true, /*IfKind=*/nullptr, ManageWhitesmithsBraces);
3125
3126 addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
3127
3128 if (ManageWhitesmithsBraces)
3129 --Line->Level;
3130}
3131
3132void UnwrappedLineParser::parseNamespace() {
3133 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
3134 "'namespace' expected");
3135
3136 const FormatToken &InitialToken = *FormatTok;
3137 nextToken();
3138 if (InitialToken.is(TT_NamespaceMacro)) {
3139 parseParens();
3140 } else {
3141 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
3142 tok::l_square, tok::period, tok::l_paren) ||
3143 (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
3144 if (FormatTok->is(tok::l_square))
3145 parseSquare();
3146 else if (FormatTok->is(tok::l_paren))
3147 parseParens();
3148 else
3149 nextToken();
3150 }
3151 }
3152 if (FormatTok->is(tok::l_brace)) {
3153 FormatTok->setFinalizedType(TT_NamespaceLBrace);
3154
3155 if (ShouldBreakBeforeBrace(Style, InitialToken))
3156 addUnwrappedLine();
3157
3158 unsigned AddLevels =
3161 DeclarationScopeStack.size() > 1)
3162 ? 1u
3163 : 0u;
3164 parseNamespaceOrExportBlock(AddLevels);
3165 }
3166 // FIXME: Add error handling.
3167}
3168
3169void UnwrappedLineParser::parseCppExportBlock() {
3170 parseNamespaceOrExportBlock(/*AddLevels=*/Style.IndentExportBlock ? 1 : 0);
3171}
3172
3173void UnwrappedLineParser::parseNew() {
3174 assert(FormatTok->is(tok::kw_new) && "'new' expected");
3175 nextToken();
3176
3177 if (Style.isCSharp()) {
3178 do {
3179 // Handle constructor invocation, e.g. `new(field: value)`.
3180 if (FormatTok->is(tok::l_paren))
3181 parseParens();
3182
3183 // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
3184 if (FormatTok->is(tok::l_brace))
3185 parseBracedList();
3186
3187 if (FormatTok->isOneOf(tok::semi, tok::comma))
3188 return;
3189
3190 nextToken();
3191 } while (!eof());
3192 }
3193
3194 if (Style.Language != FormatStyle::LK_Java)
3195 return;
3196
3197 // In Java, we can parse everything up to the parens, which aren't optional.
3198 do {
3199 // There should not be a ;, { or } before the new's open paren.
3200 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
3201 return;
3202
3203 // Consume the parens.
3204 if (FormatTok->is(tok::l_paren)) {
3205 parseParens();
3206
3207 // If there is a class body of an anonymous class, consume that as child.
3208 if (FormatTok->is(tok::l_brace))
3209 parseChildBlock();
3210 return;
3211 }
3212 nextToken();
3213 } while (!eof());
3214}
3215
3216void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
3217 keepAncestorBraces();
3218
3219 if (isBlockBegin(*FormatTok)) {
3220 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3221 FormatToken *LeftBrace = FormatTok;
3222 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3223 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3224 /*MunchSemi=*/true, KeepBraces);
3225 setPreviousRBraceType(TT_ControlStatementRBrace);
3226 if (!KeepBraces) {
3227 assert(!NestedTooDeep.empty());
3228 if (!NestedTooDeep.back())
3229 markOptionalBraces(LeftBrace);
3230 }
3231 if (WrapRightBrace)
3232 addUnwrappedLine();
3233 } else {
3234 parseUnbracedBody();
3235 }
3236
3237 if (!KeepBraces)
3238 NestedTooDeep.pop_back();
3239}
3240
3241void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
3242 assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
3243 (Style.isVerilog() &&
3244 FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
3245 Keywords.kw_always_ff, Keywords.kw_always_latch,
3246 Keywords.kw_final, Keywords.kw_initial,
3247 Keywords.kw_foreach, Keywords.kw_forever,
3248 Keywords.kw_repeat))) &&
3249 "'for', 'while' or foreach macro expected");
3250 const bool KeepBraces = !Style.RemoveBracesLLVM ||
3251 !FormatTok->isOneOf(tok::kw_for, tok::kw_while);
3252
3253 nextToken();
3254 // JS' for await ( ...
3255 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
3256 nextToken();
3257 if (IsCpp && FormatTok->is(tok::kw_co_await))
3258 nextToken();
3259 if (HasParens && FormatTok->is(tok::l_paren)) {
3260 // The type is only set for Verilog basically because we were afraid to
3261 // change the existing behavior for loops. See the discussion on D121756 for
3262 // details.
3263 if (Style.isVerilog())
3264 FormatTok->setFinalizedType(TT_ConditionLParen);
3265 parseParens();
3266 }
3267
3268 if (Style.isVerilog()) {
3269 // Event control.
3270 parseVerilogSensitivityList();
3271 } else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(tok::semi) &&
3272 Tokens->getPreviousToken()->is(tok::r_paren)) {
3273 nextToken();
3274 addUnwrappedLine();
3275 return;
3276 }
3277
3278 handleAttributes();
3279 parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
3280}
3281
3282void UnwrappedLineParser::parseDoWhile() {
3283 assert(FormatTok->is(tok::kw_do) && "'do' expected");
3284 nextToken();
3285
3286 parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
3287
3288 // FIXME: Add error handling.
3289 if (FormatTok->isNot(tok::kw_while)) {
3290 addUnwrappedLine();
3291 return;
3292 }
3293
3294 FormatTok->setFinalizedType(TT_DoWhile);
3295
3296 // If in Whitesmiths mode, the line with the while() needs to be indented
3297 // to the same level as the block.
3299 ++Line->Level;
3300
3301 nextToken();
3302 parseStructuralElement();
3303}
3304
3305void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
3306 nextToken();
3307 unsigned OldLineLevel = Line->Level;
3308
3309 if (LeftAlignLabel)
3310 Line->Level = 0;
3311 else if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
3312 --Line->Level;
3313
3314 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
3315 FormatTok->is(tok::l_brace)) {
3316
3317 CompoundStatementIndenter Indenter(this, Line->Level,
3320 parseBlock();
3321 if (FormatTok->is(tok::kw_break)) {
3324 addUnwrappedLine();
3325 if (!Style.IndentCaseBlocks &&
3327 ++Line->Level;
3328 }
3329 }
3330 parseStructuralElement();
3331 }
3332 addUnwrappedLine();
3333 } else {
3334 if (FormatTok->is(tok::semi))
3335 nextToken();
3336 addUnwrappedLine();
3337 }
3338 Line->Level = OldLineLevel;
3339 if (FormatTok->isNot(tok::l_brace)) {
3340 parseStructuralElement();
3341 addUnwrappedLine();
3342 }
3343}
3344
3345void UnwrappedLineParser::parseCaseLabel() {
3346 assert(FormatTok->is(tok::kw_case) && "'case' expected");
3347 auto *Case = FormatTok;
3348
3349 // FIXME: fix handling of complex expressions here.
3350 do {
3351 nextToken();
3352 if (FormatTok->is(tok::colon)) {
3353 FormatTok->setFinalizedType(TT_CaseLabelColon);
3354 break;
3355 }
3356 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::arrow)) {
3357 FormatTok->setFinalizedType(TT_CaseLabelArrow);
3358 Case->setFinalizedType(TT_SwitchExpressionLabel);
3359 break;
3360 }
3361 } while (!eof());
3362 parseLabel();
3363}
3364
3365void UnwrappedLineParser::parseSwitch(bool IsExpr) {
3366 assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3367 nextToken();
3368 if (FormatTok->is(tok::l_paren))
3369 parseParens();
3370
3371 keepAncestorBraces();
3372
3373 if (FormatTok->is(tok::l_brace)) {
3374 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3375 FormatTok->setFinalizedType(IsExpr ? TT_SwitchExpressionLBrace
3376 : TT_ControlStatementLBrace);
3377 if (IsExpr)
3378 parseChildBlock();
3379 else
3380 parseBlock();
3381 setPreviousRBraceType(TT_ControlStatementRBrace);
3382 if (!IsExpr)
3383 addUnwrappedLine();
3384 } else {
3385 addUnwrappedLine();
3386 ++Line->Level;
3387 parseStructuralElement();
3388 --Line->Level;
3389 }
3390
3391 if (Style.RemoveBracesLLVM)
3392 NestedTooDeep.pop_back();
3393}
3394
3395// Operators that can follow a C variable.
3397 switch (Kind) {
3398 case tok::ampamp:
3399 case tok::ampequal:
3400 case tok::arrow:
3401 case tok::caret:
3402 case tok::caretequal:
3403 case tok::comma:
3404 case tok::ellipsis:
3405 case tok::equal:
3406 case tok::equalequal:
3407 case tok::exclaim:
3408 case tok::exclaimequal:
3409 case tok::greater:
3410 case tok::greaterequal:
3411 case tok::greatergreater:
3412 case tok::greatergreaterequal:
3413 case tok::l_paren:
3414 case tok::l_square:
3415 case tok::less:
3416 case tok::lessequal:
3417 case tok::lessless:
3418 case tok::lesslessequal:
3419 case tok::minus:
3420 case tok::minusequal:
3421 case tok::minusminus:
3422 case tok::percent:
3423 case tok::percentequal:
3424 case tok::period:
3425 case tok::pipe:
3426 case tok::pipeequal:
3427 case tok::pipepipe:
3428 case tok::plus:
3429 case tok::plusequal:
3430 case tok::plusplus:
3431 case tok::question:
3432 case tok::r_brace:
3433 case tok::r_paren:
3434 case tok::r_square:
3435 case tok::semi:
3436 case tok::slash:
3437 case tok::slashequal:
3438 case tok::star:
3439 case tok::starequal:
3440 return true;
3441 default:
3442 return false;
3443 }
3444}
3445
3446void UnwrappedLineParser::parseAccessSpecifier() {
3447 FormatToken *AccessSpecifierCandidate = FormatTok;
3448 nextToken();
3449 // Understand Qt's slots.
3450 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
3451 nextToken();
3452 // Otherwise, we don't know what it is, and we'd better keep the next token.
3453 if (FormatTok->is(tok::colon)) {
3454 nextToken();
3455 addUnwrappedLine();
3456 } else if (FormatTok->isNot(tok::coloncolon) &&
3457 !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
3458 // Not a variable name nor namespace name.
3459 addUnwrappedLine();
3460 } else if (AccessSpecifierCandidate) {
3461 // Consider the access specifier to be a C identifier.
3462 AccessSpecifierCandidate->Tok.setKind(tok::identifier);
3463 }
3464}
3465
3466/// \brief Parses a requires, decides if it is a clause or an expression.
3467/// \pre The current token has to be the requires keyword.
3468/// \returns true if it parsed a clause.
3469bool UnwrappedLineParser::parseRequires() {
3470 assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3471 auto RequiresToken = FormatTok;
3472
3473 // We try to guess if it is a requires clause, or a requires expression. For
3474 // that we first consume the keyword and check the next token.
3475 nextToken();
3476
3477 switch (FormatTok->Tok.getKind()) {
3478 case tok::l_brace:
3479 // This can only be an expression, never a clause.
3480 parseRequiresExpression(RequiresToken);
3481 return false;
3482 case tok::l_paren:
3483 // Clauses and expression can start with a paren, it's unclear what we have.
3484 break;
3485 default:
3486 // All other tokens can only be a clause.
3487 parseRequiresClause(RequiresToken);
3488 return true;
3489 }
3490
3491 // Looking forward we would have to decide if there are function declaration
3492 // like arguments to the requires expression:
3493 // requires (T t) {
3494 // Or there is a constraint expression for the requires clause:
3495 // requires (C<T> && ...
3496
3497 // But first let's look behind.
3498 auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
3499
3500 if (!PreviousNonComment ||
3501 PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
3502 // If there is no token, or an expression left brace, we are a requires
3503 // clause within a requires expression.
3504 parseRequiresClause(RequiresToken);
3505 return true;
3506 }
3507
3508 switch (PreviousNonComment->Tok.getKind()) {
3509 case tok::greater:
3510 case tok::r_paren:
3511 case tok::kw_noexcept:
3512 case tok::kw_const:
3513 case tok::amp:
3514 // This is a requires clause.
3515 parseRequiresClause(RequiresToken);
3516 return true;
3517 case tok::ampamp: {
3518 // This can be either:
3519 // if (... && requires (T t) ...)
3520 // Or
3521 // void member(...) && requires (C<T> ...
3522 // We check the one token before that for a const:
3523 // void member(...) const && requires (C<T> ...
3524 auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3525 if (PrevPrev && PrevPrev->is(tok::kw_const)) {
3526 parseRequiresClause(RequiresToken);
3527 return true;
3528 }
3529 break;
3530 }
3531 default:
3532 if (PreviousNonComment->isTypeOrIdentifier(LangOpts)) {
3533 // This is a requires clause.
3534 parseRequiresClause(RequiresToken);
3535 return true;
3536 }
3537 // It's an expression.
3538 parseRequiresExpression(RequiresToken);
3539 return false;
3540 }
3541
3542 // Now we look forward and try to check if the paren content is a parameter
3543 // list. The parameters can be cv-qualified and contain references or
3544 // pointers.
3545 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3546 // of stuff: typename, const, *, &, &&, ::, identifiers.
3547
3548 unsigned StoredPosition = Tokens->getPosition();
3549 FormatToken *NextToken = Tokens->getNextToken();
3550 int Lookahead = 0;
3551 auto PeekNext = [&Lookahead, &NextToken, this] {
3552 ++Lookahead;
3553 NextToken = Tokens->getNextToken();
3554 };
3555
3556 bool FoundType = false;
3557 bool LastWasColonColon = false;
3558 int OpenAngles = 0;
3559
3560 for (; Lookahead < 50; PeekNext()) {
3561 switch (NextToken->Tok.getKind()) {
3562 case tok::kw_volatile:
3563 case tok::kw_const:
3564 case tok::comma:
3565 if (OpenAngles == 0) {
3566 FormatTok = Tokens->setPosition(StoredPosition);
3567 parseRequiresExpression(RequiresToken);
3568 return false;
3569 }
3570 break;
3571 case tok::eof:
3572 // Break out of the loop.
3573 Lookahead = 50;
3574 break;
3575 case tok::coloncolon:
3576 LastWasColonColon = true;
3577 break;
3578 case tok::kw_decltype:
3579 case tok::identifier:
3580 if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3581 FormatTok = Tokens->setPosition(StoredPosition);
3582 parseRequiresExpression(RequiresToken);
3583 return false;
3584 }
3585 FoundType = true;
3586 LastWasColonColon = false;
3587 break;
3588 case tok::less:
3589 ++OpenAngles;
3590 break;
3591 case tok::greater:
3592 --OpenAngles;
3593 break;
3594 default:
3595 if (NextToken->isTypeName(LangOpts)) {
3596 FormatTok = Tokens->setPosition(StoredPosition);
3597 parseRequiresExpression(RequiresToken);
3598 return false;
3599 }
3600 break;
3601 }
3602 }
3603 // This seems to be a complicated expression, just assume it's a clause.
3604 FormatTok = Tokens->setPosition(StoredPosition);
3605 parseRequiresClause(RequiresToken);
3606 return true;
3607}
3608
3609/// \brief Parses a requires clause.
3610/// \param RequiresToken The requires keyword token, which starts this clause.
3611/// \pre We need to be on the next token after the requires keyword.
3612/// \sa parseRequiresExpression
3613///
3614/// Returns if it either has finished parsing the clause, or it detects, that
3615/// the clause is incorrect.
3616void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3617 assert(FormatTok->getPreviousNonComment() == RequiresToken);
3618 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3619
3620 // If there is no previous token, we are within a requires expression,
3621 // otherwise we will always have the template or function declaration in front
3622 // of it.
3623 bool InRequiresExpression =
3624 !RequiresToken->Previous ||
3625 RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3626
3627 RequiresToken->setFinalizedType(InRequiresExpression
3628 ? TT_RequiresClauseInARequiresExpression
3629 : TT_RequiresClause);
3630
3631 // NOTE: parseConstraintExpression is only ever called from this function.
3632 // It could be inlined into here.
3633 parseConstraintExpression();
3634
3635 if (!InRequiresExpression)
3636 FormatTok->Previous->ClosesRequiresClause = true;
3637}
3638
3639/// \brief Parses a requires expression.
3640/// \param RequiresToken The requires keyword token, which starts this clause.
3641/// \pre We need to be on the next token after the requires keyword.
3642/// \sa parseRequiresClause
3643///
3644/// Returns if it either has finished parsing the expression, or it detects,
3645/// that the expression is incorrect.
3646void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3647 assert(FormatTok->getPreviousNonComment() == RequiresToken);
3648 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3649
3650 RequiresToken->setFinalizedType(TT_RequiresExpression);
3651
3652 if (FormatTok->is(tok::l_paren)) {
3653 FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3654 parseParens();
3655 }
3656
3657 if (FormatTok->is(tok::l_brace)) {
3658 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3659 parseChildBlock();
3660 }
3661}
3662
3663/// \brief Parses a constraint expression.
3664///
3665/// This is the body of a requires clause. It returns, when the parsing is
3666/// complete, or the expression is incorrect.
3667void UnwrappedLineParser::parseConstraintExpression() {
3668 // The special handling for lambdas is needed since tryToParseLambda() eats a
3669 // token and if a requires expression is the last part of a requires clause
3670 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3671 // not set on the correct token. Thus we need to be aware if we even expect a
3672 // lambda to be possible.
3673 // template <typename T> requires requires { ... } [[nodiscard]] ...;
3674 bool LambdaNextTimeAllowed = true;
3675
3676 // Within lambda declarations, it is permitted to put a requires clause after
3677 // its template parameter list, which would place the requires clause right
3678 // before the parentheses of the parameters of the lambda declaration. Thus,
3679 // we track if we expect to see grouping parentheses at all.
3680 // Without this check, `requires foo<T> (T t)` in the below example would be
3681 // seen as the whole requires clause, accidentally eating the parameters of
3682 // the lambda.
3683 // [&]<typename T> requires foo<T> (T t) { ... };
3684 bool TopLevelParensAllowed = true;
3685
3686 do {
3687 bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);
3688
3689 switch (FormatTok->Tok.getKind()) {
3690 case tok::kw_requires: {
3691 auto RequiresToken = FormatTok;
3692 nextToken();
3693 parseRequiresExpression(RequiresToken);
3694 break;
3695 }
3696
3697 case tok::l_paren:
3698 if (!TopLevelParensAllowed)
3699 return;
3700 parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
3701 TopLevelParensAllowed = false;
3702 break;
3703
3704 case tok::l_square:
3705 if (!LambdaThisTimeAllowed || !tryToParseLambda())
3706 return;
3707 break;
3708
3709 case tok::kw_const:
3710 case tok::semi:
3711 case tok::kw_class:
3712 case tok::kw_struct:
3713 case tok::kw_union:
3714 return;
3715
3716 case tok::l_brace:
3717 // Potential function body.
3718 return;
3719
3720 case tok::ampamp:
3721 case tok::pipepipe:
3722 FormatTok->setFinalizedType(TT_BinaryOperator);
3723 nextToken();
3724 LambdaNextTimeAllowed = true;
3725 TopLevelParensAllowed = true;
3726 break;
3727
3728 case tok::comma:
3729 case tok::comment:
3730 LambdaNextTimeAllowed = LambdaThisTimeAllowed;
3731 nextToken();
3732 break;
3733
3734 case tok::kw_sizeof:
3735 case tok::greater:
3736 case tok::greaterequal:
3737 case tok::greatergreater:
3738 case tok::less:
3739 case tok::lessequal:
3740 case tok::lessless:
3741 case tok::equalequal:
3742 case tok::exclaim:
3743 case tok::exclaimequal:
3744 case tok::plus:
3745 case tok::minus:
3746 case tok::star:
3747 case tok::slash:
3748 LambdaNextTimeAllowed = true;
3749 TopLevelParensAllowed = true;
3750 // Just eat them.
3751 nextToken();
3752 break;
3753
3754 case tok::numeric_constant:
3755 case tok::coloncolon:
3756 case tok::kw_true:
3757 case tok::kw_false:
3758 TopLevelParensAllowed = false;
3759 // Just eat them.
3760 nextToken();
3761 break;
3762
3763 case tok::kw_static_cast:
3764 case tok::kw_const_cast:
3765 case tok::kw_reinterpret_cast:
3766 case tok::kw_dynamic_cast:
3767 nextToken();
3768 if (FormatTok->isNot(tok::less))
3769 return;
3770
3771 nextToken();
3772 parseBracedList(/*IsAngleBracket=*/true);
3773 break;
3774
3775 default:
3776 if (!FormatTok->Tok.getIdentifierInfo()) {
3777 // Identifiers are part of the default case, we check for more then
3778 // tok::identifier to handle builtin type traits.
3779 return;
3780 }
3781
3782 // We need to differentiate identifiers for a template deduction guide,
3783 // variables, or function return types (the constraint expression has
3784 // ended before that), and basically all other cases. But it's easier to
3785 // check the other way around.
3786 assert(FormatTok->Previous);
3787 switch (FormatTok->Previous->Tok.getKind()) {
3788 case tok::coloncolon: // Nested identifier.
3789 case tok::ampamp: // Start of a function or variable for the
3790 case tok::pipepipe: // constraint expression. (binary)
3791 case tok::exclaim: // The same as above, but unary.
3792 case tok::kw_requires: // Initial identifier of a requires clause.
3793 case tok::equal: // Initial identifier of a concept declaration.
3794 break;
3795 default:
3796 return;
3797 }
3798
3799 // Read identifier with optional template declaration.
3800 nextToken();
3801 if (FormatTok->is(tok::less)) {
3802 nextToken();
3803 parseBracedList(/*IsAngleBracket=*/true);
3804 }
3805 TopLevelParensAllowed = false;
3806 break;
3807 }
3808 } while (!eof());
3809}
3810
3811bool UnwrappedLineParser::parseEnum() {
3812 const FormatToken &InitialToken = *FormatTok;
3813
3814 // Won't be 'enum' for NS_ENUMs.
3815 if (FormatTok->is(tok::kw_enum))
3816 nextToken();
3817
3818 // In TypeScript, "enum" can also be used as property name, e.g. in interface
3819 // declarations. An "enum" keyword followed by a colon would be a syntax
3820 // error and thus assume it is just an identifier.
3821 if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
3822 return false;
3823
3824 // In protobuf, "enum" can be used as a field name.
3825 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
3826 return false;
3827
3828 if (IsCpp) {
3829 // Eat up enum class ...
3830 if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
3831 nextToken();
3832 while (FormatTok->is(tok::l_square))
3833 if (!handleCppAttributes())
3834 return false;
3835 }
3836
3837 while (FormatTok->Tok.getIdentifierInfo() ||
3838 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
3839 tok::greater, tok::comma, tok::question,
3840 tok::l_square)) {
3841 if (Style.isVerilog()) {
3842 FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
3843 nextToken();
3844 // In Verilog the base type can have dimensions.
3845 while (FormatTok->is(tok::l_square))
3846 parseSquare();
3847 } else {
3848 nextToken();
3849 }
3850 // We can have macros or attributes in between 'enum' and the enum name.
3851 if (FormatTok->is(tok::l_paren))
3852 parseParens();
3853 if (FormatTok->is(tok::identifier)) {
3854 nextToken();
3855 // If there are two identifiers in a row, this is likely an elaborate
3856 // return type. In Java, this can be "implements", etc.
3857 if (IsCpp && FormatTok->is(tok::identifier))
3858 return false;
3859 }
3860 }
3861
3862 // Just a declaration or something is wrong.
3863 if (FormatTok->isNot(tok::l_brace))
3864 return true;
3865 FormatTok->setFinalizedType(TT_EnumLBrace);
3866 FormatTok->setBlockKind(BK_Block);
3867
3868 if (Style.Language == FormatStyle::LK_Java) {
3869 // Java enums are different.
3870 parseJavaEnumBody();
3871 return true;
3872 }
3873 if (Style.Language == FormatStyle::LK_Proto) {
3874 parseBlock(/*MustBeDeclaration=*/true);
3875 return true;
3876 }
3877
3878 if (!Style.AllowShortEnumsOnASingleLine &&
3879 ShouldBreakBeforeBrace(Style, InitialToken)) {
3880 addUnwrappedLine();
3881 }
3882 // Parse enum body.
3883 nextToken();
3884 if (!Style.AllowShortEnumsOnASingleLine) {
3885 addUnwrappedLine();
3886 Line->Level += 1;
3887 }
3888 bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true);
3890 Line->Level -= 1;
3891 if (HasError) {
3892 if (FormatTok->is(tok::semi))
3893 nextToken();
3894 addUnwrappedLine();
3895 }
3896 setPreviousRBraceType(TT_EnumRBrace);
3897 return true;
3898
3899 // There is no addUnwrappedLine() here so that we fall through to parsing a
3900 // structural element afterwards. Thus, in "enum A {} n, m;",
3901 // "} n, m;" will end up in one unwrapped line.
3902}
3903
3904bool UnwrappedLineParser::parseStructLike() {
3905 // parseRecord falls through and does not yet add an unwrapped line as a
3906 // record declaration or definition can start a structural element.
3907 parseRecord();
3908 // This does not apply to Java, JavaScript and C#.
3909 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3910 Style.isCSharp()) {
3911 if (FormatTok->is(tok::semi))
3912 nextToken();
3913 addUnwrappedLine();
3914 return true;
3915 }
3916 return false;
3917}
3918
3919namespace {
3920// A class used to set and restore the Token position when peeking
3921// ahead in the token source.
3922class ScopedTokenPosition {
3923 unsigned StoredPosition;
3924 FormatTokenSource *Tokens;
3925
3926public:
3927 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3928 assert(Tokens && "Tokens expected to not be null");
3929 StoredPosition = Tokens->getPosition();
3930 }
3931
3932 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3933};
3934} // namespace
3935
3936// Look to see if we have [[ by looking ahead, if
3937// its not then rewind to the original position.
3938bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3939 ScopedTokenPosition AutoPosition(Tokens);
3940 FormatToken *Tok = Tokens->getNextToken();
3941 // We already read the first [ check for the second.
3942 if (Tok->isNot(tok::l_square))
3943 return false;
3944 // Double check that the attribute is just something
3945 // fairly simple.
3946 while (Tok->isNot(tok::eof)) {
3947 if (Tok->is(tok::r_square))
3948 break;
3949 Tok = Tokens->getNextToken();
3950 }
3951 if (Tok->is(tok::eof))
3952 return false;
3953 Tok = Tokens->getNextToken();
3954 if (Tok->isNot(tok::r_square))
3955 return false;
3956 Tok = Tokens->getNextToken();
3957 if (Tok->is(tok::semi))
3958 return false;
3959 return true;
3960}
3961
3962void UnwrappedLineParser::parseJavaEnumBody() {
3963 assert(FormatTok->is(tok::l_brace));
3964 const FormatToken *OpeningBrace = FormatTok;
3965
3966 // Determine whether the enum is simple, i.e. does not have a semicolon or
3967 // constants with class bodies. Simple enums can be formatted like braced
3968 // lists, contracted to a single line, etc.
3969 unsigned StoredPosition = Tokens->getPosition();
3970 bool IsSimple = true;
3971 FormatToken *Tok = Tokens->getNextToken();
3972 while (Tok->isNot(tok::eof)) {
3973 if (Tok->is(tok::r_brace))
3974 break;
3975 if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3976 IsSimple = false;
3977 break;
3978 }
3979 // FIXME: This will also mark enums with braces in the arguments to enum
3980 // constants as "not simple". This is probably fine in practice, though.
3981 Tok = Tokens->getNextToken();
3982 }
3983 FormatTok = Tokens->setPosition(StoredPosition);
3984
3985 if (IsSimple) {
3986 nextToken();
3987 parseBracedList();
3988 addUnwrappedLine();
3989 return;
3990 }
3991
3992 // Parse the body of a more complex enum.
3993 // First add a line for everything up to the "{".
3994 nextToken();
3995 addUnwrappedLine();
3996 ++Line->Level;
3997
3998 // Parse the enum constants.
3999 while (!eof()) {
4000 if (FormatTok->is(tok::l_brace)) {
4001 // Parse the constant's class body.
4002 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
4003 /*MunchSemi=*/false);
4004 } else if (FormatTok->is(tok::l_paren)) {
4005 parseParens();
4006 } else if (FormatTok->is(tok::comma)) {
4007 nextToken();
4008 addUnwrappedLine();
4009 } else if (FormatTok->is(tok::semi)) {
4010 nextToken();
4011 addUnwrappedLine();
4012 break;
4013 } else if (FormatTok->is(tok::r_brace)) {
4014 addUnwrappedLine();
4015 break;
4016 } else {
4017 nextToken();
4018 }
4019 }
4020
4021 // Parse the class body after the enum's ";" if any.
4022 parseLevel(OpeningBrace);
4023 nextToken();
4024 --Line->Level;
4025 addUnwrappedLine();
4026}
4027
4028void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
4029 const FormatToken &InitialToken = *FormatTok;
4030 nextToken();
4031
4032 const FormatToken *ClassName = nullptr;
4033 bool IsDerived = false;
4034 auto IsNonMacroIdentifier = [](const FormatToken *Tok) {
4035 return Tok->is(tok::identifier) && Tok->TokenText != Tok->TokenText.upper();
4036 };
4037 // JavaScript/TypeScript supports anonymous classes like:
4038 // a = class extends foo { }
4039 bool JSPastExtendsOrImplements = false;
4040 // The actual identifier can be a nested name specifier, and in macros
4041 // it is often token-pasted.
4042 // An [[attribute]] can be before the identifier.
4043 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
4044 tok::kw_alignas, tok::l_square) ||
4045 FormatTok->isAttribute() ||
4046 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
4047 FormatTok->isOneOf(tok::period, tok::comma))) {
4048 if (Style.isJavaScript() &&
4049 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
4050 JSPastExtendsOrImplements = true;
4051 // JavaScript/TypeScript supports inline object types in
4052 // extends/implements positions:
4053 // class Foo implements {bar: number} { }
4054 nextToken();
4055 if (FormatTok->is(tok::l_brace)) {
4056 tryToParseBracedList();
4057 continue;
4058 }
4059 }
4060 if (FormatTok->is(tok::l_square) && handleCppAttributes())
4061 continue;
4062 const auto *Previous = FormatTok;
4063 nextToken();
4064 switch (FormatTok->Tok.getKind()) {
4065 case tok::l_paren:
4066 // We can have macros in between 'class' and the class name.
4067 if (!IsNonMacroIdentifier(Previous) ||
4068 // e.g. `struct macro(a) S { int i; };`
4069 Previous->Previous == &InitialToken) {
4070 parseParens();
4071 }
4072 break;
4073 case tok::coloncolon:
4074 case tok::hashhash:
4075 break;
4076 default:
4077 if (!JSPastExtendsOrImplements && !ClassName &&
4078 Previous->is(tok::identifier) && Previous->isNot(TT_AttributeMacro)) {
4079 ClassName = Previous;
4080 }
4081 }
4082 }
4083
4084 auto IsListInitialization = [&] {
4085 if (!ClassName || IsDerived || JSPastExtendsOrImplements)
4086 return false;
4087 assert(FormatTok->is(tok::l_brace));
4088 const auto *Prev = FormatTok->getPreviousNonComment();
4089 assert(Prev);
4090 return Prev != ClassName && Prev->is(tok::identifier) &&
4091 Prev->isNot(Keywords.kw_final) && tryToParseBracedList();
4092 };
4093
4094 if (FormatTok->isOneOf(tok::colon, tok::less)) {
4095 int AngleNestingLevel = 0;
4096 do {
4097 if (FormatTok->is(tok::less))
4098 ++AngleNestingLevel;
4099 else if (FormatTok->is(tok::greater))
4100 --AngleNestingLevel;
4101
4102 if (AngleNestingLevel == 0) {
4103 if (FormatTok->is(tok::colon)) {
4104 IsDerived = true;
4105 } else if (FormatTok->is(tok::identifier) &&
4106 FormatTok->Previous->is(tok::coloncolon)) {
4107 ClassName = FormatTok;
4108 } else if (FormatTok->is(tok::l_paren) &&
4109 IsNonMacroIdentifier(FormatTok->Previous)) {
4110 break;
4111 }
4112 }
4113 if (FormatTok->is(tok::l_brace)) {
4114 if (AngleNestingLevel == 0 && IsListInitialization())
4115 return;
4116 calculateBraceTypes(/*ExpectClassBody=*/true);
4117 if (!tryToParseBracedList())
4118 break;
4119 }
4120 if (FormatTok->is(tok::l_square)) {
4121 FormatToken *Previous = FormatTok->Previous;
4122 if (!Previous || (Previous->isNot(tok::r_paren) &&
4123 !Previous->isTypeOrIdentifier(LangOpts))) {
4124 // Don't try parsing a lambda if we had a closing parenthesis before,
4125 // it was probably a pointer to an array: int (*)[].
4126 if (!tryToParseLambda())
4127 continue;
4128 } else {
4129 parseSquare();
4130 continue;
4131 }
4132 }
4133 if (FormatTok->is(tok::semi))
4134 return;
4135 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
4136 addUnwrappedLine();
4137 nextToken();
4138 parseCSharpGenericTypeConstraint();
4139 break;
4140 }
4141 nextToken();
4142 } while (!eof());
4143 }
4144
4145 auto GetBraceTypes =
4146 [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> {
4147 switch (RecordTok.Tok.getKind()) {
4148 case tok::kw_class:
4149 return {TT_ClassLBrace, TT_ClassRBrace};
4150 case tok::kw_struct:
4151 return {TT_StructLBrace, TT_StructRBrace};
4152 case tok::kw_union:
4153 return {TT_UnionLBrace, TT_UnionRBrace};
4154 default:
4155 // Useful for e.g. interface.
4156 return {TT_RecordLBrace, TT_RecordRBrace};
4157 }
4158 };
4159 if (FormatTok->is(tok::l_brace)) {
4160 if (IsListInitialization())
4161 return;
4162 auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken);
4163 FormatTok->setFinalizedType(OpenBraceType);
4164 if (ParseAsExpr) {
4165 parseChildBlock();
4166 } else {
4167 if (ShouldBreakBeforeBrace(Style, InitialToken))
4168 addUnwrappedLine();
4169
4170 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
4171 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
4172 }
4173 setPreviousRBraceType(ClosingBraceType);
4174 }
4175 // There is no addUnwrappedLine() here so that we fall through to parsing a
4176 // structural element afterwards. Thus, in "class A {} n, m;",
4177 // "} n, m;" will end up in one unwrapped line.
4178}
4179
4180void UnwrappedLineParser::parseObjCMethod() {
4181 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
4182 "'(' or identifier expected.");
4183 do {
4184 if (FormatTok->is(tok::semi)) {
4185 nextToken();
4186 addUnwrappedLine();
4187 return;
4188 } else if (FormatTok->is(tok::l_brace)) {
4189 if (Style.BraceWrapping.AfterFunction)
4190 addUnwrappedLine();
4191 parseBlock();
4192 addUnwrappedLine();
4193 return;
4194 } else {
4195 nextToken();
4196 }
4197 } while (!eof());
4198}
4199
4200void UnwrappedLineParser::parseObjCProtocolList() {
4201 assert(FormatTok->is(tok::less) && "'<' expected.");
4202 do {
4203 nextToken();
4204 // Early exit in case someone forgot a close angle.
4205 if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4206 FormatTok->isObjCAtKeyword(tok::objc_end)) {
4207 return;
4208 }
4209 } while (!eof() && FormatTok->isNot(tok::greater));
4210 nextToken(); // Skip '>'.
4211}
4212
4213void UnwrappedLineParser::parseObjCUntilAtEnd() {
4214 do {
4215 if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
4216 nextToken();
4217 addUnwrappedLine();
4218 break;
4219 }
4220 if (FormatTok->is(tok::l_brace)) {
4221 parseBlock();
4222 // In ObjC interfaces, nothing should be following the "}".
4223 addUnwrappedLine();
4224 } else if (FormatTok->is(tok::r_brace)) {
4225 // Ignore stray "}". parseStructuralElement doesn't consume them.
4226 nextToken();
4227 addUnwrappedLine();
4228 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
4229 nextToken();
4230 parseObjCMethod();
4231 } else {
4232 parseStructuralElement();
4233 }
4234 } while (!eof());
4235}
4236
4237void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
4238 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
4239 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
4240 nextToken();
4241 nextToken(); // interface name
4242
4243 // @interface can be followed by a lightweight generic
4244 // specialization list, then either a base class or a category.
4245 if (FormatTok->is(tok::less))
4246 parseObjCLightweightGenerics();
4247 if (FormatTok->is(tok::colon)) {
4248 nextToken();
4249 nextToken(); // base class name
4250 // The base class can also have lightweight generics applied to it.
4251 if (FormatTok->is(tok::less))
4252 parseObjCLightweightGenerics();
4253 } else if (FormatTok->is(tok::l_paren)) {
4254 // Skip category, if present.
4255 parseParens();
4256 }
4257
4258 if (FormatTok->is(tok::less))
4259 parseObjCProtocolList();
4260
4261 if (FormatTok->is(tok::l_brace)) {
4263 addUnwrappedLine();
4264 parseBlock(/*MustBeDeclaration=*/true);
4265 }
4266
4267 // With instance variables, this puts '}' on its own line. Without instance
4268 // variables, this ends the @interface line.
4269 addUnwrappedLine();
4270
4271 parseObjCUntilAtEnd();
4272}
4273
4274void UnwrappedLineParser::parseObjCLightweightGenerics() {
4275 assert(FormatTok->is(tok::less));
4276 // Unlike protocol lists, generic parameterizations support
4277 // nested angles:
4278 //
4279 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4280 // NSObject <NSCopying, NSSecureCoding>
4281 //
4282 // so we need to count how many open angles we have left.
4283 unsigned NumOpenAngles = 1;
4284 do {
4285 nextToken();
4286 // Early exit in case someone forgot a close angle.
4287 if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4288 FormatTok->isObjCAtKeyword(tok::objc_end)) {
4289 break;
4290 }
4291 if (FormatTok->is(tok::less)) {
4292 ++NumOpenAngles;
4293 } else if (FormatTok->is(tok::greater)) {
4294 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
4295 --NumOpenAngles;
4296 }
4297 } while (!eof() && NumOpenAngles != 0);
4298 nextToken(); // Skip '>'.
4299}
4300
4301// Returns true for the declaration/definition form of @protocol,
4302// false for the expression form.
4303bool UnwrappedLineParser::parseObjCProtocol() {
4304 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
4305 nextToken();
4306
4307 if (FormatTok->is(tok::l_paren)) {
4308 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4309 return false;
4310 }
4311
4312 // The definition/declaration form,
4313 // @protocol Foo
4314 // - (int)someMethod;
4315 // @end
4316
4317 nextToken(); // protocol name
4318
4319 if (FormatTok->is(tok::less))
4320 parseObjCProtocolList();
4321
4322 // Check for protocol declaration.
4323 if (FormatTok->is(tok::semi)) {
4324 nextToken();
4325 addUnwrappedLine();
4326 return true;
4327 }
4328
4329 addUnwrappedLine();
4330 parseObjCUntilAtEnd();
4331 return true;
4332}
4333
4334void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
4335 bool IsImport = FormatTok->is(Keywords.kw_import);
4336 assert(IsImport || FormatTok->is(tok::kw_export));
4337 nextToken();
4338
4339 // Consume the "default" in "export default class/function".
4340 if (FormatTok->is(tok::kw_default))
4341 nextToken();
4342
4343 // Consume "async function", "function" and "default function", so that these
4344 // get parsed as free-standing JS functions, i.e. do not require a trailing
4345 // semicolon.
4346 if (FormatTok->is(Keywords.kw_async))
4347 nextToken();
4348 if (FormatTok->is(Keywords.kw_function)) {
4349 nextToken();
4350 return;
4351 }
4352
4353 // For imports, `export *`, `export {...}`, consume the rest of the line up
4354 // to the terminating `;`. For everything else, just return and continue
4355 // parsing the structural element, i.e. the declaration or expression for
4356 // `export default`.
4357 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
4358 !FormatTok->isStringLiteral() &&
4359 !(FormatTok->is(Keywords.kw_type) &&
4360 Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) {
4361 return;
4362 }
4363
4364 while (!eof()) {
4365 if (FormatTok->is(tok::semi))
4366 return;
4367 if (Line->Tokens.empty()) {
4368 // Common issue: Automatic Semicolon Insertion wrapped the line, so the
4369 // import statement should terminate.
4370 return;
4371 }
4372 if (FormatTok->is(tok::l_brace)) {
4373 FormatTok->setBlockKind(BK_Block);
4374 nextToken();
4375 parseBracedList();
4376 } else {
4377 nextToken();
4378 }
4379 }
4380}
4381
4382void UnwrappedLineParser::parseStatementMacro() {
4383 nextToken();
4384 if (FormatTok->is(tok::l_paren))
4385 parseParens();
4386 if (FormatTok->is(tok::semi))
4387 nextToken();
4388 addUnwrappedLine();
4389}
4390
4391void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4392 // consume things like a::`b.c[d:e] or a::*
4393 while (true) {
4394 if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar,
4395 tok::coloncolon, tok::hash) ||
4396 Keywords.isVerilogIdentifier(*FormatTok)) {
4397 nextToken();
4398 } else if (FormatTok->is(tok::l_square)) {
4399 parseSquare();
4400 } else {
4401 break;
4402 }
4403 }
4404}
4405
4406void UnwrappedLineParser::parseVerilogSensitivityList() {
4407 if (FormatTok->isNot(tok::at))
4408 return;
4409 nextToken();
4410 // A block event expression has 2 at signs.
4411 if (FormatTok->is(tok::at))
4412 nextToken();
4413 switch (FormatTok->Tok.getKind()) {
4414 case tok::star:
4415 nextToken();
4416 break;
4417 case tok::l_paren:
4418 parseParens();
4419 break;
4420 default:
4421 parseVerilogHierarchyIdentifier();
4422 break;
4423 }
4424}
4425
4426unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
4427 unsigned AddLevels = 0;
4428
4429 if (FormatTok->is(Keywords.kw_clocking)) {
4430 nextToken();
4431 if (Keywords.isVerilogIdentifier(*FormatTok))
4432 nextToken();
4433 parseVerilogSensitivityList();
4434 if (FormatTok->is(tok::semi))
4435 nextToken();
4436 } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex,
4437 Keywords.kw_casez, Keywords.kw_randcase,
4438 Keywords.kw_randsequence)) {
4439 if (Style.IndentCaseLabels)
4440 AddLevels++;
4441 nextToken();
4442 if (FormatTok->is(tok::l_paren)) {
4443 FormatTok->setFinalizedType(TT_ConditionLParen);
4444 parseParens();
4445 }
4446 if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches))
4447 nextToken();
4448 // The case header has no semicolon.
4449 } else {
4450 // "module" etc.
4451 nextToken();
4452 // all the words like the name of the module and specifiers like
4453 // "automatic" and the width of function return type
4454 while (true) {
4455 if (FormatTok->is(tok::l_square)) {
4456 auto Prev = FormatTok->getPreviousNonComment();
4457 if (Prev && Keywords.isVerilogIdentifier(*Prev))
4458 Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
4459 parseSquare();
4460 } else if (Keywords.isVerilogIdentifier(*FormatTok) ||
4461 FormatTok->isOneOf(tok::hash, tok::hashhash, tok::coloncolon,
4462 Keywords.kw_automatic, tok::kw_static)) {
4463 nextToken();
4464 } else {
4465 break;
4466 }
4467 }
4468
4469 auto NewLine = [this]() {
4470 addUnwrappedLine();
4471 Line->IsContinuation = true;
4472 };
4473
4474 // package imports
4475 while (FormatTok->is(Keywords.kw_import)) {
4476 NewLine();
4477 nextToken();
4478 parseVerilogHierarchyIdentifier();
4479 if (FormatTok->is(tok::semi))
4480 nextToken();
4481 }
4482
4483 // parameters and ports
4484 if (FormatTok->is(Keywords.kw_verilogHash)) {
4485 NewLine();
4486 nextToken();
4487 if (FormatTok->is(tok::l_paren)) {
4488 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4489 parseParens();
4490 }
4491 }
4492 if (FormatTok->is(tok::l_paren)) {
4493 NewLine();
4494 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4495 parseParens();
4496 }
4497
4498 // extends and implements
4499 if (FormatTok->is(Keywords.kw_extends)) {
4500 NewLine();
4501 nextToken();
4502 parseVerilogHierarchyIdentifier();
4503 if (FormatTok->is(tok::l_paren))
4504 parseParens();
4505 }
4506 if (FormatTok->is(Keywords.kw_implements)) {
4507 NewLine();
4508 do {
4509 nextToken();
4510 parseVerilogHierarchyIdentifier();
4511 } while (FormatTok->is(tok::comma));
4512 }
4513
4514 // Coverage event for cover groups.
4515 if (FormatTok->is(tok::at)) {
4516 NewLine();
4517 parseVerilogSensitivityList();
4518 }
4519
4520 if (FormatTok->is(tok::semi))
4521 nextToken(/*LevelDifference=*/1);
4522 addUnwrappedLine();
4523 }
4524
4525 return AddLevels;
4526}
4527
4528void UnwrappedLineParser::parseVerilogTable() {
4529 assert(FormatTok->is(Keywords.kw_table));
4530 nextToken(/*LevelDifference=*/1);
4531 addUnwrappedLine();
4532
4533 auto InitialLevel = Line->Level++;
4534 while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) {
4535 FormatToken *Tok = FormatTok;
4536 nextToken();
4537 if (Tok->is(tok::semi))
4538 addUnwrappedLine();
4539 else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus))
4540 Tok->setFinalizedType(TT_VerilogTableItem);
4541 }
4542 Line->Level = InitialLevel;
4543 nextToken(/*LevelDifference=*/-1);
4544 addUnwrappedLine();
4545}
4546
4547void UnwrappedLineParser::parseVerilogCaseLabel() {
4548 // The label will get unindented in AnnotatingParser. If there are no leading
4549 // spaces, indent the rest here so that things inside the block will be
4550 // indented relative to things outside. We don't use parseLabel because we
4551 // don't know whether this colon is a label or a ternary expression at this
4552 // point.
4553 auto OrigLevel = Line->Level;
4554 auto FirstLine = CurrentLines->size();
4555 if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
4556 ++Line->Level;
4557 else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok))
4558 --Line->Level;
4559 parseStructuralElement();
4560 // Restore the indentation in both the new line and the line that has the
4561 // label.
4562 if (CurrentLines->size() > FirstLine)
4563 (*CurrentLines)[FirstLine].Level = OrigLevel;
4564 Line->Level = OrigLevel;
4565}
4566
4567bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
4568 for (const auto &N : Line.Tokens) {
4569 if (N.Tok->MacroCtx)
4570 return true;
4571 for (const UnwrappedLine &Child : N.Children)
4572 if (containsExpansion(Child))
4573 return true;
4574 }
4575 return false;
4576}
4577
4578void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
4579 if (Line->Tokens.empty())
4580 return;
4581 LLVM_DEBUG({
4582 if (!parsingPPDirective()) {
4583 llvm::dbgs() << "Adding unwrapped line:\n";
4584 printDebugInfo(*Line);
4585 }
4586 });
4587
4588 // If this line closes a block when in Whitesmiths mode, remember that
4589 // information so that the level can be decreased after the line is added.
4590 // This has to happen after the addition of the line since the line itself
4591 // needs to be indented.
4592 bool ClosesWhitesmithsBlock =
4593 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
4595
4596 // If the current line was expanded from a macro call, we use it to
4597 // reconstruct an unwrapped line from the structure of the expanded unwrapped
4598 // line and the unexpanded token stream.
4599 if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) {
4600 if (!Reconstruct)
4601 Reconstruct.emplace(Line->Level, Unexpanded);
4602 Reconstruct->addLine(*Line);
4603
4604 // While the reconstructed unexpanded lines are stored in the normal
4605 // flow of lines, the expanded lines are stored on the side to be analyzed
4606 // in an extra step.
4607 CurrentExpandedLines.push_back(std::move(*Line));
4608
4609 if (Reconstruct->finished()) {
4610 UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
4611 assert(!Reconstructed.Tokens.empty() &&
4612 "Reconstructed must at least contain the macro identifier.");
4613 assert(!parsingPPDirective());
4614 LLVM_DEBUG({
4615 llvm::dbgs() << "Adding unexpanded line:\n";
4616 printDebugInfo(Reconstructed);
4617 });
4618 ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines;
4619 Lines.push_back(std::move(Reconstructed));
4620 CurrentExpandedLines.clear();
4621 Reconstruct.reset();
4622 }
4623 } else {
4624 // At the top level we only get here when no unexpansion is going on, or
4625 // when conditional formatting led to unfinished macro reconstructions.
4626 assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0);
4627 CurrentLines->push_back(std::move(*Line));
4628 }
4629 Line->Tokens.clear();
4630 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
4631 Line->FirstStartColumn = 0;
4632 Line->IsContinuation = false;
4633 Line->SeenDecltypeAuto = false;
4634
4635 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
4636 --Line->Level;
4637 if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
4638 CurrentLines->append(
4639 std::make_move_iterator(PreprocessorDirectives.begin()),
4640 std::make_move_iterator(PreprocessorDirectives.end()));
4641 PreprocessorDirectives.clear();
4642 }
4643 // Disconnect the current token from the last token on the previous line.
4644 FormatTok->Previous = nullptr;
4645}
4646
4647bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
4648
4649bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4650 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4651 FormatTok.NewlinesBefore > 0;
4652}
4653
4654// Checks if \p FormatTok is a line comment that continues the line comment
4655// section on \p Line.
4656static bool
4658 const UnwrappedLine &Line, const FormatStyle &Style,
4659 const llvm::Regex &CommentPragmasRegex) {
4660 if (Line.Tokens.empty() || Style.ReflowComments != FormatStyle::RCS_Always)
4661 return false;
4662
4663 StringRef IndentContent = FormatTok.TokenText;
4664 if (FormatTok.TokenText.starts_with("//") ||
4665 FormatTok.TokenText.starts_with("/*")) {
4666 IndentContent = FormatTok.TokenText.substr(2);
4667 }
4668 if (CommentPragmasRegex.match(IndentContent))
4669 return false;
4670
4671 // If Line starts with a line comment, then FormatTok continues the comment
4672 // section if its original column is greater or equal to the original start
4673 // column of the line.
4674 //
4675 // Define the min column token of a line as follows: if a line ends in '{' or
4676 // contains a '{' followed by a line comment, then the min column token is
4677 // that '{'. Otherwise, the min column token of the line is the first token of
4678 // the line.
4679 //
4680 // If Line starts with a token other than a line comment, then FormatTok
4681 // continues the comment section if its original column is greater than the
4682 // original start column of the min column token of the line.
4683 //
4684 // For example, the second line comment continues the first in these cases:
4685 //
4686 // // first line
4687 // // second line
4688 //
4689 // and:
4690 //
4691 // // first line
4692 // // second line
4693 //
4694 // and:
4695 //
4696 // int i; // first line
4697 // // second line
4698 //
4699 // and:
4700 //
4701 // do { // first line
4702 // // second line
4703 // int i;
4704 // } while (true);
4705 //
4706 // and:
4707 //
4708 // enum {
4709 // a, // first line
4710 // // second line
4711 // b
4712 // };
4713 //
4714 // The second line comment doesn't continue the first in these cases:
4715 //
4716 // // first line
4717 // // second line
4718 //
4719 // and:
4720 //
4721 // int i; // first line
4722 // // second line
4723 //
4724 // and:
4725 //
4726 // do { // first line
4727 // // second line
4728 // int i;
4729 // } while (true);
4730 //
4731 // and:
4732 //
4733 // enum {
4734 // a, // first line
4735 // // second line
4736 // };
4737 const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4738
4739 // Scan for '{//'. If found, use the column of '{' as a min column for line
4740 // comment section continuation.
4741 const FormatToken *PreviousToken = nullptr;
4742 for (const UnwrappedLineNode &Node : Line.Tokens) {
4743 if (PreviousToken && PreviousToken->is(tok::l_brace) &&
4744 isLineComment(*Node.Tok)) {
4745 MinColumnToken = PreviousToken;
4746 break;
4747 }
4748 PreviousToken = Node.Tok;
4749
4750 // Grab the last newline preceding a token in this unwrapped line.
4751 if (Node.Tok->NewlinesBefore > 0)
4752 MinColumnToken = Node.Tok;
4753 }
4754 if (PreviousToken && PreviousToken->is(tok::l_brace))
4755 MinColumnToken = PreviousToken;
4756
4757 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4758 MinColumnToken);
4759}
4760
4761void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4762 bool JustComments = Line->Tokens.empty();
4763 for (FormatToken *Tok : CommentsBeforeNextToken) {
4764 // Line comments that belong to the same line comment section are put on the
4765 // same line since later we might want to reflow content between them.
4766 // Additional fine-grained breaking of line comment sections is controlled
4767 // by the class BreakableLineCommentSection in case it is desirable to keep
4768 // several line comment sections in the same unwrapped line.
4769 //
4770 // FIXME: Consider putting separate line comment sections as children to the
4771 // unwrapped line instead.
4772 Tok->ContinuesLineCommentSection =
4773 continuesLineCommentSection(*Tok, *Line, Style, CommentPragmasRegex);
4774 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4775 addUnwrappedLine();
4776 pushToken(Tok);
4777 }
4778 if (NewlineBeforeNext && JustComments)
4779 addUnwrappedLine();
4780 CommentsBeforeNextToken.clear();
4781}
4782
4783void UnwrappedLineParser::nextToken(int LevelDifference) {
4784 if (eof())
4785 return;
4786 flushComments(isOnNewLine(*FormatTok));
4787 pushToken(FormatTok);
4788 FormatToken *Previous = FormatTok;
4789 if (!Style.isJavaScript())
4790 readToken(LevelDifference);
4791 else
4792 readTokenWithJavaScriptASI();
4793 FormatTok->Previous = Previous;
4794 if (Style.isVerilog()) {
4795 // Blocks in Verilog can have `begin` and `end` instead of braces. For
4796 // keywords like `begin`, we can't treat them the same as left braces
4797 // because some contexts require one of them. For example structs use
4798 // braces and if blocks use keywords, and a left brace can occur in an if
4799 // statement, but it is not a block. For keywords like `end`, we simply
4800 // treat them the same as right braces.
4801 if (Keywords.isVerilogEnd(*FormatTok))
4802 FormatTok->Tok.setKind(tok::r_brace);
4803 }
4804}
4805
4806void UnwrappedLineParser::distributeComments(
4807 const ArrayRef<FormatToken *> &Comments, const FormatToken *NextTok) {
4808 // Whether or not a line comment token continues a line is controlled by
4809 // the method continuesLineCommentSection, with the following caveat:
4810 //
4811 // Define a trail of Comments to be a nonempty proper postfix of Comments such
4812 // that each comment line from the trail is aligned with the next token, if
4813 // the next token exists. If a trail exists, the beginning of the maximal
4814 // trail is marked as a start of a new comment section.
4815 //
4816 // For example in this code:
4817 //
4818 // int a; // line about a
4819 // // line 1 about b
4820 // // line 2 about b
4821 // int b;
4822 //
4823 // the two lines about b form a maximal trail, so there are two sections, the
4824 // first one consisting of the single comment "// line about a" and the
4825 // second one consisting of the next two comments.
4826 if (Comments.empty())
4827 return;
4828 bool ShouldPushCommentsInCurrentLine = true;
4829 bool HasTrailAlignedWithNextToken = false;
4830 unsigned StartOfTrailAlignedWithNextToken = 0;
4831 if (NextTok) {
4832 // We are skipping the first element intentionally.
4833 for (unsigned i = Comments.size() - 1; i > 0; --i) {
4834 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4835 HasTrailAlignedWithNextToken = true;
4836 StartOfTrailAlignedWithNextToken = i;
4837 }
4838 }
4839 }
4840 for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4841 FormatToken *FormatTok = Comments[i];
4842 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4843 FormatTok->ContinuesLineCommentSection = false;
4844 } else {
4845 FormatTok->ContinuesLineCommentSection = continuesLineCommentSection(
4846 *FormatTok, *Line, Style, CommentPragmasRegex);
4847 }
4848 if (!FormatTok->ContinuesLineCommentSection &&
4849 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
4850 ShouldPushCommentsInCurrentLine = false;
4851 }
4852 if (ShouldPushCommentsInCurrentLine)
4853 pushToken(FormatTok);
4854 else
4855 CommentsBeforeNextToken.push_back(FormatTok);
4856 }
4857}
4858
4859void UnwrappedLineParser::readToken(int LevelDifference) {
4860 SmallVector<FormatToken *, 1> Comments;
4861 bool PreviousWasComment = false;
4862 bool FirstNonCommentOnLine = false;
4863 do {
4864 FormatTok = Tokens->getNextToken();
4865 assert(FormatTok);
4866 while (FormatTok->isOneOf(TT_ConflictStart, TT_ConflictEnd,
4867 TT_ConflictAlternative)) {
4868 if (FormatTok->is(TT_ConflictStart))
4869 conditionalCompilationStart(/*Unreachable=*/false);
4870 else if (FormatTok->is(TT_ConflictAlternative))
4871 conditionalCompilationAlternative();
4872 else if (FormatTok->is(TT_ConflictEnd))
4873 conditionalCompilationEnd();
4874 FormatTok = Tokens->getNextToken();
4875 FormatTok->MustBreakBefore = true;
4876 FormatTok->MustBreakBeforeFinalized = true;
4877 }
4878
4879 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4880 const FormatToken &Tok,
4881 bool PreviousWasComment) {
4882 auto IsFirstOnLine = [](const FormatToken &Tok) {
4883 return Tok.HasUnescapedNewline || Tok.IsFirst;
4884 };
4885
4886 // Consider preprocessor directives preceded by block comments as first
4887 // on line.
4888 if (PreviousWasComment)
4889 return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4890 return IsFirstOnLine(Tok);
4891 };
4892
4893 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4894 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4895 PreviousWasComment = FormatTok->is(tok::comment);
4896
4897 while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
4898 (!Style.isVerilog() ||
4899 Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) &&
4900 FirstNonCommentOnLine) {
4901 distributeComments(Comments, FormatTok);
4902 Comments.clear();
4903 // If there is an unfinished unwrapped line, we flush the preprocessor
4904 // directives only after that unwrapped line was finished later.
4905 bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4906 ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4907 assert((LevelDifference >= 0 ||
4908 static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4909 "LevelDifference makes Line->Level negative");
4910 Line->Level += LevelDifference;
4911 // Comments stored before the preprocessor directive need to be output
4912 // before the preprocessor directive, at the same level as the
4913 // preprocessor directive, as we consider them to apply to the directive.
4915 PPBranchLevel > 0) {
4916 Line->Level += PPBranchLevel;
4917 }
4918 assert(Line->Level >= Line->UnbracedBodyLevel);
4919 Line->Level -= Line->UnbracedBodyLevel;
4920 flushComments(isOnNewLine(*FormatTok));
4921 parsePPDirective();
4922 PreviousWasComment = FormatTok->is(tok::comment);
4923 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4924 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4925 }
4926
4927 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4928 !Line->InPPDirective) {
4929 continue;
4930 }
4931
4932 if (FormatTok->is(tok::identifier) &&
4933 Macros.defined(FormatTok->TokenText) &&
4934 // FIXME: Allow expanding macros in preprocessor directives.
4935 !Line->InPPDirective) {
4936 FormatToken *ID = FormatTok;
4937 unsigned Position = Tokens->getPosition();
4938
4939 // To correctly parse the code, we need to replace the tokens of the macro
4940 // call with its expansion.
4941 auto PreCall = std::move(Line);
4942 Line.reset(new UnwrappedLine);
4943 bool OldInExpansion = InExpansion;
4944 InExpansion = true;
4945 // We parse the macro call into a new line.
4946 auto Args = parseMacroCall();
4947 InExpansion = OldInExpansion;
4948 assert(Line->Tokens.front().Tok == ID);
4949 // And remember the unexpanded macro call tokens.
4950 auto UnexpandedLine = std::move(Line);
4951 // Reset to the old line.
4952 Line = std::move(PreCall);
4953
4954 LLVM_DEBUG({
4955 llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
4956 if (Args) {
4957 llvm::dbgs() << "(";
4958 for (const auto &Arg : Args.value())
4959 for (const auto &T : Arg)
4960 llvm::dbgs() << T->TokenText << " ";
4961 llvm::dbgs() << ")";
4962 }
4963 llvm::dbgs() << "\n";
4964 });
4965 if (Macros.objectLike(ID->TokenText) && Args &&
4966 !Macros.hasArity(ID->TokenText, Args->size())) {
4967 // The macro is either
4968 // - object-like, but we got argumnets, or
4969 // - overloaded to be both object-like and function-like, but none of
4970 // the function-like arities match the number of arguments.
4971 // Thus, expand as object-like macro.
4972 LLVM_DEBUG(llvm::dbgs()
4973 << "Macro \"" << ID->TokenText
4974 << "\" not overloaded for arity " << Args->size()
4975 << "or not function-like, using object-like overload.");
4976 Args.reset();
4977 UnexpandedLine->Tokens.resize(1);
4978 Tokens->setPosition(Position);
4979 nextToken();
4980 assert(!Args && Macros.objectLike(ID->TokenText));
4981 }
4982 if ((!Args && Macros.objectLike(ID->TokenText)) ||
4983 (Args && Macros.hasArity(ID->TokenText, Args->size()))) {
4984 // Next, we insert the expanded tokens in the token stream at the
4985 // current position, and continue parsing.
4986 Unexpanded[ID] = std::move(UnexpandedLine);
4987 SmallVector<FormatToken *, 8> Expansion =
4988 Macros.expand(ID, std::move(Args));
4989 if (!Expansion.empty())
4990 FormatTok = Tokens->insertTokens(Expansion);
4991
4992 LLVM_DEBUG({
4993 llvm::dbgs() << "Expanded: ";
4994 for (const auto &T : Expansion)
4995 llvm::dbgs() << T->TokenText << " ";
4996 llvm::dbgs() << "\n";
4997 });
4998 } else {
4999 LLVM_DEBUG({
5000 llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
5001 << "\", because it was used ";
5002 if (Args)
5003 llvm::dbgs() << "with " << Args->size();
5004 else
5005 llvm::dbgs() << "without";
5006 llvm::dbgs() << " arguments, which doesn't match any definition.\n";
5007 });
5008 Tokens->setPosition(Position);
5009 FormatTok = ID;
5010 }
5011 }
5012
5013 if (FormatTok->isNot(tok::comment)) {
5014 distributeComments(Comments, FormatTok);
5015 Comments.clear();
5016 return;
5017 }
5018
5019 Comments.push_back(FormatTok);
5020 } while (!eof());
5021
5022 distributeComments(Comments, nullptr);
5023 Comments.clear();
5024}
5025
5026namespace {
5027template <typename Iterator>
5028void pushTokens(Iterator Begin, Iterator End,
5029 SmallVectorImpl<FormatToken *> &Into) {
5030 for (auto I = Begin; I != End; ++I) {
5031 Into.push_back(I->Tok);
5032 for (const auto &Child : I->Children)
5033 pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
5034 }
5035}
5036} // namespace
5037
5038std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
5039UnwrappedLineParser::parseMacroCall() {
5040 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
5041 assert(Line->Tokens.empty());
5042 nextToken();
5043 if (FormatTok->isNot(tok::l_paren))
5044 return Args;
5045 unsigned Position = Tokens->getPosition();
5046 FormatToken *Tok = FormatTok;
5047 nextToken();
5048 Args.emplace();
5049 auto ArgStart = std::prev(Line->Tokens.end());
5050
5051 int Parens = 0;
5052 do {
5053 switch (FormatTok->Tok.getKind()) {
5054 case tok::l_paren:
5055 ++Parens;
5056 nextToken();
5057 break;
5058 case tok::r_paren: {
5059 if (Parens > 0) {
5060 --Parens;
5061 nextToken();
5062 break;
5063 }
5064 Args->push_back({});
5065 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
5066 nextToken();
5067 return Args;
5068 }
5069 case tok::comma: {
5070 if (Parens > 0) {
5071 nextToken();
5072 break;
5073 }
5074 Args->push_back({});
5075 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
5076 nextToken();
5077 ArgStart = std::prev(Line->Tokens.end());
5078 break;
5079 }
5080 default:
5081 nextToken();
5082 break;
5083 }
5084 } while (!eof());
5085 Line->Tokens.resize(1);
5086 Tokens->setPosition(Position);
5087 FormatTok = Tok;
5088 return {};
5089}
5090
5091void UnwrappedLineParser::pushToken(FormatToken *Tok) {
5092 Line->Tokens.push_back(UnwrappedLineNode(Tok));
5093 if (MustBreakBeforeNextToken) {
5094 Line->Tokens.back().Tok->MustBreakBefore = true;
5095 Line->Tokens.back().Tok->MustBreakBeforeFinalized = true;
5096 MustBreakBeforeNextToken = false;
5097 }
5098}
5099
5100} // end namespace format
5101} // end namespace clang
DynTypedNode Node
static char ID
Definition: Arena.cpp:183
enum clang::sema::@1726::IndirectLocalPathEntry::EntryKind Kind
Expr * E
This file contains FormatTokenLexer, which tokenizes a source file into a token stream suitable for C...
This file defines the FormatTokenSource interface, which provides a token stream as well as the abili...
This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...
StringRef Text
Definition: Format.cpp:3054
This file contains the main building blocks of macro support in clang-format.
This file implements a token annotator, i.e.
Defines the clang::TokenKind enum and support functions.
SourceLocation Begin
StateNode * Previous
ContinuationIndenter * Indenter
This file contains the declaration of the UnwrappedLineParser, which turns a stream of tokens into Un...
do v
Definition: arm_acle.h:91
tok::PPKeywordKind getPPKeywordID() const
Return the preprocessor keyword ID for this identifier.
Implements an efficient mapping from strings to IdentifierInfo nodes.
Parser - This implements a parser for the C family of languages.
Definition: Parser.h:58
This class handles loading and caching of source files into memory.
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:187
bool isAnyIdentifier() const
Return true if this is a raw identifier (when lexing in raw mode) or a non-keyword identifier (when l...
Definition: Token.h:110
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:116
void setKind(tok::TokenKind K)
Definition: Token.h:95
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
Definition: Lexer.cpp:69
tok::TokenKind getKind() const
Definition: Token.h:94
bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const
Definition: Token.h:101
bool isNot(tok::TokenKind K) const
Definition: Token.h:100
void setIdentifierInfo(IdentifierInfo *II)
Definition: Token.h:196
CompoundStatementIndenter(UnwrappedLineParser *Parser, const FormatStyle &Style, unsigned &LineLevel)
CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, bool WrapBrace, bool IndentBrace)
virtual FormatToken * peekNextToken(bool SkipComment=false)=0
virtual unsigned getPosition()=0
virtual FormatToken * getPreviousToken()=0
virtual FormatToken * setPosition(unsigned Position)=0
virtual FormatToken * getNextToken()=0
bool objectLike(StringRef Name) const
Returns whether there is an object-like overload, i.e.
SmallVector< FormatToken *, 8 > expand(FormatToken *ID, std::optional< ArgsList > OptionalArgs) const
Returns the expanded stream of format tokens for ID, where each element in Args is a positional argum...
bool hasArity(StringRef Name, unsigned Arity) const
Returns whether macro Name provides an overload with the given arity.
bool defined(StringRef Name) const
Returns whether any macro Name is defined, regardless of overloads.
ScopedLineState(UnwrappedLineParser &Parser, bool SwitchToPreprocessorLines=false)
Interface for users of the UnwrappedLineParser to receive the parsed lines.
virtual void consumeUnwrappedLine(const UnwrappedLine &Line)=0
UnwrappedLineParser(SourceManager &SourceMgr, const FormatStyle &Style, const AdditionalKeywords &Keywords, unsigned FirstStartColumn, ArrayRef< FormatToken * > Tokens, UnwrappedLineConsumer &Callback, llvm::SpecificBumpPtrAllocator< FormatToken > &Allocator, IdentifierTable &IdentTable)
static bool isCOperatorFollowingVar(tok::TokenKind Kind)
static void hash_combine(std::size_t &seed, const T &v)
static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
std::ostream & operator<<(std::ostream &Stream, const UnwrappedLine &Line)
bool continuesLineComment(const FormatToken &FormatTok, const FormatToken *Previous, const FormatToken *MinColumnToken)
Definition: FormatToken.h:1971
static bool tokenCanStartNewLine(const FormatToken &Tok)
static bool continuesLineCommentSection(const FormatToken &FormatTok, const UnwrappedLine &Line, const FormatStyle &Style, const llvm::Regex &CommentPragmasRegex)
static bool isC78Type(const FormatToken &Tok)
bool isLineComment(const FormatToken &FormatTok)
Definition: FormatToken.h:1964
static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
static bool ShouldBreakBeforeBrace(const FormatStyle &Style, const FormatToken &InitialToken)
LangOptions getFormattingLangOpts(const FormatStyle &Style=getLLVMStyle())
Returns the LangOpts that the formatter expects you to set.
Definition: Format.cpp:3932
static void markOptionalBraces(FormatToken *LeftBrace)
static bool mustBeJSIdent(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
static bool isIIFE(const UnwrappedLine &Line, const AdditionalKeywords &Keywords)
static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next, const FormatToken *FuncName)
static bool isGoogScope(const UnwrappedLine &Line)
static FormatToken * getLastNonComment(const UnwrappedLine &Line)
TokenType
Determines the semantic type of a syntactic token, e.g.
Definition: FormatToken.h:212
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
bool isLiteral(TokenKind K)
Return true if this is a "literal" kind, like a numeric constant, string, etc.
Definition: TokenKinds.h:97
The JSON file list parser is used to communicate input to InstallAPI.
const FunctionProtoType * T
@ Parens
New-expression has a C++98 paren-delimited initializer.
#define false
Definition: stdbool.h:26
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:1032
bool isVerilogEnd(const FormatToken &Tok) const
Returns whether Tok is a Verilog keyword that closes a block.
Definition: FormatToken.h:1865
bool isVerilogBegin(const FormatToken &Tok) const
Returns whether Tok is a Verilog keyword that opens a block.
Definition: FormatToken.h:1858
bool isVerilogStructuredProcedure(const FormatToken &Tok) const
Returns whether Tok is a Verilog keyword that starts a structured procedure like 'always'.
Definition: FormatToken.h:1903
bool isVerilogHierarchy(const FormatToken &Tok) const
Returns whether Tok is a Verilog keyword that opens a module, etc.
Definition: FormatToken.h:1877
bool isVerilogPPDirective(const FormatToken &Tok) const
Returns whether Tok is a Verilog preprocessor directive.
Definition: FormatToken.h:1831
IdentifierInfo * kw_internal_ident_after_define
Definition: FormatToken.h:1465
bool isVerilogIdentifier(const FormatToken &Tok) const
Definition: FormatToken.h:1795
bool AfterClass
Wrap class definitions.
Definition: Format.h:1375
bool AfterStruct
Wrap struct definitions.
Definition: Format.h:1442
bool AfterUnion
Wrap union definitions.
Definition: Format.h:1456
bool AfterEnum
Wrap enum definitions.
Definition: Format.h:1390
bool IndentBraces
Indent the wrapped braces themselves.
Definition: Format.h:1533
bool AfterObjCDeclaration
Wrap ObjC definitions (interfaces, implementations...).
Definition: Format.h:1428
bool AfterNamespace
Wrap namespace definitions.
Definition: Format.h:1422
BraceWrappingAfterControlStatementStyle AfterControlStatement
Wrap control statements (if/for/while/switch/..).
Definition: Format.h:1378
bool AfterFunction
Wrap function definitions.
Definition: Format.h:1406
bool AfterExternBlock
Wrap extern blocks.
Definition: Format.h:1470
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
bool isTableGen() const
Definition: Format.h:3311
@ LK_Java
Should be used for Java.
Definition: Format.h:3283
@ LK_TableGen
Should be used for TableGen code.
Definition: Format.h:3294
@ LK_Proto
Should be used for Protocol Buffers (https://developers.google.com/protocol-buffers/).
Definition: Format.h:3292
@ LK_TextProto
Should be used for Protocol Buffer messages in text format (https://developers.google....
Definition: Format.h:3297
unsigned IndentWidth
The number of columns to use for indentation.
Definition: Format.h:2945
bool IndentCaseLabels
Indent case labels one level from the switch statement.
Definition: Format.h:2803
PPDirectiveIndentStyle IndentPPDirectives
The preprocessor directive indenting style to use.
Definition: Format.h:2908
bool RemoveSemicolon
Remove semicolons after the closing braces of functions and constructors/destructors.
Definition: Format.h:4042
bool IndentExportBlock
If true, clang-format will indent the body of an export { ... } block.
Definition: Format.h:2816
@ RCS_Always
Apply indentation rules and reflow long comments into new lines, trying to obey the ColumnLimit.
Definition: Format.h:3905
@ IEBS_AfterExternBlock
Backwards compatible with AfterExternBlock's indenting.
Definition: Format.h:2837
@ IEBS_Indent
Indents extern blocks.
Definition: Format.h:2851
bool IndentCaseBlocks
Indent case label blocks one level from the case label.
Definition: Format.h:2784
bool InsertBraces
Insert braces after control statements (if, else, for, do, and while) in C++ unless the control state...
Definition: Format.h:2991
RemoveParenthesesStyle RemoveParentheses
Remove redundant parentheses.
Definition: Format.h:4024
LanguageKind Language
Language, this format style is targeted at.
Definition: Format.h:3315
bool RemoveBracesLLVM
Remove optional braces of control statements (if, else, for, and while) in C++ according to the LLVM ...
Definition: Format.h:3965
@ PPDIS_BeforeHash
Indents directives before the hash.
Definition: Format.h:2903
@ PPDIS_None
Does not indent any directives.
Definition: Format.h:2885
bool AllowShortLoopsOnASingleLine
If true, while (true) continue; can be put on a single line.
Definition: Format.h:989
bool AllowShortEnumsOnASingleLine
Allow short enums on a single line.
Definition: Format.h:826
NamespaceIndentationKind NamespaceIndentation
The indentation used for namespaces.
Definition: Format.h:3456
BraceBreakingStyle BreakBeforeBraces
The brace breaking style to use.
Definition: Format.h:2200
bool isCSharp() const
Definition: Format.h:3304
@ BWACS_Always
Always wrap braces after a control statement.
Definition: Format.h:1339
@ BWACS_Never
Never wrap braces after a control statement.
Definition: Format.h:1318
@ BS_Whitesmiths
Like Allman but always indent braces and line up code with braces.
Definition: Format.h:2083
ReflowCommentsStyle ReflowComments
Comment reformatting style.
Definition: Format.h:3911
bool isVerilog() const
Definition: Format.h:3307
bool isJavaScript() const
Definition: Format.h:3306
bool IndentGotoLabels
Indent goto labels.
Definition: Format.h:2873
BraceWrappingFlags BraceWrapping
Control of individual brace wrapping cases.
Definition: Format.h:1587
@ RPS_Leave
Do not remove parentheses.
Definition: Format.h:3998
@ RPS_ReturnStatement
Also remove parentheses enclosing the expression in a return/co_return statement.
Definition: Format.h:4013
bool SkipMacroDefinitionBody
Do not format macro definition body.
Definition: Format.h:4256
@ NI_All
Indent in all namespaces.
Definition: Format.h:3451
@ NI_Inner
Indent only in inner namespaces (nested in other namespaces).
Definition: Format.h:3441
bool IndentAccessModifiers
Specify whether access modifiers should have their own indentation level.
Definition: Format.h:2761
IndentExternBlockStyle IndentExternBlock
IndentExternBlockStyle is the type of indenting of extern blocks.
Definition: Format.h:2856
unsigned ColumnLimit
The column limit.
Definition: Format.h:2408
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:297
bool Optional
Is optional and can be removed.
Definition: FormatToken.h:581
bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const
Definition: FormatToken.h:670
bool isTypeName(const LangOptions &LangOpts) const
Definition: FormatToken.cpp:44
bool isCppAlternativeOperatorKeyword() const
Definition: FormatToken.h:738
bool isNot(T Kind) const
Definition: FormatToken.h:631
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:317
FormatToken * getPreviousNonComment() const
Returns the previous token ignoring comments.
Definition: FormatToken.h:840
unsigned Finalized
If true, this token has been fully formatted (indented and potentially re-formatted inside),...
Definition: FormatToken.h:376
unsigned NewlinesBefore
The number of newlines immediately before the Token.
Definition: FormatToken.h:466
void setBlockKind(BraceBlockKind BBK)
Definition: FormatToken.h:392
bool isStringLiteral() const
Definition: FormatToken.h:664
bool isBinaryOperator() const
Definition: FormatToken.h:777
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:612
bool hasWhitespaceBefore() const
Returns true if the range of whitespace immediately preceding the Token is not empty.
Definition: FormatToken.h:828
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:624
unsigned ClosesRequiresClause
true if this is the last token within requires clause.
Definition: FormatToken.h:379
bool isAccessSpecifierKeyword() const
Definition: FormatToken.h:674
FormatToken * MatchingParen
If this is a bracket, this points to the matching one.
Definition: FormatToken.h:563
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:566
bool endsSequence(A K1, Ts... Tokens) const
true if this token ends a sequence with the given tokens in order, following the Previous pointers,...
Definition: FormatToken.h:660
void setFinalizedType(TokenType T)
Sets the type and also the finalized flag.
Definition: FormatToken.h:445
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...
static const size_t kInvalidIndex