为LLVM添加制导解析
在LLVM(Clang)中添加制导(pragma)信息,对于一些专用编译器来说非常重要。整个制导信息的添加,大致可以分为以下几步:
- 1、定义Token
- 2、添加对应的句柄(handler)
- 3、添加处理函数声明
- 4、添加对应handler的声明
- 5、实现对应的handler
- 6、实现处理函数
工作中需要添加一些制导信息,之前有些地方写得比较乱;最近比较忙,趁有空整理一下思路和逻辑——整个过程其实比较套路化。
大致的逻辑是:添加对应的Handler,并在初始化制导句柄的地方(Parser::initializePragmaHandlers)完成初始化和注册;预处理器遇到该制导时,由Handler插入对应的Token;语法分析器遇到这个Token后,在ParseStmt中转到对应的处理函数去解析制导后面的内容。
1、定义Token
在\clang\include\clang\Basic\TokenKinds.def文件中添加,我这里添加的是ANNOTATION类型,还有其他的类型,选择自己合适的进行添加
// Annotation token for '#pragma SIMD'. It is injected into the token stream by
// PragmaSIMDHandler::HandlePragma and consumed by Parser::ParseSIMDStatement.
ANNOTATION(pragma_SIMD)
2、添加对应的句柄(handler)
在\clang\include\clang\Parse\Parser.h文件中(Parser类内)添加
/// Owning pointer to the '#pragma SIMD' handler; it is created and registered
/// with the preprocessor in Parser::initializePragmaHandlers().
std::unique_ptr<PragmaHandler> SIMDHandler;
3、添加处理函数声明
/// Parses what follows a tok::annot_pragma_SIMD annotation token (the loop
/// variable of '#pragma SIMD') and returns the resulting statement result.
StmtResult ParseSIMDStatement() ;
4、添加对应handler的声明
在\clang\lib\Parse\ParsePragma.cpp中添加声明:
/// Preprocessor handler for '#pragma SIMD'.
///
/// The handler itself does not interpret the pragma; HandlePragma only turns
/// the directive into a tok::annot_pragma_SIMD annotation token for the parser.
struct PragmaSIMDHandler : public PragmaHandler {
  /// \param name the pragma name this handler is registered under.
  /// Marked explicit so a C string is never implicitly converted to a handler.
  explicit PragmaSIMDHandler(const char *name) : PragmaHandler(name) {}

  void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
                    Token &FirstToken) override;
};
并且在void Parser::initializePragmaHandlers()中初始化所有的句柄
// Create the '#pragma SIMD' handler and register it with the preprocessor.
// SIMDHandler (a std::unique_ptr) keeps ownership; the preprocessor is only
// handed the raw pointer.
// NOTE(review): the matching teardown (PP.RemovePragmaHandler(...) followed by
// SIMDHandler.reset()) is not shown in this excerpt — confirm it exists.
SIMDHandler.reset(new PragmaSIMDHandler("SIMD"));
PP.AddPragmaHandler(SIMDHandler.get());
5、实现对应的handler
/// Turns a '#pragma SIMD' directive into a single tok::annot_pragma_SIMD
/// annotation token so that the parser can handle it at statement level.
///
/// \param PP         the preprocessor that encountered the pragma.
/// \param Introducer how the pragma was introduced (unused here).
/// \param SIMDTok    the token handed in by the preprocessor for this pragma;
///                   only its source location is used.
void PragmaSIMDHandler::HandlePragma(Preprocessor &PP,
                                     PragmaIntroducerKind Introducer,
                                     Token &SIMDTok) {
  // Build the one-token stream in preprocessor-owned memory.
  MutableArrayRef<Token> Toks(
      PP.getPreprocessorAllocator().Allocate<Token>(1), 1);
  Toks[0].startToken();
  Toks[0].setKind(tok::annot_pragma_SIMD);
  Toks[0].setLocation(SIMDTok.getLocation());

  // Deliberately do NOT lex any further token here. The previous version
  // lexed one token and threw it away, which silently swallowed the '(' of
  // '#pragma SIMD (var)' (or whatever else followed the pragma name).
  // Parser::ParseSIMDStatement reads the '(' itself, so the rest of the
  // directive is left untouched for the parser.
  PP.EnterTokenStream(Toks, /*DisableMacroExpansion=*/true);
}
接着,需要在tools\clang\lib\Parse\ParseStmt.cpp文件的StmtResult Parser::ParseStatementOrDeclarationAfterAttributes(StmtVector &Stmts,AllowedContsructsKind Allowed, SourceLocation *TrailingElseLoc,ParsedAttributesWithRange &Attrs)函数中,增加对ParseSIMDStatement的调用(这里贴出了整个函数):
/// Parses one statement or declaration after any leading attributes have been
/// parsed into \p Attrs, dispatching on the kind of the current token.
///
/// \param Stmts           statement vector forwarded to the handlers that
///                        need it (__if_exists blocks, loop-hint pragmas).
/// \param Allowed         which constructs are permitted in this context.
/// \param TrailingElseLoc forwarded to the if/switch/while/for parsers.
/// \param Attrs           the attributes parsed before the statement.
/// \returns the parsed statement, StmtEmpty() for pragmas that produce no
///          statement, or StmtError() on failure.
StmtResult Parser::ParseStatementOrDeclarationAfterAttributes(
    StmtVector &Stmts, AllowedContsructsKind Allowed,
    SourceLocation *TrailingElseLoc, ParsedAttributesWithRange &Attrs) {
  const char *SemiError = nullptr;
  StmtResult Res;

  // Cases in this switch statement should fall through if the parser expects
  // the token to end in a semicolon (in which case SemiError should be set),
  // or they directly 'return;' if not.
Retry:
  tok::TokenKind Kind = Tok.getKind();
  SourceLocation AtLoc;
  switch (Kind) {
  case tok::at: // May be a @try or @throw statement
  {
    ProhibitAttributes(Attrs); // TODO: is it correct?
    AtLoc = ConsumeToken();    // consume @
    return ParseObjCAtStatement(AtLoc);
  }

  case tok::code_completion:
    Actions.CodeCompleteOrdinaryName(getCurScope(), Sema::PCC_Statement);
    cutOffParsing();
    return StmtError();

  case tok::identifier: {
    Token Next = NextToken();
    if (Next.is(tok::colon)) { // C99 6.8.1: labeled-statement
      // identifier ':' statement
      return ParseLabeledStatement(Attrs);
    }

    // Look up the identifier, and typo-correct it to a keyword if it's not
    // found.
    if (Next.isNot(tok::coloncolon)) {
      // Try to limit which sets of keywords should be included in typo
      // correction based on what the next token is.
      if (TryAnnotateName(/*IsAddressOfOperand*/ false,
                          llvm::make_unique<StatementFilterCCC>(Next)) ==
          ANK_Error) {
        // Handle errors here by skipping up to the next semicolon or '}', and
        // eat the semicolon if that's what stopped us.
        SkipUntil(tok::r_brace, StopAtSemi | StopBeforeMatch);
        if (Tok.is(tok::semi))
          ConsumeToken();
        return StmtError();
      }

      // If the identifier was typo-corrected, try again.
      if (Tok.isNot(tok::identifier))
        goto Retry;
    }

    // Fall through
    // NOTE: this case must be immediately followed by 'default'. Do not
    // insert any other case label here, or identifier-led statements that
    // fall through would be routed to that case instead.
  }

  default: {
    if ((getLangOpts().CPlusPlus || getLangOpts().MicrosoftExt ||
         Allowed == ACK_Any) &&
        isDeclarationStatement()) {
      SourceLocation DeclStart = Tok.getLocation(), DeclEnd;
      DeclGroupPtrTy Decl =
          ParseDeclaration(Declarator::BlockContext, DeclEnd, Attrs);
      return Actions.ActOnDeclStmt(Decl, DeclStart, DeclEnd);
    }

    if (Tok.is(tok::r_brace)) {
      Diag(Tok, diag::err_expected_statement);
      return StmtError();
    }

    return ParseExprStatement();
  }

  case tok::kw_case: // C99 6.8.1: labeled-statement
    return ParseCaseStatement();
  case tok::kw_default: // C99 6.8.1: labeled-statement
    return ParseDefaultStatement();

  case tok::l_brace: // C99 6.8.2: compound-statement
    return ParseCompoundStatement();
  case tok::semi: { // C99 6.8.3p3: expression[opt] ';'
    bool HasLeadingEmptyMacro = Tok.hasLeadingEmptyMacro();
    return Actions.ActOnNullStmt(ConsumeToken(), HasLeadingEmptyMacro);
  }

  case tok::kw_if: // C99 6.8.4.1: if-statement
    return ParseIfStatement(TrailingElseLoc);
  case tok::kw_switch: // C99 6.8.4.2: switch-statement
    return ParseSwitchStatement(TrailingElseLoc);

  case tok::kw_while: // C99 6.8.5.1: while-statement
    return ParseWhileStatement(TrailingElseLoc);
  case tok::kw_do: // C99 6.8.5.2: do-statement
    Res = ParseDoStatement();
    SemiError = "do/while";
    break;
  case tok::kw_for: // C99 6.8.5.3: for-statement
    return ParseForStatement(TrailingElseLoc);

  case tok::kw_goto: // C99 6.8.6.1: goto-statement
    Res = ParseGotoStatement();
    SemiError = "goto";
    break;
  case tok::kw_continue: // C99 6.8.6.2: continue-statement
    Res = ParseContinueStatement();
    SemiError = "continue";
    break;
  case tok::kw_break: // C99 6.8.6.3: break-statement
    Res = ParseBreakStatement();
    SemiError = "break";
    break;
  case tok::kw_return: // C99 6.8.6.4: return-statement
    Res = ParseReturnStatement();
    SemiError = "return";
    break;
  case tok::kw_co_return: // C++ Coroutines: co_return statement
    Res = ParseReturnStatement();
    SemiError = "co_return";
    break;

  case tok::kw_asm: {
    ProhibitAttributes(Attrs);
    bool msAsm = false;
    Res = ParseAsmStatement(msAsm);
    Res = Actions.ActOnFinishFullStmt(Res.get());
    if (msAsm)
      return Res;
    SemiError = "asm";
    break;
  }

  case tok::kw___if_exists:
  case tok::kw___if_not_exists:
    ProhibitAttributes(Attrs);
    ParseMicrosoftIfExistsStatement(Stmts);
    // An __if_exists block is like a compound statement, but it doesn't create
    // a new scope.
    return StmtEmpty();

  case tok::kw_try: // C++ 15: try-block
    return ParseCXXTryBlock();

  case tok::kw___try:
    ProhibitAttributes(Attrs); // TODO: is it correct?
    return ParseSEHTryBlock();

  case tok::kw___leave:
    Res = ParseSEHLeaveStatement();
    SemiError = "__leave";
    break;

  case tok::annot_pragma_vis:
    ProhibitAttributes(Attrs);
    HandlePragmaVisibility();
    return StmtEmpty();

  case tok::annot_pragma_pack:
    ProhibitAttributes(Attrs);
    HandlePragmaPack();
    return StmtEmpty();

  case tok::annot_pragma_msstruct:
    ProhibitAttributes(Attrs);
    HandlePragmaMSStruct();
    return StmtEmpty();

  case tok::annot_pragma_align:
    ProhibitAttributes(Attrs);
    HandlePragmaAlign();
    return StmtEmpty();

  case tok::annot_pragma_weak:
    ProhibitAttributes(Attrs);
    HandlePragmaWeak();
    return StmtEmpty();

  case tok::annot_pragma_weakalias:
    ProhibitAttributes(Attrs);
    HandlePragmaWeakAlias();
    return StmtEmpty();

  case tok::annot_pragma_redefine_extname:
    ProhibitAttributes(Attrs);
    HandlePragmaRedefineExtname();
    return StmtEmpty();

  case tok::annot_pragma_fp_contract:
    ProhibitAttributes(Attrs);
    Diag(Tok, diag::err_pragma_fp_contract_scope);
    ConsumeToken();
    return StmtError();

  case tok::annot_pragma_opencl_extension:
    ProhibitAttributes(Attrs);
    HandlePragmaOpenCLExtension();
    return StmtEmpty();

  case tok::annot_pragma_captured:
    ProhibitAttributes(Attrs);
    return HandlePragmaCaptured();

  case tok::annot_pragma_openmp:
    ProhibitAttributes(Attrs);
    return ParseOpenMPDeclarativeOrExecutableDirective(Allowed);

  case tok::annot_pragma_ms_pointers_to_members:
    ProhibitAttributes(Attrs);
    HandlePragmaMSPointersToMembers();
    return StmtEmpty();

  case tok::annot_pragma_ms_pragma:
    ProhibitAttributes(Attrs);
    HandlePragmaMSPragma();
    return StmtEmpty();

  case tok::annot_pragma_ms_vtordisp:
    ProhibitAttributes(Attrs);
    HandlePragmaMSVtorDisp();
    return StmtEmpty();

  case tok::annot_pragma_loop_hint:
    ProhibitAttributes(Attrs);
    return ParsePragmaLoopHint(Stmts, Allowed, TrailingElseLoc, Attrs);

  // '#pragma SIMD'. Kept with the other pragma annotation tokens: it used to
  // sit between 'case tok::identifier' and 'default', so statements starting
  // with an identifier fell through into ParseSIMDStatement().
  case tok::annot_pragma_SIMD:
    ProhibitAttributes(Attrs); // same policy as the neighbouring pragmas
    return ParseSIMDStatement();

  case tok::annot_pragma_dump:
    HandlePragmaDump();
    return StmtEmpty();
  }

  // If we reached this code, the statement must end in a semicolon.
  if (!TryConsumeToken(tok::semi) && !Res.isInvalid()) {
    // If the result was valid, then we do want to diagnose this.  Use
    // ExpectAndConsume to emit the diagnostic, even though we know it won't
    // succeed.
    ExpectAndConsume(tok::semi, diag::err_expected_semi_after_stmt, SemiError);
    // Skip until we see a } or ;, but don't eat it.
    SkipUntil(tok::r_brace, StopAtSemi | StopBeforeMatch);
  }

  return Res;
}
6、实现处理函数
/// Parses the remainder of a '#pragma SIMD (loop-variable)' directive.
///
/// Expects the current token to be tok::annot_pragma_SIMD. On success the
/// loop variable name is recorded in clang::SimdBeforeFor (declared elsewhere
/// in the project) as the pair (true, name).
///
/// \returns StmtEmpty() on success; StmtError() after emitting a diagnostic
///          when the loop variable or the closing ')' is missing.
StmtResult Parser::ParseSIMDStatement() {
  assert(Tok.is(tok::annot_pragma_SIMD) && "Not a SIMD stmt!");
  ConsumeToken(); // the annotation token itself

  // Drops whatever is left of the pragma line, including the end-of-directive
  // token. As a safety net it also stops at end of file and at any token that
  // starts a new source line, so recovery can never swallow code that follows
  // the pragma even if the end-of-directive token was consumed before the
  // parser got here.
  auto SkipRestOfPragma = [this]() {
    while (Tok.isNot(tok::eod) && Tok.isNot(tok::eof) &&
           !Tok.isAtStartOfLine())
      ConsumeAnyToken();
    if (Tok.is(tok::eod))
      ConsumeToken();
  };

  // An opening '(' is consumed when present. Its absence is tolerated, which
  // matches the previous behaviour: the old check was assert("<literal>"),
  // which is always true and therefore never rejected anything.
  if (Tok.is(tok::l_paren))
    ConsumeParen();

  // The loop variable must be a plain identifier on the pragma line. The old
  // code dereferenced Tok.getIdentifierInfo() unconditionally, which is null
  // for tokens that carry no identifier.
  if (Tok.isNot(tok::identifier) || Tok.isAtStartOfLine()) {
    Diag(Tok, diag::err_expected) << tok::identifier;
    SkipRestOfPragma();
    return StmtError();
  }
  StringRef LoopVar = Tok.getIdentifierInfo()->getName();
  ConsumeToken();

  if (Tok.isNot(tok::r_paren)) {
    Diag(Tok, diag::err_expected) << tok::r_paren;
    SkipRestOfPragma();
    return StmtError();
  }
  ConsumeParen();

  // Anything left before the end of the directive is reported and ignored, so
  // that stray pragma tokens do not leak into normal statement parsing.
  if (Tok.isNot(tok::eod) && Tok.isNot(tok::eof) && !Tok.isAtStartOfLine())
    Diag(Tok, diag::warn_pragma_extra_tokens_at_eol) << "SIMD";
  SkipRestOfPragma();

  // Record the loop variable. (The debugging std::cout print was removed: a
  // compiler must not write stray text to stdout.)
  clang::SimdBeforeFor.insert(
      std::pair<bool, std::string>(true, LoopVar.str()));
  return StmtEmpty();
}
这段历史代码里使用了很多assert来检查制导的语法,这不是一种很好的方式:assert只在debug构建中有效;而且像assert("...")这样只传入字符串字面量的写法,其条件恒为真,即使在debug构建中也永远不会触发。由于暂时没有时间处理他人的历史代码,这里只给出建议:使用Diag(Tok, diag::err_expected) << tok::identifier;这种Clang内置的诊断(Diag)信息处理方式来报告错误,会比较好一点。