CFG 源码分析9.0

SIMPLIFY CFG 源码分析 9.0

Some useful command to show Result

Opt 工具显示优化先后的IR

opt -O2 -print-before-all -print-after-all is_sorted2.ll

Opt 运行单独的优化pass

opt -simplifycfg a.ll -S -o aftercfg.ll

Opt 显示控制流图

opt -dot-cfg a.ll

dot mian.dot -Tpng -o a.png

xdg-open a.png

Debug 时候F->ViewCFG(); 显示函数的控制流图

BB 块的前驱 pred ，控制流图节点的入边

BB 块的后继 succ，控制流节点的出边

Opt 打印统计信息

opt -stats -simplifycfg a.ll -S -o aftercfg.ll

bb块的数据结构

1，函数入口

它依赖AssumptionAnalysis。

PreservedAnalyses SimplifyCFGPass::run(Function &F,

FunctionAnalysisManager &AM) {

auto &TTI = AM.getResult<TargetIRAnalysis>(F);

Options.AC = &AM.getResult<AssumptionAnalysis>(F);

if (!simplifyFunctionCFG(F, TTI, Options))

return PreservedAnalyses::all();

PreservedAnalyses PA;

PA.preserve<GlobalsAA>();

return PA;

}

2，simple CFG的函数主体

A, 检测可达性，去掉不可达的bb块。

B,多个return 语句跳转到一个，增加一个phi指令。

C,迭代优化CFG图。

static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI,

const SimplifyCFGOptions &Options) {

bool EverChanged = removeUnreachableBlocks(F);

EverChanged |= mergeEmptyReturnBlocks(F);

EverChanged |= iterativelySimplifyCFG(F, TTI, Options);

// If neither pass changed anything, we're done.

if (!EverChanged) return false;

// iterativelySimplifyCFG can (rarely) make some loops dead. If this happens,

// removeUnreachableBlocks is needed to nuke them, which means we should

// iterate between the two optimizations. We structure the code like this to

// avoid rerunning iterativelySimplifyCFG if the second pass of

// removeUnreachableBlocks doesn't do anything.

if (!removeUnreachableBlocks(F))

return true;

do {

EverChanged = iterativelySimplifyCFG(F, TTI, Options);

EverChanged |= removeUnreachableBlocks(F);

} while (EverChanged);

return true;

}

去除不可达块：

1，从Function entry 开始，检测可达块。

2，遍历Funtion 所有块，如果不是可达快，则删除。

llvm/Local.cpp at release_90 · llvm-mirror/llvm · GitHub removeUnreachableBlocks

bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI,

DomTreeUpdater *DTU,

MemorySSAUpdater *MSSAU) {

SmallPtrSet<BasicBlock*, 16> Reachable;

bool Changed = markAliveBlocks(F, Reachable, DTU);

// If there are unreachable blocks in the CFG...

if (Reachable.size() == F.size())

return Changed;

assert(Reachable.size() < F.size());

NumRemoved += F.size()-Reachable.size();

SmallSetVector<BasicBlock *, 8> DeadBlockSet;

for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ++I) {

auto *BB = &*I;

if (Reachable.count(BB))

continue;

DeadBlockSet.insert(BB);

}

// Loop over all of the basic blocks that are not reachable, dropping all of

// their internal references. Update DTU and LVI if available.

std::vector<DominatorTree::UpdateType> Updates;

for (auto *BB : DeadBlockSet) {

for (BasicBlock *Successor : successors(BB)) {

if (!DeadBlockSet.count(Successor))

Successor->removePredecessor(BB);

if (DTU)

Updates.push_back({DominatorTree::Delete, BB, Successor});

}

if (LVI)

LVI->eraseBlock(BB);

BB->dropAllReferences();

}

for (Function::iterator I = ++F.begin(); I != F.end();) {

auto *BB = &*I;

if (Reachable.count(BB)) {

++I;

continue;

}

if (DTU) {

// Remove the terminator of BB to clear the successor list of BB.

if (BB->getTerminator())

BB->getInstList().pop_back();

new UnreachableInst(BB->getContext(), BB);

assert(succ_empty(BB) && "The successor list of BB isn't empty before "

"applying corresponding DTU updates.");

++I;

} else {

I = F.getBasicBlockList().erase(I);

}

return true;

}

static bool markAliveBlocks(Function &F,

SmallPtrSetImpl<BasicBlock *> &Reachable,

DomTreeUpdater *DTU = nullptr) {

SmallVector<BasicBlock*, 128> Worklist;

BasicBlock *BB = &F.front();

Worklist.push_back(BB);

Reachable.insert(BB);

bool Changed = false;

do {

BB = Worklist.pop_back_val();

// here I delete some code dealing other things

//**//

for (BasicBlock *Successor : successors(BB))

if (Reachable.insert(Successor).second)

Worklist.push_back(Successor);

} while (!Worklist.empty());

return Changed;

}

实例分析

源码

int main（）{

int i ，j;

for (int i = 0 ; i < 10 ; i++)

printf("%d", j);

return 0;

return 1；

}

%0 %4 %7

合并多个ret分支

mergeEmptyReturnBlocks

examples：

int main(){

int i , j ;

j = 5;

i = rand()%3;

if (i == 0)

return i;

else if (i ==1)

return j;

else

return 3;

return 0;

}

iterativelySimplifyCFG(F, TTI, Options);

static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,

const SimplifyCFGOptions &Options) {

bool Changed = false;

bool LocalChange = true;

SmallVector<std::pair<const BasicBlock *, const BasicBlock *>, 32> Edges;

FindFunctionBackedges(F, Edges);

SmallPtrSet<BasicBlock *, 16> LoopHeaders;

for (unsigned i = 0, e = Edges.size(); i != e; ++i)

LoopHeaders.insert(const_cast<BasicBlock *>(Edges[i].second));

while (LocalChange) {

LocalChange = false;

// Loop over all of the basic blocks and remove them if they are unneeded.

for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {

if (simplifyCFG(&*BBIt++, TTI, Options, &LoopHeaders)) {

LocalChange = true;

++NumSimpl;

}

Changed |= LocalChange;

}

return

对一个CFG 寻找回边。拓扑排序.

Stack/set visited block

Vector edge 7-->4

0-->4

0-->4--->7

0 -->4-->7-->4 find an loop 4 <--->7

0 -->4-->12

0 -->4

bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {

bool Changed = false;

assert(BB && BB->getParent() && "Block not embedded in function!");

assert(BB->getTerminator() && "Degenerate basic block encountered!");

// Remove basic blocks that have no predecessors (except the entry block)...

// or that just have themself as a predecessor. These are unreachable.

if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||

BB->getSinglePredecessor() == BB) {

LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);

DeleteDeadBlock(BB);

return true;

}

// Check to see if we can constant propagate this terminator instruction

// away...

// 常量跳转，当一个bb块最后是一个条件跳转语句，并且是条件是一个常数的时候。

// 将条件跳转转化成无条件跳转。删除无法到达的跳转的bb块的前驱，并更新phi节点。

// 如果两个目标跳转是同一个dest块，也在此处理。

// bb1 : Terminator br 1 : dest : old

// dest : ... pred:bb1

// old : pred:bb1, dest

// t = phi [1,bb1],[2,dest] ...

// ===>

// bb1 : br dest

// dest : ... pred:bb1

// old : pred: dest

// t = phi [2,dest] //

Changed |= ConstantFoldTerminator(BB, true);

// Check for and eliminate duplicate PHI nodes in this block.

// 一个哈希去重，只是incomeming不同的phinode

Changed |= EliminateDuplicatePHINodes(BB);

// Check for and remove branches that will always cause undefined behavior.

Changed |= removeUndefIntroducingPredecessor(BB);

// Merge basic blocks into their predecessor if there is only one distinct

// pred, and if there is only one distinct successor of the predecessor, and

// if there are no PHI nodes.

// 如果只有一个确定的前驱节点，并且该前驱只有一个确定的后继，那么把自己融合到前驱。

// a-->b merge

// a-->b--->c

// d--/ can not merge

// a--->b-->c

// \->d can not merge

if (MergeBlockIntoPredecessor(BB))

return true;

if (SinkCommon && Options.SinkCommonInsts)

Changed |= SinkCommonCodeFromPredecessors(BB);

IRBuilder<> Builder(BB);

// If there is a trivial two-entry PHI node in this basic block, and we can

// eliminate it, do so now.

if (auto *PN = dyn_cast<PHINode>(BB->begin()))

if (PN->getNumIncomingValues() == 2)

// 将if-else 造成的phi 语句改变成select

Changed |= FoldTwoEntryPHINode(PN, TTI, DL);

Builder.SetInsertPoint(BB->getTerminator());

if (auto *BI = dyn_cast<BranchInst>(BB->getTerminator())) {

if (BI->isUnconditional()) {

// 单指令的cmp上移，phi增加incoming

/// This is called when we find an icmp instruction

/// (a seteq/setne with a constant) as the only instruction in a

/// block that ends with an uncond branch. We are looking for a very specific

/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In

/// this case, we merge the first two "or's of icmp" into a switch, but then the

/// default value goes to an uncond block with a seteq in it, we get something

/// like:

///

/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]

/// DEFAULT:

/// %tmp = icmp eq i8 %A, 92

/// br label %end

/// end:

/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]

///

/// We prefer to split the edge to 'end' so that there is a true/false entry to

/// the PHI, merging the third icmp into the switch.

if (SimplifyUncondBranch(BI, Builder))

return true;

} else {

if (SimplifyCondBranch(BI, Builder))

return true;

}

} else if (auto *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {

if (SimplifyReturn(RI, Builder))

return true;

} else if (auto *RI = dyn_cast<ResumeInst>(BB->getTerminator())) {

if (SimplifyResume(RI, Builder))

return true;

} else if (auto *RI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) {

// 异常处理的代码，类回收

if (SimplifyCleanupReturn(RI))

return true;

} else if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {

if (SimplifySwitch(SI, Builder))

return true;

} else if (auto *UI = dyn_cast<UnreachableInst>(BB->getTerminator())) {

//移除不可达代码例如assert（0）；解引用空指针的后的代码

if (SimplifyUnreachable(UI))

return true;

} else if (auto *IBI = dyn_cast<IndirectBrInst>(BB->getTerminator())) {

if (SimplifyIndirectBr(IBI))

return true;

}

return Changed;

}

关于CFG的插入位置

关于CFG执行的次序。

SROA+CFG 消除6个块

CFG只能消除2个块

CFG 和我们delay/@/wait/fork-join/neb 切割的想法

1，我们的delay切割是阻塞的，不是立即跳转。不是一个立即的控制流。通过schedule 链接。不是call 语句。这种bb块是不可以简化切割的。不可以合并在一起。

stmt1

stmt2

stmt1 ==》 stmt2

2, 对于while/if 的切割，这是单纯的控制流分析，这种控制流是可以简化的。

posted @ 2022-04-15 17:08 zzas12345 阅读(724) 评论(0) 编辑收藏举报

会员力量，点亮园子希望

刷新页面返回顶部

zzas12345

CFG 源码分析9.0

公告