CFG 源码分析9.0

 
SIMPLIFY CFG 源码分析 9.0
Some useful command to show Result
Opt 工具显示优化先后的IR
opt -O2 -print-before-all -print-after-all is_sorted2.ll
Opt 运行单独的优化pass
opt -simplifycfg a.ll -S -o aftercfg.ll
Opt 显示控制流图
opt -dot-cfg a.ll
dot mian.dot -Tpng -o a.png
xdg-open a.png
Debug 时候F->ViewCFG(); 显示函数的控制流图
BB 块的前驱 pred ,控制流图节点的入边
BB 块的后继 succ, 控制流节点的出边
Opt 打印统计信息
opt -stats -simplifycfg a.ll -S -o aftercfg.ll
bb块的数据结构
 
1,函数入口
它依赖AssumptionAnalysis。
PreservedAnalyses SimplifyCFGPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
Options.AC = &AM.getResult<AssumptionAnalysis>(F);
if (!simplifyFunctionCFG(F, TTI, Options))
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserve<GlobalsAA>();
return PA;
}
2,simple CFG的函数主体
A, 检测可达性,去掉不可达的bb块。
B,多个return 语句跳转到一个,增加一个phi指令。
C,迭代优化CFG图。
static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI,
const SimplifyCFGOptions &Options) {
bool EverChanged = removeUnreachableBlocks(F);
EverChanged |= mergeEmptyReturnBlocks(F);
EverChanged |= iterativelySimplifyCFG(F, TTI, Options);
 
// If neither pass changed anything, we're done.
if (!EverChanged) return false;
 
// iterativelySimplifyCFG can (rarely) make some loops dead. If this happens,
// removeUnreachableBlocks is needed to nuke them, which means we should
// iterate between the two optimizations. We structure the code like this to
// avoid rerunning iterativelySimplifyCFG if the second pass of
// removeUnreachableBlocks doesn't do anything.
if (!removeUnreachableBlocks(F))
return true;
 
do {
EverChanged = iterativelySimplifyCFG(F, TTI, Options);
EverChanged |= removeUnreachableBlocks(F);
} while (EverChanged);
 
return true;
}
 
去除不可达块:
1,从Function entry 开始,检测可达块。
2,遍历Funtion 所有块,如果不是可达快,则删除。
bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI,
DomTreeUpdater *DTU,
MemorySSAUpdater *MSSAU) {
SmallPtrSet<BasicBlock*, 16> Reachable;
bool Changed = markAliveBlocks(F, Reachable, DTU);
 
// If there are unreachable blocks in the CFG...
if (Reachable.size() == F.size())
return Changed;
 
assert(Reachable.size() < F.size());
NumRemoved += F.size()-Reachable.size();
 
SmallSetVector<BasicBlock *, 8> DeadBlockSet;
for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ++I) {
auto *BB = &*I;
if (Reachable.count(BB))
continue;
DeadBlockSet.insert(BB);
}
 
 
 
// Loop over all of the basic blocks that are not reachable, dropping all of
// their internal references. Update DTU and LVI if available.
std::vector<DominatorTree::UpdateType> Updates;
for (auto *BB : DeadBlockSet) {
for (BasicBlock *Successor : successors(BB)) {
if (!DeadBlockSet.count(Successor))
Successor->removePredecessor(BB);
if (DTU)
Updates.push_back({DominatorTree::Delete, BB, Successor});
}
if (LVI)
LVI->eraseBlock(BB);
BB->dropAllReferences();
}
for (Function::iterator I = ++F.begin(); I != F.end();) {
auto *BB = &*I;
if (Reachable.count(BB)) {
++I;
continue;
}
if (DTU) {
// Remove the terminator of BB to clear the successor list of BB.
if (BB->getTerminator())
BB->getInstList().pop_back();
new UnreachableInst(BB->getContext(), BB);
assert(succ_empty(BB) && "The successor list of BB isn't empty before "
"applying corresponding DTU updates.");
++I;
} else {
I = F.getBasicBlockList().erase(I);
}
}
 
return true;
}
 
static bool markAliveBlocks(Function &F,
SmallPtrSetImpl<BasicBlock *> &Reachable,
DomTreeUpdater *DTU = nullptr) {
SmallVector<BasicBlock*, 128> Worklist;
BasicBlock *BB = &F.front();
Worklist.push_back(BB);
Reachable.insert(BB);
bool Changed = false;
do {
BB = Worklist.pop_back_val();
// here I delete some code dealing other things
//**//
for (BasicBlock *Successor : successors(BB))
if (Reachable.insert(Successor).second)
Worklist.push_back(Successor);
} while (!Worklist.empty());
return Changed;
}
实例分析
源码
int main(){
int i ,j;
for (int i = 0 ; i < 10 ; i++)
printf("%d", j);
return 0;
return 1;
}
 
%0 %4 %7
合并多个ret分支
mergeEmptyReturnBlocks
examples:
 
int main(){
int i , j ;
j = 5;
i = rand()%3;
if (i == 0)
return i;
else if (i ==1)
return j;
else
return 3;
return 0;
}
 
 
iterativelySimplifyCFG(F, TTI, Options);
static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
const SimplifyCFGOptions &Options) {
bool Changed = false;
bool LocalChange = true;
 
SmallVector<std::pair<const BasicBlock *, const BasicBlock *>, 32> Edges;
FindFunctionBackedges(F, Edges);
SmallPtrSet<BasicBlock *, 16> LoopHeaders;
for (unsigned i = 0, e = Edges.size(); i != e; ++i)
LoopHeaders.insert(const_cast<BasicBlock *>(Edges[i].second));
 
while (LocalChange) {
LocalChange = false;
 
// Loop over all of the basic blocks and remove them if they are unneeded.
for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
if (simplifyCFG(&*BBIt++, TTI, Options, &LoopHeaders)) {
LocalChange = true;
++NumSimpl;
}
}
Changed |= LocalChange;
}
return
对一个CFG 寻找回边。拓扑排序.
Stack/set visited block
Vector edge 7-->4
0
0-->4
0-->4--->7
0 -->4-->7-->4 find an loop 4 <--->7
0 -->4-->12
0 -->4
0
bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
bool Changed = false;
 
assert(BB && BB->getParent() && "Block not embedded in function!");
assert(BB->getTerminator() && "Degenerate basic block encountered!");
 
// Remove basic blocks that have no predecessors (except the entry block)...
// or that just have themself as a predecessor. These are unreachable.
if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
BB->getSinglePredecessor() == BB) {
LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
DeleteDeadBlock(BB);
return true;
}
 
// Check to see if we can constant propagate this terminator instruction
// away...
// 常量跳转,当一个bb块最后是一个条件跳转语句,并且是条件是一个常数的时候。
// 将条件跳转转化成无条件跳转。删除无法到达的跳转的bb块的前驱,并更新phi节点。
// 如果两个目标跳转是同一个dest块,也在此处理。
// bb1 : Terminator br 1 : dest : old
// dest : ... pred:bb1
// old : pred:bb1, dest
// t = phi [1,bb1],[2,dest] ...
// ===>
// bb1 : br dest
// dest : ... pred:bb1
// old : pred: dest
// t = phi [2,dest] //
Changed |= ConstantFoldTerminator(BB, true);
 
// Check for and eliminate duplicate PHI nodes in this block.
// 一个哈希去重,只是incomeming不同的phinode
Changed |= EliminateDuplicatePHINodes(BB);
 
// Check for and remove branches that will always cause undefined behavior.
Changed |= removeUndefIntroducingPredecessor(BB);
 
// Merge basic blocks into their predecessor if there is only one distinct
// pred, and if there is only one distinct successor of the predecessor, and
// if there are no PHI nodes.
// 如果只有一个确定的前驱节点,并且该前驱只有一个确定的后继,那么把自己融合到前驱。
// a-->b merge
// a-->b--->c
// d--/ can not merge
// a--->b-->c
// \->d can not merge
if (MergeBlockIntoPredecessor(BB))
return true;
 
if (SinkCommon && Options.SinkCommonInsts)
Changed |= SinkCommonCodeFromPredecessors(BB);
 
IRBuilder<> Builder(BB);
 
// If there is a trivial two-entry PHI node in this basic block, and we can
// eliminate it, do so now.
if (auto *PN = dyn_cast<PHINode>(BB->begin()))
if (PN->getNumIncomingValues() == 2)
// 将if-else 造成的phi 语句改变成select
Changed |= FoldTwoEntryPHINode(PN, TTI, DL);
 
Builder.SetInsertPoint(BB->getTerminator());
if (auto *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
if (BI->isUnconditional()) {
// 单指令的cmp上移,phi增加incoming
/// This is called when we find an icmp instruction
/// (a seteq/setne with a constant) as the only instruction in a
/// block that ends with an uncond branch. We are looking for a very specific
/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
/// this case, we merge the first two "or's of icmp" into a switch, but then the
/// default value goes to an uncond block with a seteq in it, we get something
/// like:
///
/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
/// DEFAULT:
/// %tmp = icmp eq i8 %A, 92
/// br label %end
/// end:
/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
///
/// We prefer to split the edge to 'end' so that there is a true/false entry to
/// the PHI, merging the third icmp into the switch.
if (SimplifyUncondBranch(BI, Builder))
return true;
} else {
if (SimplifyCondBranch(BI, Builder))
return true;
}
} else if (auto *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
if (SimplifyReturn(RI, Builder))
return true;
} else if (auto *RI = dyn_cast<ResumeInst>(BB->getTerminator())) {
if (SimplifyResume(RI, Builder))
return true;
} else if (auto *RI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) {
// 异常处理的代码,类回收
if (SimplifyCleanupReturn(RI))
return true;
} else if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
if (SimplifySwitch(SI, Builder))
return true;
} else if (auto *UI = dyn_cast<UnreachableInst>(BB->getTerminator())) {
//移除不可达代码 例如assert(0);解引用空指针的后的代码
if (SimplifyUnreachable(UI))
return true;
} else if (auto *IBI = dyn_cast<IndirectBrInst>(BB->getTerminator())) {
if (SimplifyIndirectBr(IBI))
return true;
}
 
return Changed;
}
 
关于CFG的插入位置
关于CFG执行的次序。
SROA+CFG 消除6个块
CFG只能消除2个块
 
CFG 和 我们delay/@/wait/fork-join/neb 切割的想法
 
1, 我们的delay切割是阻塞的,不是立即跳转。不是一个立即的控制流。通过schedule 链接。不是call 语句。这种bb块是不可以简化切割的。不可以合并在一起。
stmt1
#1
stmt2
 
stmt1 ==》 stmt2
2, 对于while/if 的切割,这是单纯的控制流分析,这种控制流是可以简化的。
posted @ 2022-04-15 17:08  zzas12345  阅读(724)  评论(0编辑  收藏  举报