LLVM CodeExtractor

我以为，学一个复杂的东西，要首先提纲挈领，搞清楚大体的框架与各个类的作用，细节以后再查，这样才有可能真正明白这个东西。LLVM显然就是这样一个复杂的东西，而且中文资料少之又少，所以我只能尽力把LLVM里每一个类的功能、每一个类所包含的函数都从源代码里一一列出来。

这是CodeExtractor.cpp所提供的函数：

// CodeExtractor.cpp位于 llvm-3.0.src\lib\Transforms\Utils 目录下
// 这个类没有对应的头文件，类的声明和实现都在cpp文件里：它被放在匿名命名空间（namespace { ... }）中，只在本文件内部可见

//===----------------------------------------------------------------------===//
// 这个文件实现了一个接口：把一个代码区域（比如一个loop循环或者一个并行部分）提取到一个新的函数里，
// 并用对新函数的调用来代替原来的代码区域
//===----------------------------------------------------------------------===//

using namespace llvm;

// AggregateArgsOpt - Hidden command-line flag ("aggregate-extracted-args")
// asking the code extractor to aggregate the arguments of an extracted
// function into a single struct. Per the original notes, this is useful when
// converting extracted functions to pthread-based code, because only a void*
// can be handed to pthread's create() function.
static cl::opt<bool> AggregateArgsOpt("aggregate-extracted-args", cl::Hidden,
                                       cl::desc("Aggregate arguments to code-extracted functions"));

namespace {
  /// CodeExtractor - File-local helper (anonymous namespace) that extracts a
  /// set of basic blocks into a new function. Only the declaration is shown
  /// here; member function bodies are not part of this listing.
  class CodeExtractor {
    typedef SetVector<Value*> Values;
    // Presumably the set of blocks currently being extracted - TODO confirm
    // against the member function definitions.
    SetVector<BasicBlock*> BlocksToExtract;
    // Dominator tree passed to the constructor; may be null (defaults to 0).
    DominatorTree* DT;
    // True if either the constructor argument or the command-line flag
    // requests argument aggregation.
    bool AggregateArgs;
    // Starts out as ~0U (see constructor).
    unsigned NumExitBlocks;
    // NOTE(review): not initialized by the constructor; it must be assigned
    // before it is first read.
    Type *RetTy;
  public:
    /// Constructor. Stores the (optional) dominator tree, enables argument
    /// aggregation when AggArgs or AggregateArgsOpt is set, and initializes
    /// NumExitBlocks to ~0U. RetTy is left uninitialized.
    CodeExtractor(DominatorTree* dt = 0, bool AggArgs = false): DT(dt), AggregateArgs(AggArgs||AggregateArgsOpt), NumExitBlocks(~0U) {}
    /// ExtractCodeRegion - Removes a code region (e.g. a loop) from a
    /// function and replaces it with a call to a new function; returns a
    /// pointer to that new function.
    ///
    /// Algorithm outline (from the original notes):
    ///   - find the inputs and outputs of the region
    ///   - for inputs: add them to the function as arguments,
    ///     map input instr* to arg#
    ///   - for outputs: add allocas for scalars, add them to the function as
    ///     arguments, map output instr* to arg#
    ///   - rewrite the function to use the arguments instead of instr*
    ///   - for every scalar output in the function: at every exit, store the
    ///     intermediate computed result back to memory
    Function *ExtractCodeRegion(ArrayRef<BasicBlock*> code);
    /// isEligible - Reports whether the given blocks qualify for extraction.
    /// The exact criteria are not visible in this listing.
    bool isEligible(ArrayRef<BasicBlock*> code);

  private:
    /// definedInRegion - Return true if the specified value is defined in the
    /// region being extracted.
    bool definedInRegion(Value *V); 
    
    /// definedInCaller - Return true if the specified value is defined in the
    /// function being code extracted, but not in the region being extracted.
    /// These values must be passed in as live-ins to the function.
    bool definedInCaller(Value *V);

    /// severSplitPHINodes - If a PHI node already has multiple inputs from
    /// outside the region, the region's entry block needs to be split so that
    /// the PHI node is easier to handle.
    void severSplitPHINodes(BasicBlock *&Header);
    // NOTE(review): undocumented in this listing; presumably splits blocks
    // that end in a return - confirm against the definition.
    void splitReturnBlocks();

    /// findInputsOutputs - Find the inputs and outputs of the code region.
    void findInputsOutputs(Values &inputs, Values &outputs);

    // NOTE(review): undocumented in this listing; presumably builds the new
    // function from the given inputs/outputs - confirm against the definition.
    Function *constructFunction(const Values &inputs,
                                const Values &outputs,
                                BasicBlock *header,
                                BasicBlock *newRootNode, BasicBlock *newHeader,
                                Function *oldFunction, Module *M);

    // NOTE(review): undocumented in this listing; presumably moves the
    // extracted blocks into newFunction - confirm against the definition.
    void moveCodeToFunction(Function *newFunction);

    /// emitCallAndSwitchStatement - Sets up the caller side by adding the
    /// call instruction, splitting any PHI nodes in the header block as
    /// necessary.
    void emitCallAndSwitchStatement(Function *newFunction,
                                    BasicBlock *newHeader,
                                    Values &inputs,
                                    Values &outputs);
  };
}


//  File-local (static) helper.
//  Given a value and a basic block, look for a PHI in that block that uses the
//  value and return the predecessor block associated with that use; the
//  documented result when no such PHI is found is 0.
//  NOTE(review): the body is empty in this listing, so as written the function
//  returns nothing despite its non-void return type. The implementation
//  appears to have been elided - restore it before compiling.
static BasicBlock* FindPhiPredForUseInBlock(Value* Used, BasicBlock* BB) {}


//  不明白以下三个函数为什么没有定义在类的声明里？（注意它们带有 llvm:: 限定，是命名空间 llvm 中的自由函数，而不是 CodeExtractor 的成员）
//  ExtractCodeRegion - Extract the given basic blocks into a new function.
//  Builds a CodeExtractor over the supplied dominator tree (honoring
//  AggregateArgs) and forwards the blocks to its ExtractCodeRegion method,
//  returning whatever that method returns.
Function* llvm::ExtractCodeRegion(DominatorTree &DT, ArrayRef<BasicBlock*> code, bool AggregateArgs) {
  CodeExtractor Extractor(&DT, AggregateArgs);
  return Extractor.ExtractCodeRegion(code);
}

//  ExtractLoop - Extract a natural loop into a new function.
//  Builds a CodeExtractor over the supplied dominator tree (honoring
//  AggregateArgs) and hands it the loop's blocks, as reported by
//  L->getBlocks(); returns the extractor's result.
Function* llvm::ExtractLoop(DominatorTree &DT, Loop *L, bool AggregateArgs) {
  CodeExtractor Extractor(&DT, AggregateArgs);
  return Extractor.ExtractCodeRegion(L->getBlocks());
}

//  ExtractBasicBlock - Extract the given basic block(s) into a new function.
//  No dominator tree is available on this path, so the CodeExtractor is
//  constructed with a null tree pointer; AggregateArgs is forwarded as-is.
Function* llvm::ExtractBasicBlock(ArrayRef<BasicBlock*> BBs, bool AggregateArgs){
  CodeExtractor Extractor(0, AggregateArgs);
  return Extractor.ExtractCodeRegion(BBs);
}

posted @ 2012-03-02 21:24 findumars Views(694) Comments(0) Edit 收藏举报

刷新页面返回顶部

三套件，不信不成功

互联网思维与中国实际相结合是我朝开国之本，持续输出，不死不休

LLVM CodeExtractor

公告