LLVM示例使用简介

什么是LLVM？

编译器？

什么是LLVM？

● 编译器？

● 一组格式、库和工具。

什么是LLVM？

● 编译器？

● 一组格式、库和工具。

–一种简单的、带类型的IR（比特码）

–程序分析/优化库

–机器代码生成库

–由这些库组合而成、用于完成具体任务的工具

什么是LLVM？

● 编译器？

● 一组格式、库和工具。

–一种简单的、带类型的IR（比特码）

–程序分析/优化库

–机器代码生成库

–由这些库组合而成、用于完成具体任务的工具

● 易于添加/删除/更改的功能

你将如何使用它？

● 将程序编译为位代码：

clang -g -c -emit-llvm <sourcefile> -o <bitcode>.bc

你将如何使用它？

● 将程序编译为位代码：

clang -g -c -emit-llvm <sourcefile> -o <bitcode>.bc

● 分析比特码：

opt -load <plugin>.so --<plugin> -analyze <bitcode>.bc

你将如何使用它？

● 将程序编译为位代码：

clang -g -c -emit-llvm <sourcefile> -o <bitcode>.bc

● 分析比特码：

opt -load <plugin>.so --<plugin> -analyze <bitcode>.bc

报告程序的属性：

[main] : [A], [C], [F]

[A] : [B]

[C] : [E], [D]

什么是LLVM比特码？

● 一种（相对）简单的IR

代码：

#include <stdio.h>

/* Prints "Hello" followed by a newline, e times. */
void foo(unsigned e)
{
    for (unsigned i = 0; i < e; ++i)
    {
        printf("Hello\n");
    }
}

/* Entry point: passes the argument count to foo as the repeat count. */
int main(int argc, char **argv)
{
    foo(argc);
    return 0;
}

使用clang -c -emit-llvm（以及llvm-dis），生成对应代码的IR：

; NUL-terminated "Hello"; the printf("Hello\n") of the C source shows up below as a call to @puts.
@str = private constant [6 x i8] c"Hello\00"

; Calls puts(@str) once per loop iteration, %e times in total.
define void @foo(i32 %e) {
  %1 = icmp eq i32 %e, 0
  ; Zero iterations requested: jump straight to the exit block.
  br i1 %1, label %.exit, label %.lr.ph

.lr.ph:                                           ; preds = %.lr.ph, %0
  ; Loop counter: 0 on entry from block %0, otherwise the incremented value %2.
  %i = phi i32 [ %2, %.lr.ph ], [ 0, %0 ]
  %str1 = getelementptr [6 x i8]* @str, i64 0, i64 0
  %puts = tail call i32 @puts(i8* %str1)
  %2 = add i32 %i, 1
  %cond = icmp eq i32 %2, %e
  br i1 %cond, label %.exit, label %.lr.ph

.exit:                                            ; preds = %.lr.ph, %0
  ret void
}

; Passes argc to @foo and returns 0.
define i32 @main(i32 %argc, i8** %argv) {
  tail call void @foo(i32 %argc)
  ret i32 0
}

什么是LLVM比特码？

● 一种（相对）简单的IR

#include<stdio.h>

/* Prints "Hello" followed by a newline, e times. */
void
foo(unsigned e) {
  for (unsigned i = 0; i < e; ++i) {
    printf("Hello\n");
  }
}

/* Entry point: passes the argument count to foo as the repeat count. */
int
main(int argc, char **argv) {
  foo(argc);
  return 0;
}

使用clang -c -emit-llvm（以及llvm-dis），生成对应的IR：

; NUL-terminated "Hello"; the printf("Hello\n") of the C source shows up below as a call to @puts.
@str = private constant [6 x i8] c"Hello\00"

; Calls puts(@str) once per loop iteration, %e times in total.
define void @foo(i32 %e) {
  %1 = icmp eq i32 %e, 0
  ; Zero iterations requested: jump straight to the exit block.
  br i1 %1, label %.exit, label %.lr.ph

.lr.ph:                                           ; preds = %.lr.ph, %0
  ; Loop counter: 0 on entry from block %0, otherwise the incremented value %2.
  %i = phi i32 [ %2, %.lr.ph ], [ 0, %0 ]
  %str1 = getelementptr [6 x i8]* @str, i64 0, i64 0
  %puts = tail call i32 @puts(i8* %str1)
  %2 = add i32 %i, 1
  %cond = icmp eq i32 %2, %e
  br i1 %cond, label %.exit, label %.lr.ph

.exit:                                            ; preds = %.lr.ph, %0
  ret void
}

; Passes argc to @foo and returns 0.
define i32 @main(i32 %argc, i8** %argv) {
  tail call void @foo(i32 %argc)
  ret i32 0
}

什么是LLVM比特码？

● 一种（相对）简单的IR

#include<stdio.h>

/* Prints "Hello" followed by a newline, e times. */
void
foo(unsigned e) {
  for (unsigned i = 0; i < e; ++i) {
    printf("Hello\n");
  }
}

/* Entry point: passes the argument count to foo as the repeat count. */
int
main(int argc, char **argv) {
  foo(argc);
  return 0;
}

生成IR：

; NUL-terminated "Hello"; the printf("Hello\n") of the C source shows up below as a call to @puts.
@str = private constant [6 x i8] c"Hello\00"

; Calls puts(@str) once per loop iteration, %e times in total.
define void @foo(i32 %e) {
  %1 = icmp eq i32 %e, 0
  ; Zero iterations requested: jump straight to the exit block.
  br i1 %1, label %.exit, label %.lr.ph

.lr.ph:                                           ; preds = %.lr.ph, %0
  ; Loop counter: 0 on entry from block %0, otherwise the incremented value %2.
  %i = phi i32 [ %2, %.lr.ph ], [ 0, %0 ]
  %str1 = getelementptr [6 x i8]* @str, i64 0, i64 0
  %puts = tail call i32 @puts(i8* %str1)
  %2 = add i32 %i, 1
  %cond = icmp eq i32 %2, %e
  br i1 %cond, label %.exit, label %.lr.ph

.exit:                                            ; preds = %.lr.ph, %0
  ret void
}

; Passes argc to @foo and returns 0.
define i32 @main(i32 %argc, i8** %argv) {
  tail call void @foo(i32 %argc)
  ret i32 0
}

什么是LLVM比特码？

● 一种（相对）简单的IR

#include<stdio.h>

/* Prints "Hello" followed by a newline, e times. */
void
foo(unsigned e) {
  for (unsigned i = 0; i < e; ++i) {
    printf("Hello\n");
  }
}

/* Entry point: passes the argument count to foo as the repeat count. */
int
main(int argc, char **argv) {
  foo(argc);
  return 0;
}

生成基本块：

; NUL-terminated "Hello"; the printf("Hello\n") of the C source shows up below as a call to @puts.
@str = private constant [6 x i8] c"Hello\00"

; Calls puts(@str) once per loop iteration, %e times in total.
define void @foo(i32 %e) {
  %1 = icmp eq i32 %e, 0
  ; Zero iterations requested: jump straight to the exit block.
  br i1 %1, label %.exit, label %.lr.ph

.lr.ph:                                           ; preds = %.lr.ph, %0
  ; Loop counter: 0 on entry from block %0, otherwise the incremented value %2.
  %i = phi i32 [ %2, %.lr.ph ], [ 0, %0 ]
  %str1 = getelementptr [6 x i8]* @str, i64 0, i64 0
  %puts = tail call i32 @puts(i8* %str1)
  %2 = add i32 %i, 1
  %cond = icmp eq i32 %2, %e
  br i1 %cond, label %.exit, label %.lr.ph

.exit:                                            ; preds = %.lr.ph, %0
  ret void
}

; Passes argc to @foo and returns 0.
define i32 @main(i32 %argc, i8** %argv) {
  tail call void @foo(i32 %argc)
  ret i32 0
}

#include<stdio.h>

/* Prints "Hello" followed by a newline, e times. */
void
foo(unsigned e) {
  for (unsigned i = 0; i < e; ++i) {
    printf("Hello\n");
  }
}

/* Entry point: passes the argument count to foo as the repeat count. */
int
main(int argc, char **argv) {
  foo(argc);
  return 0;
}

基本块

标签和前驱（predecessors）

; NUL-terminated "Hello"; the printf("Hello\n") of the C source shows up below as a call to @puts.
@str = private constant [6 x i8] c"Hello\00"

; Calls puts(@str) once per loop iteration, %e times in total.
define void @foo(i32 %e) {
  %1 = icmp eq i32 %e, 0
  ; Zero iterations requested: jump straight to the exit block.
  br i1 %1, label %.exit, label %.lr.ph

.lr.ph:                                           ; preds = %.lr.ph, %0
  ; Loop counter: 0 on entry from block %0, otherwise the incremented value %2.
  %i = phi i32 [ %2, %.lr.ph ], [ 0, %0 ]
  %str1 = getelementptr [6 x i8]* @str, i64 0, i64 0
  %puts = tail call i32 @puts(i8* %str1)
  %2 = add i32 %i, 1
  %cond = icmp eq i32 %2, %e
  br i1 %cond, label %.exit, label %.lr.ph

.exit:                                            ; preds = %.lr.ph, %0
  ret void
}

; Passes argc to @foo and returns 0.
define i32 @main(i32 %argc, i8** %argv) {
  tail call void @foo(i32 %argc)
  ret i32 0
}

示例：

#include<stdio.h>

/* Prints "Hello" followed by a newline, e times. */
void
foo(unsigned e) {
  for (unsigned i = 0; i < e; ++i) {
    printf("Hello\n");
  }
}

/* Entry point: passes the argument count to foo as the repeat count. */
int
main(int argc, char **argv) {
  foo(argc);
  return 0;
}

生成基本块、标签和前驱（predecessors）

; NUL-terminated "Hello"; the printf("Hello\n") of the C source shows up below as a call to @puts.
@str = private constant [6 x i8] c"Hello\00"

; Calls puts(@str) once per loop iteration, %e times in total.
define void @foo(i32 %e) {
  %1 = icmp eq i32 %e, 0
  ; Zero iterations requested: jump straight to the exit block.
  br i1 %1, label %.exit, label %.lr.ph

.lr.ph:                                           ; preds = %.lr.ph, %0
  ; Loop counter: 0 on entry from block %0, otherwise the incremented value %2.
  %i = phi i32 [ %2, %.lr.ph ], [ 0, %0 ]
  %str1 = getelementptr [6 x i8]* @str, i64 0, i64 0
  %puts = tail call i32 @puts(i8* %str1)
  %2 = add i32 %i, 1
  %cond = icmp eq i32 %2, %e
  br i1 %cond, label %.exit, label %.lr.ph

.exit:                                            ; preds = %.lr.ph, %0
  ret void
}

; Passes argc to @foo and returns 0.
define i32 @main(i32 %argc, i8** %argv) {
  tail call void @foo(i32 %argc)
  ret i32 0
}

示例：

#include<stdio.h>

/* Prints "Hello" followed by a newline, e times. */
void
foo(unsigned e) {
  for (unsigned i = 0; i < e; ++i) {
    printf("Hello\n");
  }
}

/* Entry point: passes the argument count to foo as the repeat count. */
int
main(int argc, char **argv) {
  foo(argc);
  return 0;
}

生成指令：

; NUL-terminated "Hello"; the printf("Hello\n") of the C source shows up below as a call to @puts.
@str = private constant [6 x i8] c"Hello\00"

; Calls puts(@str) once per loop iteration, %e times in total.
define void @foo(i32 %e) {
  %1 = icmp eq i32 %e, 0
  ; Zero iterations requested: jump straight to the exit block.
  br i1 %1, label %.exit, label %.lr.ph

.lr.ph:                                           ; preds = %.lr.ph, %0
  ; Loop counter: 0 on entry from block %0, otherwise the incremented value %2.
  %i = phi i32 [ %2, %.lr.ph ], [ 0, %0 ]
  %str1 = getelementptr [6 x i8]* @str, i64 0, i64 0
  %puts = tail call i32 @puts(i8* %str1)
  %2 = add i32 %i, 1
  %cond = icmp eq i32 %2, %e
  br i1 %cond, label %.exit, label %.lr.ph

.exit:                                            ; preds = %.lr.ph, %0
  ret void
}

; Passes argc to @foo and returns 0.
define i32 @main(i32 %argc, i8** %argv) {
  tail call void @foo(i32 %argc)
  ret i32 0
}

检查比特码

● LLVM库有助于检查位代码

–易于检查和/或操作

检查比特码

● LLVM库有助于检查位代码

–易于检查和/或操作

–许多助手（例如CallSite、outs（）、dyn_cast）

Module &module = ...;

for (Function &fun : module) {

for (BasicBlock &bb : fun) {

for (Instruction &i : bb) {

CallSite cs(&i);

if (!cs.getInstruction()) {

continue;

}

...

CallSite可帮助从Call和Invoke指令中提取信息。

检查比特码

● LLVM库有助于检查位代码

–易于检查和/或操作

–许多助手（例如CallSite、outs（）、dyn_cast）

Module &module = ...;

for (Function &fun : module) {

for (BasicBlock &bb : fun) {

for (Instruction &i : bb) {

CallSite cs(&i);

if (!cs.getInstruction()) {

continue;

}

outs() << "Found a function call: " << i << "\n";

...

检查比特码

● LLVM库有助于检查位代码

–易于检查和/或操作

–许多助手（例如CallSite、outs（）、dyn_cast）

Module &module = ...;

for (Function &fun : module) {

for (BasicBlock &bb : fun) {

for (Instruction &i : bb) {

CallSite cs(&i);

if (!cs.getInstruction()) {

continue;

}

outs() << "Found a function call: " << i << "\n";

Value *called = cs.getCalledValue()->stripPointerCasts();

if (Function *f = dyn_cast<Function>(called)) {

outs() << "Direct call to function: " << f->getName() << "\n";

...

dyn_cast()可以高效地检查LLVM IR组件的运行时类型。

处理SSA

● 你可能会问：某些值是从哪里来的？

–有助于跟踪依赖关系

–“这个变量是在哪里定义的？”

处理SSA

● 你可能会问：某些值是从哪里来的？

● LLVM IR为SSA形式

–一行中可以包含多少个首字母缩略词？

–这意味着什么？

–为什么这很重要？

处理SSA

● 你可能会问：某些值是从哪里来的？

● LLVM IR为SSA形式

–一行中可以包含多少个首字母缩略词？

–这意味着什么？

–为什么这很重要？

void foo()

unsigned i = 0;

while (i < 10) {

i = i + 1;

}

处理SSA

● 你可能会问：某些值是从哪里来的？

● LLVM IR为SSA形式

–一行中可以包含多少个首字母缩略词？

–这意味着什么？

–为什么这很重要？

void foo()

unsigned i = 0;

while (i < 10) {

i = i + 1;

}

在这一点上，i的唯一定义是什么？

处理SSA

● 因此phi指令

–它选择要使用的定义

–始终处于基本块的开头

处理SSA

● 因此phi指令

–它选择要使用的定义

–始终处于基本块的开头

void foo()

unsigned i = 0;

while (i < 10) {

i = i + 1;

}

; A loop that counts up to 10, with the counter expressed through a phi instruction.
define void @foo() {
  br label %1

; <label>:1 ; preds = %1, %0
  ; phi selects 0 when control arrives from block %0, or %2 when it arrives from block %1.
  %i.phi = phi i32 [ 0, %0 ], [ %2, %1 ]
  %2 = add i32 %i.phi, 1
  ; Leave the loop once the incremented value equals 10.
  %exitcond = icmp eq i32 %2, 10
  br i1 %exitcond, label %3, label %1

; <label>:3 ; preds = %1
  ret void
}

void foo()

unsigned i = 0;

while (i < 10) {

i = i + 1;

}

; A loop that counts up to 10, with the counter expressed through a phi instruction.
define void @foo() {
  br label %1

; <label>:1 ; preds = %1, %0
  ; phi selects 0 when control arrives from block %0, or %2 when it arrives from block %1.
  %i.phi = phi i32 [ 0, %0 ], [ %2, %1 ]
  %2 = add i32 %i.phi, 1
  ; Leave the loop once the incremented value equals 10.
  %exitcond = icmp eq i32 %2, 10
  br i1 %exitcond, label %3, label %1

; <label>:3 ; preds = %1
  ret void
}

一般依赖项

● 可以遍历一条指令所使用的值

for (auto i = inst->op_begin(), e = inst->op_end(); i != e; ++i) {

// inst uses the Value i

}

一般依赖项

● 可以遍历一条指令所使用的值

for (auto i = inst->op_begin(), e = inst->op_end(); i != e; ++i) {

// inst uses the Value i

}

for %a = %b + %c:

[%b, %c]

一般依赖项

● 可以遍历一条指令所使用的值

for (auto i = inst->op_begin(), e = inst->op_end(); i != e; ++i) {

// inst uses the Value i

}

● 也可以遍历使用了某个特定值的指令

Instruction *inst = ...;

for (auto i = inst->use_begin(), e = inst->use_end(); i != e; ++i)

if (auto *user = dyn_cast<Instruction>(*i)) {

// inst is used by Instruction user

}

处理类型

● LLVM IR是强类型的

–每个值都有一个类型→ getType（）

处理类型

● LLVM IR是强类型的

–每个值都有一个类型→ getType（）

● 值必须显式转换为新类型

; Widens a 16-bit argument to 64 bits and returns it.
; NOTE(review): the function is named @trunc, but its body performs a zero extension (zext), not a truncation.
define i64 @trunc(i16 zeroext %a) {
  ; Explicit conversion from i16 to i64.
  %1 = zext i16 %a to i64
  ret i64 %1
}

处理类型

● LLVM IR是强类型的

–每个值都有一个类型→ getType（）

● 值必须显式转换为新类型

; Widens a 16-bit argument to 64 bits and returns it.
; NOTE(review): the function is named @trunc, but its body performs a zero extension (zext), not a truncation.
define i64 @trunc(i16 zeroext %a) {
  ; Explicit conversion from i16 to i64.
  %1 = zext i16 %a to i64
  ret i64 %1
}

处理类型

● LLVM IR是强类型的

–每个值都有一个类型→ getType（）

● 值必须显式转换为新类型

; Widens a 16-bit argument to 64 bits and returns it.
; NOTE(review): the function is named @trunc, but its body performs a zero extension (zext), not a truncation.
define i64 @trunc(i16 zeroext %a) {
  ; Explicit conversion from i16 to i64.
  %1 = zext i16 %a to i64
  ret i64 %1
}

● 还有指针、数组、结构等的类型。

–强类型意味着处理它们需要多做一些工作

处理类型：GEP

● 有时需要从数组/结构体中提取元素/字段

–指针算术

–使用GetElementPtr（GEP）指令完成

/* A record with two int fields. */
struct rec {
  int x;
  int y;
};

/* Global pointer to an array of records; foo() indexes through it. */
struct rec *buf;

/* Writes 7 into the y field of the sixth record (index 5) reachable through buf. */
void foo() {
  buf[5].y = 7;
}

; Struct type with two i32 fields.
%struct.rec = type { i32, i32 }

; Global pointer to %struct.rec, initialised to null.
@buf = global %struct.rec* null

; Stores 7 into field 1 of element 5 of the records that @buf points to.
define void @foo() {
  ; Read the pointer currently stored in @buf.
  %1 = load %struct.rec** @buf
  ; Address computation only: element index 5, then field index 1 (the second i32).
  %2 = getelementptr %struct.rec* %1, i64 5, i32 1
  store i32 7, i32* %2
  ret void
}

/* A record with two int fields. */
struct rec {
  int x;
  int y;
};

/* Global pointer to an array of records; foo() indexes through it. */
struct rec *buf;

/* Writes 7 into the y field of the sixth record (index 5) reachable through buf. */
void foo() {
  buf[5].y = 7;
}

; Struct type with two i32 fields.
%struct.rec = type { i32, i32 }

; Global pointer to %struct.rec, initialised to null.
@buf = global %struct.rec* null

; Stores 7 into field 1 of element 5 of the records that @buf points to.
define void @foo() {
  ; Read the pointer currently stored in @buf.
  %1 = load %struct.rec** @buf
  ; Address computation only: element index 5, then field index 1 (the second i32).
  %2 = getelementptr %struct.rec* %1, i64 5, i32 1
  store i32 7, i32* %2
  ret void
}

在哪里可以获得信息？

● 在线文档内容丰富：

–LLVM程序员手册

–LLVM语言参考手册

● 头文件！

– All in llvm-3.x.src/include/llvm/

Function.h

BasicBlock.h

Instructions.h

InstrTypes.h

Support/CallSite.h

Support/InstVisitor.h

Type.h

DerivedTypes.h

进行新的分析

● 分析被组织成一个个pass

–ModulePass

–FunctionPass

–LoopPass

–…

从相应的Pass基类派生

3个步骤

1）声明pass

2）注册pass

3）定义pass

制作ModulePass（1）

● 声明ModulePass

// Module pass that tallies how many times each function is called directly.
struct CallPrinterPass : public llvm::ModulePass {
  // Passed to the ModulePass base constructor below.
  static char ID;

  // Call tally per function; updated by handleInstruction().
  DenseMap<Function*, uint64_t> counts;

  CallPrinterPass() : ModulePass(ID) {}

  // Walks the module; the return value tells whether the module was modified.
  bool runOnModule(Module &m) override;

  // Writes the collected tallies to `out`.
  void print(raw_ostream &out, const Module *m) const override;

  // Examines one potential call site and updates `counts`.
  void handleInstruction(CallSite cs);
};

// Module pass that tallies how many times each function is called directly.
struct CallPrinterPass : public llvm::ModulePass {
  // Passed to the ModulePass base constructor below.
  static char ID;

  // Call tally per function; updated by handleInstruction().
  DenseMap<Function*, uint64_t> counts;

  CallPrinterPass() : ModulePass(ID) {}

  // Walks the module; the return value tells whether the module was modified.
  bool runOnModule(Module &m) override;

  // Writes the collected tallies to `out`.
  void print(raw_ostream &out, const Module *m) const override;

  // Examines one potential call site and updates `counts`.
  void handleInstruction(CallSite cs);
};

制作模块pass（3）

● 定义模块pass

–需要覆盖runOnModule（）和print（）

// Hands every instruction of every basic block of every function in the
// module to handleInstruction().
// Always returns false: the module itself is never changed here.
bool
CallPrinterPass::runOnModule(Module &m) {
  for (auto &function : m) {
    for (auto &block : function) {
      for (auto &instruction : block) {
        handleInstruction(&instruction);
      }
    }
  }
  return false;
}

制作模块pass（3）

● 分析继续。。。

// Bumps the tally of the function targeted by `cs`, provided that `cs`
// wraps a real call and that the call target resolves to a Function.
void
CallPrinterPass::handleInstruction(CallSite cs) {
  // A CallSite built from a non-call instruction carries no instruction.
  if (!cs.getInstruction()) {
    return;
  }

  // Only direct calls are tallied: after stripping pointer casts the
  // called value has to be a Function.
  auto *callee = dyn_cast<Function>(cs.getCalledValue()->stripPointerCasts());
  if (!callee) {
    return;
  }

  // operator[] creates a zero entry the first time a function is seen.
  ++counts[callee];
}

制作模块pass（3）

● 打印出结果

// Writes a header followed by one "<function name> : <count>" line for
// every entry in `counts`.
//   out - stream that receives the report
//   m   - not used by this implementation
void
CallPrinterPass::print(raw_ostream &out, const Module *m) const {
  out << "Function Counts\n"
      << "===============\n";
  for (const auto &kvPair : counts) {
    auto *function = kvPair.first;
    uint64_t count = kvPair.second;
    out << function->getName() << " : " << count << "\n";
  }
}

把它们放在一起

● LLVM将一组pass和工具组织成项目（project）

● 最简单的入门方法是使用LLVM自带的示例项目

–llvmsrc/projects/sample

● 在大多数情况下，按照在线指导和项目说明操作即可

把它们放在一起

● LLVM将一组pass和工具组织成项目（project）

● 最简单的入门方法是使用LLVM自带的示例项目

–llvmsrc/projects/sample

关于创建项目的说明

● 已发布在网上，请自行抽时间阅读：

–构建

● 将示例项目复制到新目录<proj>

● 另建一个用于构建的目录<projbuild>

● <proj>/configure--禁用优化--启用调试

–带有clang=/path/to/clang

–自定义

● 在<proj>/lib/sample/中构建整个项目

● 删除现有的源代码，改为在那里编写模块

● 将这些行添加到库目录中的Makefile中：

LOADABLE_MODULE=1

CPPFLAGS+=-std=c++11

● 在大多数情况下，按照在线指导和项目说明操作即可

额外提示

● 有一个指针指向某个东西。它是什么？

–getName（）方法适用于大多数情况。

–通常可以：outs() << X

● 如何查看构建某个模块所需的C++ API调用？

– llc -march=cpp <bitcode>.bc -o <cppapi>.cpp

posted @ 2023-10-27 04:54 吴建明wujianming 阅读(117) 评论(0) 编辑收藏举报

刷新页面返回顶部

吴建明

LLVM示例使用简介

公告