汇编指令的格式如下图所示：

1. get_instruction

get_instruction函数是libdasm的核心，先看一下该函数的注释和原型

// Fetch instruction
/*
 * The operation is quite straightforward:
 *
 * - determine actual opcode (skip prefixes etc.)
 * - figure out which instruction table to use
 * - index the table with opcode
 * - parse operands
 * - fill instruction structure
 *
 * Only point where this gets hairy is those *brilliant*
 * opcode extensions....
 *
 */
int get_instruction(PINSTRUCTION inst, BYTE *addr, enum Mode mode) {

该函数分为以下几个部分：

1.1 略过prefix, 获取到实际的opcode

get_real_instruction来做这个工作。

对于单字节opcode，该函数什么也不做；

对于2字节opcode

        // 2-byte opcode
        case 0x0f:
            *index += 1;
            *flags |= EXT_T2;
            break;

跳过0x0F转义字节，并设置EXT_T2标志，表示这是一个2字节opcode；

对于带有前缀的opcode（包括强制前缀，Mandatory Prefix），

// Prefix group 2        case 0x2e:            *index += 1;            // Clear previous flags from same group (undefined effect)            *flags &= 0xff00ffff;            *flags |= PREFIX_CS_OVERRIDE;            get_real_instruction(addr + 1, index, flags);            break;        case 0x36:            *index += 1;            *flags &= 0xff00ffff;            *flags |= PREFIX_SS_OVERRIDE;            get_real_instruction(addr + 1, index, flags);            break;        case 0x3e:            *index += 1;            *flags &= 0xff00ffff;            *flags |= PREFIX_DS_OVERRIDE;            get_real_instruction(addr + 1, index, flags);            break;        case 0x26:            *index += 1;            *flags &= 0xff00ffff;            *flags |= PREFIX_ES_OVERRIDE;            get_real_instruction(addr + 1, index, flags);            break;        case 0x64:            *index += 1;            *flags &= 0xff00ffff;            *flags |= PREFIX_FS_OVERRIDE;            get_real_instruction(addr + 1, index, flags);            break;        case 0x65:            *index += 1;            *flags &= 0xff00ffff;            *flags |= PREFIX_GS_OVERRIDE;            get_real_instruction(addr + 1, index, flags);            break;        // Prefix group 3 or 3-byte opcode        case 0x66:            // Do not clear flags from the same group!!!!            *index += 1;            *flags |= PREFIX_OPERAND_SIZE_OVERRIDE;            get_real_instruction(addr + 1, index, flags);             break;        // Prefix group 4        case 0x67:            // Do not clear flags from the same group!!!!            *index += 1;            *flags |=  PREFIX_ADDR_SIZE_OVERRIDE;            get_real_instruction(addr + 1, index, flags);             break;

设置相应的flag，然后从下一个字节取真正的opcode；

常见的前缀如下表所示：

1.2 确定使用哪张映射表

在opcode_tables.h中定义了几张不同的映射表

// 1-byte opcodesINST inst_table1[256] = {

// 2-byte instructionsINST inst_table2[256] = {

// 3-byte instructions, prefix 0x66 // Yeah, I know, it's waste to use a full 256-instruction table but now// I'm prepared for future Intel extensions ;-) INST inst_table3_66[256] = {

// 3-byte instructions, prefix 0xf2 INST inst_table3_f2[256] = {

// 3-byte instructions, prefix 0xf3 INST inst_table3_f3[256] = {

映射表中的每个条目都是一个INST结构体，其定义如下

// struct INST is used internally by the library
typedef struct _INST {
    DWORD type;        // InstructionType type and flags
    const char *mnemonic;    // InstructionType mnemonic
    int flags1;        // First operand flags (if any)
    int flags2;        // Second operand flags (if any)
    int flags3;        // Additional operand flags (if any)
    int modrm;        // Is MODRM byte present?
    short eflags_affected;    // Processor eflags affected
    short eflags_used;      // Processor eflags used by this instruction
    int iop_written;    // mask of affected implied registers (written)
    int iop_read;        // mask of affected implied registers (read)
} INST, *PINST;

举一例：

{ INSTRUCTION_TYPE_ADD,    "add",       AM_E|OT_b|P_w,               AM_G|OT_b|P_r,             FLAGS_NONE,   1, EFL_MATH, 0, 0, 0 },

比较重要的是第3-5个字段（flags1、flags2、flags3），分别描述第一、第二和附加操作数的属性

AM_[X]代表Addressing Method，即寻址方法，通过什么方法找到参数

// Operand Addressing Methods, from the Intel manual#define MASK_AM(x) ((x) & 0x00ff0000)#define AM_A 0x00010000        // Direct address with segment prefix#define AM_C 0x00020000        // MODRM reg field defines control register#define AM_D 0x00030000        // MODRM reg field defines debug register#define AM_E 0x00040000        // MODRM byte defines reg/memory address#define AM_G 0x00050000        // MODRM byte defines general-purpose reg#define AM_I 0x00060000        // Immediate data follows#define AM_J 0x00070000        // Immediate value is relative to EIP#define AM_M 0x00080000        // MODRM mod field can refer only to memory#define AM_O 0x00090000        // Displacement follows (without modrm/sib)#define AM_P 0x000a0000        // MODRM reg field defines MMX register#define AM_Q 0x000b0000        // MODRM defines MMX register or memory #define AM_R 0x000c0000        // MODRM mod field can only refer to register#define AM_S 0x000d0000        // MODRM reg field defines segment register#define AM_T 0x000e0000        // MODRM reg field defines test register#define AM_V 0x000f0000        // MODRM reg field defines XMM register#define AM_W 0x00100000        // MODRM defines XMM register or memory // Extra addressing modes used in this implementation#define AM_I1  0x00200000    // Immediate byte 1 encoded in instruction#define AM_REG 0x00210000    // Register encoded in instruction#define AM_IND 0x00220000    // Register indirect encoded in instruction

OT_[X]代表参数的类型Operand Type，即参数的长度

// Operand Types, from the intel manual#define MASK_OT(x) ((x) & 0xff000000)#define OT_a  0x01000000#define OT_b  0x02000000    // always 1 byte#define OT_c  0x03000000    // byte or word, depending on operand#define OT_d  0x04000000    // double-word#define OT_q  0x05000000    // quad-word#define OT_dq 0x06000000    // double quad-word#define OT_v  0x07000000    // word or double-word, depending on operand#define OT_w  0x08000000    // always word#define OT_p  0x09000000    // 32-bit or 48-bit pointer#define OT_pi 0x0a000000    // quadword MMX register#define OT_pd 0x0b000000    // 128-bit double-precision float#define OT_ps 0x0c000000    // 128-bit single-precision float#define OT_s  0x0d000000    // 6-byte pseudo descriptor#define OT_sd 0x0e000000    // Scalar of 128-bit double-precision float#define OT_ss 0x0f000000    // Scalar of 128-bit single-precision float#define OT_si 0x10000000    // Doubleword integer register#define OT_t  0x11000000    // 80-bit packed FP data

P_[X]代表参数的Permission，即参数允许的操作，也就是该条指令会对参数做什么样的操作(r, w, x)

// Operand permissions
#define MASK_PERMS(x) ((x) & 0x0000f000)
#define P_r   0x00004000    // Read
#define P_w   0x00002000    // Write
#define P_x   0x00001000    // Execute

1.3 映射

1.4 解析operand

Operand有三种类型（另有OPERAND_TYPE_NONE表示该操作数不存在）：

// Operand types
enum Operand {
    OPERAND_TYPE_NONE,    // operand not present
    OPERAND_TYPE_MEMORY,    // memory operand ([eax], [0], etc.)
    OPERAND_TYPE_REGISTER,    // register operand (eax, mm0, etc.)
    OPERAND_TYPE_IMMEDIATE,    // immediate operand (0x1234)
};

其中，immediate是直接地址，也可以称为静态地址，即在指令中明确给出的地址；

而register和memory都是间接地址，可以称为动态地址，只有在程序真正运行时才能确定的地址。

即使是静态解析程序，我们也可以针对给定的内存地址、寄存器，逐条指令地模拟其内容的变化，这就是模拟器的原理。说白了，模拟器终归还属于静态解析，而算不上是真正的动态。

1.5 填充结构体

posted @ 2014-04-09 17:59 Daniel King 阅读(761) 评论(0) 编辑收藏举报

刷新页面返回顶部

Daniel King

淡泊明志，宁静致远