转发表

TemplateInterpreter::initialize()方法实现如下：

源代码位置：/src/share/vm/interpreter/templateInterpreter.cpp
  
// One-time setup of the template interpreter: initializes the abstract
// interpreter and the template table, creates the stub queue, runs the
// interpreter generator, and finally selects the active dispatch table.
void TemplateInterpreter::initialize() {
  // _code is assigned below, so a non-NULL value means this already ran.
  if (_code != NULL) 
       return;
  
  // Initialize the abstract interpreter. AbstractInterpreter is the common base
  // class of the assembly-based interpreters and defines the abstract interface
  // of the interpreter and of the interpreter generator.
  AbstractInterpreter::initialize();
  
  // Initialize the template table, which stores the template of every bytecode.
  TemplateTable::initialize();
  
  // generate interpreter
  {
     ResourceMark  rm;
     int           code_size = InterpreterCodeSize;
     // Initialize the StubQueue (the stub queue living in the CodeCache).
     _code = new StubQueue(new InterpreterCodeletInterface, code_size, NULL,"Interpreter");
     // Instantiate the template interpreter generator; constructing it is what
     // triggers code generation (its constructor calls generate_all()).
     InterpreterGenerator g(_code);
  }
  
  // initialize dispatch table
  _active_table = _normal_table;
}

在创建InterpreterGenerator对象时会调用generate_all()方法，如下：

// Constructs the generator on top of TemplateInterpreterGenerator, passing the
// stub queue through, and immediately generates all interpreter code.
InterpreterGenerator::InterpreterGenerator(StubQueue* code)
  : TemplateInterpreterGenerator(code) {
   // Called here, in the most-derived constructor, rather than in a base
   // constructor.
   generate_all(); // down here so it can be "virtual"
}

在generate_all()方法中会生成各种Codelet，如下：

序号	描述	用途
1	error exits	当方法出现错误时会调用这个例程，进行出错时的程序退出
2	bytecode tracing support	配置命令-XX:+TraceBytecodes后，进行字节码追踪
3	return entry points	函数返回入口
4	invoke return entry points	对于某些invoke字节码调用指令来说，需要一些特殊的返回入口
5	earlyret entry points	JVMTI的EarlyReturn入口
6	deoptimization entry points	从“逆优化”调用返回的入口
7	result handlers for native calls	本地方法调用返回值处理handlers
8	continuation entry points	continuation入口
9	safepoint entry points	safepoint入口
10	exception handling	异常处理例程
11	throw exception entrypoints	抛出异常的入口
12	all non-native method kinds	非本地方法的入口
13	all native method kinds	本地方法的入口
14	Bytecodes	字节码的入口

对于非本地方法的入口，调用generate_method_entry()方法，如下：

// Generates the interpreter entry point for the given method kind and returns
// its address. Kinds with a dedicated generator return that generator's result;
// zerolocals / zerolocals_synchronized (and any kind whose dedicated generator
// returned NULL) get the normal entry produced by generate_normal_entry().
address AbstractInterpreterGenerator::generate_method_entry(AbstractInterpreter::MethodKind kind) {

  // determine code generation flags
  bool synchronized = false;
  address entry_point = NULL;
  // The specialised generate_*_entry() functions are called through the
  // InterpreterGenerator type, so view this object as one.
  InterpreterGenerator* ig_this = (InterpreterGenerator*)this;

  switch (kind) {
  case Interpreter::zerolocals : // zerolocals denotes an ordinary Java method
	  break;
  case Interpreter::zerolocals_synchronized:
	  synchronized = true;
	  break;
  case Interpreter::native :
	  entry_point = ig_this->generate_native_entry(false);
	  break;
  case Interpreter::native_synchronized :
	  entry_point = ig_this->generate_native_entry(true);
	  break;
  case Interpreter::empty :
	  entry_point = ig_this->generate_empty_entry();
	  break;
  case Interpreter::accessor :
	  entry_point = ig_this->generate_accessor_entry();
	  break;
  case Interpreter::abstract :
	  entry_point = ig_this->generate_abstract_entry();
	  break;

  // All java.lang.Math kinds share one generator, selected by kind.
  case Interpreter::java_lang_math_sin     : // fall thru
  case Interpreter::java_lang_math_cos     : // fall thru
  case Interpreter::java_lang_math_tan     : // fall thru
  case Interpreter::java_lang_math_abs     : // fall thru
  case Interpreter::java_lang_math_log     : // fall thru
  case Interpreter::java_lang_math_log10   : // fall thru
  case Interpreter::java_lang_math_sqrt    : // fall thru
  case Interpreter::java_lang_math_pow     : // fall thru
  case Interpreter::java_lang_math_exp     :
	   entry_point = ig_this->generate_math_entry(kind);
	   break;
  case Interpreter::java_lang_ref_reference_get:
	   entry_point = ig_this->generate_Reference_get_entry();
	   break;
  case Interpreter::java_util_zip_CRC32_update :
	  entry_point = ig_this->generate_CRC32_update_entry();
	  break;
  case Interpreter::java_util_zip_CRC32_updateBytes :
	  // fall thru
  case Interpreter::java_util_zip_CRC32_updateByteBuffer :
	  entry_point = ig_this->generate_CRC32_updateBytes_entry(kind);
	  break;
  default:
    // Any kind not listed above is a programming error.
    fatal(err_msg("unexpected method kind: %d", kind));
    break;
  }

  // A dedicated entry was generated: use it.
  if (entry_point) {
    return entry_point;
  }

  // Otherwise generate the normal entry, synchronized or not.
  return ig_this->generate_normal_entry(synchronized);
}

方法类型通过枚举类型MethodKind定义，如下：

 // Kinds of method entry points the interpreter distinguishes; a value of this
 // enum selects which entry generate_method_entry() produces.
 enum MethodKind {
    zerolocals,                        // method needs locals initialization
    zerolocals_synchronized,           // method needs locals initialization & is synchronized
    native,                            // native method
    native_synchronized,               // native method & is synchronized
    empty,                             // empty method (code: _return)
    accessor,                          // accessor method (code: _aload_0, _getfield, _(a|i)return)
    abstract,                          // abstract method (throws an AbstractMethodError)
    method_handle_invoke_FIRST,        // java.lang.invoke.MethodHandles::invokeExact, etc.
    // Reserves one kind per signature-polymorphic intrinsic in the range
    // FIRST_MH_SIG_POLY..LAST_MH_SIG_POLY; the following kinds continue after it.
    method_handle_invoke_LAST  = (method_handle_invoke_FIRST + (vmIntrinsics::LAST_MH_SIG_POLY - vmIntrinsics::FIRST_MH_SIG_POLY)),
    java_lang_math_sin,                // implementation of java.lang.Math.sin   (x)
    java_lang_math_cos,                // implementation of java.lang.Math.cos   (x)
    java_lang_math_tan,                // implementation of java.lang.Math.tan   (x)
    java_lang_math_abs,                // implementation of java.lang.Math.abs   (x)
    java_lang_math_sqrt,               // implementation of java.lang.Math.sqrt  (x)
    java_lang_math_log,                // implementation of java.lang.Math.log   (x)
    java_lang_math_log10,              // implementation of java.lang.Math.log10 (x)
    java_lang_math_pow,                // implementation of java.lang.Math.pow   (x,y)
    java_lang_math_exp,                // implementation of java.lang.Math.exp   (x)
    java_lang_ref_reference_get,       // implementation of java.lang.ref.Reference.get()
    java_util_zip_CRC32_update,        // implementation of java.util.zip.CRC32.update()
    java_util_zip_CRC32_updateBytes,      // implementation of java.util.zip.CRC32.updateBytes()
    java_util_zip_CRC32_updateByteBuffer, // implementation of java.util.zip.CRC32.updateByteBuffer()
    number_of_method_entries,          // count of the kinds above (not itself a kind)
    invalid = -1                       // sentinel outside the valid range
  };

对于zerolocals类型来说，在generate_all()方法中会调用set_entry_points_for_all_bytes()方法，方法对所有被定义的字节码生成目标代码并设置对应的入口，set_entry_points_for_all_bytes()方法的实现如下：

// Walks every slot of the dispatch table and installs entry points for it:
// defined bytecodes get generated code, all other slots are marked
// unimplemented.
void TemplateInterpreterGenerator::set_entry_points_for_all_bytes() {
  for (int i = 0; i < DispatchTable::length; i++) {
     // The table index is the bytecode value itself.
     Bytecodes::Code code = (Bytecodes::Code)i;
     if (Bytecodes::is_defined(code)) {
         set_entry_points(code);
     } else {
         set_unimplemented(i);
     }
  }
}

当code已经定义时，会调用set_entry_points()方法，此方法取出该字节码对应的Template模板并调用set_short_entry_points()函数进行处理，将入口地址保存在转发表（DispatchTable）_normal_table或数组_wentry_point（使用wide指令）中。Template模板在之前已经介绍过，字节码指令都会对应一个Template模板，而模板中保存着字节码指令生成对应代码例程中需要的信息。set_entry_points()方法的实现如下：

// Generates the code for one bytecode and records its entry points: the nine
// per-TosState entries go into Interpreter::_normal_table, the single wide
// entry into Interpreter::_wentry_point.
void TemplateInterpreterGenerator::set_entry_points(Bytecodes::Code code) {
  // Scopes the code generated below as a codelet named after the bytecode.
  CodeletMark   cm(_masm, Bytecodes::name(code), code);
  // initialize entry points
  assert(_unimplemented_bytecode    != NULL, "should have been generated before");
  assert(_illegal_bytecode_sequence != NULL, "should have been generated before");

  // Defaults: every typed entry starts at _illegal_bytecode_sequence, the
  // vtos and wide entries at _unimplemented_bytecode. The calls below
  // overwrite only the entries that are valid for this bytecode's template.
  address bep = _illegal_bytecode_sequence;
  address cep = _illegal_bytecode_sequence;
  address sep = _illegal_bytecode_sequence;
  address aep = _illegal_bytecode_sequence;
  address iep = _illegal_bytecode_sequence;
  address lep = _illegal_bytecode_sequence;
  address fep = _illegal_bytecode_sequence;
  address dep = _illegal_bytecode_sequence;
  address vep = _unimplemented_bytecode;
  address wep = _unimplemented_bytecode;

  // code for short & wide version of bytecode
  if (Bytecodes::is_defined(code)) {
     Template* t = TemplateTable::template_for(code);
     set_short_entry_points(t, bep, cep, sep, aep, iep, lep, fep, dep, vep);
  }

  if (Bytecodes::wide_is_defined(code)) {
     Template* t = TemplateTable::template_for_wide(code);
     set_wide_entry_point(t, wep);
  }

  // set entry points
  EntryPoint  entry(bep, cep, sep, aep, iep, lep, fep, dep, vep);
  Interpreter::_normal_table.set_entry(code, entry);
  Interpreter::_wentry_point[code] = wep;
}

调用set_short_entry_points()方法时，需要传入栈顶缓存状态，也就是上一个字节码执行时可能会将产生的结果存储到缓存。使用栈顶缓存（Top-of-Stack Caching，缩写为TOSCA，简称Tos）主要还是为了性能优化。HotSpot共定义了9种TosState，通过枚举类型来表示，如下：

// TosState describes the top-of-stack state before and after the execution of
// a bytecode or method. The top-of-stack value may be cached in one or more CPU
// registers. The TosState corresponds to the 'machine representation' of this cached
// value. There's 4 states corresponding to the JAVA types int, long, float & double
// as well as a 5th state in case the top-of-stack value is actually on the top
// of stack (in memory) and thus not cached. The atos state corresponds to the itos
// state when it comes to machine representation but is used separately for (oop)
// type specific operations (e.g. verification code).
//
// NOTE: the enum below lists nine states, not five: in addition to the states
// described above it has btos, ctos and stos for the sub-int types and atos
// for object references.

enum TosState {         // describes the tos cache contents
  btos = 0,             // byte, bool tos cached
  ctos = 1,             // char tos cached
  stos = 2,             // short tos cached
  itos = 3,             // int tos cached
  ltos = 4,             // long tos cached
  ftos = 5,             // float tos cached
  dtos = 6,             // double tos cached
  atos = 7,             // object cached
  vtos = 8,             // tos not cached
  number_of_states,     // count of the states above (9)
  ilgl                  // illegal state: should not occur
};

以非wide指令为例进行分析，bep(byte entry point)、cep、 sep、aep、iep、lep、fep、dep、vep分别表示指令执行前栈顶元素状态为byte/boolean、char、short、array/reference(对象引用)、int、long、float、double、void类型时的入口地址。举个例子，如iconst_0表示向栈中压入常量0，那么模板中定义如下：

def(Bytecodes::_iconst_0 , ____|____|____|____, vtos, itos, iconst,0);

第3个参数指明了tos_in，第4个参数为tos_out，tos_in与tos_out是指令执行前后的TosState。也就是说，执行此字节码指令时不需要获取栈顶缓存的值，所以为void，执行完成后栈顶会缓存一个int类型的整数，也就是0。缓存通常会缓存到寄存器中，所以比起压入栈中，获取的效率要更高一些，如果下一个执行的字节码指令不需要，那么还需要将缓存的0值压入栈内。假设下一个执行的字节码也为iconst，那么要从此指令的iep（上一个缓存了int类型整数0）入口来执行，由于iconst的入口要求为vtos，所以需要将缓存的int类型入栈。

TemplateInterpreterGenerator::set_entry_points()方法最后还会将为当前字节码指令生成的9个入口封装为EntryPoint对象然后保存到Interpreter::_normal_table数组中。对于wide指令则没有缓存，只有一个入口，处理起来会更简单一些。

调用TemplateTable::template_for()方法可以从TemplateTable::_template_table数组中获取对应的Template对象，然后调用set_short_entry_points()方法生成代码片段。非wide指令将调用set_short_entry_points()方法，方法的实现如下：

// Generates the code for a (non-wide) template and fills in the entry points
// that are valid for it, depending on the tos state the template expects on
// entry (t->tos_in()). Entry points not assigned here keep the values the
// caller passed in.
void TemplateInterpreterGenerator::set_short_entry_points(
                Template* t,
		address& bep, address& cep, address& sep, address& aep, address& iep,
		address& lep, address& fep, address& dep, address& vep
) {
  assert(t->is_valid(), "template must exist");
  switch (t->tos_in()) {
    case btos:
    case ctos:
    case stos:
      ShouldNotReachHere();  // btos/ctos/stos should use itos.
      break;
    // Template expects a typed value: two entries are produced. vep emits a
    // pop of that type first (presumably loading the value from the stack into
    // the tos cache) and then falls through into the typed entry, where the
    // template code itself starts.
    case atos: vep = __ pc(); __ pop(atos); aep = __ pc(); generate_and_dispatch(t);   break;
    case itos: vep = __ pc(); __ pop(itos); iep = __ pc(); generate_and_dispatch(t);   break;
    case ltos: vep = __ pc(); __ pop(ltos); lep = __ pc(); generate_and_dispatch(t);   break;
    case ftos: vep = __ pc(); __ pop(ftos); fep = __ pc(); generate_and_dispatch(t);   break;
    case dtos: vep = __ pc(); __ pop(dtos); dep = __ pc(); generate_and_dispatch(t);   break;
    // Template expects nothing cached: every entry is valid, see the helper.
    case vtos: set_vtos_entry_points(t, bep, cep, sep, aep, iep, lep, fep, dep, vep);  break;
    default  : ShouldNotReachHere();                                                   break;
  }
}

set_short_entry_points()方法根据操作数栈栈顶元素类型进行判断，首先byte、char和short类型都应被当做int类型进行处理，对于非void类型将调用generate_and_dispatch()方法产生目标代码，这里以iconst_0为例对TOS的处理进行介绍：

对于iconst，其期望的_tos_in（执行前栈顶元素类型）是void类型（vtos），期望的_tos_out（执行后栈顶元素类型）是int类型（itos）。对于期望的_tos_in为非void类型（atos、itos、ltos、ftos、dtos）的指令，如果进入时栈顶状态为vtos（即从vep入口进入），则需要先从栈中弹出数据缓存到寄存器中，以满足当前指令的入口状态；如果期望的栈顶状态为vtos（如iconst），则会调用set_vtos_entry_points()方法处理。注意对vep、aep、iep、lep、fep、dep的赋值。set_vtos_entry_points()方法的实现如下：

// Helper for vtos entry point generation.
// For a template whose tos_in is vtos, emits one small prologue per incoming
// tos state that pushes the cached value, then the shared template code.
// All nine entry-point references are assigned.
void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t,
                                                         address& bep,
                                                         address& cep,
                                                         address& sep,
                                                         address& aep,
                                                         address& iep,
                                                         address& lep,
                                                         address& fep,
                                                         address& dep,
                                                         address& vep) {
  assert(t->is_valid() && t->tos_in() == vtos, "illegal template");
  Label L;  // start of the shared template code, bound below
  // Each typed entry pushes the cached value and jumps to the shared code.
  aep = __ pc();  __ push_ptr();  __ jmp(L);
  fep = __ pc();  __ push_f();    __ jmp(L);
  dep = __ pc();  __ push_d();    __ jmp(L);
  lep = __ pc();  __ push_l();    __ jmp(L);
  // byte/char/short share the int entry; it is emitted last so that it can
  // fall through into the shared code without a jump.
  bep = cep = sep =
  iep = __ pc();  __ push_i();
  // The vtos entry has nothing to push, so it is the shared code itself.
  vep = __ pc();
  __ bind(L);
  generate_and_dispatch(t);
}

当要生成iconst指令的对应的代码例程时并不关心栈顶元素的状态，也就是栈顶元素状态为vtos，那么如果栈顶元素的状态为其它状态就需要压入栈中存储了。例如当栈顶缓存了上一次执行字节码时的结果值，这个结果值的类型为float，那么就会从fep入口进入，然后调用push_f()方法将这个结果值压入栈中，这样栈顶缓存的状态就为vtos了。

push_f()方法的实现如下：

源代码位置：/hotspot/src/cpu/x86/vm/interp_masm_x86_64.cpp
// Emits code that pushes the float held in XMM register r onto the stack:
// reserve one word below rsp, then store the value there.
void InterpreterMacroAssembler::push_f(XMMRegister r) { // r defaults to xmm0
    subptr(rsp, wordSize);       // wordSize is the machine word size: 8 bytes on 64-bit, so this subtracts 8
    movflt(Address(rsp, 0), r);
}

// Pointer-sized "dst -= imm32": expands to subq under LP64 and to subl
// otherwise.
void MacroAssembler::subptr(Register dst, int32_t imm32) {
  LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
}

// Emits "subq $imm32, dst": the prefix is written first, then the arithmetic
// instruction with base opcode 0x81 and ModRM base 0xE8.
void Assembler::subq(Register dst, int32_t imm32) {
   // Called for the prefix it emits; the returned encoding is not needed here.
   (void) prefixq_and_encode(dst->encoding());
   emit_arith(0x81, 0xE8, dst, imm32);
}

// Emits the opcode, register byte and immediate of a register/immediate
// arithmetic instruction, choosing the short form when the immediate fits in
// 8 bits.
//   op1   - opcode byte; must have bit 0 set and bit 1 clear
//   op2   - second byte, OR-ed with the encoding of dst
//   dst   - destination register
//   imm32 - immediate operand
void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    // Short form: 3 bytes, with a one-byte immediate.
    emit_int8(op1 | 0x02); // set sign bit
    emit_int8(op2 | encode(dst));
    emit_int8(imm32 & 0xFF);
  } else {
    // Long form: 6 bytes, with the full 32-bit immediate.
    emit_int8(op1);
    emit_int8(op2 | encode(dst));
    emit_int32(imm32);
  }
}

emit_arith()方法将调用emit_int8()和emit_int32()方法写入指令的二进制代码：

83 EC 08 // 由于8可由8位有符号数表示，第一个字节为0x81|0x02(0x83)，rsp的寄存器号为4，第二个字节为0xE8|0x04(0xEC)，第三个字节为0x08&0xFF(0x08)，该指令即AT&T风格的sub $0x8,%rsp。注意在这3个字节之前，subq()已经通过prefixq_and_encode()写入了REX.W前缀0x48，所以完整的指令编码为48 83 EC 08。

set_vtos_entry_points()产生的入口部分代码如下：

push   %rax           .....(atos entry)
jmpq   <addr> 
sub    $0x8,%rsp      .....(ftos entry)
movss  %xmm0,(%rsp)
jmpq   <addr>(addr为字节码的本地代码入口)
sub    $0x10,%rsp     .....(dtos entry)
movsd  %xmm0,(%rsp)
jmpq   <addr>
sub    $0x10,%rsp     .....(ltos entry)
mov    %rax,(%rsp)
jmpq   <addr>
push   %rax           .....(itos entry)

set_vtos_entry_points()方法的最后调用generate_and_dispatch()方法写入当前字节码的机器代码和跳转到下一个字节码继续执行的逻辑处理部分。

generate_and_dispatch()方法的主要实现如下：

// Emits the code of template t and, unless the template dispatches itself,
// the epilog that dispatches to the next bytecode.
// NOTE: this is an abridged excerpt; `step`, used below, is declared in the
// part elided by "// ...".
void TemplateInterpreterGenerator::generate_and_dispatch(Template* t, TosState tos_out) {
  // ...
  // generate template
  t->generate(_masm);
  // advance
  if (t->does_dispatch()) {
     // The template performs the dispatch itself, so no epilog is emitted.
     // asserts
  } else {
     // dispatch to next bytecode
     __ dispatch_epilog(tos_out, step);
  }
}

这里以iconst字节码为例分析generate()：　　

// Runs this template's generator function against the given assembler.
void Template::generate(InterpreterMacroAssembler* masm) {
  // parameter passing
  // The generator takes only _arg, so the current template and the assembler
  // are handed over through TemplateTable's static fields.
  TemplateTable::_desc = this;
  TemplateTable::_masm = masm;
  // code generation
  _gen(_arg);
  masm->flush();
}

generate()会调用生成器函数_gen(_arg)，对于iconst指令来说，生成器函数为iconst()。生成器函数的实现根据平台而不同，如x86_64平台下，定义如下：

源代码位置：/hotspot/src/cpu/x86/vm/templateTable_x86_64.cpp
// Generator for the iconst_<n> bytecodes: emits code that places the constant
// `value` in rax.
void TemplateTable::iconst(int value) {
  // Declares the tos transition of this template: vtos in, itos out.
  transition(vtos, itos);
  if (value == 0) {
    // Zero is produced by xor-ing the register with itself rather than by
    // moving an immediate.
    __ xorl(rax, rax);
  } else {
    __ movl(rax, value);
  }
}

我们知道，iconst_i指令是将i压入栈，这里生成器函数iconst()在i为0时，没有直接将0写入rax，而是使用异或运算清零，即向代码缓冲区写入指令”xor %eax, %eax”（xorl操作的是32位寄存器eax，在64位模式下写32位寄存器会自动将rax的高32位清零）；在i不为0时，写入指令”mov $0xi, %eax”。

当模板自身不负责转发（即t->does_dispatch()返回false）时，会在TemplateInterpreterGenerator::generate_and_dispatch()方法中调用dispatch_epilog()方法生成取下一条指令和分派的目标代码：

// Epilog emitted after a template's code: simply forwards to dispatch_next()
// with the same tos state and step.
void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) {
   dispatch_next(state, step);
}

dispatch_next()实现如下：

// Emits code that loads the bytecode `step` bytes after the current one,
// advances r13 by `step`, and dispatches through the dispatch table for the
// given tos state.
void InterpreterMacroAssembler::dispatch_next(TosState state, int step) {
  // load next bytecode (load before advancing r13 to prevent AGI)
  load_unsigned_byte(rbx, Address(r13, step));
  // advance r13
  increment(r13, step);
  // rbx now holds the next bytecode; pick the table matching `state`.
  dispatch_base(state, Interpreter::dispatch_table(state));
}

这个方法在之前已经介绍过，这里不再介绍。

专注虚拟机与编译器研究

转发表

导航

公告