java Atomic compareAndSet部分原理分析

以AtomicLong的compareAndSet方法举例。先说结论:如果CPU支持,则基于CPU指令(CMPXCHG8B)实现;否则使用ObjectLocker锁实现。

 

分析过程如下:

该方法在jdk中源代码如下:

/**
 * Compare-and-set entry point of AtomicLong.
 *
 * Contains no locking of its own: it forwards straight to
 * {@code unsafe.compareAndSwapLong}, passing this object, the field offset
 * {@code valueOffset}, and the two caller-supplied values.
 *
 * @param expect the value the field is expected to hold
 * @param update the value to store if the expectation holds
 * @return whatever {@code unsafe.compareAndSwapLong} returns
 */
public final boolean compareAndSet(long expect, long update) {
        return unsafe.compareAndSwapLong(this, valueOffset, expect, update);
    }

unsafe是sun.misc.Unsafe的一个实例。Unsafe类在jdk自带的源码包中没有源代码,其compareAndSwapLong是由jvm实现的native方法。在openjdk中对应的实现位于hotspot/src/share/vm/prims/unsafe.cpp

jdk代码里没有用锁,对用户来说是无锁的操作

openjdk里是怎么实现unsafe.compareAndSwapLong的呢?直接用代码说话,如下:

// Native implementation behind Unsafe.compareAndSwapLong.
//   obj    - object containing the field
//   offset - field offset used to compute the target address
//   e      - expected value
//   x      - new value
// Returns true when the swap happened, false otherwise.
// Two strategies are used depending on VM_Version::supports_cx8():
//   * supported:     Atomic::cmpxchg; success means the value it returns equals e.
//   * not supported: plain compare-then-store performed while an ObjectLocker
//                    on the target object is in scope.
UNSAFE_ENTRY(jboolean, Unsafe_CompareAndSwapLong(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jlong e, jlong x))
  UnsafeWrapper("Unsafe_CompareAndSwapLong");
  // Resolve the JNI handle and compute the address of the jlong field.
  Handle p (THREAD, JNIHandles::resolve(obj));
  jlong* addr = (jlong*)(index_oop_from_field_offset_long(p(), offset));
  if (VM_Version::supports_cx8())
    return (jlong)(Atomic::cmpxchg(x, addr, e)) == e;
  else {
    jboolean success = false;
    // NOTE(review): presumably ObjectLocker holds the object's monitor until
    // the end of this block, making the read-compare-write below exclusive
    // with respect to other users of the same lock - confirm against ObjectLocker.
    ObjectLocker ol(p, THREAD);
    if (*addr == e) { *addr = x; success = true; }
    return success;
  }
UNSAFE_END

可以看到,如果不支持cx8,那么就需要用到ObjectLocker锁。那么 VM_Version::supports_cx8() 的底层实现又是什么呢?还是上代码,在openjdk/hotspot/src/share/vm/runtime/vm_version.hpp里:

// Reports whether the 8-byte compare-and-exchange path may be used.
// When SUPPORTS_NATIVE_CX8 is defined at compile time the answer is a
// constant true; otherwise the value of the _supports_cx8 flag is returned.
static bool supports_cx8()  {
#ifdef SUPPORTS_NATIVE_CX8
    return true;
#else
    return _supports_cx8;
#endif
  }
_supports_cx8在何处赋值呢?该值默认为false,在x86系统中使用supports_cmpxchg8()方法赋值,在sparc系统中使用has_v9()赋值。我们来看一下x86系统中的情况,
// True when the CPU_CX8 bit is set in the _cpuFeatures bitmask.
static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; }

_cpuFeatures定义如下:

static int _cpuFeatures;     // features returned by the "cpuid" instruction
                               // 0 if this instruction is not available

CPU_CX8定义如下:

// CPU feature bit flags. Each enumerator is a distinct single bit (1 << n),
// so several features can be OR-ed together into one integer mask and
// tested individually with a bitwise AND.
enum {
    CPU_CX8    = (1 << 0), // next bits are from cpuid 1 (EDX)
    CPU_CMOV   = (1 << 1),
    CPU_FXSR   = (1 << 2),
    CPU_HT     = (1 << 3),
    CPU_MMX    = (1 << 4),
    CPU_3DNOW_PREFETCH  = (1 << 5), // Processor supports 3dnow prefetch and prefetchw instructions
                                    // may not necessarily support other 3dnow instructions
    CPU_SSE    = (1 << 6),
    CPU_SSE2   = (1 << 7),
    CPU_SSE3   = (1 << 8), // SSE3 comes from cpuid 1 (ECX)
    CPU_SSSE3  = (1 << 9),
    CPU_SSE4A  = (1 << 10),
    CPU_SSE4_1 = (1 << 11),
    CPU_SSE4_2 = (1 << 12),
    CPU_POPCNT = (1 << 13),
    CPU_LZCNT  = (1 << 14),
    CPU_TSC    = (1 << 15),
    CPU_TSCINV = (1 << 16),
    CPU_AVX    = (1 << 17),
    CPU_AVX2   = (1 << 18),
    CPU_AES    = (1 << 19),
    CPU_ERMS   = (1 << 20), // enhanced 'rep movsb/stosb' instructions
    CPU_CLMUL  = (1 << 21) // carryless multiply for CRC
  } cpuFeatureFlags;

再刨根问底,_cpuFeatures的值是怎么来的?

_cpuFeatures = feature_flags();
// Builds the CPU feature bitmask from the fields of _cpuid_info.
// Starts from 0 and ORs in one CPU_* flag for every capability whose
// corresponding bit in _cpuid_info is non-zero; the accumulated mask is
// returned. The first check is the one relevant to compare-and-swap on
// 64-bit values: the cmpxchg8 bit of std_cpuid1_edx sets CPU_CX8.
static uint32_t feature_flags() {
    uint32_t result = 0;
    if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0)
      result |= CPU_CX8;
    if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0)
      result |= CPU_CMOV;
    // FXSR and MMX may also be reported through the extended cpuid bits when is_amd() is true.
    if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || (is_amd() &&
        _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0))
      result |= CPU_FXSR;
    // HT flag is set for multi-core processors also.
    if (threads_per_core() > 1)
      result |= CPU_HT;
    if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || (is_amd() &&
        _cpuid_info.ext_cpuid1_edx.bits.mmx != 0))
      result |= CPU_MMX;
    if (_cpuid_info.std_cpuid1_edx.bits.sse != 0)
      result |= CPU_SSE;
    if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0)
      result |= CPU_SSE2;
    if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0)
      result |= CPU_SSE3;
    if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0)
      result |= CPU_SSSE3;
    if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0)
      result |= CPU_SSE4_1;
    if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
      result |= CPU_SSE4_2;
    if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0)
      result |= CPU_POPCNT;
    // AVX is only reported when the avx and osxsave bits are set and the
    // xem_xcr0_eax sse and ymm bits are both set; AVX2 is only considered
    // once AVX itself has been accepted.
    if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 &&
        _cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 &&
        _cpuid_info.xem_xcr0_eax.bits.sse != 0 &&
        _cpuid_info.xem_xcr0_eax.bits.ymm != 0) {
      result |= CPU_AVX;
      if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
        result |= CPU_AVX2;
    }
    if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
      result |= CPU_TSC;
    if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
      result |= CPU_TSCINV;
    if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0)
      result |= CPU_AES;
    if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0)
      result |= CPU_ERMS;
    if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0)
      result |= CPU_CLMUL;

    // AMD features.
    if (is_amd()) {
      if ((_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) ||
          (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0))
        result |= CPU_3DNOW_PREFETCH;
      if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0)
        result |= CPU_LZCNT;
      if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
        result |= CPU_SSE4A;
    }

    return result;
  }
至此,基本可以断定这里的判断是根据从CPUID中获取的信息,来确定CPU是否支持CMPXCHG8B指令。

再回过头来看这句:
return (jlong)(Atomic::cmpxchg(x, addr, e)) == e;

这里Atomic::cmpxchg方法是核心,定义在openjdk/hotspot/src/share/vm/runtime/atomic.hpp

inline static jlong    cmpxchg    (jlong    exchange_value, volatile jlong*    dest, jlong    compare_value);

 

在不同系统中有不同的实现,在linux_x86中:openjdk/hotspot/src/os_cpu/linux_x86/vm/atomic_linux_x86.inline.hpp

// 64-bit compare-and-exchange for Linux x86, written as GCC extended asm.
//   exchange_value - value to store into *dest
//   dest           - address of the 64-bit word to update
//   compare_value  - value *dest is expected to hold
// Operand map for the asm statement:
//   %0 / "=a"  output, written back into exchange_value
//   %1 / "r"   exchange_value (source operand of cmpxchgq)
//   %2 / "a"   compare_value, placed in the same "a" register as the output
//   %3 / "r"   dest (memory operand of cmpxchgq)
//   %4 / "r"   mp, consumed by the LOCK_IF_MP macro
// The function returns whatever the asm leaves in the "a" register. Per the
// x86 cmpxchg definition that is the value that was found at *dest, which is
// why callers test success by comparing the result with compare_value.
// "cc" and "memory" tell the compiler that flags and memory are modified.
inline jlong    Atomic::cmpxchg    (jlong    exchange_value, volatile jlong*    dest, jlong    compare_value) {
  bool mp = os::is_MP();
  __asm__ __volatile__ (LOCK_IF_MP(%4) "cmpxchgq %1,(%3)"
                        : "=a" (exchange_value)
                        : "r" (exchange_value), "a" (compare_value), "r" (dest), "r" (mp)
                        : "cc", "memory");
  return exchange_value;
}

在windows_x86中:openjdk/hotspot/src/os_cpu/windows_x86/vm/atomic_windows_x86.inline.hpp

// 64-bit compare-and-exchange for Windows x86, written as an MSVC __asm block
// using 32-bit registers and the cmpxchg8b instruction.
//   exchange_value - value to store into *dest
//   dest           - address of the 64-bit word to update
//   compare_value  - value *dest is expected to hold
// Each 64-bit argument is split into two 32-bit words: the *_lo word is the
// truncating cast, the *_hi word is the second jint in memory (the upper half
// on little-endian x86). The asm block then:
//   * saves ebx and edi, and restores them before leaving;
//   * loads compare_value into edx:eax and exchange_value into ecx:ebx;
//   * loads dest into edi and executes cmpxchg8b on [edi], preceded by
//     whatever LOCK_IF_MP(mp) expands to.
// NOTE(review): there is no explicit return statement; the function appears
// to rely on the result being left in edx:eax by the asm block, which is the
// MSVC convention for returning 64-bit integers on 32-bit x86 - confirm.
inline jlong    Atomic::cmpxchg    (jlong    exchange_value, volatile jlong*    dest, jlong    compare_value) {
  int mp = os::is_MP();
  jint ex_lo  = (jint)exchange_value;
  jint ex_hi  = *( ((jint*)&exchange_value) + 1 );
  jint cmp_lo = (jint)compare_value;
  jint cmp_hi = *( ((jint*)&compare_value) + 1 );
  __asm {
    push ebx
    push edi
    mov eax, cmp_lo
    mov edx, cmp_hi
    mov edi, dest
    mov ebx, ex_lo
    mov ecx, ex_hi
    LOCK_IF_MP(mp)
    cmpxchg8b qword ptr [edi]
    pop edi
    pop ebx
  }
}

 

可以看出,当CPU支持时,最终确实是直接用cmpxchg相关指令实现的。

posted on 2016-06-28 16:53  oceanking  阅读(2337)  评论(0)  编辑  收藏  举报