SSE求解向量大小

    // Length (Euclidean norm) of the four packed floats stored at `this`,
    // computed with SSE and left in f.
    // NOTE(review): presumably the four floats are x, y, z, w, so w takes
    // part in the sum - confirm against the class layout.
    float f = 0;
    __asm
    {
        mov     eax,  this              ; eax -> first of the four floats
        movups  xmm0, [eax]             ; unaligned load: (f0, f1, f2, f3), lane 0 first
        mulps   xmm0, xmm0              ; square every lane in place

        movaps  xmm2, xmm0              ; scratch copy of the squares
        shufps  xmm2, xmm2, 0x4E        ; swap 64-bit halves: (f2, f3, f0, f1)
        addps   xmm0, xmm2              ; (f0+f2, f1+f3, f2+f0, f3+f1)

        movaps  xmm2, xmm0              ; scratch copy of the pair sums
        shufps  xmm2, xmm2, 0x11        ; swap within pairs: (f1+f3, f0+f2, f1+f3, f0+f2)
        addss   xmm0, xmm2              ; lane 0 = (f0+f2) + (f1+f3); only lane 0 is needed

        sqrtss  xmm0, xmm0              ; square root of lane 0 only
        movss   f,    xmm0              ; store lane 0 into the local f
    }

先用 mulps 将 x、y、z、w 各自平方,再通过两次 shufps 重排分量并逐次相加,使 xmm0 的最低分量等于 x²+y²+z²+w²;最后用 sqrtss 对该分量开方,xmm0 的低 32 位即为向量的长度。

posted @ 2014-05-29 22:02  xxx1  阅读(670)  评论(0)  编辑  收藏  举报