SSE 向量乘矩阵

 

// Four-component single-precision vector. Members are declared in
// x, y, z, w order, so aggregate initialisation follows that order.
struct Vector4
{
    float x;
    float y;
    float z;
    float w;
};
// 4x4 grid of single-precision floats, addressed as _M[i][j] with both
// indices in the range 0..3.
struct Matrix
{
    float _M[4][4];   // 16 contiguous floats; the second index varies fastest
};


void SSE_VectorMultiplyMatrix(const Vector4& v,const Matrix& m1,Vector4& ret)
{
    Vector4 va,vb,vc,vd;
    Vector4 *pva,*pvb,*pvc,*pvd;
    const Vector4 *pv;
    //取出矩阵每一列
    va.x = m1._M[0][0];
    va.y = m1._M[1][0];
    va.z = m1._M[2][0];
    va.w = m1._M[3][0];
    
    vb.x = m1._M[0][1];
    vb.y = m1._M[1][1];
    vb.z = m1._M[2][1];
    vb.w = m1._M[3][1];

    vc.x = m1._M[0][2];
    vc.y = m1._M[1][2];
    vc.z = m1._M[2][2];
    vc.w = m1._M[3][2];

    vd.x = m1._M[0][3];
    vd.y = m1._M[1][3];
    vd.z = m1._M[2][3];
    vd.w = m1._M[3][3];
    
    pva = &va;
    pvb = &vb;
    pvc = &vc;
    pvd = &vd;
    pv = &v;
    __asm 
    {   
        //矩阵四列放入mmx0-mmx3
        MOV EAX, pva                           // Load pointer into CPU reg
        MOVUPS XMM0, [EAX]
        MOV EAX, pvb                           // Load pointer into CPU reg
        MOVUPS XMM1, [EAX]
        MOV EAX, pvc                           // Load pointer into CPU reg
        MOVUPS XMM2, [EAX]
        MOV EAX, pvd                           // Load pointer into CPU reg
        MOVUPS XMM3, [EAX]
        
        //向量放入 mmx4
        MOV EAX, pv 
        MOVUPS XMM4, [EAX]

        //向量点乘矩阵每列
        MULPS XMM0,XMM4
        MULPS XMM1,XMM4  
        MULPS XMM2,XMM4  
        MULPS XMM3,XMM4 
        
        //输出四个分量
        MOVUPS [va], XMM0
        MOVUPS [vb], XMM1
        MOVUPS [vc], XMM2
        MOVUPS [vd], XMM3
    }

    //四个分量求和得变换后向量
    ret.x = va.w + va.x + va.y + va.z;
    ret.y = vb.w + vb.x + vb.y + vb.z;
    ret.z = vc.w + vc.x + vc.y + vc.z;
    ret.w = vd.w + vd.x + vd.y + vd.z;

}

 

 

 

 

posted on 2017-02-23 15:46  草丛有头猪  阅读(391)  评论(1)  编辑  收藏  举报

导航