caffe中各种cblas的函数使用总结
转来的,来自:http://www.cnblogs.com/huashiyiqike/p/3886670.html
总结的很赞,转到这里,留一下笔记。感觉cblas的函数名字很好记的,试着去找过源代码,但是是fortran的,我当时写过的那些fortran程序早忘记了。
Y=alpha * X +beta*Y
// caffe_cpu_axpby: Y = alpha * X + beta * Y over N elements.
//
// Per the notes accompanying this snippet, the same result can be obtained
// in two BLAS steps:
//   cblas_dscal(N, beta, Y, incY);            // Y = Y * beta
//   cblas_daxpy(N, alpha, X, incX, Y, incY);  // Y = (alpha * X) + Y

// Single-precision specialization: forwards to cblas_saxpby, passing 1 for
// both increment arguments.
template <>
void caffe_cpu_axpby<float>(const int N, const float alpha, const float* X,
                            const float beta, float* Y) {
  const int incX = 1;
  const int incY = 1;
  cblas_saxpby(N, alpha, X, incX, beta, Y, incY);
}

// Double-precision specialization: forwards to cblas_daxpby, passing 1 for
// both increment arguments.
template <>
void caffe_cpu_axpby<double>(const int N, const double alpha, const double* X,
                             const double beta, double* Y) {
  const int incX = 1;
  const int incY = 1;
  cblas_daxpby(N, alpha, X, incX, beta, Y, incY);
}
Y=alpha * X + Y
// caffe_axpy: Y = alpha * X + Y over N elements.

// Single-precision specialization: forwards to cblas_saxpy, passing 1 for
// both increment arguments.
template <>
void caffe_axpy<float>(const int N, const float alpha, const float* X,
                       float* Y) {
  const int incX = 1;
  const int incY = 1;
  cblas_saxpy(N, alpha, X, incX, Y, incY);
}

// Double-precision specialization: forwards to cblas_daxpy, passing 1 for
// both increment arguments.
template <>
void caffe_axpy<double>(const int N, const double alpha, const double* X,
                        double* Y) {
  const int incX = 1;
  const int incY = 1;
  cblas_daxpy(N, alpha, X, incX, Y, incY);
}
// Element-wise binary operations. Each invocation pairs an operation name
// with the per-element expression applied to inputs a, b and output y.
// NOTE(review): the macro definition is not shown here; presumably it
// generates the vs<Name>/vd<Name> functions called below -- confirm.
DEFINE_VSL_BINARY_FUNC(Add, y[i] = a[i] + b[i]);
DEFINE_VSL_BINARY_FUNC(Sub, y[i] = a[i] - b[i]);
DEFINE_VSL_BINARY_FUNC(Mul, y[i] = a[i] * b[i]);
DEFINE_VSL_BINARY_FUNC(Div, y[i] = a[i] / b[i]);

// caffe_add, single precision: delegates to vsAdd with n, a, b, y.
template <>
void caffe_add<float>(const int n, const float* a, const float* b,
                      float* y) {
  vsAdd(n, a, b, y);
}

// caffe_add, double precision: delegates to vdAdd with n, a, b, y.
template <>
void caffe_add<double>(const int n, const double* a, const double* b,
                       double* y) {
  vdAdd(n, a, b, y);
}
y=x;
// caffe_copy / caffe_gpu_copy: y = x, copying N elements from X to Y.

// CPU, single precision: forwards to cblas_scopy, passing 1 for both
// increment arguments.
template <>
void caffe_copy<float>(const int N, const float* X, float* Y) {
  const int incX = 1;
  const int incY = 1;
  cblas_scopy(N, X, incX, Y, incY);
}

// CPU, double precision: forwards to cblas_dcopy, passing 1 for both
// increment arguments.
template <>
void caffe_copy<double>(const int N, const double* X, double* Y) {
  const int incX = 1;
  const int incY = 1;
  cblas_dcopy(N, X, incX, Y, incY);
}

// GPU, single precision: calls cublasScopy on the handle returned by
// Caffe::cublas_handle(), with the call wrapped in CUBLAS_CHECK.
// NOTE(review): CUBLAS_CHECK is defined elsewhere; presumably it validates
// the returned status -- confirm.
template <>
void caffe_gpu_copy<float>(const int N, const float* X, float* Y) {
  const int incX = 1;
  const int incY = 1;
  CUBLAS_CHECK(cublasScopy(Caffe::cublas_handle(), N, X, incX, Y, incY));
}

// GPU, double precision: calls cublasDcopy on the handle returned by
// Caffe::cublas_handle(), with the call wrapped in CUBLAS_CHECK.
template <>
void caffe_gpu_copy<double>(const int N, const double* X, double* Y) {
  const int incX = 1;
  const int incY = 1;
  CUBLAS_CHECK(cublasDcopy(Caffe::cublas_handle(), N, X, incX, Y, incY));
}
Computes alpha*x*y' + A.
cblas_sger: Multiplies vector X by the transpose of vector Y, then adds matrix A (single precision).
void cblas_sger ( const enum CBLAS_ORDER Order, const int M, const int N, const float alpha, const float *X, const int incX, const float *Y, const int incY, float *A, const int lda );
1 2 3 4 5 6 7 8 9 | Y(vetor)←αAX + βY This function multiplies A * X (after transposing A, if needed) and multiplies the resulting matrix by alpha. It then multiplies vector Y by beta. It stores the sum of these two products in vector Y. template <> void caffe_cpu_gemv< float >( const CBLAS_TRANSPOSE TransA, const int M, const int N, const float alpha, const float * A, const float * x, const float beta, float * y) { cblas_sgemv(CblasRowMajor, TransA, M, N, alpha, A, N, x, 1, beta, y, 1); } |
C(matrix)←αAB + βC
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 | template < typename T> void gpu_multmat(T* A, T* B, T* C, int M, int K, int N){ const T alpha = 1,beta=0; caffe_gpu_gemm(CblasNoTrans,CblasNoTrans,M,N,K,alpha,A,B,beta,C); } template <> void caffe_cpu_gemm< float >( const CBLAS_TRANSPOSE TransA, const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const float alpha, const float * A, const float * B, const float beta, float * C) { int lda = (TransA == CblasNoTrans) ? K : M; int ldb = (TransB == CblasNoTrans) ? N : K; cblas_sgemm(CblasRowMajor, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, N); } |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 | A=M*N B=M*K C=A'*B N M K template < typename T> void cpu_multTmat(T* A, T* B, T* C, int M, int K, int N){ const T alpha = 1,beta=0; caffe_cpu_gemm(CblasTrans,CblasNoTrans,M,N,K,alpha,A,B,beta,C); // cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, A, M, B, K, beta, C, M); } A=M*N B=N*K C=A*B M N K template < typename T> void cpu_multmat(T* A, T* B, T* C, int M, int K, int N){ const T alpha = 1,beta=0; caffe_cpu_gemm(CblasNoTrans,CblasNoTrans,M,N,K,alpha,A,B,beta,C); // cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, A, M, B, K, beta, C, M); } |
Sophie的世界,转载请注明出处,谢谢。
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 记一次.NET内存居高不下排查解决与启示
· 探究高空视频全景AR技术的实现原理
· 理解Rust引用及其生命周期标识(上)
· 浏览器原生「磁吸」效果!Anchor Positioning 锚点定位神器解析
· 没有源码,如何修改代码逻辑?
· 全程不用写代码,我用AI程序员写了一个飞机大战
· DeepSeek 开源周回顾「GitHub 热点速览」
· 记一次.NET内存居高不下排查解决与启示
· MongoDB 8.0这个新功能碉堡了,比商业数据库还牛
· .NET10 - 预览版1新功能体验(一)