numba

# Below is a sample showing how to run the numba benchmark.

# installation and setup
# Install numba and point it at the CUDA toolkit's NVVM/libdevice files
# and the system CUDA driver library.
pip install numba
# NOTE(review): the NUMBAPRO_* variables are legacy names inherited from
# Anaconda Accelerate; recent numba releases look at CUDA_HOME /
# NUMBA_CUDA_DRIVER instead — confirm against the installed numba version.
export NUMBAPRO_LIBDEVICE=/usr/local/cuda/nvvm/libdevice
export NUMBAPRO_NVVM=/usr/local/cuda/nvvm/lib64/libnvvm.so
export NUMBAPRO_CUDA_DRIVER=/usr/lib/x86_64-linux-gnu/libcuda.so

#python code to run:
import numba
from numba import float32, float64
from numba import vectorize
from numba import cuda
import numpy as np
import time

def lerp(A, B, factor):
    """Linear interpolation between A and B.

    Returns A when factor == 0.0 and B when factor == 1.0; values in
    between blend the two endpoints.
    """
    weight_a = 1.0 - factor
    weight_b = factor
    return weight_a * A + weight_b * B

# Compile the scalar lerp into NumPy ufuncs for three execution targets.
# The signature list was previously duplicated verbatim in all three calls;
# sharing one list keeps the variants from drifting apart.
_lerp_signatures = ['float32(float32, float32, float32)',
                    'float64(float64, float64, float64)']
lerp_ufunc_p = numba.vectorize(_lerp_signatures, target="parallel")(lerp)
lerp_ufunc_cuda = numba.vectorize(_lerp_signatures, target="cuda")(lerp)
lerp_ufunc_cpu = numba.vectorize(_lerp_signatures, target="cpu")(lerp)

# Benchmark inputs: two 2,500,000-element float64 ramps (np.arange with a
# step of 2.0 over [0, 5000000)), one ascending and one descending, plus
# device-resident copies for the CUDA variant.
length = 5000000
A = np.arange(0.0, length, 2.0)
B = np.arange(length, 0.0, -2.0)
factor = 0.5

A_cuda = cuda.to_device(A)
B_cuda = cuda.to_device(B)

#parallel
t0=time.clock() 
for i in range(10000):
    c=lerp_ufunc_p(A,B,0.5)
t1=time.clock()
print((t1-t0)/10000) #0.0164714971

#gpu
t0=time.clock() 
for i in range(10000):
    c=lerp_ufunc_cuda(A_cuda,B_cuda,0.5)
t1=time.clock()
print((t1-t0)/10000)#0.0014463519

  

posted on 2019-01-11 10:36  cdekelon  阅读(213)  评论(0编辑  收藏  举报

导航