Two ways to implement memory copy with DMA
While working at Freescale, I needed to implement memory-to-memory copy with the SDMA engine. The work splits into two parts:
1: Add M2M (memory-to-memory) support to the DMA controller driver.
2: Write a driver that exercises the DMA controller's M2M capability.
Part 2 follows the same approach on any SoC, so it is worth summarizing here.
At the time I implemented it in two ways:
1: cyclic. Allocate two DMA buffers with dma_alloc_coherent, one as src and one as dst, and have the DMA controller copy the data from src to dst. Because the DMA engine operates on physically contiguous memory and dma_alloc_coherent cannot provide very large contiguous regions, this only works for small M2M copies.
2: sg (scatter-gather). Allocate many DMA buffers, half of them as src and half as dst, and chain the separate src/dst buffers together with device_prep_dma_sg. This links several scattered physical regions into one logically contiguous transfer, so much larger copies become possible. A short sketch contrasting the two buffer setups follows below.
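To make the contrast concrete, here is a minimal sketch of the two buffer-setup strategies (buffer_setup_sketch, dev, COHERENT_BUF_SIZE, and SG_CHUNK_SIZE are hypothetical names for illustration, not identifiers from the drivers below):

#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>

/* Illustrative sizes only. */
#define COHERENT_BUF_SIZE	1024
#define SG_CHUNK_SIZE		(1024 * 60)

static int buffer_setup_sketch(struct device *dev)
{
	/* Approach 1 (cyclic): src and dst must each be physically
	 * contiguous, so they come from dma_alloc_coherent() and the
	 * per-transfer size stays small. */
	dma_addr_t src_phys, dst_phys;
	void *src = dma_alloc_coherent(dev, COHERENT_BUF_SIZE, &src_phys, GFP_DMA);
	void *dst = dma_alloc_coherent(dev, COHERENT_BUF_SIZE, &dst_phys, GFP_DMA);

	/* Approach 2 (sg): each chunk only has to be contiguous by itself;
	 * a scatterlist stitches the chunks into one logical transfer. */
	struct scatterlist src_sg[2];
	void *chunk0 = kzalloc(SG_CHUNK_SIZE, GFP_DMA);
	void *chunk1 = kzalloc(SG_CHUNK_SIZE, GFP_DMA);

	if (!src || !dst || !chunk0 || !chunk1)
		return -ENOMEM;

	sg_init_table(src_sg, 2);
	sg_set_buf(&src_sg[0], chunk0, SG_CHUNK_SIZE);
	sg_set_buf(&src_sg[1], chunk1, SG_CHUNK_SIZE);
	/* dma_map_sg() would then hand the chunk addresses to the engine. */
	return 0;
}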
While we are at it, a quick review of how the dmaengine API is used:
Linux/Documentation/dmaengine.txt

The slave DMA usage consists of the following steps:
1. Allocate a DMA slave channel
2. Set slave and controller specific parameters
3. Get a descriptor for transaction
4. Submit the transaction
5. Issue pending requests and wait for callback notification

1. Allocate a DMA slave channel
   Interface:
   struct dma_chan *dma_request_channel(dma_cap_mask_t mask,
		dma_filter_fn filter_fn, void *filter_param);

2. Set slave and controller specific parameters
   Interface:
   int dmaengine_slave_config(struct dma_chan *chan,
		struct dma_slave_config *config);

3. Get a descriptor for transaction
   Interface:
   struct dma_async_tx_descriptor *(*chan->device->device_prep_slave_sg)(
		struct dma_chan *chan, struct scatterlist *sgl,
		unsigned int sg_len, enum dma_data_direction direction,
		unsigned long flags);

   struct dma_async_tx_descriptor *(*chan->device->device_prep_dma_cyclic)(
		struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
		size_t period_len, enum dma_data_direction direction);

   struct dma_async_tx_descriptor *(*device_prep_interleaved_dma)(
		struct dma_chan *chan, struct dma_interleaved_template *xt,
		unsigned long flags);

4. Submit the transaction
   Interface:
   dma_cookie_t dmaengine_submit(struct dma_async_tx_descriptor *desc);

5. Issue pending DMA requests and wait for callback notification
   Interface:
   void dma_async_issue_pending(struct dma_chan *chan);
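Glued together, the five steps look roughly like this (a minimal sketch following the signatures quoted above; dmaengine_five_steps, the NULL filter, and the caller-supplied scatterlist and callback are assumptions, and the exact prep-call signature varies between kernel versions):

#include <linux/dmaengine.h>
#include <linux/scatterlist.h>

static int dmaengine_five_steps(struct scatterlist *sgl, unsigned int sg_len,
				dma_async_tx_callback done_cb)
{
	dma_cap_mask_t mask;
	struct dma_chan *chan;
	struct dma_slave_config cfg = {0};
	struct dma_async_tx_descriptor *desc;

	/* 1. Allocate a DMA slave channel (no filter function here). */
	dma_cap_zero(mask);
	dma_cap_set(DMA_SLAVE, mask);
	chan = dma_request_channel(mask, NULL, NULL);
	if (!chan)
		return -ENODEV;

	/* 2. Set slave and controller specific parameters. */
	cfg.direction = DMA_MEM_TO_MEM;
	dmaengine_slave_config(chan, &cfg);

	/* 3. Get a descriptor for the transaction. */
	desc = chan->device->device_prep_slave_sg(chan, sgl, sg_len,
						  DMA_MEM_TO_MEM, 0);
	if (!desc) {
		dma_release_channel(chan);
		return -EINVAL;
	}
	desc->callback = done_cb;

	/* 4. Submit the transaction. */
	dmaengine_submit(desc);

	/* 5. Issue pending requests; done_cb fires on completion. */
	dma_async_issue_pending(chan);
	return 0;
}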
When you are finished with a channel (or need to abort an in-flight transfer), you can use:
1. int dmaengine_terminate_all(struct dma_chan *chan)
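The sg driver below calls it in sdma_release() just before handing the channel back:

	/* Abort anything still queued, then return the channel. */
	dmaengine_terminate_all(dma_m2m_chan);
	dma_release_channel(dma_m2m_chan);
	dma_m2m_chan = NULL;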
The code for both variants follows.
1: cyclic implementation
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mman.h>
#include <linux/init.h>
#include <linux/dma-mapping.h>
#include <linux/fs.h>
#include <linux/version.h>
#include <linux/delay.h>
#include <mach/dma.h>
#include <linux/dmaengine.h>
#include <linux/device.h>
#include <linux/io.h>

static int gMajor;			/* major number of the device */
static struct class *dma_tm_class;
static char *wbuf;			/* source buffer (virtual) */
static char *rbuf;			/* destination buffer (virtual) */
static dma_addr_t wpaddr;		/* source buffer (physical) */
static dma_addr_t rpaddr;		/* destination buffer (physical) */
struct dma_chan *dma_m2m_chan;
struct completion dma_m2m_ok;

#define SDMA_BUF_SIZE	1024

static bool dma_m2m_filter(struct dma_chan *chan, void *param)
{
	if (!imx_dma_is_general_purpose(chan))
		return false;
	chan->private = param;
	return true;
}

int sdma_open(struct inode *inode, struct file *filp)
{
	dma_cap_mask_t dma_m2m_mask;
	struct imx_dma_data m2m_dma_data = {0};

	init_completion(&dma_m2m_ok);

	dma_cap_zero(dma_m2m_mask);
	dma_cap_set(DMA_SLAVE, dma_m2m_mask);
	m2m_dma_data.peripheral_type = IMX_DMATYPE_MEMORY;
	m2m_dma_data.priority = DMA_PRIO_HIGH;

	dma_m2m_chan = dma_request_channel(dma_m2m_mask, dma_m2m_filter,
					   &m2m_dma_data);
	if (!dma_m2m_chan) {
		printk("Error opening the SDMA memory to memory channel\n");
		return -EINVAL;
	}

	/* Both buffers must be physically contiguous, hence dma_alloc_coherent. */
	wbuf = dma_alloc_coherent(NULL, SDMA_BUF_SIZE, &wpaddr, GFP_DMA);
	rbuf = dma_alloc_coherent(NULL, SDMA_BUF_SIZE, &rpaddr, GFP_DMA);
	if (!wbuf || !rbuf)
		return -ENOMEM;
	return 0;
}

int sdma_release(struct inode *inode, struct file *filp)
{
	dma_release_channel(dma_m2m_chan);
	dma_m2m_chan = NULL;
	dma_free_coherent(NULL, SDMA_BUF_SIZE, wbuf, wpaddr);
	dma_free_coherent(NULL, SDMA_BUF_SIZE, rbuf, rpaddr);
	return 0;
}

ssize_t sdma_read(struct file *filp, char __user *buf, size_t count,
		  loff_t *offset)
{
	int i;

	/* Block until the callback signals completion, then dump both buffers. */
	wait_for_completion(&dma_m2m_ok);
	for (i = 0; i < SDMA_BUF_SIZE; i++)
		printk("src_data_%d = %x\n", i, *(wbuf + i));
	for (i = 0; i < SDMA_BUF_SIZE; i++)
		printk("dst_data_%d = %x\n", i, *(rbuf + i));
	return 0;
}

static void dma_m2m_callback(void *data)
{
	printk("in %s\n", __func__);
	complete(&dma_m2m_ok);
}

ssize_t sdma_write(struct file *filp, const char __user *buf, size_t count,
		   loff_t *offset)
{
	u32 *index1;
	struct dma_slave_config dma_m2m_config = {0};
	struct dma_async_tx_descriptor *dma_m2m_desc;
	int i;

	/* Fill the source buffer word by word (SDMA_BUF_SIZE is in bytes). */
	index1 = (u32 *)wbuf;
	for (i = 0; i < SDMA_BUF_SIZE / 4; i++)
		*(index1 + i) = 0x12345678;
	for (i = 0; i < SDMA_BUF_SIZE; i++)
		printk("%d : %x\n", i, *(wbuf + i));

	/* Source and destination are passed through the slave config; this
	 * relies on the M2M support added to the SDMA controller driver. */
	dma_m2m_config.direction = DMA_MEM_TO_MEM;
	dma_m2m_config.dst_addr = rpaddr;
	dma_m2m_config.src_addr = wpaddr;
	dma_m2m_config.src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
	dma_m2m_config.dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
	dma_m2m_config.dst_maxburst = 4;
	dma_m2m_config.src_maxburst = 4;
	dmaengine_slave_config(dma_m2m_chan, &dma_m2m_config);

	/* buf_addr is unused here because src/dst were set above. */
	dma_m2m_desc = dma_m2m_chan->device->device_prep_dma_cyclic(
			dma_m2m_chan, 0, SDMA_BUF_SIZE, SDMA_BUF_SIZE / 2,
			DMA_MEM_TO_MEM);
	dma_m2m_desc->callback = dma_m2m_callback;
	dmaengine_submit(dma_m2m_desc);
	/* Nothing moves until pending descriptors are issued. */
	dma_async_issue_pending(dma_m2m_chan);
	return count;
}

struct file_operations dma_fops = {
	.open = sdma_open,
	.release = sdma_release,
	.read = sdma_read,
	.write = sdma_write,
};

int __init sdma_init_module(void)
{
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26))
	struct device *temp_class;
#else
	struct class_device *temp_class;
#endif
	int error;

	/* register a character device */
	error = register_chrdev(0, "sdma_test", &dma_fops);
	if (error < 0) {
		printk("SDMA test driver can't get major number\n");
		return error;
	}
	gMajor = error;
	printk("SDMA test major number = %d\n", gMajor);

	dma_tm_class = class_create(THIS_MODULE, "sdma_test");
	if (IS_ERR(dma_tm_class)) {
		printk(KERN_ERR "Error creating sdma test module class.\n");
		unregister_chrdev(gMajor, "sdma_test");
		return PTR_ERR(dma_tm_class);
	}

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28))
	temp_class = device_create(dma_tm_class, NULL, MKDEV(gMajor, 0),
				   NULL, "sdma_test");
#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26))
	temp_class = device_create(dma_tm_class, NULL, MKDEV(gMajor, 0),
				   "sdma_test");
#else
	temp_class = class_device_create(dma_tm_class, NULL, MKDEV(gMajor, 0),
					 NULL, "sdma_test");
#endif
	if (IS_ERR(temp_class)) {
		printk(KERN_ERR "Error creating sdma test class device.\n");
		class_destroy(dma_tm_class);
		unregister_chrdev(gMajor, "sdma_test");
		return -1;
	}

	printk("SDMA test Driver Module loaded\n");
	return 0;
}

static void sdma_cleanup_module(void)
{
	unregister_chrdev(gMajor, "sdma_test");
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26))
	device_destroy(dma_tm_class, MKDEV(gMajor, 0));
#else
	class_device_destroy(dma_tm_class, MKDEV(gMajor, 0));
#endif
	class_destroy(dma_tm_class);
	printk("SDMA test Driver Module Unloaded\n");
}

module_init(sdma_init_module);
module_exit(sdma_cleanup_module);
MODULE_LICENSE("GPL");
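A note on usage: after insmod, either module prints its major number and registers an "sdma_test" class device (so udev creates /dev/sdma_test). Writing to the node kicks off the copy; reading from it dumps the buffers (cyclic variant) or verifies dst against src (sg variant below).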
2: sg implementation
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mman.h>
#include <linux/init.h>
#include <linux/dma-mapping.h>
#include <linux/fs.h>
#include <linux/version.h>
#include <linux/delay.h>
#include <mach/dma.h>
#include <linux/dmaengine.h>
#include <linux/device.h>
#include <linux/io.h>

static int gMajor;			/* major number of the device */
static struct class *dma_tm_class;
u32 *wbuf, *wbuf2, *wbuf3, *wbuf4;	/* source buffers */
u32 *rbuf, *rbuf2, *rbuf3, *rbuf4;	/* destination buffers */
struct dma_chan *dma_m2m_chan;
struct completion dma_m2m_ok;
struct scatterlist sg[4], sg2[4];	/* src and dst scatterlists */

#define SDMA_BUF_SIZE	(1024 * 60)

static bool dma_m2m_filter(struct dma_chan *chan, void *param)
{
	if (!imx_dma_is_general_purpose(chan))
		return false;
	chan->private = param;
	return true;
}

int sdma_open(struct inode *inode, struct file *filp)
{
	dma_cap_mask_t dma_m2m_mask;
	struct imx_dma_data m2m_dma_data = {0};

	init_completion(&dma_m2m_ok);

	dma_cap_zero(dma_m2m_mask);
	dma_cap_set(DMA_SLAVE, dma_m2m_mask);
	m2m_dma_data.peripheral_type = IMX_DMATYPE_MEMORY;
	m2m_dma_data.priority = DMA_PRIO_HIGH;

	dma_m2m_chan = dma_request_channel(dma_m2m_mask, dma_m2m_filter,
					   &m2m_dma_data);
	if (!dma_m2m_chan) {
		printk("Error opening the SDMA memory to memory channel\n");
		return -EINVAL;
	}

	/* Each buffer only has to be contiguous by itself; the scatterlists
	 * built in sdma_write() chain them into one logical transfer. */
	wbuf  = kzalloc(SDMA_BUF_SIZE, GFP_DMA);
	wbuf2 = kzalloc(SDMA_BUF_SIZE, GFP_DMA);
	wbuf3 = kzalloc(SDMA_BUF_SIZE, GFP_DMA);
	wbuf4 = kzalloc(SDMA_BUF_SIZE, GFP_DMA);
	rbuf  = kzalloc(SDMA_BUF_SIZE, GFP_DMA);
	rbuf2 = kzalloc(SDMA_BUF_SIZE, GFP_DMA);
	rbuf3 = kzalloc(SDMA_BUF_SIZE, GFP_DMA);
	rbuf4 = kzalloc(SDMA_BUF_SIZE, GFP_DMA);
	if (!wbuf || !wbuf2 || !wbuf3 || !wbuf4 ||
	    !rbuf || !rbuf2 || !rbuf3 || !rbuf4) {
		printk("error allocating DMA buffers\n");
		return -ENOMEM;
	}
	return 0;
}

int sdma_release(struct inode *inode, struct file *filp)
{
	dmaengine_terminate_all(dma_m2m_chan);
	dma_release_channel(dma_m2m_chan);
	dma_m2m_chan = NULL;
	kfree(wbuf);
	kfree(wbuf2);
	kfree(wbuf3);
	kfree(wbuf4);
	kfree(rbuf);
	kfree(rbuf2);
	kfree(rbuf3);
	kfree(rbuf4);
	return 0;
}

ssize_t sdma_read(struct file *filp, char __user *buf, size_t count,
		  loff_t *offset)
{
	int i;

#if 0
	for (i = 0; i < SDMA_BUF_SIZE / 4; i++)
		printk("dst data_%d : %x\n", i, *(rbuf + i));
	for (i = 0; i < SDMA_BUF_SIZE / 4; i++)
		printk("dst data2_%d : %x\n", i, *(rbuf2 + i));
	for (i = 0; i < SDMA_BUF_SIZE / 4; i++)
		printk("dst data3_%d : %x\n", i, *(rbuf3 + i));
#endif

	/* Verify that each destination buffer matches its source buffer. */
	for (i = 0; i < SDMA_BUF_SIZE / 4; i++) {
		if (*(rbuf + i) != *(wbuf + i)) {
			printk("buffer 1 copy failed!\n");
			return 0;
		}
	}
	printk("buffer 1 copy passed!\n");

	for (i = 0; i < SDMA_BUF_SIZE / 4; i++) {
		if (*(rbuf2 + i) != *(wbuf2 + i)) {
			printk("buffer 2 copy failed!\n");
			return 0;
		}
	}
	printk("buffer 2 copy passed!\n");

	for (i = 0; i < SDMA_BUF_SIZE / 4; i++) {
		if (*(rbuf3 + i) != *(wbuf3 + i)) {
			printk("buffer 3 copy failed!\n");
			return 0;
		}
	}
	printk("buffer 3 copy passed!\n");

	for (i = 0; i < SDMA_BUF_SIZE / 4; i++) {
		if (*(rbuf4 + i) != *(wbuf4 + i)) {
			printk("buffer 4 copy failed!\n");
			return 0;
		}
	}
	printk("buffer 4 copy passed!\n");
	return 0;
}

static void dma_m2m_callback(void *data)
{
	complete(&dma_m2m_ok);
}

ssize_t sdma_write(struct file *filp, const char __user *buf, size_t count,
		   loff_t *offset)
{
	u32 *index1, *index2, *index3, *index4;
	int i, ret;
	struct dma_slave_config dma_m2m_config = {0};
	struct dma_async_tx_descriptor *dma_m2m_desc;
	struct timeval end_time;
	unsigned long end, start;

	index1 = wbuf;
	index2 = wbuf2;
	index3 = wbuf3;
	index4 = wbuf4;

	/* Fill each source buffer with a distinct pattern, word by word. */
	for (i = 0; i < SDMA_BUF_SIZE / 4; i++)
		*(index1 + i) = 0x12121212;
	for (i = 0; i < SDMA_BUF_SIZE / 4; i++)
		*(index2 + i) = 0x34343434;
	for (i = 0; i < SDMA_BUF_SIZE / 4; i++)
		*(index3 + i) = 0x56565656;
	for (i = 0; i < SDMA_BUF_SIZE / 4; i++)
		*(index4 + i) = 0x78787878;

#if 0
	for (i = 0; i < SDMA_BUF_SIZE / 4; i++)
		printk("input data_%d : %x\n", i, *(wbuf + i));
	for (i = 0; i < SDMA_BUF_SIZE / 2 / 4; i++)
		printk("input data2_%d : %x\n", i, *(wbuf2 + i));
	for (i = 0; i < SDMA_BUF_SIZE / 4; i++)
		printk("input data3_%d : %x\n", i, *(wbuf3 + i));
#endif

	dma_m2m_config.direction = DMA_MEM_TO_MEM;
	dma_m2m_config.dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
	dmaengine_slave_config(dma_m2m_chan, &dma_m2m_config);

	/* Chain the four source buffers, then the four destination buffers. */
	sg_init_table(sg, 4);
	sg_set_buf(&sg[0], wbuf, SDMA_BUF_SIZE);
	sg_set_buf(&sg[1], wbuf2, SDMA_BUF_SIZE);
	sg_set_buf(&sg[2], wbuf3, SDMA_BUF_SIZE);
	sg_set_buf(&sg[3], wbuf4, SDMA_BUF_SIZE);
	ret = dma_map_sg(NULL, sg, 4, dma_m2m_config.direction);

	sg_init_table(sg2, 4);
	sg_set_buf(&sg2[0], rbuf, SDMA_BUF_SIZE);
	sg_set_buf(&sg2[1], rbuf2, SDMA_BUF_SIZE);
	sg_set_buf(&sg2[2], rbuf3, SDMA_BUF_SIZE);
	sg_set_buf(&sg2[3], rbuf4, SDMA_BUF_SIZE);
	ret = dma_map_sg(NULL, sg2, 4, dma_m2m_config.direction);

	/* Destination scatterlist first, then source, per device_prep_dma_sg(). */
	dma_m2m_desc = dma_m2m_chan->device->device_prep_dma_sg(dma_m2m_chan,
			sg2, 4, sg, 4, 0);
	dma_m2m_desc->callback = dma_m2m_callback;

	/* Time the transfer from submission to the completion callback. */
	do_gettimeofday(&end_time);
	start = end_time.tv_sec * 1000000 + end_time.tv_usec;

	dmaengine_submit(dma_m2m_desc);
	dma_async_issue_pending(dma_m2m_chan);
	wait_for_completion(&dma_m2m_ok);

	do_gettimeofday(&end_time);
	end = end_time.tv_sec * 1000000 + end_time.tv_usec;
	printk("end - start = %lu us\n", end - start);

	dma_unmap_sg(NULL, sg, 4, dma_m2m_config.direction);
	dma_unmap_sg(NULL, sg2, 4, dma_m2m_config.direction);
	return count;
}

struct file_operations dma_fops = {
	.open = sdma_open,
	.release = sdma_release,
	.read = sdma_read,
	.write = sdma_write,
};

int __init sdma_init_module(void)
{
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26))
	struct device *temp_class;
#else
	struct class_device *temp_class;
#endif
	int error;

	/* register a character device */
	error = register_chrdev(0, "sdma_test", &dma_fops);
	if (error < 0) {
		printk("SDMA test driver can't get major number\n");
		return error;
	}
	gMajor = error;
	printk("SDMA test major number = %d\n", gMajor);

	dma_tm_class = class_create(THIS_MODULE, "sdma_test");
	if (IS_ERR(dma_tm_class)) {
		printk(KERN_ERR "Error creating sdma test module class.\n");
		unregister_chrdev(gMajor, "sdma_test");
		return PTR_ERR(dma_tm_class);
	}

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28))
	temp_class = device_create(dma_tm_class, NULL, MKDEV(gMajor, 0),
				   NULL, "sdma_test");
#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26))
	temp_class = device_create(dma_tm_class, NULL, MKDEV(gMajor, 0),
				   "sdma_test");
#else
	temp_class = class_device_create(dma_tm_class, NULL, MKDEV(gMajor, 0),
					 NULL, "sdma_test");
#endif
	if (IS_ERR(temp_class)) {
		printk(KERN_ERR "Error creating sdma test class device.\n");
		class_destroy(dma_tm_class);
		unregister_chrdev(gMajor, "sdma_test");
		return -1;
	}

	printk("SDMA test Driver Module loaded\n");
	return 0;
}

static void sdma_cleanup_module(void)
{
	unregister_chrdev(gMajor, "sdma_test");
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26))
	device_destroy(dma_tm_class, MKDEV(gMajor, 0));
#else
	class_device_destroy(dma_tm_class, MKDEV(gMajor, 0));
#endif
	class_destroy(dma_tm_class);
	printk("SDMA test Driver Module Unloaded\n");
}

module_init(sdma_init_module);
module_exit(sdma_cleanup_module);
MODULE_LICENSE("GPL");
The measured throughput at the time was roughly 50 MB of data copied per second, which the customer still found unsatisfactory.
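For scale (assuming the figure refers to the sg test above): each write copies four buffers of SDMA_BUF_SIZE = 60 KB, i.e. 240 KB per transfer, so 50 MB/s works out to 240 KB / 51200 KB/s ≈ 4.7 ms per transfer in the "end - start" printout.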
Original article: https://blog.csdn.net/u012769691/article/details/46814305