DMA性能测试
本程序主要用来计算DMA数据读写过程中所花费的总时钟周期数,依据公式 T = tStart + ceil(L/4)*2 + ceil(L/256)*tTransform*2,其中L为本次传输的数据长度。
因为tTransform是一个常量(通常默认为11),因此只需根据debug文件分析出tStart即可。
一开始我以为tStart是一个变量,即第一个buffer2-2(读)的开始时刻,与它之前准备阶段中最后一个buffer2-17(写)的时刻之差,因此写了
以下程序:
#*************************************************************************
# File Name: performance_testing.py
# Author: jiangjing
# Mail: jjdl@mail.ustc.edu.cn
# Created Time: Sat 15 Jul 2017 09:34:04 PM PDT
#*************************************************************************
import math
import sys

# Distance from the first character of a tag to the first character of the
# value that is parsed after it. The tag text itself is 11 ("[buffer2-2]")
# or 12 ("[buffer2-17]") characters long, so in both cases 6 further
# characters are skipped before the value starts.
_BUFFER2_2_VALUE_OFFSET = 17
_BUFFER2_17_VALUE_OFFSET = 18


def _read_debug(path):
    """Return the whole content of the debug file at *path* as a string."""
    # The context manager closes the handle even if read() raises.
    with open(path) as debug_file:
        return debug_file.read()


def GetFirstBuffer2_2(path):
    """Locate the first "[buffer2-2]" entry of the debug file.

    Args:
        path: Path of the debug file.

    Returns:
        A two-element list ``[index, value]``: the character index of the
        first "[buffer2-2]" tag and the text between the value offset and
        the next space (kept as a string).

    Raises:
        ValueError: If the tag, or a space after its value, is not found.
    """
    text = _read_debug(path)
    tag_index = text.index("[buffer2-2]")
    value_start = tag_index + _BUFFER2_2_VALUE_OFFSET
    value_end = text.index(" ", value_start)
    return [tag_index, text[value_start:value_end]]


def GetLastBuffer2_17(path, index2_2):
    """Locate the last "[buffer2-17]" entry that precedes *index2_2*.

    Args:
        path: Path of the debug file.
        index2_2: Character index that bounds the search; only text before
            this index is searched.

    Returns:
        A two-element list ``[index, value]``: the character index of the
        tag and the text between the value offset and the next space (kept
        as a string).

    Raises:
        ValueError: If the tag, or a space after its value, is not found.
    """
    text = _read_debug(path)
    tag_index = text.rindex("[buffer2-17]", 0, index2_2)
    value_start = tag_index + _BUFFER2_17_VALUE_OFFSET
    value_end = text.index(" ", value_start)
    return [tag_index, text[value_start:value_end]]


def GetTotalTime(L, path, tTransform=11):
    """Return T = tStart + ceil(L/4)*2 + ceil(L/256)*tTransform*2.

    tStart is measured from the debug file as the value of the first
    "[buffer2-2]" entry minus the value of the last "[buffer2-17]" entry
    that precedes it.

    Args:
        L: Length of the transferred data.
        path: Path of the debug file.
        tTransform: Per-256-unit cost used by the formula (default 11).

    Returns:
        The total as an int.

    Raises:
        ValueError: If either tag is missing or its value is not an integer.
    """
    buffer2_2 = GetFirstBuffer2_2(path)
    buffer2_17 = GetLastBuffer2_17(path, buffer2_2[0])
    t_start = int(buffer2_2[1]) - int(buffer2_17[1])
    # The ceil(L/4) term is doubled, as the formula requires.
    return t_start + math.ceil(L / 4) * 2 + math.ceil(L / 256) * tTransform * 2


# Debug entry point: python performance_testing.py <L> <debug-file>
if __name__ == '__main__':
    # Two arguments are consumed, so argv must hold at least three items.
    if len(sys.argv) > 2:
        print(GetTotalTime(int(sys.argv[1]), sys.argv[2]))
    else:
        sys.stderr.write("usage: performance_testing.py <L> <debug-file>\n")
但师兄说其实tStart也是一个常量,所以又将程序简化如下:
#*************************************************************************
# File Name: PerformanceTesting.py
# Author: jiangjing
# Mail: jjdl@mail.ustc.edu.cn
# Created Time: Sun 16 Jul 2017 06:12:22 AM PDT
#*************************************************************************
import math

# Constant start-up term of the timing formula.
tStart = 2448


def GetTime(L, tTransform=11):
    """Return T = tStart + ceil(L/4)*2 + ceil(L/256)*tTransform*2.

    Args:
        L: Length of the transferred data.
        tTransform: Per-256-unit cost used by the formula (default 11).

    Returns:
        The time for one transfer, as an int.
    """
    # The ceil(L/4) term is doubled, as the formula requires.
    return tStart + math.ceil(L / 4) * 2 + math.ceil(L / 256) * tTransform * 2


def GetTotalTime(lList):
    """Return the summed GetTime() of every item in *lList*.

    Args:
        lList: Iterable of sequences; the length L of each item is read
            from index 4.

    Returns:
        The total over all items (0 for an empty list).
    """
    return sum(GetTime(item[4]) for item in lList)
下面的这个test.py为测试样例:
#*************************************************************************
# File Name: test.py
# Author: jiangjing
# Mail: jjdl@mail.ustc.edu.cn
# Created Time: Sun 16 Jul 2017 06:18:24 AM PDT
#*************************************************************************
import PerformanceTesting as PT

# Sample input: every row carries four zero fields followed by the transfer
# length at index 4, which is the field PT.GetTotalTime reads.
transfer_lengths = (65555, 672323)
lList = [[0, 0, 0, 0, length] for length in transfer_lengths]

# Print the accumulated time of both sample transfers.
print(PT.GetTotalTime(lList))
考虑到测试的便捷性,师兄建议我采用c语言进行编写。
以下为C语言版本代码:
1 #include"stdio.h" 2 #include"math.h" 3 #include"conv2_route_plan.c" 4 #include"route_plan.h" 5 #define uint32_t unsigned long 6 7 uint32_t tStart=2448; 8 uint32_t GetTime(uint32_t L){ 9 return tStart+ceil(L/4)*2+ceil(L/256)*11*2; 10 } 11 12 uint32_t GetTotalTime(route_plan_t lList){ 13 uint32_t totalTime=0; 14 uint32_t i; 15 for(i=0;i<lList.num_item;i++) 16 { 17 totalTime+=GetTime(lList.route_item_array[i].len); 18 } 19 return totalTime; 20 } 21 22 int main() 23 { 24 printf("%d\n",GetTotalTime(conv2_route_plan)); 25 return 0; 26 }
其中“route_plan.h”是从师兄的项目中拷下来的,里面定义了输出数据的格式。
#ifndef _ROUTE_PLAN_H_
#define _ROUTE_PLAN_H_

#include <stddef.h>  /* size_t; <stdint.h> alone is not guaranteed to declare it */
#include <stdint.h>

/*
 * One entry of a route plan.
 * NOTE(review): field meanings are inferred from their names (a transfer of
 * `len` units from a source buffer/offset to a destination buffer/offset);
 * confirm against the generator that emits the plan.
 */
typedef struct {
    uint32_t src_buffer_id;
    uint32_t src_buffer_offset;
    uint32_t dest_buffer_id;
    uint32_t dest_buffer_offset;
    size_t len;
} route_item_t;

/* A route plan: num_item entries stored at route_item_array. */
typedef struct {
    uint32_t num_item;
    route_item_t* route_item_array;
} route_plan_t;

#endif // #ifndef _ROUTE_PLAN_H_
其中“conv2_route_plan.c”文件是main.py文件运行输出的结果之一,里面包含了计算clock所必需的数据。改变dsp的数目时,本文件中的数据也会随之变化。
以下是dsp=32时的数据情况:
#include "route_plan.h" route_item_t conv2_route_item_array[] = { {0, 0.0, 32, 57856.0, 404992.0}, {1, 0.0, 32, 462848.0, 57856.0}, {0, 347136.0, 33, 0.0, 57856.0}, {1, 0.0, 33, 57856.0, 404992.0}, {2, 0.0, 33, 462848.0, 57856.0}, {1, 347136.0, 34, 0.0, 57856.0}, {2, 0.0, 34, 57856.0, 404992.0}, {3, 0.0, 34, 462848.0, 57856.0}, {2, 347136.0, 35, 0.0, 57856.0}, {3, 0.0, 35, 57856.0, 404992.0}, {4, 0.0, 35, 462848.0, 57856.0}, {3, 347136.0, 36, 0.0, 57856.0}, {4, 0.0, 36, 57856.0, 404992.0}, {5, 0.0, 36, 462848.0, 57856.0}, {4, 347136.0, 37, 0.0, 57856.0}, {5, 0.0, 37, 57856.0, 404992.0}, {6, 0.0, 37, 462848.0, 57856.0}, {5, 347136.0, 38, 0.0, 57856.0}, {6, 0.0, 38, 57856.0, 404992.0}, {7, 0.0, 38, 462848.0, 57856.0}, {6, 347136.0, 39, 0.0, 57856.0}, {7, 0.0, 39, 57856.0, 404992.0}, {8, 0.0, 39, 462848.0, 57856.0}, {7, 347136.0, 40, 0.0, 57856.0}, {8, 0.0, 40, 57856.0, 404992.0}, {9, 0.0, 40, 462848.0, 57856.0}, {8, 347136.0, 41, 0.0, 57856.0}, {9, 0.0, 41, 57856.0, 404992.0}, {10, 0.0, 41, 462848.0, 57856.0}, {9, 347136.0, 42, 0.0, 57856.0}, {10, 0.0, 42, 57856.0, 404992.0}, {11, 0.0, 42, 462848.0, 57856.0}, {10, 347136.0, 43, 0.0, 57856.0}, {11, 0.0, 43, 57856.0, 404992.0}, {12, 0.0, 43, 462848.0, 57856.0}, {11, 347136.0, 44, 0.0, 57856.0}, {12, 0.0, 44, 57856.0, 404992.0}, {13, 0.0, 44, 462848.0, 57856.0}, {12, 347136.0, 45, 0.0, 57856.0}, {13, 0.0, 45, 57856.0, 404992.0}, {14, 0.0, 45, 462848.0, 57856.0}, {13, 347136.0, 46, 0.0, 57856.0}, {14, 0.0, 46, 57856.0, 404992.0}, {15, 0.0, 46, 462848.0, 57856.0}, {14, 347136.0, 47, 0.0, 57856.0}, {15, 0.0, 47, 57856.0, 404992.0}, {16, 0.0, 47, 462848.0, 57856.0}, {15, 347136.0, 48, 0.0, 57856.0}, {16, 0.0, 48, 57856.0, 404992.0}, {17, 0.0, 48, 462848.0, 57856.0}, {16, 347136.0, 49, 0.0, 57856.0}, {17, 0.0, 49, 57856.0, 404992.0}, {18, 0.0, 49, 462848.0, 57856.0}, {17, 347136.0, 50, 0.0, 57856.0}, {18, 0.0, 50, 57856.0, 404992.0}, {19, 0.0, 50, 462848.0, 57856.0}, {18, 347136.0, 51, 0.0, 57856.0}, {19, 0.0, 51, 57856.0, 
404992.0}, {20, 0.0, 51, 462848.0, 57856.0}, {19, 347136.0, 52, 0.0, 57856.0}, {20, 0.0, 52, 57856.0, 404992.0}, {21, 0.0, 52, 462848.0, 57856.0}, {20, 347136.0, 53, 0.0, 57856.0}, {21, 0.0, 53, 57856.0, 404992.0}, {22, 0.0, 53, 462848.0, 57856.0}, {21, 347136.0, 54, 0.0, 57856.0}, {22, 0.0, 54, 57856.0, 404992.0}, {23, 0.0, 54, 462848.0, 57856.0}, {22, 347136.0, 55, 0.0, 57856.0}, {23, 0.0, 55, 57856.0, 404992.0}, {24, 0.0, 55, 462848.0, 57856.0}, {23, 347136.0, 56, 0.0, 57856.0}, {24, 0.0, 56, 57856.0, 404992.0}, {25, 0.0, 56, 462848.0, 57856.0}, {24, 347136.0, 57, 0.0, 57856.0}, {25, 0.0, 57, 57856.0, 404992.0}, {26, 0.0, 57, 462848.0, 57856.0}, {25, 347136.0, 58, 0.0, 57856.0}, {26, 0.0, 58, 57856.0, 404992.0}, {27, 0.0, 58, 462848.0, 57856.0}, {26, 347136.0, 59, 0.0, 57856.0}, {27, 0.0, 59, 57856.0, 404992.0}, {28, 0.0, 59, 462848.0, 57856.0}, {27, 347136.0, 60, 0.0, 57856.0}, {28, 0.0, 60, 57856.0, 404992.0}, {29, 0.0, 60, 462848.0, 57856.0}, {28, 347136.0, 61, 0.0, 57856.0}, {29, 0.0, 61, 57856.0, 404992.0}, {30, 0.0, 61, 462848.0, 57856.0}, {29, 347136.0, 62, 0.0, 57856.0}, {30, 0.0, 62, 57856.0, 404992.0}, {31, 0.0, 62, 462848.0, 57856.0}, {30, 347136.0, 63, 0.0, 57856.0}, {31, 0.0, 63, 57856.0, 404992.0}, }; route_plan_t conv2_route_plan = { .num_item = 94, .route_item_array = conv2_route_item_array };
最后的测试结果如下表: