Qfplib的介绍页面
https://www.quinapalus.com/qfplib.html
该浮点库使用针对Cortex-M3优化的汇编代码实现了qfp_fadd、qfp_fsub、qfp_fmul、qfp_fdiv
等函数,用以替代编译器内置的软浮点实现。和大多数数学库一样,Qfplib也需要你将代码中的运算符运算(+、-、*、/)替换成函数调用。
下面使用Cortex-M4内核的单片机进行测试。虽然介绍页面有如下阐述:"It will also run on Cortex-M4 microcontrollers but is not optimised for these devices.",
但是M4支持M3的所有指令,将qfplib-m3.s
文件中的.cpu cortex-m3
改成.cpu cortex-m4
后直接进行编译也是可以通过的(不改也行)。
测试平台
目标芯片:AT32F415 Cortex-M4(No FPU),实际主频:144MHz,SRAM:32KB,FLASH等待4个周期。
编译器:arm gcc 13.2.Rel1,优化等级:O2
IQmath定点数使用iq15,Qfplib使用float
测试代码
/* Constant factor applied by the multiply-add ("scale") benchmarks. */
#define CONST_PI_VAL 3.1415926f
/* Number of elements in every benchmark vector. */
#define VECTOR_SIZE 600

/* Fixed-point (_iq15) operands followed by their output buffer. */
static _iq15 VectorA[VECTOR_SIZE];
static _iq15 VectorB[VECTOR_SIZE];
static _iq15 ResultVector[VECTOR_SIZE];

/* Single-precision float operands followed by their output buffer. */
static float VectorAF[VECTOR_SIZE];
static float VectorBF[VECTOR_SIZE];
static float ResultVectorF[VECTOR_SIZE];
/*
 * Initialise the fixed-point benchmark arrays.
 *
 * Element i of VectorA is set to _IQ15(1.0221f) * i, element i of VectorB to
 * _IQ15(2.127f) * i, and every element of ResultVector is cleared.
 *
 * NOTE(review): VectorB[0] is therefore 0, and main() passes VectorB as the
 * divisor to VectorIQDiv, so the first division has a zero divisor - confirm
 * how _IQ15div handles that and whether it skews the measurement.
 */
static void BenchmarkVectorIQArrayInit(void) {
    for (unsigned int i = 0u; i < VECTOR_SIZE; ++i) {
        ResultVector[i] = 0;
        VectorA[i] = _IQ15(1.0221f) * i;
        VectorB[i] = _IQ15(2.127f) * i;
    }
}
/*
 * Initialise the floating-point benchmark arrays.
 *
 * Element i of VectorAF is set to qfp_fmul(1.0221f, i), element i of VectorBF
 * to qfp_fmul(2.127f, i), and every element of ResultVectorF is cleared.
 * The loop index is converted to float implicitly when passed to qfp_fmul.
 *
 * NOTE(review): VectorBF[0] is the product with index 0, and main() passes
 * VectorBF as the divisor to VectorQfpDiv - confirm how qfp_fdiv treats a
 * zero divisor and whether it skews the measurement.
 */
static void BenchmarkVectorQfpArrayInit(void) {
    for (unsigned int i = 0u; i < VECTOR_SIZE; ++i) {
        ResultVectorF[i] = 0.0f;
        VectorAF[i] = qfp_fmul(1.0221f, i);
        VectorBF[i] = qfp_fmul(2.127f, i);
    }
}
/*
 * Element-wise fixed-point addition.
 *
 * Writes vectorA[i] + vectorB[i] to result[i] for the first VECTOR_SIZE
 * elements. All three pointers must reference at least VECTOR_SIZE elements.
 */
static void VectorIQAdd(_iq15 *vectorA, _iq15 *vectorB, _iq15 *result) {
    for (unsigned int i = 0u; i < VECTOR_SIZE; ++i) {
        const _iq15 lhs = vectorA[i];
        const _iq15 rhs = vectorB[i];
        result[i] = lhs + rhs;
    }
}
/*
 * Element-wise floating-point addition through qfp_fadd.
 *
 * Writes qfp_fadd(vectorA[i], vectorB[i]) to result[i] for the first
 * VECTOR_SIZE elements. All three pointers must reference at least
 * VECTOR_SIZE elements.
 */
static void VectorQfpAdd(float *vectorA, float *vectorB, float *result) {
    unsigned int i = 0u;

    while (i < VECTOR_SIZE) {
        result[i] = qfp_fadd(vectorA[i], vectorB[i]);
        ++i;
    }
}
/*
 * Element-wise fixed-point multiplication through _IQ15mpy.
 *
 * Writes _IQ15mpy(vectorA[i], vectorB[i]) to result[i] for the first
 * VECTOR_SIZE elements.
 *
 * NOTE(review): if _iq15 is a 32-bit Q15 value, the larger products of the
 * ramp data written by BenchmarkVectorIQArrayInit (up to roughly 612 * 1274)
 * would presumably not fit - confirm whether overflow or saturation affects
 * the result or the timing.
 */
static void VectorIQMultiply(_iq15 *vectorA, _iq15 *vectorB, _iq15 *result) {
    for (unsigned int i = 0u; i < VECTOR_SIZE; ++i) {
        const _iq15 lhs = vectorA[i];
        const _iq15 rhs = vectorB[i];
        result[i] = _IQ15mpy(lhs, rhs);
    }
}
/*
 * Element-wise floating-point multiplication through qfp_fmul.
 *
 * Writes qfp_fmul(vectorA[i], vectorB[i]) to result[i] for the first
 * VECTOR_SIZE elements. All three pointers must reference at least
 * VECTOR_SIZE elements.
 */
static void VectorQfpMultiply(float *vectorA, float *vectorB, float *result ) {
    unsigned int i = 0u;

    while (i < VECTOR_SIZE) {
        result[i] = qfp_fmul(vectorA[i], vectorB[i]);
        ++i;
    }
}
/*
 * Fixed-point multiply-add: result[i] = vectorA[i] * CONST_PI_VAL + vectorB[i].
 *
 * The product is stored to result[i] first and vectorB[i] is then added in
 * place, so vectorB[i] is read only after the first store to result[i].
 */
static void VectorIQScale(_iq15 *vectorA, _iq15 *vectorB, _iq15 *result) {
    for (unsigned int i = 0u; i < VECTOR_SIZE; ++i) {
        _iq15 *const out = &result[i];

        *out = _IQ15mpy(vectorA[i], _IQ15(CONST_PI_VAL));
        *out += vectorB[i];
    }
}
/*
 * Floating-point multiply-add built from qfp_fmul followed by qfp_fadd:
 * result[i] = vectorA[i] * CONST_PI_VAL + vectorB[i].
 *
 * The product is stored to result[i] first; the stored value is then read
 * back and added to vectorB[i], matching a two-step store sequence.
 */
static void VectorQfpScale(float *vectorA, float *vectorB, float *result) {
    for (unsigned int i = 0u; i < VECTOR_SIZE; ++i) {
        float *const out = &result[i];

        *out = qfp_fmul(vectorA[i], CONST_PI_VAL);
        *out = qfp_fadd(*out, vectorB[i]);
    }
}
/*
 * Element-wise fixed-point division through _IQ15div.
 *
 * Writes _IQ15div(vectorA[i], vectorB[i]) to result[i] for the first
 * VECTOR_SIZE elements; vectorB supplies the divisors and is not checked
 * for zero here.
 */
static void VectorIQDiv(_iq15 *vectorA, _iq15 *vectorB, _iq15 *result) {
    unsigned int i = 0u;

    while (i < VECTOR_SIZE) {
        const _iq15 dividend = vectorA[i];
        const _iq15 divisor = vectorB[i];
        result[i] = _IQ15div(dividend, divisor);
        ++i;
    }
}
/*
 * Element-wise floating-point division through qfp_fdiv.
 *
 * Writes qfp_fdiv(vectorA[i], vectorB[i]) to result[i] for the first
 * VECTOR_SIZE elements; vectorB supplies the divisors and is not checked
 * for zero here.
 */
static void VectorQfpDiv(float *vectorA, float *vectorB, float *result) {
    for (unsigned int i = 0u; i < VECTOR_SIZE; ++i) {
        const float dividend = vectorA[i];
        const float divisor = vectorB[i];
        result[i] = qfp_fdiv(dividend, divisor);
    }
}
/*
 * Benchmark entry point.
 *
 * Initialises the fixed-point and floating-point vectors, then times each
 * vector kernel once by sampling DWT->CYCCNT before and after the call and
 * printing the difference (in cycle-counter ticks) through rt_kprintf.
 * After the measurements it idles forever in 1000-tick osDelay() steps.
 *
 * The elapsed count is a uint32_t, so it is printed with %lu and an explicit
 * cast to unsigned long instead of the signed %d conversion.
 *
 * NOTE(review): this function only resets DWT->CYCCNT; it assumes the cycle
 * counter has already been enabled elsewhere - confirm in the startup code.
 */
int main(void) {
    uint32_t start, end, diff;

    BenchmarkVectorIQArrayInit();
    BenchmarkVectorQfpArrayInit();

    DWT->CYCCNT = 0x0;

    /* Addition */
    start = DWT->CYCCNT;
    VectorIQAdd(VectorA, VectorB, ResultVector);
    end = DWT->CYCCNT;
    diff = end - start; /* unsigned arithmetic: modular difference */
    rt_kprintf("iq add, elapse:%lu\n", (unsigned long)diff);

    start = DWT->CYCCNT;
    VectorQfpAdd(VectorAF, VectorBF, ResultVectorF);
    end = DWT->CYCCNT;
    diff = end - start;
    rt_kprintf("qfp add, elapse:%lu\n", (unsigned long)diff);

    /* Multiplication */
    start = DWT->CYCCNT;
    VectorIQMultiply(VectorA, VectorB, ResultVector);
    end = DWT->CYCCNT;
    diff = end - start;
    rt_kprintf("iq mpy, elapse:%lu\n", (unsigned long)diff);

    start = DWT->CYCCNT;
    VectorQfpMultiply(VectorAF, VectorBF, ResultVectorF);
    end = DWT->CYCCNT;
    diff = end - start;
    rt_kprintf("qfp mpy, elapse:%lu\n", (unsigned long)diff);

    /* Division */
    start = DWT->CYCCNT;
    VectorIQDiv(VectorA, VectorB, ResultVector);
    end = DWT->CYCCNT;
    diff = end - start;
    rt_kprintf("iq div, elapse:%lu\n", (unsigned long)diff);

    start = DWT->CYCCNT;
    VectorQfpDiv(VectorAF, VectorBF, ResultVectorF);
    end = DWT->CYCCNT;
    diff = end - start;
    rt_kprintf("qfp div, elapse:%lu\n", (unsigned long)diff);

    /* Multiply-add */
    start = DWT->CYCCNT;
    VectorIQScale(VectorA, VectorB, ResultVector);
    end = DWT->CYCCNT;
    diff = end - start;
    rt_kprintf("iq scale, elapse:%lu\n", (unsigned long)diff);

    start = DWT->CYCCNT;
    VectorQfpScale(VectorAF, VectorBF, ResultVectorF);
    end = DWT->CYCCNT;
    diff = end - start;
    rt_kprintf("qfp scale, elapse:%lu\n", (unsigned long)diff);

    /* Measurements are done; keep the task alive without busy-spinning. */
    while (1) {
        osDelay(1000);
    }
    return 0;
}
原始日志
iq add, elapse:6023
qfp add, elapse:40991
iq mpy, elapse:11463
qfp mpy, elapse:32473
iq div, elapse:83931
qfp div, elapse:49770
iq scale, elapse:14447
qfp scale, elapse:71038
iq add, elapse:6023
qfp add, elapse:40991
iq mpy, elapse:11463
qfp mpy, elapse:32473
iq div, elapse:83931
qfp div, elapse:49770
iq scale, elapse:14447
qfp scale, elapse:71038
iq add, elapse:6023
qfp add, elapse:40991
iq mpy, elapse:11463
qfp mpy, elapse:32473
iq div, elapse:83931
qfp div, elapse:49770
iq scale, elapse:14447
qfp scale, elapse:71038
iq add, elapse:6023
qfp add, elapse:40991
iq mpy, elapse:11463
qfp mpy, elapse:32473
iq div, elapse:83931
qfp div, elapse:49770
iq scale, elapse:14447
qfp scale, elapse:71038
iq add, elapse:6023
qfp add, elapse:40991
iq mpy, elapse:11463
qfp mpy, elapse:32473
iq div, elapse:83931
qfp div, elapse:49770
iq scale, elapse:14447
qfp scale, elapse:71038
定点数加法和软浮点数加法
序号 |
iq add(us) |
qfp add(us) |
1 |
41.826 |
284.659 |
定点数乘法和软浮点数乘法
序号 |
iq mpy(us) |
qfp mpy(us) |
1 |
79.604 |
225.507 |
定点数除法和软浮点数除法
序号 |
iq div(us) |
qfp div(us) |
1 |
582.854 |
345.625 |
定点数乘加和软浮点数乘加
序号 |
iq scale(us) |
qfp scale(us) |
1 |
100.326 |
493.319 |
总结
Qfp浮点库相比于IQ定点库的性能提升(正值表示Qfp更快,负值表示Qfp更慢)
|
加法 |
乘法 |
除法 |
乘加 |
提升百分比 |
-580.6% |
-183.3% |
40.7% |
-391.7% |