MCU上三角函数运算速度对比
前言
使用主频 170 MHz 的 Cortex-M4F 内核芯片(STM32G431),启用硬件浮点单元(FPU),并启用 CMSIS-DSP 库(arm_math)。
对比条件:
方式 | 输入 | 输出 |
---|---|---|
cordic | q15 | q15 |
arm_dsp_q15 | q15 | q15 |
arm_dsp_q31 | q31 | q31 |
arm_dsp_f32 | float | float |
math (sinf/cosf) | float | float |
math (sin/cos) | double | double |
taylor6 | float | float |
测试代码
硬件cordic
/**
* @brief Trigonometrical functions type definition
*
* Holds one cosine/sine pair as two signed 16-bit values.
* MCM_Trig_Functions() overlays this struct on a 32-bit CORDIC result word
* through a union, so the field order (hCos first, hSin second) is part of
* that function's contract.
*/
typedef struct
{
/* Cosine component. */
int16_t hCos;
/* Sine component. */
int16_t hSin;
} Trig_Components;
/*
 * CORDIC control/status register value used for the cosine function in
 * q1.15 format. It ORs together the LL driver flags selecting: cosine
 * function, 6-cycle precision, scale 0, one write and one read per
 * operation, and 16-bit input and output sizes.
 */
#define CORDIC_CONFIG_COSINE            \
    (LL_CORDIC_FUNCTION_COSINE |        \
     LL_CORDIC_PRECISION_6CYCLES |      \
     LL_CORDIC_SCALE_0 |                \
     LL_CORDIC_NBWRITE_1 |              \
     LL_CORDIC_NBREAD_1 |               \
     LL_CORDIC_INSIZE_16BITS |          \
     LL_CORDIC_OUTSIZE_16BITS)
/**
 * @brief  Compute the cosine and sine of an angle with the CORDIC peripheral.
 *
 * Writes CORDIC_CONFIG_COSINE to CORDIC->CSR, pushes one 32-bit argument
 * word, reads one 32-bit result word back and reinterprets it as a
 * Trig_Components pair through a union.
 *
 * @param  hAngle  Angle as a signed 16-bit fixed-point value (q1.15;
 *                 presumably the full int16_t range maps to [-pi, pi) --
 *                 confirm against the CORDIC reference manual).
 * @return Trig_Components with the cosine in hCos and the sine in hSin.
 */
__weak Trig_Components MCM_Trig_Functions(int16_t hAngle)
{
    union u32toi16x2 {
        uint32_t CordicRdata;
        Trig_Components Components;
    } CosSin;

    WRITE_REG(CORDIC->CSR, CORDIC_CONFIG_COSINE);

    /*
     * Argument word: 0x7FFF in the upper half-word, the raw 16-bit angle in
     * the lower half-word. The angle must go through uint16_t first: casting
     * a negative int16_t straight to uint32_t sign-extends it to 0xFFFFxxxx,
     * and adding that to 0x7FFF0000 carries into the upper half-word,
     * silently turning 0x7FFF into 0x7FFE for every negative angle.
     */
    LL_CORDIC_WriteData(CORDIC, 0x7FFF0000u | (uint32_t)(uint16_t)hAngle);

    CosSin.CordicRdata = LL_CORDIC_ReadData(CORDIC);
    return CosSin.Components;
}
泰勒展开函数
/**
 * Compute n! as a float.
 *
 * Implemented as a loop rather than recursion so that stack usage is constant
 * and a negative argument cannot recurse without bound. The factors are
 * multiplied in ascending order (1 * 2 * ... * n), which is the same order
 * the recursive form used, so results for n >= 0 are unchanged.
 *
 * @param n  Value whose factorial is wanted.
 * @return   n! for n >= 1; 1.0f for n <= 0 (the loop body never runs).
 *           The result overflows to +inf once n! exceeds the float range.
 */
float factorial(int n) {
    float product = 1.0f;
    for (int k = 2; k <= n; k++) {
        product = (float)k * product;
    }
    return product;
}
/**
 * Approximate sin(x) with the first n terms of its Maclaurin series:
 *   sum over i in [0, n) of (-1)^i * x^(2i+1) / (2i+1)!
 *
 * Every term is computed from scratch with powf() and factorial().
 *
 * @param x  Series argument.
 * @param n  Number of series terms to add; n <= 0 gives 0.0f.
 * @return   The partial sum.
 */
float taylor_sin(float x, int n) {
    float sum = 0.0f;
    float sign = 1.0f;
    int term = 0;

    while (term < n) {
        const int order = 2 * term + 1;
        const float numerator = sign * powf(x, order);

        sum += numerator / factorial(order);
        sign = -sign;
        term++;
    }
    return sum;
}
/**
 * Approximate cos(x) with the first n terms of its Maclaurin series:
 *   sum over i in [0, n) of (-1)^i * x^(2i) / (2i)!
 *
 * Every term is computed from scratch with powf() and factorial().
 *
 * @param x  Series argument.
 * @param n  Number of series terms to add; n <= 0 gives 0.0f.
 * @return   The partial sum.
 */
float taylor_cos(float x, int n) {
    float sum = 0.0f;
    float sign = 1.0f;
    int term = 0;

    while (term < n) {
        const int order = 2 * term;
        const float numerator = sign * powf(x, order);

        sum += numerator / factorial(order);
        sign = -sign;
        term++;
    }
    return sum;
}
测试流程
#include "arm_math.h"
#include "math.h"
/**
 * @brief Time seven sine/cosine implementations on one random angle and
 *        print each result through usb_printf.
 *
 * Order of the measurements: hardware CORDIC (q15), arm_sin/cos_q15,
 * arm_sin/cos_q31, arm_sin/cos_f32, sinf/cosf, sin/cos (double) and the
 * 6-term Taylor series. Each timed region also includes converting the
 * result to float.
 *
 * The q15 angle is converted to radians with a full scale of 2^15 so that
 * the floating-point variants are evaluated at the same angle the
 * fixed-point variants see.
 *
 * NOTE(review): assumes time.time() returns a free-running counter ticking
 * at the 170 MHz core clock, so ticks / 170 is microseconds -- confirm.
 */
void math_test(void)
{
    /* Counter ticks per microsecond (170 MHz core clock). */
    const float ticks_per_us = 170.0f;
    /* Full-scale values of the q15 and q31 fixed-point formats. */
    const float q15_scale = 32768.0f;
    const float q31_scale = 2147483648.0f;

    usb_printf("\r\n\r\n######################\r\n");
    HAL_Delay(0);

    /* Random q15 angle in [-32767, 32767]. */
    int16_t angle = rand() % 0xffff - 0x7fff;
    float anglef = (float)angle / q15_scale * PI;
    usb_printf("[input]:%.6f\r\n",anglef * 360 / 2 / PI);
    HAL_Delay(0);

    int time_base = 0;
    int time_cost = 0;
    float sindata = 0;
    float cosdata = 0;
    Trig_Components sincos = {0};

    /* Hardware CORDIC, q15 in / q15 out. */
    time_base = time.time();
    sincos = MCM_Trig_Functions(angle);
    sindata = (float)sincos.hSin / q15_scale;
    cosdata = (float)sincos.hCos / q15_scale;
    time_cost = time.time() - time_base;
    usb_printf("[type]:cordq15\t[time]:%fus [output]:%.6f,%.6f\r\n",(float)time_cost / ticks_per_us,sindata,cosdata);
    HAL_Delay(10);

    /* CMSIS-DSP q15: the argument is halved because one full turn spans
       the q15 range there. */
    time_base = time.time();
    sindata = (float)arm_sin_q15(angle >> 1) / q15_scale;
    cosdata = (float)arm_cos_q15(angle >> 1) / q15_scale;
    time_cost = time.time() - time_base;
    usb_printf("[type]:dspq15\t[time]:%fus [output]:%.6f,%.6f\r\n",(float)time_cost / ticks_per_us,sindata,cosdata);
    HAL_Delay(10);

    /* CMSIS-DSP q31: scale by 2^15 with a multiplication, because
       left-shifting a negative signed value is undefined behaviour in C. */
    time_base = time.time();
    sindata = (float)arm_sin_q31((int32_t)angle * 32768) / q31_scale;
    cosdata = (float)arm_cos_q31((int32_t)angle * 32768) / q31_scale;
    time_cost = time.time() - time_base;
    usb_printf("[type]:dspq31\t[time]:%fus [output]:%.6f,%.6f\r\n",(float)time_cost / ticks_per_us,sindata,cosdata);
    HAL_Delay(10);

    /* CMSIS-DSP single-precision float. */
    time_base = time.time();
    sindata = arm_sin_f32(anglef);
    cosdata = arm_cos_f32(anglef);
    time_cost = time.time() - time_base;
    usb_printf("[type]:dspf32\t[time]:%fus [output]:%.6f,%.6f\r\n",(float)time_cost / ticks_per_us,sindata,cosdata);
    HAL_Delay(10);

    /* C library, single precision. */
    time_base = time.time();
    sindata = sinf(anglef);
    cosdata = cosf(anglef);
    time_cost = time.time() - time_base;
    usb_printf("[type]:mathf\t[time]:%fus [output]:%.6f,%.6f\r\n",(float)time_cost / ticks_per_us,sindata,cosdata);
    HAL_Delay(10);

    /* C library, double precision: the float argument is promoted to
       double and the result is narrowed back to float. */
    time_base = time.time();
    sindata = sin(anglef);
    cosdata = cos(anglef);
    time_cost = time.time() - time_base;
    usb_printf("[type]:mathd\t[time]:%fus [output]:%.6f,%.6f\r\n",(float)time_cost / ticks_per_us,sindata,cosdata);
    HAL_Delay(10);

    /* Software Taylor series, 6 terms. */
    time_base = time.time();
    sindata = taylor_sin(anglef,6);
    cosdata = taylor_cos(anglef,6);
    time_cost = time.time() - time_base;
    usb_printf("[type]:taylor6\t[time]:%fus [output]:%.6f,%.6f\r\n",(float)time_cost / ticks_per_us,sindata,cosdata);
    HAL_Delay(10);
}
输出结果
######################
[input]:140.343643
[type]:cordq15 [time]:0.923529us [output]:0.638234,-0.769829
[type]:dspq15 [time]:1.441176us [output]:0.638203,-0.769860
[type]:dspq31 [time]:1.447059us [output]:0.638228,-0.769824
[type]:dspf32 [time]:1.682353us [output]:0.638170,-0.769872
[type]:mathf [time]:2.458823us [output]:0.638182,-0.769886
[type]:mathd [time]:36.694118us [output]:0.638182,-0.769886
[type]:taylor6 [time]:38.758823us [output]:0.638164,-0.769980
######################
[input]:67.578964
[type]:cordq15 [time]:0.923529us [output]:0.924406,0.381451
[type]:dspq15 [time]:1.382353us [output]:0.924345,0.381420
[type]:dspq31 [time]:1.552941us [output]:0.924386,0.381440
[type]:dspf32 [time]:1.629412us [output]:0.924399,0.381407
[type]:mathf [time]:2.458823us [output]:0.924406,0.381410
[type]:mathd [time]:30.770588us [output]:0.924406,0.381410
[type]:taylor6 [time]:39.188236us [output]:0.924406,0.381410
######################
[input]:51.236301
[type]:cordq15 [time]:0.923529us [output]:0.779717,0.626118
[type]:dspq15 [time]:1.382353us [output]:0.779626,0.626118
[type]:dspq31 [time]:1.835294us [output]:0.779711,0.626126
[type]:dspf32 [time]:1.858824us [output]:0.779728,0.626104
[type]:mathf [time]:2.458823us [output]:0.779735,0.626110
[type]:mathd [time]:29.305882us [output]:0.779735,0.626110
[type]:taylor6 [time]:39.223530us [output]:0.779735,0.626110
######################
[input]:-95.369728
[type]:cordq15 [time]:0.923529us [output]:-0.995575,-0.093539
[type]:dspq15 [time]:1.388235us [output]:-0.995697,-0.093692
[type]:dspq31 [time]:1.558824us [output]:-0.995599,-0.093530
[type]:dspf32 [time]:1.623529us [output]:-0.995594,-0.093581
[type]:mathf [time]:2.505882us [output]:-0.995612,-0.093582
[type]:mathd [time]:27.211765us [output]:-0.995612,-0.093582
[type]:taylor6 [time]:41.788235us [output]:-0.995612,-0.093583