ostringstream 性能测试
ostringstream 性能测试
分析与建议
性能分析
- ostringstream 格式转换的性能并不差,至少从测试结果上看,它与 snprintf 的差距不在数量级上
- 每次创建一个 ostringstream 对象都会触发一次全局 locale 对象的引用计数递增(参考1,参考2),多线程场景下有 false sharing 的性能损耗,而且很明显
  - 不要重复地创建与销毁 ostringstream 对象,避免 false sharing
- ostringstream 会在堆上分配内存,但系统会优化 malloc/free,所以不必太过关注这个问题
测试结果
下面的代码在我的机器(6 cores,g++ -O2)上的运行结果如下(每项格式为 函数名/耗时,单位为毫秒)。false sharing 的影响可以从线程数增加而 use_oss 函数的耗时并未下降看出来:
Thread num from 1 to 12, cores num: 6
1: use_oss/1048, use_oss_op/225, use_printf/198, use_printf_malloc/233, use_printf_malloc_op/198, use_printf_op/197,
2: use_oss/1338, use_oss_op/113, use_printf/99, use_printf_malloc/116, use_printf_malloc_op/99, use_printf_op/98,
3: use_oss/1304, use_oss_op/75, use_printf/65, use_printf_malloc/78, use_printf_malloc_op/66, use_printf_op/65,
4: use_oss/1282, use_oss_op/57, use_printf/50, use_printf_malloc/59, use_printf_malloc_op/50, use_printf_op/50,
5: use_oss/1315, use_oss_op/52, use_printf/50, use_printf_malloc/54, use_printf_malloc_op/43, use_printf_op/46,
6: use_oss/1306, use_oss_op/41, use_printf/41, use_printf_malloc/42, use_printf_malloc_op/39, use_printf_op/36,
7: use_oss/1301, use_oss_op/65, use_printf/57, use_printf_malloc/65, use_printf_malloc_op/57, use_printf_op/56,
8: use_oss/1317, use_oss_op/61, use_printf/68, use_printf_malloc/62, use_printf_malloc_op/53, use_printf_op/51,
9: use_oss/1328, use_oss_op/52, use_printf/47, use_printf_malloc/55, use_printf_malloc_op/47, use_printf_op/46,
10: use_oss/1290, use_oss_op/50, use_printf/46, use_printf_malloc/61, use_printf_malloc_op/49, use_printf_op/51,
11: use_oss/1306, use_oss_op/47, use_printf/40, use_printf_malloc/53, use_printf_malloc_op/54, use_printf_op/45,
12: use_oss/1305, use_oss_op/40, use_printf/40, use_printf_malloc/59, use_printf_malloc_op/37, use_printf_op/37,
测试代码
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <chrono>
#include <future>
#include <iostream>
#include <map>
#include <new>
#include <numeric>
#include <sstream>
#include <string>
#include <thread>
#include <vector>
/// Benchmark body: formats "hi" followed by 999 through a std::ostringstream
/// that is constructed and destroyed on every iteration.
///
/// @param cnt number of iterations to run
/// @return the summed length of all formatted strings
int use_oss(int cnt)
{
    int total_len = 0;
    for (; cnt != 0; --cnt) {
        std::ostringstream stream;
        // Resetting a freshly constructed stream has no visible effect; it is
        // kept so the per-iteration work stays exactly as measured.
        stream.str("");
        stream.clear();
        stream << "hi" << 999;
        const auto formatted = stream.str();
        total_len += formatted.size();
    }
    return total_len;
}
/// Benchmark body: formats "hi" followed by 999 through a single
/// std::ostringstream that is created once and reset before each iteration.
///
/// @param cnt number of iterations to run
/// @return the summed length of all formatted strings
int use_oss_op(int cnt)
{
    std::ostringstream stream;
    int total_len = 0;
    for (; cnt != 0; --cnt) {
        // Drop the previous contents and clear the state flags so the stream
        // starts each round empty.
        stream.str("");
        stream.clear();
        stream << "hi" << 999 << std::flush;
        const auto formatted = stream.str();
        total_len += formatted.size();
    }
    return total_len;
}
/// Benchmark body: formats "hi999" with snprintf into a 64-byte stack buffer
/// that is declared inside the loop.
///
/// @param cnt number of iterations to run
/// @return the summed strlen of all formatted strings
int use_printf(int cnt)
{
    int total_len = 0;
    for (; cnt != 0; --cnt) {
        char text[64];
        snprintf(text, sizeof(text), "hi%d", 999);
        total_len += strlen(text);
    }
    return total_len;
}
/// Benchmark body: formats "hi999" with snprintf into one 64-byte stack
/// buffer that is declared once, outside the loop, and overwritten each round.
///
/// @param cnt number of iterations to run
/// @return the summed strlen of all formatted strings
int use_printf_op(int cnt)
{
    char text[64];
    int total_len = 0;
    for (; cnt != 0; --cnt) {
        snprintf(text, sizeof(text), "hi%d", 999);
        total_len += strlen(text);
    }
    return total_len;
}
/// Benchmark body: formats "hi999" with snprintf into a 64-byte heap buffer
/// that is malloc'ed and freed on every iteration.
///
/// @param cnt number of iterations to run
/// @return the summed strlen of all formatted strings
/// @throws std::bad_alloc if malloc fails
int use_printf_malloc(int cnt)
{
    // Real capacity of the heap buffer. sizeof on the char* would yield the
    // pointer size (4 or 8 bytes), not the allocation size, and would make
    // snprintf truncate to that.
    constexpr size_t kBufSize = 64;

    int cnt_tmp = 0;
    while (cnt--) {
        char* buf = static_cast<char*>(malloc(kBufSize));
        if (buf == nullptr) {
            throw std::bad_alloc();
        }
        snprintf(buf, kBufSize, "hi%d", 999);
        cnt_tmp += strlen(buf);
        free(buf);
    }
    return cnt_tmp;
}
/// Benchmark body: formats "hi999" with snprintf into one 64-byte heap buffer
/// that is malloc'ed once before the loop and freed once after it.
///
/// @param cnt number of iterations to run
/// @return the summed strlen of all formatted strings
/// @throws std::bad_alloc if malloc fails
int use_printf_malloc_op(int cnt)
{
    // Real capacity of the heap buffer. sizeof on the char* would yield the
    // pointer size (4 or 8 bytes), not the allocation size, and would make
    // snprintf truncate to that.
    constexpr size_t kBufSize = 64;

    char* buf = static_cast<char*>(malloc(kBufSize));
    if (buf == nullptr) {
        throw std::bad_alloc();
    }

    int cnt_tmp = 0;
    while (cnt--) {
        snprintf(buf, kBufSize, "hi%d", 999);
        cnt_tmp += strlen(buf);
    }
    free(buf);
    return cnt_tmp;
}
// Total number of formatting calls per measurement; main() divides it by the
// thread count to get the per-thread share.
// 10! = 1*2*3*4*5*6*7*8*9*10 = 3628800, which is evenly divisible by every
// thread count from 1 to 10.
const int TOTAL_USE_CNT = 1 * 2 * 3 * 4 * 5 * 6 * 7 * 8 * 9 * 10;
/// Benchmark driver.
///
/// For every thread count t from 1 to twice the number of hardware threads,
/// runs each registered benchmark function on t concurrent std::async tasks,
/// giving each task TOTAL_USE_CNT / t iterations, and prints one line of
/// "name/elapsed-milliseconds" pairs per thread count.
///
/// @return 0 on completion
int main()
{
    // Name -> benchmark function. std::map iterates in key order, which fixes
    // the order of the entries within each report line.
    std::map<std::string, decltype(use_oss)*> funs;
    funs["use_oss"] = use_oss;
    funs["use_oss_op"] = use_oss_op;
    funs["use_printf"] = use_printf;
    funs["use_printf_op"] = use_printf_op;
    funs["use_printf_malloc"] = use_printf_malloc;
    funs["use_printf_malloc_op"] = use_printf_malloc_op;

    // hardware_concurrency() returns 0 when the value cannot be determined;
    // fall back to 1 so that at least the 1- and 2-thread rounds still run.
    const unsigned reported_cores = std::thread::hardware_concurrency();
    const unsigned cores_num = reported_cores != 0 ? reported_cores : 1u;
    const unsigned max_threads = cores_num * 2;

    std::cout << "Thread num from 1 to " << max_threads << ", cores num: "
              << cores_num << std::endl << std::endl;

    for (unsigned t = 1; t <= max_threads; t++) {
        const int USE_CNT_PER_THREAD = TOTAL_USE_CNT / static_cast<int>(t);
        std::cout << t << ": ";

        // Sum of every task's return value. It is never printed; presumably
        // it only exists so the results are consumed.
        size_t acc = 0;

        for (const auto& p : funs) {
            const auto f = p.second;
            std::vector<std::future<int>> future_vec;
            future_vec.reserve(t);

            const auto ms_begin = std::chrono::steady_clock::now();
            for (unsigned i = 0; i < t; i++) {
                future_vec.push_back(std::async(std::launch::async,
                    [USE_CNT_PER_THREAD, f]() -> int { return f(USE_CNT_PER_THREAD); }));
            }
            // get() blocks until the task has finished, so no separate wait()
            // is needed before it.
            for (auto& fut : future_vec) {
                acc += fut.get();
            }
            const auto ms_end = std::chrono::steady_clock::now();

            const auto mc_s =
                std::chrono::duration_cast<std::chrono::milliseconds>(ms_end - ms_begin);
            std::cout << p.first << "/" << mc_s.count() << ", ";
        }
        (void)acc;
        std::cout << std::endl;
    }
    return 0;
}