ostringstream 性能测试

ostringstream 性能测试

分析与建议

性能分析

  1. ostringstream 格式转换的性能并不差,至少从测试上看与 snprintf 差距不是数量级上的

  2. 每次创建一个 ostringstream 对象都会触发一次全局 locale 对象的引用计数递增(参考 1、参考 2)。多线程场景下,各线程会争用这个引用计数所在的缓存行,由此带来的 false sharing 性能损耗非常明显

    1. 不要重复地创建与销毁 ostringstream 对象,以避免 false sharing
  3. ostringstream 会在堆上分配内存,但系统会优化 malloc/free,所以不必太过关注这个问题

测试结果

下面的代码在我的机器(6 cores,g++ -O2)上运行的结果如下(每个函数名后的数字为耗时,单位为毫秒)。false sharing 的影响可以从这一点看出来:线程数增加了,但 use_oss 函数的耗时并没有下降

Thread num from 1 to 12, cores num: 6

1: use_oss/1048, use_oss_op/225, use_printf/198, use_printf_malloc/233, use_printf_malloc_op/198, use_printf_op/197,
2: use_oss/1338, use_oss_op/113, use_printf/99, use_printf_malloc/116, use_printf_malloc_op/99, use_printf_op/98,
3: use_oss/1304, use_oss_op/75, use_printf/65, use_printf_malloc/78, use_printf_malloc_op/66, use_printf_op/65,
4: use_oss/1282, use_oss_op/57, use_printf/50, use_printf_malloc/59, use_printf_malloc_op/50, use_printf_op/50,
5: use_oss/1315, use_oss_op/52, use_printf/50, use_printf_malloc/54, use_printf_malloc_op/43, use_printf_op/46,
6: use_oss/1306, use_oss_op/41, use_printf/41, use_printf_malloc/42, use_printf_malloc_op/39, use_printf_op/36,
7: use_oss/1301, use_oss_op/65, use_printf/57, use_printf_malloc/65, use_printf_malloc_op/57, use_printf_op/56,
8: use_oss/1317, use_oss_op/61, use_printf/68, use_printf_malloc/62, use_printf_malloc_op/53, use_printf_op/51,
9: use_oss/1328, use_oss_op/52, use_printf/47, use_printf_malloc/55, use_printf_malloc_op/47, use_printf_op/46,
10: use_oss/1290, use_oss_op/50, use_printf/46, use_printf_malloc/61, use_printf_malloc_op/49, use_printf_op/51,
11: use_oss/1306, use_oss_op/47, use_printf/40, use_printf_malloc/53, use_printf_malloc_op/54, use_printf_op/45,
12: use_oss/1305, use_oss_op/40, use_printf/40, use_printf_malloc/59, use_printf_malloc_op/37, use_printf_op/37,

测试代码

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <chrono>
#include <future>
#include <iostream>
#include <map>
#include <new>
#include <numeric>
#include <sstream>
#include <string>
#include <thread>
#include <vector>

// Benchmark case: formats "hi" followed by 999 through a std::ostringstream
// that is constructed and destroyed on every iteration.
//
// cnt: number of formatting iterations to run.
// Returns the sum of the lengths of all formatted strings.
int use_oss(int cnt)
{
    int total_len = 0;
    for (; cnt != 0; --cnt) {
        std::ostringstream stream;
        // Reset calls are redundant on a fresh stream; they are kept so the
        // per-iteration work matches the reused-stream variant.
        stream.str("");
        stream.clear();
        stream << "hi";
        stream << 999;
        const auto produced = stream.str().size();
        total_len += produced;
    }

    return total_len;
}

// Benchmark case: formats "hi" followed by 999 through a single
// std::ostringstream that is created once and reset before each iteration.
//
// cnt: number of formatting iterations to run.
// Returns the sum of the lengths of all formatted strings.
int use_oss_op(int cnt)
{
    std::ostringstream shared_stream;
    int total_len = 0;
    for (; cnt != 0; --cnt) {
        // Drop the previous contents and clear any state flags before reuse.
        shared_stream.str("");
        shared_stream.clear();
        shared_stream << "hi";
        shared_stream << 999;
        shared_stream << std::flush;
        const auto produced = shared_stream.str().size();
        total_len += produced;
    }

    return total_len;
}

// Benchmark case: formats "hi%d" with 999 via snprintf into a 64-byte array
// declared inside the loop body.
//
// cnt: number of formatting iterations to run.
// Returns the sum of strlen() over all formatted strings.
int use_printf(int cnt)
{
    int total_len = 0;
    for (; cnt != 0; --cnt) {
        char text[64];
        snprintf(text, sizeof text, "hi%d", 999);
        const size_t produced = strlen(text);
        total_len += produced;
    }

    return total_len;
}

// Benchmark case: formats "hi%d" with 999 via snprintf into one 64-byte array
// declared once outside the loop.
//
// cnt: number of formatting iterations to run.
// Returns the sum of strlen() over all formatted strings.
int use_printf_op(int cnt)
{
    char text[64];
    int total_len = 0;
    for (; cnt != 0; --cnt) {
        snprintf(text, sizeof text, "hi%d", 999);
        const size_t produced = strlen(text);
        total_len += produced;
    }

    return total_len;
}

// Benchmark case: formats "hi%d" with 999 via snprintf into a heap buffer that
// is malloc'ed and freed on every iteration.
//
// cnt: number of formatting iterations to run.
// Returns the sum of strlen() over all formatted strings.
// Throws std::bad_alloc if malloc fails.
int use_printf_malloc(int cnt)
{
    // One named size shared by malloc and snprintf. The previous code passed
    // sizeof(buf), which is the size of the pointer, not of the allocation.
    const size_t buf_size = 64;

    int cnt_tmp = 0;
    while (cnt--) {
        char* buf = static_cast<char*>(malloc(buf_size));
        if (buf == nullptr) {
            throw std::bad_alloc();
        }
        snprintf(buf, buf_size, "hi%d", 999);
        cnt_tmp += strlen(buf);
        free(buf);
    }

    return cnt_tmp;
}

// Benchmark case: formats "hi%d" with 999 via snprintf into a single heap
// buffer that is malloc'ed once before the loop and freed after it.
//
// cnt: number of formatting iterations to run.
// Returns the sum of strlen() over all formatted strings.
// Throws std::bad_alloc if malloc fails.
int use_printf_malloc_op(int cnt)
{
    // One named size shared by malloc and snprintf. The previous code passed
    // sizeof(buf), which is the size of the pointer, not of the allocation.
    const size_t buf_size = 64;

    char* buf = static_cast<char*>(malloc(buf_size));
    if (buf == nullptr) {
        throw std::bad_alloc();
    }

    int cnt_tmp = 0;
    while (cnt--) {
        snprintf(buf, buf_size, "hi%d", 999);
        cnt_tmp += strlen(buf);
    }

    free(buf);
    return cnt_tmp;
}

// Total number of formatting iterations per measurement; main() divides it by
// the thread count to get the per-thread share.
// 10! = 1*2*3*4*5*6*7*8*9*10 = 3628800, so it divides evenly by every thread
// count from 1 to 10; for other counts the integer division truncates.
const int TOTAL_USE_CNT  = 1 * 2 * 3 * 4 * 5 * 6 * 7 * 8 * 9 * 10;

// Benchmark driver.
//
// For every thread count t from 1 to twice the reported core count, runs each
// registered benchmark function on t concurrent std::async tasks, giving each
// task TOTAL_USE_CNT / t iterations, and prints "<name>/<elapsed ms>, " for
// each function on one line per thread count.
int main()
{
    // std::map keeps the functions sorted by name, which fixes the print order.
    std::map<std::string, decltype(use_oss)*> funs;

    funs["use_oss"]    = use_oss;
    funs["use_oss_op"] = use_oss_op;

    funs["use_printf"]           = use_printf;
    funs["use_printf_op"]        = use_printf_op;
    funs["use_printf_malloc"]    = use_printf_malloc;
    funs["use_printf_malloc_op"] = use_printf_malloc_op;

    // hardware_concurrency() is allowed to return 0 when the value cannot be
    // determined; fall back to 1 so the benchmark still produces rows.
    unsigned cores_num = std::thread::hardware_concurrency();
    if (cores_num == 0) {
        cores_num = 1;
    }
    // Signed copy of the upper bound so the loop below compares int with int.
    const int max_threads = static_cast<int>(cores_num * 2);

    std::cout << "Thread num from 1 to " << max_threads << ", cores num: "
              << cores_num << std::endl  << std::endl;
    for (int t = 1; t <= max_threads; t++) {
        const int use_cnt_per_thread = TOTAL_USE_CNT / t;

        std::cout << t <<": ";
        // Sink for the task results; its value is never printed.
        size_t acc = 0;
        for (const auto& p : funs) {

            auto f = p.second;
            std::vector<std::future<int>> future_vec;
            future_vec.reserve(t);
            auto ms_begin = std::chrono::steady_clock::now();
            for (int i = 0; i < t; i++) {
                future_vec.push_back(std::async(std::launch::async,
                    [use_cnt_per_thread, f]() -> int { return f(use_cnt_per_thread); }));
            }

            // get() blocks until the task has finished, so no separate wait().
            for (auto& fut : future_vec) {
                acc += fut.get();
            }
            auto ms_end = std::chrono::steady_clock::now();
            auto mc_s = std::chrono::duration_cast<std::chrono::milliseconds>(ms_end - ms_begin);
            std::cout << p.first << "/" << mc_s.count() << ", ";
        }
        std::cout << std::endl;
    }
}
posted @ 2020-07-07 10:28  jiahu  阅读(856)  评论(0)  编辑  收藏  举报