C++ unordered_map 在key为string类型和char*类型时测试时间性能差异
测试系统:Linux CentOS 6.5
代码如下
#include <string.h>
#include <time.h>
#include <sstream>
#include <list>
#include <string>
#include <vector>
#include <sys/time.h>
#include <unordered_map>
#include <cstdlib>
#include <stdio.h>
#include "unistd.h"

using namespace std;

/// Equality predicate for C-string keys: two keys are equal when their
/// NUL-terminated contents are identical.
struct Cmp {
    bool operator()(const char* a, const char* b) const {
        // strcmp stops at the terminator, so keys of any length are compared
        // without reading beyond the end of either buffer.
        return strcmp(a, b) == 0;
    }
};

/// BKDR-style multiplicative hash (seed 131) over a NUL-terminated string.
/// Returns a value in [0, 0x7FFFFFFF]; the empty string hashes to 0.
struct hash_func {
    size_t operator()(const char* str) const {
        const unsigned int seed = 131;
        // Unsigned arithmetic: wrap-around on overflow is well defined.
        unsigned int hash = 0;
        // Testing *str before consuming it keeps the empty string in bounds.
        for (; *str != '\0'; ++str) {
            hash = hash * seed + static_cast<unsigned char>(*str);
        }
        return hash & 0x7FFFFFFFu;
    }
};

/// Current wall-clock time in microseconds, as reported by gettimeofday().
double current_usec() {
    struct timeval tv;
    gettimeofday(&tv, NULL);
    // Convert to double before scaling so the product cannot overflow an
    // integer type when time_t is 32 bits wide.
    return static_cast<double>(tv.tv_sec) * 1000000.0 + static_cast<double>(tv.tv_usec);
}

/// Generates a random alphanumeric C string.
///
/// @param length  Size of the buffer in bytes, including the terminating NUL.
/// @return A malloc()-allocated buffer holding length-1 random characters from
///         [A-Za-z0-9] followed by '\0', or NULL when length <= 0 or the
///         allocation fails. The caller owns the buffer and must free() it.
///
/// Uses rand(); the caller is expected to seed the generator once with srand().
char* genRandomString(int length) {
    if (length <= 0) {
        return NULL;
    }
    char* buf = static_cast<char*>(malloc(static_cast<size_t>(length)));
    if (buf == NULL) {
        printf("Malloc failed!flag:14\n");
        return NULL;
    }
    for (int i = 0; i < length - 1; ++i) {
        switch (rand() % 3) {
            case 0:
                buf[i] = 'A' + rand() % 26;
                break;
            case 1:
                buf[i] = 'a' + rand() % 26;
                break;
            default:  // rand() % 3 == 2
                buf[i] = '0' + rand() % 10;
                break;
        }
    }
    buf[length - 1] = '\0';
    return buf;
}

/// Releases every buffer produced by genRandomString() and empties the list.
static void freeKeys(std::vector<char*>& keys) {
    for (size_t i = 0; i < keys.size(); ++i) {
        free(keys[i]);
    }
    keys.clear();
}

/// Benchmark entry point.
///
/// Usage: <program> <count>
/// Inserts, looks up and erases `count` random 63-character keys in an
/// unordered_map keyed by std::string and in one keyed by const char*, then
/// prints the average time per operation in microseconds.
int main(int argc, char* argv[]) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s <count>\n", argc > 0 ? argv[0] : "bench");
        return 1;
    }

    const int count = atoi(argv[1]);
    if (count == 0) {
        printf("the count is zero");
        return 0;
    }
    if (count < 0) {
        fprintf(stderr, "count must be positive\n");
        return 1;
    }

    // Seed exactly once. Re-seeding for every key from a one-second clock
    // would repeat the same sequence unless each call slept for a second.
    srand((unsigned) time(NULL));

    const int kKeyBytes = 64;  // 63 random characters + terminating NUL

    // Heap-backed storage sized from `count`: no fixed upper limit and no
    // large arrays on the stack.
    std::vector<std::string> str;
    std::vector<char*> val;
    str.reserve(static_cast<size_t>(count));
    val.reserve(static_cast<size_t>(count));
    for (int i = 0; i < count; ++i) {
        char* value = genRandomString(kKeyBytes);
        if (value == NULL) {
            freeKeys(val);
            return 1;
        }
        val.push_back(value);
        str.push_back(std::string(value));
    }

    std::unordered_map<const char*, int, hash_func, Cmp> mapchar;
    std::unordered_map<const char*, int, hash_func, Cmp>::iterator itchar;
    std::unordered_map<std::string, int> mapstring;
    std::unordered_map<std::string, int>::iterator itstring;

    double start = 0;
    double end = 0;

    // Insert `count` std::string keys.
    start = current_usec();
    for (int i = 0; i < count; ++i) {
        mapstring[str[i]] = rand();
    }
    end = current_usec();
    double string_insert_us = end - start;

    // Insert `count` char* keys.
    start = current_usec();
    for (int i = 0; i < count; ++i) {
        mapchar[val[i]] = rand();
    }
    end = current_usec();
    double char_insert_us = end - start;

    // Look up `count` std::string keys.
    start = current_usec();
    for (int i = 0; i < count; ++i) {
        itstring = mapstring.find(str[i]);
        if (itstring == mapstring.end()) {
            printf("string not find,something wrong with it");
        }
    }
    end = current_usec();
    double string_find_us = end - start;

    // Look up `count` char* keys.
    start = current_usec();
    for (int i = 0; i < count; ++i) {
        itchar = mapchar.find(val[i]);
        if (itchar == mapchar.end()) {
            printf("char not find,something wrong with it");
        }
    }
    end = current_usec();
    double char_find_us = end - start;

    // Erase `count` std::string keys.
    start = current_usec();
    for (int i = 0; i < count; ++i) {
        mapstring.erase(str[i]);
    }
    end = current_usec();
    double string_del_us = end - start;

    // Erase `count` char* keys.
    start = current_usec();
    for (int i = 0; i < count; ++i) {
        mapchar.erase(val[i]);
    }
    end = current_usec();
    double char_del_us = end - start;

    printf("插入string time is %f us \n", string_insert_us/count);
    printf("插入char time is %f us\n", char_insert_us/count);
    printf("查找string time is %f us\n", string_find_us/count);
    printf("查找char time is %f us\n", char_find_us/count);
    printf("删除string time is %f us\n", string_del_us/count);
    printf("删除char time is %f us\n", char_del_us/count);

    // The char* map only borrows the key buffers; drop any remaining entries
    // before releasing the memory they point to.
    mapchar.clear();
    freeKeys(val);
    return 0;
}
插入的字符串长度为64字节(63个随机字符加结尾的'\0'),
在并发(即命令行参数 count,程序本身是单线程的)为1的情况下
在并发(count)为10的情况下
并发(count)为1000
并发(count)为5000
并发(count)为10000
一开始我以为 char* 的速度会更快,因为插入 string 的时候需要构造 string 并申请内存;没想到会是这个结果,可能是我的 hash 函数比标准库自带的要慢。有相关经验的朋友可以帮忙看一下,是我写的代码有问题,还是其他原因导致了这个情况。
把比较函数 Cmp 的 operator() 标记为 inline(hash 函数本身没有改动;类内定义的成员函数本来就是隐式 inline 的),并且增加了一个 key 为 char* 的 std::map 进行对比
源码
#include <string.h>
#include <time.h>
#include <sstream>
#include <list>
#include <string>
#include <vector>
#include <sys/time.h>
#include <unordered_map>
#include <cstdlib>
#include <stdio.h>
#include "unistd.h"
#include <map>

using namespace std;

/// Equality predicate for C-string keys (used by the unordered_map): two keys
/// are equal when their NUL-terminated contents are identical.
struct Cmp {
    inline bool operator()(const char* a, const char* b) const {
        // strcmp stops at the terminator, so keys of any length are compared
        // without reading beyond the end of either buffer.
        return strcmp(a, b) == 0;
    }
};

/// Strict-weak ordering for C-string keys (used by the std::map): orders keys
/// by their NUL-terminated contents.
struct Cmp2 {
    bool operator()(const char* a, const char* b) const {
        return strcmp(a, b) < 0;
    }
};

/// BKDR-style multiplicative hash (seed 131) over a NUL-terminated string.
/// Returns a value in [0, 0x7FFFFFFF]; the empty string hashes to 0.
struct hash_func {
    size_t operator()(const char* str) const {
        const unsigned int seed = 131;
        // Unsigned arithmetic: wrap-around on overflow is well defined.
        unsigned int hash = 0;
        // Testing *str before consuming it keeps the empty string in bounds.
        for (; *str != '\0'; ++str) {
            hash = hash * seed + static_cast<unsigned char>(*str);
        }
        return hash & 0x7FFFFFFFu;
    }
};

/// Current wall-clock time in microseconds, as reported by gettimeofday().
double current_usec() {
    struct timeval tv;
    gettimeofday(&tv, NULL);
    // Convert to double before scaling so the product cannot overflow an
    // integer type when time_t is 32 bits wide.
    return static_cast<double>(tv.tv_sec) * 1000000.0 + static_cast<double>(tv.tv_usec);
}

/// Generates a random alphanumeric C string.
///
/// @param length  Size of the buffer in bytes, including the terminating NUL.
/// @return A malloc()-allocated buffer holding length-1 random characters from
///         [A-Za-z0-9] followed by '\0', or NULL when length <= 0 or the
///         allocation fails. The caller owns the buffer and must free() it.
///
/// Uses rand(); the caller is expected to seed the generator once with srand().
char* genRandomString(int length) {
    if (length <= 0) {
        return NULL;
    }
    char* buf = static_cast<char*>(malloc(static_cast<size_t>(length)));
    if (buf == NULL) {
        printf("Malloc failed!flag:14\n");
        return NULL;
    }
    for (int i = 0; i < length - 1; ++i) {
        switch (rand() % 3) {
            case 0:
                buf[i] = 'A' + rand() % 26;
                break;
            case 1:
                buf[i] = 'a' + rand() % 26;
                break;
            default:  // rand() % 3 == 2
                buf[i] = '0' + rand() % 10;
                break;
        }
    }
    buf[length - 1] = '\0';
    return buf;
}

/// Releases every buffer produced by genRandomString() and empties the list.
static void freeKeys(std::vector<char*>& keys) {
    for (size_t i = 0; i < keys.size(); ++i) {
        free(keys[i]);
    }
    keys.clear();
}

/// Benchmark entry point.
///
/// Usage: <program> <count>
/// Inserts, looks up and erases `count` random 63-character keys in three
/// containers -- unordered_map<std::string>, unordered_map<const char*> and
/// map<const char*> -- then prints the average time per operation in
/// microseconds.
int main(int argc, char* argv[]) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s <count>\n", argc > 0 ? argv[0] : "bench");
        return 1;
    }

    const int count = atoi(argv[1]);
    if (count == 0) {
        printf("the count is zero");
        return 0;
    }
    if (count < 0) {
        fprintf(stderr, "count must be positive\n");
        return 1;
    }

    // Seed exactly once. Re-seeding for every key from a one-second clock
    // would repeat the same sequence unless each call slept for a second.
    srand((unsigned) time(NULL));

    const int kKeyBytes = 64;  // 63 random characters + terminating NUL

    // Heap-backed storage sized from `count`: no fixed upper limit and no
    // large arrays on the stack.
    std::vector<std::string> str;
    std::vector<char*> val;
    str.reserve(static_cast<size_t>(count));
    val.reserve(static_cast<size_t>(count));
    for (int i = 0; i < count; ++i) {
        char* value = genRandomString(kKeyBytes);
        if (value == NULL) {
            freeKeys(val);
            return 1;
        }
        val.push_back(value);
        str.push_back(std::string(value));
    }

    std::unordered_map<const char*, int, hash_func, Cmp> mapchar;
    std::unordered_map<const char*, int, hash_func, Cmp>::iterator itchar;
    std::map<const char*, int, Cmp2> mchar;
    std::map<const char*, int, Cmp2>::iterator mitchar;
    std::unordered_map<std::string, int> mapstring;
    std::unordered_map<std::string, int>::iterator itstring;

    double start = 0;
    double end = 0;

    // Insert `count` std::string keys into the unordered_map.
    start = current_usec();
    for (int i = 0; i < count; ++i) {
        mapstring[str[i]] = rand();
    }
    end = current_usec();
    double string_insert_us = end - start;

    // Insert `count` char* keys into the unordered_map.
    start = current_usec();
    for (int i = 0; i < count; ++i) {
        mapchar[val[i]] = rand();
    }
    end = current_usec();
    double char_insert_us = end - start;

    // Insert `count` char* keys into the ordered map.
    start = current_usec();
    for (int i = 0; i < count; ++i) {
        mchar[val[i]] = rand();
    }
    end = current_usec();
    double mchar_insert_us = end - start;

    // Look up `count` std::string keys.
    start = current_usec();
    for (int i = 0; i < count; ++i) {
        itstring = mapstring.find(str[i]);
        if (itstring == mapstring.end()) {
            printf("string not find,something wrong with it");
        }
    }
    end = current_usec();
    double string_find_us = end - start;

    // Look up `count` char* keys in the unordered_map.
    start = current_usec();
    for (int i = 0; i < count; ++i) {
        itchar = mapchar.find(val[i]);
        if (itchar == mapchar.end()) {
            printf("char not find,something wrong with it");
        }
    }
    end = current_usec();
    double char_find_us = end - start;

    // Look up `count` char* keys in the ordered map.
    start = current_usec();
    for (int i = 0; i < count; ++i) {
        mitchar = mchar.find(val[i]);
        if (mitchar == mchar.end()) {
            printf("map char not find,something wrong with it");
        }
    }
    end = current_usec();
    double mchar_find_us = end - start;

    // Erase `count` std::string keys.
    start = current_usec();
    for (int i = 0; i < count; ++i) {
        mapstring.erase(str[i]);
    }
    end = current_usec();
    double string_del_us = end - start;

    // Erase `count` char* keys from the unordered_map.
    start = current_usec();
    for (int i = 0; i < count; ++i) {
        mapchar.erase(val[i]);
    }
    end = current_usec();
    double char_del_us = end - start;

    // Erase `count` char* keys from the ordered map.
    start = current_usec();
    for (int i = 0; i < count; ++i) {
        mchar.erase(val[i]);
    }
    end = current_usec();
    double mchar_del_us = end - start;

    printf("插入string time is %f us \n", string_insert_us/count);
    printf("插入char time is %f us\n", char_insert_us/count);
    printf("插入map char time is %f us\n", mchar_insert_us/count);
    printf("查找string time is %f us\n", string_find_us/count);
    printf("查找char time is %f us\n", char_find_us/count);
    printf("查找map char time is %f us\n", mchar_find_us/count);
    printf("删除string time is %f us\n", string_del_us/count);
    printf("删除char time is %f us\n", char_del_us/count);
    printf("删除map char time is %f us\n", mchar_del_us/count);

    // Both char*-keyed containers only borrow the key buffers; drop any
    // remaining entries before releasing the memory they point to.
    mapchar.clear();
    mchar.clear();
    freeKeys(val);
    return 0;
}
并发(即命令行参数 count)为1
并发(count)为1000
并发(count)为10000
我猜主要原因是自己定义的 hash 函数比较耗时,需要再仔细考虑一下如何进一步减少这部分时间。