关于vector push_back()与其他方式读取数据的效率对比（转）

引言：
在读取大量数据（数组）时，使用vector按需增长可以尽量避免炸空间（MLE），但相比于用scanf直接读入静态数组的方式会慢上不少。到底效率相差有多大，我们将通过对比测试得到结果。

测试数据：利用srand()设置随机种子、rand()生成长度约为1e7（代码中为10000005）的随机数组（x[i] ∈ [1, 115000]），最终结果将是读取这约一千万个数所消耗的时间。

测试环境：在Linux虚拟机下测试，利用命令 time ./t（./t 为编译得到的可执行文件）得到运行时间。

备注：在debug模式下运行，不开任何优化。

生成数据代码：

 1 #include <bits/stdc++.h>
 2 using namespace std;
 3 
 4 const int maxn = 10000005, lenth = 115000;
 5 
 6 int n, x, y;
 7 
 8 int main()
 9 {
10     freopen("test.in", "w", stdout);
11 
12     cout << maxn << endl;
13     srand((unsigned int) time(0));
14     for(int i = 0; i != maxn; ++i)
15     {
16         x = rand()%lenth+1;
17         cout << x << endl;
18     }
19 
20     fclose(stdout);
21     return 0;
22 }

对比读入：
1.正常使用push_back()读入

1 for(int i = 0; i != n; ++i)
2 {
3     scanf("%d", &curr);
4     q1.push_back(curr);
5 }

2.每次空间不够时将vector数组增大空间

 1 void test_resize(int a)
 2 {
 3     if(num == size_2-1)
 4     {
 5         q2.resize(size_2 += 10000);
 6     }
 7     q2[++num] = a;
 8     return ;
 9 }
10 
11 for(int i = 0; i != n; ++i)//main函数中
12 {
13     scanf("%d", &curr);
14     test_resize(curr);
15 }

3.scanf读入

1 for(int i = 0; i != n; ++i)//main函数中
2 {
3     scanf("%d", &x[i]);
4 }

4.读入优化

 1 int read()
 2 {
 3     input = 0;
 4     a = getchar();  
 5     while(a < '0' || a > '9')
 6         a = getchar();
 7     while(a >= '0' && a <= '9')
 8     {
 9         input = input*10+a-'0';
10         a = getchar();
11     }
12     return input;
13 }
14 for(int i = 0; i != n; ++i)
15 {
16     x[i] = read();
17 }

5.读入优化+resize()，再扔入vector数组

 1 void test_resize(int a)
 2 {
 3     if(num == size_2-1)
 4     {
 5         q2.resize(size_2 += 10000);
 6     }
 7     q2[++num] = a;
 8     return ;
 9 }
10 
// Fast integer reader built on getchar().
// Skips every character that is not a decimal digit, then accumulates the
// run of digits that follows and returns its value. The character that ends
// the number is consumed as well. Signs are not interpreted, so only
// non-negative numbers are supported.
// At end of input it returns what has been accumulated so far (0 when no
// digit was seen) instead of waiting for more characters.
int read()
{
    // getchar() returns an int so that EOF is distinguishable from every
    // real character; keeping it in a char loses that distinction and lets
    // the skip loop spin forever once the input is exhausted.
    int ch = getchar();
    while (ch != EOF && (ch < '0' || ch > '9'))
        ch = getchar();

    int value = 0;
    while (ch >= '0' && ch <= '9')
    {
        value = value * 10 + (ch - '0');
        ch = getchar();
    }
    return value;
}

25 for(int i = 0; i != n; ++i)
26 {
27     curr = read();
28     test_resize(curr);
29 }

测试结果：
1.push_back()读入

real    0m2.046s
user    0m1.620s
sys 0m0.428s

2.resize()后再读入

real    0m1.743s
user    0m1.636s
sys 0m0.104s

3.scanf读入

real    0m1.885s
user    0m1.776s
sys 0m0.108s

4.读入优化

real    0m0.996s
user    0m0.948s
sys 0m0.044s

5.读入优化+resize，再扔入vector数组

real    0m1.121s
user    0m1.036s
sys 0m0.084s

读入优化一骑绝尘，读入优化+resize位居第二，scanf和resize大致相当，push_back()最慢。

结论：
当数据范围很大的时候，建议使用vector的resize()分块扩容+读入优化的方式进行读取，这样既最大限度降低了内存的浪费，又保证了不会在读入上花费太久。

完整测试程序：

 1 #include <bits/stdc++.h>
 2 using namespace std;
 3 #define maxn 10000005
 4 
 5 vector<int> q1, q2, q3;
 6 int n, curr, num = -1, size_1, size_2;
 7 int x[maxn], input;
 8 char a;
 9 
10 void test_resize(int a)
11 {
12     if(num == size_2-1)
13     {
14         q2.resize(size_2 += 10000);
15     }
16     q2[++num] = a;
17     return ;
18 }
19 
// Fast integer reader built on getchar().
// Skips every character that is not a decimal digit, then accumulates the
// run of digits that follows and returns its value. The character that ends
// the number is consumed as well. Signs are not interpreted, so only
// non-negative numbers are supported.
// At end of input it returns what has been accumulated so far (0 when no
// digit was seen) instead of waiting for more characters.
int read()
{
    // getchar() returns an int so that EOF is distinguishable from every
    // real character; keeping it in a char loses that distinction and lets
    // the skip loop spin forever once the input is exhausted.
    int ch = getchar();
    while (ch != EOF && (ch < '0' || ch > '9'))
        ch = getchar();

    int value = 0;
    while (ch >= '0' && ch <= '9')
    {
        value = value * 10 + (ch - '0');
        ch = getchar();
    }
    return value;
}

35 int main()
36 {
37     freopen("test.in", "r", stdin);
38     scanf("%d", &n);
39     for(int i = 0; i != n; ++i)
40     {
41         //x[i] = read();
42         //curr = read();
43         //test_resize(curr);
44         //scanf("%d", &x[i]);
45         //scanf("%d", &curr);
46         //test_resize(curr);
47         //q3.push_back(curr);
48     }
49     return 0;
50 }