erlang下lists模块sort(排序)方法源码解析(一)
排序算法一直是各种语言最简单也是最复杂的算法,例如十大经典排序算法(动图演示)里面讲的那样
第一次看lists的sort方法的时候,蒙了,几百行的代码,我心想要这么复杂么(因为C语言的冒泡排序我记得不超过30行),于是自己就实现了下
结果更蒙了
%% @doc Sorts the list L into ascending term order using bubble sort.
%%
%% Runs length(L) passes over the list; each pass (do_bubble_sort/1) swaps
%% adjacent elements that are out of order. The sort is stable: elements that
%% compare equal (for example 1 and 1.0) keep their original relative order,
%% which matches the result of lists:sort/1.
%%
%% Returns the sorted list. An empty list is returned unchanged because no
%% pass is ever run for it.
bubble_sort(L) ->
    bubble_sort(L, length(L)).

%% Runs N more passes over L.
bubble_sort(L, 0) ->
    L;
bubble_sort(L, N) ->
    bubble_sort(do_bubble_sort(L), N - 1).

%% One pass: walks the list once and carries the larger element of each
%% adjacent pair towards the tail. Only called with non-empty lists.
do_bubble_sort([A]) ->
    [A];
do_bubble_sort([A, B | R]) when A =< B ->
    %% Already in order. `=<` (not `<`) is essential: with `<`, elements that
    %% compare equal would be swapped on every pass, making the result depend
    %% on whether the number of passes is odd or even.
    [A | do_bubble_sort([B | R])];
do_bubble_sort([A, B | R]) ->
    [B | do_bubble_sort([A | R])].
对比结果如下
6> timer:tc(tt1,bubble_sort,[B]). {21130, [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22, 23,24,25,26,27|...]} 7> timer:tc(lists,sort,[B]). {162, [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22, 23,24,25,26,27|...]} 8>
B是一个打乱顺序的1到1000的序列,我X,这不是一个数量级的算法啊~~~~,不是说好越简单的代码越快么,三观被刷新了。(其实差距主要来自算法本身:冒泡排序的复杂度是O(n²),而lists:sort是归并排序,复杂度是O(n log n)。)
还是老实读lists的源码,一共250+行,摘录于lists.erl
1 -spec sort(List1) -> List2 when 2 List1 :: [T], 3 List2 :: [T], 4 T :: term(). 5 6 sort([X, Y | L] = L0) when X =< Y -> 7 case L of 8 [] -> 9 L0; 10 [Z] when Y =< Z -> 11 L0; 12 [Z] when X =< Z -> 13 [X, Z, Y]; 14 [Z] -> 15 [Z, X, Y]; 16 _ when X == Y -> 17 sort_1(Y, L, [X]); 18 _ -> 19 split_1(X, Y, L, [], []) 20 end; 21 sort([X, Y | L]) -> 22 case L of 23 [] -> 24 [Y, X]; 25 [Z] when X =< Z -> 26 [Y, X | L]; 27 [Z] when Y =< Z -> 28 [Y, Z, X]; 29 [Z] -> 30 [Z, Y, X]; 31 _ -> 32 split_2(X, Y, L, [], []) 33 end; 34 sort([_] = L) -> 35 L; 36 sort([] = L) -> 37 L. 38 39 sort_1(X, [Y | L], R) when X == Y -> 40 sort_1(Y, L, [X | R]); 41 sort_1(X, [Y | L], R) when X < Y -> 42 split_1(X, Y, L, R, []); 43 sort_1(X, [Y | L], R) -> 44 split_2(X, Y, L, R, []); 45 sort_1(X, [], R) -> 46 lists:reverse(R, [X]). 47 48 %% Ascending. 49 split_1(X, Y, [Z | L], R, Rs) when Z >= Y -> 50 split_1(Y, Z, L, [X | R], Rs); 51 split_1(X, Y, [Z | L], R, Rs) when Z >= X -> 52 split_1(Z, Y, L, [X | R], Rs); 53 split_1(X, Y, [Z | L], [], Rs) -> 54 split_1(X, Y, L, [Z], Rs); 55 split_1(X, Y, [Z | L], R, Rs) -> 56 split_1_1(X, Y, L, R, Rs, Z); 57 split_1(X, Y, [], R, Rs) -> 58 rmergel([[Y, X | R] | Rs], []). 59 60 split_1_1(X, Y, [Z | L], R, Rs, S) when Z >= Y -> 61 split_1_1(Y, Z, L, [X | R], Rs, S); 62 split_1_1(X, Y, [Z | L], R, Rs, S) when Z >= X -> 63 split_1_1(Z, Y, L, [X | R], Rs, S); 64 split_1_1(X, Y, [Z | L], R, Rs, S) when S =< Z -> 65 split_1(S, Z, L, [], [[Y, X | R] | Rs]); 66 split_1_1(X, Y, [Z | L], R, Rs, S) -> 67 split_1(Z, S, L, [], [[Y, X | R] | Rs]); 68 split_1_1(X, Y, [], R, Rs, S) -> 69 rmergel([[S], [Y, X | R] | Rs], []). 70 71 %% Descending. 
72 split_2(X, Y, [Z | L], R, Rs) when Z =< Y -> 73 split_2(Y, Z, L, [X | R], Rs); 74 split_2(X, Y, [Z | L], R, Rs) when Z =< X -> 75 split_2(Z, Y, L, [X | R], Rs); 76 split_2(X, Y, [Z | L], [], Rs) -> 77 split_2(X, Y, L, [Z], Rs); 78 split_2(X, Y, [Z | L], R, Rs) -> 79 split_2_1(X, Y, L, R, Rs, Z); 80 split_2(X, Y, [], R, Rs) -> 81 mergel([[Y, X | R] | Rs], []). 82 83 split_2_1(X, Y, [Z | L], R, Rs, S) when Z =< Y -> 84 split_2_1(Y, Z, L, [X | R], Rs, S); 85 split_2_1(X, Y, [Z | L], R, Rs, S) when Z =< X -> 86 split_2_1(Z, Y, L, [X | R], Rs, S); 87 split_2_1(X, Y, [Z | L], R, Rs, S) when S > Z -> 88 split_2(S, Z, L, [], [[Y, X | R] | Rs]); 89 split_2_1(X, Y, [Z | L], R, Rs, S) -> 90 split_2(Z, S, L, [], [[Y, X | R] | Rs]); 91 split_2_1(X, Y, [], R, Rs, S) -> 92 mergel([[S], [Y, X | R] | Rs], []). 93 94 %% merge/1 95 96 mergel([[] | L], Acc) -> 97 mergel(L, Acc); 98 mergel([T1, [H2 | T2], [H3 | T3] | L], Acc) -> 99 mergel(L, [merge3_1(T1, [], H2, T2, H3, T3) | Acc]); 100 mergel([T1, [H2 | T2]], Acc) -> 101 rmergel([merge2_1(T1, H2, T2, []) | Acc], []); 102 mergel([L], []) -> 103 L; 104 mergel([L], Acc) -> 105 rmergel([lists:reverse(L, []) | Acc], []); 106 mergel([], []) -> 107 []; 108 mergel([], Acc) -> 109 rmergel(Acc, []); 110 mergel([A, [] | L], Acc) -> 111 mergel([A | L], Acc); 112 mergel([A, B, [] | L], Acc) -> 113 mergel([A, B | L], Acc). 114 115 rmergel([[H3 | T3], [H2 | T2], T1 | L], Acc) -> 116 rmergel(L, [rmerge3_1(T1, [], H2, T2, H3, T3) | Acc]); 117 rmergel([[H2 | T2], T1], Acc) -> 118 mergel([rmerge2_1(T1, H2, T2, []) | Acc], []); 119 rmergel([L], Acc) -> 120 mergel([lists:reverse(L, []) | Acc], []); 121 rmergel([], Acc) -> 122 mergel(Acc, []). 123 124 %% merge3/3 125 126 %% Take L1 apart. 
127 merge3_1([H1 | T1], M, H2, T2, H3, T3) when H1 =< H2 -> 128 merge3_12(T1, H1, H2, T2, H3, T3, M); 129 merge3_1([H1 | T1], M, H2, T2, H3, T3) -> 130 merge3_21(T1, H1, H2, T2, H3, T3, M); 131 merge3_1([], M, H2, T2, H3, T3) when H2 =< H3 -> 132 merge2_1(T2, H3, T3, [H2 | M]); 133 merge3_1([], M, H2, T2, H3, T3) -> 134 merge2_2(T2, H3, T3, M, H2). 135 136 %% Take L2 apart. 137 merge3_2(T1, H1, M, [H2 | T2], H3, T3) when H1 =< H2 -> 138 merge3_12(T1, H1, H2, T2, H3, T3, M); 139 merge3_2(T1, H1, M, [H2 | T2], H3, T3) -> 140 merge3_21(T1, H1, H2, T2, H3, T3, M); 141 merge3_2(T1, H1, M, [], H3, T3) when H1 =< H3 -> 142 merge2_1(T1, H3, T3, [H1 | M]); 143 merge3_2(T1, H1, M, [], H3, T3) -> 144 merge2_2(T1, H3, T3, M, H1). 145 146 % H1 =< H2. Inlined. 147 merge3_12(T1, H1, H2, T2, H3, T3, M) when H1 =< H3 -> 148 merge3_1(T1, [H1 | M], H2, T2, H3, T3); 149 merge3_12(T1, H1, H2, T2, H3, T3, M) -> 150 merge3_12_3(T1, H1, H2, T2, [H3 | M], T3). 151 152 % H1 =< H2, take L3 apart. 153 merge3_12_3(T1, H1, H2, T2, M, [H3 | T3]) when H1 =< H3 -> 154 merge3_1(T1, [H1 | M], H2, T2, H3, T3); 155 merge3_12_3(T1, H1, H2, T2, M, [H3 | T3]) -> 156 merge3_12_3(T1, H1, H2, T2, [H3 | M], T3); 157 merge3_12_3(T1, H1, H2, T2, M, []) -> 158 merge2_1(T1, H2, T2, [H1 | M]). 159 160 % H1 > H2. Inlined. 161 merge3_21(T1, H1, H2, T2, H3, T3, M) when H2 =< H3 -> 162 merge3_2(T1, H1, [H2 | M], T2, H3, T3); 163 merge3_21(T1, H1, H2, T2, H3, T3, M) -> 164 merge3_21_3(T1, H1, H2, T2, [H3 | M], T3). 165 166 % H1 > H2, take L3 apart. 167 merge3_21_3(T1, H1, H2, T2, M, [H3 | T3]) when H2 =< H3 -> 168 merge3_2(T1, H1, [H2 | M], T2, H3, T3); 169 merge3_21_3(T1, H1, H2, T2, M, [H3 | T3]) -> 170 merge3_21_3(T1, H1, H2, T2, [H3 | M], T3); 171 merge3_21_3(T1, H1, H2, T2, M, []) -> 172 merge2_2(T1, H2, T2, M, H1). 173 174 %% rmerge/3 175 176 %% Take L1 apart. 
177 rmerge3_1([H1 | T1], M, H2, T2, H3, T3) when H1 =< H2 -> 178 rmerge3_12(T1, H1, H2, T2, H3, T3, M); 179 rmerge3_1([H1 | T1], M, H2, T2, H3, T3) -> 180 rmerge3_21(T1, H1, H2, T2, H3, T3, M); 181 rmerge3_1([], M, H2, T2, H3, T3) when H2 =< H3 -> 182 rmerge2_2(T2, H3, T3, M, H2); 183 rmerge3_1([], M, H2, T2, H3, T3) -> 184 rmerge2_1(T2, H3, T3, [H2 | M]). 185 186 %% Take L2 apart. 187 rmerge3_2(T1, H1, M, [H2 | T2], H3, T3) when H1 =< H2 -> 188 rmerge3_12(T1, H1, H2, T2, H3, T3, M); 189 rmerge3_2(T1, H1, M, [H2 | T2], H3, T3) -> 190 rmerge3_21(T1, H1, H2, T2, H3, T3, M); 191 rmerge3_2(T1, H1, M, [], H3, T3) when H1 =< H3 -> 192 rmerge2_2(T1, H3, T3, M, H1); 193 rmerge3_2(T1, H1, M, [], H3, T3) -> 194 rmerge2_1(T1, H3, T3, [H1 | M]). 195 196 % H1 =< H2. Inlined. 197 rmerge3_12(T1, H1, H2, T2, H3, T3, M) when H2 =< H3 -> 198 rmerge3_12_3(T1, H1, H2, T2, [H3 | M], T3); 199 rmerge3_12(T1, H1, H2, T2, H3, T3, M) -> 200 rmerge3_2(T1, H1, [H2 | M], T2, H3, T3). 201 202 % H1 =< H2, take L3 apart. 203 rmerge3_12_3(T1, H1, H2, T2, M, [H3 | T3]) when H2 =< H3 -> 204 rmerge3_12_3(T1, H1, H2, T2, [H3 | M], T3); 205 rmerge3_12_3(T1, H1, H2, T2, M, [H3 | T3]) -> 206 rmerge3_2(T1, H1, [H2 | M], T2, H3, T3); 207 rmerge3_12_3(T1, H1, H2, T2, M, []) -> 208 rmerge2_2(T1, H2, T2, M, H1). 209 210 % H1 > H2. Inlined. 211 rmerge3_21(T1, H1, H2, T2, H3, T3, M) when H1 =< H3 -> 212 rmerge3_21_3(T1, H1, H2, T2, [H3 | M], T3); 213 rmerge3_21(T1, H1, H2, T2, H3, T3, M) -> 214 rmerge3_1(T1, [H1 | M], H2, T2, H3, T3). 215 216 % H1 > H2, take L3 apart. 217 rmerge3_21_3(T1, H1, H2, T2, M, [H3 | T3]) when H1 =< H3 -> 218 rmerge3_21_3(T1, H1, H2, T2, [H3 | M], T3); 219 rmerge3_21_3(T1, H1, H2, T2, M, [H3 | T3]) -> 220 rmerge3_1(T1, [H1 | M], H2, T2, H3, T3); 221 rmerge3_21_3(T1, H1, H2, T2, M, []) -> 222 rmerge2_1(T1, H2, T2, [H1 | M]). 
223 224 %% merge/2 225 226 merge2_1([H1 | T1], H2, T2, M) when H1 =< H2 -> 227 merge2_1(T1, H2, T2, [H1 | M]); 228 merge2_1([H1 | T1], H2, T2, M) -> 229 merge2_2(T1, H2, T2, M, H1); 230 merge2_1([], H2, T2, M) -> 231 lists:reverse(T2, [H2 | M]). 232 233 merge2_2(T1, HdM, [H2 | T2], M, H1) when H1 =< H2 -> 234 merge2_1(T1, H2, T2, [H1, HdM | M]); 235 merge2_2(T1, HdM, [H2 | T2], M, H1) -> 236 merge2_2(T1, H2, T2, [HdM | M], H1); 237 merge2_2(T1, HdM, [], M, H1) -> 238 lists:reverse(T1, [H1, HdM | M]). 239 240 %% rmerge/2 241 242 rmerge2_1([H1 | T1], H2, T2, M) when H1 =< H2 -> 243 rmerge2_2(T1, H2, T2, M, H1); 244 rmerge2_1([H1 | T1], H2, T2, M) -> 245 rmerge2_1(T1, H2, T2, [H1 | M]); 246 rmerge2_1([], H2, T2, M) -> 247 lists:reverse(T2, [H2 | M]). 248 249 rmerge2_2(T1, HdM, [H2 | T2], M, H1) when H1 =< H2 -> 250 rmerge2_2(T1, H2, T2, [HdM | M], H1); 251 rmerge2_2(T1, HdM, [H2 | T2], M, H1) -> 252 rmerge2_1(T1, H2, T2, [H1, HdM | M]); 253 rmerge2_2(T1, HdM, [], M, H1) -> 254 lists:reverse(T1, [H1, HdM | M]).
好,这是我见过最复杂的排序算法了。
这个算法和归并排序有点像,可是由于erlang的特性,变量不可变,使得它和大部分的排序方法有很大的区别,这个算法的复杂度应该是O(n log n)(拆分阶段只遍历一次list,是O(n);后面的归并阶段是O(n log n))
这个算法可以分3大块,第一块是sort_*函数,第二块是split_*,第3块是rmergel和mergel
首先
sort([X, Y | L] = L0) when X =< Y -> %当list只有2个或3个元素时直接比较后返回,当list超过3个元素时进入sort_1或者split_*函数
..........
sort([X, Y | L]) -> %分了2种情况:第一个元素小于等于第二个(上面的子句),或者第一个元素大于第二个(这个子句)
.......
sort([_] = L) -> %list只有1个元素也直接返回
L;
sort([] = L) -> %list为空直接返回
L.

sort_1(X, [Y | L], R) when X == Y ->
sort_1(Y, L, [X | R]);
sort_1(X, [Y | L], R) when X < Y ->
split_1(X, Y, L, R, []);
sort_1(X, [Y | L], R) ->
split_2(X, Y, L, R, []);
sort_1(X, [], R) ->
lists:reverse(R, [X]).
这段代码还是比较清晰的,就是把超过3个元素的list传入split_*
下面看split_1系列
%% Ascending.
%% split_1(X, Y, Input, R, Rs) consumes Input one element (Z) at a time while
%% building the current run.
%%   X, Y - the two largest elements of the current run so far; every call
%%          visible in this file passes them with X =< Y.
%%   R    - the earlier elements of the current run, newest first. Each
%%          element is pushed only when it is =< the new X, so R is in
%%          descending order and [Y, X | R] is a descending list.
%%   Rs   - the runs that have already been closed.
%%
%% Z >= Y, i.e. X =< Y =< Z: Z extends the run at the top. X is pushed onto R
%% and (Y, Z) becomes the new (X, Y) pair.
split_1(X, Y, [Z | L], R, Rs) when Z >= Y ->
split_1(Y, Z, L, [X | R], Rs);
%% X =< Z < Y: Z fits between X and Y. X is pushed onto R and (Z, Y) becomes
%% the new pair.
split_1(X, Y, [Z | L], R, Rs) when Z >= X ->
split_1(Z, Y, L, [X | R], Rs);
%% Z < X and R is empty: the run so far is only X and Y, so Z is its smallest
%% element and can simply become the whole of R.
split_1(X, Y, [Z | L], [], Rs) ->
split_1(X, Y, L, [Z], Rs);
%% Z < X and R is not empty: Z has not been compared with the elements of R,
%% so it cannot be placed into the run here. It is handed to split_1_1/6 as
%% the extra last argument.
split_1(X, Y, [Z | L], R, Rs) ->
split_1_1(X, Y, L, R, Rs, Z);
%% Input exhausted: the current run [Y, X | R] is added in front of the closed
%% runs and everything is passed to rmergel/2.
split_1(X, Y, [], R, Rs) ->
rmergel([[Y, X | R] | Rs], []).
WTF,这些到底在干什么,erlang又没有调试跟踪,又没说明,完全就蒙了,仔细研究下终于明白了这2个函数的意义,不得不说写源码的真是大神啊~~~
通过上面的分析,我们知道了一个规律,每次都会比较3个数的大小,而且还会处理其中最小的数
X:下桩 Y:上桩, Z:目前list的第一个元素 R:经过排序了的list,Rs:已经拆分完成的有序子list的集合,S:split_1_1多出来的参数,用来暂存那个还不能放进R的元素
split_1这个函数的作用是把X,Y,Z中最小的放到R中,同时要保证这个数比R中现有的元素都大,
这个怎么保证呢,当Z>=X(包括X=<Z<Y和Z>=Y两种情况)的时候直接把X放进R,
原因就是X一直小于Y,而且R里面的元素都比X小才放进去的,而且整个过程X和Y的值都是增加的,所以X肯定大于R中的任何一个
(下面的R代表R中任意一个元素)假设Z0>Y0
- R0<X0<Y0<Z0 开始R0为空,比较成立
- R1<X1<Y1<Z1 这时R1=[X0|R0],X1=Y0,Y1=Z0,当Z1>Y1,比较还是成立
- R2<X2<Y2<Z2 这时R2=[X1|R1],X2=Y1,Y2=Z1,当Z2>Y2,比较还是成立
- 。。。。。。。
当Z>=X的时候也一样,于是当Z>=X或者Z>=Y的时候,只要把X的值放到R中就行,R里面的元素越来越大,是排好序的(从大到小),于是上面绿色注释的代码(split_1的前两个子句)就能理解了
蓝色注释的代码(split_1的第三个子句):当R为空, Z<X<Y,当然R<Z<X<Y,于是也能理解了
主要是褐色注释的代码(split_1的第四个子句):当R不为空,我们知道R<X<Y,而且Z<X<Y,可是R里面的元素和Z的大小关系不能确定,
于是我们知道了当前最小的是Z,可是Z不一定大于R的所有元素,上面的split_1函数的逻辑就不通了,然后把Z存入到最后一个参数进入split_1_1
我们来查看split_1_1
%% split_1_1(X, Y, Input, R, Rs, S) continues the run that split_1/5 was
%% building, with one extra argument S: the element split_1/5 could not place
%% because it was smaller than X while R was not empty. X, Y, R and Rs have
%% the same roles as in split_1/5.
%%
%% Z >= Y, i.e. X =< Y =< Z: same step as in split_1/5. X is pushed onto R and
%% (Y, Z) becomes the new pair; S is carried along unchanged.
split_1_1(X, Y, [Z | L], R, Rs, S) when Z >= Y ->
    split_1_1(Y, Z, L, [X | R], Rs, S);
%% X =< Z < Y: same step as in split_1/5. X is pushed onto R and (Z, Y)
%% becomes the new pair; S is carried along unchanged.
split_1_1(X, Y, [Z | L], R, Rs, S) when Z >= X ->
    split_1_1(Z, Y, L, [X | R], Rs, S);
%% Z < X and S =< Z: neither S nor Z has been compared with the elements of R.
%% The current run is closed as [Y, X | R] and pushed onto Rs, and a new run
%% is started in split_1/5 with the pair (S, Z) and an empty R.
split_1_1(X, Y, [Z | L], R, Rs, S) when S =< Z ->
    split_1(S, Z, L, [], [[Y, X | R] | Rs]);
%% Z < X and Z < S: same as the previous clause, except that the new run
%% starts with the pair (Z, S).
split_1_1(X, Y, [Z | L], R, Rs, S) ->
    split_1(Z, S, L, [], [[Y, X | R] | Rs]);
%% Input exhausted: S becomes a one-element run of its own, the current run
%% is closed as [Y, X | R], and all runs are passed to rmergel/2.
split_1_1(X, Y, [], R, Rs, S) ->
    rmergel([[S], [Y, X | R] | Rs], []).
我们可以看到,紫色注释的代码(split_1_1的第三个子句),当S<=Z<X<Y,R<X,我们知道最小的数是S,然后是Z,可是我们不能比较R里面的元素与这2个数的大小,
如果按照上面函数的逻辑,可以再弄个函数split_1_1_1,可这样函数不是闭环的,于是大神直接把肯定比R大的2个元素X,Y存入R(保证了R的有序),把它作为一个完成的子list存入Rs,再用S和Z回到split_1,这里真是太厉害了
1 X:12,Y:13,Z:54,L:[32,1,4521,32,214,541,1,12,3],R:[],Rs:[]
2 X:13,Y:54,Z:32,L:[1,4521,32,214,541,1,12,3],R:[12],Rs:[]
3 X:32,Y:54,Z:1,L:[4521,32,214,541,1,12,3],R:[13,12],Rs:[]
4 X:32,Y:54,Z:4521,L:[32,214,541,1,12,3],R:[13,12],Rs:[],S:1
5 X:54,Y:4521,Z:32,L:[214,541,1,12,3],R:[32,13,12],Rs:[],S:1
6 X:1,Y:32,Z:214,L:[541,1,12,3],R:[],Rs:[[4521,54,32,13,12]]
7 X:32,Y:214,Z:541,L:[1,12,3],R:[1],Rs:[[4521,54,32,13,12]]
8 X:214,Y:541,Z:1,L:[12,3],R:[32,1],Rs:[[4521,54,32,13,12]]
9 X:214,Y:541,Z:12,L:[3],R:[32,1],Rs:[[4521,54,32,13,12]],S:1
10 X:1,Y:12,Z:3,L:[],R:[],Rs:[[541,214,32,1],[4521,54,32,13,12]]
11 Rs:[[12,3,1],[541,214,32,1],[4521,54,32,13,12]]
(注:erlang shell会把全是可打印字符编码的整数list显示成字符串,例如[12]显示为"\f",[13,12]显示为"\r\f",[32,13,12]显示为" \r\f",这里统一写成整数list。)
我们看个简单的例子执行过程,大概就能明白这个逻辑了。
这里的List = [12,13,54,32,1,4521,32,214,541,1,12,3],这2个函数执行完成后的结果是[[12,3,1],[541,214,32,1],[4521,54,32,13,12]]
可以看到这里经过了N次循环(N是List长度),生成了几个子list,每个子list都是有序的,这样肯定没有完成,剩下的就是mergel和rmergel函数的作用了
篇幅太长,不好排版,下面的函数分析放
erlang下lists模块sort(排序)方法源码解析(二)
未完待续。。。