======================= **基础知识** =======================
Union-find 是一种抽象化较高的数据结构,主要用来处理联通性问题,联通性自身具有传递性;
参考文章: 并查集(Union-Find)算法介绍 & 应用举例
1 //#include "UnionSet.h" 2 #include <iostream> 3 using namespace std; 4 class UnionSet { 5 public: 6 int *fa, n; 7 UnionSet(int n): n(n) { 8 fa = new int[n + 1]; 9 for(int i = 0 ; i<= n; i++) { 10 fa[i] = i; 11 } 12 } 13 int find(int x) { 14 return fa[x] = (fa[x] == x ? x : find(fa[x])); 15 } 16 void merge(int a, int b) { 17 fa[find(b)] = find(a); 18 return; 19 } 20 }; 21 22 int main() 23 { 24 //input 25 //6 5 26 //1 1 2 27 //2 1 3 28 //1 2 4 29 //1 4 3 30 //2 1 3 31 32 int n, m; 33 int a, b, c; 34 scanf("%d %d\n", &n, &m); 35 UnionSet u1(n); 36 while(m--) { 37 int a, b, c; 38 scanf("%d %d %d\n", &a, &b, &c); 39 switch(a) { 40 case 1 : 41 u1.merge(b, c); break; 42 case 2:{ 43 if(u1.find(b) == u1.find(c)) printf("Yes\n"); 44 else printf("No\n"); 45 } break;; 46 default : 47 printf("no such op code %d\n", a); 48 } 49 } 50 51 return 0; 5
// Union-find demo driven by standard input.
//
// Input format: one line "n m", followed by m lines "op a b" where
//   op == 1  merges the sets that contain a and b;
//   op == 2  prints "Yes" when a and b are in the same set, "No" otherwise.
// Sample input:
//   6 5
//   1 1 2
//   2 1 3
//   1 2 4
//   1 4 3
//   2 1 3
//#include "UnionSet.h"
#include <cstdio>
#include <iostream>
#include <vector>

// Disjoint-set forest over the elements 0..n with path compression.
class UnionSet {
public:
    std::vector<int> fa;  // fa[x] is the parent of x; a root satisfies fa[x] == x
    int n;                // largest valid element id

    // Builds n + 1 singleton sets so both 0-based and 1-based ids are usable.
    // Precondition: n >= 0.
    explicit UnionSet(int n) : fa(n + 1), n(n) {
        for (int i = 0; i <= n; ++i) {
            fa[i] = i;
        }
    }

    // Returns the representative of x's set and re-parents every node on the
    // walked path directly to that representative.
    int find(int x) {
        int root = x;
        while (fa[root] != root) {
            root = fa[root];
        }
        while (fa[x] != root) {
            const int next = fa[x];
            fa[x] = root;
            x = next;
        }
        return root;
    }

    // Joins the set containing b into the set containing a.
    void merge(int a, int b) {
        const int root_a = find(a);
        const int root_b = find(b);
        fa[root_b] = root_a;
    }
};

int main() {
    int n = 0;
    int m = 0;
    // Stop early on malformed input instead of working with indeterminate values.
    if (std::scanf("%d %d", &n, &m) != 2 || n < 0) {
        std::printf("invalid header\n");
        return 1;
    }

    UnionSet u1(n);
    while (m-- > 0) {
        int op = 0;
        int x = 0;
        int y = 0;
        if (std::scanf("%d %d %d", &op, &x, &y) != 3) {
            break;  // truncated input: nothing more to process
        }
        if (op != 1 && op != 2) {
            std::printf("no such op code %d\n", op);
            continue;
        }
        // Reject ids outside 0..n before they are used as array indices.
        if (x < 0 || x > n || y < 0 || y > n) {
            std::printf("index out of range: %d %d\n", x, y);
            continue;
        }
        if (op == 1) {
            u1.merge(x, y);
        } else {
            std::printf("%s\n", u1.find(x) == u1.find(y) ? "Yes" : "No");
        }
    }

    return 0;
}
实际使用过程中要具体问题具体分析;
======================= **代码演示** =======================
1. Quick-Find 算法: 染色法:通过把联通点标定为相同颜色来表示同一集合; 查找(联通判断)操作O(1); 合并操作O(n);
核心思想在于每次merge都会把所有联通点的对应的颜色变为一种颜色,也就是标定哪些点具有相同的颜色(其实也是指向同一个根节点);
每次merge 操作都会遍历所有点,这种方式比较呆板,每次都把所有点都过一遍,那就不会漏了;
#include <vector>

// Quick-Find disjoint set: every element stores the "color" (set id) of the
// set it belongs to, so a lookup is a single array read and a merge recolors
// every element of one set.
class UnionSet {
public:
    std::vector<int> color;  // color[i] is the set id of element i
    int n;                   // number of elements; valid ids are 0..n-1

    // Starts with n singleton sets: element i is colored i.
    // Precondition: n >= 0.
    explicit UnionSet(int n) : color(n), n(n) {
        for (int i = 0; i < n; ++i) {
            color[i] = i;
        }
    }

    // Returns the set id of idx, or -1 when idx is outside 0..n-1.
    int find(int idx) const {
        if (idx < 0 || idx >= n) return -1;
        return color[idx];
    }

    // Recolors every element of b's set with a's color. Out-of-range ids are
    // ignored.
    void merge(int a, int b) {
        if (a < 0 || a >= n || b < 0 || b >= n) return;
        const int keep = color[a];
        const int drop = color[b];
        if (keep == drop) return;
        for (int& c : color) {
            if (c == drop) c = keep;
        }
    }
};
2. Quick-Union 算法:树形结构存储节点,每组root下的包含的节点,是同一个集合; 查找:O(tree-height树高); 合并操作O(tree-height树高)
所以每次要维护的是根节点;在合并过程,如果把树高 大的 合并到树高 小的 ,就会导致树高越来越 大(极限会退化成一个链表(每次都合并到一个单独节点时候));
#include <vector>

// Quick-Union disjoint set: each set is a tree stored as parent links, and the
// root of the tree identifies the set. No balancing or path compression is
// applied, so a tree may degrade into a chain.
class UnionSet {
public:
    std::vector<int> boss;  // boss[x] is the parent of x; a root satisfies boss[x] == x
    int n;                  // largest valid element id

    // Builds n + 1 singleton sets (ids 0..n). Precondition: n >= 0.
    explicit UnionSet(int n) : boss(n + 1), n(n) {
        for (int i = 0; i <= n; ++i) boss[i] = i;
    }

    // Walks parent links up to the root. Iterative so that a chain-shaped tree
    // cannot exhaust the call stack.
    int find(int x) const {
        while (boss[x] != x) x = boss[x];
        return x;
    }

    // Merges two sets, not merely two elements: both roots are looked up first
    // and the root of b's set is attached under the root of a's set. Linking
    // b itself would move only the subtree rooted at b.
    void merge(int a, int b) {
        const int f_a = find(a);
        const int f_b = find(b);
        if (f_a == f_b) return;
        boss[f_b] = f_a;
    }
};
优化的方式:a. 按节点数量合并:合并时两棵树都要遍历到 root 节点,这部分开销没有差别;差别在于之后的查找——被合并的那棵树上,所有节点查找到 root 的步数都会 +1,所以把节点数量少的 root 合并到节点数量多的 root 下,可以减少平均查找次数; 查找:O(log N); 合并:O(log N)
#include <vector>

// Weighted Quick-Union: like Quick-Union, but a merge always attaches the root
// of the tree with fewer nodes under the root of the tree with more nodes.
class UnionSet {
public:
    std::vector<int> boss;  // boss[x] is the parent of x; a root satisfies boss[x] == x
    std::vector<int> size;  // size[r] is the node count of the tree rooted at r
    int n;                  // largest valid element id

    // Builds n + 1 singleton sets (ids 0..n). Precondition: n >= 0.
    explicit UnionSet(int n) : boss(n + 1), size(n + 1, 1), n(n) {
        for (int i = 0; i <= n; ++i) boss[i] = i;
    }

    // Walks parent links up to the root of x's tree.
    int find(int x) const {
        while (boss[x] != x) x = boss[x];
        return x;
    }

    // Merges two sets (not just two elements): the smaller tree's root is
    // linked under the larger tree's root; on a tie b's root goes under a's.
    void merge(int a, int b) {
        int big = find(a);
        int small = find(b);
        if (big == small) return;
        if (size[big] < size[small]) {
            const int tmp = big;
            big = small;
            small = tmp;
        }
        boss[small] = big;
        size[big] += size[small];
    }
};
b. 路径压缩:树高 都变成1,所有节点都挂在root 下面即可;查找:近似O(1); 合并:近似O(1);
#include <vector>

// Quick-Union with path compression: every find re-parents the nodes it
// visited directly to the root, which flattens the tree for later lookups.
class UnionSet {
public:
    std::vector<int> boss;  // boss[x] is the parent of x; a root satisfies boss[x] == x
    int n;                  // largest valid element id

    // Builds n + 1 singleton sets (ids 0..n). Precondition: n >= 0.
    explicit UnionSet(int n) : boss(n + 1), n(n) {
        for (int i = 0; i <= n; ++i) boss[i] = i;
    }

    // Returns the root of x's tree. The first pass locates the root, the second
    // pass points every node on the path at it. Written iteratively so a long
    // uncompressed chain cannot exhaust the call stack.
    int find(int x) {
        int root = x;
        while (boss[root] != root) root = boss[root];
        while (boss[x] != root) {
            const int next = boss[x];
            boss[x] = root;
            x = next;
        }
        return root;
    }

    // Merges two sets, not merely two elements: both roots are looked up first
    // and the root of b's set is attached under the root of a's set. Linking
    // b itself would move only the subtree rooted at b.
    void merge(int a, int b) {
        const int f_a = find(a);
        const int f_b = find(b);
        if (f_a == f_b) return;
        boss[f_b] = f_a;
    }
};
======================= **经典问题** =======================
1. 简单应用(leetcode200)
// LeetCode 200: count the islands in a grid of '1' (land) and '0' (water)
// cells by uniting horizontally/vertically adjacent land cells.
class Solution {
public:
    // Disjoint-set forest over the ids 0..n with path compression.
    class UnionSet {
    public:
        int n;                    // largest valid id
        std::vector<int> father;  // father[x] is the parent of x; roots point to themselves

        explicit UnionSet(int n) : n(n), father(n + 1) {
            for (int i = 0; i <= n; ++i) father[i] = i;
        }

        // Returns the root of i's set and compresses the walked path.
        int find(int i) {
            int root = i;
            while (father[root] != root) root = father[root];
            while (father[i] != root) {
                const int next = father[i];
                father[i] = root;
                i = next;
            }
            return root;
        }

        // Attaches the root of b's set under the root of a's set.
        void merge(int a, int b) {
            const int root_a = find(a);
            const int root_b = find(b);
            father[root_b] = root_a;
        }
    };

    // Returns the number of connected groups of '1' cells. An empty grid has
    // no islands.
    int numIslands(std::vector<std::vector<char>>& grid) {
        if (grid.empty() || grid[0].empty()) return 0;
        const int rows = static_cast<int>(grid.size());
        const int cols = static_cast<int>(grid[0].size());

        // Each cell (r, c) is mapped to the id r * cols + c. Looking only at the
        // left and lower neighbour is enough: every adjacent pair is visited
        // from one of its two cells.
        const int step[2][2] = {{0, -1}, {1, 0}};  // {row delta, column delta}
        UnionSet cells(rows * cols);
        for (int r = 0; r < rows; ++r) {
            for (int c = 0; c < cols; ++c) {
                if (grid[r][c] == '0') continue;
                for (int d = 0; d < 2; ++d) {
                    const int nr = r + step[d][0];
                    const int nc = c + step[d][1];
                    if (nr < 0 || nr >= rows || nc < 0 || nc >= cols) continue;
                    if (grid[nr][nc] == '0') continue;
                    cells.merge(r * cols + c, nr * cols + nc);
                }
            }
        }

        // Every island has exactly one land cell that is the root of its set.
        int islands = 0;
        for (int r = 0; r < rows; ++r) {
            for (int c = 0; c < cols; ++c) {
                const int id = r * cols + c;
                if (grid[r][c] == '1' && cells.find(id) == id) ++islands;
            }
        }
        return islands;
    }
};
2. 相等关系的联通性(leetcode990)
// LeetCode 990: decide whether a list of "x==y" / "x!=y" constraints over
// lowercase variable names can all hold at once.
class Solution {
public:
    // Disjoint-set forest over the ids 0..n with path compression.
    class UnionSet {
    public:
        int n;                    // largest valid id
        std::vector<int> father;  // father[x] is the parent of x; roots point to themselves

        explicit UnionSet(int n) : n(n), father(n + 1) {
            for (int i = 0; i <= n; ++i) father[i] = i;
        }

        // Returns the root of i's set and compresses the walked path.
        int find(int i) {
            int root = i;
            while (father[root] != root) root = father[root];
            while (father[i] != root) {
                const int next = father[i];
                father[i] = root;
                i = next;
            }
            return root;
        }

        // Attaches the root of b's set under the root of a's set.
        void merge(int a, int b) {
            const int root_a = find(a);
            const int root_b = find(b);
            father[root_b] = root_a;
        }
    };

    // Returns true when no "!=" constraint relates two variables that the
    // "==" constraints force to be equal. Each equation is read as: variable
    // at index 0, operator starting at index 1, variable at index 3.
    bool equationsPossible(std::vector<std::string>& equations) {
        UnionSet vars(26);

        // Pass 1: equalities are transitive, so union their operands.
        for (const std::string& eq : equations) {
            if (eq[1] == '!') continue;
            vars.merge(eq[0] - 'a', eq[3] - 'a');
        }

        // Pass 2: an inequality inside a single equality class is a conflict.
        for (const std::string& eq : equations) {
            if (eq[1] == '=') continue;
            if (vars.find(eq[0] - 'a') == vars.find(eq[3] - 'a')) return false;
        }
        return true;
    }
};
3. 代表集合关系的树中存在环,且给出了所有边的数据,去除不影响集合的边;(leetcode684)
// LeetCode 684: the input is a tree plus one extra undirected edge; find the
// edge whose removal leaves a tree.
class Solution {
public:
    // Disjoint-set forest over the ids 0..n with path compression.
    struct UnionSet {
        int n;                    // largest valid id
        std::vector<int> father;  // father[x] is the parent of x; roots point to themselves

        explicit UnionSet(int n) : n(n), father(n + 1) {
            for (int i = 0; i <= n; ++i) father[i] = i;
        }

        // Returns the root of i's set and compresses the walked path.
        int find(int i) {
            int root = i;
            while (father[root] != root) root = father[root];
            while (father[i] != root) {
                const int next = father[i];
                father[i] = root;
                i = next;
            }
            return root;
        }

        // Attaches the root of b's set under the root of a's set.
        void merge(int a, int b) {
            const int root_a = find(a);
            const int root_b = find(b);
            father[root_b] = root_a;
        }
    };

    // Returns the last edge (in input order) whose endpoints were already
    // connected by the edges before it. Falls back to the first edge when no
    // edge closes a cycle, and returns an empty vector for empty input.
    // Node labels are used directly as set ids, so they must lie in
    // 0..edges.size().
    std::vector<int> findRedundantConnection(std::vector<std::vector<int>>& edges) {
        if (edges.empty()) return {};
        const int len = static_cast<int>(edges.size());

        int redundant = 0;
        UnionSet nodes(len);
        for (int i = 0; i < len; ++i) {
            const int u = edges[i][0];
            const int v = edges[i][1];
            if (nodes.find(u) == nodes.find(v)) {
                redundant = i;  // already connected: this edge closes a cycle
            } else {
                nodes.merge(u, v);
            }
        }
        return edges[redundant];
    }
};
//进阶题:
4. 并查集变种(leetcode128) : a. 存index减少无用数据空间; b. hashmap 快速查找值;c. 并查集变种满足特定需求;
// LeetCode 128: length of the longest run of consecutive integers in an
// unsorted array, solved by uniting array indices whose values differ by one.
class Solution {
public:
    // Disjoint-set forest over the ids 0..n-1 that also tracks set sizes.
    struct UnionSet {
        int n;                    // number of elements
        std::vector<int> father;  // father[x] is the parent of x; roots point to themselves
        std::vector<int> cnt;     // cnt[r] is the element count of the set rooted at r

        explicit UnionSet(int n) : n(n), father(n), cnt(n, 1) {
            for (int i = 0; i < n; ++i) father[i] = i;
        }

        // Returns the root of i's set and compresses the walked path.
        int find(int i) {
            int root = i;
            while (father[root] != root) root = father[root];
            while (father[i] != root) {
                const int next = father[i];
                father[i] = root;
                i = next;
            }
            return root;
        }

        // Attaches b's set under a's root and adds its size to that root.
        void merge(int a, int b) {
            const int fa = find(a);
            const int fb = find(b);
            if (fa == fb) return;
            father[fb] = fa;
            cnt[fa] += cnt[fb];
        }
    };

    // Returns the size of the largest group of distinct values that form a run
    // v, v + 1, v + 2, ...; returns 0 for an empty array.
    //
    // The sets hold array indices rather than values, so their storage is
    // proportional to nums.size() and not to the value range. An alternative
    // is to sort the distinct values (e.g. in a std::set) and scan for runs.
    int longestConsecutive(std::vector<int>& nums) {
        const int len = static_cast<int>(nums.size());
        if (len == 0) return 0;

        UnionSet runs(len);
        // value -> index of its first occurrence. Keys are widened to long long
        // so value + 1 and value - 1 cannot overflow at the int limits.
        std::unordered_map<long long, int> first_index;

        for (int i = 0; i < len; ++i) {
            const long long value = nums[i];
            if (first_index.find(value) != first_index.end()) continue;  // duplicate: stays a singleton

            const auto above = first_index.find(value + 1);
            if (above != first_index.end()) runs.merge(i, above->second);
            const auto below = first_index.find(value - 1);
            if (below != first_index.end()) runs.merge(i, below->second);

            first_index.emplace(value, i);
        }

        int best = 0;
        for (int i = 0; i < len; ++i) {
            if (runs.find(i) != i) continue;  // only roots carry the set size
            best = std::max(best, runs.cnt[i]);
        }
        return best;
    }
};
leetcode947 : 这里cnt 存储的就是root 节点数量;
// LeetCode 947: stones that share a row or a column are connected; every
// connected group can be reduced to a single stone.
class Solution {
public:
    // Disjoint-set forest over the ids 0..n-1 that tracks how many sets remain.
    struct UnionSet {
        int n;                    // number of elements
        int cnt;                  // current number of disjoint sets (roots)
        std::vector<int> father;  // father[x] is the parent of x; roots point to themselves

        explicit UnionSet(int n) : n(n), cnt(n), father(n) {
            for (int i = 0; i < n; ++i) father[i] = i;
        }

        // Returns the root of i's set and compresses the walked path.
        int find(int i) {
            int root = i;
            while (father[root] != root) root = father[root];
            while (father[i] != root) {
                const int next = father[i];
                father[i] = root;
                i = next;
            }
            return root;
        }

        // Unites the two sets; each successful union removes one root.
        void merge(int a, int b) {
            const int fa = find(a);
            const int fb = find(b);
            if (fa == fb) return;
            father[fb] = fa;
            cnt -= 1;
        }
    };

    // Returns the number of stones minus the number of connected groups.
    // Each stone is read as {stones[i][0], stones[i][1]}.
    int removeStones(std::vector<std::vector<int>>& stones) {
        const int len = static_cast<int>(stones.size());
        UnionSet groups(len);

        // coordinate value -> index of the first stone seen with that coordinate
        std::unordered_map<int, int> mx;
        std::unordered_map<int, int> my;
        for (int i = 0; i < len; ++i) {
            // emplace only inserts when the key is new; otherwise it hands back
            // the stone already registered for this row/column.
            const auto by_x = mx.emplace(stones[i][0], i);
            if (!by_x.second) groups.merge(i, by_x.first->second);

            const auto by_y = my.emplace(stones[i][1], i);
            if (!by_y.second) groups.merge(i, by_y.first->second);
        }

        return len - groups.cnt;
    }
};
5. 如何对于各个root 节点下的集合进行操作:
leetcode1202: 对于每个集合中元素进行排列,并输出
// LeetCode 1202: indices linked by swap pairs form groups whose characters
// can be rearranged freely; produce the smallest string reachable that way.
class Solution {
public:
    // Disjoint-set forest over the ids 0..n with path compression.
    struct UnionSet {
        int n;                    // largest valid id
        std::vector<int> father;  // father[x] is the parent of x; roots point to themselves

        explicit UnionSet(int n) : n(n), father(n + 1) {
            for (int i = 0; i <= n; ++i) father[i] = i;
        }

        // Returns the root of i's set and compresses the walked path.
        int find(int i) {
            int root = i;
            while (father[root] != root) root = father[root];
            while (father[i] != root) {
                const int next = father[i];
                father[i] = root;
                i = next;
            }
            return root;
        }

        // Attaches the root of b's set under the root of a's set.
        void merge(int a, int b) {
            const int root_a = find(a);
            const int root_b = find(b);
            father[root_b] = root_a;
        }
    };

    // Returns the lexicographically smallest string obtainable from s by
    // swapping the characters at the index pairs in `pairs` any number of
    // times. Each pair is read as {pairs[k][0], pairs[k][1]}.
    std::string smallestStringWithSwaps(std::string s, std::vector<std::vector<int>>& pairs) {
        // Iterate by length rather than until a '\0' so that a string with an
        // embedded NUL character is processed completely.
        const int len = static_cast<int>(s.size());

        UnionSet groups(len);
        for (const auto& p : pairs) groups.merge(p[0], p[1]);

        // One min-heap of characters per group, keyed by the group's root.
        std::map<int, std::priority_queue<char, std::vector<char>, std::greater<char>>> pool;
        for (int i = 0; i < len; ++i) pool[groups.find(i)].push(s[i]);

        // Scanning left to right, each position takes the smallest character
        // still available in its own group.
        std::string ans;
        ans.reserve(s.size());
        for (int i = 0; i < len; ++i) {
            auto& heap = pool[groups.find(i)];
            ans += heap.top();
            heap.pop();
        }
        return ans;
    }
};
// Merges accounts that share at least one e-mail address (this appears to be
// LeetCode 721, "Accounts Merge" - the surrounding text does not name it).
class Solution {
public:
    // Disjoint-set forest over the ids 0..n-1 with path compression.
    struct UnionSet {
        int n;                    // number of elements
        std::vector<int> father;  // father[x] is the parent of x; roots point to themselves

        explicit UnionSet(int n) : n(n), father(n) {
            for (int i = 0; i < n; ++i) father[i] = i;
        }

        // Returns the root of i's set and compresses the walked path.
        int find(int i) {
            int root = i;
            while (father[root] != root) root = father[root];
            while (father[i] != root) {
                const int next = father[i];
                father[i] = root;
                i = next;
            }
            return root;
        }

        // Attaches the root of b's set under the root of a's set.
        void merge(int a, int b) {
            const int root_a = find(a);
            const int root_b = find(b);
            father[root_b] = root_a;
        }
    };

    // Each account is {name, email, email, ...}. Accounts sharing an e-mail are
    // united; the result has one entry per group: the name of the group's root
    // account followed by the group's distinct e-mails in ascending order.
    // Accounts with no fields at all are skipped.
    std::vector<std::vector<std::string>> accountsMerge(std::vector<std::vector<std::string>>& accounts) {
        const int len = static_cast<int>(accounts.size());
        UnionSet owners(len);

        // e-mail -> index of the most recent account that listed it
        std::unordered_map<std::string, int> m2i;
        for (int i = 0; i < len; ++i) {
            const std::vector<std::string>& account = accounts[i];
            for (std::size_t j = 1; j < account.size(); ++j) {
                const auto seen = m2i.find(account[j]);
                if (seen != m2i.end()) {
                    owners.merge(i, seen->second);
                    seen->second = i;
                } else {
                    m2i.emplace(account[j], i);
                }
            }
        }

        // Collect every group's e-mails at its root; std::set removes
        // duplicates and keeps them sorted.
        std::vector<std::set<std::string>> proc(len);
        for (int i = 0; i < len; ++i) {
            if (accounts[i].empty()) continue;
            proc[owners.find(i)].insert(accounts[i].begin() + 1, accounts[i].end());
        }

        std::vector<std::vector<std::string>> ans;
        for (int i = 0; i < len; ++i) {
            if (accounts[i].empty() || owners.find(i) != i) continue;
            std::vector<std::string> merged;
            merged.reserve(proc[i].size() + 1);
            merged.push_back(accounts[i][0]);
            merged.insert(merged.end(), proc[i].begin(), proc[i].end());
            ans.push_back(merged);
        }
        return ans;
    }
};
6. 反复调用UnionSet, 并判断最后一个多余联通条件;leetcode 685
// LeetCode 685: the input is a rooted tree plus one extra directed edge; find
// the edge to delete. Combines in-degree counting with a reusable union-find.
class Solution {
public:
    // Disjoint-set forest over the ids 0..n that can be reset and reused.
    struct UnionSet {
        int n;                    // largest valid id
        std::vector<int> father;  // father[x] is the parent of x; roots point to themselves

        explicit UnionSet(int n) : n(n), father(n + 1) {
            init();
        }

        // Resets every element to its own singleton set.
        void init() {
            for (int i = 0; i <= n; ++i) father[i] = i;
        }

        // Returns the root of i's set and compresses the walked path.
        int find(int i) {
            int root = i;
            while (father[root] != root) root = father[root];
            while (father[i] != root) {
                const int next = father[i];
                father[i] = root;
                i = next;
            }
            return root;
        }

        // Attaches the root of b's set under the root of a's set.
        void merge(int a, int b) {
            const int root_a = find(a);
            const int root_b = find(b);
            father[root_b] = root_a;
        }
    };

    // Tries candidate edges from the last to the second in input order and
    // returns the first one whose removal leaves the remaining edges free of
    // (undirected) cycles; falls back to the first edge. Returns an empty
    // vector for empty input. Each edge is {from, to}; labels are used as set
    // ids and as in-degree indices, so they must lie in 0..edges.size(), and a
    // `to` label of 0 would be indistinguishable from "no such node" below.
    std::vector<int> findRedundantDirectedConnection(std::vector<std::vector<int>>& edges) {
        const int len = static_cast<int>(edges.size());
        if (len == 0) return {};

        // Find the first node that receives a second incoming edge; 0 means
        // that no such node exists.
        int in_idx = 0;
        std::vector<int> in(len + 1, 0);
        for (const auto& e : edges) {
            if (++in[e[1]] >= 2) {
                in_idx = e[1];
                break;
            }
        }

        UnionSet u1(len);
        for (int skip = len - 1; skip > 0; --skip) {
            // When a node with two incoming edges exists, only the edges that
            // point at it are candidates for removal.
            if (in_idx != 0 && edges[skip][1] != in_idx) continue;

            // Brute force: rebuild the sets without edge `skip` and look for
            // an edge whose endpoints are already connected.
            u1.init();
            bool acyclic = true;
            for (int i = 0; i < len && acyclic; ++i) {
                if (i == skip) continue;
                const int from = edges[i][0];
                const int to = edges[i][1];
                if (u1.find(from) == u1.find(to)) {
                    acyclic = false;
                } else {
                    u1.merge(from, to);
                }
            }
            if (acyclic) return edges[skip];
        }

        return edges[0];
    }
};
7. leetcode1562(查找大小为 M 的最新分组): 对于问题和数据处理的转化;
// LeetCode 1562: bits of an all-zero string are set one per step in the order
// given by arr; report the last step at which a group of exactly m
// consecutive ones exists.
class Solution {
    // Union-find over the nodes 0.._n-1 that also keeps a histogram of group
    // lengths.
    struct UnionSet {
        int n;
        std::vector<int> boss;  // boss[x] is the parent of x; roots point to themselves
        std::vector<int> size;  // size[k] is the number of roots whose cnt equals k
        std::vector<int> cnt;   // cnt[r] is the number of merges absorbed by root r

        // Every node starts as a root with cnt 0, so size[0] starts at _n.
        explicit UnionSet(int _n) : n(_n), boss(_n), size(_n, 0), cnt(_n, 0) {
            for (int i = 0; i < _n; ++i) boss[i] = i;
            if (_n > 0) size[0] = _n;
        }

        // Returns the root of a's set and compresses the walked path.
        int find(int a) {
            int root = a;
            while (boss[root] != root) root = boss[root];
            while (boss[a] != root) {
                const int next = boss[a];
                boss[a] = root;
                a = next;
            }
            return root;
        }

        // Links b's group under a's root and updates the histogram.
        //
        // The counting is the key part: an extra node 0 is introduced and
        // setting bit p links node p to node p - 1, so a root together with
        // the nodes chained behind it represents one run of set bits and cnt
        // is the length of that run. Once every bit has been set, all nodes
        // hang off node 0.
        void merge(int a, int b) {
            const int fa = find(a);
            const int fb = find(b);
            if (fa == fb) return;
            boss[fb] = fa;
            size[cnt[fb]] -= 1;         // both old groups disappear ...
            size[cnt[fa]] -= 1;
            cnt[fa] += (cnt[fb] + 1);   // ... the new bit joins the two runs ...
            size[cnt[fa]] += 1;         // ... and the combined group appears
        }
    };

public:
    // Returns the 1-based index of the last step after which some group has
    // length exactly m, or -1 if that never happens. arr must hold values in
    // 1..arr.size(); an m outside 0..arr.size() can never match and yields -1.
    int findLatestStep(std::vector<int>& arr, int m) {
        const int len = static_cast<int>(arr.size());
        if (m < 0 || m > len) return -1;  // also keeps size[m] in bounds

        int ans = -1;
        UnionSet us(len + 1);
        for (int i = 0; i < len; ++i) {
            us.merge(arr[i] - 1, arr[i]);
            if (us.size[m] != 0) ans = i + 1;
        }
        return ans;
    }
};
======================= **应用场景** =======================
用来处理联通性问题;
树可以看成是一个连通且 无环 的 无向 图。