1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 | 相关函数请参考官方网站:https: //clickhouse.tech/docs/zh/sql-reference/functions/date-time-functions/行列转置一般由由行转为列,或者由列转为行。 CREATE TABLE datasets.t_city ( `province` String, `city` String, `createtime` DateTime, `city_level` Int8 ) ENGINE = MergeTree() ORDER BY province SETTINGS index_granularity = 8192; insert into t_city values( 'Hubei' , 'Wuhan' ,now(),2),( 'Hubei' , 'Xiangyang' ,now(),3),( 'Shanghai' , 'Shanghai' ,now(),1),( 'Guangdong' , 'Guangzhou' ,now(),1),( 'Guangdong' , 'Shenzhen' ,now(),1),( 'Guangdong' , 'DOngguan' ,now(),2),( 'Guangdong' , 'Zhuhai' ,now(),3); Clickhouse> select * from t_city; SELECT * FROM t_city ┌─province──┬─city──────┬──────────createtime─┬─city_level─┐ │ Guangdong │ Guangzhou │ 2020-07-07 14:02:53 │ 1 │ │ Guangdong │ Shenzhen │ 2020-07-07 14:02:53 │ 1 │ │ Guangdong │ DOngguan │ 2020-07-07 14:02:53 │ 2 │ │ Guangdong │ Zhuhai │ 2020-07-07 14:02:53 │ 3 │ │ Hubei │ Wuhan │ 2020-07-07 14:02:53 │ 2 │ │ Hubei │ Xiangyang │ 2020-07-07 14:02:53 │ 3 │ │ Shanghai │ Shanghai │ 2020-07-07 14:02:53 │ 1 │ └───────────┴───────────┴─────────────────────┴────────────┘ 7 rows in set . Elapsed: 0.002 sec. 列转行: 查询每个省份用户的城市: Clickhouse> select province, groupArray(city) from t_city group by province; SELECT province, groupArray(city) FROM t_city GROUP BY province ┌─province──┬─groupArray(city)─────────────────────────────┐ │ Shanghai │ [ 'Shanghai' ] │ │ Hubei │ [ 'Wuhan' , 'Xiangyang' ] │ │ Guangdong │ [ 'Guangzhou' , 'Shenzhen' , 'Dongguan' , 'Zhuhai' ] │ └───────────┴──────────────────────────────────────────────┘ 插入一条重复的记录: insert into t_city values( 'Hubei' , 'Wuhan' ,now(),2); 可以看到Hubei有一个重复的wuhan Clickhouse> select province, groupArray(city) from t_city group by province; SELECT province, groupArray(city) FROM t_city GROUP BY province ┌─province──┬─groupArray(city)─────────────────────────────┐ │ Shanghai │ [ 'Shanghai' ] │ │ Hubei │ [ 'Wuhan' , 'Xiangyang' , 'Wuhan' ] │ │ Guangdong │ [ 'Guangzhou' , 'Shenzhen' , 'Dongguan' , 'Zhuhai' ] │ └───────────┴──────────────────────────────────────────────┘ 3 rows in set . Elapsed: 0.002 sec. 可以使用函数groupUniqArray进行去重: Clickhouse> select province, groupUniqArray(city) from t_city group by province; SELECT province, groupUniqArray(city) FROM t_city GROUP BY province ┌─province──┬─groupUniqArray(city)─────────────────────────┐ │ Shanghai │ [ 'Shanghai' ] │ │ Hubei │ [ 'Wuhan' , 'Xiangyang' ] │ │ Guangdong │ [ 'Zhuhai' , 'Dongguan' , 'Guangzhou' , 'Shenzhen' ] │ └───────────┴──────────────────────────────────────────────┘ 3 rows in set . Elapsed: 0.003 sec. 字符串拆分: Clickhouse> select splitByChar( '#' , 'Hubei_Wuhan#Hubei_Xiangyang#Guangdong_Shenzhen' ) col; SELECT splitByChar( '#' , 'Hubei_Wuhan#Hubei_Xiangyang#Guangdong_Shenzhen' ) AS col ┌─col────────────────────────────────────────────────────┐ │ [ 'Hubei_Wuhan' , 'Hubei_Xiangyang' , 'Guangdong_Shenzhen' ] │ └────────────────────────────────────────────────────────┘ 1 rows in set . Elapsed: 0.002 sec. 可以使用ArrayJoin 函数将一行转为行: Clickhouse> select arrayJoin(splitByChar( '#' , 'Hubei_Wuhan#Hubei_Xiangyang#Guangdong_Shenzhen' )) col; SELECT arrayJoin(splitByChar( '#' , 'Hubei_Wuhan#Hubei_Xiangyang#Guangdong_Shenzhen' )) AS col ┌─col────────────────┐ │ Hubei_Wuhan │ │ Hubei_Xiangyang │ │ Guangdong_Shenzhen │ └────────────────────┘ 3 rows in set . Elapsed: 0.002 sec. 实际示例: SELECT province, groupArray(city) FROM t_city GROUP BY province ┌─province──┬─groupArray(city)─────────────────────────────┐ │ Shanghai │ [ 'Shanghai' ] │ │ Hubei │ [ 'Wuhan' , 'Xiangyang' , 'Wuhan' ] │ │ Guangdong │ [ 'Guangzhou' , 'Shenzhen' , 'Dongguan' , 'Zhuhai' ] │ └───────────┴──────────────────────────────────────────────┘ 3 rows in set . Elapsed: 0.003 sec. Clickhouse> SELECT province, arrayJoin(groupArray(city)) new_city FROM t_city GROUP BY province; SELECT province, arrayJoin(groupArray(city)) AS new_city FROM t_city GROUP BY province ┌─province──┬─new_city──┐ │ Shanghai │ Shanghai │ │ Hubei │ Wuhan │ │ Hubei │ Xiangyang │ │ Hubei │ Wuhan │ │ Guangdong │ Guangzhou │ │ Guangdong │ Shenzhen │ │ Guangdong │ Dongguan │ │ Guangdong │ Zhuhai │ └───────────┴───────────┘ 8 rows in set . Elapsed: 0.003 sec. 结论:在clickhouse中主要使用两个函数groupArray 和ArrayJoin。 |
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· .NET 9 new features-C#13新的锁类型和语义
· Linux系统下SQL Server数据库镜像配置全流程详解
· 现代计算机视觉入门之:什么是视频
· 你所不知道的 C/C++ 宏知识
· 聊一聊 操作系统蓝屏 c0000102 的故障分析
· DeepSeek V3 两周使用总结
· 回顾我的软件开发经历(1)
· C#使用yield关键字提升迭代性能与效率
· 低成本高可用方案!Linux系统下SQL Server数据库镜像配置全流程详解
· 4. 使用sql查询excel内容