Mysql和Hive实现列转行,类似reduceByKey操作
给的数据如下图:
需要实现的结果(需要对label去重):
mysql:
建表和准备数据
CREATE TABLE `comp_label` ( `compName` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci DEFAULT NULL, `label` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci DEFAULT NULL ) ENGINE = InnoDB CHARACTER SET = latin1 COLLATE = latin1_swedish_ci ROW_FORMAT = Compact; INSERT INTO `comp_label` VALUES ('视野数科', 'FourthCompany'); INSERT INTO `comp_label` VALUES ('视野数科', 'PrepareThirdCompany'); INSERT INTO `comp_label` VALUES ('视野数科', 'PrepareThirdCompany'); INSERT INTO `comp_label` VALUES ('视野数科', 'SYBI10000'); INSERT INTO `comp_label` VALUES ('天眼查', 'SYBB10000');
实现:
SELECT compName,GROUP_CONCAT(DISTINCT label) label FROM comp_label GROUP BY compName
hive:
建表和准备数据
CREATE TABLE `default.comp_label`( `compName` string, `label` string) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES ( 'field.delim'='\u0001', 'line.delim'='\n', 'serialization.format'='r') STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'; INSERT INTO `default.comp_label` VALUES ('视野数科', 'FourthCompany'),('视野数科', 'PrepareThirdCompany'), ('视野数科', 'PrepareThirdCompany'), ('视野数科', 'SYBI10000'),('天眼查', 'SYBB10000');
实现:
SELECT compName,concat_ws(',',collect_set(label)) AS label FROM default.comp_label GROUP BY compName
hive如果不去重
SELECT compName,concat_ws(',',collect_list(label)) AS label FROM default.comp_label GROUP BY compName