Using MapReduce to load HDFS data into HBase
Getting data from HDFS into HBase
The task is to import the data in a file on HDFS into an HBase table.
There are two ways to do this: write a custom MapReduce job, or use the Import tool that ships with HBase. The custom MapReduce approach is used here.
First create the table in HBase: create 'NNTB','info' (the table name must match the one the job writes to in the code below).
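If you prefer to create the table from code rather than the HBase shell, a minimal sketch with the standard HBase 1.x client Admin API could look like the following. It reuses the table name NNTB, the column family info, and the placeholder ZooKeeper address from the job below; the class name CreateNNTB is just for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class CreateNNTB {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // Same placeholder ZooKeeper address as the MapReduce job below
        conf.set("hbase.zookeeper.quorum", "202.168.27.196:2181");
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Admin admin = connection.getAdmin()) {
            TableName tableName = TableName.valueOf("NNTB");
            if (!admin.tableExists(tableName)) {
                // One column family named "info", matching the Puts built by the reducer
                HTableDescriptor desc = new HTableDescriptor(tableName);
                desc.addFamily(new HColumnDescriptor("info"));
                admin.createTable(desc);
            }
        }
    }
}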
The implementation code is below:
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

/**
 * Reads data from HDFS and writes it into HBase.
 * The table must be created in HBase beforehand: create 'NNTB','info'
 */
public class HdfsToHBase {
    public static void main(String[] args) throws Exception {
        // Only needed when running locally; points at the local Hadoop home
        System.setProperty("hadoop.home.dir", "D:\\hadoop-2.7.6");
        Configuration conf = HBaseConfiguration.create();
        // Placeholder IP; ZooKeeper uses the default port 2181
        conf.set("hbase.zookeeper.quorum", "202.168.27.196:2181");
        conf.set(TableOutputFormat.OUTPUT_TABLE, "NNTB");

        Job job = Job.getInstance(conf, HdfsToHBase.class.getSimpleName());
        TableMapReduceUtil.addDependencyJars(job);
        job.setJarByClass(HdfsToHBase.class);

        job.setMapperClass(HdfsToHBaseMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(HdfsToHBaseReducer.class);

        FileInputFormat.addInputPath(job, new Path("hdfs://202.168.27.196:9000/user/hadoop/gznt/gznt_bmda/*"));
        job.setOutputFormatClass(TableOutputFormat.class);
        job.waitForCompletion(true);
    }

    public static class HdfsToHBaseMapper extends Mapper<LongWritable, Text, Text, Text> {
        private Text outKey = new Text();
        private Text outValue = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Each input line is tab-separated: row key followed by four column values
            String[] splits = value.toString().split("\t");
            outKey.set(splits[0]);
            outValue.set(splits[1] + "\t" + splits[2] + "\t" + splits[3] + "\t" + splits[4]);
            context.write(outKey, outValue);
        }
    }

    // Target table: create 'NNTB','info'
    public static class HdfsToHBaseReducer extends TableReducer<Text, Text, NullWritable> {
        @Override
        protected void reduce(Text k2, Iterable<Text> v2s, Context context) throws IOException, InterruptedException {
            // copyBytes() returns exactly getLength() bytes; getBytes() may include stale buffer content
            Put put = new Put(k2.copyBytes());
            for (Text v2 : v2s) {
                String[] splits = v2.toString().split("\t");
                // "info" is the HBase column family name
                if (splits[0] != null && !"NULL".equals(splits[0])) {
                    put.addColumn("info".getBytes(), "NodeCode".getBytes(), splits[0].getBytes());
                }
                if (splits[1] != null && !"NULL".equals(splits[1])) {
                    put.addColumn("info".getBytes(), "NodeType".getBytes(), splits[1].getBytes());
                }
                if (splits[2] != null && !"NULL".equals(splits[2])) {
                    put.addColumn("info".getBytes(), "NodeName".getBytes(), splits[2].getBytes());
                }
                if (splits[3] != null && !"NULL".equals(splits[3])) {
                    put.addColumn("info".getBytes(), "IsWarehouse".getBytes(), splits[3].getBytes());
                }
            }
            context.write(NullWritable.get(), put);
        }
    }
}
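After the job finishes, the quickest check is scan 'NNTB' in the HBase shell. As a rough sketch, the same check from Java with the standard client API could look like this; the class name VerifyNNTB and the row key "some-row-key" are placeholders, so substitute a row key that actually appears in the source file.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class VerifyNNTB {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // Same placeholder ZooKeeper address as the job above
        conf.set("hbase.zookeeper.quorum", "202.168.27.196:2181");
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Table table = connection.getTable(TableName.valueOf("NNTB"))) {
            // "some-row-key" is a placeholder; use a row key from the imported file
            Result result = table.get(new Get(Bytes.toBytes("some-row-key")));
            byte[] nodeName = result.getValue(Bytes.toBytes("info"), Bytes.toBytes("NodeName"));
            System.out.println(nodeName == null ? "row or column not found" : Bytes.toString(nodeName));
        }
    }
}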