MapReduce读取HBase表，并将数据写入另一个HBase表

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
public class LabelJob
{
     
    public static void main(String[] args)
        throws Exception
    {
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(LabelJob.class);
        job.setJobName("Hbase.LabelJob");
         
        Configuration conf = job.getConfiguration();
        conf.set("tablename", "product_tags");
         
        Scan scan = new Scan();
        scan.setCaching(500);
        scan.setCacheBlocks(false);
        //输入表
        TableMapReduceUtil.initTableMapperJob("tb_user", scan, LabelMapper.class, Text.class, Text.class, job);
         
        job.setReducerClass(LabelReducer.class);
        //输出表
        TableMapReduceUtil.initTableReducerJob("usertags", LabelReducer.class, job);
        job.waitForCompletion(true);
         
    }
     
}

  

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/**
 * Mapper over rows of the HBase input table; declared to emit {@code Text} keys
 * and {@code Text} values.
 *
 * NOTE(review): this snippet is abridged -- the dotted lines mark omitted code,
 * and the {@code fhb} helper used in {@code map} is not declared in the visible part.
 */
public class LabelMapper extends TableMapper<Text, Text>
{
      /**
       * Per-task initialisation: reads the "tablename" entry from the job configuration.
       * What is done with the value is in the omitted part of the method.
       *
       * @param context task context giving access to the job configuration
       * @throws IOException declared by the overridden method
       * @throws InterruptedException declared by the overridden method
       */
      protected void setup(Context context)
        throws IOException, InterruptedException
    {
        super.setup(context);
        String tablename = context.getConfiguration().get("tablename");
         .................
    }
 /**
  * Handles one scanned row: decodes the row key to a String, fetches a label
  * string for the row and splits it on commas. The code that emits output is omitted.
  *
  * @param rowKey  row key of the current row
  * @param result  cells of the current row
  * @param context task context used to emit output
  * @throws IOException declared by the overridden method
  * @throws InterruptedException declared by the overridden method
  */
 protected void map(ImmutableBytesWritable rowKey, Result result, Context context)
        throws IOException, InterruptedException
    {
          String userid = Bytes.toString(rowKey.get()); // read the rowkey of the HBase user table
         
          // presumably "labels" is the column family and "label" the qualifier -- confirm against fhb
          String strlabel = fhb.getStringValue(result, "labels", "label");
         // NOTE(review): throws NullPointerException if getStringValue can return null
         // for a row without that cell -- confirm the helper's contract.
         String[] userLabels = strlabel.split(",");
....................
}
}

  

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
public class LabelReducer extends TableReducer<Text, Text, ImmutableBytesWritable>
{
     @Override
    public void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException
    {
String rowKey = key.toString();// 读取Map输出
 for (Text v : values)
            {
                String tag = v.toString();
                Long count = tagMap.get(tag);
                tagMap.put(tag, (count == null) ? 1 : (count + 1));// 计数
            }
 Put put = new Put(productId.getBytes());
  put.add("prodtags".getBytes(), "prodtags".getBytes(),outputlabel.toString().getBytes());
 
context.write(new ImmutableBytesWritable(productId.getBytes()), put);
 
}
 
}

  

 

posted @   JackyKen  阅读(4296)  评论(1编辑  收藏  举报
编辑推荐:
· 智能桌面机器人:用.NET IoT库控制舵机并多方法播放表情
· Linux glibc自带哈希表的用例及性能测试
· 深入理解 Mybatis 分库分表执行原理
· 如何打造一个高并发系统?
· .NET Core GC压缩(compact_phase)底层原理浅谈
阅读排行:
· 手把手教你在本地部署DeepSeek R1,搭建web-ui ,建议收藏!
· 新年开篇:在本地部署DeepSeek大模型实现联网增强的AI应用
· Janus Pro:DeepSeek 开源革新,多模态 AI 的未来
· 互联网不景气了那就玩玩嵌入式吧,用纯.NET开发并制作一个智能桌面机器人(三):用.NET IoT库
· 【非技术】说说2024年我都干了些啥
点击右上角即可分享
微信分享提示