Reading HBase data and writing it to MySQL
First, write a custom RecordWriter subclass (here IRecordWrite extends RecordWriter) that pushes each reduce output record into MySQL:
package calllog;

import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.sql.Statement;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public class IRecordWrite extends RecordWriter<Text, IntWritable> {

    @Override
    public void write(Text key, IntWritable value) {
        String driver = "com.mysql.jdbc.Driver";
        String url = "jdbc:mysql://192.168.120.110:3306/calllog?characterEncoding=UTF-8";
        String user = "root";
        String password = "******";
        System.out.println("start writing record");
        Connection conn = null;
        Statement statement = null;

        // The reduce key is a tab-separated string; join its three fields into the primary key
        String string = key.toString();
        String[] split = string.split("\t");
        String zhujian = split[0] + "_" + split[1] + "_" + split[2];

        try {
            Class.forName(driver);
            conn = DriverManager.getConnection(url, user, password);
            conn.setAutoCommit(true);
            statement = conn.createStatement();
            // Upsert: insert the row, or update it if the key already exists
            // INSERT INTO `tb_call` (`id_date_contact`, `id_date_dimension`, `id_contact`, `call_sum`, `call_duration_sum`) VALUES (?, ?, ?, ?, ?) ON DUPLICATE KEY UPDATE `id_date_contact` = ?;
            String sql = "INSERT INTO mylog values ('" + zhujian + "','" + split[0] + "','" + split[1]
                    + "','" + split[2] + "','" + value.toString()
                    + "') ON DUPLICATE KEY UPDATE name_phone_time = '" + zhujian + "';";
            System.out.println(sql);
            statement.execute(sql);
            System.err.println("---------- insert succeeded ----------");
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            try {
                if (statement != null) {
                    statement.close();
                }
                if (conn != null) {
                    conn.close();
                }
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }

    @Override
    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
    }
}
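The ON DUPLICATE KEY UPDATE upsert only works if the target table has a primary (or unique) key on the first column. Below is a minimal one-time setup sketch, assuming the table is named mylog and the key column is name_phone_time as in the UPDATE clause; the remaining column names and types are placeholders, not taken from the source:

// Sketch only: column names other than name_phone_time are assumptions
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

public class CreateMylogTable {
    public static void main(String[] args) throws Exception {
        Class.forName("com.mysql.jdbc.Driver");
        String url = "jdbc:mysql://192.168.120.110:3306/calllog?characterEncoding=UTF-8";
        try (Connection conn = DriverManager.getConnection(url, "root", "******");
             Statement st = conn.createStatement()) {
            // name_phone_time is the upsert key, so it must be the primary key
            st.execute("CREATE TABLE IF NOT EXISTS mylog ("
                    + " name_phone_time VARCHAR(100) PRIMARY KEY,"
                    + " name VARCHAR(50),"
                    + " phone VARCHAR(30),"
                    + " call_time VARCHAR(30),"
                    + " call_sum INT)");
        }
    }
}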
Next, write a custom OutputFormat (IOutputFormat extends OutputFormat) that hands out this RecordWriter:
package calllog;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

public class IOutputFormat extends OutputFormat<Text, IntWritable> {

    @Override
    public RecordWriter<Text, IntWritable> getRecordWriter(TaskAttemptContext context)
            throws IOException, InterruptedException {
        // Give every reduce task a writer that sends records straight to MySQL
        return new IRecordWrite();
    }

    @Override
    public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
        // Nothing to validate: output goes to MySQL, not to an HDFS directory
    }

    @Override
    public OutputCommitter getOutputCommitter(TaskAttemptContext context)
            throws IOException, InterruptedException {
        // Reuse NullOutputFormat's committer since there are no output files to commit
        return new NullOutputFormat<Text, IntWritable>().getOutputCommitter(context);
    }
}
Finally, in the driver, set the job's output format to the custom class:
package calllog;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class Idriver {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(Idriver.class);

        // Wire up the mapper and reducer
        job.setMapperClass(Imap.class);
        job.setReducerClass(Ireduce.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Optional partitioning
        // job.setPartitionerClass(IPartitioner.class);
        // job.setNumReduceTasks(4);

        // Final output key/value types
        // job.setOutputKeyClass(Text.class);
        // job.setOutputValueClass(IntWritable.class);

        // Use the custom output format so results go to MySQL instead of HDFS
        job.setOutputFormatClass(IOutputFormat.class);

        // Input path; no output path is needed because IOutputFormat writes to MySQL
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        // FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Submit the job and wait for completion
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}
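Imap and Ireduce are not shown in this post. The only contract IRecordWrite imposes on them is that the reduce output key is a Text containing three tab-separated fields (which become name_phone_time) and the value is the aggregated IntWritable count. A minimal reducer sketch that satisfies this contract, assuming a simple word-count-style aggregation; the actual Ireduce may differ:

package calllog;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Sketch only: the key must already look like "field0\tfield1\tfield2"
public class Ireduce extends Reducer<Text, IntWritable, Text, IntWritable> {

    private final IntWritable sum = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Add up the counts for this key
        int total = 0;
        for (IntWritable v : values) {
            total += v.get();
        }
        sum.set(total);
        // IRecordWrite will split this key on "\t" and build the MySQL primary key from it
        context.write(key, sum);
    }
}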