Hadoop Accessing a MySQL Database, Part 1: A Simple Access Example

Question: when running a self-compiled jar, does it first have to be copied to the same path on every node (either via HDFS or via scp)?

A few reminders:

1. To avoid Chinese-encoding problems, configure UTF-8 as the default character set first.

2. The JDBC jar can either be placed on every node, or put under a lib directory on HDFS.

3. If needed, grant MySQL access to user brian (password 123456) from the relevant node IPs:

mysql> GRANT ALL ON *.* TO 'brian'@'172.19.32.108' IDENTIFIED BY '123456';
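
Before involving Hadoop at all, it can save time to confirm that a worker node can actually reach MySQL with these credentials. Below is a minimal standalone check (a sketch only; the IP, database, and credentials are the ones used later in this article, and mysql-connector-java must be on the classpath):

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class ConnCheck {
    public static void main(String[] args) throws Exception {
        // Load the MySQL JDBC driver explicitly (Connector/J 5.x).
        Class.forName("com.mysql.jdbc.Driver");
        Connection conn = DriverManager.getConnection(
                "jdbc:mysql://172.19.102.12/tmp", "brian", "123456");
        Statement st = conn.createStatement();
        ResultSet rs = st.executeQuery("SELECT 1");
        while (rs.next()) {
            System.out.println("connected, SELECT 1 -> " + rs.getInt(1));
        }
        rs.close();
        st.close();
        conn.close();
    }
}

Running this on each TaskTracker node with the connector jar on the classpath separates failure modes: a ClassNotFoundException or access-denied error here points to a driver or GRANT problem rather than a Hadoop one.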

 

-------------------------

This article first reproduces the example from the referenced 《Hadoop开发者》 (Hadoop Developer) article on a two-node Hadoop cluster.

 

On the .12 machine (172.19.102.12):

1. Configure UTF-8 as the default character set

In my.cnf, under [mysqld] add:
character_set_server=utf8    (the option name for newer MySQL versions)

and under [mysql] add:
default-character-set=utf8

Then restart the MySQL service.
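
On most installs something like the following restarts it (the exact command depends on the distro and how MySQL was installed):

sudo service mysql restart    # or: sudo /etc/init.d/mysql restart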

After that, you can verify that Chinese characters display correctly after logging into MySQL, without running SET NAMES utf8; first.
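
To double-check the effective settings from the mysql prompt:

mysql> SHOW VARIABLES LIKE 'character_set%';

character_set_server (along with the client/connection/results variables) should now report utf8.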

 

2. Create the table in the tmp database:

use tmp;

set names utf8;    -- optional, but a bit safer to be explicit

DROP TABLE IF EXISTS `tmp`.`teacher`;
CREATE TABLE `tmp`.`teacher` (
`id` int(11) default NULL,
`name` char(20) default NULL,
`age` int(11) default NULL,
`departmentID` int(11) default NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

insert into teacher(id, name, age, departmentID) values(101, 'tom', 29, 10);
insert into teacher(id, name, age, departmentID) values(102, 'tony', 39, 20);
insert into teacher(id, name, age, departmentID) values(103, 'lily', 26, 20);
insert into teacher(id, name, age, departmentID) values(104, '张三', 25, 30);
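
A quick query confirms the rows, including the Chinese name in row 104, were stored correctly:

mysql> select * from teacher;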

 

3. Create a mytest directory under ~/hadoop-1.0.3:

mkdir mytest

mkdir mytest/src

mkdir mytest/classes

4. Create the source file:

nano mytest/src/DBAccess2.java

The code:

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;    //
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.KeyValueTextInputFormat;
import org.apache.hadoop.mapred.TextInputFormat;    //
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.FileSplit;    //
import org.apache.hadoop.mapred.lib.IdentityReducer;        // IdentityReducer.class

import org.apache.hadoop.mapred.lib.db.DBConfiguration; //
import org.apache.hadoop.mapred.lib.db.*; 
import org.apache.hadoop.io.*;
import java.io.DataInput;
import java.io.DataOutput;
import java.sql.*;        // ResultSet, PreparedStatement
import com.mysql.jdbc.Driver;


import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

//
import java.io.BufferedReader; 
import java.io.InputStreamReader; 
import java.util.ArrayList; 

import java.util.Arrays;


public class DBAccess2 extends Configured implements Tool {
    

    public static class TeacherRecord implements Writable, DBWritable {        // add "static" ??
    int id;
    String name;
    int age;
    int departmentID;

    public TeacherRecord(){        // add by brian
    System.out.println("TeacherRecord()");
    }

    public TeacherRecord(TeacherRecord t){        // add by brian
    System.out.println("TeacherRecord(TeacherRecord t)");
    this.id = t.id;
    this.name = t.name;
    this.age = t.age;
    this.departmentID = t.departmentID;

    }

    public void readFields(DataInput in) throws IOException {
        // TODO Auto-generated method stub
        this.id = in.readInt();
        this.name = Text.readString(in);
        this.age = in.readInt();
        this.departmentID = in.readInt();
    }

    public void write(DataOutput out) throws IOException {
        out.writeInt(this.id);
        Text.writeString(out, this.name);
        out.writeInt(this.age);
        out.writeInt(this.departmentID);
    }

    public void readFields(ResultSet result) throws SQLException {
        this.id = result.getInt(1);
        this.name = result.getString(2);
        this.age = result.getInt(3);
        this.departmentID = result.getInt(4);
    }

    public void write(PreparedStatement stmt) throws SQLException{
        stmt.setInt(1, this.id);
        stmt.setString(2, this.name);
        stmt.setInt(3, this.age);
        stmt.setInt(4, this.departmentID);
    }

    public String toString() {
        return new String(this.name + "," + this.age + "," + this.departmentID);    //
    }

}


    public static class DBAccessMapper extends MapReduceBase
        implements Mapper<LongWritable, TeacherRecord, LongWritable, Text> {
        
        private final static IntWritable uno = new IntWritable(1);    // unused leftover, apparently from a citation-count example
        private IntWritable citationCount = new IntWritable();        // unused
        
        public void map(LongWritable key, TeacherRecord value,
                        OutputCollector<LongWritable, Text> collector, Reporter reporter) throws IOException {
            collector.collect(new LongWritable(value.id), new Text(value.toString()));
    }
}    
    

    
    public int run(String[] args) throws Exception {

        Configuration conf = getConf();        
        JobConf job = new JobConf(conf, DBAccess2.class);
        job.setInputFormat(DBInputFormat.class);
        
        FileOutputFormat.setOutputPath(job, new Path("dboutput"));
        DBConfiguration.configureDB(job, "com.mysql.jdbc.Driver", "jdbc:mysql://172.19.102.12/tmp", "brian", "123456");

        String [] fields = {"id", "name", "age", "departmentID"};
        DBInputFormat.setInput(job, TeacherRecord.class, "teacher", null, "id", fields);

        job.setMapperClass(DBAccessMapper.class);
        job.setReducerClass(IdentityReducer.class);  
        JobClient.runJob(job);      
   
        return 0;
    }
    
    public static void main(String[] args) throws Exception { 
        int res = ToolRunner.run(new Configuration(), 
                                 new DBAccess2(), 
                                 args);
        
        System.exit(res);
    }
}

 

5.将mysql-connector-java-5.1.18-bin.jar上传到hadoop-1.0.3/lib目录下

6. Compile and run:

javac -classpath hadoop-core-1.0.3.jar:lib/commons-cli-1.2.jar:lib/mysql-connector-java-5.1.18-bin.jar -d mytest/classes/ mytest/src/DBAccess2.java
jar -cvf mytest/DBAccess2.jar -C mytest/classes/ .
bin/hadoop fs -rmr dboutput    # remove the output path, which is hard-coded in the job
time bin/hadoop jar mytest/DBAccess2.jar DBAccess2

But the following error appeared at runtime:

huangshaobin@backtest12:~/hadoop-1.0.3$ time bin/hadoop jar mytest/DBAccess2.jar DBAccess2
12/09/21 17:20:25 INFO mapred.JobClient: Running job: job_201209201824_0008
12/09/21 17:20:26 INFO mapred.JobClient:  map 0% reduce 0%
12/09/21 17:20:38 INFO mapred.JobClient: Task Id : attempt_201209201824_0008_m_000000_0, Status : FAILED
java.lang.RuntimeException: Error in configuring object
        at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:93)
        at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:64)
        at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:117)
        at org.apache.hadoop.mapred.JobConf.getInputFormat(JobConf.java:575)
        at org.apache.hadoop.mapred.MapTask$TrackedRecordReader.<init>(MapTask.java:197)
        at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:418)
        at org.apache.hadoop.mapred.MapTask.run(MapTask.java:372)
        at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:416)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1121)
        at org.apache.hadoop.mapred.Child.main(Child.java:249)
Caused by: java.lang.reflect.InvocationTargetException
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:616)
        at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:88)
        ... 11 more
Caused by: java.lang.RuntimeException: java.lang.ClassNotFoundException: com.mysql.jdbc.Driver
        at org.apache.hadoop.mapred.lib.db.DBInputFormat.configure(DBInputFormat.java:271)
        ... 16 more
Caused by: java.lang.ClassNotFoundException: com.mysql.jdbc.Driver
        at java.net.URLClassLoader$1.run(URLClassLoader.java:217)
        at java.security.AccessController.doPrivileged(Native Method)
        at java.net.URLClassLoader.findClass(URLClassLoader.java:205)
        at java.lang.ClassLoader.loadClass(ClassLoader.java:321)
        at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:294)
        at java.lang.ClassLoader.loadClass(ClassLoader.java:266)
        at java.lang.Class.forName0(Native Method)
        at java.lang.Class.forName(Class.java:186)
        at org.apache.hadoop.mapred.lib.db.DBConfiguration.getConnection(DBConfiguration.java:123)
        at org.apache.hadoop.mapred.lib.db.DBInputFormat.configure(DBInputFormat.java:266)
        ... 16 more

12/09/21 17:20:41 INFO mapred.JobClient: Task Id : attempt_201209201824_0008_m_000001_0, Status : FAILED
java.lang.RuntimeException: Error in configuring object
        at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:93)
        at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:64)
        at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:117)
        at org.apache.hadoop.mapred.JobConf.getInputFormat(JobConf.java:575)
        at org.apache.hadoop.mapred.MapTask$TrackedRecordReader.<init>(MapTask.java:197)
        at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:418)
        at org.apache.hadoop.mapred.MapTask.run(MapTask.java:372)
        at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:396)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1121)
        at org.apache.hadoop.mapred.Child.main(Child.java:249)
Caused by: java.lang.reflect.InvocationTargetException
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
        at java.lang.reflect.Method.invoke(Method.java:597)
        at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:88)
        ... 11 more
Caused by: java.lang.RuntimeException: java.lang.ClassNotFoundException: com.mysql.jdbc.Driver
        at org.apache.hadoop.mapred.lib.db.DBInputFormat.configure(DBInputFormat.java:271)
        ... 16 more
Caused by: java.lang.ClassNotFoundException: com.mysql.jdbc.Driver
        at java.net.URLClassLoader$1.run(URLClassLoader.java:202)
        at java.security.AccessController.doPrivileged(Native Method)
        at java.net.URLClassLoader.findClass(URLClassLoader.java:190)
        at java.lang.ClassLoader.loadClass(ClassLoader.java:307)
        at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:301)
        at java.lang.ClassLoader.loadClass(ClassLoader.java:248)
        at java.lang.Class.forName0(Native Method)
        at java.lang.Class.forName(Class.java:169)
        at org.apache.hadoop.mapred.lib.db.DBConfiguration.getConnection(DBConfiguration.java:123)
        at org.apache.hadoop.mapred.lib.db.DBInputFormat.configure(DBInputFormat.java:266)
        ... 16 more

7. Copy the JDBC driver to the other node:

huangshaobin@backtest12:~/hadoop-1.0.3$ scp lib/mysql-connector-java-5.1.18-bin.jar backtest11:/home/huangshaobin/hadoop-1.0.3/lib
mysql-connector-java-5.1.18-bin.jar 100% 771KB 771.4KB/s 00:00

Run again:

bin/hadoop fs -rmr dboutput 
time bin/hadoop jar mytest/DBAccess2.jar DBAccess2

The same error still appears, even though the same job runs fine in pseudo-distributed mode???

Copy the job jar to the other node as well:

huangshaobin@backtest12:~/hadoop-1.0.3$ scp -r mytest/ backtest11:/home/huangshaobin/hadoop-1.0.3

Run once more:

bin/hadoop fs -rmr dboutput 
time bin/hadoop jar mytest/DBAccess2.jar DBAccess2

Still the same error! Could the first failure have been a Java-version difference between the nodes? Either way, the JDBC driver still cannot be found. (In hindsight, this is likely because jars under $HADOOP_HOME/lib are only put on the daemons' classpath at startup, so an already-running TaskTracker would not pick up a newly copied driver without a restart.)

Next, delete the JDBC driver under lib/ on the 11 node, and also delete mytest/DBAccess2.jar there. Then, in the code, add the following two lines right after JobConf job = new JobConf(conf, DBAccess2.class);:

job.set("mapred.job.tracker", "172.19.102.12:9001");//
DistributedCache.addFileToClassPath(new Path("/lib/mysql-connector-java-5.1.18-bin.jar"), job); //

and add the corresponding import: import org.apache.hadoop.filecache.DistributedCache;

Copy the JDBC driver to HDFS:

bin/hadoop fs -put lib/mysql-connector-java-5.1.18-bin.jar /lib/mysql-connector-java-5.1.18-bin.jar
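
Confirm the upload landed where the code expects it:

bin/hadoop fs -ls /lib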

Recompile:

huangshaobin@backtest12:~/hadoop-1.0.3$ javac -classpath hadoop-core-1.0.3.jar:lib/commons-cli-1.2.jar:lib/mysql-connector-java-5.1.18-bin.jar -d mytest/classes/ mytest/src/DBAccess2.java
Note: mytest/src/DBAccess2.java uses or overrides a deprecated API.
Note: Recompile with -Xlint:deprecation for details.

What do these two notes mean? They only flag that the code uses a deprecated API somewhere; safe to ignore for now.

Run again. This time it succeeds!!

So these two lines are the key:

job.set("mapred.job.tracker", "172.19.102.12:9001");
DistributedCache.addFileToClassPath(new Path("/lib/mysql-connector-java-5.1.18-bin.jar"), job);

And there is no need to copy DBAccess2.jar to the other nodes; the job jar is evidently shipped between nodes automatically.
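
Incidentally, because this job goes through ToolRunner, the generic -libjars option should achieve the same driver distribution without code changes (not tested in this article; shown only as a sketch):

time bin/hadoop jar mytest/DBAccess2.jar DBAccess2 -libjars lib/mysql-connector-java-5.1.18-bin.jar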

The imports at the top of the file were also a mess; after tidying those up too, the final code looks like this:

import java.io.IOException;
import java.io.DataInput;
import java.io.DataOutput;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.hadoop.mapred.lib.db.DBWritable;
import org.apache.hadoop.mapred.lib.db.DBInputFormat;
import org.apache.hadoop.mapred.lib.db.DBConfiguration;

import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;

public class DBAccess2 extends Configured implements Tool {

    // Must be static (with a no-arg constructor) so the framework can instantiate it via reflection.
    public static class TeacherRecord implements Writable, DBWritable {
        int id;
        String name;
        int age;
        int departmentID;

        public TeacherRecord() {        // added by brian
            System.out.println("TeacherRecord()");
        }

        public TeacherRecord(TeacherRecord t) {        // added by brian
            System.out.println("TeacherRecord(TeacherRecord t)");
            this.id = t.id;
            this.name = t.name;
            this.age = t.age;
            this.departmentID = t.departmentID;
        }

        public void readFields(DataInput in) throws IOException {
            this.id = in.readInt();
            this.name = Text.readString(in);
            this.age = in.readInt();
            this.departmentID = in.readInt();
        }

        public void write(DataOutput out) throws IOException {
            out.writeInt(this.id);
            Text.writeString(out, this.name);
            out.writeInt(this.age);
            out.writeInt(this.departmentID);
        }

        public void readFields(ResultSet result) throws SQLException {
            this.id = result.getInt(1);
            this.name = result.getString(2);
            this.age = result.getInt(3);
            this.departmentID = result.getInt(4);
        }

        public void write(PreparedStatement stmt) throws SQLException {
            stmt.setInt(1, this.id);
            stmt.setString(2, this.name);
            stmt.setInt(3, this.age);
            stmt.setInt(4, this.departmentID);
        }

        public String toString() {
            return this.name + "," + this.age + "," + this.departmentID;
        }
    }

    public static class DBAccessMapper extends MapReduceBase
            implements Mapper<LongWritable, TeacherRecord, LongWritable, Text> {

        public void map(LongWritable key, TeacherRecord value,
                        OutputCollector<LongWritable, Text> collector, Reporter reporter) throws IOException {
            // Emit the row id as the key and the record's text form as the value.
            collector.collect(new LongWritable(value.id), new Text(value.toString()));
        }
    }

    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        JobConf job = new JobConf(conf, DBAccess2.class);
        job.set("mapred.job.tracker", "172.19.102.12:9001");
        // Ship the JDBC driver (already uploaded to HDFS) to every task's classpath.
        DistributedCache.addFileToClassPath(new Path("/lib/mysql-connector-java-5.1.18-bin.jar"), job);
        job.setInputFormat(DBInputFormat.class);
        FileOutputFormat.setOutputPath(job, new Path("dboutput"));
        DBConfiguration.configureDB(job, "com.mysql.jdbc.Driver", "jdbc:mysql://172.19.102.12/tmp", "brian", "123456");

        String[] fields = {"id", "name", "age", "departmentID"};
        // Read from table "teacher": no WHERE condition, ordered by "id", selecting these fields.
        DBInputFormat.setInput(job, TeacherRecord.class, "teacher", null, "id", fields);

        job.setMapperClass(DBAccessMapper.class);
        job.setReducerClass(IdentityReducer.class);
        JobClient.runJob(job);

        return 0;
    }

    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new DBAccess2(), args);
        System.exit(res);
    }
}

The output:

huangshaobin@backtest12:~/hadoop-1.0.3$ bin/hadoop fs -cat dboutput/*
101 tom,29,10
102 tony,39,20
103 lily,26,20
104 张三,25,30

 

To summarize the steps:

1. In the code, specify the JobTracker and the extra JDBC jar:

job.set("mapred.job.tracker", "172.19.102.12:9001");

DistributedCache.addFileToClassPath(new Path("/lib/mysql-connector-java-5.1.18-bin.jar"), job);

2. Compile.

3. Upload the extra JDBC jar to the /lib directory on HDFS:

bin/hadoop fs -put lib/mysql-connector-java-5.1.18-bin.jar /lib/mysql-connector-java-5.1.18-bin.jar

4. Run. Success.

 

Additionally, the DistributedCache class can be looked up at http://hadoop.apache.org/docs/r1.0.3/api/index.html to learn about its other methods; the Javadoc there includes usage examples as well.
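
For reference, a sketch of some related DistributedCache calls from that API (method names per the Hadoop 1.0.3 Javadoc; the HDFS paths here are hypothetical):

// Inside run(), with the same JobConf job as above:
// Ship a plain file to every task node (not on the classpath);
// with DistributedCache.createSymlink(job), the #name fragment controls the link name.
DistributedCache.addCacheFile(new java.net.URI("/cache/lookup.dat#lookup.dat"), job);
// Ship an archive (zip/tar/tgz); it is unpacked on the task nodes.
DistributedCache.addCacheArchive(new java.net.URI("/cache/extra.zip"), job);
// Like addFileToClassPath, but for an archive that should go on the classpath.
DistributedCache.addArchiveToClassPath(new Path("/lib/deps.jar"), job);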

 

 

References:

Hadoop Cluster (Issue 10): MySQL Relational Database, http://www.cnblogs.com/xia520pi/archive/2012/06/12/2546261.html

《Hadoop开发者》 Issue 2: Accessing Databases

posted @ 2012-09-21 18:37 by aha~