Hadoop Accessing a MySQL Database, Part 1: A Simple Access Example
Question: when running a jar you compiled yourself, do you first have to copy it to the same path on every node (whether via HDFS or via scp)?
A few notes:
1. To avoid problems with Chinese character encoding, configure UTF-8 as the default encoding first.
2. The JDBC driver jar can be handled in two ways: place it on every node, or put it under a lib directory on HDFS.
3. If needed, grant MySQL access to brian (password 123456) from the relevant IPs of the other nodes:
mysql> GRANT ALL ON *.* TO 'brian'@'172.19.32.108' IDENTIFIED BY '123456';
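Before going further, it can be worth confirming from one of the other nodes that the grant and the driver actually work, independently of Hadoop. A minimal plain-JDBC sketch (the class name JdbcPing and the SELECT 1 probe are my own; host, user, and password are the values used throughout this post):

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class JdbcPing {
    public static void main(String[] args) throws Exception {
        // Fails fast with ClassNotFoundException if the driver jar is not on the classpath.
        Class.forName("com.mysql.jdbc.Driver");
        Connection conn = DriverManager.getConnection(
                "jdbc:mysql://172.19.102.12/tmp", "brian", "123456");
        Statement st = conn.createStatement();
        ResultSet rs = st.executeQuery("SELECT 1");
        while (rs.next()) {
            System.out.println("connected, SELECT 1 -> " + rs.getInt(1));
        }
        conn.close();
    }
}

Run it with the driver jar on the classpath, e.g. java -cp .:mysql-connector-java-5.1.18-bin.jar JdbcPing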
-------------------------
This post first reproduces the example from the referenced 《hadoop开发者》 (Hadoop Developer) article on a two-node Hadoop cluster.
On the .12 machine (172.19.102.12):
1. Configure UTF-8 as the default encoding.
In my.cnf, add under [mysqld]:
character_set_server=utf8 (for newer MySQL versions)
and add under [mysql]:
default-character-set=utf8
Then restart the MySQL service.
To verify: after logging in to MySQL, Chinese text should now display correctly without running set names utf8; first (SHOW VARIABLES LIKE 'character%'; also shows the effective settings).
2. Create a table in the tmp database:
use tmp;
set names utf8; -- optional, but safer to include
DROP TABLE IF EXISTS `tmp`.`teacher`;
CREATE TABLE `tmp`.`teacher` (
`id` int(11) default NULL,
`name` char(20) default NULL,
`age` int(11) default NULL,
`departmentID` int(11) default NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
insert into teacher(id, name, age, departmentID)
values('101', 'tom', '29', '10');
insert into teacher(id, name, age, departmentID)
values('102', 'tony', '39', '20');
insert into teacher(id, name, age, departmentID)
values('103', 'lily', '26', '20');
insert into teacher(id, name, age, departmentID)
values('104', '张三', '25', '30');
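Before involving Hadoop at all, the rows can be read back over plain JDBC to confirm that the grant, the driver, and the UTF-8 setup all work together. A minimal sketch; the class name TeacherCheck is my own, and the useUnicode/characterEncoding URL parameters are a belt-and-braces addition rather than something strictly required by the server config above:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class TeacherCheck {
    public static void main(String[] args) throws Exception {
        Class.forName("com.mysql.jdbc.Driver");
        Connection conn = DriverManager.getConnection(
                "jdbc:mysql://172.19.102.12/tmp?useUnicode=true&characterEncoding=UTF-8",
                "brian", "123456");
        Statement st = conn.createStatement();
        ResultSet rs = st.executeQuery("SELECT id, name, age, departmentID FROM teacher");
        while (rs.next()) {
            // 张三 prints correctly only if the UTF-8 configuration took effect
            System.out.println(rs.getInt(1) + "\t" + rs.getString(2)
                    + "," + rs.getInt(3) + "," + rs.getInt(4));
        }
        conn.close();
    }
}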
3. Create a mytest directory under ~/hadoop-1.0.3:
mkdir mytest
mkdir mytest/src
mkdir mytest/classes
4. Create the source file:
nano mytest/src/DBAccess2.java
Code:
import java.io.IOException;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable; //
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.KeyValueTextInputFormat;
import org.apache.hadoop.mapred.TextInputFormat; //
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.FileSplit; //
import org.apache.hadoop.mapred.lib.IdentityReducer; // IdentityReducer.class
import org.apache.hadoop.mapred.lib.db.DBConfiguration; //
import org.apache.hadoop.mapred.lib.db.*;
import org.apache.hadoop.io.*;
import java.io.DataInput;
import java.io.DataOutput;
import java.sql.*; // ResultSet, PreparedStatement
import com.mysql.jdbc.Driver;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner; //
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;

public class DBAccess2 extends Configured implements Tool {

    public static class TeacherRecord implements Writable, DBWritable { // add "static" ??
        int id;
        String name;
        int age;
        int departmentID;

        public TeacherRecord() { // add by brian
            System.out.println("TeacherRecord()");
        }

        public TeacherRecord(TeacherRecord t) { // add by brian
            System.out.println("TeacherRecord(TeacherRecord t)");
            this.id = t.id;
            this.name = t.name;
            this.age = t.age;
            this.departmentID = t.departmentID;
        }

        public void readFields(DataInput in) throws IOException {
            this.id = in.readInt();
            this.name = Text.readString(in);
            this.age = in.readInt();
            this.departmentID = in.readInt();
        }

        public void write(DataOutput out) throws IOException {
            out.writeInt(this.id);
            Text.writeString(out, this.name);
            out.writeInt(this.age);
            out.writeInt(this.departmentID);
        }

        public void readFields(ResultSet result) throws SQLException {
            this.id = result.getInt(1);
            this.name = result.getString(2);
            this.age = result.getInt(3);
            this.departmentID = result.getInt(4);
        }

        public void write(PreparedStatement stmt) throws SQLException {
            stmt.setInt(1, this.id);
            stmt.setString(2, this.name);
            stmt.setInt(3, this.age);
            stmt.setInt(4, this.departmentID);
        }

        public String toString() {
            return new String(this.name + "," + this.age + "," + this.departmentID);
        }
    }

    public static class DBAccessMapper extends MapReduceBase
            implements Mapper<LongWritable, TeacherRecord, LongWritable, Text> {

        private final static IntWritable uno = new IntWritable(1);
        private IntWritable citationCount = new IntWritable();

        public void map(LongWritable key, TeacherRecord value,
                OutputCollector<LongWritable, Text> collector, Reporter reporter)
                throws IOException {
            collector.collect(new LongWritable(value.id), new Text(value.toString()));
        }
    }

    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        JobConf job = new JobConf(conf, DBAccess2.class);
        job.setInputFormat(DBInputFormat.class);
        FileOutputFormat.setOutputPath(job, new Path("dboutput"));
        DBConfiguration.configureDB(job, "com.mysql.jdbc.Driver",
                "jdbc:mysql://172.19.102.12/tmp", "brian", "123456");

        String[] fields = {"id", "name", "age", "departmentID"};
        DBInputFormat.setInput(job, TeacherRecord.class, "teacher", null, "id", fields);

        job.setMapperClass(DBAccessMapper.class);
        job.setReducerClass(IdentityReducer.class);
        JobClient.runJob(job);
        return 0;
    }

    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new DBAccess2(), args);
        System.exit(res);
    }
}
5. Upload mysql-connector-java-5.1.18-bin.jar to the hadoop-1.0.3/lib directory.
6. Compile and run:
javac -classpath hadoop-core-1.0.3.jar:lib/commons-cli-1.2.jar:lib/mysql-connector-java-5.1.18-bin.jar -d mytest/classes/ mytest/src/DBAccess2.java
jar -cvf mytest/DBAccess2.jar -C mytest/classes/ .
bin/hadoop fs -rmr dboutput    # remove the output path specified in the code; the job fails if it already exists
time bin/hadoop jar mytest/DBAccess2.jar DBAccess2
But running it produced the following error:
huangshaobin@backtest12:~/hadoop-1.0.3$ time bin/hadoop jar mytest/DBAccess2.jar DBAccess2
12/09/21 17:20:25 INFO mapred.JobClient: Running job: job_201209201824_0008
12/09/21 17:20:26 INFO mapred.JobClient:  map 0% reduce 0%
12/09/21 17:20:38 INFO mapred.JobClient: Task Id : attempt_201209201824_0008_m_000000_0, Status : FAILED
java.lang.RuntimeException: Error in configuring object
    at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:93)
    at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:64)
    at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:117)
    at org.apache.hadoop.mapred.JobConf.getInputFormat(JobConf.java:575)
    at org.apache.hadoop.mapred.MapTask$TrackedRecordReader.<init>(MapTask.java:197)
    at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:418)
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:372)
    at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:416)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1121)
    at org.apache.hadoop.mapred.Child.main(Child.java:249)
Caused by: java.lang.reflect.InvocationTargetException
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:616)
    at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:88)
    ... 11 more
Caused by: java.lang.RuntimeException: java.lang.ClassNotFoundException: com.mysql.jdbc.Driver
    at org.apache.hadoop.mapred.lib.db.DBInputFormat.configure(DBInputFormat.java:271)
    ... 16 more
Caused by: java.lang.ClassNotFoundException: com.mysql.jdbc.Driver
    at java.net.URLClassLoader$1.run(URLClassLoader.java:217)
    at java.security.AccessController.doPrivileged(Native Method)
    at java.net.URLClassLoader.findClass(URLClassLoader.java:205)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:321)
    at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:294)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:266)
    at java.lang.Class.forName0(Native Method)
    at java.lang.Class.forName(Class.java:186)
    at org.apache.hadoop.mapred.lib.db.DBConfiguration.getConnection(DBConfiguration.java:123)
    at org.apache.hadoop.mapred.lib.db.DBInputFormat.configure(DBInputFormat.java:266)
    ... 16 more
12/09/21 17:20:41 INFO mapred.JobClient: Task Id : attempt_201209201824_0008_m_000001_0, Status : FAILED
java.lang.RuntimeException: Error in configuring object
    ...
Caused by: java.lang.ClassNotFoundException: com.mysql.jdbc.Driver
    ... 16 more
(The second attempt fails the same way, with ClassNotFoundException: com.mysql.jdbc.Driver; only the JDK-internal line numbers differ between the two traces, e.g. Subject.doAs at line 396 instead of 416, suggesting the two task JVMs are different Java versions.)
7. Copy the JDBC driver to the other node:
huangshaobin@backtest12:~/hadoop-1.0.3$ scp lib/mysql-connector-java-5.1.18-bin.jar backtest11:/home/huangshaobin/hadoop-1.0.3/lib
mysql-connector-java-5.1.18-bin.jar 100% 771KB 771.4KB/s 00:00
Re-run:
bin/hadoop fs -rmr dboutput
time bin/hadoop jar mytest/DBAccess2.jar DBAccess2
The same error still appeared, even though the very same code runs fine in pseudo-distributed mode???
Copy the job jar to the other node as well:
huangshaobin@backtest12:~/hadoop-1.0.3$ scp -r mytest/ backtest11:/home/huangshaobin/hadoop-1.0.3
Run again:
bin/hadoop fs -rmr dboutput
time bin/hadoop jar mytest/DBAccess2.jar DBAccess2
Still the same error! Could that first failure have been a Java version problem? Either way, the JDBC driver still cannot be found!
Next, delete the JDBC driver under lib/ on node 11, and delete mytest/DBAccess2.jar there as well. Then, in the code, add the following two lines right after JobConf job = new JobConf(conf, DBAccess2.class);:
job.set("mapred.job.tracker", "172.19.102.12:9001");
DistributedCache.addFileToClassPath(new Path("/lib/mysql-connector-java-5.1.18-bin.jar"), job);
and add the corresponding import:
import org.apache.hadoop.filecache.DistributedCache;
Copy the JDBC driver to HDFS:
bin/hadoop fs -put lib/mysql-connector-java-5.1.18-bin.jar /lib/mysql-connector-java-5.1.18-bin.jar
Recompile:
huangshaobin@backtest12:~/hadoop-1.0.3$ javac -classpath hadoop-core-1.0.3.jar:lib/commons-cli-1.2.jar:lib/mysql-connector-java-5.1.18-bin.jar -d mytest/classes/ mytest/src/DBAccess2.java
Note: mytest/src/DBAccess2.java uses or overrides a deprecated API.
Note: Recompile with -Xlint:deprecation for details.
What do these two notes mean? javac is only warning that the code uses some deprecated API; recompiling with -Xlint:deprecation would show exactly which calls. Harmless here, so ignore them for now.
Re-run, and this time it succeeds!!
So these two lines are essential!!
job.set("mapred.job.tracker", "172.19.102.12:9001");
DistributedCache.addFileToClassPath(new Path("/lib/mysql-connector-java-5.1.18-bin.jar"), job);
addFileToClassPath records the HDFS path in the job configuration, and each TaskTracker then localizes the jar and adds it to its tasks' classpath, which is why the driver no longer has to sit in lib/ on every node. There is also no need to copy DBAccess2.jar to the other nodes: the job jar itself is shipped to the nodes by the framework automatically.
The imports at the top of the file were also a mess; after tidying them up as well, the final revised code is as follows:
import java.io.IOException;
import java.io.DataInput;
import java.io.DataOutput;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.hadoop.mapred.lib.db.DBWritable;
import org.apache.hadoop.mapred.lib.db.DBInputFormat;
import org.apache.hadoop.mapred.lib.db.DBConfiguration;

import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;

public class DBAccess2 extends Configured implements Tool {

    // One row of the teacher table: Writable for Hadoop serialization,
    // DBWritable for reading from (and writing to) JDBC.
    public static class TeacherRecord implements Writable, DBWritable { // add "static" ??
        int id;
        String name;
        int age;
        int departmentID;

        public TeacherRecord() { // add by brian
            System.out.println("TeacherRecord()");
        }

        public TeacherRecord(TeacherRecord t) { // add by brian
            System.out.println("TeacherRecord(TeacherRecord t)");
            this.id = t.id;
            this.name = t.name;
            this.age = t.age;
            this.departmentID = t.departmentID;
        }

        public void readFields(DataInput in) throws IOException {
            this.id = in.readInt();
            this.name = Text.readString(in);
            this.age = in.readInt();
            this.departmentID = in.readInt();
        }

        public void write(DataOutput out) throws IOException {
            out.writeInt(this.id);
            Text.writeString(out, this.name);
            out.writeInt(this.age);
            out.writeInt(this.departmentID);
        }

        public void readFields(ResultSet result) throws SQLException {
            this.id = result.getInt(1);
            this.name = result.getString(2);
            this.age = result.getInt(3);
            this.departmentID = result.getInt(4);
        }

        public void write(PreparedStatement stmt) throws SQLException {
            stmt.setInt(1, this.id);
            stmt.setString(2, this.name);
            stmt.setInt(3, this.age);
            stmt.setInt(4, this.departmentID);
        }

        public String toString() {
            return new String(this.name + "," + this.age + "," + this.departmentID);
        }
    }

    public static class DBAccessMapper extends MapReduceBase
            implements Mapper<LongWritable, TeacherRecord, LongWritable, Text> {

        private final static IntWritable uno = new IntWritable(1);
        private IntWritable citationCount = new IntWritable();

        public void map(LongWritable key, TeacherRecord value,
                OutputCollector<LongWritable, Text> collector, Reporter reporter)
                throws IOException {
            collector.collect(new LongWritable(value.id), new Text(value.toString()));
        }
    }

    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        JobConf job = new JobConf(conf, DBAccess2.class);
        // The two lines that made the job work: point at the jobtracker, and put the
        // JDBC driver jar (already uploaded to HDFS) on every task's classpath.
        job.set("mapred.job.tracker", "172.19.102.12:9001");
        DistributedCache.addFileToClassPath(
                new Path("/lib/mysql-connector-java-5.1.18-bin.jar"), job);
        job.setInputFormat(DBInputFormat.class);
        FileOutputFormat.setOutputPath(job, new Path("dboutput"));
        DBConfiguration.configureDB(job, "com.mysql.jdbc.Driver",
                "jdbc:mysql://172.19.102.12/tmp", "brian", "123456");

        String[] fields = {"id", "name", "age", "departmentID"};
        DBInputFormat.setInput(job, TeacherRecord.class, "teacher", null, "id", fields);

        job.setMapperClass(DBAccessMapper.class);
        job.setReducerClass(IdentityReducer.class);
        JobClient.runJob(job);

        return 0;
    }

    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new DBAccess2(), args);
        System.exit(res);
    }
}
The output:
huangshaobin@backtest12:~/hadoop-1.0.3$ bin/hadoop fs -cat dboutput/*
101 tom,29,10
102 tony,39,20
103 lily,26,20
104 张三,25,30
Summary of the steps:
1. In the code, specify the jobtracker and add the extra JDBC jar to the job classpath:
job.set("mapred.job.tracker", "172.19.102.12:9001");
DistributedCache.addFileToClassPath(new Path("/lib/mysql-connector-java-5.1.18-bin.jar"), job);
2. Compile.
3. Upload the extra JDBC jar to /lib on HDFS:
bin/hadoop fs -put lib/mysql-connector-java-5.1.18-bin.jar /lib/mysql-connector-java-5.1.18-bin.jar
4. Run; it succeeds.
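Note: since DBAccess2 runs through ToolRunner, the generic -libjars option (parsed by GenericOptionsParser ahead of the job's own arguments) should in principle ship the driver jar the same way without the hard-coded DistributedCache call, e.g. bin/hadoop jar mytest/DBAccess2.jar DBAccess2 -libjars lib/mysql-connector-java-5.1.18-bin.jar. I have not tested that variant here, though.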
Also, the DistributedCache class documentation at http://hadoop.apache.org/docs/r1.0.3/api/index.html describes its other methods and usage, with examples.
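For reference, the same DistributedCache mechanism can also distribute plain data files to tasks, not just classpath jars. A minimal sketch of that side-file pattern under the old mapred API; the class name CacheSketch and the HDFS path /cache/rates.txt are my own illustration, not part of this job:

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;

public class CacheSketch extends MapReduceBase {

    // Driver side: register an HDFS file with the cache before submitting the job.
    public static void register(JobConf job) throws Exception {
        DistributedCache.addCacheFile(new URI("/cache/rates.txt"), job);
    }

    // Task side: every registered file has already been copied to the task's local disk.
    @Override
    public void configure(JobConf job) {
        try {
            Path[] localFiles = DistributedCache.getLocalCacheFiles(job);
            if (localFiles != null && localFiles.length > 0) {
                BufferedReader in = new BufferedReader(new FileReader(localFiles[0].toString()));
                for (String line; (line = in.readLine()) != null; ) {
                    // parse the side data into an in-memory table here
                }
                in.close();
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}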
References:
Hadoop集群(第10期)_MySQL关系数据库 (Hadoop Cluster, Part 10: the MySQL relational database): http://www.cnblogs.com/xia520pi/archive/2012/06/12/2546261.html
《hadoop开发者》第二期:访问数据库 (Hadoop Developer, Issue 2: Accessing Databases)