spark操作数据库的几种方法

一.使用jdbcRDD的接口:

  1 SparkConf conf = new SparkConf();
  2         conf.setAppName("Simple Application").setMaster("local");
  3         JavaSparkContext jsc = new JavaSparkContext(conf);
  4 
  5         
  6 //1.直接使用jdbcRDD的构造函数
  7 class DbConnection extends AbstractFunction0<Connection> implements
  8         Serializable {
  9     private static final long serialVersionUID = 1L;
 10     private String driverClassName;
 11     private String connectionUrl;
 12     private String userName;
 13     private String password;
 14 
 15     public DbConnection(String driverClassName, String connectionUrl,
 16             String userName, String password) {
 17         this.driverClassName = driverClassName;
 18         this.connectionUrl = connectionUrl;
 19         this.userName = userName;
 20         this.password = password;
 21     }
 22 
 23     @Override
 24     public Connection apply() {
 25         try {
 26             Class.forName(driverClassName);
 27         } catch (ClassNotFoundException e) {
 28         }
 29         Properties properties = new Properties();
 30         properties.setProperty("user", userName);
 31         properties.setProperty("password", password);
 32         Connection connection = null;
 33         try {
 34             connection = DriverManager.getConnection(connectionUrl,
 35                     properties);
 36         } catch (SQLException e) {
 37         }
 38         return connection;
 39     }
 40 }
 41 
 42 class MapResult extends AbstractFunction1<ResultSet, Object[]>
 43         implements Serializable {
 44     private static final long serialVersionUID = 1L;
 45 
 46     public Object[] apply(ResultSet row) {
 47         return JdbcRDD.resultSetToObjectArray(row);
 48     }
 49 }
 50 
 51 String Connection_url = "jdbc:mysql://ip:port/dbname?useUnicode=true&characterEncoding=utf8";
 52 String Driver="com.mysql.jdbc.Driver";
 53 String UserName = "root";
 54 String password = "pd";
 55 DbConnection dbConnection = new DbConnection(Driver,
 56         Connection_url, UserName, password);
 57 sql = "select * from (" + sql + ") as tmp where 0=? and 0=?";
 58 //lowerBound,upperBound均设置0,where条件就为恒真,这个是个处理技巧
 59 JdbcRDD<Object[]> jdbcRDD = new JdbcRDD<>(jsc.sc(), dbConnection,
 60         sql, 0, 0, 1, new MapResult(),
 61         ClassManifestFactory$.MODULE$.fromClass(Object[].class));
 62 JavaRDD<Object[]> javaRDD = JavaRDD.fromRDD(jdbcRDD,
 63         ClassManifestFactory$.MODULE$.fromClass(Object[].class));
 64         
 65         
 66 //另外一种实现:
 67 class DbConnectionFactory implements JdbcRDD.ConnectionFactory {
 68     private static final long serialVersionUID = 1L;
 69     private String driverClassName;
 70     private String connectionUrl;
 71     private String userName;
 72     private String password;
 73     
 74     public Connection getConnection() throws Exception {
 75         Class.forName(driverClassName);
 76         String url = connectionUrl;
 77         Properties properties = new Properties();
 78         properties.setProperty("user", userName);
 79         properties.setProperty("password", password);
 80         return DriverManager.getConnection(url, properties);
 81     }
 82     
 83     public DbConnectionFactory(String driverClassName, String connectionUrl,
 84             String userName, String password) {
 85         this.driverClassName = driverClassName;
 86         this.connectionUrl = connectionUrl;
 87         this.userName = userName;
 88         this.password = password;
 89     }
 90 
 91 }
 92 
 93 String Connection_url = "jdbc:mysql://ip:port/dbname?useUnicode=true&characterEncoding=utf8";
 94 sql = "select * from (" + sql + ") as tmp where 0=? and 0=?";
 95 DbConnectionFactory ConnectFactory = new DbConnectionFactory(Driver,
 96         Connection_url, UserName, password)
 97 javaRDD = JdbcRDD.create(jsc, new DbConnectionFactory(Driver,
 98                     Connection_url, UserName, password), sql, 0, 0, 1,new Function<ResultSet,Object[]>()
 99             {
100                 private static final long serialVersionUID = 1L;
101                 public Object[] call(ResultSet resultSet)
102                   {
103                     return JdbcRDD.resultSetToObjectArray(resultSet);
104                   }
105             });//直接返回JavaRDD<Object[]>,这个底层调用的是JdbcRDD(SparkContext sc, Function0<Connection> getConnection, String sql, long lowerBound, long upperBound, int numPartitions, Function1<ResultSet, T> mapRow, ClassTag<T> evidence$1)
106 //javaRDD =JdbcRDD.create(jsc, ConnectFactory, sql, 0, 0, 1);//该方法更加简洁,底层调用上面的create(JavaSparkContext paramJavaSparkContext, ConnectionFactory paramConnectionFactory, String paramString, long paramLong1, long paramLong2, int paramInt, Function<ResultSet, T> paramFunction)

 二.使用通过sparksession的接口:

 1 SparkSession ss = SparkSession.builder().config(conf).enableHiveSupport().getOrCreate()
 2 //读取方式1
 3 String sql = "(select * from xxx) as tmp; //注意这里的sql格式,该sql也可以直接是一个表名
 4 Dataset<Row> df = session.read().format("jdbc")
 5                     .option("url", jdbcURL)
 6                     .option("driver", driver)
 7                     .option("dbtable", sql)
 8                     .option("user", username)
 9                     .option("password", password)
10                     .load();
11 //读取方式2:
12 Properties connectionProperties = new Properties();
13             connectionProperties.put("user", username);
14             connectionProperties.put("password", password);
15             connectionProperties.put("driver", driver);
16                  session.read().jdbc(url, table, properties)
17 df = session.read().jdbc(jdbcURL,sql,connectionProperties);
18 
19 
20 //写入方式1:
21 String saveMode = "Overwrite";
22 df.write().mode(saveMode).jdbc(jdbcURL, tablename, connectionProperties);
23 //写入方式2:
24 final String sql = "insert into tab_xxx (c1,c2,c3) values(?,?,?)";
25 
26     df.javaRDD().foreachPartition(new VoidFunction<Iterator<Row>>() {
27         private static final long serialVersionUID = -834520661839866305L;
28         @Override
29         public void call(Iterator<Row> t) throws Exception {
30             Class.forName(driver);
31             Connection conn = (Connection) DriverManager.getConnection(url, username, password);
32             conn.setAutoCommit(false);
33             try {
34                 PreparedStatement pstmt = (PreparedStatement) conn.prepareStatement(sql);
35                 int loop = 0;
36                 while (t.hasNext()) {
37                     Row row = t.next();
38                     for (int i = 0; i < 3; i++) { //这里的3是插入的列只有3列
39                         pstmt.setObject(i + 1, row.get(i));
40                     }
41                     pstmt.executeUpdate();
42                     if (++loop % 1000 == 0) {
43                         conn.commit();
44                     }
45                 }
46                 conn.commit();
47                 pstmt.close();
48             } finally {
49                 conn.close();
50             }
51         }
52     });
53 }

 

 1 //写入方法3
 2 3.转换成List<Row>,可以批量写入,但是有可能导致Driver 内存承载过高
 3 
 4 private static void InsertTmpTable(Connection conn,Dataset<Row> ndf,final String m_cols,final String tabname) {
 5         System.out.println("InsertTmpTab "+tabname+" start!");
 6         String  placeholderStr="values(";
 7         int cnt = m_cols.split(",").length+1;
 8         for(int  i = 0;i<cnt;i++){
 9             placeholderStr+="?"+ (i == cnt-1 ? ")" : ",");
10         }
11         
12          String sql = "insert into "+ tabname+"("+m_cols+",orderby_time)"+placeholderStr;
13          PreparedStatement pstmt = null;
14         
15         try {
16            List<Row> lrow=ndf.collectAsList();
17            pstmt = (PreparedStatement) conn.prepareStatement(sql);
18     
19            for(int j =0;j<lrow.size();j++){
20                for(int i=0;i<cnt;i++){
21                 pstmt.setObject(i+1,lrow.get(j).get(i));
22                }
23                pstmt.executeUpdate();
24                if(j%10000==0)  //批量提交
25                    conn.commit(); 
26            }
27            conn.commit();
28            pstmt.close();
29         }catch (Exception e){
30             System.out.println(e.getMessage());
31             System.exit(-1);
32         }
33         System.out.println("InsertTmpTab "+tabname+" success!");
34     }

 

posted @ 2018-09-12 11:43  一直爬行的蜗牛牛  阅读(2976)  评论(0编辑  收藏  举报