java代码如下:
package db.insert;
import java.util.Iterator;
import java.util.StringTokenizer;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;
public class SparkInsertData {

    /**
     * Entry point: reads vehicle GPS records from a local text file with Spark
     * and bulk-inserts them into the HBase table {@code "mapCar"}.
     *
     * <p>Input format per line (whitespace-separated): carId, latitude,
     * longitude, timestamp. The row key is {@code carId + "_" + timestamp}
     * with the timestamp zero-padded on the left to 13 digits so that rows
     * sort chronologically within each car.
     *
     * @param args unused command-line arguments
     */
    public static void main(String[] args) {
        // Initialize the Spark context in local mode with 2 worker threads.
        SparkConf sparkConf = new SparkConf().setAppName("HBaseTest").setMaster("local[2]");
        JavaSparkContext sc = new JavaSparkContext(sparkConf);
        try {
            final String tableName = "mapCar";
            // Driver-side DAO used only for table lifecycle operations.
            HBaseDBDao driverDao = new HBaseDBDao();
            // Drop any previous run's table, then recreate it.
            driverDao.deleteTable(tableName);
            // Step 1: create the table "mapCar" with its three column families.
            String[] columnFamilys = { "cids", "gis", "times" };
            if (!driverDao.isExist(tableName)) {
                driverDao.createTable(tableName, columnFamilys);
            }
            // Step 2: load all vehicle records (10 partitions) and insert them.
            JavaRDD<String> fcar = sc.textFile(
                    "/usr/local/myjar/mongo/地图数据/mongo/MongoDB/mapCar.txt", 10);
            fcar.foreachPartition(new VoidFunction<Iterator<String>>() {
                private static final long serialVersionUID = 1L;

                @Override
                public void call(Iterator<String> iter) throws Exception {
                    // Create the DAO on the executor, once per partition. The
                    // original captured the driver-side instance in this closure;
                    // HBase connections are not serializable, so that fails with
                    // NotSerializableException when Spark ships the closure.
                    // NOTE(review): assumes initHTable prepares a write-buffered
                    // table handle — confirm against HBaseDBDao.
                    HBaseDBDao hb = new HBaseDBDao();
                    hb.initHTable(tableName);
                    while (iter.hasNext()) {
                        String s = iter.next();
                        // Each line: <carId> <latitude> <longitude> <timestamp>.
                        StringTokenizer stk = new StringTokenizer(s);
                        String cid = stk.nextToken();
                        String lat = stk.nextToken();
                        String lon = stk.nextToken();
                        String time = stk.nextToken();
                        // Left-pad the timestamp with zeros to a fixed width of
                        // 13 digits so row keys sort chronologically per car.
                        int n = 13 - time.length();
                        StringBuilder sb = new StringBuilder(time);
                        for (int i = 0; i < n; i++) {
                            sb.insert(0, '0');
                        }
                        String row = cid + "_" + sb.toString();
                        hb.addRowBatch(tableName, row, "cids", "cid", cid);
                        hb.addRowBatch(tableName, row, "gis", "lat", lat);
                        hb.addRowBatch(tableName, row, "gis", "lon", lon);
                        hb.addRowBatch(tableName, row, "times", "time", time);
                        System.out.println("row: " + row + ", cid : " + cid
                                + ", lat: " + lat + ", lon: " + lon + ", time: "
                                + time);
                    }
                    // Flush the puts buffered for this partition in one batch.
                    hb.flushCommits(tableName);
                }
            });
            System.out.println("插入完毕!");
            // Step 3: dump all rows for verification (disabled).
            // driverDao.getAllRows(tableName);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release Spark resources; the original never stopped the context.
            sc.close();
        }
    }
}
所需jar包如下: