spark读写Sequoiadb

最近经常有客户问 Spark 如何读写 SequoiaDB,在这里记录一下。

 

Spark读Sequoiadb数据:

package marketing

import com.sequoiadb.hadoop.io.BSONWritable
import com.sequoiadb.hadoop.mapreduce.SequoiadbInputFormat
import org.apache.hadoop.conf.Configuration
import org.apache.spark.{SparkContext, SparkConf}

/**
  * Example Spark job that loads a SequoiaDB collection through
  * `SequoiadbInputFormat` and prints every record on the driver.
  *
  * Created by joy on 2015/12/15.
  */
object Read {

  /**
    * Entry point.
    *
    * An explicit `main` is used instead of `extends App`: Spark's documentation
    * advises against `scala.App` because its delayed initialisation may not
    * work correctly with Spark applications.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    // BSONWritable is registered with Kryo so records can be serialized by Spark.
    val conf = new SparkConf()
      .setAppName("cgbdata")
      .setMaster("local")
      .registerKryoClasses(Array(classOf[BSONWritable]))

    val sc = new SparkContext(conf)
    try {
      // Connection settings consumed by SequoiadbInputFormat.
      // NOTE(review): the URL is presumably a comma-separated list of
      // host:port endpoints of the SequoiaDB cluster -- confirm against the
      // connector documentation.
      val hadoopConfig = new Configuration()
      hadoopConfig.set("sequoiadb.input.url", "master:11810,slave1:11810,slave2:11810")
      hadoopConfig.set("sequoiadb.in.collectionspace", "default")
      hadoopConfig.set("sequoiadb.in.collection", "bar")

      val sdbRDD = sc.newAPIHadoopRDD[Object, BSONWritable, SequoiadbInputFormat](
        hadoopConfig,
        classOf[SequoiadbInputFormat],
        classOf[Object],
        classOf[BSONWritable])

      // Extract the BSON payload from each (key, value) pair before collecting,
      // then print on the driver. collect() pulls the whole collection into
      // driver memory, so this is only suitable for small demo data sets.
      sdbRDD.map(_._2.getBson).collect().foreach(println)
    } finally {
      // Always release the SparkContext, even when the job fails.
      sc.stop()
    }
  }
}

 

Spark写Sequoiadb

package marketing

import com.sequoiadb.hadoop.io.BSONWritable
import com.sequoiadb.hadoop.mapreduce.SequoiadbOutputFormat
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.io.{NullWritable, IntWritable}
import org.apache.spark.{SparkConf, SparkContext}
import org.bson.BasicBSONObject
import org.bson.types.ObjectId

/**
  * Example Spark job that writes a single BSON document
  * (`{"name": "gaoxing"}`) to a SequoiaDB collection through
  * `SequoiadbOutputFormat`.
  */
object Save {

  /**
    * Entry point.
    *
    * An explicit `main` is used instead of `extends App`: Spark's documentation
    * advises against `scala.App` because its delayed initialisation may not
    * work correctly with Spark applications.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    // BSONWritable is registered with Kryo so records can be serialized by Spark.
    val sparkconf = new SparkConf()
      .setMaster("local[2]")
      .setAppName("save")
      .registerKryoClasses(Array(classOf[BSONWritable]))
    val sc = new SparkContext(sparkconf)

    try {
      // One (key, value) record; the key is a NullWritable placeholder and the
      // value carries the BSON document to store.
      val data = sc.parallelize(
        List((NullWritable.get(), new BSONWritable(new BasicBSONObject("name", "gaoxing")))))

      // Connection settings consumed by SequoiadbOutputFormat.
      val config = new Configuration()
      config.set("sequoiadb.output.url", "master:11810")
      config.set("sequoiadb.out.collectionspace", "foo")
      config.set("sequoiadb.out.collection", "bar")

      // NOTE(review): the path argument is left empty as in the original;
      // presumably the output format ignores it because the target is given
      // by the configuration above -- confirm with the Spark version in use.
      data.saveAsNewAPIHadoopFile(
        "",
        classOf[NullWritable],
        classOf[BSONWritable],
        classOf[SequoiadbOutputFormat],
        config)
    } finally {
      // Always release the SparkContext, even when the job fails.
      sc.stop()
    }
  }
}

  

posted @ 2015-12-15 17:14  高兴的博客  阅读(854)  评论(0)  编辑  收藏  举报