A Spark Demo Running in Eclipse
I have only just started with Spark and still don't understand much of it. After going through a Chinese translation of the API (the Spark中文文档 site), I wrote the simplest possible invocation based on the parameters described there. If Maven is already configured, it can be run directly with these dependencies:
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_2.10</artifactId>
    <version>1.5.2</version>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.10</artifactId>
    <version>1.5.2</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>1.2.0</version>
</dependency>
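For context, the _2.10 suffix on the Spark artifacts is the Scala version they were built against, so both Spark dependencies must share it. If you are starting a project from scratch, these dependencies sit inside an ordinary pom.xml skeleton; a minimal sketch (the groupId and artifactId values here are placeholders, not from the original post):

<project xmlns="http://maven.apache.org/POM/4.0.0">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>          <!-- placeholder -->
    <artifactId>spark-demo</artifactId>     <!-- placeholder -->
    <version>1.0-SNAPSHOT</version>
    <dependencies>
        <!-- the three <dependency> blocks above go here -->
    </dependencies>
</project>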
Below is the simplest possible demo: it reads a text file and prints the length of each line.
import java.io.Serializable;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.VoidFunction;

public class SparkMainTest implements Serializable {

    private static final long serialVersionUID = 4746372206301124368L;

    public static void main(String[] args) throws Exception {
        // "local" runs Spark inside this JVM, so no cluster is needed.
        SparkConf conf = new SparkConf().setAppName("WordCounter").setMaster("local");
        String fileName = "C:/Users/dulinan/Desktop/LICENSE.txt";

        JavaSparkContext sc = new JavaSparkContext(conf);
        // Read the file as an RDD of lines, using a single partition.
        JavaRDD<String> lines = sc.textFile(fileName, 1);

        // Map each line to its character count.
        JavaRDD<Integer> lineLengths = lines.map(new Function<String, Integer>() {
            private static final long serialVersionUID = 1L;

            @Override
            public Integer call(String v1) throws Exception {
                return v1.length();
            }
        });

        // Print each length; with master "local" the output appears in the Eclipse console.
        lineLengths.foreach(new VoidFunction<Integer>() {
            private static final long serialVersionUID = 1L;

            @Override
            public void call(Integer t) throws Exception {
                System.out.println(t);
            }
        });

        sc.close();
    }
}
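As a side note, Spark's Java function interfaces each have a single abstract method, so if your Eclipse project targets Java 8 the same pipeline can be written with lambdas. A minimal sketch under that assumption (the class name and the added reduce step are mine, not part of the original demo):

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class SparkLambdaTest {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("WordCounter").setMaster("local");
        // try-with-resources works because JavaSparkContext implements Closeable.
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            JavaRDD<String> lines = sc.textFile("C:/Users/dulinan/Desktop/LICENSE.txt", 1);
            // Same map as above, written as a method reference.
            JavaRDD<Integer> lineLengths = lines.map(String::length);
            // Extra step for illustration: sum the per-line lengths on the driver.
            int totalChars = lineLengths.reduce((a, b) -> a + b);
            System.out.println("total characters: " + totalChars);
        }
    }
}

The reduce at the end simply adds up the per-line lengths, which is the classic follow-up to this map in the Spark programming guide's RDD examples.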