Spark:使用Java代码提交Spark任务

本文基于spark-1.6.2-bin-hadoop2.6

提交到本地

程序中指定的参数(param)和spark-submit提交时参数配置一致:

import org.apache.spark.deploy.SparkSubmit;
/**
 * Submits a Spark application with a local master by calling
 * {@code SparkSubmit.main} directly from Java.
 *
 * <p>The argument array carries the same flags that would be passed on the
 * {@code spark-submit} command line.
 */
public class Dr {
    /** Path of the application jar handed to SparkSubmit. */
    private static final String APP_JAR = "/mnt/tkmeans_2.10-1.0.jar";

    /** Name of the main class passed via {@code --class}. */
    private static final String MAIN_CLASS = "Tkmeans";

    /**
     * Builds the submit arguments and passes them to {@code SparkSubmit.main}.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        // Options first, application jar as the final element.
        SparkSubmit.main(new String[] {
            "--name", "app_name",
            "--master", "local[*]",
            "--class", MAIN_CLASS,
            APP_JAR
        });
    }
}

提交到YARN

用spark-submit提交任务到YARN集群,只需要HADOOP_CONF_DIR环境变量指向YARN的配置文件目录就好。

用程序提交时,虽然同样要求设置HADOOP_CONF_DIR环境变量,但设置后并不起作用,仍需要在程序里显式指定YARN配置:

import org.apache.hadoop.conf.Configuration;
import org.apache.spark.SparkConf;
import org.apache.spark.deploy.yarn.Client;
import org.apache.spark.deploy.yarn.ClientArguments;

/**
 * Submits a Spark application to a YARN cluster from Java by constructing the
 * Spark YARN {@code Client} directly, with the Hadoop/YARN endpoints set in code.
 */
public class Test {
    /**
     * Builds the client arguments and configuration, then runs the YARN client.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        // Optional arguments that can be added to this array, e.g.:
        //   "--executor-memory", "1G",
        //   "--arg", "hdfs://node101:8020/user/root/log.txt",
        //   "--arg", "hdfs://node101:8020/user/root/badLines_yarn_",
        final String[] clientArgs = {
            "--name", "test java submit job to yarn",
            "--class", "Tkmeans",
            "--jar", "/mnt/tkmeans_2.10-1.0.jar"
        };

        final Configuration hadoopConf = buildHadoopConfiguration();

        System.setProperty("SPARK_YARN_MODE", "true");
        // Avoids uploading this jar on every submission. Only a single jar file
        // can be given here, not a batch of jars or a directory.
        // The SPARK_JAR environment variable is an alternative, but that
        // approach is deprecated.
        System.setProperty("spark.yarn.jar", "hdfs:///jars/spark-assembly-1.6.2-hadoop2.6.0.jar");

        final SparkConf sparkConf = new SparkConf();
        final ClientArguments parsedArgs = new ClientArguments(clientArgs, sparkConf);

        // Submitting this way gives no feedback on the job's status.
        new Client(parsedArgs, hadoopConf, sparkConf).run();
    }

    /** Creates the Hadoop configuration holding the cluster addresses used for submission. */
    private static Configuration buildHadoopConfiguration() {
        final Configuration hadoopConf = new Configuration();

        // Enable cross-platform submission only when the submitting machine runs Windows.
        final boolean onWindows = System.getProperty("os.name").contains("Windows");
        hadoopConf.setBoolean("mapreduce.app-submission.cross-platform", onWindows);

        // NameNode address.
        hadoopConf.set("fs.defaultFS", "hdfs://data60:9000");
        // Use the YARN framework.
        hadoopConf.set("mapreduce.framework.name", "yarn");
        // ResourceManager address.
        hadoopConf.set("yarn.resourcemanager.address", "data60:8032");
        // Resource scheduler address.
        hadoopConf.set("yarn.resourcemanager.scheduler.address", "data60:8030");
        hadoopConf.set("mapreduce.jobhistory.address", "data60:10020");

        return hadoopConf;
    }
}
posted @ 2019-01-04 17:24  xuejianbest  阅读(3844)  评论(0)  编辑  收藏  举报