14(spark环境搭建)
1,安装java8
cd /software 上传文件
tar -zxvf ./jdk-8u181-linux-x64.tar.gz
vi /etc/profile (四台机器都要修改,添加以下两行)
export JAVA_HOME=/software/jdk1.8.0_181
export PATH=$JAVA_HOME/bin:$PATH
. /etc/profile
java -version
scp -r jdk1.8.0_181/ node0002:`pwd`
scp -r jdk1.8.0_181/ node0003:`pwd`
scp -r jdk1.8.0_181/ node0004:`pwd`
. /etc/profile (在node0002、node0003、node0004上分别执行)
java -version 查看四台机器是否都是1.8
2,修改jdk指向
cd /usr/java
ll (指向1.7)
ln -sf /software/jdk1.8.0_181/bin/java /usr/bin/java (四台机器)
cd /usr/bin
ll (java指向1.8)
3,修改hadoop的jdk配置
cd /opt/sxt/hadoop-2.6.5/etc/hadoop
vi hadoop-env.sh (四台机器)
export JAVA_HOME=/software/jdk1.8.0_181 (JAVA_HOME应指向JDK根目录,不能写成.../bin/java)
4,安装spark
cd /software
tar -zxvf spark-2.3.1-bin-hadoop2.6.tgz
mv spark-2.3.1-bin-hadoop2.6 spark-2.3.1
cd spark-2.3.1/conf/
cp slaves.template slaves
vi slaves (配置worker节点)
node0002
node0003
cp spark-env.sh.template spark-env.sh
vi spark-env.sh (配置master节点,添加)
export SPARK_MASTER_HOST=node0001
export SPARK_MASTER_PORT=7077
export SPARK_WORKER_CORES=2
export SPARK_WORKER_MEMORY=3g
5,分发到node0002,node0003
cd /software
scp -r spark-2.3.1 node0002:`pwd`
scp -r spark-2.3.1 node0003:`pwd`
6,启动spark
cd spark-2.3.1/sbin/
./start-all.sh (一定要加./,否则会启动hdfs集群)
页面访问:http://node0001:8080
7,基于yarn任务提交
node0002:
cd /software
scp -r spark-2.3.1/ node0004:`pwd`
node0004:
cd /software/spark-2.3.1/conf/
rm -rf ./slaves
vi spark-env.sh (注释掉之前添加的SPARK_MASTER_HOST、SPARK_MASTER_PORT、SPARK_WORKER_CORES、SPARK_WORKER_MEMORY这几行export)
8,配置yarn
vi /opt/sxt/hadoop-2.6.5/etc/hadoop/yarn-site.xml (添加)
<property> <name>yarn.nodemanager.vmem-check-enabled</name> <value>false</value> </property>
cd /opt/sxt/hadoop-2.6.5/etc/hadoop
scp ./yarn-site.xml node0002:`pwd`
scp ./yarn-site.xml node0003:`pwd`
scp ./yarn-site.xml node0004:`pwd`
vi /opt/sxt/hadoop-2.6.5/etc/hadoop/hadoop-env.sh
改为:export JAVA_HOME=/software/jdk1.8.0_181 (JAVA_HOME必须是JDK根目录,不能带/bin/java)
scp ./hadoop-env.sh node0002:`pwd`
scp ./hadoop-env.sh node0003:`pwd`
scp ./hadoop-env.sh node0004:`pwd`