k8s部署spark
1.镜像构建打包推送阿里镜像:
- Dockerfile 构建:
# Spark 2.4.4 (hadoop-free build) + Hadoop 2.8.2 image for a Kubernetes
# standalone cluster. Build context must contain the two tarballs, the three
# start scripts and the two config files referenced below.
# NOTE(review): the `java` image is deprecated on Docker Hub; consider
# migrating to eclipse-temurin:8-jdk in a future revision.
FROM java:openjdk-8-jdk

# Build-time version knobs (key=value form; legacy space-separated ENV is deprecated).
ENV hadoop_ver=2.8.2
ENV spark_ver=2.4.4

# ADD auto-extracts local tarballs; it also creates /opt if missing,
# so no separate `mkdir -p /opt` layer is needed.
ADD hadoop-${hadoop_ver}.tar.gz /opt/
ADD spark-${spark_ver}-bin-without-hadoop.tar.gz /opt/

# Version-agnostic symlinks so the rest of the image (and the start scripts)
# can refer to /opt/hadoop and /opt/spark; one layer instead of two `cd` RUNs.
RUN ln -s /opt/hadoop-${hadoop_ver} /opt/hadoop && \
    echo Hadoop ${hadoop_ver} installed in /opt && \
    ln -s /opt/spark-${spark_ver}-bin-without-hadoop /opt/spark && \
    echo Spark ${spark_ver} installed in /opt

# Runtime environment. PATH is extended once (the original appended
# /opt/spark/bin a second time at the end — redundant, removed).
ENV SPARK_HOME=/opt/spark
ENV HADOOP_HOME=/opt/hadoop
ENV PATH=$PATH:$SPARK_HOME/bin:$HADOOP_HOME/bin
ENV LD_LIBRARY_PATH=$HADOOP_HOME/lib/native

# S3A support jars, fetched in a single layer. `-f` makes curl fail on HTTP
# errors instead of silently saving an HTML error page as a .jar; `-sSL`
# keeps output quiet, shows real errors, and follows redirects.
RUN curl -fsSL https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/2.8.2/hadoop-aws-2.8.2.jar -o /opt/spark/jars/hadoop-aws-2.8.2.jar && \
    curl -fsSL https://repo1.maven.org/maven2/org/apache/httpcomponents/httpclient/4.5.3/httpclient-4.5.3.jar -o /opt/spark/jars/httpclient-4.5.3.jar && \
    curl -fsSL https://repo1.maven.org/maven2/joda-time/joda-time/2.9.9/joda-time-2.9.9.jar -o /opt/spark/jars/joda-time-2.9.9.jar && \
    curl -fsSL https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-core/1.11.712/aws-java-sdk-core-1.11.712.jar -o /opt/spark/jars/aws-java-sdk-core-1.11.712.jar && \
    curl -fsSL https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk/1.11.712/aws-java-sdk-1.11.712.jar -o /opt/spark/jars/aws-java-sdk-1.11.712.jar && \
    curl -fsSL https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-kms/1.11.712/aws-java-sdk-kms-1.11.712.jar -o /opt/spark/jars/aws-java-sdk-kms-1.11.712.jar && \
    curl -fsSL https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-s3/1.11.712/aws-java-sdk-s3-1.11.712.jar -o /opt/spark/jars/aws-java-sdk-s3-1.11.712.jar

# COPY (not ADD) for plain local files. All three launch scripts need the
# execute bit — the k8s manifests run /start-master and /start-worker directly.
# 755 (owner-writable only), not world-writable 777.
COPY start-common.sh start-worker start-master /
RUN chmod 755 /start-common.sh /start-worker /start-master

COPY core-site.xml /opt/spark/conf/core-site.xml
COPY spark-defaults.conf /opt/spark/conf/spark-defaults.conf
- 推送镜像到阿里云
docker build . -t xujunkai/spark:2.4.4
docker tag xujunkai/spark:2.4.4 registry.cn-hangzhou.aliyuncs.com/xujunkai/spark:2.4.4  # 按镜像名打标签；镜像ID(如 dc45b9403697)因人而异
docker push registry.cn-hangzhou.aliyuncs.com/xujunkai/spark:2.4.4
- 打包镜像相关文件地址:链接:https://pan.baidu.com/s/1OoBuEVcCNjsYZKqBTCMWaw
提取码:ct16
2.k8s 构建spark
- spark-master-controller.yaml
# spark-master-controller.yaml — keeps exactly one Spark master pod running.
# NOTE(review): ReplicationController is legacy; an apps/v1 Deployment is the
# modern equivalent and a drop-in upgrade here.
kind: ReplicationController
apiVersion: v1
metadata:
  name: spark-master-controller
spec:
  replicas: 1
  selector:
    component: spark-master
  template:
    metadata:
      labels:
        component: spark-master
    spec:
      # hostname + subdomain give the pod a stable DNS record via the
      # spark-master-headless service:
      #   spark-master-hostname.spark-master-headless.<namespace>.svc.cluster.local
      hostname: spark-master-hostname
      subdomain: spark-master-headless
      containers:
        - name: spark-master
          image: registry.cn-hangzhou.aliyuncs.com/xujunkai/spark:2.4.4
          imagePullPolicy: Always
          command: ["/start-master"]
          ports:
            - containerPort: 7077   # Spark master RPC (workers connect here)
            - containerPort: 8080   # master web UI
          resources:
            requests:
              cpu: 100m
- spark-worker-controller.yaml
# spark-worker-controller.yaml — keeps two Spark worker pods running.
# NOTE(review): ReplicationController is legacy; an apps/v1 Deployment is the
# modern equivalent and a drop-in upgrade here.
kind: ReplicationController
apiVersion: v1
metadata:
  name: spark-worker-controller
spec:
  replicas: 2
  selector:
    component: spark-worker
  template:
    metadata:
      labels:
        component: spark-worker
    spec:
      containers:
        - name: spark-worker
          image: registry.cn-hangzhou.aliyuncs.com/xujunkai/spark:2.4.4
          imagePullPolicy: Always
          command: ["/start-worker"]
          ports:
            - containerPort: 8081   # worker web UI
          resources:
            requests:
              cpu: 100m
- spark-master-service.yaml
# spark-master-service.yaml — two services for the master pod.
#
# Headless service (clusterIP: None): only exists to give the master pod a
# stable DNS record (see hostname/subdomain in the controller). It needs no
# ports — the original file had a dangling empty `ports:` key, removed here.
kind: Service
apiVersion: v1
metadata:
  name: spark-master-headless
spec:
  clusterIP: None
  selector:
    component: spark-master
---
# Regular ClusterIP service exposing the master's RPC and web UI to the
# cluster (workers and spark-submit connect to spark-master:7077).
kind: Service
apiVersion: v1
metadata:
  name: spark-master
spec:
  ports:
    - port: 7077
      targetPort: 7077
      name: spark   # master RPC
    - port: 8080
      targetPort: 8080
      name: http    # web UI
  selector:
    component: spark-master
- 部署
$kubectl apply -f spark-master-controller.yaml
$kubectl apply -f spark-master-service.yaml
$kubectl apply -f spark-worker-controller.yaml
- 进入到pod:
kubectl exec -it spark-master-controller-jcpw7 -- /bin/bash  # pod 名称以 kubectl get pods 实际输出为准；新版 kubectl 要求用 -- 分隔命令
# 输入:
$export SPARK_DIST_CLASSPATH=$(hadoop classpath)
# 进入spark
$spark-shell
- spark UI部署可参照
https://blog.csdn.net/lixinkuan328/article/details/104528182/
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】博客园社区专享云产品让利特惠,阿里云新客6.5折上折
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步