Fork me on GitHub

k8s部署spark

1.镜像构建打包推送阿里镜像:

  • Dockerfile 构建:
# Spark ("without-hadoop" build) plus a Hadoop distribution, with the
# hadoop-aws / AWS SDK jars dropped into Spark's jars directory.
#
# The build context must contain:
#   hadoop-<hadoop_ver>.tar.gz, spark-<spark_ver>-bin-without-hadoop.tar.gz,
#   start-common.sh, start-master, start-worker,
#   core-site.xml, spark-defaults.conf
#
# NOTE(review): the "java" repository on Docker Hub is deprecated in favour of
# "openjdk"; migrate the base image once a replacement has been verified to
# ship curl, which the jar download step below depends on.
FROM java:openjdk-8-jdk

# Versions of the bundled distributions. Kept as ENV (not ARG) so they remain
# visible inside the running container exactly as before.
ENV hadoop_ver=2.8.2 \
    spark_ver=2.4.4

# ADD (not COPY) is intentional here: it auto-extracts local tar archives.
ADD hadoop-${hadoop_ver}.tar.gz /opt/
ADD spark-${spark_ver}-bin-without-hadoop.tar.gz /opt/

# Version-independent paths: /opt/hadoop and /opt/spark are relative symlinks
# to the versioned directories extracted above.
RUN ln -s "hadoop-${hadoop_ver}" /opt/hadoop && \
    echo Hadoop ${hadoop_ver} installed in /opt && \
    ln -s "spark-${spark_ver}-bin-without-hadoop" /opt/spark && \
    echo Spark ${spark_ver} installed in /opt

# SPARK_HOME / HADOOP_HOME are set in their own instruction because variables
# defined in an ENV instruction are only usable from the NEXT instruction on.
ENV SPARK_HOME=/opt/spark \
    HADOOP_HOME=/opt/hadoop
ENV PATH=$PATH:$SPARK_HOME/bin:$HADOOP_HOME/bin \
    LD_LIBRARY_PATH=$HADOOP_HOME/lib/native

# Download the extra jars into Spark's jars directory.
#   -f  fail the build on an HTTP error instead of saving the error page as a jar
#   -sS quiet, but still print errors
#   -L  follow redirects
# hadoop-aws follows ${hadoop_ver} so it always matches the bundled Hadoop.
RUN set -eu; \
    aws_sdk_ver=1.11.712; \
    for artifact in \
        "org/apache/hadoop/hadoop-aws/${hadoop_ver}/hadoop-aws-${hadoop_ver}.jar" \
        "org/apache/httpcomponents/httpclient/4.5.3/httpclient-4.5.3.jar" \
        "joda-time/joda-time/2.9.9/joda-time-2.9.9.jar" \
        "com/amazonaws/aws-java-sdk-core/${aws_sdk_ver}/aws-java-sdk-core-${aws_sdk_ver}.jar" \
        "com/amazonaws/aws-java-sdk/${aws_sdk_ver}/aws-java-sdk-${aws_sdk_ver}.jar" \
        "com/amazonaws/aws-java-sdk-kms/${aws_sdk_ver}/aws-java-sdk-kms-${aws_sdk_ver}.jar" \
        "com/amazonaws/aws-java-sdk-s3/${aws_sdk_ver}/aws-java-sdk-s3-${aws_sdk_ver}.jar" \
    ; do \
        curl -fsSL "https://repo1.maven.org/maven2/${artifact}" \
             -o "/opt/spark/jars/${artifact##*/}"; \
    done

# Start scripts live at / because the Kubernetes manifests invoke
# /start-master and /start-worker directly. All three are made executable
# with absolute paths (no dependence on the working directory) and without
# world-writable permissions.
COPY start-common.sh start-worker start-master /
RUN chmod 755 /start-common.sh /start-worker /start-master

# Spark configuration files.
COPY core-site.xml spark-defaults.conf /opt/spark/conf/
  • 推送镜像到阿里云
# Build the image from the Dockerfile in the current directory.
docker build . -t xujunkai/spark:2.4.4

# Tag the image by the name just built. An image ID is different on every
# machine and every rebuild, so it cannot be copied from someone else's output.
docker tag xujunkai/spark:2.4.4 registry.cn-hangzhou.aliyuncs.com/xujunkai/spark:2.4.4

# Push to the Aliyun registry (log in first with
# `docker login registry.cn-hangzhou.aliyuncs.com`).
docker push registry.cn-hangzhou.aliyuncs.com/xujunkai/spark:2.4.4

2.k8s 构建spark

  • spark-master-controller.yaml
# ReplicationController that keeps a single spark-master pod running.
apiVersion: v1
kind: ReplicationController
metadata: {name: spark-master-controller}
spec:
  replicas: 1
  selector: {component: spark-master}
  template:
    metadata:
      labels: {component: spark-master}
    spec:
      # hostname + subdomain pair the pod with the Service named
      # spark-master-headless.
      hostname: spark-master-hostname
      subdomain: spark-master-headless
      containers:
        - name: spark-master
          image: registry.cn-hangzhou.aliyuncs.com/xujunkai/spark:2.4.4
          imagePullPolicy: Always
          # Start script placed at / of the image.
          command:
            - /start-master
          ports:
            # Published by the spark-master Service as "spark" and "http".
            - {containerPort: 7077}
            - {containerPort: 8080}
          resources:
            requests: {cpu: 100m}
  • spark-worker-controller.yaml
# ReplicationController that keeps two spark-worker pods running.
apiVersion: v1
kind: ReplicationController
metadata: {name: spark-worker-controller}
spec:
  replicas: 2
  selector: {component: spark-worker}
  template:
    metadata:
      labels: {component: spark-worker}
    spec:
      containers:
        - name: spark-worker
          image: registry.cn-hangzhou.aliyuncs.com/xujunkai/spark:2.4.4
          imagePullPolicy: Always
          # Start script placed at / of the image.
          command:
            - /start-worker
          ports:
            # NOTE(review): presumably the worker web UI port; confirm against
            # the start-worker script.
            - {containerPort: 8081}
          resources:
            requests: {cpu: 100m}
  • spark-master-service.yaml
# Headless Service (clusterIP: None) selecting the spark-master pod. Its name
# matches the `subdomain` set on the master pod template.
apiVersion: v1
kind: Service
metadata:
  name: spark-master-headless
spec:
  clusterIP: None
  selector:
    component: spark-master
  # Ports listed explicitly instead of a dangling empty `ports:` key (which
  # parses as null); they mirror the master container's ports.
  ports:
    - name: spark
      port: 7077
      targetPort: 7077
    - name: http
      port: 8080
      targetPort: 8080
---
# Service in front of the spark-master pod, publishing ports 7077 ("spark")
# and 8080 ("http").
apiVersion: v1
kind: Service
metadata: {name: spark-master}
spec:
  selector: {component: spark-master}
  ports:
    - name: spark
      port: 7077
      targetPort: 7077
    - name: http
      port: 8080
      targetPort: 8080
  • 部署
# Create the master controller, its Services, and then the workers.
# (No leading "$": a prompt glued to the command would be expanded by the
# shell as the empty variable $kubectl.)
kubectl apply -f spark-master-controller.yaml
kubectl apply -f spark-master-service.yaml
kubectl apply -f spark-worker-controller.yaml
  • 进入到pod:
# Open an interactive shell in the master pod. The pod-name suffix is
# generated; look up the real name with `kubectl get pods` and substitute it.
# The "--" separates kubectl's own flags from the command run in the container.
kubectl exec -it spark-master-controller-jcpw7 -- /bin/bash

# Inside the pod: the image ships the "without-hadoop" Spark build, so put the
# Hadoop jars on Spark's classpath.
export SPARK_DIST_CLASSPATH=$(hadoop classpath)
# Start the Spark shell.
spark-shell
  • spark UI部署可参照

https://blog.csdn.net/lixinkuan328/article/details/104528182/

posted @   是阿凯啊  阅读(1176)  评论(0)  编辑  收藏  举报
编辑推荐:
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
· 记一次.NET内存居高不下排查解决与启示
· 探究高空视频全景AR技术的实现原理
· 理解Rust引用及其生命周期标识(上)
阅读排行:
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!
· 单线程的Redis速度为什么快?
· 展开说说关于C#中ORM框架的用法!
· Pantheons:用 TypeScript 打造主流大模型对话的一站式集成库
历史上的今天:
2019-06-18 用BeautifulSoup简单爬取BOSS直聘网岗位
点击右上角即可分享
微信分享提示