Spark on YARN 部署配置 (SparkOnYarn)
#!/usr/bin/env bash
# Spark on YARN setup: unpack Spark, point it at the Hadoop/YARN config,
# enable event logging + history server, and pre-stage Spark jars on HDFS.
# Run on node1.
#
# NOTE(review): the original notes mixed two Hadoop installs —
# spark-env.sh referenced hadoop-2.7.5 while yarn-site.xml was edited under
# hadoop-3.3.0. Confirm which Hadoop version is actually deployed and keep
# HADOOP_CONF consistent with it.
set -euo pipefail

readonly SPARK_TGZ=/export/server/spark-2.4.5-bin-hadoop2.7.tgz
readonly SPARK_DIR=/export/server/spark-2.4.5-bin-hadoop2.7
readonly SPARK_HOME=/export/server/spark
readonly HADOOP_CONF=/export/server/hadoop-2.7.5/etc/hadoop

# Unpack the release and create a version-agnostic symlink so future
# upgrades only need the link repointed.
tar -zxvf "$SPARK_TGZ" -C /export/server
ln -s "$SPARK_DIR" "$SPARK_HOME"

# For learning environments only: take ownership as root to avoid
# permission issues. In production, use the user/group assigned by ops.
chown -R root "$SPARK_DIR"
chgrp -R root "$SPARK_DIR"

# --- spark-env.sh: tell Spark where the Hadoop/YARN config lives, and
# configure the Spark history server (read event logs from HDFS, enable
# automatic cleanup of old logs).
mv "$SPARK_HOME/conf/spark-env.sh.template" "$SPARK_HOME/conf/spark-env.sh"
cat >>"$SPARK_HOME/conf/spark-env.sh" <<EOF
HADOOP_CONF_DIR=$HADOOP_CONF
YARN_CONF_DIR=$HADOOP_CONF
## Spark history server options
SPARK_HISTORY_OPTS="-Dspark.history.fs.logDirectory=hdfs://node1:8020/user/spark/log/ -Dspark.history.fs.cleaner.enabled=true"
EOF

# --- yarn-site.xml: these <property> elements must be added manually inside
# the <configuration> element of
# /export/server/hadoop-3.3.0/etc/hadoop/yarn-site.xml (XML, not shell):
#   yarn.nodemanager.resource.memory-mb   = 20480  # per-NM memory for containers
#   yarn.scheduler.minimum-allocation-mb  = 2048   # minimum container allocation
#   yarn.nodemanager.vmem-pmem-ratio      = 2.1    # virtual/physical memory ratio

# --- spark-defaults.conf: enable event logging so finished applications
# appear in the history server, and point YARN at the pre-staged jars.
mv "$SPARK_HOME/conf/spark-defaults.conf.template" "$SPARK_HOME/conf/spark-defaults.conf"
cat >>"$SPARK_HOME/conf/spark-defaults.conf" <<EOF
spark.eventLog.enabled            true
spark.eventLog.dir                hdfs://node1:8020/user/spark/log/
spark.eventLog.compress           true
spark.yarn.historyServer.address  node1:18080
spark.yarn.jars                   hdfs://node1:8020/user/spark/jars/*
EOF

# The event-log directory must exist before the first application runs.
hdfs dfs -mkdir -p /user/spark/log/

# Pre-stage all of $SPARK_HOME/jars on HDFS so YARN does not re-upload
# them for every application.
hdfs dfs -mkdir -p /user/spark/jars/
hdfs dfs -put "$SPARK_HOME"/jars/* /user/spark/jars/

# Start the whole Hadoop cluster (run on node1).
start-all.sh

# Start the MapReduce JobHistory server.
# 'mr-jobhistory-daemon.sh start historyserver' is deprecated on Hadoop 3.x
# (the original notes even captured the deprecation warning); use the
# replacement command it recommends:
mapred --daemon start historyserver
# Start the Spark HistoryServer service (run on node1). Startup output is
# logged under /export/server/spark/logs/ (file name like
# spark-root-org.apache.spark.deploy.history.HistoryServer-1-<host>.out).
/export/server/spark/sbin/start-history-server.sh

# Verify in a browser:
#   http://node1:9870              - HDFS NameNode UI
#   http://node1:18080             - Spark history server
#   http://node1:19888/jobhistory  - MapReduce JobHistory server
#   http://node1:8088/cluster      - YARN ResourceManager UI
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· DeepSeek 开源周回顾「GitHub 热点速览」
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!
· AI与.NET技术实操系列(二):开始使用ML.NET
· 单线程的Redis速度为什么快?