Which Java Processes Does a Hadoop/Spark Cluster Start
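
All of the processes listed below can be checked on any cluster node with the JDK's jps tool (or plain ps), for example:

jps -l     ## print the PID and fully-qualified main class of every local JVM
jps -lm    ## additionally print the arguments passed to main()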

HDFS

org.apache.hadoop.hdfs.server.datanode.DataNode
org.apache.hadoop.hdfs.server.namenode.NameNode
## SecondaryNameNode exists only in non-HA mode; with HA configured there are two NameNodes instead
org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode
## JournalNode exists only in HA mode; it synchronizes edit-log data between the two NameNodes
org.apache.hadoop.hdfs.qjournal.server.JournalNode
## DFSZKFailoverController exists only in HA mode; it handles automatic NameNode failover
org.apache.hadoop.hdfs.tools.DFSZKFailoverController
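
A sketch of how these daemons are typically started by hand, assuming the Hadoop 3.x CLI (on Hadoop 2.x the equivalent is hadoop-daemon.sh start <daemon>):

hdfs --daemon start namenode
hdfs --daemon start datanode
hdfs --daemon start secondarynamenode   ## non-HA only
hdfs --daemon start journalnode         ## HA only
hdfs --daemon start zkfc                ## HA only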

MapReduce

## Records the history of MapReduce jobs
org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer
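
A sketch of starting it, again assuming the Hadoop 3.x CLI (Hadoop 2.x uses mr-jobhistory-daemon.sh start historyserver):

mapred --daemon start historyserver   ## web UI listens on port 19888 by default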

YARN

org.apache.hadoop.yarn.server.nodemanager.NodeManager
org.apache.hadoop.yarn.server.resourcemanager.ResourceManager
## Present when the App Timeline Server is enabled; records the history of YARN jobs
org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer
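
A sketch of starting these daemons, assuming the Hadoop 3.x CLI (Hadoop 2.x uses yarn-daemon.sh start <daemon>):

yarn --daemon start resourcemanager
yarn --daemon start nodemanager
yarn --daemon start timelineserver   ## runs the ApplicationHistoryServer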

Spark

## Records the history of Spark jobs
## While a job is running, the YARN UI links through to the live Spark job UI
## After a job finishes, its history has to be viewed through the Spark History Server UI (default port 18080)
org.apache.spark.deploy.history.HistoryServer
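
A minimal sketch of enabling it, assuming event logs are written to a hypothetical HDFS directory /spark-history (settings go in spark-defaults.conf):

spark.eventLog.enabled          true
spark.eventLog.dir              hdfs:///spark-history
spark.history.fs.logDirectory   hdfs:///spark-history
## then start the daemon
$SPARK_HOME/sbin/start-history-server.sh
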
## This is the client process that submits the Spark application
org.apache.spark.deploy.SparkSubmit \
    --master yarn \
    --deploy-mode cluster \
    --conf spark.driver.memory=1G \
    --conf spark.driver.extraClassPath=postgresql-xxx.jar \
    --name Test \
    --py-files Test.zip \
    --jars postgresql-xxx.jar,spark-streaming-kafka-xxx.jar \
    --executor-memory 1G \
    --num-executors 4 \
    Test.py
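
Once submitted, the application can be tracked from any node with the YARN CLI; the application id used below is the one that appears in the listings that follow:

yarn application -list                                       ## currently running applications
yarn application -status application_1574480275665_115552    ## details of one application
yarn logs -applicationId application_1574480275665_115552    ## aggregated logs once it has finished
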
## This is where the Driver program runs
## Each Spark application has one ApplicationMaster; in cluster deploy mode (as above) it runs the Driver
org.apache.spark.deploy.yarn.ApplicationMaster \
    --class org.apache.spark.deploy.PythonRunner \
    --primary-py-file Test.py \
    --arg param \
    --properties-file /mnt/resource/hadoop/yarn/local/usercache/spark/appcache/application_1574480275665_115552/container_e12_1574480275665_115552_01_000001/__spark_conf__/__spark_conf__.properties
## These are the Executor processes
## Each Spark application has one or more Executors
## Note that each Executor connects back to its own Driver (see --driver-url)
org.apache.spark.executor.CoarseGrainedExecutorBackend \
    --driver-url spark://CoarseGrainedScheduler@192.168.3.16:42742 \
    --executor-id 3 \
    --hostname hadoop-1 \
    --cores 1 \
    --app-id application_1574480275665_115552 \
    --user-class-path file:/mnt/resource/hadoop/yarn/local/usercache/spark/appcache/application_1574480275665_115552/container_e12_1574480275665_115552_01_000004/__app__.jar \
    --user-class-path file:/mnt/resource/hadoop/yarn/local/usercache/spark/appcache/application_1574480275665_115552/container_e12_1574480275665_115552_01_000004/postgresql-xxx.jar \
    --user-class-path file:/mnt/resource/hadoop/yarn/local/usercache/spark/appcache/application_1574480275665_115552/container_e12_1574480275665_115552_01_000004/spark-streaming-kafka-xxx.jar \
    ......
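
To see which containers (the ApplicationMaster plus the Executors) an application is holding, the YARN CLI can be queried; the attempt id below is a sketch derived from the application id above:

yarn applicationattempt -list application_1574480275665_115552
yarn container -list appattempt_1574480275665_115552_000001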

Ambari

org.apache.ambari.server.controller.AmbariServer
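
The Ambari server process is managed with its own CLI:

ambari-server start
ambari-server status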

ZooKeeper

org.apache.zookeeper.server.quorum.QuorumPeerMain conf/zoo.cfg
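
QuorumPeerMain is what zkServer.sh launches with conf/zoo.cfg; a quick health check looks like this (the stat four-letter command may need to be whitelisted via 4lw.commands.whitelist on ZooKeeper 3.5+):

zkServer.sh status              ## reports whether this node is leader or follower
echo stat | nc localhost 2181   ## connection and latency statistics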

Kafka

kafka.Kafka  config/server.properties
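
The kafka.Kafka main class is normally launched through the bundled script:

kafka-server-start.sh -daemon config/server.properties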

HBase

org.apache.hadoop.hbase.master.HMaster start
org.apache.hadoop.hbase.regionserver.HRegionServer start
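## Phoenix Query Server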
org.apache.phoenix.queryserver.server.Main
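
The HBase daemons are typically started one by one with hbase-daemon.sh (or all at once with start-hbase.sh), and the Phoenix Query Server ships its own control script (queryserver.py in the Phoenix bin directory):

hbase-daemon.sh start master
hbase-daemon.sh start regionserver
queryserver.py start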
