Zeppelin-0.9.0安装并集成Hive、Spark、MySQL、Python
1、下载安装包
http://archive.apache.org/dist/zeppelin/zeppelin-0.9.0/
2、上传zeppelin-0.9.0-bin-all.tgz至/opt/soft
3、解压
tar -zxvf /opt/soft/zeppelin-0.9.0-bin-all.tgz -C /opt/module/
4、修改安装包名称
mv /opt/module/zeppelin-0.9.0-bin-all/ /opt/module/zeppelin
5、修改配置文件名称
cd /opt/module/zeppelin/conf
mv zeppelin-env.sh.template zeppelin-env.sh
mv zeppelin-site.xml.template zeppelin-site.xml
6、修改zeppelin-env.sh
vi zeppelin-env.sh
#配置JAVA_HOME
export JAVA_HOME=/opt/module/jdk1.8.0_181
#配置SPARK_HOME
export SPARK_HOME=/opt/module/spark-yarn
#配置HADOOP_HOME
export HADOOP_HOME=/opt/module/hadoop-3.1.3
#配置HADOOP_CONF_DIR
export HADOOP_CONF_DIR=/opt/module/hadoop-3.1.3/etc/hadoop
#配置SPARK_SUBMIT_OPTIONS
export SPARK_SUBMIT_OPTIONS="--master yarn --deploy-mode cluster --executor-memory 4G --executor-cores 1 --num-executors 4 --queue default"
###设置单独的笔记本工作区
#设置为 true 时新建笔记默认公开,设置为 false 时新建笔记默认私有
export ZEPPELIN_NOTEBOOK_PUBLIC="false"
7、修改zeppelin-site.xml
vi zeppelin-site.xml
<!-- 修改地址 -->
<property>
<name>zeppelin.server.addr</name>
<value>192.168.67.1</value>
<description>Server address</description>
</property>
<!-- 修改端口(端口号必须在 1-65535 之间,原文的 98080 超出合法范围) -->
<property>
<name>zeppelin.server.port</name>
<value>9090</value>
<description>Server port.</description>
</property>
<!-- 开启定时调度 -->
<property>
<name>zeppelin.notebook.cron.enable</name>
<value>true</value>
<description>Notebook enable cron scheduler feature</description>
</property>
<property>
<name>zeppelin.notebook.cron.folders</name>
<value></value>
<description>Notebook cron folders</description>
</property>
8、赋予权限
chmod +x *.sh
9、配置权限管理
cd /opt/module/zeppelin/conf
mv shiro.ini.template shiro.ini
vim shiro.ini
#配置用户名和密码以及对应的角色
#将以下默认配置注释掉
#user1 = password2, role1, role2
#user2 = password3, role3
#user3 = password4, role2
#配置admin用户名密码角色(账号 = 密码, 角色)
admin = 123456, admin
#配置其他账号密码角色(账号 = 密码, 角色)
abc01 = abc01#sdas, role1
abc02 = abc02trdas, role1
abc03 = abc03hgjas, role1
abc04 = abc04676ss, role1
abc05 = abc05khjkhj, role1
abc06 = abc06zzzzz, role1
abc07 = abc07vnvbvcb, role1
abc08 = abc08jkljkjh, role1
abc09 = abc09asdasda, role1
abc10 = abc10pppjkhj, role1
abc11 = abc11cvxvcas, role1
abc12 = abc12werer@sd, role1
abc13 = abc13vdfdz!ds, role1
abc14 = abc14xvsdfss, role1
abc15 = abc15erertet, role1
abc16 = abc16ghjgasd, role1
abc17 = abc17gfdfuit, role1
abc18 = abc18fsdfuit, role1
#配置账号可以看到解释器,修改如下内容:
[main]
anyofrolesuser = org.apache.zeppelin.utils.AnyOfRolesUserAuthorizationFilter
[urls]
/api/interpreter/** = authc, anyofrolesuser[admin, abc01, abc02, abc03, abc04, abc05, abc06, abc07, abc08, abc09, abc10, abc11, abc12, abc13, abc14, abc15, abc16, abc17, abc18]
/api/notebook-repositories/** = authc, anyofrolesuser[admin, abc01, abc02, abc03, abc04, abc05, abc06, abc07, abc08, abc09, abc10, abc11, abc12, abc13, abc14, abc15, abc16, abc17, abc18]
/api/configurations/** = authc, anyofrolesuser[admin, abc01, abc02, abc03, abc04, abc05, abc06, abc07, abc08, abc09, abc10, abc11, abc12, abc13, abc14, abc15, abc16, abc17, abc18]
/api/credential/** = authc, anyofrolesuser[admin, abc01, abc02, abc03]
/api/admin/** = authc, anyofrolesuser[admin, abc01, abc02, abc03]
10、启动/停止zeppelin
/opt/module/zeppelin/bin/zeppelin-daemon.sh start
/opt/module/zeppelin/bin/zeppelin-daemon.sh stop
11、访问Web端口
12、配置Interpreters的Maven依赖
insecure http://insecure.repo1.maven.org/maven2/
aliyun https://maven.aliyun.com/repository/public
13、配置MySQL Interpreters
(1)配置解释器
Name:MySQLName01
Interpreter group:jdbc
default.driver com.mysql.jdbc.Driver
default.password 123456
default.url jdbc:mysql://192.168.67.1:3306/agets?autoReconnect=true&useSSL=false&allowPublicKeyRetrieval=true&sslMode=disabled
default.user root
Dependencies:mysql:mysql-connector-java:5.1.48
(2)创建note
/VM/MySQL/MySQLName01
配置选择:
MySQLName01 %jdbc(default)
执行语句:
show tables;
14、配置Hive Interpreters
(1)拷贝hive-site.xml到zeppelin/conf目录下
(2)拷贝hadoop-common-3.1.3.jar、hive-jdbc-3.1.2.jar、hive-jdbc-handler-3.1.2.jar到/opt/module/zeppelin/interpreter/jdbc中
cp /opt/module/hadoop-3.1.3/share/hadoop/common/hadoop-common-3.1.3.jar /opt/module/zeppelin/interpreter/jdbc
cp /opt/module/hive/lib/hive-jdbc-3.1.2.jar /opt/module/zeppelin/interpreter/jdbc/
cp /opt/module/hive/lib/hive-jdbc-handler-3.1.2.jar /opt/module/zeppelin/interpreter/jdbc/
(3)重启Zeppelin
/opt/module/zeppelin/bin/zeppelin-daemon.sh stop
/opt/module/zeppelin/bin/zeppelin-daemon.sh start
(4)配置Hive Interpreters
Name:HiveName
Interpreter group:jdbc
default.driver : org.apache.hive.jdbc.HiveDriver
default.url : jdbc:hive2://192.168.67.1:10000
default.user : hive
default.password :(留空)
Dependencies:org.apache.hive:hive-jdbc:3.1.2
(5)创建note
/VM/hive/HiveName
配置选择:
HiveName %jdbc(default)
执行语句:
show databases;
15、配置Python Interpreters
(1)搜索python,点击修改按钮,将zeppelin.python修改为/opt/module/anaconda3/bin/python3
(2)完成之后点击保存、重启Python Interpreters
(3)创建笔记:
/VM/python/PythonName
配置选择:
python %python
(4)在zeppelin中执行如下代码,测试python环境:
import gc
a="xxx"
print(a)
del a
gc.collect()
z.input(name='my_name', defaultValue='hello')
import pandas as pd
df = pd.DataFrame({'name':['a','b','c'], 'count':[12,24,18]})
z.show(df)
16、配置Spark Interpreters
(1)搜索spark,点击修改按钮,修改如下参数:
#作业名称
spark.app.name=zeppelin_spark
#导出的行数
zeppelin.spark.maxResult=1000000
#提交模式
spark.submit.deployMode=client
#本地模式或集群模式
spark.master=yarn
#spark环境变量
SPARK_HOME=/opt/module/spark-yarn
(2)完成之后点击保存
(3)创建笔记
/VM/spark/SparkName
(4)在zeppelin中执行如下代码,测试spark环境:
spark.sql("show databases").show()
%pyspark
spark.sql("show tables").show()