AirFlow 1.10.11的安装部署
1 AirFlow 介绍
参见官网
2 AirFlow 1.10.11的安装部署
2.1 安装依赖
- Centos7.x
- Python3.5或以上(本次采用3.6.6)
- Mysql5.7.x
- Apache-Airflow 1.10.11
- 可访问外网
若是虚拟机等,安装前可备份或快照,以免安装失败,导致不可用
2.2 Python环境准备
Python-3.6.6.tgz
# 卸载 mariadb
rpm -qa | grep mariadb
mariadb-libs-5.5.65-1.el7.x86_64
mariadb-5.5.65-1.el7.x86_64
mariadb-devel-5.5.65-1.el7.x86_64
yum remove mariadb
yum remove mariadb-libs
# 安装依赖
rpm -ivh mysql57-community-release-el7-11.noarch.rpm
yum install readline readline-devel -y
yum install gcc -y
yum install zlib* -y
yum install openssl openssl-devel -y
yum install sqlite-devel -y
yum install python-devel mysql-devel -y
# 提前到python官网下载好包
cd /opt/src
tar -zxvf Python-3.6.6.tgz
# 安装 python3 运行环境
cd Python-3.6.6/
# configure 是一个可执行的脚本文件。如果配置了 --prefix,安装后的所有资源文件都会放在该目录中
./configure --prefix=/usr/local/python3.6
make && make install
/usr/local/python3.6/bin/pip3 install virtualenv
# 启动 python3 环境
cd /usr/local/python3.6/bin/
./virtualenv env
. env/bin/activate
# 检查 python 版本
python -V
2.3 安装Airflow
2.3.1 环境变量
# 将下面这一行追加到 /etc/profile 末尾,然后执行 source 使其生效
export AIRFLOW_HOME=/opt/app/servers/airflow
source /etc/profile
2.3.2 虚拟环境中安装
(env) [root@gcw2 bin]# pip install apache-airflow==1.10.11 -i https://pypi.douban.com/simple
pip install mysqlclient==1.4.6
airflow initdb
在执行 airflow initdb 命令时,如遇上如下报错:
ModuleNotFoundError: No module named 'sqlalchemy.ext.declarative.clsregistry'
这是由于 pip 默认安装的 SQLAlchemy 版本过高导致的错误(1.4 及以上版本中该模块已被移走,Airflow 1.10.11 需要 1.3.x)。执行以下命令将 SQLAlchemy 降级后,重新执行 airflow initdb 命令。
pip install SQLAlchemy==1.3.23
mysql数据库创建及用户权限
-- 创建数据库
SET GLOBAL explicit_defaults_for_timestamp = 1;
create database airflowgcw2;
-- 创建用户airflow,设置所有ip均可以访问
create user 'airflow'@'%' identified by 'xxx';
create user 'airflow'@'localhost' identified by 'xxx';
-- 用户授权,为新建的 airflow 用户授予 airflowgcw2 库的所有权限
grant all on airflowgcw2.* to 'airflow'@'%';
flush privileges;
修改 $AIRFLOW_HOME/airflow.cfg:
# 约 75 行
sql_alchemy_conn = mysql://airflow:password@ip:port/airflowgcw2
# 重新执行
airflow initdb
2.4 密码模块
(env) [root@gcw2 airflow]# pip install apache-airflow[password]
修改 airflow.cfg 配置文件(第一行修改,第二行增加):
## 约 281 行
[webserver]
# 约 353行
authenticate = True
auth_backend = airflow.contrib.auth.backends.password_auth
在 python 中创建 Web 登录用户
在虚拟环境中输入 python 进入交互式解释器,然后依次执行下列语句:
import airflow
from airflow import models, settings
from airflow.contrib.auth.backends.password_auth import PasswordUser
user = PasswordUser(models.User())
user.username = 'airflow'
user.email = 'xxx@xxx.com'
user.password = 'xxx'
session = settings.Session()
session.add(user)
session.commit()
session.close()
exit()
2.5 python3环境中启动
# 备注:要先进入python3的运行环境
cd /usr/local/python3.6/bin/
./virtualenv env
. env/bin/activate
# 退出虚拟环境命令(仅在需要退出时执行;airflow 安装在虚拟环境中,启动服务前不要退出)
deactivate
# 启动scheduler调度器:
airflow scheduler -D
# 服务页面启动:
airflow webserver -D
2.6 初步成功
2021-09-09
2.7 配置
2.7.1 时区修改
- 修改 $AIRFLOW_HOME/airflow.cfg
# 约 65 行
default_timezone = Asia/Shanghai
- airflow安装包位置
# 进入Airflow包的安装位置
cd /usr/local/python3.6/bin/env/lib/python3.6/site-packages/
# 修改airflow/utils/timezone.py
cd airflow/utils
vi timezone.py
第27行注释,增加行:
27 utc = pendulum.timezone('UTC')
from airflow import configuration as conf
try:
    tz = conf.get("core", "default_timezone")
    if tz == "system":
        utc = pendulum.local_timezone()
    else:
        utc = pendulum.timezone(tz)
except Exception:
    pass
第 71 行修改为:
d = dt.datetime.now()
- vi sqlalchemy.py
utc = pendulum.timezone('UTC')
from airflow import configuration as conf
try:
    tz = conf.get("core", "default_timezone")
    if tz == "system":
        utc = pendulum.local_timezone()
    else:
        utc = pendulum.timezone(tz)
except Exception:
    pass
- 修改 /usr/local/python3.6/bin/env/lib/python3.6/site-packages/airflow/www/templates/admin/master.html
# 将第40行修改为以下内容:
40 var UTCseconds = x.getTime();
# 将第43行修改为以下内容:
43 "timeFormat":"H:i:s",
2.7.2 重启
# 关闭 airflow webserver 对应的服务
ps -ef | grep 'airflow-webserver' | grep -v 'grep' | awk '{print $2}' | xargs -i kill -9 {}
# 关闭 airflow scheduler 对应的服务
ps -ef | grep 'airflow' | grep 'scheduler' | awk '{print $2}' | xargs -i kill -9 {}
# 删除对应的pid文件,然后重启服务(在python3.6虚拟环境中执行)
cd $AIRFLOW_HOME
rm -rf *.pid
#重新启动
airflow scheduler -D
airflow webserver -D
2.7.3 时间正常
2.7.4 禁用自带DAG
系统自带DAG太多,选择禁用掉
- 停止系统
# 关闭 airflow webserver 对应的服务
ps -ef | grep 'airflow-webserver' | grep -v 'grep' | awk '{print $2}' | xargs -i kill -9 {}
# 关闭 airflow scheduler 对应的服务
ps -ef | grep 'airflow' | grep 'scheduler' | awk '{print $2}' | xargs -i kill -9 {}
# 删除对应的pid文件
cd $AIRFLOW_HOME
rm -rf *.pid
- 修改文件 $AIRFLOW_HOME/airflow.cfg:
# 修改文件第 136 行
136 # load_examples = True
137 load_examples = False
# 重新设置db
airflow resetdb -y
- 重新设置账户、口令:
import airflow
from airflow import models, settings
from airflow.contrib.auth.backends.password_auth import PasswordUser
user = PasswordUser(models.User())
user.username = 'airflow'
user.email = 'xxx@xxx.com'
user.password = 'airflow'
session = settings.Session()
session.add(user)
session.commit()
session.close()
exit()
- 重启服务
airflow scheduler -D
airflow webserver -D