官方文档地址:http://airflow.apache.org/
[root@node1 ~]# yum install -y epel-release
[root@node1 ~]# yum install -y python-pip
#配置pip国内源,加快速度
[root@node1 ~]# mkdir ~/.pip
[root@node1 ~]# vim .pip/pip.conf
[global]
trusted-host=mirrors.aliyun.com
index-url=https://mirrors.aliyun.com/pypi/simple/
#升级pip
[root@node1 ~]# pip install --upgrade pip
#升级setuptools,版本太低会导致部分模块安装失败
[root@node1 ~]# pip install setuptools -U
当前最新版本是1.10.10,所以这里不需要用==<版本号>指定版本(如需指定版本:pip install apache-airflow==1.10.10)
[root@node1 ~]# pip install apache-airflow
######报错,有一个组件版本不太匹配,暂时不去处理
ERROR: flask-appbuilder 1.13.1 has requirement marshmallow<2.20,>=2.18.0, but you'll have marshmallow 2.21.0 which is incompatible.
#####最后结果:
Successfully installed Babel-2.8.0 Flask-Babel-0.12.2 Flask-JWT-Extended-3.24.1 Flask-OpenID-1.2.5 Flask-SQLAlchemy-2.4.3 Mako-1.1.3 MarkupSafe-1.1.1 PyJWT-1.7.1 PyYAML-5.3.1 alembic-1.4.2 apache-airflow-1.10.10 apispec-2.0.2 argcomplete-1.11.1 attrs-19.3.0 cached-property-1.5.1 cattrs-0.9.2 certifi-2020.4.5.1 chardet-3.0.4 click-7.1.2 colorama-0.4.3 colorlog-4.0.2 configparser-3.5.3 contextlib2-0.6.0.post1 croniter-0.3.32 dill-0.3.1.1 docutils-0.16 enum34-1.1.10 flask-1.1.2 flask-admin-1.5.4 flask-appbuilder-1.13.1 flask-caching-1.3.3 flask-login-0.4.1 flask-swagger-0.2.13 flask-wtf-0.14.3 funcsigs-1.0.2 functools32-3.2.3.post2 future-0.18.2 futures-3.3.0 graphviz-0.14 gunicorn-19.10.0 idna-2.9 importlib-metadata-1.6.0 iso8601-0.1.12 itsdangerous-1.1.0 jinja2-2.10.3 json-merge-patch-0.2 jsonschema-3.2.0 lazy-object-proxy-1.4.3 lockfile-0.12.2 markdown-2.6.11 marshmallow-2.21.0 marshmallow-enum-1.5.1 marshmallow-sqlalchemy-0.18.0 monotonic-1.5 natsort-6.2.1 numpy-1.16.6 pandas-0.24.2 pathlib2-2.3.5 pendulum-1.4.4 prison-0.1.0 psutil-5.7.0 pygments-2.5.2 pyrsistent-0.16.0 python-daemon-2.1.2 python-dateutil-2.8.1 python-editor-1.0.4 python-openid-2.2.5 pytz-2020.1 pytzdata-2019.3 requests-2.23.0 scandir-1.10.0 setproctitle-1.1.10 singledispatch-3.4.0.3 six-1.15.0 sqlalchemy-1.3.17 sqlalchemy-jsonfield-0.8.0 tabulate-0.8.7 tenacity-4.12.0 termcolor-1.1.0 text-unidecode-1.2 thrift-0.13.0 typing-3.7.4.1 typing-extensions-3.7.4.2 tzlocal-1.5.1 unicodecsv-0.14.1 urllib3-1.25.9 werkzeug-0.16.1 wtforms-2.3.1 zipp-1.2.0 zope.deprecation-4.4.0
安装必要的其他插件(如果你的大数据开启kerberos,你要安装相关插件)
#需要哪几个插件取决于研发人员的需求,我感觉下面这几个差不多都需要
pip install 'apache-airflow[hdfs]'
pip install 'apache-airflow[hive]'
pip install 'apache-airflow[jdbc]'
#jdbc插件需要先安装依赖:yum install gcc-c++
pip install 'apache-airflow[kerberos]'
#kerberos插件报错:ERROR: thrift-sasl 0.4.2 has requirement thrift==0.9.3, but you'll have thrift 0.13.0 which is incompatible.
#kerberos依赖:yum -y install cyrus-sasl cyrus-sasl-devel cyrus-sasl-lib
pip install 'apache-airflow[mysql]'
pip install 'apache-airflow[presto]'
pip install 'apache-airflow[redis]'
至此airflow安装完成
[root@node1 ~]# mkdir /hadoop/airflow
[root@node1 ~]# cd /hadoop/airflow
#增加环境变量AIRFLOW_HOME
[root@node1 airflow]# vim /etc/profile
export AIRFLOW_HOME=/hadoop/airflow
[root@node1 airflow]# source !$
source /etc/profile
[root@node1 airflow]# airflow initdb
[root@node1 airflow]# ll
total 124
-rw-r--r-- 1 root root 36412 Jun 5 21:58 airflow.cfg
-rw-r--r-- 1 root root 86016 Jun 5 21:58 airflow.db
drwxr-xr-x 3 root root 23 Jun 5 21:58 logs
-rw-r--r-- 1 root root 2543 Jun 5 21:58 unittests.cfg
###启动webserver 与scheduler(-D 守护进程模式,-p修改端口)
[root@node1 airflow]# airflow webserver -D -p 7777
[root@node1 airflow]# airflow scheduler -D
[root@node1 airflow]# ps aux | grep air
root 109028 1.4 1.4 457656 88876 ? S 22:00 0:02 /usr/bin/python2 /usr/bin/airflow scheduler -D
root 109033 0.6 1.5 457792 89216 ? S 22:00 0:00 airflow scheduler -- DagFileProcessorManager
root 109497 0.1 0.9 321348 53528 ? S 22:02 0:00 gunicorn: master [airflow-webserver]
root 109502 8.9 1.3 449832 82240 ? S 22:02 0:02 [ready] gunicorn: worker [airflow-webserver]
root 109503 8.8 1.3 449836 82272 ? S 22:02 0:02 [ready] gunicorn: worker [airflow-webserver]
root 109504 10.8 1.3 449840 82260 ? S 22:02 0:03 [ready] gunicorn: worker [airflow-webserver]
root 109505 12.8 1.3 449820 82276 ? S 22:02 0:04 [ready] gunicorn: worker [airflow-webserver]
root 109655 0.0 0.0 112808 968 pts/0 S+ 22:03 0:00 grep --color=auto air
默认的元数据库是sqlite,下面改用MySQL(MariaDB)作为元数据库
[root@node1 airflow]# mysql -uroot -p
Enter password:
Welcome to the MariaDB monitor. Commands end with ; or \g.
Your MariaDB connection id is 134
Server version: 5.5.65-MariaDB MariaDB Server
Copyright (c) 2000, 2018, Oracle, MariaDB Corporation Ab and others.
Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
MariaDB [(none)]> create database airflow;
Query OK, 1 row affected (0.03 sec)
MariaDB [(none)]> GRANT all privileges on airflow.* TO 'airflow'@'%' IDENTIFIED BY '123456';
Query OK, 0 rows affected (0.14 sec)
MariaDB [(none)]> FLUSH PRIVILEGES;
Query OK, 0 rows affected (0.11 sec)
MariaDB [(none)]> set explicit_defaults_for_timestamp = 1;
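[root@node1 airflow]# pip install pymysql
#修改airflow.cfg中[core]下的sql_alchemy_conn,使其指向上面创建的MySQL库,否则resetdb/initdb操作的仍是sqlite,例如:
#sql_alchemy_conn = mysql+pymysql://airflow:123456@<mysql主机>:3306/airflow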
[root@node1 airflow]# airflow resetdb
[root@node1 airflow]# airflow initdb
申请airflow的principal
kadmin.local
kadmin: addprinc -randkey airflow/client1.hdp.work.com@work.com
kadmin: xst -norandkey -k airflow.keytab airflow/client1.hdp.work.com@work.com
修改airflow配置文件
vim airflow.cfg
[core]
security = kerberos
[kerberos]
ccache = /tmp/airflow_krb5_ccache
keytab = /etc/security/keytabs/airflow.keytab
# gets augmented with fqdn
principal = airflow/client1.hdp.work.com@work.com
reinit_frequency = 3600
kinit_path = kinit
#查看yum是否有python3的包(没有请自行配置yum源,centos7一般都有)
[root@client2 ~]# yum search python3
[root@client2 ~]# yum install python3 python3-devel
#注意:pip的全局配置文件是/etc/pip.conf,用户级配置文件是~/.pip/pip.conf;/etc/.pip/pip.conf不在pip的读取路径中,下面的配置可能不会生效
[root@client2 ~]# mkdir /etc/.pip
[root@client2 ~]# vim /etc/.pip/pip.conf
[global]
trusted-host=mirrors.aliyun.com
index-url=https://mirrors.aliyun.com/pypi/simple/
[root@client2 ~]# pip3 install --upgrade pip
[root@client2 ~]# pip3 install setuptools -U
[root@client2 ~]# pip3 install apache-airflow
#安装kerberos依赖(安装失败)
[root@client2 ~]# yum install krb5-devel
[root@client2 ~]# pip3 install 'apache-airflow[kerberos]'
### 删除dag(airflow delete dag)
在airflow的web ui上delete dag会发现不生效,一刷新又出来了。网上的处理方法比较多,这里介绍stackoverflow上的一个简单方法:先删除dag文件,再在ui上删除dag即可。