安装(主要安装的是pyhs2,以下部分包可以省略,python版本python2.7,centos7):
# Bootstrap pip and a recent setuptools on CentOS 7 / Python 2.7.
easy_install pip
easy_install -U setuptools
pip install setuptools --upgrade
# C toolchain and SASL headers required to compile the 'sasl' Python package.
yum install python-devel gcc-c++ cyrus-sasl-devel gcc
# SASL mechanism plugins: PLAIN for password auth, GSSAPI for Kerberos.
# NOTE(review): cyrus-sasl-devel is repeated from the previous line.
yum install cyrus-sasl-plain cyrus-sasl-devel cyrus-sasl-gssapi
# Thrift transport stack plus the pyhs2 HiveServer2 client itself.
pip install thrift
pip install thrift-sasl
pip install sasl
pip install pyhs2
报错处理方法:在 /usr/lib/python2.7/site-packages/pyhs2/cursor.py 第152行之前添加定义 MAX_BLOCK_SIZE = 10000(该变量在源码中未定义,直接使用会触发下面的 NameError):
Traceback (most recent call last):
File "h.py", line 56, in <module>
data_list = cur.fetchmany(10000)
File "/usr/lib/python2.7/site-packages/pyhs2/cursor.py", line 152, in fetchmany
if size < 0 or size > MAX_BLOCK_SIZE:
NameError: global name 'MAX_BLOCK_SIZE' is not defined
pyhs2 项目已经不维护了,不建议在新项目中使用。
这种方式要在当前用户具有认证缓存的情况下使用:
先在系统中认证
[root@localhost yh]# klist -kte hive.service.keytab
Keytab name: FILE:hive.service.keytab
KVNO Timestamp Principal
---- ------------------- ------------------------------------------------------
1 2020-08-19T09:41:23 hive/hdpprde01.example.com@example.com (aes128-cts-hmac-sha1-96)
1 2020-08-19T09:41:23 hive/hdpprde01.example.com@example.com (arcfour-hmac)
1 2020-08-19T09:41:23 hive/hdpprde01.example.com@example.com (des3-cbc-sha1)
1 2020-08-19T09:41:23 hive/hdpprde01.example.com@example.com (aes256-cts-hmac-sha1-96)
1 2020-08-19T09:41:23 hive/hdpprde01.example.com@example.com (des-cbc-md5)
[root@localhost yh]# kinit -kt hive.service.keytab hive/hdpprde01.example.com@example.com
[root@localhost yh]# klist
Ticket cache: FILE:/tmp/krb5cc_0
Default principal: hive/hdpprde01.example.com@example.com
Valid starting Expires Service principal
2020-12-11T11:18:45 2020-12-12T11:18:45 krbtgt/example.com@example.com
python脚本:
[root@localhost yh]# cat h.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
import pyhs2
sql='''SELECT
A.INFNR
,a.EBELN
...省略sql内容 ...
and a.prctr_code= b.prctr_CODE
WHERE a.period_wid >= 20200101 limit 10'''
#print sql
conn = pyhs2.connect(host='hdp.example.com.cn',
port=10000,
authMechanism="KERBEROS")
#不使用kerberos用下面的连接串
#conn = pyhs2.connect(host='10.0.0.32',
# port=10000,
# authMechanism='PLAIN',
# user='hive'
# )
resnum=0
fo=open("res.txt","wb+")
flags=True
with conn.cursor() as cur:
cur.execute(sql)
while flags:
#每次取10000条数据
data_list = cur.fetchmany(10000)
for data in data_list:
#判断data_list有数据
if data != None:
fo.write(str(data)+'\n')
resnum+=1
print resnum
else:
flags=False
fo.close()
conn.close()
print resnum
安装
[root@master-mysql ~]# pip3 install sasl thrift thrift-sasl pyhive
使用
[root@master-mysql ~]# vim ph.py
#!/usr/bin/python3
# List all Hive databases over an unauthenticated (auth='NONE') connection.
from pyhive.hive import connect

hive_conn = connect(host='10.0.0.32', port=10000, auth='NONE', database='default')
cur = hive_conn.cursor()
#cur.execute('select * from tmp.pricing_calculate_result_spark where time_id="201907171355" limit 10,1')
cur.execute('show databases')
rows = cur.fetchall()
print(rows)
cur.close()
hive_conn.close()
[root@master-mysql ~]# python3 ph.py
[('ad',), ('bak',), ('temp',), ('test',), ('test01',)]
https://pypi.org/project/krbcontext/0.3.3/
pip3 install krbcontext
pip3 install pykerberos