
Connecting to Kerberos-enabled Hive from Python

Created: 2020-12-11

pyhs2

Installation (pyhs2 is the main package; some of the packages below can be skipped; Python 2.7 on CentOS 7):

easy_install pip
easy_install -U setuptools
pip install setuptools --upgrade
yum install python-devel  gcc-c++  cyrus-sasl-devel gcc
yum install cyrus-sasl-plain  cyrus-sasl-devel  cyrus-sasl-gssapi
pip install thrift
pip install thrift-sasl
pip install sasl
pip install pyhs2

Workaround for the error below: on line 152 of /usr/lib/python2.7/site-packages/pyhs2/cursor.py, change MAX_BLOCK_SIZE to 10000 (the constant is referenced but never defined, which causes the NameError):

Traceback (most recent call last):
  File "h.py", line 56, in <module>
    data_list =   cur.fetchmany(10000)
  File "/usr/lib/python2.7/site-packages/pyhs2/cursor.py", line 152, in fetchmany
    if size < 0 or size > MAX_BLOCK_SIZE:
NameError: global name 'MAX_BLOCK_SIZE' is not defined
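
An equivalent workaround, if you would rather not edit site-packages by hand, is to define the missing module-level global at runtime before the first fetchmany() call; a minimal sketch:

import pyhs2.cursor

# fetchmany() in pyhs2/cursor.py looks up the module global MAX_BLOCK_SIZE,
# which pyhs2 never defines; assigning it here makes the lookup succeed
pyhs2.cursor.MAX_BLOCK_SIZE = 10000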

pyhs2 is no longer maintained, so it is not recommended.

Connecting to HiveServer2 with cached Kerberos credentials

This approach requires that the current user already has a valid Kerberos credential cache:

First, authenticate on the system:

[root@localhost yh]# klist -kte hive.service.keytab 
Keytab name: FILE:hive.service.keytab
KVNO Timestamp           Principal
---- ------------------- ------------------------------------------------------
   1 2020-08-19T09:41:23 hive/hdpprde01.example.com@example.com (aes128-cts-hmac-sha1-96) 
   1 2020-08-19T09:41:23 hive/hdpprde01.example.com@example.com (arcfour-hmac) 
   1 2020-08-19T09:41:23 hive/hdpprde01.example.com@example.com (des3-cbc-sha1) 
   1 2020-08-19T09:41:23 hive/hdpprde01.example.com@example.com (aes256-cts-hmac-sha1-96) 
   1 2020-08-19T09:41:23 hive/hdpprde01.example.com@example.com (des-cbc-md5) 
[root@localhost yh]# kinit -kt hive.service.keytab  hive/hdpprde01.example.com@example.com
[root@localhost yh]# klist 
Ticket cache: FILE:/tmp/krb5cc_0
Default principal: hive/hdpprde01.example.com@example.com

Valid starting       Expires              Service principal
2020-12-11T11:18:45  2020-12-12T11:18:45  krbtgt/example.com@example.com
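
Before running the script below, the presence of a valid ticket can also be checked from Python itself (klist -s exits non-zero when the default credential cache is missing or expired); a small sketch, assuming the MIT Kerberos client tools are on the PATH:

#!/usr/bin/python
# -*- coding: utf-8 -*-
import subprocess
import sys

# "klist -s" prints nothing and returns a non-zero exit code
# when there is no valid (non-expired) ticket in the default cache
if subprocess.call(['klist', '-s']) != 0:
    sys.stderr.write('no valid Kerberos ticket cache, run kinit first\n')
    sys.exit(1)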

Python script:

[root@localhost yh]# cat h.py 
#!/usr/bin/python
# -*- coding: utf-8 -*-
import pyhs2

sql='''SELECT
A.INFNR
,a.EBELN
... SQL body omitted ...
and a.prctr_code= b.prctr_CODE
WHERE a.period_wid >= 20200101 limit 10'''

#print sql
conn = pyhs2.connect(host='hdp.example.com.cn',
                     port=10000,
                     authMechanism="KERBEROS")

# Without Kerberos, use a connection like this instead:
#conn = pyhs2.connect(host='10.0.0.32',
#                     port=10000,
#                     authMechanism='PLAIN',
#                     user='hive'
#                     )

resnum = 0
fo = open("res.txt", "wb+")
flags = True
with conn.cursor() as cur:
    cur.execute(sql)
    while flags:
        # fetch 10000 rows at a time
        data_list = cur.fetchmany(10000)
        # an empty batch means all rows have been read
        if not data_list:
            flags = False
            break
        for data in data_list:
            fo.write(str(data) + '\n')
            resnum += 1
            print resnum

fo.close()
conn.close()
print resnum

pyhive

Installation

[root@master-mysql ~]# pip3 install sasl thrift thrift-sasl pyhive

Usage

[root@master-mysql ~]# vim ph.py
#!/usr/bin/python3

from pyhive.hive import connect

con = connect(host='10.0.0.32',port=10000,auth='NONE',database='default')
cursor = con.cursor()
#cursor.execute('select * from tmp.pricing_calculate_result_spark  where time_id="201907171355" limit 10,1')
cursor.execute('show databases')
datas = cursor.fetchall()
print(datas)
cursor.close()
con.close()
[root@master-mysql ~]# python3 ph.py 
[('ad',), ('bak',), ('temp',), ('test',), ('test01',)]

Connecting to Kerberos-enabled Hive

https://pypi.org/project/krbcontext/0.3.3/

pip3 install krbcontext
pip3 install pykerberos
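
A minimal sketch of how these packages fit together, assuming a keytab-based login via krbcontext's krbContext context manager and PyHive's KERBEROS auth mode; the keytab path is a placeholder, the host and principal are taken from the examples above, and kerberos_service_name must match the service part of HiveServer2's principal (usually 'hive'):

#!/usr/bin/python3
from krbcontext import krbContext
from pyhive.hive import connect

# Placeholders: adjust the principal, keytab path and HiveServer2 host for your cluster
with krbContext(using_keytab=True,
                principal='hive/hdpprde01.example.com@example.com',
                keytab_file='/root/yh/hive.service.keytab'):
    # auth='KERBEROS' makes PyHive authenticate with the ticket obtained above
    con = connect(host='hdp.example.com.cn',
                  port=10000,
                  auth='KERBEROS',
                  kerberos_service_name='hive',
                  database='default')
    cursor = con.cursor()
    cursor.execute('show databases')
    print(cursor.fetchall())
    cursor.close()
    con.close()

If kinit has already been run as shown earlier, the krbContext block can be dropped and the connect() call used on its own; krbcontext simply wraps the kinit step so the script does not depend on an externally prepared ticket cache.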