Python使用HappyBase连接Hbase与基本操作
hbase基本概念可以参考:https://www.cnblogs.com/swordfall/p/8737328.html 。文章目录:安装Hbase;Python连接Hbase;1.创建、删除表结构;2.增改表数据;3.查看表数据;4.删除表数据;完整代码。安装Hbase:使用docker安装hbase,先执行 docker pull harisekhon/hbase,再执行 docker run -d -h myhbase -p 2181:2181 …(完整命令见下文)
·
hbase基本概念可以参考:https://www.cnblogs.com/swordfall/p/8737328.html
安装Hbase
docker安装hbase:
docker pull harisekhon/hbase
docker run -d -h myhbase -p 2181:2181 -p 8080:8080 -p 8085:8085 -p 9090:9090 -p 9095:9095 -p 16000:16000 -p 16010:16010 -p 16201:16201 -p 16301:16301 --name hbase harisekhon/hbase
然后访问本机:http://localhost:16010/master-status,就可以看到基本的运行情况
Python连接Hbase
这里使用HappyBase连接。
HappyBase文档:https://happybase.readthedocs.io/en/latest/user.html
HappyBase的github页面:https://github.com/python-happybase/happybase
连接Hbase有两种方法:
import happybase

# Option 1: a single connection, opened and closed explicitly.
# With no arguments HappyBase connects to the Thrift server on localhost:9090.
connection = happybase.Connection()
connection.close()

# Option 2: a connection pool. The context manager returns the connection
# to the pool when the block exits.
pool = happybase.ConnectionPool(size=3)
with pool.connection() as connection:
    pass
1.创建、删除表结构
HappyBase并不提供动态修改hbase列族的方法,因此当使用HappyBase定义了一个表之后,它的列族就已经固定了,只能进入Hbase Shell进行修改列族的操作。
import happybase

connection = happybase.Connection()
try:
    # Under Python 3 HappyBase returns table names as bytes. Normalise them
    # to str first: comparing a str against the raw list never matches, and
    # create_table() would then fail on a table that already exists.
    existing_tables = {
        name.decode("utf-8") if isinstance(name, bytes) else name
        for name in connection.tables()
    }
    if "my_hbase_table" not in existing_tables:
        connection.create_table(
            "my_hbase_table",  # table name
            {
                "col_family_1": dict(),  # column family, default options
                "col_family_2": dict(),  # column family, default options
            },
        )
    elif not connection.is_table_enabled("my_hbase_table"):
        # The table exists but is disabled: re-enable it before use.
        connection.enable_table("my_hbase_table")
    # To drop the table instead:
    # connection.delete_table("my_hbase_table", disable=True)
finally:
    # Close the connection even if one of the calls above raises.
    connection.close()
2.增改表数据
import happybase

pool = happybase.ConnectionPool(size=3)
# Borrow a connection from the pool for the duration of the block.
with pool.connection() as connection:
    table = connection.table("my_hbase_table")
    # Insert data. Call shape: put(row_key, {"column_family:qualifier": value}).
    # Writing to a row/column that already holds data stores the new value
    # for that cell, so the same call is used for updates.
    table.put("row1", {"col_family_1:col_1": "a", "col_family_2:col_1": "b"})
    table.put(
        "row2",
        {"col_family_1:col_1": "1", "col_family_1:col_2": "2", "col_family_2:col_1": "c"},
    )
Table.put()的详细用法可以参考官网文档:https://happybase.readthedocs.io/en/latest/api.html#happybase.Table.put
3.查看表数据
import happybase

pool = happybase.ConnectionPool(size=3)
# Borrow a connection from the pool for the duration of the block.
with pool.connection() as connection:
    table = connection.table("my_hbase_table")

    # Option 1: fetch a single row by its row key.
    one_row = table.row("row1")
    for column, value in one_row.items():
        # Column names and values come back as bytes; decode them for
        # display (values may contain non-ASCII text such as Chinese).
        print(column.decode("utf-8"), value.decode("utf-8"))

    # Option 2: scan the whole table. Each item is a pair of
    # (row key, {column name: value}).
    for row_index, col_families in table.scan():
        for col_key, col_value in col_families.items():
            col_key_str = col_key.decode("utf-8")
            col_value_str = col_value.decode("utf-8")
            print("行:{} 列:{} 值:{}".format(row_index, col_key_str, col_value_str))
    print("=================")
4.删除表数据
import happybase

pool = happybase.ConnectionPool(size=3)
# Borrow a connection from the pool for the duration of the block.
with pool.connection() as connection:
    table = connection.table("my_hbase_table")

    # Delete data. The second argument lists what to remove from the row.
    table.delete("row1", ["col_family_1:col_1"])  # a single cell
    table.delete("row2", ["col_family_1"])  # a whole column family

    # Scan the table again to show what is left. Each item is a pair of
    # (row key, {column name: value}).
    for row_index, col_families in table.scan():
        for col_key, col_value in col_families.items():
            col_key_str = col_key.decode("utf-8")
            col_value_str = col_value.decode("utf-8")
            print("行:{} 列:{} 值:{}".format(row_index, col_key_str, col_value_str))
完整代码
import happybase


def _print_all_cells(table):
    """Scan *table* and print one line per cell (row key, column, value)."""
    for row_index, col_families in table.scan():
        for col_key, col_value in col_families.items():
            col_key_str = col_key.decode("utf-8")
            col_value_str = col_value.decode("utf-8")
            print("行:{} 列:{} 值:{}".format(row_index, col_key_str, col_value_str))


# --- Create the table, or re-enable it if it already exists ---
connection = happybase.Connection()
try:
    # Under Python 3 HappyBase returns table names as bytes. Normalise them
    # to str first: comparing a str against the raw list never matches, and
    # create_table() would then fail on a table that already exists.
    existing_tables = {
        name.decode("utf-8") if isinstance(name, bytes) else name
        for name in connection.tables()
    }
    if "my_hbase_table" not in existing_tables:
        connection.create_table(
            "my_hbase_table",  # table name
            {
                "col_family_1": dict(),  # column family, default options
                "col_family_2": dict(),  # column family, default options
            },
        )
    elif not connection.is_table_enabled("my_hbase_table"):
        # The table exists but is disabled: re-enable it before use.
        connection.enable_table("my_hbase_table")
finally:
    # Close the connection even if one of the calls above raises.
    connection.close()

# --- Work with the table through a pooled connection ---
pool = happybase.ConnectionPool(size=3)
with pool.connection() as connection:
    table = connection.table("my_hbase_table")

    # Insert data. Call shape: put(row_key, {"column_family:qualifier": value}).
    table.put("row1", {"col_family_1:col_1": "a", "col_family_2:col_1": "b"})
    table.put(
        "row2",
        {"col_family_1:col_1": "1", "col_family_1:col_2": "2", "col_family_2:col_1": "c"},
    )

    # Read a single row; column names and values come back as bytes, so
    # decode them for display.
    one_row = table.row("row1")
    for column, value in one_row.items():
        print(column.decode("utf-8"), value.decode("utf-8"))

    # Full table contents before deleting anything.
    _print_all_cells(table)
    print("=================")

    # Delete data, then show what is left.
    table.delete("row1", ["col_family_1:col_1"])  # a single cell
    table.delete("row2", ["col_family_1"])  # a whole column family
    _print_all_cells(table)
打印结果:
col_family_1:col_1 a
col_family_2:col_1 b
行:b'row1' 列:col_family_1:col_1 值:a
行:b'row1' 列:col_family_2:col_1 值:b
行:b'row2' 列:col_family_1:col_1 值:1
行:b'row2' 列:col_family_1:col_2 值:2
行:b'row2' 列:col_family_2:col_1 值:c
=================
行:b'row1' 列:col_family_2:col_1 值:b
行:b'row2' 列:col_family_2:col_1 值:c
更多参考
更多推荐
已为社区贡献66条内容
所有评论(0)