Mongodb与Clickhouse对比
硬件设施、数据库安装与大数据插入脚本的耗时对比(完整插入脚本见下文"三、大数据插入脚本")。
·
一、硬件设施
二、安装数据库
2.1、docker安装clickhouse
1、拉取镜像
docker pull yandex/clickhouse-server
2、启动服务(临时启动,获取配置文件)
docker run --rm -d --name=clickhouse-server \
--ulimit nofile=262144:262144 \
-p 8123:8123 -p 9009:9009 -p 9000:9000 \
yandex/clickhouse-server:latest
3、复制容器中的配置文件到宿主机
提前创建好文件夹,mkdir -p /home/clickhouse/conf/
docker cp clickhouse-server:/etc/clickhouse-server/config.xml /home/clickhouse/conf/config.xml
docker cp clickhouse-server:/etc/clickhouse-server/users.xml /home/clickhouse/conf/users.xml
4、停止第2步启动的ck容器(clickhouse-server)
5、修改密码 第3步的配置文件
vi /home/clickhouse/conf/users.xml,在password标签中加上密码,可以使用加密密码
6、重新启动
docker run -d --name=clickhouse-server \
-p 8123:8123 -p 9009:9009 -p 9000:9000 \
--ulimit nofile=262144:262144 \
-v /home/clickhouse/data:/var/lib/clickhouse:rw \
-v /home/clickhouse/conf/config.xml:/etc/clickhouse-server/config.xml \
-v /home/clickhouse/conf/users.xml:/etc/clickhouse-server/users.xml \
-v /home/clickhouse/log:/var/log/clickhouse-server:rw \
yandex/clickhouse-server:latest
7、进入ck容器
docker exec -it clickhouse-server clickhouse-client --user=default --password=your_password
select version() # 查看版本
2.2、docker安装mongodb
配置文件:/home/mongo/configdb/mongod.conf
storage:
dbPath: /data/db
journal:
enabled: true
engine: wiredTiger
security:
authorization: enabled
net:
port: 27017
bindIp: 192.168.xx.xx
容器启动:
docker run -d \
-p 27017:27017 \
--name mongo \
-v /home/mongo/db:/data/db \
-v /home/mongo/configdb:/data/configdb \
-v /etc/localtime:/etc/localtime \
mongo -f /data/configdb/mongod.conf
三、大数据插入脚本
import json, time
import pymongo,traceback
from clickhouse_driver import Client
import uuid
import random
# Decorator that reports how long a call takes (装饰器统计运行耗时)
def coast_time(func):
    """Decorator that prints the wall-clock duration of each call to ``func``.

    Uses ``time.perf_counter`` (monotonic, high-resolution) and
    ``functools.wraps`` so the wrapped function keeps its ``__name__`` and
    docstring — the original wrapper reported every function as ``fun``
    in tracebacks and introspection.
    """
    @functools.wraps(func)
    def fun(*args, **kwargs):
        t = time.perf_counter()
        result = func(*args, **kwargs)
        print(f'func {func.__name__} coast time:{time.perf_counter() - t:.8f} s')
        return result
    return fun
class MyEncoder(json.JSONEncoder):
    """JSON encoder that additionally accepts ``bytes`` values.

    Works around "Object of type 'bytes' is not JSON serializable" by
    decoding bytes as UTF-8 text; everything else falls through to the
    base encoder (which raises TypeError for unsupported types).
    """

    def default(self, obj):
        if isinstance(obj, bytes):
            return obj.decode('utf-8')
        return super().default(obj)
# ClickHouse DDL for the benchmark table.
# BUG FIX: task_size was UInt16 (max 65535), but the benchmark inserts row
# indices up to 100,000,000 with types_check=True, so every row past 65535
# failed to insert. UInt32 (max ~4.29e9) covers the full benchmark range.
create_task_table = """CREATE TABLE IF NOT EXISTS task(\
`_id` String,\
`task_name` String,\
`task_size` UInt32,\
`status` UInt8
)
ENGINE = MergeTree() PRIMARY KEY _id;
"""
# Database handles, created at import time against a hard-coded host.
# ClickHouse native-protocol client (TCP port 9000).
ck_client = Client(host='192.168.12.199', port=9000, database="testdb", user='default', send_receive_timeout=20)
# MongoDB client for the same host; pymongo connects lazily on first use.
mongo_client = pymongo.MongoClient("mongodb://192.168.12.199:27017/")
mongo_db = mongo_client["testdb"]
mongo_col = mongo_db["task"]
@coast_time
def insert_mongo_task_data(total, patch):
    """Insert ``total`` synthetic task documents into MongoDB.

    Documents are buffered and written with ``insert_many`` in batches of
    ``patch`` rows; the collection is dropped first so repeated benchmark
    runs start from the same state.

    :param total: number of documents to generate
    :param patch: batch size per insert_many call
    """
    mongo_col.drop()
    data_list = []
    for i in range(total):
        try:
            data_list.append({
                '_id': str(uuid.uuid1()),
                'task_name': 'task_' + str(i),
                'task_size': i,
                'status': random.choice([0, 1]),
            })
            if len(data_list) == patch:
                mongo_col.insert_many(data_list)
                data_list = []
        except Exception:
            # BUG FIX: traceback.print_exc() returns None (it prints to
            # stderr), so the original message always ended in "None";
            # format_exc() returns the traceback text for embedding.
            print("task name :%s process failed:%s" % ('task_' + str(i), traceback.format_exc()))
    if data_list:  # flush the final partial batch
        mongo_col.insert_many(data_list)
@coast_time
def insert_ck_task_data(total, patch):
    """Insert ``total`` synthetic task rows into ClickHouse.

    The ``task`` table is dropped and recreated first so repeated benchmark
    runs start from the same state; rows are buffered and inserted in
    batches of ``patch``.

    :param total: number of rows to generate
    :param patch: batch size per INSERT
    """
    ck_client.execute('DROP TABLE IF EXISTS task')
    ck_client.execute(create_task_table)
    data_list = []
    for i in range(total):
        try:
            data_list.append({
                '_id': str(uuid.uuid1()),
                'task_name': 'task_' + str(i),
                'task_size': i,
                'status': random.choice([0, 1]),
            })
            if len(data_list) == patch:
                ck_client.execute("INSERT INTO task(*) VALUES", data_list, types_check=True)
                data_list = []
        except Exception:
            # BUG FIX: traceback.print_exc() returns None (it prints to
            # stderr), so the original message always ended in "None";
            # format_exc() returns the traceback text for embedding.
            print("task name :%s process failed:%s" % ('task_' + str(i), traceback.format_exc()))
    if data_list:  # flush the final partial batch
        ck_client.execute("INSERT INTO task(*) VALUES", data_list, types_check=True)
# Benchmark driver: insert 100 million rows, 10,000 per batch, into each store.
# NOTE(review): these run at import time; consider guarding with
# `if __name__ == "__main__":` so the module can be imported without side effects.
insert_ck_task_data(100000000, 10000)
insert_mongo_task_data(100000000, 10000)
批次 10000:

| 插入条数 | mongo 耗时 | ck 耗时 | Mongo 大小 | Ck 大小 |
| --- | --- | --- | --- | --- |
| 1000 | 0.01972508s | 0.01732014s | 99.5K | 12K |
| 10000 | 0.12277857s | 0.08004815s | 1004.8K | 119K |
| 100000 | 1.12529528s | 0.73075602s | 9.0M | 1.9M |
| 1000000 | 10.92156150s | 7.17739819s | 100M | 49M |
| 10000000 | 108.91806854s | 72.16343116s | 1009.8M | 117M |
| 100000000 | 1189.25558783s | 748.89750133s | 10G | 1.1G |
不同条数精准查询耗时分析:

| 插入条数 | db.task.find({'task_name':'task_1'}).explain("executionStats") 耗时 | select * from testdb.task where task_name ='task_1' 耗时 |
| --- | --- | --- |
| 1000 | 0ms | 0.004 sec |
| 10000 | 3ms | 0.008 sec |
| 100000 | 29ms | 0.006 sec |
| 1000000 | 340ms | 0.009 sec |
| 10000000 | 3281ms | 0.035 sec |
| 100000000 | 165762ms | 0.626 sec |
不同条数模糊查询耗时分析:

| 插入条数 | db.task.find({'task_name':{'$regex':'.*_1.*'}}).explain("executionStats") 耗时 | select * from testdb.task where task_name like '%_1%' 耗时 |
| --- | --- | --- |
| 1000 | 0ms | 0.004 sec |
| 10000 | 4ms | 0.012 sec |
| 100000 | 42ms | 0.022 sec |
| 1000000 | 468ms | 0.077 sec |
| 10000000 | 5871ms | 0.670 sec |
| 100000000 | 112334ms | 21.094 sec |
group聚合耗时:

| 插入条数 | db.getCollection("task").aggregate([{"$group": {"_id": null, "total_num": {"$sum": 1}, "total_size": {"$sum": "$task_size"}, "avg_size": {"$avg": "$task_size"}}}]) 耗时 | select count(*), SUM(task_size), AVG(task_size) from task 耗时 |
| --- | --- | --- |
| 100000000 | 106775ms | 0.035 sec |
通过上述对比,Clickhouse对数据的压缩更为出色,同样的数据下,占用的空间较小;
批量插入数据,Clickhouse比Mongodb耗时少,而且随着数据量的增大,这种差距也在拉大;
万级别下的查询,Mongodb耗时比Clickhouse少,但超过万级别,随着数据的不断增大,Clickhouse的耗时小于Mongodb。
更多推荐
所有评论(0)