You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iotdb.apache.org by ka...@apache.org on 2021/08/04 15:30:44 UTC
[iotdb] branch py_cli_performance updated: add
This is an automated email from the ASF dual-hosted git repository.
kangrong pushed a commit to branch py_cli_performance
in repository https://gitbox.apache.org/repos/asf/iotdb.git
The following commit(s) were added to refs/heads/py_cli_performance by this push:
new 4cc4edc add
4cc4edc is described below
commit 4cc4edca8b4175d5a2430ccbbf7b64a589bb6d89
Author: kangrong.cn <ka...@bytedance.com>
AuthorDate: Wed Aug 4 23:29:57 2021 +0800
add
---
client-py/iotdb/utils/Tablet.py | 33 +++++++++---
client-py/tablet_performance_comparison.py | 80 +++++++++++++++++++++++-------
2 files changed, 89 insertions(+), 24 deletions(-)
diff --git a/client-py/iotdb/utils/Tablet.py b/client-py/iotdb/utils/Tablet.py
index a1780f4..2009f31 100644
--- a/client-py/iotdb/utils/Tablet.py
+++ b/client-py/iotdb/utils/Tablet.py
@@ -135,10 +135,31 @@ class Tablet(object):
format_str = "".join(format_str_list)
return struct.pack(format_str, *values_tobe_packed)
else:
- ret = []
- for i in range(self.__column_number):
- # if self.__data_types[i] == TSDataType.FLOAT:
- ret.append(self.__values.tobytes())
- # else:
- # raise RuntimeError("Unsupported data type:" + str(self.__data_types[i]))
+ # refer to: https://www.guyrutenberg.com/2020/04/04/fast-bytes-concatenation-in-python/
+ bs_len = 0
+ bs_list = []
+ for i, value in enumerate(self.__values):
+ if self.__data_types[i] == TSDataType.TEXT:
+ format_str_list = [">"]
+ values_tobe_packed = []
+ for str_list in value:
+ # For TEXT, it's the same as the original solution
+ value_bytes = bytes(str_list, "utf-8")
+ format_str_list.append("i")
+ format_str_list.append(str(len(value_bytes)))
+ format_str_list.append("s")
+ values_tobe_packed.append(len(value_bytes))
+ values_tobe_packed.append(value_bytes)
+ format_str = "".join(format_str_list)
+ bs = struct.pack(format_str, *values_tobe_packed)
+ else:
+ bs = value.tobytes()
+ bs_list.append(bs)
+ bs_len += len(bs)
+ ret = memoryview(bytearray(bs_len))
+ offset = 0
+ for bs in bs_list:
+ _l = len(bs)
+ ret[offset:offset + _l] = bs
+ offset += _l
return ret
diff --git a/client-py/tablet_performance_comparison.py b/client-py/tablet_performance_comparison.py
index 284fb29..49908a5 100644
--- a/client-py/tablet_performance_comparison.py
+++ b/client-py/tablet_performance_comparison.py
@@ -17,6 +17,7 @@
#
# Uncomment the following line to use apache-iotdb module installed by pip3
+import struct
from iotdb.Session import Session
from iotdb.utils.IoTDBConstants import TSDataType
@@ -25,6 +26,24 @@ import random
import numpy as np
import time
+# format_str_list = []
+# values_tobe_packed = []
+# values = ['asd','123','@#$']
+# for v in values:
+# value_bytes = bytes(v, "utf-8")
+# format_str_list.append("i")
+# format_str_list.append(str(len(value_bytes)))
+# format_str_list.append("s")
+# values_tobe_packed.append(len(value_bytes))
+# values_tobe_packed.append(value_bytes)
+# format_str = "".join(format_str_list)
+# st_bytes = struct.pack(format_str, *values_tobe_packed)
+# print("struct:", st_bytes)
+
+# # numpy obj
+# np.
+# exit(1)
+
def create_open_session():
# creating session connection.
@@ -45,7 +64,8 @@ def check_count(expect, _session, _sql):
if get_count_line:
assert False, "select count return more than one line"
line = session_data_set.next()
- assert expect == line.get_fields()[0].get_long_value(), "count result error"
+ actual = line.get_fields()[0].get_long_value()
+ assert expect == actual, f"count error: expect {expect} lines, actual {actual} lines"
get_count_line = True
if not get_count_line:
assert False, "select count has no result"
@@ -64,10 +84,15 @@ def check_query_result(expect, _session, _sql):
assert idx == len(expect), f"result rows: actual ({idx}) != expect ({len(expect)})"
session_data_set.close_operation_handle()
+def prepare_input_data():
+ pass
def performance_test(data_types=tuple([TSDataType.FLOAT]), use_new=True, valid_result=False, row=10000, col=2000,
seed=0):
session = create_open_session()
+ # clean data
+ session.execute_non_query_statement(f'delete timeseries root.*')
+
st = time.perf_counter()
random.seed(a=seed, version=2)
insert_cost = 0
@@ -78,11 +103,11 @@ def performance_test(data_types=tuple([TSDataType.FLOAT]), use_new=True, valid_r
TSDataType.INT32: 100,
TSDataType.INT64: 123456789098,
TSDataType.TEXT: "test_record"}
- FORMAT_CHAR_OF_TYPES = {TSDataType.BOOLEAN: "?",
- TSDataType.DOUBLE: "d",
- TSDataType.FLOAT: "f",
- TSDataType.INT32: "i",
- TSDataType.INT64: "q",
+ FORMAT_CHAR_OF_TYPES = {TSDataType.BOOLEAN: ">?",
+ TSDataType.DOUBLE: ">d",
+ TSDataType.FLOAT: ">f",
+ TSDataType.INT32: ">i",
+ TSDataType.INT64: ">q",
TSDataType.TEXT: str}
MEASUREMENT_OF_TYPES = {TSDataType.BOOLEAN: "s0",
TSDataType.DOUBLE: "s1",
@@ -95,7 +120,6 @@ def performance_test(data_types=tuple([TSDataType.FLOAT]), use_new=True, valid_r
for i in range(0, col):
device_id = "root.sg%d.%d" % (i % 8, i)
-
if not use_new:
timestamps_ = []
values_ = []
@@ -107,7 +131,12 @@ def performance_test(data_types=tuple([TSDataType.FLOAT]), use_new=True, valid_r
values_.append(value_)
else:
timestamps_ = np.zeros(row, dtype='>q')
- values_ = [np.zeros(row, dtype=FORMAT_CHAR_OF_TYPES[data_type]) for data_type in data_types]
+ values_ = []
+ for data_type in data_types:
+ if data_type == TSDataType.TEXT:
+ values_.append([None for _ in range(row)])
+ else:
+ values_.append(np.zeros(row, dtype=FORMAT_CHAR_OF_TYPES[data_type]))
for t in range(0, row):
timestamps_[t] = t
for j, data_type in enumerate(data_types):
@@ -125,6 +154,8 @@ def performance_test(data_types=tuple([TSDataType.FLOAT]), use_new=True, valid_r
expect = [str(e) + "\t\t" + expect_values for e in range(row)]
check_query_result(expect, session, f"select {','.join(measurements_)} from {device_id}")
# print("query validation have passed")
+ # clean data
+ session.execute_non_query_statement(f'delete timeseries root.*')
session.close()
end = time.perf_counter()
@@ -135,16 +166,29 @@ def performance_test(data_types=tuple([TSDataType.FLOAT]), use_new=True, valid_r
valid_result = True
-use_new = True
+# use_new = True
# use_new = False
# performance_test(data_types=tuple([TSDataType.FLOAT]), use_new=use_new, valid_result=valid_result, row=3, col=2)
# performance_test(data_types=tuple([TSDataType.FLOAT]), use_new=True, valid_result=valid_result)
-
-performance_test(
- data_types=tuple([TSDataType.BOOLEAN, TSDataType.FLOAT, TSDataType.DOUBLE, TSDataType.INT32, TSDataType.INT64]),
- use_new=use_new, valid_result=valid_result, row=3, col=3)
-
-# performance_test(data_types=tuple([TSDataType.BOOLEAN, TSDataType.FLOAT, TSDataType.DOUBLE, TSDataType.INT32, TSDataType.INT64]), use_new=True, valid_result=valid_result)
-#
-# performance_test(data_types=tuple([TSDataType.BOOLEAN, TSDataType.FLOAT, TSDataType.DOUBLE, TSDataType.INT32, TSDataType.INT64, TSDataType.TEXT]), use_new=False, valid_result=valid_result)
-# performance_test(data_types=tuple([TSDataType.BOOLEAN, TSDataType.FLOAT, TSDataType.DOUBLE, TSDataType.INT32, TSDataType.INT64, TSDataType.TEXT]), use_new=True, valid_result=valid_result)
+for use_new in [
+ True,
+ False
+]:
+ row = 5000
+ col = 2000
+ # row = 2
+ # col = 2
+ valid_result = False
+ # valid_result = True
+ data_types = tuple([
+ TSDataType.BOOLEAN,
+ TSDataType.FLOAT,
+ TSDataType.DOUBLE,
+ TSDataType.INT32,
+ TSDataType.INT64,
+ TSDataType.TEXT,
+ ])
+ print('-' * 10)
+ print(f"use new: {use_new}, row: {row}, col: {col}. data types: {[str(dt) for dt in data_types]}")
+ performance_test(data_types=data_types, use_new=use_new, valid_result=valid_result, row=row, col=col)
+ print('-' * 10)