You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@iotdb.apache.org by ka...@apache.org on 2021/08/07 17:23:12 UTC

[iotdb] branch py_cli_performance created (now cac100f)

This is an automated email from the ASF dual-hosted git repository.

kangrong pushed a change to branch py_cli_performance
in repository https://gitbox.apache.org/repos/asf/iotdb.git.


      at cac100f  speed up the python tablet insertion

This branch includes the following new commits:

     new cac100f  speed up the python tablet insertion

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  Revisions
listed as "add" were already present in the repository and have only
been added to this reference.

[iotdb] 01/01: speed up the python tablet insertion

Posted by ka...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

kangrong pushed a commit to branch py_cli_performance
in repository https://gitbox.apache.org/repos/asf/iotdb.git

commit cac100f85b092600139e05a47b7c9d50f0a48816
Author: kr11 <3095717866.com>
AuthorDate: Sun Aug 8 01:22:29 2021 +0800

    speed up the python tablet insertion
---
 client-py/iotdb/utils/Tablet.py                    | 131 +++++++----
 client-py/tests/tablet_performance_comparison.py   | 248 +++++++++++++++++++++
 .../main/java/org/apache/iotdb/TabletExample.java  | 178 +++++++++++++++
 3 files changed, 508 insertions(+), 49 deletions(-)

diff --git a/client-py/iotdb/utils/Tablet.py b/client-py/iotdb/utils/Tablet.py
index 667adcb..4cef54d 100644
--- a/client-py/iotdb/utils/Tablet.py
+++ b/client-py/iotdb/utils/Tablet.py
@@ -22,7 +22,7 @@ from iotdb.utils.IoTDBConstants import TSDataType
 
 
 class Tablet(object):
-    def __init__(self, device_id, measurements, data_types, values, timestamps):
+    def __init__(self, device_id, measurements, data_types, values, timestamps, use_new=False):
         """
         creating a tablet for insertion
           for example, considering device: root.sg1.d1
@@ -39,7 +39,7 @@ class Tablet(object):
         :param values: 2-D List, the values of each row should be the outer list element.
         :param timestamps: List.
         """
-        if len(timestamps) != len(values):
+        if not use_new and len(timestamps) != len(values):
             raise RuntimeError(
                 "Input error! len(timestamps) does not equal to len(values)!"
             )
@@ -57,6 +57,7 @@ class Tablet(object):
         self.__data_types = data_types
         self.__row_number = len(timestamps)
         self.__column_number = len(measurements)
+        self.__use_new = use_new
 
     @staticmethod
     def check_sorted(timestamps):
@@ -78,54 +79,86 @@ class Tablet(object):
         return self.__device_id
 
     def get_binary_timestamps(self):
-        format_str_list = [">"]
-        values_tobe_packed = []
-        for timestamp in self.__timestamps:
-            format_str_list.append("q")
-            values_tobe_packed.append(timestamp)
+        if not self.__use_new:
+            format_str_list = [">"]
+            values_tobe_packed = []
+            for timestamp in self.__timestamps:
+                format_str_list.append("q")
+                values_tobe_packed.append(timestamp)
 
-        format_str = "".join(format_str_list)
-        return struct.pack(format_str, *values_tobe_packed)
+            format_str = "".join(format_str_list)
+            return struct.pack(format_str, *values_tobe_packed)
+        else:
+            return self.__timestamps.tobytes()
 
     def get_binary_values(self):
-        format_str_list = [">"]
-        values_tobe_packed = []
-        for i in range(self.__column_number):
-            if self.__data_types[i] == TSDataType.BOOLEAN:
-                format_str_list.append(str(self.__row_number))
-                format_str_list.append("?")
-                for j in range(self.__row_number):
-                    values_tobe_packed.append(self.__values[j][i])
-            elif self.__data_types[i] == TSDataType.INT32:
-                format_str_list.append(str(self.__row_number))
-                format_str_list.append("i")
-                for j in range(self.__row_number):
-                    values_tobe_packed.append(self.__values[j][i])
-            elif self.__data_types[i] == TSDataType.INT64:
-                format_str_list.append(str(self.__row_number))
-                format_str_list.append("q")
-                for j in range(self.__row_number):
-                    values_tobe_packed.append(self.__values[j][i])
-            elif self.__data_types[i] == TSDataType.FLOAT:
-                format_str_list.append(str(self.__row_number))
-                format_str_list.append("f")
-                for j in range(self.__row_number):
-                    values_tobe_packed.append(self.__values[j][i])
-            elif self.__data_types[i] == TSDataType.DOUBLE:
-                format_str_list.append(str(self.__row_number))
-                format_str_list.append("d")
-                for j in range(self.__row_number):
-                    values_tobe_packed.append(self.__values[j][i])
-            elif self.__data_types[i] == TSDataType.TEXT:
-                for j in range(self.__row_number):
-                    value_bytes = bytes(self.__values[j][i], "utf-8")
+        if not self.__use_new:
+            format_str_list = [">"]
+            values_tobe_packed = []
+            for i in range(self.__column_number):
+                if self.__data_types[i] == TSDataType.BOOLEAN:
+                    format_str_list.append(str(self.__row_number))
+                    format_str_list.append("?")
+                    for j in range(self.__row_number):
+                        values_tobe_packed.append(self.__values[j][i])
+                elif self.__data_types[i] == TSDataType.INT32:
+                    format_str_list.append(str(self.__row_number))
                     format_str_list.append("i")
-                    format_str_list.append(str(len(value_bytes)))
-                    format_str_list.append("s")
-                    values_tobe_packed.append(len(value_bytes))
-                    values_tobe_packed.append(value_bytes)
-            else:
-                raise RuntimeError("Unsupported data type:" + str(self.__data_types[i]))
-
-        format_str = "".join(format_str_list)
-        return struct.pack(format_str, *values_tobe_packed)
+                    for j in range(self.__row_number):
+                        values_tobe_packed.append(self.__values[j][i])
+                elif self.__data_types[i] == TSDataType.INT64:
+                    format_str_list.append(str(self.__row_number))
+                    format_str_list.append("q")
+                    for j in range(self.__row_number):
+                        values_tobe_packed.append(self.__values[j][i])
+                elif self.__data_types[i] == TSDataType.FLOAT:
+                    format_str_list.append(str(self.__row_number))
+                    format_str_list.append("f")
+                    for j in range(self.__row_number):
+                        values_tobe_packed.append(self.__values[j][i])
+                elif self.__data_types[i] == TSDataType.DOUBLE:
+                    format_str_list.append(str(self.__row_number))
+                    format_str_list.append("d")
+                    for j in range(self.__row_number):
+                        values_tobe_packed.append(self.__values[j][i])
+                elif self.__data_types[i] == TSDataType.TEXT:
+                    for j in range(self.__row_number):
+                        value_bytes = bytes(self.__values[j][i], "utf-8")
+                        format_str_list.append("i")
+                        format_str_list.append(str(len(value_bytes)))
+                        format_str_list.append("s")
+                        values_tobe_packed.append(len(value_bytes))
+                        values_tobe_packed.append(value_bytes)
+                else:
+                    raise RuntimeError("Unsupported data type:" + str(self.__data_types[i]))
+
+            format_str = "".join(format_str_list)
+            return struct.pack(format_str, *values_tobe_packed)
+        else:
+            bs_len = 0
+            bs_list = []
+            for i, value in enumerate(self.__values):
+                if self.__data_types[i] == TSDataType.TEXT:
+                    format_str_list = [">"]
+                    values_tobe_packed = []
+                    for str_list in value:
+                        # For TEXT, this is the same as the original solution
+                        value_bytes = bytes(str_list, "utf-8")
+                        format_str_list.append("i")
+                        format_str_list.append(str(len(value_bytes)))
+                        format_str_list.append("s")
+                        values_tobe_packed.append(len(value_bytes))
+                        values_tobe_packed.append(value_bytes)
+                    format_str = "".join(format_str_list)
+                    bs = struct.pack(format_str, *values_tobe_packed)
+                else:
+                    bs = value.tobytes()
+                bs_list.append(bs)
+                bs_len += len(bs)
+            ret = memoryview(bytearray(bs_len))
+            offset = 0
+            for bs in bs_list:
+                _l = len(bs)
+                ret[offset:offset + _l] = bs
+                offset += _l
+            return ret
diff --git a/client-py/tests/tablet_performance_comparison.py b/client-py/tests/tablet_performance_comparison.py
new file mode 100644
index 0000000..7f3eac9
--- /dev/null
+++ b/client-py/tests/tablet_performance_comparison.py
@@ -0,0 +1,248 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+import argparse
+import os
+import random
+import time
+import numpy as np
+import pandas as pd
+
+from iotdb.Session import Session
+from iotdb.utils.IoTDBConstants import TSDataType
+from iotdb.utils.Tablet import Tablet
+
+# each dtype string also specifies the byte order (">" means big-endian)
+FORMAT_CHAR_OF_TYPES = {TSDataType.BOOLEAN: ">?",
+                        TSDataType.FLOAT: ">f4",
+                        TSDataType.DOUBLE: ">f8",
+                        TSDataType.INT32: ">i4",
+                        TSDataType.INT64: ">i8",
+                        TSDataType.TEXT: "str"}
+
+# the time column name in the csv file.
+TIME_STR = 'time'
+
+
+def load_csv_data(measure_tstype_infos: dict, data_file_name: str) -> pd.DataFrame:
+    """
+    load csv data.
+    :param measure_tstype_infos: key(str): measurement name, value(TSDataType): measurement data type
+    :param data_file_name: the csv file name to load
+    :return: data in format of pd.DataFrame.
+    """
+    metadata_for_pd = {TIME_STR: FORMAT_CHAR_OF_TYPES[TSDataType.INT64]}
+    for _measure, _type in measure_tstype_infos.items():
+        metadata_for_pd[_measure] = FORMAT_CHAR_OF_TYPES[_type]
+    df = pd.read_csv(data_file_name, dtype=metadata_for_pd)
+    return df
+
+
+def generate_csv_data(measure_tstype_infos: dict, data_file_name: str, _row: int, seed=0) -> None:
+    """
+    generate csv data randomly according to given measurements and their data types.
+    :param measure_tstype_infos: key(str): measurement name, value(TSDataType): measurement data type
+    :param data_file_name: the csv file name to output
+    :param _row: tablet row number
+    :param seed: random seed
+    """
+    import random
+    random.seed(seed)
+
+    CHAR_BASE = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
+
+    def generate_data(_type: TSDataType):
+        if _type == TSDataType.BOOLEAN:
+            return [random.randint(0, 1) == 1 for _ in range(_row)]
+        elif _type == TSDataType.INT32:
+            return [random.randint(-2 ** 31, 2 ** 31) for _ in range(_row)]
+        elif _type == TSDataType.INT64:
+            return [random.randint(-2 ** 63, 2 ** 63) for _ in range(_row)]
+        elif _type == TSDataType.FLOAT:
+            return [1.5 for _ in range(_row)]
+        elif _type == TSDataType.DOUBLE:
+            return [0.844421 for _ in range(_row)]
+        elif _type == TSDataType.TEXT:
+            return [''.join(random.choice(CHAR_BASE) for _ in range(5)) for _ in range(_row)]
+        else:
+            raise TypeError('not support type:' + str(_type))
+
+    values = {TIME_STR: pd.Series(np.arange(_row), dtype=FORMAT_CHAR_OF_TYPES[TSDataType.INT64])}
+    for column, data_type in measure_tstype_infos.items():
+        values[column] = pd.Series(generate_data(data_type), dtype=FORMAT_CHAR_OF_TYPES[data_type])
+
+    df = pd.DataFrame(values)
+    df.to_csv(data_file_name, index=False)
+    print("data file has generated.")
+
+
+def create_open_session():
+    """
+    creating session connection.
+    :return:
+    """
+    ip = "127.0.0.1"
+    port_ = "6667"
+    username_ = "root"
+    password_ = "root"
+    session = Session(ip, port_, username_, password_, fetch_size=1024, zone_id="UTC+8")
+    session.open(False)
+    return session
+
+
+def check_count(expect, _session, _sql):
+    """
+    verify that the given count query returns exactly one row whose value equals the expected count.
+    :param expect: expected number
+    :param _session: IoTDB session
+    :param _sql: query SQL
+    """
+    session_data_set = _session.execute_query_statement(_sql)
+    session_data_set.set_fetch_size(1)
+    get_count_line = False
+    while session_data_set.has_next():
+        if get_count_line:
+            assert False, "select count return more than one line"
+        line = session_data_set.next()
+        actual = line.get_fields()[0].get_long_value()
+        assert expect == actual, f"count error: expect {expect} lines, actual {actual} lines"
+        get_count_line = True
+    if not get_count_line:
+        assert False, "select count has no result"
+    session_data_set.close_operation_handle()
+
+
+def check_query_result(expect, _session, _sql):
+    """
+    verify that the rows returned by the given query match the expected rows, in order.
+    :param expect: expected result
+    :param _session: IoTDB session
+    :param _sql: query SQL
+    """
+    session_data_set = _session.execute_query_statement(_sql)
+    session_data_set.set_fetch_size(1)
+    idx = 0
+    while session_data_set.has_next():
+        line = session_data_set.next()
+        assert str(line) == expect[idx], f"line {idx}: actual {str(line)} != expect ({expect[idx]})"
+        idx += 1
+    assert idx == len(expect), f"result rows: actual ({idx}) != expect ({len(expect)})"
+    session_data_set.close_operation_handle()
+
+
+def performance_test(measure_tstype_infos, data_file_name, use_new=True, check_result=False, row=10000, col=5000):
+    """
+    execute tablet insert using original or new methods.
+    :param measure_tstype_infos: key(str): measurement name, value(TSDataType): measurement data type
+    :param use_new: True to construct the tablet with the new method, False to use the original one
+    :param data_file_name: the csv file name to insert
+    :param row: tablet row number
+    :param col: tablet column number
+    """
+    print(f"Test python: use new: {use_new}, row: {row}, col: {col}. measurements: {measure_tstype_infos}")
+    print(f"Total points: {len(measure_tstype_infos) * row * col}")
+
+    # open the session and clean data
+    session = create_open_session()
+    session.execute_non_query_statement(f'delete timeseries root.*')
+
+    # test start
+    st = time.perf_counter()
+    csv_data = load_csv_data(measure_tstype_infos, data_file_name)
+    load_cost = time.perf_counter() - st
+    insert_cost = 0
+    measurements = list(measure_tstype_infos.keys())
+    data_types = list(measure_tstype_infos.values())
+    for i in range(0, col):
+        # if i % 500 == 0:
+        #     print(f"insert {i} cols")
+        device_id = "root.sg%d.%d" % (i % 8, i)
+        if not use_new:
+            # Use the ORIGINAL method to construct tablet
+            timestamps_ = []
+            values = []
+            for t in range(0, row):
+                timestamps_.append(csv_data.at[t, TIME_STR])
+                value_array = []
+                for m in measurements:
+                    value_array.append(csv_data.at[t, m])
+                values.append(value_array)
+        else:
+            # Use the NEW method to construct tablet
+            timestamps_ = csv_data[TIME_STR].values
+            if timestamps_.dtype != FORMAT_CHAR_OF_TYPES[TSDataType.INT64]:
+                timestamps_ = timestamps_.astype(FORMAT_CHAR_OF_TYPES[TSDataType.INT64])
+            values = []
+            for measure, tstype in measure_tstype_infos.items():
+                type_char = FORMAT_CHAR_OF_TYPES[tstype]
+                value_array = csv_data[measure].values
+                if value_array.dtype != type_char:
+                    if not (tstype == TSDataType.TEXT and value_array.dtype == object):
+                        value_array = value_array.astype(type_char)
+                values.append(value_array)
+
+        tablet = Tablet(device_id, measurements, data_types, values, timestamps_, use_new=use_new)
+        cost_st = time.perf_counter()
+        session.insert_tablet(tablet)
+        insert_cost += time.perf_counter() - cost_st
+
+        if check_result:
+            check_count(row, session, "select count(*) from %s" % device_id)
+            expect = []
+            for t in range(row):
+                line = [str(csv_data.at[t, TIME_STR])]
+                for m in measurements:
+                    line.append(str(csv_data.at[t, m]))
+                expect.append("\t\t".join([v for v in line]))
+            check_query_result(expect, session, f"select {','.join(measurements)} from {device_id}")
+            print("query validation have passed")
+    end = time.perf_counter()
+
+    # clean data and close the session
+    session.execute_non_query_statement(f'delete timeseries root.*')
+    session.close()
+
+    print("load cost: %.3f s" % load_cost)
+    print("construct tablet cost: %.3f s" % (end - st - insert_cost - load_cost))
+    print("insert tablet cost: %.3f s" % insert_cost)
+    print("total cost: %.3f s" % (end - st))
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='tablet performance comparison')
+    parser.add_argument('--row', type=int, default=10000, help="the row number of the input tablet")
+    parser.add_argument('--col', type=int, default=5000, help="the column number of the input tablet")
+    parser.add_argument('--check_result', '-c', action="store_true", help="True if check out the result")
+    parser.add_argument('--use_new', '-n', action="store_false", help="True if use the new tablet insert")
+    parser.add_argument('--seed', type=int, default=0, help="the random seed for generating csv data")
+    parser.add_argument('--data_file_name', type=str, default='sample.csv', help="the path of csv data")
+    args = parser.parse_args()
+
+    measure_tstype_infos = {
+        's0': TSDataType.BOOLEAN,
+        's1': TSDataType.FLOAT,
+        's2': TSDataType.INT32,
+        's3': TSDataType.DOUBLE,
+        's4': TSDataType.INT64,
+        's5': TSDataType.TEXT,
+    }
+    # if not os.path.exists(args.data_file_name):
+    random.seed(a=args.seed, version=2)
+    generate_csv_data(measure_tstype_infos, args.data_file_name, args.row, args.seed)
+
+    performance_test(measure_tstype_infos, data_file_name=args.data_file_name, use_new=args.use_new,
+                     check_result=args.check_result, row=args.row, col=args.col)
diff --git a/example/session/src/main/java/org/apache/iotdb/TabletExample.java b/example/session/src/main/java/org/apache/iotdb/TabletExample.java
new file mode 100644
index 0000000..4f3b6d2
--- /dev/null
+++ b/example/session/src/main/java/org/apache/iotdb/TabletExample.java
@@ -0,0 +1,178 @@
+package org.apache.iotdb;
+
+import org.apache.iotdb.session.Session;
+import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType;
+import org.apache.iotdb.tsfile.utils.Binary;
+import org.apache.iotdb.tsfile.write.record.Tablet;
+import org.apache.iotdb.tsfile.write.schema.IMeasurementSchema;
+import org.apache.iotdb.tsfile.write.schema.MeasurementSchema;
+
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+public class TabletExample {
+
+  private static final String TIME_STR = "time";
+
+  /**
+   * load csv data.
+   *
+   * @param measureTSTypeInfos key: measurement name, value: measurement data type
+   * @param dataFileName the csv file name to load
+   * @return key: measurement name, value: series in format of {@link ArrayList}
+   * @throws IOException if the csv format is incorrect
+   */
+  private static Map<String, ArrayList> loadCSVData(
+      Map<String, TSDataType> measureTSTypeInfos, String dataFileName) throws IOException {
+    measureTSTypeInfos.put(TIME_STR, TSDataType.INT64);
+    try (BufferedReader reader = new BufferedReader(new FileReader(dataFileName))) {
+      String headline = reader.readLine();
+      if (headline == null) {
+        throw new IOException("Given csv data file has not headers");
+      }
+      // check the csv file format
+      String[] fileColumns = headline.split(",");
+      Map<String, Integer> columnToIdMap = new HashMap<>();
+      for (int col = 0; col < fileColumns.length; col++) {
+        String columnName = fileColumns[col];
+        if (columnToIdMap.containsKey(columnName)) {
+          throw new IOException(
+              String.format("csv file contains duplicate columns: %s", columnName));
+        }
+        columnToIdMap.put(columnName, col);
+      }
+      Map<String, ArrayList> ret = new HashMap<>();
+      // make sure that all measurements can be found from the data file
+      for (Entry<String, TSDataType> entry : measureTSTypeInfos.entrySet()) {
+        String measurement = entry.getKey();
+        if (!columnToIdMap.containsKey(entry.getKey())) {
+          throw new IOException(String.format("measurement %s's is not in csv file.", measurement));
+        } else {
+          ret.put(measurement, new ArrayList<>());
+        }
+      }
+
+      String line;
+      while ((line = reader.readLine()) != null) {
+        String[] items = line.split(",");
+        for (Entry<String, TSDataType> entry : measureTSTypeInfos.entrySet()) {
+          String measurement = entry.getKey();
+          TSDataType dataType = entry.getValue();
+          int idx = columnToIdMap.get(measurement);
+          switch (dataType) {
+            case BOOLEAN:
+              ret.get(measurement).add(Boolean.parseBoolean(items[idx]));
+              break;
+            case INT32:
+              ret.get(measurement).add(Integer.parseInt(items[idx]));
+              break;
+            case INT64:
+              ret.get(measurement).add(Long.parseLong(items[idx]));
+              break;
+            case FLOAT:
+              ret.get(measurement).add(Float.parseFloat(items[idx]));
+              break;
+            case DOUBLE:
+              ret.get(measurement).add(Double.parseDouble(items[idx]));
+              break;
+            case TEXT:
+              ret.get(measurement).add(Binary.valueOf(items[idx]));
+              break;
+            case VECTOR:
+              throw new IOException(String.format("data type %s is not yet.", TSDataType.VECTOR));
+          }
+        }
+      }
+      return ret;
+    } finally {
+      measureTSTypeInfos.remove(TIME_STR);
+    }
+  }
+
+  /**
+   * Read csv file and insert tablet to IoTDB
+   *
+   * @param args: arg(with default value): arg0: dataFileName(sample.csv), arg1: rowSize(10000),
+   * arg2: colSize(5000).
+   */
+  public static void main(String[] args) throws Exception {
+
+    Session session = new Session("127.0.0.1", 6667, "root", "root");
+    session.open();
+    String dataFileName = "/Users/kangrong/code/github/iotdb-apache/client-py/sample.csv";
+    int rowSize = 10000;
+    int colSize = 5000;
+    if (args.length > 1) {
+      dataFileName = args[0];
+    }
+    if (args.length > 2) {
+      rowSize = Integer.parseInt(args[1]);
+    }
+    if (args.length > 3) {
+      colSize = Integer.parseInt(args[2]);
+    }
+
+    // construct the tablet's measurements.
+    Map<String, TSDataType> measureTSTypeInfos = new HashMap<>();
+    measureTSTypeInfos.put("s0", TSDataType.BOOLEAN);
+    measureTSTypeInfos.put("s1", TSDataType.FLOAT);
+    measureTSTypeInfos.put("s2", TSDataType.INT32);
+    measureTSTypeInfos.put("s3", TSDataType.DOUBLE);
+    measureTSTypeInfos.put("s4", TSDataType.INT64);
+    measureTSTypeInfos.put("s5", TSDataType.TEXT);
+    List<IMeasurementSchema> schemas = new ArrayList<>();
+    measureTSTypeInfos.forEach((mea, type) -> schemas.add(new MeasurementSchema(mea, type)));
+
+    System.out.println(
+        String.format(
+            "Test Java: csv file name: %s, row: %d, col: %d", dataFileName, rowSize, colSize));
+    System.out.println(String.format("Total points: %d", rowSize * colSize * schemas.size()));
+
+    // test start
+    long allStart = System.nanoTime();
+
+    Map<String, ArrayList> data = loadCSVData(measureTSTypeInfos, dataFileName);
+    long loadCost = System.nanoTime() - allStart;
+
+    long insertCost = 0;
+    for (int i = 0; i < colSize; i++) {
+      String deviceId = "root.sg" + i % 8 + "." + i;
+
+      Tablet ta = new Tablet(deviceId, schemas, rowSize);
+      ta.rowSize = rowSize;
+      for (int t = 0; t < ta.rowSize; t++) {
+        ta.addTimestamp(t, (Long) data.get(TIME_STR).get(t));
+        for (Entry<String, TSDataType> entry : measureTSTypeInfos.entrySet()) {
+          String mea = entry.getKey();
+          ta.addValue(mea, t, data.get(mea).get(t));
+        }
+      }
+      long insertSt = System.nanoTime();
+      session.insertTablet(ta, false);
+      insertCost += (System.nanoTime() - insertSt);
+    }
+    // test end
+    long allEnd = System.nanoTime();
+
+    session.executeNonQueryStatement("delete timeseries root.*");
+    session.close();
+
+    System.out.println(String.format("load cost: %.3f",
+        ((float) loadCost / 1000_000_000)));
+    System.out.println(String.format("construct tablet cost: %.3f",
+        ((float) (allEnd - allStart - insertCost - loadCost) / 1000_000_000)));
+    System.out.println(String.format("insert tablet cost: %.3f",
+        ((float) insertCost / 1000_000_000)));
+    System.out.println(String.format("total cost: %.3f",
+        ((float) (allEnd - allStart) / 1000_000_000)));
+    System.out.println(String.format("%.3f",
+        ((float) loadCost / 1000_000_000)));
+
+  }
+}