You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iotdb.apache.org by qi...@apache.org on 2022/12/26 06:12:49 UTC

[iotdb] branch master updated: [IOTDB-5264] NumpyTablet supports insert with None values (#8576)

This is an automated email from the ASF dual-hosted git repository.

qiaojialin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iotdb.git


The following commit(s) were added to refs/heads/master by this push:
     new 1921b6d66d [IOTDB-5264] NumpyTablet supports insert with None values (#8576)
1921b6d66d is described below

commit 1921b6d66d1addb6273eb46129efcac969c57a23
Author: Haonan <hh...@outlook.com>
AuthorDate: Mon Dec 26 14:12:43 2022 +0800

    [IOTDB-5264] NumpyTablet supports insert with None values (#8576)
---
 client-py/README.md                                | 37 ++++++++++++++++-
 client-py/SessionExample.py                        | 26 ++++++++++++
 client-py/iotdb/utils/BitMap.py                    |  9 +++++
 client-py/iotdb/utils/NumpyTablet.py               | 33 ++++++++++++---
 client-py/tests/test_numpy_tablet.py               | 47 ++++++++++++++++++++++
 client-py/tests/test_session.py                    | 26 ++++++++++++
 .../UserGuide/API/Programming-Python-Native-API.md | 39 +++++++++++++++++-
 .../UserGuide/API/Programming-Python-Native-API.md | 39 +++++++++++++++++-
 8 files changed, 245 insertions(+), 11 deletions(-)

diff --git a/client-py/README.md b/client-py/README.md
index 68c1697acf..dcb6bdcca1 100644
--- a/client-py/README.md
+++ b/client-py/README.md
@@ -172,6 +172,18 @@ tablet_ = Tablet(
     device_id, measurements_, data_types_, values_, timestamps_
 )
 session.insert_tablet(tablet_)
+
+values_ = [
+    [None, 10, 11, 1.1, 10011.1, "test01"],
+    [True, None, 11111, 1.25, 101.0, "test02"],
+    [False, 100, None, 188.1, 688.25, "test03"],
+    [True, 0, 0, 0, None, None],
+]
+timestamps_ = [16, 17, 18, 19]
+tablet_ = Tablet(
+    device_id, measurements_, data_types_, values_, timestamps_
+)
+session.insert_tablet(tablet_)
 ```
 * Numpy Tablet
 
@@ -203,9 +215,32 @@ np_values_ = [
 ]
 np_timestamps_ = np.array([1, 2, 3, 4], TSDataType.INT64.np_dtype())
 np_tablet_ = NumpyTablet(
-  "root.sg_test_01.d_02", measurements_, data_types_, np_values_, np_timestamps_
+  device_id, measurements_, data_types_, np_values_, np_timestamps_
 )
 session.insert_tablet(np_tablet_)
+
+# insert one numpy tablet with None into the database.
+np_values_ = [
+    np.array([False, True, False, True], TSDataType.BOOLEAN.np_dtype()),
+    np.array([10, 100, 100, 0], TSDataType.INT32.np_dtype()),
+    np.array([11, 11111, 1, 0], TSDataType.INT64.np_dtype()),
+    np.array([1.1, 1.25, 188.1, 0], TSDataType.FLOAT.np_dtype()),
+    np.array([10011.1, 101.0, 688.25, 6.25], TSDataType.DOUBLE.np_dtype()),
+    np.array(["test01", "test02", "test03", "test04"], TSDataType.TEXT.np_dtype()),
+]
+np_timestamps_ = np.array([98, 99, 100, 101], TSDataType.INT64.np_dtype())
+np_bitmaps_ = []
+for i in range(len(measurements_)):
+    np_bitmaps_.append(BitMap(len(np_timestamps_)))
+np_bitmaps_[0].mark(0)
+np_bitmaps_[1].mark(1)
+np_bitmaps_[2].mark(2)
+np_bitmaps_[4].mark(3)
+np_bitmaps_[5].mark(3)
+np_tablet_with_none = NumpyTablet(
+    device_id, measurements_, data_types_, np_values_, np_timestamps_, np_bitmaps_
+)
+session.insert_tablet(np_tablet_with_none)
 ```
 
 * Insert multiple Tablets
diff --git a/client-py/SessionExample.py b/client-py/SessionExample.py
index a91b456ca0..e1cb1ef4c2 100644
--- a/client-py/SessionExample.py
+++ b/client-py/SessionExample.py
@@ -23,6 +23,7 @@ from iotdb.Session import Session
 from iotdb.template.InternalNode import InternalNode
 from iotdb.template.MeasurementNode import MeasurementNode
 from iotdb.template.Template import Template
+from iotdb.utils.BitMap import BitMap
 from iotdb.utils.IoTDBConstants import TSDataType, TSEncoding, Compressor
 from iotdb.utils.Tablet import Tablet
 from iotdb.utils.NumpyTablet import NumpyTablet
@@ -218,6 +219,31 @@ np_tablet_unsorted = NumpyTablet(
     np_values_unsorted,
     np_timestamps_unsorted,
 )
+
+# insert one numpy tablet with None values into the database.
+np_values_ = [
+    np.array([False, True, False, True], TSDataType.BOOLEAN.np_dtype()),
+    np.array([10, 100, 100, 0], TSDataType.INT32.np_dtype()),
+    np.array([11, 11111, 1, 0], TSDataType.INT64.np_dtype()),
+    np.array([1.1, 1.25, 188.1, 0], TSDataType.FLOAT.np_dtype()),
+    np.array([10011.1, 101.0, 688.25, 6.25], TSDataType.DOUBLE.np_dtype()),
+    np.array(["test01", "test02", "test03", "test04"]),
+]
+np_timestamps_ = np.array([98, 99, 100, 101], TSDataType.INT64.np_dtype())
+np_bitmaps_ = []
+for i in range(len(measurements_)):
+    np_bitmaps_.append(BitMap(len(np_timestamps_)))
+np_bitmaps_[0].mark(0)
+np_bitmaps_[1].mark(1)
+np_bitmaps_[2].mark(2)
+np_bitmaps_[4].mark(3)
+np_bitmaps_[5].mark(3)
+np_tablet_with_none = NumpyTablet(
+    "root.sg_test_01.d_02", measurements_, data_types_, np_values_, np_timestamps_, np_bitmaps_
+)
+session.insert_tablet(np_tablet_with_none)
+
+
 session.insert_tablet(np_tablet_unsorted)
 print(np_tablet_unsorted.get_timestamps())
 for value in np_tablet_unsorted.get_values():
diff --git a/client-py/iotdb/utils/BitMap.py b/client-py/iotdb/utils/BitMap.py
index 7b171f6f81..621bf6c7df 100644
--- a/client-py/iotdb/utils/BitMap.py
+++ b/client-py/iotdb/utils/BitMap.py
@@ -28,3 +28,12 @@ class BitMap(object):
 
     def mark(self, position):
         self.bits[position // 8] |= BitMap.BIT_UTIL[position % 8]
+
+    def is_all_unmarked(self):
+        for i in range(self.__size // 8):
+            if self.bits[i] != 0:
+                return False
+        for i in range(self.__size % 8):
+            if (self.bits[self.__size // 8] & BitMap.BIT_UTIL[i]) != 0:
+                return False
+        return True
diff --git a/client-py/iotdb/utils/NumpyTablet.py b/client-py/iotdb/utils/NumpyTablet.py
index b81a172a40..b217841f74 100644
--- a/client-py/iotdb/utils/NumpyTablet.py
+++ b/client-py/iotdb/utils/NumpyTablet.py
@@ -22,7 +22,7 @@ from iotdb.utils.BitMap import BitMap
 
 
 class NumpyTablet(object):
-    def __init__(self, device_id, measurements, data_types, values, timestamps):
+    def __init__(self, device_id, measurements, data_types, values, timestamps, bitmaps=None):
         """
         creating a numpy tablet for insertion
           for example, considering device: root.sg1.d1
@@ -66,6 +66,7 @@ class NumpyTablet(object):
         self.__data_types = data_types
         self.__row_number = len(timestamps)
         self.__column_number = len(measurements)
+        self.bitmaps = bitmaps
 
     @staticmethod
     def check_sorted(timestamps):
@@ -116,6 +117,23 @@ class NumpyTablet(object):
                 bs = value.tobytes()
             bs_list.append(bs)
             bs_len += len(bs)
+        if self.bitmaps is not None:
+            format_str_list = [">"]
+            values_tobe_packed = []
+            for i in range(self.__column_number):
+                format_str_list.append("?")
+                if self.bitmaps[i] is None or self.bitmaps[i].is_all_unmarked():
+                    values_tobe_packed.append(False)
+                else:
+                    values_tobe_packed.append(True)
+                    format_str_list.append(str(self.__row_number // 8 + 1))
+                    format_str_list.append("c")
+                    for j in range(self.__row_number // 8 + 1):
+                        values_tobe_packed.append(bytes([self.bitmaps[i].bits[j]]))
+            format_str = "".join(format_str_list)
+            bs = struct.pack(format_str, *values_tobe_packed)
+            bs_list.append(bs)
+            bs_len += len(bs)
         ret = memoryview(bytearray(bs_len))
         offset = 0
         for bs in bs_list:
@@ -124,8 +142,11 @@ class NumpyTablet(object):
             offset += _l
         return ret
 
-    def __mark_none_value(self, bitmaps, bitmap, column, row):
-        if bitmap is None:
-            bitmap = BitMap(self.__row_number)
-            bitmaps.insert(column, bitmap)
-        bitmap.mark(row)
+    def mark_none_value(self, column, row):
+        if self.bitmaps is None:
+            self.bitmaps = []
+            for i in range(self.__column_number):
+                self.bitmaps.append(None)
+        if self.bitmaps[column] is None:
+            self.bitmaps[column] = BitMap(self.__row_number)
+        self.bitmaps[column].mark(row)
diff --git a/client-py/tests/test_numpy_tablet.py b/client-py/tests/test_numpy_tablet.py
index b984193975..ddcb17b070 100644
--- a/client-py/tests/test_numpy_tablet.py
+++ b/client-py/tests/test_numpy_tablet.py
@@ -17,6 +17,8 @@
 #
 
 import numpy as np
+
+from iotdb.utils.BitMap import BitMap
 from iotdb.utils.IoTDBConstants import TSDataType
 from iotdb.utils.NumpyTablet import NumpyTablet
 from iotdb.utils.Tablet import Tablet
@@ -59,6 +61,51 @@ def test_numpy_tablet_serialization():
     assert tablet_.get_binary_values() == np_tablet_.get_binary_values()
 
 
+def test_numpy_tablet_with_none_serialization():
+
+    measurements_ = ["s_01", "s_02", "s_03", "s_04", "s_05", "s_06"]
+    data_types_ = [
+        TSDataType.BOOLEAN,
+        TSDataType.INT32,
+        TSDataType.INT64,
+        TSDataType.FLOAT,
+        TSDataType.DOUBLE,
+        TSDataType.TEXT,
+    ]
+    values_ = [
+        [None, 10, 11, 1.1, 10011.1, "test01"],
+        [True, None, 11111, 1.25, 101.0, "test02"],
+        [False, 100, None, 188.1, 688.25, "test03"],
+        [True, 0, 0, 0, None, None],
+    ]
+    timestamps_ = [16, 17, 18, 19]
+    tablet_ = Tablet(
+        "root.sg_test_01.d_01", measurements_, data_types_, values_, timestamps_
+    )
+    np_values_ = [
+        np.array([False, True, False, True], np.dtype(">?")),
+        np.array([10, 0, 100, 0], np.dtype(">i4")),
+        np.array([11, 11111, 0, 0], np.dtype(">i8")),
+        np.array([1.1, 1.25, 188.1, 0], np.dtype(">f4")),
+        np.array([10011.1, 101.0, 688.25, 0], np.dtype(">f8")),
+        np.array(["test01", "test02", "test03", ""]),
+    ]
+    np_timestamps_ = np.array([16, 17, 18, 19], np.dtype(">i8"))
+    np_bitmaps_ = []
+    for i in range(len(measurements_)):
+        np_bitmaps_.append(BitMap(len(np_timestamps_)))
+    np_bitmaps_[0].mark(0)
+    np_bitmaps_[1].mark(1)
+    np_bitmaps_[2].mark(2)
+    np_bitmaps_[4].mark(3)
+    np_bitmaps_[5].mark(3)
+    np_tablet_ = NumpyTablet(
+        "root.sg_test_01.d_01", measurements_, data_types_, np_values_, np_timestamps_, np_bitmaps_
+    )
+    assert tablet_.get_binary_timestamps() == np_tablet_.get_binary_timestamps()
+    assert tablet_.get_binary_values() == np_tablet_.get_binary_values()
+
+
 def test_sort_numpy_tablet():
 
     measurements_ = ["s_01", "s_02", "s_03", "s_04", "s_05", "s_06"]
diff --git a/client-py/tests/test_session.py b/client-py/tests/test_session.py
index e7ae79fcb9..92bc87d012 100644
--- a/client-py/tests/test_session.py
+++ b/client-py/tests/test_session.py
@@ -20,6 +20,7 @@
 import numpy as np
 
 from iotdb.Session import Session
+from iotdb.utils.BitMap import BitMap
 from iotdb.utils.IoTDBConstants import TSDataType, TSEncoding, Compressor
 from iotdb.utils.NumpyTablet import NumpyTablet
 from iotdb.utils.Tablet import Tablet
@@ -290,6 +291,31 @@ def test_session():
             test_fail()
             print_message("insert tablet with empty cells failed")
 
+        # insert one numpy tablet with empty cells into the database.
+        np_values_ = [
+            np.array([False, True, False, True], np.dtype(">?")),
+            np.array([10, 0, 100, 0], np.dtype(">i4")),
+            np.array([11, 11111, 0, 0], np.dtype(">i8")),
+            np.array([1.1, 1.25, 188.1, 0], np.dtype(">f4")),
+            np.array([10011.1, 101.0, 688.25, 0], np.dtype(">f8")),
+            np.array(["test01", "test02", "test03", ""]),
+        ]
+        np_timestamps_ = np.array([30, 31, 32, 33], np.dtype(">i8"))
+        np_bitmaps_ = []
+        for i in range(len(measurements_)):
+            np_bitmaps_.append(BitMap(len(np_timestamps_)))
+        np_bitmaps_[0].mark(0)
+        np_bitmaps_[1].mark(1)
+        np_bitmaps_[2].mark(2)
+        np_bitmaps_[4].mark(3)
+        np_bitmaps_[5].mark(3)
+        np_tablet_ = NumpyTablet(
+            "root.sg_test_01.d_01", measurements_, data_types_, np_values_, np_timestamps_, np_bitmaps_
+        )
+        if session.insert_tablet(np_tablet_) < 0:
+            test_fail()
+            print_message("insert numpy tablet with empty cells failed")
+
         # insert records of one device
         time_list = [1, 2, 3]
         measurements_list = [
diff --git a/docs/UserGuide/API/Programming-Python-Native-API.md b/docs/UserGuide/API/Programming-Python-Native-API.md
index 30ea146e4b..c1cd8995ad 100644
--- a/docs/UserGuide/API/Programming-Python-Native-API.md
+++ b/docs/UserGuide/API/Programming-Python-Native-API.md
@@ -153,6 +153,18 @@ tablet_ = Tablet(
     device_id, measurements_, data_types_, values_, timestamps_
 )
 session.insert_tablet(tablet_)
+
+values_ = [
+    [None, 10, 11, 1.1, 10011.1, "test01"],
+    [True, None, 11111, 1.25, 101.0, "test02"],
+    [False, 100, None, 188.1, 688.25, "test03"],
+    [True, 0, 0, 0, None, None],
+]
+timestamps_ = [16, 17, 18, 19]
+tablet_ = Tablet(
+    device_id, measurements_, data_types_, values_, timestamps_
+)
+session.insert_tablet(tablet_)
 ```
 * Numpy Tablet
 
@@ -180,13 +192,36 @@ np_values_ = [
     np.array([11, 11111, 1, 0], TSDataType.INT64.np_dtype()),
     np.array([1.1, 1.25, 188.1, 0], TSDataType.FLOAT.np_dtype()),
     np.array([10011.1, 101.0, 688.25, 6.25], TSDataType.DOUBLE.np_dtype()),
-    np.array(["test01", "test02", "test03", "test04"]),
+    np.array(["test01", "test02", "test03", "test04"], TSDataType.TEXT.np_dtype()),
 ]
 np_timestamps_ = np.array([1, 2, 3, 4], TSDataType.INT64.np_dtype())
 np_tablet_ = NumpyTablet(
-  "root.sg_test_01.d_02", measurements_, data_types_, np_values_, np_timestamps_
+  device_id, measurements_, data_types_, np_values_, np_timestamps_
 )
 session.insert_tablet(np_tablet_)
+
+# insert one numpy tablet with None into the database.
+np_values_ = [
+    np.array([False, True, False, True], TSDataType.BOOLEAN.np_dtype()),
+    np.array([10, 100, 100, 0], TSDataType.INT32.np_dtype()),
+    np.array([11, 11111, 1, 0], TSDataType.INT64.np_dtype()),
+    np.array([1.1, 1.25, 188.1, 0], TSDataType.FLOAT.np_dtype()),
+    np.array([10011.1, 101.0, 688.25, 6.25], TSDataType.DOUBLE.np_dtype()),
+    np.array(["test01", "test02", "test03", "test04"], TSDataType.TEXT.np_dtype()),
+]
+np_timestamps_ = np.array([98, 99, 100, 101], TSDataType.INT64.np_dtype())
+np_bitmaps_ = []
+for i in range(len(measurements_)):
+    np_bitmaps_.append(BitMap(len(np_timestamps_)))
+np_bitmaps_[0].mark(0)
+np_bitmaps_[1].mark(1)
+np_bitmaps_[2].mark(2)
+np_bitmaps_[4].mark(3)
+np_bitmaps_[5].mark(3)
+np_tablet_with_none = NumpyTablet(
+    device_id, measurements_, data_types_, np_values_, np_timestamps_, np_bitmaps_
+)
+session.insert_tablet(np_tablet_with_none)
 ```
 
 * Insert multiple Tablets
diff --git a/docs/zh/UserGuide/API/Programming-Python-Native-API.md b/docs/zh/UserGuide/API/Programming-Python-Native-API.md
index 115488b729..3b213d8b79 100644
--- a/docs/zh/UserGuide/API/Programming-Python-Native-API.md
+++ b/docs/zh/UserGuide/API/Programming-Python-Native-API.md
@@ -153,6 +153,18 @@ tablet_ = Tablet(
     device_id, measurements_, data_types_, values_, timestamps_
 )
 session.insert_tablet(tablet_)
+
+values_ = [
+    [None, 10, 11, 1.1, 10011.1, "test01"],
+    [True, None, 11111, 1.25, 101.0, "test02"],
+    [False, 100, None, 188.1, 688.25, "test03"],
+    [True, 0, 0, 0, None, None],
+]
+timestamps_ = [16, 17, 18, 19]
+tablet_ = Tablet(
+    device_id, measurements_, data_types_, values_, timestamps_
+)
+session.insert_tablet(tablet_)
 ```
 * Numpy Tablet
 
@@ -179,13 +191,36 @@ np_values_ = [
     np.array([11, 11111, 1, 0], TSDataType.INT64.np_dtype()),
     np.array([1.1, 1.25, 188.1, 0], TSDataType.FLOAT.np_dtype()),
     np.array([10011.1, 101.0, 688.25, 6.25], TSDataType.DOUBLE.np_dtype()),
-    np.array(["test01", "test02", "test03", "test04"]),
+    np.array(["test01", "test02", "test03", "test04"], TSDataType.TEXT.np_dtype()),
 ]
 np_timestamps_ = np.array([1, 2, 3, 4], TSDataType.INT64.np_dtype())
 np_tablet_ = NumpyTablet(
-  "root.sg_test_01.d_02", measurements_, data_types_, np_values_, np_timestamps_
+  device_id, measurements_, data_types_, np_values_, np_timestamps_
 )
 session.insert_tablet(np_tablet_)
+
+# insert one numpy tablet with None into the database.
+np_values_ = [
+    np.array([False, True, False, True], TSDataType.BOOLEAN.np_dtype()),
+    np.array([10, 100, 100, 0], TSDataType.INT32.np_dtype()),
+    np.array([11, 11111, 1, 0], TSDataType.INT64.np_dtype()),
+    np.array([1.1, 1.25, 188.1, 0], TSDataType.FLOAT.np_dtype()),
+    np.array([10011.1, 101.0, 688.25, 6.25], TSDataType.DOUBLE.np_dtype()),
+    np.array(["test01", "test02", "test03", "test04"], TSDataType.TEXT.np_dtype()),
+]
+np_timestamps_ = np.array([98, 99, 100, 101], TSDataType.INT64.np_dtype())
+np_bitmaps_ = []
+for i in range(len(measurements_)):
+    np_bitmaps_.append(BitMap(len(np_timestamps_)))
+np_bitmaps_[0].mark(0)
+np_bitmaps_[1].mark(1)
+np_bitmaps_[2].mark(2)
+np_bitmaps_[4].mark(3)
+np_bitmaps_[5].mark(3)
+np_tablet_with_none = NumpyTablet(
+    device_id, measurements_, data_types_, np_values_, np_timestamps_, np_bitmaps_
+)
+session.insert_tablet(np_tablet_with_none)
 ```
 
 * 插入多个 Tablet