You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iotdb.apache.org by ha...@apache.org on 2022/04/07 09:37:47 UTC

[iotdb] 04/04: [IOTDB-2859] Fix incorrect handling of None values in Python tablet (#5441)

This is an automated email from the ASF dual-hosted git repository.

haonan pushed a commit to branch rel/0.13
in repository https://gitbox.apache.org/repos/asf/iotdb.git

commit 9b1d0822ef09ab36ac9a06fcc9aab488b2def496
Author: Haonan <hh...@outlook.com>
AuthorDate: Thu Apr 7 17:00:48 2022 +0800

    [IOTDB-2859] Fix incorrect handling of None values in Python tablet (#5441)
---
 client-py/iotdb/utils/IoTDBRpcDataSet.py |   2 +-
 client-py/iotdb/utils/Tablet.py          |  23 ++++---
 client-py/tests/test_tablet.py           | 109 +++++++++++++++++++++++++++++++
 client-py/tests/test_todf.py             |   4 +-
 4 files changed, 123 insertions(+), 15 deletions(-)

diff --git a/client-py/iotdb/utils/IoTDBRpcDataSet.py b/client-py/iotdb/utils/IoTDBRpcDataSet.py
index 83468ad429..9d4bfa33cb 100644
--- a/client-py/iotdb/utils/IoTDBRpcDataSet.py
+++ b/client-py/iotdb/utils/IoTDBRpcDataSet.py
@@ -233,7 +233,7 @@ class IoTDBRpcDataSet(object):
                         data_array.append(value)
                         j += 1
                         offset += length
-                    data_array = np.array(data_array, dtype=np.object)
+                    data_array = np.array(data_array, dtype=object)
                 else:
                     raise RuntimeError("unsupported data type {}.".format(data_type))
                 if data_array.dtype.byteorder == ">":
diff --git a/client-py/iotdb/utils/Tablet.py b/client-py/iotdb/utils/Tablet.py
index 2f41889f22..58fc84603a 100644
--- a/client-py/iotdb/utils/Tablet.py
+++ b/client-py/iotdb/utils/Tablet.py
@@ -94,7 +94,7 @@ class Tablet(object):
         has_none = False
         for i in range(self.__column_number):
             bitmap = None
-            bitmaps.insert(i, bitmap)
+            bitmaps.append(bitmap)
             if self.__data_types[i] == TSDataType.BOOLEAN:
                 format_str_list.append(str(self.__row_number))
                 format_str_list.append("?")
@@ -103,7 +103,7 @@ class Tablet(object):
                         values_tobe_packed.append(self.__values[j][i])
                     else:
                         values_tobe_packed.append(False)
-                        self.__mark_none_value(bitmaps, bitmap, i, j)
+                        self.__mark_none_value(bitmaps, i, j)
                         has_none = True
 
             elif self.__data_types[i] == TSDataType.INT32:
@@ -114,7 +114,7 @@ class Tablet(object):
                         values_tobe_packed.append(self.__values[j][i])
                     else:
                         values_tobe_packed.append(0)
-                        self.__mark_none_value(bitmaps, bitmap, i, j)
+                        self.__mark_none_value(bitmaps, i, j)
                         has_none = True
 
             elif self.__data_types[i] == TSDataType.INT64:
@@ -125,7 +125,7 @@ class Tablet(object):
                         values_tobe_packed.append(self.__values[j][i])
                     else:
                         values_tobe_packed.append(0)
-                        self.__mark_none_value(bitmaps, bitmap, i, j)
+                        self.__mark_none_value(bitmaps, i, j)
                         has_none = True
 
             elif self.__data_types[i] == TSDataType.FLOAT:
@@ -136,7 +136,7 @@ class Tablet(object):
                         values_tobe_packed.append(self.__values[j][i])
                     else:
                         values_tobe_packed.append(0)
-                        self.__mark_none_value(bitmaps, bitmap, i, j)
+                        self.__mark_none_value(bitmaps, i, j)
                         has_none = True
 
             elif self.__data_types[i] == TSDataType.DOUBLE:
@@ -147,7 +147,7 @@ class Tablet(object):
                         values_tobe_packed.append(self.__values[j][i])
                     else:
                         values_tobe_packed.append(0)
-                        self.__mark_none_value(bitmaps, bitmap, i, j)
+                        self.__mark_none_value(bitmaps, i, j)
                         has_none = True
 
             elif self.__data_types[i] == TSDataType.TEXT:
@@ -166,7 +166,7 @@ class Tablet(object):
                         format_str_list.append("s")
                         values_tobe_packed.append(len(value_bytes))
                         values_tobe_packed.append(value_bytes)
-                        self.__mark_none_value(bitmaps, bitmap, i, j)
+                        self.__mark_none_value(bitmaps, i, j)
                         has_none = True
 
             else:
@@ -186,8 +186,7 @@ class Tablet(object):
         format_str = "".join(format_str_list)
         return struct.pack(format_str, *values_tobe_packed)
 
-    def __mark_none_value(self, bitmaps, bitmap, column, row):
-        if bitmap is None:
-            bitmap = BitMap(self.__row_number)
-            bitmaps.insert(column, bitmap)
-        bitmap.mark(row)
+    def __mark_none_value(self, bitmaps, column, row):
+        if bitmaps[column] is None:
+            bitmaps[column] = BitMap(self.__row_number)
+        bitmaps[column].mark(row)
diff --git a/client-py/tests/test_tablet.py b/client-py/tests/test_tablet.py
new file mode 100644
index 0000000000..fcb5513367
--- /dev/null
+++ b/client-py/tests/test_tablet.py
@@ -0,0 +1,109 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+import pandas as pd
+from pandas.testing import assert_frame_equal
+
+from iotdb.IoTDBContainer import IoTDBContainer
+from iotdb.Session import Session
+from iotdb.utils.IoTDBConstants import TSDataType
+from iotdb.utils.Tablet import Tablet
+
+
+def test_tablet_insertion():
+    with IoTDBContainer("iotdb:dev") as db:
+        db: IoTDBContainer
+        session = Session(db.get_container_host_ip(), db.get_exposed_port(6667))
+        session.open(False)
+
+        measurements_ = ["s_01", "s_02", "s_03", "s_04", "s_05", "s_06"]
+        data_types_ = [
+            TSDataType.BOOLEAN,
+            TSDataType.INT32,
+            TSDataType.INT64,
+            TSDataType.FLOAT,
+            TSDataType.DOUBLE,
+            TSDataType.TEXT,
+        ]
+        values_ = [
+            [False, 10, 11, 1.1, 10011.1, "test01"],
+            [True, 100, 11111, 1.25, 101.0, "test02"],
+            [False, 100, 1, 188.1, 688.25, "test03"],
+            [True, 0, 0, 0, 6.25, "test04"],
+        ]
+        timestamps_ = [16, 17, 18, 19]
+        tablet_ = Tablet(
+            "root.sg_test_01.d_01", measurements_, data_types_, values_, timestamps_
+        )
+        session.insert_tablet(tablet_)
+        columns = []
+        for measurement in measurements_:
+            columns.append("root.sg_test_01.d_01." + measurement)
+        df_input = pd.DataFrame(values_, None, columns)
+        df_input.insert(0, "Time", timestamps_)
+
+        session_data_set = session.execute_query_statement(
+            "select s_01, s_02, s_03, s_04, s_05, s_06 from root.sg_test_01.d_01"
+        )
+        df_output = session_data_set.todf()
+        df_output = df_output[df_input.columns.tolist()]
+
+        session.close()
+    assert_frame_equal(df_input, df_output, False)
+
+
+def test_nullable_tablet_insertion():
+    with IoTDBContainer("iotdb:dev") as db:
+        db: IoTDBContainer
+        session = Session(db.get_container_host_ip(), db.get_exposed_port(6667))
+        session.open(False)
+
+        measurements_ = ["s_01", "s_02", "s_03", "s_04", "s_05", "s_06"]
+        data_types_ = [
+            TSDataType.BOOLEAN,
+            TSDataType.INT32,
+            TSDataType.INT64,
+            TSDataType.FLOAT,
+            TSDataType.DOUBLE,
+            TSDataType.TEXT,
+        ]
+        values_ = [
+            [None, None, 11, 1.1, 10011.1, "test01"],
+            [True, None, 11111, 1.25, 101.0, "test02"],
+            [False, 100, 1, None, 688.25, "test03"],
+            [True, None, 0, 0, 6.25, None],
+        ]
+        timestamps_ = [16, 17, 18, 19]
+        tablet_ = Tablet(
+            "root.sg_test_01.d_01", measurements_, data_types_, values_, timestamps_
+        )
+        session.insert_tablet(tablet_)
+        columns = []
+        for measurement in measurements_:
+            columns.append("root.sg_test_01.d_01." + measurement)
+        df_input = pd.DataFrame(values_, None, columns)
+        df_input.insert(0, "Time", timestamps_)
+
+        session_data_set = session.execute_query_statement(
+            "select s_01, s_02, s_03, s_04, s_05, s_06 from root.sg_test_01.d_01"
+        )
+        df_output = session_data_set.todf()
+        df_output = df_output[df_input.columns.tolist()]
+
+        session.close()
+    assert_frame_equal(df_input, df_output, False)
diff --git a/client-py/tests/test_todf.py b/client-py/tests/test_todf.py
index a155ef15dd..feedcb3228 100644
--- a/client-py/tests/test_todf.py
+++ b/client-py/tests/test_todf.py
@@ -121,7 +121,7 @@ def test_with_null_query():
         )
         data[ts_path_lst[5]] = np.random.choice(
             ["text1", "text2"], size=data_nums
-        ).astype(np.object)
+        ).astype(object)
 
         data_empty = {}
         for ts_path in ts_path_lst:
@@ -133,7 +133,7 @@ def test_with_null_query():
                     tmp_array = pd.Series(tmp_array).astype("Int64")
             elif data[ts_path].dtype == np.float32 or data[ts_path].dtype == np.double:
                 tmp_array = np.full(data_nums, np.nan, data[ts_path].dtype)
-            elif data[ts_path].dtype == np.bool:
+            elif data[ts_path].dtype == bool:
                 tmp_array = np.full(data_nums, np.nan, np.float32)
                 tmp_array = pd.Series(tmp_array).astype("boolean")
             else: