You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iotdb.apache.org by jf...@apache.org on 2021/03/25 20:45:23 UTC

[iotdb] 01/01: [CLIENT-PY] Added support for pandas dataframe to make data science easier.

This is an automated email from the ASF dual-hosted git repository.

jfeinauer pushed a commit to branch feature/client-py-pandas
in repository https://gitbox.apache.org/repos/asf/iotdb.git

commit 8a7aa947535f0cb9cc18dc33f659b947789bf4b8
Author: Julian Feinauer <j....@pragmaticminds.de>
AuthorDate: Thu Mar 25 21:44:47 2021 +0100

    [CLIENT-PY] Added support for pandas dataframe to make data science easier.
---
 client-py/pypi/README.md                    | 28 +++++++++++
 client-py/pypi/setup.py                     |  1 +
 client-py/src/iotdb/Session.py              |  1 +
 client-py/src/iotdb/iotdb_utils.py          | 74 +++++++++++++++++++++++++++++
 client-py/src/iotdb/utils/SessionDataSet.py |  7 +--
 5 files changed, 108 insertions(+), 3 deletions(-)

diff --git a/client-py/pypi/README.md b/client-py/pypi/README.md
index dc7182c..0871f3f 100644
--- a/client-py/pypi/README.md
+++ b/client-py/pypi/README.md
@@ -70,4 +70,32 @@ session.open(False)
 zone = session.get_time_zone()
 session.close()
 
+```
+
+## Pandas Support
+
+To easily transform a query result into a [pandas DataFrame](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html),
+the SessionDataSet has a method `.todf()` that consumes the dataset and transforms it into a pandas DataFrame.
+
+Example:
+
+```python
+
+from iotdb.Session import Session
+
+ip = "127.0.0.1"
+port_ = "6667"
+username_ = 'root'
+password_ = 'root'
+session = Session(ip, port_, username_, password_)
+session.open(False)
+result = session.execute_query_statement("SELECT * FROM root.*")
+
+# Transform to a pandas DataFrame
+df = result.todf()
+
+session.close()
+
+# Now you can work with the dataframe
+df = ...
 ```
\ No newline at end of file
diff --git a/client-py/pypi/setup.py b/client-py/pypi/setup.py
index c3ec019..69d5559 100644
--- a/client-py/pypi/setup.py
+++ b/client-py/pypi/setup.py
@@ -41,6 +41,7 @@ setuptools.setup(
     packages=setuptools.find_packages(),
     install_requires=[
               'thrift>=0.13.0',
+              'pandas>=1.0.0,<1.99.99'
           ],
     classifiers=[
         "Programming Language :: Python :: 3",
diff --git a/client-py/src/iotdb/Session.py b/client-py/src/iotdb/Session.py
index 1d7a4a4..7c08e10 100644
--- a/client-py/src/iotdb/Session.py
+++ b/client-py/src/iotdb/Session.py
@@ -473,6 +473,7 @@ class Session(object):
         return SessionDataSet(sql, resp.columns, resp.dataTypeList, resp.columnNameIndexMap, resp.queryId,
                               self.__client, self.__session_id, resp.queryDataSet, resp.ignoreTimeStamp)
 
+
     def execute_non_query_statement(self, sql):
         """
         execute non-query sql statement
diff --git a/client-py/src/iotdb/iotdb_utils.py b/client-py/src/iotdb/iotdb_utils.py
new file mode 100644
index 0000000..bd4573e
--- /dev/null
+++ b/client-py/src/iotdb/iotdb_utils.py
@@ -0,0 +1,74 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+import pandas as pd
+
+from iotdb.utils.Field import Field
+from iotdb.utils.IoTDBConstants import TSDataType
+from iotdb.utils.SessionDataSet import SessionDataSet
+
+
+def resultset_to_pandas(result_set: SessionDataSet) -> pd.DataFrame:
+    """
+    Drain an IoTDB SessionDataSet into a pandas DataFrame.
+
+    Every name returned by ``get_column_names()`` becomes one column. Each
+    record's timestamp is appended to the "Time" column, and field ``i`` of
+    the record is appended to the column whose name sits at index ``i + 1``.
+    NOTE(review): presumes the first column name is "Time" -- confirm.
+
+    :param result_set: dataset to read; iterated until has_next() is falsy
+    :return: DataFrame built from the collected per-column value lists
+    """
+    names = result_set.get_column_names()
+
+    # One empty value list per column, keyed by column name.
+    columns = {name: [] for name in names}
+
+    while result_set.has_next():
+        row = result_set.next()
+
+        columns["Time"].append(row.get_timestamp())
+
+        # get_typed_point picks the getter matching the field's data type.
+        for position, cell in enumerate(row.get_fields()):
+            target = columns[names[position + 1]]
+            target.append(get_typed_point(cell))
+
+    return pd.DataFrame(columns)
+
+
+def get_typed_point(field: Field, none_value=None):
+    """Read ``field`` with the getter for its data type (None -> none_value)."""
+    data_type: TSDataType = field.get_data_type()
+    readers = {
+        # BOOLEAN is reported as 1 / 0 rather than True / False.
+        TSDataType.BOOLEAN: lambda f: 1 if f.get_bool_value() else 0,
+        TSDataType.TEXT: lambda f: f.get_string_value(),
+        TSDataType.FLOAT: lambda f: f.get_float_value(),
+        TSDataType.INT32: lambda f: f.get_int_value(),
+        TSDataType.DOUBLE: lambda f: f.get_double_value(),
+        TSDataType.INT64: lambda f: f.get_long_value(),
+    }
+    reader = readers.get(data_type)
+    if reader is not None:
+        return reader(field)
+    if data_type is not None:
+        # Any other data type has no getter registered above.
+        raise Exception(f"Unknown DataType {data_type}!")
+    return none_value
diff --git a/client-py/src/iotdb/utils/SessionDataSet.py b/client-py/src/iotdb/utils/SessionDataSet.py
index 91b1989..f75a0e2 100644
--- a/client-py/src/iotdb/utils/SessionDataSet.py
+++ b/client-py/src/iotdb/utils/SessionDataSet.py
@@ -30,6 +30,8 @@ from .RowRecord import RowRecord
 
 import struct
 
+from ..iotdb_utils import resultset_to_pandas
+
 
 class SessionDataSet(object):
 
@@ -104,6 +106,5 @@ class SessionDataSet(object):
     def close_operation_handle(self):
         self.iotdb_rpc_data_set.close()
 
-
-
-
+    def todf(self):  # drains this dataset into a pandas DataFrame
+        return resultset_to_pandas(self)  # NOTE(review): circular import with iotdb_utils -- verify