You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iotdb.apache.org by jf...@apache.org on 2021/03/25 20:45:23 UTC
[iotdb] 01/01: [CLIENT-PY] Added support for pandas dataframe to
make data science easier.
This is an automated email from the ASF dual-hosted git repository.
jfeinauer pushed a commit to branch feature/client-py-pandas
in repository https://gitbox.apache.org/repos/asf/iotdb.git
commit 8a7aa947535f0cb9cc18dc33f659b947789bf4b8
Author: Julian Feinauer <j....@pragmaticminds.de>
AuthorDate: Thu Mar 25 21:44:47 2021 +0100
[CLIENT-PY] Added support for pandas dataframe to make data science easier.
---
client-py/pypi/README.md | 28 +++++++++++
client-py/pypi/setup.py | 1 +
client-py/src/iotdb/Session.py | 1 +
client-py/src/iotdb/iotdb_utils.py | 74 +++++++++++++++++++++++++++++
client-py/src/iotdb/utils/SessionDataSet.py | 7 +--
5 files changed, 108 insertions(+), 3 deletions(-)
diff --git a/client-py/pypi/README.md b/client-py/pypi/README.md
index dc7182c..0871f3f 100644
--- a/client-py/pypi/README.md
+++ b/client-py/pypi/README.md
@@ -70,4 +70,32 @@ session.open(False)
zone = session.get_time_zone()
session.close()
+```
+
+## Pandas Support
+
+To easily transform a query result into a [pandas DataFrame](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html),
+the `SessionDataSet` has a method `.todf()` which consumes the dataset and transforms it into a pandas DataFrame.
+
+Example:
+
+```python
+
+from iotdb.Session import Session
+
+ip = "127.0.0.1"
+port_ = "6667"
+username_ = 'root'
+password_ = 'root'
+session = Session(ip, port_, username_, password_)
+session.open(False)
+result = session.execute_query_statement("SELECT * FROM root.*")
+
+# Transform to a pandas DataFrame
+df = result.todf()
+
+session.close()
+
+# Now you can work with the dataframe
+df = ...
```
\ No newline at end of file
diff --git a/client-py/pypi/setup.py b/client-py/pypi/setup.py
index c3ec019..69d5559 100644
--- a/client-py/pypi/setup.py
+++ b/client-py/pypi/setup.py
@@ -41,6 +41,7 @@ setuptools.setup(
packages=setuptools.find_packages(),
install_requires=[
'thrift>=0.13.0',
+ 'pandas>=1.0.0,<1.99.99'
],
classifiers=[
"Programming Language :: Python :: 3",
diff --git a/client-py/src/iotdb/Session.py b/client-py/src/iotdb/Session.py
index 1d7a4a4..7c08e10 100644
--- a/client-py/src/iotdb/Session.py
+++ b/client-py/src/iotdb/Session.py
@@ -473,6 +473,7 @@ class Session(object):
return SessionDataSet(sql, resp.columns, resp.dataTypeList, resp.columnNameIndexMap, resp.queryId,
self.__client, self.__session_id, resp.queryDataSet, resp.ignoreTimeStamp)
+
def execute_non_query_statement(self, sql):
"""
execute non-query sql statement
diff --git a/client-py/src/iotdb/iotdb_utils.py b/client-py/src/iotdb/iotdb_utils.py
new file mode 100644
index 0000000..bd4573e
--- /dev/null
+++ b/client-py/src/iotdb/iotdb_utils.py
@@ -0,0 +1,74 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+import pandas as pd
+
+from iotdb.utils.Field import Field
+from iotdb.utils.IoTDBConstants import TSDataType
+from iotdb.utils.SessionDataSet import SessionDataSet
+
+
+def resultset_to_pandas(result_set: SessionDataSet) -> pd.DataFrame:
+    """
+    Consume a SessionDataSet and return its rows as a pandas DataFrame.
+
+    Every name reported by ``result_set.get_column_names()`` becomes one
+    DataFrame column. Rows are pulled with ``has_next()`` / ``next()`` until
+    the data set is exhausted, so it cannot be iterated again afterwards.
+
+    :param result_set: the query result to drain
+    :return: a DataFrame with one column per column name and one row per
+        record; empty if the data set yields no records
+    """
+    column_names = result_set.get_column_names()
+
+    # A result may come without timestamps (Session forwards an
+    # ignoreTimeStamp flag to SessionDataSet). In that case there is no
+    # "Time" column to fill and the fields line up with the column names
+    # without any shift.
+    # NOTE(review): when "Time" is present it is assumed to be the first
+    # column, as the original offset of 1 implied -- confirm against
+    # SessionDataSet.get_column_names().
+    time_offset = 1 if "Time" in column_names else 0
+
+    value_dict = {name: [] for name in column_names}
+
+    while result_set.has_next():
+        record = result_set.next()
+
+        if time_offset:
+            value_dict["Time"].append(record.get_timestamp())
+
+        # Fetch the field list once per record instead of once per field.
+        for col, field in enumerate(record.get_fields()):
+            value_dict[column_names[col + time_offset]].append(
+                get_typed_point(field)
+            )
+
+    return pd.DataFrame(value_dict)
+
+
+# Readers that pull the Python value out of a Field, keyed by the field's
+# TSDataType. Built once at import time rather than on every call, because
+# get_typed_point runs once per cell of a result set.
+_FIELD_READERS = {
+    # Booleans are deliberately exposed as 0 / 1
+    TSDataType.BOOLEAN: lambda f: 1 if f.get_bool_value() else 0,
+    TSDataType.TEXT: lambda f: f.get_string_value(),
+    TSDataType.FLOAT: lambda f: f.get_float_value(),
+    TSDataType.INT32: lambda f: f.get_int_value(),
+    TSDataType.DOUBLE: lambda f: f.get_double_value(),
+    TSDataType.INT64: lambda f: f.get_long_value(),
+}
+
+
+def get_typed_point(field: Field, none_value=None):
+    """
+    Return the value held by ``field`` as a plain Python object.
+
+    The getter is chosen from ``field.get_data_type()``. BOOLEAN values are
+    returned as the integers 1 / 0.
+
+    :param field: the field to read
+    :param none_value: value returned when the field reports a data type of
+        ``None``
+    :return: the field's value, or ``none_value`` for a ``None`` data type
+    :raises ValueError: if the data type is neither ``None`` nor one of the
+        supported TSDataType members
+    """
+    data_type = field.get_data_type()
+
+    reader = _FIELD_READERS.get(data_type)
+    if reader is not None:
+        return reader(field)
+
+    if data_type is None:
+        return none_value
+
+    # ValueError is still caught by callers handling plain ``Exception``.
+    raise ValueError(f"Unknown DataType {data_type}!")
diff --git a/client-py/src/iotdb/utils/SessionDataSet.py b/client-py/src/iotdb/utils/SessionDataSet.py
index 91b1989..f75a0e2 100644
--- a/client-py/src/iotdb/utils/SessionDataSet.py
+++ b/client-py/src/iotdb/utils/SessionDataSet.py
@@ -30,6 +30,8 @@ from .RowRecord import RowRecord
import struct
+from ..iotdb_utils import resultset_to_pandas
+
class SessionDataSet(object):
@@ -104,6 +106,5 @@ class SessionDataSet(object):
def close_operation_handle(self):
self.iotdb_rpc_data_set.close()
-
-
-
+    def todf(self):
+        """
+        Drain this data set into a pandas DataFrame.
+
+        Delegates to ``iotdb_utils.resultset_to_pandas``, passing this data
+        set as the argument.
+
+        :return: whatever ``resultset_to_pandas(self)`` returns
+        """
+        # Resolved at call time: iotdb_utils imports this module for its
+        # SessionDataSet annotation, so pulling iotdb_utils in while this
+        # module is still loading is a circular import.
+        # NOTE(review): the module-level
+        # `from ..iotdb_utils import resultset_to_pandas` has to be dropped
+        # as well for the cycle to be fully broken.
+        from ..iotdb_utils import resultset_to_pandas
+
+        return resultset_to_pandas(self)