You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2022/08/30 23:07:33 UTC
[arrow-datafusion-python] branch master updated: upgrade to datafusion 11 (#42)
This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion-python.git
The following commit(s) were added to refs/heads/master by this push:
new a4bf58e upgrade to datafusion 11 (#42)
a4bf58e is described below
commit a4bf58ee9183ea8c44c448522c565af7a081cf29
Author: Andy Grove <an...@gmail.com>
AuthorDate: Tue Aug 30 17:07:28 2022 -0600
upgrade to datafusion 11 (#42)
---
.gitignore | 1 +
Cargo.lock | 62 +++++++++++++++++++++++++++++++++++--------------------------
Cargo.toml | 6 +++---
src/udaf.rs | 18 +++++++++++-------
4 files changed, 51 insertions(+), 36 deletions(-)
diff --git a/.gitignore b/.gitignore
index b57efb7..5e6b18b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,3 +14,4 @@ __pycache__/
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
.python-version
+venv
diff --git a/Cargo.lock b/Cargo.lock
index bdbf3cc..5feaa23 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -57,9 +57,9 @@ checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
[[package]]
name = "arrow"
-version = "18.0.0"
+version = "20.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a5f89d2bc04fa746ee395d20c4cbfa508e4cce5c00bae816f0fae434fcfb9853"
+checksum = "c72a69495f06c8abb65b76a87be192a26fa724380d1f292d4e558a32afed9989"
dependencies = [
"ahash",
"bitflags",
@@ -76,8 +76,8 @@ dependencies = [
"multiversion",
"num",
"pyo3",
- "rand 0.8.5",
"regex",
+ "regex-syntax",
"serde",
"serde_derive",
"serde_json",
@@ -285,9 +285,9 @@ dependencies = [
[[package]]
name = "datafusion"
-version = "10.0.0"
+version = "11.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "54617e523e447c9a139fdf3682eeca8f909934bd28cdd0032ebd0ff9783775e1"
+checksum = "430b3983c7164cb113f297f45b68a69893c212cb4b80a8aeb6a8069eb93f745e"
dependencies = [
"ahash",
"arrow",
@@ -326,23 +326,24 @@ dependencies = [
[[package]]
name = "datafusion-common"
-version = "10.0.0"
+version = "11.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "794ca54d3b144038c36b7a31d64c9545abb2edbdda6da055e481fb8a13e4e33b"
+checksum = "594210b4819cc786d1a3dc7b17ff4f9b0c6ee522bcd0a4a52f80a41fd38d53c4"
dependencies = [
"arrow",
"object_store",
"ordered-float 3.0.0",
"parquet",
"pyo3",
+ "serde_json",
"sqlparser",
]
[[package]]
name = "datafusion-expr"
-version = "10.0.0"
+version = "11.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0087a4e55a861c7040314f217672259304fd26b5f174a065867df6b4ac659896"
+checksum = "b91d4a86776ce8f7fe5df34955481d6fe77876dd278bf13098d6a1bdd3c24fb8"
dependencies = [
"ahash",
"arrow",
@@ -352,9 +353,9 @@ dependencies = [
[[package]]
name = "datafusion-optimizer"
-version = "10.0.0"
+version = "11.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b822b1a9f4f9c953b142190229085e2856fa9ee52844aa86b40d55edd6e7cc38"
+checksum = "360f86f7dc943ca8e0da39982febac0a0fc0329d7ee58ea046438c9fed6dfec8"
dependencies = [
"arrow",
"async-trait",
@@ -368,9 +369,9 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr"
-version = "10.0.0"
+version = "11.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2328a0e901a89c46391be9445e6e55b6dd8002d4d177e578b0c4a2486ef07cda"
+checksum = "a465299f2eeb2741b33777b42f607fe56458e137d0d7b80f69be72e771a48b81"
dependencies = [
"ahash",
"arrow",
@@ -409,9 +410,9 @@ dependencies = [
[[package]]
name = "datafusion-row"
-version = "10.0.0"
+version = "11.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ef6b51e6398ed6dcc5e072c16722b9838f472b0c0ffe25b5df536927cda6044f"
+checksum = "959a42a1f35c8fa1b47698df6995ab5ae8477e81c9c42852476666aeac4f80b7"
dependencies = [
"arrow",
"datafusion-common",
@@ -421,9 +422,9 @@ dependencies = [
[[package]]
name = "datafusion-sql"
-version = "10.0.0"
+version = "11.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cb9ae561d6c3dcd09d253ff28f71396b576fca05fe4d0f4fb0e75ee2fc951c72"
+checksum = "c69404e8774fe2c7d64998e94d856f32d3a908f9dc7215ce01e09895f13b4b62"
dependencies = [
"ahash",
"arrow",
@@ -1003,15 +1004,16 @@ dependencies = [
[[package]]
name = "object_store"
-version = "0.3.0"
+version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "857af043f5d9f36ed4f71815857f79b841412dda1cf0ca5a29608874f6f038e2"
+checksum = "cf3845781c5ecf37b3e3610df73fff11487591eba423a987e1b21bb4d389c326"
dependencies = [
"async-trait",
"bytes",
"chrono",
"futures",
"itertools",
+ "parking_lot",
"percent-encoding",
"snafu",
"tokio",
@@ -1069,10 +1071,11 @@ dependencies = [
[[package]]
name = "parquet"
-version = "18.0.0"
+version = "20.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "65f61759af307fad711e7656c705218402a8a79b776c893c20fef96e8ffd2a7d"
+checksum = "d0f0af698fcf8d1d9f2971766ebef25821ffe8c39c91837c276dcd97e075d950"
dependencies = [
+ "ahash",
"arrow",
"base64",
"brotli",
@@ -1081,11 +1084,13 @@ dependencies = [
"chrono",
"flate2",
"futures",
+ "hashbrown",
"lz4",
"num",
"num-bigint",
"parquet-format",
"rand 0.8.5",
+ "seq-macro",
"snap",
"thrift",
"tokio",
@@ -1307,9 +1312,9 @@ checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
[[package]]
name = "regex-syntax"
-version = "0.6.26"
+version = "0.6.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "49b3de9ec5dc0a3417da371aab17d729997c15010e7fd24ff707773a33bddb64"
+checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244"
[[package]]
name = "remove_dir_all"
@@ -1347,6 +1352,12 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
+[[package]]
+name = "seq-macro"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0772c5c30e1a0d91f6834f8e545c69281c099dfa9a3ac58d96a9fd629c8d4898"
+
[[package]]
name = "serde"
version = "1.0.137"
@@ -1370,7 +1381,6 @@ version = "1.0.81"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b7ce2b32a1aed03c558dc61a5cd328f15aff2dbc17daad8fb8af04d2100e15c"
dependencies = [
- "indexmap",
"itoa 1.0.2",
"ryu",
"serde",
@@ -1429,9 +1439,9 @@ checksum = "45456094d1983e2ee2a18fdfebce3189fa451699d0502cb8e3b49dba5ba41451"
[[package]]
name = "sqlparser"
-version = "0.18.0"
+version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f531637a13132fa3d38c54d4cd8f115905e5dc3e72f6e77bd6160481f482e25d"
+checksum = "30c67d4d5de027da1da5a4ed4623f09ab5131d808364279a5f5abee5de9b8db3"
dependencies = [
"log",
]
diff --git a/Cargo.toml b/Cargo.toml
index 05a21e0..e4d521c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -34,9 +34,9 @@ default = ["mimalloc"]
tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync"] }
rand = "0.7"
pyo3 = { version = "~0.16.5", features = ["extension-module", "abi3", "abi3-py37"] }
-datafusion = { version = "^10.0.0", features = ["pyarrow"] }
-datafusion-expr = { version = "^10.0.0" }
-datafusion-common = { version = "^10.0.0", features = ["pyarrow"] }
+datafusion = { version = "^11.0.0", features = ["pyarrow"] }
+datafusion-expr = { version = "^11.0.0" }
+datafusion-common = { version = "^11.0.0", features = ["pyarrow"] }
uuid = { version = "0.8", features = ["v4"] }
mimalloc = { version = "*", optional = true, default-features = false }
async-trait = "0.1"
diff --git a/src/udaf.rs b/src/udaf.rs
index 741ce18..c5499bf 100644
--- a/src/udaf.rs
+++ b/src/udaf.rs
@@ -22,12 +22,12 @@ use pyo3::{prelude::*, types::PyTuple};
use datafusion::arrow::array::ArrayRef;
use datafusion::arrow::datatypes::DataType;
use datafusion::arrow::pyarrow::PyArrowConvert;
+use datafusion::common::ScalarValue;
use datafusion::error::{DataFusionError, Result};
+use datafusion::logical_expr::{
+ Accumulator, AccumulatorFunctionImplementation, AggregateState, AggregateUDF,
+};
use datafusion::logical_plan;
-use datafusion_common::ScalarValue;
-use datafusion_expr::Accumulator;
-use datafusion_expr::AccumulatorFunctionImplementation;
-use datafusion_expr::AggregateUDF;
use crate::expression::PyExpr;
use crate::utils::parse_volatility;
@@ -44,9 +44,13 @@ impl RustAccumulator {
}
impl Accumulator for RustAccumulator {
- fn state(&self) -> Result<Vec<ScalarValue>> {
- Python::with_gil(|py| self.accum.as_ref(py).call_method0("state")?.extract())
- .map_err(|e| DataFusionError::Execution(format!("{}", e)))
+ fn state(&self) -> Result<Vec<AggregateState>> {
+ let py_result: PyResult<Vec<ScalarValue>> =
+ Python::with_gil(|py| self.accum.as_ref(py).call_method0("state")?.extract());
+ match py_result {
+ Ok(r) => Ok(r.into_iter().map(AggregateState::Scalar).collect()),
+ Err(e) => Err(DataFusionError::Execution(format!("{}", e))),
+ }
}
fn evaluate(&self) -> Result<ScalarValue> {