You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2022/05/17 11:03:50 UTC
[arrow-datafusion] branch master updated: Update to arrow-rs 14.0.0 (#2528)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 25725c0cf Update to arrow-rs 14.0.0 (#2528)
25725c0cf is described below
commit 25725c0cfd1bc54f735c41a57c13d9b0e15c8d2b
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Tue May 17 07:03:43 2022 -0400
Update to arrow-rs 14.0.0 (#2528)
* TEMP: Patch to use apache repo
* Update to arrow 14.0.0
* Consolidate to single OffsetSizeTrait
* Update for new API
* clippy
* moar clippy
* TEMP: patch datafusion cli
* fixup
* Update datafusion-cli deps
---
Cargo.toml | 1 +
ballista-cli/Cargo.toml | 2 +-
ballista/rust/core/Cargo.toml | 2 +-
ballista/rust/core/src/client.rs | 11 ++++-
ballista/rust/executor/Cargo.toml | 4 +-
datafusion-cli/Cargo.lock | 55 ++++++++++++----------
datafusion-cli/Cargo.toml | 3 +-
datafusion-examples/Cargo.toml | 2 +-
datafusion-examples/examples/flight_client.rs | 3 +-
datafusion/common/Cargo.toml | 4 +-
datafusion/core/Cargo.toml | 4 +-
datafusion/core/fuzz-utils/Cargo.toml | 2 +-
datafusion/core/src/from_slice.rs | 10 ++--
datafusion/expr/Cargo.toml | 2 +-
datafusion/jit/Cargo.toml | 2 +-
datafusion/physical-expr/Cargo.toml | 2 +-
.../physical-expr/src/aggregate/approx_distinct.rs | 16 +++----
datafusion/physical-expr/src/crypto_expressions.rs | 5 +-
.../physical-expr/src/datetime_expressions.rs | 4 +-
datafusion/physical-expr/src/expressions/binary.rs | 4 +-
.../physical-expr/src/expressions/in_list.rs | 8 ++--
datafusion/physical-expr/src/regex_expressions.rs | 6 +--
datafusion/physical-expr/src/string_expressions.rs | 26 +++++-----
.../physical-expr/src/unicode_expressions.rs | 22 ++++-----
datafusion/row/Cargo.toml | 2 +-
25 files changed, 108 insertions(+), 94 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
index feb3ebe5c..ef7dd30c2 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -38,3 +38,4 @@ exclude = ["ballista-cli", "datafusion-cli"]
[profile.release]
codegen-units = 1
lto = true
+
diff --git a/ballista-cli/Cargo.toml b/ballista-cli/Cargo.toml
index 02d254677..3638a6227 100644
--- a/ballista-cli/Cargo.toml
+++ b/ballista-cli/Cargo.toml
@@ -29,7 +29,7 @@ rust-version = "1.59"
readme = "README.md"
[dependencies]
-arrow = { version = "13" }
+arrow = { version = "14.0.0" }
ballista = { path = "../ballista/rust/client", version = "0.7.0" }
clap = { version = "3", features = ["derive", "cargo"] }
datafusion = { path = "../datafusion/core", version = "8.0.0" }
diff --git a/ballista/rust/core/Cargo.toml b/ballista/rust/core/Cargo.toml
index db2b60ca4..3d43aceb4 100644
--- a/ballista/rust/core/Cargo.toml
+++ b/ballista/rust/core/Cargo.toml
@@ -35,7 +35,7 @@ simd = ["datafusion/simd"]
[dependencies]
ahash = { version = "0.7", default-features = false }
-arrow-flight = { version = "13" }
+arrow-flight = { version = "14.0.0" }
async-trait = "0.1.41"
chrono = { version = "0.4", default-features = false }
clap = { version = "3", features = ["derive", "cargo"] }
diff --git a/ballista/rust/core/src/client.rs b/ballista/rust/core/src/client.rs
index 54418884d..a5c4a062b 100644
--- a/ballista/rust/core/src/client.rs
+++ b/ballista/rust/core/src/client.rs
@@ -17,6 +17,7 @@
//! Client API for sending requests to executors.
+use std::collections::HashMap;
use std::sync::Arc;
use std::{
@@ -31,6 +32,7 @@ use crate::serde::scheduler::Action;
use arrow_flight::utils::flight_data_to_arrow_batch;
use arrow_flight::Ticket;
use arrow_flight::{flight_service_client::FlightServiceClient, FlightData};
+use datafusion::arrow::array::ArrayRef;
use datafusion::arrow::{
datatypes::{Schema, SchemaRef},
error::{ArrowError, Result as ArrowResult},
@@ -131,11 +133,16 @@ impl BallistaClient {
struct FlightDataStream {
stream: Streaming<FlightData>,
schema: SchemaRef,
+ dictionaries_by_id: HashMap<i64, ArrayRef>,
}
impl FlightDataStream {
pub fn new(stream: Streaming<FlightData>, schema: SchemaRef) -> Self {
- Self { stream, schema }
+ Self {
+ stream,
+ schema,
+ dictionaries_by_id: HashMap::new(),
+ }
}
}
@@ -154,7 +161,7 @@ impl Stream for FlightDataStream {
flight_data_to_arrow_batch(
&flight_data_chunk,
self.schema.clone(),
- &[],
+ &self.dictionaries_by_id,
)
});
Some(converted_chunk)
diff --git a/ballista/rust/executor/Cargo.toml b/ballista/rust/executor/Cargo.toml
index 3a2b45c5c..959a55078 100644
--- a/ballista/rust/executor/Cargo.toml
+++ b/ballista/rust/executor/Cargo.toml
@@ -34,8 +34,8 @@ snmalloc = ["snmalloc-rs"]
[dependencies]
anyhow = "1"
-arrow = { version = "13" }
-arrow-flight = { version = "13" }
+arrow = { version = "14.0.0" }
+arrow-flight = { version = "14.0.0" }
async-trait = "0.1.41"
ballista-core = { path = "../core", version = "0.7.0" }
chrono = { version = "0.4", default-features = false }
diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock
index 4309a4216..26d6fb9c4 100644
--- a/datafusion-cli/Cargo.lock
+++ b/datafusion-cli/Cargo.lock
@@ -57,9 +57,9 @@ checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
[[package]]
name = "arrow"
-version = "13.0.0"
+version = "14.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5c6bee230122beb516ead31935a61f683715f987c6f003eff44ad6986624105a"
+checksum = "0612b6a634de6c3f5e63fdaa6932f7bc598f92de0462ac6e69b0aebd77e093aa"
dependencies = [
"bitflags",
"chrono",
@@ -191,6 +191,12 @@ version = "1.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
+[[package]]
+name = "bytes"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8"
+
[[package]]
name = "cc"
version = "1.0.73"
@@ -337,7 +343,7 @@ dependencies = [
[[package]]
name = "datafusion"
-version = "7.0.0"
+version = "8.0.0"
dependencies = [
"ahash",
"arrow",
@@ -369,7 +375,7 @@ dependencies = [
[[package]]
name = "datafusion-cli"
-version = "7.0.0"
+version = "8.0.0"
dependencies = [
"arrow",
"clap",
@@ -383,7 +389,7 @@ dependencies = [
[[package]]
name = "datafusion-common"
-version = "7.0.0"
+version = "8.0.0"
dependencies = [
"arrow",
"ordered-float 3.0.0",
@@ -393,7 +399,7 @@ dependencies = [
[[package]]
name = "datafusion-data-access"
-version = "7.0.0"
+version = "8.0.0"
dependencies = [
"async-trait",
"chrono",
@@ -406,7 +412,7 @@ dependencies = [
[[package]]
name = "datafusion-expr"
-version = "7.0.0"
+version = "8.0.0"
dependencies = [
"ahash",
"arrow",
@@ -416,7 +422,7 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr"
-version = "7.0.0"
+version = "8.0.0"
dependencies = [
"ahash",
"arrow",
@@ -439,7 +445,7 @@ dependencies = [
[[package]]
name = "datafusion-row"
-version = "7.0.0"
+version = "8.0.0"
dependencies = [
"arrow",
"datafusion-common",
@@ -804,9 +810,9 @@ checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4"
[[package]]
name = "itoa"
-version = "1.0.1"
+version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35"
+checksum = "112c678d4050afce233f4f2852bb2eb519230b3cf12f33585275537d7e41578d"
[[package]]
name = "jobserver"
@@ -1143,9 +1149,9 @@ dependencies = [
[[package]]
name = "os_str_bytes"
-version = "6.0.0"
+version = "6.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8e22443d1643a904602595ba1cd8f7d896afe56d26712531c5ff73a15b2fbf64"
+checksum = "029d8d0b2f198229de29dca79676f2738ff952edf3fde542eb8bf94d8c21b435"
[[package]]
name = "parking_lot"
@@ -1172,14 +1178,15 @@ dependencies = [
[[package]]
name = "parquet"
-version = "13.0.0"
+version = "14.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6c6d737baed48775e87a69aa262f1fa2f1d6bd074dedbe9cac244b9aabf2a0b4"
+checksum = "ba1185ee1da5091e40b86519265a44d2704e3916ff867059c915141cab14d413"
dependencies = [
"arrow",
"base64",
"brotli",
"byteorder",
+ "bytes",
"chrono",
"flate2",
"lz4",
@@ -1405,9 +1412,9 @@ dependencies = [
[[package]]
name = "ryu"
-version = "1.0.9"
+version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f"
+checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695"
[[package]]
name = "scopeguard"
@@ -1439,7 +1446,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b7ce2b32a1aed03c558dc61a5cd328f15aff2dbc17daad8fb8af04d2100e15c"
dependencies = [
"indexmap",
- "itoa 1.0.1",
+ "itoa 1.0.2",
"ryu",
"serde",
]
@@ -1527,9 +1534,9 @@ checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601"
[[package]]
name = "syn"
-version = "1.0.93"
+version = "1.0.94"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "04066589568b72ec65f42d65a1a52436e954b168773148893c020269563decf2"
+checksum = "a07e33e919ebcd69113d5be0e4d70c5707004ff45188910106854f38b960df4a"
dependencies = [
"proc-macro2",
"quote",
@@ -1812,18 +1819,18 @@ checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680"
[[package]]
name = "zstd"
-version = "0.11.1+zstd.1.5.2"
+version = "0.11.2+zstd.1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "77a16b8414fde0414e90c612eba70985577451c4c504b99885ebed24762cb81a"
+checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4"
dependencies = [
"zstd-safe",
]
[[package]]
name = "zstd-safe"
-version = "5.0.1+zstd.1.5.2"
+version = "5.0.2+zstd.1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7c12659121420dd6365c5c3de4901f97145b79651fb1d25814020ed2ed0585ae"
+checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db"
dependencies = [
"libc",
"zstd-sys",
diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml
index c39479432..27d602934 100644
--- a/datafusion-cli/Cargo.toml
+++ b/datafusion-cli/Cargo.toml
@@ -29,7 +29,7 @@ rust-version = "1.59"
readme = "README.md"
[dependencies]
-arrow = { version = "13" }
+arrow = { version = "14.0.0" }
clap = { version = "3", features = ["derive", "cargo"] }
datafusion = { path = "../datafusion/core", version = "8.0.0" }
dirs = "4.0.0"
@@ -37,3 +37,4 @@ env_logger = "0.9"
mimalloc = { version = "0.1", default-features = false }
rustyline = "9.0"
tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot"] }
+
diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml
index b6e5424d7..963efdf0d 100644
--- a/datafusion-examples/Cargo.toml
+++ b/datafusion-examples/Cargo.toml
@@ -34,7 +34,7 @@ path = "examples/avro_sql.rs"
required-features = ["datafusion/avro"]
[dev-dependencies]
-arrow-flight = { version = "13" }
+arrow-flight = { version = "14.0.0" }
async-trait = "0.1.41"
datafusion = { path = "../datafusion/core" }
futures = "0.3"
diff --git a/datafusion-examples/examples/flight_client.rs b/datafusion-examples/examples/flight_client.rs
index e20397cb9..9fea1ab7d 100644
--- a/datafusion-examples/examples/flight_client.rs
+++ b/datafusion-examples/examples/flight_client.rs
@@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.
+use std::collections::HashMap;
use std::convert::TryFrom;
use std::sync::Arc;
@@ -62,7 +63,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
// all the remaining stream messages should be dictionary and record batches
let mut results = vec![];
- let dictionaries_by_field = vec![None; schema.fields().len()];
+ let dictionaries_by_field = HashMap::new();
while let Some(flight_data) = stream.message().await? {
let record_batch = flight_data_to_arrow_batch(
&flight_data,
diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml
index 652dde5fb..6a46a11f6 100644
--- a/datafusion/common/Cargo.toml
+++ b/datafusion/common/Cargo.toml
@@ -38,10 +38,10 @@ jit = ["cranelift-module"]
pyarrow = ["pyo3"]
[dependencies]
-arrow = { version = "13", features = ["prettyprint"] }
+arrow = { version = "14.0.0", features = ["prettyprint"] }
avro-rs = { version = "0.13", features = ["snappy"], optional = true }
cranelift-module = { version = "0.83.0", optional = true }
ordered-float = "3.0"
-parquet = { version = "13", features = ["arrow"], optional = true }
+parquet = { version = "14.0.0", features = ["arrow"], optional = true }
pyo3 = { version = "0.16", optional = true }
sqlparser = "0.17"
diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml
index 54fac119b..0678df1c1 100644
--- a/datafusion/core/Cargo.toml
+++ b/datafusion/core/Cargo.toml
@@ -55,7 +55,7 @@ unicode_expressions = ["datafusion-physical-expr/regex_expressions"]
[dependencies]
ahash = { version = "0.7", default-features = false }
-arrow = { version = "13", features = ["prettyprint"] }
+arrow = { version = "14.0.0", features = ["prettyprint"] }
async-trait = "0.1.41"
avro-rs = { version = "0.13", features = ["snappy"], optional = true }
chrono = { version = "0.4", default-features = false }
@@ -73,7 +73,7 @@ num-traits = { version = "0.2", optional = true }
num_cpus = "1.13.0"
ordered-float = "3.0"
parking_lot = "0.12"
-parquet = { version = "13", features = ["arrow"] }
+parquet = { version = "14.0.0", features = ["arrow"] }
paste = "^1.0"
pin-project-lite = "^0.2.7"
pyo3 = { version = "0.16", optional = true }
diff --git a/datafusion/core/fuzz-utils/Cargo.toml b/datafusion/core/fuzz-utils/Cargo.toml
index 858543d40..4089ad74e 100644
--- a/datafusion/core/fuzz-utils/Cargo.toml
+++ b/datafusion/core/fuzz-utils/Cargo.toml
@@ -23,6 +23,6 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
-arrow = { version = "13", features = ["prettyprint"] }
+arrow = { version = "14.0.0", features = ["prettyprint"] }
env_logger = "0.9.0"
rand = "0.8"
diff --git a/datafusion/core/src/from_slice.rs b/datafusion/core/src/from_slice.rs
index 42b8671d1..2fedc668a 100644
--- a/datafusion/core/src/from_slice.rs
+++ b/datafusion/core/src/from_slice.rs
@@ -20,8 +20,8 @@
//! This file essentially exists to ease the transition onto arrow2
use arrow::array::{
- ArrayData, BinaryOffsetSizeTrait, BooleanArray, GenericBinaryArray,
- GenericStringArray, PrimitiveArray, StringOffsetSizeTrait,
+ ArrayData, BooleanArray, GenericBinaryArray, GenericStringArray, OffsetSizeTrait,
+ PrimitiveArray,
};
use arrow::buffer::{Buffer, MutableBuffer};
use arrow::datatypes::{ArrowPrimitiveType, DataType};
@@ -50,7 +50,7 @@ where
/// default implementation for binary array types, adapted from `From<Vec<_>>`
impl<S, I, OffsetSize> FromSlice<S, I> for GenericBinaryArray<OffsetSize>
where
- OffsetSize: BinaryOffsetSizeTrait,
+ OffsetSize: OffsetSizeTrait,
S: AsRef<[I]>,
I: AsRef<[u8]>,
{
@@ -69,7 +69,7 @@ where
offsets.push(length_so_far);
values.extend_from_slice(s);
}
- let array_data = ArrayData::builder(OffsetSize::DATA_TYPE)
+ let array_data = ArrayData::builder(Self::get_data_type())
.len(slice.len())
.add_buffer(Buffer::from_slice_ref(&offsets))
.add_buffer(Buffer::from_slice_ref(&values));
@@ -81,7 +81,7 @@ where
/// default implementation for utf8 array types, adapted from `From<Vec<_>>`
impl<S, I, OffsetSize> FromSlice<S, I> for GenericStringArray<OffsetSize>
where
- OffsetSize: StringOffsetSizeTrait,
+ OffsetSize: OffsetSizeTrait,
S: AsRef<[I]>,
I: AsRef<str>,
{
diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml
index 25786e670..22c73e6a9 100644
--- a/datafusion/expr/Cargo.toml
+++ b/datafusion/expr/Cargo.toml
@@ -36,6 +36,6 @@ path = "src/lib.rs"
[dependencies]
ahash = { version = "0.7", default-features = false }
-arrow = { version = "13", features = ["prettyprint"] }
+arrow = { version = "14.0.0", features = ["prettyprint"] }
datafusion-common = { path = "../common", version = "8.0.0" }
sqlparser = "0.17"
diff --git a/datafusion/jit/Cargo.toml b/datafusion/jit/Cargo.toml
index e3278be6d..260e78d1b 100644
--- a/datafusion/jit/Cargo.toml
+++ b/datafusion/jit/Cargo.toml
@@ -36,7 +36,7 @@ path = "src/lib.rs"
jit = []
[dependencies]
-arrow = { version = "13" }
+arrow = { version = "14.0.0" }
cranelift = "0.83.0"
cranelift-jit = "0.83.0"
cranelift-module = "0.83.0"
diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml
index f9a472555..c6c286f0b 100644
--- a/datafusion/physical-expr/Cargo.toml
+++ b/datafusion/physical-expr/Cargo.toml
@@ -40,7 +40,7 @@ unicode_expressions = ["unicode-segmentation"]
[dependencies]
ahash = { version = "0.7", default-features = false }
-arrow = { version = "13", features = ["prettyprint"] }
+arrow = { version = "14.0.0", features = ["prettyprint"] }
blake2 = { version = "^0.10.2", optional = true }
blake3 = { version = "1.0", optional = true }
chrono = { version = "0.4", default-features = false }
diff --git a/datafusion/physical-expr/src/aggregate/approx_distinct.rs b/datafusion/physical-expr/src/aggregate/approx_distinct.rs
index 391258447..c67d1c9d3 100644
--- a/datafusion/physical-expr/src/aggregate/approx_distinct.rs
+++ b/datafusion/physical-expr/src/aggregate/approx_distinct.rs
@@ -21,8 +21,8 @@ use super::hyperloglog::HyperLogLog;
use crate::expressions::format_state_name;
use crate::{AggregateExpr, PhysicalExpr};
use arrow::array::{
- ArrayRef, BinaryArray, BinaryOffsetSizeTrait, GenericBinaryArray, GenericStringArray,
- PrimitiveArray, StringOffsetSizeTrait,
+ ArrayRef, BinaryArray, GenericBinaryArray, GenericStringArray, OffsetSizeTrait,
+ PrimitiveArray,
};
use arrow::datatypes::{
ArrowPrimitiveType, DataType, Field, Int16Type, Int32Type, Int64Type, Int8Type,
@@ -119,7 +119,7 @@ impl AggregateExpr for ApproxDistinct {
#[derive(Debug)]
struct BinaryHLLAccumulator<T>
where
- T: BinaryOffsetSizeTrait,
+ T: OffsetSizeTrait,
{
hll: HyperLogLog<Vec<u8>>,
phantom_data: PhantomData<T>,
@@ -127,7 +127,7 @@ where
impl<T> BinaryHLLAccumulator<T>
where
- T: BinaryOffsetSizeTrait,
+ T: OffsetSizeTrait,
{
/// new approx_distinct accumulator
pub fn new() -> Self {
@@ -141,7 +141,7 @@ where
#[derive(Debug)]
struct StringHLLAccumulator<T>
where
- T: StringOffsetSizeTrait,
+ T: OffsetSizeTrait,
{
hll: HyperLogLog<String>,
phantom_data: PhantomData<T>,
@@ -149,7 +149,7 @@ where
impl<T> StringHLLAccumulator<T>
where
- T: StringOffsetSizeTrait,
+ T: OffsetSizeTrait,
{
/// new approx_distinct accumulator
pub fn new() -> Self {
@@ -259,7 +259,7 @@ macro_rules! downcast_value {
impl<T> Accumulator for BinaryHLLAccumulator<T>
where
- T: BinaryOffsetSizeTrait,
+ T: OffsetSizeTrait,
{
fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
let array: &GenericBinaryArray<T> =
@@ -275,7 +275,7 @@ where
impl<T> Accumulator for StringHLLAccumulator<T>
where
- T: StringOffsetSizeTrait,
+ T: OffsetSizeTrait,
{
fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
let array: &GenericStringArray<T> =
diff --git a/datafusion/physical-expr/src/crypto_expressions.rs b/datafusion/physical-expr/src/crypto_expressions.rs
index 95bedd4af..e0314317c 100644
--- a/datafusion/physical-expr/src/crypto_expressions.rs
+++ b/datafusion/physical-expr/src/crypto_expressions.rs
@@ -19,8 +19,7 @@
use arrow::{
array::{
- Array, ArrayRef, BinaryArray, GenericStringArray, StringArray,
- StringOffsetSizeTrait,
+ Array, ArrayRef, BinaryArray, GenericStringArray, OffsetSizeTrait, StringArray,
},
datatypes::DataType,
};
@@ -127,7 +126,7 @@ impl DigestAlgorithm {
/// digest a string array to their hash values
fn digest_array<T>(self, value: &dyn Array) -> Result<ColumnarValue>
where
- T: StringOffsetSizeTrait,
+ T: OffsetSizeTrait,
{
let input_value = value
.as_any()
diff --git a/datafusion/physical-expr/src/datetime_expressions.rs b/datafusion/physical-expr/src/datetime_expressions.rs
index 9a8351d0d..1e1cfd07f 100644
--- a/datafusion/physical-expr/src/datetime_expressions.rs
+++ b/datafusion/physical-expr/src/datetime_expressions.rs
@@ -18,7 +18,7 @@
//! DateTime expressions
use arrow::{
- array::{Array, ArrayRef, GenericStringArray, PrimitiveArray, StringOffsetSizeTrait},
+ array::{Array, ArrayRef, GenericStringArray, OffsetSizeTrait, PrimitiveArray},
compute::kernels::cast_utils::string_to_timestamp_nanos,
datatypes::{
ArrowPrimitiveType, DataType, TimestampMicrosecondType, TimestampMillisecondType,
@@ -57,7 +57,7 @@ pub(crate) fn unary_string_to_primitive_function<'a, T, O, F>(
) -> Result<PrimitiveArray<O>>
where
O: ArrowPrimitiveType,
- T: StringOffsetSizeTrait,
+ T: OffsetSizeTrait,
F: Fn(&'a str) -> Result<O::Native>,
{
if args.len() != 1 {
diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs
index b7a0da3e0..e507f8fe8 100644
--- a/datafusion/physical-expr/src/expressions/binary.rs
+++ b/datafusion/physical-expr/src/expressions/binary.rs
@@ -1351,7 +1351,7 @@ where
.collect())
}
-fn is_distinct_from_utf8<OffsetSize: StringOffsetSizeTrait>(
+fn is_distinct_from_utf8<OffsetSize: OffsetSizeTrait>(
left: &GenericStringArray<OffsetSize>,
right: &GenericStringArray<OffsetSize>,
) -> Result<BooleanArray> {
@@ -1397,7 +1397,7 @@ where
.collect())
}
-fn is_not_distinct_from_utf8<OffsetSize: StringOffsetSizeTrait>(
+fn is_not_distinct_from_utf8<OffsetSize: OffsetSizeTrait>(
left: &GenericStringArray<OffsetSize>,
right: &GenericStringArray<OffsetSize>,
) -> Result<BooleanArray> {
diff --git a/datafusion/physical-expr/src/expressions/in_list.rs b/datafusion/physical-expr/src/expressions/in_list.rs
index 7094a718d..a872f65c6 100644
--- a/datafusion/physical-expr/src/expressions/in_list.rs
+++ b/datafusion/physical-expr/src/expressions/in_list.rs
@@ -24,7 +24,7 @@ use std::sync::Arc;
use arrow::array::GenericStringArray;
use arrow::array::{
ArrayRef, BooleanArray, Float32Array, Float64Array, Int16Array, Int32Array,
- Int64Array, Int8Array, StringOffsetSizeTrait, UInt16Array, UInt32Array, UInt64Array,
+ Int64Array, Int8Array, OffsetSizeTrait, UInt16Array, UInt32Array, UInt64Array,
UInt8Array,
};
use arrow::datatypes::ArrowPrimitiveType;
@@ -250,14 +250,14 @@ fn not_in_list_primitive<T: ArrowPrimitiveType>(
}
// whether each value on the left (can be null) is contained in the non-null list
-fn in_list_utf8<OffsetSize: StringOffsetSizeTrait>(
+fn in_list_utf8<OffsetSize: OffsetSizeTrait>(
array: &GenericStringArray<OffsetSize>,
values: &[&str],
) -> Result<BooleanArray> {
compare_op_scalar!(array, values, |x, v: &[&str]| v.contains(&x))
}
-fn not_in_list_utf8<OffsetSize: StringOffsetSizeTrait>(
+fn not_in_list_utf8<OffsetSize: OffsetSizeTrait>(
array: &GenericStringArray<OffsetSize>,
values: &[&str],
) -> Result<BooleanArray> {
@@ -341,7 +341,7 @@ impl InListExpr {
/// Compare for specific utf8 types
#[allow(clippy::unnecessary_wraps)]
- fn compare_utf8<T: StringOffsetSizeTrait>(
+ fn compare_utf8<T: OffsetSizeTrait>(
&self,
array: ArrayRef,
list_values: Vec<ColumnarValue>,
diff --git a/datafusion/physical-expr/src/regex_expressions.rs b/datafusion/physical-expr/src/regex_expressions.rs
index 69de68e16..c53c94615 100644
--- a/datafusion/physical-expr/src/regex_expressions.rs
+++ b/datafusion/physical-expr/src/regex_expressions.rs
@@ -21,7 +21,7 @@
//! Regex expressions
-use arrow::array::{ArrayRef, GenericStringArray, StringOffsetSizeTrait};
+use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait};
use arrow::compute;
use datafusion_common::{DataFusionError, Result};
use hashbrown::HashMap;
@@ -45,7 +45,7 @@ macro_rules! downcast_string_arg {
}
/// extract a specific group from a string column, using a regular expression
-pub fn regexp_match<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn regexp_match<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
match args.len() {
2 => {
let values = downcast_string_arg!(args[0], "string", T);
@@ -79,7 +79,7 @@ fn regex_replace_posix_groups(replacement: &str) -> String {
/// Replaces substring(s) matching a POSIX regular expression.
///
/// example: `regexp_replace('Thomas', '.[mN]a.', 'M') = 'ThM'`
-pub fn regexp_replace<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn regexp_replace<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
// creating Regex is expensive so create hashmap for memoization
let mut patterns: HashMap<String, Regex> = HashMap::new();
diff --git a/datafusion/physical-expr/src/string_expressions.rs b/datafusion/physical-expr/src/string_expressions.rs
index 1f369cdad..c13a853bb 100644
--- a/datafusion/physical-expr/src/string_expressions.rs
+++ b/datafusion/physical-expr/src/string_expressions.rs
@@ -24,7 +24,7 @@
use arrow::{
array::{
Array, ArrayRef, BooleanArray, GenericStringArray, Int32Array, Int64Array,
- PrimitiveArray, StringArray, StringOffsetSizeTrait,
+ OffsetSizeTrait, PrimitiveArray, StringArray,
},
datatypes::{ArrowNativeType, ArrowPrimitiveType, DataType},
};
@@ -98,8 +98,8 @@ pub(crate) fn unary_string_function<'a, T, O, F, R>(
) -> Result<GenericStringArray<O>>
where
R: AsRef<str>,
- O: StringOffsetSizeTrait,
- T: StringOffsetSizeTrait,
+ O: OffsetSizeTrait,
+ T: OffsetSizeTrait,
F: Fn(&'a str) -> R,
{
if args.len() != 1 {
@@ -167,7 +167,7 @@ where
/// Returns the numeric code of the first character of the argument.
/// ascii('x') = 120
-pub fn ascii<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn ascii<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
let string_array = downcast_string_arg!(args[0], "string", T);
let result = string_array
@@ -185,7 +185,7 @@ pub fn ascii<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
/// Removes the longest string containing only characters in characters (a space by default) from the start and end of string.
/// btrim('xyxtrimyyx', 'xyz') = 'trim'
-pub fn btrim<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn btrim<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
match args.len() {
1 => {
let string_array = downcast_string_arg!(args[0], "string", T);
@@ -363,7 +363,7 @@ pub fn concat_ws(args: &[ArrayRef]) -> Result<ArrayRef> {
/// Converts the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters.
/// initcap('hi THOMAS') = 'Hi Thomas'
-pub fn initcap<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn initcap<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
let string_array = downcast_string_arg!(args[0], "string", T);
// first map is the iterator, second is for the `Option<_>`
@@ -399,7 +399,7 @@ pub fn lower(args: &[ColumnarValue]) -> Result<ColumnarValue> {
/// Removes the longest string containing only characters in characters (a space by default) from the start of string.
/// ltrim('zzzytest', 'xyz') = 'test'
-pub fn ltrim<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn ltrim<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
match args.len() {
1 => {
let string_array = downcast_string_arg!(args[0], "string", T);
@@ -438,7 +438,7 @@ pub fn ltrim<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
/// Repeats string the specified number of times.
/// repeat('Pg', 4) = 'PgPgPgPg'
-pub fn repeat<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn repeat<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
let string_array = downcast_string_arg!(args[0], "string", T);
let number_array = downcast_arg!(args[1], "number", Int64Array);
@@ -456,7 +456,7 @@ pub fn repeat<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
/// Replaces all occurrences in string of substring from with substring to.
/// replace('abcdefabcdef', 'cd', 'XX') = 'abXXefabXXef'
-pub fn replace<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn replace<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
let string_array = downcast_string_arg!(args[0], "string", T);
let from_array = downcast_string_arg!(args[1], "from", T);
let to_array = downcast_string_arg!(args[2], "to", T);
@@ -476,7 +476,7 @@ pub fn replace<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef>
/// Removes the longest string containing only characters in characters (a space by default) from the end of string.
/// rtrim('testxxzx', 'xyz') = 'test'
-pub fn rtrim<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn rtrim<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
match args.len() {
1 => {
let string_array = downcast_string_arg!(args[0], "string", T);
@@ -515,7 +515,7 @@ pub fn rtrim<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
/// Splits string at occurrences of delimiter and returns the n'th field (counting from one).
/// split_part('abc~@~def~@~ghi', '~@~', 2) = 'def'
-pub fn split_part<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn split_part<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
let string_array = downcast_string_arg!(args[0], "string", T);
let delimiter_array = downcast_string_arg!(args[1], "delimiter", T);
let n_array = downcast_arg!(args[2], "n", Int64Array);
@@ -547,7 +547,7 @@ pub fn split_part<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRe
/// Returns true if string starts with prefix.
/// starts_with('alphabet', 'alph') = 't'
-pub fn starts_with<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn starts_with<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
let string_array = downcast_string_arg!(args[0], "string", T);
let prefix_array = downcast_string_arg!(args[1], "prefix", T);
@@ -567,7 +567,7 @@ pub fn starts_with<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayR
/// to_hex(2147483647) = '7fffffff'
pub fn to_hex<T: ArrowPrimitiveType>(args: &[ArrayRef]) -> Result<ArrayRef>
where
- T::Native: StringOffsetSizeTrait,
+ T::Native: OffsetSizeTrait,
{
let integer_array = downcast_primitive_array_arg!(args[0], "integer", T);
diff --git a/datafusion/physical-expr/src/unicode_expressions.rs b/datafusion/physical-expr/src/unicode_expressions.rs
index 86a2ef7ba..0730d24f5 100644
--- a/datafusion/physical-expr/src/unicode_expressions.rs
+++ b/datafusion/physical-expr/src/unicode_expressions.rs
@@ -22,9 +22,7 @@
//! Unicode expressions
use arrow::{
- array::{
- ArrayRef, GenericStringArray, Int64Array, PrimitiveArray, StringOffsetSizeTrait,
- },
+ array::{ArrayRef, GenericStringArray, Int64Array, OffsetSizeTrait, PrimitiveArray},
datatypes::{ArrowNativeType, ArrowPrimitiveType},
};
use datafusion_common::{DataFusionError, Result};
@@ -64,7 +62,7 @@ macro_rules! downcast_arg {
/// character_length('josé') = 4
pub fn character_length<T: ArrowPrimitiveType>(args: &[ArrayRef]) -> Result<ArrayRef>
where
- T::Native: StringOffsetSizeTrait,
+ T::Native: OffsetSizeTrait,
{
let string_array: &GenericStringArray<T::Native> = args[0]
.as_any()
@@ -89,7 +87,7 @@ where
/// Returns first n characters in the string, or when n is negative, returns all but last |n| characters.
/// left('abcde', 2) = 'ab'
-pub fn left<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn left<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
let string_array = downcast_string_arg!(args[0], "string", T);
let n_array = downcast_arg!(args[1], "n", Int64Array);
let result = string_array
@@ -122,7 +120,7 @@ pub fn left<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
/// Extends the string to length 'length' by prepending the characters fill (a space by default). If the string is already longer than length then it is truncated (on the right).
/// lpad('hi', 5, 'xy') = 'xyxhi'
-pub fn lpad<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn lpad<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
match args.len() {
2 => {
let string_array = downcast_string_arg!(args[0], "string", T);
@@ -211,7 +209,7 @@ pub fn lpad<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
/// Reverses the order of the characters in the string.
/// reverse('abcde') = 'edcba'
-pub fn reverse<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn reverse<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
let string_array = downcast_string_arg!(args[0], "string", T);
let result = string_array
@@ -226,7 +224,7 @@ pub fn reverse<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef>
/// Returns last n characters in the string, or when n is negative, returns all but first |n| characters.
/// right('abcde', 2) = 'de'
-pub fn right<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn right<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
let string_array = downcast_string_arg!(args[0], "string", T);
let n_array = downcast_arg!(args[1], "n", Int64Array);
@@ -274,7 +272,7 @@ pub fn right<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
/// Extends the string to length 'length' by appending the characters fill (a space by default). If the string is already longer than length then it is truncated.
/// rpad('hi', 5, 'xy') = 'hixyx'
-pub fn rpad<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn rpad<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
match args.len() {
2 => {
let string_array = downcast_string_arg!(args[0], "string", T);
@@ -353,7 +351,7 @@ pub fn rpad<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
/// strpos('high', 'ig') = 2
pub fn strpos<T: ArrowPrimitiveType>(args: &[ArrayRef]) -> Result<ArrayRef>
where
- T::Native: StringOffsetSizeTrait,
+ T::Native: OffsetSizeTrait,
{
let string_array: &GenericStringArray<T::Native> = args[0]
.as_any()
@@ -410,7 +408,7 @@ where
/// Extracts the substring of string starting at the start'th character, and extending for count characters if that is specified. (Same as substring(string from start for count).)
/// substr('alphabet', 3) = 'phabet'
/// substr('alphabet', 3, 2) = 'ph'
-pub fn substr<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn substr<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
match args.len() {
2 => {
let string_array = downcast_string_arg!(args[0], "string", T);
@@ -497,7 +495,7 @@ pub fn substr<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
/// Replaces each character in string that matches a character in the from set with the corresponding character in the to set. If from is longer than to, occurrences of the extra characters in from are deleted.
/// translate('12345', '143', 'ax') = 'a2x5'
-pub fn translate<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn translate<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
let string_array = downcast_string_arg!(args[0], "string", T);
let from_array = downcast_string_arg!(args[1], "from", T);
let to_array = downcast_string_arg!(args[2], "to", T);
diff --git a/datafusion/row/Cargo.toml b/datafusion/row/Cargo.toml
index 13b485fb7..f9a150699 100644
--- a/datafusion/row/Cargo.toml
+++ b/datafusion/row/Cargo.toml
@@ -37,7 +37,7 @@ path = "src/lib.rs"
jit = ["datafusion-jit"]
[dependencies]
-arrow = { version = "13" }
+arrow = { version = "14.0.0" }
datafusion-common = { path = "../common", version = "8.0.0" }
datafusion-jit = { path = "../jit", version = "8.0.0", optional = true }
paste = "^1.0"