You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2022/05/17 11:03:50 UTC

[arrow-datafusion] branch master updated: Update to arrow-rs 14.0.0 (#2528)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 25725c0cf Update to arrow-rs 14.0.0  (#2528)
25725c0cf is described below

commit 25725c0cfd1bc54f735c41a57c13d9b0e15c8d2b
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Tue May 17 07:03:43 2022 -0400

    Update to arrow-rs 14.0.0  (#2528)
    
    * TEMP: Patch to use apache repo
    
    * Update to arrow 14.0.0
    
    * Consolidate to single OffsetSizeTrait
    
    * Update for new API
    
    * clippy
    
    * moar clippy
    
    * TEMP: patch datafusion cli
    
    * fixup
    
    * Update datafusion-cli deps
---
 Cargo.toml                                         |  1 +
 ballista-cli/Cargo.toml                            |  2 +-
 ballista/rust/core/Cargo.toml                      |  2 +-
 ballista/rust/core/src/client.rs                   | 11 ++++-
 ballista/rust/executor/Cargo.toml                  |  4 +-
 datafusion-cli/Cargo.lock                          | 55 ++++++++++++----------
 datafusion-cli/Cargo.toml                          |  3 +-
 datafusion-examples/Cargo.toml                     |  2 +-
 datafusion-examples/examples/flight_client.rs      |  3 +-
 datafusion/common/Cargo.toml                       |  4 +-
 datafusion/core/Cargo.toml                         |  4 +-
 datafusion/core/fuzz-utils/Cargo.toml              |  2 +-
 datafusion/core/src/from_slice.rs                  | 10 ++--
 datafusion/expr/Cargo.toml                         |  2 +-
 datafusion/jit/Cargo.toml                          |  2 +-
 datafusion/physical-expr/Cargo.toml                |  2 +-
 .../physical-expr/src/aggregate/approx_distinct.rs | 16 +++----
 datafusion/physical-expr/src/crypto_expressions.rs |  5 +-
 .../physical-expr/src/datetime_expressions.rs      |  4 +-
 datafusion/physical-expr/src/expressions/binary.rs |  4 +-
 .../physical-expr/src/expressions/in_list.rs       |  8 ++--
 datafusion/physical-expr/src/regex_expressions.rs  |  6 +--
 datafusion/physical-expr/src/string_expressions.rs | 26 +++++-----
 .../physical-expr/src/unicode_expressions.rs       | 22 ++++-----
 datafusion/row/Cargo.toml                          |  2 +-
 25 files changed, 108 insertions(+), 94 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index feb3ebe5c..ef7dd30c2 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -38,3 +38,4 @@ exclude = ["ballista-cli", "datafusion-cli"]
 [profile.release]
 codegen-units = 1
 lto = true
+
diff --git a/ballista-cli/Cargo.toml b/ballista-cli/Cargo.toml
index 02d254677..3638a6227 100644
--- a/ballista-cli/Cargo.toml
+++ b/ballista-cli/Cargo.toml
@@ -29,7 +29,7 @@ rust-version = "1.59"
 readme = "README.md"
 
 [dependencies]
-arrow = { version = "13" }
+arrow = { version = "14.0.0" }
 ballista = { path = "../ballista/rust/client", version = "0.7.0" }
 clap = { version = "3", features = ["derive", "cargo"] }
 datafusion = { path = "../datafusion/core", version = "8.0.0" }
diff --git a/ballista/rust/core/Cargo.toml b/ballista/rust/core/Cargo.toml
index db2b60ca4..3d43aceb4 100644
--- a/ballista/rust/core/Cargo.toml
+++ b/ballista/rust/core/Cargo.toml
@@ -35,7 +35,7 @@ simd = ["datafusion/simd"]
 [dependencies]
 ahash = { version = "0.7", default-features = false }
 
-arrow-flight = { version = "13" }
+arrow-flight = { version = "14.0.0" }
 async-trait = "0.1.41"
 chrono = { version = "0.4", default-features = false }
 clap = { version = "3", features = ["derive", "cargo"] }
diff --git a/ballista/rust/core/src/client.rs b/ballista/rust/core/src/client.rs
index 54418884d..a5c4a062b 100644
--- a/ballista/rust/core/src/client.rs
+++ b/ballista/rust/core/src/client.rs
@@ -17,6 +17,7 @@
 
 //! Client API for sending requests to executors.
 
+use std::collections::HashMap;
 use std::sync::Arc;
 
 use std::{
@@ -31,6 +32,7 @@ use crate::serde::scheduler::Action;
 use arrow_flight::utils::flight_data_to_arrow_batch;
 use arrow_flight::Ticket;
 use arrow_flight::{flight_service_client::FlightServiceClient, FlightData};
+use datafusion::arrow::array::ArrayRef;
 use datafusion::arrow::{
     datatypes::{Schema, SchemaRef},
     error::{ArrowError, Result as ArrowResult},
@@ -131,11 +133,16 @@ impl BallistaClient {
 struct FlightDataStream {
     stream: Streaming<FlightData>,
     schema: SchemaRef,
+    dictionaries_by_id: HashMap<i64, ArrayRef>,
 }
 
 impl FlightDataStream {
     pub fn new(stream: Streaming<FlightData>, schema: SchemaRef) -> Self {
-        Self { stream, schema }
+        Self {
+            stream,
+            schema,
+            dictionaries_by_id: HashMap::new(),
+        }
     }
 }
 
@@ -154,7 +161,7 @@ impl Stream for FlightDataStream {
                         flight_data_to_arrow_batch(
                             &flight_data_chunk,
                             self.schema.clone(),
-                            &[],
+                            &self.dictionaries_by_id,
                         )
                     });
                 Some(converted_chunk)
diff --git a/ballista/rust/executor/Cargo.toml b/ballista/rust/executor/Cargo.toml
index 3a2b45c5c..959a55078 100644
--- a/ballista/rust/executor/Cargo.toml
+++ b/ballista/rust/executor/Cargo.toml
@@ -34,8 +34,8 @@ snmalloc = ["snmalloc-rs"]
 
 [dependencies]
 anyhow = "1"
-arrow = { version = "13" }
-arrow-flight = { version = "13" }
+arrow = { version = "14.0.0" }
+arrow-flight = { version = "14.0.0" }
 async-trait = "0.1.41"
 ballista-core = { path = "../core", version = "0.7.0" }
 chrono = { version = "0.4", default-features = false }
diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock
index 4309a4216..26d6fb9c4 100644
--- a/datafusion-cli/Cargo.lock
+++ b/datafusion-cli/Cargo.lock
@@ -57,9 +57,9 @@ checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
 
 [[package]]
 name = "arrow"
-version = "13.0.0"
+version = "14.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5c6bee230122beb516ead31935a61f683715f987c6f003eff44ad6986624105a"
+checksum = "0612b6a634de6c3f5e63fdaa6932f7bc598f92de0462ac6e69b0aebd77e093aa"
 dependencies = [
  "bitflags",
  "chrono",
@@ -191,6 +191,12 @@ version = "1.4.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
 
+[[package]]
+name = "bytes"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8"
+
 [[package]]
 name = "cc"
 version = "1.0.73"
@@ -337,7 +343,7 @@ dependencies = [
 
 [[package]]
 name = "datafusion"
-version = "7.0.0"
+version = "8.0.0"
 dependencies = [
  "ahash",
  "arrow",
@@ -369,7 +375,7 @@ dependencies = [
 
 [[package]]
 name = "datafusion-cli"
-version = "7.0.0"
+version = "8.0.0"
 dependencies = [
  "arrow",
  "clap",
@@ -383,7 +389,7 @@ dependencies = [
 
 [[package]]
 name = "datafusion-common"
-version = "7.0.0"
+version = "8.0.0"
 dependencies = [
  "arrow",
  "ordered-float 3.0.0",
@@ -393,7 +399,7 @@ dependencies = [
 
 [[package]]
 name = "datafusion-data-access"
-version = "7.0.0"
+version = "8.0.0"
 dependencies = [
  "async-trait",
  "chrono",
@@ -406,7 +412,7 @@ dependencies = [
 
 [[package]]
 name = "datafusion-expr"
-version = "7.0.0"
+version = "8.0.0"
 dependencies = [
  "ahash",
  "arrow",
@@ -416,7 +422,7 @@ dependencies = [
 
 [[package]]
 name = "datafusion-physical-expr"
-version = "7.0.0"
+version = "8.0.0"
 dependencies = [
  "ahash",
  "arrow",
@@ -439,7 +445,7 @@ dependencies = [
 
 [[package]]
 name = "datafusion-row"
-version = "7.0.0"
+version = "8.0.0"
 dependencies = [
  "arrow",
  "datafusion-common",
@@ -804,9 +810,9 @@ checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4"
 
 [[package]]
 name = "itoa"
-version = "1.0.1"
+version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35"
+checksum = "112c678d4050afce233f4f2852bb2eb519230b3cf12f33585275537d7e41578d"
 
 [[package]]
 name = "jobserver"
@@ -1143,9 +1149,9 @@ dependencies = [
 
 [[package]]
 name = "os_str_bytes"
-version = "6.0.0"
+version = "6.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8e22443d1643a904602595ba1cd8f7d896afe56d26712531c5ff73a15b2fbf64"
+checksum = "029d8d0b2f198229de29dca79676f2738ff952edf3fde542eb8bf94d8c21b435"
 
 [[package]]
 name = "parking_lot"
@@ -1172,14 +1178,15 @@ dependencies = [
 
 [[package]]
 name = "parquet"
-version = "13.0.0"
+version = "14.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6c6d737baed48775e87a69aa262f1fa2f1d6bd074dedbe9cac244b9aabf2a0b4"
+checksum = "ba1185ee1da5091e40b86519265a44d2704e3916ff867059c915141cab14d413"
 dependencies = [
  "arrow",
  "base64",
  "brotli",
  "byteorder",
+ "bytes",
  "chrono",
  "flate2",
  "lz4",
@@ -1405,9 +1412,9 @@ dependencies = [
 
 [[package]]
 name = "ryu"
-version = "1.0.9"
+version = "1.0.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f"
+checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695"
 
 [[package]]
 name = "scopeguard"
@@ -1439,7 +1446,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9b7ce2b32a1aed03c558dc61a5cd328f15aff2dbc17daad8fb8af04d2100e15c"
 dependencies = [
  "indexmap",
- "itoa 1.0.1",
+ "itoa 1.0.2",
  "ryu",
  "serde",
 ]
@@ -1527,9 +1534,9 @@ checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601"
 
 [[package]]
 name = "syn"
-version = "1.0.93"
+version = "1.0.94"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "04066589568b72ec65f42d65a1a52436e954b168773148893c020269563decf2"
+checksum = "a07e33e919ebcd69113d5be0e4d70c5707004ff45188910106854f38b960df4a"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -1812,18 +1819,18 @@ checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680"
 
 [[package]]
 name = "zstd"
-version = "0.11.1+zstd.1.5.2"
+version = "0.11.2+zstd.1.5.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "77a16b8414fde0414e90c612eba70985577451c4c504b99885ebed24762cb81a"
+checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4"
 dependencies = [
  "zstd-safe",
 ]
 
 [[package]]
 name = "zstd-safe"
-version = "5.0.1+zstd.1.5.2"
+version = "5.0.2+zstd.1.5.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7c12659121420dd6365c5c3de4901f97145b79651fb1d25814020ed2ed0585ae"
+checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db"
 dependencies = [
  "libc",
  "zstd-sys",
diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml
index c39479432..27d602934 100644
--- a/datafusion-cli/Cargo.toml
+++ b/datafusion-cli/Cargo.toml
@@ -29,7 +29,7 @@ rust-version = "1.59"
 readme = "README.md"
 
 [dependencies]
-arrow = { version = "13" }
+arrow = { version = "14.0.0" }
 clap = { version = "3", features = ["derive", "cargo"] }
 datafusion = { path = "../datafusion/core", version = "8.0.0" }
 dirs = "4.0.0"
@@ -37,3 +37,4 @@ env_logger = "0.9"
 mimalloc = { version = "0.1", default-features = false }
 rustyline = "9.0"
 tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot"] }
+
diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml
index b6e5424d7..963efdf0d 100644
--- a/datafusion-examples/Cargo.toml
+++ b/datafusion-examples/Cargo.toml
@@ -34,7 +34,7 @@ path = "examples/avro_sql.rs"
 required-features = ["datafusion/avro"]
 
 [dev-dependencies]
-arrow-flight = { version = "13" }
+arrow-flight = { version = "14.0.0" }
 async-trait = "0.1.41"
 datafusion = { path = "../datafusion/core" }
 futures = "0.3"
diff --git a/datafusion-examples/examples/flight_client.rs b/datafusion-examples/examples/flight_client.rs
index e20397cb9..9fea1ab7d 100644
--- a/datafusion-examples/examples/flight_client.rs
+++ b/datafusion-examples/examples/flight_client.rs
@@ -15,6 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use std::collections::HashMap;
 use std::convert::TryFrom;
 use std::sync::Arc;
 
@@ -62,7 +63,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
 
     // all the remaining stream messages should be dictionary and record batches
     let mut results = vec![];
-    let dictionaries_by_field = vec![None; schema.fields().len()];
+    let dictionaries_by_field = HashMap::new();
     while let Some(flight_data) = stream.message().await? {
         let record_batch = flight_data_to_arrow_batch(
             &flight_data,
diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml
index 652dde5fb..6a46a11f6 100644
--- a/datafusion/common/Cargo.toml
+++ b/datafusion/common/Cargo.toml
@@ -38,10 +38,10 @@ jit = ["cranelift-module"]
 pyarrow = ["pyo3"]
 
 [dependencies]
-arrow = { version = "13", features = ["prettyprint"] }
+arrow = { version = "14.0.0", features = ["prettyprint"] }
 avro-rs = { version = "0.13", features = ["snappy"], optional = true }
 cranelift-module = { version = "0.83.0", optional = true }
 ordered-float = "3.0"
-parquet = { version = "13", features = ["arrow"], optional = true }
+parquet = { version = "14.0.0", features = ["arrow"], optional = true }
 pyo3 = { version = "0.16", optional = true }
 sqlparser = "0.17"
diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml
index 54fac119b..0678df1c1 100644
--- a/datafusion/core/Cargo.toml
+++ b/datafusion/core/Cargo.toml
@@ -55,7 +55,7 @@ unicode_expressions = ["datafusion-physical-expr/regex_expressions"]
 
 [dependencies]
 ahash = { version = "0.7", default-features = false }
-arrow = { version = "13", features = ["prettyprint"] }
+arrow = { version = "14.0.0", features = ["prettyprint"] }
 async-trait = "0.1.41"
 avro-rs = { version = "0.13", features = ["snappy"], optional = true }
 chrono = { version = "0.4", default-features = false }
@@ -73,7 +73,7 @@ num-traits = { version = "0.2", optional = true }
 num_cpus = "1.13.0"
 ordered-float = "3.0"
 parking_lot = "0.12"
-parquet = { version = "13", features = ["arrow"] }
+parquet = { version = "14.0.0", features = ["arrow"] }
 paste = "^1.0"
 pin-project-lite = "^0.2.7"
 pyo3 = { version = "0.16", optional = true }
diff --git a/datafusion/core/fuzz-utils/Cargo.toml b/datafusion/core/fuzz-utils/Cargo.toml
index 858543d40..4089ad74e 100644
--- a/datafusion/core/fuzz-utils/Cargo.toml
+++ b/datafusion/core/fuzz-utils/Cargo.toml
@@ -23,6 +23,6 @@ edition = "2021"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
-arrow = { version = "13", features = ["prettyprint"] }
+arrow = { version = "14.0.0", features = ["prettyprint"] }
 env_logger = "0.9.0"
 rand = "0.8"
diff --git a/datafusion/core/src/from_slice.rs b/datafusion/core/src/from_slice.rs
index 42b8671d1..2fedc668a 100644
--- a/datafusion/core/src/from_slice.rs
+++ b/datafusion/core/src/from_slice.rs
@@ -20,8 +20,8 @@
 //! This file essentially exists to ease the transition onto arrow2
 
 use arrow::array::{
-    ArrayData, BinaryOffsetSizeTrait, BooleanArray, GenericBinaryArray,
-    GenericStringArray, PrimitiveArray, StringOffsetSizeTrait,
+    ArrayData, BooleanArray, GenericBinaryArray, GenericStringArray, OffsetSizeTrait,
+    PrimitiveArray,
 };
 use arrow::buffer::{Buffer, MutableBuffer};
 use arrow::datatypes::{ArrowPrimitiveType, DataType};
@@ -50,7 +50,7 @@ where
 /// default implementation for binary array types, adapted from `From<Vec<_>>`
 impl<S, I, OffsetSize> FromSlice<S, I> for GenericBinaryArray<OffsetSize>
 where
-    OffsetSize: BinaryOffsetSizeTrait,
+    OffsetSize: OffsetSizeTrait,
     S: AsRef<[I]>,
     I: AsRef<[u8]>,
 {
@@ -69,7 +69,7 @@ where
             offsets.push(length_so_far);
             values.extend_from_slice(s);
         }
-        let array_data = ArrayData::builder(OffsetSize::DATA_TYPE)
+        let array_data = ArrayData::builder(Self::get_data_type())
             .len(slice.len())
             .add_buffer(Buffer::from_slice_ref(&offsets))
             .add_buffer(Buffer::from_slice_ref(&values));
@@ -81,7 +81,7 @@ where
 /// default implementation for utf8 array types, adapted from `From<Vec<_>>`
 impl<S, I, OffsetSize> FromSlice<S, I> for GenericStringArray<OffsetSize>
 where
-    OffsetSize: StringOffsetSizeTrait,
+    OffsetSize: OffsetSizeTrait,
     S: AsRef<[I]>,
     I: AsRef<str>,
 {
diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml
index 25786e670..22c73e6a9 100644
--- a/datafusion/expr/Cargo.toml
+++ b/datafusion/expr/Cargo.toml
@@ -36,6 +36,6 @@ path = "src/lib.rs"
 
 [dependencies]
 ahash = { version = "0.7", default-features = false }
-arrow = { version = "13", features = ["prettyprint"] }
+arrow = { version = "14.0.0", features = ["prettyprint"] }
 datafusion-common = { path = "../common", version = "8.0.0" }
 sqlparser = "0.17"
diff --git a/datafusion/jit/Cargo.toml b/datafusion/jit/Cargo.toml
index e3278be6d..260e78d1b 100644
--- a/datafusion/jit/Cargo.toml
+++ b/datafusion/jit/Cargo.toml
@@ -36,7 +36,7 @@ path = "src/lib.rs"
 jit = []
 
 [dependencies]
-arrow = { version = "13" }
+arrow = { version = "14.0.0" }
 cranelift = "0.83.0"
 cranelift-jit = "0.83.0"
 cranelift-module = "0.83.0"
diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml
index f9a472555..c6c286f0b 100644
--- a/datafusion/physical-expr/Cargo.toml
+++ b/datafusion/physical-expr/Cargo.toml
@@ -40,7 +40,7 @@ unicode_expressions = ["unicode-segmentation"]
 
 [dependencies]
 ahash = { version = "0.7", default-features = false }
-arrow = { version = "13", features = ["prettyprint"] }
+arrow = { version = "14.0.0", features = ["prettyprint"] }
 blake2 = { version = "^0.10.2", optional = true }
 blake3 = { version = "1.0", optional = true }
 chrono = { version = "0.4", default-features = false }
diff --git a/datafusion/physical-expr/src/aggregate/approx_distinct.rs b/datafusion/physical-expr/src/aggregate/approx_distinct.rs
index 391258447..c67d1c9d3 100644
--- a/datafusion/physical-expr/src/aggregate/approx_distinct.rs
+++ b/datafusion/physical-expr/src/aggregate/approx_distinct.rs
@@ -21,8 +21,8 @@ use super::hyperloglog::HyperLogLog;
 use crate::expressions::format_state_name;
 use crate::{AggregateExpr, PhysicalExpr};
 use arrow::array::{
-    ArrayRef, BinaryArray, BinaryOffsetSizeTrait, GenericBinaryArray, GenericStringArray,
-    PrimitiveArray, StringOffsetSizeTrait,
+    ArrayRef, BinaryArray, GenericBinaryArray, GenericStringArray, OffsetSizeTrait,
+    PrimitiveArray,
 };
 use arrow::datatypes::{
     ArrowPrimitiveType, DataType, Field, Int16Type, Int32Type, Int64Type, Int8Type,
@@ -119,7 +119,7 @@ impl AggregateExpr for ApproxDistinct {
 #[derive(Debug)]
 struct BinaryHLLAccumulator<T>
 where
-    T: BinaryOffsetSizeTrait,
+    T: OffsetSizeTrait,
 {
     hll: HyperLogLog<Vec<u8>>,
     phantom_data: PhantomData<T>,
@@ -127,7 +127,7 @@ where
 
 impl<T> BinaryHLLAccumulator<T>
 where
-    T: BinaryOffsetSizeTrait,
+    T: OffsetSizeTrait,
 {
     /// new approx_distinct accumulator
     pub fn new() -> Self {
@@ -141,7 +141,7 @@ where
 #[derive(Debug)]
 struct StringHLLAccumulator<T>
 where
-    T: StringOffsetSizeTrait,
+    T: OffsetSizeTrait,
 {
     hll: HyperLogLog<String>,
     phantom_data: PhantomData<T>,
@@ -149,7 +149,7 @@ where
 
 impl<T> StringHLLAccumulator<T>
 where
-    T: StringOffsetSizeTrait,
+    T: OffsetSizeTrait,
 {
     /// new approx_distinct accumulator
     pub fn new() -> Self {
@@ -259,7 +259,7 @@ macro_rules! downcast_value {
 
 impl<T> Accumulator for BinaryHLLAccumulator<T>
 where
-    T: BinaryOffsetSizeTrait,
+    T: OffsetSizeTrait,
 {
     fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
         let array: &GenericBinaryArray<T> =
@@ -275,7 +275,7 @@ where
 
 impl<T> Accumulator for StringHLLAccumulator<T>
 where
-    T: StringOffsetSizeTrait,
+    T: OffsetSizeTrait,
 {
     fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
         let array: &GenericStringArray<T> =
diff --git a/datafusion/physical-expr/src/crypto_expressions.rs b/datafusion/physical-expr/src/crypto_expressions.rs
index 95bedd4af..e0314317c 100644
--- a/datafusion/physical-expr/src/crypto_expressions.rs
+++ b/datafusion/physical-expr/src/crypto_expressions.rs
@@ -19,8 +19,7 @@
 
 use arrow::{
     array::{
-        Array, ArrayRef, BinaryArray, GenericStringArray, StringArray,
-        StringOffsetSizeTrait,
+        Array, ArrayRef, BinaryArray, GenericStringArray, OffsetSizeTrait, StringArray,
     },
     datatypes::DataType,
 };
@@ -127,7 +126,7 @@ impl DigestAlgorithm {
     /// digest a string array to their hash values
     fn digest_array<T>(self, value: &dyn Array) -> Result<ColumnarValue>
     where
-        T: StringOffsetSizeTrait,
+        T: OffsetSizeTrait,
     {
         let input_value = value
             .as_any()
diff --git a/datafusion/physical-expr/src/datetime_expressions.rs b/datafusion/physical-expr/src/datetime_expressions.rs
index 9a8351d0d..1e1cfd07f 100644
--- a/datafusion/physical-expr/src/datetime_expressions.rs
+++ b/datafusion/physical-expr/src/datetime_expressions.rs
@@ -18,7 +18,7 @@
 //! DateTime expressions
 
 use arrow::{
-    array::{Array, ArrayRef, GenericStringArray, PrimitiveArray, StringOffsetSizeTrait},
+    array::{Array, ArrayRef, GenericStringArray, OffsetSizeTrait, PrimitiveArray},
     compute::kernels::cast_utils::string_to_timestamp_nanos,
     datatypes::{
         ArrowPrimitiveType, DataType, TimestampMicrosecondType, TimestampMillisecondType,
@@ -57,7 +57,7 @@ pub(crate) fn unary_string_to_primitive_function<'a, T, O, F>(
 ) -> Result<PrimitiveArray<O>>
 where
     O: ArrowPrimitiveType,
-    T: StringOffsetSizeTrait,
+    T: OffsetSizeTrait,
     F: Fn(&'a str) -> Result<O::Native>,
 {
     if args.len() != 1 {
diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs
index b7a0da3e0..e507f8fe8 100644
--- a/datafusion/physical-expr/src/expressions/binary.rs
+++ b/datafusion/physical-expr/src/expressions/binary.rs
@@ -1351,7 +1351,7 @@ where
         .collect())
 }
 
-fn is_distinct_from_utf8<OffsetSize: StringOffsetSizeTrait>(
+fn is_distinct_from_utf8<OffsetSize: OffsetSizeTrait>(
     left: &GenericStringArray<OffsetSize>,
     right: &GenericStringArray<OffsetSize>,
 ) -> Result<BooleanArray> {
@@ -1397,7 +1397,7 @@ where
         .collect())
 }
 
-fn is_not_distinct_from_utf8<OffsetSize: StringOffsetSizeTrait>(
+fn is_not_distinct_from_utf8<OffsetSize: OffsetSizeTrait>(
     left: &GenericStringArray<OffsetSize>,
     right: &GenericStringArray<OffsetSize>,
 ) -> Result<BooleanArray> {
diff --git a/datafusion/physical-expr/src/expressions/in_list.rs b/datafusion/physical-expr/src/expressions/in_list.rs
index 7094a718d..a872f65c6 100644
--- a/datafusion/physical-expr/src/expressions/in_list.rs
+++ b/datafusion/physical-expr/src/expressions/in_list.rs
@@ -24,7 +24,7 @@ use std::sync::Arc;
 use arrow::array::GenericStringArray;
 use arrow::array::{
     ArrayRef, BooleanArray, Float32Array, Float64Array, Int16Array, Int32Array,
-    Int64Array, Int8Array, StringOffsetSizeTrait, UInt16Array, UInt32Array, UInt64Array,
+    Int64Array, Int8Array, OffsetSizeTrait, UInt16Array, UInt32Array, UInt64Array,
     UInt8Array,
 };
 use arrow::datatypes::ArrowPrimitiveType;
@@ -250,14 +250,14 @@ fn not_in_list_primitive<T: ArrowPrimitiveType>(
 }
 
 // whether each value on the left (can be null) is contained in the non-null list
-fn in_list_utf8<OffsetSize: StringOffsetSizeTrait>(
+fn in_list_utf8<OffsetSize: OffsetSizeTrait>(
     array: &GenericStringArray<OffsetSize>,
     values: &[&str],
 ) -> Result<BooleanArray> {
     compare_op_scalar!(array, values, |x, v: &[&str]| v.contains(&x))
 }
 
-fn not_in_list_utf8<OffsetSize: StringOffsetSizeTrait>(
+fn not_in_list_utf8<OffsetSize: OffsetSizeTrait>(
     array: &GenericStringArray<OffsetSize>,
     values: &[&str],
 ) -> Result<BooleanArray> {
@@ -341,7 +341,7 @@ impl InListExpr {
 
     /// Compare for specific utf8 types
     #[allow(clippy::unnecessary_wraps)]
-    fn compare_utf8<T: StringOffsetSizeTrait>(
+    fn compare_utf8<T: OffsetSizeTrait>(
         &self,
         array: ArrayRef,
         list_values: Vec<ColumnarValue>,
diff --git a/datafusion/physical-expr/src/regex_expressions.rs b/datafusion/physical-expr/src/regex_expressions.rs
index 69de68e16..c53c94615 100644
--- a/datafusion/physical-expr/src/regex_expressions.rs
+++ b/datafusion/physical-expr/src/regex_expressions.rs
@@ -21,7 +21,7 @@
 
 //! Regex expressions
 
-use arrow::array::{ArrayRef, GenericStringArray, StringOffsetSizeTrait};
+use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait};
 use arrow::compute;
 use datafusion_common::{DataFusionError, Result};
 use hashbrown::HashMap;
@@ -45,7 +45,7 @@ macro_rules! downcast_string_arg {
 }
 
 /// extract a specific group from a string column, using a regular expression
-pub fn regexp_match<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn regexp_match<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
     match args.len() {
         2 => {
             let values = downcast_string_arg!(args[0], "string", T);
@@ -79,7 +79,7 @@ fn regex_replace_posix_groups(replacement: &str) -> String {
 /// Replaces substring(s) matching a POSIX regular expression.
 ///
 /// example: `regexp_replace('Thomas', '.[mN]a.', 'M') = 'ThM'`
-pub fn regexp_replace<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn regexp_replace<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
     // creating Regex is expensive so create hashmap for memoization
     let mut patterns: HashMap<String, Regex> = HashMap::new();
 
diff --git a/datafusion/physical-expr/src/string_expressions.rs b/datafusion/physical-expr/src/string_expressions.rs
index 1f369cdad..c13a853bb 100644
--- a/datafusion/physical-expr/src/string_expressions.rs
+++ b/datafusion/physical-expr/src/string_expressions.rs
@@ -24,7 +24,7 @@
 use arrow::{
     array::{
         Array, ArrayRef, BooleanArray, GenericStringArray, Int32Array, Int64Array,
-        PrimitiveArray, StringArray, StringOffsetSizeTrait,
+        OffsetSizeTrait, PrimitiveArray, StringArray,
     },
     datatypes::{ArrowNativeType, ArrowPrimitiveType, DataType},
 };
@@ -98,8 +98,8 @@ pub(crate) fn unary_string_function<'a, T, O, F, R>(
 ) -> Result<GenericStringArray<O>>
 where
     R: AsRef<str>,
-    O: StringOffsetSizeTrait,
-    T: StringOffsetSizeTrait,
+    O: OffsetSizeTrait,
+    T: OffsetSizeTrait,
     F: Fn(&'a str) -> R,
 {
     if args.len() != 1 {
@@ -167,7 +167,7 @@ where
 
 /// Returns the numeric code of the first character of the argument.
 /// ascii('x') = 120
-pub fn ascii<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn ascii<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
     let string_array = downcast_string_arg!(args[0], "string", T);
 
     let result = string_array
@@ -185,7 +185,7 @@ pub fn ascii<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
 
 /// Removes the longest string containing only characters in characters (a space by default) from the start and end of string.
 /// btrim('xyxtrimyyx', 'xyz') = 'trim'
-pub fn btrim<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn btrim<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
     match args.len() {
         1 => {
             let string_array = downcast_string_arg!(args[0], "string", T);
@@ -363,7 +363,7 @@ pub fn concat_ws(args: &[ArrayRef]) -> Result<ArrayRef> {
 
 /// Converts the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters.
 /// initcap('hi THOMAS') = 'Hi Thomas'
-pub fn initcap<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn initcap<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
     let string_array = downcast_string_arg!(args[0], "string", T);
 
     // first map is the iterator, second is for the `Option<_>`
@@ -399,7 +399,7 @@ pub fn lower(args: &[ColumnarValue]) -> Result<ColumnarValue> {
 
 /// Removes the longest string containing only characters in characters (a space by default) from the start of string.
 /// ltrim('zzzytest', 'xyz') = 'test'
-pub fn ltrim<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn ltrim<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
     match args.len() {
         1 => {
             let string_array = downcast_string_arg!(args[0], "string", T);
@@ -438,7 +438,7 @@ pub fn ltrim<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
 
 /// Repeats string the specified number of times.
 /// repeat('Pg', 4) = 'PgPgPgPg'
-pub fn repeat<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn repeat<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
     let string_array = downcast_string_arg!(args[0], "string", T);
     let number_array = downcast_arg!(args[1], "number", Int64Array);
 
@@ -456,7 +456,7 @@ pub fn repeat<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
 
 /// Replaces all occurrences in string of substring from with substring to.
 /// replace('abcdefabcdef', 'cd', 'XX') = 'abXXefabXXef'
-pub fn replace<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn replace<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
     let string_array = downcast_string_arg!(args[0], "string", T);
     let from_array = downcast_string_arg!(args[1], "from", T);
     let to_array = downcast_string_arg!(args[2], "to", T);
@@ -476,7 +476,7 @@ pub fn replace<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef>
 
 /// Removes the longest string containing only characters in characters (a space by default) from the end of string.
 /// rtrim('testxxzx', 'xyz') = 'test'
-pub fn rtrim<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn rtrim<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
     match args.len() {
         1 => {
             let string_array = downcast_string_arg!(args[0], "string", T);
@@ -515,7 +515,7 @@ pub fn rtrim<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
 
 /// Splits string at occurrences of delimiter and returns the n'th field (counting from one).
 /// split_part('abc~@~def~@~ghi', '~@~', 2) = 'def'
-pub fn split_part<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn split_part<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
     let string_array = downcast_string_arg!(args[0], "string", T);
     let delimiter_array = downcast_string_arg!(args[1], "delimiter", T);
     let n_array = downcast_arg!(args[2], "n", Int64Array);
@@ -547,7 +547,7 @@ pub fn split_part<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRe
 
 /// Returns true if string starts with prefix.
 /// starts_with('alphabet', 'alph') = 't'
-pub fn starts_with<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn starts_with<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
     let string_array = downcast_string_arg!(args[0], "string", T);
     let prefix_array = downcast_string_arg!(args[1], "prefix", T);
 
@@ -567,7 +567,7 @@ pub fn starts_with<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayR
 /// to_hex(2147483647) = '7fffffff'
 pub fn to_hex<T: ArrowPrimitiveType>(args: &[ArrayRef]) -> Result<ArrayRef>
 where
-    T::Native: StringOffsetSizeTrait,
+    T::Native: OffsetSizeTrait,
 {
     let integer_array = downcast_primitive_array_arg!(args[0], "integer", T);
 
diff --git a/datafusion/physical-expr/src/unicode_expressions.rs b/datafusion/physical-expr/src/unicode_expressions.rs
index 86a2ef7ba..0730d24f5 100644
--- a/datafusion/physical-expr/src/unicode_expressions.rs
+++ b/datafusion/physical-expr/src/unicode_expressions.rs
@@ -22,9 +22,7 @@
 //! Unicode expressions
 
 use arrow::{
-    array::{
-        ArrayRef, GenericStringArray, Int64Array, PrimitiveArray, StringOffsetSizeTrait,
-    },
+    array::{ArrayRef, GenericStringArray, Int64Array, OffsetSizeTrait, PrimitiveArray},
     datatypes::{ArrowNativeType, ArrowPrimitiveType},
 };
 use datafusion_common::{DataFusionError, Result};
@@ -64,7 +62,7 @@ macro_rules! downcast_arg {
 /// character_length('josé') = 4
 pub fn character_length<T: ArrowPrimitiveType>(args: &[ArrayRef]) -> Result<ArrayRef>
 where
-    T::Native: StringOffsetSizeTrait,
+    T::Native: OffsetSizeTrait,
 {
     let string_array: &GenericStringArray<T::Native> = args[0]
         .as_any()
@@ -89,7 +87,7 @@ where
 
 /// Returns first n characters in the string, or when n is negative, returns all but last |n| characters.
 /// left('abcde', 2) = 'ab'
-pub fn left<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn left<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
     let string_array = downcast_string_arg!(args[0], "string", T);
     let n_array = downcast_arg!(args[1], "n", Int64Array);
     let result = string_array
@@ -122,7 +120,7 @@ pub fn left<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
 
 /// Extends the string to length 'length' by prepending the characters fill (a space by default). If the string is already longer than length then it is truncated (on the right).
 /// lpad('hi', 5, 'xy') = 'xyxhi'
-pub fn lpad<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn lpad<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
     match args.len() {
         2 => {
             let string_array = downcast_string_arg!(args[0], "string", T);
@@ -211,7 +209,7 @@ pub fn lpad<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
 
 /// Reverses the order of the characters in the string.
 /// reverse('abcde') = 'edcba'
-pub fn reverse<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn reverse<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
     let string_array = downcast_string_arg!(args[0], "string", T);
 
     let result = string_array
@@ -226,7 +224,7 @@ pub fn reverse<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef>
 
 /// Returns last n characters in the string, or when n is negative, returns all but first |n| characters.
 /// right('abcde', 2) = 'de'
-pub fn right<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn right<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
     let string_array = downcast_string_arg!(args[0], "string", T);
     let n_array = downcast_arg!(args[1], "n", Int64Array);
 
@@ -274,7 +272,7 @@ pub fn right<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
 
 /// Extends the string to length 'length' by appending the characters fill (a space by default). If the string is already longer than length then it is truncated.
 /// rpad('hi', 5, 'xy') = 'hixyx'
-pub fn rpad<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn rpad<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
     match args.len() {
         2 => {
             let string_array = downcast_string_arg!(args[0], "string", T);
@@ -353,7 +351,7 @@ pub fn rpad<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
 /// strpos('high', 'ig') = 2
 pub fn strpos<T: ArrowPrimitiveType>(args: &[ArrayRef]) -> Result<ArrayRef>
 where
-    T::Native: StringOffsetSizeTrait,
+    T::Native: OffsetSizeTrait,
 {
     let string_array: &GenericStringArray<T::Native> = args[0]
         .as_any()
@@ -410,7 +408,7 @@ where
 /// Extracts the substring of string starting at the start'th character, and extending for count characters if that is specified. (Same as substring(string from start for count).)
 /// substr('alphabet', 3) = 'phabet'
 /// substr('alphabet', 3, 2) = 'ph'
-pub fn substr<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn substr<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
     match args.len() {
         2 => {
             let string_array = downcast_string_arg!(args[0], "string", T);
@@ -497,7 +495,7 @@ pub fn substr<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
 
 /// Replaces each character in string that matches a character in the from set with the corresponding character in the to set. If from is longer than to, occurrences of the extra characters in from are deleted.
 /// translate('12345', '143', 'ax') = 'a2x5'
-pub fn translate<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+pub fn translate<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
     let string_array = downcast_string_arg!(args[0], "string", T);
     let from_array = downcast_string_arg!(args[1], "from", T);
     let to_array = downcast_string_arg!(args[2], "to", T);
diff --git a/datafusion/row/Cargo.toml b/datafusion/row/Cargo.toml
index 13b485fb7..f9a150699 100644
--- a/datafusion/row/Cargo.toml
+++ b/datafusion/row/Cargo.toml
@@ -37,7 +37,7 @@ path = "src/lib.rs"
 jit = ["datafusion-jit"]
 
 [dependencies]
-arrow = { version = "13" }
+arrow = { version = "14.0.0" }
 datafusion-common = { path = "../common", version = "8.0.0" }
 datafusion-jit = { path = "../jit", version = "8.0.0", optional = true }
 paste = "^1.0"