You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2022/04/19 17:09:41 UTC

[arrow-datafusion] branch master updated: Update to Arrow 12.0.0, update tonic and prost (#2253)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 7548e961b Update to Arrow 12.0.0, update tonic and prost (#2253)
7548e961b is described below

commit 7548e961b58f75fc26fd4fb16c99377f6f468fc9
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Tue Apr 19 13:09:36 2022 -0400

    Update to Arrow 12.0.0, update tonic and prost (#2253)
    
    * Update to arrow-rs 12
    
    * Update for change in arrow interface
    
    * Update prost/tonic
    
    * add ticket reference
    
    * Set up building with standardized action, install protoc
    
    * Update etcd-client requirement from 0.8 to 0.9
    
    Updates the requirements on [etcd-client](https://github.com/etcdv3/etcd-client) to permit the latest version.
    - [Release notes](https://github.com/etcdv3/etcd-client/releases)
    - [Commits](https://github.com/etcdv3/etcd-client/compare/0.8.0...v0.9.0)
    
    ---
    updated-dependencies:
    - dependency-name: etcd-client
      dependency-type: direct:production
    ...
    
    Signed-off-by: dependabot[bot] <su...@github.com>
    
    Co-authored-by: dependabot[bot] <49...@users.noreply.github.com>
---
 .../actions/setup-builder/action.yaml              | 33 +++++++++++------
 .github/workflows/rust.yml                         | 42 +++++++++++-----------
 ballista-examples/Cargo.toml                       |  4 +--
 ballista/rust/core/Cargo.toml                      | 10 +++---
 ballista/rust/executor/Cargo.toml                  |  6 ++--
 ballista/rust/scheduler/Cargo.toml                 |  8 ++---
 datafusion-cli/Cargo.toml                          |  2 +-
 datafusion-examples/Cargo.toml                     |  6 ++--
 datafusion/common/Cargo.toml                       |  4 +--
 datafusion/core/Cargo.toml                         |  4 +--
 datafusion/core/fuzz-utils/Cargo.toml              |  2 +-
 .../core/src/physical_plan/file_format/json.rs     | 20 +++++++----
 datafusion/expr/Cargo.toml                         |  2 +-
 datafusion/jit/Cargo.toml                          |  2 +-
 datafusion/physical-expr/Cargo.toml                |  2 +-
 datafusion/proto/Cargo.toml                        |  4 +--
 16 files changed, 84 insertions(+), 67 deletions(-)

diff --git a/datafusion/core/fuzz-utils/Cargo.toml b/.github/actions/setup-builder/action.yaml
similarity index 55%
copy from datafusion/core/fuzz-utils/Cargo.toml
copy to .github/actions/setup-builder/action.yaml
index 65e36797a..13a3008b7 100644
--- a/datafusion/core/fuzz-utils/Cargo.toml
+++ b/.github/actions/setup-builder/action.yaml
@@ -15,14 +15,25 @@
 # specific language governing permissions and limitations
 # under the License.
 
-[package]
-name = "fuzz-utils"
-version = "0.1.0"
-edition = "2021"
-
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
-
-[dependencies]
-arrow = { version = "11.1", features = ["prettyprint"] }
-env_logger = "0.9.0"
-rand = "0.8"
+name: Prepare Rust Builder
+description: 'Prepare Rust Build Environment'
+inputs:
+  rust-version:
+    description: 'version of rust to install (e.g. stable)'
+    required: true
+    default: 'stable'
+runs:
+  using: "composite"
+  steps:
+    - name: Install Build Dependencies
+      shell: bash
+      run: |
+        apt-get update
+        apt-get install -y protobuf-compiler
+    - name: Setup Rust toolchain
+      shell: bash
+      run: |
+        echo "Installing ${{ inputs.rust-version }}"
+        rustup toolchain install ${{ inputs.rust-version }}
+        rustup default ${{ inputs.rust-version }}
+        rustup component add rustfmt
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index cae7864da..8646f8058 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -54,10 +54,9 @@ jobs:
           path: /github/home/target
           key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }}-
       - name: Setup Rust toolchain
-        run: |
-          rustup toolchain install ${{ matrix.rust }}
-          rustup default ${{ matrix.rust }}
-          rustup component add rustfmt
+        uses: ./.github/actions/setup-builder
+        with:
+          rust-version: ${{ matrix.rust }}
       - name: Build workspace in debug mode
         run: |
           cargo build
@@ -117,10 +116,9 @@ jobs:
           # this key equals the ones on `linux-build-lib` for re-use
           key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }}
       - name: Setup Rust toolchain
-        run: |
-          rustup toolchain install ${{ matrix.rust }}
-          rustup default ${{ matrix.rust }}
-          rustup component add rustfmt
+        uses: ./.github/actions/setup-builder
+        with:
+          rust-version: ${{ matrix.rust }}
       - name: Run tests
         run: |
           export ARROW_TEST_DATA=$(pwd)/testing/data
@@ -285,10 +283,9 @@ jobs:
           echo "LIBRARY_PATH=$LD_LIBRARY_PATH" >> $GITHUB_ENV
           python -m pip install pyarrow
       - name: Setup Rust toolchain
-        run: |
-          rustup toolchain install ${{ matrix.rust }}
-          rustup default ${{ matrix.rust }}
-          rustup component add rustfmt
+        uses: ./.github/actions/setup-builder
+        with:
+          rust-version: ${{ matrix.rust }}
       - name: Run tests
         run: |
           cd datafusion
@@ -343,10 +340,12 @@ jobs:
           # this key equals the ones on `linux-build-lib` for re-use
           key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }}
       - name: Setup Rust toolchain
+        uses: ./.github/actions/setup-builder
+        with:
+          rust-version: ${{ matrix.rust }}
+      - name: Install Clippy
         run: |
-          rustup toolchain install ${{ matrix.rust }}
-          rustup default ${{ matrix.rust }}
-          rustup component add rustfmt clippy
+          rustup component add clippy
       - name: Run clippy
         run: |
           cargo clippy --all-targets --workspace -- -D warnings
@@ -420,10 +419,9 @@ jobs:
           # this key equals the ones on `linux-build-lib` for re-use
           key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }}
       - name: Setup Rust toolchain
-        run: |
-          rustup toolchain install ${{ matrix.rust }}
-          rustup default ${{ matrix.rust }}
-          rustup component add rustfmt
+        uses: ./.github/actions/setup-builder
+        with:
+          rust-version: ${{ matrix.rust }}
       - name: Run tests
         run: |
           export ARROW_TEST_DATA=$(pwd)/testing/data
@@ -466,9 +464,9 @@ jobs:
           # this key equals the ones on `linux-build-lib` for re-use
           key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }}
       - name: Setup Rust toolchain
-        run: |
-          rustup toolchain install ${{ matrix.rust }}
-          rustup default ${{ matrix.rust }}
+        uses: ./.github/actions/setup-builder
+        with:
+          rust-version: ${{ matrix.rust }}
       - name: Install cargo-tomlfmt
         run: |
           which cargo-tomlfmt || cargo install cargo-tomlfmt
diff --git a/ballista-examples/Cargo.toml b/ballista-examples/Cargo.toml
index fb956ea2c..afdd4862b 100644
--- a/ballista-examples/Cargo.toml
+++ b/ballista-examples/Cargo.toml
@@ -39,6 +39,6 @@ ballista = { path = "../ballista/rust/client", version = "0.6.0" }
 datafusion = { path = "../datafusion/core" }
 futures = "0.3"
 num_cpus = "1.13.0"
-prost = "0.9"
+prost = "0.10"
 tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot"] }
-tonic = "0.6"
+tonic = "0.7"
diff --git a/ballista/rust/core/Cargo.toml b/ballista/rust/core/Cargo.toml
index 8e2768341..94c6dfdb7 100644
--- a/ballista/rust/core/Cargo.toml
+++ b/ballista/rust/core/Cargo.toml
@@ -34,7 +34,7 @@ simd = ["datafusion/simd"]
 [dependencies]
 ahash = { version = "0.7", default-features = false }
 
-arrow-flight = { version = "11.1" }
+arrow-flight = { version = "12" }
 async-trait = "0.1.41"
 chrono = { version = "0.4", default-features = false }
 clap = { version = "3", features = ["derive", "cargo"] }
@@ -49,12 +49,12 @@ once_cell = "1.9.0"
 
 parking_lot = "0.12"
 parse_arg = "0.1.3"
-prost = "0.9"
-prost-types = "0.9"
+prost = "0.10"
+prost-types = "0.10"
 serde = { version = "1", features = ["derive"] }
 sqlparser = "0.16"
 tokio = "1.0"
-tonic = "0.6"
+tonic = "0.7"
 uuid = { version = "0.8", features = ["v4"] }
 walkdir = "2.3.2"
 
@@ -63,4 +63,4 @@ tempfile = "3"
 
 [build-dependencies]
 rustc_version = "0.4.0"
-tonic-build = { version = "0.6" }
+tonic-build = { version = "0.7" }
diff --git a/ballista/rust/executor/Cargo.toml b/ballista/rust/executor/Cargo.toml
index 9ee793b7b..c0dfe1046 100644
--- a/ballista/rust/executor/Cargo.toml
+++ b/ballista/rust/executor/Cargo.toml
@@ -33,8 +33,8 @@ snmalloc = ["snmalloc-rs"]
 
 [dependencies]
 anyhow = "1"
-arrow = { version = "11.1" }
-arrow-flight = { version = "11.1" }
+arrow = { version = "12" }
+arrow-flight = { version = "12" }
 async-trait = "0.1.41"
 ballista-core = { path = "../core", version = "0.6.0" }
 chrono = { version = "0.4", default-features = false }
@@ -49,7 +49,7 @@ snmalloc-rs = { version = "0.2", optional = true }
 tempfile = "3"
 tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "parking_lot"] }
 tokio-stream = { version = "0.1", features = ["net"] }
-tonic = "0.6"
+tonic = "0.7"
 uuid = { version = "0.8", features = ["v4"] }
 
 [dev-dependencies]
diff --git a/ballista/rust/scheduler/Cargo.toml b/ballista/rust/scheduler/Cargo.toml
index 25465adf5..884573c2d 100644
--- a/ballista/rust/scheduler/Cargo.toml
+++ b/ballista/rust/scheduler/Cargo.toml
@@ -42,7 +42,7 @@ clap = { version = "3", features = ["derive", "cargo"] }
 configure_me = "0.4.0"
 datafusion = { path = "../../../datafusion/core", version = "7.0.0" }
 env_logger = "0.9"
-etcd-client = { version = "0.8", optional = true }
+etcd-client = { version = "0.9", optional = true }
 futures = "0.3"
 http = "0.2"
 http-body = "0.4"
@@ -50,13 +50,13 @@ hyper = "0.14.4"
 log = "0.4"
 parking_lot = "0.12"
 parse_arg = "0.1.3"
-prost = "0.9"
+prost = "0.10"
 rand = "0.8"
 serde = { version = "1", features = ["derive"] }
 sled_package = { package = "sled", version = "0.34", optional = true }
 tokio = { version = "1.0", features = ["full"] }
 tokio-stream = { version = "0.1", features = ["net"], optional = true }
-tonic = "0.6"
+tonic = "0.7"
 tower = { version = "0.4" }
 warp = "0.3"
 
@@ -66,4 +66,4 @@ uuid = { version = "0.8", features = ["v4"] }
 
 [build-dependencies]
 configure_me_codegen = "0.4.1"
-tonic-build = { version = "0.6" }
+tonic-build = { version = "0.7" }
diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml
index 3a9a97083..e9895deb1 100644
--- a/datafusion-cli/Cargo.toml
+++ b/datafusion-cli/Cargo.toml
@@ -28,7 +28,7 @@ repository = "https://github.com/apache/arrow-datafusion"
 rust-version = "1.59"
 
 [dependencies]
-arrow = { version = "11.1" }
+arrow = { version = "12" }
 ballista = { path = "../ballista/rust/client", version = "0.6.0", optional = true }
 clap = { version = "3", features = ["derive", "cargo"] }
 datafusion = { path = "../datafusion/core", version = "7.0.0" }
diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml
index 8a9c2feb4..e64a44f70 100644
--- a/datafusion-examples/Cargo.toml
+++ b/datafusion-examples/Cargo.toml
@@ -34,11 +34,11 @@ path = "examples/avro_sql.rs"
 required-features = ["datafusion/avro"]
 
 [dev-dependencies]
-arrow-flight = { version = "11.1" }
+arrow-flight = { version = "12" }
 async-trait = "0.1.41"
 datafusion = { path = "../datafusion/core" }
 futures = "0.3"
 num_cpus = "1.13.0"
-prost = "0.9"
+prost = "0.10"
 tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot"] }
-tonic = "0.6"
+tonic = "0.7"
diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml
index 4ac4e978e..1ba19a056 100644
--- a/datafusion/common/Cargo.toml
+++ b/datafusion/common/Cargo.toml
@@ -38,10 +38,10 @@ jit = ["cranelift-module"]
 pyarrow = ["pyo3"]
 
 [dependencies]
-arrow = { version = "11.1", features = ["prettyprint"] }
+arrow = { version = "12", features = ["prettyprint"] }
 avro-rs = { version = "0.13", features = ["snappy"], optional = true }
 cranelift-module = { version = "0.82.0", optional = true }
 ordered-float = "2.10"
-parquet = { version = "11.1", features = ["arrow"], optional = true }
+parquet = { version = "12", features = ["arrow"], optional = true }
 pyo3 = { version = "0.16", optional = true }
 sqlparser = "0.16"
diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml
index 77d82cb46..467fe7b54 100644
--- a/datafusion/core/Cargo.toml
+++ b/datafusion/core/Cargo.toml
@@ -55,7 +55,7 @@ unicode_expressions = ["datafusion-physical-expr/regex_expressions"]
 
 [dependencies]
 ahash = { version = "0.7", default-features = false }
-arrow = { version = "11.1", features = ["prettyprint"] }
+arrow = { version = "12", features = ["prettyprint"] }
 async-trait = "0.1.41"
 avro-rs = { version = "0.13", features = ["snappy"], optional = true }
 chrono = { version = "0.4", default-features = false }
@@ -72,7 +72,7 @@ num-traits = { version = "0.2", optional = true }
 num_cpus = "1.13.0"
 ordered-float = "2.10"
 parking_lot = "0.12"
-parquet = { version = "11.1", features = ["arrow"] }
+parquet = { version = "12", features = ["arrow"] }
 paste = "^1.0"
 pin-project-lite= "^0.2.7"
 pyo3 = { version = "0.16", optional = true }
diff --git a/datafusion/core/fuzz-utils/Cargo.toml b/datafusion/core/fuzz-utils/Cargo.toml
index 65e36797a..9255920a3 100644
--- a/datafusion/core/fuzz-utils/Cargo.toml
+++ b/datafusion/core/fuzz-utils/Cargo.toml
@@ -23,6 +23,6 @@ edition = "2021"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
-arrow = { version = "11.1", features = ["prettyprint"] }
+arrow = { version = "12", features = ["prettyprint"] }
 env_logger = "0.9.0"
 rand = "0.8"
diff --git a/datafusion/core/src/physical_plan/file_format/json.rs b/datafusion/core/src/physical_plan/file_format/json.rs
index ef9d498a0..5c02a9c92 100644
--- a/datafusion/core/src/physical_plan/file_format/json.rs
+++ b/datafusion/core/src/physical_plan/file_format/json.rs
@@ -16,6 +16,7 @@
 // under the License.
 
 //! Execution plan for reading line-delimited JSON files
+use arrow::json::reader::DecoderOptions;
 use async_trait::async_trait;
 
 use crate::error::{DataFusionError, Result};
@@ -102,12 +103,19 @@ impl ExecutionPlan for NdJsonExec {
 
         // The json reader cannot limit the number of records, so `remaining` is ignored.
         let fun = move |file, _remaining: &Option<usize>| {
-            Box::new(json::Reader::new(
-                file,
-                Arc::clone(&file_schema),
-                batch_size,
-                proj.clone(),
-            )) as BatchIter
+            // TODO: make DecoderOptions implement Clone so we can
+            // clone here rather than recreating the options each time
+            // https://github.com/apache/arrow-rs/issues/1580
+            let options = DecoderOptions::new().with_batch_size(batch_size);
+
+            let options = if let Some(proj) = proj.clone() {
+                options.with_projection(proj)
+            } else {
+                options
+            };
+
+            Box::new(json::Reader::new(file, Arc::clone(&file_schema), options))
+                as BatchIter
         };
 
         Ok(Box::pin(FileStream::new(
diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml
index 33f51e387..7459490ef 100644
--- a/datafusion/expr/Cargo.toml
+++ b/datafusion/expr/Cargo.toml
@@ -36,6 +36,6 @@ path = "src/lib.rs"
 
 [dependencies]
 ahash = { version = "0.7", default-features = false }
-arrow = { version = "11.1", features = ["prettyprint"] }
+arrow = { version = "12", features = ["prettyprint"] }
 datafusion-common = { path = "../common", version = "7.0.0" }
 sqlparser = "0.16"
diff --git a/datafusion/jit/Cargo.toml b/datafusion/jit/Cargo.toml
index 052f5d82e..1ccfd9418 100644
--- a/datafusion/jit/Cargo.toml
+++ b/datafusion/jit/Cargo.toml
@@ -36,7 +36,7 @@ path = "src/lib.rs"
 jit = []
 
 [dependencies]
-arrow = { version = "11.1" }
+arrow = { version = "12" }
 cranelift = "0.82.0"
 cranelift-jit = "0.82.0"
 cranelift-module = "0.82.0"
diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml
index 6c39a0049..2ae5fa319 100644
--- a/datafusion/physical-expr/Cargo.toml
+++ b/datafusion/physical-expr/Cargo.toml
@@ -40,7 +40,7 @@ unicode_expressions = ["unicode-segmentation"]
 
 [dependencies]
 ahash = { version = "0.7", default-features = false }
-arrow = { version = "11.1", features = ["prettyprint"] }
+arrow = { version = "12", features = ["prettyprint"] }
 blake2 = { version = "^0.10.2", optional = true }
 blake3 = { version = "1.0", optional = true }
 chrono = { version = "0.4", default-features = false }
diff --git a/datafusion/proto/Cargo.toml b/datafusion/proto/Cargo.toml
index 5b1cdae72..bafc32712 100644
--- a/datafusion/proto/Cargo.toml
+++ b/datafusion/proto/Cargo.toml
@@ -36,7 +36,7 @@ path = "src/lib.rs"
 
 [dependencies]
 datafusion = { path = "../core", version = "7.0.0" }
-prost = "0.9"
+prost = "0.10"
 
 [build-dependencies]
-tonic-build = { version = "0.6" }
+tonic-build = { version = "0.7" }