You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2021/12/04 11:43:19 UTC
[arrow-datafusion] branch master updated: Update rust version to 1.57 (#1395)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new d047900 Update rust version to 1.57 (#1395)
d047900 is described below
commit d04790041caecdc53077de37db3e30388f8ff38c
Author: Carlos <wx...@gmail.com>
AuthorDate: Sat Dec 4 19:43:12 2021 +0800
Update rust version to 1.57 (#1395)
* Update rust version to 1.57
* Clean ups related to clippy
* remove more unused fields
* Remove some more unused code
* remove data_type from Average, but keep in constructor
Co-authored-by: Andrew Lamb <an...@nerdnetworks.org>
---
ballista-examples/Cargo.toml | 2 +-
ballista/rust/client/Cargo.toml | 2 +-
ballista/rust/core/src/config.rs | 12 ++++++------
ballista/rust/core/src/serde/scheduler/mod.rs | 12 +-----------
ballista/rust/executor/src/flight_service.rs | 6 +++---
ballista/rust/scheduler/src/state/mod.rs | 2 +-
benchmarks/Cargo.toml | 2 +-
benchmarks/src/bin/tpch.rs | 14 ++++++--------
datafusion-cli/Cargo.toml | 2 +-
datafusion-cli/Dockerfile | 2 +-
datafusion-examples/Cargo.toml | 2 +-
datafusion/Cargo.toml | 2 +-
datafusion/src/physical_plan/datetime_expressions.rs | 6 +++++-
datafusion/src/physical_plan/expressions/average.rs | 9 +++++----
datafusion/src/physical_plan/sort_preserving_merge.rs | 6 +++---
datafusion/src/physical_plan/string_expressions.rs | 5 +----
datafusion/src/physical_plan/windows/built_in.rs | 10 +---------
datafusion/src/physical_plan/windows/mod.rs | 2 --
datafusion/src/scalar.rs | 4 ++--
datafusion/src/sql/parser.rs | 12 ++++++++----
datafusion/src/test/exec.rs | 4 ++--
dev/docker/ballista-base.dockerfile | 2 +-
python/Cargo.toml | 2 +-
23 files changed, 53 insertions(+), 69 deletions(-)
diff --git a/ballista-examples/Cargo.toml b/ballista-examples/Cargo.toml
index 65cdec4..a2d2fd6 100644
--- a/ballista-examples/Cargo.toml
+++ b/ballista-examples/Cargo.toml
@@ -26,7 +26,7 @@ license = "Apache-2.0"
keywords = [ "arrow", "distributed", "query", "sql" ]
edition = "2021"
publish = false
-rust-version = "1.56"
+rust-version = "1.57"
[dependencies]
datafusion = { path = "../datafusion" }
diff --git a/ballista/rust/client/Cargo.toml b/ballista/rust/client/Cargo.toml
index f444689..7736e94 100644
--- a/ballista/rust/client/Cargo.toml
+++ b/ballista/rust/client/Cargo.toml
@@ -24,7 +24,7 @@ homepage = "https://github.com/apache/arrow-datafusion"
repository = "https://github.com/apache/arrow-datafusion"
authors = ["Apache Arrow <de...@arrow.apache.org>"]
edition = "2021"
-rust-version = "1.56"
+rust-version = "1.57"
[dependencies]
ballista-core = { path = "../core", version = "0.6.0" }
diff --git a/ballista/rust/core/src/config.rs b/ballista/rust/core/src/config.rs
index dcc0bdb..5d7b3c5 100644
--- a/ballista/rust/core/src/config.rs
+++ b/ballista/rust/core/src/config.rs
@@ -31,22 +31,22 @@ pub const BALLISTA_DEFAULT_SHUFFLE_PARTITIONS: &str = "ballista.shuffle.partitio
#[derive(Debug, Clone)]
pub struct ConfigEntry {
name: String,
- description: String,
- data_type: DataType,
+ _description: String,
+ _data_type: DataType,
default_value: Option<String>,
}
impl ConfigEntry {
fn new(
name: String,
- description: String,
- data_type: DataType,
+ _description: String,
+ _data_type: DataType,
default_value: Option<String>,
) -> Self {
Self {
name,
- description,
- data_type,
+ _description,
+ _data_type,
default_value,
}
}
diff --git a/ballista/rust/core/src/serde/scheduler/mod.rs b/ballista/rust/core/src/serde/scheduler/mod.rs
index a20d955..8c13c32 100644
--- a/ballista/rust/core/src/serde/scheduler/mod.rs
+++ b/ballista/rust/core/src/serde/scheduler/mod.rs
@@ -101,23 +101,13 @@ impl From<protobuf::ExecutorMetadata> for ExecutorMeta {
}
/// Summary of executed partition
-#[derive(Debug, Copy, Clone)]
+#[derive(Debug, Copy, Clone, Default)]
pub struct PartitionStats {
pub(crate) num_rows: Option<u64>,
pub(crate) num_batches: Option<u64>,
pub(crate) num_bytes: Option<u64>,
}
-impl Default for PartitionStats {
- fn default() -> Self {
- Self {
- num_rows: None,
- num_batches: None,
- num_bytes: None,
- }
- }
-}
-
impl fmt::Display for PartitionStats {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
diff --git a/ballista/rust/executor/src/flight_service.rs b/ballista/rust/executor/src/flight_service.rs
index 27b1a33..cf5ab17 100644
--- a/ballista/rust/executor/src/flight_service.rs
+++ b/ballista/rust/executor/src/flight_service.rs
@@ -54,12 +54,12 @@ type FlightDataReceiver = Receiver<Result<FlightData, Status>>;
#[derive(Clone)]
pub struct BallistaFlightService {
/// Executor
- executor: Arc<Executor>,
+ _executor: Arc<Executor>,
}
impl BallistaFlightService {
- pub fn new(executor: Arc<Executor>) -> Self {
- Self { executor }
+ pub fn new(_executor: Arc<Executor>) -> Self {
+ Self { _executor }
}
}
diff --git a/ballista/rust/scheduler/src/state/mod.rs b/ballista/rust/scheduler/src/state/mod.rs
index 64f5953..ef6de83 100644
--- a/ballista/rust/scheduler/src/state/mod.rs
+++ b/ballista/rust/scheduler/src/state/mod.rs
@@ -567,7 +567,7 @@ fn find_unresolved_shuffles(
Ok(plan
.children()
.iter()
- .map(|child| find_unresolved_shuffles(child))
+ .map(find_unresolved_shuffles)
.collect::<Result<Vec<_>>>()?
.into_iter()
.flatten()
diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml
index 4fc41b3..c042778 100644
--- a/benchmarks/Cargo.toml
+++ b/benchmarks/Cargo.toml
@@ -25,7 +25,7 @@ homepage = "https://github.com/apache/arrow-datafusion"
repository = "https://github.com/apache/arrow-datafusion"
license = "Apache-2.0"
publish = false
-rust-version = "1.56"
+rust-version = "1.57"
[features]
simd = ["datafusion/simd"]
diff --git a/benchmarks/src/bin/tpch.rs b/benchmarks/src/bin/tpch.rs
index 7bc6510..2e074d2 100644
--- a/benchmarks/src/bin/tpch.rs
+++ b/benchmarks/src/bin/tpch.rs
@@ -75,10 +75,9 @@ struct BallistaBenchmarkOpt {
#[structopt(short = "i", long = "iterations", default_value = "3")]
iterations: usize,
- /// Batch size when reading CSV or Parquet files
- #[structopt(short = "s", long = "batch-size", default_value = "8192")]
- batch_size: usize,
-
+ // /// Batch size when reading CSV or Parquet files
+ // #[structopt(short = "s", long = "batch-size", default_value = "8192")]
+ // batch_size: usize,
/// Path to data files
#[structopt(parse(from_os_str), required = true, short = "p", long = "path")]
path: PathBuf,
@@ -87,10 +86,9 @@ struct BallistaBenchmarkOpt {
#[structopt(short = "f", long = "format", default_value = "csv")]
file_format: String,
- /// Load the data into a MemTable before executing the query
- #[structopt(short = "m", long = "mem-table")]
- mem_table: bool,
-
+ // /// Load the data into a MemTable before executing the query
+ // #[structopt(short = "m", long = "mem-table")]
+ // mem_table: bool,
/// Number of partitions to process in parallel
#[structopt(short = "p", long = "partitions", default_value = "2")]
partitions: usize,
diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml
index 0434f09..2722750 100644
--- a/datafusion-cli/Cargo.toml
+++ b/datafusion-cli/Cargo.toml
@@ -24,7 +24,7 @@ keywords = [ "arrow", "datafusion", "ballista", "query", "sql" ]
license = "Apache-2.0"
homepage = "https://github.com/apache/arrow-datafusion"
repository = "https://github.com/apache/arrow-datafusion"
-rust-version = "1.56"
+rust-version = "1.57"
[dependencies]
clap = "2.33"
diff --git a/datafusion-cli/Dockerfile b/datafusion-cli/Dockerfile
index fe177b6..fed1418 100644
--- a/datafusion-cli/Dockerfile
+++ b/datafusion-cli/Dockerfile
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-FROM rust:1.56 as builder
+FROM rust:1.57 as builder
COPY ./datafusion /usr/src/datafusion
diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml
index 3e8a6ec..d81100f 100644
--- a/datafusion-examples/Cargo.toml
+++ b/datafusion-examples/Cargo.toml
@@ -26,7 +26,7 @@ license = "Apache-2.0"
keywords = [ "arrow", "query", "sql" ]
edition = "2021"
publish = false
-rust-version = "1.56"
+rust-version = "1.57"
[[example]]
name = "avro_sql"
diff --git a/datafusion/Cargo.toml b/datafusion/Cargo.toml
index fbe84e3..73ed9c0 100644
--- a/datafusion/Cargo.toml
+++ b/datafusion/Cargo.toml
@@ -31,7 +31,7 @@ include = [
"Cargo.toml",
]
edition = "2021"
-rust-version = "1.56"
+rust-version = "1.57"
[lib]
name = "datafusion"
diff --git a/datafusion/src/physical_plan/datetime_expressions.rs b/datafusion/src/physical_plan/datetime_expressions.rs
index a776c42f..d103127 100644
--- a/datafusion/src/physical_plan/datetime_expressions.rs
+++ b/datafusion/src/physical_plan/datetime_expressions.rs
@@ -42,6 +42,7 @@ use arrow::{
};
use chrono::prelude::*;
use chrono::Duration;
+use std::borrow::Borrow;
/// given a function `op` that maps a `&str` to a Result of an arrow native type,
/// returns a `PrimitiveArray` after the application
@@ -77,7 +78,10 @@ where
})?;
// first map is the iterator, second is for the `Option<_>`
- array.iter().map(|x| x.map(|x| op(x)).transpose()).collect()
+ array
+ .iter()
+ .map(|x| x.map(op.borrow()).transpose())
+ .collect()
}
// given an function that maps a `&str` to a arrow native type,
diff --git a/datafusion/src/physical_plan/expressions/average.rs b/datafusion/src/physical_plan/expressions/average.rs
index 2e21819..17d3041 100644
--- a/datafusion/src/physical_plan/expressions/average.rs
+++ b/datafusion/src/physical_plan/expressions/average.rs
@@ -37,8 +37,6 @@ use super::{format_state_name, sum};
#[derive(Debug)]
pub struct Avg {
name: String,
- data_type: DataType,
- nullable: bool,
expr: Arc<dyn PhysicalExpr>,
}
@@ -69,11 +67,14 @@ impl Avg {
name: impl Into<String>,
data_type: DataType,
) -> Self {
+ // Average is always Float64, but Avg::new() has a data_type
+ // parameter to keep a consistent signature with the other
+ // Aggregate expressions.
+ assert_eq!(data_type, DataType::Float64);
+
Self {
name: name.into(),
expr,
- data_type,
- nullable: true,
}
}
}
diff --git a/datafusion/src/physical_plan/sort_preserving_merge.rs b/datafusion/src/physical_plan/sort_preserving_merge.rs
index 3f4827b..c90c653 100644
--- a/datafusion/src/physical_plan/sort_preserving_merge.rs
+++ b/datafusion/src/physical_plan/sort_preserving_merge.rs
@@ -346,7 +346,7 @@ struct SortPreservingMergeStream {
receivers: Vec<mpsc::Receiver<ArrowResult<RecordBatch>>>,
/// Drop helper for tasks feeding the [`receivers`](Self::receivers)
- drop_helper: AbortOnDropMany<()>,
+ _drop_helper: AbortOnDropMany<()>,
/// For each input stream maintain a dequeue of SortKeyCursor
///
@@ -379,7 +379,7 @@ struct SortPreservingMergeStream {
impl SortPreservingMergeStream {
fn new(
receivers: Vec<mpsc::Receiver<ArrowResult<RecordBatch>>>,
- drop_helper: AbortOnDropMany<()>,
+ _drop_helper: AbortOnDropMany<()>,
schema: SchemaRef,
expressions: &[PhysicalSortExpr],
target_batch_size: usize,
@@ -394,7 +394,7 @@ impl SortPreservingMergeStream {
schema,
cursors,
receivers,
- drop_helper,
+ _drop_helper,
column_expressions: expressions.iter().map(|x| x.expr.clone()).collect(),
sort_options: expressions.iter().map(|x| x.options).collect(),
target_batch_size,
diff --git a/datafusion/src/physical_plan/string_expressions.rs b/datafusion/src/physical_plan/string_expressions.rs
index e6d234f..a9e4c2f 100644
--- a/datafusion/src/physical_plan/string_expressions.rs
+++ b/datafusion/src/physical_plan/string_expressions.rs
@@ -117,10 +117,7 @@ where
let string_array = downcast_string_arg!(args[0], "string", T);
// first map is the iterator, second is for the `Option<_>`
- Ok(string_array
- .iter()
- .map(|string| string.map(|s| op(s)))
- .collect())
+ Ok(string_array.iter().map(|string| string.map(&op)).collect())
}
fn handle<'a, F, R>(args: &'a [ColumnarValue], op: F, name: &str) -> Result<ColumnarValue>
diff --git a/datafusion/src/physical_plan/windows/built_in.rs b/datafusion/src/physical_plan/windows/built_in.rs
index 82040de..de627cb 100644
--- a/datafusion/src/physical_plan/windows/built_in.rs
+++ b/datafusion/src/physical_plan/windows/built_in.rs
@@ -18,10 +18,8 @@
//! Physical exec for built-in window function expressions.
use crate::error::{DataFusionError, Result};
-use crate::logical_plan::window_frames::WindowFrame;
use crate::physical_plan::{
- expressions::PhysicalSortExpr,
- window_functions::{BuiltInWindowFunction, BuiltInWindowFunctionExpr},
+ expressions::PhysicalSortExpr, window_functions::BuiltInWindowFunctionExpr,
PhysicalExpr, WindowExpr,
};
use arrow::compute::concat;
@@ -33,28 +31,22 @@ use std::sync::Arc;
/// A window expr that takes the form of a built in window function
#[derive(Debug)]
pub struct BuiltInWindowExpr {
- fun: BuiltInWindowFunction,
expr: Arc<dyn BuiltInWindowFunctionExpr>,
partition_by: Vec<Arc<dyn PhysicalExpr>>,
order_by: Vec<PhysicalSortExpr>,
- window_frame: Option<WindowFrame>,
}
impl BuiltInWindowExpr {
/// create a new built-in window function expression
pub(super) fn new(
- fun: BuiltInWindowFunction,
expr: Arc<dyn BuiltInWindowFunctionExpr>,
partition_by: &[Arc<dyn PhysicalExpr>],
order_by: &[PhysicalSortExpr],
- window_frame: Option<WindowFrame>,
) -> Self {
Self {
- fun,
expr,
partition_by: partition_by.to_vec(),
order_by: order_by.to_vec(),
- window_frame,
}
}
}
diff --git a/datafusion/src/physical_plan/windows/mod.rs b/datafusion/src/physical_plan/windows/mod.rs
index 8b182f9..497cbc3 100644
--- a/datafusion/src/physical_plan/windows/mod.rs
+++ b/datafusion/src/physical_plan/windows/mod.rs
@@ -64,11 +64,9 @@ pub fn create_window_expr(
window_frame,
)),
WindowFunction::BuiltInWindowFunction(fun) => Arc::new(BuiltInWindowExpr::new(
- fun.clone(),
create_built_in_window_expr(fun, args, input_schema, name)?,
partition_by,
order_by,
- window_frame,
)),
})
}
diff --git a/datafusion/src/scalar.rs b/datafusion/src/scalar.rs
index 4a5a2c3..c06ccb1 100644
--- a/datafusion/src/scalar.rs
+++ b/datafusion/src/scalar.rs
@@ -68,7 +68,7 @@ pub enum ScalarValue {
/// large binary
LargeBinary(Option<Vec<u8>>),
/// list of nested ScalarValue (boxed to reduce size_of(ScalarValue))
- #[allow(clippy::box_vec)]
+ #[allow(clippy::box_collection)]
List(Option<Box<Vec<ScalarValue>>>, Box<DataType>),
/// Date stored as a signed 32bit int
Date32(Option<i32>),
@@ -87,7 +87,7 @@ pub enum ScalarValue {
/// Interval with DayTime unit
IntervalDayTime(Option<i64>),
/// struct of nested ScalarValue (boxed to reduce size_of(ScalarValue))
- #[allow(clippy::box_vec)]
+ #[allow(clippy::box_collection)]
Struct(Option<Box<Vec<ScalarValue>>>, Box<Vec<Field>>),
}
diff --git a/datafusion/src/sql/parser.rs b/datafusion/src/sql/parser.rs
index 49ae869..3352577 100644
--- a/datafusion/src/sql/parser.rs
+++ b/datafusion/src/sql/parser.rs
@@ -85,7 +85,7 @@ pub struct CreateExternalTable {
#[derive(Debug, Clone, PartialEq)]
pub enum Statement {
/// ANSI SQL AST node
- Statement(SQLStatement),
+ Statement(Box<SQLStatement>),
/// Extension: `CREATE EXTERNAL TABLE`
CreateExternalTable(CreateExternalTable),
}
@@ -167,13 +167,17 @@ impl<'a> DFParser<'a> {
}
_ => {
// use the native parser
- Ok(Statement::Statement(self.parser.parse_statement()?))
+ Ok(Statement::Statement(Box::from(
+ self.parser.parse_statement()?,
+ )))
}
}
}
_ => {
// use the native parser
- Ok(Statement::Statement(self.parser.parse_statement()?))
+ Ok(Statement::Statement(Box::from(
+ self.parser.parse_statement()?,
+ )))
}
}
}
@@ -183,7 +187,7 @@ impl<'a> DFParser<'a> {
if self.parser.parse_keyword(Keyword::EXTERNAL) {
self.parse_create_external_table()
} else {
- Ok(Statement::Statement(self.parser.parse_create()?))
+ Ok(Statement::Statement(Box::from(self.parser.parse_create()?)))
}
}
diff --git a/datafusion/src/test/exec.rs b/datafusion/src/test/exec.rs
index fd10b9c..4a9534f 100644
--- a/datafusion/src/test/exec.rs
+++ b/datafusion/src/test/exec.rs
@@ -549,7 +549,7 @@ impl ExecutionPlan for BlockingExec {
async fn execute(&self, _partition: usize) -> Result<SendableRecordBatchStream> {
Ok(Box::pin(BlockingStream {
schema: Arc::clone(&self.schema),
- refs: Arc::clone(&self.refs),
+ _refs: Arc::clone(&self.refs),
}))
}
@@ -577,7 +577,7 @@ pub struct BlockingStream {
schema: SchemaRef,
/// Ref-counting helper to check if the stream are still in memory.
- refs: Arc<()>,
+ _refs: Arc<()>,
}
impl Stream for BlockingStream {
diff --git a/dev/docker/ballista-base.dockerfile b/dev/docker/ballista-base.dockerfile
index df4f32c..cf845e0 100644
--- a/dev/docker/ballista-base.dockerfile
+++ b/dev/docker/ballista-base.dockerfile
@@ -23,7 +23,7 @@
# Base image extends debian:buster-slim
-FROM rust:1.56.0-buster AS builder
+FROM rust:1.57.0-buster AS builder
RUN apt update && apt -y install musl musl-dev musl-tools libssl-dev openssl
diff --git a/python/Cargo.toml b/python/Cargo.toml
index 568f3c7..974a614 100644
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -25,7 +25,7 @@ description = "Build and run queries against data"
readme = "README.md"
license = "Apache-2.0"
edition = "2021"
-rust-version = "1.56"
+rust-version = "1.57"
[dependencies]
tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync"] }