You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ne...@apache.org on 2019/08/01 09:15:17 UTC

[arrow] branch master updated: ARROW-6085: [Rust] [DataFusion] Add traits for physical query plan

This is an automated email from the ASF dual-hosted git repository.

nevime pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 9064571  ARROW-6085: [Rust] [DataFusion] Add traits for physical query plan
9064571 is described below

commit 90645719c1386758c30ff2ddc85c2938ce209936
Author: Andy Grove <an...@gmail.com>
AuthorDate: Thu Aug 1 11:14:45 2019 +0200

    ARROW-6085: [Rust] [DataFusion] Add traits for physical query plan
    
    This is the first of quite a few PRs to add support for parallel (multi-threaded) query execution based on the PoC in https://github.com/apache/arrow/pull/4221
    
    Closes #4975 from andygrove/ARROW-6085 and squashes the following commits:
    
    f3a18d72b <Andy Grove> Add traits for physical query plan
    
    Authored-by: Andy Grove <an...@gmail.com>
    Signed-off-by: Neville Dipale <ne...@gmail.com>
---
 rust/datafusion/src/execution/mod.rs           |  1 +
 rust/datafusion/src/execution/physical_plan.rs | 44 ++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)

diff --git a/rust/datafusion/src/execution/mod.rs b/rust/datafusion/src/execution/mod.rs
index d4f57a7..bd9ef91 100644
--- a/rust/datafusion/src/execution/mod.rs
+++ b/rust/datafusion/src/execution/mod.rs
@@ -22,6 +22,7 @@ pub mod context;
 pub mod expression;
 pub mod filter;
 pub mod limit;
+pub mod physical_plan;
 pub mod projection;
 pub mod relation;
 pub mod scalar_relation;
diff --git a/rust/datafusion/src/execution/physical_plan.rs b/rust/datafusion/src/execution/physical_plan.rs
new file mode 100644
index 0000000..79fb88e
--- /dev/null
+++ b/rust/datafusion/src/execution/physical_plan.rs
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Traits for physical query plan, supporting parallel execution for partitioned relations.
+
+use arrow::datatypes::Schema;
+use arrow::record_batch::RecordBatch;
+use std::sync::Arc;
+
+use crate::error::Result;
+
+/// Partition-aware execution plan for a relation
+pub trait ExecutionPlan {
+    /// Get the schema for this execution plan
+    fn schema(&self) -> Arc<Schema>;
+    /// Get the partitions (as trait objects) for this plan. Each partition can be executed in parallel.
+    fn partitions(&self) -> Result<Vec<Arc<dyn Partition>>>;
+}
+
+/// Represents a partition of an execution plan that can be executed on a thread
+pub trait Partition: Send + Sync {
+    /// Execute this partition and return a shared, dynamically dispatched iterator over RecordBatch
+    fn execute(&self) -> Result<Arc<dyn BatchIterator>>;
+}
+
+/// Iterator over RecordBatch; the `Send + Sync` bound allows it to be sent between threads
+pub trait BatchIterator: Send + Sync {
+    /// Get the next RecordBatch; `None` presumably signals the end of input (TODO confirm)
+    fn next(&self) -> Result<Option<RecordBatch>>;
+}