You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/06/08 19:14:33 UTC
[GitHub] [arrow-datafusion] alamb commented on issue #2712: Common Subexpression Elimination pass errors if run twice on some plans: Schema contains duplicate unqualified field name 'IsNull-Column-sys.host'
alamb commented on issue #2712:
URL: https://github.com/apache/arrow-datafusion/issues/2712#issuecomment-1150297976
Here is a self contained reproducer:
```rust
use std::sync::Arc;
use datafusion::prelude::*;
use datafusion::arrow::array::Int32Array;
use datafusion::datasource::MemTable;
use datafusion::execution::context::TaskContext;
use datafusion::logical_plan::{LogicalPlanBuilder, provider_as_source};
use datafusion::physical_plan::collect;
use datafusion::error::Result;
use datafusion::arrow::{self, record_batch::RecordBatch};
// Self-contained reproducer: builds a filtered scan over a one-row in-memory
// table, runs the optimizer over the logical plan twice, then creates a
// physical plan from the result, executes it, and prints the output batches.
#[tokio::main]
async fn main() -> Result<()> {
let ctx = SessionContext::new();
// One nullable Int32 column containing the single value 1.
let a: Int32Array = vec![Some(1)].into_iter().collect();
// Wrap the array in a record batch whose only column is named "a".
let batch = RecordBatch::try_from_iter(vec![
("a", Arc::new(a) as _),
]).unwrap();
// Expose the batch as an in-memory table (a single partition holding one batch).
let t = MemTable::try_new(batch.schema(), vec![vec![batch]]).unwrap();
// No projection is passed to the scan.
let projection = None;
// Scan the table under the name "cpu_load_short" and filter it with a
// predicate in which `col("a").is_null()` and `col("a").eq(lit(2))` each occur
// more than once.
// NOTE(review): the repeated sub-expressions are presumably what the common
// subexpression elimination pass acts on -- confirm against the optimizer.
let builder = LogicalPlanBuilder::scan(
"cpu_load_short",
provider_as_source(Arc::new(t)),
projection
).unwrap()
.filter(col("a").is_null()
.or(col("a").eq(lit(2)))
.or(col("a").is_null().and(col("a").eq(lit(5))))
.or(col("a").is_null().or(col("a").eq(lit(2))))
)
.unwrap();
let logical_plan = builder.build().unwrap();
// manually optimize the plan
// (works on a clone of the session state taken under its read lock)
let state = ctx.state.read().clone();
let logical_plan = state.optimize(&logical_plan).unwrap();
// THIS IS THE KEY: optimize it a second time
// (the already-optimized plan from the previous line is fed back into the optimizer)
let logical_plan = state.optimize(&logical_plan).unwrap();
// Turn the twice-optimized logical plan into a physical plan and run it to completion.
let physical_plan = state.query_planner.create_physical_plan(&logical_plan, &state).await.unwrap();
let task_ctx = Arc::new(TaskContext::from(&state));
let results: Vec<RecordBatch> = collect(physical_plan, task_ctx).await.unwrap();
// format the results
println!("Results:\n\n{}", arrow::util::pretty::pretty_format_batches(&results).unwrap());
Ok(())
}
```
Cargo.toml:
```toml
[package]
name = "rust_arrow_playground"
version = "0.1.0"
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
ahash = "0.7"
tokio = "1.8.2"
tokio-stream = "0.1"
async-trait = "0.1"
futures-util = { version = "0.3.1" }
datafusion = { path = "/Users/alamb/Software/arrow-datafusion/datafusion/core", default-features = false }
once_cell = "1.8.0"
rand = "0.8"
```
When run, it fails with an error like this:
```
cd /Users/alamb/Software/rust_datafusion_playground && RUST_BACKTRACE=1 CARGO_TARGET_DIR=/Users/alamb/Software/df-target cargo run
Compiling rust_arrow_playground v0.1.0 (/Users/alamb/Software/rust_datafusion_playground)
Finished dev [unoptimized + debuginfo] target(s) in 3.77s
Running `/Users/alamb/Software/df-target/debug/rust_arrow_playground`
thread 'main' panicked at 'called `Result::unwrap()` on an `Err` value: SchemaError(DuplicateUnqualifiedField { name: "IsNull-Column-cpu_load_short.a" })', src/main.rs:46:54
stack backtrace:
0: rust_begin_unwind
at /rustc/fe5b13d681f25ee6474be29d748c65adcd91f69e/library/std/src/panicking.rs:584:5
1: core::panicking::panic_fmt
at /rustc/fe5b13d681f25ee6474be29d748c65adcd91f69e/library/core/src/panicking.rs:143:14
2: core::result::unwrap_failed
at /rustc/fe5b13d681f25ee6474be29d748c65adcd91f69e/library/core/src/result.rs:1785:5
3: core::result::Result<T,E>::unwrap
at /rustc/fe5b13d681f25ee6474be29d748c65adcd91f69e/library/core/src/result.rs:1078:23
4: rust_arrow_playground::main::{{closure}}
at ./src/main.rs:46:24
5: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
at /rustc/fe5b13d681f25ee6474be29d748c65adcd91f69e/library/core/src/future/mod.rs:91:19
6: tokio::park::thread::CachedParkThread::block_on::{{closure}}
at /Users/alamb/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.19.2/src/park/thread.rs:263:54
7: tokio::coop::with_budget::{{closure}}
at /Users/alamb/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.19.2/src/coop.rs:102:9
8: std::thread::local::LocalKey<T>::try_with
at /rustc/fe5b13d681f25ee6474be29d748c65adcd91f69e/library/std/src/thread/local.rs:442:16
9: std::thread::local::LocalKey<T>::with
at /rustc/fe5b13d681f25ee6474be29d748c65adcd91f69e/library/std/src/thread/local.rs:418:9
10: tokio::coop::with_budget
at /Users/alamb/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.19.2/src/coop.rs:95:5
11: tokio::coop::budget
at /Users/alamb/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.19.2/src/coop.rs:72:5
12: tokio::park::thread::CachedParkThread::block_on
at /Users/alamb/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.19.2/src/park/thread.rs:263:31
13: tokio::runtime::enter::Enter::block_on
at /Users/alamb/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.19.2/src/runtime/enter.rs:151:13
14: tokio::runtime::thread_pool::ThreadPool::block_on
at /Users/alamb/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.19.2/src/runtime/thread_pool/mod.rs:90:9
15: tokio::runtime::Runtime::block_on
at /Users/alamb/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.19.2/src/runtime/mod.rs:482:43
16: rust_arrow_playground::main
at ./src/main.rs:55:5
17: core::ops::function::FnOnce::call_once
at /rustc/fe5b13d681f25ee6474be29d748c65adcd91f69e/library/core/src/ops/function.rs:227:5
note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose backtrace.
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org