You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/06/08 19:14:33 UTC

[GitHub] [arrow-datafusion] alamb commented on issue #2712: Common Subexpression Elimination pass errors if run twice on some plans: Schema contains duplicate unqualified field name 'IsNull-Column-sys.host'

alamb commented on issue #2712:
URL: https://github.com/apache/arrow-datafusion/issues/2712#issuecomment-1150297976

   Here is a self-contained reproducer:
   
   ```rust
   use std::sync::Arc;
   
   use datafusion::prelude::*;
   use datafusion::arrow::array::Int32Array;
   use datafusion::datasource::MemTable;
   use datafusion::execution::context::TaskContext;
   use datafusion::logical_plan::{LogicalPlanBuilder, provider_as_source};
   use datafusion::physical_plan::collect;
   use datafusion::error::Result;
   use datafusion::arrow::{self, record_batch::RecordBatch};
   
   /// Reproducer for issue #2712: builds a filtered scan over a tiny in-memory
   /// table, runs the optimizer over the logical plan twice, then plans and
   /// executes the result.
   ///
   /// As reported with this reproducer, the run panics with
   /// `SchemaError(DuplicateUnqualifiedField { name: "IsNull-Column-cpu_load_short.a" })`.
   #[tokio::main]
   async fn main() -> Result<()> {
       let ctx = SessionContext::new();
   
       // Single-element Int32 column holding the value 1.
       let a: Int32Array = vec![Some(1)].into_iter().collect();
   
       // One-column record batch whose field is named "a".
       let batch = RecordBatch::try_from_iter(vec![
           ("a", Arc::new(a) as _),
       ]).unwrap();
   
       // In-memory table built from that batch, reusing the batch's schema.
       let t = MemTable::try_new(batch.schema(), vec![vec![batch]]).unwrap();
   
       // No projection is passed to the scan.
       let projection = None;
       // Scan the table under the name "cpu_load_short" and filter with:
       //   a IS NULL OR a = 2 OR (a IS NULL AND a = 5) OR (a IS NULL OR a = 2)
       // The predicate deliberately repeats `a IS NULL` and `a = 2` in several branches.
       // NOTE(review): presumably these repeated subexpressions are what the common
       // subexpression elimination pass rewrites -- confirm against the issue.
       let builder = LogicalPlanBuilder::scan(
           "cpu_load_short",
           provider_as_source(Arc::new(t)),
           projection
       ).unwrap()
           .filter(col("a").is_null()
                        .or(col("a").eq(lit(2)))
                        .or(col("a").is_null().and(col("a").eq(lit(5))))
                        .or(col("a").is_null().or(col("a").eq(lit(2))))
           )
           .unwrap();
   
   
       let logical_plan = builder.build().unwrap();
   
       // manually optimize the plan
       // Work on a clone of the session state so `optimize` and the query planner
       // can be called on it directly.
       let state = ctx.state.read().clone();
   
       let logical_plan = state.optimize(&logical_plan).unwrap();
       // THIS IS THE KEY: optimize it a second time
       // (the input here is the already-optimized plan from the line above)
       let logical_plan = state.optimize(&logical_plan).unwrap();
   
       // Turn the twice-optimized logical plan into a physical plan.
       let physical_plan = state.query_planner.create_physical_plan(&logical_plan, &state).await.unwrap();
   
       // Execute the physical plan and gather all output batches.
       let task_ctx = Arc::new(TaskContext::from(&state));
       let results: Vec<RecordBatch> = collect(physical_plan, task_ctx).await.unwrap();
   
       // format the results
       println!("Results:\n\n{}", arrow::util::pretty::pretty_format_batches(&results).unwrap());
       Ok(())
   }
   ```
   
   Cargo.toml:
   
   ```toml
   [package]
   name = "rust_arrow_playground"
   version = "0.1.0"
   edition = "2018"
   
   # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
   
   [dependencies]
   ahash = "0.7"
   tokio = "1.8.2"
   tokio-stream = "0.1"
   async-trait = "0.1"
   futures-util = { version = "0.3.1" }
   datafusion = { path = "/Users/alamb/Software/arrow-datafusion/datafusion/core", default-features = false }
   once_cell = "1.8.0"
   rand = "0.8"
   ```
   
   When run, it errors like this:
   
   ```
   cd /Users/alamb/Software/rust_datafusion_playground && RUST_BACKTRACE=1 CARGO_TARGET_DIR=/Users/alamb/Software/df-target cargo run
      Compiling rust_arrow_playground v0.1.0 (/Users/alamb/Software/rust_datafusion_playground)
       Finished dev [unoptimized + debuginfo] target(s) in 3.77s
        Running `/Users/alamb/Software/df-target/debug/rust_arrow_playground`
   thread 'main' panicked at 'called `Result::unwrap()` on an `Err` value: SchemaError(DuplicateUnqualifiedField { name: "IsNull-Column-cpu_load_short.a" })', src/main.rs:46:54
   stack backtrace:
      0: rust_begin_unwind
                at /rustc/fe5b13d681f25ee6474be29d748c65adcd91f69e/library/std/src/panicking.rs:584:5
      1: core::panicking::panic_fmt
                at /rustc/fe5b13d681f25ee6474be29d748c65adcd91f69e/library/core/src/panicking.rs:143:14
      2: core::result::unwrap_failed
                at /rustc/fe5b13d681f25ee6474be29d748c65adcd91f69e/library/core/src/result.rs:1785:5
      3: core::result::Result<T,E>::unwrap
                at /rustc/fe5b13d681f25ee6474be29d748c65adcd91f69e/library/core/src/result.rs:1078:23
      4: rust_arrow_playground::main::{{closure}}
                at ./src/main.rs:46:24
      5: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
                at /rustc/fe5b13d681f25ee6474be29d748c65adcd91f69e/library/core/src/future/mod.rs:91:19
      6: tokio::park::thread::CachedParkThread::block_on::{{closure}}
                at /Users/alamb/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.19.2/src/park/thread.rs:263:54
      7: tokio::coop::with_budget::{{closure}}
                at /Users/alamb/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.19.2/src/coop.rs:102:9
      8: std::thread::local::LocalKey<T>::try_with
                at /rustc/fe5b13d681f25ee6474be29d748c65adcd91f69e/library/std/src/thread/local.rs:442:16
      9: std::thread::local::LocalKey<T>::with
                at /rustc/fe5b13d681f25ee6474be29d748c65adcd91f69e/library/std/src/thread/local.rs:418:9
     10: tokio::coop::with_budget
                at /Users/alamb/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.19.2/src/coop.rs:95:5
     11: tokio::coop::budget
                at /Users/alamb/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.19.2/src/coop.rs:72:5
     12: tokio::park::thread::CachedParkThread::block_on
                at /Users/alamb/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.19.2/src/park/thread.rs:263:31
     13: tokio::runtime::enter::Enter::block_on
                at /Users/alamb/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.19.2/src/runtime/enter.rs:151:13
     14: tokio::runtime::thread_pool::ThreadPool::block_on
                at /Users/alamb/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.19.2/src/runtime/thread_pool/mod.rs:90:9
     15: tokio::runtime::Runtime::block_on
                at /Users/alamb/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.19.2/src/runtime/mod.rs:482:43
     16: rust_arrow_playground::main
                at ./src/main.rs:55:5
     17: core::ops::function::FnOnce::call_once
                at /rustc/fe5b13d681f25ee6474be29d748c65adcd91f69e/library/core/src/ops/function.rs:227:5
   note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose backtrace.
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org