You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2020/12/03 11:18:39 UTC

[arrow] branch master updated: ARROW-10753: [Rust] [DataFusion] Fix parsing of negative numbers in DataFusion

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 016f76c  ARROW-10753: [Rust] [DataFusion] Fix parsing of negative numbers in DataFusion
016f76c is described below

commit 016f76c8c02e769d58b3e785a87674d98ce83367
Author: Evan Chan <ev...@urbanlogiq.com>
AuthorDate: Thu Dec 3 06:17:28 2020 -0500

    ARROW-10753: [Rust] [DataFusion] Fix parsing of negative numbers in DataFusion
    
    Currently, DataFusion SQL statements that compare negative numbers result in an exception, as negative numbers are incorrectly parsed.  The error thrown is `InternalError("SQL binary operator cannot be interpreted as a unary operator")`
    
    This PR adds a quick fix in SQL planner.rs, to parse SQL of the form ` UnaryOp { op: Minus, expr: Value(Number("1")) } } ` etc from SQL such as `WHERE col1 >= -1` to allow proper evaluation of the above SQL as a literal number.
    
    Closes #8815 from velvia/evan/arrow-10753-fix
    
    Authored-by: Evan Chan <ev...@urbanlogiq.com>
    Signed-off-by: Andrew Lamb <an...@nerdnetworks.org>
---
 rust/arrow/src/array/transform/mod.rs |  4 +---
 rust/datafusion/src/sql/planner.rs    | 12 ++++++++++--
 rust/datafusion/tests/sql.rs          | 31 +++++++++++++++++++++++++++++++
 3 files changed, 42 insertions(+), 5 deletions(-)

diff --git a/rust/arrow/src/array/transform/mod.rs b/rust/arrow/src/array/transform/mod.rs
index 6d54d7f..9c4149e 100644
--- a/rust/arrow/src/array/transform/mod.rs
+++ b/rust/arrow/src/array/transform/mod.rs
@@ -274,9 +274,7 @@ fn build_extend_nulls(data_type: &DataType) -> ExtendNulls {
         DataType::FixedSizeList(_, _) => {}
         DataType::Union(_) => {}
         */
-        _ => {
-            todo!("Take and filter operations still not supported for this datatype")
-        }
+        _ => todo!("Take and filter operations still not supported for this datatype"),
     })
 }
 
diff --git a/rust/datafusion/src/sql/planner.rs b/rust/datafusion/src/sql/planner.rs
index 8dfbeb0..a8f91f4 100644
--- a/rust/datafusion/src/sql/planner.rs
+++ b/rust/datafusion/src/sql/planner.rs
@@ -638,10 +638,18 @@ impl<'a, S: SchemaProvider> SqlToRel<'a, S> {
                 Ok(Expr::IsNotNull(Box::new(self.sql_to_rex(expr, schema)?)))
             }
 
-            SQLExpr::UnaryOp { ref op, ref expr } => match *op {
-                UnaryOperator::Not => {
+            SQLExpr::UnaryOp { ref op, ref expr } => match (op, expr.as_ref()) {
+                (UnaryOperator::Not, _) => {
                     Ok(Expr::Not(Box::new(self.sql_to_rex(expr, schema)?)))
                 }
+                (UnaryOperator::Minus, SQLExpr::Value(Value::Number(n))) =>
+                // Parse negative numbers properly
+                {
+                    match n.parse::<i64>() {
+                        Ok(n) => Ok(lit(-n)),
+                        Err(_) => Ok(lit(-n.parse::<f64>().unwrap())),
+                    }
+                }
                 _ => Err(DataFusionError::Internal(format!(
                     "SQL binary operator cannot be interpreted as a unary operator"
                 ))),
diff --git a/rust/datafusion/tests/sql.rs b/rust/datafusion/tests/sql.rs
index 5dd1b2b..3a549fc 100644
--- a/rust/datafusion/tests/sql.rs
+++ b/rust/datafusion/tests/sql.rs
@@ -1497,6 +1497,37 @@ async fn csv_query_sum_cast() {
 }
 
 #[tokio::test]
+async fn query_where_neg_num() -> Result<()> {
+    let mut ctx = ExecutionContext::new();
+    register_aggregate_csv_by_sql(&mut ctx).await;
+
+    // Negative numbers do not parse correctly as of Arrow 2.0.0
+    let sql = "select c7, c8 from aggregate_test_100 where c7 >= -2 and c7 < 10";
+    let actual = execute(&mut ctx, sql).await;
+    let expected = vec![
+        vec!["7", "45465"],
+        vec!["5", "40622"],
+        vec!["0", "61069"],
+        vec!["2", "20120"],
+        vec!["4", "39363"],
+    ];
+    assert_eq!(expected, actual);
+
+    // Also check floating point neg numbers
+    let sql = "select c7, c8 from aggregate_test_100 where c7 >= -2.9 and c7 < 10";
+    let actual = execute(&mut ctx, sql).await;
+    let expected = vec![
+        vec!["7", "45465"],
+        vec!["5", "40622"],
+        vec!["0", "61069"],
+        vec!["2", "20120"],
+        vec!["4", "39363"],
+    ];
+    assert_eq!(expected, actual);
+    Ok(())
+}
+
+#[tokio::test]
 async fn like() -> Result<()> {
     let mut ctx = ExecutionContext::new();
     register_aggregate_csv_by_sql(&mut ctx).await;