You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "alamb (via GitHub)" <gi...@apache.org> on 2023/03/17 11:40:44 UTC

[GitHub] [arrow-datafusion] alamb commented on a diff in pull request #5618: WITH ORDER support on CREATE EXTERNAL TABLE

alamb commented on code in PR #5618:
URL: https://github.com/apache/arrow-datafusion/pull/5618#discussion_r1140119378


##########
datafusion/core/tests/sql/order.rs:
##########
@@ -39,3 +42,31 @@ async fn sort_with_lots_of_repetition_values() -> Result<()> {
     }
     Ok(())
 }
+
+#[tokio::test]
+async fn create_external_table_with_order() -> Result<()> {
+    let ctx = SessionContext::new();
+    let sql = "CREATE EXTERNAL TABLE dt (a_id integer, a_str string, a_bool boolean) STORED AS CSV WITH ORDER (a_id ASC) LOCATION 'file://path/to/table';";
+    if let LogicalPlan::CreateExternalTable(cmd) =
+        ctx.state().create_logical_plan(sql).await?
+    {
+        let listing_table_factory = Arc::new(ListingTableFactory::new());
+        let table_dyn = listing_table_factory.create(&ctx.state(), &cmd).await?;
+        let table = table_dyn.as_any().downcast_ref::<ListingTable>().unwrap();
+        assert_eq!(
+            &cmd.order_exprs,

Review Comment:
   I would recommend explicitly asserting the content of the order by exprs here -- as written if `cmd.order_exprs` were empty the test would still pass, I think.



##########
datafusion/sql/src/parser.rs:
##########
@@ -713,11 +782,144 @@ mod tests {
         });
         expect_parse_ok(sql, expected)?;
 
+        // Ordered Col
+        let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1) LOCATION 'foo.csv'";
+        let display = None;
+        let expected = Statement::CreateExternalTable(CreateExternalTable {
+            name: "t".into(),
+            columns: vec![make_column_def("c1", DataType::Int(display))],
+            file_type: "CSV".to_string(),
+            has_header: false,
+            delimiter: ',',
+            location: "foo.csv".into(),
+            table_partition_cols: vec![],
+            order_exprs: vec![OrderByExpr {
+                expr: Identifier(Ident {
+                    value: "c1".to_owned(),
+                    quote_style: None,
+                }),
+                asc: None,
+                nulls_first: None,
+            }],
+            if_not_exists: false,
+            file_compression_type: UNCOMPRESSED,
+            options: HashMap::new(),
+        });
+        expect_parse_ok(sql, expected)?;
+
+        // Ordered Col
+        let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 NULLS FIRST) LOCATION 'foo.csv'";
+        let display = None;
+        let expected = Statement::CreateExternalTable(CreateExternalTable {
+            name: "t".into(),
+            columns: vec![make_column_def("c1", DataType::Int(display))],
+            file_type: "CSV".to_string(),
+            has_header: false,
+            delimiter: ',',
+            location: "foo.csv".into(),
+            table_partition_cols: vec![],
+            order_exprs: vec![OrderByExpr {
+                expr: Identifier(Ident {
+                    value: "c1".to_owned(),
+                    quote_style: None,
+                }),
+                asc: None,
+                nulls_first: Some(true),
+            }],
+            if_not_exists: false,
+            file_compression_type: UNCOMPRESSED,
+            options: HashMap::new(),
+        });
+        expect_parse_ok(sql, expected)?;
+
+        // Ordered Col
+        let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH ORDER (c1 ASC, c2 DESC NULLS FIRST) LOCATION 'foo.csv'";
+        let display = None;
+        let expected = Statement::CreateExternalTable(CreateExternalTable {
+            name: "t".into(),
+            columns: vec![
+                make_column_def("c1", DataType::Int(display)),
+                make_column_def("c2", DataType::Int(display)),
+            ],
+            file_type: "CSV".to_string(),
+            has_header: false,
+            delimiter: ',',
+            location: "foo.csv".into(),
+            table_partition_cols: vec![],
+            order_exprs: vec![
+                OrderByExpr {
+                    expr: Identifier(Ident {
+                        value: "c1".to_owned(),
+                        quote_style: None,
+                    }),
+                    asc: Some(true),
+                    nulls_first: None,
+                },
+                OrderByExpr {
+                    expr: Identifier(Ident {
+                        value: "c2".to_owned(),
+                        quote_style: None,
+                    }),
+                    asc: Some(false),
+                    nulls_first: Some(true),

Review Comment:
   I didn't see any test for `nulls_first: Some(false)`



##########
datafusion/expr/src/logical_plan/plan.rs:
##########
@@ -1632,6 +1632,8 @@ pub struct CreateExternalTable {
     pub if_not_exists: bool,
     /// SQL used to create the table, if available
     pub definition: Option<String>,
+    /// Order expressions

Review Comment:
   ```suggestion
       /// Order expressions supplied by user.
       ///
       /// NOTE: DataFusion will assume the data is sorted in this way
       /// and does not verify it. Thus if the data is not actually sorted as
       /// described, WRONG ANSERS can be produced. 
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org