You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2022/08/26 20:39:27 UTC

[arrow-datafusion] branch master updated: fix empty csv schema (#3272)

This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 90a0e7c45 fix empty csv schema (#3272)
90a0e7c45 is described below

commit 90a0e7c4590aa512fa31097d325dcef6c1e5782d
Author: comphead <co...@users.noreply.github.com>
AuthorDate: Fri Aug 26 13:39:21 2022 -0700

    fix empty csv schema (#3272)
---
 datafusion/core/src/datasource/file_format/csv.rs |  2 +-
 datafusion/core/tests/empty.csv                   |  1 +
 datafusion/core/tests/sql/create_drop.rs          | 26 +++++++++++++++++++++++
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/datafusion/core/src/datasource/file_format/csv.rs b/datafusion/core/src/datasource/file_format/csv.rs
index d72a6e767..2e6994f4b 100644
--- a/datafusion/core/src/datasource/file_format/csv.rs
+++ b/datafusion/core/src/datasource/file_format/csv.rs
@@ -116,10 +116,10 @@ impl FileFormat for CsvFormat {
                 Some(records_to_read),
                 self.has_header,
             )?;
+            schemas.push(schema.clone());
             if records_read == 0 {
                 continue;
             }
-            schemas.push(schema.clone());
             records_to_read -= records_read;
             if records_to_read == 0 {
                 break;
diff --git a/datafusion/core/tests/empty.csv b/datafusion/core/tests/empty.csv
new file mode 100644
index 000000000..f1968a090
--- /dev/null
+++ b/datafusion/core/tests/empty.csv
@@ -0,0 +1 @@
+c1,c2,c3
diff --git a/datafusion/core/tests/sql/create_drop.rs b/datafusion/core/tests/sql/create_drop.rs
index 1d28f2e33..cca742c4a 100644
--- a/datafusion/core/tests/sql/create_drop.rs
+++ b/datafusion/core/tests/sql/create_drop.rs
@@ -261,3 +261,29 @@ async fn create_pipe_delimited_csv_table() -> Result<()> {
 
     Ok(())
 }
+/// Verifies that a header-only CSV file still yields its column schema in `information_schema`.
+#[tokio::test]
+async fn create_csv_table_empty_file() -> Result<()> {
+    let ctx =
+        SessionContext::with_config(SessionConfig::new().with_information_schema(true));
+    // `tests/empty.csv` holds only the header line `c1,c2,c3` and no data rows.
+    let sql = "CREATE EXTERNAL TABLE empty STORED AS CSV WITH HEADER ROW LOCATION 'tests/empty.csv'";
+    ctx.sql(sql).await.unwrap();
+    let sql =
+        "select column_name, data_type, ordinal_position from information_schema.columns";
+    let results = execute_to_batches(&ctx, sql).await;
+    // Each header name should be reported as a `Utf8` column, in header order.
+    let expected = vec![
+        "+-------------+-----------+------------------+",
+        "| column_name | data_type | ordinal_position |",
+        "+-------------+-----------+------------------+",
+        "| c1          | Utf8      | 0                |",
+        "| c2          | Utf8      | 1                |",
+        "| c3          | Utf8      | 2                |",
+        "+-------------+-----------+------------------+",
+    ];
+
+    assert_batches_eq!(expected, &results);
+
+    Ok(())
+}