You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2022/08/26 20:39:27 UTC
[arrow-datafusion] branch master updated: fix empty csv schema (#3272)
This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 90a0e7c45 fix empty csv schema (#3272)
90a0e7c45 is described below
commit 90a0e7c4590aa512fa31097d325dcef6c1e5782d
Author: comphead <co...@users.noreply.github.com>
AuthorDate: Fri Aug 26 13:39:21 2022 -0700
fix empty csv schema (#3272)
---
datafusion/core/src/datasource/file_format/csv.rs | 2 +-
datafusion/core/tests/empty.csv | 1 +
datafusion/core/tests/sql/create_drop.rs | 26 +++++++++++++++++++++++
3 files changed, 28 insertions(+), 1 deletion(-)
diff --git a/datafusion/core/src/datasource/file_format/csv.rs b/datafusion/core/src/datasource/file_format/csv.rs
index d72a6e767..2e6994f4b 100644
--- a/datafusion/core/src/datasource/file_format/csv.rs
+++ b/datafusion/core/src/datasource/file_format/csv.rs
@@ -116,10 +116,10 @@ impl FileFormat for CsvFormat {
Some(records_to_read),
self.has_header,
)?;
+ schemas.push(schema.clone());
if records_read == 0 {
continue;
}
- schemas.push(schema.clone());
records_to_read -= records_read;
if records_to_read == 0 {
break;
diff --git a/datafusion/core/tests/empty.csv b/datafusion/core/tests/empty.csv
new file mode 100644
index 000000000..f1968a090
--- /dev/null
+++ b/datafusion/core/tests/empty.csv
@@ -0,0 +1 @@
+c1,c2,c3
diff --git a/datafusion/core/tests/sql/create_drop.rs b/datafusion/core/tests/sql/create_drop.rs
index 1d28f2e33..cca742c4a 100644
--- a/datafusion/core/tests/sql/create_drop.rs
+++ b/datafusion/core/tests/sql/create_drop.rs
@@ -261,3 +261,29 @@ async fn create_pipe_delimited_csv_table() -> Result<()> {
Ok(())
}
+
+#[tokio::test]
+async fn create_csv_table_empty_file() -> Result<()> {
+ let ctx =
+ SessionContext::with_config(SessionConfig::new().with_information_schema(true));
+
+ let sql = "CREATE EXTERNAL TABLE empty STORED AS CSV WITH HEADER ROW LOCATION 'tests/empty.csv'";
+ ctx.sql(sql).await.unwrap();
+ let sql =
+ "select column_name, data_type, ordinal_position from information_schema.columns";
+ let results = execute_to_batches(&ctx, sql).await;
+
+ let expected = vec![
+ "+-------------+-----------+------------------+",
+ "| column_name | data_type | ordinal_position |",
+ "+-------------+-----------+------------------+",
+ "| c1          | Utf8      | 0                |",
+ "| c2          | Utf8      | 1                |",
+ "| c3          | Utf8      | 2                |",
+ "+-------------+-----------+------------------+",
+ ];
+
+ assert_batches_eq!(expected, &results);
+
+ Ok(())
+}