You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2022/10/03 17:11:55 UTC

[arrow-ballista] branch master updated: Allow automatic schema inference when registering csv (#313)

This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-ballista.git


The following commit(s) were added to refs/heads/master by this push:
     new c882c1fb Allow automatic schema inference when registering csv (#313)
c882c1fb is described below

commit c882c1fb339eeeb5c2ba19cd3dc95f69f1d0a6b4
Author: r.4ntix <an...@antix.blue>
AuthorDate: Tue Oct 4 01:11:51 2022 +0800

    Allow automatic schema inference when registering csv (#313)
---
 ballista/rust/client/src/context.rs | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/ballista/rust/client/src/context.rs b/ballista/rust/client/src/context.rs
index e91bae0e..797a527d 100644
--- a/ballista/rust/client/src/context.rs
+++ b/ballista/rust/client/src/context.rs
@@ -381,16 +381,15 @@ impl BallistaContext {
                 match (if_not_exists, table_exists) {
                     (_, false) => match file_type.to_lowercase().as_str() {
                         "csv" => {
-                            self.register_csv(
-                                name,
-                                location,
-                                CsvReadOptions::new()
-                                    .schema(&schema.as_ref().to_owned().into())
-                                    .has_header(*has_header)
-                                    .delimiter(*delimiter as u8)
-                                    .table_partition_cols(table_partition_cols.to_vec()),
-                            )
-                            .await?;
+                            let mut options = CsvReadOptions::new()
+                                .has_header(*has_header)
+                                .delimiter(*delimiter as u8)
+                                .table_partition_cols(table_partition_cols.to_vec());
+                            let csv_schema = schema.as_ref().to_owned().into();
+                            if !schema.fields().is_empty() {
+                                options = options.schema(&csv_schema);
+                            }
+                            self.register_csv(name, location, options).await?;
                             Ok(Arc::new(DataFrame::new(ctx.state.clone(), &plan)))
                         }
                         "parquet" => {