You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2021/05/04 22:15:48 UTC

[arrow-datafusion] branch master updated: Refactor datafusion/src/physical_plan/common.rs build_file_list to take less param and reuse code (#253)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new f7bd7b9  Refactor datafusion/src/physical_plan/common.rs build_file_list to take less param and reuse code (#253)
f7bd7b9 is described below

commit f7bd7b9f0e5a28b1fada2dbb60151b5af9b16545
Author: Jiayu Liu <Ji...@users.noreply.github.com>
AuthorDate: Wed May 5 06:15:38 2021 +0800

    Refactor datafusion/src/physical_plan/common.rs build_file_list to take less param and reuse code (#253)
    
    * refactor common build_file_list
    
    * fixing clippy
---
 datafusion/src/datasource/csv.rs        |  3 +--
 datafusion/src/physical_plan/common.rs  | 17 ++++++++++++++---
 datafusion/src/physical_plan/csv.rs     |  3 +--
 datafusion/src/physical_plan/parquet.rs |  3 +--
 4 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/datafusion/src/datasource/csv.rs b/datafusion/src/datasource/csv.rs
index 1bd1b4b..33cbeb1 100644
--- a/datafusion/src/datasource/csv.rs
+++ b/datafusion/src/datasource/csv.rs
@@ -71,8 +71,7 @@ impl CsvFile {
         let schema = Arc::new(match options.schema {
             Some(s) => s.clone(),
             None => {
-                let mut filenames: Vec<String> = vec![];
-                common::build_file_list(path, &mut filenames, options.file_extension)?;
+                let filenames = common::build_file_list(path, options.file_extension)?;
                 if filenames.is_empty() {
                     return Err(DataFusionError::Plan(format!(
                         "No files found at {path} with file extension {file_extension}",
diff --git a/datafusion/src/physical_plan/common.rs b/datafusion/src/physical_plan/common.rs
index 9de7ee2..f1ed374 100644
--- a/datafusion/src/physical_plan/common.rs
+++ b/datafusion/src/physical_plan/common.rs
@@ -78,8 +78,19 @@ pub async fn collect(stream: SendableRecordBatchStream) -> Result<Vec<RecordBatc
         .map_err(DataFusionError::from)
 }
 
-/// Recursively build a list of files in a directory with a given extension
-pub fn build_file_list(dir: &str, filenames: &mut Vec<String>, ext: &str) -> Result<()> {
+/// Recursively builds a list of files in a directory with a given extension
+pub fn build_file_list(dir: &str, ext: &str) -> Result<Vec<String>> {
+    let mut filenames: Vec<String> = Vec::new();
+    build_file_list_recurse(dir, &mut filenames, ext)?;
+    Ok(filenames)
+}
+
+/// Recursively build a list of files in a directory with a given extension with an accumulator list
+fn build_file_list_recurse(
+    dir: &str,
+    filenames: &mut Vec<String>,
+    ext: &str,
+) -> Result<()> {
     let metadata = metadata(dir)?;
     if metadata.is_file() {
         if dir.ends_with(ext) {
@@ -91,7 +102,7 @@ pub fn build_file_list(dir: &str, filenames: &mut Vec<String>, ext: &str) -> Res
             let path = entry.path();
             if let Some(path_name) = path.to_str() {
                 if path.is_dir() {
-                    build_file_list(path_name, filenames, ext)?;
+                    build_file_list_recurse(path_name, filenames, ext)?;
                 } else if path_name.ends_with(ext) {
                     filenames.push(path_name.to_string());
                 }
diff --git a/datafusion/src/physical_plan/csv.rs b/datafusion/src/physical_plan/csv.rs
index b96a702..9ab8177 100644
--- a/datafusion/src/physical_plan/csv.rs
+++ b/datafusion/src/physical_plan/csv.rs
@@ -199,8 +199,7 @@ impl CsvExec {
     ) -> Result<Self> {
         let file_extension = String::from(options.file_extension);
 
-        let mut filenames: Vec<String> = vec![];
-        common::build_file_list(path, &mut filenames, file_extension.as_str())?;
+        let filenames = common::build_file_list(path, file_extension.as_str())?;
         if filenames.is_empty() {
             return Err(DataFusionError::Execution(format!(
                 "No files found at {path} with file extension {file_extension}",
diff --git a/datafusion/src/physical_plan/parquet.rs b/datafusion/src/physical_plan/parquet.rs
index d41d696..09dd48d 100644
--- a/datafusion/src/physical_plan/parquet.rs
+++ b/datafusion/src/physical_plan/parquet.rs
@@ -118,8 +118,7 @@ impl ParquetExec {
     ) -> Result<Self> {
         // build a list of filenames from the specified path, which could be a single file or
         // a directory containing one or more parquet files
-        let mut filenames: Vec<String> = vec![];
-        common::build_file_list(path, &mut filenames, ".parquet")?;
+        let filenames = common::build_file_list(path, ".parquet")?;
         if filenames.is_empty() {
             Err(DataFusionError::Plan(format!(
                 "No Parquet files found at path {}",