You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2021/05/04 22:15:48 UTC
[arrow-datafusion] branch master updated: Refactor
datafusion/src/physical_plan/common.rs build_file_list to take less param
and reuse code (#253)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new f7bd7b9 Refactor datafusion/src/physical_plan/common.rs build_file_list to take less param and reuse code (#253)
f7bd7b9 is described below
commit f7bd7b9f0e5a28b1fada2dbb60151b5af9b16545
Author: Jiayu Liu <Ji...@users.noreply.github.com>
AuthorDate: Wed May 5 06:15:38 2021 +0800
Refactor datafusion/src/physical_plan/common.rs build_file_list to take less param and reuse code (#253)
* refactor common build_file_list
* fixing clippy
---
datafusion/src/datasource/csv.rs | 3 +--
datafusion/src/physical_plan/common.rs | 17 ++++++++++++++---
datafusion/src/physical_plan/csv.rs | 3 +--
datafusion/src/physical_plan/parquet.rs | 3 +--
4 files changed, 17 insertions(+), 9 deletions(-)
diff --git a/datafusion/src/datasource/csv.rs b/datafusion/src/datasource/csv.rs
index 1bd1b4b..33cbeb1 100644
--- a/datafusion/src/datasource/csv.rs
+++ b/datafusion/src/datasource/csv.rs
@@ -71,8 +71,7 @@ impl CsvFile {
let schema = Arc::new(match options.schema {
Some(s) => s.clone(),
None => {
- let mut filenames: Vec<String> = vec![];
- common::build_file_list(path, &mut filenames, options.file_extension)?;
+ let filenames = common::build_file_list(path, options.file_extension)?;
if filenames.is_empty() {
return Err(DataFusionError::Plan(format!(
"No files found at {path} with file extension {file_extension}",
diff --git a/datafusion/src/physical_plan/common.rs b/datafusion/src/physical_plan/common.rs
index 9de7ee2..f1ed374 100644
--- a/datafusion/src/physical_plan/common.rs
+++ b/datafusion/src/physical_plan/common.rs
@@ -78,8 +78,19 @@ pub async fn collect(stream: SendableRecordBatchStream) -> Result<Vec<RecordBatc
.map_err(DataFusionError::from)
}
-/// Recursively build a list of files in a directory with a given extension
-pub fn build_file_list(dir: &str, filenames: &mut Vec<String>, ext: &str) -> Result<()> {
+/// Recursively builds a list of files in a directory with a given extension
+pub fn build_file_list(dir: &str, ext: &str) -> Result<Vec<String>> {
+ let mut filenames: Vec<String> = Vec::new();
+ build_file_list_recurse(dir, &mut filenames, ext)?;
+ Ok(filenames)
+}
+
+/// Recursively build a list of files in a directory with a given extension with an accumulator list
+fn build_file_list_recurse(
+ dir: &str,
+ filenames: &mut Vec<String>,
+ ext: &str,
+) -> Result<()> {
let metadata = metadata(dir)?;
if metadata.is_file() {
if dir.ends_with(ext) {
@@ -91,7 +102,7 @@ pub fn build_file_list(dir: &str, filenames: &mut Vec<String>, ext: &str) -> Res
let path = entry.path();
if let Some(path_name) = path.to_str() {
if path.is_dir() {
- build_file_list(path_name, filenames, ext)?;
+ build_file_list_recurse(path_name, filenames, ext)?;
} else if path_name.ends_with(ext) {
filenames.push(path_name.to_string());
}
diff --git a/datafusion/src/physical_plan/csv.rs b/datafusion/src/physical_plan/csv.rs
index b96a702..9ab8177 100644
--- a/datafusion/src/physical_plan/csv.rs
+++ b/datafusion/src/physical_plan/csv.rs
@@ -199,8 +199,7 @@ impl CsvExec {
) -> Result<Self> {
let file_extension = String::from(options.file_extension);
- let mut filenames: Vec<String> = vec![];
- common::build_file_list(path, &mut filenames, file_extension.as_str())?;
+ let filenames = common::build_file_list(path, file_extension.as_str())?;
if filenames.is_empty() {
return Err(DataFusionError::Execution(format!(
"No files found at {path} with file extension {file_extension}",
diff --git a/datafusion/src/physical_plan/parquet.rs b/datafusion/src/physical_plan/parquet.rs
index d41d696..09dd48d 100644
--- a/datafusion/src/physical_plan/parquet.rs
+++ b/datafusion/src/physical_plan/parquet.rs
@@ -118,8 +118,7 @@ impl ParquetExec {
) -> Result<Self> {
// build a list of filenames from the specified path, which could be a single file or
// a directory containing one or more parquet files
- let mut filenames: Vec<String> = vec![];
- common::build_file_list(path, &mut filenames, ".parquet")?;
+ let filenames = common::build_file_list(path, ".parquet")?;
if filenames.is_empty() {
Err(DataFusionError::Plan(format!(
"No Parquet files found at path {}",