You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2023/01/03 20:35:44 UTC
[arrow-datafusion] branch master updated: fix: ListingSchemaProvider directory paths (#4788)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 34a8b864d fix: ListingSchemaProvider directory paths (#4788)
34a8b864d is described below
commit 34a8b864d08d6199547c019414b0ac77b75d8e61
Author: cfraz89 <cf...@gmail.com>
AuthorDate: Wed Jan 4 07:35:38 2023 +1100
fix: ListingSchemaProvider directory paths (#4788)
- Append trailing slash to table paths if they are directories
---
datafusion/core/src/catalog/listing_schema.rs | 56 ++++++++++++++++++++++++++-
1 file changed, 54 insertions(+), 2 deletions(-)
diff --git a/datafusion/core/src/catalog/listing_schema.rs b/datafusion/core/src/catalog/listing_schema.rs
index be2337839..dcc15cb06 100644
--- a/datafusion/core/src/catalog/listing_schema.rs
+++ b/datafusion/core/src/catalog/listing_schema.rs
@@ -94,16 +94,23 @@ impl ListingSchemaProvider {
let base = Path::new(self.path.as_ref());
let mut tables = HashSet::new();
for file in entries.iter() {
+ // The listing will initially be a file. However if we've recursed up to match our base, we know our path is a directory.
+ let mut is_dir = false;
let mut parent = Path::new(file.location.as_ref());
while let Some(p) = parent.parent() {
if p == base {
- tables.insert(parent);
+ tables.insert(TablePath {
+ is_dir,
+ path: parent,
+ });
}
parent = p;
+ is_dir = true;
}
}
for table in tables.iter() {
let file_name = table
+ .path
.file_name()
.ok_or_else(|| {
DataFusionError::Internal("Cannot parse file name!".to_string())
@@ -113,7 +120,7 @@ impl ListingSchemaProvider {
DataFusionError::Internal("Cannot parse file name!".to_string())
})?;
let table_name = file_name.split('.').collect_vec()[0];
- let table_path = table.to_str().ok_or_else(|| {
+ let table_path = table.to_string().ok_or_else(|| {
DataFusionError::Internal("Cannot parse file name!".to_string())
})?;
@@ -197,3 +204,48 @@ impl SchemaProvider for ListingSchemaProvider {
.contains_key(name)
}
}
+
+/// Stores metadata along with a table's path.
+/// Primarily whether the path is a directory or not.
+#[derive(Eq, PartialEq, Hash, Debug)]
+struct TablePath<'a> {
+ path: &'a Path,
+ is_dir: bool,
+}
+
+impl TablePath<'_> {
+ /// Format the path with a '/' appended if its a directory.
+ /// Clients (eg. object_store listing) can and will use the presence of trailing slash as a heuristic
+ fn to_string(&self) -> Option<String> {
+ self.path.to_str().map(|path| {
+ if self.is_dir {
+ format!("{path}/")
+ } else {
+ path.to_string()
+ }
+ })
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn table_path_ends_with_slash_when_is_dir() {
+ let table_path = TablePath {
+ path: Path::new("/file"),
+ is_dir: true,
+ };
+ assert!(table_path.to_string().expect("table path").ends_with("/"));
+ }
+
+ #[test]
+ fn dir_table_path_str_does_not_end_with_slash_when_not_is_dir() {
+ let table_path = TablePath {
+ path: Path::new("/file"),
+ is_dir: false,
+ };
+ assert!(!table_path.to_string().expect("table_path").ends_with("/"));
+ }
+}