You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/05/17 07:31:07 UTC

[arrow-rs] branch master updated: Return NotFound for directories in Head and Get (#4230) (#4231)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 98867f5b2 Return NotFound for directories in Head and Get (#4230) (#4231)
98867f5b2 is described below

commit 98867f5b2f61d1ed0539856240f45bbf004423de
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Wed May 17 08:31:01 2023 +0100

    Return NotFound for directories in Head and Get (#4230) (#4231)
    
    * Return NotFound for directories in Head and Get (#4230)
    
    * Fix webdav
    
    * Fix error message
---
 object_store/src/azure/client.rs | 14 ++++++++++++-
 object_store/src/http/client.rs  | 11 ++++++----
 object_store/src/http/mod.rs     | 20 ++++++++-----------
 object_store/src/lib.rs          |  8 ++++++++
 object_store/src/local.rs        | 43 +++++++++++++++++++++++++---------------
 5 files changed, 63 insertions(+), 33 deletions(-)

diff --git a/object_store/src/azure/client.rs b/object_store/src/azure/client.rs
index 4611986e3..893e261fe 100644
--- a/object_store/src/azure/client.rs
+++ b/object_store/src/azure/client.rs
@@ -257,7 +257,19 @@ impl AzureClient {
                 path: path.as_ref(),
             })?;
 
-        Ok(response)
+        match response.headers().get("x-ms-resource-type") {
+            Some(resource) if resource.as_ref() != b"file" => {
+                Err(crate::Error::NotFound {
+                    path: path.to_string(),
+                    source: format!(
+                        "Not a file, got x-ms-resource-type: {}",
+                        String::from_utf8_lossy(resource.as_ref())
+                    )
+                    .into(),
+                })
+            }
+            _ => Ok(response),
+        }
     }
 
     /// Make an Azure Delete request <https://docs.microsoft.com/en-us/rest/api/storageservices/delete-blob>
diff --git a/object_store/src/http/client.rs b/object_store/src/http/client.rs
index 4e58eb0b2..6feacbba6 100644
--- a/object_store/src/http/client.rs
+++ b/object_store/src/http/client.rs
@@ -238,10 +238,13 @@ impl Client {
             .send_retry(&self.retry_config)
             .await
             .map_err(|source| match source.status() {
-                Some(StatusCode::NOT_FOUND) => crate::Error::NotFound {
-                    source: Box::new(source),
-                    path: location.to_string(),
-                },
+                // Some stores return METHOD_NOT_ALLOWED for get on directories
+                Some(StatusCode::NOT_FOUND | StatusCode::METHOD_NOT_ALLOWED) => {
+                    crate::Error::NotFound {
+                        source: Box::new(source),
+                        path: location.to_string(),
+                    }
+                }
                 _ => Error::Request { source }.into(),
             })
     }
diff --git a/object_store/src/http/mod.rs b/object_store/src/http/mod.rs
index bed19722c..124b7da2f 100644
--- a/object_store/src/http/mod.rs
+++ b/object_store/src/http/mod.rs
@@ -60,15 +60,6 @@ enum Error {
         url: String,
     },
 
-    #[snafu(display("Object is a directory"))]
-    IsDirectory,
-
-    #[snafu(display("PROPFIND response contained no valid objects"))]
-    NoObjects,
-
-    #[snafu(display("PROPFIND response contained more than one object"))]
-    MultipleObjects,
-
     #[snafu(display("Request error: {}", source))]
     Reqwest { source: reqwest::Error },
 }
@@ -134,12 +125,17 @@ impl ObjectStore for HttpStore {
                 let response = status.response.into_iter().next().unwrap();
                 response.check_ok()?;
                 match response.is_dir() {
-                    true => Err(Error::IsDirectory.into()),
+                    true => Err(crate::Error::NotFound {
+                        path: location.to_string(),
+                        source: "Is directory".to_string().into(),
+                    }),
                     false => response.object_meta(self.client.base_url()),
                 }
             }
-            0 => Err(Error::NoObjects.into()),
-            _ => Err(Error::MultipleObjects.into()),
+            x => Err(crate::Error::NotFound {
+                path: location.to_string(),
+                source: format!("Expected 1 result, got {x}").into(),
+            }),
         }
     }
 
diff --git a/object_store/src/lib.rs b/object_store/src/lib.rs
index 75f9ca7df..0f3ed809e 100644
--- a/object_store/src/lib.rs
+++ b/object_store/src/lib.rs
@@ -880,6 +880,14 @@ mod tests {
         assert_eq!(result.common_prefixes.len(), 1);
         assert_eq!(result.common_prefixes[0], Path::from("test_dir"));
 
+        // Should return not found
+        let err = storage.get(&Path::from("test_dir")).await.unwrap_err();
+        assert!(matches!(err, crate::Error::NotFound { .. }), "{}", err);
+
+        // Should return not found
+        let err = storage.head(&Path::from("test_dir")).await.unwrap_err();
+        assert!(matches!(err, crate::Error::NotFound { .. }), "{}", err);
+
         // List everything starting with a prefix that should return results
         let prefix = Path::from("test_dir");
         let content_list = flatten_list_stream(storage, Some(&prefix)).await.unwrap();
diff --git a/object_store/src/local.rs b/object_store/src/local.rs
index 26a8bf336..52719f1cb 100644
--- a/object_store/src/local.rs
+++ b/object_store/src/local.rs
@@ -419,18 +419,23 @@ impl ObjectStore for LocalFileSystem {
 
         maybe_spawn_blocking(move || {
             let metadata = match metadata(&path) {
-                Err(e) => Err(if e.kind() == ErrorKind::NotFound {
-                    Error::NotFound {
+                Err(e) => Err(match e.kind() {
+                    ErrorKind::NotFound => Error::NotFound {
                         path: path.clone(),
                         source: e,
-                    }
-                } else {
-                    Error::Metadata {
+                    },
+                    _ => Error::Metadata {
                         source: e.into(),
                         path: location.to_string(),
-                    }
+                    },
                 }),
-                Ok(m) => Ok(m),
+                Ok(m) => match m.is_file() {
+                    true => Ok(m),
+                    false => Err(Error::NotFound {
+                        path,
+                        source: io::Error::new(ErrorKind::NotFound, "is not file"),
+                    }),
+                },
             }?;
             convert_metadata(metadata, location)
         })
@@ -878,19 +883,25 @@ fn read_range(file: &mut File, path: &PathBuf, range: Range<usize>) -> Result<By
 }
 
 fn open_file(path: &PathBuf) -> Result<File> {
-    let file = File::open(path).map_err(|e| {
-        if e.kind() == std::io::ErrorKind::NotFound {
-            Error::NotFound {
+    let file = match File::open(path).and_then(|f| Ok((f.metadata()?, f))) {
+        Err(e) => Err(match e.kind() {
+            ErrorKind::NotFound => Error::NotFound {
                 path: path.clone(),
                 source: e,
-            }
-        } else {
-            Error::UnableToOpenFile {
+            },
+            _ => Error::UnableToOpenFile {
                 path: path.clone(),
                 source: e,
-            }
-        }
-    })?;
+            },
+        }),
+        Ok((metadata, file)) => match metadata.is_file() {
+            true => Ok(file),
+            false => Err(Error::NotFound {
+                path: path.clone(),
+                source: io::Error::new(ErrorKind::NotFound, "not a file"),
+            }),
+        },
+    }?;
     Ok(file)
 }