You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by jo...@apache.org on 2021/01/01 14:14:58 UTC

[arrow] branch master updated: ARROW-10996: [Rust] [Parquet] change return value type of get_arrow_schema_from_metadata()

This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new cd22be6  ARROW-10996: [Rust] [Parquet] change return value type of get_arrow_schema_from_metadata()
cd22be6 is described below

commit cd22be6efedbf9832b5ea875ca59bb42de7b6c28
Author: mqy <me...@gmail.com>
AuthorDate: Fri Jan 1 14:13:40 2021 +0000

    ARROW-10996: [Rust] [Parquet] change return value type of get_arrow_schema_from_metadata()
    
    https://github.com/apache/arrow/pull/8936 updated crate `flatbuffers` to 0.8.0, but function `get_arrow_schema_from_metadata` was still returning `Option` rather than `Result`. This PR fixes this issue.
    
    Closes #9058 from mqy/get_arrow_schema_from_metadata
    
    Authored-by: mqy <me...@gmail.com>
    Signed-off-by: Jorge C. Leitao <jo...@gmail.com>
---
 rust/parquet/src/arrow/schema.rs | 31 ++++++++++++-------------------
 1 file changed, 12 insertions(+), 19 deletions(-)

diff --git a/rust/parquet/src/arrow/schema.rs b/rust/parquet/src/arrow/schema.rs
index 22213d4..eb5e94d 100644
--- a/rust/parquet/src/arrow/schema.rs
+++ b/rust/parquet/src/arrow/schema.rs
@@ -42,18 +42,14 @@ pub fn parquet_to_arrow_schema(
     key_value_metadata: &Option<Vec<KeyValue>>,
 ) -> Result<Schema> {
     let mut metadata = parse_key_value_metadata(key_value_metadata).unwrap_or_default();
-    let arrow_schema_metadata = metadata
+    metadata
         .remove(super::ARROW_SCHEMA_META_KEY)
-        .map(|encoded| get_arrow_schema_from_metadata(&encoded));
-
-    match arrow_schema_metadata {
-        Some(Some(schema)) => Ok(schema),
-        _ => parquet_to_arrow_schema_by_columns(
+        .map(|encoded| get_arrow_schema_from_metadata(&encoded))
+        .unwrap_or(parquet_to_arrow_schema_by_columns(
             parquet_schema,
             0..parquet_schema.columns().len(),
             key_value_metadata,
-        ),
-    }
+        ))
 }
 
 /// Convert parquet schema to arrow schema including optional metadata,
@@ -123,7 +119,7 @@ where
     let arrow_schema_metadata = metadata
         .remove(super::ARROW_SCHEMA_META_KEY)
         .map(|encoded| get_arrow_schema_from_metadata(&encoded))
-        .unwrap_or_default();
+        .map_or(Ok(None), |v| v.map(Some))?;
 
     // add the Arrow metadata to the Parquet metadata
     if let Some(arrow_schema) = &arrow_schema_metadata {
@@ -175,7 +171,7 @@ where
 }
 
 /// Try to convert Arrow schema metadata into a schema
-fn get_arrow_schema_from_metadata(encoded_meta: &str) -> Option<Schema> {
+fn get_arrow_schema_from_metadata(encoded_meta: &str) -> Result<Schema> {
     let decoded = base64::decode(encoded_meta);
     match decoded {
         Ok(bytes) => {
@@ -187,28 +183,25 @@ fn get_arrow_schema_from_metadata(encoded_meta: &str) -> Option<Schema> {
             match arrow::ipc::root_as_message(slice) {
                 Ok(message) => message
                     .header_as_schema()
-                    .map(arrow::ipc::convert::fb_to_schema),
+                    .map(arrow::ipc::convert::fb_to_schema)
+                    .ok_or(ArrowError("the message is not Arrow Schema".to_string())),
                 Err(err) => {
                     // The flatbuffers implementation returns an error on verification error.
-                    // TODO: return error to caller?
-                    eprintln!(
+                    Err(ArrowError(format!(
                         "Unable to get root as message stored in {}: {:?}",
                         super::ARROW_SCHEMA_META_KEY,
                         err
-                    );
-                    None
+                    )))
                 }
             }
         }
         Err(err) => {
             // The C++ implementation returns an error if the schema can't be parsed.
-            // To prevent this, we explicitly log this, then compute the schema without the metadata
-            eprintln!(
+            Err(ArrowError(format!(
                 "Unable to decode the encoded schema stored in {}, {:?}",
                 super::ARROW_SCHEMA_META_KEY,
                 err
-            );
-            None
+            )))
         }
     }
 }