Posted to commits@arrow.apache.org by tu...@apache.org on 2023/01/22 09:13:43 UTC

[arrow-rs] branch master updated: Add external variant to ParquetError (#3285) (#3574)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 892a80385 Add external variant to ParquetError (#3285) (#3574)
892a80385 is described below

commit 892a80385bdc1ee4c033e3b848fed882982e352d
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Sun Jan 22 09:13:38 2023 +0000

    Add external variant to ParquetError (#3285) (#3574)
---
 parquet/src/arrow/arrow_writer/levels.rs |   4 +-
 parquet/src/column/mod.rs                |  26 +-
 parquet/src/encodings/decoding.rs        |   7 +-
 parquet/src/encodings/encoding/mod.rs    |  11 +-
 parquet/src/errors.rs                    |  39 +--
 parquet/src/file/footer.rs               |  24 +-
 parquet/src/record/api.rs                |  24 +-
 parquet/src/record/reader.rs             |  17 +-
 parquet/src/schema/parser.rs             | 409 ++++++++++++-------------------
 9 files changed, 236 insertions(+), 325 deletions(-)
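In short, this commit drops the derived PartialEq/Eq/Clone on ParquetError and adds an External(Box<dyn Error + Send + Sync>) variant: the From conversions for io::Error, snap::Error, thrift::Error, cell::BorrowMutError, str::Utf8Error and ArrowError now wrap the original error instead of flattening it into a General string, Display renders it as "External: ...", and Error::source() exposes it. A minimal sketch of the resulting behaviour follows; read_fully is a hypothetical helper used only for illustration, everything else mirrors the diff below.

    use std::error::Error;
    use std::fs::File;
    use std::io::Read;

    use parquet::errors::Result;

    // Hypothetical helper: any io::Error raised here now converts (via `?`
    // and the From<io::Error> impl in this diff) into ParquetError::External
    // rather than a stringly-typed ParquetError::General.
    fn read_fully(path: &str) -> Result<Vec<u8>> {
        let mut buf = Vec::new();
        File::open(path)?.read_to_end(&mut buf)?;
        Ok(buf)
    }

    fn main() {
        if let Err(e) = read_fully("/no/such/file.parquet") {
            println!("{}", e);             // "External: <underlying io error>"
            assert!(e.source().is_some()); // the wrapped error is preserved
        }
    }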

diff --git a/parquet/src/arrow/arrow_writer/levels.rs b/parquet/src/arrow/arrow_writer/levels.rs
index 182f68c49..15197c02e 100644
--- a/parquet/src/arrow/arrow_writer/levels.rs
+++ b/parquet/src/arrow/arrow_writer/levels.rs
@@ -1085,7 +1085,7 @@ mod tests {
                 .unwrap();
 
         let struct_null_level =
-            calculate_array_levels(batch.column(0), batch.schema().field(0));
+            calculate_array_levels(batch.column(0), batch.schema().field(0)).unwrap();
 
         // create second batch
         // define schema
@@ -1108,7 +1108,7 @@ mod tests {
                 .unwrap();
 
         let struct_non_null_level =
-            calculate_array_levels(batch.column(0), batch.schema().field(0));
+            calculate_array_levels(batch.column(0), batch.schema().field(0)).unwrap();
 
         // The 2 levels should not be the same
         if struct_non_null_level == struct_null_level {
diff --git a/parquet/src/column/mod.rs b/parquet/src/column/mod.rs
index 93a4f00d2..cb0c035dd 100644
--- a/parquet/src/column/mod.rs
+++ b/parquet/src/column/mod.rs
@@ -36,18 +36,18 @@
 //! repetition levels and read them to verify write/read correctness.
 //!
 //! ```rust,no_run
-//! use std::{fs, path::Path, sync::Arc};
-//!
-//! use parquet::{
-//!     column::{reader::ColumnReader, writer::ColumnWriter},
-//!     data_type::Int32Type,
-//!     file::{
-//!         properties::WriterProperties,
-//!         reader::{FileReader, SerializedFileReader},
-//!         writer::SerializedFileWriter,
-//!     },
-//!     schema::parser::parse_message_type,
-//! };
+//! # use std::{fs, path::Path, sync::Arc};
+//! #
+//! # use parquet::{
+//! #    column::{reader::ColumnReader, writer::ColumnWriter},
+//! #    data_type::Int32Type,
+//! #    file::{
+//! #        properties::WriterProperties,
+//! #        reader::{FileReader, SerializedFileReader},
+//! #        writer::SerializedFileWriter,
+//! #    },
+//! #    schema::parser::parse_message_type,
+//! # };
 //!
 //! let path = Path::new("/path/to/column_sample.parquet");
 //!
@@ -111,7 +111,7 @@
 //!     }
 //! }
 //!
-//! assert_eq!(res, Ok((3, 5)));
+//! assert_eq!(res.unwrap(), (3, 5));
 //! assert_eq!(values, vec![1, 2, 3, 0, 0, 0, 0, 0]);
 //! assert_eq!(def_levels, vec![3, 3, 3, 2, 2, 0, 0, 0]);
 //! assert_eq!(rep_levels, vec![0, 1, 0, 1, 1, 0, 0, 0]);
diff --git a/parquet/src/encodings/decoding.rs b/parquet/src/encodings/decoding.rs
index 7e3058ba7..805833587 100644
--- a/parquet/src/encodings/decoding.rs
+++ b/parquet/src/encodings/decoding.rs
@@ -1946,11 +1946,12 @@ mod tests {
         let decoder = get_decoder::<T>(descr, encoding);
         match err {
             Some(parquet_error) => {
-                assert!(decoder.is_err());
-                assert_eq!(decoder.err().unwrap(), parquet_error);
+                assert_eq!(
+                    decoder.err().unwrap().to_string(),
+                    parquet_error.to_string()
+                );
             }
             None => {
-                assert!(decoder.is_ok());
                 assert_eq!(decoder.unwrap().encoding(), encoding);
             }
         }
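The pattern above, comparing err().unwrap().to_string() rather than the error values themselves, recurs throughout this diff: with a boxed dyn Error payload, ParquetError can no longer derive PartialEq/Eq/Clone, so the tests assert on the Display output instead. A minimal sketch of that check, with a made-up message purely for illustration:

    use parquet::errors::ParquetError;

    fn main() {
        // General(msg) renders as "Parquet error: <msg>", so string
        // comparison is a stable substitute for the removed PartialEq.
        let err = ParquetError::General("something went wrong".to_string());
        assert_eq!(err.to_string(), "Parquet error: something went wrong");
    }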
diff --git a/parquet/src/encodings/encoding/mod.rs b/parquet/src/encodings/encoding/mod.rs
index 78f4a8b97..b7e30c4ec 100644
--- a/parquet/src/encodings/encoding/mod.rs
+++ b/parquet/src/encodings/encoding/mod.rs
@@ -1081,13 +1081,12 @@ mod tests {
         let encoder = get_encoder::<T>(encoding);
         match err {
             Some(parquet_error) => {
-                assert!(encoder.is_err());
-                assert_eq!(encoder.err().unwrap(), parquet_error);
-            }
-            None => {
-                assert!(encoder.is_ok());
-                assert_eq!(encoder.unwrap().encoding(), encoding);
+                assert_eq!(
+                    encoder.err().unwrap().to_string(),
+                    parquet_error.to_string()
+                )
             }
+            None => assert_eq!(encoder.unwrap().encoding(), encoding),
         }
     }
 
diff --git a/parquet/src/errors.rs b/parquet/src/errors.rs
index cbbd24053..703ff51f4 100644
--- a/parquet/src/errors.rs
+++ b/parquet/src/errors.rs
@@ -17,12 +17,13 @@
 
 //! Common Parquet errors and macros.
 
+use std::error::Error;
 use std::{cell, io, result, str};
 
 #[cfg(feature = "arrow")]
 use arrow_schema::ArrowError;
 
-#[derive(Debug, PartialEq, Clone, Eq)]
+#[derive(Debug)]
 pub enum ParquetError {
     /// General Parquet error.
     /// Returned when code violates normal workflow of working with Parquet files.
@@ -39,66 +40,72 @@ pub enum ParquetError {
     /// Returned when reading into arrow or writing from arrow.
     ArrowError(String),
     IndexOutOfBound(usize, usize),
+    /// An external error variant
+    External(Box<dyn Error + Send + Sync>),
 }
 
 impl std::fmt::Display for ParquetError {
     fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
-        match *self {
-            ParquetError::General(ref message) => {
+        match &self {
+            ParquetError::General(message) => {
                 write!(fmt, "Parquet error: {}", message)
             }
-            ParquetError::NYI(ref message) => write!(fmt, "NYI: {}", message),
-            ParquetError::EOF(ref message) => write!(fmt, "EOF: {}", message),
+            ParquetError::NYI(message) => write!(fmt, "NYI: {}", message),
+            ParquetError::EOF(message) => write!(fmt, "EOF: {}", message),
             #[cfg(feature = "arrow")]
-            ParquetError::ArrowError(ref message) => write!(fmt, "Arrow: {}", message),
-            ParquetError::IndexOutOfBound(ref index, ref bound) => {
+            ParquetError::ArrowError(message) => write!(fmt, "Arrow: {}", message),
+            ParquetError::IndexOutOfBound(index, ref bound) => {
                 write!(fmt, "Index {} out of bound: {}", index, bound)
             }
+            ParquetError::External(e) => write!(fmt, "External: {}", e),
         }
     }
 }
 
-impl std::error::Error for ParquetError {
-    fn cause(&self) -> Option<&dyn ::std::error::Error> {
-        None
+impl Error for ParquetError {
+    fn source(&self) -> Option<&(dyn Error + 'static)> {
+        match self {
+            ParquetError::External(e) => Some(e.as_ref()),
+            _ => None,
+        }
     }
 }
 
 impl From<io::Error> for ParquetError {
     fn from(e: io::Error) -> ParquetError {
-        ParquetError::General(format!("underlying IO error: {}", e))
+        ParquetError::External(Box::new(e))
     }
 }
 
 #[cfg(any(feature = "snap", test))]
 impl From<snap::Error> for ParquetError {
     fn from(e: snap::Error) -> ParquetError {
-        ParquetError::General(format!("underlying snap error: {}", e))
+        ParquetError::External(Box::new(e))
     }
 }
 
 impl From<thrift::Error> for ParquetError {
     fn from(e: thrift::Error) -> ParquetError {
-        ParquetError::General(format!("underlying Thrift error: {}", e))
+        ParquetError::External(Box::new(e))
     }
 }
 
 impl From<cell::BorrowMutError> for ParquetError {
     fn from(e: cell::BorrowMutError) -> ParquetError {
-        ParquetError::General(format!("underlying borrow error: {}", e))
+        ParquetError::External(Box::new(e))
     }
 }
 
 impl From<str::Utf8Error> for ParquetError {
     fn from(e: str::Utf8Error) -> ParquetError {
-        ParquetError::General(format!("underlying utf8 error: {}", e))
+        ParquetError::External(Box::new(e))
     }
 }
 
 #[cfg(feature = "arrow")]
 impl From<ArrowError> for ParquetError {
     fn from(e: ArrowError) -> ParquetError {
-        ParquetError::ArrowError(format!("underlying Arrow error: {}", e))
+        ParquetError::External(Box::new(e))
     }
 }
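Because source() now returns the boxed error, callers can recover the concrete underlying type instead of parsing a formatted message. A minimal sketch (not part of the commit) of downcasting back to the original io::Error:

    use std::error::Error;
    use std::io;

    use parquet::errors::ParquetError;

    fn main() {
        let io_err = io::Error::new(io::ErrorKind::NotFound, "missing file");
        let err: ParquetError = io_err.into(); // From<io::Error> -> External

        // source() exposes the wrapped error; downcast_ref recovers the
        // concrete io::Error so its kind can be inspected.
        let kind = err
            .source()
            .and_then(|e| e.downcast_ref::<io::Error>())
            .map(|e| e.kind());
        assert_eq!(kind, Some(io::ErrorKind::NotFound));
    }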
 
diff --git a/parquet/src/file/footer.rs b/parquet/src/file/footer.rs
index 27c07b78d..760caa977 100644
--- a/parquet/src/file/footer.rs
+++ b/parquet/src/file/footer.rs
@@ -156,10 +156,9 @@ mod tests {
     fn test_parse_metadata_size_smaller_than_footer() {
         let test_file = tempfile::tempfile().unwrap();
         let reader_result = parse_metadata(&test_file);
-        assert!(reader_result.is_err());
         assert_eq!(
-            reader_result.err().unwrap(),
-            general_err!("Invalid Parquet file. Size is smaller than footer")
+            reader_result.unwrap_err().to_string(),
+            "Parquet error: Invalid Parquet file. Size is smaller than footer"
         );
     }
 
@@ -167,10 +166,9 @@ mod tests {
     fn test_parse_metadata_corrupt_footer() {
         let data = Bytes::from(vec![1, 2, 3, 4, 5, 6, 7, 8]);
         let reader_result = parse_metadata(&data);
-        assert!(reader_result.is_err());
         assert_eq!(
-            reader_result.err().unwrap(),
-            general_err!("Invalid Parquet file. Corrupt footer")
+            reader_result.unwrap_err().to_string(),
+            "Parquet error: Invalid Parquet file. Corrupt footer"
         );
     }
 
@@ -178,12 +176,9 @@ mod tests {
     fn test_parse_metadata_invalid_length() {
         let test_file = Bytes::from(vec![0, 0, 0, 255, b'P', b'A', b'R', b'1']);
         let reader_result = parse_metadata(&test_file);
-        assert!(reader_result.is_err());
         assert_eq!(
-            reader_result.err().unwrap(),
-            general_err!(
-                "Invalid Parquet file. Metadata length is less than zero (-16777216)"
-            )
+            reader_result.unwrap_err().to_string(),
+            "Parquet error: Invalid Parquet file. Metadata length is less than zero (-16777216)"
         );
     }
 
@@ -191,12 +186,9 @@ mod tests {
     fn test_parse_metadata_invalid_start() {
         let test_file = Bytes::from(vec![255, 0, 0, 0, b'P', b'A', b'R', b'1']);
         let reader_result = parse_metadata(&test_file);
-        assert!(reader_result.is_err());
         assert_eq!(
-            reader_result.err().unwrap(),
-            general_err!(
-                "Invalid Parquet file. Reported metadata length of 255 + 8 byte footer, but file is only 8 bytes"
-            )
+            reader_result.unwrap_err().to_string(),
+            "Parquet error: Invalid Parquet file. Reported metadata length of 255 + 8 byte footer, but file is only 8 bytes"
         );
     }
 
diff --git a/parquet/src/record/api.rs b/parquet/src/record/api.rs
index 0880e7179..8c942cb44 100644
--- a/parquet/src/record/api.rs
+++ b/parquet/src/record/api.rs
@@ -1536,16 +1536,16 @@ mod tests {
         ]);
 
         assert_eq!(
-            ParquetError::General("Cannot access Group as Float".to_string()),
-            row.get_float(0).unwrap_err()
+            row.get_float(0).unwrap_err().to_string(),
+            "Parquet error: Cannot access Group as Float"
         );
         assert_eq!(
-            ParquetError::General("Cannot access ListInternal as Float".to_string()),
-            row.get_float(1).unwrap_err()
+            row.get_float(1).unwrap_err().to_string(),
+            "Parquet error: Cannot access ListInternal as Float"
         );
         assert_eq!(
-            ParquetError::General("Cannot access MapInternal as Float".to_string()),
-            row.get_float(2).unwrap_err()
+            row.get_float(2).unwrap_err().to_string(),
+            "Parquet error: Cannot access MapInternal as Float",
         );
     }
 
@@ -1680,8 +1680,8 @@ mod tests {
             ("Y".to_string(), Field::Int(2)),
         ]))]);
         assert_eq!(
-            general_err!("Cannot access Group as Float".to_string()),
-            list.get_float(0).unwrap_err()
+            list.get_float(0).unwrap_err().to_string(),
+            "Parquet error: Cannot access Group as Float"
         );
 
         let list = make_list(vec![Field::ListInternal(make_list(vec![
@@ -1691,8 +1691,8 @@ mod tests {
             Field::Int(12),
         ]))]);
         assert_eq!(
-            general_err!("Cannot access ListInternal as Float".to_string()),
-            list.get_float(0).unwrap_err()
+            list.get_float(0).unwrap_err().to_string(),
+            "Parquet error: Cannot access ListInternal as Float"
         );
 
         let list = make_list(vec![Field::MapInternal(make_map(vec![
@@ -1701,8 +1701,8 @@ mod tests {
             (Field::Int(3), Field::Float(2.3)),
         ]))]);
         assert_eq!(
-            general_err!("Cannot access MapInternal as Float".to_string()),
-            list.get_float(0).unwrap_err()
+            list.get_float(0).unwrap_err().to_string(),
+            "Parquet error: Cannot access MapInternal as Float",
         );
     }
 
diff --git a/parquet/src/record/reader.rs b/parquet/src/record/reader.rs
index 0b7e04587..a84693536 100644
--- a/parquet/src/record/reader.rs
+++ b/parquet/src/record/reader.rs
@@ -824,7 +824,7 @@ impl Iterator for ReaderIter {
 mod tests {
     use super::*;
 
-    use crate::errors::{ParquetError, Result};
+    use crate::errors::Result;
     use crate::file::reader::{FileReader, SerializedFileReader};
     use crate::record::api::{Field, Row, RowAccessor, RowFormatter};
     use crate::schema::parser::parse_message_type;
@@ -1452,10 +1452,9 @@ mod tests {
     ";
         let schema = parse_message_type(schema).unwrap();
         let res = test_file_reader_rows("nested_maps.snappy.parquet", Some(schema));
-        assert!(res.is_err());
         assert_eq!(
-            res.unwrap_err(),
-            general_err!("Root schema does not contain projection")
+            res.unwrap_err().to_string(),
+            "Parquet error: Root schema does not contain projection"
         );
     }
 
@@ -1469,10 +1468,9 @@ mod tests {
     ";
         let schema = parse_message_type(schema).unwrap();
         let res = test_row_group_rows("nested_maps.snappy.parquet", Some(schema));
-        assert!(res.is_err());
         assert_eq!(
-            res.unwrap_err(),
-            general_err!("Root schema does not contain projection")
+            res.unwrap_err().to_string(),
+            "Parquet error: Root schema does not contain projection"
         );
     }
 
@@ -1542,10 +1540,9 @@ mod tests {
         let reader = SerializedFileReader::try_from(path.as_path()).unwrap();
         let res = RowIter::from_file_into(Box::new(reader)).project(proj);
 
-        assert!(res.is_err());
         assert_eq!(
-            res.err().unwrap(),
-            general_err!("Root schema does not contain projection")
+            res.err().unwrap().to_string(),
+            "Parquet error: Root schema does not contain projection"
         );
     }
 
diff --git a/parquet/src/schema/parser.rs b/parquet/src/schema/parser.rs
index 140e3e085..c09f13603 100644
--- a/parquet/src/schema/parser.rs
+++ b/parquet/src/schema/parser.rs
@@ -628,30 +628,26 @@ mod tests {
         assert!(assert_token(None, "b").is_err());
     }
 
-    #[test]
-    fn test_parse_message_type_invalid() {
-        let mut iter = Tokenizer::from_str("test");
-        let result = Parser {
+    fn parse(schema: &str) -> Result<Type, ParquetError> {
+        let mut iter = Tokenizer::from_str(schema);
+        Parser {
             tokenizer: &mut iter,
         }
-        .parse_message_type();
-        assert!(result.is_err());
+        .parse_message_type()
+    }
+
+    #[test]
+    fn test_parse_message_type_invalid() {
         assert_eq!(
-            result.unwrap_err().to_string(),
+            parse("test").unwrap_err().to_string(),
             "Parquet error: Message type does not start with 'message'"
         );
     }
 
     #[test]
     fn test_parse_message_type_no_name() {
-        let mut iter = Tokenizer::from_str("message");
-        let result = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type();
-        assert!(result.is_err());
         assert_eq!(
-            result.unwrap_err().to_string(),
+            parse("message").unwrap_err().to_string(),
             "Parquet error: Expected name, found None"
         );
     }
@@ -659,46 +655,34 @@ mod tests {
     #[test]
     fn test_parse_message_type_fixed_byte_array() {
         let schema = "
-    message schema {
-      REQUIRED FIXED_LEN_BYTE_ARRAY col;
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let result = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type();
-        assert!(result.is_err());
+            message schema {
+              REQUIRED FIXED_LEN_BYTE_ARRAY col;
+            }
+        ";
+        assert_eq!(
+            parse(schema).unwrap_err().to_string(),
+            "Parquet error: Expected '(', found token 'col'"
+        );
 
         let schema = "
-    message schema {
-      REQUIRED FIXED_LEN_BYTE_ARRAY(16) col;
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let result = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type();
-        assert!(result.is_ok());
+            message schema {
+              REQUIRED FIXED_LEN_BYTE_ARRAY(16) col;
+            }
+        ";
+        parse(schema).unwrap();
     }
 
     #[test]
     fn test_parse_message_type_integer() {
         // Invalid integer syntax
         let schema = "
-    message root {
-      optional int64 f1 (INTEGER());
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let result = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type();
+            message root {
+              optional int64 f1 (INTEGER());
+            }
+        ";
         assert_eq!(
-            result,
-            Err(general_err!("Failed to parse bit_width for INTEGER type"))
+            parse(schema).unwrap_err().to_string(),
+            "Parquet error: Failed to parse bit_width for INTEGER type"
         );
 
         // Invalid integer syntax, needs both bit-width and UTC sign
@@ -707,123 +691,87 @@ mod tests {
       optional int64 f1 (INTEGER(32,));
     }
     ";
-        let mut iter = Tokenizer::from_str(schema);
-        let result = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type();
         assert_eq!(
-            result,
-            Err(general_err!("Incorrect bit width 32 for INT64"))
+            parse(schema).unwrap_err().to_string(),
+            "Parquet error: Incorrect bit width 32 for INT64"
         );
 
         // Invalid integer because of non-numeric bit width
         let schema = "
-    message root {
-      optional int32 f1 (INTEGER(eight,true));
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let result = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type();
+            message root {
+              optional int32 f1 (INTEGER(eight,true));
+            }
+        ";
         assert_eq!(
-            result,
-            Err(general_err!("Failed to parse bit_width for INTEGER type"))
+            parse(schema).unwrap_err().to_string(),
+            "Parquet error: Failed to parse bit_width for INTEGER type"
         );
 
         // Valid types
         let schema = "
-    message root {
-      optional int32 f1 (INTEGER(8,false));
-      optional int32 f2 (INTEGER(8,true));
-      optional int32 f3 (INTEGER(16,false));
-      optional int32 f4 (INTEGER(16,true));
-      optional int32 f5 (INTEGER(32,false));
-      optional int32 f6 (INTEGER(32,true));
-      optional int64 f7 (INTEGER(64,false));
-      optional int64 f7 (INTEGER(64,true));
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let result = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type();
-        assert!(result.is_ok());
+            message root {
+              optional int32 f1 (INTEGER(8,false));
+              optional int32 f2 (INTEGER(8,true));
+              optional int32 f3 (INTEGER(16,false));
+              optional int32 f4 (INTEGER(16,true));
+              optional int32 f5 (INTEGER(32,false));
+              optional int32 f6 (INTEGER(32,true));
+              optional int64 f7 (INTEGER(64,false));
+              optional int64 f7 (INTEGER(64,true));
+            }
+        ";
+        parse(schema).unwrap();
     }
 
     #[test]
     fn test_parse_message_type_temporal() {
         // Invalid timestamp syntax
         let schema = "
-    message root {
-      optional int64 f1 (TIMESTAMP();
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let result = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type();
+            message root {
+              optional int64 f1 (TIMESTAMP();
+            }
+        ";
         assert_eq!(
-            result,
-            Err(general_err!("Failed to parse timeunit for TIMESTAMP type"))
+            parse(schema).unwrap_err().to_string(),
+            "Parquet error: Failed to parse timeunit for TIMESTAMP type"
         );
 
         // Invalid timestamp syntax, needs both unit and UTC adjustment
         let schema = "
-    message root {
-      optional int64 f1 (TIMESTAMP(MILLIS,));
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let result = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type();
+            message root {
+              optional int64 f1 (TIMESTAMP(MILLIS,));
+            }
+        ";
         assert_eq!(
-            result,
-            Err(general_err!(
-                "Failed to parse timezone info for TIMESTAMP type"
-            ))
+            parse(schema).unwrap_err().to_string(),
+            "Parquet error: Failed to parse timezone info for TIMESTAMP type"
         );
 
         // Invalid timestamp because of unknown unit
         let schema = "
-    message root {
-      optional int64 f1 (TIMESTAMP(YOCTOS,));
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let result = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type();
+            message root {
+              optional int64 f1 (TIMESTAMP(YOCTOS,));
+            }
+        ";
+
         assert_eq!(
-            result,
-            Err(general_err!("Failed to parse timeunit for TIMESTAMP type"))
+            parse(schema).unwrap_err().to_string(),
+            "Parquet error: Failed to parse timeunit for TIMESTAMP type"
         );
 
         // Valid types
         let schema = "
-    message root {
-      optional int32 f1 (DATE);
-      optional int32 f2 (TIME(MILLIS,true));
-      optional int64 f3 (TIME(MICROS,false));
-      optional int64 f4 (TIME(NANOS,true));
-      optional int64 f5 (TIMESTAMP(MILLIS,true));
-      optional int64 f6 (TIMESTAMP(MICROS,true));
-      optional int64 f7 (TIMESTAMP(NANOS,false));
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let result = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type();
-        assert!(result.is_ok());
+            message root {
+              optional int32 f1 (DATE);
+              optional int32 f2 (TIME(MILLIS,true));
+              optional int64 f3 (TIME(MICROS,false));
+              optional int64 f4 (TIME(NANOS,true));
+              optional int64 f5 (TIMESTAMP(MILLIS,true));
+              optional int64 f6 (TIMESTAMP(MICROS,true));
+              optional int64 f7 (TIMESTAMP(NANOS,false));
+            }
+        ";
+        parse(schema).unwrap();
     }
 
     #[test]
@@ -833,86 +781,68 @@ mod tests {
 
         // Invalid decimal syntax
         let schema = "
-    message root {
-      optional int32 f1 (DECIMAL();
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let result = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type();
-        assert!(result.is_err());
+            message root {
+              optional int32 f1 (DECIMAL();
+            }
+        ";
+        assert_eq!(
+            parse(schema).unwrap_err().to_string(),
+            "Parquet error: Failed to parse precision for DECIMAL type"
+        );
 
         // Invalid decimal, need precision and scale
         let schema = "
-    message root {
-      optional int32 f1 (DECIMAL());
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let result = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type();
-        assert!(result.is_err());
+            message root {
+              optional int32 f1 (DECIMAL());
+            }
+        ";
+        assert_eq!(
+            parse(schema).unwrap_err().to_string(),
+            "Parquet error: Failed to parse precision for DECIMAL type"
+        );
 
         // Invalid decimal because of `,` - has precision, needs scale
         let schema = "
-    message root {
-      optional int32 f1 (DECIMAL(8,));
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let result = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type();
-        assert!(result.is_err());
+            message root {
+              optional int32 f1 (DECIMAL(8,));
+            }
+        ";
+        assert_eq!(
+            parse(schema).unwrap_err().to_string(),
+            "Parquet error: Failed to parse scale for DECIMAL type"
+        );
 
         // Invalid decimal because, we always require either precision or scale to be
         // specified as part of converted type
         let schema = "
-    message root {
-      optional int32 f3 (DECIMAL);
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let result = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type();
-        assert!(result.is_err());
+            message root {
+              optional int32 f3 (DECIMAL);
+            }
+        ";
+        assert_eq!(
+            parse(schema).unwrap_err().to_string(),
+            "Parquet error: Expected ')', found token ';'"
+        );
 
         // Valid decimal (precision, scale)
         let schema = "
-    message root {
-      optional int32 f1 (DECIMAL(8, 3));
-      optional int32 f2 (DECIMAL(8));
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let result = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type();
-        assert!(result.is_ok());
+            message root {
+              optional int32 f1 (DECIMAL(8, 3));
+              optional int32 f2 (DECIMAL(8));
+            }
+        ";
+        parse(schema).unwrap();
     }
 
     #[test]
     fn test_parse_message_type_compare_1() {
         let schema = "
-    message root {
-      optional fixed_len_byte_array(5) f1 (DECIMAL(9, 3));
-      optional fixed_len_byte_array (16) f2 (DECIMAL (38, 18));
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let message = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type()
-        .unwrap();
+            message root {
+              optional fixed_len_byte_array(5) f1 (DECIMAL(9, 3));
+              optional fixed_len_byte_array (16) f2 (DECIMAL (38, 18));
+            }
+        ";
+        let message = parse(schema).unwrap();
 
         let expected = Type::group_type_builder("root")
             .with_fields(&mut vec![
@@ -958,27 +888,22 @@ mod tests {
     #[test]
     fn test_parse_message_type_compare_2() {
         let schema = "
-    message root {
-      required group a0 {
-        optional group a1 (LIST) {
-          repeated binary a2 (UTF8);
-        }
+            message root {
+              required group a0 {
+                optional group a1 (LIST) {
+                  repeated binary a2 (UTF8);
+                }
 
-        optional group b1 (LIST) {
-          repeated group b2 {
-            optional int32 b3;
-            optional double b4;
-          }
-        }
-      }
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let message = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type()
-        .unwrap();
+                optional group b1 (LIST) {
+                  repeated group b2 {
+                    optional int32 b3;
+                    optional double b4;
+                  }
+                }
+              }
+            }
+        ";
+        let message = parse(schema).unwrap();
 
         let expected = Type::group_type_builder("root")
             .with_fields(&mut vec![Arc::new(
@@ -1048,21 +973,16 @@ mod tests {
     #[test]
     fn test_parse_message_type_compare_3() {
         let schema = "
-    message root {
-      required int32 _1 (INT_8);
-      required int32 _2 (INT_16);
-      required float _3;
-      required double _4;
-      optional int32 _5 (DATE);
-      optional binary _6 (UTF8);
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let message = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type()
-        .unwrap();
+            message root {
+              required int32 _1 (INT_8);
+              required int32 _2 (INT_16);
+              required float _3;
+              required double _4;
+              optional int32 _5 (DATE);
+              optional binary _6 (UTF8);
+            }
+        ";
+        let message = parse(schema).unwrap();
 
         let mut fields = vec![
             Arc::new(
@@ -1116,25 +1036,20 @@ mod tests {
     #[test]
     fn test_parse_message_type_compare_4() {
         let schema = "
-    message root {
-      required int32 _1 (INTEGER(8,true));
-      required int32 _2 (INTEGER(16,false));
-      required float _3;
-      required double _4;
-      optional int32 _5 (DATE);
-      optional int32 _6 (TIME(MILLIS,false));
-      optional int64 _7 (TIME(MICROS,true));
-      optional int64 _8 (TIMESTAMP(MILLIS,true));
-      optional int64 _9 (TIMESTAMP(NANOS,false));
-      optional binary _10 (STRING);
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let message = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type()
-        .unwrap();
+            message root {
+              required int32 _1 (INTEGER(8,true));
+              required int32 _2 (INTEGER(16,false));
+              required float _3;
+              required double _4;
+              optional int32 _5 (DATE);
+              optional int32 _6 (TIME(MILLIS,false));
+              optional int64 _7 (TIME(MICROS,true));
+              optional int64 _8 (TIMESTAMP(MILLIS,true));
+              optional int64 _9 (TIMESTAMP(NANOS,false));
+              optional binary _10 (STRING);
+            }
+        ";
+        let message = parse(schema).unwrap();
 
         let mut fields = vec![
             Arc::new(