You are viewing a plain text version of this content. The canonical (hyperlinked) version is available in the Apache mailing-list archive; the link was lost in this plain-text extraction.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/01/22 09:13:43 UTC
[arrow-rs] branch master updated: Add external variant to ParquetError (#3285) (#3574)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 892a80385 Add external variant to ParquetError (#3285) (#3574)
892a80385 is described below
commit 892a80385bdc1ee4c033e3b848fed882982e352d
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Sun Jan 22 09:13:38 2023 +0000
Add external variant to ParquetError (#3285) (#3574)
---
parquet/src/arrow/arrow_writer/levels.rs | 4 +-
parquet/src/column/mod.rs | 26 +-
parquet/src/encodings/decoding.rs | 7 +-
parquet/src/encodings/encoding/mod.rs | 11 +-
parquet/src/errors.rs | 39 +--
parquet/src/file/footer.rs | 24 +-
parquet/src/record/api.rs | 24 +-
parquet/src/record/reader.rs | 17 +-
parquet/src/schema/parser.rs | 409 ++++++++++++-------------------
9 files changed, 236 insertions(+), 325 deletions(-)
diff --git a/parquet/src/arrow/arrow_writer/levels.rs b/parquet/src/arrow/arrow_writer/levels.rs
index 182f68c49..15197c02e 100644
--- a/parquet/src/arrow/arrow_writer/levels.rs
+++ b/parquet/src/arrow/arrow_writer/levels.rs
@@ -1085,7 +1085,7 @@ mod tests {
.unwrap();
let struct_null_level =
- calculate_array_levels(batch.column(0), batch.schema().field(0));
+ calculate_array_levels(batch.column(0), batch.schema().field(0)).unwrap();
// create second batch
// define schema
@@ -1108,7 +1108,7 @@ mod tests {
.unwrap();
let struct_non_null_level =
- calculate_array_levels(batch.column(0), batch.schema().field(0));
+ calculate_array_levels(batch.column(0), batch.schema().field(0)).unwrap();
// The 2 levels should not be the same
if struct_non_null_level == struct_null_level {
diff --git a/parquet/src/column/mod.rs b/parquet/src/column/mod.rs
index 93a4f00d2..cb0c035dd 100644
--- a/parquet/src/column/mod.rs
+++ b/parquet/src/column/mod.rs
@@ -36,18 +36,18 @@
//! repetition levels and read them to verify write/read correctness.
//!
//! ```rust,no_run
-//! use std::{fs, path::Path, sync::Arc};
-//!
-//! use parquet::{
-//! column::{reader::ColumnReader, writer::ColumnWriter},
-//! data_type::Int32Type,
-//! file::{
-//! properties::WriterProperties,
-//! reader::{FileReader, SerializedFileReader},
-//! writer::SerializedFileWriter,
-//! },
-//! schema::parser::parse_message_type,
-//! };
+//! # use std::{fs, path::Path, sync::Arc};
+//! #
+//! # use parquet::{
+//! # column::{reader::ColumnReader, writer::ColumnWriter},
+//! # data_type::Int32Type,
+//! # file::{
+//! # properties::WriterProperties,
+//! # reader::{FileReader, SerializedFileReader},
+//! # writer::SerializedFileWriter,
+//! # },
+//! # schema::parser::parse_message_type,
+//! # };
//!
//! let path = Path::new("/path/to/column_sample.parquet");
//!
@@ -111,7 +111,7 @@
//! }
//! }
//!
-//! assert_eq!(res, Ok((3, 5)));
+//! assert_eq!(res.unwrap(), (3, 5));
//! assert_eq!(values, vec![1, 2, 3, 0, 0, 0, 0, 0]);
//! assert_eq!(def_levels, vec![3, 3, 3, 2, 2, 0, 0, 0]);
//! assert_eq!(rep_levels, vec![0, 1, 0, 1, 1, 0, 0, 0]);
diff --git a/parquet/src/encodings/decoding.rs b/parquet/src/encodings/decoding.rs
index 7e3058ba7..805833587 100644
--- a/parquet/src/encodings/decoding.rs
+++ b/parquet/src/encodings/decoding.rs
@@ -1946,11 +1946,12 @@ mod tests {
let decoder = get_decoder::<T>(descr, encoding);
match err {
Some(parquet_error) => {
- assert!(decoder.is_err());
- assert_eq!(decoder.err().unwrap(), parquet_error);
+ assert_eq!(
+ decoder.err().unwrap().to_string(),
+ parquet_error.to_string()
+ );
}
None => {
- assert!(decoder.is_ok());
assert_eq!(decoder.unwrap().encoding(), encoding);
}
}
diff --git a/parquet/src/encodings/encoding/mod.rs b/parquet/src/encodings/encoding/mod.rs
index 78f4a8b97..b7e30c4ec 100644
--- a/parquet/src/encodings/encoding/mod.rs
+++ b/parquet/src/encodings/encoding/mod.rs
@@ -1081,13 +1081,12 @@ mod tests {
let encoder = get_encoder::<T>(encoding);
match err {
Some(parquet_error) => {
- assert!(encoder.is_err());
- assert_eq!(encoder.err().unwrap(), parquet_error);
- }
- None => {
- assert!(encoder.is_ok());
- assert_eq!(encoder.unwrap().encoding(), encoding);
+ assert_eq!(
+ encoder.err().unwrap().to_string(),
+ parquet_error.to_string()
+ )
}
+ None => assert_eq!(encoder.unwrap().encoding(), encoding),
}
}
diff --git a/parquet/src/errors.rs b/parquet/src/errors.rs
index cbbd24053..703ff51f4 100644
--- a/parquet/src/errors.rs
+++ b/parquet/src/errors.rs
@@ -17,12 +17,13 @@
//! Common Parquet errors and macros.
+use std::error::Error;
use std::{cell, io, result, str};
#[cfg(feature = "arrow")]
use arrow_schema::ArrowError;
-#[derive(Debug, PartialEq, Clone, Eq)]
+#[derive(Debug)]
pub enum ParquetError {
/// General Parquet error.
/// Returned when code violates normal workflow of working with Parquet files.
@@ -39,66 +40,72 @@ pub enum ParquetError {
/// Returned when reading into arrow or writing from arrow.
ArrowError(String),
IndexOutOfBound(usize, usize),
+ /// An external error variant
+ External(Box<dyn Error + Send + Sync>),
}
impl std::fmt::Display for ParquetError {
fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
- match *self {
- ParquetError::General(ref message) => {
+ match &self {
+ ParquetError::General(message) => {
write!(fmt, "Parquet error: {}", message)
}
- ParquetError::NYI(ref message) => write!(fmt, "NYI: {}", message),
- ParquetError::EOF(ref message) => write!(fmt, "EOF: {}", message),
+ ParquetError::NYI(message) => write!(fmt, "NYI: {}", message),
+ ParquetError::EOF(message) => write!(fmt, "EOF: {}", message),
#[cfg(feature = "arrow")]
- ParquetError::ArrowError(ref message) => write!(fmt, "Arrow: {}", message),
- ParquetError::IndexOutOfBound(ref index, ref bound) => {
+ ParquetError::ArrowError(message) => write!(fmt, "Arrow: {}", message),
+ ParquetError::IndexOutOfBound(index, ref bound) => {
write!(fmt, "Index {} out of bound: {}", index, bound)
}
+ ParquetError::External(e) => write!(fmt, "External: {}", e),
}
}
}
-impl std::error::Error for ParquetError {
- fn cause(&self) -> Option<&dyn ::std::error::Error> {
- None
+impl Error for ParquetError {
+ fn source(&self) -> Option<&(dyn Error + 'static)> {
+ match self {
+ ParquetError::External(e) => Some(e.as_ref()),
+ _ => None,
+ }
}
}
impl From<io::Error> for ParquetError {
fn from(e: io::Error) -> ParquetError {
- ParquetError::General(format!("underlying IO error: {}", e))
+ ParquetError::External(Box::new(e))
}
}
#[cfg(any(feature = "snap", test))]
impl From<snap::Error> for ParquetError {
fn from(e: snap::Error) -> ParquetError {
- ParquetError::General(format!("underlying snap error: {}", e))
+ ParquetError::External(Box::new(e))
}
}
impl From<thrift::Error> for ParquetError {
fn from(e: thrift::Error) -> ParquetError {
- ParquetError::General(format!("underlying Thrift error: {}", e))
+ ParquetError::External(Box::new(e))
}
}
impl From<cell::BorrowMutError> for ParquetError {
fn from(e: cell::BorrowMutError) -> ParquetError {
- ParquetError::General(format!("underlying borrow error: {}", e))
+ ParquetError::External(Box::new(e))
}
}
impl From<str::Utf8Error> for ParquetError {
fn from(e: str::Utf8Error) -> ParquetError {
- ParquetError::General(format!("underlying utf8 error: {}", e))
+ ParquetError::External(Box::new(e))
}
}
#[cfg(feature = "arrow")]
impl From<ArrowError> for ParquetError {
fn from(e: ArrowError) -> ParquetError {
- ParquetError::ArrowError(format!("underlying Arrow error: {}", e))
+ ParquetError::External(Box::new(e))
}
}
diff --git a/parquet/src/file/footer.rs b/parquet/src/file/footer.rs
index 27c07b78d..760caa977 100644
--- a/parquet/src/file/footer.rs
+++ b/parquet/src/file/footer.rs
@@ -156,10 +156,9 @@ mod tests {
fn test_parse_metadata_size_smaller_than_footer() {
let test_file = tempfile::tempfile().unwrap();
let reader_result = parse_metadata(&test_file);
- assert!(reader_result.is_err());
assert_eq!(
- reader_result.err().unwrap(),
- general_err!("Invalid Parquet file. Size is smaller than footer")
+ reader_result.unwrap_err().to_string(),
+ "Parquet error: Invalid Parquet file. Size is smaller than footer"
);
}
@@ -167,10 +166,9 @@ mod tests {
fn test_parse_metadata_corrupt_footer() {
let data = Bytes::from(vec![1, 2, 3, 4, 5, 6, 7, 8]);
let reader_result = parse_metadata(&data);
- assert!(reader_result.is_err());
assert_eq!(
- reader_result.err().unwrap(),
- general_err!("Invalid Parquet file. Corrupt footer")
+ reader_result.unwrap_err().to_string(),
+ "Parquet error: Invalid Parquet file. Corrupt footer"
);
}
@@ -178,12 +176,9 @@ mod tests {
fn test_parse_metadata_invalid_length() {
let test_file = Bytes::from(vec![0, 0, 0, 255, b'P', b'A', b'R', b'1']);
let reader_result = parse_metadata(&test_file);
- assert!(reader_result.is_err());
assert_eq!(
- reader_result.err().unwrap(),
- general_err!(
- "Invalid Parquet file. Metadata length is less than zero (-16777216)"
- )
+ reader_result.unwrap_err().to_string(),
+ "Parquet error: Invalid Parquet file. Metadata length is less than zero (-16777216)"
);
}
@@ -191,12 +186,9 @@ mod tests {
fn test_parse_metadata_invalid_start() {
let test_file = Bytes::from(vec![255, 0, 0, 0, b'P', b'A', b'R', b'1']);
let reader_result = parse_metadata(&test_file);
- assert!(reader_result.is_err());
assert_eq!(
- reader_result.err().unwrap(),
- general_err!(
- "Invalid Parquet file. Reported metadata length of 255 + 8 byte footer, but file is only 8 bytes"
- )
+ reader_result.unwrap_err().to_string(),
+ "Parquet error: Invalid Parquet file. Reported metadata length of 255 + 8 byte footer, but file is only 8 bytes"
);
}
diff --git a/parquet/src/record/api.rs b/parquet/src/record/api.rs
index 0880e7179..8c942cb44 100644
--- a/parquet/src/record/api.rs
+++ b/parquet/src/record/api.rs
@@ -1536,16 +1536,16 @@ mod tests {
]);
assert_eq!(
- ParquetError::General("Cannot access Group as Float".to_string()),
- row.get_float(0).unwrap_err()
+ row.get_float(0).unwrap_err().to_string(),
+ "Parquet error: Cannot access Group as Float"
);
assert_eq!(
- ParquetError::General("Cannot access ListInternal as Float".to_string()),
- row.get_float(1).unwrap_err()
+ row.get_float(1).unwrap_err().to_string(),
+ "Parquet error: Cannot access ListInternal as Float"
);
assert_eq!(
- ParquetError::General("Cannot access MapInternal as Float".to_string()),
- row.get_float(2).unwrap_err()
+ row.get_float(2).unwrap_err().to_string(),
+ "Parquet error: Cannot access MapInternal as Float",
);
}
@@ -1680,8 +1680,8 @@ mod tests {
("Y".to_string(), Field::Int(2)),
]))]);
assert_eq!(
- general_err!("Cannot access Group as Float".to_string()),
- list.get_float(0).unwrap_err()
+ list.get_float(0).unwrap_err().to_string(),
+ "Parquet error: Cannot access Group as Float"
);
let list = make_list(vec![Field::ListInternal(make_list(vec![
@@ -1691,8 +1691,8 @@ mod tests {
Field::Int(12),
]))]);
assert_eq!(
- general_err!("Cannot access ListInternal as Float".to_string()),
- list.get_float(0).unwrap_err()
+ list.get_float(0).unwrap_err().to_string(),
+ "Parquet error: Cannot access ListInternal as Float"
);
let list = make_list(vec![Field::MapInternal(make_map(vec![
@@ -1701,8 +1701,8 @@ mod tests {
(Field::Int(3), Field::Float(2.3)),
]))]);
assert_eq!(
- general_err!("Cannot access MapInternal as Float".to_string()),
- list.get_float(0).unwrap_err()
+ list.get_float(0).unwrap_err().to_string(),
+ "Parquet error: Cannot access MapInternal as Float",
);
}
diff --git a/parquet/src/record/reader.rs b/parquet/src/record/reader.rs
index 0b7e04587..a84693536 100644
--- a/parquet/src/record/reader.rs
+++ b/parquet/src/record/reader.rs
@@ -824,7 +824,7 @@ impl Iterator for ReaderIter {
mod tests {
use super::*;
- use crate::errors::{ParquetError, Result};
+ use crate::errors::Result;
use crate::file::reader::{FileReader, SerializedFileReader};
use crate::record::api::{Field, Row, RowAccessor, RowFormatter};
use crate::schema::parser::parse_message_type;
@@ -1452,10 +1452,9 @@ mod tests {
";
let schema = parse_message_type(schema).unwrap();
let res = test_file_reader_rows("nested_maps.snappy.parquet", Some(schema));
- assert!(res.is_err());
assert_eq!(
- res.unwrap_err(),
- general_err!("Root schema does not contain projection")
+ res.unwrap_err().to_string(),
+ "Parquet error: Root schema does not contain projection"
);
}
@@ -1469,10 +1468,9 @@ mod tests {
";
let schema = parse_message_type(schema).unwrap();
let res = test_row_group_rows("nested_maps.snappy.parquet", Some(schema));
- assert!(res.is_err());
assert_eq!(
- res.unwrap_err(),
- general_err!("Root schema does not contain projection")
+ res.unwrap_err().to_string(),
+ "Parquet error: Root schema does not contain projection"
);
}
@@ -1542,10 +1540,9 @@ mod tests {
let reader = SerializedFileReader::try_from(path.as_path()).unwrap();
let res = RowIter::from_file_into(Box::new(reader)).project(proj);
- assert!(res.is_err());
assert_eq!(
- res.err().unwrap(),
- general_err!("Root schema does not contain projection")
+ res.err().unwrap().to_string(),
+ "Parquet error: Root schema does not contain projection"
);
}
diff --git a/parquet/src/schema/parser.rs b/parquet/src/schema/parser.rs
index 140e3e085..c09f13603 100644
--- a/parquet/src/schema/parser.rs
+++ b/parquet/src/schema/parser.rs
@@ -628,30 +628,26 @@ mod tests {
assert!(assert_token(None, "b").is_err());
}
- #[test]
- fn test_parse_message_type_invalid() {
- let mut iter = Tokenizer::from_str("test");
- let result = Parser {
+ fn parse(schema: &str) -> Result<Type, ParquetError> {
+ let mut iter = Tokenizer::from_str(schema);
+ Parser {
tokenizer: &mut iter,
}
- .parse_message_type();
- assert!(result.is_err());
+ .parse_message_type()
+ }
+
+ #[test]
+ fn test_parse_message_type_invalid() {
assert_eq!(
- result.unwrap_err().to_string(),
+ parse("test").unwrap_err().to_string(),
"Parquet error: Message type does not start with 'message'"
);
}
#[test]
fn test_parse_message_type_no_name() {
- let mut iter = Tokenizer::from_str("message");
- let result = Parser {
- tokenizer: &mut iter,
- }
- .parse_message_type();
- assert!(result.is_err());
assert_eq!(
- result.unwrap_err().to_string(),
+ parse("message").unwrap_err().to_string(),
"Parquet error: Expected name, found None"
);
}
@@ -659,46 +655,34 @@ mod tests {
#[test]
fn test_parse_message_type_fixed_byte_array() {
let schema = "
- message schema {
- REQUIRED FIXED_LEN_BYTE_ARRAY col;
- }
- ";
- let mut iter = Tokenizer::from_str(schema);
- let result = Parser {
- tokenizer: &mut iter,
- }
- .parse_message_type();
- assert!(result.is_err());
+ message schema {
+ REQUIRED FIXED_LEN_BYTE_ARRAY col;
+ }
+ ";
+ assert_eq!(
+ parse(schema).unwrap_err().to_string(),
+ "Parquet error: Expected '(', found token 'col'"
+ );
let schema = "
- message schema {
- REQUIRED FIXED_LEN_BYTE_ARRAY(16) col;
- }
- ";
- let mut iter = Tokenizer::from_str(schema);
- let result = Parser {
- tokenizer: &mut iter,
- }
- .parse_message_type();
- assert!(result.is_ok());
+ message schema {
+ REQUIRED FIXED_LEN_BYTE_ARRAY(16) col;
+ }
+ ";
+ parse(schema).unwrap();
}
#[test]
fn test_parse_message_type_integer() {
// Invalid integer syntax
let schema = "
- message root {
- optional int64 f1 (INTEGER());
- }
- ";
- let mut iter = Tokenizer::from_str(schema);
- let result = Parser {
- tokenizer: &mut iter,
- }
- .parse_message_type();
+ message root {
+ optional int64 f1 (INTEGER());
+ }
+ ";
assert_eq!(
- result,
- Err(general_err!("Failed to parse bit_width for INTEGER type"))
+ parse(schema).unwrap_err().to_string(),
+ "Parquet error: Failed to parse bit_width for INTEGER type"
);
// Invalid integer syntax, needs both bit-width and UTC sign
@@ -707,123 +691,87 @@ mod tests {
optional int64 f1 (INTEGER(32,));
}
";
- let mut iter = Tokenizer::from_str(schema);
- let result = Parser {
- tokenizer: &mut iter,
- }
- .parse_message_type();
assert_eq!(
- result,
- Err(general_err!("Incorrect bit width 32 for INT64"))
+ parse(schema).unwrap_err().to_string(),
+ "Parquet error: Incorrect bit width 32 for INT64"
);
// Invalid integer because of non-numeric bit width
let schema = "
- message root {
- optional int32 f1 (INTEGER(eight,true));
- }
- ";
- let mut iter = Tokenizer::from_str(schema);
- let result = Parser {
- tokenizer: &mut iter,
- }
- .parse_message_type();
+ message root {
+ optional int32 f1 (INTEGER(eight,true));
+ }
+ ";
assert_eq!(
- result,
- Err(general_err!("Failed to parse bit_width for INTEGER type"))
+ parse(schema).unwrap_err().to_string(),
+ "Parquet error: Failed to parse bit_width for INTEGER type"
);
// Valid types
let schema = "
- message root {
- optional int32 f1 (INTEGER(8,false));
- optional int32 f2 (INTEGER(8,true));
- optional int32 f3 (INTEGER(16,false));
- optional int32 f4 (INTEGER(16,true));
- optional int32 f5 (INTEGER(32,false));
- optional int32 f6 (INTEGER(32,true));
- optional int64 f7 (INTEGER(64,false));
- optional int64 f7 (INTEGER(64,true));
- }
- ";
- let mut iter = Tokenizer::from_str(schema);
- let result = Parser {
- tokenizer: &mut iter,
- }
- .parse_message_type();
- assert!(result.is_ok());
+ message root {
+ optional int32 f1 (INTEGER(8,false));
+ optional int32 f2 (INTEGER(8,true));
+ optional int32 f3 (INTEGER(16,false));
+ optional int32 f4 (INTEGER(16,true));
+ optional int32 f5 (INTEGER(32,false));
+ optional int32 f6 (INTEGER(32,true));
+ optional int64 f7 (INTEGER(64,false));
+ optional int64 f7 (INTEGER(64,true));
+ }
+ ";
+ parse(schema).unwrap();
}
#[test]
fn test_parse_message_type_temporal() {
// Invalid timestamp syntax
let schema = "
- message root {
- optional int64 f1 (TIMESTAMP();
- }
- ";
- let mut iter = Tokenizer::from_str(schema);
- let result = Parser {
- tokenizer: &mut iter,
- }
- .parse_message_type();
+ message root {
+ optional int64 f1 (TIMESTAMP();
+ }
+ ";
assert_eq!(
- result,
- Err(general_err!("Failed to parse timeunit for TIMESTAMP type"))
+ parse(schema).unwrap_err().to_string(),
+ "Parquet error: Failed to parse timeunit for TIMESTAMP type"
);
// Invalid timestamp syntax, needs both unit and UTC adjustment
let schema = "
- message root {
- optional int64 f1 (TIMESTAMP(MILLIS,));
- }
- ";
- let mut iter = Tokenizer::from_str(schema);
- let result = Parser {
- tokenizer: &mut iter,
- }
- .parse_message_type();
+ message root {
+ optional int64 f1 (TIMESTAMP(MILLIS,));
+ }
+ ";
assert_eq!(
- result,
- Err(general_err!(
- "Failed to parse timezone info for TIMESTAMP type"
- ))
+ parse(schema).unwrap_err().to_string(),
+ "Parquet error: Failed to parse timezone info for TIMESTAMP type"
);
// Invalid timestamp because of unknown unit
let schema = "
- message root {
- optional int64 f1 (TIMESTAMP(YOCTOS,));
- }
- ";
- let mut iter = Tokenizer::from_str(schema);
- let result = Parser {
- tokenizer: &mut iter,
- }
- .parse_message_type();
+ message root {
+ optional int64 f1 (TIMESTAMP(YOCTOS,));
+ }
+ ";
+
assert_eq!(
- result,
- Err(general_err!("Failed to parse timeunit for TIMESTAMP type"))
+ parse(schema).unwrap_err().to_string(),
+ "Parquet error: Failed to parse timeunit for TIMESTAMP type"
);
// Valid types
let schema = "
- message root {
- optional int32 f1 (DATE);
- optional int32 f2 (TIME(MILLIS,true));
- optional int64 f3 (TIME(MICROS,false));
- optional int64 f4 (TIME(NANOS,true));
- optional int64 f5 (TIMESTAMP(MILLIS,true));
- optional int64 f6 (TIMESTAMP(MICROS,true));
- optional int64 f7 (TIMESTAMP(NANOS,false));
- }
- ";
- let mut iter = Tokenizer::from_str(schema);
- let result = Parser {
- tokenizer: &mut iter,
- }
- .parse_message_type();
- assert!(result.is_ok());
+ message root {
+ optional int32 f1 (DATE);
+ optional int32 f2 (TIME(MILLIS,true));
+ optional int64 f3 (TIME(MICROS,false));
+ optional int64 f4 (TIME(NANOS,true));
+ optional int64 f5 (TIMESTAMP(MILLIS,true));
+ optional int64 f6 (TIMESTAMP(MICROS,true));
+ optional int64 f7 (TIMESTAMP(NANOS,false));
+ }
+ ";
+ parse(schema).unwrap();
}
#[test]
@@ -833,86 +781,68 @@ mod tests {
// Invalid decimal syntax
let schema = "
- message root {
- optional int32 f1 (DECIMAL();
- }
- ";
- let mut iter = Tokenizer::from_str(schema);
- let result = Parser {
- tokenizer: &mut iter,
- }
- .parse_message_type();
- assert!(result.is_err());
+ message root {
+ optional int32 f1 (DECIMAL();
+ }
+ ";
+ assert_eq!(
+ parse(schema).unwrap_err().to_string(),
+ "Parquet error: Failed to parse precision for DECIMAL type"
+ );
// Invalid decimal, need precision and scale
let schema = "
- message root {
- optional int32 f1 (DECIMAL());
- }
- ";
- let mut iter = Tokenizer::from_str(schema);
- let result = Parser {
- tokenizer: &mut iter,
- }
- .parse_message_type();
- assert!(result.is_err());
+ message root {
+ optional int32 f1 (DECIMAL());
+ }
+ ";
+ assert_eq!(
+ parse(schema).unwrap_err().to_string(),
+ "Parquet error: Failed to parse precision for DECIMAL type"
+ );
// Invalid decimal because of `,` - has precision, needs scale
let schema = "
- message root {
- optional int32 f1 (DECIMAL(8,));
- }
- ";
- let mut iter = Tokenizer::from_str(schema);
- let result = Parser {
- tokenizer: &mut iter,
- }
- .parse_message_type();
- assert!(result.is_err());
+ message root {
+ optional int32 f1 (DECIMAL(8,));
+ }
+ ";
+ assert_eq!(
+ parse(schema).unwrap_err().to_string(),
+ "Parquet error: Failed to parse scale for DECIMAL type"
+ );
// Invalid decimal because, we always require either precision or scale to be
// specified as part of converted type
let schema = "
- message root {
- optional int32 f3 (DECIMAL);
- }
- ";
- let mut iter = Tokenizer::from_str(schema);
- let result = Parser {
- tokenizer: &mut iter,
- }
- .parse_message_type();
- assert!(result.is_err());
+ message root {
+ optional int32 f3 (DECIMAL);
+ }
+ ";
+ assert_eq!(
+ parse(schema).unwrap_err().to_string(),
+ "Parquet error: Expected ')', found token ';'"
+ );
// Valid decimal (precision, scale)
let schema = "
- message root {
- optional int32 f1 (DECIMAL(8, 3));
- optional int32 f2 (DECIMAL(8));
- }
- ";
- let mut iter = Tokenizer::from_str(schema);
- let result = Parser {
- tokenizer: &mut iter,
- }
- .parse_message_type();
- assert!(result.is_ok());
+ message root {
+ optional int32 f1 (DECIMAL(8, 3));
+ optional int32 f2 (DECIMAL(8));
+ }
+ ";
+ parse(schema).unwrap();
}
#[test]
fn test_parse_message_type_compare_1() {
let schema = "
- message root {
- optional fixed_len_byte_array(5) f1 (DECIMAL(9, 3));
- optional fixed_len_byte_array (16) f2 (DECIMAL (38, 18));
- }
- ";
- let mut iter = Tokenizer::from_str(schema);
- let message = Parser {
- tokenizer: &mut iter,
- }
- .parse_message_type()
- .unwrap();
+ message root {
+ optional fixed_len_byte_array(5) f1 (DECIMAL(9, 3));
+ optional fixed_len_byte_array (16) f2 (DECIMAL (38, 18));
+ }
+ ";
+ let message = parse(schema).unwrap();
let expected = Type::group_type_builder("root")
.with_fields(&mut vec![
@@ -958,27 +888,22 @@ mod tests {
#[test]
fn test_parse_message_type_compare_2() {
let schema = "
- message root {
- required group a0 {
- optional group a1 (LIST) {
- repeated binary a2 (UTF8);
- }
+ message root {
+ required group a0 {
+ optional group a1 (LIST) {
+ repeated binary a2 (UTF8);
+ }
- optional group b1 (LIST) {
- repeated group b2 {
- optional int32 b3;
- optional double b4;
- }
- }
- }
- }
- ";
- let mut iter = Tokenizer::from_str(schema);
- let message = Parser {
- tokenizer: &mut iter,
- }
- .parse_message_type()
- .unwrap();
+ optional group b1 (LIST) {
+ repeated group b2 {
+ optional int32 b3;
+ optional double b4;
+ }
+ }
+ }
+ }
+ ";
+ let message = parse(schema).unwrap();
let expected = Type::group_type_builder("root")
.with_fields(&mut vec![Arc::new(
@@ -1048,21 +973,16 @@ mod tests {
#[test]
fn test_parse_message_type_compare_3() {
let schema = "
- message root {
- required int32 _1 (INT_8);
- required int32 _2 (INT_16);
- required float _3;
- required double _4;
- optional int32 _5 (DATE);
- optional binary _6 (UTF8);
- }
- ";
- let mut iter = Tokenizer::from_str(schema);
- let message = Parser {
- tokenizer: &mut iter,
- }
- .parse_message_type()
- .unwrap();
+ message root {
+ required int32 _1 (INT_8);
+ required int32 _2 (INT_16);
+ required float _3;
+ required double _4;
+ optional int32 _5 (DATE);
+ optional binary _6 (UTF8);
+ }
+ ";
+ let message = parse(schema).unwrap();
let mut fields = vec![
Arc::new(
@@ -1116,25 +1036,20 @@ mod tests {
#[test]
fn test_parse_message_type_compare_4() {
let schema = "
- message root {
- required int32 _1 (INTEGER(8,true));
- required int32 _2 (INTEGER(16,false));
- required float _3;
- required double _4;
- optional int32 _5 (DATE);
- optional int32 _6 (TIME(MILLIS,false));
- optional int64 _7 (TIME(MICROS,true));
- optional int64 _8 (TIMESTAMP(MILLIS,true));
- optional int64 _9 (TIMESTAMP(NANOS,false));
- optional binary _10 (STRING);
- }
- ";
- let mut iter = Tokenizer::from_str(schema);
- let message = Parser {
- tokenizer: &mut iter,
- }
- .parse_message_type()
- .unwrap();
+ message root {
+ required int32 _1 (INTEGER(8,true));
+ required int32 _2 (INTEGER(16,false));
+ required float _3;
+ required double _4;
+ optional int32 _5 (DATE);
+ optional int32 _6 (TIME(MILLIS,false));
+ optional int64 _7 (TIME(MICROS,true));
+ optional int64 _8 (TIMESTAMP(MILLIS,true));
+ optional int64 _9 (TIMESTAMP(NANOS,false));
+ optional binary _10 (STRING);
+ }
+ ";
+ let message = parse(schema).unwrap();
let mut fields = vec![
Arc::new(