You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2020/12/16 19:27:36 UTC

[arrow] branch master updated: ARROW-10927: [Rust][Parquet] Add Decimal to ArrayBuilderReader [REVERT]

This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 71e37e2  ARROW-10927: [Rust][Parquet] Add Decimal to ArrayBuilderReader [REVERT]
71e37e2 is described below

commit 71e37e23e5a3aad396585df484625c2d0641840d
Author: Andy Grove <an...@gmail.com>
AuthorDate: Wed Dec 16 12:26:17 2020 -0700

    ARROW-10927: [Rust][Parquet] Add Decimal to ArrayBuilderReader [REVERT]
    
    This reverts commit 06ac7509adb14f3109e1089221fd5cb9a28c8519 which caused build failures and will need rebasing.
    
    Closes #8940 from andygrove/revert-last-rust-pr
    
    Authored-by: Andy Grove <an...@gmail.com>
    Signed-off-by: Andy Grove <an...@gmail.com>
---
 rust/parquet/src/arrow/array_reader.rs | 35 +++--------------------
 rust/parquet/src/arrow/arrow_reader.rs | 27 ------------------
 rust/parquet/src/arrow/converter.rs    | 52 +++-------------------------------
 rust/parquet/src/arrow/schema.rs       | 15 ----------
 4 files changed, 8 insertions(+), 121 deletions(-)

diff --git a/rust/parquet/src/arrow/array_reader.rs b/rust/parquet/src/arrow/array_reader.rs
index 037a881..145f315 100644
--- a/rust/parquet/src/arrow/array_reader.rs
+++ b/rust/parquet/src/arrow/array_reader.rs
@@ -55,10 +55,10 @@ use arrow::datatypes::{
 use arrow::util::bit_util;
 
 use crate::arrow::converter::{
-    BinaryArrayConverter, BinaryConverter, Converter, DecimalArrayConverter,
-    DecimalConverter, FixedLenBinaryConverter, FixedSizeArrayConverter,
-    Int96ArrayConverter, Int96Converter, LargeBinaryArrayConverter, LargeBinaryConverter,
-    LargeUtf8ArrayConverter, LargeUtf8Converter, Utf8ArrayConverter, Utf8Converter,
+    BinaryArrayConverter, BinaryConverter, Converter, FixedLenBinaryConverter,
+    FixedSizeArrayConverter, Int96ArrayConverter, Int96Converter,
+    LargeBinaryArrayConverter, LargeBinaryConverter, LargeUtf8ArrayConverter,
+    LargeUtf8Converter, Utf8ArrayConverter, Utf8Converter,
 };
 use crate::arrow::record_reader::RecordReader;
 use crate::arrow::schema::parquet_to_arrow_field;
@@ -1542,33 +1542,6 @@ impl<'a> ArrayReaderBuilder {
                     )?))
                 }
             }
-            PhysicalType::FIXED_LEN_BYTE_ARRAY
-                if cur_type.get_basic_info().logical_type() == LogicalType::DECIMAL =>
-            {
-                let (precision, scale) = match *cur_type {
-                    Type::PrimitiveType {
-                        ref precision,
-                        ref scale,
-                        ..
-                    } => (*precision, *scale),
-                    _ => {
-                        return Err(ArrowError(
-                            "Expected a physical type, not a group type".to_string(),
-                        ))
-                    }
-                };
-                let converter =
-                    DecimalConverter::new(DecimalArrayConverter::new(precision, scale));
-                Ok(Box::new(ComplexObjectArrayReader::<
-                    FixedLenByteArrayType,
-                    DecimalConverter,
-                >::new(
-                    page_iterator,
-                    column_desc,
-                    converter,
-                    arrow_type,
-                )?))
-            }
             PhysicalType::FIXED_LEN_BYTE_ARRAY => {
                 let byte_width = match *cur_type {
                     Type::PrimitiveType {
diff --git a/rust/parquet/src/arrow/arrow_reader.rs b/rust/parquet/src/arrow/arrow_reader.rs
index ee8ff67..7d5b5b8 100644
--- a/rust/parquet/src/arrow/arrow_reader.rs
+++ b/rust/parquet/src/arrow/arrow_reader.rs
@@ -381,33 +381,6 @@ mod tests {
         >(2, message_type, &converter);
     }
 
-    #[test]
-    fn test_read_decimal_file() {
-        use arrow::array::DecimalArray;
-        let testdata =
-            env::var("PARQUET_TEST_DATA").expect("PARQUET_TEST_DATA not defined");
-        let path = format!("{}/fixed_length_decimal.parquet", testdata);
-        let parquet_reader =
-            SerializedFileReader::try_from(File::open(&path).unwrap()).unwrap();
-        let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(parquet_reader));
-
-        let mut record_reader = arrow_reader.get_record_reader(32).unwrap();
-
-        let batch = record_reader.next().unwrap().unwrap();
-        assert_eq!(batch.num_rows(), 24);
-        let col = batch
-            .column(0)
-            .as_any()
-            .downcast_ref::<DecimalArray>()
-            .unwrap();
-
-        let expected = 1..25;
-
-        for (i, v) in expected.enumerate() {
-            assert_eq!(col.value(i), v * 100_i128);
-        }
-    }
-
     /// Parameters for single_column_reader_test
     #[derive(Debug)]
     struct TestOptions {
diff --git a/rust/parquet/src/arrow/converter.rs b/rust/parquet/src/arrow/converter.rs
index 449f6d4..94d6987 100644
--- a/rust/parquet/src/arrow/converter.rs
+++ b/rust/parquet/src/arrow/converter.rs
@@ -18,9 +18,9 @@
 use crate::data_type::{ByteArray, DataType, FixedLenByteArray, Int96};
 // TODO: clean up imports (best done when there are few moving parts)
 use arrow::array::{
-    Array, ArrayRef, BinaryBuilder, DecimalBuilder, FixedSizeBinaryBuilder,
-    LargeBinaryBuilder, LargeStringBuilder, PrimitiveBuilder, PrimitiveDictionaryBuilder,
-    StringBuilder, StringDictionaryBuilder, TimestampNanosecondBuilder,
+    Array, ArrayRef, BinaryBuilder, FixedSizeBinaryBuilder, LargeBinaryBuilder,
+    LargeStringBuilder, PrimitiveBuilder, PrimitiveDictionaryBuilder, StringBuilder,
+    StringDictionaryBuilder, TimestampNanosecondBuilder,
 };
 use arrow::compute::cast;
 use std::convert::From;
@@ -30,7 +30,7 @@ use crate::errors::Result;
 use arrow::datatypes::{ArrowDictionaryKeyType, ArrowPrimitiveType};
 
 use arrow::array::{
-    BinaryArray, DecimalArray, DictionaryArray, FixedSizeBinaryArray, LargeBinaryArray,
+    BinaryArray, DictionaryArray, FixedSizeBinaryArray, LargeBinaryArray,
     LargeStringArray, PrimitiveArray, StringArray, TimestampNanosecondArray,
 };
 use std::marker::PhantomData;
@@ -76,47 +76,6 @@ impl Converter<Vec<Option<FixedLenByteArray>>, FixedSizeBinaryArray>
     }
 }
 
-pub struct DecimalArrayConverter {
-    precision: i32,
-    scale: i32,
-}
-
-impl DecimalArrayConverter {
-    pub fn new(precision: i32, scale: i32) -> Self {
-        Self { precision, scale }
-    }
-
-    fn from_bytes_to_i128(b: &[u8]) -> i128 {
-        assert!(b.len() <= 16, "DecimalArray supports only up to size 16");
-        let first_bit = b[0] & 128u8 == 128u8;
-        let mut result = if first_bit { [255u8; 16] } else { [0u8; 16] };
-        for (i, v) in b.iter().enumerate() {
-            result[i + (16 - b.len())] = *v;
-        }
-        i128::from_be_bytes(result)
-    }
-}
-
-impl Converter<Vec<Option<ByteArray>>, DecimalArray> for DecimalArrayConverter {
-    fn convert(&self, source: Vec<Option<ByteArray>>) -> Result<DecimalArray> {
-        let mut builder = DecimalBuilder::new(
-            source.len(),
-            self.precision as usize,
-            self.scale as usize,
-        );
-        for v in source {
-            match v {
-                Some(array) => {
-                    builder.append_value(Self::from_bytes_to_i128(array.data()))
-                }
-                None => builder.append_null(),
-            }?
-        }
-
-        Ok(builder.finish())
-    }
-}
-
 pub struct Int96ArrayConverter {}
 
 impl Converter<Vec<Option<Int96>>, TimestampNanosecondArray> for Int96ArrayConverter {
@@ -328,9 +287,6 @@ pub type FixedLenBinaryConverter = ArrayRefConverter<
     FixedSizeArrayConverter,
 >;
 
-pub type DecimalConverter =
-    ArrayRefConverter<Vec<Option<ByteArray>>, DecimalArray, DecimalArrayConverter>;
-
 pub struct FromConverter<S, T> {
     _source: PhantomData<S>,
     _dest: PhantomData<T>,
diff --git a/rust/parquet/src/arrow/schema.rs b/rust/parquet/src/arrow/schema.rs
index f8bda5e..e11dc45 100644
--- a/rust/parquet/src/arrow/schema.rs
+++ b/rust/parquet/src/arrow/schema.rs
@@ -606,21 +606,6 @@ impl ParquetTypeConverter<'_> {
     }
 
     fn from_fixed_len_byte_array(&self) -> Result<DataType> {
-        if self.schema.get_basic_info().logical_type() == LogicalType::DECIMAL {
-            let (precision, scale) = match self.schema {
-                Type::PrimitiveType {
-                    ref precision,
-                    ref scale,
-                    ..
-                } => (*precision, *scale),
-                _ => {
-                    return Err(ArrowError(
-                        "Expected a physical type, not a group type".to_string(),
-                    ))
-                }
-            };
-            return Ok(DataType::Decimal(precision as usize, scale as usize));
-        }
         let byte_width = match self.schema {
             Type::PrimitiveType {
                 ref type_length, ..