You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "alamb (via GitHub)" <gi...@apache.org> on 2023/06/01 19:45:29 UTC

[GitHub] [arrow-rs] alamb commented on a diff in pull request #4311: Add roundtrip tests for Decimal256 and fix issues (#4264)

alamb commented on code in PR #4311:
URL: https://github.com/apache/arrow-rs/pull/4311#discussion_r1213606312


##########
parquet/src/arrow/arrow_reader/mod.rs:
##########
@@ -2501,4 +2503,76 @@ mod tests {
 
         assert_eq!(&written.slice(0, 8), &read[0]);
     }
+
+    fn test_decimal_roundtrip<T: DecimalType>(values: &[T::Native]) {
+        // Precision <= 9 -> INT32
+        // Precision <= 18 -> INT64
+        // Precision > 18 -> FIXED_LEN_BYTE_ARRAY
+
+        let d1 = PrimitiveArray::<T>::from_iter_values(values.iter().copied())
+            .with_precision_and_scale(9, 2)
+            .unwrap();
+
+        let d2 = d1.clone().with_precision_and_scale(10, 8).unwrap();
+        let d3 = d1.clone().with_precision_and_scale(18, 8).unwrap();
+        let d4 = d1.clone().with_precision_and_scale(19, 8).unwrap();
+
+        let batch = RecordBatch::try_from_iter([
+            ("d1", Arc::new(d1) as ArrayRef),
+            ("d2", Arc::new(d2) as ArrayRef),
+            ("d3", Arc::new(d3) as ArrayRef),
+            ("d4", Arc::new(d4) as ArrayRef),
+        ])
+        .unwrap();
+
+        let mut buffer = Vec::with_capacity(1024);
+        let mut writer = ArrowWriter::try_new(&mut buffer, batch.schema(), None).unwrap();
+        writer.write(&batch).unwrap();
+        writer.close().unwrap();
+
+        let builder =
+            ParquetRecordBatchReaderBuilder::try_new(Bytes::from(buffer)).unwrap();
+        let t1 = builder.parquet_schema().columns()[0].physical_type();
+        assert_eq!(t1, PhysicalType::INT32);
+        let t2 = builder.parquet_schema().columns()[1].physical_type();
+        assert_eq!(t2, PhysicalType::INT64);
+        let t3 = builder.parquet_schema().columns()[2].physical_type();
+        assert_eq!(t3, PhysicalType::INT64);
+        let t4 = builder.parquet_schema().columns()[3].physical_type();
+        assert_eq!(t4, PhysicalType::FIXED_LEN_BYTE_ARRAY);
+
+        let mut reader = builder.build().unwrap();
+        assert_eq!(batch.schema(), reader.schema());
+
+        let out = reader.next().unwrap().unwrap();
+        assert_eq!(batch.schema(), out.schema());
+
+        let c1 = out.columns()[0].as_primitive::<T>();
+        assert_eq!(c1.values(), &values);
+        assert_eq!(c1.precision(), 9);
+        assert_eq!(c1.scale(), 2);
+
+        let c2 = out.columns()[1].as_primitive::<T>();
+        assert_eq!(c2.values(), &values);
+        assert_eq!(c2.precision(), 10);
+        assert_eq!(c2.scale(), 8);
+
+        let c3 = out.columns()[2].as_primitive::<T>();
+        assert_eq!(c3.values(), &values);
+        assert_eq!(c3.precision(), 18);
+        assert_eq!(c3.scale(), 8);
+
+        let c4 = out.columns()[3].as_primitive::<T>();
+        assert_eq!(c4.values(), &values);
+        assert_eq!(c4.precision(), 19);
+        assert_eq!(c4.scale(), 8);
+    }
+
+    #[test]
+    fn test_decimal() {
+        let values = [1, 2, 3, 4, 5];

Review Comment:
   Would it  worth using values that require precision > 18? I know it may not matter but I wonder if there is some extra coverage that could be gained. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org