You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/08/21 06:49:55 UTC

[arrow-rs] branch master updated: Add unsigned primitive parquet tests (#2492)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 359b3afce Add unsigned primitive parquet tests (#2492)
359b3afce is described below

commit 359b3afce93e03c50409f0fae746a573217b16e6
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Sun Aug 21 07:49:51 2022 +0100

    Add unsigned primitive parquet tests (#2492)
    
    * Add unsigned primitive tests
    
    * Clippy
---
 parquet/src/arrow/arrow_reader/mod.rs | 88 ++++++++++++++++++++++++++++++++++-
 1 file changed, 87 insertions(+), 1 deletion(-)

diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs
index bf0fcd2bb..6476751e6 100644
--- a/parquet/src/arrow/arrow_reader/mod.rs
+++ b/parquet/src/arrow/arrow_reader/mod.rs
@@ -743,12 +743,93 @@ mod tests {
         );
     }
 
+    #[test] // Exercises the single-column reader helper for columns annotated as UINT_32 and UINT_64.
+    fn test_unsigned_primitive_single_column_reader_test() { // helper is defined outside this hunk; presumably it writes and re-reads a column per encoding — confirm
+        run_single_column_reader_tests::<Int32Type, _, Int32Type>( // 32-bit case: Int32Type values, expected back as UInt32
+            2,
+            ConvertedType::UINT_32,
+            Some(ArrowDataType::UInt32),
+            |vals| { // builds the expected Arrow array from the optional input values
+                Arc::new(UInt32Array::from_iter(
+                    vals.iter().map(|x| x.map(|x| x as u32)), // `as` keeps the bit pattern, so negative i32 inputs become large u32 values
+                ))
+            },
+            &[
+                Encoding::PLAIN,
+                Encoding::RLE_DICTIONARY,
+                Encoding::DELTA_BINARY_PACKED,
+            ],
+        );
+        run_single_column_reader_tests::<Int64Type, _, Int64Type>( // 64-bit case: Int64Type values, expected back as UInt64
+            2,
+            ConvertedType::UINT_64,
+            Some(ArrowDataType::UInt64),
+            |vals| { // builds the expected Arrow array from the optional input values
+                Arc::new(UInt64Array::from_iter(
+                    vals.iter().map(|x| x.map(|x| x as u64)), // same bit-preserving reinterpretation, i64 -> u64
+                ))
+            },
+            &[
+                Encoding::PLAIN,
+                Encoding::RLE_DICTIONARY,
+                Encoding::DELTA_BINARY_PACKED,
+            ],
+        );
+    }
+
+    #[test] // Writes a batch of UInt32/UInt64 columns with ArrowWriter, reads it back, and expects an equal batch.
+    fn test_unsigned_roundtrip() {
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("uint32", ArrowDataType::UInt32, true),
+            Field::new("uint64", ArrowDataType::UInt64, true),
+        ]));
+
+        let mut buf = Vec::with_capacity(1024); // in-memory destination handed to the writer
+        let mut writer = ArrowWriter::try_new(&mut buf, schema.clone(), None).unwrap();
+
+        let original = RecordBatch::try_new(
+            schema,
+            vec![
+                Arc::new(UInt32Array::from_iter_values([
+                    0,
+                    i32::MAX as u32, // largest value that is still non-negative when viewed as i32
+                    u32::MAX, // outside the i32 range
+                ])),
+                Arc::new(UInt64Array::from_iter_values([
+                    0,
+                    i64::MAX as u64, // largest value that is still non-negative when viewed as i64
+                    u64::MAX, // outside the i64 range
+                ])),
+            ],
+        )
+        .unwrap();
+
+        writer.write(&original).unwrap();
+        writer.close().unwrap(); // close before `buf` is moved into the reader below
+
+        let mut reader =
+            ParquetRecordBatchReader::try_new(Bytes::from(buf), 1024).unwrap(); // NOTE(review): second argument is presumably the batch size — confirm
+        let ret = reader.next().unwrap().unwrap(); // panics if the reader yields no item or an error
+        assert_eq!(ret, original);
+
+        // Check they can be downcast to the correct type
+        ret.column(0)
+            .as_any()
+            .downcast_ref::<UInt32Array>()
+            .unwrap(); // the unwrap is the assertion: it panics if the downcast returns None
+
+        ret.column(1)
+            .as_any()
+            .downcast_ref::<UInt64Array>()
+            .unwrap(); // the unwrap is the assertion: it panics if the downcast returns None
+    }
+
     struct RandFixedLenGen {}
 
     impl RandGen<FixedLenByteArrayType> for RandFixedLenGen {
         fn gen(len: i32) -> FixedLenByteArray {
             let mut v = vec![0u8; len as usize];
-            rand::thread_rng().fill_bytes(&mut v);
+            thread_rng().fill_bytes(&mut v); // fills all `len` bytes with random data; assumes `thread_rng` is imported in this module (import not shown in this hunk)
             ByteArray::from(v).into()
         }
     }
@@ -1504,6 +1585,11 @@ mod tests {
 
                 assert_eq!(a.data_type(), b.data_type());
                 assert_eq!(a.data(), b.data(), "{:#?} vs {:#?}", a.data(), b.data());
+                assert_eq!(
+                    a.as_any().type_id(),
+                    b.as_any().type_id(),
+                    "incorrect type ids"
+                );
 
                 total_read = end;
             } else {