You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ne...@apache.org on 2020/11/07 12:42:56 UTC

[arrow] branch master updated: ARROW-10449 [Rust] Make Dictionary::keys be an array

This is an automated email from the ASF dual-hosted git repository.

nevime pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 2e284f4  ARROW-10449 [Rust] Make Dictionary::keys be an array
2e284f4 is described below

commit 2e284f48e8a57d832fd45f93b875d161709163a0
Author: Jorge C. Leitao <jo...@gmail.com>
AuthorDate: Sat Nov 7 14:42:17 2020 +0200

    ARROW-10449 [Rust] Make Dictionary::keys be an array
    
    This PR:
    
    1. Makes `DictionaryArray::keys` be an `PrimitiveArray`.
    2. Removes `NullIter` and many of the `unsafe` code that it contained
    3. Simplifies the parquet writer implementation around dictionaries
    4. Indirectly removes a bug on NullIter's `DoubleEndedIterator` on which the implementation was not following the spec with respect to `end == current` (compare with implementation of `DoubleEndedIterator` for std's `Vec`'s `IntoIter`).
    5. Fixes error in computing the size of a dictionary, which was double-counting `data` and `values` (`values` is a reference to `data`)
    6. Adds check that the dictionary's ArrayData's datatype matches `T::DATA_TYPE`.
    
    Since `NullIter` was not being directly tested, no tests were removed.
    
    This is backward incompatible and the migration requires replacing `dict.keys()`  by `dict.keys().into_iter()` whenever the user's intention is to use `keys()` as an iterator.
    
    Closes #8561 from jorgecarleitao/dictionary_clean
    
    Authored-by: Jorge C. Leitao <jo...@gmail.com>
    Signed-off-by: Neville Dipale <ne...@gmail.com>
---
 rust/arrow/src/array/array.rs            | 215 +++++++------------------------
 rust/arrow/src/array/builder.rs          |  27 ++--
 rust/arrow/src/array/equal.rs            |  15 +--
 rust/arrow/src/compute/kernels/filter.rs |   5 +-
 rust/arrow/src/compute/kernels/take.rs   |   7 +-
 rust/arrow/src/json/reader.rs            |   7 +-
 rust/parquet/src/arrow/arrow_writer.rs   |  99 +++++++-------
 7 files changed, 111 insertions(+), 264 deletions(-)

diff --git a/rust/arrow/src/array/array.rs b/rust/arrow/src/array/array.rs
index e168f75..a6a976f 100644
--- a/rust/arrow/src/array/array.rs
+++ b/rust/arrow/src/array/array.rs
@@ -2129,28 +2129,32 @@ impl From<(Vec<(Field, ArrayRef)>, Buffer, usize)> for StructArray {
 /// Example **with nullable** data:
 ///
 /// ```
-/// use arrow::array::DictionaryArray;
+/// use arrow::array::{DictionaryArray, Int8Array};
 /// use arrow::datatypes::Int8Type;
 /// let test = vec!["a", "a", "b", "c"];
 /// let array : DictionaryArray<Int8Type> = test.iter().map(|&x| if x == "b" {None} else {Some(x)}).collect();
-/// assert_eq!(array.keys().collect::<Vec<Option<i8>>>(), vec![Some(0), Some(0), None, Some(1)]);
+/// assert_eq!(array.keys(), &Int8Array::from(vec![Some(0), Some(0), None, Some(1)]));
 /// ```
 ///
 /// Example **without nullable** data:
 ///
 /// ```
-/// use arrow::array::DictionaryArray;
+/// use arrow::array::{DictionaryArray, Int8Array};
 /// use arrow::datatypes::Int8Type;
 /// let test = vec!["a", "a", "b", "c"];
 /// let array : DictionaryArray<Int8Type> = test.into_iter().collect();
-/// assert_eq!(array.keys().collect::<Vec<Option<i8>>>(), vec![Some(0), Some(0), Some(1), Some(2)]);
+/// assert_eq!(array.keys(), &Int8Array::from(vec![0, 0, 1, 2]));
 /// ```
 pub struct DictionaryArray<K: ArrowPrimitiveType> {
-    /// Array of keys, stored as a PrimitiveArray<K>.
+    /// Data of this dictionary. Note that this is _not_ compatible with the C Data interface,
+    /// as, in the current implementation, `values` below are the first child of this struct.
     data: ArrayDataRef,
 
-    /// Pointer to the key values.
-    raw_values: RawPtrBox<K::Native>,
+    /// The keys of this dictionary. These are constructed from the buffer and null bitmap
+    /// of `data`.
+    /// Also, note that these do not correspond to the true values of this array. Rather, they map
+    /// to the real values.
+    keys: PrimitiveArray<K>,
 
     /// Array of dictionary values (can by any DataType).
     values: ArrayRef,
@@ -2159,112 +2163,10 @@ pub struct DictionaryArray<K: ArrowPrimitiveType> {
     is_ordered: bool,
 }
 
-#[derive(Debug)]
-enum Draining {
-    Ready,
-    Iterating,
-    Finished,
-}
-
-#[derive(Debug)]
-pub struct NullableIter<'a, T> {
-    data: &'a ArrayDataRef, // TODO: Use a pointer to the null bitmap.
-    ptr: *const T,
-    i: usize,
-    len: usize,
-    draining: Draining,
-}
-
-impl<'a, T> std::iter::Iterator for NullableIter<'a, T>
-where
-    T: Clone,
-{
-    type Item = Option<T>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        let i = self.i;
-        if i >= self.len {
-            None
-        } else if self.data.is_null(i) {
-            self.i += 1;
-            Some(None)
-        } else {
-            self.i += 1;
-            unsafe { Some(Some((&*self.ptr.add(i)).clone())) }
-        }
-    }
-
-    fn size_hint(&self) -> (usize, Option<usize>) {
-        (self.len, Some(self.len))
-    }
-
-    fn nth(&mut self, n: usize) -> Option<Self::Item> {
-        let i = self.i;
-        if i + n >= self.len {
-            self.i = self.len;
-            None
-        } else if self.data.is_null(i + n) {
-            self.i += n + 1;
-            Some(None)
-        } else {
-            self.i += n + 1;
-            unsafe { Some(Some((&*self.ptr.add(i + n)).clone())) }
-        }
-    }
-}
-
-impl<'a, T> std::iter::DoubleEndedIterator for NullableIter<'a, T>
-where
-    T: Clone,
-{
-    fn next_back(&mut self) -> Option<Self::Item> {
-        match self.draining {
-            Draining::Ready => {
-                self.draining = Draining::Iterating;
-                self.i = self.len - 1;
-                self.next_back()
-            }
-            Draining::Iterating => {
-                let i = self.i;
-                if i >= self.len {
-                    None
-                } else if self.data.is_null(i) {
-                    self.i = self.i.checked_sub(1).unwrap_or_else(|| {
-                        self.draining = Draining::Finished;
-                        0_usize
-                    });
-                    Some(None)
-                } else {
-                    match i.checked_sub(1) {
-                        Some(idx) => {
-                            self.i = idx;
-                            unsafe { Some(Some((&*self.ptr.add(i)).clone())) }
-                        }
-                        _ => {
-                            self.draining = Draining::Finished;
-                            unsafe { Some(Some((&*self.ptr).clone())) }
-                        }
-                    }
-                }
-            }
-            Draining::Finished => {
-                self.draining = Draining::Ready;
-                None
-            }
-        }
-    }
-}
-
 impl<'a, K: ArrowPrimitiveType> DictionaryArray<K> {
     /// Return an iterator to the keys of this dictionary.
-    pub fn keys(&self) -> NullableIter<'_, K::Native> {
-        NullableIter::<'_, K::Native> {
-            data: &self.data,
-            ptr: unsafe { self.raw_values.get().add(self.data.offset()) },
-            i: 0,
-            len: self.data.len(),
-            draining: Draining::Ready,
-        }
+    pub fn keys(&self) -> &PrimitiveArray<K> {
+        &self.keys
     }
 
     /// Returns an array view of the keys of this dictionary
@@ -2305,12 +2207,12 @@ impl<'a, K: ArrowPrimitiveType> DictionaryArray<K> {
 
     /// The length of the dictionary is the length of the keys array.
     pub fn len(&self) -> usize {
-        self.data.len()
+        self.keys.len()
     }
 
     /// Whether this dictionary is empty
     pub fn is_empty(&self) -> bool {
-        self.data.is_empty()
+        self.keys.is_empty()
     }
 
     // Currently exists for compatibility purposes with Arrow IPC.
@@ -2333,13 +2235,24 @@ impl<T: ArrowPrimitiveType> From<ArrayDataRef> for DictionaryArray<T> {
             "DictionaryArray should contain a single child array (values)."
         );
 
-        let raw_values = data.buffers()[0].raw_data();
-        let dtype: &DataType = data.data_type();
-        let values = make_array(data.child_data()[0].clone());
-        if let DataType::Dictionary(_, _) = dtype {
+        if let DataType::Dictionary(key_data_type, _) = data.data_type() {
+            if key_data_type.as_ref() != &T::DATA_TYPE {
+                panic!("DictionaryArray's data type must match.")
+            };
+            // create a zero-copy of the keys' data
+            let keys = PrimitiveArray::<T>::from(Arc::new(ArrayData::new(
+                T::DATA_TYPE,
+                data.len(),
+                Some(data.null_count()),
+                data.null_buffer().cloned(),
+                data.offset(),
+                data.buffers().to_vec(),
+                vec![],
+            )));
+            let values = make_array(data.child_data()[0].clone());
             Self {
                 data,
-                raw_values: RawPtrBox::new(raw_values as *const T::Native),
+                keys,
                 values,
                 is_ordered: false,
             }
@@ -2410,32 +2323,25 @@ impl<T: ArrowPrimitiveType> Array for DictionaryArray<T> {
         &self.data
     }
 
-    /// Returns the total number of bytes of memory occupied by the buffers owned by this [DictionaryArray].
     fn get_buffer_memory_size(&self) -> usize {
-        self.data.get_buffer_memory_size() + self.values().get_buffer_memory_size()
+        // Since both `keys` and `values` derive (are references from) `data`, we only need to account for `data`.
+        self.data.get_buffer_memory_size()
     }
 
-    /// Returns the total number of bytes of memory occupied physically by this [DictionaryArray].
     fn get_array_memory_size(&self) -> usize {
         self.data.get_array_memory_size()
-            + self.values().get_array_memory_size()
+            + self.keys.get_array_memory_size()
+            + self.values.get_array_memory_size()
             + mem::size_of_val(self)
     }
 }
 
 impl<T: ArrowPrimitiveType> fmt::Debug for DictionaryArray<T> {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        const MAX_LEN: usize = 10;
-        let keys: Vec<_> = self.keys().take(MAX_LEN).collect();
-        let elipsis = if self.keys().count() > MAX_LEN {
-            "..."
-        } else {
-            ""
-        };
         writeln!(
             f,
-            "DictionaryArray {{keys: {:?}{} values: {:?}}}",
-            keys, elipsis, self.values
+            "DictionaryArray {{keys: {:?} values: {:?}}}",
+            self.keys, self.values
         )
     }
 }
@@ -3118,23 +3024,7 @@ mod tests {
         // Null count only makes sense in terms of the component arrays.
         assert_eq!(0, dict_array.null_count());
         assert_eq!(0, dict_array.values().null_count());
-        assert_eq!(Some(Some(3)), dict_array.keys().nth(1));
-        assert_eq!(Some(Some(4)), dict_array.keys().nth(2));
-
-        assert_eq!(
-            dict_array.keys().collect::<Vec<Option<i16>>>(),
-            vec![Some(2), Some(3), Some(4)]
-        );
-
-        assert_eq!(
-            dict_array.keys().rev().collect::<Vec<Option<i16>>>(),
-            vec![Some(4), Some(3), Some(2)]
-        );
-
-        assert_eq!(
-            dict_array.keys().rev().rev().collect::<Vec<Option<i16>>>(),
-            vec![Some(2), Some(3), Some(4)]
-        );
+        assert_eq!(dict_array.keys(), &Int16Array::from(vec![2_i16, 3, 4]));
 
         // Now test with a non-zero offset
         let dict_data = ArrayData::builder(dict_data_type)
@@ -3149,26 +3039,7 @@ mod tests {
         assert_eq!(value_data, values.data());
         assert_eq!(DataType::Int8, dict_array.value_type());
         assert_eq!(2, dict_array.len());
-        assert_eq!(Some(Some(3)), dict_array.keys().nth(0));
-        assert_eq!(Some(Some(4)), dict_array.keys().nth(1));
-
-        assert_eq!(
-            dict_array.keys().collect::<Vec<Option<i16>>>(),
-            vec![Some(3), Some(4)]
-        );
-    }
-
-    #[test]
-    fn test_dictionary_array_key_reverse() {
-        let test = vec!["a", "a", "b", "c"];
-        let array: DictionaryArray<Int8Type> = test
-            .iter()
-            .map(|&x| if x == "b" { None } else { Some(x) })
-            .collect();
-        assert_eq!(
-            array.keys().rev().collect::<Vec<Option<i8>>>(),
-            vec![Some(1), None, Some(0), Some(0)]
-        );
+        assert_eq!(dict_array.keys(), &Int16Array::from(vec![3_i16, 4]));
     }
 
     #[test]
@@ -4307,7 +4178,7 @@ mod tests {
         builder.append(22345678).unwrap();
         let array = builder.finish();
         assert_eq!(
-            "DictionaryArray {keys: [Some(0), None, Some(1)] values: PrimitiveArray<UInt32>\n[\n  12345678,\n  22345678,\n]}\n",
+            "DictionaryArray {keys: PrimitiveArray<UInt8>\n[\n  0,\n  null,\n  1,\n] values: PrimitiveArray<UInt32>\n[\n  12345678,\n  22345678,\n]}\n",
             format!("{:?}", array)
         );
 
@@ -4319,7 +4190,7 @@ mod tests {
         }
         let array = builder.finish();
         assert_eq!(
-            "DictionaryArray {keys: [Some(0), Some(0), Some(0), Some(0), Some(0), Some(0), Some(0), Some(0), Some(0), Some(0)]... values: PrimitiveArray<UInt32>\n[\n  1,\n]}\n",
+            "DictionaryArray {keys: PrimitiveArray<UInt8>\n[\n  0,\n  0,\n  0,\n  0,\n  0,\n  0,\n  0,\n  0,\n  0,\n  0,\n  0,\n  0,\n  0,\n  0,\n  0,\n  0,\n  0,\n  0,\n  0,\n  0,\n] values: PrimitiveArray<UInt32>\n[\n  1,\n]}\n",
             format!("{:?}", array)
         );
     }
@@ -4332,13 +4203,13 @@ mod tests {
             .map(|&x| if x == "b" { None } else { Some(x) })
             .collect();
         assert_eq!(
-            "DictionaryArray {keys: [Some(0), Some(0), None, Some(1)] values: StringArray\n[\n  \"a\",\n  \"c\",\n]}\n",
+            "DictionaryArray {keys: PrimitiveArray<Int8>\n[\n  0,\n  0,\n  null,\n  1,\n] values: StringArray\n[\n  \"a\",\n  \"c\",\n]}\n",
             format!("{:?}", array)
         );
 
         let array: DictionaryArray<Int8Type> = test.into_iter().collect();
         assert_eq!(
-            "DictionaryArray {keys: [Some(0), Some(0), Some(1), Some(2)] values: StringArray\n[\n  \"a\",\n  \"b\",\n  \"c\",\n]}\n",
+            "DictionaryArray {keys: PrimitiveArray<Int8>\n[\n  0,\n  0,\n  1,\n  2,\n] values: StringArray\n[\n  \"a\",\n  \"b\",\n  \"c\",\n]}\n",
             format!("{:?}", array)
         );
     }
diff --git a/rust/arrow/src/array/builder.rs b/rust/arrow/src/array/builder.rs
index 09fb9e7..21ddb7d 100644
--- a/rust/arrow/src/array/builder.rs
+++ b/rust/arrow/src/array/builder.rs
@@ -2326,7 +2326,7 @@ where
     ///
     /// ```
     /// use arrow::datatypes::Int16Type;
-    /// use arrow::array::{StringArray, StringDictionaryBuilder, PrimitiveBuilder};
+    /// use arrow::array::{StringArray, StringDictionaryBuilder, PrimitiveBuilder, Int16Array};
     /// use std::convert::TryFrom;
     ///
     /// let dictionary_values = StringArray::from(vec![None, Some("abc"), Some("def")]);
@@ -2338,9 +2338,9 @@ where
     ///
     /// let dictionary_array = builder.finish();
     ///
-    /// let keys: Vec<Option<i16>> = dictionary_array.keys().collect();
+    /// let keys = dictionary_array.keys();
     ///
-    /// assert_eq!(keys, vec![Some(2), None, Some(1)]);
+    /// assert_eq!(keys, &Int16Array::from(vec![Some(2), None, Some(1)]));
     /// ```
     pub fn new_with_dictionary(
         keys_builder: PrimitiveBuilder<K>,
@@ -3430,8 +3430,10 @@ mod tests {
         builder.append(22345678).unwrap();
         let array = builder.finish();
 
-        // Keys are strongly typed.
-        let aks: Vec<_> = array.keys().collect();
+        assert_eq!(
+            array.keys(),
+            &UInt8Array::from(vec![Some(0), None, Some(1)])
+        );
 
         // Values are polymorphic and so require a downcast.
         let av = array.values();
@@ -3442,7 +3444,6 @@ mod tests {
         assert_eq!(array.is_null(1), true);
         assert_eq!(array.is_null(2), false);
 
-        assert_eq!(aks, vec![Some(0), None, Some(1)]);
         assert_eq!(avs, &[12345678, 22345678]);
     }
 
@@ -3458,14 +3459,15 @@ mod tests {
         builder.append("abc").unwrap();
         let array = builder.finish();
 
-        // Keys are strongly typed.
-        let aks: Vec<_> = array.keys().collect();
+        assert_eq!(
+            array.keys(),
+            &Int8Array::from(vec![Some(0), None, Some(1), Some(1), Some(0)])
+        );
 
         // Values are polymorphic and so require a downcast.
         let av = array.values();
         let ava: &StringArray = av.as_any().downcast_ref::<StringArray>().unwrap();
 
-        assert_eq!(aks, vec![Some(0), None, Some(1), Some(1), Some(0)]);
         assert_eq!(ava.value(0), "abc");
         assert_eq!(ava.value(1), "def");
     }
@@ -3486,14 +3488,15 @@ mod tests {
         builder.append("ghi").unwrap();
         let array = builder.finish();
 
-        // Keys are strongly typed.
-        let aks: Vec<_> = array.keys().collect();
+        assert_eq!(
+            array.keys(),
+            &Int8Array::from(vec![Some(2), None, Some(1), Some(1), Some(2), Some(3)])
+        );
 
         // Values are polymorphic and so require a downcast.
         let av = array.values();
         let ava: &StringArray = av.as_any().downcast_ref::<StringArray>().unwrap();
 
-        assert_eq!(aks, vec![Some(2), None, Some(1), Some(1), Some(2), Some(3)]);
         assert_eq!(ava.is_valid(0), false);
         assert_eq!(ava.value(1), "def");
         assert_eq!(ava.value(2), "abc");
diff --git a/rust/arrow/src/array/equal.rs b/rust/arrow/src/array/equal.rs
index 26b4cd4..9a0b7e6 100644
--- a/rust/arrow/src/array/equal.rs
+++ b/rust/arrow/src/array/equal.rs
@@ -250,11 +250,9 @@ impl<T: ArrowPrimitiveType> ArrayEqual for DictionaryArray<T> {
         assert!(other_start_idx + (end_idx - start_idx) <= other.len());
         let other = other.as_any().downcast_ref::<DictionaryArray<T>>().unwrap();
 
-        let iter_a = self.keys().take(end_idx).skip(start_idx);
-        let iter_b = other.keys().skip(other_start_idx);
-
         // For now, all the values must be the same
-        iter_a.eq(iter_b)
+        self.keys()
+            .range_equals(other.keys(), start_idx, end_idx, other_start_idx)
             && self
                 .values()
                 .range_equals(&*other.values(), 0, other.values().len(), 0)
@@ -910,13 +908,8 @@ impl<OffsetSize: OffsetSizeTrait> PartialEq<GenericListArray<OffsetSize>> for Va
 
 impl<T: ArrowPrimitiveType> JsonEqual for DictionaryArray<T> {
     fn equals_json(&self, json: &[&Value]) -> bool {
-        self.keys().zip(json.iter()).all(|aj| match aj {
-            (None, Value::Null) => true,
-            (Some(a), Value::Number(j)) => {
-                a.to_usize().unwrap() as u64 == j.as_u64().unwrap()
-            }
-            _ => false,
-        })
+        // todo: this is wrong: we must test the values also
+        self.keys().equals_json(json)
     }
 }
 
diff --git a/rust/arrow/src/compute/kernels/filter.rs b/rust/arrow/src/compute/kernels/filter.rs
index 05d5348..0d19443 100644
--- a/rust/arrow/src/compute/kernels/filter.rs
+++ b/rust/arrow/src/compute/kernels/filter.rs
@@ -1052,10 +1052,7 @@ mod tests {
         // but keys are filtered
         assert_eq!(2, d.len());
         assert_eq!(true, d.is_null(0));
-        assert_eq!(
-            "world",
-            values.value(d.keys().nth(1).unwrap().unwrap() as usize)
-        );
+        assert_eq!("world", values.value(d.keys().value(1) as usize));
     }
 
     #[test]
diff --git a/rust/arrow/src/compute/kernels/take.rs b/rust/arrow/src/compute/kernels/take.rs
index 83139c3..08016b9 100644
--- a/rust/arrow/src/compute/kernels/take.rs
+++ b/rust/arrow/src/compute/kernels/take.rs
@@ -1157,8 +1157,6 @@ mod tests {
         assert_eq!(&expected_values, dict_values);
         assert_eq!(&expected_values, &result_values);
 
-        let result_keys: Int16Array = result.keys().collect::<Vec<_>>().into();
-
         let expected_keys = Int16Array::from(vec![
             Some(0),
             Some(0),
@@ -1168,9 +1166,6 @@ mod tests {
             Some(2),
             None,
         ]);
-
-        assert_eq!(expected_keys.len(), result_keys.len());
-        assert_eq!(expected_keys.data_type(), result_keys.data_type());
-        assert_eq!(expected_keys, result_keys);
+        assert_eq!(result.keys(), &expected_keys);
     }
 }
diff --git a/rust/arrow/src/json/reader.rs b/rust/arrow/src/json/reader.rs
index 8959458..7a414db 100644
--- a/rust/arrow/src/json/reader.rs
+++ b/rust/arrow/src/json/reader.rs
@@ -1603,10 +1603,9 @@ mod tests {
         assert_eq!(true, dd.is_valid(2));
         assert_eq!(false, dd.is_valid(11));
 
-        let keys: Vec<_> = dd.keys().collect();
         assert_eq!(
-            keys,
-            vec![
+            dd.keys(),
+            &Int16Array::from(vec![
                 None,
                 Some(0),
                 Some(1),
@@ -1619,7 +1618,7 @@ mod tests {
                 Some(0),
                 Some(0),
                 None
-            ]
+            ])
         );
     }
 
diff --git a/rust/parquet/src/arrow/arrow_writer.rs b/rust/parquet/src/arrow/arrow_writer.rs
index f2279fb..c0b2105 100644
--- a/rust/parquet/src/arrow/arrow_writer.rs
+++ b/rust/parquet/src/arrow/arrow_writer.rs
@@ -177,11 +177,7 @@ fn write_leaves(
             Ok(())
         }
         ArrowDataType::Dictionary(key_type, value_type) => {
-            use arrow_array::{
-                Int16DictionaryArray, Int32DictionaryArray, Int64DictionaryArray,
-                Int8DictionaryArray, PrimitiveArray, StringArray, UInt16DictionaryArray,
-                UInt32DictionaryArray, UInt64DictionaryArray, UInt8DictionaryArray,
-            };
+            use arrow_array::{PrimitiveArray, StringArray};
             use ArrowDataType::*;
             use ColumnWriter::*;
 
@@ -217,9 +213,10 @@ fn write_leaves(
                         .unwrap();
 
                     use std::convert::TryFrom;
-                    // This removes NULL values from the NullableIter, but
+                    // This removes NULL values from the keys, but
                     // they're encoded by the levels, so that's fine.
                     let materialized_values: Vec<_> = keys
+                        .into_iter()
                         .flatten()
                         .map(|key| {
                             usize::try_from(key).unwrap_or_else(|k| {
@@ -250,14 +247,14 @@ fn write_leaves(
             }
 
             dispatch_dictionary!(
-                Int8, Utf8, ByteArrayColumnWriter => Int8DictionaryArray, StringArray,
-                Int16, Utf8, ByteArrayColumnWriter => Int16DictionaryArray, StringArray,
-                Int32, Utf8, ByteArrayColumnWriter => Int32DictionaryArray, StringArray,
-                Int64, Utf8, ByteArrayColumnWriter => Int64DictionaryArray, StringArray,
-                UInt8, Utf8, ByteArrayColumnWriter => UInt8DictionaryArray, StringArray,
-                UInt16, Utf8, ByteArrayColumnWriter => UInt16DictionaryArray, StringArray,
-                UInt32, Utf8, ByteArrayColumnWriter => UInt32DictionaryArray, StringArray,
-                UInt64, Utf8, ByteArrayColumnWriter => UInt64DictionaryArray, StringArray,
+                Int8, Utf8, ByteArrayColumnWriter => arrow::datatypes::Int8Type, StringArray,
+                Int16, Utf8, ByteArrayColumnWriter => arrow::datatypes::Int16Type, StringArray,
+                Int32, Utf8, ByteArrayColumnWriter => arrow::datatypes::Int32Type, StringArray,
+                Int64, Utf8, ByteArrayColumnWriter => arrow::datatypes::Int64Type, StringArray,
+                UInt8, Utf8, ByteArrayColumnWriter => arrow::datatypes::UInt8Type, StringArray,
+                UInt16, Utf8, ByteArrayColumnWriter => arrow::datatypes::UInt16Type, StringArray,
+                UInt32, Utf8, ByteArrayColumnWriter => arrow::datatypes::UInt32Type, StringArray,
+                UInt64, Utf8, ByteArrayColumnWriter => arrow::datatypes::UInt64Type, StringArray,
             )?;
 
             row_group_writer.close_column(col_writer)?;
@@ -283,48 +280,40 @@ trait Materialize<K, V> {
     fn materialize(&self) -> Vec<Self::Output>;
 }
 
-macro_rules! materialize_string {
-    ($($k:ty,)*) => {
-        $(impl Materialize<$k, arrow_array::StringArray> for dyn Array {
-            type Output = ByteArray;
-
-            fn materialize(&self) -> Vec<Self::Output> {
-                use std::convert::TryFrom;
-
-                let typed_array = self.as_any()
-                    .downcast_ref::<$k>()
-                    .expect("Unable to get dictionary array");
-
-                let keys = typed_array.keys();
-
-                let value_buffer = typed_array.values();
-                let values = value_buffer
-                    .as_any()
-                    .downcast_ref::<arrow_array::StringArray>()
-                    .unwrap();
-
-                // This removes NULL values from the NullableIter, but
-                // they're encoded by the levels, so that's fine.
-                keys
-                    .flatten()
-                    .map(|key| usize::try_from(key).unwrap_or_else(|k| panic!("key {} does not fit in usize", k)))
-                    .map(|key| values.value(key))
-                    .map(ByteArray::from)
-                    .collect()
-            }
-        })*
-    };
-}
+impl<K> Materialize<K, arrow_array::StringArray> for dyn Array
+where
+    K: arrow::datatypes::ArrowDictionaryKeyType,
+{
+    type Output = ByteArray;
+
+    fn materialize(&self) -> Vec<Self::Output> {
+        use arrow::datatypes::ArrowNativeType;
 
-materialize_string! {
-    arrow_array::Int8DictionaryArray,
-    arrow_array::Int16DictionaryArray,
-    arrow_array::Int32DictionaryArray,
-    arrow_array::Int64DictionaryArray,
-    arrow_array::UInt8DictionaryArray,
-    arrow_array::UInt16DictionaryArray,
-    arrow_array::UInt32DictionaryArray,
-    arrow_array::UInt64DictionaryArray,
+        let typed_array = self
+            .as_any()
+            .downcast_ref::<arrow_array::DictionaryArray<K>>()
+            .expect("Unable to get dictionary array");
+
+        let keys = typed_array.keys();
+
+        let value_buffer = typed_array.values();
+        let values = value_buffer
+            .as_any()
+            .downcast_ref::<arrow_array::StringArray>()
+            .unwrap();
+
+        // This removes NULL values from the keys, but
+        // they're encoded by the levels, so that's fine.
+        keys.into_iter()
+            .flatten()
+            .map(|key| {
+                key.to_usize()
+                    .unwrap_or_else(|| panic!("key {:?} does not fit in usize", key))
+            })
+            .map(|key| values.value(key))
+            .map(ByteArray::from)
+            .collect()
+    }
 }
 
 fn write_dict<K, V, T>(