You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/10/02 11:57:06 UTC

[arrow-rs] branch master updated: Add DictionaryArray::with_values (#2797) (#2798)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 41dd12d5b Add DictionaryArray::with_values (#2797) (#2798)
41dd12d5b is described below

commit 41dd12d5b30357aad2ab2a818f58f2e1b0014e46
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Sun Oct 2 12:57:00 2022 +0100

    Add DictionaryArray::with_values (#2797) (#2798)
---
 arrow-array/src/array/dictionary_array.rs | 54 +++++++++++++++++++++++++++++++
 arrow/src/compute/kernels/arity.rs        | 14 +++-----
 2 files changed, 58 insertions(+), 10 deletions(-)

diff --git a/arrow-array/src/array/dictionary_array.rs b/arrow-array/src/array/dictionary_array.rs
index 69a7b1961..96e91f729 100644
--- a/arrow-array/src/array/dictionary_array.rs
+++ b/arrow-array/src/array/dictionary_array.rs
@@ -337,6 +337,60 @@ impl<K: ArrowPrimitiveType> DictionaryArray<K> {
             values,
         })
     }
+
+    /// Returns a new dictionary with the same keys as the current instance
+    /// but with a different set of dictionary values
+    ///
+    /// This can be used to apply an operation to the values while reusing the existing keys
+    ///
+    /// # Panics
+    ///
+    /// Panics if `values` is shorter than the current dictionary values
+    ///
+    /// ```
+    /// use arrow_array::builder::PrimitiveDictionaryBuilder;
+    /// use arrow_array::{Int8Array, Int64Array, ArrayAccessor};
+    /// use arrow_array::types::{Int32Type, Int8Type};
+    ///
+    /// // Construct a Dict(Int32, Int8)
+    /// let mut builder = PrimitiveDictionaryBuilder::<Int32Type, Int8Type>::with_capacity(2, 200);
+    /// for i in 0..100 {
+    ///     builder.append(i % 2).unwrap();
+    /// }
+    ///
+    /// let dictionary = builder.finish();
+    ///
+    /// // Perform a widening cast of dictionary values
+    /// let typed_dictionary = dictionary.downcast_dict::<Int8Array>().unwrap();
+    /// let values: Int64Array = typed_dictionary.values().unary(|x| x as i64);
+    ///
+    /// // Create a Dict(Int32, Int64) that reuses the keys of the original dictionary
+    /// let new = dictionary.with_values(&values);
+    ///
+    /// // Verify values are as expected
+    /// let new_typed = new.downcast_dict::<Int64Array>().unwrap();
+    /// for i in 0..100 {
+    ///     assert_eq!(new_typed.value(i), (i % 2) as i64)
+    /// }
+    /// ```
+    ///
+    pub fn with_values(&self, values: &dyn Array) -> Self {
+        assert!(values.len() >= self.values.len());
+
+        let builder = self
+            .data
+            .clone()
+            .into_builder()
+            .data_type(DataType::Dictionary(
+                Box::new(K::DATA_TYPE),
+                Box::new(values.data_type().clone()),
+            ))
+            .child_data(vec![values.data().clone()]);
+
+        // SAFETY:
+        // Keys (offsets) were valid for the old values, and the assert above verified the new values are at least as long
+        Self::from(unsafe { builder.build_unchecked() })
+    }
 }
 
 /// Constructs a `DictionaryArray` from an array data reference.
diff --git a/arrow/src/compute/kernels/arity.rs b/arrow/src/compute/kernels/arity.rs
index cb5184c0e..11ae5a204 100644
--- a/arrow/src/compute/kernels/arity.rs
+++ b/arrow/src/compute/kernels/arity.rs
@@ -76,11 +76,8 @@ where
     F: Fn(T::Native) -> T::Native,
 {
     let dict_values = array.values().as_any().downcast_ref().unwrap();
-    let values = unary::<T, F, T>(dict_values, op).into_data();
-    let data = array.data().clone().into_builder().child_data(vec![values]);
-
-    let new_dict: DictionaryArray<K> = unsafe { data.build_unchecked() }.into();
-    Ok(Arc::new(new_dict))
+    let values = unary::<T, F, T>(dict_values, op);
+    Ok(Arc::new(array.with_values(&values)))
 }
 
 /// A helper function that applies a fallible unary function to a dictionary array with primitive value type.
@@ -98,11 +95,8 @@ where
     }
 
     let dict_values = array.values().as_any().downcast_ref().unwrap();
-    let values = try_unary::<T, F, T>(dict_values, op)?.into_data();
-    let data = array.data().clone().into_builder().child_data(vec![values]);
-
-    let new_dict: DictionaryArray<K> = unsafe { data.build_unchecked() }.into();
-    Ok(Arc::new(new_dict))
+    let values = try_unary::<T, F, T>(dict_values, op)?;
+    Ok(Arc::new(array.with_values(&values)))
 }
 
 /// Applies an infallible unary function to an array with primitive values.