You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/06/14 14:14:32 UTC

[arrow-rs] branch master updated: Faster unpacking of Int32Type dictionary (#4406)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 9d09fe562 Faster unpacking of Int32Type dictionary (#4406)
9d09fe562 is described below

commit 9d09fe562c65d4f52cdccb253690b5533f6cc23f
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Wed Jun 14 15:14:26 2023 +0100

    Faster unpacking of Int32Type dictionary (#4406)
---
 arrow-cast/src/cast.rs | 43 +++++++++++++++----------------------------
 1 file changed, 15 insertions(+), 28 deletions(-)

diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs
index 32f422768..dea3f2acf 100644
--- a/arrow-cast/src/cast.rs
+++ b/arrow-cast/src/cast.rs
@@ -49,7 +49,7 @@ use crate::parse::{
 use arrow_array::{
     builder::*, cast::*, temporal_conversions::*, timezone::Tz, types::*, *,
 };
-use arrow_buffer::{i256, ArrowNativeType, Buffer, MutableBuffer};
+use arrow_buffer::{i256, ArrowNativeType, Buffer, MutableBuffer, ScalarBuffer};
 use arrow_data::ArrayData;
 use arrow_schema::*;
 use arrow_select::take::take;
@@ -3466,34 +3466,21 @@ fn unpack_dictionary<K>(
 where
     K: ArrowDictionaryKeyType,
 {
-    let dict_array = array
-        .as_any()
-        .downcast_ref::<DictionaryArray<K>>()
-        .ok_or_else(|| {
-            ArrowError::ComputeError(
-                "Internal Error: Cannot cast dictionary to DictionaryArray of expected type".to_string(),
-            )
-        })?;
-
-    // attempt to cast the dict values to the target type
-    // use the take kernel to expand out the dictionary
+    let dict_array = array.as_dictionary::<K>();
     let cast_dict_values = cast_with_options(dict_array.values(), to_type, cast_options)?;
-
-    // Note take requires first casting the indices to u32
-    let keys_array: ArrayRef =
-        Arc::new(PrimitiveArray::<K>::from(dict_array.keys().to_data()));
-    let indices = cast_with_options(&keys_array, &DataType::UInt32, cast_options)?;
-    let u32_indices =
-        indices
-            .as_any()
-            .downcast_ref::<UInt32Array>()
-            .ok_or_else(|| {
-                ArrowError::ComputeError(
-                    "Internal Error: Cannot cast dict indices to UInt32".to_string(),
-                )
-            })?;
-
-    take(cast_dict_values.as_ref(), u32_indices, None)
+    let keys = dict_array.keys();
+    match K::DATA_TYPE {
+        DataType::Int32 => {
+            // Dictionary guarantees all non-null keys >= 0
+            let buffer = ScalarBuffer::new(keys.values().inner().clone(), 0, keys.len());
+            let indices = PrimitiveArray::new(buffer, keys.nulls().cloned());
+            take::<UInt32Type>(cast_dict_values.as_ref(), &indices, None)
+        }
+        _ => {
+            let indices = cast_with_options(keys, &DataType::UInt32, cast_options)?;
+            take::<UInt32Type>(cast_dict_values.as_ref(), indices.as_primitive(), None)
+        }
+    }
 }
 
 /// Attempts to encode an array into an `ArrayDictionary` with index