You are viewing a plain-text version of this content. The canonical link for it was a hyperlink that is not preserved in this plain-text copy.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/06/14 14:14:32 UTC
[arrow-rs] branch master updated: Faster unpacking of Int32Type dictionary (#4406)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 9d09fe562 Faster unpacking of Int32Type dictionary (#4406)
9d09fe562 is described below
commit 9d09fe562c65d4f52cdccb253690b5533f6cc23f
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Wed Jun 14 15:14:26 2023 +0100
Faster unpacking of Int32Type dictionary (#4406)
---
arrow-cast/src/cast.rs | 43 +++++++++++++++----------------------------
1 file changed, 15 insertions(+), 28 deletions(-)
diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs
index 32f422768..dea3f2acf 100644
--- a/arrow-cast/src/cast.rs
+++ b/arrow-cast/src/cast.rs
@@ -49,7 +49,7 @@ use crate::parse::{
use arrow_array::{
builder::*, cast::*, temporal_conversions::*, timezone::Tz, types::*, *,
};
-use arrow_buffer::{i256, ArrowNativeType, Buffer, MutableBuffer};
+use arrow_buffer::{i256, ArrowNativeType, Buffer, MutableBuffer, ScalarBuffer};
use arrow_data::ArrayData;
use arrow_schema::*;
use arrow_select::take::take;
@@ -3466,34 +3466,21 @@ fn unpack_dictionary<K>(
where
K: ArrowDictionaryKeyType,
{
- let dict_array = array
- .as_any()
- .downcast_ref::<DictionaryArray<K>>()
- .ok_or_else(|| {
- ArrowError::ComputeError(
- "Internal Error: Cannot cast dictionary to DictionaryArray of expected type".to_string(),
- )
- })?;
-
- // attempt to cast the dict values to the target type
- // use the take kernel to expand out the dictionary
+ let dict_array = array.as_dictionary::<K>();
let cast_dict_values = cast_with_options(dict_array.values(), to_type, cast_options)?;
-
- // Note take requires first casting the indices to u32
- let keys_array: ArrayRef =
- Arc::new(PrimitiveArray::<K>::from(dict_array.keys().to_data()));
- let indices = cast_with_options(&keys_array, &DataType::UInt32, cast_options)?;
- let u32_indices =
- indices
- .as_any()
- .downcast_ref::<UInt32Array>()
- .ok_or_else(|| {
- ArrowError::ComputeError(
- "Internal Error: Cannot cast dict indices to UInt32".to_string(),
- )
- })?;
-
- take(cast_dict_values.as_ref(), u32_indices, None)
+ let keys = dict_array.keys();
+ match K::DATA_TYPE {
+ DataType::Int32 => {
+ // Dictionary guarantees all non-null keys >= 0
+ let buffer = ScalarBuffer::new(keys.values().inner().clone(), 0, keys.len());
+ let indices = PrimitiveArray::new(buffer, keys.nulls().cloned());
+ take::<UInt32Type>(cast_dict_values.as_ref(), &indices, None)
+ }
+ _ => {
+ let indices = cast_with_options(keys, &DataType::UInt32, cast_options)?;
+ take::<UInt32Type>(cast_dict_values.as_ref(), indices.as_primitive(), None)
+ }
+ }
}
/// Attempts to encode an array into an `ArrayDictionary` with index