You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2020/10/06 17:06:39 UTC

[GitHub] [arrow] alamb commented on a change in pull request #8346: ARROW-10164: [Rust] Add support for DictionaryArray to cast kernel

alamb commented on a change in pull request #8346:
URL: https://github.com/apache/arrow/pull/8346#discussion_r500443425



##########
File path: rust/arrow/src/compute/kernels/cast.rs
##########
@@ -755,10 +784,253 @@ where
     Ok(b.finish())
 }
 
+/// Attempts to cast a `DictionaryArray` with key (index) type `K` into
+/// `to_type`; supported targets are dictionary, integer, and `Utf8` types.
+/// Returns an error if `array` is not a `DictionaryArray<K>`, if `to_type` is
+/// unsupported, or if casting the keys to a new key type introduces new nulls.
+fn dictionary_cast<K: ArrowDictionaryKeyType>(
+    array: &ArrayRef,
+    to_type: &DataType,
+) -> Result<ArrayRef> {
+    use DataType::*;
+
+    let dict_array = array
+        .as_any()
+        .downcast_ref::<DictionaryArray<K>>()
+        .ok_or_else(|| {
+            ArrowError::ComputeError(
+                "Internal Error: Cannot cast dictionary to DictionaryArray of expected type".to_string(),
+            )
+        })?;
+
+    match to_type {
+        Dictionary(to_index_type, to_value_type) => {
+            let keys_array: ArrayRef = Arc::new(dict_array.keys_array());
+            let values_array: ArrayRef = dict_array.values();
+            let cast_keys = cast(&keys_array, to_index_type)?;
+            let cast_values = cast(&values_array, to_value_type)?;
+
+            // Failure to cast keys (because they don't fit in the target type)
+            // results in NULL values, so any increase in null count is an error.
+            if cast_keys.null_count() > keys_array.null_count() {
+                return Err(ArrowError::ComputeError(format!(
+                    "Could not convert {} dictionary indexes from {:?} to {:?}",
+                    cast_keys.null_count() - keys_array.null_count(),
+                    keys_array.data_type(),
+                    to_index_type
+                )));
+            }
+
+            // cast keys supply length, nulls, offset and buffers; cast values become the child data (dictionary)
+            let data = Arc::new(ArrayData::new(
+                to_type.clone(),
+                cast_keys.len(),
+                Some(cast_keys.null_count()),
+                cast_keys
+                    .data()
+                    .null_bitmap()
+                    .clone()
+                    .map(|bitmap| bitmap.bits),
+                cast_keys.data().offset(),
+                cast_keys.data().buffers().to_vec(),
+                vec![cast_values.data()],
+            ));
+
+            // wrap the data in a DictionaryArray typed by the requested key type
+            let new_array: ArrayRef = match **to_index_type {
+                Int8 => Arc::new(DictionaryArray::<Int8Type>::from(data)),
+                Int16 => Arc::new(DictionaryArray::<Int16Type>::from(data)),
+                Int32 => Arc::new(DictionaryArray::<Int32Type>::from(data)),
+                Int64 => Arc::new(DictionaryArray::<Int64Type>::from(data)),
+                UInt8 => Arc::new(DictionaryArray::<UInt8Type>::from(data)),
+                UInt16 => Arc::new(DictionaryArray::<UInt16Type>::from(data)),
+                UInt32 => Arc::new(DictionaryArray::<UInt32Type>::from(data)),
+                UInt64 => Arc::new(DictionaryArray::<UInt64Type>::from(data)),
+                _ => {
+                    return Err(ArrowError::ComputeError(format!(
+                        "Unsupported type {:?} for dictionary index",
+                        to_index_type
+                    )))
+                }
+            };
+
+            Ok(new_array)
+        }
+        // integer target types: delegate to unpack_dictionary_to_numeric
+        Int8 => unpack_dictionary_to_numeric::<K, Int8Type>(dict_array, to_type),
+        Int16 => unpack_dictionary_to_numeric::<K, Int16Type>(dict_array, to_type),
+        Int32 => unpack_dictionary_to_numeric::<K, Int32Type>(dict_array, to_type),
+        Int64 => unpack_dictionary_to_numeric::<K, Int64Type>(dict_array, to_type),
+        UInt8 => unpack_dictionary_to_numeric::<K, UInt8Type>(dict_array, to_type),
+        UInt16 => unpack_dictionary_to_numeric::<K, UInt16Type>(dict_array, to_type),
+        UInt32 => unpack_dictionary_to_numeric::<K, UInt32Type>(dict_array, to_type),
+        UInt64 => unpack_dictionary_to_numeric::<K, UInt64Type>(dict_array, to_type),
+        Utf8 => unpack_dictionary_to_string::<K>(dict_array),
+        _ => Err(ArrowError::ComputeError(format!(
+            "Unsupported output type for dictionary conversion: {:?}",
+            to_type
+        ))),
+    }
+}
+
+// Unpack the dictionary where the keys are of type <K> and the values
+// are of type <V> into a primitive array of type to_type
+fn unpack_dictionary_to_numeric<K, V>(
+    dict_array: &DictionaryArray<K>,
+    to_type: &DataType,
+) -> Result<ArrayRef>
+where
+    K: ArrowDictionaryKeyType,
+    V: ArrowNumericType,
+{
+    // attempt to cast the dict values to the target type
+    let cast_dict_values = cast(&dict_array.values(), to_type)?;
+    let dict_values = cast_dict_values
+        .as_any()
+        .downcast_ref::<PrimitiveArray<V>>()
+        .unwrap();
+
+    let mut b = PrimitiveBuilder::<V>::new(dict_array.len());
+
+    // copy each element one at a time
+    for key in dict_array.keys() {

Review comment:
       that is an excellent idea -- I will look into doing so




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org