You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/10/28 23:34:06 UTC

[arrow-rs] branch master updated: Faster unpack_dict_comparison (#2968)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 94a7f4b69 Faster unpack_dict_comparison (#2968)
94a7f4b69 is described below

commit 94a7f4b69901754126186f4e18d08d59af76088e
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Sat Oct 29 12:34:01 2022 +1300

    Faster unpack_dict_comparison (#2968)
---
 arrow/src/compute/kernels/comparison.rs | 33 ++++++++++++---------------------
 1 file changed, 12 insertions(+), 21 deletions(-)

diff --git a/arrow/src/compute/kernels/comparison.rs b/arrow/src/compute/kernels/comparison.rs
index 143050ea9..94e7f9660 100644
--- a/arrow/src/compute/kernels/comparison.rs
+++ b/arrow/src/compute/kernels/comparison.rs
@@ -27,18 +27,19 @@ use crate::array::*;
 use crate::buffer::{buffer_unary_not, Buffer, MutableBuffer};
 use crate::compute::util::combine_option_bitmap;
 use crate::datatypes::{
-    ArrowNativeType, ArrowNativeTypeOp, ArrowNumericType, DataType, Date32Type,
-    Date64Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type,
-    IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit, IntervalYearMonthType,
-    Time32MillisecondType, Time32SecondType, Time64MicrosecondType, Time64NanosecondType,
-    TimeUnit, TimestampMicrosecondType, TimestampMillisecondType,
-    TimestampNanosecondType, TimestampSecondType, UInt16Type, UInt32Type, UInt64Type,
-    UInt8Type,
+    ArrowNativeTypeOp, ArrowNumericType, DataType, Date32Type, Date64Type, Float32Type,
+    Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, IntervalDayTimeType,
+    IntervalMonthDayNanoType, IntervalUnit, IntervalYearMonthType, Time32MillisecondType,
+    Time32SecondType, Time64MicrosecondType, Time64NanosecondType, TimeUnit,
+    TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType,
+    TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
 };
 #[allow(unused_imports)]
 use crate::downcast_dictionary_array;
 use crate::error::{ArrowError, Result};
 use crate::util::bit_util;
+use arrow_select::take::take;
+use num::ToPrimitive;
 use regex::Regex;
 use std::collections::HashMap;
 
@@ -1815,21 +1816,11 @@ fn unpack_dict_comparison<K>(
 ) -> Result<BooleanArray>
 where
     K: ArrowNumericType,
+    K::Native: ToPrimitive,
 {
-    assert_eq!(dict_comparison.len(), dict.values().len());
-
-    let result: BooleanArray = dict
-        .keys()
-        .iter()
-        .map(|key| {
-            key.map(|key| unsafe {
-                let key = key.as_usize();
-                dict_comparison.value_unchecked(key)
-            })
-        })
-        .collect();
-
-    Ok(result)
+    // TODO: Use take_boolean (#2967)
+    let array = take(&dict_comparison, dict.keys(), None)?;
+    Ok(BooleanArray::from(array.data().clone()))
 }
 
 /// Helper function to perform boolean lambda function on values from two arrays using