You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2021/07/25 10:57:30 UTC
[arrow-rs] branch master updated: use exponential search for
lexicographical_partition_ranges (#585)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 6c1a86e use exponential search for lexicographical_partition_ranges (#585)
6c1a86e is described below
commit 6c1a86e26ae45913fdabf3b5852f8bec289dcad9
Author: Jiayu Liu <Ji...@users.noreply.github.com>
AuthorDate: Sun Jul 25 18:57:23 2021 +0800
use exponential search for lexicographical_partition_ranges (#585)
---
arrow/src/compute/kernels/partition.rs | 34 +++++++++++++++++++++++++++++-----
1 file changed, 29 insertions(+), 5 deletions(-)
diff --git a/arrow/src/compute/kernels/partition.rs b/arrow/src/compute/kernels/partition.rs
index ad35e92..8a16942 100644
--- a/arrow/src/compute/kernels/partition.rs
+++ b/arrow/src/compute/kernels/partition.rs
@@ -73,6 +73,30 @@ impl<'a> LexicographicalPartitionIterator<'a> {
}
}
+/// Exponential search is to remedy for the case when array size and cardinality are both large
+/// see <https://en.wikipedia.org/wiki/Exponential_search>
+#[inline]
+fn exponential_search(
+ indices: &[usize],
+ target: &usize,
+ comparator: &LexicographicalComparator<'_>,
+) -> usize {
+ let mut bound = 1;
+ while bound < indices.len()
+ && comparator.compare(&indices[bound], target) != Ordering::Greater
+ {
+ bound *= 2;
+ }
+ // invariant after while loop:
+ // indices[bound / 2] <= target < indices[min(indices.len(), bound + 1)]
+ // where <= and < are defined by the comparator;
+ // note here we have right = min(indices.len(), bound + 1) because indices[bound] might
+ // actually be considered and must be included.
+ (bound / 2)
+ + indices[(bound / 2)..indices.len().min(bound + 1)]
+ .partition_point(|idx| comparator.compare(idx, target) != Ordering::Greater)
+}
+
impl<'a> Iterator for LexicographicalPartitionIterator<'a> {
type Item = Range<usize>;
@@ -87,11 +111,11 @@ impl<'a> Iterator for LexicographicalPartitionIterator<'a> {
// be careful that idx is of type &usize which points to the actual value within value_indices, which itself
// contains usize (0..row_count), providing access to lexicographical_comparator as pointers into the
// original columnar data.
- self.partition_point += self.value_indices[self.partition_point..]
- .partition_point(|idx| {
- self.comparator.compare(idx, &self.partition_point)
- != Ordering::Greater
- });
+ self.partition_point += exponential_search(
+ &self.value_indices[self.partition_point..],
+ &self.partition_point,
+ &self.comparator,
+ );
let start = self.previous_partition_point;
let end = self.partition_point;
self.previous_partition_point = self.partition_point;