You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2022/11/02 16:09:19 UTC
[arrow-rs] branch master updated: Add `RowSelection::from_selectors_and_combine` to merge RowSelectors (#2994)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new f11372cb8 Add `RowSelection::from_selectors_and_combine` to merge RowSelectors (#2994)
f11372cb8 is described below
commit f11372cb8ff4d6fecbe1bd7b5ef3d66cba719c83
Author: Yang Jiang <ya...@ebay.com>
AuthorDate: Thu Nov 3 00:09:13 2022 +0800
Add `RowSelection::from_selectors_and_combine` to merge RowSelectors (#2994)
* Support merging RowSelectors when creating a RowSelection
* remove useless
* make combining the default behavior of `From`
* Update parquet/src/arrow/arrow_reader/selection.rs
Co-authored-by: Andrew Lamb <an...@nerdnetworks.org>
* fix comment
Co-authored-by: Andrew Lamb <an...@nerdnetworks.org>
---
parquet/src/arrow/arrow_reader/selection.rs | 124 +++++++++++++++++++++++++++-
1 file changed, 123 insertions(+), 1 deletion(-)
diff --git a/parquet/src/arrow/arrow_reader/selection.rs b/parquet/src/arrow/arrow_reader/selection.rs
index f1270926b..2328c4501 100644
--- a/parquet/src/arrow/arrow_reader/selection.rs
+++ b/parquet/src/arrow/arrow_reader/selection.rs
@@ -117,6 +117,38 @@ impl RowSelection {
Self { selectors }
}
+ /// Creates a [`RowSelection`] from a slice of `RowSelector`s, merging each run of
+ /// consecutive selectors of the same kind, e.g. `[skip(5), skip(5), select(10)]`
+ /// is combined into `[skip(10), select(10)]`.
+ /// # Note
+ /// A [`RowSelection`] must be combined prior to use within offset_index, or else the code will panic.
+ fn from_selectors_and_combine(selectors: &[RowSelector]) -> Self {
+ if selectors.len() < 2 { // zero or one selector: nothing to merge, the input is copied as-is
+ return Self {
+ selectors: Vec::from(selectors),
+ };
+ }
+ let first = selectors.first().unwrap(); // cannot fail: at least two selectors are present here
+ let mut sum_rows = first.row_count; // rows accumulated in the current run
+ let mut skip = first.skip; // kind (skip vs. select) of the current run
+ let mut combined_result = vec![];
+
+ for s in selectors.iter().skip(1) {
+ if s.skip == skip { // same kind as the current run: extend it
+ sum_rows += s.row_count
+ } else { // kind changed: emit the finished run, then start a new one from `s`
+ add_selector(skip, sum_rows, &mut combined_result);
+ sum_rows = s.row_count;
+ skip = s.skip;
+ }
+ }
+ add_selector(skip, sum_rows, &mut combined_result); // emit the final, still-open run
+
+ Self {
+ selectors: combined_result,
+ }
+ }
+
/// Given an offset index, return the offset ranges for all data pages selected by `self`
#[cfg(any(test, feature = "async"))]
pub(crate) fn scan_ranges(
@@ -307,7 +339,7 @@ impl RowSelection {
impl From<Vec<RowSelector>> for RowSelection {
fn from(selectors: Vec<RowSelector>) -> Self {
- Self { selectors }
+ Self::from_selectors_and_combine(selectors.as_slice()) // consecutive same-kind selectors are merged on conversion
}
}
@@ -317,6 +349,15 @@ impl From<RowSelection> for VecDeque<RowSelector> {
}
}
+fn add_selector(skip: bool, sum_row: usize, combined_result: &mut Vec<RowSelector>) { // appends one selector covering `sum_row` rows
+ let selector = if skip { // `skip` picks the kind of selector to build
+ RowSelector::skip(sum_row)
+ } else {
+ RowSelector::select(sum_row)
+ };
+ combined_result.push(selector);
+}
+
#[cfg(test)]
mod tests {
use super::*;
@@ -470,6 +511,87 @@ mod tests {
);
}
+ #[test]
+ fn test_combine() {
+ let a = vec![ // two leading skips that should merge into skip(6)
+ RowSelector::skip(3),
+ RowSelector::skip(3),
+ RowSelector::select(10),
+ RowSelector::skip(4),
+ ];
+
+ let b = vec![ // same as `a` plus a trailing zero-row skip
+ RowSelector::skip(3),
+ RowSelector::skip(3),
+ RowSelector::select(10),
+ RowSelector::skip(4),
+ RowSelector::skip(0), // absorbed into the preceding skip(4)
+ ];
+
+ let c = vec![ // runs of two, three and three selectors of the same kind
+ RowSelector::skip(2),
+ RowSelector::skip(4),
+ RowSelector::select(3),
+ RowSelector::select(3),
+ RowSelector::select(4),
+ RowSelector::skip(3),
+ RowSelector::skip(1),
+ RowSelector::skip(0),
+ ];
+
+ let expected = RowSelection::from(vec![ // all three inputs must combine to this
+ RowSelector::skip(6),
+ RowSelector::select(10),
+ RowSelector::skip(4),
+ ]);
+
+ assert_eq!(RowSelection::from_selectors_and_combine(&a), expected);
+ assert_eq!(RowSelection::from_selectors_and_combine(&b), expected);
+ assert_eq!(RowSelection::from_selectors_and_combine(&c), expected);
+ }
+
+ #[test]
+ fn test_combine_2elements() {
+ let a = vec![RowSelector::select(10), RowSelector::select(5)]; // select + select: merged
+ let a_expect = vec![RowSelector::select(15)];
+ assert_eq!(
+ RowSelection::from_selectors_and_combine(&a).selectors,
+ a_expect
+ );
+
+ let b = vec![RowSelector::select(10), RowSelector::skip(5)]; // select + skip: left unchanged
+ let b_expect = vec![RowSelector::select(10), RowSelector::skip(5)];
+ assert_eq!(
+ RowSelection::from_selectors_and_combine(&b).selectors,
+ b_expect
+ );
+
+ let c = vec![RowSelector::skip(10), RowSelector::select(5)]; // skip + select: left unchanged
+ let c_expect = vec![RowSelector::skip(10), RowSelector::select(5)];
+ assert_eq!(
+ RowSelection::from_selectors_and_combine(&c).selectors,
+ c_expect
+ );
+
+ let d = vec![RowSelector::skip(10), RowSelector::skip(5)]; // skip + skip: merged
+ let d_expect = vec![RowSelector::skip(15)];
+ assert_eq!(
+ RowSelection::from_selectors_and_combine(&d).selectors,
+ d_expect
+ );
+ }
+
+ #[test]
+ fn test_from_one_and_empty() {
+ let a = vec![RowSelector::select(10)]; // a single selector passes through `From` unchanged
+ let selection1 = RowSelection::from(a.clone());
+ assert_eq!(selection1.selectors, a);
+
+ let b = vec![]; // empty input yields an empty selection
+ let selection1 = RowSelection::from(b.clone()); // shadows the earlier `selection1`
+ assert_eq!(selection1.selectors, b)
+ }
+
#[test]
#[should_panic(expected = "selection exceeds the number of selected rows")]
fn test_and_longer() {