You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2020/03/24 17:11:12 UTC

[hive] 01/02: HIVE-22959 : Extend storage-api to expose FilterContext (Panos G via Gopal V)

This is an automated email from the ASF dual-hosted git repository.

jcamacho pushed a commit to branch storage-branch-2.7
in repository https://gitbox.apache.org/repos/asf/hive.git

commit dade9919d904f8a4bff12a9130c150301a4713ed
Author: Panagiotis Garefalakis <pa...@cloudera.com>
AuthorDate: Mon Mar 16 10:26:36 2020 -0700

    HIVE-22959 : Extend storage-api to expose FilterContext (Panos G via Gopal V)
    
    Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
---
 .../hive/ql/io/filter/TestFilterContext.java       | 124 ++++++++++++++++++++
 .../hadoop/hive/ql/io/filter/FilterContext.java    |  72 ++++++++++++
 .../hive/ql/io/filter/MutableFilterContext.java    | 130 +++++++++++++++++++++
 3 files changed, 326 insertions(+)

diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/filter/TestFilterContext.java b/ql/src/test/org/apache/hadoop/hive/ql/io/filter/TestFilterContext.java
new file mode 100644
index 0000000..0bda620
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/filter/TestFilterContext.java
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.filter;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.Arrays;
+
+/**
+ * Test creation and manipulation of MutableFilterContext and FilterContext.
+ */
+public class TestFilterContext {
+
+  /** Build 512 strictly increasing row ids (0, 2, 4, ...), which pass validation. */
+  private int[] makeValidSelected() {
+    int[] selected = new int[512];
+    for (int i = 0; i < selected.length; i++) {
+      selected[i] = i * 2;
+    }
+    return selected;
+  }
+
+  /** Build 512 identical row ids (all 1), which validation rejects as duplicates. */
+  private int[] makeInvalidSelected() {
+    int[] selected = new int[512];
+    Arrays.fill(selected, 1);
+    return selected;
+  }
+
+  @Test
+  public void testInitFilterContext() {
+    MutableFilterContext mutableFilterContext = new MutableFilterContext();
+    int[] selected = makeValidSelected();
+
+    mutableFilterContext.setFilterContext(true, selected, selected.length);
+    FilterContext filterContext = mutableFilterContext.immutable();
+
+    Assert.assertTrue(filterContext.isSelectedInUse());
+    Assert.assertEquals(512, filterContext.getSelectedSize());
+    Assert.assertEquals(512, filterContext.getSelected().length);
+    Assert.assertSame(selected, filterContext.getSelected());
+  }
+
+  @Test
+  public void testResetFilterContext() {
+    MutableFilterContext mutableFilterContext = new MutableFilterContext();
+    int[] selected = makeValidSelected();
+
+    mutableFilterContext.setFilterContext(true, selected, selected.length);
+    FilterContext filterContext = mutableFilterContext.immutable();
+
+    Assert.assertTrue(filterContext.isSelectedInUse());
+    Assert.assertEquals(512, filterContext.getSelectedSize());
+    Assert.assertEquals(512, filterContext.getSelected().length);
+
+    filterContext.resetFilterContext();
+
+    Assert.assertFalse(filterContext.isSelectedInUse());
+    Assert.assertEquals(0, filterContext.getSelectedSize());
+    Assert.assertNull(filterContext.getSelected());
+  }
+
+  // The validation is an assert statement, so this test needs JVM assertions enabled.
+  @Test(expected=AssertionError.class)
+  public void testInitInvalidFilterContext() {
+    MutableFilterContext mutableFilterContext = new MutableFilterContext();
+    int[] selected = makeInvalidSelected();
+
+    mutableFilterContext.setFilterContext(true, selected, selected.length);
+  }
+
+  @Test
+  public void testCopyFilterContext() {
+    MutableFilterContext mutableFilterContext = new MutableFilterContext();
+    int[] selected = makeValidSelected();
+
+    mutableFilterContext.setFilterContext(true, selected, selected.length);
+
+    MutableFilterContext mutableFilterContextToCopy = new MutableFilterContext();
+    mutableFilterContextToCopy.setFilterContext(true, new int[] {100}, 1);
+
+    mutableFilterContext.copyFilterContextFrom(mutableFilterContextToCopy);
+    FilterContext filterContext = mutableFilterContext.immutable();
+
+    Assert.assertTrue(filterContext.isSelectedInUse());
+    Assert.assertEquals(1, filterContext.getSelectedSize());
+    Assert.assertEquals(100, filterContext.getSelected()[0]);
+    // make sure we kept the remaining array space
+    Assert.assertEquals(512, filterContext.getSelected().length);
+  }
+
+  @Test
+  public void testBorrowSelected() {
+    MutableFilterContext mutableFilterContext = new MutableFilterContext();
+    int[] selected = new int[] {100, 200};
+    mutableFilterContext.setFilterContext(true, selected, 2);
+
+    int[] borrowedSelected = mutableFilterContext.borrowSelected(1);
+    // make sure we borrowed the existing array itself and the context gave it up
+    Assert.assertSame(selected, borrowedSelected);
+    Assert.assertArrayEquals(new int[] {100, 200}, borrowedSelected);
+    Assert.assertNull(mutableFilterContext.immutable().getSelected());
+
+    // nothing is left to borrow, so a new zero-filled array of the requested size is allocated
+    borrowedSelected = mutableFilterContext.borrowSelected(3);
+    Assert.assertArrayEquals(new int[] {0, 0, 0}, borrowedSelected);
+  }
+}
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/FilterContext.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/FilterContext.java
new file mode 100644
index 0000000..d799d6f
--- /dev/null
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/FilterContext.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.filter;
+
+/**
+ * A representation of a Filter applied on the rows of a VectorizedRowBatch
+ * {@link org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch}.
+ *
+ * A FilterContext holds three values: an array with the ids (int) of the rows selected by the
+ * filter, the number of selected rows, and a boolean telling whether the filter is applied.
+ * Only accessors and a reset are offered here; the fields are protected, so subclasses
+ * can add mutating operations.
+ */
+public abstract class FilterContext {
+
+  // Initial values are the Java defaults (false, null, 0), the same state a reset restores.
+  protected boolean currBatchIsSelectedInUse;
+  protected int[] currBatchSelected;
+  protected int currBatchSelectedSize;
+
+  public FilterContext() {
+  }
+
+  /**
+   * Reset FilterContext variables: filter not in use, no selected array, size zero.
+   */
+  public void resetFilterContext() {
+    currBatchSelectedSize = 0;
+    currBatchSelected = null;
+    currBatchIsSelectedInUse = false;
+  }
+
+  /**
+   * Is the filter applied?
+   * @return true if the filter is actually applied
+   */
+  public boolean isSelectedInUse() {
+    return currBatchIsSelectedInUse;
+  }
+
+  /**
+   * Return the array with the ids of the rows that pass the filter. This is the array held by
+   * the context, not a copy (do not modify it!) and it is null when no array has been set.
+   * @return int array, possibly null
+   */
+  public int[] getSelected() {
+    return currBatchSelected;
+  }
+
+  /**
+   * Return the number of rows that pass the filter.
+   * @return an int
+   */
+  public int getSelectedSize() {
+    return currBatchSelectedSize;
+  }
+}
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/MutableFilterContext.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/MutableFilterContext.java
new file mode 100644
index 0000000..73ed766
--- /dev/null
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/MutableFilterContext.java
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.filter;
+
+import java.util.Arrays;
+
+/**
+ * Mutable representation of a Filter applied on the rows of a VectorizedRowBatch
+ * {@link org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch}.
+ *
+ * Each FilterContext consists of an array with the ids (int) of rows that are selected by the
+ * filter, an integer representing the number of selected rows, and a boolean showing if the
+ * filter is applied. This subclass adds the operations that set, copy and borrow these values;
+ * the read-only accessors are inherited from {@link FilterContext}.
+ */
+public class MutableFilterContext extends FilterContext {
+
+  /**
+   * Set all context values at once; the selected array is stored by reference, not copied.
+   * @param isSelectedInUse if the filter is applied
+   * @param selected an array of the selected rows
+   * @param selectedSize the number of the selected rows
+   */
+  public void setFilterContext(boolean isSelectedInUse, int[] selected, int selectedSize) {
+    this.currBatchIsSelectedInUse = isSelectedInUse;
+    this.currBatchSelected = selected;
+    this.currBatchSelectedSize = selectedSize;
+    // selected.length may exceed selectedSize (a larger array can be borrowed), so only the
+    // first selectedSize entries are checked; the check runs only when asserts are enabled
+    assert isValidSelected() : "Selected array may not contain duplicates or unordered values";
+  }
+
+  /**
+   * Copy context variables from a given FilterContext. Selected row ids are copied into an
+   * array owned by this context, never shared. If the source holds no selected array (fresh,
+   * reset or borrowed) only the size and in-use flag are copied and our array is kept as is.
+   * @param other FilterContext to copy from
+   */
+  public void copyFilterContextFrom(MutableFilterContext other) {
+    // assert if copying into self (can fail only in testing)
+    assert this != other: "May not copy a FilterContext to itself";
+
+    int[] source = other.currBatchSelected;
+    if (this != other && source != null) {
+      if (this.currBatchSelected == null || this.currBatchSelected.length < other.currBatchSelectedSize) {
+        // note: still allocating a full size buffer, for later use
+        this.currBatchSelected = Arrays.copyOf(source, source.length);
+      } else {
+        System.arraycopy(source, 0, this.currBatchSelected, 0, other.currBatchSelectedSize);
+      }
+    }
+    // size and flag are mirrored even without a source array (both are no-ops on a self copy)
+    this.currBatchSelectedSize = other.currBatchSelectedSize;
+    this.currBatchIsSelectedInUse = other.currBatchIsSelectedInUse;
+  }
+
+  /**
+   * Check that the first selectedSize entries of the selected array are strictly increasing,
+   * i.e. in order and without duplicates ([1,1,1] is illegal).
+   * @return true if the selected array is valid
+   */
+  public boolean isValidSelected() {
+    for (int i = 1; i < this.currBatchSelectedSize; i++) {
+      if (this.currBatchSelected[i-1] >= this.currBatchSelected[i]) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Borrow the current selected array to be modified if it satisfies minimum capacity.
+   * If it is too small or unset, allocates one. In both cases the context gives up its own
+   * reference: its selected array is null until one is set again.
+   * @param minCapacity minimum length of the returned array
+   * @return the array to be modified, never null
+   */
+  public int[] borrowSelected(int minCapacity) {
+    int[] existing = this.currBatchSelected;
+    this.currBatchSelected = null;
+    return (existing == null || existing.length < minCapacity) ? new int[minCapacity] : existing;
+  }
+
+  /**
+   * Get this context typed as the base FilterContext, which has no setters. No copy is made.
+   * @return this same instance, as a FilterContext
+   */
+  public FilterContext immutable(){
+    return this;
+  }
+
+  /**
+   * Set the selectedInUse boolean showing if the filter is applied.
+   * @param selectedInUse true if the filter is applied
+   */
+  public void setSelectedInUse(boolean selectedInUse) {
+    this.currBatchIsSelectedInUse = selectedInUse;
+  }
+
+  /**
+   * Set the array of the rows that pass the filter by reference (no copy, no validation).
+   * @param selectedArray the array of selected row ids to hold
+   */
+  public void setSelected(int[] selectedArray) {
+    this.currBatchSelected = selectedArray;
+  }
+
+  /**
+   * Set the number of the rows that pass the filter (no validation is run here).
+   * @param selectedSize the number of selected rows
+   */
+  public void setSelectedSize(int selectedSize) {
+    this.currBatchSelectedSize = selectedSize;
+  }
+}