You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2020/05/03 08:06:32 UTC

[hive] branch master updated: HIVE-23215 : Make FilterContext and MutableFilterContext interfaces (Owen O'malley, Panos G via Ashutosh Chauhan)

This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 1f4bc02  HIVE-23215 : Make FilterContext and MutableFilterContext interfaces (Owen O'malley, Panos G via Ashutosh Chauhan)
1f4bc02 is described below

commit 1f4bc024d202ff69ff512d4ba50ad9719cbe2854
Author: Owen O'Malley <om...@apache.org>
AuthorDate: Sun May 3 01:05:47 2020 -0700

    HIVE-23215 : Make FilterContext and MutableFilterContext interfaces (Owen O'malley, Panos G via Ashutosh Chauhan)
    
    Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
---
 .../hive/ql/io/filter/TestFilterContext.java       | 38 +++---------
 .../hive/ql/exec/vector/VectorizedRowBatch.java    | 63 +++++++++++++++++++-
 .../hadoop/hive/ql/io/filter/FilterContext.java    | 28 ++-------
 .../hive/ql/io/filter/MutableFilterContext.java    | 68 +++-------------------
 4 files changed, 84 insertions(+), 113 deletions(-)

diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/filter/TestFilterContext.java b/ql/src/test/org/apache/hadoop/hive/ql/io/filter/TestFilterContext.java
index 0bda620..c59cc09 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/filter/TestFilterContext.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/filter/TestFilterContext.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hive.ql.io.filter;
 
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.junit.Assert;
 import org.junit.Test;
 
@@ -43,7 +44,7 @@ public class TestFilterContext {
 
   @Test
   public void testInitFilterContext(){
-    MutableFilterContext mutableFilterContext = new MutableFilterContext();
+    MutableFilterContext mutableFilterContext = new VectorizedRowBatch(0);
     int[] selected = makeValidSelected();
 
     mutableFilterContext.setFilterContext(true, selected, selected.length);
@@ -57,7 +58,7 @@ public class TestFilterContext {
 
   @Test
   public void testResetFilterContext(){
-    MutableFilterContext mutableFilterContext = new MutableFilterContext();
+    MutableFilterContext mutableFilterContext = new VectorizedRowBatch(0);
     int[] selected = makeValidSelected();
 
     mutableFilterContext.setFilterContext(true, selected, selected.length);
@@ -67,55 +68,32 @@ public class TestFilterContext {
     Assert.assertEquals(512, filterContext.getSelectedSize());
     Assert.assertEquals(512, filterContext.getSelected().length);
 
-    filterContext.resetFilterContext();
+    filterContext.reset();
 
     Assert.assertEquals(false, filterContext.isSelectedInUse());
     Assert.assertEquals(0, filterContext.getSelectedSize());
-    Assert.assertEquals(null, filterContext.getSelected());
   }
 
   @Test(expected=AssertionError.class)
   public void testInitInvalidFilterContext(){
-    MutableFilterContext mutableFilterContext = new MutableFilterContext();
+    MutableFilterContext mutableFilterContext = new VectorizedRowBatch(0);
     int[] selected = makeInvalidSelected();
 
     mutableFilterContext.setFilterContext(true, selected, selected.length);
   }
 
-
-  @Test
-  public void testCopyFilterContext(){
-    MutableFilterContext mutableFilterContext = new MutableFilterContext();
-    int[] selected = makeValidSelected();
-
-    mutableFilterContext.setFilterContext(true, selected, selected.length);
-
-    MutableFilterContext mutableFilterContextToCopy = new MutableFilterContext();
-    mutableFilterContextToCopy.setFilterContext(true, new int[] {100}, 1);
-
-    mutableFilterContext.copyFilterContextFrom(mutableFilterContextToCopy);
-    FilterContext filterContext = mutableFilterContext.immutable();
-
-    Assert.assertEquals(true, filterContext.isSelectedInUse());
-    Assert.assertEquals(1, filterContext.getSelectedSize());
-    Assert.assertEquals(100, filterContext.getSelected()[0]);
-    // make sure we kept the remaining array space
-    Assert.assertEquals(512, filterContext.getSelected().length);
-  }
-
-
   @Test
   public void testBorrowSelected(){
-    MutableFilterContext mutableFilterContext = new MutableFilterContext();
+    MutableFilterContext mutableFilterContext = new VectorizedRowBatch(0);
     mutableFilterContext.setFilterContext(true, new int[] {100, 200}, 2);
 
-    int[] borrowedSelected = mutableFilterContext.borrowSelected(1);
+    int[] borrowedSelected = mutableFilterContext.updateSelected(1);
     // make sure we borrowed the existing array
     Assert.assertEquals(2, borrowedSelected.length);
     Assert.assertEquals(100, borrowedSelected[0]);
     Assert.assertEquals(200, borrowedSelected[1]);
 
-    borrowedSelected = mutableFilterContext.borrowSelected(3);
+    borrowedSelected = mutableFilterContext.updateSelected(3);
     Assert.assertEquals(3, borrowedSelected.length);
     Assert.assertEquals(0, borrowedSelected[0]);
     Assert.assertEquals(0, borrowedSelected[1]);
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
index 4a44345..0e678d3 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
@@ -21,6 +21,7 @@ import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
 
+import org.apache.hadoop.hive.ql.io.filter.MutableFilterContext;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Writable;
 
@@ -31,7 +32,7 @@ import org.apache.hadoop.io.Writable;
  * The major fields are public by design to allow fast and convenient
  * access by the vectorized query execution code.
  */
-public class VectorizedRowBatch implements Writable {
+public class VectorizedRowBatch implements Writable, MutableFilterContext {
   public int numCols;           // number of columns
   public ColumnVector[] cols;   // a vector for each column
   public int size;              // number of rows that qualify (i.e. haven't been filtered out)
@@ -338,6 +339,7 @@ public class VectorizedRowBatch implements Writable {
    *  - resets each column
    *  - inits each column
    */
+  @Override
   public void reset() {
     selectedInUse = false;
     size = 0;
@@ -358,5 +360,64 @@ public class VectorizedRowBatch implements Writable {
     for (ColumnVector col : cols) {
       col.ensureSize(rows, false);
     }
+    updateSelected(rows);
+  }
+
+  @Override
+  public boolean isSelectedInUse() {
+    return selectedInUse;
+  }
+
+  @Override
+  public int[] getSelected() {
+    return selected;
+  }
+
+  @Override
+  public int getSelectedSize() {
+    return size;
+  }
+
+  @Override
+  public void setFilterContext(boolean isSelectedInUse, int[] selected, int selectedSize) {
+    this.selectedInUse = isSelectedInUse;
+    this.selected = selected;
+    this.size = selectedSize;
+    // Avoid selected.length < selectedSize since we can borrow a larger array for selected
+    // Debug-only ordering check of the selected array: call validateSelected() without the assert if it is needed outside tests (assertions are normally only enabled when testing)
+    assert validateSelected() : "Selected array may not contain duplicates or unordered values";
+  }
+
+  @Override
+  public boolean validateSelected() {
+    for (int i = 1; i < size; i++) {
+      if (selected[i-1] >= selected[i]) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  @Override
+  public int[] updateSelected(int minCapacity) {
+    if (selected == null || selected.length < minCapacity) {
+      selected = new int[minCapacity];
+    }
+    return selected;
+  }
+
+  @Override
+  public void setSelectedInUse(boolean selectedInUse) {
+    this.selectedInUse = selectedInUse;
+  }
+
+  @Override
+  public void setSelected(int[] selectedArray) {
+    selected = selectedArray;
+  }
+
+  @Override
+  public void setSelectedSize(int selectedSize) {
+    size = selectedSize;
   }
 }
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/FilterContext.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/FilterContext.java
index d799d6f..dcf7b8b 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/FilterContext.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/FilterContext.java
@@ -26,47 +26,29 @@ package org.apache.hadoop.hive.ql.io.filter;
  * actually selected any rows.
  *
  */
-public abstract class FilterContext {
-
-  protected boolean currBatchIsSelectedInUse = false;
-  protected int[] currBatchSelected = null;
-  protected int currBatchSelectedSize = 0;
-
-  public FilterContext() {
-    super();
-  }
+public interface FilterContext {
 
   /**
    * Reset FilterContext variables.
    */
-  public void resetFilterContext() {
-    this.currBatchIsSelectedInUse = false;
-    this.currBatchSelected = null;
-    this.currBatchSelectedSize = 0;
-  }
+  void reset();
 
   /**
    * Is the filter applied?
    * @return true if the filter is actually applied
    */
-  public boolean isSelectedInUse() {
-    return this.currBatchIsSelectedInUse;
-  }
+  boolean isSelectedInUse();
 
   /**
    * Return an int array with the rows that pass the filter.
    * Do not modify the array returned!
    * @return int array
    */
-  public int[] getSelected() {
-    return this.currBatchSelected;
-  }
+  int[] getSelected();
 
   /**
    * Return the number of rows that pass the filter.
    * @return an int
    */
-  public int getSelectedSize() {
-    return this.currBatchSelectedSize;
-  }
+  int getSelectedSize();
 }
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/MutableFilterContext.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/MutableFilterContext.java
index 73ed766..55a4cc7 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/MutableFilterContext.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/MutableFilterContext.java
@@ -17,8 +17,6 @@
  */
 package org.apache.hadoop.hive.ql.io.filter;
 
-import java.util.Arrays;
-
 /**
  * A representation of a Filter applied on the rows of a VectorizedRowBatch
  * {@link org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch}.
@@ -28,7 +26,7 @@ import java.util.Arrays;
  * actually selected any rows.
  *
  */
-public class MutableFilterContext extends FilterContext {
+public interface MutableFilterContext extends FilterContext {
 
   /**
    * Set context with the given values by reference.
@@ -36,71 +34,29 @@ public class MutableFilterContext extends FilterContext {
    * @param selected an array of the selected rows
    * @param selectedSize the number of the selected rows
    */
-  public void setFilterContext(boolean isSelectedInUse, int[] selected, int selectedSize) {
-    this.currBatchIsSelectedInUse = isSelectedInUse;
-    this.currBatchSelected = selected;
-    this.currBatchSelectedSize = selectedSize;
-    // Avoid selected.length < selectedSize since we can borrow a larger array for selected
-    // Debug loop for selected array: use without assert when needed (asserts only fail in testing)
-    assert isValidSelected() : "Selected array may not contain duplicates or unordered values";
-  }
-
-  /**
-   * Copy context variables from the a given FilterContext.
-   * Always does a deep copy of the data.
-   * @param other FilterContext to copy from
-   */
-  public void copyFilterContextFrom(MutableFilterContext other) {
-    // assert if copying into self (can fail only in testing)
-    assert this != other: "May not copy a FilterContext to itself";
-
-    if (this != other) {
-      if (this.currBatchSelected == null || this.currBatchSelected.length < other.currBatchSelectedSize) {
-        // note: still allocating a full size buffer, for later use
-        this.currBatchSelected = Arrays.copyOf(other.currBatchSelected, other.currBatchSelected.length);
-      } else {
-        System.arraycopy(other.currBatchSelected, 0, this.currBatchSelected, 0, other.currBatchSelectedSize);
-      }
-      this.currBatchSelectedSize = other.currBatchSelectedSize;
-      this.currBatchIsSelectedInUse = other.currBatchIsSelectedInUse;
-    }
-  }
+  void setFilterContext(boolean isSelectedInUse, int[] selected, int selectedSize);
 
   /**
    * Validate method checking if existing selected array contains accepted values.
    * Values should be in order and without duplicates i.e [1,1,1] is illegal
    * @return true if the selected array is valid
    */
-  public boolean isValidSelected() {
-    for (int i = 1; i < this.currBatchSelectedSize; i++) {
-      if (this.currBatchSelected[i-1] >= this.currBatchSelected[i]) {
-        return false;
-      }
-    }
-    return true;
-  }
+  boolean validateSelected();
 
   /**
-   * Borrow the current selected array to be modified if it satisfies minimum capacity.
+   * Get an array for selected that is expected to be modified.
    * If it is too small or unset, allocates one.
    * This method never returns null!
    * @param minCapacity
    * @return the current selected array to be modified
    */
-  public int[] borrowSelected(int minCapacity) {
-    int[] existing = this.currBatchSelected;
-    this.currBatchSelected = null;
-    if (existing == null || existing.length < minCapacity) {
-      return new int[minCapacity];
-    }
-    return existing;
-  }
+  int[] updateSelected(int minCapacity);
 
   /**
    * Get the immutable version of the current FilterContext.
    * @return immutable FilterContext instance
    */
-  public FilterContext immutable(){
+  default FilterContext immutable() {
     return this;
   }
 
@@ -108,23 +64,17 @@ public class MutableFilterContext extends FilterContext {
    * Set the selectedInUse boolean showing if the filter is applied.
    * @param selectedInUse
    */
-  public void setSelectedInUse(boolean selectedInUse) {
-    this.currBatchIsSelectedInUse = selectedInUse;
-  }
+  void setSelectedInUse(boolean selectedInUse);
 
   /**
    * Set the array of the rows that pass the filter by reference.
    * @param selectedArray
    */
-  public void setSelected(int[] selectedArray) {
-    this.currBatchSelected = selectedArray;
-  }
+  void setSelected(int[] selectedArray);
 
   /**
    * Set the number of the rows that pass the filter.
    * @param selectedSize
    */
-  public void setSelectedSize(int selectedSize) {
-    this.currBatchSelectedSize = selectedSize;
-  }
+  void setSelectedSize(int selectedSize);
 }