You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2020/05/03 08:06:32 UTC
[hive] branch master updated: HIVE-23215 : Make FilterContext and MutableFilterContext interfaces (Owen O'malley, Panos G via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository.
hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 1f4bc02 HIVE-23215 : Make FilterContext and MutableFilterContext interfaces (Owen O'malley, Panos G via Ashutosh Chauhan)
1f4bc02 is described below
commit 1f4bc024d202ff69ff512d4ba50ad9719cbe2854
Author: Owen O'Malley <om...@apache.org>
AuthorDate: Sun May 3 01:05:47 2020 -0700
HIVE-23215 : Make FilterContext and MutableFilterContext interfaces (Owen O'malley, Panos G via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
---
.../hive/ql/io/filter/TestFilterContext.java | 38 +++---------
.../hive/ql/exec/vector/VectorizedRowBatch.java | 63 +++++++++++++++++++-
.../hadoop/hive/ql/io/filter/FilterContext.java | 28 ++-------
.../hive/ql/io/filter/MutableFilterContext.java | 68 +++-------------------
4 files changed, 84 insertions(+), 113 deletions(-)
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/filter/TestFilterContext.java b/ql/src/test/org/apache/hadoop/hive/ql/io/filter/TestFilterContext.java
index 0bda620..c59cc09 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/filter/TestFilterContext.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/filter/TestFilterContext.java
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hive.ql.io.filter;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.junit.Assert;
import org.junit.Test;
@@ -43,7 +44,7 @@ public class TestFilterContext {
@Test
public void testInitFilterContext(){
- MutableFilterContext mutableFilterContext = new MutableFilterContext();
+ MutableFilterContext mutableFilterContext = new VectorizedRowBatch(0);
int[] selected = makeValidSelected();
mutableFilterContext.setFilterContext(true, selected, selected.length);
@@ -57,7 +58,7 @@ public class TestFilterContext {
@Test
public void testResetFilterContext(){
- MutableFilterContext mutableFilterContext = new MutableFilterContext();
+ MutableFilterContext mutableFilterContext = new VectorizedRowBatch(0);
int[] selected = makeValidSelected();
mutableFilterContext.setFilterContext(true, selected, selected.length);
@@ -67,55 +68,32 @@ public class TestFilterContext {
Assert.assertEquals(512, filterContext.getSelectedSize());
Assert.assertEquals(512, filterContext.getSelected().length);
- filterContext.resetFilterContext();
+ filterContext.reset();
Assert.assertEquals(false, filterContext.isSelectedInUse());
Assert.assertEquals(0, filterContext.getSelectedSize());
- Assert.assertEquals(null, filterContext.getSelected());
}
@Test(expected=AssertionError.class)
public void testInitInvalidFilterContext(){
- MutableFilterContext mutableFilterContext = new MutableFilterContext();
+ MutableFilterContext mutableFilterContext = new VectorizedRowBatch(0);
int[] selected = makeInvalidSelected();
mutableFilterContext.setFilterContext(true, selected, selected.length);
}
-
- @Test
- public void testCopyFilterContext(){
- MutableFilterContext mutableFilterContext = new MutableFilterContext();
- int[] selected = makeValidSelected();
-
- mutableFilterContext.setFilterContext(true, selected, selected.length);
-
- MutableFilterContext mutableFilterContextToCopy = new MutableFilterContext();
- mutableFilterContextToCopy.setFilterContext(true, new int[] {100}, 1);
-
- mutableFilterContext.copyFilterContextFrom(mutableFilterContextToCopy);
- FilterContext filterContext = mutableFilterContext.immutable();
-
- Assert.assertEquals(true, filterContext.isSelectedInUse());
- Assert.assertEquals(1, filterContext.getSelectedSize());
- Assert.assertEquals(100, filterContext.getSelected()[0]);
- // make sure we kept the remaining array space
- Assert.assertEquals(512, filterContext.getSelected().length);
- }
-
-
@Test
public void testBorrowSelected(){
- MutableFilterContext mutableFilterContext = new MutableFilterContext();
+ MutableFilterContext mutableFilterContext = new VectorizedRowBatch(0);
mutableFilterContext.setFilterContext(true, new int[] {100, 200}, 2);
- int[] borrowedSelected = mutableFilterContext.borrowSelected(1);
+ int[] borrowedSelected = mutableFilterContext.updateSelected(1);
// make sure we borrowed the existing array
Assert.assertEquals(2, borrowedSelected.length);
Assert.assertEquals(100, borrowedSelected[0]);
Assert.assertEquals(200, borrowedSelected[1]);
- borrowedSelected = mutableFilterContext.borrowSelected(3);
+ borrowedSelected = mutableFilterContext.updateSelected(3);
Assert.assertEquals(3, borrowedSelected.length);
Assert.assertEquals(0, borrowedSelected[0]);
Assert.assertEquals(0, borrowedSelected[1]);
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
index 4a44345..0e678d3 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
@@ -21,6 +21,7 @@ import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
+import org.apache.hadoop.hive.ql.io.filter.MutableFilterContext;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
@@ -31,7 +32,7 @@ import org.apache.hadoop.io.Writable;
* The major fields are public by design to allow fast and convenient
* access by the vectorized query execution code.
*/
-public class VectorizedRowBatch implements Writable {
+public class VectorizedRowBatch implements Writable, MutableFilterContext {
public int numCols; // number of columns
public ColumnVector[] cols; // a vector for each column
public int size; // number of rows that qualify (i.e. haven't been filtered out)
@@ -338,6 +339,7 @@ public class VectorizedRowBatch implements Writable {
* - resets each column
* - inits each column
*/
+ @Override
public void reset() {
selectedInUse = false;
size = 0;
@@ -358,5 +360,64 @@ public class VectorizedRowBatch implements Writable {
for (ColumnVector col : cols) {
col.ensureSize(rows, false);
}
+ updateSelected(rows);
+ }
+
+ @Override
+ public boolean isSelectedInUse() {
+ return selectedInUse;
+ }
+
+ @Override
+ public int[] getSelected() {
+ return selected;
+ }
+
+ @Override
+ public int getSelectedSize() {
+ return size;
+ }
+
+ @Override
+ public void setFilterContext(boolean isSelectedInUse, int[] selected, int selectedSize) {
+ this.selectedInUse = isSelectedInUse;
+ this.selected = selected;
+ this.size = selectedSize;
+ // Avoid selected.length < selectedSize since we can borrow a larger array for selected
+ // Debug loop for selected array: use without assert when needed (asserts only fail in testing)
+ assert validateSelected() : "Selected array may not contain duplicates or unordered values";
+ }
+
+ @Override
+ public boolean validateSelected() {
+ for (int i = 1; i < size; i++) {
+ if (selected[i-1] >= selected[i]) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ @Override
+ public int[] updateSelected(int minCapacity) {
+ if (selected == null || selected.length < minCapacity) {
+ selected = new int[minCapacity];
+ }
+ return selected;
+ }
+
+ @Override
+ public void setSelectedInUse(boolean selectedInUse) {
+ this.selectedInUse = selectedInUse;
+ }
+
+ @Override
+ public void setSelected(int[] selectedArray) {
+ selected = selectedArray;
+ }
+
+ @Override
+ public void setSelectedSize(int selectedSize) {
+ size = selectedSize;
}
}
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/FilterContext.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/FilterContext.java
index d799d6f..dcf7b8b 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/FilterContext.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/FilterContext.java
@@ -26,47 +26,29 @@ package org.apache.hadoop.hive.ql.io.filter;
* actually selected any rows.
*
*/
-public abstract class FilterContext {
-
- protected boolean currBatchIsSelectedInUse = false;
- protected int[] currBatchSelected = null;
- protected int currBatchSelectedSize = 0;
-
- public FilterContext() {
- super();
- }
+public interface FilterContext {
/**
* Reset FilterContext variables.
*/
- public void resetFilterContext() {
- this.currBatchIsSelectedInUse = false;
- this.currBatchSelected = null;
- this.currBatchSelectedSize = 0;
- }
+ void reset();
/**
* Is the filter applied?
* @return true if the filter is actually applied
*/
- public boolean isSelectedInUse() {
- return this.currBatchIsSelectedInUse;
- }
+ boolean isSelectedInUse();
/**
* Return an int array with the rows that pass the filter.
* Do not modify the array returned!
* @return int array
*/
- public int[] getSelected() {
- return this.currBatchSelected;
- }
+ int[] getSelected();
/**
* Return the number of rows that pass the filter.
* @return an int
*/
- public int getSelectedSize() {
- return this.currBatchSelectedSize;
- }
+ int getSelectedSize();
}
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/MutableFilterContext.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/MutableFilterContext.java
index 73ed766..55a4cc7 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/MutableFilterContext.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/MutableFilterContext.java
@@ -17,8 +17,6 @@
*/
package org.apache.hadoop.hive.ql.io.filter;
-import java.util.Arrays;
-
/**
* A representation of a Filter applied on the rows of a VectorizedRowBatch
* {@link org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch}.
@@ -28,7 +26,7 @@ import java.util.Arrays;
* actually selected any rows.
*
*/
-public class MutableFilterContext extends FilterContext {
+public interface MutableFilterContext extends FilterContext {
/**
* Set context with the given values by reference.
@@ -36,71 +34,29 @@ public class MutableFilterContext extends FilterContext {
* @param selected an array of the selected rows
* @param selectedSize the number of the selected rows
*/
- public void setFilterContext(boolean isSelectedInUse, int[] selected, int selectedSize) {
- this.currBatchIsSelectedInUse = isSelectedInUse;
- this.currBatchSelected = selected;
- this.currBatchSelectedSize = selectedSize;
- // Avoid selected.length < selectedSize since we can borrow a larger array for selected
- // Debug loop for selected array: use without assert when needed (asserts only fail in testing)
- assert isValidSelected() : "Selected array may not contain duplicates or unordered values";
- }
-
- /**
- * Copy context variables from the a given FilterContext.
- * Always does a deep copy of the data.
- * @param other FilterContext to copy from
- */
- public void copyFilterContextFrom(MutableFilterContext other) {
- // assert if copying into self (can fail only in testing)
- assert this != other: "May not copy a FilterContext to itself";
-
- if (this != other) {
- if (this.currBatchSelected == null || this.currBatchSelected.length < other.currBatchSelectedSize) {
- // note: still allocating a full size buffer, for later use
- this.currBatchSelected = Arrays.copyOf(other.currBatchSelected, other.currBatchSelected.length);
- } else {
- System.arraycopy(other.currBatchSelected, 0, this.currBatchSelected, 0, other.currBatchSelectedSize);
- }
- this.currBatchSelectedSize = other.currBatchSelectedSize;
- this.currBatchIsSelectedInUse = other.currBatchIsSelectedInUse;
- }
- }
+ void setFilterContext(boolean isSelectedInUse, int[] selected, int selectedSize);
/**
* Validate method checking if existing selected array contains accepted values.
* Values should be in order and without duplicates i.e [1,1,1] is illegal
* @return true if the selected array is valid
*/
- public boolean isValidSelected() {
- for (int i = 1; i < this.currBatchSelectedSize; i++) {
- if (this.currBatchSelected[i-1] >= this.currBatchSelected[i]) {
- return false;
- }
- }
- return true;
- }
+ boolean validateSelected();
/**
- * Borrow the current selected array to be modified if it satisfies minimum capacity.
+ * Get an array for selected that is expected to be modified.
* If it is too small or unset, allocates one.
* This method never returns null!
* @param minCapacity
* @return the current selected array to be modified
*/
- public int[] borrowSelected(int minCapacity) {
- int[] existing = this.currBatchSelected;
- this.currBatchSelected = null;
- if (existing == null || existing.length < minCapacity) {
- return new int[minCapacity];
- }
- return existing;
- }
+ int[] updateSelected(int minCapacity);
/**
* Get the immutable version of the current FilterContext.
* @return immutable FilterContext instance
*/
- public FilterContext immutable(){
+ default FilterContext immutable() {
return this;
}
@@ -108,23 +64,17 @@ public class MutableFilterContext extends FilterContext {
* Set the selectedInUse boolean showing if the filter is applied.
* @param selectedInUse
*/
- public void setSelectedInUse(boolean selectedInUse) {
- this.currBatchIsSelectedInUse = selectedInUse;
- }
+ void setSelectedInUse(boolean selectedInUse);
/**
* Set the array of the rows that pass the filter by reference.
* @param selectedArray
*/
- public void setSelected(int[] selectedArray) {
- this.currBatchSelected = selectedArray;
- }
+ void setSelected(int[] selectedArray);
/**
* Set the number of the rows that pass the filter.
* @param selectedSize
*/
- public void setSelectedSize(int selectedSize) {
- this.currBatchSelectedSize = selectedSize;
- }
+ void setSelectedSize(int selectedSize);
}