You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2020/03/24 17:11:12 UTC
[hive] 01/02: HIVE-22959 : Extend storage-api to expose
FilterContext (Panos G via Gopal V)
This is an automated email from the ASF dual-hosted git repository.
jcamacho pushed a commit to branch storage-branch-2.7
in repository https://gitbox.apache.org/repos/asf/hive.git
commit dade9919d904f8a4bff12a9130c150301a4713ed
Author: Panagiotis Garefalakis <pa...@cloudera.com>
AuthorDate: Mon Mar 16 10:26:36 2020 -0700
HIVE-22959 : Extend storage-api to expose FilterContext (Panos G via Gopal V)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
---
.../hive/ql/io/filter/TestFilterContext.java | 124 ++++++++++++++++++++
.../hadoop/hive/ql/io/filter/FilterContext.java | 72 ++++++++++++
.../hive/ql/io/filter/MutableFilterContext.java | 130 +++++++++++++++++++++
3 files changed, 326 insertions(+)
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/filter/TestFilterContext.java b/ql/src/test/org/apache/hadoop/hive/ql/io/filter/TestFilterContext.java
new file mode 100644
index 0000000..0bda620
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/filter/TestFilterContext.java
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.filter;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.Arrays;
+
+/**
+ * Test creation and manipulation of MutableFilterContext and FilterContext.
+ */
+public class TestFilterContext {
+
+ private int[] makeValidSelected() {
+ int[] selected = new int[512];
+ for (int i=0; i < selected.length; i++){
+ selected[i] = i*2;
+ }
+ return selected;
+ }
+
+ private int[] makeInvalidSelected() {
+ int[] selected = new int[512];
+ Arrays.fill(selected, 1);
+ return selected;
+ }
+
+ @Test
+ public void testInitFilterContext(){
+ MutableFilterContext mutableFilterContext = new MutableFilterContext();
+ int[] selected = makeValidSelected();
+
+ mutableFilterContext.setFilterContext(true, selected, selected.length);
+ FilterContext filterContext = mutableFilterContext.immutable();
+
+ Assert.assertEquals(true, filterContext.isSelectedInUse());
+ Assert.assertEquals(512, filterContext.getSelectedSize());
+ Assert.assertEquals(512, filterContext.getSelected().length);
+ }
+
+
+ @Test
+ public void testResetFilterContext(){
+ MutableFilterContext mutableFilterContext = new MutableFilterContext();
+ int[] selected = makeValidSelected();
+
+ mutableFilterContext.setFilterContext(true, selected, selected.length);
+ FilterContext filterContext = mutableFilterContext.immutable();
+
+ Assert.assertEquals(true, filterContext.isSelectedInUse());
+ Assert.assertEquals(512, filterContext.getSelectedSize());
+ Assert.assertEquals(512, filterContext.getSelected().length);
+
+ filterContext.resetFilterContext();
+
+ Assert.assertEquals(false, filterContext.isSelectedInUse());
+ Assert.assertEquals(0, filterContext.getSelectedSize());
+ Assert.assertEquals(null, filterContext.getSelected());
+ }
+
+ @Test(expected=AssertionError.class)
+ public void testInitInvalidFilterContext(){
+ MutableFilterContext mutableFilterContext = new MutableFilterContext();
+ int[] selected = makeInvalidSelected();
+
+ mutableFilterContext.setFilterContext(true, selected, selected.length);
+ }
+
+
+ @Test
+ public void testCopyFilterContext(){
+ MutableFilterContext mutableFilterContext = new MutableFilterContext();
+ int[] selected = makeValidSelected();
+
+ mutableFilterContext.setFilterContext(true, selected, selected.length);
+
+ MutableFilterContext mutableFilterContextToCopy = new MutableFilterContext();
+ mutableFilterContextToCopy.setFilterContext(true, new int[] {100}, 1);
+
+ mutableFilterContext.copyFilterContextFrom(mutableFilterContextToCopy);
+ FilterContext filterContext = mutableFilterContext.immutable();
+
+ Assert.assertEquals(true, filterContext.isSelectedInUse());
+ Assert.assertEquals(1, filterContext.getSelectedSize());
+ Assert.assertEquals(100, filterContext.getSelected()[0]);
+ // make sure we kept the remaining array space
+ Assert.assertEquals(512, filterContext.getSelected().length);
+ }
+
+
+ @Test
+ public void testBorrowSelected(){
+ MutableFilterContext mutableFilterContext = new MutableFilterContext();
+ mutableFilterContext.setFilterContext(true, new int[] {100, 200}, 2);
+
+ int[] borrowedSelected = mutableFilterContext.borrowSelected(1);
+ // make sure we borrowed the existing array
+ Assert.assertEquals(2, borrowedSelected.length);
+ Assert.assertEquals(100, borrowedSelected[0]);
+ Assert.assertEquals(200, borrowedSelected[1]);
+
+ borrowedSelected = mutableFilterContext.borrowSelected(3);
+ Assert.assertEquals(3, borrowedSelected.length);
+ Assert.assertEquals(0, borrowedSelected[0]);
+ Assert.assertEquals(0, borrowedSelected[1]);
+ Assert.assertEquals(0, borrowedSelected[2]);
+ }
+}
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/FilterContext.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/FilterContext.java
new file mode 100644
index 0000000..d799d6f
--- /dev/null
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/FilterContext.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.filter;
+
+/**
+ * A representation of a Filter applied on the rows of a VectorizedRowBatch
+ * {@link org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch}.
+ *
+ * Each FilterContext consists of an array with the ids (int) of rows that are selected by the
+ * filter, an integer representing the number of selected rows, and a boolean showing if the filter
+ * actually selected any rows.
+ *
+ */
+public abstract class FilterContext {
+
+ protected boolean currBatchIsSelectedInUse = false;
+ protected int[] currBatchSelected = null;
+ protected int currBatchSelectedSize = 0;
+
+ public FilterContext() {
+ super();
+ }
+
+ /**
+ * Reset FilterContext variables.
+ */
+ public void resetFilterContext() {
+ this.currBatchIsSelectedInUse = false;
+ this.currBatchSelected = null;
+ this.currBatchSelectedSize = 0;
+ }
+
+ /**
+ * Is the filter applied?
+ * @return true if the filter is actually applied
+ */
+ public boolean isSelectedInUse() {
+ return this.currBatchIsSelectedInUse;
+ }
+
+ /**
+ * Return an int array with the rows that pass the filter.
+ * Do not modify the array returned!
+ * @return int array
+ */
+ public int[] getSelected() {
+ return this.currBatchSelected;
+ }
+
+ /**
+ * Return the number of rows that pass the filter.
+ * @return an int
+ */
+ public int getSelectedSize() {
+ return this.currBatchSelectedSize;
+ }
+}
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/MutableFilterContext.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/MutableFilterContext.java
new file mode 100644
index 0000000..73ed766
--- /dev/null
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/filter/MutableFilterContext.java
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.filter;
+
+import java.util.Arrays;
+
+/**
+ * A representation of a Filter applied on the rows of a VectorizedRowBatch
+ * {@link org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch}.
+ *
+ * Each FilterContext consists of an array with the ids (int) of rows that are selected by the
+ * filter, an integer representing the number of selected rows, and a boolean showing if the filter
+ * actually selected any rows.
+ *
+ */
+public class MutableFilterContext extends FilterContext {
+
+ /**
+ * Set context with the given values by reference.
+ * @param isSelectedInUse if the filter is applied
+ * @param selected an array of the selected rows
+ * @param selectedSize the number of the selected rows
+ */
+ public void setFilterContext(boolean isSelectedInUse, int[] selected, int selectedSize) {
+ this.currBatchIsSelectedInUse = isSelectedInUse;
+ this.currBatchSelected = selected;
+ this.currBatchSelectedSize = selectedSize;
+ // Avoid selected.length < selectedSize since we can borrow a larger array for selected
+ // Debug loop for selected array: use without assert when needed (asserts only fail in testing)
+ assert isValidSelected() : "Selected array may not contain duplicates or unordered values";
+ }
+
+ /**
+ * Copy context variables from the a given FilterContext.
+ * Always does a deep copy of the data.
+ * @param other FilterContext to copy from
+ */
+ public void copyFilterContextFrom(MutableFilterContext other) {
+ // assert if copying into self (can fail only in testing)
+ assert this != other: "May not copy a FilterContext to itself";
+
+ if (this != other) {
+ if (this.currBatchSelected == null || this.currBatchSelected.length < other.currBatchSelectedSize) {
+ // note: still allocating a full size buffer, for later use
+ this.currBatchSelected = Arrays.copyOf(other.currBatchSelected, other.currBatchSelected.length);
+ } else {
+ System.arraycopy(other.currBatchSelected, 0, this.currBatchSelected, 0, other.currBatchSelectedSize);
+ }
+ this.currBatchSelectedSize = other.currBatchSelectedSize;
+ this.currBatchIsSelectedInUse = other.currBatchIsSelectedInUse;
+ }
+ }
+
+ /**
+ * Validate method checking if existing selected array contains accepted values.
+ * Values should be in order and without duplicates i.e [1,1,1] is illegal
+ * @return true if the selected array is valid
+ */
+ public boolean isValidSelected() {
+ for (int i = 1; i < this.currBatchSelectedSize; i++) {
+ if (this.currBatchSelected[i-1] >= this.currBatchSelected[i]) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ /**
+ * Borrow the current selected array to be modified if it satisfies minimum capacity.
+ * If it is too small or unset, allocates one.
+ * This method never returns null!
+ * @param minCapacity
+ * @return the current selected array to be modified
+ */
+ public int[] borrowSelected(int minCapacity) {
+ int[] existing = this.currBatchSelected;
+ this.currBatchSelected = null;
+ if (existing == null || existing.length < minCapacity) {
+ return new int[minCapacity];
+ }
+ return existing;
+ }
+
+ /**
+ * Get the immutable version of the current FilterContext.
+ * @return immutable FilterContext instance
+ */
+ public FilterContext immutable(){
+ return this;
+ }
+
+ /**
+ * Set the selectedInUse boolean showing if the filter is applied.
+ * @param selectedInUse
+ */
+ public void setSelectedInUse(boolean selectedInUse) {
+ this.currBatchIsSelectedInUse = selectedInUse;
+ }
+
+ /**
+ * Set the array of the rows that pass the filter by reference.
+ * @param selectedArray
+ */
+ public void setSelected(int[] selectedArray) {
+ this.currBatchSelected = selectedArray;
+ }
+
+ /**
+ * Set the number of the rows that pass the filter.
+ * @param selectedSize
+ */
+ public void setSelectedSize(int selectedSize) {
+ this.currBatchSelectedSize = selectedSize;
+ }
+}