You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2015/07/31 02:43:37 UTC
[37/43] hive git commit: HIVE-11253. Move SearchArgument and
VectorizedRowBatch classes to storage-api. (omalley reviewed by prasanthj)
HIVE-11253. Move SearchArgument and VectorizedRowBatch classes to storage-api.
(omalley reviewed by prasanthj)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9ae70cb4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9ae70cb4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9ae70cb4
Branch: refs/heads/spark
Commit: 9ae70cb4d11dae6cea45c29b0e87dc5da1e5555c
Parents: 70b56e3
Author: Owen O'Malley <om...@apache.org>
Authored: Wed Jul 29 09:36:08 2015 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Wed Jul 29 09:36:08 2015 -0700
----------------------------------------------------------------------
common/pom.xml | 5 +
.../hadoop/hive/common/type/HiveDecimal.java | 312 ---------
pom.xml | 1 +
.../hive/ql/exec/vector/BytesColumnVector.java | 322 ---------
.../hive/ql/exec/vector/ColumnVector.java | 174 -----
.../ql/exec/vector/DecimalColumnVector.java | 106 ---
.../hive/ql/exec/vector/DoubleColumnVector.java | 143 ----
.../hive/ql/exec/vector/LongColumnVector.java | 189 -----
.../hive/ql/exec/vector/VectorizedRowBatch.java | 186 -----
.../hive/ql/io/sarg/SearchArgumentFactory.java | 39 --
.../hive/ql/io/sarg/SearchArgumentImpl.java | 697 -------------------
.../hive/ql/io/orc/TestInputOutputFormat.java | 17 +-
.../hive/ql/io/sarg/TestSearchArgumentImpl.java | 3 +-
.../hadoop/hive/ql/io/sarg/ExpressionTree.java | 157 -----
.../hadoop/hive/ql/io/sarg/PredicateLeaf.java | 104 ---
.../hadoop/hive/ql/io/sarg/SearchArgument.java | 298 --------
.../hive/serde2/io/HiveDecimalWritable.java | 174 -----
storage-api/pom.xml | 85 +++
.../hadoop/hive/common/type/HiveDecimal.java | 312 +++++++++
.../hive/ql/exec/vector/BytesColumnVector.java | 322 +++++++++
.../hive/ql/exec/vector/ColumnVector.java | 173 +++++
.../ql/exec/vector/DecimalColumnVector.java | 106 +++
.../hive/ql/exec/vector/DoubleColumnVector.java | 143 ++++
.../hive/ql/exec/vector/LongColumnVector.java | 189 +++++
.../hive/ql/exec/vector/VectorizedRowBatch.java | 186 +++++
.../hadoop/hive/ql/io/sarg/ExpressionTree.java | 156 +++++
.../hadoop/hive/ql/io/sarg/PredicateLeaf.java | 104 +++
.../hadoop/hive/ql/io/sarg/SearchArgument.java | 287 ++++++++
.../hive/ql/io/sarg/SearchArgumentFactory.java | 28 +
.../hive/ql/io/sarg/SearchArgumentImpl.java | 687 ++++++++++++++++++
.../hive/serde2/io/HiveDecimalWritable.java | 174 +++++
31 files changed, 2974 insertions(+), 2905 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/common/pom.xml
----------------------------------------------------------------------
diff --git a/common/pom.xml b/common/pom.xml
index aedf7ba..a7997e2 100644
--- a/common/pom.xml
+++ b/common/pom.xml
@@ -39,6 +39,11 @@
<artifactId>hive-shims</artifactId>
<version>${project.version}</version>
</dependency>
+ <dependency>
+ <groupId>org.apache.hive</groupId>
+ <artifactId>hive-storage-api</artifactId>
+ <version>${project.version}</version>
+ </dependency>
<!-- inter-project -->
<dependency>
<groupId>commons-cli</groupId>
http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/common/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java b/common/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java
deleted file mode 100644
index 7d7fb28..0000000
--- a/common/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java
+++ /dev/null
@@ -1,312 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.common.type;
-
-import java.math.BigDecimal;
-import java.math.BigInteger;
-import java.math.RoundingMode;
-
-/**
- *
- * HiveDecimal. Simple wrapper for BigDecimal. Adds fixed max precision and non scientific string
- * representation
- *
- */
-public class HiveDecimal implements Comparable<HiveDecimal> {
- public static final int MAX_PRECISION = 38;
- public static final int MAX_SCALE = 38;
-
- /**
- * Default precision/scale when user doesn't specify in the column metadata, such as
- * decimal and decimal(8).
- */
- public static final int USER_DEFAULT_PRECISION = 10;
- public static final int USER_DEFAULT_SCALE = 0;
-
- /**
- * Default precision/scale when system is not able to determine them, such as in case
- * of a non-generic udf.
- */
- public static final int SYSTEM_DEFAULT_PRECISION = 38;
- public static final int SYSTEM_DEFAULT_SCALE = 18;
-
- public static final HiveDecimal ZERO = new HiveDecimal(BigDecimal.ZERO);
- public static final HiveDecimal ONE = new HiveDecimal(BigDecimal.ONE);
-
- public static final int ROUND_FLOOR = BigDecimal.ROUND_FLOOR;
- public static final int ROUND_CEILING = BigDecimal.ROUND_CEILING;
- public static final int ROUND_HALF_UP = BigDecimal.ROUND_HALF_UP;
-
- private BigDecimal bd = BigDecimal.ZERO;
-
- private HiveDecimal(BigDecimal bd) {
- this.bd = bd;
- }
-
- public static HiveDecimal create(BigDecimal b) {
- return create(b, true);
- }
-
- public static HiveDecimal create(BigDecimal b, boolean allowRounding) {
- BigDecimal bd = normalize(b, allowRounding);
- return bd == null ? null : new HiveDecimal(bd);
- }
-
- public static HiveDecimal create(BigInteger unscaled, int scale) {
- BigDecimal bd = normalize(new BigDecimal(unscaled, scale), true);
- return bd == null ? null : new HiveDecimal(bd);
- }
-
- public static HiveDecimal create(String dec) {
- BigDecimal bd;
- try {
- bd = new BigDecimal(dec.trim());
- } catch (NumberFormatException ex) {
- return null;
- }
-
- bd = normalize(bd, true);
- return bd == null ? null : new HiveDecimal(bd);
- }
-
- public static HiveDecimal create(BigInteger bi) {
- BigDecimal bd = normalize(new BigDecimal(bi), true);
- return bd == null ? null : new HiveDecimal(bd);
- }
-
- public static HiveDecimal create(int i) {
- return new HiveDecimal(new BigDecimal(i));
- }
-
- public static HiveDecimal create(long l) {
- return new HiveDecimal(new BigDecimal(l));
- }
-
- @Override
- public String toString() {
- return bd.toPlainString();
- }
-
- public HiveDecimal setScale(int i) {
- return new HiveDecimal(bd.setScale(i, RoundingMode.HALF_UP));
- }
-
- @Override
- public int compareTo(HiveDecimal dec) {
- return bd.compareTo(dec.bd);
- }
-
- @Override
- public int hashCode() {
- return bd.hashCode();
- }
-
- @Override
- public boolean equals(Object obj) {
- if (obj == null || obj.getClass() != getClass()) {
- return false;
- }
- return bd.equals(((HiveDecimal) obj).bd);
- }
-
- public int scale() {
- return bd.scale();
- }
-
- /**
- * Returns the number of digits (integer and fractional) in the number, which is equivalent
- * to SQL decimal precision. Note that this is different from BigDecimal.precision(),
- * which returns the precision of the unscaled value (BigDecimal.valueOf(0.01).precision() = 1,
- * whereas HiveDecimal.create("0.01").precision() = 2).
- * If you want the BigDecimal precision, use HiveDecimal.bigDecimalValue().precision()
- * @return
- */
- public int precision() {
- int bdPrecision = bd.precision();
- int bdScale = bd.scale();
-
- if (bdPrecision < bdScale) {
- // This can happen for numbers less than 0.1
- // For 0.001234: bdPrecision=4, bdScale=6
- // In this case, we'll set the type to have the same precision as the scale.
- return bdScale;
- }
- return bdPrecision;
- }
-
- public int intValue() {
- return bd.intValue();
- }
-
- public double doubleValue() {
- return bd.doubleValue();
- }
-
- public long longValue() {
- return bd.longValue();
- }
-
- public short shortValue() {
- return bd.shortValue();
- }
-
- public float floatValue() {
- return bd.floatValue();
- }
-
- public BigDecimal bigDecimalValue() {
- return bd;
- }
-
- public byte byteValue() {
- return bd.byteValue();
- }
-
- public HiveDecimal setScale(int adjustedScale, int rm) {
- return create(bd.setScale(adjustedScale, rm));
- }
-
- public HiveDecimal subtract(HiveDecimal dec) {
- return create(bd.subtract(dec.bd));
- }
-
- public HiveDecimal multiply(HiveDecimal dec) {
- return create(bd.multiply(dec.bd), false);
- }
-
- public BigInteger unscaledValue() {
- return bd.unscaledValue();
- }
-
- public HiveDecimal scaleByPowerOfTen(int n) {
- return create(bd.scaleByPowerOfTen(n));
- }
-
- public HiveDecimal abs() {
- return create(bd.abs());
- }
-
- public HiveDecimal negate() {
- return create(bd.negate());
- }
-
- public HiveDecimal add(HiveDecimal dec) {
- return create(bd.add(dec.bd));
- }
-
- public HiveDecimal pow(int n) {
- BigDecimal result = normalize(bd.pow(n), false);
- return result == null ? null : new HiveDecimal(result);
- }
-
- public HiveDecimal remainder(HiveDecimal dec) {
- return create(bd.remainder(dec.bd));
- }
-
- public HiveDecimal divide(HiveDecimal dec) {
- return create(bd.divide(dec.bd, MAX_SCALE, RoundingMode.HALF_UP), true);
- }
-
- /**
- * Get the sign of the underlying decimal.
- * @return 0 if the decimal is equal to 0, -1 if less than zero, and 1 if greater than 0
- */
- public int signum() {
- return bd.signum();
- }
-
- private static BigDecimal trim(BigDecimal d) {
- if (d.compareTo(BigDecimal.ZERO) == 0) {
- // Special case for 0, because java doesn't strip zeros correctly on that number.
- d = BigDecimal.ZERO;
- } else {
- d = d.stripTrailingZeros();
- if (d.scale() < 0) {
- // no negative scale decimals
- d = d.setScale(0);
- }
- }
- return d;
- }
-
- private static BigDecimal normalize(BigDecimal bd, boolean allowRounding) {
- if (bd == null) {
- return null;
- }
-
- bd = trim(bd);
-
- int intDigits = bd.precision() - bd.scale();
-
- if (intDigits > MAX_PRECISION) {
- return null;
- }
-
- int maxScale = Math.min(MAX_SCALE, Math.min(MAX_PRECISION - intDigits, bd.scale()));
- if (bd.scale() > maxScale ) {
- if (allowRounding) {
- bd = bd.setScale(maxScale, RoundingMode.HALF_UP);
- // Trimming is again necessary, because rounding may introduce new trailing 0's.
- bd = trim(bd);
- } else {
- bd = null;
- }
- }
-
- return bd;
- }
-
- public static BigDecimal enforcePrecisionScale(BigDecimal bd, int maxPrecision, int maxScale) {
- if (bd == null) {
- return null;
- }
-
- bd = trim(bd);
-
- if (bd.scale() > maxScale) {
- bd = bd.setScale(maxScale, RoundingMode.HALF_UP);
- }
-
- int maxIntDigits = maxPrecision - maxScale;
- int intDigits = bd.precision() - bd.scale();
- if (intDigits > maxIntDigits) {
- return null;
- }
-
- return bd;
- }
-
- public static HiveDecimal enforcePrecisionScale(HiveDecimal dec, int maxPrecision, int maxScale) {
- if (dec == null) {
- return null;
- }
-
- // Minor optimization, avoiding creating new objects.
- if (dec.precision() - dec.scale() <= maxPrecision - maxScale &&
- dec.scale() <= maxScale) {
- return dec;
- }
-
- BigDecimal bd = enforcePrecisionScale(dec.bd, maxPrecision, maxScale);
- if (bd == null) {
- return null;
- }
-
- return HiveDecimal.create(bd);
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 1abf738..196a0a4 100644
--- a/pom.xml
+++ b/pom.xml
@@ -49,6 +49,7 @@
<module>service</module>
<module>shims</module>
<module>spark-client</module>
+ <module>storage-api</module>
<module>testutils</module>
<module>packaging</module>
</modules>
http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
deleted file mode 100644
index 02c52fa..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
+++ /dev/null
@@ -1,322 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.exec.vector;
-
-/**
- * This class supports string and binary data by value reference -- i.e. each field is
- * explicitly present, as opposed to provided by a dictionary reference.
- * In some cases, all the values will be in the same byte array to begin with,
- * but this need not be the case. If each value is in a separate byte
- * array to start with, or not all of the values are in the same original
- * byte array, you can still assign data by reference into this column vector.
- * This gives flexibility to use this in multiple situations.
- * <p>
- * When setting data by reference, the caller
- * is responsible for allocating the byte arrays used to hold the data.
- * You can also set data by value, as long as you call the initBuffer() method first.
- * You can mix "by value" and "by reference" in the same column vector,
- * though that use is probably not typical.
- */
-public class BytesColumnVector extends ColumnVector {
- public byte[][] vector;
- public int[] start; // start offset of each field
-
- /*
- * The length of each field. If the value repeats for every entry, then it is stored
- * in vector[0] and isRepeating from the superclass is set to true.
- */
- public int[] length;
- private byte[] buffer; // optional buffer to use when actually copying in data
- private int nextFree; // next free position in buffer
-
- // Estimate that there will be 16 bytes per entry
- static final int DEFAULT_BUFFER_SIZE = 16 * VectorizedRowBatch.DEFAULT_SIZE;
-
- // Proportion of extra space to provide when allocating more buffer space.
- static final float EXTRA_SPACE_FACTOR = (float) 1.2;
-
- /**
- * Use this constructor for normal operation.
- * All column vectors should be the default size normally.
- */
- public BytesColumnVector() {
- this(VectorizedRowBatch.DEFAULT_SIZE);
- }
-
- /**
- * Don't call this constructor except for testing purposes.
- *
- * @param size number of elements in the column vector
- */
- public BytesColumnVector(int size) {
- super(size);
- vector = new byte[size][];
- start = new int[size];
- length = new int[size];
- }
-
- /**
- * Additional reset work for BytesColumnVector (releasing scratch bytes for by value strings).
- */
- @Override
- public void reset() {
- super.reset();
- initBuffer(0);
- }
-
- /** Set a field by reference.
- *
- * @param elementNum index within column vector to set
- * @param sourceBuf container of source data
- * @param start start byte position within source
- * @param length length of source byte sequence
- */
- public void setRef(int elementNum, byte[] sourceBuf, int start, int length) {
- vector[elementNum] = sourceBuf;
- this.start[elementNum] = start;
- this.length[elementNum] = length;
- }
-
- /**
- * You must call initBuffer first before using setVal().
- * Provide the estimated number of bytes needed to hold
- * a full column vector worth of byte string data.
- *
- * @param estimatedValueSize Estimated size of buffer space needed
- */
- public void initBuffer(int estimatedValueSize) {
- nextFree = 0;
-
- // if buffer is already allocated, keep using it, don't re-allocate
- if (buffer != null) {
- return;
- }
-
- // allocate a little extra space to limit need to re-allocate
- int bufferSize = this.vector.length * (int)(estimatedValueSize * EXTRA_SPACE_FACTOR);
- if (bufferSize < DEFAULT_BUFFER_SIZE) {
- bufferSize = DEFAULT_BUFFER_SIZE;
- }
- buffer = new byte[bufferSize];
- }
-
- /**
- * Initialize buffer to default size.
- */
- public void initBuffer() {
- initBuffer(0);
- }
-
- /**
- * @return amount of buffer space currently allocated
- */
- public int bufferSize() {
- if (buffer == null) {
- return 0;
- }
- return buffer.length;
- }
-
- /**
- * Set a field by actually copying in to a local buffer.
- * If you must actually copy data in to the array, use this method.
- * DO NOT USE this method unless it's not practical to set data by reference with setRef().
- * Setting data by reference tends to run a lot faster than copying data in.
- *
- * @param elementNum index within column vector to set
- * @param sourceBuf container of source data
- * @param start start byte position within source
- * @param length length of source byte sequence
- */
- public void setVal(int elementNum, byte[] sourceBuf, int start, int length) {
- if ((nextFree + length) > buffer.length) {
- increaseBufferSpace(length);
- }
- System.arraycopy(sourceBuf, start, buffer, nextFree, length);
- vector[elementNum] = buffer;
- this.start[elementNum] = nextFree;
- this.length[elementNum] = length;
- nextFree += length;
- }
-
- /**
- * Set a field to the concatenation of two string values. Result data is copied
- * into the internal buffer.
- *
- * @param elementNum index within column vector to set
- * @param leftSourceBuf container of left argument
- * @param leftStart start of left argument
- * @param leftLen length of left argument
- * @param rightSourceBuf container of right argument
- * @param rightStart start of right argument
- * @param rightLen length of right arugment
- */
- public void setConcat(int elementNum, byte[] leftSourceBuf, int leftStart, int leftLen,
- byte[] rightSourceBuf, int rightStart, int rightLen) {
- int newLen = leftLen + rightLen;
- if ((nextFree + newLen) > buffer.length) {
- increaseBufferSpace(newLen);
- }
- vector[elementNum] = buffer;
- this.start[elementNum] = nextFree;
- this.length[elementNum] = newLen;
-
- System.arraycopy(leftSourceBuf, leftStart, buffer, nextFree, leftLen);
- nextFree += leftLen;
- System.arraycopy(rightSourceBuf, rightStart, buffer, nextFree, rightLen);
- nextFree += rightLen;
- }
-
- /**
- * Increase buffer space enough to accommodate next element.
- * This uses an exponential increase mechanism to rapidly
- * increase buffer size to enough to hold all data.
- * As batches get re-loaded, buffer space allocated will quickly
- * stabilize.
- *
- * @param nextElemLength size of next element to be added
- */
- public void increaseBufferSpace(int nextElemLength) {
-
- // Keep doubling buffer size until there will be enough space for next element.
- int newLength = 2 * buffer.length;
- while((nextFree + nextElemLength) > newLength) {
- newLength *= 2;
- }
-
- // Allocate new buffer, copy data to it, and set buffer to new buffer.
- byte[] newBuffer = new byte[newLength];
- System.arraycopy(buffer, 0, newBuffer, 0, nextFree);
- buffer = newBuffer;
- }
-
- /** Copy the current object contents into the output. Only copy selected entries,
- * as indicated by selectedInUse and the sel array.
- */
- public void copySelected(
- boolean selectedInUse, int[] sel, int size, BytesColumnVector output) {
-
- // Output has nulls if and only if input has nulls.
- output.noNulls = noNulls;
- output.isRepeating = false;
-
- // Handle repeating case
- if (isRepeating) {
- output.setVal(0, vector[0], start[0], length[0]);
- output.isNull[0] = isNull[0];
- output.isRepeating = true;
- return;
- }
-
- // Handle normal case
-
- // Copy data values over
- if (selectedInUse) {
- for (int j = 0; j < size; j++) {
- int i = sel[j];
- output.setVal(i, vector[i], start[i], length[i]);
- }
- }
- else {
- for (int i = 0; i < size; i++) {
- output.setVal(i, vector[i], start[i], length[i]);
- }
- }
-
- // Copy nulls over if needed
- if (!noNulls) {
- if (selectedInUse) {
- for (int j = 0; j < size; j++) {
- int i = sel[j];
- output.isNull[i] = isNull[i];
- }
- }
- else {
- System.arraycopy(isNull, 0, output.isNull, 0, size);
- }
- }
- }
-
- /** Simplify vector by brute-force flattening noNulls and isRepeating
- * This can be used to reduce combinatorial explosion of code paths in VectorExpressions
- * with many arguments, at the expense of loss of some performance.
- */
- public void flatten(boolean selectedInUse, int[] sel, int size) {
- flattenPush();
- if (isRepeating) {
- isRepeating = false;
-
- // setRef is used below and this is safe, because the reference
- // is to data owned by this column vector. If this column vector
- // gets re-used, the whole thing is re-used together so there
- // is no danger of a dangling reference.
-
- // Only copy data values if entry is not null. The string value
- // at position 0 is undefined if the position 0 value is null.
- if (noNulls || !isNull[0]) {
-
- // loops start at position 1 because position 0 is already set
- if (selectedInUse) {
- for (int j = 1; j < size; j++) {
- int i = sel[j];
- this.setRef(i, vector[0], start[0], length[0]);
- }
- } else {
- for (int i = 1; i < size; i++) {
- this.setRef(i, vector[0], start[0], length[0]);
- }
- }
- }
- flattenRepeatingNulls(selectedInUse, sel, size);
- }
- flattenNoNulls(selectedInUse, sel, size);
- }
-
- // Fill the all the vector entries with provided value
- public void fill(byte[] value) {
- noNulls = true;
- isRepeating = true;
- setRef(0, value, 0, value.length);
- }
-
- @Override
- public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
- BytesColumnVector in = (BytesColumnVector) inputVector;
- setVal(outElementNum, in.vector[inputElementNum], in.start[inputElementNum], in.length[inputElementNum]);
- }
-
- @Override
- public void init() {
- initBuffer(0);
- }
-
- @Override
- public void stringifyValue(StringBuilder buffer, int row) {
- if (isRepeating) {
- row = 0;
- }
- if (noNulls || !isNull[row]) {
- buffer.append('"');
- buffer.append(new String(this.buffer, start[row], length[row]));
- buffer.append('"');
- } else {
- buffer.append("null");
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
deleted file mode 100644
index 4b5cf39..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
+++ /dev/null
@@ -1,174 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.exec.vector;
-
-import java.util.Arrays;
-
-/**
- * ColumnVector contains the shared structure for the sub-types,
- * including NULL information, and whether this vector
- * repeats, i.e. has all values the same, so only the first
- * one is set. This is used to accelerate query performance
- * by handling a whole vector in O(1) time when applicable.
- *
- * The fields are public by design since this is a performance-critical
- * structure that is used in the inner loop of query execution.
- */
-public abstract class ColumnVector {
-
- /*
- * The current kinds of column vectors.
- */
- public static enum Type {
- LONG,
- DOUBLE,
- BYTES,
- DECIMAL
- }
-
- /*
- * If hasNulls is true, then this array contains true if the value
- * is null, otherwise false. The array is always allocated, so a batch can be re-used
- * later and nulls added.
- */
- public boolean[] isNull;
-
- // If the whole column vector has no nulls, this is true, otherwise false.
- public boolean noNulls;
-
- /*
- * True if same value repeats for whole column vector.
- * If so, vector[0] holds the repeating value.
- */
- public boolean isRepeating;
-
- // Variables to hold state from before flattening so it can be easily restored.
- private boolean preFlattenIsRepeating;
- private boolean preFlattenNoNulls;
-
- /**
- * Constructor for super-class ColumnVector. This is not called directly,
- * but used to initialize inherited fields.
- *
- * @param len Vector length
- */
- public ColumnVector(int len) {
- isNull = new boolean[len];
- noNulls = true;
- isRepeating = false;
- }
-
- /**
- * Resets the column to default state
- * - fills the isNull array with false
- * - sets noNulls to true
- * - sets isRepeating to false
- */
- public void reset() {
- if (false == noNulls) {
- Arrays.fill(isNull, false);
- }
- noNulls = true;
- isRepeating = false;
- }
-
- abstract public void flatten(boolean selectedInUse, int[] sel, int size);
-
- // Simplify vector by brute-force flattening noNulls if isRepeating
- // This can be used to reduce combinatorial explosion of code paths in VectorExpressions
- // with many arguments.
- public void flattenRepeatingNulls(boolean selectedInUse, int[] sel, int size) {
-
- boolean nullFillValue;
-
- if (noNulls) {
- nullFillValue = false;
- } else {
- nullFillValue = isNull[0];
- }
-
- if (selectedInUse) {
- for (int j = 0; j < size; j++) {
- int i = sel[j];
- isNull[i] = nullFillValue;
- }
- } else {
- Arrays.fill(isNull, 0, size, nullFillValue);
- }
-
- // all nulls are now explicit
- noNulls = false;
- }
-
- public void flattenNoNulls(boolean selectedInUse, int[] sel, int size) {
- if (noNulls) {
- noNulls = false;
- if (selectedInUse) {
- for (int j = 0; j < size; j++) {
- int i = sel[j];
- isNull[i] = false;
- }
- } else {
- Arrays.fill(isNull, 0, size, false);
- }
- }
- }
-
- /**
- * Restore the state of isRepeating and noNulls to what it was
- * before flattening. This must only be called just after flattening
- * and then evaluating a VectorExpression on the column vector.
- * It is an optimization that allows other operations on the same
- * column to continue to benefit from the isRepeating and noNulls
- * indicators.
- */
- public void unFlatten() {
- isRepeating = preFlattenIsRepeating;
- noNulls = preFlattenNoNulls;
- }
-
- // Record repeating and no nulls state to be restored later.
- protected void flattenPush() {
- preFlattenIsRepeating = isRepeating;
- preFlattenNoNulls = noNulls;
- }
-
- /**
- * Set the element in this column vector from the given input vector.
- */
- public abstract void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector);
-
- /**
- * Initialize the column vector. This method can be overridden by specific column vector types.
- * Use this method only if the individual type of the column vector is not known, otherwise its
- * preferable to call specific initialization methods.
- */
- public void init() {
- // Do nothing by default
- }
-
- /**
- * Print the value for this column into the given string builder.
- * @param buffer the buffer to print into
- * @param row the id of the row to print
- */
- public abstract void stringifyValue(StringBuilder buffer,
- int row);
- }
-
http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
deleted file mode 100644
index 74a9d5f..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
+++ /dev/null
@@ -1,106 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.exec.vector;
-
-import java.math.BigInteger;
-
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-
-public class DecimalColumnVector extends ColumnVector {
-
- /**
- * A vector of HiveDecimalWritable objects.
- *
- * For high performance and easy access to this low-level structure,
- * the fields are public by design (as they are in other ColumnVector
- * types).
- */
- public HiveDecimalWritable[] vector;
- public short scale;
- public short precision;
-
- public DecimalColumnVector(int precision, int scale) {
- this(VectorizedRowBatch.DEFAULT_SIZE, precision, scale);
- }
-
- public DecimalColumnVector(int size, int precision, int scale) {
- super(size);
- this.precision = (short) precision;
- this.scale = (short) scale;
- vector = new HiveDecimalWritable[size];
- for (int i = 0; i < size; i++) {
- vector[i] = new HiveDecimalWritable(HiveDecimal.ZERO);
- }
- }
-
- @Override
- public void flatten(boolean selectedInUse, int[] sel, int size) {
- // TODO Auto-generated method stub
- }
-
- @Override
- public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
- HiveDecimal hiveDec = ((DecimalColumnVector) inputVector).vector[inputElementNum].getHiveDecimal(precision, scale);
- if (hiveDec == null) {
- noNulls = false;
- isNull[outElementNum] = true;
- } else {
- vector[outElementNum].set(hiveDec);
- }
- }
-
- @Override
- public void stringifyValue(StringBuilder buffer, int row) {
- if (isRepeating) {
- row = 0;
- }
- if (noNulls || !isNull[row]) {
- buffer.append(vector[row].toString());
- } else {
- buffer.append("null");
- }
- }
-
- public void set(int elementNum, HiveDecimalWritable writeable) {
- HiveDecimal hiveDec = writeable.getHiveDecimal(precision, scale);
- if (hiveDec == null) {
- noNulls = false;
- isNull[elementNum] = true;
- } else {
- vector[elementNum].set(hiveDec);
- }
- }
-
- public void set(int elementNum, HiveDecimal hiveDec) {
- HiveDecimal checkedDec = HiveDecimal.enforcePrecisionScale(hiveDec, precision, scale);
- if (checkedDec == null) {
- noNulls = false;
- isNull[elementNum] = true;
- } else {
- vector[elementNum].set(checkedDec);
- }
- }
-
- public void setNullDataValue(int elementNum) {
- // E.g. For scale 2 the minimum is "0.01"
- HiveDecimal minimumNonZeroValue = HiveDecimal.create(BigInteger.ONE, scale);
- vector[elementNum].set(minimumNonZeroValue);
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
deleted file mode 100644
index 4a7811d..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
+++ /dev/null
@@ -1,143 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.exec.vector;
-
-import java.util.Arrays;
-
-/**
- * This class represents a nullable double precision floating point column vector.
- * This class will be used for operations on all floating point types (float, double)
- * and as such will use a 64-bit double value to hold the biggest possible value.
- * During copy-in/copy-out, smaller types (i.e. float) will be converted as needed. This will
- * reduce the amount of code that needs to be generated and also will run fast since the
- * machine operates with 64-bit words.
- *
- * The vector[] field is public by design for high-performance access in the inner
- * loop of query execution.
- */
-public class DoubleColumnVector extends ColumnVector {
- public double[] vector;
- public static final double NULL_VALUE = Double.NaN;
-
- /**
- * Use this constructor by default. All column vectors
- * should normally be the default size.
- */
- public DoubleColumnVector() {
- this(VectorizedRowBatch.DEFAULT_SIZE);
- }
-
- /**
- * Don't use this except for testing purposes.
- *
- * @param len
- */
- public DoubleColumnVector(int len) {
- super(len);
- vector = new double[len];
- }
-
- // Copy the current object contents into the output. Only copy selected entries,
- // as indicated by selectedInUse and the sel array.
- public void copySelected(
- boolean selectedInUse, int[] sel, int size, DoubleColumnVector output) {
-
- // Output has nulls if and only if input has nulls.
- output.noNulls = noNulls;
- output.isRepeating = false;
-
- // Handle repeating case
- if (isRepeating) {
- output.vector[0] = vector[0];
- output.isNull[0] = isNull[0];
- output.isRepeating = true;
- return;
- }
-
- // Handle normal case
-
- // Copy data values over
- if (selectedInUse) {
- for (int j = 0; j < size; j++) {
- int i = sel[j];
- output.vector[i] = vector[i];
- }
- }
- else {
- System.arraycopy(vector, 0, output.vector, 0, size);
- }
-
- // Copy nulls over if needed
- if (!noNulls) {
- if (selectedInUse) {
- for (int j = 0; j < size; j++) {
- int i = sel[j];
- output.isNull[i] = isNull[i];
- }
- }
- else {
- System.arraycopy(isNull, 0, output.isNull, 0, size);
- }
- }
- }
-
- // Fill the column vector with the provided value
- public void fill(double value) {
- noNulls = true;
- isRepeating = true;
- vector[0] = value;
- }
-
- // Simplify vector by brute-force flattening noNulls and isRepeating
- // This can be used to reduce combinatorial explosion of code paths in VectorExpressions
- // with many arguments.
- public void flatten(boolean selectedInUse, int[] sel, int size) {
- flattenPush();
- if (isRepeating) {
- isRepeating = false;
- double repeatVal = vector[0];
- if (selectedInUse) {
- for (int j = 0; j < size; j++) {
- int i = sel[j];
- vector[i] = repeatVal;
- }
- } else {
- Arrays.fill(vector, 0, size, repeatVal);
- }
- flattenRepeatingNulls(selectedInUse, sel, size);
- }
- flattenNoNulls(selectedInUse, sel, size);
- }
-
- @Override
- public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
- vector[outElementNum] = ((DoubleColumnVector) inputVector).vector[inputElementNum];
- }
-
- @Override
- public void stringifyValue(StringBuilder buffer, int row) {
- if (isRepeating) {
- row = 0;
- }
- if (noNulls || !isNull[row]) {
- buffer.append(vector[row]);
- } else {
- buffer.append("null");
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
deleted file mode 100644
index 5702584..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
+++ /dev/null
@@ -1,189 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.exec.vector;
-
-import java.util.Arrays;
-
-/**
- * This class represents a nullable int column vector.
- * This class will be used for operations on all integer types (tinyint, smallint, int, bigint)
- * and as such will use a 64-bit long value to hold the biggest possible value.
- * During copy-in/copy-out, smaller int types will be converted as needed. This will
- * reduce the amount of code that needs to be generated and also will run fast since the
- * machine operates with 64-bit words.
- *
- * The vector[] field is public by design for high-performance access in the inner
- * loop of query execution.
- */
-public class LongColumnVector extends ColumnVector {
- public long[] vector;
- public static final long NULL_VALUE = 1;
-
- /**
- * Use this constructor by default. All column vectors
- * should normally be the default size.
- */
- public LongColumnVector() {
- this(VectorizedRowBatch.DEFAULT_SIZE);
- }
-
- /**
- * Don't use this except for testing purposes.
- *
- * @param len the number of rows
- */
- public LongColumnVector(int len) {
- super(len);
- vector = new long[len];
- }
-
- // Copy the current object contents into the output. Only copy selected entries,
- // as indicated by selectedInUse and the sel array.
- public void copySelected(
- boolean selectedInUse, int[] sel, int size, LongColumnVector output) {
-
- // Output has nulls if and only if input has nulls.
- output.noNulls = noNulls;
- output.isRepeating = false;
-
- // Handle repeating case
- if (isRepeating) {
- output.vector[0] = vector[0];
- output.isNull[0] = isNull[0];
- output.isRepeating = true;
- return;
- }
-
- // Handle normal case
-
- // Copy data values over
- if (selectedInUse) {
- for (int j = 0; j < size; j++) {
- int i = sel[j];
- output.vector[i] = vector[i];
- }
- }
- else {
- System.arraycopy(vector, 0, output.vector, 0, size);
- }
-
- // Copy nulls over if needed
- if (!noNulls) {
- if (selectedInUse) {
- for (int j = 0; j < size; j++) {
- int i = sel[j];
- output.isNull[i] = isNull[i];
- }
- }
- else {
- System.arraycopy(isNull, 0, output.isNull, 0, size);
- }
- }
- }
-
- // Copy the current object contents into the output. Only copy selected entries,
- // as indicated by selectedInUse and the sel array.
- public void copySelected(
- boolean selectedInUse, int[] sel, int size, DoubleColumnVector output) {
-
- // Output has nulls if and only if input has nulls.
- output.noNulls = noNulls;
- output.isRepeating = false;
-
- // Handle repeating case
- if (isRepeating) {
- output.vector[0] = vector[0]; // automatic conversion to double is done here
- output.isNull[0] = isNull[0];
- output.isRepeating = true;
- return;
- }
-
- // Handle normal case
-
- // Copy data values over
- if (selectedInUse) {
- for (int j = 0; j < size; j++) {
- int i = sel[j];
- output.vector[i] = vector[i];
- }
- }
- else {
- for(int i = 0; i < size; ++i) {
- output.vector[i] = vector[i];
- }
- }
-
- // Copy nulls over if needed
- if (!noNulls) {
- if (selectedInUse) {
- for (int j = 0; j < size; j++) {
- int i = sel[j];
- output.isNull[i] = isNull[i];
- }
- }
- else {
- System.arraycopy(isNull, 0, output.isNull, 0, size);
- }
- }
- }
-
- // Fill the column vector with the provided value
- public void fill(long value) {
- noNulls = true;
- isRepeating = true;
- vector[0] = value;
- }
-
- // Simplify vector by brute-force flattening noNulls and isRepeating
- // This can be used to reduce combinatorial explosion of code paths in VectorExpressions
- // with many arguments.
- public void flatten(boolean selectedInUse, int[] sel, int size) {
- flattenPush();
- if (isRepeating) {
- isRepeating = false;
- long repeatVal = vector[0];
- if (selectedInUse) {
- for (int j = 0; j < size; j++) {
- int i = sel[j];
- vector[i] = repeatVal;
- }
- } else {
- Arrays.fill(vector, 0, size, repeatVal);
- }
- flattenRepeatingNulls(selectedInUse, sel, size);
- }
- flattenNoNulls(selectedInUse, sel, size);
- }
-
- @Override
- public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
- vector[outElementNum] = ((LongColumnVector) inputVector).vector[inputElementNum];
- }
-
- @Override
- public void stringifyValue(StringBuilder buffer, int row) {
- if (isRepeating) {
- row = 0;
- }
- if (noNulls || !isNull[row]) {
- buffer.append(vector[row]);
- } else {
- buffer.append("null");
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
deleted file mode 100644
index 7c18da6..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
+++ /dev/null
@@ -1,186 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.exec.vector;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Writable;
-
-/**
- * A VectorizedRowBatch is a set of rows, organized with each column
- * as a vector. It is the unit of query execution, organized to minimize
- * the cost per row and achieve high cycles-per-instruction.
- * The major fields are public by design to allow fast and convenient
- * access by the vectorized query execution code.
- */
-public class VectorizedRowBatch implements Writable {
- public int numCols; // number of columns
- public ColumnVector[] cols; // a vector for each column
- public int size; // number of rows that qualify (i.e. haven't been filtered out)
- public int[] selected; // array of positions of selected values
- public int[] projectedColumns;
- public int projectionSize;
-
- /*
- * If no filtering has been applied yet, selectedInUse is false,
- * meaning that all rows qualify. If it is true, then the selected[] array
- * records the offsets of qualifying rows.
- */
- public boolean selectedInUse;
-
- // If this is true, then there is no data in the batch -- we have hit the end of input.
- public boolean endOfFile;
-
- /*
- * This number is carefully chosen to minimize overhead and typically allows
- * one VectorizedRowBatch to fit in cache.
- */
- public static final int DEFAULT_SIZE = 1024;
-
- /**
- * Return a batch with the specified number of columns.
- * This is the standard constructor -- all batches should be the same size
- *
- * @param numCols the number of columns to include in the batch
- */
- public VectorizedRowBatch(int numCols) {
- this(numCols, DEFAULT_SIZE);
- }
-
- /**
- * Return a batch with the specified number of columns and rows.
- * Only call this constructor directly for testing purposes.
- * Batch size should normally always be defaultSize.
- *
- * @param numCols the number of columns to include in the batch
- * @param size the number of rows to include in the batch
- */
- public VectorizedRowBatch(int numCols, int size) {
- this.numCols = numCols;
- this.size = size;
- selected = new int[size];
- selectedInUse = false;
- this.cols = new ColumnVector[numCols];
- projectedColumns = new int[numCols];
-
- // Initially all columns are projected and in the same order
- projectionSize = numCols;
- for (int i = 0; i < numCols; i++) {
- projectedColumns[i] = i;
- }
- }
-
- /**
- * Returns the maximum size of the batch (number of rows it can hold)
- */
- public int getMaxSize() {
- return selected.length;
- }
-
- /**
- * Return count of qualifying rows.
- *
- * @return number of rows that have not been filtered out
- */
- public long count() {
- return size;
- }
-
- private static String toUTF8(Object o) {
- if(o == null || o instanceof NullWritable) {
- return "\\N"; /* as found in LazySimpleSerDe's nullSequence */
- }
- return o.toString();
- }
-
- @Override
- public String toString() {
- if (size == 0) {
- return "";
- }
- StringBuilder b = new StringBuilder();
- if (this.selectedInUse) {
- for (int j = 0; j < size; j++) {
- int i = selected[j];
- b.append('[');
- for (int k = 0; k < projectionSize; k++) {
- int projIndex = projectedColumns[k];
- ColumnVector cv = cols[projIndex];
- if (k > 0) {
- b.append(", ");
- }
- cv.stringifyValue(b, i);
- }
- b.append(']');
- if (j < size - 1) {
- b.append('\n');
- }
- }
- } else {
- for (int i = 0; i < size; i++) {
- b.append('[');
- for (int k = 0; k < projectionSize; k++) {
- int projIndex = projectedColumns[k];
- ColumnVector cv = cols[projIndex];
- if (k > 0) {
- b.append(", ");
- }
- cv.stringifyValue(b, i);
- }
- b.append(']');
- if (i < size - 1) {
- b.append('\n');
- }
- }
- }
- return b.toString();
- }
-
- @Override
- public void readFields(DataInput arg0) throws IOException {
- throw new UnsupportedOperationException("Do you really need me?");
- }
-
- @Override
- public void write(DataOutput arg0) throws IOException {
- throw new UnsupportedOperationException("Don't call me");
- }
-
- /**
- * Resets the row batch to default state
- * - sets selectedInUse to false
- * - sets size to 0
- * - sets endOfFile to false
- * - resets each column
- * - inits each column
- */
- public void reset() {
- selectedInUse = false;
- size = 0;
- endOfFile = false;
- for (ColumnVector vc : cols) {
- if (vc != null) {
- vc.reset();
- vc.init();
- }
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java
deleted file mode 100644
index 6ad927d..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java
+++ /dev/null
@@ -1,39 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.sarg;
-
-import com.esotericsoftware.kryo.Kryo;
-import com.esotericsoftware.kryo.io.Input;
-import org.apache.commons.codec.binary.Base64;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.Builder;
-import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.TableScanDesc;
-
-/**
- * A factory for creating SearchArguments.
- */
-public class SearchArgumentFactory {
- public static Builder newBuilder() {
- return new SearchArgumentImpl.BuilderImpl();
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
deleted file mode 100644
index 1582a75..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
+++ /dev/null
@@ -1,697 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.sarg;
-
-import java.sql.Timestamp;
-import java.util.ArrayDeque;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Deque;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.commons.codec.binary.Base64;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-
-import com.esotericsoftware.kryo.Kryo;
-import com.esotericsoftware.kryo.io.Output;
-
-/**
- * The implementation of SearchArguments.
- */
-final class SearchArgumentImpl implements SearchArgument {
- public static final Log LOG = LogFactory.getLog(SearchArgumentImpl.class);
-
- static final class PredicateLeafImpl implements PredicateLeaf {
- private final Operator operator;
- private final Type type;
- private final String columnName;
- private final Object literal;
- private final List<Object> literalList;
-
- // Used by kryo
- @SuppressWarnings("unused")
- PredicateLeafImpl() {
- operator = null;
- type = null;
- columnName = null;
- literal = null;
- literalList = null;
- }
-
- PredicateLeafImpl(Operator operator,
- Type type,
- String columnName,
- Object literal,
- List<Object> literalList) {
- this.operator = operator;
- this.type = type;
- this.columnName = columnName;
- this.literal = literal;
- if (literal != null) {
- if (literal.getClass() != type.getValueClass()) {
- throw new IllegalArgumentException("Wrong value class " +
- literal.getClass().getName() + " for " + type + "." + operator +
- " leaf");
- }
- }
- this.literalList = literalList;
- if (literalList != null) {
- Class valueCls = type.getValueClass();
- for(Object lit: literalList) {
- if (lit != null && lit.getClass() != valueCls) {
- throw new IllegalArgumentException("Wrong value class item " +
- lit.getClass().getName() + " for " + type + "." + operator +
- " leaf");
- }
- }
- }
- }
-
- @Override
- public Operator getOperator() {
- return operator;
- }
-
- @Override
- public Type getType(){
- return type;
- }
-
- @Override
- public String getColumnName() {
- return columnName;
- }
-
- @Override
- public Object getLiteral() {
- // To get around a kryo 2.22 bug while deserialize a Timestamp into Date
- // (https://github.com/EsotericSoftware/kryo/issues/88)
- // When we see a Date, convert back into Timestamp
- if (literal instanceof java.util.Date) {
- return new Timestamp(((java.util.Date)literal).getTime());
- }
- return literal;
- }
-
- @Override
- public List<Object> getLiteralList() {
- return literalList;
- }
-
- @Override
- public String toString() {
- StringBuilder buffer = new StringBuilder();
- buffer.append('(');
- buffer.append(operator);
- buffer.append(' ');
- buffer.append(columnName);
- if (literal != null) {
- buffer.append(' ');
- buffer.append(literal);
- } else if (literalList != null) {
- for(Object lit: literalList) {
- buffer.append(' ');
- buffer.append(lit == null ? "null" : lit.toString());
- }
- }
- buffer.append(')');
- return buffer.toString();
- }
-
- private static boolean isEqual(Object left, Object right) {
-
- return left == right ||
- (left != null && right != null && left.equals(right));
- }
-
- @Override
- public boolean equals(Object other) {
- if (other == null || other.getClass() != getClass()) {
- return false;
- } else if (other == this) {
- return true;
- } else {
- PredicateLeafImpl o = (PredicateLeafImpl) other;
- return operator == o.operator &&
- type == o.type &&
- columnName.equals(o.columnName) &&
- isEqual(literal, o.literal) &&
- isEqual(literalList, o.literalList);
- }
- }
-
- @Override
- public int hashCode() {
- return operator.hashCode() +
- type.hashCode() * 17 +
- columnName.hashCode() * 3 * 17+
- (literal == null ? 0 : literal.hashCode()) * 101 * 3 * 17 +
- (literalList == null ? 0 : literalList.hashCode()) *
- 103 * 101 * 3 * 17;
- }
- }
-
-
- private final List<PredicateLeaf> leaves;
- private final ExpressionTree expression;
-
- SearchArgumentImpl(ExpressionTree expression, List<PredicateLeaf> leaves) {
- this.expression = expression;
- this.leaves = leaves;
- }
-
- // Used by kyro
- @SuppressWarnings("unused")
- SearchArgumentImpl() {
- leaves = null;
- expression = null;
- }
-
- @Override
- public List<PredicateLeaf> getLeaves() {
- return leaves;
- }
-
- @Override
- public TruthValue evaluate(TruthValue[] leaves) {
- return expression == null ? TruthValue.YES : expression.evaluate(leaves);
- }
-
- @Override
- public ExpressionTree getExpression() {
- return expression;
- }
-
- @Override
- public String toString() {
- StringBuilder buffer = new StringBuilder();
- for(int i=0; i < leaves.size(); ++i) {
- buffer.append("leaf-");
- buffer.append(i);
- buffer.append(" = ");
- buffer.append(leaves.get(i).toString());
- buffer.append('\n');
- }
- buffer.append("expr = ");
- buffer.append(expression);
- return buffer.toString();
- }
-
- public String toKryo() {
- Output out = new Output(4 * 1024, 10 * 1024 * 1024);
- new Kryo().writeObject(out, this);
- out.close();
- return Base64.encodeBase64String(out.toBytes());
- }
-
- static class BuilderImpl implements Builder {
-
- // max threshold for CNF conversion. having >8 elements in andList will be
- // converted to maybe
- private static final int CNF_COMBINATIONS_THRESHOLD = 256;
-
- private final Deque<ExpressionTree> currentTree =
- new ArrayDeque<ExpressionTree>();
- private final Map<PredicateLeaf, Integer> leaves =
- new HashMap<PredicateLeaf, Integer>();
- private final ExpressionTree root =
- new ExpressionTree(ExpressionTree.Operator.AND);
- {
- currentTree.add(root);
- }
-
- @Override
- public Builder startOr() {
- ExpressionTree node = new ExpressionTree(ExpressionTree.Operator.OR);
- currentTree.getFirst().getChildren().add(node);
- currentTree.addFirst(node);
- return this;
- }
-
- @Override
- public Builder startAnd() {
- ExpressionTree node = new ExpressionTree(ExpressionTree.Operator.AND);
- currentTree.getFirst().getChildren().add(node);
- currentTree.addFirst(node);
- return this;
- }
-
- @Override
- public Builder startNot() {
- ExpressionTree node = new ExpressionTree(ExpressionTree.Operator.NOT);
- currentTree.getFirst().getChildren().add(node);
- currentTree.addFirst(node);
- return this;
- }
-
- @Override
- public Builder end() {
- ExpressionTree current = currentTree.removeFirst();
- if (current.getChildren().size() == 0) {
- throw new IllegalArgumentException("Can't create expression " + root +
- " with no children.");
- }
- if (current.getOperator() == ExpressionTree.Operator.NOT &&
- current.getChildren().size() != 1) {
- throw new IllegalArgumentException("Can't create not expression " +
- current + " with more than 1 child.");
- }
- return this;
- }
-
- private int addLeaf(PredicateLeaf leaf) {
- Integer result = leaves.get(leaf);
- if (result == null) {
- int id = leaves.size();
- leaves.put(leaf, id);
- return id;
- } else {
- return result;
- }
- }
-
- @Override
- public Builder lessThan(String column, PredicateLeaf.Type type,
- Object literal) {
- ExpressionTree parent = currentTree.getFirst();
- if (column == null || literal == null) {
- parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL));
- } else {
- PredicateLeaf leaf =
- new PredicateLeafImpl(PredicateLeaf.Operator.LESS_THAN,
- type, column, literal, null);
- parent.getChildren().add(new ExpressionTree(addLeaf(leaf)));
- }
- return this;
- }
-
- @Override
- public Builder lessThanEquals(String column, PredicateLeaf.Type type,
- Object literal) {
- ExpressionTree parent = currentTree.getFirst();
- if (column == null || literal == null) {
- parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL));
- } else {
- PredicateLeaf leaf =
- new PredicateLeafImpl(PredicateLeaf.Operator.LESS_THAN_EQUALS,
- type, column, literal, null);
- parent.getChildren().add(new ExpressionTree(addLeaf(leaf)));
- }
- return this;
- }
-
- @Override
- public Builder equals(String column, PredicateLeaf.Type type,
- Object literal) {
- ExpressionTree parent = currentTree.getFirst();
- if (column == null || literal == null) {
- parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL));
- } else {
- PredicateLeaf leaf =
- new PredicateLeafImpl(PredicateLeaf.Operator.EQUALS,
- type, column, literal, null);
- parent.getChildren().add(new ExpressionTree(addLeaf(leaf)));
- }
- return this;
- }
-
- @Override
- public Builder nullSafeEquals(String column, PredicateLeaf.Type type,
- Object literal) {
- ExpressionTree parent = currentTree.getFirst();
- if (column == null || literal == null) {
- parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL));
- } else {
- PredicateLeaf leaf =
- new PredicateLeafImpl(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
- type, column, literal, null);
- parent.getChildren().add(new ExpressionTree(addLeaf(leaf)));
- }
- return this;
- }
-
- @Override
- public Builder in(String column, PredicateLeaf.Type type,
- Object... literal) {
- ExpressionTree parent = currentTree.getFirst();
- if (column == null || literal == null) {
- parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL));
- } else {
- if (literal.length == 0) {
- throw new IllegalArgumentException("Can't create in expression with "
- + "no arguments");
- }
- List<Object> argList = new ArrayList<Object>();
- argList.addAll(Arrays.asList(literal));
-
- PredicateLeaf leaf =
- new PredicateLeafImpl(PredicateLeaf.Operator.IN,
- type, column, null, argList);
- parent.getChildren().add(new ExpressionTree(addLeaf(leaf)));
- }
- return this;
- }
-
- @Override
- public Builder isNull(String column, PredicateLeaf.Type type) {
- ExpressionTree parent = currentTree.getFirst();
- if (column == null) {
- parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL));
- } else {
- PredicateLeaf leaf =
- new PredicateLeafImpl(PredicateLeaf.Operator.IS_NULL,
- type, column, null, null);
- parent.getChildren().add(new ExpressionTree(addLeaf(leaf)));
- }
- return this;
- }
-
- @Override
- public Builder between(String column, PredicateLeaf.Type type, Object lower,
- Object upper) {
- ExpressionTree parent = currentTree.getFirst();
- if (column == null || lower == null || upper == null) {
- parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL));
- } else {
- List<Object> argList = new ArrayList<Object>();
- argList.add(lower);
- argList.add(upper);
- PredicateLeaf leaf =
- new PredicateLeafImpl(PredicateLeaf.Operator.BETWEEN,
- type, column, null, argList);
- parent.getChildren().add(new ExpressionTree(addLeaf(leaf)));
- }
- return this;
- }
-
- @Override
- public Builder literal(TruthValue truth) {
- ExpressionTree parent = currentTree.getFirst();
- parent.getChildren().add(new ExpressionTree(truth));
- return this;
- }
-
- /**
- * Recursively explore the tree to find the leaves that are still reachable
- * after optimizations.
- * @param tree the node to check next
- * @param next the next available leaf id
- * @param leafReorder
- * @return the next available leaf id
- */
- static int compactLeaves(ExpressionTree tree, int next, int[] leafReorder) {
- if (tree.getOperator() == ExpressionTree.Operator.LEAF) {
- int oldLeaf = tree.getLeaf();
- if (leafReorder[oldLeaf] == -1) {
- leafReorder[oldLeaf] = next++;
- }
- } else if (tree.getChildren() != null){
- for(ExpressionTree child: tree.getChildren()) {
- next = compactLeaves(child, next, leafReorder);
- }
- }
- return next;
- }
-
- /**
- * Rewrite expression tree to update the leaves.
- * @param root the root of the tree to fix
- * @param leafReorder a map from old leaf ids to new leaf ids
- * @return the fixed root
- */
- static ExpressionTree rewriteLeaves(ExpressionTree root,
- int[] leafReorder) {
- if (root.getOperator() == ExpressionTree.Operator.LEAF) {
- return new ExpressionTree(leafReorder[root.getLeaf()]);
- } else if (root.getChildren() != null){
- List<ExpressionTree> children = root.getChildren();
- for(int i=0; i < children.size(); ++i) {
- children.set(i, rewriteLeaves(children.get(i), leafReorder));
- }
- }
- return root;
- }
-
- @Override
- public SearchArgument build() {
- if (currentTree.size() != 1) { // NOTE(review): presumably detects start/end calls that were never closed - confirm
- throw new IllegalArgumentException("Failed to end " +
- currentTree.size() + " operations.");
- }
- ExpressionTree optimized = pushDownNot(root); // 1. move NOTs down towards the leaves
- optimized = foldMaybe(optimized); // 2. remove YES_NO_NULL constants
- optimized = flatten(optimized); // 3. merge nested nodes that share an operator
- optimized = convertToCNF(optimized); // 4. rewrite as AND of ORs
- optimized = flatten(optimized); // 5. flatten again after the CNF rewrite
- int leafReorder[] = new int[leaves.size()]; // indexed by old leaf id, holds the new leaf id
- Arrays.fill(leafReorder, -1); // -1 = leaf not (yet) seen in the optimized tree
- int newLeafCount = compactLeaves(optimized, 0, leafReorder); // new ids are handed out starting at 0
- optimized = rewriteLeaves(optimized, leafReorder);
- ArrayList<PredicateLeaf> leafList = new ArrayList<>(newLeafCount);
- // expand list to correct size
- for(int i=0; i < newLeafCount; ++i) {
- leafList.add(null);
- }
- // build the new list
- for(Map.Entry<PredicateLeaf, Integer> elem: leaves.entrySet()) {
- int newLoc = leafReorder[elem.getValue()];
- if (newLoc != -1) { // leaves that the optimized tree no longer references are left out
- leafList.set(newLoc, elem.getKey());
- }
- }
- return new SearchArgumentImpl(optimized, leafList);
- }
-
- /**
- * Push the negations all the way to just before the leaves. Also remove
- * double negatives.
- * <p>
- * A NOT over an AND becomes an OR of the negated children and a NOT over an
- * OR becomes an AND of the negated children (De Morgan's laws). A NOT over
- * a constant is replaced by the negated constant. A NOT over anything else
- * is returned unchanged. For any other node that has children, each child
- * is replaced in place by its normalized form.
- * @param root the expression to normalize
- * @return the normalized expression, which may share some or all of the
- * nodes of the original expression.
- */
- static ExpressionTree pushDownNot(ExpressionTree root) {
- if (root.getOperator() == ExpressionTree.Operator.NOT) {
- ExpressionTree child = root.getChildren().get(0);
- switch (child.getOperator()) {
- case NOT:
- return pushDownNot(child.getChildren().get(0)); // double negative: drop both NOTs and keep normalizing
- case CONSTANT:
- return new ExpressionTree(child.getConstant().not());
- case AND:
- root = new ExpressionTree(ExpressionTree.Operator.OR); // not (and a b) => (or (not a) (not b))
- for(ExpressionTree kid: child.getChildren()) {
- root.getChildren().add(pushDownNot(new
- ExpressionTree(ExpressionTree.Operator.NOT, kid)));
- }
- break;
- case OR:
- root = new ExpressionTree(ExpressionTree.Operator.AND); // not (or a b) => (and (not a) (not b))
- for(ExpressionTree kid: child.getChildren()) {
- root.getChildren().add(pushDownNot(new ExpressionTree
- (ExpressionTree.Operator.NOT, kid)));
- }
- break;
- // for leaf, we don't do anything
- default:
- break;
- }
- } else if (root.getChildren() != null) {
- // iterate through children and push down not for each one
- for(int i=0; i < root.getChildren().size(); ++i) {
- root.getChildren().set(i, pushDownNot(root.getChildren().get(i)));
- }
- }
- return root;
- }
-
- /**
- * Remove MAYBE values from the expression. If they are in an AND operator,
- * they are dropped. If they are in an OR operator, they kill their parent.
- * This assumes that pushDownNot has already been called.
- * <p>
- * MAYBE here is the YES_NO_NULL constant. A node whose child list ends up
- * empty after dropping is itself replaced by a YES_NO_NULL constant.
- * The child lists are modified in place.
- * @param expr The expression to clean up
- * @return The cleaned up expression
- * @throws IllegalStateException if a YES_NO_NULL child is found under an
- * operator other than AND or OR
- */
- static ExpressionTree foldMaybe(ExpressionTree expr) {
- if (expr.getChildren() != null) {
- for(int i=0; i < expr.getChildren().size(); ++i) {
- ExpressionTree child = foldMaybe(expr.getChildren().get(i));
- if (child.getConstant() == TruthValue.YES_NO_NULL) {
- switch (expr.getOperator()) {
- case AND:
- expr.getChildren().remove(i);
- i -= 1; // the following children shifted left, so revisit the same index
- break;
- case OR:
- // a maybe will kill the or condition
- return child;
- default:
- throw new IllegalStateException("Got a maybe as child of " +
- expr);
- }
- } else {
- expr.getChildren().set(i, child);
- }
- }
- if (expr.getChildren().isEmpty()) {
- return new ExpressionTree(TruthValue.YES_NO_NULL);
- }
- }
- return expr;
- }
-
- /**
- * Converts multi-level ands and ors into single level ones.
- * A child that has the same operator as its parent (other than NOT) is
- * replaced by its own children, spliced into the parent's child list at
- * the child's position.
- * @param root the expression to flatten
- * @return the flattened expression: root with potentially modified
- * children, or root's only child when root is an AND or OR that is left
- * with exactly one child.
- */
- static ExpressionTree flatten(ExpressionTree root) {
- if (root.getChildren() != null) {
- // iterate through the index, so that if we add more children,
- // they don't get re-visited
- for(int i=0; i < root.getChildren().size(); ++i) {
- ExpressionTree child = flatten(root.getChildren().get(i));
- // do we need to flatten?
- if (child.getOperator() == root.getOperator() &&
- child.getOperator() != ExpressionTree.Operator.NOT) {
- boolean first = true;
- for(ExpressionTree grandkid: child.getChildren()) {
- // for the first grandkid replace the original parent
- if (first) {
- first = false;
- root.getChildren().set(i, grandkid);
- } else {
- root.getChildren().add(++i, grandkid); // insert right after the previous grandkid and step past it
- }
- }
- } else {
- root.getChildren().set(i, child);
- }
- }
- // if we have a singleton AND or OR, just return the child
- if ((root.getOperator() == ExpressionTree.Operator.OR ||
- root.getOperator() == ExpressionTree.Operator.AND) &&
- root.getChildren().size() == 1) {
- return root.getChildren().get(0);
- }
- }
- return root;
- }
-
- /**
- * Generate all combinations of items on the andList. For each item on the
- * andList, it generates all combinations of one child from each and
- * expression. Thus, (and a b) (and c d) will be expanded to: (or a c)
- * (or a d) (or b c) (or b d). If there are items on the nonAndList, they
- * are added to each or expression.
- * <p>
- * The method handles the first entry of andList and then recurses on the
- * remainder, so andList must not be empty. Items from nonAndList and
- * previously built or expressions are copied with the ExpressionTree copy
- * constructor; the children of the and expressions are added to the new
- * or expressions by reference.
- * @param result a list to put the results onto
- * @param andList a list of and expressions
- * @param nonAndList a list of non-and expressions
- */
- private static void generateAllCombinations(List<ExpressionTree> result,
- List<ExpressionTree> andList,
- List<ExpressionTree> nonAndList
- ) {
- List<ExpressionTree> kids = andList.get(0).getChildren();
- if (result.isEmpty()) { // first and expression: seed one or expression per kid
- for(ExpressionTree kid: kids) {
- ExpressionTree or = new ExpressionTree(ExpressionTree.Operator.OR);
- result.add(or);
- for(ExpressionTree node: nonAndList) {
- or.getChildren().add(new ExpressionTree(node));
- }
- or.getChildren().add(kid);
- }
- } else { // later and expressions: cross every existing or expression with every kid
- List<ExpressionTree> work = new ArrayList<ExpressionTree>(result);
- result.clear();
- for(ExpressionTree kid: kids) {
- for(ExpressionTree or: work) {
- ExpressionTree copy = new ExpressionTree(or);
- copy.getChildren().add(kid);
- result.add(copy);
- }
- }
- }
- if (andList.size() > 1) {
- generateAllCombinations(result, andList.subList(1, andList.size()),
- nonAndList);
- }
- }
-
- /**
- * Convert an expression so that the top level operator is AND with OR
- * operators under it. This routine assumes that all of the NOT operators
- * have been pushed to the leaves via pushDownNot.
- * <p>
- * Children are converted first. An OR that has at least one AND child is
- * then replaced by a new AND of OR expressions produced by
- * generateAllCombinations, unless checkCombinationsThreshold rejects the
- * expansion, in which case the OR is replaced by a YES_NO_NULL constant.
- * @param root the expression
- * @return the normalized expression
- */
- static ExpressionTree convertToCNF(ExpressionTree root) {
- if (root.getChildren() != null) {
- // convert all of the children to CNF
- int size = root.getChildren().size();
- for(int i=0; i < size; ++i) {
- root.getChildren().set(i, convertToCNF(root.getChildren().get(i)));
- }
- if (root.getOperator() == ExpressionTree.Operator.OR) {
- // a list of leaves that weren't under AND expressions
- List<ExpressionTree> nonAndList = new ArrayList<ExpressionTree>();
- // a list of AND expressions that we need to distribute
- List<ExpressionTree> andList = new ArrayList<ExpressionTree>();
- for(ExpressionTree child: root.getChildren()) {
- if (child.getOperator() == ExpressionTree.Operator.AND) {
- andList.add(child);
- } else if (child.getOperator() == ExpressionTree.Operator.OR) {
- // pull apart the kids of the OR expression
- for(ExpressionTree grandkid: child.getChildren()) {
- nonAndList.add(grandkid);
- }
- } else {
- nonAndList.add(child);
- }
- }
- if (!andList.isEmpty()) { // an OR without AND children is left as it is
- if (checkCombinationsThreshold(andList)) {
- root = new ExpressionTree(ExpressionTree.Operator.AND);
- generateAllCombinations(root.getChildren(), andList, nonAndList);
- } else {
- root = new ExpressionTree(TruthValue.YES_NO_NULL); // too many combinations: give up on this sub-expression
- }
- }
- }
- }
- return root;
- }
-
- private static boolean checkCombinationsThreshold(List<ExpressionTree> andList) { // true when the product of the child counts stays within CNF_COMBINATIONS_THRESHOLD
- int numComb = 1;
- for (ExpressionTree tree : andList) {
- numComb *= tree.getChildren().size(); // NOTE(review): int product; could wrap for extremely large child lists - confirm this cannot occur
- if (numComb > CNF_COMBINATIONS_THRESHOLD) { // checked after every factor, so the scan stops at the first excess
- return false;
- }
- }
- return true;
- }
-
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index 46deda5..6cb8529 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -17,6 +17,9 @@
*/
package org.apache.hadoop.hive.ql.io.orc;
+import com.esotericsoftware.kryo.Kryo;
+import com.esotericsoftware.kryo.io.Output;
+
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
@@ -40,6 +43,7 @@ import java.util.Set;
import java.util.TimeZone;
import java.util.TreeSet;
+import org.apache.commons.codec.binary.Base64;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
@@ -106,6 +110,13 @@ import org.junit.rules.TestName;
public class TestInputOutputFormat {
+ public static String toKryo(SearchArgument sarg) { // test helper: Kryo-serialize the sarg and return it as a Base64 string
+ Output out = new Output(4 * 1024, 10 * 1024 * 1024); // 4 KB initial buffer, allowed to grow up to 10 MB
+ new Kryo().writeObject(out, sarg);
+ out.close();
+ return Base64.encodeBase64String(out.toBytes()); // the written bytes are read back from the Output's buffer
+ }
+
Path workDir = new Path(System.getProperty("test.tmp.dir","target/tmp"));
static final int MILLIS_IN_DAY = 1000 * 60 * 60 * 24;
private static final SimpleDateFormat DATE_FORMAT =
@@ -1751,7 +1762,7 @@ public class TestInputOutputFormat {
types.add(builder.build());
SearchArgument isNull = SearchArgumentFactory.newBuilder()
.startAnd().isNull("cost", PredicateLeaf.Type.INTEGER).end().build();
- conf.set(ConvertAstToSearchArg.SARG_PUSHDOWN, isNull.toKryo());
+ conf.set(ConvertAstToSearchArg.SARG_PUSHDOWN, toKryo(isNull));
conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR,
"url,cost");
options.include(new boolean[]{true, true, false, true, false});
@@ -1798,7 +1809,7 @@ public class TestInputOutputFormat {
.lessThan("z", PredicateLeaf.Type.INTEGER, new Integer(0))
.end()
.build();
- conf.set("sarg.pushdown", sarg.toKryo());
+ conf.set("sarg.pushdown", toKryo(sarg));
conf.set("hive.io.file.readcolumn.names", "z,r");
properties.setProperty("columns", "z,r");
properties.setProperty("columns.types", "int:struct<x:int,y:int>");
@@ -1840,7 +1851,7 @@ public class TestInputOutputFormat {
.lessThan("z", PredicateLeaf.Type.STRING, new String("foo"))
.end()
.build();
- conf.set("sarg.pushdown", sarg.toKryo());
+ conf.set("sarg.pushdown", toKryo(sarg));
conf.set("hive.io.file.readcolumn.names", "z");
properties.setProperty("columns", "z");
properties.setProperty("columns.types", "string");
http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java b/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
index 3a51f4a..20de846 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
@@ -25,6 +25,7 @@ import com.google.common.collect.Sets;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.ql.io.orc.TestInputOutputFormat;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl.PredicateLeafImpl;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
@@ -464,7 +465,7 @@ public class TestSearchArgumentImpl {
.end()
.build();
- String serializedSarg = sarg.toKryo();
+ String serializedSarg = TestInputOutputFormat.toKryo(sarg);
SearchArgument sarg2 = ConvertAstToSearchArg.create(serializedSarg);
Field literalField = PredicateLeafImpl.class.getDeclaredField("literal");
http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/ExpressionTree.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/ExpressionTree.java b/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/ExpressionTree.java
deleted file mode 100644
index 2dd3a45..0000000
--- a/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/ExpressionTree.java
+++ /dev/null
@@ -1,157 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.sarg;
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-
-/**
- * The inner representation of the SearchArgument. Most users should not
- * need this interface, it is only for file formats that need to translate
- * the SearchArgument into an internal form.
- */
-public class ExpressionTree {
- public enum Operator {OR, AND, NOT, LEAF, CONSTANT}
- private final Operator operator;
- private final List<ExpressionTree> children;
- private final int leaf;
- private final SearchArgument.TruthValue constant;
-
- ExpressionTree() {
- operator = null;
- children = null;
- leaf = 0;
- constant = null;
- }
-
- ExpressionTree(Operator op, ExpressionTree... kids) {
- operator = op;
- children = new ArrayList<ExpressionTree>();
- leaf = -1;
- this.constant = null;
- Collections.addAll(children, kids);
- }
-
- ExpressionTree(int leaf) {
- operator = Operator.LEAF;
- children = null;
- this.leaf = leaf;
- this.constant = null;
- }
-
- ExpressionTree(SearchArgument.TruthValue constant) {
- operator = Operator.CONSTANT;
- children = null;
- this.leaf = -1;
- this.constant = constant;
- }
-
- ExpressionTree(ExpressionTree other) {
- this.operator = other.operator;
- if (other.children == null) {
- this.children = null;
- } else {
- this.children = new ArrayList<ExpressionTree>();
- for(ExpressionTree child: other.children) {
- children.add(new ExpressionTree(child));
- }
- }
- this.leaf = other.leaf;
- this.constant = other.constant;
- }
-
- public SearchArgument.TruthValue evaluate(SearchArgument.TruthValue[] leaves
- ) {
- SearchArgument.TruthValue result = null;
- switch (operator) {
- case OR:
- for(ExpressionTree child: children) {
- result = child.evaluate(leaves).or(result);
- }
- return result;
- case AND:
- for(ExpressionTree child: children) {
- result = child.evaluate(leaves).and(result);
- }
- return result;
- case NOT:
- return children.get(0).evaluate(leaves).not();
- case LEAF:
- return leaves[leaf];
- case CONSTANT:
- return constant;
- default:
- throw new IllegalStateException("Unknown operator: " + operator);
- }
- }
-
- @Override
- public String toString() {
- StringBuilder buffer = new StringBuilder();
- switch (operator) {
- case OR:
- buffer.append("(or");
- for(ExpressionTree child: children) {
- buffer.append(' ');
- buffer.append(child.toString());
- }
- buffer.append(')');
- break;
- case AND:
- buffer.append("(and");
- for(ExpressionTree child: children) {
- buffer.append(' ');
- buffer.append(child.toString());
- }
- buffer.append(')');
- break;
- case NOT:
- buffer.append("(not ");
- buffer.append(children.get(0));
- buffer.append(')');
- break;
- case LEAF:
- buffer.append("leaf-");
- buffer.append(leaf);
- break;
- case CONSTANT:
- buffer.append(constant);
- break;
- }
- return buffer.toString();
- }
-
- public Operator getOperator() {
- return operator;
- }
-
- public List<ExpressionTree> getChildren() {
- return children;
- }
-
- public SearchArgument.TruthValue getConstant() {
- return constant;
- }
-
- public int getLeaf() {
- return leaf;
- }
-}
-