You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2016/05/13 19:50:32 UTC
[03/23] orc git commit: ORC-1 Import of ORC code from Hive. (omalley
reviewed by prasanthj)
http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
new file mode 100644
index 0000000..c069a5f
--- /dev/null
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
@@ -0,0 +1,217 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.util.Arrays;
+
+/**
+ * ColumnVector contains the shared structure for the sub-types,
+ * including NULL information, and whether this vector
+ * repeats, i.e. has all values the same, so only the first
+ * one is set. This is used to accelerate query performance
+ * by handling a whole vector in O(1) time when applicable.
+ *
+ * The fields are public by design since this is a performance-critical
+ * structure that is used in the inner loop of query execution.
+ */
+public abstract class ColumnVector {
+
+  /**
+   * The kinds of column vectors that currently exist.
+   */
+  public static enum Type {
+    NONE, // Useful when the type of column vector has not been determined yet.
+    LONG,
+    DOUBLE,
+    BYTES,
+    DECIMAL,
+    TIMESTAMP,
+    INTERVAL_DAY_TIME,
+    STRUCT,
+    LIST,
+    MAP,
+    UNION
+  }
+
+  /**
+   * Per-row null markers. An entry is only meaningful while noNulls is
+   * false; in that case true means the row's value is NULL. The array is
+   * always allocated so that a batch can be re-used later and nulls added.
+   */
+  public boolean[] isNull;
+
+  /** True when the whole column vector contains no NULL values. */
+  public boolean noNulls;
+
+  /**
+   * True when one value repeats for the whole column vector. The repeated
+   * value (and its null marker) is then the one stored at index 0.
+   */
+  public boolean isRepeating;
+
+  // Snapshot of the two flags taken by flattenPush() and put back by unFlatten().
+  private boolean preFlattenIsRepeating;
+  private boolean preFlattenNoNulls;
+
+  /**
+   * Initializes the fields shared by every column vector. Sub-classes call
+   * this through super(len); the vector starts with no nulls and not repeating.
+   *
+   * @param len Vector length
+   */
+  public ColumnVector(int len) {
+    this.isNull = new boolean[len];
+    this.noNulls = true;
+    this.preFlattenNoNulls = true;
+    this.isRepeating = false;
+    this.preFlattenIsRepeating = false;
+  }
+
+  /**
+   * Puts the column back into its default state:
+   *  - isNull is cleared (only when nulls may have been recorded)
+   *  - noNulls becomes true
+   *  - isRepeating becomes false
+   * The saved pre-flatten flags are returned to their defaults as well.
+   */
+  public void reset() {
+    final boolean nullsMayBeSet = !noNulls;
+    if (nullsMayBeSet) {
+      Arrays.fill(isNull, false);
+    }
+    preFlattenIsRepeating = false;
+    preFlattenNoNulls = true;
+    isRepeating = false;
+    noNulls = true;
+  }
+
+  /**
+   * Sets the isRepeating flag. This base implementation only assigns the
+   * field; per the original documentation, struct and union vectors are
+   * expected to recurse so that their children's flags are set correctly.
+   *
+   * @param isRepeating the new value of the flag
+   */
+  public void setRepeating(boolean isRepeating) {
+    this.isRepeating = isRepeating;
+  }
+
+  /**
+   * Expands a repeating and/or no-nulls vector into its explicit per-row form.
+   *
+   * @param selectedInUse whether sel lists the rows that are in use
+   * @param sel the selected row indexes, read only when selectedInUse is true
+   * @param size the number of rows (or of entries in sel) to process
+   */
+  public abstract void flatten(boolean selectedInUse, int[] sel, int size);
+
+  // Brute-force expansion of the null marker of a repeating vector to every
+  // row in use. This can be used to reduce combinatorial explosion of code
+  // paths in VectorExpressions with many arguments.
+  protected void flattenRepeatingNulls(boolean selectedInUse, int[] sel,
+                                       int size) {
+    // With noNulls set, isNull[0] is not meaningful, so every row gets false.
+    final boolean repeatedNull = !noNulls && isNull[0];
+
+    if (!selectedInUse) {
+      Arrays.fill(isNull, 0, size, repeatedNull);
+    } else {
+      for (int k = 0; k < size; k++) {
+        isNull[sel[k]] = repeatedNull;
+      }
+    }
+
+    // all nulls are now explicit
+    noNulls = false;
+  }
+
+  // Makes the absence of nulls explicit: when noNulls is set, every row in
+  // use gets isNull == false and noNulls is cleared. Does nothing otherwise.
+  protected void flattenNoNulls(boolean selectedInUse, int[] sel,
+                                int size) {
+    if (!noNulls) {
+      return;
+    }
+    noNulls = false;
+    if (!selectedInUse) {
+      Arrays.fill(isNull, 0, size, false);
+      return;
+    }
+    for (int k = 0; k < size; k++) {
+      isNull[sel[k]] = false;
+    }
+  }
+
+  /**
+   * Restore the state of isRepeating and noNulls to what it was
+   * before flattening. This must only be called just after flattening
+   * and then evaluating a VectorExpression on the column vector.
+   * It is an optimization that allows other operations on the same
+   * column to continue to benefit from the isRepeating and noNulls
+   * indicators.
+   */
+  public void unFlatten() {
+    noNulls = preFlattenNoNulls;
+    isRepeating = preFlattenIsRepeating;
+  }
+
+  // Record repeating and no nulls state to be restored later by unFlatten().
+  protected void flattenPush() {
+    preFlattenNoNulls = noNulls;
+    preFlattenIsRepeating = isRepeating;
+  }
+
+  /**
+   * Set the element in this column vector from the given input vector.
+   * This method can assume that the output does not have isRepeating set.
+   *
+   * @param outElementNum the row of this vector to write
+   * @param inputElementNum the row of inputVector to read
+   * @param inputVector the vector to copy the value from
+   */
+  public abstract void setElement(int outElementNum, int inputElementNum,
+                                  ColumnVector inputVector);
+
+  /**
+   * Initialize the column vector. This method can be overridden by specific
+   * column vector types. Use this method only if the individual type of the
+   * column vector is not known; otherwise it is preferable to call the
+   * specific initialization methods.
+   */
+  public void init() {
+    // Do nothing by default
+  }
+
+  /**
+   * Ensure the ColumnVector can hold at least size values.
+   * This method is deliberately *not* recursive because the complex types
+   * can easily have more (or less) children than the upper levels.
+   *
+   * @param size the new minimum size
+   * @param preserveData should the old data be preserved?
+   */
+  public void ensureSize(int size, boolean preserveData) {
+    if (size <= isNull.length) {
+      return;
+    }
+    final boolean[] previous = isNull;
+    isNull = new boolean[size];
+    // A fresh array is already all-false, which is right when nothing has to
+    // be kept or when the vector has no nulls.
+    if (!preserveData || noNulls) {
+      return;
+    }
+    if (isRepeating) {
+      // Only index 0 is meaningful for a repeating vector.
+      isNull[0] = previous[0];
+    } else {
+      System.arraycopy(previous, 0, isNull, 0, previous.length);
+    }
+  }
+
+  /**
+   * Print the value for this column into the given string builder.
+   *
+   * @param buffer the buffer to print into
+   * @param row the id of the row to print
+   */
+  public abstract void stringifyValue(StringBuilder buffer,
+                                      int row);
+ }
http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
new file mode 100644
index 0000000..0c52210
--- /dev/null
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
@@ -0,0 +1,156 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+import java.math.BigInteger;
+
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+
+public class DecimalColumnVector extends ColumnVector {
+
+ /**
+ * A vector of HiveDecimalWritable objects.
+ *
+ * For high performance and easy access to this low-level structure,
+ * the fields are public by design (as they are in other ColumnVector
+ * types).
+ */
+ public HiveDecimalWritable[] vector;
+ public short scale;
+ public short precision;
+
+ public DecimalColumnVector(int precision, int scale) {
+ this(VectorizedRowBatch.DEFAULT_SIZE, precision, scale);
+ }
+
+ public DecimalColumnVector(int size, int precision, int scale) {
+ super(size);
+ this.precision = (short) precision;
+ this.scale = (short) scale;
+ vector = new HiveDecimalWritable[size];
+ for (int i = 0; i < size; i++) {
+ vector[i] = new HiveDecimalWritable(HiveDecimal.ZERO);
+ }
+ }
+
+ // Fill the all the vector entries with provided value
+ public void fill(HiveDecimal value) {
+ noNulls = true;
+ isRepeating = true;
+ if (vector[0] == null) {
+ vector[0] = new HiveDecimalWritable(value);
+ } else {
+ vector[0].set(value);
+ }
+ }
+
+ // Fill the column vector with nulls
+ public void fillWithNulls() {
+ noNulls = false;
+ isRepeating = true;
+ vector[0] = null;
+ isNull[0] = true;
+ }
+
+ @Override
+ public void flatten(boolean selectedInUse, int[] sel, int size) {
+ // TODO Auto-generated method stub
+ }
+
+ @Override
+ public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
+ if (inputVector.isRepeating) {
+ inputElementNum = 0;
+ }
+ if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) {
+ HiveDecimal hiveDec =
+ ((DecimalColumnVector) inputVector).vector[inputElementNum]
+ .getHiveDecimal(precision, scale);
+ if (hiveDec == null) {
+ isNull[outElementNum] = true;
+ noNulls = false;
+ } else {
+ isNull[outElementNum] = false;
+ vector[outElementNum].set(hiveDec);
+ }
+ } else {
+ isNull[outElementNum] = true;
+ noNulls = false;
+ }
+ }
+
+ @Override
+ public void stringifyValue(StringBuilder buffer, int row) {
+ if (isRepeating) {
+ row = 0;
+ }
+ if (noNulls || !isNull[row]) {
+ buffer.append(vector[row].toString());
+ } else {
+ buffer.append("null");
+ }
+ }
+
+ public void set(int elementNum, HiveDecimalWritable writeable) {
+ if (writeable == null) {
+ noNulls = false;
+ isNull[elementNum] = true;
+ } else {
+ HiveDecimal hiveDec = writeable.getHiveDecimal(precision, scale);
+ if (hiveDec == null) {
+ noNulls = false;
+ isNull[elementNum] = true;
+ } else {
+ vector[elementNum].set(hiveDec);
+ }
+ }
+ }
+
+ public void set(int elementNum, HiveDecimal hiveDec) {
+ HiveDecimal checkedDec = HiveDecimal.enforcePrecisionScale(hiveDec, precision, scale);
+ if (checkedDec == null) {
+ noNulls = false;
+ isNull[elementNum] = true;
+ } else {
+ vector[elementNum].set(checkedDec);
+ }
+ }
+
+ public void setNullDataValue(int elementNum) {
+ // E.g. For scale 2 the minimum is "0.01"
+ HiveDecimal minimumNonZeroValue = HiveDecimal.create(BigInteger.ONE, scale);
+ vector[elementNum].set(minimumNonZeroValue);
+ }
+
+ @Override
+ public void ensureSize(int size, boolean preserveData) {
+ super.ensureSize(size, preserveData);
+ if (size > vector.length) {
+ HiveDecimalWritable[] oldArray = vector;
+ vector = new HiveDecimalWritable[size];
+ if (preserveData) {
+ // we copy all of the values to avoid creating more objects
+ System.arraycopy(oldArray, 0, vector, 0 , oldArray.length);
+ for(int i= oldArray.length; i < vector.length; ++i) {
+ vector[i] = new HiveDecimalWritable(HiveDecimal.ZERO);
+ }
+ }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
new file mode 100644
index 0000000..bd421f4
--- /dev/null
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
@@ -0,0 +1,178 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+/**
+ * This class represents a nullable double precision floating point column vector.
+ * This class will be used for operations on all floating point types (float, double)
+ * and as such will use a 64-bit double value to hold the biggest possible value.
+ * During copy-in/copy-out, smaller types (i.e. float) will be converted as needed. This will
+ * reduce the amount of code that needs to be generated and also will run fast since the
+ * machine operates with 64-bit words.
+ *
+ * The vector[] field is public by design for high-performance access in the inner
+ * loop of query execution.
+ */
+public class DoubleColumnVector extends ColumnVector {
+  public double[] vector;
+  public static final double NULL_VALUE = Double.NaN;
+
+  /**
+   * Use this constructor by default. All column vectors
+   * should normally be the default size.
+   */
+  public DoubleColumnVector() {
+    this(VectorizedRowBatch.DEFAULT_SIZE);
+  }
+
+  /**
+   * Don't use this except for testing purposes.
+   *
+   * @param len the number of rows
+   */
+  public DoubleColumnVector(int len) {
+    super(len);
+    this.vector = new double[len];
+  }
+
+  /**
+   * Copy the current object contents into the output. Only the selected
+   * entries are copied, as indicated by selectedInUse and the sel array.
+   *
+   * @param selectedInUse whether sel lists the rows that are in use
+   * @param sel the selected row indexes, read only when selectedInUse is true
+   * @param size the number of rows (or of entries in sel) to copy
+   * @param output the vector that receives the copy
+   */
+  public void copySelected(
+      boolean selectedInUse, int[] sel, int size, DoubleColumnVector output) {
+
+    // Output has nulls if and only if input has nulls. Both flags are written
+    // before isRepeating is inspected, exactly as before.
+    output.noNulls = noNulls;
+    output.isRepeating = false;
+
+    if (isRepeating) {
+      // Repeating case: only entry 0 carries information.
+      output.vector[0] = vector[0];
+      output.isNull[0] = isNull[0];
+      output.isRepeating = true;
+      return;
+    }
+
+    // Normal case: values first, then the null markers if there are any.
+    final boolean copyNulls = !noNulls;
+    if (selectedInUse) {
+      for (int k = 0; k < size; k++) {
+        final int row = sel[k];
+        output.vector[row] = vector[row];
+      }
+      if (copyNulls) {
+        for (int k = 0; k < size; k++) {
+          final int row = sel[k];
+          output.isNull[row] = isNull[row];
+        }
+      }
+    } else {
+      System.arraycopy(vector, 0, output.vector, 0, size);
+      if (copyNulls) {
+        System.arraycopy(isNull, 0, output.isNull, 0, size);
+      }
+    }
+  }
+
+  /**
+   * Fill the column vector with the provided value by marking it as
+   * repeating, without nulls, and storing the value at index 0.
+   *
+   * @param value the value every row should have
+   */
+  public void fill(double value) {
+    vector[0] = value;
+    isRepeating = true;
+    noNulls = true;
+  }
+
+  /**
+   * Fill the column vector with nulls by marking it as a repeating NULL
+   * whose data value is NULL_VALUE.
+   */
+  public void fillWithNulls() {
+    vector[0] = NULL_VALUE;
+    isNull[0] = true;
+    isRepeating = true;
+    noNulls = false;
+  }
+
+  // Simplify vector by brute-force flattening noNulls and isRepeating.
+  // This can be used to reduce combinatorial explosion of code paths in
+  // VectorExpressions with many arguments.
+  public void flatten(boolean selectedInUse, int[] sel, int size) {
+    flattenPush();
+    if (isRepeating) {
+      isRepeating = false;
+      final double repeated = vector[0];
+      if (!selectedInUse) {
+        Arrays.fill(vector, 0, size, repeated);
+      } else {
+        for (int k = 0; k < size; k++) {
+          vector[sel[k]] = repeated;
+        }
+      }
+      flattenRepeatingNulls(selectedInUse, sel, size);
+    }
+    flattenNoNulls(selectedInUse, sel, size);
+  }
+
+  /**
+   * Copies one row from another DoubleColumnVector, carrying over its null
+   * state. Row 0 of the input is read when the input is repeating.
+   *
+   * @param outElementNum the row of this vector to write
+   * @param inputElementNum the row of inputVector to read
+   * @param inputVector the DoubleColumnVector to copy from
+   */
+  @Override
+  public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
+    final int source = inputVector.isRepeating ? 0 : inputElementNum;
+    final boolean sourceIsNull = !inputVector.noNulls && inputVector.isNull[source];
+    if (sourceIsNull) {
+      isNull[outElementNum] = true;
+      noNulls = false;
+      return;
+    }
+    isNull[outElementNum] = false;
+    final DoubleColumnVector doubles = (DoubleColumnVector) inputVector;
+    vector[outElementNum] = doubles.vector[source];
+  }
+
+  /**
+   * Appends the row's value, or the text "null" for a NULL row.
+   *
+   * @param buffer the buffer to print into
+   * @param row the row to print (row 0 is printed when the vector is repeating)
+   */
+  @Override
+  public void stringifyValue(StringBuilder buffer, int row) {
+    final int index = isRepeating ? 0 : row;
+    final boolean rowIsNull = !noNulls && isNull[index];
+    if (rowIsNull) {
+      buffer.append("null");
+    } else {
+      buffer.append(vector[index]);
+    }
+  }
+
+  /**
+   * Ensure the vector can hold at least size values, growing both the null
+   * markers and the value array.
+   *
+   * @param size the new minimum size
+   * @param preserveData should the old data be preserved?
+   */
+  @Override
+  public void ensureSize(int size, boolean preserveData) {
+    super.ensureSize(size, preserveData);
+    if (size <= vector.length) {
+      return;
+    }
+    final double[] previous = vector;
+    vector = new double[size];
+    if (!preserveData) {
+      return;
+    }
+    if (isRepeating) {
+      // Only index 0 is meaningful for a repeating vector.
+      vector[0] = previous[0];
+    } else {
+      System.arraycopy(previous, 0, vector, 0, previous.length);
+    }
+  }
+}
http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java
new file mode 100644
index 0000000..39ccea8
--- /dev/null
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java
@@ -0,0 +1,348 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.util.Arrays;
+
+import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * This class represents a nullable interval day time column vector capable of handing a
+ * wide range of interval day time values.
+ *
+ * We store the 2 (value) fields of a HiveIntervalDayTime class in primitive arrays.
+ *
+ * We do this to avoid an array of Java HiveIntervalDayTime objects which would have poor storage
+ * and memory access characteristics.
+ *
+ * Generally, the caller will fill in a scratch HiveIntervalDayTime object with values from a row,
+ * work using the scratch HiveIntervalDayTime, and then perhaps update the column vector row
+ * with a result.
+ */
+public class IntervalDayTimeColumnVector extends ColumnVector {
+
+  /*
+   * The storage arrays for this column vector correspond to the storage of a
+   * HiveIntervalDayTime:
+   */
+  private long[] totalSeconds;
+      // The values from HiveIntervalDayTime.getTotalSeconds().
+
+  private int[] nanos;
+      // The values from HiveIntervalDayTime.getNanos().
+
+  /*
+   * Scratch objects.
+   */
+  private final HiveIntervalDayTime scratchIntervalDayTime;
+
+  private Writable scratchWritable;
+      // Supports keeping a HiveIntervalDayTimeWritable object without having to import
+      // that definition...
+
+  /**
+   * Use this constructor by default. All column vectors
+   * should normally be the default size.
+   */
+  public IntervalDayTimeColumnVector() {
+    this(VectorizedRowBatch.DEFAULT_SIZE);
+  }
+
+  /**
+   * Don't use this except for testing purposes.
+   *
+   * @param len the number of rows
+   */
+  public IntervalDayTimeColumnVector(int len) {
+    super(len);
+
+    totalSeconds = new long[len];
+    nanos = new int[len];
+
+    scratchIntervalDayTime = new HiveIntervalDayTime();
+
+    scratchWritable = null;     // Allocated by caller.
+  }
+
+  /**
+   * Return the number of rows.
+   * @return the length of the underlying storage arrays
+   */
+  public int getLength() {
+    return totalSeconds.length;
+  }
+
+  /**
+   * Return a row's HiveIntervalDayTime.getTotalSeconds() value.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param elementNum the row to read
+   * @return the row's total seconds
+   */
+  public long getTotalSeconds(int elementNum) {
+    return totalSeconds[elementNum];
+  }
+
+  /**
+   * Return a row's HiveIntervalDayTime.getNanos() value.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param elementNum the row to read
+   * @return the row's nanoseconds, widened to long
+   */
+  public long getNanos(int elementNum) {
+    return nanos[elementNum];
+  }
+
+  /**
+   * Return a row's HiveIntervalDayTime.getDouble() value.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * Overwrites the scratch HiveIntervalDayTime.
+   * @param elementNum the row to read
+   * @return the row's value as a double
+   */
+  public double getDouble(int elementNum) {
+    return asScratchIntervalDayTime(elementNum).getDouble();
+  }
+
+  /**
+   * Set a HiveIntervalDayTime object from a row of the column.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param intervalDayTime the object that receives the row's value
+   * @param elementNum the row to read
+   */
+  public void intervalDayTimeUpdate(HiveIntervalDayTime intervalDayTime, int elementNum) {
+    intervalDayTime.set(totalSeconds[elementNum], nanos[elementNum]);
+  }
+
+
+  /**
+   * Return the scratch HiveIntervalDayTime object set from a row.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * The returned object is shared: its contents change on the next call.
+   * @param elementNum the row to read
+   * @return the scratch object, loaded with the row's value
+   */
+  public HiveIntervalDayTime asScratchIntervalDayTime(int elementNum) {
+    scratchIntervalDayTime.set(totalSeconds[elementNum], nanos[elementNum]);
+    return scratchIntervalDayTime;
+  }
+
+  /**
+   * Return the scratch HiveIntervalDayTime (contents undefined).
+   * @return the scratch object
+   */
+  public HiveIntervalDayTime getScratchIntervalDayTime() {
+    return scratchIntervalDayTime;
+  }
+
+  /**
+   * Compare row to HiveIntervalDayTime.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param elementNum the row on the left-hand side
+   * @param intervalDayTime the value on the right-hand side
+   * @return -1, 0, 1 standard compareTo values.
+   */
+  public int compareTo(int elementNum, HiveIntervalDayTime intervalDayTime) {
+    return asScratchIntervalDayTime(elementNum).compareTo(intervalDayTime);
+  }
+
+  /**
+   * Compare HiveIntervalDayTime to row.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param intervalDayTime the value on the left-hand side
+   * @param elementNum the row on the right-hand side
+   * @return -1, 0, 1 standard compareTo values.
+   */
+  public int compareTo(HiveIntervalDayTime intervalDayTime, int elementNum) {
+    return intervalDayTime.compareTo(asScratchIntervalDayTime(elementNum));
+  }
+
+  /**
+   * Compare a row to another IntervalDayTimeColumnVector's row.
+   * @param elementNum1 the row of this vector on the left-hand side
+   * @param intervalDayTimeColVector2 the vector holding the right-hand side
+   * @param elementNum2 the row of intervalDayTimeColVector2 on the right-hand side
+   * @return the result of HiveIntervalDayTime.compareTo for the two rows
+   */
+  public int compareTo(int elementNum1, IntervalDayTimeColumnVector intervalDayTimeColVector2,
+      int elementNum2) {
+    return asScratchIntervalDayTime(elementNum1).compareTo(
+        intervalDayTimeColVector2.independentOf(this, elementNum2));
+  }
+
+  /**
+   * Compare another IntervalDayTimeColumnVector's row to a row.
+   * @param intervalDayTimeColVector1 the vector holding the left-hand side
+   * @param elementNum1 the row of intervalDayTimeColVector1 on the left-hand side
+   * @param elementNum2 the row of this vector on the right-hand side
+   * @return the result of HiveIntervalDayTime.compareTo for the two rows
+   */
+  public int compareTo(IntervalDayTimeColumnVector intervalDayTimeColVector1, int elementNum1,
+      int elementNum2) {
+    return intervalDayTimeColVector1.asScratchIntervalDayTime(elementNum1).compareTo(
+        independentOf(intervalDayTimeColVector1, elementNum2));
+  }
+
+  // Loads a row of this vector into an object that is guaranteed not to be
+  // the scratch object of 'other'. When both operands of a comparison live
+  // in the same vector they would otherwise share one scratch object, and
+  // the comparison would always see two equal values.
+  private HiveIntervalDayTime independentOf(IntervalDayTimeColumnVector other, int elementNum) {
+    if (other != this) {
+      return asScratchIntervalDayTime(elementNum);
+    }
+    HiveIntervalDayTime copy = new HiveIntervalDayTime();
+    copy.set(totalSeconds[elementNum], nanos[elementNum]);
+    return copy;
+  }
+
+  /**
+   * Copies one row from another IntervalDayTimeColumnVector, carrying over
+   * its null state. Row 0 of the input is read when the input is repeating,
+   * matching the other column vector types.
+   *
+   * @param outElementNum the row of this vector to write
+   * @param inputElementNum the row of inputVector to read
+   * @param inputVector the IntervalDayTimeColumnVector to copy from
+   */
+  @Override
+  public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
+    if (inputVector.isRepeating) {
+      inputElementNum = 0;
+    }
+    if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) {
+      IntervalDayTimeColumnVector intervalColVector = (IntervalDayTimeColumnVector) inputVector;
+
+      isNull[outElementNum] = false;
+      totalSeconds[outElementNum] = intervalColVector.totalSeconds[inputElementNum];
+      nanos[outElementNum] = intervalColVector.nanos[inputElementNum];
+    } else {
+      isNull[outElementNum] = true;
+      noNulls = false;
+    }
+  }
+
+  // Simplify vector by brute-force flattening noNulls and isRepeating
+  // This can be used to reduce combinatorial explosion of code paths in VectorExpressions
+  // with many arguments.
+  @Override
+  public void flatten(boolean selectedInUse, int[] sel, int size) {
+    flattenPush();
+    if (isRepeating) {
+      isRepeating = false;
+      long repeatSeconds = totalSeconds[0];
+      int repeatNanos = nanos[0];
+      if (selectedInUse) {
+        for (int j = 0; j < size; j++) {
+          int i = sel[j];
+          totalSeconds[i] = repeatSeconds;
+          nanos[i] = repeatNanos;
+        }
+      } else {
+        Arrays.fill(totalSeconds, 0, size, repeatSeconds);
+        Arrays.fill(nanos, 0, size, repeatNanos);
+      }
+      flattenRepeatingNulls(selectedInUse, sel, size);
+    }
+    flattenNoNulls(selectedInUse, sel, size);
+  }
+
+  /**
+   * Set a row from a HiveIntervalDayTime.
+   * We assume the entry has already been isRepeated adjusted.
+   * The row's null marker is not changed.
+   * @param elementNum the row to write
+   * @param intervalDayTime the value to store
+   */
+  public void set(int elementNum, HiveIntervalDayTime intervalDayTime) {
+    this.totalSeconds[elementNum] = intervalDayTime.getTotalSeconds();
+    this.nanos[elementNum] = intervalDayTime.getNanos();
+  }
+
+  /**
+   * Set a row from the current value in the scratch interval day time.
+   * @param elementNum the row to write
+   */
+  public void setFromScratchIntervalDayTime(int elementNum) {
+    this.totalSeconds[elementNum] = scratchIntervalDayTime.getTotalSeconds();
+    this.nanos[elementNum] = scratchIntervalDayTime.getNanos();
+  }
+
+  /**
+   * Set row to standard null value(s): zero seconds and one nanosecond.
+   * We assume the entry has already been isRepeated adjusted.
+   * The row's null marker is not changed.
+   * @param elementNum the row to write
+   */
+  public void setNullValue(int elementNum) {
+    totalSeconds[elementNum] = 0;
+    nanos[elementNum] = 1;
+  }
+
+  /**
+   * Copy the current object contents into the output. Only the selected
+   * entries are copied, as indicated by selectedInUse and the sel array.
+   *
+   * @param selectedInUse whether sel lists the rows that are in use
+   * @param sel the selected row indexes, read only when selectedInUse is true
+   * @param size the number of rows (or of entries in sel) to copy
+   * @param output the vector that receives the copy
+   */
+  public void copySelected(
+      boolean selectedInUse, int[] sel, int size, IntervalDayTimeColumnVector output) {
+
+    // Output has nulls if and only if input has nulls.
+    output.noNulls = noNulls;
+    output.isRepeating = false;
+
+    // Handle repeating case
+    if (isRepeating) {
+      output.totalSeconds[0] = totalSeconds[0];
+      output.nanos[0] = nanos[0];
+      output.isNull[0] = isNull[0];
+      output.isRepeating = true;
+      return;
+    }
+
+    // Handle normal case
+
+    // Copy data values over
+    if (selectedInUse) {
+      for (int j = 0; j < size; j++) {
+        int i = sel[j];
+        output.totalSeconds[i] = totalSeconds[i];
+        output.nanos[i] = nanos[i];
+      }
+    } else {
+      System.arraycopy(totalSeconds, 0, output.totalSeconds, 0, size);
+      System.arraycopy(nanos, 0, output.nanos, 0, size);
+    }
+
+    // Copy nulls over if needed
+    if (!noNulls) {
+      if (selectedInUse) {
+        for (int j = 0; j < size; j++) {
+          int i = sel[j];
+          output.isNull[i] = isNull[i];
+        }
+      } else {
+        System.arraycopy(isNull, 0, output.isNull, 0, size);
+      }
+    }
+  }
+
+  /**
+   * Fill all the vector entries with a HiveIntervalDayTime by marking the
+   * vector as repeating, without nulls, and storing the value at index 0.
+   * @param intervalDayTime the value every row should have
+   */
+  public void fill(HiveIntervalDayTime intervalDayTime) {
+    noNulls = true;
+    isRepeating = true;
+    totalSeconds[0] = intervalDayTime.getTotalSeconds();
+    nanos[0] = intervalDayTime.getNanos();
+  }
+
+  /**
+   * Return a convenience writable object stored by this column vector.
+   * Supports keeping a HiveIntervalDayTimeWritable object without having to
+   * import that definition...
+   * @return the stored writable, or null if the caller has not set one
+   */
+  public Writable getScratchWritable() {
+    return scratchWritable;
+  }
+
+  /**
+   * Set the convenience writable object stored by this column vector
+   * @param scratchWritable the writable to keep
+   */
+  public void setScratchWritable(Writable scratchWritable) {
+    this.scratchWritable = scratchWritable;
+  }
+
+  /**
+   * Appends the row's value, or the text "null" for a NULL row.
+   * Overwrites the scratch HiveIntervalDayTime.
+   *
+   * @param buffer the buffer to print into
+   * @param row the row to print (row 0 is printed when the vector is repeating)
+   */
+  @Override
+  public void stringifyValue(StringBuilder buffer, int row) {
+    if (isRepeating) {
+      row = 0;
+    }
+    if (noNulls || !isNull[row]) {
+      scratchIntervalDayTime.set(totalSeconds[row], nanos[row]);
+      buffer.append(scratchIntervalDayTime.toString());
+    } else {
+      buffer.append("null");
+    }
+  }
+
+  /**
+   * Ensure the vector can hold at least size values. The inherited
+   * implementation only grows the null markers, so without this override a
+   * caller that grows the vector and then writes a row (as
+   * ListColumnVector.setElement does with its child) would index past the
+   * end of the value arrays.
+   *
+   * @param size the new minimum size
+   * @param preserveData should the old data be preserved?
+   */
+  @Override
+  public void ensureSize(int size, boolean preserveData) {
+    super.ensureSize(size, preserveData);
+    if (size <= totalSeconds.length) {
+      return;
+    }
+    long[] oldSeconds = totalSeconds;
+    int[] oldNanos = nanos;
+    totalSeconds = new long[size];
+    nanos = new int[size];
+    if (preserveData) {
+      if (isRepeating) {
+        // Only index 0 is meaningful for a repeating vector.
+        totalSeconds[0] = oldSeconds[0];
+        nanos[0] = oldNanos[0];
+      } else {
+        System.arraycopy(oldSeconds, 0, totalSeconds, 0, oldSeconds.length);
+        System.arraycopy(oldNanos, 0, nanos, 0, oldNanos.length);
+      }
+    }
+  }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java
new file mode 100644
index 0000000..66240dd
--- /dev/null
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java
@@ -0,0 +1,119 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+/**
+ * The representation of a vectorized column of list objects.
+ *
+ * Each list is composed of a range of elements in the underlying child
+ * ColumnVector. The range for list i is
+ * offsets[i]..offsets[i]+lengths[i]-1 inclusive.
+ */
+public class ListColumnVector extends MultiValuedColumnVector {
+
+  /** The vector that holds the elements of every list in this column. */
+  public ColumnVector child;
+
+  /**
+   * Creates a list vector of the default batch size with no child vector;
+   * the child field has to be assigned before the vector is used.
+   */
+  public ListColumnVector() {
+    this(VectorizedRowBatch.DEFAULT_SIZE, null);
+  }
+
+  /**
+   * Constructor for ListColumnVector.
+   *
+   * @param len Vector length
+   * @param child The child vector
+   */
+  public ListColumnVector(int len, ColumnVector child) {
+    super(len);
+    this.child = child;
+  }
+
+  // Forwards a flatten request to the element vector.
+  @Override
+  protected void childFlatten(boolean useSelected, int[] selected, int size) {
+    child.flatten(useSelected, selected, size);
+  }
+
+  /**
+   * Copies one list from another ListColumnVector. A NULL input row marks
+   * the output row NULL; otherwise the list's elements are appended to the
+   * end of this vector's child (growing it as needed) and the output row's
+   * offset and length are pointed at the appended range.
+   *
+   * @param outElementNum the row of this vector to write
+   * @param inputElementNum the row of inputVector to read (row 0 is read
+   *                        instead when the input is repeating)
+   * @param inputVector the ListColumnVector to copy from
+   */
+  @Override
+  public void setElement(int outElementNum, int inputElementNum,
+                         ColumnVector inputVector) {
+    final ListColumnVector source = (ListColumnVector) inputVector;
+    final int sourceRow = source.isRepeating ? 0 : inputElementNum;
+
+    final boolean sourceIsNull = !source.noNulls && source.isNull[sourceRow];
+    if (sourceIsNull) {
+      isNull[outElementNum] = true;
+      noNulls = false;
+      return;
+    }
+
+    isNull[outElementNum] = false;
+    // Read the input range before anything in this vector is updated.
+    final int appendAt = childCount;
+    final int count = (int) source.lengths[sourceRow];
+    final int readFrom = (int) source.offsets[sourceRow];
+    offsets[outElementNum] = appendAt;
+    childCount += count;
+    lengths[outElementNum] = count;
+    child.ensureSize(childCount, true);
+    for (int k = 0; k < count; ++k) {
+      child.setElement(appendAt + k, readFrom + k, source.child);
+    }
+  }
+
+  /**
+   * Appends the row's list as "[e1, e2, ...]", or the text "null" for a
+   * NULL row. Each element is printed by the child vector.
+   *
+   * @param buffer the buffer to print into
+   * @param row the row to print (row 0 is printed when the vector is repeating)
+   */
+  @Override
+  public void stringifyValue(StringBuilder buffer, int row) {
+    final int index = isRepeating ? 0 : row;
+    final boolean rowIsNull = !noNulls && isNull[index];
+    if (rowIsNull) {
+      buffer.append("null");
+      return;
+    }
+    buffer.append('[');
+    final long first = offsets[index];
+    final long end = first + lengths[index];
+    for (long element = first; element < end; ++element) {
+      if (element != first) {
+        buffer.append(", ");
+      }
+      child.stringifyValue(buffer, (int) element);
+    }
+    buffer.append(']');
+  }
+
+  /** Initializes this vector and then its child. */
+  @Override
+  public void init() {
+    super.init();
+    child.init();
+  }
+
+  /** Resets this vector and then its child. */
+  @Override
+  public void reset() {
+    super.reset();
+    child.reset();
+  }
+
+  /**
+   * Restores the pre-flatten flags of this vector and, unless the restored
+   * state is a repeating NULL, those of the child as well.
+   */
+  @Override
+  public void unFlatten() {
+    super.unFlatten();
+    final boolean repeatingNull = isRepeating && !noNulls && isNull[0];
+    if (!repeatingNull) {
+      child.unFlatten();
+    }
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
new file mode 100644
index 0000000..80d4731
--- /dev/null
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
@@ -0,0 +1,224 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+/**
+ * This class represents a nullable int column vector.
+ * This class will be used for operations on all integer types (tinyint, smallint, int, bigint)
+ * and as such will use a 64-bit long value to hold the biggest possible value.
+ * During copy-in/copy-out, smaller int types will be converted as needed. This will
+ * reduce the amount of code that needs to be generated and also will run fast since the
+ * machine operates with 64-bit words.
+ *
+ * The vector[] field is public by design for high-performance access in the inner
+ * loop of query execution.
+ */
+public class LongColumnVector extends ColumnVector {
+   /** Row values; fill() and the repeating paths below only touch entry 0. */
+   public long[] vector;
+   /** Placeholder stored in vector[0] by fillWithNulls(). */
+   public static final long NULL_VALUE = 1;
+
+   /**
+    * Use this constructor by default. All column vectors
+    * should normally be the default size.
+    */
+   public LongColumnVector() {
+     this(VectorizedRowBatch.DEFAULT_SIZE);
+   }
+
+   /**
+    * Don't use this except for testing purposes.
+    *
+    * @param len the number of rows
+    */
+   public LongColumnVector(int len) {
+     super(len);
+     vector = new long[len];
+   }
+
+   /**
+    * Copies this vector's contents into a long output vector. Only the
+    * selected rows are copied when selectedInUse is set; otherwise the first
+    * size rows are copied.
+    *
+    * @param selectedInUse whether sel lists the rows to copy
+    * @param sel the selected row numbers (read only when selectedInUse)
+    * @param size the number of rows (or of sel entries) to copy
+    * @param output the vector that receives the copy
+    */
+   public void copySelected(
+       boolean selectedInUse, int[] sel, int size, LongColumnVector output) {
+
+     // The output has nulls exactly when the input has nulls.
+     output.noNulls = noNulls;
+     output.isRepeating = false;
+
+     // Repeating input: one value and one null flag describe every row.
+     if (isRepeating) {
+       output.vector[0] = vector[0];
+       output.isNull[0] = isNull[0];
+       output.isRepeating = true;
+       return;
+     }
+
+     // Values are copied first, then (only if needed) the null flags.
+     if (selectedInUse) {
+       for (int k = 0; k < size; k++) {
+         final int row = sel[k];
+         output.vector[row] = vector[row];
+       }
+       if (!noNulls) {
+         for (int k = 0; k < size; k++) {
+           final int row = sel[k];
+           output.isNull[row] = isNull[row];
+         }
+       }
+     } else {
+       System.arraycopy(vector, 0, output.vector, 0, size);
+       if (!noNulls) {
+         System.arraycopy(isNull, 0, output.isNull, 0, size);
+       }
+     }
+   }
+
+   /**
+    * Copies this vector's contents into a double output vector, widening each
+    * long value to double. Only the selected rows are copied when
+    * selectedInUse is set; otherwise the first size rows are copied.
+    *
+    * @param selectedInUse whether sel lists the rows to copy
+    * @param sel the selected row numbers (read only when selectedInUse)
+    * @param size the number of rows (or of sel entries) to copy
+    * @param output the vector that receives the copy
+    */
+   public void copySelected(
+       boolean selectedInUse, int[] sel, int size, DoubleColumnVector output) {
+
+     // The output has nulls exactly when the input has nulls.
+     output.noNulls = noNulls;
+     output.isRepeating = false;
+
+     // Repeating input: one value and one null flag describe every row.
+     if (isRepeating) {
+       output.vector[0] = vector[0]; // automatic conversion to double is done here
+       output.isNull[0] = isNull[0];
+       output.isRepeating = true;
+       return;
+     }
+
+     // Values are copied first, then (only if needed) the null flags.
+     if (selectedInUse) {
+       for (int k = 0; k < size; k++) {
+         final int row = sel[k];
+         output.vector[row] = vector[row];
+       }
+       if (!noNulls) {
+         for (int k = 0; k < size; k++) {
+           final int row = sel[k];
+           output.isNull[row] = isNull[row];
+         }
+       }
+     } else {
+       // The element types differ, so a bulk array copy cannot be used here.
+       for (int row = 0; row < size; ++row) {
+         output.vector[row] = vector[row];
+       }
+       if (!noNulls) {
+         System.arraycopy(isNull, 0, output.isNull, 0, size);
+       }
+     }
+   }
+
+   /**
+    * Fills the column vector with the provided value by marking it as a
+    * repeating, null-free vector whose entry 0 is that value.
+    *
+    * @param value the value every row should take
+    */
+   public void fill(long value) {
+     vector[0] = value;
+     isRepeating = true;
+     noNulls = true;
+   }
+
+   /**
+    * Fills the column vector with nulls by marking it as a repeating vector
+    * whose entry 0 is null.
+    */
+   public void fillWithNulls() {
+     isNull[0] = true;
+     vector[0] = NULL_VALUE;
+     isRepeating = true;
+     noNulls = false;
+   }
+
+   /**
+    * Simplifies the vector by brute-force flattening noNulls and isRepeating.
+    * This can be used to reduce combinatorial explosion of code paths in
+    * VectorExpressions with many arguments.
+    *
+    * @param selectedInUse whether sel lists the rows in use
+    * @param sel the selected row numbers (read only when selectedInUse)
+    * @param size the number of rows (or of sel entries) in use
+    */
+   public void flatten(boolean selectedInUse, int[] sel, int size) {
+     flattenPush();
+     if (isRepeating) {
+       isRepeating = false;
+       // Spread the single repeated value over every row in use.
+       final long repeated = vector[0];
+       if (selectedInUse) {
+         for (int k = 0; k < size; k++) {
+           vector[sel[k]] = repeated;
+         }
+       } else {
+         Arrays.fill(vector, 0, size, repeated);
+       }
+       flattenRepeatingNulls(selectedInUse, sel, size);
+     }
+     flattenNoNulls(selectedInUse, sel, size);
+   }
+
+   /**
+    * Copies one row from another LongColumnVector into this vector,
+    * propagating a null instead of the value when the input row is null.
+    *
+    * @param outElementNum the row of this vector to write
+    * @param inputElementNum the input row; row 0 is used for a repeating input
+    * @param inputVector the vector to read; must be a LongColumnVector when
+    *        the input row is not null
+    */
+   @Override
+   public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
+     final int srcRow = inputVector.isRepeating ? 0 : inputElementNum;
+     if (!inputVector.noNulls && inputVector.isNull[srcRow]) {
+       isNull[outElementNum] = true;
+       noNulls = false;
+       return;
+     }
+     isNull[outElementNum] = false;
+     vector[outElementNum] = ((LongColumnVector) inputVector).vector[srcRow];
+   }
+
+   /**
+    * Appends the value of one row to the buffer, or "null" for a null row.
+    *
+    * @param buffer the builder to append to
+    * @param row the row to render; row 0 is used when the vector is repeating
+    */
+   @Override
+   public void stringifyValue(StringBuilder buffer, int row) {
+     final int index = isRepeating ? 0 : row;
+     if (!noNulls && isNull[index]) {
+       buffer.append("null");
+     } else {
+       buffer.append(vector[index]);
+     }
+   }
+
+   /**
+    * Grows the value array to hold at least size rows.
+    *
+    * @param size the required number of rows
+    * @param preserveData whether existing values must survive the resize
+    */
+   @Override
+   public void ensureSize(int size, boolean preserveData) {
+     super.ensureSize(size, preserveData);
+     if (size <= vector.length) {
+       return;
+     }
+     final long[] previous = vector;
+     vector = new long[size];
+     if (!preserveData) {
+       return;
+     }
+     if (isRepeating) {
+       // Only entry 0 is carried over for a repeating vector.
+       vector[0] = previous[0];
+     } else {
+       System.arraycopy(previous, 0, vector, 0 , previous.length);
+     }
+   }
+}
http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java
new file mode 100644
index 0000000..e8421e3
--- /dev/null
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java
@@ -0,0 +1,131 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+/**
+ * The representation of a vectorized column of map objects.
+ *
+ * Each map is composed of a range of elements in the underlying child
+ * ColumnVector. The range for map i is
+ * offsets[i]..offsets[i]+lengths[i]-1 inclusive.
+ */
+public class MapColumnVector extends MultiValuedColumnVector {
+
+   /** Child vector holding the key of every map entry. */
+   public ColumnVector keys;
+   /** Child vector holding the value of every map entry, at the same index as its key. */
+   public ColumnVector values;
+
+   /**
+    * Creates a default-sized vector with no key or value child vectors set.
+    */
+   public MapColumnVector() {
+     this(VectorizedRowBatch.DEFAULT_SIZE, null, null);
+   }
+
+   /**
+    * Constructor for MapColumnVector
+    *
+    * @param len Vector length
+    * @param keys The keys column vector
+    * @param values The values column vector
+    */
+   public MapColumnVector(int len, ColumnVector keys, ColumnVector values) {
+     super(len);
+     this.keys = keys;
+     this.values = values;
+   }
+
+   /**
+    * Flattens both child vectors, keys first and then values.
+    */
+   @Override
+   protected void childFlatten(boolean useSelected, int[] selected, int size) {
+     keys.flatten(useSelected, selected, size);
+     values.flatten(useSelected, selected, size);
+   }
+
+   /**
+    * Copies one map from another MapColumnVector into this vector, appending
+    * its entries after the child slots this vector already uses. A null input
+    * row is propagated as a null.
+    *
+    * @param outElementNum the row of this vector to write
+    * @param inputElementNum the input row; row 0 is used for a repeating input
+    * @param inputVector the vector to read; must be a MapColumnVector when
+    *        the input row is not null
+    */
+   @Override
+   public void setElement(int outElementNum, int inputElementNum,
+       ColumnVector inputVector) {
+     final int srcRow = inputVector.isRepeating ? 0 : inputElementNum;
+     if (!inputVector.noNulls && inputVector.isNull[srcRow]) {
+       isNull[outElementNum] = true;
+       noNulls = false;
+       return;
+     }
+
+     final MapColumnVector source = (MapColumnVector) inputVector;
+     isNull[outElementNum] = false;
+     final int dstStart = childCount;
+     final int count = (int) source.lengths[srcRow];
+     final int srcStart = (int) source.offsets[srcRow];
+     offsets[outElementNum] = dstStart;
+     childCount += count;
+     lengths[outElementNum] = count;
+     // Grow both children (keeping existing data) before copying the entries.
+     keys.ensureSize(childCount, true);
+     values.ensureSize(childCount, true);
+     for (int k = 0; k < count; ++k) {
+       keys.setElement(dstStart + k, srcStart + k, source.keys);
+       values.setElement(dstStart + k, srcStart + k, source.values);
+     }
+   }
+
+   /**
+    * Appends a textual form of one row to the buffer: a bracketed,
+    * comma-separated list of {"key": k, "value": v} entries, or "null" for a
+    * null row.
+    *
+    * @param buffer the builder to append to
+    * @param row the row to render; row 0 is used when the vector is repeating
+    */
+   @Override
+   public void stringifyValue(StringBuilder buffer, int row) {
+     final int index = isRepeating ? 0 : row;
+     if (!noNulls && isNull[index]) {
+       buffer.append("null");
+       return;
+     }
+     buffer.append('[');
+     final long first = offsets[index];
+     final long limit = first + lengths[index];
+     for (long pos = first; pos < limit; ++pos) {
+       if (pos > first) {
+         buffer.append(", ");
+       }
+       buffer.append("{\"key\": ");
+       keys.stringifyValue(buffer, (int) pos);
+       buffer.append(", \"value\": ");
+       values.stringifyValue(buffer, (int) pos);
+       buffer.append('}');
+     }
+     buffer.append(']');
+   }
+
+   /**
+    * Initializes this vector through the superclass, then both child vectors.
+    */
+   @Override
+   public void init() {
+     super.init();
+     keys.init();
+     values.init();
+   }
+
+   /**
+    * Resets this vector through the superclass, then both child vectors.
+    */
+   @Override
+   public void reset() {
+     super.reset();
+     keys.reset();
+     values.reset();
+   }
+
+   /**
+    * Undoes a flatten on this vector and, unless the vector is a single
+    * repeated null, on both child vectors too.
+    */
+   @Override
+   public void unFlatten() {
+     super.unFlatten();
+     // A repeating null row has no child data that needs to be unflattened.
+     final boolean repeatedNull = isRepeating && !noNulls && isNull[0];
+     if (!repeatedNull) {
+       keys.unFlatten();
+       values.unFlatten();
+     }
+   }
+}
http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MultiValuedColumnVector.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MultiValuedColumnVector.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MultiValuedColumnVector.java
new file mode 100644
index 0000000..1aeff83
--- /dev/null
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MultiValuedColumnVector.java
@@ -0,0 +1,150 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.util.Arrays;
+
+/**
+ * The representation of a vectorized column of multi-valued objects, such
+ * as lists and maps.
+ *
+ * Each object is composed of a range of elements in the underlying child
+ * ColumnVector. The range for list i is
+ * offsets[i]..offsets[i]+lengths[i]-1 inclusive.
+ */
+public abstract class MultiValuedColumnVector extends ColumnVector {
+
+   /** Start index, in the child vector(s), of the elements of each row. */
+   public long[] offsets;
+   /** Number of child elements belonging to each row. */
+   public long[] lengths;
+   // the number of children slots used
+   public int childCount;
+
+   /**
+    * Constructor for MultiValuedColumnVector.
+    *
+    * @param len Vector length
+    */
+   public MultiValuedColumnVector(int len) {
+     super(len);
+     childCount = 0;
+     offsets = new long[len];
+     lengths = new long[len];
+   }
+
+   /**
+    * Flattens the child vector(s) of the concrete subclass.
+    *
+    * @param useSelected whether selected lists the child rows in use
+    * @param selected the selected child row numbers (may be null when unused)
+    * @param size the number of child rows (or of selected entries) in use
+    */
+   protected abstract void childFlatten(boolean useSelected, int[] selected,
+       int size);
+
+   /**
+    * Flattens isRepeating and noNulls for this vector and flattens the child
+    * elements that the rows in use refer to.
+    *
+    * @param selectedInUse whether sel lists the rows in use
+    * @param sel the selected row numbers (read only when selectedInUse)
+    * @param size the number of rows (or of sel entries) in use
+    * @throws IllegalArgumentException if the vector is repeating with a
+    *         non-null value whose offset is not 0; the offset, length and
+    *         null arrays are left untouched in that case
+    */
+   @Override
+   public void flatten(boolean selectedInUse, int[] sel, int size) {
+     flattenPush();
+
+     if (isRepeating) {
+       if (noNulls || !isNull[0]) {
+         // We optimize by assuming that a repeating list/map will run from
+         // 0 .. lengths[0] in the child vector.
+         // Sanity check that assumption before touching any row, so that a
+         // violation does not leave the arrays half rewritten.
+         if (offsets[0] != 0) {
+           throw new IllegalArgumentException("Repeating offset isn't 0, but " +
+               offsets[0]);
+         }
+         if (selectedInUse) {
+           for (int i = 0; i < size; ++i) {
+             int row = sel[i];
+             offsets[row] = offsets[0];
+             lengths[row] = lengths[0];
+             isNull[row] = false;
+           }
+         } else {
+           Arrays.fill(offsets, 0, size, offsets[0]);
+           Arrays.fill(lengths, 0, size, lengths[0]);
+           Arrays.fill(isNull, 0, size, false);
+         }
+         childFlatten(false, null, (int) lengths[0]);
+       } else {
+         // A repeated null: mark every row in use as null.
+         if (selectedInUse) {
+           for (int i = 0; i < size; ++i) {
+             isNull[sel[i]] = true;
+           }
+         } else {
+           Arrays.fill(isNull, 0, size, true);
+         }
+       }
+       isRepeating = false;
+       noNulls = false;
+     } else {
+       if (selectedInUse) {
+         // Build the selection of child rows referenced by the selected rows.
+         int childSize = 0;
+         for (int i = 0; i < size; ++i) {
+           childSize += lengths[sel[i]];
+         }
+         int[] childSelection = new int[childSize];
+         int idx = 0;
+         for (int i = 0; i < size; ++i) {
+           int row = sel[i];
+           for (int elem = 0; elem < lengths[row]; ++elem) {
+             childSelection[idx++] = (int) (offsets[row] + elem);
+           }
+         }
+         childFlatten(true, childSelection, childSize);
+       } else {
+         childFlatten(false, null, childCount);
+       }
+       flattenNoNulls(selectedInUse, sel, size);
+     }
+   }
+
+   /**
+    * Grows the offsets and lengths arrays to hold at least size rows.
+    *
+    * @param size the required number of rows
+    * @param preserveData whether existing offsets and lengths must survive
+    */
+   @Override
+   public void ensureSize(int size, boolean preserveData) {
+     super.ensureSize(size, preserveData);
+     if (size > offsets.length) {
+       long[] oldOffsets = offsets;
+       offsets = new long[size];
+       long[] oldLengths = lengths;
+       lengths = new long[size];
+       if (preserveData) {
+         if (isRepeating) {
+           // Only entry 0 is carried over for a repeating vector.
+           offsets[0] = oldOffsets[0];
+           lengths[0] = oldLengths[0];
+         } else {
+           System.arraycopy(oldOffsets, 0, offsets, 0 , oldOffsets.length);
+           System.arraycopy(oldLengths, 0, lengths, 0, oldLengths.length);
+         }
+       }
+     }
+   }
+
+   /**
+    * Initialize the vector; no child slots are in use afterwards.
+    */
+   @Override
+   public void init() {
+     super.init();
+     childCount = 0;
+   }
+
+   /**
+    * Reset the vector for the next batch; no child slots are in use afterwards.
+    */
+   @Override
+   public void reset() {
+     super.reset();
+     childCount = 0;
+   }
+
+}
http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.java
new file mode 100644
index 0000000..cf07bca
--- /dev/null
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.java
@@ -0,0 +1,132 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+/**
+ * The representation of a vectorized column of struct objects.
+ *
+ * Each field is represented by a separate inner ColumnVector. Since this
+ * ColumnVector doesn't own any per row data other than the isNull flag, the
+ * isRepeating only covers the isNull array.
+ */
+public class StructColumnVector extends ColumnVector {
+
+   /** One child vector per struct field, in field order. */
+   public ColumnVector[] fields;
+
+   /**
+    * Creates a default-sized struct vector with no field vectors.
+    */
+   public StructColumnVector() {
+     this(VectorizedRowBatch.DEFAULT_SIZE);
+   }
+
+   /**
+    * Constructor for StructColumnVector
+    *
+    * @param len Vector length
+    * @param fields the field column vectors
+    */
+   public StructColumnVector(int len, ColumnVector... fields) {
+     super(len);
+     this.fields = fields;
+   }
+
+   /**
+    * Flattens every field vector with the same selection and then flattens
+    * the null information of this vector.
+    */
+   @Override
+   public void flatten(boolean selectedInUse, int[] sel, int size) {
+     flattenPush();
+     for (ColumnVector field : fields) {
+       field.flatten(selectedInUse, sel, size);
+     }
+     flattenNoNulls(selectedInUse, sel, size);
+   }
+
+   /**
+    * Copies one struct row from another StructColumnVector into this vector,
+    * field by field. A null input row is propagated as a null.
+    *
+    * @param outElementNum the row of this vector to write
+    * @param inputElementNum the input row; row 0 is used for a repeating input
+    * @param inputVector the vector to read; must be a StructColumnVector when
+    *        the input row is not null
+    */
+   @Override
+   public void setElement(int outElementNum, int inputElementNum,
+       ColumnVector inputVector) {
+     final int srcRow = inputVector.isRepeating ? 0 : inputElementNum;
+     if (!inputVector.noNulls && inputVector.isNull[srcRow]) {
+       noNulls = false;
+       isNull[outElementNum] = true;
+       return;
+     }
+     isNull[outElementNum] = false;
+     final ColumnVector[] sourceFields = ((StructColumnVector) inputVector).fields;
+     for (int f = 0; f < sourceFields.length; ++f) {
+       fields[f].setElement(outElementNum, srcRow, sourceFields[f]);
+     }
+   }
+
+   /**
+    * Appends a textual form of one row to the buffer: the field values
+    * separated by ", " inside square brackets, or "null" for a null row.
+    *
+    * @param buffer the builder to append to
+    * @param row the row to render; row 0 is used when the vector is repeating
+    */
+   @Override
+   public void stringifyValue(StringBuilder buffer, int row) {
+     final int index = isRepeating ? 0 : row;
+     if (!noNulls && isNull[index]) {
+       buffer.append("null");
+       return;
+     }
+     buffer.append('[');
+     for (int f = 0; f < fields.length; ++f) {
+       if (f > 0) {
+         buffer.append(", ");
+       }
+       fields[f].stringifyValue(buffer, index);
+     }
+     buffer.append(']');
+   }
+
+   /**
+    * Grows this vector and every field vector to hold at least size rows.
+    */
+   @Override
+   public void ensureSize(int size, boolean preserveData) {
+     super.ensureSize(size, preserveData);
+     for (ColumnVector field : fields) {
+       field.ensureSize(size, preserveData);
+     }
+   }
+
+   /**
+    * Resets this vector through the superclass, then every field vector.
+    */
+   @Override
+   public void reset() {
+     super.reset();
+     for (ColumnVector field : fields) {
+       field.reset();
+     }
+   }
+
+   /**
+    * Initializes this vector through the superclass, then every field vector.
+    */
+   @Override
+   public void init() {
+     super.init();
+     for (ColumnVector field : fields) {
+       field.init();
+     }
+   }
+
+   /**
+    * Undoes a flatten on this vector and on every field vector.
+    */
+   @Override
+   public void unFlatten() {
+     super.unFlatten();
+     for (ColumnVector field : fields) {
+       field.unFlatten();
+     }
+   }
+
+   /**
+    * Applies the repeating flag to this vector and to every field vector.
+    */
+   @Override
+   public void setRepeating(boolean isRepeating) {
+     super.setRepeating(isRepeating);
+     for (ColumnVector field : fields) {
+       field.setRepeating(isRepeating);
+     }
+   }
+}
http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
new file mode 100644
index 0000000..228461a
--- /dev/null
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
@@ -0,0 +1,400 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.sql.Timestamp;
+import java.util.Arrays;
+
+import org.apache.hadoop.io.Writable;
+
+/**
+ * This class represents a nullable timestamp column vector capable of handing a wide range of
+ * timestamp values.
+ *
+ * We store the 2 (value) fields of a Timestamp class in primitive arrays.
+ *
+ * We do this to avoid an array of Java Timestamp objects which would have poor storage
+ * and memory access characteristics.
+ *
+ * Generally, the caller will fill in a scratch timestamp object with values from a row, work
+ * using the scratch timestamp, and then perhaps update the column vector row with a result.
+ */
+public class TimestampColumnVector extends ColumnVector {
+
+   /*
+    * The storage arrays for this column vector corresponds to the storage of a Timestamp:
+    */
+   // The values from Timestamp.getTime().
+   public long[] time;
+
+   // The values from Timestamp.getNanos().
+   public int[] nanos;
+
+   /*
+    * Scratch objects.
+    */
+   private final Timestamp scratchTimestamp;
+
+   // Supports keeping a TimestampWritable object without having to import that definition...
+   private Writable scratchWritable;
+
+   /**
+    * Use this constructor by default. All column vectors
+    * should normally be the default size.
+    */
+   public TimestampColumnVector() {
+     this(VectorizedRowBatch.DEFAULT_SIZE);
+   }
+
+   /**
+    * Don't use this except for testing purposes.
+    *
+    * @param len the number of rows
+    */
+   public TimestampColumnVector(int len) {
+     super(len);
+
+     time = new long[len];
+     nanos = new int[len];
+
+     scratchTimestamp = new Timestamp(0);
+
+     scratchWritable = null; // Allocated by caller.
+   }
+
+   /**
+    * Return the number of rows.
+    * @return the length of the time array
+    */
+   public int getLength() {
+     return time.length;
+   }
+
+   /**
+    * Return a row's Timestamp.getTime() value.
+    * We assume the entry has already been NULL checked and isRepeated adjusted.
+    * @param elementNum the row to read
+    * @return the stored time value of the row
+    */
+   public long getTime(int elementNum) {
+     return time[elementNum];
+   }
+
+   /**
+    * Return a row's Timestamp.getNanos() value.
+    * We assume the entry has already been NULL checked and isRepeated adjusted.
+    * @param elementNum the row to read
+    * @return the stored nanos value of the row
+    */
+   public int getNanos(int elementNum) {
+     return nanos[elementNum];
+   }
+
+   /**
+    * Set a Timestamp object from a row of the column.
+    * We assume the entry has already been NULL checked and isRepeated adjusted.
+    * @param timestamp the object to overwrite with the row's value
+    * @param elementNum the row to read
+    */
+   public void timestampUpdate(Timestamp timestamp, int elementNum) {
+     timestamp.setTime(time[elementNum]);
+     timestamp.setNanos(nanos[elementNum]);
+   }
+
+   /**
+    * Return the scratch Timestamp object set from a row.
+    * We assume the entry has already been NULL checked and isRepeated adjusted.
+    * The returned object is shared: it is overwritten by the next call that
+    * uses the scratch timestamp.
+    * @param elementNum the row to read
+    * @return the scratch timestamp, holding the row's value
+    */
+   public Timestamp asScratchTimestamp(int elementNum) {
+     scratchTimestamp.setTime(time[elementNum]);
+     scratchTimestamp.setNanos(nanos[elementNum]);
+     return scratchTimestamp;
+   }
+
+   /**
+    * Return the scratch timestamp (contents undefined).
+    * @return the shared scratch timestamp object
+    */
+   public Timestamp getScratchTimestamp() {
+     return scratchTimestamp;
+   }
+
+   /**
+    * Return a long representation of a row's Timestamp: its whole seconds
+    * relative to the epoch, rounded down. Overwrites the scratch timestamp.
+    * @param elementNum the row to read
+    * @return the row's timestamp in seconds
+    */
+   public long getTimestampAsLong(int elementNum) {
+     scratchTimestamp.setTime(time[elementNum]);
+     scratchTimestamp.setNanos(nanos[elementNum]);
+     return getTimestampAsLong(scratchTimestamp);
+   }
+
+   /**
+    * Return a long representation of a Timestamp: its whole seconds relative
+    * to the epoch, rounded down.
+    * @param timestamp the timestamp to convert
+    * @return the timestamp in seconds
+    */
+   public static long getTimestampAsLong(Timestamp timestamp) {
+     return millisToSeconds(timestamp.getTime());
+   }
+
+   // Copy of TimestampWritable.millisToSeconds
+   /**
+    * Rounds the number of milliseconds relative to the epoch down to the nearest whole number of
+    * seconds. 500 would round to 0, -500 would round to -1.
+    */
+   private static long millisToSeconds(long millis) {
+     if (millis >= 0) {
+       return millis / 1000;
+     } else {
+       return (millis - 999) / 1000;
+     }
+   }
+
+   /**
+    * Return a double representation of a row's Timestamp. Overwrites the
+    * scratch timestamp.
+    * @param elementNum the row to read
+    * @return the whole seconds plus the nanos expressed as a fraction of a second
+    */
+   public double getDouble(int elementNum) {
+     scratchTimestamp.setTime(time[elementNum]);
+     scratchTimestamp.setNanos(nanos[elementNum]);
+     return getDouble(scratchTimestamp);
+   }
+
+   /**
+    * Return a double representation of a Timestamp.
+    * @param timestamp the timestamp to convert
+    * @return the whole seconds plus the nanos expressed as a fraction of a second
+    */
+   public static double getDouble(Timestamp timestamp) {
+     // Same algorithm as TimestampWritable (not currently import-able here).
+     double seconds, nanos;
+     seconds = millisToSeconds(timestamp.getTime());
+     nanos = timestamp.getNanos();
+     return seconds + nanos / 1000000000;
+   }
+
+   /**
+    * Compare row to Timestamp.
+    * We assume the entry has already been NULL checked and isRepeated adjusted.
+    * @param elementNum the row of this vector
+    * @param timestamp the timestamp to compare the row against
+    * @return -1, 0, 1 standard compareTo values.
+    */
+   public int compareTo(int elementNum, Timestamp timestamp) {
+     return asScratchTimestamp(elementNum).compareTo(timestamp);
+   }
+
+   /**
+    * Compare Timestamp to row.
+    * We assume the entry has already been NULL checked and isRepeated adjusted.
+    * @param timestamp the timestamp on the left side of the comparison
+    * @param elementNum the row of this vector on the right side
+    * @return -1, 0, 1 standard compareTo values.
+    */
+   public int compareTo(Timestamp timestamp, int elementNum) {
+     return timestamp.compareTo(asScratchTimestamp(elementNum));
+   }
+
+   /**
+    * Compare a row to another TimestampColumnVector's row.
+    * @param elementNum1 the row of this vector
+    * @param timestampColVector2 the other vector (may be this vector itself)
+    * @param elementNum2 the row of the other vector
+    * @return -1, 0, 1 standard compareTo values.
+    */
+   public int compareTo(int elementNum1, TimestampColumnVector timestampColVector2,
+       int elementNum2) {
+     Timestamp right = otherOperand(timestampColVector2, elementNum2);
+     return asScratchTimestamp(elementNum1).compareTo(right);
+   }
+
+   /**
+    * Compare another TimestampColumnVector's row to a row.
+    * @param timestampColVector1 the other vector (may be this vector itself)
+    * @param elementNum1 the row of the other vector
+    * @param elementNum2 the row of this vector
+    * @return -1, 0, 1 standard compareTo values.
+    */
+   public int compareTo(TimestampColumnVector timestampColVector1, int elementNum1,
+       int elementNum2) {
+     Timestamp left = otherOperand(timestampColVector1, elementNum1);
+     return left.compareTo(asScratchTimestamp(elementNum2));
+   }
+
+   /**
+    * Returns a Timestamp holding a row of the other vector that does not
+    * alias this vector's scratch timestamp. When the other vector is this
+    * vector, both operands would otherwise be the same scratch object and
+    * every comparison would yield 0, so a fresh Timestamp is used instead.
+    */
+   private Timestamp otherOperand(TimestampColumnVector other, int elementNum) {
+     if (other != this) {
+       return other.asScratchTimestamp(elementNum);
+     }
+     Timestamp copy = new Timestamp(0);
+     timestampUpdate(copy, elementNum);
+     return copy;
+   }
+
+   /**
+    * Copies one row from another TimestampColumnVector into this vector,
+    * propagating a null instead of the value when the input row is null.
+    *
+    * @param outElementNum the row of this vector to write
+    * @param inputElementNum the input row; row 0 is used for a repeating input
+    * @param inputVector the vector to read; must be a TimestampColumnVector
+    *        when the input row is not null
+    */
+   @Override
+   public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
+     // A repeating input only carries meaningful data in row 0.
+     if (inputVector.isRepeating) {
+       inputElementNum = 0;
+     }
+     if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) {
+       TimestampColumnVector timestampColVector = (TimestampColumnVector) inputVector;
+       isNull[outElementNum] = false;
+       time[outElementNum] = timestampColVector.time[inputElementNum];
+       nanos[outElementNum] = timestampColVector.nanos[inputElementNum];
+     } else {
+       isNull[outElementNum] = true;
+       noNulls = false;
+     }
+   }
+
+   /**
+    * Simplifies the vector by brute-force flattening noNulls and isRepeating.
+    * This can be used to reduce combinatorial explosion of code paths in
+    * VectorExpressions with many arguments.
+    *
+    * @param selectedInUse whether sel lists the rows in use
+    * @param sel the selected row numbers (read only when selectedInUse)
+    * @param size the number of rows (or of sel entries) in use
+    */
+   public void flatten(boolean selectedInUse, int[] sel, int size) {
+     flattenPush();
+     if (isRepeating) {
+       isRepeating = false;
+       // Spread the single repeated value over every row in use.
+       long repeatFastTime = time[0];
+       int repeatNanos = nanos[0];
+       if (selectedInUse) {
+         for (int j = 0; j < size; j++) {
+           int i = sel[j];
+           time[i] = repeatFastTime;
+           nanos[i] = repeatNanos;
+         }
+       } else {
+         Arrays.fill(time, 0, size, repeatFastTime);
+         Arrays.fill(nanos, 0, size, repeatNanos);
+       }
+       flattenRepeatingNulls(selectedInUse, sel, size);
+     }
+     flattenNoNulls(selectedInUse, sel, size);
+   }
+
+   /**
+    * Set a row from a timestamp.
+    * We assume the entry has already been isRepeated adjusted.
+    * A null timestamp marks the row as null; a non-null timestamp stores its
+    * value but leaves the row's isNull flag as it was.
+    * @param elementNum the row to write
+    * @param timestamp the value to store, or null to mark the row null
+    */
+   public void set(int elementNum, Timestamp timestamp) {
+     if (timestamp == null) {
+       this.noNulls = false;
+       this.isNull[elementNum] = true;
+     } else {
+       this.time[elementNum] = timestamp.getTime();
+       this.nanos[elementNum] = timestamp.getNanos();
+     }
+   }
+
+   /**
+    * Set a row from the current value in the scratch timestamp.
+    * @param elementNum the row to write
+    */
+   public void setFromScratchTimestamp(int elementNum) {
+     this.time[elementNum] = scratchTimestamp.getTime();
+     this.nanos[elementNum] = scratchTimestamp.getNanos();
+   }
+
+   /**
+    * Set row to standard null value(s).
+    * We assume the entry has already been isRepeated adjusted.
+    * Only the stored values are changed; the isNull flag is not touched.
+    * @param elementNum the row to write
+    */
+   public void setNullValue(int elementNum) {
+     time[elementNum] = 0;
+     nanos[elementNum] = 1;
+   }
+
+   /**
+    * Copies this vector's contents into the output. Only the selected rows
+    * are copied when selectedInUse is set; otherwise the first size rows are
+    * copied.
+    *
+    * @param selectedInUse whether sel lists the rows to copy
+    * @param sel the selected row numbers (read only when selectedInUse)
+    * @param size the number of rows (or of sel entries) to copy
+    * @param output the vector that receives the copy
+    */
+   public void copySelected(
+       boolean selectedInUse, int[] sel, int size, TimestampColumnVector output) {
+
+     // Output has nulls if and only if input has nulls.
+     output.noNulls = noNulls;
+     output.isRepeating = false;
+
+     // Handle repeating case
+     if (isRepeating) {
+       output.time[0] = time[0];
+       output.nanos[0] = nanos[0];
+       output.isNull[0] = isNull[0];
+       output.isRepeating = true;
+       return;
+     }
+
+     // Handle normal case
+
+     // Copy data values over
+     if (selectedInUse) {
+       for (int j = 0; j < size; j++) {
+         int i = sel[j];
+         output.time[i] = time[i];
+         output.nanos[i] = nanos[i];
+       }
+     } else {
+       System.arraycopy(time, 0, output.time, 0, size);
+       System.arraycopy(nanos, 0, output.nanos, 0, size);
+     }
+
+     // Copy nulls over if needed
+     if (!noNulls) {
+       if (selectedInUse) {
+         for (int j = 0; j < size; j++) {
+           int i = sel[j];
+           output.isNull[i] = isNull[i];
+         }
+       } else {
+         System.arraycopy(isNull, 0, output.isNull, 0, size);
+       }
+     }
+   }
+
+   /**
+    * Fill all the vector entries with a timestamp by marking the vector as
+    * repeating and null-free with the timestamp stored in entry 0.
+    * @param timestamp the value every row should take
+    */
+   public void fill(Timestamp timestamp) {
+     noNulls = true;
+     isRepeating = true;
+     time[0] = timestamp.getTime();
+     nanos[0] = timestamp.getNanos();
+   }
+
+   /**
+    * Return a convenience writable object stored by this column vector.
+    * Supports keeping a TimestampWritable object without having to import that definition...
+    * @return the stored writable, or null if none has been set
+    */
+   public Writable getScratchWritable() {
+     return scratchWritable;
+   }
+
+   /**
+    * Set the convenience writable object stored by this column vector
+    * @param scratchWritable the writable to keep
+    */
+   public void setScratchWritable(Writable scratchWritable) {
+     this.scratchWritable = scratchWritable;
+   }
+
+   /**
+    * Appends the Timestamp.toString() form of one row to the buffer, or
+    * "null" for a null row. Overwrites the scratch timestamp.
+    *
+    * @param buffer the builder to append to
+    * @param row the row to render; row 0 is used when the vector is repeating
+    */
+   @Override
+   public void stringifyValue(StringBuilder buffer, int row) {
+     if (isRepeating) {
+       row = 0;
+     }
+     if (noNulls || !isNull[row]) {
+       scratchTimestamp.setTime(time[row]);
+       scratchTimestamp.setNanos(nanos[row]);
+       buffer.append(scratchTimestamp.toString());
+     } else {
+       buffer.append("null");
+     }
+   }
+
+   /**
+    * Grows the time and nanos arrays to hold at least size rows, so that they
+    * stay as large as the null flags resized by the superclass.
+    *
+    * @param size the required number of rows
+    * @param preserveData whether existing values must survive the resize
+    */
+   @Override
+   public void ensureSize(int size, boolean preserveData) {
+     super.ensureSize(size, preserveData);
+     if (size <= time.length) {
+       return;
+     }
+     long[] oldTime = time;
+     int[] oldNanos = nanos;
+     time = new long[size];
+     nanos = new int[size];
+     if (preserveData) {
+       if (isRepeating) {
+         // Only entry 0 is carried over for a repeating vector.
+         time[0] = oldTime[0];
+         nanos[0] = oldNanos[0];
+       } else {
+         System.arraycopy(oldTime, 0, time, 0, oldTime.length);
+         System.arraycopy(oldNanos, 0, nanos, 0, oldNanos.length);
+       }
+     }
+   }
+}
http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java
new file mode 100644
index 0000000..0c61243
--- /dev/null
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java
@@ -0,0 +1,140 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+/**
+ * The representation of a vectorized column of union objects.
+ *
+ * Each row carries a tag in {@link #tags} that selects which of the inner
+ * {@link #fields} vectors holds that row's value. The only per-row data this
+ * ColumnVector owns itself is the tag and the isNull flag; the values live in
+ * the field vectors. When isRepeating is set, row 0 is used for both the
+ * null flag and the tag, and {@link #setRepeating(boolean)} pushes the flag
+ * down to every field vector.
+ */
+public class UnionColumnVector extends ColumnVector {
+
+  /** Per-row index into {@link #fields} naming the variant stored in that row. */
+  public int[] tags;
+  /** One inner vector per union variant. */
+  public ColumnVector[] fields;
+
+  /**
+   * Create a union vector of the default batch size with no field vectors.
+   */
+  public UnionColumnVector() {
+    this(VectorizedRowBatch.DEFAULT_SIZE);
+  }
+
+  /**
+   * Constructor for UnionColumnVector
+   *
+   * @param len Vector length
+   * @param fields the field column vectors
+   */
+  public UnionColumnVector(int len, ColumnVector... fields) {
+    super(len);
+    tags = new int[len];
+    this.fields = fields;
+  }
+
+  /**
+   * Flatten every field vector, bracketed by the base-class flattenPush and
+   * flattenNoNulls helpers for this vector's own state.
+   */
+  @Override
+  public void flatten(boolean selectedInUse, int[] sel, int size) {
+    flattenPush();
+    for (ColumnVector field : fields) {
+      field.flatten(selectedInUse, sel, size);
+    }
+    flattenNoNulls(selectedInUse, sel, size);
+  }
+
+  /**
+   * Copy one row from another union vector into this one.
+   *
+   * @param outElementNum the row of this vector to write
+   * @param inputElementNum the row of the input to read (row 0 is used
+   *                        instead when the input is repeating)
+   * @param inputVector the source vector; it is cast to UnionColumnVector
+   *                    when the source row is not null
+   */
+  @Override
+  public void setElement(int outElementNum, int inputElementNum,
+                         ColumnVector inputVector) {
+    if (inputVector.isRepeating) {
+      inputElementNum = 0;
+    }
+    if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) {
+      isNull[outElementNum] = false;
+      UnionColumnVector input = (UnionColumnVector) inputVector;
+      int tag = input.tags[inputElementNum];
+      tags[outElementNum] = tag;
+      // Only the variant selected by the tag carries a value for this row.
+      fields[tag].setElement(outElementNum, inputElementNum, input.fields[tag]);
+    } else {
+      noNulls = false;
+      isNull[outElementNum] = true;
+    }
+  }
+
+  /**
+   * Append one row as {"tag": t, "value": v}, or the literal null for a
+   * null row. A repeating vector always renders row 0.
+   */
+  @Override
+  public void stringifyValue(StringBuilder buffer, int row) {
+    if (isRepeating) {
+      row = 0;
+    }
+    if (noNulls || !isNull[row]) {
+      buffer.append("{\"tag\": ");
+      buffer.append(tags[row]);
+      buffer.append(", \"value\": ");
+      fields[tags[row]].stringifyValue(buffer, row);
+      buffer.append('}');
+    } else {
+      buffer.append("null");
+    }
+  }
+
+  /**
+   * Make sure this vector, its tags and every field vector can hold at
+   * least the given number of rows.
+   *
+   * @param size the required capacity in rows
+   * @param preserveData whether existing tags must be copied into the
+   *                     grown array; also forwarded to the base class and
+   *                     to each field vector
+   */
+  @Override
+  public void ensureSize(int size, boolean preserveData) {
+    super.ensureSize(size, preserveData);
+    if (tags.length < size) {
+      int[] grown = new int[size];
+      if (preserveData) {
+        System.arraycopy(tags, 0, grown, 0, tags.length);
+      }
+      tags = grown;
+    }
+    // Resize the fields regardless of whether tags had to grow: a field
+    // vector may have been created shorter than this vector. This relies on
+    // each field's ensureSize being a no-op when it is already large enough
+    // (NOTE(review): confirm against the ColumnVector implementations).
+    for (ColumnVector field : fields) {
+      field.ensureSize(size, preserveData);
+    }
+  }
+
+  /** Reset this vector via the base class, then reset every field vector. */
+  @Override
+  public void reset() {
+    super.reset();
+    for (ColumnVector field : fields) {
+      field.reset();
+    }
+  }
+
+  /** Initialise this vector via the base class, then every field vector. */
+  @Override
+  public void init() {
+    super.init();
+    for (ColumnVector field : fields) {
+      field.init();
+    }
+  }
+
+  /** Undo a flatten on this vector and on every field vector. */
+  @Override
+  public void unFlatten() {
+    super.unFlatten();
+    for (ColumnVector field : fields) {
+      field.unFlatten();
+    }
+  }
+
+  /** Set the repeating flag on this vector and on every field vector. */
+  @Override
+  public void setRepeating(boolean isRepeating) {
+    super.setRepeating(isRepeating);
+    for (ColumnVector field : fields) {
+      field.setRepeating(isRepeating);
+    }
+  }
+}
http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
new file mode 100644
index 0000000..9c066e0
--- /dev/null
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
@@ -0,0 +1,218 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * A VectorizedRowBatch is a set of rows, organized with each column
+ * as a vector. It is the unit of query execution, organized to minimize
+ * the cost per row and achieve high instructions-per-cycle.
+ * The major fields are public by design to allow fast and convenient
+ * access by the vectorized query execution code.
+ */
+public class VectorizedRowBatch implements Writable {
+  public int numCols;           // number of columns
+  public ColumnVector[] cols;   // a vector for each column
+  public int size;              // number of rows that qualify (i.e. haven't been filtered out)
+  public int[] selected;        // array of positions of selected values
+  public int[] projectedColumns;
+  public int projectionSize;
+
+  private int dataColumnCount;
+  private int partitionColumnCount;
+
+
+  /*
+   * If no filtering has been applied yet, selectedInUse is false,
+   * meaning that all rows qualify. If it is true, then the selected[] array
+   * records the offsets of qualifying rows.
+   */
+  public boolean selectedInUse;
+
+  // If this is true, then there is no data in the batch -- we have hit the end of input.
+  public boolean endOfFile;
+
+  /*
+   * This number is carefully chosen to minimize overhead and typically allows
+   * one VectorizedRowBatch to fit in cache.
+   */
+  public static final int DEFAULT_SIZE = 1024;
+
+  /**
+   * Return a batch with the specified number of columns.
+   * This is the standard constructor -- all batches should be the same size
+   *
+   * @param numCols the number of columns to include in the batch
+   */
+  public VectorizedRowBatch(int numCols) {
+    this(numCols, DEFAULT_SIZE);
+  }
+
+  /**
+   * Return a batch with the specified number of columns and rows.
+   * Only call this constructor directly for testing purposes.
+   * Batch size should normally always be defaultSize.
+   *
+   * The column slots are allocated but left null, and the partition counts
+   * start at -1 until {@link #setPartitionInfo(int, int)} is called.
+   *
+   * @param numCols the number of columns to include in the batch
+   * @param size the number of rows to include in the batch
+   */
+  public VectorizedRowBatch(int numCols, int size) {
+    this.numCols = numCols;
+    // Note: the row count starts out equal to the capacity, not zero.
+    this.size = size;
+    selected = new int[size];
+    selectedInUse = false;
+    this.cols = new ColumnVector[numCols];
+    projectedColumns = new int[numCols];
+
+    // Initially all columns are projected and in the same order
+    projectionSize = numCols;
+    for (int i = 0; i < numCols; i++) {
+      projectedColumns[i] = i;
+    }
+
+    dataColumnCount = -1;
+    partitionColumnCount = -1;
+  }
+
+  /**
+   * Record how many of the columns are data columns and how many are
+   * partition columns.
+   */
+  public void setPartitionInfo(int dataColumnCount, int partitionColumnCount) {
+    this.dataColumnCount = dataColumnCount;
+    this.partitionColumnCount = partitionColumnCount;
+  }
+
+  /**
+   * @return the data column count, or -1 if setPartitionInfo was never called
+   */
+  public int getDataColumnCount() {
+    return dataColumnCount;
+  }
+
+  /**
+   * @return the partition column count, or -1 if setPartitionInfo was never called
+   */
+  public int getPartitionColumnCount() {
+    return partitionColumnCount;
+  }
+
+  /**
+   * Returns the maximum size of the batch (number of rows it can hold),
+   * which is the length of the selected[] array.
+   */
+  public int getMaxSize() {
+    return selected.length;
+  }
+
+  /**
+   * Return count of qualifying rows.
+   *
+   * @return number of rows that have not been filtered out
+   */
+  public long count() {
+    return size;
+  }
+
+  private static String toUTF8(Object o) {
+    if(o == null || o instanceof NullWritable) {
+      return "\\N"; /* as found in LazySimpleSerDe's nullSequence */
+    }
+    return o.toString();
+  }
+
+  /**
+   * Render the qualifying rows, one bracketed row per line, with the
+   * projected columns separated by ", ". An empty batch renders as "".
+   * A null column contributes nothing between its separators, whether or
+   * not the selected[] array is in use.
+   */
+  @Override
+  public String toString() {
+    if (size == 0) {
+      return "";
+    }
+    StringBuilder b = new StringBuilder();
+    for (int j = 0; j < size; j++) {
+      if (j > 0) {
+        b.append('\n');
+      }
+      // With a selection in effect, j indexes selected[]; otherwise it is the row.
+      appendRow(b, selectedInUse ? selected[j] : j);
+    }
+    return b.toString();
+  }
+
+  /** Append one row's projected column values as "[v0, v1, ...]". */
+  private void appendRow(StringBuilder b, int row) {
+    b.append('[');
+    for (int k = 0; k < projectionSize; k++) {
+      if (k > 0) {
+        b.append(", ");
+      }
+      ColumnVector cv = cols[projectedColumns[k]];
+      // Column slots may be unpopulated; skip them rather than fail.
+      if (cv != null) {
+        cv.stringifyValue(b, row);
+      }
+    }
+    b.append(']');
+  }
+
+  /**
+   * Not supported.
+   *
+   * @throws UnsupportedOperationException always
+   */
+  @Override
+  public void readFields(DataInput arg0) throws IOException {
+    throw new UnsupportedOperationException("Do you really need me?");
+  }
+
+  /**
+   * Not supported.
+   *
+   * @throws UnsupportedOperationException always
+   */
+  @Override
+  public void write(DataOutput arg0) throws IOException {
+    throw new UnsupportedOperationException("Don't call me");
+  }
+
+  /**
+   * Resets the row batch to default state
+   *  - sets selectedInUse to false
+   *  - sets size to 0
+   *  - sets endOfFile to false
+   *  - resets each column
+   *  - inits each column
+   */
+  public void reset() {
+    selectedInUse = false;
+    size = 0;
+    endOfFile = false;
+    for (ColumnVector vc : cols) {
+      if (vc != null) {
+        vc.reset();
+        vc.init();
+      }
+    }
+  }
+
+  /**
+   * Set the maximum number of rows in the batch.
+   * Column data is not preserved. Null column slots are skipped, and the
+   * selected[] array is grown (keeping its current entries) so that
+   * {@link #getMaxSize()} reflects the new capacity.
+   *
+   * @param rows the number of rows the batch must be able to hold
+   */
+  public void ensureSize(int rows) {
+    if (selected.length < rows) {
+      int[] grown = new int[rows];
+      System.arraycopy(selected, 0, grown, 0, selected.length);
+      selected = grown;
+    }
+    for (ColumnVector vc : cols) {
+      if (vc != null) {
+        vc.ensureSize(rows, false);
+      }
+    }
+  }
+}