You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ji...@apache.org on 2014/03/03 20:28:18 UTC
svn commit: r1573687 [1/3] - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/exec/vector/
java/org/apache/hadoop/hive/ql/exec/vector/expressions/
java/org/apache/hadoop/hive/ql/exec/vector/udf/
java/org/apache/hadoop/hive/ql/io/orc/ java/org/apac...
Author: jitendra
Date: Mon Mar 3 19:28:17 2014
New Revision: 1573687
URL: http://svn.apache.org/r1573687
Log:
HIVE-5761 : Implement vectorized support for the DATE data type (Teddy Choi via jitendra)
Added:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateLong.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateString.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateSubColCol.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateSubColScalar.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateSubScalarCol.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorGenericDateExpressions.java
hive/trunk/ql/src/test/queries/clientpositive/vectorized_date_funcs.q
hive/trunk/ql/src/test/results/clientpositive/vectorized_date_funcs.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthLong.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldLong.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampLong.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearLong.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDate.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateAdd.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateDiff.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExpressions.java
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1573687&r1=1573686&r2=1573687&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java Mon Mar 3 19:28:17 2014
@@ -727,11 +727,13 @@ public class VectorizationContext {
private VectorExpression createVectorExpression(Class<?> vectorClass,
List<ExprNodeDesc> childExpr, Mode childrenMode, TypeInfo returnType) throws HiveException {
int numChildren = childExpr == null ? 0: childExpr.size();
+ VectorExpression.Type [] inputTypes = new VectorExpression.Type[numChildren];
List<VectorExpression> children = new ArrayList<VectorExpression>();
Object[] arguments = new Object[numChildren];
try {
for (int i = 0; i < numChildren; i++) {
ExprNodeDesc child = childExpr.get(i);
+ inputTypes[i] = VectorExpression.Type.getValue(child.getTypeInfo().getTypeName());
if (child instanceof ExprNodeGenericFuncDesc) {
VectorExpression vChild = getVectorExpression(child, childrenMode);
children.add(vChild);
@@ -751,6 +753,7 @@ public class VectorizationContext {
}
}
VectorExpression vectorExpression = instantiateExpression(vectorClass, returnType, arguments);
+ vectorExpression.setInputTypes(inputTypes);
if ((vectorExpression != null) && !children.isEmpty()) {
vectorExpression.setChildExpressions(children.toArray(new VectorExpression[0]));
}
@@ -1234,7 +1237,7 @@ public class VectorizationContext {
}
public static boolean isDatetimeFamily(String resultType) {
- return resultType.equalsIgnoreCase("timestamp");
+ return resultType.equalsIgnoreCase("timestamp") || resultType.equalsIgnoreCase("date");
}
// return true if this is any kind of float
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java?rev=1573687&r1=1573686&r2=1573687&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java Mon Mar 3 19:28:17 2014
@@ -23,6 +23,7 @@ import java.util.List;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
@@ -147,6 +148,17 @@ public class VectorizedBatchUtil {
}
}
break;
+ case DATE: {
+ LongColumnVector lcv = (LongColumnVector) batch.cols[i];
+ if (writableCol != null) {
+ lcv.vector[rowIndex] = ((DateWritable) writableCol).getDays();
+ lcv.isNull[rowIndex] = false;
+ } else {
+ lcv.vector[rowIndex] = 1;
+ SetNullColIsNullValue(lcv, rowIndex);
+ }
+ }
+ break;
case FLOAT: {
DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[i];
if (writableCol != null) {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java?rev=1573687&r1=1573686&r2=1573687&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java Mon Mar 3 19:28:17 2014
@@ -29,7 +29,9 @@ import org.apache.hadoop.hive.serde2.Ser
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.hive.serde2.lazy.LazyDate;
import org.apache.hadoop.hive.serde2.lazy.LazyLong;
import org.apache.hadoop.hive.serde2.lazy.LazyTimestamp;
import org.apache.hadoop.hive.serde2.lazy.LazyUtils;
@@ -166,6 +168,11 @@ public class VectorizedColumnarSerDe ext
tw.set(t);
LazyTimestamp.writeUTF8(serializeVectorStream, tw);
break;
+ case DATE:
+ LongColumnVector dacv = (LongColumnVector) batch.cols[k];
+ DateWritable daw = new DateWritable((int) dacv.vector[rowIndex]);
+ LazyDate.writeUTF8(serializeVectorStream, daw);
+ break;
default:
throw new UnsupportedOperationException(
"Vectorizaton is not supported for datatype:"
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java?rev=1573687&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java Mon Mar 3 19:28:17 2014
@@ -0,0 +1,161 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+
+import java.sql.Date;
+
+/**
+ * Vectorized cast to DATE for inputs that are carried in a {@link LongColumnVector}.
+ * A TIMESTAMP input is converted value by value; a DATE input is copied through
+ * unchanged. Any other input type leaves the output column untouched.
+ */
+public class CastLongToDate extends VectorExpression {
+  private static final long serialVersionUID = 1L;
+
+  private int inputColumn;
+  private int outputColumn;
+
+  // Scratch object reused for every row so that no Date is allocated per value.
+  private transient Date date = new Date(0);
+
+  public CastLongToDate() {
+    super();
+  }
+
+  public CastLongToDate(int inputColumn, int outputColumn) {
+    this.inputColumn = inputColumn;
+    this.outputColumn = outputColumn;
+  }
+
+  /**
+   * Evaluates any child expressions, then fills the output column of the batch
+   * from the input column according to the type of the first input.
+   */
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    LongColumnVector source = (LongColumnVector) batch.cols[inputColumn];
+    LongColumnVector target = (LongColumnVector) batch.cols[outputColumn];
+
+    if (batch.size == 0) {
+
+      // Empty batch: nothing to convert.
+      return;
+    }
+
+    switch (inputTypes[0]) {
+      case TIMESTAMP:
+        castTimestamps(batch, source, target);
+        break;
+
+      case DATE:
+        // Already in the output representation; copy the selected rows as they are.
+        source.copySelected(batch.selectedInUse, batch.selected, batch.size, target);
+        break;
+    }
+  }
+
+  /**
+   * Converts the timestamp values of {@code source} into {@code target}, mirroring the
+   * null flags of the input. Values of null rows are never converted because their
+   * contents may be undefined.
+   */
+  private void castTimestamps(VectorizedRowBatch batch, LongColumnVector source,
+      LongColumnVector target) {
+    final int rowCount = batch.size;
+    final boolean mayHaveNulls = !source.noNulls;
+    target.noNulls = source.noNulls;
+
+    if (source.isRepeating) {
+      // Only entry 0 is converted for a repeating input.
+      target.isRepeating = true;
+      if (mayHaveNulls) {
+        target.isNull[0] = source.isNull[0];
+        if (source.isNull[0]) {
+          return;
+        }
+      }
+      target.vector[0] = toDays(source.vector[0]);
+      return;
+    }
+
+    if (batch.selectedInUse) {
+      final int[] selected = batch.selected;
+      for (int k = 0; k != rowCount; k++) {
+        final int row = selected[k];
+        if (mayHaveNulls) {
+          target.isNull[row] = source.isNull[row];
+          if (source.isNull[row]) {
+            continue;
+          }
+        }
+        target.vector[row] = toDays(source.vector[row]);
+      }
+    } else {
+      if (mayHaveNulls) {
+        System.arraycopy(source.isNull, 0, target.isNull, 0, rowCount);
+      }
+      for (int row = 0; row != rowCount; row++) {
+        if (mayHaveNulls && source.isNull[row]) {
+          continue;
+        }
+        target.vector[row] = toDays(source.vector[row]);
+      }
+    }
+    target.isRepeating = false;
+  }
+
+  /**
+   * Converts one timestamp value to the value stored in a DATE column. The input is
+   * divided by 1,000,000 before being handed to {@link Date#setTime(long)}, which takes
+   * milliseconds (so the input is presumably in nanoseconds).
+   */
+  private long toDays(long timestamp) {
+    date.setTime(timestamp / 1000000);
+    return DateWritable.dateToDays(date);
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  public void setOutputColumn(int outputColumn) {
+    this.outputColumn = outputColumn;
+  }
+
+  public int getInputColumn() {
+    return inputColumn;
+  }
+
+  public void setInputColumn(int inputColumn) {
+    this.inputColumn = inputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "date";
+  }
+
+  /**
+   * Describes this expression as a one-argument projection over a LONG column.
+   */
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    VectorExpressionDescriptor.Builder builder = new VectorExpressionDescriptor.Builder();
+    builder.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(1)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.LONG)
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN);
+    return builder.build();
+  }
+}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java?rev=1573687&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java Mon Mar 3 19:28:17 2014
@@ -0,0 +1,162 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+
+import java.util.Date;
+import java.text.SimpleDateFormat;
+
+/**
+ * Vectorized cast of a string column to a DATE column. Each string is decoded as UTF-8
+ * and parsed with the pattern {@code yyyy-MM-dd}; a value that cannot be decoded or
+ * parsed produces a NULL in the output.
+ */
+public class CastStringToDate extends VectorExpression {
+  private static final long serialVersionUID = 1L;
+
+  private int inputColumn;
+  private int outputColumn;
+
+  // Scratch objects reused for every row to avoid per-value allocation.
+  private transient java.sql.Date sqlDate = new java.sql.Date(0);
+  private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
+
+  public CastStringToDate() {
+
+  }
+
+  public CastStringToDate(int inputColumn, int outputColumn) {
+    this.inputColumn = inputColumn;
+    this.outputColumn = outputColumn;
+  }
+
+  /**
+   * Evaluates any child expressions, then converts the input string column of the
+   * batch into the output DATE column, propagating input nulls.
+   */
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn];
+    int[] sel = batch.selected;
+    int n = batch.size;
+    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn];
+
+    if (n == 0) {
+
+      // Nothing to do
+      return;
+    }
+
+    if (inV.noNulls) {
+      // No input nulls, but the per-row conversion may still introduce nulls for
+      // unparseable strings (it flips outV.noNulls itself when that happens).
+      outV.noNulls = true;
+      if (inV.isRepeating) {
+        outV.isRepeating = true;
+        evaluate(outV, inV, 0);
+      } else if (batch.selectedInUse) {
+        for (int j = 0; j != n; j++) {
+          int i = sel[j];
+          evaluate(outV, inV, i);
+        }
+        outV.isRepeating = false;
+      } else {
+        for (int i = 0; i != n; i++) {
+          evaluate(outV, inV, i);
+        }
+        outV.isRepeating = false;
+      }
+    } else {
+
+      // Handle case with nulls. Don't do function if the value is null,
+      // because the data may be undefined for a null value.
+      outV.noNulls = false;
+      if (inV.isRepeating) {
+        outV.isRepeating = true;
+        outV.isNull[0] = inV.isNull[0];
+        if (!inV.isNull[0]) {
+          evaluate(outV, inV, 0);
+        }
+      } else if (batch.selectedInUse) {
+        for (int j = 0; j != n; j++) {
+          int i = sel[j];
+          outV.isNull[i] = inV.isNull[i];
+          if (!inV.isNull[i]) {
+            evaluate(outV, inV, i);
+          }
+        }
+        outV.isRepeating = false;
+      } else {
+        System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
+        for (int i = 0; i != n; i++) {
+          if (!inV.isNull[i]) {
+            evaluate(outV, inV, i);
+          }
+        }
+        outV.isRepeating = false;
+      }
+    }
+  }
+
+  /**
+   * Converts the string at row {@code i} of {@code inV} and stores the result at row
+   * {@code i} of {@code outV}. On any failure the output row is marked NULL.
+   */
+  private void evaluate(LongColumnVector outV, BytesColumnVector inV, int i) {
+    try {
+      Date utilDate = formatter.parse(new String(inV.vector[i], inV.start[i], inV.length[i], "UTF-8"));
+      sqlDate.setTime(utilDate.getTime());
+      outV.vector[i] = DateWritable.dateToDays(sqlDate);
+
+      // Clear the flag explicitly: a failure on another row of this batch sets
+      // outV.noNulls to false, after which a stale 'true' left in the reused output
+      // vector would wrongly turn this valid row into NULL.
+      outV.isNull[i] = false;
+    } catch (Exception e) {
+      // Deliberately broad: an undecodable or unparseable value becomes NULL
+      // instead of failing the whole batch.
+      outV.vector[i] = 1;
+      outV.isNull[i] = true;
+      outV.noNulls = false;
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  public void setOutputColumn(int outputColumn) {
+    this.outputColumn = outputColumn;
+  }
+
+  public int getInputColumn() {
+    return inputColumn;
+  }
+
+  public void setInputColumn(int inputColumn) {
+    this.inputColumn = inputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "date";
+  }
+
+  /**
+   * Describes this expression as a one-argument projection over a STRING column.
+   */
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+    b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(1)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.STRING)
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN);
+    return b.build();
+  }
+}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java?rev=1573687&r1=1573686&r2=1573687&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java Mon Mar 3 19:28:17 2014
@@ -19,7 +19,9 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
import java.io.Serializable;
+import java.util.Map;
+import com.google.common.collect.ImmutableMap;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -27,6 +29,21 @@ import org.apache.hadoop.hive.ql.exec.ve
* Base class for expressions.
*/
public abstract class VectorExpression implements Serializable {
+  /**
+   * Detailed input type of an expression argument. It distinguishes types that share a
+   * column vector representation; every type name without an entry in the lookup
+   * table maps to {@link #OTHER}.
+   */
+  public enum Type {
+    STRING, TIMESTAMP, DATE, OTHER;
+
+    // Lookup table keyed by lower-case type name.
+    private static final Map<String, Type> types = ImmutableMap.<String, Type>builder()
+        .put("string", STRING)
+        .put("timestamp", TIMESTAMP)
+        .put("date", DATE)
+        .build();
+
+    /**
+     * Maps a type name to its {@link Type}, ignoring case.
+     *
+     * @param name the type name; must not be null
+     * @return the matching constant, or {@link #OTHER} when the name is not in the table
+     */
+    public static Type getValue(String name) {
+      // Look up with the same lower-cased key that decides membership; Locale.ROOT
+      // keeps the folding independent of the JVM default locale (e.g. Turkish 'I').
+      Type type = types.get(name.toLowerCase(java.util.Locale.ROOT));
+      return type == null ? OTHER : type;
+    }
+  }
private static final long serialVersionUID = 1L;
/**
@@ -35,6 +52,11 @@ public abstract class VectorExpression i
protected VectorExpression [] childExpressions = null;
/**
+ * More detailed input types, such as date and timestamp.
+ */
+ protected Type [] inputTypes;
+
+ /**
* Output type of the expression.
*/
protected String outputType;
@@ -70,6 +92,7 @@ public abstract class VectorExpression i
* Initialize the child expressions.
*/
public void setChildExpressions(VectorExpression [] ve) {
+
childExpressions = ve;
}
@@ -91,6 +114,21 @@ public abstract class VectorExpression i
}
}
+ /**
+ * Set more detailed types to distinguish certain types that are represented by the same
+ * {@link org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.ArgumentType}s. For example, date and
+ * timestamp will be in {@link org.apache.hadoop.hive.ql.exec.vector.LongColumnVector} but they need to be
+ * distinguished.
+ * @param inputTypes the detailed type of each input argument; the array is stored as given, without copying
+ */
+ public void setInputTypes(Type ... inputTypes) {
+ this.inputTypes = inputTypes;
+ }
+
+ /**
+ * Returns the detailed input types held by this expression. The stored array itself
+ * is returned, not a copy.
+ */
+ public Type [] getInputTypes() {
+ return inputTypes;
+ }
+
@Override
public String toString() {
StringBuilder b = new StringBuilder();
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java?rev=1573687&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java Mon Mar 3 19:28:17 2014
@@ -0,0 +1,240 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.io.Text;
+
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Arrays;
+import java.util.Calendar;
+import java.util.Date;
+
+/**
+ * Vectorized date addition where both the date and the number of days are columns.
+ * The first input may be a DATE, TIMESTAMP or STRING column; the second is a long
+ * column of day counts. The result is written as {@code yyyy-MM-dd} text into a
+ * {@link BytesColumnVector}. When {@link #isPositive} is false the days are
+ * subtracted instead of added.
+ */
+public class VectorUDFDateAddColCol extends VectorExpression {
+  private static final long serialVersionUID = 1L;
+
+  private int colNum1;
+  private int colNum2;
+  private int outputColumn;
+  protected boolean isPositive = true;
+
+  // Scratch objects reused for every row to avoid per-value allocation.
+  private transient final Calendar calendar = Calendar.getInstance();
+  private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
+  private transient final Text text = new Text();
+
+  public VectorUDFDateAddColCol(int colNum1, int colNum2, int outputColumn) {
+    this();
+    this.colNum1 = colNum1;
+    this.colNum2 = colNum2;
+    this.outputColumn = outputColumn;
+  }
+
+  public VectorUDFDateAddColCol() {
+    super();
+  }
+
+  /**
+   * Evaluates any child expressions, propagates input nulls to the output column and
+   * then computes the shifted date for every row of the batch.
+   */
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    // The inputs may be produced by child expressions, so evaluate those first.
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    ColumnVector inputColVector1 = batch.cols[colNum1];
+    LongColumnVector inputColVector2 = (LongColumnVector) batch.cols[colNum2];
+    int[] sel = batch.selected;
+    int n = batch.size;
+
+    BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn];
+    if (n <= 0) {
+      // Nothing to do
+      return;
+    }
+
+    // Handle null
+    NullUtil.propagateNullsColCol(inputColVector1, inputColVector2, outV, batch.selected, batch.size, batch.selectedInUse);
+
+    switch (inputTypes[0]) {
+      case DATE:
+      case TIMESTAMP:
+      case STRING:
+        break;
+      default:
+        // Unsupported input type: leave the output values untouched.
+        return;
+    }
+
+    // Now disregard null in second pass.
+    if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) {
+      // All must be selected otherwise size would be zero
+      // Repeating property will not change.
+      outV.isRepeating = true;
+      evaluateRow(inputColVector1, inputColVector2, outV, 0);
+    } else if (batch.selectedInUse) {
+      for (int j = 0; j != n; j++) {
+        evaluateRow(inputColVector1, inputColVector2, outV, sel[j]);
+      }
+    } else {
+      for (int i = 0; i != n; i++) {
+        evaluateRow(inputColVector1, inputColVector2, outV, i);
+      }
+    }
+  }
+
+  /**
+   * Computes output row {@code i}. A repeating input only defines entry 0, so that
+   * entry is read for every row when just one of the two inputs is repeating.
+   */
+  private void evaluateRow(ColumnVector inputColVector1, LongColumnVector inputColVector2,
+      BytesColumnVector outV, int i) {
+    int index1 = inputColVector1.isRepeating ? 0 : i;
+    int index2 = inputColVector2.isRepeating ? 0 : i;
+    switch (inputTypes[0]) {
+      case DATE:
+        setOutput(outV, i, evaluateDate(inputColVector1, index1, inputColVector2.vector[index2]));
+        break;
+
+      case TIMESTAMP:
+        setOutput(outV, i, evaluateTimestamp(inputColVector1, index1, inputColVector2.vector[index2]));
+        break;
+
+      case STRING:
+        evaluateString((BytesColumnVector) inputColVector1, inputColVector2, outV, i);
+        break;
+
+      default:
+        break;
+    }
+  }
+
+  /**
+   * Stores {@code bytes} as the complete value of output row {@code i}. The length is
+   * taken from this row's own array, never from another row.
+   */
+  private void setOutput(BytesColumnVector outV, int i, byte[] bytes) {
+    outV.vector[i] = bytes;
+    outV.start[i] = 0;
+    outV.length[i] = bytes.length;
+  }
+
+  /**
+   * Shifts the DATE value at {@code index} of {@code columnVector} by {@code numDays}
+   * and returns the result formatted as {@code yyyy-MM-dd} bytes.
+   */
+  protected byte[] evaluateDate(ColumnVector columnVector, int index, long numDays) {
+    LongColumnVector lcv = (LongColumnVector) columnVector;
+    if (isPositive) {
+      calendar.setTimeInMillis(DateWritable.daysToMillis((int) lcv.vector[index] + (int) numDays));
+    } else {
+      calendar.setTimeInMillis(DateWritable.daysToMillis((int) lcv.vector[index] - (int) numDays));
+    }
+    Date newDate = calendar.getTime();
+    text.set(formatter.format(newDate));
+    return Arrays.copyOf(text.getBytes(), text.getLength());
+  }
+
+  /**
+   * Shifts the TIMESTAMP value at {@code index} of {@code columnVector} by
+   * {@code numDays} and returns the result formatted as {@code yyyy-MM-dd} bytes.
+   * The stored value is divided by 1,000,000 to obtain calendar milliseconds.
+   */
+  protected byte[] evaluateTimestamp(ColumnVector columnVector, int index, long numDays) {
+    LongColumnVector lcv = (LongColumnVector) columnVector;
+    calendar.setTimeInMillis(lcv.vector[index] / 1000000);
+    if (isPositive) {
+      calendar.add(Calendar.DATE, (int) numDays);
+    } else {
+      calendar.add(Calendar.DATE, (int) -numDays);
+    }
+    Date newDate = calendar.getTime();
+    text.set(formatter.format(newDate));
+    return Arrays.copyOf(text.getBytes(), text.getLength());
+  }
+
+  /**
+   * Parses the string at row {@code i} of the first input as {@code yyyy-MM-dd},
+   * shifts it by the day count of the second input and stores the formatted result
+   * at row {@code i} of {@code outputVector}. A null input or an unparseable string
+   * marks the output row NULL.
+   */
+  protected void evaluateString(BytesColumnVector inputColumnVector1, LongColumnVector inputColumnVector2,
+      BytesColumnVector outputVector, int i) {
+    // A repeating input only defines entry 0.
+    int index1 = inputColumnVector1.isRepeating ? 0 : i;
+    int index2 = inputColumnVector2.isRepeating ? 0 : i;
+
+    // isNull entries are only meaningful when the vector reports that it has nulls.
+    boolean null1 = !inputColumnVector1.noNulls && inputColumnVector1.isNull[index1];
+    boolean null2 = !inputColumnVector2.noNulls && inputColumnVector2.isNull[index2];
+    if (null1 || null2) {
+      outputVector.noNulls = false;
+      outputVector.isNull[i] = true;
+      return;
+    }
+
+    text.set(inputColumnVector1.vector[index1], inputColumnVector1.start[index1],
+        inputColumnVector1.length[index1]);
+    try {
+      calendar.setTime(formatter.parse(text.toString()));
+    } catch (ParseException e) {
+      // Unparseable input yields NULL; stop here rather than formatting whatever
+      // date the calendar still holds from a previous row.
+      outputVector.noNulls = false;
+      outputVector.isNull[i] = true;
+      return;
+    }
+    if (isPositive) {
+      calendar.add(Calendar.DATE, (int) inputColumnVector2.vector[index2]);
+    } else {
+      calendar.add(Calendar.DATE, -(int) inputColumnVector2.vector[index2]);
+    }
+    Date newDate = calendar.getTime();
+    text.set(formatter.format(newDate));
+
+    setOutput(outputVector, i, Arrays.copyOf(text.getBytes(), text.getLength()));
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return this.outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "string";
+  }
+
+  public int getColNum1() {
+    return colNum1;
+  }
+
+  public void setColNum1(int colNum1) {
+    this.colNum1 = colNum1;
+  }
+
+  public int getColNum2() {
+    return colNum2;
+  }
+
+  public void setColNum2(int colNum2) {
+    this.colNum2 = colNum2;
+  }
+
+  public void setOutputColumn(int outputColumn) {
+    this.outputColumn = outputColumn;
+  }
+
+  /**
+   * Describes this expression as a two-argument projection: a column of any type
+   * followed by a LONG column.
+   */
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+    b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(2)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.ANY,
+            VectorExpressionDescriptor.ArgumentType.LONG)
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.COLUMN);
+    return b.build();
+  }
+}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java?rev=1573687&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java Mon Mar 3 19:28:17 2014
@@ -0,0 +1,291 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.io.Text;
+
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Arrays;
+import java.util.Calendar;
+import java.util.Date;
+
+public class VectorUDFDateAddColScalar extends VectorExpression {
+ private static final long serialVersionUID = 1L;
+
+ private int colNum;
+ private int outputColumn;
+ private int numDays;
+ protected boolean isPositive = true;
+ private transient final Calendar calendar = Calendar.getInstance();
+ private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
+ private transient final Text text = new Text();
+
+ public VectorUDFDateAddColScalar(int colNum, long numDays, int outputColumn) {
+ super();
+ this.colNum = colNum;
+ this.numDays = (int) numDays;
+ this.outputColumn = outputColumn;
+ }
+
+ public VectorUDFDateAddColScalar() {
+ super();
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) {
+ BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn];
+ ColumnVector inputCol = batch.cols[this.colNum];
+ /* every line below this is identical for evaluateLong & evaluateString */
+ final int n = inputCol.isRepeating ? 1 : batch.size;
+ int[] sel = batch.selected;
+
+ if(batch.size == 0) {
+ /* n != batch.size when isRepeating */
+ return;
+ }
+
+ /* true for all algebraic UDFs with no state */
+ outV.isRepeating = inputCol.isRepeating;
+
+ switch (inputTypes[0]) {
+ case DATE:
+ if (inputCol.noNulls) {
+ outV.noNulls = true;
+ if (batch.selectedInUse) {
+ for(int j=0; j < n; j++) {
+ int i = sel[j];
+ outV.vector[i] = evaluateDate(inputCol, i);
+ outV.start[i] = 0;
+ outV.length[i] = outV.vector[i].length;
+ }
+ } else {
+ for(int i = 0; i < n; i++) {
+ outV.vector[i] = evaluateDate(inputCol, i);
+ outV.start[i] = 0;
+ outV.length[i] = outV.vector[i].length;
+ }
+ }
+ } else {
+ // Handle case with nulls. Don't do function if the value is null, to save time,
+ // because calling the function can be expensive.
+ outV.noNulls = false;
+ if (batch.selectedInUse) {
+ for(int j = 0; j < n; j++) {
+ int i = sel[j];
+ outV.isNull[i] = inputCol.isNull[i];
+ if (!inputCol.isNull[i]) {
+ outV.vector[i] = evaluateDate(inputCol, i);
+ outV.start[i] = 0;
+ outV.length[i] = outV.vector[i].length;
+ }
+ }
+ } else {
+ for(int i = 0; i < n; i++) {
+ outV.isNull[i] = inputCol.isNull[i];
+ if (!inputCol.isNull[i]) {
+ outV.vector[i] = evaluateDate(inputCol, i);
+ outV.start[i] = 0;
+ outV.length[i] = outV.vector[i].length;
+ }
+ }
+ }
+ }
+ break;
+
+ case TIMESTAMP:
+ if (inputCol.noNulls) {
+ outV.noNulls = true;
+ if (batch.selectedInUse) {
+ for(int j=0; j < n; j++) {
+ int i = sel[j];
+ outV.vector[i] = evaluateTimestamp(inputCol, i);
+ outV.start[i] = 0;
+ outV.length[i] = outV.vector[i].length;
+ }
+ } else {
+ for(int i = 0; i < n; i++) {
+ outV.vector[i] = evaluateTimestamp(inputCol, i);
+ outV.start[i] = 0;
+ outV.length[i] = outV.vector[i].length;
+ }
+ }
+ } else {
+ // Handle case with nulls. Don't do function if the value is null, to save time,
+ // because calling the function can be expensive.
+ outV.noNulls = false;
+ if (batch.selectedInUse) {
+ for(int j = 0; j < n; j++) {
+ int i = sel[j];
+ outV.isNull[i] = inputCol.isNull[i];
+ if (!inputCol.isNull[i]) {
+ outV.vector[i] = evaluateTimestamp(inputCol, i);
+ outV.start[i] = 0;
+ outV.length[i] = outV.vector[i].length;
+ }
+ }
+ } else {
+ for(int i = 0; i < n; i++) {
+ outV.isNull[i] = inputCol.isNull[i];
+ if (!inputCol.isNull[i]) {
+ outV.vector[i] = evaluateTimestamp(inputCol, i);
+ outV.start[i] = 0;
+ outV.length[i] = outV.vector[i].length;
+ }
+ }
+ }
+ }
+ break;
+
+ case STRING:
+ if (inputCol.noNulls) {
+ outV.noNulls = true;
+ if (batch.selectedInUse) {
+ for(int j=0; j < n; j++) {
+ int i = sel[j];
+ evaluateString(inputCol, outV, i);
+ }
+ } else {
+ for(int i = 0; i < n; i++) {
+ evaluateString(inputCol, outV, i);
+ }
+ }
+ } else {
+ // Handle case with nulls. Don't do function if the value is null, to save time,
+ // because calling the function can be expensive.
+ outV.noNulls = false;
+ if (batch.selectedInUse) {
+ for(int j = 0; j < n; j++) {
+ int i = sel[j];
+ outV.isNull[i] = inputCol.isNull[i];
+ if (!inputCol.isNull[i]) {
+ evaluateString(inputCol, outV, i);
+ }
+ }
+ } else {
+ for(int i = 0; i < n; i++) {
+ outV.isNull[i] = inputCol.isNull[i];
+ if (!inputCol.isNull[i]) {
+ evaluateString(inputCol, outV, i);
+ }
+ }
+ }
+ }
+ break;
+ }
+ }
+
+ protected byte[] evaluateTimestamp(ColumnVector columnVector, int index) {
+ LongColumnVector lcv = (LongColumnVector) columnVector;
+ calendar.setTimeInMillis(lcv.vector[index] / 1000000);
+ if (isPositive) {
+ calendar.add(Calendar.DATE, numDays);
+ } else {
+ calendar.add(Calendar.DATE, -numDays);
+ }
+ Date newDate = calendar.getTime();
+ text.set(formatter.format(newDate));
+ return Arrays.copyOf(text.getBytes(), text.getLength());
+ }
+
+ protected byte[] evaluateDate(ColumnVector columnVector, int index) {
+ LongColumnVector lcv = (LongColumnVector) columnVector;
+ if (isPositive) {
+ calendar.setTimeInMillis(DateWritable.daysToMillis((int) lcv.vector[index] + numDays));
+ } else {
+ calendar.setTimeInMillis(DateWritable.daysToMillis((int) lcv.vector[index] - numDays));
+ }
+ Date newDate = calendar.getTime();
+ text.set(formatter.format(newDate));
+ return Arrays.copyOf(text.getBytes(), text.getLength());
+ }
+
+ protected void evaluateString(ColumnVector columnVector, BytesColumnVector outputVector, int i) {
+ BytesColumnVector bcv = (BytesColumnVector) columnVector;
+ text.set(bcv.vector[i], bcv.start[i], bcv.length[i]);
+ try {
+ calendar.setTime(formatter.parse(text.toString()));
+ } catch (ParseException e) {
+ outputVector.isNull[i] = true;
+ }
+ if (isPositive) {
+ calendar.add(Calendar.DATE, numDays);
+ } else {
+ calendar.add(Calendar.DATE, -numDays);
+ }
+ Date newDate = calendar.getTime();
+ text.set(formatter.format(newDate));
+
+ byte[] bytes = text.getBytes();
+ int size = text.getLength();
+ outputVector.vector[i] = Arrays.copyOf(bytes, size);
+ outputVector.start[i] = 0;
+ outputVector.length[i] = size;
+ }
+
+ @Override
+ public int getOutputColumn() {
+ return this.outputColumn;
+ }
+
+ @Override
+ public String getOutputType() {
+ return "string";
+ }
+
+ public int getColNum() {
+ return colNum;
+ }
+
+ public void setColNum(int colNum) {
+ this.colNum = colNum;
+ }
+
+ public void setOutputColumn(int outputColumn) {
+ this.outputColumn = outputColumn;
+ }
+
+ public int getNumDays() {
+ return numDays;
+ }
+
+ public void setNumDay(int numDays) {
+ this.numDays = numDays;
+ }
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+ b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+ .setNumArguments(2)
+ .setArgumentTypes(
+ VectorExpressionDescriptor.ArgumentType.ANY,
+ VectorExpressionDescriptor.ArgumentType.LONG)
+ .setInputExpressionTypes(
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.SCALAR);
+ return b.build();
+ }
+}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java?rev=1573687&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java Mon Mar 3 19:28:17 2014
@@ -0,0 +1,218 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.io.Text;
+
+import java.io.UnsupportedEncodingException;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Arrays;
+import java.util.Calendar;
+import java.util.Date;
+
+ /**
+  * Vectorized expression that shifts a constant date-like value by a per-row number of
+  * days (read from a long column) and writes the result as a {@code yyyy-MM-dd} string.
+  *
+  * <p>The constant is interpreted according to {@code inputTypes[0]}: DATE and TIMESTAMP
+  * use {@code longValue}, STRING uses {@code stringValue}. When {@link #isPositive} is
+  * {@code true} the days are added, otherwise they are subtracted.
+  */
+ public class VectorUDFDateAddScalarCol extends VectorExpression {
+   private static final long serialVersionUID = 1L;
+
+   private int colNum;
+   private int outputColumn;
+   private long longValue = 0;
+   private byte[] stringValue = null;
+   protected boolean isPositive = true;
+   private transient final Calendar calendar = Calendar.getInstance();
+   private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
+   private transient final Text text = new Text();
+   private transient Date baseDate = new Date();
+
+   public VectorUDFDateAddScalarCol() {
+     super();
+   }
+
+   /**
+    * @param object the constant: a {@code Long} or a {@code byte[]}; other types are ignored
+    * @param colNum index of the day-count column in the batch
+    * @param outputColumn index of the output column in the batch
+    */
+   public VectorUDFDateAddScalarCol(Object object, int colNum, int outputColumn) {
+     this();
+     this.colNum = colNum;
+     this.outputColumn = outputColumn;
+
+     if (object instanceof Long) {
+       this.longValue = (Long) object;
+     } else if (object instanceof byte []) {
+       this.stringValue = (byte[]) object;
+     }
+   }
+
+   /**
+    * Evaluates the expression for every active row of {@code batch}.
+    *
+    * <p>If the constant cannot be turned into a date, the whole output is published as a
+    * repeating NULL. A repeating input is evaluated once at entry 0, independent of the
+    * selection vector.
+    */
+   @Override
+   public void evaluate(VectorizedRowBatch batch) {
+     LongColumnVector inputCol = (LongColumnVector) batch.cols[this.colNum];
+     BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn];
+
+     if (batch.size == 0) {
+       return;
+     }
+
+     if (!resolveBaseDate()) {
+       // Every row is NULL; a repeating NULL covers all of them regardless of selection.
+       outV.isRepeating = true;
+       outV.noNulls = false;
+       outV.isNull[0] = true;
+       return;
+     }
+
+     /* true for all algebraic UDFs with no state */
+     outV.isRepeating = inputCol.isRepeating;
+     outV.noNulls = inputCol.noNulls;
+
+     if (inputCol.isRepeating) {
+       // Only entry 0 is meaningful for a repeating vector, even when a selection is in use.
+       evaluateRow(inputCol, outV, 0);
+     } else if (batch.selectedInUse) {
+       int[] sel = batch.selected;
+       for (int j = 0; j < batch.size; j++) {
+         evaluateRow(inputCol, outV, sel[j]);
+       }
+     } else {
+       for (int i = 0; i < batch.size; i++) {
+         evaluateRow(inputCol, outV, i);
+       }
+     }
+   }
+
+   /**
+    * Loads {@code baseDate} from the constant according to {@code inputTypes[0]}.
+    *
+    * @return {@code false} when a STRING constant is missing or cannot be parsed
+    */
+   private boolean resolveBaseDate() {
+     switch (inputTypes[0]) {
+       case DATE:
+         baseDate.setTime(DateWritable.daysToMillis((int) longValue));
+         return true;
+
+       case TIMESTAMP:
+         // Divided by 1000000 to obtain epoch milliseconds
+         // (presumably the constant holds nanoseconds -- TODO confirm).
+         baseDate.setTime(longValue / 1000000);
+         return true;
+
+       case STRING:
+         if (stringValue == null) {
+           return false;
+         }
+         try {
+           baseDate = formatter.parse(new String(stringValue, "UTF-8"));
+           return true;
+         } catch (ParseException e) {
+           return false;
+         } catch (UnsupportedEncodingException e) {
+           return false;
+         }
+
+       default:
+         // Other types leave baseDate untouched, as before.
+         return true;
+     }
+   }
+
+   /** Evaluates one row, propagating the input's null flag to the output. */
+   private void evaluateRow(LongColumnVector inputCol, BytesColumnVector outV, int i) {
+     // isNull[] is only trusted when noNulls is false.
+     boolean inputIsNull = !inputCol.noNulls && inputCol.isNull[i];
+     outV.isNull[i] = inputIsNull;
+     if (!inputIsNull) {
+       evaluate(baseDate, inputCol.vector[i], outV, i);
+     }
+   }
+
+   /** Shifts {@code baseDate} by {@code numDays} and writes the formatted date to row {@code i}. */
+   private void evaluate(Date baseDate, long numDays, BytesColumnVector output, int i) {
+     calendar.setTime(baseDate);
+     calendar.add(Calendar.DATE, isPositive ? (int) numDays : -(int) numDays);
+
+     Date newDate = calendar.getTime();
+     text.set(formatter.format(newDate));
+     int size = text.getLength();
+     output.vector[i] = Arrays.copyOf(text.getBytes(), size);
+     output.start[i] = 0;
+     output.length[i] = size;
+   }
+
+   @Override
+   public int getOutputColumn() {
+     return this.outputColumn;
+   }
+
+   @Override
+   public String getOutputType() {
+     return "string";
+   }
+
+   public int getColNum() {
+     return colNum;
+   }
+
+   public void setColNum(int colNum) {
+     this.colNum = colNum;
+   }
+
+   public void setOutputColumn(int outputColumn) {
+     this.outputColumn = outputColumn;
+   }
+
+   public long getLongValue() {
+     return longValue;
+   }
+
+   public void setLongValue(long longValue) {
+     this.longValue = longValue;
+   }
+
+   public byte[] getStringValue() {
+     return stringValue;
+   }
+
+   public void setStringValue(byte[] stringValue) {
+     this.stringValue = stringValue;
+   }
+
+   public boolean isPositive() {
+     return isPositive;
+   }
+
+   public void setPositive(boolean isPositive) {
+     this.isPositive = isPositive;
+   }
+
+   /**
+    * Builds the descriptor: projection mode, two arguments (any type, then long), the
+    * first supplied as a scalar and the second as a column.
+    */
+   @Override
+   public VectorExpressionDescriptor.Descriptor getDescriptor() {
+     VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+     b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+         .setNumArguments(2)
+         .setArgumentTypes(
+             VectorExpressionDescriptor.ArgumentType.ANY,
+             VectorExpressionDescriptor.ArgumentType.LONG)
+         .setInputExpressionTypes(
+             VectorExpressionDescriptor.InputExpressionType.SCALAR,
+             VectorExpressionDescriptor.InputExpressionType.COLUMN);
+     return b.build();
+   }
+ }
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java?rev=1573687&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java Mon Mar 3 19:28:17 2014
@@ -0,0 +1,306 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+
+import java.sql.Date;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+
+public class VectorUDFDateDiffColCol extends VectorExpression {
+ private static final long serialVersionUID = 1L;
+
+ private int colNum1;
+ private int colNum2;
+ private int outputColumn;
+ private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
+ private transient Date date = new Date(0);
+ private transient LongColumnVector dateVector1 = new LongColumnVector();
+ private transient LongColumnVector dateVector2 = new LongColumnVector();
+
+ public VectorUDFDateDiffColCol(int colNum1, int colNum2, int outputColumn) {
+ this();
+ this.colNum1 = colNum1;
+ this.colNum2 = colNum2;
+ this.outputColumn = outputColumn;
+ }
+
+ public VectorUDFDateDiffColCol() {
+ super();
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) {
+ ColumnVector inputColVector1 = batch.cols[colNum1];
+ ColumnVector inputColVector2 = batch.cols[colNum2];
+ int[] sel = batch.selected;
+ int n = batch.size;
+
+ LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn];
+ long[] outputVector = outV.vector;
+ if (n <= 0) {
+ // Nothing to do
+ return;
+ }
+
+ NullUtil.propagateNullsColCol(inputColVector1, inputColVector2, outV, batch.selected, batch.size, batch.selectedInUse);
+
+ LongColumnVector convertedVector1 = toDateArray(batch, inputTypes[0], inputColVector1, dateVector1);
+ LongColumnVector convertedVector2 = toDateArray(batch, inputTypes[1], inputColVector2, dateVector2);
+
+ // Now disregard null in second pass.
+ if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) {
+ // All must be selected otherwise size would be zero
+ // Repeating property will not change.
+ outV.isRepeating = true;
+ if (convertedVector1.isNull[0] || convertedVector2.isNull[0]) {
+ outV.isNull[0] = true;
+ } else {
+ outputVector[0] = convertedVector1.vector[0] - convertedVector2.vector[0];
+ }
+ } else if (inputColVector1.isRepeating) {
+ if (batch.selectedInUse) {
+ for (int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (convertedVector1.isNull[0] || convertedVector2.isNull[i]) {
+ outV.isNull[i] = true;
+ } else {
+ outputVector[i] = convertedVector1.vector[0] - convertedVector2.vector[i];
+ }
+ }
+ } else {
+ for (int i = 0; i != n; i++) {
+ if (convertedVector1.isNull[0] || convertedVector2.isNull[i]) {
+ outV.isNull[i] = true;
+ } else {
+ outputVector[i] = convertedVector1.vector[0] - convertedVector2.vector[i];
+ }
+ }
+ }
+ } else if (inputColVector2.isRepeating) {
+ if (batch.selectedInUse) {
+ for (int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (convertedVector1.isNull[i] || convertedVector2.isNull[0]) {
+ outV.isNull[i] = true;
+ } else {
+ outputVector[i] = convertedVector1.vector[i] - convertedVector2.vector[0];
+ }
+ }
+ } else {
+ for (int i = 0; i != n; i++) {
+ if (convertedVector1.isNull[i] || convertedVector2.isNull[0]) {
+ outV.isNull[i] = true;
+ } else {
+ outputVector[i] = convertedVector1.vector[i] - convertedVector2.vector[0];
+ }
+ }
+ }
+ } else {
+ if (batch.selectedInUse) {
+ for (int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (convertedVector1.isNull[i] || convertedVector2.isNull[i]) {
+ outV.isNull[i] = true;
+ } else {
+ outputVector[i] = convertedVector1.vector[i] - convertedVector2.vector[i];
+ }
+ }
+ } else {
+ for (int i = 0; i != n; i++) {
+ if (convertedVector1.isNull[i] || convertedVector2.isNull[i]) {
+ outV.isNull[i] = true;
+ } else {
+ outputVector[i] = convertedVector1.vector[i] - convertedVector2.vector[i];
+ }
+ }
+ }
+ }
+ }
+
+ private LongColumnVector toDateArray(VectorizedRowBatch batch, Type colType,
+ ColumnVector inputColVector, LongColumnVector dateVector) {
+ int size = batch.size;
+ if (colType == Type.DATE) {
+ return (LongColumnVector) inputColVector;
+ }
+
+ if (size > dateVector.vector.length) {
+ if (dateVector1 == dateVector) {
+ dateVector1 = new LongColumnVector(size * 2);
+ dateVector = dateVector1;
+ } else {
+ dateVector2 = new LongColumnVector(size * 2);
+ dateVector = dateVector2;
+ }
+ }
+
+ switch (colType) {
+ case TIMESTAMP:
+ LongColumnVector lcv = (LongColumnVector) inputColVector;
+ lcv.copySelected(batch.selectedInUse, batch.selected, batch.size, dateVector);
+ if (dateVector.isRepeating) {
+ date.setTime(dateVector.vector[0] / 1000000);
+ dateVector.vector[0] = DateWritable.dateToDays(date);
+ } else {
+ if (batch.selectedInUse) {
+ for (int j = 0; j != size; j++) {
+ int i = batch.selected[j];
+ if (!dateVector.isNull[i]) {
+ date.setTime(dateVector.vector[i] / 1000000);
+ dateVector.vector[i] = DateWritable.dateToDays(date);
+ }
+ }
+ } else {
+ for (int i = 0; i != size; i++) {
+ if (!dateVector.isNull[i]) {
+ date.setTime(dateVector.vector[i] / 1000000);
+ dateVector.vector[i] = DateWritable.dateToDays(date);
+ }
+ }
+ }
+ }
+ return dateVector;
+
+ case STRING:
+ BytesColumnVector bcv = (BytesColumnVector) inputColVector;
+ copySelected(bcv, batch.selectedInUse, batch.selected, batch.size, dateVector);
+ return dateVector;
+ }
+
+ return null;
+ }
+
+ // Copy the current object contents into the output. Only copy selected entries,
+ // as indicated by selectedInUse and the sel array.
+ public void copySelected(
+ BytesColumnVector input, boolean selectedInUse, int[] sel, int size, LongColumnVector output) {
+
+ // Output has nulls if and only if input has nulls.
+ output.noNulls = input.noNulls;
+ output.isRepeating = false;
+
+ // Handle repeating case
+ if (input.isRepeating) {
+ output.isNull[0] = input.isNull[0];
+ output.isRepeating = true;
+
+ String string = new String(input.vector[0], input.start[0], input.length[0]);
+ try {
+ date.setTime(formatter.parse(string).getTime());
+ output.vector[0] = DateWritable.dateToDays(date);
+ } catch (ParseException e) {
+ output.isNull[0] = true;
+ }
+ return;
+ }
+
+ // Handle normal case
+
+ // Copy data values over
+ if (selectedInUse) {
+ for (int j = 0; j < size; j++) {
+ int i = sel[j];
+ setDays(input, output, i);
+ }
+ } else {
+ for (int i = 0; i < size; i++) {
+ setDays(input, output, i);
+ }
+ }
+
+ // Copy nulls over if needed
+ if (!input.noNulls) {
+ if (selectedInUse) {
+ for (int j = 0; j < size; j++) {
+ int i = sel[j];
+ output.isNull[i] = input.isNull[i];
+ }
+ }
+ else {
+ System.arraycopy(input.isNull, 0, output.isNull, 0, size);
+ }
+ }
+ }
+
+ private void setDays(BytesColumnVector input, LongColumnVector output, int i) {
+ if (input.isNull[i]) {
+ output.isNull[i] = true;
+ return;
+ }
+ String string = new String(input.vector[i], input.start[i], input.length[i]);
+ try {
+ date.setTime(formatter.parse(string).getTime());
+ output.vector[i] = DateWritable.dateToDays(date);
+ } catch (ParseException e) {
+ output.isNull[i] = true;
+ }
+ }
+
+ @Override
+ public int getOutputColumn() {
+ return this.outputColumn;
+ }
+
+ @Override
+ public String getOutputType() {
+ return "long";
+ }
+
+ public int getColNum1() {
+ return colNum1;
+ }
+
+ public void setColNum1(int colNum1) {
+ this.colNum1 = colNum1;
+ }
+
+ public int getColNum2() {
+ return colNum2;
+ }
+
+ public void setColNum2(int colNum2) {
+ this.colNum2 = colNum2;
+ }
+
+ public void setOutputColumn(int outputColumn) {
+ this.outputColumn = outputColumn;
+ }
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+ b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+ .setNumArguments(2)
+ .setArgumentTypes(
+ VectorExpressionDescriptor.ArgumentType.ANY,
+ VectorExpressionDescriptor.ArgumentType.ANY)
+ .setInputExpressionTypes(
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.COLUMN);
+ return b.build();
+ }
+}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java?rev=1573687&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java Mon Mar 3 19:28:17 2014
@@ -0,0 +1,292 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.io.Text;
+
+import java.sql.Date;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+
+ /**
+  * Vectorized expression that computes the difference in days between a date-like column
+  * and a constant (column minus constant) and writes it to a long column.
+  *
+  * <p>The column type is given by {@code inputTypes[0]} and the constant's type by
+  * {@code inputTypes[1]}; each may be DATE, TIMESTAMP or STRING. A row is NULL when the
+  * input is NULL or a string cannot be parsed as {@code yyyy-MM-dd}; every row is NULL
+  * when the constant itself cannot be parsed.
+  */
+ public class VectorUDFDateDiffColScalar extends VectorExpression {
+   private static final long serialVersionUID = 1L;
+
+   private int colNum;
+   private int outputColumn;
+   private long longValue;
+   private byte[] stringValue;
+   private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
+   private transient final Text text = new Text();
+   private int baseDate;
+   private transient Date date = new Date(0);
+
+   /**
+    * @param colNum index of the input column in the batch
+    * @param object the constant: a {@code Long} or a {@code byte[]}; other types are ignored
+    * @param outputColumn index of the output column in the batch
+    */
+   public VectorUDFDateDiffColScalar(int colNum, Object object, int outputColumn) {
+     super();
+     this.colNum = colNum;
+     this.outputColumn = outputColumn;
+
+     if (object instanceof Long) {
+       this.longValue = (Long) object;
+     } else if (object instanceof byte []) {
+       this.stringValue = (byte []) object;
+     }
+   }
+
+   public VectorUDFDateDiffColScalar() {
+     super();
+   }
+
+   /**
+    * Evaluates the expression for every active row of {@code batch}.
+    *
+    * <p>If the constant cannot be turned into a day count, the whole output is published
+    * as a repeating NULL. A repeating input is evaluated once at entry 0, independent of
+    * the selection vector.
+    */
+   @Override
+   public void evaluate(VectorizedRowBatch batch) {
+     LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn];
+     ColumnVector inputCol = batch.cols[this.colNum];
+
+     if (batch.size == 0) {
+       return;
+     }
+
+     if (!resolveBaseDate()) {
+       // Every row is NULL; a repeating NULL covers all of them regardless of selection.
+       outV.isRepeating = true;
+       outV.noNulls = false;
+       outV.isNull[0] = true;
+       return;
+     }
+
+     /* true for all algebraic UDFs with no state */
+     outV.isRepeating = inputCol.isRepeating;
+     // May still be cleared below if a string row fails to parse.
+     outV.noNulls = inputCol.noNulls;
+
+     if (inputCol.isRepeating) {
+       // Only entry 0 is meaningful for a repeating vector, even when a selection is in use.
+       evaluateRow(inputCol, outV, 0);
+     } else if (batch.selectedInUse) {
+       int[] sel = batch.selected;
+       for (int j = 0; j < batch.size; j++) {
+         evaluateRow(inputCol, outV, sel[j]);
+       }
+     } else {
+       for (int i = 0; i < batch.size; i++) {
+         evaluateRow(inputCol, outV, i);
+       }
+     }
+   }
+
+   /**
+    * Loads {@code baseDate} (a day count) from the constant according to
+    * {@code inputTypes[1]}.
+    *
+    * @return {@code false} when a STRING constant is missing or cannot be parsed
+    */
+   private boolean resolveBaseDate() {
+     switch (inputTypes[1]) {
+       case DATE:
+         baseDate = (int) longValue;
+         return true;
+
+       case TIMESTAMP:
+         // Divided by 1000000 to obtain epoch milliseconds
+         // (presumably the constant holds nanoseconds -- TODO confirm).
+         date.setTime(longValue / 1000000);
+         baseDate = DateWritable.dateToDays(date);
+         return true;
+
+       case STRING:
+         if (stringValue == null) {
+           return false;
+         }
+         try {
+           date.setTime(formatter.parse(new String(stringValue, "UTF-8")).getTime());
+           baseDate = DateWritable.dateToDays(date);
+           return true;
+         } catch (ParseException e) {
+           return false;
+         } catch (java.io.UnsupportedEncodingException e) {
+           return false;
+         }
+
+       default:
+         // Other types leave baseDate untouched, as before.
+         return true;
+     }
+   }
+
+   /**
+    * Evaluates a single row. The per-row type dispatch is cheap next to the date
+    * conversion done for each value.
+    */
+   private void evaluateRow(ColumnVector inputCol, LongColumnVector outV, int i) {
+     // isNull[] is only trusted when noNulls is false; always write the output flag so
+     // that a later flip of outV.noNulls does not expose a stale entry.
+     boolean inputIsNull = !inputCol.noNulls && inputCol.isNull[i];
+     outV.isNull[i] = inputIsNull;
+     if (inputIsNull) {
+       // Don't do function if the value is null, to save time.
+       return;
+     }
+
+     switch (inputTypes[0]) {
+       case DATE:
+         outV.vector[i] = evaluateDate(inputCol, i);
+         break;
+       case TIMESTAMP:
+         outV.vector[i] = evaluateTimestamp(inputCol, i);
+         break;
+       case STRING:
+         evaluateString(inputCol, outV, i);
+         break;
+       default:
+         // Other input types produce no value, as before.
+         break;
+     }
+   }
+
+   /**
+    * Returns the day difference for the timestamp at {@code index}. The stored long is
+    * divided by 1000000 to obtain epoch milliseconds (presumably it holds nanoseconds --
+    * TODO confirm).
+    */
+   protected int evaluateTimestamp(ColumnVector columnVector, int index) {
+     LongColumnVector lcv = (LongColumnVector) columnVector;
+     date.setTime(lcv.vector[index] / 1000000);
+     return DateWritable.dateToDays(date) - baseDate;
+   }
+
+   /** Returns the day difference for the day count at {@code index}. */
+   protected int evaluateDate(ColumnVector columnVector, int index) {
+     LongColumnVector lcv = (LongColumnVector) columnVector;
+     return ((int) lcv.vector[index]) - baseDate;
+   }
+
+   /**
+    * Parses the string at row {@code i} as {@code yyyy-MM-dd} and writes the day
+    * difference to {@code output}. A value that cannot be parsed yields NULL for that row.
+    */
+   protected void evaluateString(ColumnVector columnVector, LongColumnVector output, int i) {
+     BytesColumnVector bcv = (BytesColumnVector) columnVector;
+     text.set(bcv.vector[i], bcv.start[i], bcv.length[i]);
+     try {
+       date.setTime(formatter.parse(text.toString()).getTime());
+       output.vector[i] = DateWritable.dateToDays(date) - baseDate;
+     } catch (ParseException e) {
+       output.vector[i] = 1;
+       // noNulls must be cleared or consumers ignore the null flag.
+       output.noNulls = false;
+       output.isNull[i] = true;
+     }
+   }
+
+   @Override
+   public int getOutputColumn() {
+     return this.outputColumn;
+   }
+
+   @Override
+   public String getOutputType() {
+     return "long";
+   }
+
+   public int getColNum() {
+     return colNum;
+   }
+
+   public void setColNum(int colNum) {
+     this.colNum = colNum;
+   }
+
+   public void setOutputColumn(int outputColumn) {
+     this.outputColumn = outputColumn;
+   }
+
+   public long getLongValue() {
+     return longValue;
+   }
+
+   // NOTE(review): the parameter is int while the field and getter are long; left as is
+   // to keep the existing signature -- confirm whether this should be widened.
+   public void setLongValue(int longValue) {
+     this.longValue = longValue;
+   }
+
+   public byte[] getStringValue() {
+     return stringValue;
+   }
+
+   public void setStringValue(byte[] stringValue) {
+     this.stringValue = stringValue;
+   }
+
+   /**
+    * Builds the descriptor: projection mode, two arguments of any type, the first
+    * supplied as a column and the second as a scalar.
+    */
+   @Override
+   public VectorExpressionDescriptor.Descriptor getDescriptor() {
+     VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+     b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+         .setNumArguments(2)
+         .setArgumentTypes(
+             VectorExpressionDescriptor.ArgumentType.ANY,
+             VectorExpressionDescriptor.ArgumentType.ANY)
+         .setInputExpressionTypes(
+             VectorExpressionDescriptor.InputExpressionType.COLUMN,
+             VectorExpressionDescriptor.InputExpressionType.SCALAR);
+     return b.build();
+   }
+ }
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java?rev=1573687&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java Mon Mar 3 19:28:17 2014
@@ -0,0 +1,292 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.io.Text;
+
+import java.sql.Date;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+
+/**
+ * Vectorized date difference whose first argument is a scalar and whose second
+ * argument is a column. For every row the output is
+ * {@code days(scalar) - days(column value)}, written to a LongColumnVector.
+ *
+ * Each argument may be a DATE (used directly as a day count), a TIMESTAMP
+ * (divided by 1000000 and handed to Date.setTime, so presumably stored in
+ * nanoseconds -- confirm) or a STRING parsed with the "yyyy-MM-dd" pattern.
+ * Rows whose string value cannot be parsed become null.
+ */
+public class VectorUDFDateDiffScalarCol extends VectorExpression {
+  private static final long serialVersionUID = 1L;
+
+  private int colNum;
+  private int outputColumn;
+  // Scalar argument when it is a DATE or TIMESTAMP.
+  private long longValue;
+  // Scalar argument when it is a STRING (decoded as UTF-8 in evaluate()).
+  private byte[] stringValue;
+  private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
+  private transient final Text text = new Text();
+  // Scalar argument converted to days; recomputed on every evaluate() call.
+  private int baseDate;
+  // Scratch object reused for every conversion to avoid per-row allocation.
+  private transient Date date = new Date(0);
+
+  /**
+   * @param object the scalar argument: a Long is stored as the long value, a
+   *               byte[] as the string value; any other type is ignored
+   * @param colNum number of the input column in the batch
+   * @param outputColumn number of the output column in the batch
+   */
+  public VectorUDFDateDiffScalarCol(Object object, int colNum, int outputColumn) {
+    super();
+    this.colNum = colNum;
+    this.outputColumn = outputColumn;
+
+    if (object instanceof Long) {
+      this.longValue = (Long) object;
+    } else if (object instanceof byte []) {
+      this.stringValue = (byte[]) object;
+    }
+  }
+
+  public VectorUDFDateDiffScalarCol() {
+    super();
+  }
+
+  /**
+   * Computes {@code scalar - column} in days for every active row of the batch.
+   *
+   * If the scalar is a STRING that cannot be parsed, every active row of the
+   * output is marked null and no per-row work is done.
+   */
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn];
+    ColumnVector inputCol = batch.cols[this.colNum];
+
+    if (batch.size == 0) {
+      return;
+    }
+
+    /* a repeating input carries its single value at index 0 */
+    final int n = inputCol.isRepeating ? 1 : batch.size;
+    final int[] sel = batch.selected;
+    /*
+     * The selection vector must not be applied to a repeating input: the value
+     * lives at index 0, not at sel[0], and a repeating output is read back from
+     * index 0 as well.
+     */
+    final boolean useSel = batch.selectedInUse && !inputCol.isRepeating;
+
+    /* true for all algebraic UDFs with no state */
+    outV.isRepeating = inputCol.isRepeating;
+
+    switch (inputTypes[0]) {
+      case DATE:
+        baseDate = (int) longValue;
+        break;
+
+      case TIMESTAMP:
+        date.setTime(longValue / 1000000);
+        baseDate = DateWritable.dateToDays(date);
+        break;
+
+      case STRING:
+        try {
+          date.setTime(formatter.parse(new String(stringValue, "UTF-8")).getTime());
+          baseDate = DateWritable.dateToDays(date);
+        } catch (Exception e) {
+          // Deliberately broad: an unparseable, undecodable or missing scalar
+          // turns the whole result into nulls instead of failing the query.
+          outV.noNulls = false;
+          for (int j = 0; j < n; j++) {
+            outV.isNull[useSel ? sel[j] : j] = true;
+          }
+          return;
+        }
+        break;
+
+      default:
+        // Other scalar types leave baseDate as it was.
+        break;
+    }
+
+    switch (inputTypes[1]) {
+      case DATE:
+        outV.noNulls = inputCol.noNulls;
+        for (int j = 0; j < n; j++) {
+          final int i = useSel ? sel[j] : j;
+          if (propagateNull(inputCol, outV, i)) {
+            outV.vector[i] = evaluateDate(inputCol, i);
+          }
+        }
+        break;
+
+      case TIMESTAMP:
+        outV.noNulls = inputCol.noNulls;
+        for (int j = 0; j < n; j++) {
+          final int i = useSel ? sel[j] : j;
+          if (propagateNull(inputCol, outV, i)) {
+            outV.vector[i] = evaluateTimestamp(inputCol, i);
+          }
+        }
+        break;
+
+      case STRING:
+        outV.noNulls = inputCol.noNulls;
+        for (int j = 0; j < n; j++) {
+          final int i = useSel ? sel[j] : j;
+          if (propagateNull(inputCol, outV, i)) {
+            // May flip the row to null (and clear noNulls) on a parse failure.
+            evaluateString(inputCol, outV, i);
+          }
+        }
+        break;
+
+      default:
+        // Other column types produce no output.
+        break;
+    }
+  }
+
+  /**
+   * Copies the null flag of input row {@code i} to the output; the flag is
+   * written even when the input has no nulls, so that stale flags cannot
+   * survive if a later row clears {@code noNulls}.
+   *
+   * @return true when the row is non-null and its value must be computed
+   */
+  private static boolean propagateNull(ColumnVector in, LongColumnVector out, int i) {
+    final boolean rowIsNull = !in.noNulls && in.isNull[i];
+    out.isNull[i] = rowIsNull;
+    return !rowIsNull;
+  }
+
+  /** Returns baseDate minus the day number of the TIMESTAMP at {@code index}. */
+  protected int evaluateTimestamp(ColumnVector columnVector, int index) {
+    LongColumnVector lcv = (LongColumnVector) columnVector;
+    date.setTime(lcv.vector[index] / 1000000);
+    return baseDate - DateWritable.dateToDays(date);
+  }
+
+  /** Returns baseDate minus the DATE (day count) stored at {@code index}. */
+  protected int evaluateDate(ColumnVector columnVector, int index) {
+    LongColumnVector lcv = (LongColumnVector) columnVector;
+    return baseDate - ((int) lcv.vector[index]);
+  }
+
+  /**
+   * Parses the string at row {@code i} as "yyyy-MM-dd" and stores baseDate minus
+   * its day number in {@code output}. On a parse failure the row is marked null
+   * and {@code output.noNulls} is cleared so that readers honour the null flag.
+   */
+  protected void evaluateString(ColumnVector columnVector, LongColumnVector output, int i) {
+    BytesColumnVector bcv = (BytesColumnVector) columnVector;
+    text.set(bcv.vector[i], bcv.start[i], bcv.length[i]);
+    try {
+      date.setTime(formatter.parse(text.toString()).getTime());
+      output.vector[i] = baseDate - DateWritable.dateToDays(date);
+    } catch (ParseException e) {
+      output.vector[i] = 1;
+      output.isNull[i] = true;
+      output.noNulls = false;
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return this.outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "long";
+  }
+
+  public int getColNum() {
+    return colNum;
+  }
+
+  public void setColNum(int colNum) {
+    this.colNum = colNum;
+  }
+
+  public void setOutputColumn(int outputColumn) {
+    this.outputColumn = outputColumn;
+  }
+
+  public long getLongValue() {
+    return longValue;
+  }
+
+  /**
+   * Sets the DATE/TIMESTAMP scalar. Takes a long to match the field and the
+   * getter; an int parameter would truncate values outside the int range.
+   * Callers passing an int still compile through implicit widening.
+   */
+  public void setLongValue(long longValue) {
+    this.longValue = longValue;
+  }
+
+  public byte[] getStringValue() {
+    return stringValue;
+  }
+
+  public void setStringValue(byte[] stringValue) {
+    this.stringValue = stringValue;
+  }
+
+  /**
+   * Describes this expression as a two-argument PROJECTION accepting ANY
+   * argument types, with a SCALAR first input and a COLUMN second input.
+   */
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+    b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(2)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.ANY,
+            VectorExpressionDescriptor.ArgumentType.ANY)
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.SCALAR,
+            VectorExpressionDescriptor.InputExpressionType.COLUMN);
+    return b.build();
+  }
+}
\ No newline at end of file
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateLong.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateLong.java?rev=1573687&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateLong.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateLong.java Mon Mar 3 19:28:17 2014
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+
+import java.io.UnsupportedEncodingException;
+import java.sql.Date;
+import java.text.SimpleDateFormat;
+
+/**
+ * Formats a DATE or TIMESTAMP column value as a "yyyy-MM-dd" string encoded
+ * in UTF-8.
+ */
+public class VectorUDFDateLong extends LongToStringUnaryUDF {
+  private static final long serialVersionUID = 1L;
+
+  private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
+  // Scratch object reused for every row.
+  private transient Date date = new Date(0);
+
+  public VectorUDFDateLong() {
+    super();
+  }
+
+  public VectorUDFDateLong(int inputColumn, int outputColumn) {
+    super(inputColumn, outputColumn);
+  }
+
+  /**
+   * Converts {@code vector[i]} to a formatted date and stores the bytes at
+   * row {@code i} of {@code outV}.
+   */
+  @Override
+  protected void func(BytesColumnVector outV, long[] vector, int i) {
+    // Load the scratch date according to the input type; for any other type
+    // the scratch date keeps whatever it held before.
+    switch (inputTypes[0]) {
+      case TIMESTAMP:
+        date.setTime(vector[i] / 1000000);
+        break;
+
+      case DATE:
+        date.setTime(DateWritable.daysToMillis((int) vector[i]));
+        break;
+
+      default:
+        break;
+    }
+
+    final byte[] encoded;
+    try {
+      encoded = formatter.format(date).getBytes("UTF-8");
+    } catch (UnsupportedEncodingException e) {
+      // Encoding failed: clear the slot and flag the row as null.
+      outV.vector[i] = null;
+      outV.isNull[i] = true;
+      return;
+    }
+    outV.setRef(i, encoded, 0, encoded.length);
+  }
+}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateString.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateString.java?rev=1573687&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateString.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateString.java Mon Mar 3 19:28:17 2014
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.io.Text;
+
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+
+/**
+ * Parses a string with the "yyyy-MM-dd" pattern and re-formats the parsed date
+ * with the same pattern. A null input or an unparseable string yields null.
+ */
+public class VectorUDFDateString extends StringUnaryUDF {
+  private static final long serialVersionUID = 1L;
+
+  public VectorUDFDateString(int colNum, int outputColumn) {
+    super(colNum, outputColumn, new StringUnaryUDF.IUDFUnaryString() {
+      SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
+      // Result holder reused across calls.
+      Text t = new Text();
+
+      @Override
+      public Text evaluate(Text s) {
+        Text normalized = null;
+        if (s != null) {
+          try {
+            final Date parsed = formatter.parse(s.toString());
+            t.set(formatter.format(parsed));
+            normalized = t;
+          } catch (ParseException e) {
+            // Not a date in the expected pattern: the result stays null.
+          }
+        }
+        return normalized;
+      }
+    });
+  }
+
+  public VectorUDFDateString() {
+    super();
+  }
+}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateSubColCol.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateSubColCol.java?rev=1573687&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateSubColCol.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateSubColCol.java Mon Mar 3 19:28:17 2014
@@ -0,0 +1,34 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+/**
+ * Returns the date that is num_days before start_date.
+ *
+ * Reuses VectorUDFDateAddColCol and only sets its {@code isPositive} flag to
+ * false in both constructors.
+ */
+public class VectorUDFDateSubColCol extends VectorUDFDateAddColCol {
+  // Declared explicitly, like the other vector expression classes in this package.
+  private static final long serialVersionUID = 1L;
+
+  public VectorUDFDateSubColCol(int colNum1, int colNum2, int outputColumn) {
+    super(colNum1, colNum2, outputColumn);
+    isPositive = false;
+  }
+
+  public VectorUDFDateSubColCol() {
+    super();
+    isPositive = false;
+  }
+}