Posted to commits@hive.apache.org by ha...@apache.org on 2013/09/20 02:33:01 UTC
svn commit: r1524874 [6/9] - in /hive/branches/vectorization: ./
cli/src/java/org/apache/hadoop/hive/cli/
cli/src/test/org/apache/hadoop/hive/cli/
common/src/java/org/apache/hadoop/hive/common/type/
common/src/java/org/apache/hadoop/hive/conf/ common/s...
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java?rev=1524874&r1=1524873&r2=1524874&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java Fri Sep 20 00:32:55 2013
@@ -83,6 +83,7 @@ public class GenericUDAFComputeStats ext
case DOUBLE:
return new GenericUDAFDoubleStatsEvaluator();
case STRING:
+ case VARCHAR:
return new GenericUDAFStringStatsEvaluator();
case BINARY:
return new GenericUDAFBinaryStatsEvaluator();
@@ -102,12 +103,12 @@ public class GenericUDAFComputeStats ext
/* Object Inspector corresponding to the input parameter.
*/
- private PrimitiveObjectInspector inputOI;
+ private transient PrimitiveObjectInspector inputOI;
/* Partial aggregation result returned by TerminatePartial. Partial result is a struct
* containing a long field named "count".
*/
- private Object[] partialResult;
+ private transient Object[] partialResult;
/* Object Inspectors corresponding to the struct returned by TerminatePartial and the long
* field within the struct - "count"
@@ -115,17 +116,17 @@ public class GenericUDAFComputeStats ext
private transient StructObjectInspector soi;
private transient StructField countTruesField;
- private WritableLongObjectInspector countTruesFieldOI;
+ private transient WritableLongObjectInspector countTruesFieldOI;
private transient StructField countFalsesField;
- private WritableLongObjectInspector countFalsesFieldOI;
+ private transient WritableLongObjectInspector countFalsesFieldOI;
private transient StructField countNullsField;
- private WritableLongObjectInspector countNullsFieldOI;
+ private transient WritableLongObjectInspector countNullsFieldOI;
/* Output of final result of the aggregation
*/
- private Object[] result;
+ private transient Object[] result;
@Override
public ObjectInspector init(Mode m, ObjectInspector[] parameters)
@@ -305,13 +306,13 @@ public class GenericUDAFComputeStats ext
/* Object Inspector corresponding to the input parameter.
*/
- private PrimitiveObjectInspector inputOI;
+ private transient PrimitiveObjectInspector inputOI;
private transient PrimitiveObjectInspector numVectorsOI;
/* Partial aggregation result returned by TerminatePartial. Partial result is a struct
* containing a long field named "count".
*/
- private Object[] partialResult;
+ private transient Object[] partialResult;
/* Object Inspectors corresponding to the struct returned by TerminatePartial and the long
* field within the struct - "count"
@@ -319,23 +320,23 @@ public class GenericUDAFComputeStats ext
private transient StructObjectInspector soi;
private transient StructField minField;
- private WritableLongObjectInspector minFieldOI;
+ private transient WritableLongObjectInspector minFieldOI;
private transient StructField maxField;
- private WritableLongObjectInspector maxFieldOI;
+ private transient WritableLongObjectInspector maxFieldOI;
private transient StructField countNullsField;
- private WritableLongObjectInspector countNullsFieldOI;
+ private transient WritableLongObjectInspector countNullsFieldOI;
private transient StructField ndvField;
- private WritableStringObjectInspector ndvFieldOI;
+ private transient WritableStringObjectInspector ndvFieldOI;
private transient StructField numBitVectorsField;
- private WritableIntObjectInspector numBitVectorsFieldOI;
+ private transient WritableIntObjectInspector numBitVectorsFieldOI;
/* Output of final result of the aggregation
*/
- private Object[] result;
+ private transient Object[] result;
@Override
public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
@@ -601,13 +602,13 @@ public class GenericUDAFComputeStats ext
/* Object Inspector corresponding to the input parameter.
*/
- private PrimitiveObjectInspector inputOI;
+ private transient PrimitiveObjectInspector inputOI;
private transient PrimitiveObjectInspector numVectorsOI;
/* Partial aggregation result returned by TerminatePartial. Partial result is a struct
* containing a long field named "count".
*/
- private Object[] partialResult;
+ private transient Object[] partialResult;
/* Object Inspectors corresponding to the struct returned by TerminatePartial and the long
* field within the struct - "count"
@@ -615,23 +616,23 @@ public class GenericUDAFComputeStats ext
private transient StructObjectInspector soi;
private transient StructField minField;
- private WritableDoubleObjectInspector minFieldOI;
+ private transient WritableDoubleObjectInspector minFieldOI;
private transient StructField maxField;
- private WritableDoubleObjectInspector maxFieldOI;
+ private transient WritableDoubleObjectInspector maxFieldOI;
private transient StructField countNullsField;
- private WritableLongObjectInspector countNullsFieldOI;
+ private transient WritableLongObjectInspector countNullsFieldOI;
private transient StructField ndvField;
- private WritableStringObjectInspector ndvFieldOI;
+ private transient WritableStringObjectInspector ndvFieldOI;
private transient StructField numBitVectorsField;
- private WritableIntObjectInspector numBitVectorsFieldOI;
+ private transient WritableIntObjectInspector numBitVectorsFieldOI;
/* Output of final result of the aggregation
*/
- private Object[] result;
+ private transient Object[] result;
@Override
public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
@@ -898,13 +899,13 @@ public class GenericUDAFComputeStats ext
/* Object Inspector corresponding to the input parameter.
*/
- private PrimitiveObjectInspector inputOI;
+ private transient PrimitiveObjectInspector inputOI;
private transient PrimitiveObjectInspector numVectorsOI;
/* Partial aggregation result returned by TerminatePartial. Partial result is a struct
* containing a long field named "count".
*/
- private Object[] partialResult;
+ private transient Object[] partialResult;
/* Object Inspectors corresponding to the struct returned by TerminatePartial and the
* fields within the struct - "maxLength", "sumLength", "count", "countNulls", "ndv"
@@ -912,26 +913,26 @@ public class GenericUDAFComputeStats ext
private transient StructObjectInspector soi;
private transient StructField maxLengthField;
- private WritableLongObjectInspector maxLengthFieldOI;
+ private transient WritableLongObjectInspector maxLengthFieldOI;
private transient StructField sumLengthField;
- private WritableLongObjectInspector sumLengthFieldOI;
+ private transient WritableLongObjectInspector sumLengthFieldOI;
private transient StructField countField;
- private WritableLongObjectInspector countFieldOI;
+ private transient WritableLongObjectInspector countFieldOI;
private transient StructField countNullsField;
- private WritableLongObjectInspector countNullsFieldOI;
+ private transient WritableLongObjectInspector countNullsFieldOI;
private transient StructField ndvField;
- private WritableStringObjectInspector ndvFieldOI;
+ private transient WritableStringObjectInspector ndvFieldOI;
private transient StructField numBitVectorsField;
- private WritableIntObjectInspector numBitVectorsFieldOI;
+ private transient WritableIntObjectInspector numBitVectorsFieldOI;
/* Output of final result of the aggregation
*/
- private Object[] result;
+ private transient Object[] result;
@Override
public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
@@ -1217,12 +1218,12 @@ public class GenericUDAFComputeStats ext
/* Object Inspector corresponding to the input parameter.
*/
- private PrimitiveObjectInspector inputOI;
+ private transient PrimitiveObjectInspector inputOI;
/* Partial aggregation result returned by TerminatePartial. Partial result is a struct
* containing a long field named "count".
*/
- private Object[] partialResult;
+ private transient Object[] partialResult;
/* Object Inspectors corresponding to the struct returned by TerminatePartial and the
* fields within the struct - "maxLength", "sumLength", "count", "countNulls"
@@ -1230,20 +1231,20 @@ public class GenericUDAFComputeStats ext
private transient StructObjectInspector soi;
private transient StructField maxLengthField;
- private WritableLongObjectInspector maxLengthFieldOI;
+ private transient WritableLongObjectInspector maxLengthFieldOI;
private transient StructField sumLengthField;
- private WritableLongObjectInspector sumLengthFieldOI;
+ private transient WritableLongObjectInspector sumLengthFieldOI;
private transient StructField countField;
- private WritableLongObjectInspector countFieldOI;
+ private transient WritableLongObjectInspector countFieldOI;
private transient StructField countNullsField;
- private WritableLongObjectInspector countNullsFieldOI;
+ private transient WritableLongObjectInspector countNullsFieldOI;
/* Output of final result of the aggregation
*/
- private Object[] result;
+ private transient Object[] result;
@Override
public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
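For context on the transient additions above: UDAF evaluator instances travel with the serialized query plan, while object inspectors and scratch buffers are re-established in init() on each task, so keeping them non-transient only bloats (or, depending on the serializer, breaks) plan serialization. A minimal sketch of the pattern, assuming a Hive 0.12-era evaluator; the class and field names here are hypothetical:

    import org.apache.hadoop.hive.ql.metadata.HiveException;
    import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

    // Hypothetical skeleton: everything derived from init() parameters is
    // transient, because init() runs again wherever the evaluator lands.
    public abstract class TransientFieldsSketch extends GenericUDAFEvaluator {
      private transient PrimitiveObjectInspector inputOI; // rebuilt in init()
      private transient Object[] partialResult;           // per-task scratch

      @Override
      public ObjectInspector init(Mode m, ObjectInspector[] parameters)
          throws HiveException {
        super.init(m, parameters);
        inputOI = (PrimitiveObjectInspector) parameters[0];
        partialResult = new Object[2];
        return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
      }
    }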
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseCompare.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseCompare.java?rev=1524874&r1=1524873&r2=1524874&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseCompare.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseCompare.java Fri Sep 20 00:32:55 2013
@@ -133,9 +133,14 @@ public abstract class GenericUDFBaseComp
TypeInfo oiTypeInfo0 = TypeInfoUtils.getTypeInfoFromObjectInspector(arguments[0]);
TypeInfo oiTypeInfo1 = TypeInfoUtils.getTypeInfoFromObjectInspector(arguments[1]);
- if (oiTypeInfo0 != oiTypeInfo1) {
+ if (oiTypeInfo0 == oiTypeInfo1
+ || TypeInfoUtils.doPrimitiveCategoriesMatch(oiTypeInfo0, oiTypeInfo1)) {
+ compareType = CompareType.SAME_TYPE;
+ } else {
compareType = CompareType.NEED_CONVERT;
- TypeInfo compareType = FunctionRegistry.getCommonClassForComparison(oiTypeInfo0, oiTypeInfo1);
+ TypeInfo compareType = FunctionRegistry.getCommonClassForComparison(
+ oiTypeInfo0, oiTypeInfo1);
+
// For now, we always convert to double if we can't find a common type
compareOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(
(compareType == null) ?
@@ -143,8 +148,6 @@ public abstract class GenericUDFBaseComp
converter0 = ObjectInspectorConverters.getConverter(arguments[0], compareOI);
converter1 = ObjectInspectorConverters.getConverter(arguments[1], compareOI);
- } else {
- compareType = CompareType.SAME_TYPE;
}
}
return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector;
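The new fast path above means two inputs whose primitive categories match (e.g. varchar(5) vs varchar(10)) no longer go through converters. doPrimitiveCategoriesMatch is introduced elsewhere in this patch series; the following is a sketch of the rule it plausibly implements, stated as an assumption rather than the committed implementation:

    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
    import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;

    final class CategoryMatchSketch {
      // Assumption: two TypeInfos "match" when both are primitive with the
      // same PrimitiveCategory, even if their type parameters (lengths)
      // differ -- which is what lets varchar(5) vs varchar(10) take the
      // SAME_TYPE branch above.
      static boolean primitiveCategoriesMatch(TypeInfo a, TypeInfo b) {
        return a.getCategory() == Category.PRIMITIVE
            && b.getCategory() == Category.PRIMITIVE
            && ((PrimitiveTypeInfo) a).getPrimitiveCategory()
                == ((PrimitiveTypeInfo) b).getPrimitiveCategory();
      }
    }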
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcatWS.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcatWS.java?rev=1524874&r1=1524873&r2=1524874&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcatWS.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcatWS.java Fri Sep 20 00:32:55 2013
@@ -27,7 +27,11 @@ import org.apache.hadoop.hive.serde.serd
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.io.Text;
@@ -61,15 +65,12 @@ public class GenericUDFConcatWS extends
for (int i = 0; i < arguments.length; i++) {
switch(arguments[i].getCategory()) {
case LIST:
- if (((ListObjectInspector)arguments[i]).getListElementObjectInspector()
- .getTypeName().equals(serdeConstants.STRING_TYPE_NAME)
- || ((ListObjectInspector)arguments[i]).getListElementObjectInspector()
- .getTypeName().equals(serdeConstants.VOID_TYPE_NAME)) {
- break;
+ if (isStringOrVoidType(
+ ((ListObjectInspector) arguments[i]).getListElementObjectInspector())) {
+ break;
}
case PRIMITIVE:
- if (arguments[i].getTypeName().equals(serdeConstants.STRING_TYPE_NAME)
- || arguments[i].getTypeName().equals(serdeConstants.VOID_TYPE_NAME)) {
+ if (isStringOrVoidType(arguments[i])) {
break;
}
default:
@@ -84,6 +85,18 @@ public class GenericUDFConcatWS extends
return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
}
+ protected boolean isStringOrVoidType(ObjectInspector oi) {
+ if (oi.getCategory() == Category.PRIMITIVE) {
+ if (PrimitiveGrouping.STRING_GROUP
+ == PrimitiveObjectInspectorUtils.getPrimitiveGrouping(
+ ((PrimitiveObjectInspector) oi).getPrimitiveCategory())
+ || ((PrimitiveObjectInspector) oi).getPrimitiveCategory() == PrimitiveCategory.VOID) {
+ return true;
+ }
+ }
+ return false;
+ }
+
private final Text resultText = new Text();
@Override
@@ -91,8 +104,8 @@ public class GenericUDFConcatWS extends
if (arguments[0].get() == null) {
return null;
}
- String separator = ((StringObjectInspector) argumentOIs[0])
- .getPrimitiveJavaObject(arguments[0].get());
+ String separator = PrimitiveObjectInspectorUtils.getString(
+ arguments[0].get(), (PrimitiveObjectInspector)argumentOIs[0]);
StringBuilder sb = new StringBuilder();
boolean first = true;
@@ -116,8 +129,8 @@ public class GenericUDFConcatWS extends
sb.append(strArrayOI.getListElement(strArray, j));
}
} else {
- sb.append(((StringObjectInspector) argumentOIs[i])
- .getPrimitiveJavaObject(arguments[i].get()));
+ sb.append(PrimitiveObjectInspectorUtils.getString(
+ arguments[i].get(), (PrimitiveObjectInspector)argumentOIs[i]));
}
}
}
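The switch from StringObjectInspector casts to PrimitiveObjectInspectorUtils.getString is what lets concat_ws accept varchar arguments: getString extracts a Java String from any primitive value rather than assuming a string-typed inspector. A minimal standalone sketch of that extraction; the helper name is hypothetical:

    import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;

    final class StringExtractionSketch {
      // Works uniformly for Text, HiveVarcharWritable, and other primitive
      // writables, instead of casting to StringObjectInspector.
      static String asString(Object value, PrimitiveObjectInspector oi) {
        return value == null ? null
            : PrimitiveObjectInspectorUtils.getString(value, oi);
      }
    }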
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFReflect2.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFReflect2.java?rev=1524874&r1=1524873&r2=1524874&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFReflect2.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFReflect2.java Fri Sep 20 00:32:55 2013
@@ -39,6 +39,7 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.BytesWritable;
@@ -93,8 +94,12 @@ public class GenericUDFReflect2 extends
try {
method = findMethod(targetClass, methodName.toString(), null, true);
+ // While getTypeFor() returns a TypeEntry, we won't actually be able to get any
+ // type parameter information from this since the TypeEntry is derived from a return type.
+ PrimitiveTypeEntry typeEntry = getTypeFor(method.getReturnType());
returnOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
- getTypeFor(method.getReturnType()).primitiveCategory);
+ PrimitiveObjectInspectorUtils.getTypeEntryFromTypeSpecs(
+ typeEntry.primitiveCategory, typeEntry.typeParams));
returnObj = (Writable) returnOI.getPrimitiveWritableClass().newInstance();
} catch (Exception e) {
throw new UDFArgumentException(e);
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStringToMap.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStringToMap.java?rev=1524874&r1=1524873&r2=1524874&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStringToMap.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStringToMap.java Fri Sep 20 00:32:55 2013
@@ -24,9 +24,15 @@ import org.apache.hadoop.hive.ql.exec.De
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
@@ -41,30 +47,30 @@ import org.apache.hadoop.hive.serde2.typ
+ " delimiters are used: ',' as delimiter1 and '=' as delimiter2.")
public class GenericUDFStringToMap extends GenericUDF {
private final HashMap<Object, Object> ret = new HashMap<Object, Object>();
- private transient StringObjectInspector soi_text, soi_de1 = null, soi_de2 = null;
+ private transient Converter soi_text, soi_de1 = null, soi_de2 = null;
final static String default_de1 = ",";
final static String default_de2 = ":";
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
- if (!TypeInfoUtils.getTypeInfoFromObjectInspector(arguments[0]).equals(
- TypeInfoFactory.stringTypeInfo)
- || (arguments.length > 1 &&
- !TypeInfoUtils.getTypeInfoFromObjectInspector(arguments[1]).equals(
- TypeInfoFactory.stringTypeInfo))
- || (arguments.length > 2 &&
- !TypeInfoUtils.getTypeInfoFromObjectInspector(arguments[2]).equals(
- TypeInfoFactory.stringTypeInfo))) {
- throw new UDFArgumentException("All argument should be string");
+ for (int idx = 0; idx < Math.min(arguments.length, 3); ++idx) {
+ if (arguments[idx].getCategory() != Category.PRIMITIVE
+ || PrimitiveObjectInspectorUtils.getPrimitiveGrouping(
+ ((PrimitiveObjectInspector) arguments[idx]).getPrimitiveCategory())
+ != PrimitiveGrouping.STRING_GROUP) {
+ throw new UDFArgumentException("All argument should be string/character type");
+ }
}
-
- soi_text = (StringObjectInspector) arguments[0];
+ soi_text = ObjectInspectorConverters.getConverter(arguments[0],
+ PrimitiveObjectInspectorFactory.javaStringObjectInspector);
if (arguments.length > 1) {
- soi_de1 = (StringObjectInspector) arguments[1];
+ soi_de1 = ObjectInspectorConverters.getConverter(arguments[1],
+ PrimitiveObjectInspectorFactory.javaStringObjectInspector);
}
if (arguments.length > 2) {
- soi_de2 = (StringObjectInspector) arguments[2];
+ soi_de2 = ObjectInspectorConverters.getConverter(arguments[2],
+ PrimitiveObjectInspectorFactory.javaStringObjectInspector);
}
return ObjectInspectorFactory.getStandardMapObjectInspector(
@@ -75,11 +81,11 @@ public class GenericUDFStringToMap exten
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
ret.clear();
- String text = soi_text.getPrimitiveJavaObject(arguments[0].get());
+ String text = (String) soi_text.convert(arguments[0].get());
String delimiter1 = (soi_de1 == null) ?
- default_de1 : soi_de1.getPrimitiveJavaObject(arguments[1].get());
+ default_de1 : (String) soi_de1.convert(arguments[1].get());
String delimiter2 = (soi_de2 == null) ?
- default_de2 : soi_de2.getPrimitiveJavaObject(arguments[2].get());
+ default_de2 : (String) soi_de2.convert(arguments[2].get());
String[] keyValuePairs = text.split(delimiter1);
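The same varchar enablement shows up here as the Converter pattern: each argument gets a converter to the Java string inspector at initialize() time, so evaluate() can cast convert() results to String no matter which string-group type arrived. A minimal sketch of that pattern in isolation; the class name is hypothetical:

    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

    final class ToJavaStringSketch {
      private final Converter conv;

      // Build once per query (initialize() time), not per row.
      ToJavaStringSketch(ObjectInspector argOI) {
        conv = ObjectInspectorConverters.getConverter(argOI,
            PrimitiveObjectInspectorFactory.javaStringObjectInspector);
      }

      // The cast is safe because the target OI is the Java string inspector.
      String convert(Object rawValue) {
        return (String) conv.convert(rawValue);
      }
    }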
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java?rev=1524874&r1=1524873&r2=1524874&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java Fri Sep 20 00:32:55 2013
@@ -25,6 +25,8 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.DateConverter;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping;
/**
* GenericUDFToDate
@@ -49,10 +51,11 @@ public class GenericUDFToDate extends Ge
}
try {
argumentOI = (PrimitiveObjectInspector) arguments[0];
- switch (argumentOI.getPrimitiveCategory()) {
- case DATE:
- case STRING:
- case TIMESTAMP:
+ PrimitiveGrouping pg =
+ PrimitiveObjectInspectorUtils.getPrimitiveGrouping(argumentOI.getPrimitiveCategory());
+ switch (pg) {
+ case DATE_GROUP:
+ case STRING_GROUP:
break;
default:
throw new UDFArgumentException(
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUnion.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUnion.java?rev=1524874&r1=1524873&r2=1524874&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUnion.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUnion.java Fri Sep 20 00:32:55 2013
@@ -18,10 +18,9 @@
package org.apache.hadoop.hive.ql.udf.generic;
-import java.util.Arrays;
+import java.util.ArrayList;
+import java.util.List;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -41,12 +40,11 @@ public class GenericUDFUnion extends Gen
public ObjectInspector initialize(ObjectInspector[] arguments)
throws UDFArgumentException {
tagOI = arguments[0];
- ObjectInspector[] unionOIs = new ObjectInspector[arguments.length-1];
+ List<ObjectInspector> unionOIs = new ArrayList<ObjectInspector>(arguments.length-1);
for (int i = 1; i < arguments.length; i++) {
- unionOIs[i-1] = arguments[i];
+ unionOIs.add(arguments[i]);
}
- return ObjectInspectorFactory.getStandardUnionObjectInspector(
- Arrays.asList(unionOIs));
+ return ObjectInspectorFactory.getStandardUnionObjectInspector(unionOIs);
}
@Override
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java?rev=1524874&r1=1524873&r2=1524874&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java Fri Sep 20 00:32:55 2013
@@ -29,17 +29,21 @@ import org.apache.hadoop.hive.ql.exec.Fu
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.IdentityConverter;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams;
import org.apache.hadoop.io.Text;
/**
@@ -350,6 +354,69 @@ public final class GenericUDFUtils {
};
/**
+ * Helper class for UDFs returning string/varchar/char
+ */
+ public static class StringHelper {
+
+ protected Object returnValue;
+ protected PrimitiveCategory type;
+
+ public StringHelper(PrimitiveCategory type) throws UDFArgumentException {
+ this.type = type;
+ switch (type) {
+ case STRING:
+ returnValue = new Text();
+ break;
+ case VARCHAR:
+ returnValue = new HiveVarcharWritable();
+ break;
+ default:
+ throw new UDFArgumentException("Unexpected non-string type " + type);
+ }
+ }
+
+ public Object setReturnValue(String val) throws UDFArgumentException {
+ if (val == null) {
+ return null;
+ }
+ switch (type) {
+ case STRING:
+ ((Text)returnValue).set(val);
+ return returnValue;
+ case VARCHAR:
+ ((HiveVarcharWritable)returnValue).set(val);
+ return returnValue;
+ default:
+ throw new UDFArgumentException("Bad return type " + type);
+ }
+ }
+
+ /**
+ * Helper function to help GenericUDFs determine the return type
+ * character length for char/varchar.
+ * @param poi PrimitiveObjectInspector representing the type
+ * @return character length of the type
+ * @throws UDFArgumentException
+ */
+ public static int getFixedStringSizeForType(PrimitiveObjectInspector poi)
+ throws UDFArgumentException {
+ // TODO: we can support date, int, .. any types which would have a fixed length value
+ switch (poi.getPrimitiveCategory()) {
+ case VARCHAR:
+ VarcharTypeParams varcharParams = null;
+ varcharParams = (VarcharTypeParams) poi.getTypeParams();
+ if (varcharParams == null || varcharParams.length < 0) {
+ throw new UDFArgumentException("varchar type used without type params");
+ }
+ return varcharParams.length;
+ default:
+ throw new UDFArgumentException("No fixed size for type " + poi.getTypeName());
+ }
+ }
+
+ }
+
+ /**
* Return an ordinal from an integer.
*/
public static String getOrdinal(int i) {
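A hypothetical fragment showing how a string UDF might use the new StringHelper so one evaluate() path can serve both string and varchar return types; the surrounding UDF is omitted:

    import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
    import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils.StringHelper;
    import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;

    final class UpperCaseSketch {
      private final StringHelper helper;

      // PrimitiveCategory.STRING makes setReturnValue fill a Text;
      // PrimitiveCategory.VARCHAR fills a HiveVarcharWritable; other
      // categories throw, per the constructor above.
      UpperCaseSketch(PrimitiveCategory returnType) throws UDFArgumentException {
        helper = new StringHelper(returnType);
      }

      Object evaluateOnce(String input) throws UDFArgumentException {
        // setReturnValue(null) returns null, matching UDF null semantics.
        return helper.setReturnValue(input == null ? null : input.toUpperCase());
      }
    }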
Modified: hive/branches/vectorization/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto?rev=1524874&r1=1524873&r2=1524874&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto (original)
+++ hive/branches/vectorization/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto Fri Sep 20 00:32:55 2013
@@ -103,10 +103,12 @@ message Type {
UNION = 13;
DECIMAL = 14;
DATE = 15;
+ VARCHAR = 16;
}
required Kind kind = 1;
repeated uint32 subtypes = 2 [packed=true];
repeated string fieldNames = 3;
+ optional uint32 maximumLength = 4;
}
message StripeInformation {
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java?rev=1524874&r1=1524873&r2=1524874&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java Fri Sep 20 00:32:55 2013
@@ -478,7 +478,7 @@ public class QTestUtil {
// Delete any tables other than the source tables
// and any databases other than the default database.
for (String dbName : db.getAllDatabases()) {
- db.setCurrentDatabase(dbName);
+ SessionState.get().setCurrentDatabase(dbName);
for (String tblName : db.getAllTables()) {
if (!DEFAULT_DATABASE_NAME.equals(dbName) || !srcTables.contains(tblName)) {
Table tblObj = db.getTable(tblName);
@@ -502,7 +502,7 @@ public class QTestUtil {
db.dropDatabase(dbName);
}
}
- Hive.get().setCurrentDatabase(DEFAULT_DATABASE_NAME);
+ SessionState.get().setCurrentDatabase(DEFAULT_DATABASE_NAME);
List<String> roleNames = db.getAllRoleNames();
for (String roleName : roleNames) {
@@ -626,7 +626,8 @@ public class QTestUtil {
db.createTable("src_sequencefile", cols, null,
SequenceFileInputFormat.class, SequenceFileOutputFormat.class);
- Table srcThrift = new Table(db.getCurrentDatabase(), "src_thrift");
+ Table srcThrift =
+ new Table(SessionState.get().getCurrentDatabase(), "src_thrift");
srcThrift.setInputFormatClass(SequenceFileInputFormat.class.getName());
srcThrift.setOutputFormatClass(SequenceFileOutputFormat.class.getName());
srcThrift.setSerializationLib(ThriftDeserializer.class.getName());
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java?rev=1524874&r1=1524873&r2=1524874&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java Fri Sep 20 00:32:55 2013
@@ -55,6 +55,7 @@ import org.apache.hadoop.hive.ql.plan.Re
import org.apache.hadoop.hive.ql.plan.ReduceWork;
import org.apache.hadoop.hive.ql.plan.ScriptDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
+import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.mapred.TextInputFormat;
@@ -78,7 +79,8 @@ public class TestExecDriver extends Test
static {
try {
conf = new HiveConf(ExecDriver.class);
-
+ SessionState.start(conf);
+
fs = FileSystem.get(conf);
if (fs.exists(tmppath) && !fs.getFileStatus(tmppath).isDir()) {
throw new RuntimeException(tmpdir + " exists but is not a directory");
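This SessionState.start call pairs with the QTestUtil change above: current-database tracking has moved from Hive to SessionState, so a test that touches the metastore needs an active session first. The minimal setup, as a standalone sketch:

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.ql.session.SessionState;

    final class SessionSetupSketch {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // Must run before metastore access; current-database state now
        // lives on the session rather than on the Hive object.
        SessionState.start(conf);
        System.out.println(SessionState.get().getCurrentDatabase());
      }
    }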
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java?rev=1524874&r1=1524873&r2=1524874&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java Fri Sep 20 00:32:55 2013
@@ -18,7 +18,6 @@
package org.apache.hadoop.hive.ql.exec;
-import java.lang.reflect.Type;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.LinkedList;
@@ -27,6 +26,7 @@ import java.util.List;
import junit.framework.Assert;
import junit.framework.TestCase;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
@@ -35,6 +35,7 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
@@ -44,6 +45,7 @@ public class TestFunctionRegistry extend
public class TestUDF {
public void same(DoubleWritable x, DoubleWritable y) {}
public void same(HiveDecimalWritable x, HiveDecimalWritable y) {}
+ public void same(Text x, Text y) {}
public void one(IntWritable x, HiveDecimalWritable y) {}
public void one(IntWritable x, DoubleWritable y) {}
public void one(IntWritable x, IntWritable y) {}
@@ -57,8 +59,16 @@ public class TestFunctionRegistry extend
public void typeaffinity2(DoubleWritable x) {}
}
+ TypeInfo varchar5;
+ TypeInfo varchar10;
+ TypeInfo maxVarchar;
+
@Override
protected void setUp() {
+ String maxVarcharTypeName = "varchar(" + HiveVarchar.MAX_VARCHAR_LENGTH + ")";
+ maxVarchar = TypeInfoFactory.getPrimitiveTypeInfo(maxVarcharTypeName);
+ varchar10 = TypeInfoFactory.getPrimitiveTypeInfo("varchar(10)");
+ varchar5 = TypeInfoFactory.getPrimitiveTypeInfo("varchar(5)");
}
private void implicit(TypeInfo a, TypeInfo b, boolean convertible) {
@@ -72,6 +82,21 @@ public class TestFunctionRegistry extend
implicit(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.decimalTypeInfo, true);
implicit(TypeInfoFactory.dateTypeInfo, TypeInfoFactory.decimalTypeInfo, false);
implicit(TypeInfoFactory.timestampTypeInfo, TypeInfoFactory.decimalTypeInfo, false);
+ implicit(varchar10, TypeInfoFactory.stringTypeInfo, true);
+ implicit(TypeInfoFactory.stringTypeInfo, varchar10, true);
+
+ // Try with parameterized varchar types
+ TypeInfo varchar10 = TypeInfoFactory.getPrimitiveTypeInfo("varchar(10)");
+ TypeInfo varchar20 = TypeInfoFactory.getPrimitiveTypeInfo("varchar(20)");
+
+ implicit(varchar10, TypeInfoFactory.stringTypeInfo, true);
+ implicit(varchar20, TypeInfoFactory.stringTypeInfo, true);
+ implicit(TypeInfoFactory.stringTypeInfo, varchar10, true);
+ implicit(TypeInfoFactory.stringTypeInfo, varchar20, true);
+ implicit(varchar20, varchar10, true);
+
+ implicit(TypeInfoFactory.intTypeInfo, varchar10, true);
+ implicit(TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo, true);
}
private static List<Method> getMethods(Class<?> udfClass, String methodName) {
@@ -136,8 +161,8 @@ public class TestFunctionRegistry extend
}
assert(!throwException);
assertEquals(2, result.getParameterTypes().length);
- assertEquals(result.getParameterTypes()[0], a);
- assertEquals(result.getParameterTypes()[1], b);
+ assertEquals(a, result.getParameterTypes()[0]);
+ assertEquals(b, result.getParameterTypes()[1]);
}
public void testGetMethodInternal() {
@@ -166,12 +191,15 @@ public class TestFunctionRegistry extend
verify(TestUDF.class, "one", TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo,
IntWritable.class, IntWritable.class, false);
+ // Passing varchar arguments should prefer the version of evaluate() with Text args.
+ verify(TestUDF.class, "same", varchar5, varchar10, Text.class, Text.class, false);
+
verify(TestUDF.class, "mismatch", TypeInfoFactory.voidTypeInfo, TypeInfoFactory.intTypeInfo,
null, null, true);
}
private void common(TypeInfo a, TypeInfo b, TypeInfo result) {
- assertEquals(FunctionRegistry.getCommonClass(a,b), result);
+ assertEquals(result, FunctionRegistry.getCommonClass(a,b));
}
public void testCommonClass() {
@@ -183,10 +211,13 @@ public class TestFunctionRegistry extend
TypeInfoFactory.decimalTypeInfo);
common(TypeInfoFactory.doubleTypeInfo, TypeInfoFactory.stringTypeInfo,
TypeInfoFactory.stringTypeInfo);
+
+ common(TypeInfoFactory.stringTypeInfo, varchar10, TypeInfoFactory.stringTypeInfo);
+ common(varchar10, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
}
private void comparison(TypeInfo a, TypeInfo b, TypeInfo result) {
- assertEquals(FunctionRegistry.getCommonClassForComparison(a,b), result);
+ assertEquals(result, FunctionRegistry.getCommonClassForComparison(a,b));
}
public void testCommonClassComparison() {
@@ -198,6 +229,61 @@ public class TestFunctionRegistry extend
TypeInfoFactory.decimalTypeInfo);
comparison(TypeInfoFactory.doubleTypeInfo, TypeInfoFactory.stringTypeInfo,
TypeInfoFactory.doubleTypeInfo);
+
+ comparison(TypeInfoFactory.dateTypeInfo, TypeInfoFactory.stringTypeInfo,
+ TypeInfoFactory.stringTypeInfo);
+ comparison(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.dateTypeInfo,
+ TypeInfoFactory.stringTypeInfo);
+
+ comparison(TypeInfoFactory.stringTypeInfo, varchar10, TypeInfoFactory.stringTypeInfo);
+ comparison(varchar10, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
+ comparison(varchar5, varchar10, varchar10);
+ }
+
+ /**
+ * Method to print out the comparison/conversion behavior for data types.
+ */
+ public void testPrintTypeCompatibility() {
+ if (true) {
+ return;
+ }
+
+ String[] typeStrings = {
+ "void", "boolean", "tinyint", "smallint", "int", "bigint", "float", "double",
+ "string", "timestamp", "date", "binary", "decimal", "varchar(10)", "varchar(5)",
+ };
+ for (String cat1 : typeStrings) {
+ TypeInfo ti1 = null;
+ try {
+ ti1 = TypeInfoUtils.getTypeInfoFromTypeString(cat1);
+ } catch (Exception err) {
+ System.out.println(err);
+ System.out.println("Unable to get TypeInfo for " + cat1 + ", skipping ...");
+ continue;
+ }
+
+ for (String cat2 : typeStrings) {
+ TypeInfo commonClass = null;
+ boolean implicitConvertable = false;
+ try {
+ TypeInfo ti2 = TypeInfoUtils.getTypeInfoFromTypeString(cat2);
+ try {
+ commonClass = FunctionRegistry.getCommonClassForComparison(ti1, ti2);
+ //implicitConvertable = FunctionRegistry.implicitConvertable(ti1, ti2);
+ } catch (Exception err) {
+ System.out.println("Failed to get common class for " + ti1 + ", " + ti2 + ": " + err);
+ err.printStackTrace();
+ //System.out.println("Unable to get TypeInfo for " + cat2 + ", skipping ...");
+ }
+ System.out.println(cat1 + " - " + cat2 + ": " + commonClass);
+ //System.out.println(cat1 + " - " + cat2 + ": " + implicitConvertable);
+ } catch (Exception err) {
+ System.out.println(err);
+ System.out.println("Unable to get TypeInfo for " + cat2 + ", skipping ...");
+ continue;
+ }
+ }
+ }
}
private void unionAll(TypeInfo a, TypeInfo b, TypeInfo result) {
@@ -213,11 +299,26 @@ public class TestFunctionRegistry extend
TypeInfoFactory.decimalTypeInfo);
unionAll(TypeInfoFactory.doubleTypeInfo, TypeInfoFactory.stringTypeInfo,
TypeInfoFactory.stringTypeInfo);
+
+ unionAll(varchar5, varchar10, varchar10);
+ unionAll(varchar10, varchar5, varchar10);
+ unionAll(varchar10, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
+ unionAll(TypeInfoFactory.stringTypeInfo, varchar10, TypeInfoFactory.stringTypeInfo);
}
public void testGetTypeInfoForPrimitiveCategory() {
+ // varchar should take string length into account.
+ // varchar(5), varchar(10) => varchar(10)
+ assertEquals(varchar10, FunctionRegistry.getTypeInfoForPrimitiveCategory(
+ (PrimitiveTypeInfo) varchar5, (PrimitiveTypeInfo) varchar10, PrimitiveCategory.VARCHAR));
+ assertEquals(varchar10, FunctionRegistry.getTypeInfoForPrimitiveCategory(
+ (PrimitiveTypeInfo) varchar10, (PrimitiveTypeInfo) varchar5, PrimitiveCategory.VARCHAR));
+
// non-qualified types should simply return the TypeInfo associated with that type
assertEquals(TypeInfoFactory.stringTypeInfo, FunctionRegistry.getTypeInfoForPrimitiveCategory(
+ (PrimitiveTypeInfo) varchar10, (PrimitiveTypeInfo) TypeInfoFactory.stringTypeInfo,
+ PrimitiveCategory.STRING));
+ assertEquals(TypeInfoFactory.stringTypeInfo, FunctionRegistry.getTypeInfoForPrimitiveCategory(
(PrimitiveTypeInfo) TypeInfoFactory.stringTypeInfo,
(PrimitiveTypeInfo) TypeInfoFactory.stringTypeInfo,
PrimitiveCategory.STRING));
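The varchar cases above all pin down one widening rule: combining two varchars yields the wider one, and mixing varchar with string widens to string. A sketch of the varchar half of that rule; the helper below is illustrative, not the FunctionRegistry implementation:

    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

    final class VarcharWideningSketch {
      // varchar(m) combined with varchar(n) => varchar(max(m, n)),
      // per the testGetTypeInfoForPrimitiveCategory assertions above.
      static TypeInfo widerVarchar(int m, int n) {
        return TypeInfoFactory.getPrimitiveTypeInfo(
            "varchar(" + Math.max(m, n) + ")");
      }
    }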
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/history/TestHiveHistory.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/history/TestHiveHistory.java?rev=1524874&r1=1524873&r2=1524874&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/history/TestHiveHistory.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/history/TestHiveHistory.java Fri Sep 20 00:32:55 2013
@@ -64,6 +64,7 @@ public class TestHiveHistory extends Tes
protected void setUp() {
try {
conf = new HiveConf(HiveHistory.class);
+ SessionState.start(conf);
fs = FileSystem.get(conf);
if (fs.exists(tmppath) && !fs.getFileStatus(tmppath).isDir()) {
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java?rev=1524874&r1=1524873&r2=1524874&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java Fri Sep 20 00:32:55 2013
@@ -28,7 +28,6 @@ import java.util.List;
import java.util.Properties;
import java.util.Random;
-import junit.framework.TestCase;
import static org.junit.Assert.*;
import org.apache.commons.logging.Log;
@@ -40,6 +39,7 @@ import org.apache.hadoop.fs.LocalFileSys
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
@@ -63,55 +63,49 @@ import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.FileSplit;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
/**
* TestRCFile.
*
*/
-public class TestRCFile extends TestCase {
+public class TestRCFile {
private static final Log LOG = LogFactory.getLog(TestRCFile.class);
- private static Configuration conf = new Configuration();
-
- private static ColumnarSerDe serDe;
-
- private static Path file;
-
- private static FileSystem fs;
-
- private static Properties tbl;
-
- static {
- try {
- fs = FileSystem.getLocal(conf);
- Path dir = new Path(System.getProperty("test.data.dir", ".") + "/mapred");
- file = new Path(dir, "test_rcfile");
- fs.delete(dir, true);
- // the SerDe part is from TestLazySimpleSerDe
- serDe = new ColumnarSerDe();
- // Create the SerDe
- tbl = createProperties();
- serDe.initialize(conf, tbl);
- } catch (Exception e) {
- }
- }
+ private Configuration conf;
+ private ColumnarSerDe serDe;
+ private Path dir, file;
+ private FileSystem fs;
+ private Properties tbl;
// Data
- private static Writable[] expectedFieldsData = {
+ private Writable[] expectedFieldsData = {
new ByteWritable((byte) 123), new ShortWritable((short) 456),
new IntWritable(789), new LongWritable(1000), new DoubleWritable(5.3),
new Text("hive and hadoop"), null, null};
- private static Object[] expectedPartitalFieldsData = {null, null,
+ private Object[] expectedPartitalFieldsData = {null, null,
new IntWritable(789), new LongWritable(1000), null, null, null, null};
- private static BytesRefArrayWritable patialS = new BytesRefArrayWritable();
-
- private static byte[][] bytesArray = null;
-
- private static BytesRefArrayWritable s = null;
- static {
+ private BytesRefArrayWritable patialS = new BytesRefArrayWritable();
+ private byte[][] bytesArray;
+ private BytesRefArrayWritable s;
+
+ @Before
+ public void setup() throws Exception {
+ conf = new Configuration();
+ fs = FileSystem.getLocal(conf);
+ dir = new Path(System.getProperty("test.data.dir", ".") + "/mapred");
+ file = new Path(dir, "test_rcfile");
+ cleanup();
+ // the SerDe part is from TestLazySimpleSerDe
+ serDe = new ColumnarSerDe();
+ // Create the SerDe
+ tbl = createProperties();
+ serDe.initialize(conf, tbl);
try {
bytesArray = new byte[][] {"123".getBytes("UTF-8"),
"456".getBytes("UTF-8"), "789".getBytes("UTF-8"),
@@ -139,11 +133,27 @@ public class TestRCFile extends TestCase
patialS.set(7, new BytesRefWritable("NULL".getBytes("UTF-8")));
} catch (UnsupportedEncodingException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ @After
+ public void teardown() throws Exception {
+ cleanup();
+ }
+
+ private void cleanup() throws IOException {
+ if(fs != null && dir != null) {
+ fs.delete(dir, true);
+ if(fs.exists(dir)) {
+ throw new RuntimeException("Could not delete " + dir);
+ }
}
}
+ @Test
public void testSimpleReadAndWrite() throws IOException, SerDeException {
- fs.delete(file, true);
+ cleanup();
byte[][] record_1 = {"123".getBytes("UTF-8"), "456".getBytes("UTF-8"),
"789".getBytes("UTF-8"), "1000".getBytes("UTF-8"),
@@ -222,13 +232,14 @@ public class TestRCFile extends TestCase
reader.close();
}
-
+
/**
* Tests {@link RCFile.Reader#getColumn(int, BytesRefArrayWritable) } method.
* @throws IOException
*/
+ @Test
public void testGetColumn() throws IOException {
- fs.delete(file, true);
+ cleanup();
RCFileOutputFormat.setColumnNumber(conf, expectedFieldsData.length);
RCFile.Writer writer =
@@ -238,26 +249,26 @@ public class TestRCFile extends TestCase
new Text("cat"),
new Text("dog")),
new DefaultCodec());
-
+
byte[][] record_1 = {
- "123".getBytes("UTF-8"),
+ "123".getBytes("UTF-8"),
"456".getBytes("UTF-8"),
- "789".getBytes("UTF-8"),
+ "789".getBytes("UTF-8"),
"1000".getBytes("UTF-8"),
- "5.3".getBytes("UTF-8"),
+ "5.3".getBytes("UTF-8"),
"hive and hadoop".getBytes("UTF-8"),
- new byte[0],
+ new byte[0],
"NULL".getBytes("UTF-8") };
byte[][] record_2 = {
- "100".getBytes("UTF-8"),
+ "100".getBytes("UTF-8"),
"200".getBytes("UTF-8"),
- "123".getBytes("UTF-8"),
+ "123".getBytes("UTF-8"),
"1000".getBytes("UTF-8"),
- "5.3".getBytes("UTF-8"),
+ "5.3".getBytes("UTF-8"),
"hive and hadoop".getBytes("UTF-8"),
- new byte[0],
+ new byte[0],
"NULL".getBytes("UTF-8")};
-
+
BytesRefArrayWritable bytes = new BytesRefArrayWritable(record_1.length);
for (int i = 0; i < record_1.length; i++) {
BytesRefWritable cu = new BytesRefWritable(record_1[i], 0,
@@ -275,14 +286,14 @@ public class TestRCFile extends TestCase
writer.close();
RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
-
+
LongWritable rowID = new LongWritable();
assertTrue(reader.next(rowID));
assertEquals(rowID.get(), 0L);
-
+
assertTrue(reader.next(rowID));
assertEquals(rowID.get(), 1L);
-
+
BytesRefArrayWritable result = null;
BytesRefWritable brw;
for (int col=0; col < 8; col++) {
@@ -291,10 +302,10 @@ public class TestRCFile extends TestCase
assertNotNull(result2);
result = result2;
} else {
- // #getColumn(2) should return the instance passed in:
+ // #getColumn(2) should return the instance passed in:
assertSame(result2, result);
}
- // each column has height of 2:
+ // each column has height of 2:
assertEquals(2, result.size());
for (int row=0; row<result.size(); row++) {
brw = result.get(row);
@@ -304,15 +315,16 @@ public class TestRCFile extends TestCase
byte[] expectedData = (row == 0) ? record_1[col] : record_2[col];
assertArrayEquals("col="+col+" : row="+row, expectedData, actualData);
}
-
+
result.clear();
}
-
+
reader.close();
}
+ @Test
public void testReadCorruptFile() throws IOException, SerDeException {
- fs.delete(file, true);
+ cleanup();
byte[][] record = {null, null, null, null, null, null, null, null};
@@ -364,6 +376,7 @@ public class TestRCFile extends TestCase
reader.close();
}
+ @Test
public void testReadOldFileHeader() throws IOException {
String[] row = new String[]{"Tester", "Bart", "333 X St.", "Reno", "NV",
"USA"};
@@ -381,11 +394,13 @@ public class TestRCFile extends TestCase
reader.close();
}
+ @Test
public void testWriteAndFullyRead() throws IOException, SerDeException {
writeTest(fs, 10000, file, bytesArray);
fullyReadTest(fs, 10000, file);
}
+ @Test
public void testWriteAndPartialRead() throws IOException, SerDeException {
writeTest(fs, 10000, file, bytesArray);
partialReadTest(fs, 10000, file);
@@ -395,6 +410,14 @@ public class TestRCFile extends TestCase
public static void main(String[] args) throws Exception {
int count = 10000;
boolean create = true;
+ Configuration conf = new Configuration();
+ FileSystem fs = FileSystem.getLocal(conf);
+ Path file = null;
+ // the SerDe part is from TestLazySimpleSerDe
+ AbstractSerDe serDe = new ColumnarSerDe();
+ // Create the SerDe
+ Properties tbl = createProperties();
+ serDe.initialize(conf, tbl);
String usage = "Usage: RCFile " + "[-count N]" + " file";
if (args.length == 0) {
@@ -428,7 +451,11 @@ public class TestRCFile extends TestCase
// test.performanceTest();
test.testSimpleReadAndWrite();
-
+ byte[][] bytesArray = new byte[][] {"123".getBytes("UTF-8"),
+ "456".getBytes("UTF-8"), "789".getBytes("UTF-8"),
+ "1000".getBytes("UTF-8"), "5.3".getBytes("UTF-8"),
+ "hive and hadoop".getBytes("UTF-8"), new byte[0],
+ "NULL".getBytes("UTF-8")};
test.writeTest(fs, count, file, bytesArray);
test.fullyReadTest(fs, count, file);
test.partialReadTest(fs, count, file);
@@ -445,7 +472,7 @@ public class TestRCFile extends TestCase
private void writeTest(FileSystem fs, int count, Path file,
byte[][] fieldsData, Configuration conf) throws IOException, SerDeException {
- fs.delete(file, true);
+ cleanup();
RCFileOutputFormat.setColumnNumber(conf, fieldsData.length);
RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null,
@@ -566,6 +593,7 @@ public class TestRCFile extends TestCase
LOG.debug("reading fully costs:" + cost + " milliseconds");
}
+ @Test
public void testSynAndSplit() throws IOException {
splitBeforeSync();
splitRightBeforeSync();
@@ -574,6 +602,7 @@ public class TestRCFile extends TestCase
splitAfterSync();
}
+ @Test
public void testSync() throws IOException {
Path testDir = new Path(System.getProperty("test.data.dir", ".")
+ "/mapred/testsync");
@@ -585,7 +614,7 @@ public class TestRCFile extends TestCase
Configuration cloneConf = new Configuration(conf);
RCFileOutputFormat.setColumnNumber(cloneConf, bytesArray.length);
cloneConf.setInt(RCFile.RECORD_INTERVAL_CONF_STR, intervalRecordCount);
- RCFile.Writer writer = new RCFile.Writer(fs, cloneConf, testFile, null, codec);
+ RCFile.Writer writer = new RCFile.Writer(fs, cloneConf, testFile, null, codec);
BytesRefArrayWritable bytes = new BytesRefArrayWritable(bytesArray.length);
for (int i = 0; i < bytesArray.length; i++) {
@@ -605,7 +634,7 @@ public class TestRCFile extends TestCase
jobconf.setLong("mapred.min.split.size", fileLen);
InputSplit[] splits = inputFormat.getSplits(jobconf, 1);
RCFileRecordReader rr = new RCFileRecordReader(jobconf, (FileSplit)splits[0]);
- long lastSync = 0;
+ long lastSync = 0;
for(int i = 0; i < 2500; i++) {
rr.sync(i);
if(rr.getPos() < lastSync) {
@@ -709,6 +738,7 @@ public class TestRCFile extends TestCase
}
}
+ @Test
public void testCloseForErroneousRCFile() throws IOException {
Configuration conf = new Configuration();
LocalFileSystem fs = FileSystem.getLocal(conf);
@@ -740,7 +770,6 @@ public class TestRCFile extends TestCase
public void testRCFileHeader(char[] expected, Configuration conf)
throws IOException, SerDeException {
-
writeTest(fs, 10000, file, bytesArray, conf);
DataInputStream di = fs.open(file, 10000);
byte[] bytes = new byte[3];
@@ -751,6 +780,7 @@ public class TestRCFile extends TestCase
di.close();
}
+ @Test
public void testNonExplicitRCFileHeader() throws IOException, SerDeException {
Configuration conf = new Configuration();
conf.setBoolean(HiveConf.ConfVars.HIVEUSEEXPLICITRCFILEHEADER.varname, false);
@@ -758,6 +788,7 @@ public class TestRCFile extends TestCase
testRCFileHeader(expected, conf);
}
+ @Test
public void testExplicitRCFileHeader() throws IOException, SerDeException {
Configuration conf = new Configuration();
conf.setBoolean(HiveConf.ConfVars.HIVEUSEEXPLICITRCFILEHEADER.varname, true);
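The TestRCFile hunk is largely a JUnit 3 to JUnit 4 migration: fragile static-initializer state (which swallowed exceptions) becomes per-test fixtures. The general shape of that migration, as a standalone sketch unrelated to RCFile itself:

    import static org.junit.Assert.assertEquals;

    import org.junit.After;
    import org.junit.Before;
    import org.junit.Test;

    public class LifecycleSketch {
      private StringBuilder scratch;

      @Before
      public void setup() {
        // Runs before every @Test, replacing the old static { ... } block,
        // and is allowed to throw instead of hiding setup failures.
        scratch = new StringBuilder("fresh");
      }

      @After
      public void teardown() {
        // Runs after every @Test, so tests cannot leak state to each other.
        scratch = null;
      }

      @Test
      public void testScratchIsFresh() {
        assertEquals("fresh", scratch.toString());
      }
    }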
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java?rev=1524874&r1=1524873&r2=1524874&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java Fri Sep 20 00:32:55 2013
@@ -17,29 +17,38 @@
*/
package org.apache.hadoop.hive.ql.io.orc;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.BlockLocation;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FSInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
import org.apache.hadoop.hive.ql.io.InputFormatChecker;
import org.apache.hadoop.hive.serde2.SerDe;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.util.Progressable;
+import org.apache.hadoop.util.StringUtils;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
@@ -47,11 +56,17 @@ import org.junit.rules.TestName;
import java.io.DataInput;
import java.io.DataOutput;
+import java.io.DataOutputStream;
import java.io.IOException;
+import java.io.InputStream;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
+import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
public class TestInputOutputFormat {
@@ -93,6 +108,409 @@ public class TestInputOutputFormat {
}
@Test
+ public void testOverlap() throws Exception {
+ assertEquals(0, OrcInputFormat.SplitGenerator.getOverlap(100, 100,
+ 200, 100));
+ assertEquals(0, OrcInputFormat.SplitGenerator.getOverlap(0, 1000,
+ 2000, 100));
+ assertEquals(100, OrcInputFormat.SplitGenerator.getOverlap(1000, 1000,
+ 1500, 100));
+ assertEquals(250, OrcInputFormat.SplitGenerator.getOverlap(1000, 250,
+ 500, 2000));
+ assertEquals(100, OrcInputFormat.SplitGenerator.getOverlap(1000, 1000,
+ 1900, 1000));
+ assertEquals(500, OrcInputFormat.SplitGenerator.getOverlap(2000, 1000,
+ 2500, 2000));
+ }
+
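All six assertions above reduce to the intersection length of two half-open byte ranges, floored at zero. A minimal body consistent with them (an assumption, not necessarily the committed implementation):

    static long getOverlap(long offset, long length,
                           long otherOffset, long otherLength) {
      long end = offset + length;
      long otherEnd = otherOffset + otherLength;
      // intersection of [offset, end) and [otherOffset, otherEnd), floored at 0
      return Math.max(0, Math.min(end, otherEnd) - Math.max(offset, otherOffset));
    }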
+ @Test
+ public void testGetInputPaths() throws Exception {
+ conf.set("mapred.input.dir", "a,b,c");
+ assertArrayEquals(new Path[]{new Path("a"), new Path("b"), new Path("c")},
+ OrcInputFormat.getInputPaths(conf));
+ conf.set("mapred.input.dir", "/a/b/c/d/e");
+ assertArrayEquals(new Path[]{new Path("/a/b/c/d/e")},
+ OrcInputFormat.getInputPaths(conf));
+ conf.set("mapred.input.dir", "/a/b/c\\,d,/e/f\\,g/h");
+ assertArrayEquals(new Path[]{new Path("/a/b/c,d"), new Path("/e/f,g/h")},
+ OrcInputFormat.getInputPaths(conf));
+ }
+
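The escaped-comma cases pin down the parsing rules: "mapred.input.dir" is split on unescaped commas and each piece is then unescaped. A plausible body using Hadoop's StringUtils, which is imported above (a sketch, not the committed code):

    static Path[] getInputPaths(Configuration conf) {
      String dirs = conf.get("mapred.input.dir");
      String[] parts = StringUtils.split(dirs);   // honors backslash-escaped commas
      Path[] result = new Path[parts.length];
      for (int i = 0; i < parts.length; ++i) {
        result[i] = new Path(StringUtils.unEscapeString(parts[i]));
      }
      return result;
    }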
+ static class TestContext extends OrcInputFormat.Context {
+ List<Runnable> queue = new ArrayList<Runnable>();
+
+ TestContext(Configuration conf) {
+ super(conf);
+ }
+
+ @Override
+ public void schedule(Runnable runnable) {
+ queue.add(runnable);
+ }
+ }
+
+ @Test
+ public void testFileGenerator() throws Exception {
+ TestContext context = new TestContext(conf);
+ MockFileSystem fs = new MockFileSystem(conf,
+ new MockFile("/a/b/part-00", 1000, new byte[0]),
+ new MockFile("/a/b/part-01", 1000, new byte[0]),
+ new MockFile("/a/b/_part-02", 1000, new byte[0]),
+ new MockFile("/a/b/.part-03", 1000, new byte[0]),
+ new MockFile("/a/b/part-04", 1000, new byte[0]));
+ OrcInputFormat.FileGenerator gen =
+ new OrcInputFormat.FileGenerator(context, fs, new Path("/a/b"));
+ gen.run();
+ if (context.getErrors().size() > 0) {
+ for(Throwable th: context.getErrors()) {
+ System.out.println(StringUtils.stringifyException(th));
+ }
+ throw new IOException("Errors during file generation");
+ }
+ assertEquals(-1, context.getSchedulers());
+ assertEquals(3, context.queue.size());
+ assertEquals(new Path("/a/b/part-00"),
+ ((OrcInputFormat.SplitGenerator) context.queue.get(0)).getPath());
+ assertEquals(new Path("/a/b/part-01"),
+ ((OrcInputFormat.SplitGenerator) context.queue.get(1)).getPath());
+ assertEquals(new Path("/a/b/part-04"),
+ ((OrcInputFormat.SplitGenerator) context.queue.get(2)).getPath());
+ }
+
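Of the five mock files, only part-00, part-01, and part-04 reach the queue: names beginning with '_' or '.' are treated as hidden, as Hadoop's FileInputFormat does. The filter presumably looks like this sketch (an assumption):

    // Assumed filter: skip work files and hidden files, FileInputFormat-style.
    static boolean isHidden(Path path) {
      String name = path.getName();
      return name.startsWith("_") || name.startsWith(".");
    }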
+ static final Charset UTF8 = Charset.forName("UTF-8");
+
+ static class MockBlock {
+ int offset;
+ int length;
+ final String[] hosts;
+
+ MockBlock(String... hosts) {
+ this.hosts = hosts;
+ }
+ }
+
+ static class MockFile {
+ final Path path;
+ final int blockSize;
+ final int length;
+ final MockBlock[] blocks;
+ final byte[] content;
+
+ MockFile(String path, int blockSize, byte[] content, MockBlock... blocks) {
+ this.path = new Path(path);
+ this.blockSize = blockSize;
+ this.blocks = blocks;
+ this.content = content;
+ this.length = content.length;
+ int offset = 0;
+ for(MockBlock block: blocks) {
+ block.offset = offset;
+ block.length = Math.min(length - offset, blockSize);
+ offset += block.length;
+ }
+ }
+ }
+
+ static class MockInputStream extends FSInputStream {
+ final MockFile file;
+ int offset = 0;
+
+ public MockInputStream(MockFile file) throws IOException {
+ this.file = file;
+ }
+
+ @Override
+ public void seek(long offset) throws IOException {
+ this.offset = (int) offset;
+ }
+
+ @Override
+ public long getPos() throws IOException {
+ return offset;
+ }
+
+ @Override
+ public boolean seekToNewSource(long l) throws IOException {
+ return false;
+ }
+
+ @Override
+ public int read() throws IOException {
+ if (offset < file.length) {
+ return file.content[offset++] & 0xff;
+ }
+ return -1;
+ }
+ }
+
+ static class MockFileSystem extends FileSystem {
+ final MockFile[] files;
+ Path workingDir = new Path("/");
+
+ MockFileSystem(Configuration conf, MockFile... files) {
+ setConf(conf);
+ this.files = files;
+ }
+
+ @Override
+ public URI getUri() {
+ try {
+ return new URI("mock:///");
+ } catch (URISyntaxException err) {
+ throw new IllegalArgumentException("huh?", err);
+ }
+ }
+
+ @Override
+ public FSDataInputStream open(Path path, int i) throws IOException {
+ for(MockFile file: files) {
+ if (file.path.equals(path)) {
+ return new FSDataInputStream(new MockInputStream(file));
+ }
+ }
+ return null;
+ }
+
+ @Override
+ public FSDataOutputStream create(Path path, FsPermission fsPermission,
+ boolean b, int i, short i2, long l,
+ Progressable progressable
+ ) throws IOException {
+ throw new UnsupportedOperationException("no writes");
+ }
+
+ @Override
+ public FSDataOutputStream append(Path path, int i,
+ Progressable progressable
+ ) throws IOException {
+ throw new UnsupportedOperationException("no writes");
+ }
+
+ @Override
+ public boolean rename(Path path, Path path2) throws IOException {
+ return false;
+ }
+
+ @Override
+ public boolean delete(Path path) throws IOException {
+ return false;
+ }
+
+ @Override
+ public boolean delete(Path path, boolean b) throws IOException {
+ return false;
+ }
+
+ @Override
+ public FileStatus[] listStatus(Path path) throws IOException {
+ List<FileStatus> result = new ArrayList<FileStatus>();
+ for(MockFile file: files) {
+ if (file.path.getParent().equals(path)) {
+ result.add(getFileStatus(file.path));
+ }
+ }
+ return result.toArray(new FileStatus[result.size()]);
+ }
+
+ @Override
+ public void setWorkingDirectory(Path path) {
+ workingDir = path;
+ }
+
+ @Override
+ public Path getWorkingDirectory() {
+ return workingDir;
+ }
+
+ @Override
+ public boolean mkdirs(Path path, FsPermission fsPermission) {
+ return false;
+ }
+
+ @Override
+ public FileStatus getFileStatus(Path path) throws IOException {
+ for(MockFile file: files) {
+ if (file.path.equals(path)) {
+ return new FileStatus(file.length, false, 1, file.blockSize, 0, 0,
+ FsPermission.createImmutable((short) 0644), "owen", "group", // octal perm bits
+ file.path);
+ }
+ }
+ return null;
+ }
+
+ @Override
+ public BlockLocation[] getFileBlockLocations(FileStatus stat,
+ long start, long len) {
+ List<BlockLocation> result = new ArrayList<BlockLocation>();
+ for(MockFile file: files) {
+ if (file.path.equals(stat.getPath())) {
+ for(MockBlock block: file.blocks) {
+ if (OrcInputFormat.SplitGenerator.getOverlap(block.offset,
+ block.length, start, len) > 0) {
+ result.add(new BlockLocation(block.hosts, block.hosts,
+ block.offset, block.length));
+ }
+ }
+ return result.toArray(new BlockLocation[result.size()]);
+ }
+ }
+ return new BlockLocation[0];
+ }
+ }
+
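MockFileSystem is read-only: open() streams the MockFile's byte[] while every mutating call either returns false or throws. A quick usage sketch (the file name here is hypothetical):

    MockFileSystem fs = new MockFileSystem(conf,
        new MockFile("/tmp/demo", 512, "hello".getBytes(UTF8)));
    FSDataInputStream in = fs.open(new Path("/tmp/demo"), 4096);
    assertEquals('h', in.read());   // bytes come straight from the array
    in.close();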
+ static void fill(DataOutputBuffer out, long length) throws IOException {
+ for(int i=0; i < length; ++i) {
+ out.write(0);
+ }
+ }
+
+ /**
+ * Create the binary contents of an ORC file that has just enough
+ * information to test getInputSplits.
+ * @param stripeLengths the length of each stripe
+ * @return the bytes of the file
+ * @throws IOException
+ */
+ static byte[] createMockOrcFile(long... stripeLengths) throws IOException {
+ OrcProto.Footer.Builder footer = OrcProto.Footer.newBuilder();
+ final long headerLen = 3;
+ long offset = headerLen;
+ DataOutputBuffer buffer = new DataOutputBuffer();
+ for(long stripeLength: stripeLengths) {
+ footer.addStripes(OrcProto.StripeInformation.newBuilder()
+ .setOffset(offset)
+ .setIndexLength(0)
+ .setDataLength(stripeLength-10)
+ .setFooterLength(10)
+ .setNumberOfRows(1000));
+ offset += stripeLength;
+ }
+ fill(buffer, offset);
+ footer.addTypes(OrcProto.Type.newBuilder()
+ .setKind(OrcProto.Type.Kind.STRUCT)
+ .addFieldNames("col1")
+ .addSubtypes(1));
+ footer.addTypes(OrcProto.Type.newBuilder()
+ .setKind(OrcProto.Type.Kind.STRING));
+ footer.setNumberOfRows(1000 * stripeLengths.length)
+ .setHeaderLength(headerLen)
+ .setContentLength(offset - headerLen);
+ footer.build().writeTo(buffer);
+ int footerEnd = buffer.getLength();
+ OrcProto.PostScript ps =
+ OrcProto.PostScript.newBuilder()
+ .setCompression(OrcProto.CompressionKind.NONE)
+ .setFooterLength(footerEnd - offset)
+ .setMagic("ORC")
+ .build();
+ ps.writeTo(buffer);
+ buffer.write(buffer.getLength() - footerEnd);
+ byte[] result = new byte[buffer.getLength()];
+ System.arraycopy(buffer.getData(), 0, result, 0, buffer.getLength());
+ return result;
+ }
+
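createMockOrcFile writes only what split generation needs: a 3-byte header, zero-filled stripe bodies, a protobuf Footer, a PostScript, and one final byte holding the PostScript's serialized length. Reading the tail back would mirror that layout; a sketch (assumed reader logic, using protobuf's CodedInputStream):

    byte[] bytes = createMockOrcFile(100, 100);
    int psLen = bytes[bytes.length - 1] & 0xff;       // last byte = PostScript length
    OrcProto.PostScript ps = OrcProto.PostScript.parseFrom(
        com.google.protobuf.CodedInputStream.newInstance(
            bytes, bytes.length - 1 - psLen, psLen));
    int footerLen = (int) ps.getFooterLength();
    OrcProto.Footer footer = OrcProto.Footer.parseFrom(
        com.google.protobuf.CodedInputStream.newInstance(
            bytes, bytes.length - 1 - psLen - footerLen, footerLen));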
+ @Test
+ public void testAddSplit() throws Exception {
+ // create a file with 5 blocks spread around the cluster
+ MockFileSystem fs = new MockFileSystem(conf,
+ new MockFile("/a/file", 500,
+ createMockOrcFile(197, 300, 600, 200, 200, 100, 100, 100, 100, 100),
+ new MockBlock("host1-1", "host1-2", "host1-3"),
+ new MockBlock("host2-1", "host0", "host2-3"),
+ new MockBlock("host0", "host3-2", "host3-3"),
+ new MockBlock("host4-1", "host4-2", "host4-3"),
+ new MockBlock("host5-1", "host5-2", "host5-3")));
+ OrcInputFormat.Context context = new OrcInputFormat.Context(conf);
+ OrcInputFormat.SplitGenerator splitter =
+ new OrcInputFormat.SplitGenerator(context, fs,
+ fs.getFileStatus(new Path("/a/file")));
+ splitter.createSplit(0, 200);
+ FileSplit result = context.getResult(-1);
+ assertEquals(0, result.getStart());
+ assertEquals(200, result.getLength());
+ assertEquals("/a/file", result.getPath().toString());
+ String[] locs = result.getLocations();
+ assertEquals(3, locs.length);
+ assertEquals("host1-1", locs[0]);
+ assertEquals("host1-2", locs[1]);
+ assertEquals("host1-3", locs[2]);
+ splitter.createSplit(500, 600);
+ result = context.getResult(-1);
+ locs = result.getLocations();
+ assertEquals(3, locs.length);
+ assertEquals("host2-1", locs[0]);
+ assertEquals("host0", locs[1]);
+ assertEquals("host2-3", locs[2]);
+ splitter.createSplit(0, 2500);
+ result = context.getResult(-1);
+ locs = result.getLocations();
+ assertEquals(1, locs.length);
+ assertEquals("host0", locs[0]);
+ }
+
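The final assertions are the telling ones: a split covering the whole file reports only "host0", the one host that appears in two different blocks. That is consistent with weighting hosts by how many bytes of the split they hold locally; a sketch of such a tally (an assumed heuristic, reusing getOverlap from above):

    // Assumed heuristic: bytes of the split stored locally, per host.
    Map<String, Long> coverage = new HashMap<String, Long>();
    for (MockBlock block : file.blocks) {
      long overlap = OrcInputFormat.SplitGenerator.getOverlap(
          block.offset, block.length, start, length);
      if (overlap > 0) {
        for (String host : block.hosts) {
          Long old = coverage.get(host);
          coverage.put(host, (old == null ? 0L : old) + overlap);
        }
      }
    }
    // the best-covered hosts become the split's locations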
+ @Test
+ public void testSplitGenerator() throws Exception {
+ // create a file with 5 blocks spread around the cluster
+ long[] stripeSizes =
+ new long[]{197, 300, 600, 200, 200, 100, 100, 100, 100, 100};
+ MockFileSystem fs = new MockFileSystem(conf,
+ new MockFile("/a/file", 500,
+ createMockOrcFile(stripeSizes),
+ new MockBlock("host1-1", "host1-2", "host1-3"),
+ new MockBlock("host2-1", "host0", "host2-3"),
+ new MockBlock("host0", "host3-2", "host3-3"),
+ new MockBlock("host4-1", "host4-2", "host4-3"),
+ new MockBlock("host5-1", "host5-2", "host5-3")));
+ conf.setInt(OrcInputFormat.MAX_SPLIT_SIZE, 300);
+ conf.setInt(OrcInputFormat.MIN_SPLIT_SIZE, 200);
+ OrcInputFormat.Context context = new OrcInputFormat.Context(conf);
+ OrcInputFormat.SplitGenerator splitter =
+ new OrcInputFormat.SplitGenerator(context, fs,
+ fs.getFileStatus(new Path("/a/file")));
+ splitter.run();
+ if (context.getErrors().size() > 0) {
+ for(Throwable th: context.getErrors()) {
+ System.out.println(StringUtils.stringifyException(th));
+ }
+ throw new IOException("Errors during splitting");
+ }
+ FileSplit result = context.getResult(0);
+ assertEquals(3, result.getStart());
+ assertEquals(497, result.getLength());
+ result = context.getResult(1);
+ assertEquals(500, result.getStart());
+ assertEquals(600, result.getLength());
+ result = context.getResult(2);
+ assertEquals(1100, result.getStart());
+ assertEquals(400, result.getLength());
+ result = context.getResult(3);
+ assertEquals(1500, result.getStart());
+ assertEquals(300, result.getLength());
+ result = context.getResult(4);
+ assertEquals(1800, result.getStart());
+ assertEquals(200, result.getLength());
+ // test that min = 0 and max = 0 generates one split per stripe
+ conf.setInt(OrcInputFormat.MIN_SPLIT_SIZE, 0);
+ conf.setInt(OrcInputFormat.MAX_SPLIT_SIZE, 0);
+ context = new OrcInputFormat.Context(conf);
+ splitter = new OrcInputFormat.SplitGenerator(context, fs,
+ fs.getFileStatus(new Path("/a/file")));
+ splitter.run();
+ if (context.getErrors().size() > 0) {
+ for(Throwable th: context.getErrors()) {
+ System.out.println(StringUtils.stringifyException(th));
+ }
+ throw new IOException("Errors during splitting");
+ }
+ for(int i=0; i < stripeSizes.length; ++i) {
+ assertEquals("checking stripe " + i + " size",
+ stripeSizes[i], context.getResult(i).getLength());
+ }
+ }
+
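The two runs above fix the knobs' observable behavior: with MIN=200/MAX=300, small adjacent stripes are coalesced while the 600-byte stripe stands alone; with both set to 0, every stripe becomes its own split, which is what the positional loop at the end checks. A hypothetical probe for other settings, reusing the test's fixtures (getResult indexing assumed to match the runs above):

    conf.setInt(OrcInputFormat.MIN_SPLIT_SIZE, 100);
    conf.setInt(OrcInputFormat.MAX_SPLIT_SIZE, 1000);
    OrcInputFormat.Context probe = new OrcInputFormat.Context(conf);
    new OrcInputFormat.SplitGenerator(probe, fs,
        fs.getFileStatus(new Path("/a/file"))).run();
    FileSplit first = probe.getResult(0);
    System.out.println("start=" + first.getStart() + " length=" + first.getLength());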
+ @Test
public void testInOutFormat() throws Exception {
Properties properties = new Properties();
StructObjectInspector inspector;
@@ -144,7 +562,7 @@ public class TestInputOutputFormat {
IntObjectInspector intInspector =
(IntObjectInspector) fields.get(0).getFieldObjectInspector();
assertEquals(0.0, reader.getProgress(), 0.00001);
- assertEquals(0, reader.getPos());
+ assertEquals(3, reader.getPos());
while (reader.next(key, value)) {
assertEquals(++rowNum, intInspector.get(inspector.
getStructFieldData(serde.deserialize(value), fields.get(0))));
@@ -279,18 +697,7 @@ public class TestInputOutputFormat {
InputFormat<?,?> in = new OrcInputFormat();
FileInputFormat.setInputPaths(conf, testFilePath.toString());
InputSplit[] splits = in.getSplits(conf, 1);
- assertEquals(1, splits.length);
-
- // read the whole file
- conf.set("hive.io.file.readcolumn.ids", "0,1");
- org.apache.hadoop.mapred.RecordReader reader =
- in.getRecordReader(splits[0], conf, Reporter.NULL);
- Object key = reader.createKey();
- Object value = reader.createValue();
- assertEquals(0.0, reader.getProgress(), 0.00001);
- assertEquals(0, reader.getPos());
- assertEquals(false, reader.next(key, value));
- reader.close();
+ assertEquals(0, splits.length);
assertEquals(null, serde.getSerDeStats());
}
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java?rev=1524874&r1=1524873&r2=1524874&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java Fri Sep 20 00:32:55 2013
@@ -1019,12 +1019,10 @@ public class TestOrcFile {
stripeCount += 1;
rowCount += stripe.getNumberOfRows();
if (currentOffset < 0) {
- currentOffset = stripe.getOffset() + stripe.getIndexLength() +
- stripe.getDataLength() + stripe.getFooterLength();
+ currentOffset = stripe.getOffset() + stripe.getLength();
} else {
assertEquals(currentOffset, stripe.getOffset());
- currentOffset += stripe.getIndexLength() +
- stripe.getDataLength() + stripe.getFooterLength();
+ currentOffset += stripe.getLength();
}
}
assertEquals(reader.getNumberOfRows(), rowCount);
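The simplification relies on a convenience accessor; presumably StripeInformation.getLength() is the stripe's total on-disk footprint, which would make the two sides of this diff equivalent:

    // Assumed accessor semantics, matching the arithmetic it replaces:
    public long getLength() {
      return getIndexLength() + getDataLength() + getFooterLength();
    }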
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHive.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHive.java?rev=1524874&r1=1524873&r2=1524874&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHive.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHive.java Fri Sep 20 00:32:55 2013
@@ -30,6 +30,7 @@ import junit.framework.TestCase;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.Database;
@@ -39,6 +40,7 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.index.HiveIndex;
import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
+import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer;
@@ -61,6 +63,7 @@ public class TestHive extends TestCase {
protected void setUp() throws Exception {
super.setUp();
hiveConf = new HiveConf(this.getClass());
+ SessionState.start(hiveConf);
try {
hm = Hive.get(hiveConf);
} catch (Exception e) {
@@ -96,6 +99,7 @@ public class TestHive extends TestCase {
e1.printStackTrace();
assertTrue("Unable to drop table", false);
}
+
Table tbl = new Table(MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName);
List<FieldSchema> fields = tbl.getCols();
@@ -142,6 +146,7 @@ public class TestHive extends TestCase {
tbl.setStoredAsSubDirectories(false);
// create table
+ setNullCreateTableGrants();
try {
hm.createTable(tbl);
} catch (HiveException e) {
@@ -168,6 +173,14 @@ public class TestHive extends TestCase {
}
}
+ private void setNullCreateTableGrants() {
+ // Having non-null create-table grant privileges causes problems in
+ // tests that compare the underlying thrift Table object of a created
+ // table with the Table object fetched from the metastore: the fetch
+ // does not populate the privileges field in Table.
+ SessionState.get().setCreateTableGrants(null);
+ }
+
/**
* Tests create and fetch of a thrift based table.
*
@@ -190,6 +203,8 @@ public class TestHive extends TestCase {
tbl.setSerdeParam(serdeConstants.SERIALIZATION_FORMAT, TBinaryProtocol.class
.getName());
tbl.setStoredAsSubDirectories(false);
+
+ setNullCreateTableGrants();
try {
hm.createTable(tbl);
} catch (HiveException e) {
@@ -487,13 +502,26 @@ public class TestHive extends TestCase {
}
}
- public void testHiveRefreshDatabase() throws Throwable{
- String testDatabaseName = "test_database";
- Database testDatabase = new Database();
- testDatabase.setName(testDatabaseName);
- hm.createDatabase(testDatabase, true);
- hm.setCurrentDatabase(testDatabaseName);
- hm = Hive.get(hiveConf, true); //refresh Hive instance
- assertEquals(testDatabaseName, hm.getCurrentDatabase());
+ public void testHiveRefreshOnConfChange() throws Throwable{
+ Hive prevHiveObj = Hive.get();
+ Hive newHiveObj;
+
+ // if HiveConf has not changed, the same object should be returned
+ HiveConf newHconf = new HiveConf(hiveConf);
+ newHiveObj = Hive.get(newHconf);
+ assertTrue(prevHiveObj == newHiveObj);
+
+ // if the needs-refresh param is passed, a new object should be returned
+ newHiveObj = Hive.get(newHconf, true);
+ assertTrue(prevHiveObj != newHiveObj);
+
+ // if HiveConf has changed, a new object should be returned
+ prevHiveObj = Hive.get();
+ // change the value of a metastore config var in the new conf
+ newHconf = new HiveConf(hiveConf);
+ newHconf.setIntVar(ConfVars.METASTORETHRIFTCONNECTIONRETRIES,
+ newHconf.getIntVar(ConfVars.METASTORETHRIFTCONNECTIONRETRIES) + 1);
+ newHiveObj = Hive.get(newHconf);
+ assertTrue(prevHiveObj != newHiveObj);
}
}
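The renamed test pins down Hive.get's refresh contract: an unchanged conf returns the cached object, while an explicit refresh flag or a changed metastore variable returns a fresh one. A sketch of the comparison the test implies (HiveConf.metaVars is a real field; the method below is an assumption):

    // Assumed check: any differing metastore var forces a new Hive object.
    private static boolean metaVarsChanged(HiveConf oldConf, HiveConf newConf) {
      for (HiveConf.ConfVars var : HiveConf.metaVars) {
        String oldVal = oldConf.get(var.varname);
        String newVal = newConf.get(var.varname);
        if (oldVal == null ? newVal != null : !oldVal.equals(newVal)) {
          return true;
        }
      }
      return false;
    }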
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/parse/TestMacroSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/parse/TestMacroSemanticAnalyzer.java?rev=1524874&r1=1524873&r2=1524874&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/parse/TestMacroSemanticAnalyzer.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/parse/TestMacroSemanticAnalyzer.java Fri Sep 20 00:32:55 2013
@@ -27,6 +27,7 @@ import org.apache.hadoop.hive.conf.HiveC
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFMacro;
import org.junit.Before;
import org.junit.Test;
@@ -44,6 +45,7 @@ public class TestMacroSemanticAnalyzer {
context = new Context(conf);
parseDriver = new ParseDriver();
analyzer = new MacroSemanticAnalyzer(conf);
+ SessionState.start(conf);
}
private ASTNode parse(String command) throws Exception {
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/serde2/CustomNonSettableListObjectInspector1.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/serde2/CustomNonSettableListObjectInspector1.java?rev=1524874&r1=1524873&r2=1524874&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/serde2/CustomNonSettableListObjectInspector1.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/serde2/CustomNonSettableListObjectInspector1.java Fri Sep 20 00:32:55 2013
@@ -25,8 +25,11 @@ import org.apache.hadoop.hive.serde2.obj
public class CustomNonSettableListObjectInspector1 implements ListObjectInspector {
- ObjectInspector listElementObjectInspector;
+ private ObjectInspector listElementObjectInspector;
+ protected CustomNonSettableListObjectInspector1() {
+ super();
+ }
protected CustomNonSettableListObjectInspector1(
ObjectInspector listElementObjectInspector) {
this.listElementObjectInspector = listElementObjectInspector;
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/serde2/CustomNonSettableStructObjectInspector1.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/serde2/CustomNonSettableStructObjectInspector1.java?rev=1524874&r1=1524873&r2=1524874&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/serde2/CustomNonSettableStructObjectInspector1.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/serde2/CustomNonSettableStructObjectInspector1.java Fri Sep 20 00:32:55 2013
@@ -21,6 +21,10 @@ StructObjectInspector {
protected ObjectInspector fieldObjectInspector;
protected String fieldComment;
+ protected MyField() {
+ super();
+ }
+
public MyField(int fieldID, String fieldName,
ObjectInspector fieldObjectInspector) {
this.fieldID = fieldID;
@@ -58,10 +62,9 @@ StructObjectInspector {
protected List<MyField> fields;
- public String getTypeName() {
- return ObjectInspectorUtils.getStandardStructTypeName(this);
+ protected CustomNonSettableStructObjectInspector1() {
+ super();
}
-
/**
* Call ObjectInspectorFactory.getNonSettableStructObjectInspector instead.
*/
@@ -81,6 +84,10 @@ StructObjectInspector {
}
}
+ public String getTypeName() {
+ return ObjectInspectorUtils.getStandardStructTypeName(this);
+ }
+
public final Category getCategory() {
return Category.STRUCT;
}
Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/load_hdfs_file_with_space_in_the_name.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/load_hdfs_file_with_space_in_the_name.q?rev=1524874&r1=1524873&r2=1524874&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/load_hdfs_file_with_space_in_the_name.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/load_hdfs_file_with_space_in_the_name.q Fri Sep 20 00:32:55 2013
@@ -1,4 +1,4 @@
-dfs -mkdir hdfs:///tmp/test/;
+dfs ${system:test.dfs.mkdir} hdfs:///tmp/test/;
dfs -copyFromLocal ../data/files hdfs:///tmp/test/.;
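The substitution lets the test harness pick the mkdir form for the Hadoop line under test, since Hadoop 2's shell needs -p to create parent directories. Illustratively (the property value is an assumption, not part of this patch):

-- with test.dfs.mkdir set to "-mkdir -p", the first line expands to:
-- dfs -mkdir -p hdfs:///tmp/test/;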
Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/newline.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/newline.q?rev=1524874&r1=1524873&r2=1524874&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/newline.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/newline.q Fri Sep 20 00:32:55 2013
@@ -52,6 +52,6 @@ select * from tmp_tmp ORDER BY key ASC,
SELECT TRANSFORM(key, value) USING
'cat' AS (key, value) FROM tmp_tmp;
-SELECT key FROM (SELECT TRANSFORM ('a\tb', 'c') USING '/bin/cat' AS (key, value) FROM src limit 1)a ORDER BY key ASC;
+SELECT key FROM (SELECT TRANSFORM ('a\tb', 'c') USING 'cat' AS (key, value) FROM src limit 1)a ORDER BY key ASC;
-SELECT value FROM (SELECT TRANSFORM ('a\tb', 'c') USING '/bin/cat' AS (key, value) FROM src limit 1)a ORDER BY value ASC;
+SELECT value FROM (SELECT TRANSFORM ('a\tb', 'c') USING 'cat' AS (key, value) FROM src limit 1)a ORDER BY value ASC;