Posted to commits@hive.apache.org by gu...@apache.org on 2014/02/25 08:59:20 UTC
svn commit: r1571600 [16/46] - in /hive/branches/tez: ./
ant/src/org/apache/hadoop/hive/ant/
common/src/java/org/apache/hadoop/hive/common/
common/src/java/org/apache/hadoop/hive/common/type/
common/src/java/org/apache/hadoop/hive/conf/ common/src/java...
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1571600&r1=1571599&r2=1571600&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java Tue Feb 25 07:58:52 2014
@@ -40,28 +40,37 @@ import org.apache.hadoop.hive.ql.exec.Fu
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.ArgumentType;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.InputExpressionType;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Mode;
import org.apache.hadoop.hive.ql.exec.vector.expressions.*;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFAvgDecimal;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCount;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCountStar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFSumDecimal;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgDouble;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgLong;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxDecimal;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxDouble;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxLong;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxString;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinDecimal;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinDouble;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinLong;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinString;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdPopDecimal;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdPopDouble;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdPopLong;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdSampDecimal;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdSampDouble;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdSampLong;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFSumDouble;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFSumLong;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarPopDecimal;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarPopDouble;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarPopLong;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarSampDecimal;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarSampDouble;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarSampLong;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastLongToBooleanViaLongToLong;
@@ -81,22 +90,11 @@ import org.apache.hadoop.hive.ql.plan.Ex
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.udf.UDFConv;
-import org.apache.hadoop.hive.ql.udf.UDFHex;
-import org.apache.hadoop.hive.ql.udf.UDFToBoolean;
-import org.apache.hadoop.hive.ql.udf.UDFToByte;
-import org.apache.hadoop.hive.ql.udf.UDFToDouble;
-import org.apache.hadoop.hive.ql.udf.UDFToFloat;
-import org.apache.hadoop.hive.ql.udf.UDFToInteger;
-import org.apache.hadoop.hive.ql.udf.UDFToLong;
-import org.apache.hadoop.hive.ql.udf.UDFToShort;
-import org.apache.hadoop.hive.ql.udf.UDFToString;
+import org.apache.hadoop.hive.ql.udf.*;
import org.apache.hadoop.hive.ql.udf.generic.*;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter;
import org.apache.hadoop.hive.serde2.typeinfo.*;
/**
@@ -115,9 +113,9 @@ public class VectorizationContext {
//columnName to column position map
private final Map<String, Integer> columnMap;
private final int firstOutputColumnIndex;
- private final Mode operatorMode = Mode.PROJECTION;
- public static final Pattern decimalTypePattern = Pattern.compile("decimal.*");
+ public static final Pattern decimalTypePattern = Pattern.compile("decimal.*",
+ Pattern.CASE_INSENSITIVE);
//Map column number to type
private final OutputColumnManager ocm;
@@ -134,6 +132,7 @@ public class VectorizationContext {
castExpressionUdfs.add(GenericUDFToUtcTimestamp.class);
castExpressionUdfs.add(GenericUDFToChar.class);
castExpressionUdfs.add(GenericUDFToVarchar.class);
+ castExpressionUdfs.add(GenericUDFTimestamp.class);
castExpressionUdfs.add(UDFToByte.class);
castExpressionUdfs.add(UDFToBoolean.class);
castExpressionUdfs.add(UDFToDouble.class);
@@ -311,6 +310,10 @@ public class VectorizationContext {
return ve;
}
+ /**
+ * Given a udf and its children, return the common type to which the children's type should be
+ * cast.
+ */
private TypeInfo getCommonTypeForChildExpressions(GenericUDF genericUdf, List<ExprNodeDesc> children,
TypeInfo returnType) {
TypeInfo commonType;
@@ -342,7 +345,7 @@ public class VectorizationContext {
*/
private List<ExprNodeDesc> getChildExpressionsWithImplicitCast(GenericUDF genericUDF,
List<ExprNodeDesc> children, TypeInfo returnType) {
- if (isCastExpression(genericUDF)) {
+ if (isExcludedFromCast(genericUDF)) {
// No implicit cast needed
return children;
@@ -350,11 +353,12 @@ public class VectorizationContext {
if (children == null) {
return null;
}
+
TypeInfo commonType = getCommonTypeForChildExpressions(genericUDF, children, returnType);
List<ExprNodeDesc> childrenWithCasts = new ArrayList<ExprNodeDesc>();
boolean atleastOneCastNeeded = false;
for (ExprNodeDesc child : children) {
- ExprNodeDesc castExpression = getImplicitCastExpression(child, commonType);
+ ExprNodeDesc castExpression = getImplicitCastExpression(genericUDF, child, commonType);
if (castExpression != null) {
atleastOneCastNeeded = true;
childrenWithCasts.add(castExpression);
@@ -369,12 +373,18 @@ public class VectorizationContext {
}
}
- private boolean isCastExpression(GenericUDF genericUDF) {
- boolean ret = castExpressionUdfs.contains(genericUDF.getClass());
+ private boolean isExcludedFromCast(GenericUDF genericUDF) {
+ boolean ret = castExpressionUdfs.contains(genericUDF.getClass())
+ || (genericUDF instanceof GenericUDFRound);
+
if (ret) {
return ret;
- } else if (genericUDF instanceof GenericUDFBridge) {
- return castExpressionUdfs.contains(((GenericUDFBridge) genericUDF).getUdfClass());
+ }
+
+ if (genericUDF instanceof GenericUDFBridge) {
+ Class<?> udfClass = ((GenericUDFBridge) genericUDF).getUdfClass();
+ return castExpressionUdfs.contains(udfClass)
+ || UDFSign.class.isAssignableFrom(udfClass);
}
return false;
}
@@ -393,10 +403,19 @@ public class VectorizationContext {
return new DecimalTypeInfo(precision, scale);
}
- private ExprNodeDesc getImplicitCastExpression(ExprNodeDesc child, TypeInfo castType) {
+ /**
+ * The GenericUDFs might need their children's output to be cast to the given castType.
+ * This method returns a cast expression that would achieve the required casting.
+ */
+ private ExprNodeDesc getImplicitCastExpression(GenericUDF udf, ExprNodeDesc child, TypeInfo castType) {
TypeInfo inputTypeInfo = child.getTypeInfo();
String inputTypeString = inputTypeInfo.getTypeName();
String castTypeString = castType.getTypeName();
+
+ if (inputTypeString.equals(castTypeString)) {
+ // Nothing to be done
+ return null;
+ }
boolean inputTypeDecimal = false;
boolean castTypeDecimal = false;
if (decimalTypePattern.matcher(inputTypeString).matches()) {
@@ -406,72 +425,82 @@ public class VectorizationContext {
castTypeDecimal = true;
}
- // If castType is decimal, try not to lose precision for numeric types.
- if (castTypeDecimal) {
- castType = updatePrecision(inputTypeInfo, (DecimalTypeInfo) castType);
- }
-
if (castTypeDecimal && !inputTypeDecimal) {
+
// Cast the input to decimal
+ // If castType is decimal, try not to lose precision for numeric types.
+ castType = updatePrecision(inputTypeInfo, (DecimalTypeInfo) castType);
GenericUDFToDecimal castToDecimalUDF = new GenericUDFToDecimal();
List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
children.add(child);
ExprNodeDesc desc = new ExprNodeGenericFuncDesc(castType, castToDecimalUDF, children);
return desc;
} else if (!castTypeDecimal && inputTypeDecimal) {
+
// Cast decimal input to returnType
- UDF udfClass = null;
- GenericUDF genericUdf = null;
- PrimitiveObjectInspector.PrimitiveCategory primitiveCategory =
- ((PrimitiveTypeInfo) castType).getPrimitiveCategory();
- switch (((PrimitiveTypeInfo) castType).getPrimitiveCategory()) {
- case BYTE:
- udfClass = new UDFToByte();
- break;
- case SHORT:
- udfClass = new UDFToShort();
- break;
- case INT:
- udfClass = new UDFToInteger();
- break;
- case LONG:
- udfClass = new UDFToLong();
- break;
- case FLOAT:
- udfClass = new UDFToFloat();
- break;
- case DOUBLE:
- udfClass = new UDFToDouble();
- break;
- case STRING:
- udfClass = new UDFToString();
- break;
- case BOOLEAN:
- udfClass = new UDFToBoolean();
- break;
- case DATE:
- genericUdf = new GenericUDFToDate();
- break;
- case TIMESTAMP:
- genericUdf = new GenericUDFToUnixTimeStamp();
- break;
- case BINARY:
- genericUdf = new GenericUDFToBinary();
- break;
- }
- if (genericUdf == null) {
- genericUdf = new GenericUDFBridge();
- ((GenericUDFBridge) genericUdf).setUdfClassName(udfClass.getClass().getName());
- }
+ GenericUDF genericUdf = getGenericUDFForCast(castType);
List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
children.add(child);
ExprNodeDesc desc = new ExprNodeGenericFuncDesc(castType, genericUdf, children);
return desc;
+ } else {
+
+ // Casts to exact types including long to double etc. are needed in some special cases.
+ if (udf instanceof GenericUDFCoalesce) {
+ GenericUDF genericUdf = getGenericUDFForCast(castType);
+ List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
+ children.add(child);
+ ExprNodeDesc desc = new ExprNodeGenericFuncDesc(castType, genericUdf, children);
+ return desc;
+ }
}
- // No cast needed
return null;
}
+ private GenericUDF getGenericUDFForCast(TypeInfo castType) {
+ UDF udfClass = null;
+ GenericUDF genericUdf = null;
+ switch (((PrimitiveTypeInfo) castType).getPrimitiveCategory()) {
+ case BYTE:
+ udfClass = new UDFToByte();
+ break;
+ case SHORT:
+ udfClass = new UDFToShort();
+ break;
+ case INT:
+ udfClass = new UDFToInteger();
+ break;
+ case LONG:
+ udfClass = new UDFToLong();
+ break;
+ case FLOAT:
+ udfClass = new UDFToFloat();
+ break;
+ case DOUBLE:
+ udfClass = new UDFToDouble();
+ break;
+ case STRING:
+ udfClass = new UDFToString();
+ break;
+ case BOOLEAN:
+ udfClass = new UDFToBoolean();
+ break;
+ case DATE:
+ genericUdf = new GenericUDFToDate();
+ break;
+ case TIMESTAMP:
+ genericUdf = new GenericUDFToUnixTimeStamp();
+ break;
+ case BINARY:
+ genericUdf = new GenericUDFToBinary();
+ break;
+ }
+ if (genericUdf == null) {
+ genericUdf = new GenericUDFBridge();
+ ((GenericUDFBridge) genericUdf).setUdfClassName(udfClass.getClass().getName());
+ }
+ return genericUdf;
+ }
/* Return true if this is one of a small set of functions for which
@@ -568,7 +597,10 @@ public class VectorizationContext {
}
GenericUDF gudf = ((ExprNodeGenericFuncDesc) exprDesc).getGenericUDF();
- if (gudf instanceof GenericUDFOPNegative || gudf instanceof GenericUDFOPPositive) {
+ if (gudf instanceof GenericUDFOPNegative || gudf instanceof GenericUDFOPPositive
+ || castExpressionUdfs.contains(gudf)
+ || ((gudf instanceof GenericUDFBridge)
+ && castExpressionUdfs.contains(((GenericUDFBridge) gudf).getUdfClass()))) {
ExprNodeEvaluator<?> evaluator = ExprNodeEvaluatorFactory.get(exprDesc);
ObjectInspector output = evaluator.initialize(null);
Object constant = evaluator.evaluate(null);
@@ -775,6 +807,9 @@ public class VectorizationContext {
private VectorExpression getGenericUdfVectorExpression(GenericUDF udf,
List<ExprNodeDesc> childExpr, Mode mode, TypeInfo returnType) throws HiveException {
+
+ List<ExprNodeDesc> constantFoldedChildren = foldConstantsForUnaryExprs(childExpr);
+ childExpr = constantFoldedChildren;
//First handle special cases
if (udf instanceof GenericUDFBetween) {
return getBetweenFilterExpression(childExpr, mode);
@@ -782,6 +817,10 @@ public class VectorizationContext {
return getInExpression(childExpr, mode);
} else if (udf instanceof GenericUDFOPPositive) {
return getIdentityExpression(childExpr);
+ } else if (udf instanceof GenericUDFCoalesce) {
+
+ // Coalesce is a special case because it can take a variable number of arguments.
+ return getCoalesceExpression(childExpr, returnType);
} else if (udf instanceof GenericUDFBridge) {
VectorExpression v = getGenericUDFBridgeVectorExpression((GenericUDFBridge) udf, childExpr, mode,
returnType);
@@ -798,7 +837,6 @@ public class VectorizationContext {
udfClass = ((GenericUDFBridge) udf).getUdfClass();
}
- List<ExprNodeDesc> constantFoldedChildren = foldConstantsForUnaryExprs(childExpr);
VectorExpression ve = getVectorExpressionForUdf(udfClass, constantFoldedChildren, mode, returnType);
if (ve == null) {
@@ -808,6 +846,33 @@ public class VectorizationContext {
return ve;
}
+ private VectorExpression getCoalesceExpression(List<ExprNodeDesc> childExpr, TypeInfo returnType)
+ throws HiveException {
+ int[] inputColumns = new int[childExpr.size()];
+ VectorExpression[] vectorChildren = null;
+ try {
+ vectorChildren = getVectorExpressions(childExpr, Mode.PROJECTION);
+
+ int i = 0;
+ for (VectorExpression ve : vectorChildren) {
+ inputColumns[i++] = ve.getOutputColumn();
+ }
+
+ int outColumn = ocm.allocateOutputColumn(getNormalizedTypeName(returnType.getTypeName()));
+ VectorCoalesce vectorCoalesce = new VectorCoalesce(inputColumns, outColumn);
+ vectorCoalesce.setOutputType(returnType.getTypeName());
+ vectorCoalesce.setChildExpressions(vectorChildren);
+ return vectorCoalesce;
+ } finally {
+ // Free the output columns of the child expressions.
+ if (vectorChildren != null) {
+ for (VectorExpression v : vectorChildren) {
+ ocm.freeOutputColumn(v.getOutputColumn());
+ }
+ }
+ }
+ }
+
/**
* Create a filter or boolean-valued expression for column IN ( <list-of-constants> )
*/
@@ -816,7 +881,7 @@ public class VectorizationContext {
ExprNodeDesc colExpr = childExpr.get(0);
TypeInfo colTypeInfo = colExpr.getTypeInfo();
String colType = colExpr.getTypeString();
-
+
// prepare arguments for createVectorExpression
List<ExprNodeDesc> childrenForInList =
foldConstantsForUnaryExprs(childExpr.subList(1, childExpr.size()));
@@ -1058,7 +1123,7 @@ public class VectorizationContext {
String colType = colExpr.getTypeString();
// prepare arguments for createVectorExpression
- List<ExprNodeDesc> childrenAfterNot = foldConstantsForUnaryExprs(childExpr.subList(1, 4));
+ List<ExprNodeDesc> childrenAfterNot = foldConstantsForUnaryExprs(childExpr.subList(1, 4));;
// determine class
Class<?> cl = null;
@@ -1188,6 +1253,10 @@ public class VectorizationContext {
|| resultType.equalsIgnoreCase("long");
}
+ public static boolean isDecimalFamily(String colType) {
+ return decimalTypePattern.matcher(colType).matches();
+ }
+
private Object getScalarValue(ExprNodeConstantDesc constDesc)
throws HiveException {
if (constDesc.getTypeString().equalsIgnoreCase("String")) {
@@ -1300,14 +1369,13 @@ public class VectorizationContext {
}
}
- static String getNormalizedTypeName(String colType) {
+ static String getNormalizedTypeName(String colType){
String normalizedType = null;
if (colType.equalsIgnoreCase("Double") || colType.equalsIgnoreCase("Float")) {
normalizedType = "Double";
} else if (colType.equalsIgnoreCase("String")) {
normalizedType = "String";
- } else if (decimalTypePattern.matcher(colType.toLowerCase()).matches()) {
-
+ } else if (decimalTypePattern.matcher(colType).matches()) {
//Return the decimal type as is, it includes scale and precision.
normalizedType = colType;
} else {
@@ -1320,31 +1388,43 @@ public class VectorizationContext {
{"min", "Long", VectorUDAFMinLong.class},
{"min", "Double", VectorUDAFMinDouble.class},
{"min", "String", VectorUDAFMinString.class},
+ {"min", "Decimal",VectorUDAFMinDecimal.class},
{"max", "Long", VectorUDAFMaxLong.class},
{"max", "Double", VectorUDAFMaxDouble.class},
{"max", "String", VectorUDAFMaxString.class},
+ {"max", "Decimal",VectorUDAFMaxDecimal.class},
{"count", null, VectorUDAFCountStar.class},
{"count", "Long", VectorUDAFCount.class},
{"count", "Double", VectorUDAFCount.class},
{"count", "String", VectorUDAFCount.class},
+ {"count", "Decimal",VectorUDAFCount.class},
{"sum", "Long", VectorUDAFSumLong.class},
{"sum", "Double", VectorUDAFSumDouble.class},
+ {"sum", "Decimal",VectorUDAFSumDecimal.class},
{"avg", "Long", VectorUDAFAvgLong.class},
{"avg", "Double", VectorUDAFAvgDouble.class},
+ {"avg", "Decimal",VectorUDAFAvgDecimal.class},
{"variance", "Long", VectorUDAFVarPopLong.class},
{"var_pop", "Long", VectorUDAFVarPopLong.class},
{"variance", "Double", VectorUDAFVarPopDouble.class},
{"var_pop", "Double", VectorUDAFVarPopDouble.class},
+ {"variance", "Decimal",VectorUDAFVarPopDecimal.class},
+ {"var_pop", "Decimal",VectorUDAFVarPopDecimal.class},
{"var_samp", "Long", VectorUDAFVarSampLong.class},
{"var_samp" , "Double", VectorUDAFVarSampDouble.class},
+ {"var_samp" , "Decimal",VectorUDAFVarSampDecimal.class},
{"std", "Long", VectorUDAFStdPopLong.class},
{"stddev", "Long", VectorUDAFStdPopLong.class},
{"stddev_pop","Long", VectorUDAFStdPopLong.class},
{"std", "Double", VectorUDAFStdPopDouble.class},
{"stddev", "Double", VectorUDAFStdPopDouble.class},
{"stddev_pop","Double", VectorUDAFStdPopDouble.class},
+ {"std", "Decimal",VectorUDAFStdPopDecimal.class},
+ {"stddev", "Decimal",VectorUDAFStdPopDecimal.class},
+ {"stddev_pop","Decimal",VectorUDAFStdPopDecimal.class},
{"stddev_samp","Long", VectorUDAFStdSampLong.class},
{"stddev_samp","Double",VectorUDAFStdSampDouble.class},
+ {"stddev_samp","Decimal",VectorUDAFStdSampDecimal.class},
};
public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc)
@@ -1364,6 +1444,9 @@ public class VectorizationContext {
if (paramDescList.size() > 0) {
ExprNodeDesc inputExpr = paramDescList.get(0);
inputType = getNormalizedTypeName(inputExpr.getTypeString());
+ if (decimalTypePattern.matcher(inputType).matches()) {
+ inputType = "Decimal";
+ }
}
for (Object[] aggDef : aggregatesDefinition) {
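The aggregate table and the decimal check just above rely on decimalTypePattern being case-insensitive: any spelling of a decimal type name is collapsed to the single lookup key "Decimal" before aggregatesDefinition is scanned. A minimal stand-alone sketch of that normalization step, assuming only the pattern from the diff (the class and helper names below are illustrative, not part of the commit):

    import java.util.regex.Pattern;

    public class DecimalTypeNormalizer {
        // Same pattern as VectorizationContext.decimalTypePattern after this change.
        private static final Pattern DECIMAL_TYPE_PATTERN =
            Pattern.compile("decimal.*", Pattern.CASE_INSENSITIVE);

        // Collapse any decimal(p,s) spelling to the single lookup key "Decimal".
        static String normalizeForAggregateLookup(String typeName) {
            return DECIMAL_TYPE_PATTERN.matcher(typeName).matches() ? "Decimal" : typeName;
        }

        public static void main(String[] args) {
            System.out.println(normalizeForAggregateLookup("decimal(10,2)")); // Decimal
            System.out.println(normalizeForAggregateLookup("DECIMAL(10,2)")); // Decimal
            System.out.println(normalizeForAggregateLookup("Double"));        // Double
        }
    }
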
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java?rev=1571600&r1=1571599&r2=1571600&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java Tue Feb 25 07:58:52 2014
@@ -394,7 +394,7 @@ public class VectorizedRowBatchCtx {
return new DoubleColumnVector(defaultSize);
} else if (type.equalsIgnoreCase("string")) {
return new BytesColumnVector(defaultSize);
- } else if (VectorizationContext.decimalTypePattern.matcher(type.toLowerCase()).matches()){
+ } else if (VectorizationContext.decimalTypePattern.matcher(type).matches()){
int [] precisionScale = getScalePrecisionFromDecimalType(type);
return new DecimalColumnVector(defaultSize, precisionScale[0], precisionScale[1]);
} else {
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalUtil.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalUtil.java?rev=1571600&r1=1571599&r2=1571600&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalUtil.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalUtil.java Tue Feb 25 07:58:52 2014
@@ -19,15 +19,25 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
import org.apache.hadoop.hive.common.type.Decimal128;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.SqlMathUtil;
import org.apache.hadoop.hive.common.type.UnsignedInt128;
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.udf.generic.RoundUtils;
/**
* Utility functions for vector operations on decimal values.
*/
public class DecimalUtil {
+ public static final Decimal128 DECIMAL_ONE = new Decimal128();
+ private static final UnsignedInt128 scratchUInt128 = new UnsignedInt128();
+
+ static {
+ DECIMAL_ONE.update(1L, (short) 0);
+ }
+
// Addition with overflow check. Overflow produces NULL output.
public static void addChecked(int i, Decimal128 left, Decimal128 right,
DecimalColumnVector outputColVector) {
@@ -87,4 +97,73 @@ public class DecimalUtil {
outputColVector.isNull[i] = true;
}
}
+
+ public static void floor(int i, Decimal128 input, DecimalColumnVector outputColVector) {
+ try {
+ Decimal128 result = outputColVector.vector[i];
+ result.update(input);
+ result.zeroFractionPart(scratchUInt128);
+ result.changeScaleDestructive(outputColVector.scale);
+ if ((result.compareTo(input) != 0) && input.getSignum() < 0) {
+ result.subtractDestructive(DECIMAL_ONE, outputColVector.scale);
+ }
+ } catch (ArithmeticException e) {
+ outputColVector.noNulls = false;
+ outputColVector.isNull[i] = true;
+ }
+ }
+
+ public static void ceiling(int i, Decimal128 input, DecimalColumnVector outputColVector) {
+ try {
+ Decimal128 result = outputColVector.vector[i];
+ result.update(input);
+ result.zeroFractionPart(scratchUInt128);
+ result.changeScaleDestructive(outputColVector.scale);
+ if ((result.compareTo(input) != 0) && input.getSignum() > 0) {
+ result.addDestructive(DECIMAL_ONE, outputColVector.scale);
+ }
+ } catch (ArithmeticException e) {
+ outputColVector.noNulls = false;
+ outputColVector.isNull[i] = true;
+ }
+ }
+
+ public static void round(int i, Decimal128 input, DecimalColumnVector outputColVector) {
+ HiveDecimal inputHD = HiveDecimal.create(input.toBigDecimal());
+ HiveDecimal result = RoundUtils.round(inputHD, outputColVector.scale);
+ if (result == null) {
+ outputColVector.noNulls = false;
+ outputColVector.isNull[i] = true;
+ } else {
+ outputColVector.vector[i].update(result.bigDecimalValue().toPlainString(), outputColVector.scale);
+ }
+ }
+
+ public static void sign(int i, Decimal128 input, LongColumnVector outputColVector) {
+ outputColVector.vector[i] = input.getSignum();
+ }
+
+ public static void abs(int i, Decimal128 input, DecimalColumnVector outputColVector) {
+ Decimal128 result = outputColVector.vector[i];
+ try {
+ result.update(input);
+ result.absDestructive();
+ result.changeScaleDestructive(outputColVector.scale);
+ } catch (ArithmeticException e) {
+ outputColVector.noNulls = false;
+ outputColVector.isNull[i] = true;
+ }
+ }
+
+ public static void negate(int i, Decimal128 input, DecimalColumnVector outputColVector) {
+ Decimal128 result = outputColVector.vector[i];
+ try {
+ result.update(input);
+ result.negateDestructive();
+ result.changeScaleDestructive(outputColVector.scale);
+ } catch (ArithmeticException e) {
+ outputColVector.noNulls = false;
+ outputColVector.isNull[i] = true;
+ }
+ }
}
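The floor and ceiling helpers added above first truncate the fraction part and then adjust by one whole unit when truncation changed a negative input (floor) or a positive input (ceiling). A small BigDecimal sketch of the same semantics, useful as a mental model or a test oracle; everything below is illustrative and does not use the Decimal128 API from the diff:

    import java.math.BigDecimal;
    import java.math.RoundingMode;

    public class DecimalFloorCeilSketch {
        // floor(-1.5) == -2: truncation gives -1, and since the input is negative
        // and truncation changed the value, subtract one.
        static BigDecimal floor(BigDecimal input) {
            BigDecimal truncated = input.setScale(0, RoundingMode.DOWN);
            if (truncated.compareTo(input) != 0 && input.signum() < 0) {
                truncated = truncated.subtract(BigDecimal.ONE);
            }
            return truncated;
        }

        // ceiling(1.5) == 2: truncation gives 1, and since the input is positive
        // and truncation changed the value, add one.
        static BigDecimal ceiling(BigDecimal input) {
            BigDecimal truncated = input.setScale(0, RoundingMode.DOWN);
            if (truncated.compareTo(input) != 0 && input.signum() > 0) {
                truncated = truncated.add(BigDecimal.ONE);
            }
            return truncated;
        }

        public static void main(String[] args) {
            System.out.println(floor(new BigDecimal("-1.5")));   // -2
            System.out.println(ceiling(new BigDecimal("1.5")));  // 2
            System.out.println(floor(new BigDecimal("2.0")));    // 2 (already integral)
        }
    }
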
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadEntity.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadEntity.java?rev=1571600&r1=1571599&r2=1571600&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadEntity.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadEntity.java Tue Feb 25 07:58:52 2014
@@ -22,6 +22,7 @@ import java.io.Serializable;
import java.util.HashSet;
import java.util.Set;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
@@ -35,10 +36,16 @@ public class ReadEntity extends Entity i
// Consider a query like: select * from V, where the view V is defined as:
// select * from T
// The inputs will contain V and T (parent: V)
+ // T will be marked as an indirect entity using the isDirect flag.
+ // This will help distinguish it from the case where T is a direct dependency.
+ // For example, in the case of "select * from V join T ..." T would be a direct dependency.
+ private boolean isDirect = true;
// For views, the entities can be nested - by default, entities are at the top level
private final Set<ReadEntity> parents = new HashSet<ReadEntity>();
+
+
/**
* For serialization only.
*/
@@ -74,6 +81,11 @@ public class ReadEntity extends Entity i
initParent(parent);
}
+ public ReadEntity(Table t, ReadEntity parent, boolean isDirect) {
+ this(t, parent);
+ this.isDirect = isDirect;
+ }
+
/**
* Constructor given a partition.
*
@@ -89,6 +101,23 @@ public class ReadEntity extends Entity i
initParent(parent);
}
+ public ReadEntity(Partition p, ReadEntity parent, boolean isDirect) {
+ this(p, parent);
+ this.isDirect = isDirect;
+ }
+
+ /**
+ * Constructor for a file.
+ *
+ * @param d
+ * The name of the directory that is being written to.
+ * @param islocal
+ * Flag to decide whether this directory is local or in dfs.
+ */
+ public ReadEntity(Path d, boolean islocal) {
+ super(d.toString(), islocal, true);
+ }
+
public Set<ReadEntity> getParents() {
return parents;
}
@@ -109,4 +138,15 @@ public class ReadEntity extends Entity i
return false;
}
}
+
+ public boolean isDirect() {
+ return isDirect;
+ }
+
+ public void setDirect(boolean isDirect) {
+ this.isDirect = isDirect;
+ }
+
+
+
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/hooks/WriteEntity.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/hooks/WriteEntity.java?rev=1571600&r1=1571599&r2=1571600&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/hooks/WriteEntity.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/hooks/WriteEntity.java Tue Feb 25 07:58:52 2014
@@ -22,8 +22,8 @@ import java.io.Serializable;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.api.Database;
-import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.DummyPartition;
+import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
/**
@@ -32,6 +32,8 @@ import org.apache.hadoop.hive.ql.metadat
*/
public class WriteEntity extends Entity implements Serializable {
+ private boolean isTempURI = false;
+
/**
* Only used by serialization.
*/
@@ -50,7 +52,7 @@ public class WriteEntity extends Entity
* Table that is written to.
*/
public WriteEntity(Table t) {
- super(t, true);
+ this(t, true);
}
public WriteEntity(Table t, boolean complete) {
@@ -80,7 +82,22 @@ public class WriteEntity extends Entity
* Flag to decide whether this directory is local or in dfs.
*/
public WriteEntity(Path d, boolean islocal) {
+ this(d, islocal, false);
+ }
+
+ /**
+ * Constructor for a file.
+ *
+ * @param d
+ * The name of the directory that is being written to.
+ * @param islocal
+ * Flag to decide whether this directory is local or in dfs.
+ * @param isTemp
+ * True if this is a temporary location such as scratch dir
+ */
+ public WriteEntity(Path d, boolean islocal, boolean isTemp) {
super(d.toString(), islocal, true);
+ this.isTempURI = isTemp;
}
/**
@@ -99,4 +116,9 @@ public class WriteEntity extends Entity
return false;
}
}
+
+ public boolean isTempURI() {
+ return isTempURI;
+ }
+
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java?rev=1571600&r1=1571599&r2=1571600&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java Tue Feb 25 07:58:52 2014
@@ -33,7 +33,7 @@ public final class FileDump {
for(String filename: args) {
System.out.println("Structure for " + filename);
Path path = new Path(filename);
- Reader reader = OrcFile.createReader(path.getFileSystem(conf), path);
+ Reader reader = OrcFile.createReader(path.getFileSystem(conf), path, conf);
RecordReaderImpl rows = (RecordReaderImpl) reader.rows(null);
System.out.println("Rows: " + reader.getNumberOfRows());
System.out.println("Compression: " + reader.getCompression());
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java?rev=1571600&r1=1571599&r2=1571600&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java Tue Feb 25 07:58:52 2014
@@ -114,14 +114,15 @@ public final class OrcFile {
* @return a new ORC file reader.
* @throws IOException
*/
- public static Reader createReader(FileSystem fs, Path path
- ) throws IOException {
- return new ReaderImpl(fs, path);
+ public static Reader createReader(FileSystem fs, Path path,
+ Configuration conf) throws IOException {
+ return new ReaderImpl(fs, path, conf);
}
- public static Reader createReader(FileSystem fs, Path path, FileMetaInfo fileMetaInfo)
+ public static Reader createReader(FileSystem fs, Path path,
+ FileMetaInfo fileMetaInfo, Configuration conf)
throws IOException {
- return new ReaderImpl(fs, path, fileMetaInfo);
+ return new ReaderImpl(fs, path, fileMetaInfo, conf);
}
/**
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java?rev=1571600&r1=1571599&r2=1571600&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java Tue Feb 25 07:58:52 2014
@@ -270,16 +270,16 @@ public class OrcInputFormat implements
if(!(fSplit instanceof OrcSplit)){
//If CombineHiveInputFormat is used, it works with FileSplit and not OrcSplit
- reader = OrcFile.createReader(fs, path);
+ reader = OrcFile.createReader(fs, path, conf);
} else {
//We have OrcSplit, which may have footer metadata cached, so use the appropriate reader
//constructor
OrcSplit orcSplit = (OrcSplit) fSplit;
if (orcSplit.hasFooter()) {
FileMetaInfo fMetaInfo = orcSplit.getFileMetaInfo();
- reader = OrcFile.createReader(fs, path, fMetaInfo);
+ reader = OrcFile.createReader(fs, path, fMetaInfo, conf);
} else {
- reader = OrcFile.createReader(fs, path);
+ reader = OrcFile.createReader(fs, path, conf);
}
}
return new OrcRecordReader(reader, conf, fSplit.getStart(), fSplit.getLength());
@@ -299,7 +299,7 @@ public class OrcInputFormat implements
}
for (FileStatus file : files) {
try {
- OrcFile.createReader(fs, file.getPath());
+ OrcFile.createReader(fs, file.getPath(), conf);
} catch (IOException e) {
return false;
}
@@ -729,7 +729,10 @@ public class OrcInputFormat implements
idx++;
// eliminate stripes that doesn't satisfy the predicate condition
- if (sarg != null && !isStripeSatisfyPredicate(stripeStats.get(idx), sarg, filterColumns)) {
+ if (sarg != null &&
+ stripeStats != null &&
+ idx < stripeStats.size() &&
+ !isStripeSatisfyPredicate(stripeStats.get(idx), sarg, filterColumns)) {
// if a stripe doesn't satisfy predicate condition then skip it
if (LOG.isDebugEnabled()) {
@@ -757,7 +760,7 @@ public class OrcInputFormat implements
currentOffset = stripe.getOffset();
currentLength = stripe.getLength();
} else {
- currentLength += stripe.getLength();
+ currentLength = (stripe.getOffset() + stripe.getLength()) - currentOffset;
}
if (currentLength >= context.maxSize) {
createSplit(currentOffset, currentLength, fileMetaInfo);
@@ -794,14 +797,14 @@ public class OrcInputFormat implements
types = fileInfo.types;
// For multiple runs, in case sendSplitsInFooter changes
if (fileMetaInfo == null && context.footerInSplits) {
- orcReader = OrcFile.createReader(fs, file.getPath());
+ orcReader = OrcFile.createReader(fs, file.getPath(), context.conf);
fileInfo.fileMetaInfo = orcReader.getFileMetaInfo();
fileInfo.metadata = orcReader.getMetadata();
fileInfo.types = orcReader.getTypes();
}
}
if (!found) {
- orcReader = OrcFile.createReader(fs, file.getPath());
+ orcReader = OrcFile.createReader(fs, file.getPath(), context.conf);
stripes = orcReader.getStripes();
metadata = orcReader.getMetadata();
types = orcReader.getTypes();
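The currentLength change above switches from accumulating stripe lengths to computing end-of-stripe minus split start, so any gap between adjacent stripes counts toward context.maxSize and the resulting split covers a contiguous byte range. A self-contained sketch of the difference, with made-up stripe offsets and lengths:

    public class SplitLengthSketch {
        public static void main(String[] args) {
            // Two stripes with a 20-byte gap between them.
            long[] offsets = {0, 120};
            long[] lengths = {100, 100};

            long currentOffset = offsets[0];

            // Old computation: sum of stripe lengths, ignores the gap.
            long summedLength = lengths[0] + lengths[1];                 // 200

            // New computation: distance from split start to end of the last stripe.
            long spanLength = (offsets[1] + lengths[1]) - currentOffset; // 220

            System.out.println("summed = " + summedLength + ", span = " + spanLength);
        }
    }
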
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewInputFormat.java?rev=1571600&r1=1571599&r2=1571600&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewInputFormat.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewInputFormat.java Tue Feb 25 07:58:52 2014
@@ -46,9 +46,10 @@ public class OrcNewInputFormat extends I
throws IOException, InterruptedException {
FileSplit fileSplit = (FileSplit) inputSplit;
Path path = fileSplit.getPath();
- FileSystem fs = path.getFileSystem(ShimLoader.getHadoopShims()
- .getConfiguration(context));
- return new OrcRecordReader(OrcFile.createReader(fs, path),
+ Configuration conf = ShimLoader.getHadoopShims()
+ .getConfiguration(context);
+ FileSystem fs = path.getFileSystem(conf);
+ return new OrcRecordReader(OrcFile.createReader(fs, path, conf),
ShimLoader.getHadoopShims().getConfiguration(context),
fileSplit.getStart(), fileSplit.getLength());
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java?rev=1571600&r1=1571599&r2=1571600&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java Tue Feb 25 07:58:52 2014
@@ -1,3 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.hadoop.hive.ql.io.orc;
import java.io.DataInput;
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java?rev=1571600&r1=1571599&r2=1571600&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java Tue Feb 25 07:58:52 2014
@@ -30,6 +30,7 @@ import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -60,6 +61,7 @@ final class ReaderImpl implements Reader
private final OrcProto.Footer footer;
private final ObjectInspector inspector;
private long deserializedSize = -1;
+ private final Configuration conf;
//serialized footer - Keeping this around for use by getFileMetaInfo()
// will help avoid cpu cycles spend in deserializing at cost of increased
@@ -288,11 +290,13 @@ final class ReaderImpl implements Reader
* Constructor that extracts metadata information from file footer
* @param fs
* @param path
+ * @param conf
* @throws IOException
*/
- ReaderImpl(FileSystem fs, Path path) throws IOException {
+ ReaderImpl(FileSystem fs, Path path, Configuration conf) throws IOException {
this.fileSystem = fs;
this.path = path;
+ this.conf = conf;
FileMetaInfo footerMetaData = extractMetaInfoFromFooter(fs, path);
@@ -316,12 +320,14 @@ final class ReaderImpl implements Reader
* @param fs
* @param path
* @param fMetaInfo
+ * @param conf
* @throws IOException
*/
- ReaderImpl(FileSystem fs, Path path, FileMetaInfo fMetaInfo)
+ ReaderImpl(FileSystem fs, Path path, FileMetaInfo fMetaInfo, Configuration conf)
throws IOException {
this.fileSystem = fs;
this.path = path;
+ this.conf = conf;
MetaInfoObjExtractor rInfo = new MetaInfoObjExtractor(
fMetaInfo.compressionType,
@@ -487,7 +493,7 @@ final class ReaderImpl implements Reader
return new RecordReaderImpl(this.getStripes(), fileSystem, path, offset,
length, footer.getTypesList(), codec, bufferSize,
- include, footer.getRowIndexStride(), sarg, columnNames);
+ include, footer.getRowIndexStride(), sarg, columnNames, conf);
}
@Override
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1571600&r1=1571599&r2=1571600&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Tue Feb 25 07:58:52 2014
@@ -31,6 +31,7 @@ import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -84,6 +85,7 @@ class RecordReaderImpl implements Record
private final int[] filterColumns;
// an array about which row groups aren't skipped
private boolean[] includedRowGroups = null;
+ private final Configuration conf;
RecordReaderImpl(Iterable<StripeInformation> stripes,
FileSystem fileSystem,
@@ -95,13 +97,15 @@ class RecordReaderImpl implements Record
boolean[] included,
long strideRate,
SearchArgument sarg,
- String[] columnNames
+ String[] columnNames,
+ Configuration conf
) throws IOException {
this.file = fileSystem.open(path);
this.codec = codec;
this.types = types;
this.bufferSize = bufferSize;
this.included = included;
+ this.conf = conf;
this.sarg = sarg;
if (sarg != null) {
sargLeaves = sarg.getLeaves();
@@ -128,7 +132,7 @@ class RecordReaderImpl implements Record
firstRow = skippedRows;
totalRowCount = rows;
- reader = createTreeReader(path, 0, types, included);
+ reader = createTreeReader(path, 0, types, included, conf);
indexes = new OrcProto.RowIndex[types.size()];
rowIndexStride = strideRate;
advanceToNextRow(0L);
@@ -163,10 +167,12 @@ class RecordReaderImpl implements Record
protected final int columnId;
private BitFieldReader present = null;
protected boolean valuePresent = false;
+ protected final Configuration conf;
- TreeReader(Path path, int columnId) {
+ TreeReader(Path path, int columnId, Configuration conf) {
this.path = path;
this.columnId = columnId;
+ this.conf = conf;
}
void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
@@ -182,7 +188,7 @@ class RecordReaderImpl implements Record
switch (kind) {
case DIRECT_V2:
case DICTIONARY_V2:
- return new RunLengthIntegerReaderV2(in, signed);
+ return new RunLengthIntegerReaderV2(in, signed, conf);
case DIRECT:
case DICTIONARY:
return new RunLengthIntegerReader(in, signed);
@@ -275,8 +281,8 @@ class RecordReaderImpl implements Record
private static class BooleanTreeReader extends TreeReader{
private BitFieldReader reader = null;
- BooleanTreeReader(Path path, int columnId) {
- super(path, columnId);
+ BooleanTreeReader(Path path, int columnId, Configuration conf) {
+ super(path, columnId, conf);
}
@Override
@@ -335,8 +341,8 @@ class RecordReaderImpl implements Record
private static class ByteTreeReader extends TreeReader{
private RunLengthByteReader reader = null;
- ByteTreeReader(Path path, int columnId) {
- super(path, columnId);
+ ByteTreeReader(Path path, int columnId, Configuration conf) {
+ super(path, columnId, conf);
}
@Override
@@ -395,8 +401,8 @@ class RecordReaderImpl implements Record
private static class ShortTreeReader extends TreeReader{
private IntegerReader reader = null;
- ShortTreeReader(Path path, int columnId) {
- super(path, columnId);
+ ShortTreeReader(Path path, int columnId, Configuration conf) {
+ super(path, columnId, conf);
}
@Override
@@ -465,8 +471,8 @@ class RecordReaderImpl implements Record
private static class IntTreeReader extends TreeReader{
private IntegerReader reader = null;
- IntTreeReader(Path path, int columnId) {
- super(path, columnId);
+ IntTreeReader(Path path, int columnId, Configuration conf) {
+ super(path, columnId, conf);
}
@Override
@@ -535,8 +541,8 @@ class RecordReaderImpl implements Record
private static class LongTreeReader extends TreeReader{
private IntegerReader reader = null;
- LongTreeReader(Path path, int columnId) {
- super(path, columnId);
+ LongTreeReader(Path path, int columnId, Configuration conf) {
+ super(path, columnId, conf);
}
@Override
@@ -605,8 +611,8 @@ class RecordReaderImpl implements Record
private static class FloatTreeReader extends TreeReader{
private InStream stream;
- FloatTreeReader(Path path, int columnId) {
- super(path, columnId);
+ FloatTreeReader(Path path, int columnId, Configuration conf) {
+ super(path, columnId, conf);
}
@Override
@@ -685,8 +691,8 @@ class RecordReaderImpl implements Record
private static class DoubleTreeReader extends TreeReader{
private InStream stream;
- DoubleTreeReader(Path path, int columnId) {
- super(path, columnId);
+ DoubleTreeReader(Path path, int columnId, Configuration conf) {
+ super(path, columnId, conf);
}
@Override
@@ -764,8 +770,8 @@ class RecordReaderImpl implements Record
private InStream stream;
private IntegerReader lengths = null;
- BinaryTreeReader(Path path, int columnId) {
- super(path, columnId);
+ BinaryTreeReader(Path path, int columnId, Configuration conf) {
+ super(path, columnId, conf);
}
@Override
@@ -843,8 +849,8 @@ class RecordReaderImpl implements Record
private IntegerReader nanos = null;
private final LongColumnVector nanoVector = new LongColumnVector();
- TimestampTreeReader(Path path, int columnId) {
- super(path, columnId);
+ TimestampTreeReader(Path path, int columnId, Configuration conf) {
+ super(path, columnId, conf);
}
@Override
@@ -970,8 +976,8 @@ class RecordReaderImpl implements Record
private static class DateTreeReader extends TreeReader{
private IntegerReader reader = null;
- DateTreeReader(Path path, int columnId) {
- super(path, columnId);
+ DateTreeReader(Path path, int columnId, Configuration conf) {
+ super(path, columnId, conf);
}
@Override
@@ -1045,8 +1051,8 @@ class RecordReaderImpl implements Record
private final int precision;
private final int scale;
- DecimalTreeReader(Path path, int columnId, int precision, int scale) {
- super(path, columnId);
+ DecimalTreeReader(Path path, int columnId, int precision, int scale, Configuration conf) {
+ super(path, columnId, conf);
this.precision = precision;
this.scale = scale;
}
@@ -1155,8 +1161,8 @@ class RecordReaderImpl implements Record
private static class StringTreeReader extends TreeReader {
private TreeReader reader;
- StringTreeReader(Path path, int columnId) {
- super(path, columnId);
+ StringTreeReader(Path path, int columnId, Configuration conf) {
+ super(path, columnId, conf);
}
@Override
@@ -1173,11 +1179,11 @@ class RecordReaderImpl implements Record
switch (encodings.get(columnId).getKind()) {
case DIRECT:
case DIRECT_V2:
- reader = new StringDirectTreeReader(path, columnId);
+ reader = new StringDirectTreeReader(path, columnId, conf);
break;
case DICTIONARY:
case DICTIONARY_V2:
- reader = new StringDictionaryTreeReader(path, columnId);
+ reader = new StringDictionaryTreeReader(path, columnId, conf);
break;
default:
throw new IllegalArgumentException("Unsupported encoding " +
@@ -1217,8 +1223,8 @@ class RecordReaderImpl implements Record
private final LongColumnVector scratchlcv;
- StringDirectTreeReader(Path path, int columnId) {
- super(path, columnId);
+ StringDirectTreeReader(Path path, int columnId, Configuration conf) {
+ super(path, columnId, conf);
scratchlcv = new LongColumnVector();
}
@@ -1366,8 +1372,8 @@ class RecordReaderImpl implements Record
private byte[] dictionaryBufferInBytesCache = null;
private final LongColumnVector scratchlcv;
- StringDictionaryTreeReader(Path path, int columnId) {
- super(path, columnId);
+ StringDictionaryTreeReader(Path path, int columnId, Configuration conf) {
+ super(path, columnId, conf);
scratchlcv = new LongColumnVector();
}
@@ -1532,8 +1538,8 @@ class RecordReaderImpl implements Record
private static class CharTreeReader extends StringTreeReader {
int maxLength;
- CharTreeReader(Path path, int columnId, int maxLength) {
- super(path, columnId);
+ CharTreeReader(Path path, int columnId, int maxLength, Configuration conf) {
+ super(path, columnId, conf);
this.maxLength = maxLength;
}
@@ -1560,8 +1566,8 @@ class RecordReaderImpl implements Record
private static class VarcharTreeReader extends StringTreeReader {
int maxLength;
- VarcharTreeReader(Path path, int columnId, int maxLength) {
- super(path, columnId);
+ VarcharTreeReader(Path path, int columnId, int maxLength, Configuration conf) {
+ super(path, columnId, conf);
this.maxLength = maxLength;
}
@@ -1591,8 +1597,8 @@ class RecordReaderImpl implements Record
StructTreeReader(Path path, int columnId,
List<OrcProto.Type> types,
- boolean[] included) throws IOException {
- super(path, columnId);
+ boolean[] included, Configuration conf) throws IOException {
+ super(path, columnId, conf);
OrcProto.Type type = types.get(columnId);
int fieldCount = type.getFieldNamesCount();
this.fields = new TreeReader[fieldCount];
@@ -1600,7 +1606,7 @@ class RecordReaderImpl implements Record
for(int i=0; i < fieldCount; ++i) {
int subtype = type.getSubtypes(i);
if (included == null || included[subtype]) {
- this.fields[i] = createTreeReader(path, subtype, types, included);
+ this.fields[i] = createTreeReader(path, subtype, types, included, conf);
}
this.fieldNames[i] = type.getFieldNames(i);
}
@@ -1693,15 +1699,15 @@ class RecordReaderImpl implements Record
UnionTreeReader(Path path, int columnId,
List<OrcProto.Type> types,
- boolean[] included) throws IOException {
- super(path, columnId);
+ boolean[] included, Configuration conf) throws IOException {
+ super(path, columnId, conf);
OrcProto.Type type = types.get(columnId);
int fieldCount = type.getSubtypesCount();
this.fields = new TreeReader[fieldCount];
for(int i=0; i < fieldCount; ++i) {
int subtype = type.getSubtypes(i);
if (included == null || included[subtype]) {
- this.fields[i] = createTreeReader(path, subtype, types, included);
+ this.fields[i] = createTreeReader(path, subtype, types, included, conf);
}
}
}
@@ -1772,11 +1778,11 @@ class RecordReaderImpl implements Record
ListTreeReader(Path path, int columnId,
List<OrcProto.Type> types,
- boolean[] included) throws IOException {
- super(path, columnId);
+ boolean[] included, Configuration conf) throws IOException {
+ super(path, columnId, conf);
OrcProto.Type type = types.get(columnId);
elementReader = createTreeReader(path, type.getSubtypes(0), types,
- included);
+ included, conf);
}
@Override
@@ -1863,18 +1869,18 @@ class RecordReaderImpl implements Record
MapTreeReader(Path path,
int columnId,
List<OrcProto.Type> types,
- boolean[] included) throws IOException {
- super(path, columnId);
+ boolean[] included, Configuration conf) throws IOException {
+ super(path, columnId, conf);
OrcProto.Type type = types.get(columnId);
int keyColumn = type.getSubtypes(0);
int valueColumn = type.getSubtypes(1);
if (included == null || included[keyColumn]) {
- keyReader = createTreeReader(path, keyColumn, types, included);
+ keyReader = createTreeReader(path, keyColumn, types, included, conf);
} else {
keyReader = null;
}
if (included == null || included[valueColumn]) {
- valueReader = createTreeReader(path, valueColumn, types, included);
+ valueReader = createTreeReader(path, valueColumn, types, included, conf);
} else {
valueReader = null;
}
@@ -1956,54 +1962,55 @@ class RecordReaderImpl implements Record
private static TreeReader createTreeReader(Path path,
int columnId,
List<OrcProto.Type> types,
- boolean[] included
+ boolean[] included,
+ Configuration conf
) throws IOException {
OrcProto.Type type = types.get(columnId);
switch (type.getKind()) {
case BOOLEAN:
- return new BooleanTreeReader(path, columnId);
+ return new BooleanTreeReader(path, columnId, conf);
case BYTE:
- return new ByteTreeReader(path, columnId);
+ return new ByteTreeReader(path, columnId, conf);
case DOUBLE:
- return new DoubleTreeReader(path, columnId);
+ return new DoubleTreeReader(path, columnId, conf);
case FLOAT:
- return new FloatTreeReader(path, columnId);
+ return new FloatTreeReader(path, columnId, conf);
case SHORT:
- return new ShortTreeReader(path, columnId);
+ return new ShortTreeReader(path, columnId, conf);
case INT:
- return new IntTreeReader(path, columnId);
+ return new IntTreeReader(path, columnId, conf);
case LONG:
- return new LongTreeReader(path, columnId);
+ return new LongTreeReader(path, columnId, conf);
case STRING:
- return new StringTreeReader(path, columnId);
+ return new StringTreeReader(path, columnId, conf);
case CHAR:
if (!type.hasMaximumLength()) {
throw new IllegalArgumentException("ORC char type has no length specified");
}
- return new CharTreeReader(path, columnId, type.getMaximumLength());
+ return new CharTreeReader(path, columnId, type.getMaximumLength(), conf);
case VARCHAR:
if (!type.hasMaximumLength()) {
throw new IllegalArgumentException("ORC varchar type has no length specified");
}
- return new VarcharTreeReader(path, columnId, type.getMaximumLength());
+ return new VarcharTreeReader(path, columnId, type.getMaximumLength(), conf);
case BINARY:
- return new BinaryTreeReader(path, columnId);
+ return new BinaryTreeReader(path, columnId, conf);
case TIMESTAMP:
- return new TimestampTreeReader(path, columnId);
+ return new TimestampTreeReader(path, columnId, conf);
case DATE:
- return new DateTreeReader(path, columnId);
+ return new DateTreeReader(path, columnId, conf);
case DECIMAL:
int precision = type.hasPrecision() ? type.getPrecision() : HiveDecimal.SYSTEM_DEFAULT_PRECISION;
int scale = type.hasScale()? type.getScale() : HiveDecimal.SYSTEM_DEFAULT_SCALE;
- return new DecimalTreeReader(path, columnId, precision, scale);
+ return new DecimalTreeReader(path, columnId, precision, scale, conf);
case STRUCT:
- return new StructTreeReader(path, columnId, types, included);
+ return new StructTreeReader(path, columnId, types, included, conf);
case LIST:
- return new ListTreeReader(path, columnId, types, included);
+ return new ListTreeReader(path, columnId, types, included, conf);
case MAP:
- return new MapTreeReader(path, columnId, types, included);
+ return new MapTreeReader(path, columnId, types, included, conf);
case UNION:
- return new UnionTreeReader(path, columnId, types, included);
+ return new UnionTreeReader(path, columnId, types, included, conf);
default:
throw new IllegalArgumentException("Unsupported type " +
type.getKind());
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReaderV2.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReaderV2.java?rev=1571600&r1=1571599&r2=1571600&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReaderV2.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReaderV2.java Tue Feb 25 07:58:52 2014
@@ -20,6 +20,10 @@ package org.apache.hadoop.hive.ql.io.orc
import java.io.EOFException;
import java.io.IOException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.io.orc.RunLengthIntegerWriterV2.EncodingType;
@@ -34,10 +38,13 @@ class RunLengthIntegerReaderV2 implement
private final long[] literals = new long[RunLengthIntegerWriterV2.MAX_SCOPE];
private int numLiterals = 0;
private int used = 0;
+ private final boolean skipCorrupt;
- RunLengthIntegerReaderV2(InStream input, boolean signed) throws IOException {
+ RunLengthIntegerReaderV2(InStream input, boolean signed,
+ Configuration conf) throws IOException {
this.input = input;
this.signed = signed;
+ this.skipCorrupt = HiveConf.getBoolVar(conf, ConfVars.HIVE_ORC_SKIP_CORRUPT_DATA);
}
private void readValues() throws IOException {
@@ -163,14 +170,20 @@ class RunLengthIntegerReaderV2 implement
// unpack the patch blob
long[] unpackedPatch = new long[pl];
- SerializationUtils.readInts(unpackedPatch, 0, pl, pw + pgw, input);
+
+ if ((pw + pgw) > 64 && !skipCorrupt) {
+ throw new IOException(ErrorMsg.ORC_CORRUPTED_READ.getMsg());
+ }
+ int bitSize = SerializationUtils.getClosestFixedBits(pw + pgw);
+ SerializationUtils.readInts(unpackedPatch, 0, pl, bitSize, input);
// apply the patch directly when decoding the packed data
int patchIdx = 0;
long currGap = 0;
long currPatch = 0;
+ long patchMask = ((1L << pw) - 1);
currGap = unpackedPatch[patchIdx] >>> pw;
- currPatch = unpackedPatch[patchIdx] & ((1 << pw) - 1);
+ currPatch = unpackedPatch[patchIdx] & patchMask;
long actualGap = 0;
// special case: if gap is >255 then patch value will be 0.
@@ -179,7 +192,7 @@ class RunLengthIntegerReaderV2 implement
actualGap += 255;
patchIdx++;
currGap = unpackedPatch[patchIdx] >>> pw;
- currPatch = unpackedPatch[patchIdx] & ((1 << pw) - 1);
+ currPatch = unpackedPatch[patchIdx] & patchMask;
}
// add the left over gap
actualGap += currGap;
@@ -199,7 +212,7 @@ class RunLengthIntegerReaderV2 implement
if (patchIdx < pl) {
// read the next gap and patch
currGap = unpackedPatch[patchIdx] >>> pw;
- currPatch = unpackedPatch[patchIdx] & ((1 << pw) - 1);
+ currPatch = unpackedPatch[patchIdx] & patchMask;
actualGap = 0;
// special case: gap is >255 then patch will be 0. if gap is
@@ -208,7 +221,7 @@ class RunLengthIntegerReaderV2 implement
actualGap += 255;
patchIdx++;
currGap = unpackedPatch[patchIdx] >>> pw;
- currPatch = unpackedPatch[patchIdx] & ((1 << pw) - 1);
+ currPatch = unpackedPatch[patchIdx] & patchMask;
}
// add the left over gap
actualGap += currGap;
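
The reader fixes above guard against int overflow: (1 << pw) - 1 is evaluated in 32-bit arithmetic, so once the patch width reaches 32 bits the shift count wraps and the mask is wrong; precomputing patchMask as a long avoids that, and the new (pw + pgw) > 64 check rejects streams whose combined width cannot be valid unless skipping corrupt data is enabled. A standalone sketch, not taken from the Hive sources, showing the difference the long literal makes:

// Standalone sketch, not Hive code: why the mask must be computed with a long literal.
public class PatchMaskExample {
  public static void main(String[] args) {
    int pw = 33;                      // patch width in bits
    long intMask = (1 << pw) - 1;     // int shift: the count wraps mod 32, so this is 1
    long longMask = (1L << pw) - 1;   // long shift: a correct 33-bit mask
    System.out.println("int  mask bits: " + Long.bitCount(intMask));   // 1
    System.out.println("long mask bits: " + Long.bitCount(longMask));  // 33
  }
}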
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java?rev=1571600&r1=1571599&r2=1571600&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java Tue Feb 25 07:58:52 2014
@@ -567,6 +567,7 @@ class RunLengthIntegerWriterV2 implement
// since we are considering only the 95th percentile, the gap and
// patch arrays can contain at most 5% of the values
patchLength = (int) Math.ceil((baseRedLiterals.length * 0.05));
+
int[] gapList = new int[patchLength];
long[] patchList = new long[patchLength];
@@ -574,6 +575,15 @@ class RunLengthIntegerWriterV2 implement
patchWidth = brBits100p - brBits95p;
patchWidth = SerializationUtils.getClosestFixedBits(patchWidth);
+ // if the patch bit requirement is 64 then it will not be possible to pack
+ // gap and patch together in a long. To make sure gap and patch can be
+ // packed together, adjust the patch width
+ if (patchWidth == 64) {
+ patchWidth = 56;
+ brBits95p = 8;
+ mask = (1L << brBits95p) - 1;
+ }
+
int gapIdx = 0;
int patchIdx = 0;
int prev = 0;
@@ -642,7 +652,7 @@ class RunLengthIntegerWriterV2 implement
long g = gapList[gapIdx++];
long p = patchList[patchIdx++];
while (g > 255) {
- gapVsPatchList[i++] = (255 << patchWidth) | 0;
+ gapVsPatchList[i++] = (255L << patchWidth);
g -= 255;
}
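
On the writer side, each gapVsPatchList entry packs the gap count and the patch value into a single long, with the gap stored above the patch bits; capping the patch width at 56 leaves 8 bits for the gap, and the 255L literal keeps that shift in 64-bit arithmetic as well. A small self-contained sketch of the packing and unpacking, with illustrative values only:

// Standalone sketch with illustrative values: packing a gap count and a patch value
// into one long, the same shape as the writer's gapVsPatchList entries.
public class GapPatchPackingExample {
  public static void main(String[] args) {
    int patchWidth = 56;              // capped so 8 gap bits + 56 patch bits fit in 64
    long gap = 200;                   // gaps > 255 are emitted as extra (255, 0) entries
    long patch = 0x00FFEEDDCCBBAAL;   // fits in the 56-bit patch field
    long packed = (gap << patchWidth) | patch;
    long unpackedGap = packed >>> patchWidth;                 // 200
    long unpackedPatch = packed & ((1L << patchWidth) - 1);   // original patch bits
    System.out.println(unpackedGap == gap && unpackedPatch == patch);  // true
  }
}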
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java?rev=1571600&r1=1571599&r2=1571600&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java Tue Feb 25 07:58:52 2014
@@ -151,16 +151,16 @@ public class VectorizedOrcInputFormat ex
if(!(fSplit instanceof OrcSplit)){
//If CombineHiveInputFormat is used, it works with FileSplit and not OrcSplit
- reader = OrcFile.createReader(fs, path);
+ reader = OrcFile.createReader(fs, path, conf);
} else {
//We have OrcSplit, which may have footer metadata cached, so use the appropriate reader
//constructor
OrcSplit orcSplit = (OrcSplit) fSplit;
if (orcSplit.hasFooter()) {
FileMetaInfo fMetaInfo = orcSplit.getFileMetaInfo();
- reader = OrcFile.createReader(fs, path, fMetaInfo);
+ reader = OrcFile.createReader(fs, path, fMetaInfo, conf);
} else {
- reader = OrcFile.createReader(fs, path);
+ reader = OrcFile.createReader(fs, path, conf);
}
}
@@ -176,7 +176,7 @@ public class VectorizedOrcInputFormat ex
}
for (FileStatus file : files) {
try {
- OrcFile.createReader(fs, file.getPath());
+ OrcFile.createReader(fs, file.getPath(), conf);
} catch (IOException e) {
return false;
}
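
VectorizedOrcInputFormat now passes the job Configuration into OrcFile.createReader, matching the reader-side changes above. A hedged usage sketch of the same call (class name and file path are illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.Reader;

public class OrcReaderExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path path = new Path("/tmp/example.orc");   // illustrative file path
    FileSystem fs = path.getFileSystem(conf);
    // The conf argument is what this commit adds to the createReader call sites.
    Reader reader = OrcFile.createReader(fs, path, conf);
    System.out.println("rows: " + reader.getNumberOfRows());
  }
}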
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java?rev=1571600&r1=1571599&r2=1571600&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java Tue Feb 25 07:58:52 2014
@@ -69,6 +69,7 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.Function;
import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege;
import org.apache.hadoop.hive.metastore.api.HiveObjectRef;
import org.apache.hadoop.hive.metastore.api.HiveObjectType;
@@ -2449,11 +2450,7 @@ private void constructOneLBLocationMap(F
}
private String getUserName() {
- SessionState ss = SessionState.get();
- if (ss != null && ss.getAuthenticator() != null) {
- return ss.getAuthenticator().getUserName();
- }
- return null;
+ return SessionState.getUserFromAuthenticator();
}
private List<String> getGroupNames() {
@@ -2571,8 +2568,50 @@ private void constructOneLBLocationMap(F
}
}
- private static String[] getQualifiedNames(String qualifiedName) {
+ public static String[] getQualifiedNames(String qualifiedName) {
return qualifiedName.split("\\.");
}
+ public void createFunction(Function func) throws HiveException {
+ try {
+ getMSC().createFunction(func);
+ } catch (TException te) {
+ throw new HiveException(te);
+ }
+ }
+
+ public void alterFunction(String dbName, String funcName, Function newFunction)
+ throws HiveException {
+ try {
+ getMSC().alterFunction(dbName, funcName, newFunction);
+ } catch (TException te) {
+ throw new HiveException(te);
+ }
+ }
+
+ public void dropFunction(String dbName, String funcName)
+ throws HiveException {
+ try {
+ getMSC().dropFunction(dbName, funcName);
+ } catch (TException te) {
+ throw new HiveException(te);
+ }
+ }
+
+ public Function getFunction(String dbName, String funcName) throws HiveException {
+ try {
+ return getMSC().getFunction(dbName, funcName);
+ } catch (TException te) {
+ throw new HiveException(te);
+ }
+ }
+
+ public List<String> getFunctions(String dbName, String pattern) throws HiveException {
+ try {
+ return getMSC().getFunctions(dbName, pattern);
+ } catch (TException te) {
+ throw new HiveException(te);
+ }
+ }
+
};
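
The new Hive methods are thin wrappers over the metastore client that translate TException into HiveException, in the same style as the existing table and partition helpers. A hedged usage sketch; the database name, pattern, and function name are illustrative:

import java.util.List;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;

public class FunctionCatalogExample {
  public static void main(String[] args) throws HiveException {
    Hive db = Hive.get(new HiveConf());
    // Metastore-style pattern; "*" lists everything in the database.
    List<String> names = db.getFunctions("default", "*");
    System.out.println("functions in default: " + names);
    if (names.contains("my_udf")) {          // hypothetical function name
      db.dropFunction("default", "my_udf");  // metastore errors surface as HiveException
    }
  }
}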
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java?rev=1571600&r1=1571599&r2=1571600&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java Tue Feb 25 07:58:52 2014
@@ -45,6 +45,7 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.SkewedInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
import org.apache.hadoop.hive.ql.io.HivePassThroughOutputFormat;
@@ -401,6 +402,11 @@ public class Table implements Serializab
return tTable.getParameters().get(name);
}
+ public boolean isImmutable(){
+ return (tTable.getParameters().containsKey(hive_metastoreConstants.IS_IMMUTABLE)
+ && tTable.getParameters().get(hive_metastoreConstants.IS_IMMUTABLE).equalsIgnoreCase("true"));
+ }
+
public void setTableType(TableType tableType) {
tTable.setTableType(tableType.toString());
}
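
isImmutable() simply looks for the table parameter hive_metastoreConstants.IS_IMMUTABLE with the value "true". A small sketch, assuming only the existing Table helpers, that flags an in-memory Table and reads the flag back (table and database names are illustrative):

import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.metadata.Table;

public class ImmutableTableExample {
  public static void main(String[] args) {
    Table t = new Table("default", "events");   // in-memory Table, not yet in the metastore
    t.setProperty(hive_metastoreConstants.IS_IMMUTABLE, "true");
    System.out.println("immutable: " + t.isImmutable());   // true
  }
}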
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java?rev=1571600&r1=1571599&r2=1571600&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java Tue Feb 25 07:58:52 2014
@@ -398,28 +398,20 @@ public class JsonMetaDataFormatter imple
* Show the description of a database
*/
@Override
- public void showDatabaseDescription(DataOutputStream out,
- String database,
- String comment,
- String location,
- Map<String, String> params)
- throws HiveException
- {
- if (params == null || params.isEmpty()) {
- asJson(out, MapBuilder
- .create()
- .put("database", database)
- .put("comment", comment)
- .put("location", location)
- .build());
- } else {
- asJson(out, MapBuilder
- .create()
- .put("database", database)
- .put("comment", comment)
- .put("location", location)
- .put("params", params)
- .build());
- }
+ public void showDatabaseDescription(DataOutputStream out, String database, String comment,
+ String location, String ownerName, String ownerType, Map<String, String> params)
+ throws HiveException {
+ MapBuilder builder = MapBuilder.create().put("database", database).put("comment", comment)
+ .put("location", location);
+ if (null != ownerName) {
+ builder.put("owner", ownerName);
+ }
+ if (null != ownerType) {
+ builder.put("ownerType", ownerType);
+ }
+ if (null != params && !params.isEmpty()) {
+ builder.put("params", params);
+ }
+ asJson(out, builder.build());
}
}
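
The rewritten JSON formatter builds a single map and only adds owner, ownerType and params when they are present. A plain-Java sketch of that conditional shape; it deliberately does not use the MapBuilder helper, and the sample values are illustrative:

import java.util.LinkedHashMap;
import java.util.Map;

public class DatabaseDescriptionExample {
  static Map<String, Object> describe(String database, String comment, String location,
      String ownerName, String ownerType, Map<String, String> params) {
    Map<String, Object> m = new LinkedHashMap<String, Object>();
    m.put("database", database);
    m.put("comment", comment);
    m.put("location", location);
    if (ownerName != null) {
      m.put("owner", ownerName);       // only present when the database has an owner
    }
    if (ownerType != null) {
      m.put("ownerType", ownerType);
    }
    if (params != null && !params.isEmpty()) {
      m.put("params", params);
    }
    return m;
  }

  public static void main(String[] args) {
    System.out.println(describe("default", null, "hdfs:///user/hive/warehouse",
        "hive", "USER", null));
  }
}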
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatter.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatter.java?rev=1571600&r1=1571599&r2=1571600&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatter.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatter.java Tue Feb 25 07:58:52 2014
@@ -93,11 +93,8 @@ public interface MetaDataFormatter {
/**
* Describe a database.
*/
- public void showDatabaseDescription(DataOutputStream out,
- String database,
- String comment,
- String location,
- Map<String, String> params)
- throws HiveException;
+ public void showDatabaseDescription (DataOutputStream out, String database, String comment,
+ String location, String ownerName, String ownerType, Map<String, String> params)
+ throws HiveException;
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java?rev=1571600&r1=1571599&r2=1571600&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java Tue Feb 25 07:58:52 2014
@@ -430,13 +430,9 @@ class TextMetaDataFormatter implements M
* Describe a database
*/
@Override
- public void showDatabaseDescription(DataOutputStream outStream,
- String database,
- String comment,
- String location,
- Map<String, String> params)
- throws HiveException
- {
+ public void showDatabaseDescription(DataOutputStream outStream, String database, String comment,
+ String location, String ownerName, String ownerType, Map<String, String> params)
+ throws HiveException {
try {
outStream.writeBytes(database);
outStream.write(separator);
@@ -448,6 +444,14 @@ class TextMetaDataFormatter implements M
outStream.writeBytes(location);
}
outStream.write(separator);
+ if (ownerName != null) {
+ outStream.writeBytes(ownerName);
+ }
+ outStream.write(separator);
+ if (ownerType != null) {
+ outStream.writeBytes(ownerType);
+ }
+ outStream.write(separator);
if (params != null && !params.isEmpty()) {
outStream.writeBytes(params.toString());
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java?rev=1571600&r1=1571599&r2=1571600&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java Tue Feb 25 07:58:52 2014
@@ -44,8 +44,8 @@ import org.apache.hadoop.hive.ql.exec.De
import org.apache.hadoop.hive.ql.exec.DependencyCollectionTask;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
-import org.apache.hadoop.hive.ql.exec.MoveTask;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.MoveTask;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
@@ -102,7 +102,6 @@ import org.apache.hadoop.hive.ql.plan.Ta
import org.apache.hadoop.hive.ql.plan.TezWork;
import org.apache.hadoop.hive.ql.stats.StatsFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.apache.hadoop.mapred.InputFormat;
/**
* General utility common functions for the Processor to convert operator into
@@ -562,14 +561,18 @@ public final class GenMapRedUtils {
// The table should also be considered part of the inputs, even if it is a
// partitioned table and regardless of whether any partition is selected
+
+ // This read entity is a direct read and not an indirect read (an indirect read happens
+ // when the entity is read only because it is a dependency of a view).
+ boolean isDirectRead = (parentViewInfo == null);
PlanUtils.addInput(inputs,
- new ReadEntity(parseCtx.getTopToTable().get(topOp), parentViewInfo));
+ new ReadEntity(parseCtx.getTopToTable().get(topOp), parentViewInfo, isDirectRead));
for (Partition part : parts) {
if (part.getTable().isPartitioned()) {
- PlanUtils.addInput(inputs, new ReadEntity(part, parentViewInfo));
+ PlanUtils.addInput(inputs, new ReadEntity(part, parentViewInfo, isDirectRead));
} else {
- PlanUtils.addInput(inputs, new ReadEntity(part.getTable(), parentViewInfo));
+ PlanUtils.addInput(inputs, new ReadEntity(part.getTable(), parentViewInfo, isDirectRead));
}
// Later the properties have to come from the partition as opposed
@@ -1236,7 +1239,7 @@ public final class GenMapRedUtils {
// Check if InputFormatClass is valid
String inputFormatClass = conf.getVar(ConfVars.HIVEMERGEINPUTFORMATBLOCKLEVEL);
try {
- Class c = (Class<? extends InputFormat>) Class.forName(inputFormatClass);
+ Class c = Class.forName(inputFormatClass);
LOG.info("RCFile format- Using block level merge");
cplan = GenMapRedUtils.createRCFileMergeTask(fsInputDesc, finalName,
@@ -1684,9 +1687,9 @@ public final class GenMapRedUtils {
Context baseCtx = parseCtx.getContext();
// if we are on viewfs we don't want to use /tmp as tmp dir since rename from /tmp/..
// to final location /user/hive/warehouse/ will fail later, so instead pick tmp dir
- // on same namespace as tbl dir.
- Path tmpDir = dest.toUri().getScheme().equals("viewfs") ?
- baseCtx.getExtTmpPathRelTo(dest.toUri()) :
+ // on same namespace as tbl dir.
+ Path tmpDir = dest.toUri().getScheme().equals("viewfs") ?
+ baseCtx.getExtTmpPathRelTo(dest.toUri()) :
baseCtx.getExternalTmpPath(dest.toUri());
FileSinkDesc fileSinkDesc = fsOp.getConf();
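
The temp-dir hunk above picks a staging directory in the same namespace as the destination whenever the destination sits on viewfs, because the later rename across viewfs mount points would fail. A minimal sketch of that scheme check; the helper name and paths are illustrative:

import java.net.URI;

public class ViewfsTmpPathExample {
  // Choose a tmp dir relative to the destination when the destination is on viewfs,
  // otherwise fall back to the usual external tmp location.
  static String chooseTmpDir(URI dest, String externalTmp, String tmpRelativeToDest) {
    return "viewfs".equals(dest.getScheme()) ? tmpRelativeToDest : externalTmp;
  }

  public static void main(String[] args) {
    URI dest = URI.create("viewfs://cluster/warehouse/t1");
    System.out.println(chooseTmpDir(dest, "/tmp/hive-staging", "/warehouse/t1/.hive-staging"));
  }
}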
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java?rev=1571600&r1=1571599&r2=1571600&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java Tue Feb 25 07:58:52 2014
@@ -1,3 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.hadoop.hive.ql.optimizer;
import java.util.ArrayList;
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1571600&r1=1571599&r2=1571600&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Tue Feb 25 07:58:52 2014
@@ -230,6 +230,7 @@ public class Vectorizer implements Physi
supportedGenericUDFs.add(GenericUDFIn.class);
supportedGenericUDFs.add(GenericUDFCase.class);
supportedGenericUDFs.add(GenericUDFWhen.class);
+ supportedGenericUDFs.add(GenericUDFCoalesce.class);
// For type casts
supportedGenericUDFs.add(UDFToLong.class);
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java?rev=1571600&r1=1571599&r2=1571600&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java Tue Feb 25 07:58:52 2014
@@ -42,6 +42,7 @@ import org.antlr.runtime.tree.Tree;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
@@ -666,6 +667,7 @@ public class DDLSemanticAnalyzer extends
break;
case TOK_DATABASELOCATION:
dbLocation = unescapeSQLString(childNode.getChild(0).getText());
+ addLocationToOutputs(dbLocation);
break;
default:
throw new SemanticException("Unrecognized token in CREATE DATABASE statement");
@@ -970,6 +972,7 @@ public class DDLSemanticAnalyzer extends
break;
case HiveParser.TOK_TABLELOCATION:
location = unescapeSQLString(child.getChild(0).getText());
+ addLocationToOutputs(location);
break;
case HiveParser.TOK_TABLEPROPERTIES:
tblProps = DDLSemanticAnalyzer.getProps((ASTNode) child.getChild(0));
@@ -1342,12 +1345,13 @@ public class DDLSemanticAnalyzer extends
HashMap<String, String> partSpec) throws SemanticException {
String newLocation = unescapeSQLString(ast.getChild(0).getText());
-
+ addLocationToOutputs(newLocation);
AlterTableDesc alterTblDesc = new AlterTableDesc(tableName, newLocation, partSpec);
addInputsOutputsAlterTable(tableName, partSpec, alterTblDesc);
rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(),
alterTblDesc), conf));
+
}
private void analyzeAlterTableProtectMode(ASTNode ast, String tableName,
@@ -2520,7 +2524,7 @@ public class DDLSemanticAnalyzer extends
Table tab = getTable(tblName, true);
boolean isView = tab.isView();
validateAlterTableType(tab, AlterTableTypes.ADDPARTITION, expectView);
- inputs.add(new ReadEntity(tab));
+ outputs.add(new WriteEntity(tab));
int numCh = ast.getChildCount();
int start = ifNotExists ? 2 : 1;
@@ -2547,6 +2551,17 @@ public class DDLSemanticAnalyzer extends
throw new SemanticException("LOCATION clause illegal for view partition");
}
currentLocation = unescapeSQLString(child.getChild(0).getText());
+ boolean isLocal = false;
+ try {
+ // do a best-effort check to determine if this is a local file
+ String scheme = new URI(currentLocation).getScheme();
+ if (scheme != null) {
+ isLocal = FileUtils.isLocalFile(conf, currentLocation);
+ }
+ } catch (URISyntaxException e) {
+ LOG.warn("Unable to create URI from " + currentLocation, e);
+ }
+ inputs.add(new ReadEntity(new Path(currentLocation), isLocal));
break;
default:
throw new SemanticException("Unknown child: " + child);
@@ -3159,6 +3174,7 @@ public class DDLSemanticAnalyzer extends
.getText()));
validateSkewedLocationString(newLocation);
locations.put(keyList, newLocation);
+ addLocationToOutputs(newLocation);
}
}
}
@@ -3172,6 +3188,10 @@ public class DDLSemanticAnalyzer extends
alterTblDesc), conf));
}
+ private void addLocationToOutputs(String newLocation) {
+ outputs.add(new WriteEntity(new Path(newLocation), FileUtils.isLocalFile(conf, newLocation)));
+ }
+
/**
* Check if the node is constant.
*