You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/08/16 03:22:02 UTC
svn commit: r1514554 [9/18] - in /hive/branches/vectorization: ./
beeline/src/java/org/apache/hive/beeline/
cli/src/java/org/apache/hadoop/hive/cli/
cli/src/test/org/apache/hadoop/hive/cli/
common/src/java/org/apache/hadoop/hive/conf/ conf/ contrib/src...
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFNvl.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFNvl.java?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFNvl.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFNvl.java Fri Aug 16 01:21:54 2013
@@ -22,8 +22,6 @@ import org.apache.hadoop.hive.ql.exec.UD
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@Description(name = "nvl",
@@ -31,8 +29,8 @@ value = "_FUNC_(value,default_value) - R
extended = "Example:\n"
+ " > SELECT _FUNC_(null,'bla') FROM src LIMIT 1;\n" + " bla")
public class GenericUDFNvl extends GenericUDF{
- private GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver;
- private ObjectInspector[] argumentOIs;
+ private transient GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver;
+ private transient ObjectInspector[] argumentOIs;
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPAnd.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPAnd.java?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPAnd.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPAnd.java Fri Aug 16 01:21:54 2013
@@ -33,7 +33,7 @@ import org.apache.hadoop.io.BooleanWrita
@Description(name = "and", value = "a _FUNC_ b - Logical and")
public class GenericUDFOPAnd extends GenericUDF {
private final BooleanWritable result = new BooleanWritable();
- BooleanObjectInspector boi0,boi1;
+ private transient BooleanObjectInspector boi0,boi1;
@Override
public ObjectInspector initialize(ObjectInspector[] arguments)
throws UDFArgumentException {
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNot.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNot.java?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNot.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNot.java Fri Aug 16 01:21:54 2013
@@ -34,7 +34,7 @@ import org.apache.hadoop.io.BooleanWrita
@Description(name = "not", value = "_FUNC_ a - Logical not")
public class GenericUDFOPNot extends GenericUDF {
private final BooleanWritable result = new BooleanWritable();
- BooleanObjectInspector boi;
+ private transient BooleanObjectInspector boi;
@Override
public ObjectInspector initialize(ObjectInspector[] arguments)
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPOr.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPOr.java?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPOr.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPOr.java Fri Aug 16 01:21:54 2013
@@ -33,7 +33,7 @@ import org.apache.hadoop.io.BooleanWrita
@Description(name = "or", value = "a _FUNC_ b - Logical or")
public class GenericUDFOPOr extends GenericUDF {
private final BooleanWritable result = new BooleanWritable();
- BooleanObjectInspector boi0,boi1;
+ private transient BooleanObjectInspector boi0,boi1;
@Override
public ObjectInspector initialize(ObjectInspector[] arguments)
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFPrintf.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFPrintf.java?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFPrintf.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFPrintf.java Fri Aug 16 01:21:54 2013
@@ -18,6 +18,10 @@
package org.apache.hadoop.hive.ql.udf.generic;
+import java.util.ArrayList;
+import java.util.Formatter;
+import java.util.Locale;
+
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
@@ -26,26 +30,19 @@ import org.apache.hadoop.hive.ql.metadat
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
-import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
import org.apache.hadoop.io.Text;
-import java.util.Formatter;
-import java.util.Locale;
-import java.util.ArrayList;
-
/**
* Generic UDF for printf function
* <code>printf(String format, Obj... args)</code>.
@@ -60,7 +57,7 @@ import java.util.ArrayList;
+ "FROM src LIMIT 1;\n"
+ " \"Hello World 100 days\"")
public class GenericUDFPrintf extends GenericUDF {
- private ObjectInspector[] argumentOIs;
+ private transient ObjectInspector[] argumentOIs;
private final Text resultText = new Text();
@Override
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFReflect.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFReflect.java?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFReflect.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFReflect.java Fri Aug 16 01:21:54 2013
@@ -24,8 +24,8 @@ import org.apache.hadoop.hive.ql.exec.De
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
@@ -42,8 +42,8 @@ import org.apache.hadoop.util.Reflection
@UDFType(deterministic = false)
public class GenericUDFReflect extends AbstractGenericUDFReflect {
- StringObjectInspector inputClassNameOI;
- StringObjectInspector inputMethodNameOI;
+ private transient StringObjectInspector inputClassNameOI;
+ private transient StringObjectInspector inputMethodNameOI;
StringObjectInspector classNameOI;
StringObjectInspector methodNameOI;
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFReflect2.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFReflect2.java?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFReflect2.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFReflect2.java Fri Aug 16 01:21:54 2013
@@ -29,9 +29,9 @@ import org.apache.hadoop.hive.ql.exec.UD
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.UDFType;
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
@@ -57,11 +57,11 @@ import org.apache.hadoop.io.Writable;
@UDFType(deterministic = true)
public class GenericUDFReflect2 extends AbstractGenericUDFReflect {
- PrimitiveObjectInspector targetOI;
- PrimitiveObjectInspector returnOI;
- Method method;
+ private PrimitiveObjectInspector targetOI;
+ private PrimitiveObjectInspector returnOI;
+ private transient Method method;
- transient Writable returnObj;
+ private transient Writable returnObj;
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSentences.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSentences.java?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSentences.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSentences.java Fri Aug 16 01:21:54 2013
@@ -18,9 +18,9 @@
package org.apache.hadoop.hive.ql.udf.generic;
+import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.Locale;
-import java.text.BreakIterator;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
@@ -49,7 +49,7 @@ import org.apache.hadoop.io.Text;
+ "language codes are fully supported, and if an unsupported code is specified, a default "
+ "locale is used to process that string.")
public class GenericUDFSentences extends GenericUDF {
- private ObjectInspectorConverters.Converter[] converters;
+ private transient ObjectInspectorConverters.Converter[] converters;
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSize.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSize.java?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSize.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSize.java Fri Aug 16 01:21:54 2013
@@ -37,8 +37,8 @@ import org.apache.hadoop.io.IntWritable;
*/
@Description(name = "size", value = "_FUNC_(a) - Returns the size of a")
public class GenericUDFSize extends GenericUDF {
- private ObjectInspector returnOI;
- private final IntWritable result = new IntWritable(-1);
+ private transient ObjectInspector returnOI;
+ private final transient IntWritable result = new IntWritable(-1);
@Override
public ObjectInspector initialize(ObjectInspector[] arguments)
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArray.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArray.java?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArray.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArray.java Fri Aug 16 01:21:54 2013
@@ -51,9 +51,9 @@ import org.apache.hadoop.hive.serde2.obj
+ " > SELECT _FUNC_(array('b', 'd', 'c', 'a')) FROM src LIMIT 1;\n"
+ " 'a', 'b', 'c', 'd'")
public class GenericUDFSortArray extends GenericUDF {
- private Converter[] converters;
+ private transient Converter[] converters;
private final List<Object> ret = new ArrayList<Object>();
- private ObjectInspector[] argumentOIs;
+ private transient ObjectInspector[] argumentOIs;
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSplit.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSplit.java?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSplit.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSplit.java Fri Aug 16 01:21:54 2013
@@ -39,7 +39,7 @@ import org.apache.hadoop.io.Text;
+ " > SELECT _FUNC_('oneAtwoBthreeC', '[ABC]') FROM src LIMIT 1;\n"
+ " [\"one\", \"two\", \"three\"]")
public class GenericUDFSplit extends GenericUDF {
- private ObjectInspectorConverters.Converter[] converters;
+ private transient ObjectInspectorConverters.Converter[] converters;
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStringToMap.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStringToMap.java?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStringToMap.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStringToMap.java Fri Aug 16 01:21:54 2013
@@ -40,8 +40,8 @@ import org.apache.hadoop.hive.serde2.typ
+ " second delimiter sperates key and value. If only one parameter is given, default"
+ " delimiters are used: ',' as delimiter1 and '=' as delimiter2.")
public class GenericUDFStringToMap extends GenericUDF {
- HashMap<Object, Object> ret = new HashMap<Object, Object>();
- StringObjectInspector soi_text, soi_de1 = null, soi_de2 = null;
+ private final HashMap<Object, Object> ret = new HashMap<Object, Object>();
+ private transient StringObjectInspector soi_text, soi_de1 = null, soi_de2 = null;
final static String default_de1 = ",";
final static String default_de2 = ":";
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTimestamp.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTimestamp.java?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTimestamp.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTimestamp.java Fri Aug 16 01:21:54 2013
@@ -37,8 +37,8 @@ import org.apache.hadoop.hive.serde2.obj
*/
public class GenericUDFTimestamp extends GenericUDF {
- private PrimitiveObjectInspector argumentOI;
- private TimestampConverter tc;
+ private transient PrimitiveObjectInspector argumentOI;
+ private transient TimestampConverter tc;
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToBinary.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToBinary.java?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToBinary.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToBinary.java Fri Aug 16 01:21:54 2013
@@ -34,8 +34,8 @@ import org.apache.hadoop.hive.serde2.obj
extended = "Currently only string or binary can be cast into binary")
public class GenericUDFToBinary extends GenericUDF {
- private PrimitiveObjectInspector argumentOI;
- private BinaryConverter baConverter;
+ private transient PrimitiveObjectInspector argumentOI;
+ private transient BinaryConverter baConverter;
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java Fri Aug 16 01:21:54 2013
@@ -37,8 +37,8 @@ import org.apache.hadoop.hive.serde2.obj
+ " '2009-01-01'")
public class GenericUDFToDate extends GenericUDF {
- private PrimitiveObjectInspector argumentOI;
- private DateConverter dc;
+ private transient PrimitiveObjectInspector argumentOI;
+ private transient DateConverter dc;
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDecimal.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDecimal.java?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDecimal.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDecimal.java Fri Aug 16 01:21:54 2013
@@ -29,8 +29,8 @@ import org.apache.hadoop.hive.serde2.obj
@Description(name = "decimal", value = "_FUNC_(a) - cast a to decimal")
public class GenericUDFToDecimal extends GenericUDF {
- private PrimitiveObjectInspector argumentOI;
- private HiveDecimalConverter bdConverter;
+ private transient PrimitiveObjectInspector argumentOI;
+ private transient HiveDecimalConverter bdConverter;
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java Fri Aug 16 01:21:54 2013
@@ -42,13 +42,13 @@ import org.apache.hadoop.io.LongWritable
extended = "Converts the specified time to number of seconds since 1970-01-01.")
public class GenericUDFToUnixTimeStamp extends GenericUDF {
- private StringObjectInspector intputTextOI;
- private DateObjectInspector inputDateOI;
- private TimestampObjectInspector inputTimestampOI;
- private StringObjectInspector patternOI;
+ private transient StringObjectInspector intputTextOI;
+ private transient DateObjectInspector inputDateOI;
+ private transient TimestampObjectInspector inputTimestampOI;
+ private transient StringObjectInspector patternOI;
private String lasPattern = "yyyy-MM-dd HH:mm:ss";
- private SimpleDateFormat formatter = new SimpleDateFormat(lasPattern);
+ private transient final SimpleDateFormat formatter = new SimpleDateFormat(lasPattern);
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTranslate.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTranslate.java?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTranslate.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTranslate.java Fri Aug 16 01:21:54 2013
@@ -103,17 +103,17 @@ public class GenericUDFTranslate extends
/**
* If a code point needs to be replaced with another code point, this map with store the mapping.
*/
- private Map<Integer, Integer> replacementMap = new HashMap<Integer, Integer>();
+ private final Map<Integer, Integer> replacementMap = new HashMap<Integer, Integer>();
/**
* This set stores all the code points which needed to be deleted from the input string. The
* objects in deletionSet and keys in replacementMap are mutually exclusive
*/
- private Set<Integer> deletionSet = new HashSet<Integer>();
+ private final Set<Integer> deletionSet = new HashSet<Integer>();
/**
* A placeholder for result.
*/
- private Text result = new Text();
+ private final Text result = new Text();
/**
* The values of from parameter from the previous evaluate() call.
@@ -126,7 +126,7 @@ public class GenericUDFTranslate extends
/**
* Converters for retrieving the arguments to the UDF.
*/
- private ObjectInspectorConverters.Converter[] converters;
+ private transient ObjectInspectorConverters.Converter[] converters;
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUnion.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUnion.java?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUnion.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUnion.java Fri Aug 16 01:21:54 2013
@@ -35,8 +35,7 @@ import org.apache.hadoop.hive.serde2.obj
extended = "Example:\n"
+ " > SELECT _FUNC_(1, 1, \"one\") FROM src LIMIT 1;\n" + " one")
public class GenericUDFUnion extends GenericUDF {
- Log LOG = LogFactory.getLog("GenericUDFUnion");
- ObjectInspector tagOI;
+ private transient ObjectInspector tagOI;
@Override
public ObjectInspector initialize(ObjectInspector[] arguments)
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFWhen.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFWhen.java?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFWhen.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFWhen.java Fri Aug 16 01:21:54 2013
@@ -32,8 +32,8 @@ import org.apache.hadoop.hive.serde2.obj
* thrown.
*/
public class GenericUDFWhen extends GenericUDF {
- private ObjectInspector[] argumentOIs;
- private GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver;
+ private transient ObjectInspector[] argumentOIs;
+ private transient GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver;
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentTypeException {
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFExplode.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFExplode.java?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFExplode.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFExplode.java Fri Aug 16 01:21:54 2013
@@ -42,8 +42,7 @@ import org.apache.hadoop.hive.serde2.obj
+ " or the elements of a map into multiple rows and columns ")
public class GenericUDTFExplode extends GenericUDTF {
- private ObjectInspector inputOI = null;
-
+ private transient ObjectInspector inputOI = null;
@Override
public void close() throws HiveException {
}
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFInline.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFInline.java?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFInline.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFInline.java Fri Aug 16 01:21:54 2013
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.udf.generic;
+import java.util.ArrayList;
+
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -26,13 +28,11 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import java.util.ArrayList;
-
@Description(name ="inline", value= "_FUNC_( ARRAY( STRUCT()[,STRUCT()] "
+ "- explodes and array and struct into a table")
public class GenericUDTFInline extends GenericUDTF {
- private ListObjectInspector li;
+ private transient ListObjectInspector li;
public GenericUDTFInline(){
}
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java Fri Aug 16 01:21:54 2013
@@ -66,7 +66,7 @@ public class GenericUDTFJSONTuple extend
Text[] retCols; // array of returned column values
Text[] cols; // object pool of non-null Text, avoid creating objects all the time
Object[] nullCols; // array of null column values
- ObjectInspector[] inputOIs; // input ObjectInspectors
+ private transient ObjectInspector[] inputOIs; // input ObjectInspectors
boolean pathParsed = false;
boolean seenErrors = false;
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFStack.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFStack.java?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFStack.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFStack.java Fri Aug 16 01:21:54 2013
@@ -18,16 +18,15 @@
package org.apache.hadoop.hive.ql.udf.generic;
+import java.util.List;
import java.util.ArrayList;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils.ReturnObjectInspectorResolver;
-import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantIntObjectInspector;
import org.apache.hadoop.io.IntWritable;
@@ -48,9 +47,9 @@ public class GenericUDTFStack extends Ge
public void close() throws HiveException {
}
- ArrayList<ObjectInspector> argOIs = new ArrayList<ObjectInspector>();
+ private transient List<ObjectInspector> argOIs = new ArrayList<ObjectInspector>();
Object[] forwardObj = null;
- ArrayList<ReturnObjectInspectorResolver> returnOIResolvers =
+ private transient ArrayList<ReturnObjectInspectorResolver> returnOIResolvers =
new ArrayList<ReturnObjectInspectorResolver>();
IntWritable numRows = null;
Integer numCols = null;
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NGramEstimator.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NGramEstimator.java?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NGramEstimator.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NGramEstimator.java Fri Aug 16 01:21:54 2013
@@ -104,7 +104,19 @@ public class NGramEstimator {
Collections.sort(list, new Comparator<Map.Entry<ArrayList<String>, Double>>() {
public int compare(Map.Entry<ArrayList<String>, Double> o1,
Map.Entry<ArrayList<String>, Double> o2) {
- return o2.getValue().compareTo(o1.getValue());
+ int result = o2.getValue().compareTo(o1.getValue());
+ if (result != 0)
+ return result;
+
+ ArrayList<String> key1 = o1.getKey();
+ ArrayList<String> key2 = o2.getKey();
+ for (int i = 0; i < key1.size() && i < key2.size(); i++) {
+ result = key1.get(i).compareTo(key2.get(i));
+ if (result != 0)
+ return result;
+ }
+
+ return key1.size() - key2.size();
}
});
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/GenericUDFXPath.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/GenericUDFXPath.java?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/GenericUDFXPath.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/GenericUDFXPath.java Fri Aug 16 01:21:54 2013
@@ -29,8 +29,8 @@ import org.apache.hadoop.hive.ql.metadat
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Text;
import org.w3c.dom.NodeList;
@@ -50,11 +50,11 @@ public class GenericUDFXPath extends Gen
private static final List<Text> emptyResult = Collections.<Text>emptyList();
- private final UDFXPathUtil xpath = new UDFXPathUtil();
+ private transient final UDFXPathUtil xpath = new UDFXPathUtil();
private final List<Text> result = new ArrayList<Text>(10);
- private Converter converterArg0;
- private Converter converterArg1;
+ private transient Converter converterArg0;
+ private transient Converter converterArg1;
@Override
public ObjectInspector initialize(ObjectInspector[] arguments)
Modified: hive/branches/vectorization/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto (original)
+++ hive/branches/vectorization/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto Fri Aug 16 01:21:54 2013
@@ -73,6 +73,8 @@ message ColumnEncoding {
enum Kind {
DIRECT = 0;
DICTIONARY = 1;
+ DIRECT_V2 = 2;
+ DICTIONARY_V2 = 3;
}
required Kind kind = 1;
optional uint32 dictionarySize = 2;
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java Fri Aug 16 01:21:54 2013
@@ -916,6 +916,8 @@ public class QTestUtil {
"<string>[0-9]{10}</string>",
"<string>/.*/warehouse/.*</string>"
};
+
+ fixXml4JDK7(outf.getPath());
maskPatterns(patterns, outf.getPath());
int exitVal = executeDiffCommand(outf.getPath(), planFile, true, false);
@@ -966,6 +968,101 @@ public class QTestUtil {
return ret;
}
+ /**
+ * Fix the XML generated by JDK7 which is slightly different from what's generated by JDK6,
+ * causing 40+ test failures. There are mainly two problems:
+ *
+ * 1. object element's properties, id and class, are in reverse order, i.e.
+ * <object class="org.apache.hadoop.hive.ql.exec.MapRedTask" id="MapRedTask0">
+ * which needs to be fixed to
+ * <object id="MapRedTask0" class="org.apache.hadoop.hive.ql.exec.MapRedTask">
+ * 2. JDK7 serializes an Enum value as a java.lang.Enum class, i.e.
+ * <object id="GenericUDAFEvaluator$Mode0" class="java.lang.Enum">
+ * <class>org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator$Mode</class>
+ * which needs to be fixed to
+ * <object id="GenericUDAFEvaluator$Mode0" class="org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator$Mode"
+ * method="valueOf">
+ *
+ * Though not elegant, this allows these test cases to pass until we have a better serialization mechanism.
+ *
+ * Did I mention this is test code?
+ *
+ * @param fname the name of the file to fix
+ * @throws Exception in case of IO error
+ */
+ private static void fixXml4JDK7(String fname) throws Exception {
+ String version = System.getProperty("java.version");
+ if (!version.startsWith("1.7")) {
+ return;
+ }
+
+ BufferedReader in = new BufferedReader(new FileReader(fname));
+ BufferedWriter out = new BufferedWriter(new FileWriter(fname + ".orig"));
+ String line = null;
+ while (null != (line = in.readLine())) {
+ out.write(line);
+ out.write('\n');
+ }
+ in.close();
+ out.close();
+
+ in = new BufferedReader(new FileReader(fname + ".orig"));
+ out = new BufferedWriter(new FileWriter(fname));
+
+ while (null != (line = in.readLine())) {
+ if (line.indexOf("<object ") == -1 || line.indexOf("class=") == -1) {
+ out.write(line);
+ } else {
+ StringBuilder sb = new StringBuilder();
+ String prefix = line.substring(0, line.indexOf("<object") + 7);
+ sb.append( prefix );
+ String postfix = line.substring(line.lastIndexOf('"') + 1);
+ String id = getPropertyValue(line, "id");
+ if (id != null)
+ sb.append(" id=" + id);
+ String cls = getPropertyValue(line, "class");
+ assert(cls != null);
+ if (cls.equals("\"java.lang.Enum\"")) {
+ line = in.readLine();
+ cls = "\"" + getElementValue(line, "class") + "\"";
+ sb.append(" class=" + cls + " method=\"valueOf\"" );
+ } else {
+ sb.append(" class=" + cls);
+ }
+
+ sb.append(postfix);
+ out.write(sb.toString());
+ }
+
+ out.write('\n');
+ }
+
+ in.close();
+ out.close();
+ }
+
+ /**
+ * Get the value of a property in line. The returned value has original quotes
+ */
+ private static String getPropertyValue(String line, String name) {
+ int start = line.indexOf( name + "=" );
+ if (start == -1)
+ return null;
+ start += name.length() + 1;
+ int end = line.indexOf("\"", start + 1);
+ return line.substring( start, end + 1 );
+ }
+
+ /**
+ * Get the value of the element in input. (Note: the returned value has no quotes.)
+ */
+ private static String getElementValue(String line, String name) {
+ assert(line.indexOf("<" + name + ">") != -1);
+ int start = line.indexOf("<" + name + ">") + name.length() + 2;
+ int end = line.indexOf("</" + name + ">");
+ return line.substring(start, end);
+ }
+
private void maskPatterns(String[] patterns, String fname) throws Exception {
String maskPattern = "#### A masked pattern was here ####";
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java Fri Aug 16 01:21:54 2013
@@ -22,6 +22,7 @@ import java.lang.reflect.Method;
import java.util.LinkedList;
import java.util.List;
+import junit.framework.Assert;
import junit.framework.TestCase;
import org.apache.hadoop.hive.serde2.io.DateWritable;
@@ -146,4 +147,24 @@ public class TestFunctionRegistry extend
@Override
protected void tearDown() {
}
+
+ public void testIsRankingFunction() {
+ Assert.assertTrue(FunctionRegistry.isRankingFunction("rank"));
+ Assert.assertTrue(FunctionRegistry.isRankingFunction("dense_rank"));
+ Assert.assertTrue(FunctionRegistry.isRankingFunction("percent_rank"));
+ Assert.assertTrue(FunctionRegistry.isRankingFunction("cume_dist"));
+ Assert.assertFalse(FunctionRegistry.isRankingFunction("min"));
+ }
+
+ public void testImpliesOrder() {
+ Assert.assertTrue(FunctionRegistry.impliesOrder("rank"));
+ Assert.assertTrue(FunctionRegistry.impliesOrder("dense_rank"));
+ Assert.assertTrue(FunctionRegistry.impliesOrder("percent_rank"));
+ Assert.assertTrue(FunctionRegistry.impliesOrder("cume_dist"));
+ Assert.assertTrue(FunctionRegistry.impliesOrder("first_value"));
+ Assert.assertTrue(FunctionRegistry.impliesOrder("last_value"));
+ Assert.assertTrue(FunctionRegistry.impliesOrder("lead"));
+ Assert.assertTrue(FunctionRegistry.impliesOrder("lag"));
+ Assert.assertFalse(FunctionRegistry.impliesOrder("min"));
+ }
}
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/history/TestHiveHistory.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/history/TestHiveHistory.java?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/history/TestHiveHistory.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/history/TestHiveHistory.java Fri Aug 16 01:21:54 2013
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.histor
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
+import java.lang.reflect.Proxy;
import java.util.LinkedList;
import java.util.Map;
@@ -31,9 +32,9 @@ import org.apache.hadoop.hive.cli.CliSes
import org.apache.hadoop.hive.common.LogUtils;
import org.apache.hadoop.hive.common.LogUtils.LogInitializationException;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.ql.Driver;
-import org.apache.hadoop.hive.ql.QTestUtil.QTestSetup;
import org.apache.hadoop.hive.ql.history.HiveHistory.Keys;
import org.apache.hadoop.hive.ql.history.HiveHistory.QueryInfo;
import org.apache.hadoop.hive.ql.history.HiveHistory.TaskInfo;
@@ -75,7 +76,7 @@ public class TestHiveHistory extends Tes
+ tmpdir);
}
}
-
+
conf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, false);
// copy the test files into hadoop if required.
@@ -124,8 +125,9 @@ public class TestHiveHistory extends Tes
LogUtils.initHiveLog4j();
} catch (LogInitializationException e) {
}
-
- CliSessionState ss = new CliSessionState(new HiveConf(SessionState.class));
+ HiveConf hconf = new HiveConf(SessionState.class);
+ hconf.setBoolVar(ConfVars.HIVE_SESSION_HISTORY_ENABLED, true);
+ CliSessionState ss = new CliSessionState(hconf);
ss.in = System.in;
try {
ss.out = new PrintStream(System.out, true, "UTF-8");
@@ -179,7 +181,7 @@ public class TestHiveHistory extends Tes
HiveConf conf = new HiveConf(SessionState.class);
conf.set(HiveConf.ConfVars.HIVEHISTORYFILELOC.toString(), actualDir);
SessionState ss = new CliSessionState(conf);
- HiveHistory hiveHistory = new HiveHistory(ss);
+ HiveHistory hiveHistory = new HiveHistoryImpl(ss);
Path actualPath = new Path(actualDir);
if (!fs.exists(actualPath)) {
fail("Query location path is not exist :" + actualPath.toString());
@@ -192,4 +194,38 @@ public class TestHiveHistory extends Tes
}
}
+ /**
+ * Check if HiveHistoryImpl class is returned when hive history is enabled
+ * @throws Exception
+ */
+ public void testHiveHistoryConfigEnabled() throws Exception {
+ HiveConf conf = new HiveConf(SessionState.class);
+ conf.setBoolVar(ConfVars.HIVE_SESSION_HISTORY_ENABLED, true);
+ SessionState ss = new CliSessionState(conf);
+ SessionState.start(ss);
+ HiveHistory hHistory = ss.getHiveHistory();
+ assertEquals("checking hive history class when history is enabled",
+ hHistory.getClass(), HiveHistoryImpl.class);
+ }
+ /**
+ * Check if HiveHistory class is a Proxy class when hive history is disabled
+ * @throws Exception
+ */
+ public void testHiveHistoryConfigDisabled() throws Exception {
+ HiveConf conf = new HiveConf(SessionState.class);
+ conf.setBoolVar(ConfVars.HIVE_SESSION_HISTORY_ENABLED, false);
+ SessionState ss = new CliSessionState(conf);
+ SessionState.start(ss);
+ HiveHistory hHistory = ss.getHiveHistory();
+ assertTrue("checking hive history class when history is disabled",
+ hHistory.getClass() != HiveHistoryImpl.class);
+ System.err.println("hHistory.getClass" + hHistory.getClass());
+ assertTrue("verifying proxy class is used when history is disabled",
+ Proxy.isProxyClass(hHistory.getClass()));
+
+ }
+
+
+
+
}
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java Fri Aug 16 01:21:54 2013
@@ -167,7 +167,9 @@ public class TestSymlinkTextInputFormat
QueryPlan plan = drv.getPlan();
MapRedTask selectTask = (MapRedTask)plan.getRootTasks().get(0);
- ExecDriver.addInputPaths(newJob, selectTask.getWork().getMapWork(), emptyScratchDir.toString(), ctx);
+ List<Path> inputPaths = Utilities.getInputPaths(newJob, selectTask.getWork().getMapWork(), emptyScratchDir.toString(), ctx);
+ Utilities.setInputPaths(newJob, inputPaths);
+
Utilities.setMapRedWork(newJob, selectTask.getWork(), ctx.getMRTmpFileURI());
CombineHiveInputFormat combineInputFormat = ReflectionUtils.newInstance(
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java Fri Aug 16 01:21:54 2013
@@ -18,15 +18,8 @@
package org.apache.hadoop.hive.ql.io.orc;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestName;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
import java.io.BufferedReader;
import java.io.File;
@@ -35,8 +28,14 @@ import java.io.FileReader;
import java.io.PrintStream;
import java.util.Random;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.junit.Before;
+import org.junit.Test;
public class TestFileDump {
@@ -69,9 +68,6 @@ public class TestFileDump {
}
}
- private static final String outputFilename =
- File.separator + "orc-file-dump.out";
-
private static void checkOutput(String expected,
String actual) throws Exception {
BufferedReader eStream =
@@ -114,8 +110,62 @@ public class TestFileDump {
}
writer.close();
PrintStream origOut = System.out;
- FileOutputStream myOut = new FileOutputStream(workDir + File.separator +
- "orc-file-dump.out");
+ String outputFilename = File.separator + "orc-file-dump.out";
+ FileOutputStream myOut = new FileOutputStream(workDir + outputFilename);
+
+ // replace stdout and run command
+ System.setOut(new PrintStream(myOut));
+ FileDump.main(new String[]{testFilePath.toString()});
+ System.out.flush();
+ System.setOut(origOut);
+
+
+ checkOutput(resourceDir + outputFilename, workDir + outputFilename);
+ }
+
+ // Test that if the fraction of rows that have distinct strings is greater than the configured
+ // threshold, dictionary encoding is turned off. If dictionary encoding is turned off, the length
+ // of the dictionary stream for the column will be 0 in the ORC file dump.
+ @Test
+ public void testDictionaryThreshold() throws Exception {
+ ObjectInspector inspector;
+ synchronized (TestOrcFile.class) {
+ inspector = ObjectInspectorFactory.getReflectionObjectInspector
+ (MyRecord.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ }
+ Configuration conf = new Configuration();
+ conf.setFloat(HiveConf.ConfVars.HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD.varname, 0.49f);
+ Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
+ 100000, CompressionKind.ZLIB, 10000, 10000);
+ Random r1 = new Random(1);
+ String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
+ "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
+ "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it",
+ "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch",
+ "of", "incredulity,", "it", "was", "the", "season", "of", "Light,",
+ "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the",
+ "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,",
+ "we", "had", "everything", "before", "us,", "we", "had", "nothing",
+ "before", "us,", "we", "were", "all", "going", "direct", "to",
+ "Heaven,", "we", "were", "all", "going", "direct", "the", "other",
+ "way"};
+ int nextInt = 0;
+ for(int i=0; i < 21000; ++i) {
+ // Write out the same string twice; this guarantees the fraction of rows with
+ // distinct strings is 0.5
+ if (i % 2 == 0) {
+ nextInt = r1.nextInt(words.length);
+ // Append the value of i to the word; this guarantees that when an index or word is repeated
+ // the actual string is unique.
+ words[nextInt] += "-" + i;
+ }
+ writer.addRow(new MyRecord(r1.nextInt(), r1.nextLong(),
+ words[nextInt]));
+ }
+ writer.close();
+ PrintStream origOut = System.out;
+ String outputFilename = File.separator + "orc-file-dump-dictionary-threshold.out";
+ FileOutputStream myOut = new FileOutputStream(workDir + outputFilename);
// replace stdout and run command
System.setOut(new PrintStream(myOut));
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java Fri Aug 16 01:21:54 2013
@@ -58,6 +58,7 @@ import java.nio.ByteBuffer;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.HashMap;
+import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
@@ -905,6 +906,40 @@ public class TestOrcFile {
compareList(expectedList, actualList);
compareList(expected.list, (List) row.getFieldValue(10));
}
+ rows.close();
+ Iterator<StripeInformation> stripeIterator =
+ reader.getStripes().iterator();
+ long offsetOfStripe2 = 0;
+ long offsetOfStripe4 = 0;
+ long lastRowOfStripe2 = 0;
+ for(int i = 0; i < 5; ++i) {
+ StripeInformation stripe = stripeIterator.next();
+ if (i < 2) {
+ lastRowOfStripe2 += stripe.getNumberOfRows();
+ } else if (i == 2) {
+ offsetOfStripe2 = stripe.getOffset();
+ lastRowOfStripe2 += stripe.getNumberOfRows() - 1;
+ } else if (i == 4) {
+ offsetOfStripe4 = stripe.getOffset();
+ }
+ }
+ boolean[] columns = new boolean[reader.getStatistics().length];
+ columns[5] = true; // long column
+ columns[9] = true; // text column
+ rows = reader.rows(offsetOfStripe2, offsetOfStripe4 - offsetOfStripe2,
+ columns);
+ rows.seekToRow(lastRowOfStripe2);
+ for(int i = 0; i < 2; ++i) {
+ row = (OrcStruct) rows.next(row);
+ BigRow expected = createRandomRow(intValues, doubleValues,
+ stringValues, byteValues, words,
+ (int) (lastRowOfStripe2 + i));
+
+ assertEquals(expected.long1.longValue(),
+ ((LongWritable) row.getFieldValue(4)).get());
+ assertEquals(expected.string1, row.getFieldValue(8));
+ }
+ rows.close();
}
private void compareInner(InnerStruct expect,
@@ -993,7 +1028,7 @@ public class TestOrcFile {
ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
MyMemoryManager memory = new MyMemoryManager(conf, 10000, 0.1);
- Writer writer = new WriterImpl(fs, testFilePath, inspector,
+ Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
50000, CompressionKind.NONE, 100, 0, memory);
assertEquals(testFilePath, memory.path);
for(int i=0; i < 2500; ++i) {
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcNullOptimization.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcNullOptimization.java?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcNullOptimization.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcNullOptimization.java Fri Aug 16 01:21:54 2013
@@ -1,3 +1,20 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package org.apache.hadoop.hive.ql.io.orc;
import static junit.framework.Assert.assertEquals;
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTestTranslate.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTestTranslate.java?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTestTranslate.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTestTranslate.java Fri Aug 16 01:21:54 2013
@@ -35,7 +35,7 @@ import org.apache.hadoop.io.Text;
* Mimics oracle's function translate(str1, str2, str3).
*/
public class GenericUDFTestTranslate extends GenericUDF {
- ObjectInspector[] argumentOIs;
+ private transient ObjectInspector[] argumentOIs;
/**
* Return a corresponding ordinal from an integer.
Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/ba_table_udfs.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/ba_table_udfs.q?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/ba_table_udfs.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/ba_table_udfs.q Fri Aug 16 01:21:54 2013
@@ -1,16 +1,36 @@
USE default;
--- this query tests all the udfs provided to work with binary types
+CREATE TABLE dest1(bytes1 BINARY,
+ bytes2 BINARY,
+ string STRING);
+FROM src INSERT OVERWRITE TABLE dest1
SELECT
- key,
- value,
- LENGTH(CAST(src.key AS BINARY)),
- LENGTH(CAST(src.value AS BINARY)),
- CONCAT(CAST(src.key AS BINARY), CAST(src.value AS BINARY)),
- SUBSTR(CAST(src.value AS BINARY), 1, 4),
- SUBSTR(CAST(src.value AS BINARY), 3),
- SUBSTR(CAST(src.value AS BINARY), -4, 3)
-FROM src
+ CAST(key AS BINARY),
+ CAST(value AS BINARY),
+ value
ORDER BY value
LIMIT 100;
+
+--Add in a null row for good measure
+INSERT INTO TABLE dest1 SELECT NULL, NULL, NULL FROM dest1 LIMIT 1;
+
+-- this query tests all the udfs provided to work with binary types
+
+SELECT
+ bytes1,
+ bytes2,
+ string,
+ LENGTH(bytes1),
+ CONCAT(bytes1, bytes2),
+ SUBSTR(bytes2, 1, 4),
+ SUBSTR(bytes2, 3),
+ SUBSTR(bytes2, -4, 3),
+ HEX(bytes1),
+ UNHEX(HEX(bytes1)),
+ BASE64(bytes1),
+ UNBASE64(BASE64(bytes1)),
+ HEX(ENCODE(string, 'US-ASCII')),
+ DECODE(ENCODE(string, 'US-ASCII'), 'US-ASCII')
+FROM dest1
+ORDER BY bytes2;
Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/correlationoptimizer1.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/correlationoptimizer1.q?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/correlationoptimizer1.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/correlationoptimizer1.q Fri Aug 16 01:21:54 2013
@@ -33,23 +33,6 @@ set hive.optimize.correlation=true;
-- Enable hive.auto.convert.join.
-- Correlation Optimizer will detect that the join will be converted to a Map-join,
-- so it will not try to optimize this query.
-EXPLAIN
-SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
-FROM (SELECT x.key AS key, count(1) AS cnt
- FROM src1 x JOIN src y ON (x.key = y.key)
- GROUP BY x.key) tmp;
-
-SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
-FROM (SELECT x.key AS key, count(1) AS cnt
- FROM src1 x JOIN src y ON (x.key = y.key)
- GROUP BY x.key) tmp;
-
-set hive.auto.convert.join=true;
-set hive.optimize.mapjoin.mapreduce=true;
-set hive.optimize.correlation=true;
--- Enable hive.auto.convert.join.
--- Correlation Optimizer will detect that the join will be converted to a Map-join,
--- so it will not try to optimize this query.
-- We should generate 1 MR job for subquery tmp.
EXPLAIN
SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/correlationoptimizer3.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/correlationoptimizer3.q?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/correlationoptimizer3.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/correlationoptimizer3.q Fri Aug 16 01:21:54 2013
@@ -36,7 +36,6 @@ FROM (SELECT b.key AS key, b.cnt AS cnt,
set hive.optimize.correlation=true;
set hive.auto.convert.join=true;
-set hive.optimize.mapjoin.mapreduce=true;
-- Enable hive.auto.convert.join.
EXPLAIN
SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)), SUM(HASH(tmp.value))
@@ -79,10 +78,9 @@ FROM (SELECT d.key AS key, d.cnt AS cnt,
FROM (SELECT x.key, x.value FROM src1 x JOIN src y ON (x.key = y.key)) b
JOIN (SELECT x.key, count(1) AS cnt FROM src1 x JOIN src y ON (x.key = y.key) group by x.key) d
ON b.key = d.key) tmp;
-
+
set hive.optimize.correlation=true;
set hive.auto.convert.join=true;
-set hive.optimize.mapjoin.mapreduce=true;
-- Enable hive.auto.convert.join.
EXPLAIN
SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)), SUM(HASH(tmp.value))
Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/correlationoptimizer4.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/correlationoptimizer4.q?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/correlationoptimizer4.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/correlationoptimizer4.q Fri Aug 16 01:21:54 2013
@@ -33,10 +33,9 @@ SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.
FROM (SELECT y.key AS key, count(1) AS cnt
FROM T2 x JOIN T1 y ON (x.key = y.key) JOIN T3 z ON (y.key = z.key)
GROUP BY y.key) tmp;
-
+
set hive.optimize.correlation=true;
set hive.auto.convert.join=true;
-set hive.optimize.mapjoin.mapreduce=true;
-- Enable hive.auto.convert.join.
EXPLAIN
SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/correlationoptimizer5.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/correlationoptimizer5.q?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/correlationoptimizer5.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/correlationoptimizer5.q Fri Aug 16 01:21:54 2013
@@ -52,7 +52,6 @@ ON b.key = d.key;
set hive.optimize.correlation=true;
set hive.auto.convert.join=true;
-set hive.optimize.mapjoin.mapreduce=true;
set hive.auto.convert.join.noconditionaltask.size=10000000000;
-- Enable hive.auto.convert.join.
EXPLAIN
Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/correlationoptimizer6.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/correlationoptimizer6.q?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/correlationoptimizer6.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/correlationoptimizer6.q Fri Aug 16 01:21:54 2013
@@ -36,7 +36,6 @@ ON xx.key=yy.key ORDER BY xx.key, xx.cnt
set hive.optimize.correlation=true;
set hive.auto.convert.join=true;
-set hive.optimize.mapjoin.mapreduce=true;
-- Enable hive.auto.convert.join.
EXPLAIN
SELECT xx.key, xx.cnt, yy.key, yy.cnt
@@ -306,7 +305,6 @@ ON xx.key=yy.key ORDER BY xx.key, xx.cnt
set hive.optimize.correlation=true;
set hive.auto.convert.join=true;
-set hive.optimize.mapjoin.mapreduce=true;
EXPLAIN
SELECT xx.key, xx.cnt, yy.key, yy.value, yy.cnt
FROM
Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/correlationoptimizer7.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/correlationoptimizer7.q?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/correlationoptimizer7.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/correlationoptimizer7.q Fri Aug 16 01:21:54 2013
@@ -36,19 +36,14 @@ ON xx.key=yy.key ORDER BY xx.key, xx.cnt
set hive.auto.convert.join=true;
set hive.auto.convert.join.noconditionaltask=true;
set hive.auto.convert.join.noconditionaltask.size=10000000000;
-set hive.optimize.mapjoin.mapreduce=true;
set hive.optimize.correlation=false;
-- Without correlation optimizer, we will have 3 MR jobs.
-- The first one is a MapJoin and Aggregation (in the Reduce Phase).
-- The second one is another MapJoin. The third one is for ordering.
--- With the correlation optimizer, right now, we still have
--- 3 MR jobs. The first one is a MapJoin and the map-side aggregation (a map-only job).
--- The second one have the reduce-side aggregation and the second join.
--- The third one is for ordering.
--- Although we have turned on hive.optimize.mapjoin.mapreduce, that optimizer
--- can not handle the case that the MR job (the one which a map-only job will be merged in)
--- has multiple inputs. We should improve that optimizer.
+-- With the correlation optimizer, right now, we have
+-- 2 MR jobs. The first one will evaluate the sub-query xx and the join of
+-- xx and yy. The second one will do the ORDER BY.
EXPLAIN
SELECT xx.key, xx.cnt, yy.key, yy.value
FROM (SELECT x.key AS key, count(1) AS cnt
Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/multiMapJoin1.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/multiMapJoin1.q?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/multiMapJoin1.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/multiMapJoin1.q Fri Aug 16 01:21:54 2013
@@ -1,10 +1,15 @@
--- Join of a big table with 2 small tables on different keys should be performed as a single MR job
create table smallTbl1(key string, value string);
insert overwrite table smallTbl1 select * from src where key < 10;
create table smallTbl2(key string, value string);
insert overwrite table smallTbl2 select * from src where key < 10;
+create table smallTbl3(key string, value string);
+insert overwrite table smallTbl3 select * from src where key < 10;
+
+create table smallTbl4(key string, value string);
+insert overwrite table smallTbl4 select * from src where key < 10;
+
create table bigTbl(key string, value string);
insert overwrite table bigTbl
select * from
@@ -68,37 +73,30 @@ select count(*) FROM
bigTbl.value as value2 FROM bigTbl JOIN smallTbl1
on (bigTbl.key = smallTbl1.key)
) firstjoin
-JOIN
+JOIN
smallTbl2 on (firstjoin.value1 = smallTbl2.value);
-set hive.optimize.mapjoin.mapreduce=true;
-
-- Now run a query with two-way join, which should first be converted into a
-- map-join followed by groupby and then finally into a single MR job.
-explain insert overwrite directory '${system:test.tmp.dir}/multiJoin1.output'
+explain
select count(*) FROM
(select bigTbl.key as key, bigTbl.value as value1,
- bigTbl.value as value2 FROM bigTbl JOIN smallTbl1
+ bigTbl.value as value2 FROM bigTbl JOIN smallTbl1
on (bigTbl.key = smallTbl1.key)
) firstjoin
-JOIN
+JOIN
smallTbl2 on (firstjoin.value1 = smallTbl2.value)
group by smallTbl2.key;
-insert overwrite directory '${system:test.tmp.dir}/multiJoin1.output'
select count(*) FROM
(select bigTbl.key as key, bigTbl.value as value1,
- bigTbl.value as value2 FROM bigTbl JOIN smallTbl1
+ bigTbl.value as value2 FROM bigTbl JOIN smallTbl1
on (bigTbl.key = smallTbl1.key)
) firstjoin
-JOIN
+JOIN
smallTbl2 on (firstjoin.value1 = smallTbl2.value)
group by smallTbl2.key;
-set hive.optimize.mapjoin.mapreduce=false;
-
-create table smallTbl3(key string, value string);
-insert overwrite table smallTbl3 select * from src where key < 10;
drop table bigTbl;
@@ -128,100 +126,276 @@ select * from
) subq;
set hive.auto.convert.join.noconditionaltask=false;
-
-explain
-select count(*) FROM
- (
- SELECT firstjoin.key1 as key1, firstjoin.key2 as key2, smallTbl2.key as key3,
- firstjoin.value1 as value1, firstjoin.value2 as value2 FROM
- (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2,
- bigTbl.value as value1, bigTbl.value as value2
- FROM bigTbl JOIN smallTbl1
- on (bigTbl.key1 = smallTbl1.key)
- ) firstjoin
- JOIN
- smallTbl2 on (firstjoin.value1 = smallTbl2.value)
- ) secondjoin
- JOIN smallTbl3 on (secondjoin.key2 = smallTbl3.key);
-
-select count(*) FROM
- (
- SELECT firstjoin.key1 as key1, firstjoin.key2 as key2, smallTbl2.key as key3,
- firstjoin.value1 as value1, firstjoin.value2 as value2 FROM
- (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2,
- bigTbl.value as value1, bigTbl.value as value2
- FROM bigTbl JOIN smallTbl1
- on (bigTbl.key1 = smallTbl1.key)
- ) firstjoin
- JOIN
- smallTbl2 on (firstjoin.value1 = smallTbl2.value)
- ) secondjoin
- JOIN smallTbl3 on (secondjoin.key2 = smallTbl3.key);
+-- First disable noconditionaltask
+EXPLAIN
+SELECT SUM(HASH(join3.key1)),
+ SUM(HASH(join3.key2)),
+ SUM(HASH(join3.key3)),
+ SUM(HASH(join3.key4)),
+ SUM(HASH(join3.key5)),
+ SUM(HASH(smallTbl4.key)),
+ SUM(HASH(join3.value1)),
+ SUM(HASH(join3.value2))
+FROM (SELECT join2.key1 as key1,
+ join2.key2 as key2,
+ join2.key3 as key3,
+ join2.key4 as key4,
+ smallTbl3.key as key5,
+ join2.value1 as value1,
+ join2.value2 as value2
+ FROM (SELECT join1.key1 as key1,
+ join1.key2 as key2,
+ join1.key3 as key3,
+ smallTbl2.key as key4,
+ join1.value1 as value1,
+ join1.value2 as value2
+ FROM (SELECT bigTbl.key1 as key1,
+ bigTbl.key2 as key2,
+ smallTbl1.key as key3,
+ bigTbl.value as value1,
+ bigTbl.value as value2
+ FROM bigTbl
+ JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1
+ JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2
+ JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3
+JOIN smallTbl4 ON (join3.key3 = smallTbl4.key);
+
+SELECT SUM(HASH(join3.key1)),
+ SUM(HASH(join3.key2)),
+ SUM(HASH(join3.key3)),
+ SUM(HASH(join3.key4)),
+ SUM(HASH(join3.key5)),
+ SUM(HASH(smallTbl4.key)),
+ SUM(HASH(join3.value1)),
+ SUM(HASH(join3.value2))
+FROM (SELECT join2.key1 as key1,
+ join2.key2 as key2,
+ join2.key3 as key3,
+ join2.key4 as key4,
+ smallTbl3.key as key5,
+ join2.value1 as value1,
+ join2.value2 as value2
+ FROM (SELECT join1.key1 as key1,
+ join1.key2 as key2,
+ join1.key3 as key3,
+ smallTbl2.key as key4,
+ join1.value1 as value1,
+ join1.value2 as value2
+ FROM (SELECT bigTbl.key1 as key1,
+ bigTbl.key2 as key2,
+ smallTbl1.key as key3,
+ bigTbl.value as value1,
+ bigTbl.value as value2
+ FROM bigTbl
+ JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1
+ JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2
+ JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3
+JOIN smallTbl4 ON (join3.key3 = smallTbl4.key);
set hive.auto.convert.join.noconditionaltask=true;
set hive.auto.convert.join.noconditionaltask.size=10000;
-
--- join with 4 tables on different keys is also executed as a single MR job,
--- So, overall two jobs - one for multi-way join and one for count(*)
-explain
-select count(*) FROM
- (
- SELECT firstjoin.key1 as key1, firstjoin.key2 as key2, smallTbl2.key as key3,
- firstjoin.value1 as value1, firstjoin.value2 as value2 FROM
- (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2,
- bigTbl.value as value1, bigTbl.value as value2
- FROM bigTbl JOIN smallTbl1
- on (bigTbl.key1 = smallTbl1.key)
- ) firstjoin
- JOIN
- smallTbl2 on (firstjoin.value1 = smallTbl2.value)
- ) secondjoin
- JOIN smallTbl3 on (secondjoin.key2 = smallTbl3.key);
-
-select count(*) FROM
- (
- SELECT firstjoin.key1 as key1, firstjoin.key2 as key2, smallTbl2.key as key3,
- firstjoin.value1 as value1, firstjoin.value2 as value2 FROM
- (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2,
- bigTbl.value as value1, bigTbl.value as value2
- FROM bigTbl JOIN smallTbl1
- on (bigTbl.key1 = smallTbl1.key)
- ) firstjoin
- JOIN
- smallTbl2 on (firstjoin.value1 = smallTbl2.value)
- ) secondjoin
- JOIN smallTbl3 on (secondjoin.key2 = smallTbl3.key);
-
-set hive.optimize.mapjoin.mapreduce=true;
--- Now run the above query with M-MR optimization
--- This should be a single MR job end-to-end.
-explain
-select count(*) FROM
- (
- SELECT firstjoin.key1 as key1, firstjoin.key2 as key2, smallTbl2.key as key3,
- firstjoin.value1 as value1, firstjoin.value2 as value2 FROM
- (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2,
- bigTbl.value as value1, bigTbl.value as value2
- FROM bigTbl JOIN smallTbl1
- on (bigTbl.key1 = smallTbl1.key)
- ) firstjoin
- JOIN
- smallTbl2 on (firstjoin.value1 = smallTbl2.value)
- ) secondjoin
- JOIN smallTbl3 on (secondjoin.key2 = smallTbl3.key);
-
-select count(*) FROM
- (
- SELECT firstjoin.key1 as key1, firstjoin.key2 as key2, smallTbl2.key as key3,
- firstjoin.value1 as value1, firstjoin.value2 as value2 FROM
- (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2,
- bigTbl.value as value1, bigTbl.value as value2
- FROM bigTbl JOIN smallTbl1
- on (bigTbl.key1 = smallTbl1.key)
- ) firstjoin
- JOIN
- smallTbl2 on (firstjoin.value1 = smallTbl2.value)
- ) secondjoin
- JOIN smallTbl3 on (secondjoin.key2 = smallTbl3.key);
-
-set hive.optimize.mapjoin.mapreduce=false;
+-- Enable noconditionaltask and set the size of hive.auto.convert.join.noconditionaltask.size
+-- to 10000, which is large enough to fit all four small tables (smallTbl1 to smallTbl4).
+-- We will use a single MR job to evaluate this query.
+EXPLAIN
+SELECT SUM(HASH(join3.key1)),
+ SUM(HASH(join3.key2)),
+ SUM(HASH(join3.key3)),
+ SUM(HASH(join3.key4)),
+ SUM(HASH(join3.key5)),
+ SUM(HASH(smallTbl4.key)),
+ SUM(HASH(join3.value1)),
+ SUM(HASH(join3.value2))
+FROM (SELECT join2.key1 as key1,
+ join2.key2 as key2,
+ join2.key3 as key3,
+ join2.key4 as key4,
+ smallTbl3.key as key5,
+ join2.value1 as value1,
+ join2.value2 as value2
+ FROM (SELECT join1.key1 as key1,
+ join1.key2 as key2,
+ join1.key3 as key3,
+ smallTbl2.key as key4,
+ join1.value1 as value1,
+ join1.value2 as value2
+ FROM (SELECT bigTbl.key1 as key1,
+ bigTbl.key2 as key2,
+ smallTbl1.key as key3,
+ bigTbl.value as value1,
+ bigTbl.value as value2
+ FROM bigTbl
+ JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1
+ JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2
+ JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3
+JOIN smallTbl4 ON (join3.key3 = smallTbl4.key);
+
+SELECT SUM(HASH(join3.key1)),
+ SUM(HASH(join3.key2)),
+ SUM(HASH(join3.key3)),
+ SUM(HASH(join3.key4)),
+ SUM(HASH(join3.key5)),
+ SUM(HASH(smallTbl4.key)),
+ SUM(HASH(join3.value1)),
+ SUM(HASH(join3.value2))
+FROM (SELECT join2.key1 as key1,
+ join2.key2 as key2,
+ join2.key3 as key3,
+ join2.key4 as key4,
+ smallTbl3.key as key5,
+ join2.value1 as value1,
+ join2.value2 as value2
+ FROM (SELECT join1.key1 as key1,
+ join1.key2 as key2,
+ join1.key3 as key3,
+ smallTbl2.key as key4,
+ join1.value1 as value1,
+ join1.value2 as value2
+ FROM (SELECT bigTbl.key1 as key1,
+ bigTbl.key2 as key2,
+ smallTbl1.key as key3,
+ bigTbl.value as value1,
+ bigTbl.value as value2
+ FROM bigTbl
+ JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1
+ JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2
+ JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3
+JOIN smallTbl4 ON (join3.key3 = smallTbl4.key);
+
+set hive.auto.convert.join.noconditionaltask.size=200;
+-- Enable noconditionaltask and set the size of hive.auto.convert.join.noconditionaltask.size
+-- to 200, which is large enough to fit two small tables. We will have two jobs to evaluate this
+-- query. The first job is a Map-only job to evaluate join1 and join2.
+-- The second job will evaluate the rest of this query.
+EXPLAIN
+SELECT SUM(HASH(join3.key1)),
+ SUM(HASH(join3.key2)),
+ SUM(HASH(join3.key3)),
+ SUM(HASH(join3.key4)),
+ SUM(HASH(join3.key5)),
+ SUM(HASH(smallTbl4.key)),
+ SUM(HASH(join3.value1)),
+ SUM(HASH(join3.value2))
+FROM (SELECT join2.key1 as key1,
+ join2.key2 as key2,
+ join2.key3 as key3,
+ join2.key4 as key4,
+ smallTbl3.key as key5,
+ join2.value1 as value1,
+ join2.value2 as value2
+ FROM (SELECT join1.key1 as key1,
+ join1.key2 as key2,
+ join1.key3 as key3,
+ smallTbl2.key as key4,
+ join1.value1 as value1,
+ join1.value2 as value2
+ FROM (SELECT bigTbl.key1 as key1,
+ bigTbl.key2 as key2,
+ smallTbl1.key as key3,
+ bigTbl.value as value1,
+ bigTbl.value as value2
+ FROM bigTbl
+ JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1
+ JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2
+ JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3
+JOIN smallTbl4 ON (join3.key3 = smallTbl4.key);
+
+SELECT SUM(HASH(join3.key1)),
+ SUM(HASH(join3.key2)),
+ SUM(HASH(join3.key3)),
+ SUM(HASH(join3.key4)),
+ SUM(HASH(join3.key5)),
+ SUM(HASH(smallTbl4.key)),
+ SUM(HASH(join3.value1)),
+ SUM(HASH(join3.value2))
+FROM (SELECT join2.key1 as key1,
+ join2.key2 as key2,
+ join2.key3 as key3,
+ join2.key4 as key4,
+ smallTbl3.key as key5,
+ join2.value1 as value1,
+ join2.value2 as value2
+ FROM (SELECT join1.key1 as key1,
+ join1.key2 as key2,
+ join1.key3 as key3,
+ smallTbl2.key as key4,
+ join1.value1 as value1,
+ join1.value2 as value2
+ FROM (SELECT bigTbl.key1 as key1,
+ bigTbl.key2 as key2,
+ smallTbl1.key as key3,
+ bigTbl.value as value1,
+ bigTbl.value as value2
+ FROM bigTbl
+ JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1
+ JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2
+ JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3
+JOIN smallTbl4 ON (join3.key3 = smallTbl4.key);
+
+set hive.auto.convert.join.noconditionaltask.size=0;
+-- Enable noconditionaltask but set the size of hive.auto.convert.join.noconditionaltask.size
+-- to 0. The plan will be the same as the one with a disabled noconditionaltask.
+EXPLAIN
+SELECT SUM(HASH(join3.key1)),
+ SUM(HASH(join3.key2)),
+ SUM(HASH(join3.key3)),
+ SUM(HASH(join3.key4)),
+ SUM(HASH(join3.key5)),
+ SUM(HASH(smallTbl4.key)),
+ SUM(HASH(join3.value1)),
+ SUM(HASH(join3.value2))
+FROM (SELECT join2.key1 as key1,
+ join2.key2 as key2,
+ join2.key3 as key3,
+ join2.key4 as key4,
+ smallTbl3.key as key5,
+ join2.value1 as value1,
+ join2.value2 as value2
+ FROM (SELECT join1.key1 as key1,
+ join1.key2 as key2,
+ join1.key3 as key3,
+ smallTbl2.key as key4,
+ join1.value1 as value1,
+ join1.value2 as value2
+ FROM (SELECT bigTbl.key1 as key1,
+ bigTbl.key2 as key2,
+ smallTbl1.key as key3,
+ bigTbl.value as value1,
+ bigTbl.value as value2
+ FROM bigTbl
+ JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1
+ JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2
+ JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3
+JOIN smallTbl4 ON (join3.key3 = smallTbl4.key);
+
+SELECT SUM(HASH(join3.key1)),
+ SUM(HASH(join3.key2)),
+ SUM(HASH(join3.key3)),
+ SUM(HASH(join3.key4)),
+ SUM(HASH(join3.key5)),
+ SUM(HASH(smallTbl4.key)),
+ SUM(HASH(join3.value1)),
+ SUM(HASH(join3.value2))
+FROM (SELECT join2.key1 as key1,
+ join2.key2 as key2,
+ join2.key3 as key3,
+ join2.key4 as key4,
+ smallTbl3.key as key5,
+ join2.value1 as value1,
+ join2.value2 as value2
+ FROM (SELECT join1.key1 as key1,
+ join1.key2 as key2,
+ join1.key3 as key3,
+ smallTbl2.key as key4,
+ join1.value1 as value1,
+ join1.value2 as value2
+ FROM (SELECT bigTbl.key1 as key1,
+ bigTbl.key2 as key2,
+ smallTbl1.key as key3,
+ bigTbl.value as value1,
+ bigTbl.value as value2
+ FROM bigTbl
+ JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1
+ JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2
+ JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3
+JOIN smallTbl4 ON (join3.key3 = smallTbl4.key);
Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/nonblock_op_deduplicate.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/nonblock_op_deduplicate.q?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/nonblock_op_deduplicate.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/nonblock_op_deduplicate.q Fri Aug 16 01:21:54 2013
@@ -1,2 +1,45 @@
-- negative, references twice for result of function
explain select nkey, nkey + 1 from (select key + 1 as nkey, value from src) a;
+
+set hive.auto.convert.join=false;
+-- This test query is introduced for HIVE-4968.
+-- First, we do not convert the join to MapJoin.
+EXPLAIN
+SELECT tmp4.key as key, tmp4.value as value, tmp4.count as count
+FROM (SELECT tmp2.key as key, tmp2.value as value, tmp3.count as count
+ FROM (SELECT *
+ FROM (SELECT key, value
+ FROM src1) tmp1 ) tmp2
+ JOIN (SELECT count(*) as count
+ FROM src1) tmp3
+ ) tmp4 order by key, value, count;
+
+SELECT tmp4.key as key, tmp4.value as value, tmp4.count as count
+FROM (SELECT tmp2.key as key, tmp2.value as value, tmp3.count as count
+ FROM (SELECT *
+ FROM (SELECT key, value
+ FROM src1) tmp1 ) tmp2
+ JOIN (SELECT count(*) as count
+ FROM src1) tmp3
+ ) tmp4 order by key, value, count;
+
+set hive.auto.convert.join=true;
+-- Then, we convert the join to MapJoin.
+EXPLAIN
+SELECT tmp4.key as key, tmp4.value as value, tmp4.count as count
+FROM (SELECT tmp2.key as key, tmp2.value as value, tmp3.count as count
+ FROM (SELECT *
+ FROM (SELECT key, value
+ FROM src1) tmp1 ) tmp2
+ JOIN (SELECT count(*) as count
+ FROM src1) tmp3
+ ) tmp4 order by key, value, count;
+
+SELECT tmp4.key as key, tmp4.value as value, tmp4.count as count
+FROM (SELECT tmp2.key as key, tmp2.value as value, tmp3.count as count
+ FROM (SELECT *
+ FROM (SELECT key, value
+ FROM src1) tmp1 ) tmp2
+ JOIN (SELECT count(*) as count
+ FROM src1) tmp3
+ ) tmp4 order by key, value, count;