You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/04/06 05:09:31 UTC

svn commit: r1465172 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java java/org/apache/hadoop/hive/ql/parse/WindowingSpec.java test/queries/clientpositive/windowing_navfn.q test/results/clientpositive/windowing_navfn.q.out

Author: hashutosh
Date: Sat Apr  6 03:09:30 2013
New Revision: 1465172

URL: http://svn.apache.org/r1465172
Log:
HIVE-4262 : fix last_value UDAF behavior (Harish Butani via Ashutosh Chauhan)

Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/WindowingSpec.java
    hive/trunk/ql/src/test/queries/clientpositive/windowing_navfn.q
    hive/trunk/ql/src/test/results/clientpositive/windowing_navfn.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java?rev=1465172&r1=1465171&r2=1465172&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java Sat Apr  6 03:09:30 2013
@@ -126,107 +126,9 @@ import org.apache.hadoop.hive.ql.udf.UDF
 import org.apache.hadoop.hive.ql.udf.UDFUpper;
 import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear;
 import org.apache.hadoop.hive.ql.udf.UDFYear;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFBridge;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCollectSet;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFComputeStats;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFContextNGrams;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCorrelation;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCovariance;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCovarianceSample;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCumeDist;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFDenseRank;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEWAHBitmap;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFFirstValue;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFHistogramNumeric;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFLastValue;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMax;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMin;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFNTile;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFParameterInfo;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentRank;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileApprox;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFRank;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFLead;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFLag;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFResolver;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFResolver2;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFRowNumber;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFStd;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFStdSample;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFVariance;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFVarianceSample;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFnGrams;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFArray;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFArrayContains;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFAssertTrue;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCoalesce;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcatWS;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFEWAHBitmapAnd;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFEWAHBitmapEmpty;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFEWAHBitmapOr;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFElt;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFField;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFFormatNumber;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFFromUtcTimestamp;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIf;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFInFile;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIndex;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFInstr;
+import org.apache.hadoop.hive.ql.udf.generic.*;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLeadLag.GenericUDFLag;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLeadLag.GenericUDFLead;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLocate;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFMap;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFMapKeys;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFMapValues;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFNamedStruct;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFNvl;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualNS;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFPrintf;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFReflect;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFReflect2;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFSentences;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFSize;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFSortArray;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFSplit;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStringToMap;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFTimestamp;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToBinary;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDecimal;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUtcTimestamp;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFTranslate;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUnion;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUnixTimeStamp;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFExplode;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFInline;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFJSONTuple;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFParseUrlTuple;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFStack;
-import org.apache.hadoop.hive.ql.udf.generic.SimpleGenericUDAFParameterInfo;
 import org.apache.hadoop.hive.ql.udf.ptf.NPath.NPathResolver;
 import org.apache.hadoop.hive.ql.udf.ptf.Noop.NoopResolver;
 import org.apache.hadoop.hive.ql.udf.ptf.NoopWithMap.NoopWithMapResolver;
@@ -273,6 +175,8 @@ public final class FunctionRegistry {
 
   public static final String LEAD_FUNC_NAME = "lead";
   public static final String LAG_FUNC_NAME = "lag";
+  public static final String LAST_VALUE_FUNC_NAME = "last_value";
+
 
   public static final String WINDOWING_TABLE_FUNCTION = "windowingtablefunction";
   public static final String NOOP_TABLE_FUNCTION = "noop";

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/WindowingSpec.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/WindowingSpec.java?rev=1465172&r1=1465171&r2=1465172&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/WindowingSpec.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/WindowingSpec.java Sat Apr  6 03:09:30 2013
@@ -237,10 +237,24 @@ public class WindowingSpec {
     OrderSpec orderSpec = wdwSpec.getOrder();
     if ( wFrame == null ) {
       if (!supportsWindowing ) {
-        wFrame = new WindowFrameSpec(
-            new RangeBoundarySpec(Direction.PRECEDING, BoundarySpec.UNBOUNDED_AMOUNT),
-            new RangeBoundarySpec(Direction.FOLLOWING, BoundarySpec.UNBOUNDED_AMOUNT)
-            );
+
+        if ( wFn.getName().toLowerCase().equals(FunctionRegistry.LAST_VALUE_FUNC_NAME)
+            && orderSpec != null ) {
+          /*
+           * last_value: when a Sort Key is specified, last_value should return the
+           * last value among rows with the same Sort Key value.
+           */
+          wFrame = new WindowFrameSpec(
+              new CurrentRowSpec(),
+              new RangeBoundarySpec(Direction.FOLLOWING, 0)
+              );
+        }
+        else {
+          wFrame = new WindowFrameSpec(
+              new RangeBoundarySpec(Direction.PRECEDING, BoundarySpec.UNBOUNDED_AMOUNT),
+              new RangeBoundarySpec(Direction.FOLLOWING, BoundarySpec.UNBOUNDED_AMOUNT)
+              );
+        }
       }
       else if ( orderSpec == null ) {
         wFrame = new WindowFrameSpec(

Modified: hive/trunk/ql/src/test/queries/clientpositive/windowing_navfn.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/windowing_navfn.q?rev=1465172&r1=1465171&r2=1465172&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/windowing_navfn.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/windowing_navfn.q Sat Apr  6 03:09:30 2013
@@ -27,4 +27,5 @@ select s, last_value(t) over (partition 
 
 select s, first_value(s) over (partition by bo order by s) from over10k limit 100;
 
-
+select t, s, i, last_value(i) over (partition by t order by s) 
+from over10k where (s = 'oscar allen' or s = 'oscar carson') and t = 10;

Modified: hive/trunk/ql/src/test/results/clientpositive/windowing_navfn.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/windowing_navfn.q.out?rev=1465172&r1=1465171&r2=1465172&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/windowing_navfn.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/windowing_navfn.q.out Sat Apr  6 03:09:30 2013
@@ -579,3 +579,15 @@ alice miller	alice allen
 alice nixon	alice allen
 alice nixon	alice allen
 alice nixon	alice allen
+PREHOOK: query: select t, s, i, last_value(i) over (partition by t order by s) 
+from over10k where (s = 'oscar allen' or s = 'oscar carson') and t = 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+#### A masked pattern was here ####
+POSTHOOK: query: select t, s, i, last_value(i) over (partition by t order by s) 
+from over10k where (s = 'oscar allen' or s = 'oscar carson') and t = 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+#### A masked pattern was here ####
+10	oscar allen	65662	65662
+10	oscar carson	65549	65549