You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ec...@apache.org on 2013/08/04 19:37:01 UTC

svn commit: r1510269 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/exec/ java/org/apache/hadoop/hive/ql/udf/ java/org/apache/hadoop/hive/ql/udf/generic/ test/org/apache/hadoop/hive/ql/exec/

Author: ecapriolo
Date: Sun Aug  4 17:37:01 2013
New Revision: 1510269

URL: http://svn.apache.org/r1510269
Log:
Hive-4879 Window function that imply order can only be registered at compile time (Edward Capriolo)

Reviewed by:	Brock Noland

Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionDescription.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionInfo.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFType.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFFirstValue.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLag.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLastValue.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLead.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentRank.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRank.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLeadLag.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java?rev=1510269&r1=1510268&r2=1510269&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java Sun Aug  4 17:37:01 2013
@@ -184,10 +184,6 @@ public final class FunctionRegistry {
 
   static Map<String, WindowFunctionInfo> windowFunctions = Collections.synchronizedMap(new LinkedHashMap<String, WindowFunctionInfo>());
 
-  /*
-   * UDAFS that only work when the input rows have an order.
-   */
-  public static final HashSet<String> UDAFS_IMPLY_ORDER = new HashSet<String>();
 
   static {
     registerUDF("concat", UDFConcat.class, false);
@@ -442,15 +438,6 @@ public final class FunctionRegistry {
     registerWindowFunction(LEAD_FUNC_NAME, new GenericUDAFLead(), false);
     registerWindowFunction(LAG_FUNC_NAME, new GenericUDAFLag(), false);
 
-    UDAFS_IMPLY_ORDER.add("rank");
-    UDAFS_IMPLY_ORDER.add("dense_rank");
-    UDAFS_IMPLY_ORDER.add("percent_rank");
-    UDAFS_IMPLY_ORDER.add("cume_dist");
-    UDAFS_IMPLY_ORDER.add(LEAD_FUNC_NAME);
-    UDAFS_IMPLY_ORDER.add(LAG_FUNC_NAME);
-    UDAFS_IMPLY_ORDER.add("first_value");
-    UDAFS_IMPLY_ORDER.add("last_value");
-
     registerTableFunction(NOOP_TABLE_FUNCTION, NoopResolver.class);
     registerTableFunction(NOOP_MAP_TABLE_FUNCTION, NoopWithMapResolver.class);
     registerTableFunction(WINDOWING_TABLE_FUNCTION,  WindowingTableFunctionResolver.class);
@@ -1431,8 +1418,30 @@ public final class FunctionRegistry {
     return windowFunctions.get(name.toLowerCase());
   }
 
+  /**
+   * Tells whether a function implies an ordering of its input rows, consulting a
+   * GenericUDF's UDFType annotation first and then the window function registry.
+   *
+   * @param functionName function name, case-insensitive; may be null
+   * @return true if the function is registered with impliesOrder set, false
+   *         otherwise (including null or unknown names).
+   */
   public static boolean impliesOrder(String functionName) {
-    return functionName == null ? false : UDAFS_IMPLY_ORDER.contains(functionName.toLowerCase());
+    // A null name implies no order (as before this change); guard against an NPE below.
+    if (functionName == null) {
+      return false;
+    }
+    String name = functionName.toLowerCase();
+    FunctionInfo info = mFunctions.get(name);
+    if (info != null && info.isGenericUDF()) {
+      UDFType type = info.getGenericUDF().getClass().getAnnotation(UDFType.class);
+      if (type != null) {
+        return type.impliesOrder();
+      }
+    }
+    // Not decided by a GenericUDF annotation: fall back to the window function registry.
+    WindowFunctionInfo windowInfo = windowFunctions.get(name);
+    return windowInfo != null && windowInfo.isImpliesOrder();
   }
 
   static void registerHiveUDAFsAsWindowFunctions()

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionDescription.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionDescription.java?rev=1510269&r1=1510268&r2=1510269&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionDescription.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionDescription.java Sun Aug  4 17:37:01 2013
@@ -56,5 +56,11 @@ public @interface WindowFunctionDescript
 	 * @return true if ranking function
 	 */
 	boolean rankingFunction() default false;
+
+	 /**
+	  * Used in analytical functions to specify that the UDF implies an ordering
+	  * @return true if the function implies order
+	  */
+	 boolean impliesOrder() default false;
 }
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionInfo.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionInfo.java?rev=1510269&r1=1510268&r2=1510269&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionInfo.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionInfo.java Sun Aug  4 17:37:01 2013
@@ -18,8 +18,6 @@
 
 package org.apache.hadoop.hive.ql.exec;
 
-import org.apache.hadoop.hive.ql.exec.FunctionInfo;
-import org.apache.hadoop.hive.ql.exec.WindowFunctionDescription;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFResolver;
 
 @SuppressWarnings("deprecation")
@@ -27,6 +25,7 @@ public class WindowFunctionInfo
 {
 	boolean supportsWindow = true;
 	boolean pivotResult = false;
+	boolean impliesOrder = false;
 	FunctionInfo fInfo;
 
 	WindowFunctionInfo(FunctionInfo fInfo)
@@ -39,6 +38,7 @@ public class WindowFunctionInfo
 		{
 			supportsWindow = def.supportsWindow();
 			pivotResult = def.pivotResult();
+			impliesOrder = def.impliesOrder();
 		}
 	}
 
@@ -52,6 +52,9 @@ public class WindowFunctionInfo
 		return pivotResult;
 	}
 
+	public boolean isImpliesOrder(){
+	  return impliesOrder;
+	}
 	public FunctionInfo getfInfo()
 	{
 		return fInfo;

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFType.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFType.java?rev=1510269&r1=1510268&r2=1510269&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFType.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFType.java Sun Aug  4 17:37:01 2013
@@ -34,4 +34,11 @@ public @interface UDFType {
   boolean deterministic() default true;
   boolean stateful() default false;
   boolean distinctLike() default false;
+
+  /**
+   * Used in analytical functions to specify that the UDF implies an ordering
+   *
+   * @return true if the function implies order
+   */
+  boolean impliesOrder() default false;
 }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java?rev=1510269&r1=1510268&r2=1510269&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java Sun Aug  4 17:37:01 2013
@@ -44,7 +44,8 @@ import org.apache.hadoop.io.IntWritable;
 								),
 		supportsWindow = false,
 		pivotResult = true,
-		rankingFunction = true
+		rankingFunction = true,
+		impliesOrder = true
 )
 public class GenericUDAFCumeDist extends GenericUDAFRank
 {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java?rev=1510269&r1=1510268&r2=1510269&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java Sun Aug  4 17:37:01 2013
@@ -35,7 +35,8 @@ import org.apache.hadoop.hive.ql.exec.Wi
 								),
 		supportsWindow = false,
 		pivotResult = true,
-		rankingFunction = true
+		rankingFunction = true,
+		impliesOrder = true
 )
 public class GenericUDAFDenseRank extends GenericUDAFRank
 {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFFirstValue.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFFirstValue.java?rev=1510269&r1=1510268&r2=1510269&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFFirstValue.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFFirstValue.java Sun Aug  4 17:37:01 2013
@@ -41,7 +41,8 @@ import org.apache.hadoop.hive.serde2.typ
 								value = "_FUNC_(x)"
 								),
 		supportsWindow = true,
-		pivotResult = false
+		pivotResult = false,
+		impliesOrder = true
 )
 public class GenericUDAFFirstValue extends AbstractGenericUDAFResolver
 {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLag.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLag.java?rev=1510269&r1=1510268&r2=1510269&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLag.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLag.java Sun Aug  4 17:37:01 2013
@@ -33,7 +33,8 @@ import org.apache.hadoop.hive.ql.metadat
                 value = "_FUNC_(expr, amt, default)"
                 ),
     supportsWindow = false,
-    pivotResult = true
+    pivotResult = true,
+    impliesOrder = true
 )
 public class GenericUDAFLag extends GenericUDAFLeadLag {
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLastValue.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLastValue.java?rev=1510269&r1=1510268&r2=1510269&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLastValue.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLastValue.java Sun Aug  4 17:37:01 2013
@@ -34,7 +34,7 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 
-@WindowFunctionDescription(description = @Description(name = "last_value", value = "_FUNC_(x)"), supportsWindow = true, pivotResult = false)
+@WindowFunctionDescription(description = @Description(name = "last_value", value = "_FUNC_(x)"), supportsWindow = true, pivotResult = false, impliesOrder = true)
 public class GenericUDAFLastValue extends AbstractGenericUDAFResolver
 {
 	static final Log LOG = LogFactory.getLog(GenericUDAFLastValue.class

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLead.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLead.java?rev=1510269&r1=1510268&r2=1510269&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLead.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLead.java Sun Aug  4 17:37:01 2013
@@ -33,7 +33,8 @@ import org.apache.hadoop.hive.ql.metadat
                 value = "_FUNC_(expr, amt, default)"
                 ),
     supportsWindow = false,
-    pivotResult = true
+    pivotResult = true,
+    impliesOrder = true
 )
 public class GenericUDAFLead extends GenericUDAFLeadLag {
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentRank.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentRank.java?rev=1510269&r1=1510268&r2=1510269&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentRank.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentRank.java Sun Aug  4 17:37:01 2013
@@ -41,7 +41,8 @@ import org.apache.hadoop.io.IntWritable;
 								),
 		supportsWindow = false,
 		pivotResult = true,
-		rankingFunction = true
+		rankingFunction = true,
+		impliesOrder = true
 )
 public class GenericUDAFPercentRank extends GenericUDAFRank
 {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRank.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRank.java?rev=1510269&r1=1510268&r2=1510269&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRank.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRank.java Sun Aug  4 17:37:01 2013
@@ -45,7 +45,8 @@ import org.apache.hadoop.io.IntWritable;
 								),
 		supportsWindow = false,
 		pivotResult = true,
-		rankingFunction = true
+		rankingFunction = true,
+		impliesOrder = true
 )
 public class GenericUDAFRank extends AbstractGenericUDAFResolver
 {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLeadLag.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLeadLag.java?rev=1510269&r1=1510268&r2=1510269&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLeadLag.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLeadLag.java Sun Aug  4 17:37:01 2013
@@ -24,6 +24,7 @@ import org.apache.hadoop.hive.ql.exec.PT
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.UDFType;
 import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
@@ -197,6 +198,7 @@ public abstract class GenericUDFLeadLag 
 
 	protected abstract int getIndex(int amt);
 
+	@UDFType(impliesOrder = true)
 	public static class GenericUDFLead extends GenericUDFLeadLag
 	{
 
@@ -219,6 +221,7 @@ public abstract class GenericUDFLeadLag 
 
 	}
 
+	@UDFType(impliesOrder = true)
 	public static class GenericUDFLag extends GenericUDFLeadLag
 	{
 		@Override

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java?rev=1510269&r1=1510268&r2=1510269&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java Sun Aug  4 17:37:01 2013
@@ -155,4 +155,16 @@ public class TestFunctionRegistry extend
     Assert.assertTrue(FunctionRegistry.isRankingFunction("cume_dist"));
     Assert.assertFalse(FunctionRegistry.isRankingFunction("min"));
   }
+
+  public void testImpliesOrder() {
+    Assert.assertTrue(FunctionRegistry.impliesOrder("rank"));
+    Assert.assertTrue(FunctionRegistry.impliesOrder("dense_rank"));
+    Assert.assertTrue(FunctionRegistry.impliesOrder("percent_rank"));
+    Assert.assertTrue(FunctionRegistry.impliesOrder("cume_dist"));
+    Assert.assertTrue(FunctionRegistry.impliesOrder("first_value"));
+    Assert.assertTrue(FunctionRegistry.impliesOrder("last_value"));
+    Assert.assertTrue(FunctionRegistry.impliesOrder("lead"));
+    Assert.assertTrue(FunctionRegistry.impliesOrder("lag"));
+    Assert.assertFalse(FunctionRegistry.impliesOrder("min"));
+  }
 }