You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ec...@apache.org on 2013/08/04 19:37:01 UTC
svn commit: r1510269 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/exec/ java/org/apache/hadoop/hive/ql/udf/
java/org/apache/hadoop/hive/ql/udf/generic/
test/org/apache/hadoop/hive/ql/exec/
Author: ecapriolo
Date: Sun Aug 4 17:37:01 2013
New Revision: 1510269
URL: http://svn.apache.org/r1510269
Log:
HIVE-4879 Window functions that imply order can only be registered at compile time (Edward Capriolo)
Reviewed by: Brock Noland
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionDescription.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionInfo.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFType.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFFirstValue.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLag.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLastValue.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLead.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentRank.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRank.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLeadLag.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java?rev=1510269&r1=1510268&r2=1510269&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java Sun Aug 4 17:37:01 2013
@@ -184,10 +184,6 @@ public final class FunctionRegistry {
static Map<String, WindowFunctionInfo> windowFunctions = Collections.synchronizedMap(new LinkedHashMap<String, WindowFunctionInfo>());
- /*
- * UDAFS that only work when the input rows have an order.
- */
- public static final HashSet<String> UDAFS_IMPLY_ORDER = new HashSet<String>();
static {
registerUDF("concat", UDFConcat.class, false);
@@ -442,15 +438,6 @@ public final class FunctionRegistry {
registerWindowFunction(LEAD_FUNC_NAME, new GenericUDAFLead(), false);
registerWindowFunction(LAG_FUNC_NAME, new GenericUDAFLag(), false);
- UDAFS_IMPLY_ORDER.add("rank");
- UDAFS_IMPLY_ORDER.add("dense_rank");
- UDAFS_IMPLY_ORDER.add("percent_rank");
- UDAFS_IMPLY_ORDER.add("cume_dist");
- UDAFS_IMPLY_ORDER.add(LEAD_FUNC_NAME);
- UDAFS_IMPLY_ORDER.add(LAG_FUNC_NAME);
- UDAFS_IMPLY_ORDER.add("first_value");
- UDAFS_IMPLY_ORDER.add("last_value");
-
registerTableFunction(NOOP_TABLE_FUNCTION, NoopResolver.class);
registerTableFunction(NOOP_MAP_TABLE_FUNCTION, NoopWithMapResolver.class);
registerTableFunction(WINDOWING_TABLE_FUNCTION, WindowingTableFunctionResolver.class);
@@ -1431,8 +1418,30 @@ public final class FunctionRegistry {
return windowFunctions.get(name.toLowerCase());
}
+ /**
+ * Both UDF and UDAF functions can imply order for analytical functions
+ *
+ * @param functionName
+ * name of function
+ * @return true if a GenericUDF or GenericUDAF exists for this name and implyOrder is true, false
+ * otherwise.
+ */
public static boolean impliesOrder(String functionName) {
- return functionName == null ? false : UDAFS_IMPLY_ORDER.contains(functionName.toLowerCase());
+
+ FunctionInfo info = mFunctions.get(functionName.toLowerCase());
+ if (info != null) {
+ if (info.isGenericUDF()) {
+ UDFType type = info.getGenericUDF().getClass().getAnnotation(UDFType.class);
+ if (type != null) {
+ return type.impliesOrder();
+ }
+ }
+ }
+ WindowFunctionInfo windowInfo = windowFunctions.get(functionName.toLowerCase());
+ if (windowInfo != null) {
+ return windowInfo.isImpliesOrder();
+ }
+ return false;
}
static void registerHiveUDAFsAsWindowFunctions()
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionDescription.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionDescription.java?rev=1510269&r1=1510268&r2=1510269&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionDescription.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionDescription.java Sun Aug 4 17:37:01 2013
@@ -56,5 +56,11 @@ public @interface WindowFunctionDescript
* @return true if ranking function
*/
boolean rankingFunction() default false;
+
+ /**
+ * Used in analytical functions to specify that the UDF implies an ordering
+ * @return true if the function implies order
+ */
+ boolean impliesOrder() default false;
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionInfo.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionInfo.java?rev=1510269&r1=1510268&r2=1510269&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionInfo.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionInfo.java Sun Aug 4 17:37:01 2013
@@ -18,8 +18,6 @@
package org.apache.hadoop.hive.ql.exec;
-import org.apache.hadoop.hive.ql.exec.FunctionInfo;
-import org.apache.hadoop.hive.ql.exec.WindowFunctionDescription;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFResolver;
@SuppressWarnings("deprecation")
@@ -27,6 +25,7 @@ public class WindowFunctionInfo
{
boolean supportsWindow = true;
boolean pivotResult = false;
+ boolean impliesOrder = false;
FunctionInfo fInfo;
WindowFunctionInfo(FunctionInfo fInfo)
@@ -39,6 +38,7 @@ public class WindowFunctionInfo
{
supportsWindow = def.supportsWindow();
pivotResult = def.pivotResult();
+ impliesOrder = def.impliesOrder();
}
}
@@ -52,6 +52,9 @@ public class WindowFunctionInfo
return pivotResult;
}
+ public boolean isImpliesOrder(){
+ return impliesOrder;
+ }
public FunctionInfo getfInfo()
{
return fInfo;
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFType.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFType.java?rev=1510269&r1=1510268&r2=1510269&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFType.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFType.java Sun Aug 4 17:37:01 2013
@@ -34,4 +34,11 @@ public @interface UDFType {
boolean deterministic() default true;
boolean stateful() default false;
boolean distinctLike() default false;
+
+ /**
+ * Used in analytical functions to specify that the UDF implies an ordering
+ *
+ * @return true if the function implies order
+ */
+ boolean impliesOrder() default false;
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java?rev=1510269&r1=1510268&r2=1510269&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java Sun Aug 4 17:37:01 2013
@@ -44,7 +44,8 @@ import org.apache.hadoop.io.IntWritable;
),
supportsWindow = false,
pivotResult = true,
- rankingFunction = true
+ rankingFunction = true,
+ impliesOrder = true
)
public class GenericUDAFCumeDist extends GenericUDAFRank
{
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java?rev=1510269&r1=1510268&r2=1510269&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java Sun Aug 4 17:37:01 2013
@@ -35,7 +35,8 @@ import org.apache.hadoop.hive.ql.exec.Wi
),
supportsWindow = false,
pivotResult = true,
- rankingFunction = true
+ rankingFunction = true,
+ impliesOrder = true
)
public class GenericUDAFDenseRank extends GenericUDAFRank
{
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFFirstValue.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFFirstValue.java?rev=1510269&r1=1510268&r2=1510269&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFFirstValue.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFFirstValue.java Sun Aug 4 17:37:01 2013
@@ -41,7 +41,8 @@ import org.apache.hadoop.hive.serde2.typ
value = "_FUNC_(x)"
),
supportsWindow = true,
- pivotResult = false
+ pivotResult = false,
+ impliesOrder = true
)
public class GenericUDAFFirstValue extends AbstractGenericUDAFResolver
{
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLag.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLag.java?rev=1510269&r1=1510268&r2=1510269&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLag.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLag.java Sun Aug 4 17:37:01 2013
@@ -33,7 +33,8 @@ import org.apache.hadoop.hive.ql.metadat
value = "_FUNC_(expr, amt, default)"
),
supportsWindow = false,
- pivotResult = true
+ pivotResult = true,
+ impliesOrder = true
)
public class GenericUDAFLag extends GenericUDAFLeadLag {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLastValue.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLastValue.java?rev=1510269&r1=1510268&r2=1510269&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLastValue.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLastValue.java Sun Aug 4 17:37:01 2013
@@ -34,7 +34,7 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-@WindowFunctionDescription(description = @Description(name = "last_value", value = "_FUNC_(x)"), supportsWindow = true, pivotResult = false)
+@WindowFunctionDescription(description = @Description(name = "last_value", value = "_FUNC_(x)"), supportsWindow = true, pivotResult = false, impliesOrder = true)
public class GenericUDAFLastValue extends AbstractGenericUDAFResolver
{
static final Log LOG = LogFactory.getLog(GenericUDAFLastValue.class
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLead.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLead.java?rev=1510269&r1=1510268&r2=1510269&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLead.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLead.java Sun Aug 4 17:37:01 2013
@@ -33,7 +33,8 @@ import org.apache.hadoop.hive.ql.metadat
value = "_FUNC_(expr, amt, default)"
),
supportsWindow = false,
- pivotResult = true
+ pivotResult = true,
+ impliesOrder = true
)
public class GenericUDAFLead extends GenericUDAFLeadLag {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentRank.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentRank.java?rev=1510269&r1=1510268&r2=1510269&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentRank.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentRank.java Sun Aug 4 17:37:01 2013
@@ -41,7 +41,8 @@ import org.apache.hadoop.io.IntWritable;
),
supportsWindow = false,
pivotResult = true,
- rankingFunction = true
+ rankingFunction = true,
+ impliesOrder = true
)
public class GenericUDAFPercentRank extends GenericUDAFRank
{
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRank.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRank.java?rev=1510269&r1=1510268&r2=1510269&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRank.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRank.java Sun Aug 4 17:37:01 2013
@@ -45,7 +45,8 @@ import org.apache.hadoop.io.IntWritable;
),
supportsWindow = false,
pivotResult = true,
- rankingFunction = true
+ rankingFunction = true,
+ impliesOrder = true
)
public class GenericUDAFRank extends AbstractGenericUDAFResolver
{
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLeadLag.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLeadLag.java?rev=1510269&r1=1510268&r2=1510269&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLeadLag.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLeadLag.java Sun Aug 4 17:37:01 2013
@@ -24,6 +24,7 @@ import org.apache.hadoop.hive.ql.exec.PT
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
@@ -197,6 +198,7 @@ public abstract class GenericUDFLeadLag
protected abstract int getIndex(int amt);
+ @UDFType(impliesOrder = true)
public static class GenericUDFLead extends GenericUDFLeadLag
{
@@ -219,6 +221,7 @@ public abstract class GenericUDFLeadLag
}
+ @UDFType(impliesOrder = true)
public static class GenericUDFLag extends GenericUDFLeadLag
{
@Override
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java?rev=1510269&r1=1510268&r2=1510269&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java Sun Aug 4 17:37:01 2013
@@ -155,4 +155,16 @@ public class TestFunctionRegistry extend
Assert.assertTrue(FunctionRegistry.isRankingFunction("cume_dist"));
Assert.assertFalse(FunctionRegistry.isRankingFunction("min"));
}
+
+ public void testImpliesOrder() {
+ Assert.assertTrue(FunctionRegistry.impliesOrder("rank"));
+ Assert.assertTrue(FunctionRegistry.impliesOrder("dense_rank"));
+ Assert.assertTrue(FunctionRegistry.impliesOrder("percent_rank"));
+ Assert.assertTrue(FunctionRegistry.impliesOrder("cume_dist"));
+ Assert.assertTrue(FunctionRegistry.impliesOrder("first_value"));
+ Assert.assertTrue(FunctionRegistry.impliesOrder("last_value"));
+ Assert.assertTrue(FunctionRegistry.impliesOrder("lead"));
+ Assert.assertTrue(FunctionRegistry.impliesOrder("lag"));
+ Assert.assertFalse(FunctionRegistry.impliesOrder("min"));
+ }
}