You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/03/12 07:48:45 UTC

svn commit: r1455421 - in /hive/branches/ptf-windowing/ql/src: java/org/apache/hadoop/hive/ql/parse/ test/queries/clientpositive/ test/results/clientpositive/

Author: hashutosh
Date: Tue Mar 12 06:48:45 2013
New Revision: 1455421

URL: http://svn.apache.org/r1455421
Log:
HIVE-4108 [jira] Allow over() clause to contain an order by with no partition by
(Harish Butani via Ashutosh Chauhan)

Summary:
allow over clause with only an order spec

HIVE-4073 allows over() to be called with no partition by and no order by. We should allow only an order by.

>From the review of HIVE-4073:

Ashutosh

Can you also add following test. This should also work.

select p_name, p_retailprice,
avg(p_retailprice) over(order by p_name)
from part
partition by p_name;

Harish

This test will not work (:
The grammar needs to be changed so:

partitioningSpec
@init { msgs.push("partitioningSpec clause"); }
@after { msgs.pop(); }
:
partitionByClause orderByClause? -> ^(TOK_PARTITIONINGSPEC partitionByClause orderByClause?) |
orderByClause -> ^(TOK_PARTITIONINGSPEC orderByClause) |
distributeByClause sortByClause? -> ^(TOK_PARTITIONINGSPEC distributeByClause sortByClause?) |
sortByClause? -> ^(TOK_PARTITIONINGSPEC sortByClause) |
clusterByClause -> ^(TOK_PARTITIONINGSPEC clusterByClause)
;

And the SemanticAnalyzer::processPTFPartitionSpec has to handle this shape of the AST Tree. The PTFTranslator also needs changes. Do this as another Jira

Test Plan: added new tests with over clause containing only order spec.

Reviewers: JIRA, ashutoshc

Reviewed By: ashutoshc

Differential Revision: https://reviews.facebook.net/D9309

Modified:
    hive/branches/ptf-windowing/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g
    hive/branches/ptf-windowing/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java
    hive/branches/ptf-windowing/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    hive/branches/ptf-windowing/ql/src/test/queries/clientpositive/ptf_npath.q
    hive/branches/ptf-windowing/ql/src/test/queries/clientpositive/windowing_windowspec.q
    hive/branches/ptf-windowing/ql/src/test/results/clientpositive/ptf_npath.q.out
    hive/branches/ptf-windowing/ql/src/test/results/clientpositive/windowing_windowspec.q.out

Modified: hive/branches/ptf-windowing/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g
URL: http://svn.apache.org/viewvc/hive/branches/ptf-windowing/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g?rev=1455421&r1=1455420&r2=1455421&view=diff
==============================================================================
--- hive/branches/ptf-windowing/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g (original)
+++ hive/branches/ptf-windowing/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g Tue Mar 12 06:48:45 2013
@@ -208,7 +208,9 @@ partitioningSpec
 @after { gParent.msgs.pop(); } 
    :
    partitionByClause orderByClause? -> ^(TOK_PARTITIONINGSPEC partitionByClause orderByClause?) |
+   orderByClause -> ^(TOK_PARTITIONINGSPEC orderByClause) |
    distributeByClause sortByClause? -> ^(TOK_PARTITIONINGSPEC distributeByClause sortByClause?) |
+   sortByClause -> ^(TOK_PARTITIONINGSPEC sortByClause) |
    clusterByClause -> ^(TOK_PARTITIONINGSPEC clusterByClause)
    ;
 

Modified: hive/branches/ptf-windowing/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java
URL: http://svn.apache.org/viewvc/hive/branches/ptf-windowing/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java?rev=1455421&r1=1455420&r2=1455421&view=diff
==============================================================================
--- hive/branches/ptf-windowing/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java (original)
+++ hive/branches/ptf-windowing/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java Tue Mar 12 06:48:45 2013
@@ -26,6 +26,7 @@ import java.util.Map;
 import java.util.Properties;
 import java.util.Stack;
 
+import org.antlr.runtime.CommonToken;
 import org.antlr.runtime.tree.TreeWizard;
 import org.antlr.runtime.tree.TreeWizard.ContextVisitor;
 import org.apache.commons.logging.Log;
@@ -471,6 +472,7 @@ public class PTFTranslator {
       PartitionedTableFunctionSpec spec)
       throws SemanticException {
 
+    applyConstantPartition(spec);
     if ( spec.getPartition() == null ) {
       return;
     }
@@ -480,6 +482,24 @@ public class PTFTranslator {
     def.setOrder(orderDef);
   }
 
+  /*
+   * If this the first PPTF in the chain and there is no partition specified
+   * then assume the user wants to include the entire input in 1 partition.
+   */
+  private static void applyConstantPartition( PartitionedTableFunctionSpec spec) {
+    if ( spec.getPartition() != null ) {
+      return;
+    }
+    PTFInputSpec iSpec = spec.getInput();
+    if ( iSpec instanceof PTFInputSpec ) {
+        PartitionSpec partSpec = new PartitionSpec();
+        PartitionExpression partExpr = new PartitionExpression();
+        partExpr.setExpression(new ASTNode(new CommonToken(HiveParser.Number, "0")));
+        partSpec.addExpression(partExpr);
+        spec.setPartition(partSpec);
+    }
+  }
+
   private PartitionDef translate(ShapeDetails inpShape, PartitionSpec spec)
       throws SemanticException
   {
@@ -1310,6 +1330,7 @@ public class PTFTranslator {
     }
 
     PartitionedTableFunctionSpec prevFn = (PartitionedTableFunctionSpec) ptfChain.pop();
+    applyConstantPartition(prevFn);
     PartitionSpec partSpec = prevFn.getPartition();
     OrderSpec orderSpec = prevFn.getOrder();
 

Modified: hive/branches/ptf-windowing/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/ptf-windowing/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1455421&r1=1455420&r2=1455421&view=diff
==============================================================================
--- hive/branches/ptf-windowing/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/branches/ptf-windowing/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Tue Mar 12 06:48:45 2013
@@ -9831,28 +9831,9 @@ public class SemanticAnalyzer extends Ba
 
     PTFInvocationSpec spec = new PTFInvocationSpec();
     spec.setFunction(ptfSpec);
-    ensurePTFChainHasPartitioning(spec, ptf);
     qb.addPTFNodeToSpec(ptf, spec);
   }
 
-  /*
-   * ensure that the PTF chain has a partitioning specification associated. This method
-   * should be called when a PTF chain is encountered as a fromSource.
-   * (from the processPTF method)
-   */
-  private void ensurePTFChainHasPartitioning(PTFInvocationSpec qSpec, ASTNode node)
-      throws SemanticException {
-    if(qSpec == null){
-      return;
-    }
-    PartitionedTableFunctionSpec ptfSpec = qSpec.getStartOfChain();
-    PartitionSpec pSpec = ptfSpec.getPartition();
-    if ( pSpec == null ) {
-      throw new SemanticException(generateErrorMessage(node,
-                  "No partition specification associated with start of PTF chain "));
-    }
-   }
-
 //--------------------------- Windowing handling -----------------------------------
 
   /*
@@ -10113,11 +10094,11 @@ public class SemanticAnalyzer extends Ba
     int type = firstChild.getType();
     int exprCnt;
 
-    PartitionSpec pSpec = processPartitionSpec(firstChild);
-    partitioning.setPartSpec(pSpec);
 
-    if ( type == HiveParser.TOK_DISTRIBUTEBY )
+    if ( type == HiveParser.TOK_DISTRIBUTEBY || type == HiveParser.TOK_CLUSTERBY )
     {
+      PartitionSpec pSpec = processPartitionSpec(firstChild);
+      partitioning.setPartSpec(pSpec);
       ASTNode sortNode = pSpecNode.getChildCount() > 1 ? (ASTNode) pSpecNode.getChild(1) : null;
       if ( sortNode != null )
       {
@@ -10125,6 +10106,11 @@ public class SemanticAnalyzer extends Ba
         partitioning.setOrderSpec(oSpec);
       }
     }
+    else if ( type == HiveParser.TOK_SORTBY || type == HiveParser.TOK_ORDERBY ) {
+      ASTNode sortNode = firstChild;
+      OrderSpec oSpec = processOrderSpec(sortNode);
+      partitioning.setOrderSpec(oSpec);
+    }
     return partitioning;
   }
 

Modified: hive/branches/ptf-windowing/ql/src/test/queries/clientpositive/ptf_npath.q
URL: http://svn.apache.org/viewvc/hive/branches/ptf-windowing/ql/src/test/queries/clientpositive/ptf_npath.q?rev=1455421&r1=1455420&r2=1455421&view=diff
==============================================================================
--- hive/branches/ptf-windowing/ql/src/test/queries/clientpositive/ptf_npath.q (original)
+++ hive/branches/ptf-windowing/ql/src/test/queries/clientpositive/ptf_npath.q Tue Mar 12 06:48:45 2013
@@ -22,3 +22,15 @@ from npath(on 
       arg2('LATE'), arg3(arr_delay > 15), 
     arg4('origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath as tpath') 
    );       
+
+-- 2. Npath on 1 partition
+select origin_city_name, fl_num, year, month, day_of_month, sz, tpath 
+from npath(on 
+        flights_tiny 
+        sort by year, month, day_of_month  
+      arg1('LATE.LATE+'), 
+      arg2('LATE'), arg3(arr_delay > 15), 
+    arg4('origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath as tpath') 
+   )
+where fl_num = 1142;       
+   
\ No newline at end of file

Modified: hive/branches/ptf-windowing/ql/src/test/queries/clientpositive/windowing_windowspec.q
URL: http://svn.apache.org/viewvc/hive/branches/ptf-windowing/ql/src/test/queries/clientpositive/windowing_windowspec.q?rev=1455421&r1=1455420&r2=1455421&view=diff
==============================================================================
--- hive/branches/ptf-windowing/ql/src/test/queries/clientpositive/windowing_windowspec.q (original)
+++ hive/branches/ptf-windowing/ql/src/test/queries/clientpositive/windowing_windowspec.q Tue Mar 12 06:48:45 2013
@@ -31,3 +31,6 @@ select s, avg(f) over (partition by bo o
 
 select s, sum(i) over() from over10k limit 100;
 
+select f, sum(f) over (order by f range between unbounded preceding and current row) from over10k limit 100;
+
+

Modified: hive/branches/ptf-windowing/ql/src/test/results/clientpositive/ptf_npath.q.out
URL: http://svn.apache.org/viewvc/hive/branches/ptf-windowing/ql/src/test/results/clientpositive/ptf_npath.q.out?rev=1455421&r1=1455420&r2=1455421&view=diff
==============================================================================
--- hive/branches/ptf-windowing/ql/src/test/results/clientpositive/ptf_npath.q.out (original)
+++ hive/branches/ptf-windowing/ql/src/test/results/clientpositive/ptf_npath.q.out Tue Mar 12 06:48:45 2013
@@ -71,3 +71,32 @@ Washington	7291	2010	10	27	2	[{"origin_c
 Chicago	897	2010	10	20	4	[{"origin_city_name":"Chicago","dest_city_name":"New York","year":2010,"month":10,"day_of_month":20,"arr_delay":24.0,"fl_num":"897"},{"origin_city_name":"Chicago","dest_city_name":"New York","year":2010,"month":10,"day_of_month":21,"arr_delay":77.0,"fl_num":"897"},{"origin_city_name":"Chicago","dest_city_name":"New York","year":2010,"month":10,"day_of_month":22,"arr_delay":24.0,"fl_num":"897"},{"origin_city_name":"Chicago","dest_city_name":"New York","year":2010,"month":10,"day_of_month":24,"arr_delay":113.0,"fl_num":"897"}]
 Chicago	897	2010	10	21	3	[{"origin_city_name":"Chicago","dest_city_name":"New York","year":2010,"month":10,"day_of_month":21,"arr_delay":77.0,"fl_num":"897"},{"origin_city_name":"Chicago","dest_city_name":"New York","year":2010,"month":10,"day_of_month":22,"arr_delay":24.0,"fl_num":"897"},{"origin_city_name":"Chicago","dest_city_name":"New York","year":2010,"month":10,"day_of_month":24,"arr_delay":113.0,"fl_num":"897"}]
 Chicago	897	2010	10	22	2	[{"origin_city_name":"Chicago","dest_city_name":"New York","year":2010,"month":10,"day_of_month":22,"arr_delay":24.0,"fl_num":"897"},{"origin_city_name":"Chicago","dest_city_name":"New York","year":2010,"month":10,"day_of_month":24,"arr_delay":113.0,"fl_num":"897"}]
+PREHOOK: query: -- 2. Npath on 1 partition
+select origin_city_name, fl_num, year, month, day_of_month, sz, tpath 
+from npath(on 
+        flights_tiny 
+        sort by year, month, day_of_month  
+      arg1('LATE.LATE+'), 
+      arg2('LATE'), arg3(arr_delay > 15), 
+    arg4('origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath as tpath') 
+   )
+where fl_num = 1142
+PREHOOK: type: QUERY
+PREHOOK: Input: default@flights_tiny
+#### A masked pattern was here ####
+POSTHOOK: query: -- 2. Npath on 1 partition
+select origin_city_name, fl_num, year, month, day_of_month, sz, tpath 
+from npath(on 
+        flights_tiny 
+        sort by year, month, day_of_month  
+      arg1('LATE.LATE+'), 
+      arg2('LATE'), arg3(arr_delay > 15), 
+    arg4('origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath as tpath') 
+   )
+where fl_num = 1142
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@flights_tiny
+#### A masked pattern was here ####
+Baltimore	1142	2010	10	21	2	[{"origin_city_name":"Baltimore","dest_city_name":"New York","year":2010,"month":10,"day_of_month":21,"arr_delay":105.0,"fl_num":"1142"},{"origin_city_name":"Baltimore","dest_city_name":"New York","year":2010,"month":10,"day_of_month":21,"arr_delay":28.0,"fl_num":"1599"}]
+Baltimore	1142	2010	10	22	2	[{"origin_city_name":"Baltimore","dest_city_name":"New York","year":2010,"month":10,"day_of_month":22,"arr_delay":54.0,"fl_num":"1142"},{"origin_city_name":"Baltimore","dest_city_name":"New York","year":2010,"month":10,"day_of_month":22,"arr_delay":18.0,"fl_num":"1599"}]
+Baltimore	1142	2010	10	25	3	[{"origin_city_name":"Baltimore","dest_city_name":"New York","year":2010,"month":10,"day_of_month":25,"arr_delay":92.0,"fl_num":"1142"},{"origin_city_name":"Baltimore","dest_city_name":"New York","year":2010,"month":10,"day_of_month":25,"arr_delay":106.0,"fl_num":"1599"},{"origin_city_name":"Chicago","dest_city_name":"New York","year":2010,"month":10,"day_of_month":25,"arr_delay":31.0,"fl_num":"361"}]

Modified: hive/branches/ptf-windowing/ql/src/test/results/clientpositive/windowing_windowspec.q.out
URL: http://svn.apache.org/viewvc/hive/branches/ptf-windowing/ql/src/test/results/clientpositive/windowing_windowspec.q.out?rev=1455421&r1=1455420&r2=1455421&view=diff
==============================================================================
--- hive/branches/ptf-windowing/ql/src/test/results/clientpositive/windowing_windowspec.q.out (original)
+++ hive/branches/ptf-windowing/ql/src/test/results/clientpositive/windowing_windowspec.q.out Tue Mar 12 06:48:45 2013
@@ -795,3 +795,111 @@ oscar king	656584379
 calvin hernandez	656584379
 nick ichabod	656584379
 bob ovid	656584379
+PREHOOK: query: select f, sum(f) over (order by f range between unbounded preceding and current row) from over10k limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+#### A masked pattern was here ####
+POSTHOOK: query: select f, sum(f) over (order by f range between unbounded preceding and current row) from over10k limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+#### A masked pattern was here ####
+0.01	0.009999999776482582
+0.01	0.019999999552965164
+0.02	0.03999999910593033
+0.02	0.05999999865889549
+0.02	0.07999999821186066
+0.03	0.1099999975413084
+0.03	0.13999999687075615
+0.04	0.17999999597668648
+0.05	0.22999999672174454
+0.07	0.29999999701976776
+0.07	0.369999997317791
+0.08	0.44999999552965164
+0.08	0.5299999937415123
+0.08	0.609999991953373
+0.09	0.6999999955296516
+0.09	0.7899999991059303
+0.1	0.8900000005960464
+0.11	1.0
+0.12	1.119999997317791
+0.12	1.239999994635582
+0.13	1.3699999898672104
+0.14	1.5099999904632568
+0.14	1.6499999910593033
+0.15	1.7999999970197678
+0.15	1.9500000029802322
+0.16	2.1099999994039536
+0.16	2.269999995827675
+0.17	2.439999997615814
+0.18	2.6200000047683716
+0.19	2.810000002384186
+0.2	3.010000005364418
+0.2	3.2100000083446503
+0.23	3.4400000125169754
+0.23	3.6700000166893005
+0.23	3.9000000208616257
+0.27	4.170000031590462
+0.27	4.440000042319298
+0.27	4.710000053048134
+0.28	4.990000054240227
+0.28	5.27000005543232
+0.29	5.560000047087669
+0.31	5.870000049471855
+0.32	6.190000042319298
+0.35	6.540000036358833
+0.36	6.900000050663948
+0.36	7.260000064969063
+0.37	7.630000069737434
+0.39	8.02000005543232
+0.39	8.410000041127205
+0.42	8.830000028014183
+0.43	9.26000003516674
+0.43	9.690000042319298
+0.43	10.120000049471855
+0.44	10.56000004708767
+0.44	11.000000044703484
+0.48	11.480000033974648
+0.5	11.980000033974648
+0.5	12.480000033974648
+0.51	12.990000024437904
+0.52	13.510000005364418
+0.52	14.029999986290932
+0.52	14.549999967217445
+0.53	15.079999938607216
+0.54	15.619999960064888
+0.55	16.169999971985817
+0.56	16.729999974370003
+0.57	17.299999967217445
+0.58	17.879999950528145
+0.59	18.4699999243021
+0.6	19.06999994814396
+0.61	19.679999962449074
+0.62	20.299999967217445
+0.62	20.919999971985817
+0.62	21.53999997675419
+0.67	22.20999999344349
+0.68	22.890000000596046
+0.68	23.570000007748604
+0.69	24.260000005364418
+0.69	24.950000002980232
+0.71	25.65999998152256
+0.75	26.40999998152256
+0.76	27.169999971985817
+0.77	27.93999995291233
+0.78	28.7199999243021
+0.79	29.509999945759773
+0.79	30.299999967217445
+0.79	31.089999988675117
+0.8	31.890000000596046
+0.82	32.70999999344349
+0.83	33.53999997675419
+0.84	34.379999950528145
+0.84	35.2199999243021
+0.85	36.06999994814396
+0.87	36.93999995291233
+0.88	37.81999994814396
+0.89	38.709999933838844
+0.9	39.609999909996986
+0.91	40.51999993622303
+0.92	41.43999995291233
+0.93	42.36999996006489