You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by ad...@apache.org on 2016/05/07 00:14:27 UTC

drill git commit: DRILL-4657: Rank() will return wrong results if a frame of data is too big (more than 2 batches)

Repository: drill
Updated Branches:
  refs/heads/master 6bba69d48 -> 09b262776


DRILL-4657: Rank() will return wrong results if a frame of data is too big (more than 2 batches)

This closes #499


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/09b26277
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/09b26277
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/09b26277

Branch: refs/heads/master
Commit: 09b262776e965ea17a6a863801f7e1ee3e5b3d5a
Parents: 6bba69d
Author: adeneche <ad...@gmail.com>
Authored: Wed May 4 12:08:36 2016 -0700
Committer: adeneche <ad...@gmail.com>
Committed: Fri May 6 17:13:42 2016 -0700

----------------------------------------------------------------------
 .../physical/impl/window/WindowFunction.java    | 12 +++-
 .../physical/impl/window/TestWindowFrame.java   | 12 ++++
 .../src/test/resources/window/4657.tsv          | 60 ++++++++++++++++++++
 3 files changed, 82 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/09b26277/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/window/WindowFunction.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/window/WindowFunction.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/window/WindowFunction.java
index cd14b8a..5630ccf 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/window/WindowFunction.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/window/WindowFunction.java
@@ -221,10 +221,18 @@ public abstract class WindowFunction {
     @Override
     public boolean canDoWork(int numBatchesAvailable, final WindowPOP pop, boolean frameEndReached, boolean partitionEndReached) {
       assert numBatchesAvailable > 0 : "canDoWork() should not be called when numBatchesAvailable == 0";
+      if (type == Type.ROW_NUMBER) {
+        // row_number doesn't need to wait for anything
+        return true;
+      }
+      if (type == Type.RANK) {
+        // rank only works if we know how many rows we have in the current frame
+        // we could avoid this, but it requires more refactoring
+        return frameEndReached;
+      }
 
       // for CUME_DIST, PERCENT_RANK and NTILE we need the full partition
-      // otherwise we can process the first batch immediately
-      return partitionEndReached || ! requiresFullPartition(pop);
+      return partitionEndReached;
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/drill/blob/09b26277/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/window/TestWindowFrame.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/window/TestWindowFrame.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/window/TestWindowFrame.java
index f5e88d2..76f0935 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/window/TestWindowFrame.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/window/TestWindowFrame.java
@@ -439,4 +439,16 @@ public class TestWindowFrame extends BaseTestQuery {
       .baselineValues("EMPTY")
       .go();
   }
+
+  @Test
+  public void test4657() throws Exception {
+    testBuilder()
+      .sqlQuery("select row_number() over(order by position_id) rn, rank() over(order by position_id) rnk from dfs_test.`%s/window/b3.p2`", TEST_RES_PATH)
+      .ordered()
+      .csvBaselineFile("window/4657.tsv")
+      .baselineColumns("rn", "rnk")
+      .expectsNumBatches(4) // we expect 3 data batches and the fast schema
+      .go();
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/drill/blob/09b26277/exec/java-exec/src/test/resources/window/4657.tsv
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/window/4657.tsv b/exec/java-exec/src/test/resources/window/4657.tsv
new file mode 100644
index 0000000..a7b1973
--- /dev/null
+++ b/exec/java-exec/src/test/resources/window/4657.tsv
@@ -0,0 +1,60 @@
+1	1
+3	1
+4	1
+5	1
+2	1
+11	6
+6	6
+7	6
+8	6
+9	6
+10	6
+12	6
+13	6
+14	6
+15	6
+16	6
+17	6
+18	6
+19	6
+20	6
+21	6
+22	6
+23	6
+24	6
+25	6
+26	6
+27	6
+28	6
+29	6
+30	6
+31	6
+32	6
+33	6
+34	6
+35	6
+36	6
+37	6
+38	6
+39	6
+40	6
+41	6
+42	6
+43	6
+44	6
+45	6
+46	6
+47	6
+48	6
+49	6
+50	6
+51	6
+52	6
+53	6
+54	6
+55	6
+56	6
+57	6
+58	6
+59	6
+60	6
\ No newline at end of file