You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by ad...@apache.org on 2015/09/09 01:25:23 UTC

[3/5] drill git commit: DRILL-3668: Incorrect results FIRST_VALUE function

DRILL-3668: Incorrect results FIRST_VALUE function

added DefaultFrameTemplate.resetInternal() and generate code to set first value of internal batch to NULL at the end of each partition
added unit test to make sure bug has been fixed

this closes #146


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/1601a7c5
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/1601a7c5
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/1601a7c5

Branch: refs/heads/master
Commit: 1601a7c5e9a2bb3772f12f0650e13935ab5ba563
Parents: 6db7c05
Author: adeneche <ad...@gmail.com>
Authored: Wed Aug 19 11:52:29 2015 -0700
Committer: adeneche <ad...@gmail.com>
Committed: Tue Sep 8 15:35:36 2015 -0700

----------------------------------------------------------------------
 .../main/codegen/templates/FixedValueVectors.java |   8 ++++++++
 .../codegen/templates/NullableValueVectors.java   |  15 +++++++++++++++
 .../codegen/templates/VariableLengthVectors.java  |   9 +++++++++
 .../impl/window/DefaultFrameTemplate.java         |   7 +++++++
 .../drill/exec/vector/BaseDataValueVector.java    |   5 +++++
 .../org/apache/drill/exec/vector/BitVector.java   |   9 +++++++++
 .../org/apache/drill/exec/vector/ValueVector.java |  17 +++++++++++++++++
 .../physical/impl/window/TestWindowFrame.java     |  10 ++++++++++
 .../src/test/resources/window/3668.parquet        | Bin 0 -> 395 bytes
 exec/java-exec/src/test/resources/window/3668.sql |   6 ++++++
 10 files changed, 86 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/1601a7c5/exec/java-exec/src/main/codegen/templates/FixedValueVectors.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/codegen/templates/FixedValueVectors.java b/exec/java-exec/src/main/codegen/templates/FixedValueVectors.java
index 9531987..f1eb756 100644
--- a/exec/java-exec/src/main/codegen/templates/FixedValueVectors.java
+++ b/exec/java-exec/src/main/codegen/templates/FixedValueVectors.java
@@ -123,6 +123,14 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements F
     allocateBytes(valueCount * ${type.width});
   }
 
+  @Override
+  public void reset() {
+    allocationSizeInBytes = INITIAL_VALUE_ALLOCATION;
+    allocationMonitor = 0;
+    zeroVector();
+    super.reset();
+    }
+
   private void allocateBytes(final long size) {
     if (size > MAX_ALLOCATION_SIZE) {
       throw new OversizedAllocationException("Requested amount of memory is more than max allowed allocation size");

http://git-wip-us.apache.org/repos/asf/drill/blob/1601a7c5/exec/java-exec/src/main/codegen/templates/NullableValueVectors.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/codegen/templates/NullableValueVectors.java b/exec/java-exec/src/main/codegen/templates/NullableValueVectors.java
index 259005f..85a87f6 100644
--- a/exec/java-exec/src/main/codegen/templates/NullableValueVectors.java
+++ b/exec/java-exec/src/main/codegen/templates/NullableValueVectors.java
@@ -168,6 +168,13 @@ public final class ${className} extends BaseDataValueVector implements <#if type
     accessor.reset();
   }
 
+  public void reset() {
+    bits.zeroVector();
+    mutator.reset();
+    accessor.reset();
+    super.reset();
+  }
+
   @Override
   public int getByteCapacity(){
     return values.getByteCapacity();
@@ -193,6 +200,14 @@ public final class ${className} extends BaseDataValueVector implements <#if type
     accessor.reset();
   }
 
+  @Override
+  public void reset() {
+    bits.zeroVector();
+    mutator.reset();
+    accessor.reset();
+    super.reset();
+  }
+
   /**
    * {@inheritDoc}
    */

http://git-wip-us.apache.org/repos/asf/drill/blob/1601a7c5/exec/java-exec/src/main/codegen/templates/VariableLengthVectors.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/codegen/templates/VariableLengthVectors.java b/exec/java-exec/src/main/codegen/templates/VariableLengthVectors.java
index c9dae77..92c3700 100644
--- a/exec/java-exec/src/main/codegen/templates/VariableLengthVectors.java
+++ b/exec/java-exec/src/main/codegen/templates/VariableLengthVectors.java
@@ -328,6 +328,15 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V
     offsetVector.zeroVector();
   }
 
+  @Override
+  public void reset() {
+    allocationSizeInBytes = INITIAL_BYTE_COUNT;
+    allocationMonitor = 0;
+    data.readerIndex(0);
+    offsetVector.zeroVector();
+    super.reset();
+  }
+
   public void reAlloc() {
     final long newAllocationSize = allocationSizeInBytes*2L;
     if (newAllocationSize > MAX_ALLOCATION_SIZE)  {

http://git-wip-us.apache.org/repos/asf/drill/blob/1601a7c5/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/window/DefaultFrameTemplate.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/window/DefaultFrameTemplate.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/window/DefaultFrameTemplate.java
index b56e421..9bde6a5 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/window/DefaultFrameTemplate.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/window/DefaultFrameTemplate.java
@@ -24,6 +24,8 @@ import org.apache.drill.exec.record.TransferPair;
 import org.apache.drill.exec.record.VectorAccessible;
 import org.apache.drill.exec.record.VectorContainer;
 import org.apache.drill.exec.record.VectorWrapper;
+import org.apache.drill.exec.vector.BaseDataValueVector;
+import org.apache.drill.exec.vector.BaseValueVector;
 import org.apache.drill.exec.vector.ValueVector;
 
 import javax.inject.Named;
@@ -144,6 +146,11 @@ public abstract class DefaultFrameTemplate implements WindowFramer {
   private void cleanPartition() {
     partition = null;
     resetValues();
+    for (VectorWrapper<?> vw : internal) {
+      if ((vw.getValueVector() instanceof BaseDataValueVector)) {
+        ((BaseDataValueVector) vw.getValueVector()).reset();
+      }
+    }
     lagCopiedToInternal = false;
   }
 

http://git-wip-us.apache.org/repos/asf/drill/blob/1601a7c5/exec/java-exec/src/main/java/org/apache/drill/exec/vector/BaseDataValueVector.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/BaseDataValueVector.java b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/BaseDataValueVector.java
index 42e0972..2fc741c 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/BaseDataValueVector.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/BaseDataValueVector.java
@@ -82,4 +82,9 @@ public abstract class BaseDataValueVector extends BaseValueVector {
     return data;
   }
 
+  /**
+   * This method has a similar effect of allocateNew() without actually clearing and reallocating
+   * the value vector. The purpose is to move the value vector to a "mutate" state
+   */
+  public void reset() {}
 }

http://git-wip-us.apache.org/repos/asf/drill/blob/1601a7c5/exec/java-exec/src/main/java/org/apache/drill/exec/vector/BitVector.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/BitVector.java b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/BitVector.java
index 9d31ea8..624e737 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/BitVector.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/BitVector.java
@@ -104,6 +104,15 @@ public final class BitVector extends BaseDataValueVector implements FixedWidthVe
     return true;
   }
 
+  @Override
+  public void reset() {
+    valueCount = 0;
+    allocationSizeInBytes = INITIAL_VALUE_ALLOCATION;
+    allocationMonitor = 0;
+    zeroVector();
+    super.reset();
+  }
+
   /**
    * Allocate a new memory space for this vector. Must be called prior to using the ValueVector.
    *

http://git-wip-us.apache.org/repos/asf/drill/blob/1601a7c5/exec/java-exec/src/main/java/org/apache/drill/exec/vector/ValueVector.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/ValueVector.java b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/ValueVector.java
index 3948163..8ec9fb2 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/ValueVector.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/ValueVector.java
@@ -37,6 +37,23 @@ import org.apache.drill.exec.vector.complex.reader.FieldReader;
  * A vector when instantiated, relies on a {@link org.apache.drill.exec.record.DeadBuf dead buffer}. It is important
  * that vector is allocated before attempting to read or write.
  *
+ * There are a few "rules" around vectors:
+ *
+ * <ul>
+ *   <li>values need to be written in order (e.g. index 0, 1, 2, 5)</li>
+ *   <li>null vectors start with all values as null before writing anything</li>
+ *   <li>for variable width types, the offset vector should be all zeros before writing</li>
+ *   <li>you must call setValueCount before a vector can be read</li>
+ *   <li>you should never write to a vector once it has been read.</li>
+ * </ul>
+ *
+ * Please note that the current implementation doesn't enfore those rules, hence we may find few places that
+ * deviate from these rules (e.g. offset vectors in Variable Length and Repeated vector)
+ *
+ * This interface "should" strive to guarantee this order of operation:
+ * <blockquote>
+ * allocate > mutate > setvaluecount > access > clear (or allocate to start the process over).
+ * </blockquote>
  */
 public interface ValueVector extends Closeable, Iterable<ValueVector> {
 

http://git-wip-us.apache.org/repos/asf/drill/blob/1601a7c5/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/window/TestWindowFrame.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/window/TestWindowFrame.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/window/TestWindowFrame.java
index 23e9b46..ba66fce 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/window/TestWindowFrame.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/window/TestWindowFrame.java
@@ -296,6 +296,16 @@ public class TestWindowFrame extends BaseTestQuery {
   }
 
   @Test
+  public void test3668Fix() throws Exception {
+    testBuilder()
+      .sqlQuery(getFile("window/3668.sql"), TEST_RES_PATH)
+      .ordered()
+      .baselineColumns("cnt").baselineValues(2L)
+      .build()
+      .run();
+  }
+
+  @Test
   public void testPartitionNtile() {
     Partition partition = new Partition(12);
 

http://git-wip-us.apache.org/repos/asf/drill/blob/1601a7c5/exec/java-exec/src/test/resources/window/3668.parquet
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/window/3668.parquet b/exec/java-exec/src/test/resources/window/3668.parquet
new file mode 100644
index 0000000..4ff9075
Binary files /dev/null and b/exec/java-exec/src/test/resources/window/3668.parquet differ

http://git-wip-us.apache.org/repos/asf/drill/blob/1601a7c5/exec/java-exec/src/test/resources/window/3668.sql
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/window/3668.sql b/exec/java-exec/src/test/resources/window/3668.sql
new file mode 100644
index 0000000..e7ea7ab
--- /dev/null
+++ b/exec/java-exec/src/test/resources/window/3668.sql
@@ -0,0 +1,6 @@
+select  count(fv) as cnt
+from (
+        select  first_value(c2) over(partition by c2 order by c1) as fv
+        from    dfs_test.`%s/window/3668.parquet`
+)
+where   fv = 'e'
\ No newline at end of file