You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@drill.apache.org by GitBox <gi...@apache.org> on 2018/07/23 22:23:15 UTC

[GitHub] Ben-Zvi closed pull request #1394: DRILL-6626: Fixed an IndexOutOfBoundException during aggregator rehash

Ben-Zvi closed pull request #1394: DRILL-6626: Fixed an IndexOutOfBoundException during aggregator rehash
URL: https://github.com/apache/drill/pull/1394
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/aggregate/HashAggBatch.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/aggregate/HashAggBatch.java
index d37631be45c..ba928ae8f2d 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/aggregate/HashAggBatch.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/aggregate/HashAggBatch.java
@@ -114,8 +114,13 @@ public RecordBatchMemoryManager getRecordBatchMemoryManager() {
 
     @Override
     public void update() {
+      update(incoming);
+    }
+
+    @Override
+    public void update(RecordBatch incomingRecordBatch) {
       // Get sizing information for the batch.
-      setRecordBatchSizer(new RecordBatchSizer(incoming));
+      setRecordBatchSizer(new RecordBatchSizer(incomingRecordBatch));
 
       int fieldId = 0;
       int newOutgoingRowWidth = 0;
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/aggregate/HashAggTemplate.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/aggregate/HashAggTemplate.java
index 2f3bc23da33..4bbfa05a16e 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/aggregate/HashAggTemplate.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/aggregate/HashAggTemplate.java
@@ -584,6 +584,11 @@ public AggOutcome doWork() {
         currentBatchRecordCount = incoming.getRecordCount(); // initialize for first non empty batch
         // Calculate the number of partitions based on actual incoming data
         delayedSetup();
+        // Update the record batch manager since this is the first batch with data; we need to
+        // perform the update before any processing.
+        // NOTE - We pass the incoming record batch explicitly because it could be a spilled record (different
+        //        from the instance owned by the HashAggBatch).
+        outgoing.getRecordBatchMemoryManager().update(incoming);
       }
 
       //
@@ -666,7 +671,9 @@ public AggOutcome doWork() {
           // remember EMIT, but continue like handling OK
 
         case OK:
-          outgoing.getRecordBatchMemoryManager().update();
+          // NOTE - We pass the incoming record batch explicitly because it could be a spilled record (different
+          //        from the instance owned by the HashAggBatch).
+          outgoing.getRecordBatchMemoryManager().update(incoming);
 
           currentBatchRecordCount = incoming.getRecordCount(); // size of next batch
 
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/HashTableTemplate.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/HashTableTemplate.java
index 756b3f3a208..83b72d7c70c 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/HashTableTemplate.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/HashTableTemplate.java
@@ -798,12 +798,10 @@ private void resizeAndRehashIfNeeded() {
 
     IntVector newStartIndices = allocMetadataVector(tableSize, EMPTY_SLOT);
 
-    int idx = 0;
     for (int i = 0; i < batchHolders.size(); i++) {
       BatchHolder bh = batchHolders.get(i);
-      int batchStartIdx = idx;
+      int batchStartIdx = i * BATCH_SIZE;
       bh.rehash(tableSize, newStartIndices, batchStartIdx);
-      idx += bh.getTargetBatchRowCount();
     }
 
     startIndices.clear();
@@ -816,7 +814,7 @@ private void resizeAndRehashIfNeeded() {
         logger.debug("Bucket: {}, startIdx[ {} ] = {}.", i, i, startIndices.getAccessor().get(i));
         int startIdx = startIndices.getAccessor().get(i);
         BatchHolder bh = batchHolders.get((startIdx >>> 16) & BATCH_MASK);
-        bh.dump(idx);
+        bh.dump(startIdx);
       }
     }
     resizingTime += System.currentTimeMillis() - t0;
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchMemoryManager.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchMemoryManager.java
index 79b28db2438..2372be2c900 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchMemoryManager.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchMemoryManager.java
@@ -154,6 +154,9 @@ public void update(int inputIndex) {
 
   public void update() {};
 
+  public void update(RecordBatch recordBatch) {
+  }
+
   public void update(RecordBatch recordBatch, int index) {
     // Get sizing information for the batch.
     setRecordBatchSizer(index, new RecordBatchSizer(recordBatch));


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services