You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by vi...@apache.org on 2018/03/26 11:35:03 UTC
[08/13] drill git commit: DRILL-6231: Fix memory allocation for
repeated list vector
DRILL-6231: Fix memory allocation for repeated list vector
closes #1171
Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/d55b62f9
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/d55b62f9
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/d55b62f9
Branch: refs/heads/master
Commit: d55b62f9ea76372c823c15780b2a69f81f0b945c
Parents: 3167771
Author: Padma Penumarthy <pp...@yahoo.com>
Authored: Thu Mar 15 21:50:54 2018 -0700
Committer: Vitalii Diravka <vi...@gmail.com>
Committed: Sat Mar 24 20:35:32 2018 +0200
----------------------------------------------------------------------
.../drill/exec/record/RecordBatchSizer.java | 15 ++
.../exec/physical/unit/TestOutputBatchSize.java | 257 ++++++++++++++++++-
2 files changed, 267 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/drill/blob/d55b62f9/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchSizer.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchSizer.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchSizer.java
index 9525c91..bfe0ef1 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchSizer.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchSizer.java
@@ -395,11 +395,26 @@ public class RecordBatchSizer {
}
}
+ private void allocateRepeatedList(RepeatedListVector vector, int recordCount) { // allocates the list's offsets, then its child data vector
+ vector.allocateOffsetsNew(recordCount); // called with the unscaled record count
+ recordCount *= getCardinality(); // scale by this column's cardinality before sizing the child; NOTE(review): if getCardinality() is floating-point this compound assignment truncates - confirm
+ ColumnSize child = children.get(vector.getField().getName()); // NOTE(review): looked up under the vector's own field name and dereferenced below without a null check - confirm an entry always exists
+ if (vector.getDataVector() != null) { // nothing further to allocate when the list has no data vector
+ child.allocateVector(vector.getDataVector(), recordCount); // delegate to the child ColumnSize with the scaled count
+ }
+ }
+
public void allocateVector(ValueVector vector, int recordCount) {
if (vector instanceof AbstractMapVector) {
allocateMap((AbstractMapVector) vector, recordCount);
return;
}
+ // Repeated list vectors are routed to allocateRepeatedList instead of AllocationHelper.
+ if (vector instanceof RepeatedListVector) {
+ allocateRepeatedList((RepeatedListVector) vector, recordCount);
+ return;
+ }
+ // Every other vector is allocated from this column's entry width and cardinality.
AllocationHelper.allocate(vector, recordCount, getEntryWidth(), getCardinality());
}
http://git-wip-us.apache.org/repos/asf/drill/blob/d55b62f9/exec/java-exec/src/test/java/org/apache/drill/exec/physical/unit/TestOutputBatchSize.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/unit/TestOutputBatchSize.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/unit/TestOutputBatchSize.java
index edd4cbf..76de381 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/unit/TestOutputBatchSize.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/unit/TestOutputBatchSize.java
@@ -1194,20 +1194,267 @@ public class TestOutputBatchSize extends PhysicalOpUnitTestBase {
// Allocates to nearest power of two
colSize.allocateVector(v, testRowCount);
+
+ // offset vector of delegate vector i.e. outer array should have row count number of values.
UInt4Vector offsetVector = ((RepeatedListVector) v).getOffsetVector();
- assertEquals((Integer.highestOneBit(testRowCount * 2) << 1), offsetVector.getValueCapacity());
- ValueVector dataVector = ((RepeatedValueVector) v).getDataVector();
- assertEquals(Integer.highestOneBit((testRowCount * 2) << 1) - 1, dataVector.getValueCapacity());
+ assertEquals((Integer.highestOneBit(testRowCount) << 1), offsetVector.getValueCapacity());
+
+ // Get inner vector of delegate vector.
+ ValueVector vector = ((RepeatedValueVector) v).getDataVector();
+
+ // Data vector of inner vector should
+ // have 2 (outer array cardinality) * 4 (inner array cardinality) * row count number of values.
+ ValueVector dataVector = ((RepeatedValueVector) vector).getDataVector();
+ assertEquals(Integer.highestOneBit((testRowCount*8) << 1), dataVector.getValueCapacity());
+
+ // offset vector of inner vector should have
+ // 2 (outer array cardinality) * row count number of values.
+ offsetVector = ((RepeatedValueVector) vector).getOffsetVector();
+ assertEquals((Integer.highestOneBit(testRowCount*2) << 1), offsetVector.getValueCapacity());
v.clear();
// Allocates the same as value passed since it is already power of two.
// -1 is done for adjustment needed for offset vector.
colSize.allocateVector(v, testRowCountPowerTwo - 1);
+
+ // offset vector of delegate vector i.e. outer array should have row count number of values.
offsetVector = ((RepeatedListVector) v).getOffsetVector();
assertEquals(testRowCountPowerTwo, offsetVector.getValueCapacity());
- dataVector = ((RepeatedValueVector) v).getDataVector();
- assertEquals(Integer.highestOneBit(testRowCountPowerTwo)-1, dataVector.getValueCapacity());
+
+ // Get inner vector of delegate vector.
+ vector = ((RepeatedValueVector) v).getDataVector();
+
+ // Data vector of inner vector should
+ // have 2 (outer array cardinality) * 4 (inner array cardinality) * row count number of values.
+ dataVector = ((RepeatedValueVector) vector).getDataVector();
+ assertEquals(testRowCountPowerTwo * 8, dataVector.getValueCapacity());
+
+ // offset vector of inner vector should have
+ // 2 (outer array cardinality) * row count number of values.
+ offsetVector = ((RepeatedValueVector) vector).getOffsetVector();
+ assertEquals(testRowCountPowerTwo * 2, offsetVector.getValueCapacity());
+ v.clear();
+
+ // MAX ROW COUNT
+ colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT - 1);
+
+ // offset vector of delegate vector i.e. outer array should have row count number of values.
+ offsetVector = ((RepeatedListVector) v).getOffsetVector();
+ assertEquals(ValueVector.MAX_ROW_COUNT, offsetVector.getValueCapacity());
+
+ // Get inner vector of delegate vector.
+ vector = ((RepeatedValueVector) v).getDataVector();
+
+ // Data vector of inner vector should
+ // have 2 (outer array cardinality) * 4 (inner array cardinality) * row count number of values.
+ dataVector = ((RepeatedValueVector) vector).getDataVector();
+ assertEquals(ValueVector.MAX_ROW_COUNT*8, dataVector.getValueCapacity());
+
+ // offset vector of inner vector should have
+ // 2 (outer array cardinality) * row count number of values.
+ offsetVector = ((RepeatedValueVector) vector).getOffsetVector();
+ assertEquals(ValueVector.MAX_ROW_COUNT*2, offsetVector.getValueCapacity());
+ v.clear();
+
+ // MIN ROW COUNT
+ colSize.allocateVector(v, 0);
+
+ // offset vector of delegate vector i.e. outer array should have 1 value.
+ offsetVector = ((RepeatedListVector) v).getOffsetVector();
+ assertEquals(ValueVector.MIN_ROW_COUNT, offsetVector.getValueCapacity());
+
+ // Get inner vector of delegate vector.
+ vector = ((RepeatedValueVector) v).getDataVector();
+
+ // Data vector of inner vector should have 1 value
+ dataVector = ((RepeatedValueVector) vector).getDataVector();
+ assertEquals(ValueVector.MIN_ROW_COUNT, dataVector.getValueCapacity());
+
+ // offset vector of inner vector should have
+ // 2 (outer array cardinality) * 1.
+ offsetVector = ((RepeatedValueVector) vector).getOffsetVector();
+ assertEquals(ValueVector.MIN_ROW_COUNT*2, offsetVector.getValueCapacity());
v.clear();
}
}
+
+ @Test
+ public void testSizerRepeatedRepeatedList() throws Exception {
+ List<String> inputJsonBatches = Lists.newArrayList();
+ StringBuilder batchString = new StringBuilder();
+
+ StringBuilder newString = new StringBuilder();
+ newString.append("[ [[1,2,3,4], [5,6,7,8]], [[1,2,3,4], [5,6,7,8]] ]");
+
+ numRows = 9; // the loop below emits 9 rows and one more is appended after it, 10 rows in total
+ batchString.append("[");
+ for (int i = 0; i < numRows; i++) {
+ batchString.append("{\"c\" : " + newString);
+ batchString.append("},");
+ }
+ batchString.append("{\"c\" : " + newString);
+ batchString.append("}");
+
+ batchString.append("]");
+ inputJsonBatches.add(batchString.toString());
+
+ // Create a dummy scanBatch to figure out the size.
+ RecordBatch scanBatch = new ScanBatch(new MockPhysicalOperator(),
+ fragContext, getReaderListForJsonBatches(inputJsonBatches, fragContext));
+
+ VectorAccessible va = new BatchIterator(scanBatch).iterator().next();
+ RecordBatchSizer sizer = new RecordBatchSizer(va);
+
+ assertEquals(1, sizer.columns().size());
+ RecordBatchSizer.ColumnSize column = sizer.columns().get("c");
+ assertNotNull(column);
+
+ /**
+ * stdDataSize:8*10*10*10, stdNetSize:8*10*10*10 + 8*10*10 + 8*10 + 4,
+ * dataSizePerEntry:16*8, netSizePerEntry:16*8 + 4*4 + 2*4 + 4,
+ * totalDataSize:16*8*10, totalNetSize:netSizePerEntry*10, valueCount:10,
+ * elementCount:20, estElementCountPerArray:2, isVariableWidth:false
+ */
+ assertEquals(8000, column.getStdDataSizePerEntry());
+ assertEquals(8884, column.getStdNetSizePerEntry());
+ assertEquals(128, column.getDataSizePerEntry());
+ assertEquals(156, column.getNetSizePerEntry());
+ assertEquals(1280, column.getTotalDataSize());
+ assertEquals(1560, column.getTotalNetSize());
+ assertEquals(10, column.getValueCount());
+ assertEquals(20, column.getElementCount());
+ assertEquals(2, column.getCardinality(), 0.01);
+ assertEquals(false, column.isVariableWidth());
+
+ final int testRowCount = 1000;
+ final int testRowCountPowerTwo = 2048;
+
+ for (VectorWrapper<?> vw : va) {
+ ValueVector v = vw.getValueVector();
+ v.clear();
+
+ RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName());
+
+ // Allocates to nearest power of two
+ colSize.allocateVector(v, testRowCount);
+
+ // offset vector of delegate vector i.e. outer array should have row count number of values.
+ UInt4Vector offsetVector = ((RepeatedListVector) v).getOffsetVector();
+ assertEquals((Integer.highestOneBit(testRowCount) << 1), offsetVector.getValueCapacity());
+
+ // Get data vector of delegate vector. This is repeated list again
+ ValueVector dataVector = ((RepeatedListVector) v).getDataVector();
+
+ // offset vector of delegate vector of the inner repeated list
+ // This should have row count * 2 number of values.
+ offsetVector = ((RepeatedListVector) dataVector).getOffsetVector();
+ assertEquals((Integer.highestOneBit(testRowCount*2) << 1), offsetVector.getValueCapacity());
+
+ // Inner repeated list itself should report row count * 2 number of values - 1 (for offset vector adjustment).
+ ValueVector innerDataVector = ((RepeatedValueVector) dataVector).getDataVector();
+ assertEquals((Integer.highestOneBit((testRowCount*2) << 1) - 1), dataVector.getValueCapacity());
+
+ // offset vector of inner vector should have
+ // 2 (outer array cardinality) * 2 (inner array cardinality) * row count number of values.
+ offsetVector = ((RepeatedValueVector) innerDataVector).getOffsetVector();
+ assertEquals((Integer.highestOneBit(testRowCount*4) << 1), offsetVector.getValueCapacity());
+
+ // Data vector of inner vector should have
+ // 2 (outer array cardinality) * 2 (inner array cardinality) * 4 (innermost array cardinality) * row count number of values.
+ dataVector = ((RepeatedValueVector) innerDataVector).getDataVector();
+ assertEquals(Integer.highestOneBit(testRowCount << 1) * 16, dataVector.getValueCapacity());
+
+ v.clear();
+
+ // Allocates the same as value passed since it is already power of two.
+ // -1 is done for adjustment needed for offset vector.
+ colSize.allocateVector(v, testRowCountPowerTwo - 1);
+
+ // offset vector of delegate vector i.e. outer array should have row count number of values.
+ offsetVector = ((RepeatedListVector) v).getOffsetVector();
+ assertEquals(testRowCountPowerTwo, offsetVector.getValueCapacity());
+
+ // Get data vector of delegate vector. This is repeated list again
+ dataVector = ((RepeatedListVector) v).getDataVector();
+
+ // offset vector of delegate vector of the inner repeated list
+ // This should have row count * 2 number of values.
+ offsetVector = ((RepeatedListVector) dataVector).getOffsetVector();
+ assertEquals(testRowCountPowerTwo*2, offsetVector.getValueCapacity());
+
+ // Inner repeated list itself should report row count * 2 number of values - 1 (for offset vector adjustment).
+ innerDataVector = ((RepeatedValueVector) dataVector).getDataVector();
+ assertEquals(testRowCountPowerTwo*2 - 1, dataVector.getValueCapacity());
+
+ // offset vector of inner vector should have
+ // 2 (outer array cardinality) * 2 (inner array cardinality) * row count number of values.
+ offsetVector = ((RepeatedValueVector) innerDataVector).getOffsetVector();
+ assertEquals(testRowCountPowerTwo*4, offsetVector.getValueCapacity());
+
+ // Data vector of inner vector should have
+ // 2 (outer array cardinality) * 2 (inner array cardinality) * 4 (innermost array cardinality) * row count number of values.
+ dataVector = ((RepeatedValueVector) innerDataVector).getDataVector();
+ assertEquals(testRowCountPowerTwo * 16, dataVector.getValueCapacity());
+
+ v.clear();
+
+ // MAX ROW COUNT
+ colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT - 1);
+
+ // offset vector of delegate vector i.e. outer array should have row count number of values.
+ offsetVector = ((RepeatedListVector) v).getOffsetVector();
+ assertEquals(ValueVector.MAX_ROW_COUNT, offsetVector.getValueCapacity());
+
+ // Get data vector of delegate vector. This is repeated list again
+ dataVector = ((RepeatedListVector) v).getDataVector();
+
+ // offset vector of delegate vector of the inner repeated list
+ // This should have row count * 2 number of values.
+ offsetVector = ((RepeatedListVector) dataVector).getOffsetVector();
+ assertEquals(ValueVector.MAX_ROW_COUNT*2, offsetVector.getValueCapacity());
+
+ // Inner repeated list itself should report row count * 2 number of values - 1 (for offset vector adjustment).
+ innerDataVector = ((RepeatedValueVector) dataVector).getDataVector();
+ assertEquals(ValueVector.MAX_ROW_COUNT*2 - 1, dataVector.getValueCapacity());
+
+ // offset vector of inner vector should have
+ // 2 (outer array cardinality) * 2 (inner array cardinality) * row count number of values.
+ offsetVector = ((RepeatedValueVector) innerDataVector).getOffsetVector();
+ assertEquals(ValueVector.MAX_ROW_COUNT*4, offsetVector.getValueCapacity());
+
+ // Data vector of inner vector should have
+ // 2 (outer array cardinality) * 2 (inner array cardinality) * 4 (innermost array cardinality) * row count number of values.
+ dataVector = ((RepeatedValueVector) innerDataVector).getDataVector();
+ assertEquals(ValueVector.MAX_ROW_COUNT*16, dataVector.getValueCapacity());
+
+ v.clear();
+
+ // MIN ROW COUNT
+ colSize.allocateVector(v, 0);
+
+ // offset vector of delegate vector i.e. outer array should have 1 value.
+ offsetVector = ((RepeatedListVector) v).getOffsetVector();
+ assertEquals(ValueVector.MIN_ROW_COUNT, offsetVector.getValueCapacity());
+
+ // Get data vector of delegate vector. This is repeated list again
+ dataVector = ((RepeatedListVector) v).getDataVector();
+
+ // offset vector of delegate vector of the inner repeated list
+ offsetVector = ((RepeatedListVector) dataVector).getOffsetVector();
+ assertEquals(ValueVector.MIN_ROW_COUNT, offsetVector.getValueCapacity());
+
+ // offset vector of inner vector should have 2 (outer array cardinality) * 1 values.
+ // NOTE(review): innerDataVector is not re-fetched after this allocation, unlike the cases above - confirm intended.
+ offsetVector = ((RepeatedValueVector) innerDataVector).getOffsetVector();
+ assertEquals(ValueVector.MIN_ROW_COUNT*2, offsetVector.getValueCapacity());
+
+ // Data vector of inner vector should have 1 value.
+ dataVector = ((RepeatedValueVector) innerDataVector).getDataVector();
+ assertEquals(ValueVector.MIN_ROW_COUNT, dataVector.getValueCapacity());
+
+ v.clear();
+
+ }
+ }
+
}
\ No newline at end of file