You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/02/23 22:51:31 UTC
[04/15] impala git commit: IMPALA-4835: Part 3: switch I/O buffers to
buffer pool
http://git-wip-us.apache.org/repos/asf/impala/blob/8c922a6e/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
index 7735f98..87d1806 100644
--- a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
@@ -59,6 +59,7 @@ import org.apache.impala.common.ImpalaException;
import org.apache.impala.common.ImpalaRuntimeException;
import org.apache.impala.common.InternalException;
import org.apache.impala.common.NotImplementedException;
+import org.apache.impala.common.Pair;
import org.apache.impala.common.PrintUtils;
import org.apache.impala.common.RuntimeEnv;
import org.apache.impala.fb.FbFileBlock;
@@ -76,6 +77,7 @@ import org.apache.impala.thrift.TScanRange;
import org.apache.impala.thrift.TScanRangeLocation;
import org.apache.impala.thrift.TScanRangeLocationList;
import org.apache.impala.thrift.TTableStats;
+import org.apache.impala.util.BitUtil;
import org.apache.impala.util.MembershipSnapshot;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -143,7 +145,7 @@ public class HdfsScanNode extends ScanNode {
// derived experimentally: running metadata-only Parquet count(*) scans on TPC-H
// lineitem and TPC-DS store_sales of different sizes resulted in memory consumption
// between 128kb and 1.1mb.
- private final static long MIN_MEMORY_ESTIMATE = 1 * 1024 * 1024;
+ private static final long MIN_MEMORY_ESTIMATE = 1L * 1024L * 1024L;
private final HdfsTable tbl_;
@@ -166,6 +168,18 @@ public class HdfsScanNode extends ScanNode {
private long totalFiles_ = 0;
private long totalBytes_ = 0;
+ // File formats scanned. Set in computeScanRangeLocations().
+ private Set<HdfsFileFormat> fileFormats_;
+
+ // Number of bytes in the largest scan range (i.e. hdfs split). Set in
+ // computeScanRangeLocations().
+ private long maxScanRangeBytes_ = 0;
+
+ // The ideal reservation to process a single scan range (i.e. hdfs split), >= the
+ // minimum reservation. Generally provides enough memory to overlap CPU and I/O and
+ // maximize throughput. Set in computeResourceProfile().
+ private long idealScanRangeReservation_ = -1;
+
// Input cardinality based on the partition row counts or extrapolation. -1 if invalid.
// Both values can be valid to report them in the explain plan, but only one of them is
// used for determining the scan cardinality.
@@ -329,25 +343,25 @@ public class HdfsScanNode extends ScanNode {
computeDictionaryFilterConjuncts(analyzer);
// compute scan range locations with optional sampling
- Set<HdfsFileFormat> fileFormats = computeScanRangeLocations(analyzer);
+ computeScanRangeLocations(analyzer);
// Determine backend scan node implementation to use. The optimized MT implementation
// is currently only supported for Parquet.
if (analyzer.getQueryOptions().isSetMt_dop() &&
analyzer.getQueryOptions().mt_dop > 0 &&
- fileFormats.size() == 1 &&
- (fileFormats.contains(HdfsFileFormat.PARQUET)
- || fileFormats.contains(HdfsFileFormat.TEXT))) {
+ fileFormats_.size() == 1 &&
+ (fileFormats_.contains(HdfsFileFormat.PARQUET)
+ || fileFormats_.contains(HdfsFileFormat.TEXT))) {
useMtScanNode_ = true;
} else {
useMtScanNode_ = false;
}
- if (fileFormats.contains(HdfsFileFormat.PARQUET)) {
+ if (fileFormats_.contains(HdfsFileFormat.PARQUET)) {
computeMinMaxTupleAndConjuncts(analyzer);
}
- if (canApplyParquetCountStarOptimization(analyzer, fileFormats)) {
+ if (canApplyParquetCountStarOptimization(analyzer, fileFormats_)) {
Preconditions.checkState(desc_.getPath().destTable() != null);
Preconditions.checkState(collectionConjuncts_.isEmpty());
countStarSlot_ = applyParquetCountStartOptimization(analyzer);
@@ -460,7 +474,7 @@ public class HdfsScanNode extends ScanNode {
// This node is a table scan, so this must be a scanning slot.
Preconditions.checkState(slotRef.getDesc().isScanSlot());
// Skip the slot ref if it refers to an array's "pos" field.
- if (slotRef.isArrayPosRef()) return;
+ if (slotRef.getDesc().isArrayPosRef()) return;
Expr constExpr = binaryPred.getChild(1);
// Only constant exprs can be evaluated against parquet::Statistics. This includes
@@ -488,7 +502,7 @@ public class HdfsScanNode extends ScanNode {
// This node is a table scan, so this must be a scanning slot.
Preconditions.checkState(slotRef.getDesc().isScanSlot());
// Skip the slot ref if it refers to an array's "pos" field.
- if (slotRef.isArrayPosRef()) return;
+ if (slotRef.getDesc().isArrayPosRef()) return;
if (inPred.isNotIn()) return;
ArrayList<Expr> children = inPred.getChildren();
@@ -695,13 +709,15 @@ public class HdfsScanNode extends ScanNode {
}
/**
- * Computes scan ranges (hdfs splits) plus their storage locations, including volume
- * ids, based on the given maximum number of bytes each scan range should scan.
+ * Computes scan ranges (i.e. hdfs splits) plus their storage locations, including
+ * volume ids, based on the given maximum number of bytes each scan range should scan.
* If 'sampleParams_' is not null, generates a sample and computes the scan ranges
* based on the sample.
- * Returns the set of file formats being scanned.
+ *
+ * Initializes members with information about files and scan ranges, e.g. totalFiles_,
+ * fileFormats_, etc.
*/
- private Set<HdfsFileFormat> computeScanRangeLocations(Analyzer analyzer)
+ private void computeScanRangeLocations(Analyzer analyzer)
throws ImpalaRuntimeException {
Map<Long, List<FileDescriptor>> sampledFiles = null;
if (sampleParams_ != null) {
@@ -724,7 +740,8 @@ public class HdfsScanNode extends ScanNode {
numPartitions_ = (sampledFiles != null) ? sampledFiles.size() : partitions_.size();
totalFiles_ = 0;
totalBytes_ = 0;
- Set<HdfsFileFormat> fileFormats = Sets.newHashSet();
+ maxScanRangeBytes_ = 0;
+ fileFormats_ = Sets.newHashSet();
for (HdfsPartition partition: partitions_) {
List<FileDescriptor> fileDescs = partition.getFileDescriptors();
if (sampledFiles != null) {
@@ -734,7 +751,7 @@ public class HdfsScanNode extends ScanNode {
}
analyzer.getDescTbl().addReferencedPartition(tbl_, partition.getId());
- fileFormats.add(partition.getFileFormat());
+ fileFormats_.add(partition.getFileFormat());
Preconditions.checkState(partition.getId() >= 0);
// Missing disk id accounting is only done for file systems that support the notion
// of disk/storage ids.
@@ -797,6 +814,7 @@ public class HdfsScanNode extends ScanNode {
scanRangeLocations.scan_range = scanRange;
scanRangeLocations.locations = locations;
scanRanges_.add(scanRangeLocations);
+ maxScanRangeBytes_ = Math.max(maxScanRangeBytes_, currentLength);
remainingLength -= currentLength;
currentOffset += currentLength;
}
@@ -811,7 +829,58 @@ public class HdfsScanNode extends ScanNode {
}
if (partitionMissingDiskIds) ++numPartitionsNoDiskIds_;
}
- return fileFormats;
+ }
+
+ /**
+ * Compute the number of columns that are read from the file, as opposed to
+ * materialized based on metadata. If there are nested collections, counts the
+ * number of leaf scalar slots per collection. This matches Parquet's "shredded"
+ * approach to nested collections, where each nested field is stored as a separate
+ * column. We may need to adjust this logic for non-shredded columnar formats if added.
+ */
+ private int computeNumColumnsReadFromFile() {
+ HdfsTable table = (HdfsTable) desc_.getTable();
+ int numColumns = 0;
+ boolean havePosSlot = false;
+ for (SlotDescriptor slot: desc_.getSlots()) {
+ if (!slot.isMaterialized() || slot == countStarSlot_) continue;
+ if (slot.getColumn() == null ||
+ slot.getColumn().getPosition() >= table.getNumClusteringCols()) {
+ if (slot.isArrayPosRef()) {
+ // Position virtual slots can be materialized by piggybacking on another slot.
+ havePosSlot = true;
+ } else if (slot.getType().isScalarType()) {
+ ++numColumns;
+ } else {
+ numColumns += computeNumColumnsReadForCollection(slot);
+ }
+ }
+ }
+ // Must scan something to materialize a position slot.
+ if (havePosSlot) numColumns = Math.max(numColumns, 1);
+ return numColumns;
+ }
+
+ /**
+ * Compute the number of columns read from disk for the materialized scalar slots
+ * nested at any depth in the provided collection slot. Always returns >= 1.
+ */
+ private int computeNumColumnsReadForCollection(SlotDescriptor collectionSlot) {
+ Preconditions.checkState(collectionSlot.getType().isCollectionType());
+ int numColumns = 0;
+ for (SlotDescriptor nestedSlot: collectionSlot.getItemTupleDesc().getSlots()) {
+ // Position virtual slots can be materialized by piggybacking on another slot.
+ if (!nestedSlot.isMaterialized() || nestedSlot.isArrayPosRef()) continue;
+ if (nestedSlot.getType().isScalarType()) {
+ ++numColumns;
+ } else {
+ numColumns += computeNumColumnsReadForCollection(nestedSlot);
+ }
+ }
+ // Need to scan at least one column to materialize the pos virtual slot and/or
+ // determine the size of the nested array.
+ numColumns = Math.max(numColumns, 1);
+ return numColumns;
}
/**
@@ -1006,6 +1075,8 @@ public class HdfsScanNode extends ScanNode {
}
msg.hdfs_scan_node.setRandom_replica(randomReplica_);
msg.node_type = TPlanNodeType.HDFS_SCAN_NODE;
+ Preconditions.checkState(idealScanRangeReservation_ >= 0, idealScanRangeReservation_);
+ msg.hdfs_scan_node.setIdeal_scan_range_reservation(idealScanRangeReservation_);
if (!collectionConjuncts_.isEmpty()) {
Map<Integer, List<TExpr>> tcollectionConjuncts = Maps.newLinkedHashMap();
for (Map.Entry<TupleDescriptor, List<Expr>> entry:
@@ -1184,25 +1255,20 @@ public class HdfsScanNode extends ScanNode {
Preconditions.checkNotNull(scanRanges_, "Cost estimation requires scan ranges.");
if (scanRanges_.isEmpty()) {
nodeResourceProfile_ = ResourceProfile.noReservation(0);
+ idealScanRangeReservation_ = 0;
return;
}
Preconditions.checkState(0 < numNodes_ && numNodes_ <= scanRanges_.size());
Preconditions.checkNotNull(desc_);
Preconditions.checkNotNull(desc_.getTable() instanceof HdfsTable);
HdfsTable table = (HdfsTable) desc_.getTable();
+ int numColumnsReadFromFile = computeNumColumnsReadFromFile();
int perHostScanRanges;
if (table.getMajorityFormat() == HdfsFileFormat.PARQUET) {
// For the purpose of this estimation, the number of per-host scan ranges for
// Parquet files are equal to the number of columns read from the file. I.e.
// excluding partition columns and columns that are populated from file metadata.
- perHostScanRanges = 0;
- for (SlotDescriptor slot: desc_.getSlots()) {
- if (!slot.isMaterialized() || slot == countStarSlot_) continue;
- if (slot.getColumn() == null ||
- slot.getColumn().getPosition() >= table.getNumClusteringCols()) {
- ++perHostScanRanges;
- }
- }
+ perHostScanRanges = numColumnsReadFromFile;
} else {
perHostScanRanges = (int) Math.ceil((
(double) scanRanges_.size() / (double) numNodes_) * SCAN_RANGE_SKEW_FACTOR);
@@ -1224,12 +1290,13 @@ public class HdfsScanNode extends ScanNode {
long avgScanRangeBytes = (long) Math.ceil(totalBytes_ / (double) scanRanges_.size());
// The +1 accounts for an extra I/O buffer to read past the scan range due to a
// trailing record spanning Hdfs blocks.
- long readSize = BackendConfig.INSTANCE.getReadSize();
+ long maxIoBufferSize =
+ BitUtil.roundUpToPowerOf2(BackendConfig.INSTANCE.getReadSize());
long perThreadIoBuffers =
- Math.min((long) Math.ceil(avgScanRangeBytes / (double) readSize),
+ Math.min((long) Math.ceil(avgScanRangeBytes / (double) maxIoBufferSize),
MAX_IO_BUFFERS_PER_THREAD) + 1;
long perInstanceMemEstimate = checkedMultiply(
- checkedMultiply(maxScannerThreads, perThreadIoBuffers), readSize);
+ checkedMultiply(maxScannerThreads, perThreadIoBuffers), maxIoBufferSize);
// Sanity check: the tighter estimation should not exceed the per-host maximum.
long perHostUpperBound = getPerHostMemUpperBound();
@@ -1240,7 +1307,51 @@ public class HdfsScanNode extends ScanNode {
perInstanceMemEstimate = perHostUpperBound;
}
perInstanceMemEstimate = Math.max(perInstanceMemEstimate, MIN_MEMORY_ESTIMATE);
- nodeResourceProfile_ = ResourceProfile.noReservation(perInstanceMemEstimate);
+
+ Pair<Long, Long> reservation = computeReservation(numColumnsReadFromFile);
+ nodeResourceProfile_ = new ResourceProfileBuilder()
+ .setMemEstimateBytes(perInstanceMemEstimate)
+ .setMinReservationBytes(reservation.first).build();
+ idealScanRangeReservation_ = reservation.second;
+ }
+
+ /**
+ * Compute the minimum and ideal memory reservation to process a single scan range
+ * (i.e. hdfs split). Bound the reservation based on:
+ * - One minimum-sized buffer per IoMgr scan range, which is the absolute minimum
+ * required to scan the data.
+ * - A maximum of either 1 or 3 max-sized I/O buffers per IoMgr scan range for
+ * the minimum and ideal reservation respectively. 1 max-sized I/O buffer avoids
+ * issuing small I/O unnecessarily while 3 max-sized I/O buffers guarantee higher
+ * throughput by overlapping compute and I/O efficiently.
+ * - A maximum reservation of the hdfs split size, to avoid reserving excessive
+ * memory for small files or ranges, e.g. small dimension tables with very few
+ * rows.
+ */
+ private Pair<Long, Long> computeReservation(int numColumnsReadFromFile) {
+ Preconditions.checkState(maxScanRangeBytes_ >= 0);
+ long maxIoBufferSize =
+ BitUtil.roundUpToPowerOf2(BackendConfig.INSTANCE.getReadSize());
+ // Scanners for columnar formats issue one IoMgr scan range for metadata, followed by
+ // one IoMgr scan range per column in parallel. Scanners for row-oriented formats
+ // issue only one IoMgr scan range at a time.
+ int iomgrScanRangesPerSplit = fileFormats_.contains(HdfsFileFormat.PARQUET) ?
+ Math.max(1, numColumnsReadFromFile) : 1;
+ // Need one buffer per IoMgr scan range to execute the scan.
+ long minReservationToExecute =
+ iomgrScanRangesPerSplit * BackendConfig.INSTANCE.getMinBufferSize();
+
+ // Quantize the max scan range (i.e. hdfs split) size to an I/O buffer size.
+ long quantizedMaxScanRangeBytes = maxScanRangeBytes_ < maxIoBufferSize ?
+ BitUtil.roundUpToPowerOf2(maxScanRangeBytes_) :
+ BitUtil.roundUpToPowerOf2Factor(maxScanRangeBytes_, maxIoBufferSize);
+ long minReservationBytes = Math.max(minReservationToExecute,
+ Math.min(iomgrScanRangesPerSplit * maxIoBufferSize,
+ quantizedMaxScanRangeBytes));
+ long idealReservationBytes = Math.max(minReservationToExecute,
+ Math.min(iomgrScanRangesPerSplit * maxIoBufferSize * 3,
+ quantizedMaxScanRangeBytes));
+ return Pair.create(minReservationBytes, idealReservationBytes);
}
/**
http://git-wip-us.apache.org/repos/asf/impala/blob/8c922a6e/fe/src/main/java/org/apache/impala/util/BitUtil.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/util/BitUtil.java b/fe/src/main/java/org/apache/impala/util/BitUtil.java
index 839dd6e..6bb670d 100644
--- a/fe/src/main/java/org/apache/impala/util/BitUtil.java
+++ b/fe/src/main/java/org/apache/impala/util/BitUtil.java
@@ -29,4 +29,10 @@ public class BitUtil {
public static long roundUpToPowerOf2(long val) {
return 1L << log2Ceiling(val);
}
+
+ // Round up 'val' to the nearest multiple of 'factor'. 'factor' must be a power of
+ // two and 'val' must be > 0.
+ public static long roundUpToPowerOf2Factor(long val, long factor) {
+ return (val + (factor - 1)) & ~(factor - 1);
+ }
}
http://git-wip-us.apache.org/repos/asf/impala/blob/8c922a6e/fe/src/test/java/org/apache/impala/util/BitUtilTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/util/BitUtilTest.java b/fe/src/test/java/org/apache/impala/util/BitUtilTest.java
index a134b6a..a6da80c 100644
--- a/fe/src/test/java/org/apache/impala/util/BitUtilTest.java
+++ b/fe/src/test/java/org/apache/impala/util/BitUtilTest.java
@@ -46,4 +46,10 @@ public class BitUtilTest {
assertEquals(0x8000000000000000L, BitUtil.roundUpToPowerOf2(0x8000000000000000L - 1));
}
+ @Test
+ public void testPowerOf2Factor() {
+ assertEquals(BitUtil.roundUpToPowerOf2Factor(7, 8), 8);
+ assertEquals(BitUtil.roundUpToPowerOf2Factor(8, 8), 8);
+ assertEquals(BitUtil.roundUpToPowerOf2Factor(9, 8), 16);
+ }
}
http://git-wip-us.apache.org/repos/asf/impala/blob/8c922a6e/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test b/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test
index f25ad0a..533ac42 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test
@@ -5,7 +5,7 @@ where 5 + 5 < c_custkey and o_orderkey = (2 + 2)
and (coalesce(2, 3, 4) * 10) + l_linenumber < (0 * 1)
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=176.00MB mem-reservation=0B
+| Per-Host Resources: mem-estimate=264.00MB mem-reservation=24.00MB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
@@ -59,7 +59,7 @@ PLAN-ROOT SINK
parquet dictionary predicates: c_custkey > 10
parquet dictionary predicates on o: o_orderkey = 4
parquet dictionary predicates on o_lineitems: 20 + l_linenumber < 0
- mem-estimate=176.00MB mem-reservation=0B
+ mem-estimate=264.00MB mem-reservation=24.00MB
tuple-ids=0 row-size=24B cardinality=15000
====
# Test HBase scan node.
@@ -107,7 +107,7 @@ having 1024 * 1024 * count(*) % 2 = 0
and (sm between 5 and 10)
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=138.00MB mem-reservation=1.94MB
+| Per-Host Resources: mem-estimate=138.00MB mem-reservation=1.97MB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
@@ -125,7 +125,7 @@ PLAN-ROOT SINK
partitions: 24/24 rows=7300
columns: all
extrapolated-rows=disabled
- mem-estimate=128.00MB mem-reservation=0B
+ mem-estimate=128.00MB mem-reservation=32.00KB
tuple-ids=0 row-size=20B cardinality=7300
====
# Test hash join.
@@ -136,7 +136,7 @@ left outer join functional.alltypes b
where round(1.11 + 2.22 + 3.33 + 4.44, 1) < cast(b.double_col as decimal(3, 2))
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=257.94MB mem-reservation=1.94MB
+| Per-Host Resources: mem-estimate=257.94MB mem-reservation=2.00MB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
@@ -157,7 +157,7 @@ PLAN-ROOT SINK
| columns: all
| extrapolated-rows=disabled
| parquet dictionary predicates: CAST(b.double_col AS DECIMAL(3,2)) > 11.1
-| mem-estimate=128.00MB mem-reservation=0B
+| mem-estimate=128.00MB mem-reservation=32.00KB
| tuple-ids=1 row-size=20B cardinality=730
|
00:SCAN HDFS [functional.alltypes a]
@@ -167,7 +167,7 @@ PLAN-ROOT SINK
partitions: 24/24 rows=7300
columns: all
extrapolated-rows=disabled
- mem-estimate=128.00MB mem-reservation=0B
+ mem-estimate=128.00MB mem-reservation=32.00KB
tuple-ids=0 row-size=8B cardinality=7300
====
# Test nested-loop join. Same as above but and with a disjunction in the On clause.
@@ -179,7 +179,7 @@ left outer join functional.alltypes b
where cast(b.double_col as decimal(3, 2)) > round(1.11 + 2.22 + 3.33 + 4.44, 1)
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=256.01MB mem-reservation=0B
+| Per-Host Resources: mem-estimate=256.01MB mem-reservation=64.00KB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
@@ -198,7 +198,7 @@ PLAN-ROOT SINK
| columns: all
| extrapolated-rows=disabled
| parquet dictionary predicates: CAST(b.double_col AS DECIMAL(3,2)) > 11.1
-| mem-estimate=128.00MB mem-reservation=0B
+| mem-estimate=128.00MB mem-reservation=32.00KB
| tuple-ids=1 row-size=20B cardinality=730
|
00:SCAN HDFS [functional.alltypes a]
@@ -208,7 +208,7 @@ PLAN-ROOT SINK
partitions: 24/24 rows=7300
columns: all
extrapolated-rows=disabled
- mem-estimate=128.00MB mem-reservation=0B
+ mem-estimate=128.00MB mem-reservation=32.00KB
tuple-ids=0 row-size=8B cardinality=7300
====
# Test distinct aggregation with grouping.
@@ -242,7 +242,7 @@ PLAN-ROOT SINK
partitions: 24/24 rows=7300
columns: all
extrapolated-rows=disabled
- mem-estimate=128.00MB mem-reservation=0B
+ mem-estimate=128.00MB mem-reservation=32.00KB
tuple-ids=0 row-size=20B cardinality=7300
====
# Test non-grouping distinct aggregation.
@@ -251,7 +251,7 @@ from functional.alltypes
having 1024 * 1024 * count(*) % 2 = 0
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=138.00MB mem-reservation=1.94MB
+| Per-Host Resources: mem-estimate=138.00MB mem-reservation=1.97MB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
@@ -274,7 +274,7 @@ PLAN-ROOT SINK
partitions: 24/24 rows=7300
columns: all
extrapolated-rows=disabled
- mem-estimate=128.00MB mem-reservation=0B
+ mem-estimate=128.00MB mem-reservation=32.00KB
tuple-ids=0 row-size=4B cardinality=7300
====
# Test analytic eval node.
@@ -284,7 +284,7 @@ select first_value(1 + 1 + int_col - (1 - 1)) over
from functional.alltypes
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=144.00MB mem-reservation=16.00MB
+| Per-Host Resources: mem-estimate=144.00MB mem-reservation=16.03MB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
@@ -309,7 +309,7 @@ PLAN-ROOT SINK
partitions: 24/24 rows=7300
columns: all
extrapolated-rows=disabled
- mem-estimate=128.00MB mem-reservation=0B
+ mem-estimate=128.00MB mem-reservation=32.00KB
tuple-ids=0 row-size=29B cardinality=7300
====
# Test sort node.
@@ -317,7 +317,7 @@ select int_col from functional.alltypes
order by id * abs((factorial(5) / power(2, 4)))
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=134.00MB mem-reservation=6.00MB
+| Per-Host Resources: mem-estimate=134.00MB mem-reservation=6.03MB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
@@ -333,7 +333,7 @@ PLAN-ROOT SINK
partitions: 24/24 rows=7300
columns: all
extrapolated-rows=disabled
- mem-estimate=128.00MB mem-reservation=0B
+ mem-estimate=128.00MB mem-reservation=32.00KB
tuple-ids=0 row-size=8B cardinality=7300
====
# Test HDFS table sink.
@@ -342,7 +342,7 @@ select id, int_col, cast(1 + 1 + 1 + year as int), cast(month - (1 - 1 - 1) as i
from functional.alltypessmall
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=32.00MB mem-reservation=0B
+| Per-Host Resources: mem-estimate=32.00MB mem-reservation=8.00KB
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(CAST(3 + year AS INT),CAST(month - -1 AS INT))]
| partitions=4
| mem-estimate=1.56KB mem-reservation=0B
@@ -354,7 +354,7 @@ WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(CAST(3 + ye
partitions: 4/4 rows=100
columns: all
extrapolated-rows=disabled
- mem-estimate=32.00MB mem-reservation=0B
+ mem-estimate=32.00MB mem-reservation=8.00KB
tuple-ids=0 row-size=16B cardinality=100
====
# Constant folding does not work across query blocks.
@@ -366,7 +366,7 @@ select sum(id + c3) from
) v3
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=138.00MB mem-reservation=0B
+| Per-Host Resources: mem-estimate=138.00MB mem-reservation=32.00KB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
@@ -383,6 +383,6 @@ PLAN-ROOT SINK
columns: all
extrapolated-rows=disabled
limit: 2
- mem-estimate=128.00MB mem-reservation=0B
+ mem-estimate=128.00MB mem-reservation=32.00KB
tuple-ids=0 row-size=4B cardinality=2
====
http://git-wip-us.apache.org/repos/asf/impala/blob/8c922a6e/testdata/workloads/functional-planner/queries/PlannerTest/disable-codegen.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/disable-codegen.test b/testdata/workloads/functional-planner/queries/PlannerTest/disable-codegen.test
index 0f4a5da..3d234d1 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/disable-codegen.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/disable-codegen.test
@@ -1,7 +1,7 @@
# Rows per node is < 3000: codegen should be disabled.
select count(*) from functional.alltypes
---- DISTRIBUTEDPLAN
-Max Per-Host Resource Reservation: Memory=0B
+Max Per-Host Resource Reservation: Memory=32.00KB
Per-Host Resource Estimates: Memory=148.00MB
Codegen disabled by planner
@@ -21,7 +21,7 @@ PLAN-ROOT SINK
# Rows per node is > 3000: codegen should be enabled.
select count(*) from functional.alltypesagg
---- DISTRIBUTEDPLAN
-Max Per-Host Resource Reservation: Memory=0B
+Max Per-Host Resource Reservation: Memory=128.00KB
Per-Host Resource Estimates: Memory=100.00MB
PLAN-ROOT SINK
@@ -40,7 +40,7 @@ PLAN-ROOT SINK
# No stats on functional_parquet: codegen should be disabled.
select count(*) from functional_parquet.alltypes
---- DISTRIBUTEDPLAN
-Max Per-Host Resource Reservation: Memory=0B
+Max Per-Host Resource Reservation: Memory=8.00KB
Per-Host Resource Estimates: Memory=21.00MB
WARNING: The following tables are missing relevant table and/or column statistics.
functional_parquet.alltypes
@@ -56,12 +56,12 @@ PLAN-ROOT SINK
| output: sum_init_zero(functional_parquet.alltypes.parquet-stats: num_rows)
|
00:SCAN HDFS [functional_parquet.alltypes]
- partitions=24/24 files=24 size=174.39KB
+ partitions=24/24 files=24 size=174.62KB
====
# > 3000 rows returned to coordinator: codegen should be enabled
select * from functional_parquet.alltypes
---- DISTRIBUTEDPLAN
-Max Per-Host Resource Reservation: Memory=0B
+Max Per-Host Resource Reservation: Memory=88.00KB
Per-Host Resource Estimates: Memory=128.00MB
WARNING: The following tables are missing relevant table and/or column statistics.
functional_parquet.alltypes
@@ -71,14 +71,14 @@ PLAN-ROOT SINK
01:EXCHANGE [UNPARTITIONED]
|
00:SCAN HDFS [functional_parquet.alltypes]
- partitions=24/24 files=24 size=174.39KB
+ partitions=24/24 files=24 size=174.62KB
====
# Optimisation is enabled for join producing < 3000 rows
select count(*)
from functional.alltypes t1
join functional.alltypestiny t2 on t1.id = t2.id
---- DISTRIBUTEDPLAN
-Max Per-Host Resource Reservation: Memory=2.94MB
+Max Per-Host Resource Reservation: Memory=2.98MB
Per-Host Resource Estimates: Memory=182.94MB
Codegen disabled by planner
@@ -108,7 +108,7 @@ PLAN-ROOT SINK
# Optimisation is disabled by cross join producing > 3000 rows
select count(*) from functional.alltypes t1, functional.alltypes t2
---- DISTRIBUTEDPLAN
-Max Per-Host Resource Reservation: Memory=0B
+Max Per-Host Resource Reservation: Memory=64.00KB
Per-Host Resource Estimates: Memory=276.00MB
PLAN-ROOT SINK
@@ -137,7 +137,7 @@ select count(*) from (
union all
select * from functional.alltypestiny) v
---- DISTRIBUTEDPLAN
-Max Per-Host Resource Reservation: Memory=0B
+Max Per-Host Resource Reservation: Memory=32.00KB
Per-Host Resource Estimates: Memory=148.00MB
Codegen disabled by planner
@@ -166,7 +166,7 @@ select count(*) from (
union all
select * from functional.alltypes) v
---- DISTRIBUTEDPLAN
-Max Per-Host Resource Reservation: Memory=0B
+Max Per-Host Resource Reservation: Memory=32.00KB
Per-Host Resource Estimates: Memory=148.00MB
PLAN-ROOT SINK
@@ -193,7 +193,7 @@ PLAN-ROOT SINK
select sum(l_discount)
from (select * from tpch.lineitem limit 1000) v
---- DISTRIBUTEDPLAN
-Max Per-Host Resource Reservation: Memory=0B
+Max Per-Host Resource Reservation: Memory=8.00MB
Per-Host Resource Estimates: Memory=274.00MB
Codegen disabled by planner
@@ -214,7 +214,7 @@ PLAN-ROOT SINK
select sum(l_discount)
from (select * from tpch.lineitem where l_orderkey > 100 limit 1000) v
---- DISTRIBUTEDPLAN
-Max Per-Host Resource Reservation: Memory=0B
+Max Per-Host Resource Reservation: Memory=8.00MB
Per-Host Resource Estimates: Memory=274.00MB
PLAN-ROOT SINK
http://git-wip-us.apache.org/repos/asf/impala/blob/8c922a6e/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test b/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test
index bb12bca..55439d6 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test
@@ -5,7 +5,7 @@ on ss_customer_sk = c_customer_sk
where c_salutation = 'Mrs.'
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=185.50MB mem-reservation=9.50MB runtime-filters-memory=1.00MB
+| Per-Host Resources: mem-estimate=185.50MB mem-reservation=25.50MB runtime-filters-memory=1.00MB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
@@ -24,7 +24,7 @@ PLAN-ROOT SINK
| columns: all
| extrapolated-rows=disabled
| parquet dictionary predicates: c_salutation = 'Mrs.'
-| mem-estimate=48.00MB mem-reservation=0B
+| mem-estimate=48.00MB mem-reservation=8.00MB
| tuple-ids=1 row-size=255B cardinality=16667
|
00:SCAN HDFS [tpcds.store_sales]
@@ -35,7 +35,7 @@ PLAN-ROOT SINK
partitions: 1824/1824 rows=2880404
columns: all
extrapolated-rows=disabled
- mem-estimate=128.00MB mem-reservation=0B
+ mem-estimate=128.00MB mem-reservation=8.00MB
tuple-ids=0 row-size=100B cardinality=2880404
====
# Single-column FK/PK join detection on left outer join. The join cardinality
@@ -46,7 +46,7 @@ on ss_customer_sk = c_customer_sk
where c_salutation = 'Mrs.'
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=184.50MB mem-reservation=8.50MB
+| Per-Host Resources: mem-estimate=184.50MB mem-reservation=24.50MB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
@@ -65,7 +65,7 @@ PLAN-ROOT SINK
| columns: all
| extrapolated-rows=disabled
| parquet dictionary predicates: c_salutation = 'Mrs.'
-| mem-estimate=48.00MB mem-reservation=0B
+| mem-estimate=48.00MB mem-reservation=8.00MB
| tuple-ids=1 row-size=255B cardinality=16667
|
00:SCAN HDFS [tpcds.store_sales]
@@ -75,7 +75,7 @@ PLAN-ROOT SINK
partitions: 1824/1824 rows=2880404
columns: all
extrapolated-rows=disabled
- mem-estimate=128.00MB mem-reservation=0B
+ mem-estimate=128.00MB mem-reservation=8.00MB
tuple-ids=0 row-size=100B cardinality=2880404
====
# Single-column FK/PK join detection on right outer join. The join cardinality
@@ -86,7 +86,7 @@ on ss_customer_sk = c_customer_sk
where c_salutation = 'Mrs.'
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=185.50MB mem-reservation=9.50MB runtime-filters-memory=1.00MB
+| Per-Host Resources: mem-estimate=185.50MB mem-reservation=25.50MB runtime-filters-memory=1.00MB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
@@ -105,7 +105,7 @@ PLAN-ROOT SINK
| columns: all
| extrapolated-rows=disabled
| parquet dictionary predicates: c_salutation = 'Mrs.'
-| mem-estimate=48.00MB mem-reservation=0B
+| mem-estimate=48.00MB mem-reservation=8.00MB
| tuple-ids=1 row-size=255B cardinality=16667
|
00:SCAN HDFS [tpcds.store_sales]
@@ -116,7 +116,7 @@ PLAN-ROOT SINK
partitions: 1824/1824 rows=2880404
columns: all
extrapolated-rows=disabled
- mem-estimate=128.00MB mem-reservation=0B
+ mem-estimate=128.00MB mem-reservation=8.00MB
tuple-ids=0 row-size=100B cardinality=2880404
====
# Multi-column FK/PK join detection
@@ -126,7 +126,7 @@ on ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number
where sr_return_quantity < 10
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=214.75MB mem-reservation=6.75MB runtime-filters-memory=2.00MB
+| Per-Host Resources: mem-estimate=214.75MB mem-reservation=22.75MB runtime-filters-memory=2.00MB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
@@ -145,7 +145,7 @@ PLAN-ROOT SINK
| columns: all
| extrapolated-rows=disabled
| parquet dictionary predicates: sr_return_quantity < 10
-| mem-estimate=80.00MB mem-reservation=0B
+| mem-estimate=80.00MB mem-reservation=8.00MB
| tuple-ids=1 row-size=88B cardinality=28751
|
00:SCAN HDFS [tpcds.store_sales]
@@ -156,7 +156,7 @@ PLAN-ROOT SINK
partitions: 1824/1824 rows=2880404
columns: all
extrapolated-rows=disabled
- mem-estimate=128.00MB mem-reservation=0B
+ mem-estimate=128.00MB mem-reservation=8.00MB
tuple-ids=0 row-size=100B cardinality=2880404
====
# Many-to-many join detection.
@@ -165,7 +165,7 @@ tpcds.store_sales inner join tpcds.web_sales
on ss_sold_time_sk = ws_sold_time_sk
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=397.67MB mem-reservation=35.00MB runtime-filters-memory=1.00MB
+| Per-Host Resources: mem-estimate=397.67MB mem-reservation=51.00MB runtime-filters-memory=1.00MB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
@@ -182,7 +182,7 @@ PLAN-ROOT SINK
| table: rows=719384 size=140.07MB
| columns: all
| extrapolated-rows=disabled
-| mem-estimate=160.00MB mem-reservation=0B
+| mem-estimate=160.00MB mem-reservation=8.00MB
| tuple-ids=1 row-size=144B cardinality=719384
|
00:SCAN HDFS [tpcds.store_sales]
@@ -193,7 +193,7 @@ PLAN-ROOT SINK
partitions: 1824/1824 rows=2880404
columns: all
extrapolated-rows=disabled
- mem-estimate=128.00MB mem-reservation=0B
+ mem-estimate=128.00MB mem-reservation=8.00MB
tuple-ids=0 row-size=100B cardinality=2880404
====
# PK/PK join is detected as FK/PK.
@@ -203,7 +203,7 @@ on a.d_date_sk = b.d_date_sk
where a.d_holiday = "Y"
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=114.00MB mem-reservation=18.00MB runtime-filters-memory=1.00MB
+| Per-Host Resources: mem-estimate=114.00MB mem-reservation=34.00MB runtime-filters-memory=1.00MB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
@@ -222,7 +222,7 @@ PLAN-ROOT SINK
| columns: all
| extrapolated-rows=disabled
| parquet dictionary predicates: a.d_holiday = 'Y'
-| mem-estimate=48.00MB mem-reservation=0B
+| mem-estimate=48.00MB mem-reservation=8.00MB
| tuple-ids=0 row-size=303B cardinality=36525
|
01:SCAN HDFS [tpcds.date_dim b]
@@ -232,7 +232,7 @@ PLAN-ROOT SINK
table: rows=73049 size=9.84MB
columns: all
extrapolated-rows=disabled
- mem-estimate=48.00MB mem-reservation=0B
+ mem-estimate=48.00MB mem-reservation=8.00MB
tuple-ids=1 row-size=303B cardinality=73049
====
# Single query with various join types combined.
@@ -246,7 +246,7 @@ where ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number
and d1.d_fy_week_seq = 1000
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=362.81MB mem-reservation=12.75MB runtime-filters-memory=5.00MB
+| Per-Host Resources: mem-estimate=362.81MB mem-reservation=50.81MB runtime-filters-memory=5.00MB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
@@ -263,7 +263,7 @@ PLAN-ROOT SINK
| table: rows=100000 size=12.60MB
| columns: all
| extrapolated-rows=disabled
-| mem-estimate=48.00MB mem-reservation=0B
+| mem-estimate=48.00MB mem-reservation=8.00MB
| tuple-ids=2 row-size=4B cardinality=100000
|
07:HASH JOIN [INNER JOIN]
@@ -279,7 +279,7 @@ PLAN-ROOT SINK
| table: rows=73049 size=9.84MB
| columns: all
| extrapolated-rows=disabled
-| mem-estimate=48.00MB mem-reservation=0B
+| mem-estimate=48.00MB mem-reservation=8.00MB
| tuple-ids=4 row-size=4B cardinality=73049
|
06:HASH JOIN [INNER JOIN]
@@ -304,7 +304,7 @@ PLAN-ROOT SINK
| | columns: all
| | extrapolated-rows=disabled
| | parquet dictionary predicates: d1.d_fy_week_seq = 1000
-| | mem-estimate=48.00MB mem-reservation=0B
+| | mem-estimate=48.00MB mem-reservation=8.00MB
| | tuple-ids=3 row-size=8B cardinality=7
| |
| 00:SCAN HDFS [tpcds.store_sales]
@@ -315,7 +315,7 @@ PLAN-ROOT SINK
| partitions: 1824/1824 rows=2880404
| columns: all
| extrapolated-rows=disabled
-| mem-estimate=128.00MB mem-reservation=0B
+| mem-estimate=128.00MB mem-reservation=8.00MB
| tuple-ids=0 row-size=24B cardinality=2880404
|
01:SCAN HDFS [tpcds.store_returns]
@@ -325,7 +325,7 @@ PLAN-ROOT SINK
table: rows=287514 size=31.19MB
columns: all
extrapolated-rows=disabled
- mem-estimate=80.00MB mem-reservation=0B
+ mem-estimate=80.00MB mem-reservation=8.00MB
tuple-ids=1 row-size=20B cardinality=287514
====
# Assumed FK/PK join because of non-trivial equi-join exprs.
@@ -334,7 +334,7 @@ tpcds.store_sales inner join tpcds.customer
on ss_customer_sk % 10 = c_customer_sk / 100
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=211.00MB mem-reservation=35.00MB runtime-filters-memory=1.00MB
+| Per-Host Resources: mem-estimate=211.00MB mem-reservation=51.00MB runtime-filters-memory=1.00MB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
@@ -351,7 +351,7 @@ PLAN-ROOT SINK
| table: rows=100000 size=12.60MB
| columns: all
| extrapolated-rows=disabled
-| mem-estimate=48.00MB mem-reservation=0B
+| mem-estimate=48.00MB mem-reservation=8.00MB
| tuple-ids=1 row-size=255B cardinality=100000
|
00:SCAN HDFS [tpcds.store_sales]
@@ -362,7 +362,7 @@ PLAN-ROOT SINK
partitions: 1824/1824 rows=2880404
columns: all
extrapolated-rows=disabled
- mem-estimate=128.00MB mem-reservation=0B
+ mem-estimate=128.00MB mem-reservation=8.00MB
tuple-ids=0 row-size=100B cardinality=2880404
====
# Assumed FK/PK join due to missing stats on the rhs. Join cardinality is equal to
@@ -372,7 +372,7 @@ tpcds.store_sales inner join tpcds_seq_snap.customer
on ss_customer_sk = c_customer_sk
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=2.17GB mem-reservation=35.00MB runtime-filters-memory=1.00MB
+| Per-Host Resources: mem-estimate=2.17GB mem-reservation=51.00MB runtime-filters-memory=1.00MB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
@@ -384,12 +384,12 @@ PLAN-ROOT SINK
| tuple-ids=0,1 row-size=8B cardinality=2880404
|
|--01:SCAN HDFS [tpcds_seq_snap.customer]
-| partitions=1/1 files=1 size=8.59MB
+| partitions=1/1 files=1 size=8.58MB
| stored statistics:
-| table: rows=unavailable size=8.59MB
+| table: rows=unavailable size=8.58MB
| columns: unavailable
| extrapolated-rows=disabled
-| mem-estimate=48.00MB mem-reservation=0B
+| mem-estimate=48.00MB mem-reservation=8.00MB
| tuple-ids=1 row-size=4B cardinality=unavailable
|
00:SCAN HDFS [tpcds.store_sales]
@@ -400,7 +400,7 @@ PLAN-ROOT SINK
partitions: 1824/1824 rows=2880404
columns: all
extrapolated-rows=disabled
- mem-estimate=128.00MB mem-reservation=0B
+ mem-estimate=128.00MB mem-reservation=8.00MB
tuple-ids=0 row-size=4B cardinality=2880404
====
# Assumed FK/PK join due to missing stats on the lhs. Join cardinality is unknown.
@@ -409,7 +409,7 @@ tpcds_seq_snap.store_sales inner join tpcds.customer
on ss_customer_sk = c_customer_sk
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=178.94MB mem-reservation=2.94MB runtime-filters-memory=1.00MB
+| Per-Host Resources: mem-estimate=178.94MB mem-reservation=18.94MB runtime-filters-memory=1.00MB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
@@ -426,18 +426,18 @@ PLAN-ROOT SINK
| table: rows=100000 size=12.60MB
| columns: all
| extrapolated-rows=disabled
-| mem-estimate=48.00MB mem-reservation=0B
+| mem-estimate=48.00MB mem-reservation=8.00MB
| tuple-ids=1 row-size=4B cardinality=100000
|
00:SCAN HDFS [tpcds_seq_snap.store_sales]
- partitions=1824/1824 files=1824 size=207.90MB
+ partitions=1824/1824 files=1824 size=207.85MB
runtime filters: RF000[bloom] -> ss_customer_sk
stored statistics:
table: rows=unavailable size=unavailable
partitions: 0/1824 rows=unavailable
columns: unavailable
extrapolated-rows=disabled
- mem-estimate=128.00MB mem-reservation=0B
+ mem-estimate=128.00MB mem-reservation=8.00MB
tuple-ids=0 row-size=4B cardinality=unavailable
====
# Join is detected as many-to-many even though the rhs join columns
@@ -448,7 +448,7 @@ tpcds.store_sales inner join
on ss_sold_time_sk = ws_sold_time_sk
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=299.00MB mem-reservation=4.88MB runtime-filters-memory=1.00MB
+| Per-Host Resources: mem-estimate=299.00MB mem-reservation=18.94MB runtime-filters-memory=1.00MB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
@@ -470,7 +470,7 @@ PLAN-ROOT SINK
| table: rows=719384 size=140.07MB
| columns: all
| extrapolated-rows=disabled
-| mem-estimate=160.00MB mem-reservation=0B
+| mem-estimate=160.00MB mem-reservation=8.00MB
| tuple-ids=1 row-size=4B cardinality=719384
|
00:SCAN HDFS [tpcds.store_sales]
@@ -481,6 +481,6 @@ PLAN-ROOT SINK
partitions: 1824/1824 rows=2880404
columns: all
extrapolated-rows=disabled
- mem-estimate=128.00MB mem-reservation=0B
+ mem-estimate=128.00MB mem-reservation=8.00MB
tuple-ids=0 row-size=100B cardinality=2880404
====
http://git-wip-us.apache.org/repos/asf/impala/blob/8c922a6e/testdata/workloads/functional-planner/queries/PlannerTest/max-row-size.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/max-row-size.test b/testdata/workloads/functional-planner/queries/PlannerTest/max-row-size.test
index 96015e0..1df9270 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/max-row-size.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/max-row-size.test
@@ -4,7 +4,7 @@ select straight_join *
from tpch_parquet.customer
inner join tpch_parquet.nation on c_nationkey = n_nationkey
---- DISTRIBUTEDPLAN
-Max Per-Host Resource Reservation: Memory=17.94MB
+Max Per-Host Resource Reservation: Memory=33.97MB
Per-Host Resource Estimates: Memory=57.94MB
F02:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
@@ -17,7 +17,7 @@ PLAN-ROOT SINK
| tuple-ids=0,1 row-size=355B cardinality=150000
|
F00:PLAN FRAGMENT [RANDOM] hosts=1 instances=1
-Per-Host Resources: mem-estimate=41.94MB mem-reservation=17.94MB runtime-filters-memory=1.00MB
+Per-Host Resources: mem-estimate=41.94MB mem-reservation=33.94MB runtime-filters-memory=1.00MB
02:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: c_nationkey = n_nationkey
| fk/pk conjuncts: c_nationkey = n_nationkey
@@ -30,14 +30,14 @@ Per-Host Resources: mem-estimate=41.94MB mem-reservation=17.94MB runtime-filters
| | tuple-ids=1 row-size=117B cardinality=25
| |
| F01:PLAN FRAGMENT [RANDOM] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=16.00MB mem-reservation=0B
+| Per-Host Resources: mem-estimate=16.00MB mem-reservation=32.00KB
| 01:SCAN HDFS [tpch_parquet.nation, RANDOM]
| partitions=1/1 files=1 size=2.74KB
| stored statistics:
| table: rows=25 size=2.74KB
| columns: all
| extrapolated-rows=disabled
-| mem-estimate=16.00MB mem-reservation=0B
+| mem-estimate=16.00MB mem-reservation=32.00KB
| tuple-ids=1 row-size=117B cardinality=25
|
00:SCAN HDFS [tpch_parquet.customer, RANDOM]
@@ -47,7 +47,7 @@ Per-Host Resources: mem-estimate=41.94MB mem-reservation=17.94MB runtime-filters
table: rows=150000 size=12.31MB
columns: all
extrapolated-rows=disabled
- mem-estimate=24.00MB mem-reservation=0B
+ mem-estimate=24.00MB mem-reservation=16.00MB
tuple-ids=0 row-size=238B cardinality=150000
====
# Join with large build side.
@@ -56,8 +56,8 @@ select straight_join *
from tpch_parquet.lineitem
left join tpch_parquet.orders on l_orderkey = o_orderkey
---- DISTRIBUTEDPLAN
-Max Per-Host Resource Reservation: Memory=46.00MB
-Per-Host Resource Estimates: Memory=420.41MB
+Max Per-Host Resource Reservation: Memory=166.00MB
+Per-Host Resource Estimates: Memory=428.41MB
F02:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=0B mem-reservation=0B
@@ -69,7 +69,7 @@ PLAN-ROOT SINK
| tuple-ids=0,1N row-size=454B cardinality=6001215
|
F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3
-Per-Host Resources: mem-estimate=380.41MB mem-reservation=46.00MB
+Per-Host Resources: mem-estimate=380.41MB mem-reservation=118.00MB
02:HASH JOIN [LEFT OUTER JOIN, BROADCAST]
| hash predicates: l_orderkey = o_orderkey
| fk/pk conjuncts: l_orderkey = o_orderkey
@@ -81,23 +81,23 @@ Per-Host Resources: mem-estimate=380.41MB mem-reservation=46.00MB
| | tuple-ids=1 row-size=191B cardinality=1500000
| |
| F01:PLAN FRAGMENT [RANDOM] hosts=2 instances=2
-| Per-Host Resources: mem-estimate=40.00MB mem-reservation=0B
+| Per-Host Resources: mem-estimate=48.00MB mem-reservation=48.00MB
| 01:SCAN HDFS [tpch_parquet.orders, RANDOM]
| partitions=1/1 files=2 size=54.07MB
| stored statistics:
| table: rows=1500000 size=54.07MB
| columns: all
| extrapolated-rows=disabled
-| mem-estimate=40.00MB mem-reservation=0B
+| mem-estimate=48.00MB mem-reservation=48.00MB
| tuple-ids=1 row-size=191B cardinality=1500000
|
00:SCAN HDFS [tpch_parquet.lineitem, RANDOM]
- partitions=1/1 files=3 size=193.73MB
+ partitions=1/1 files=3 size=193.72MB
stored statistics:
- table: rows=6001215 size=193.73MB
+ table: rows=6001215 size=193.72MB
columns: all
extrapolated-rows=disabled
- mem-estimate=80.00MB mem-reservation=0B
+ mem-estimate=80.00MB mem-reservation=72.00MB
tuple-ids=0 row-size=263B cardinality=6001215
====
# Null-aware anti-join with medium build side.
@@ -105,7 +105,7 @@ Per-Host Resources: mem-estimate=380.41MB mem-reservation=46.00MB
select * from tpch_parquet.lineitem
where l_orderkey not in (select o_orderkey from tpch_parquet.orders)
---- DISTRIBUTEDPLAN
-Max Per-Host Resource Reservation: Memory=34.00MB
+Max Per-Host Resource Reservation: Memory=114.00MB
Per-Host Resource Estimates: Memory=154.00MB
F02:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
@@ -118,7 +118,7 @@ PLAN-ROOT SINK
| tuple-ids=0 row-size=263B cardinality=6001215
|
F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3
-Per-Host Resources: mem-estimate=114.00MB mem-reservation=34.00MB
+Per-Host Resources: mem-estimate=114.00MB mem-reservation=106.00MB
02:HASH JOIN [NULL AWARE LEFT ANTI JOIN, BROADCAST]
| hash predicates: l_orderkey = o_orderkey
| mem-estimate=34.00MB mem-reservation=34.00MB spill-buffer=1.00MB
@@ -129,23 +129,23 @@ Per-Host Resources: mem-estimate=114.00MB mem-reservation=34.00MB
| | tuple-ids=1 row-size=8B cardinality=1500000
| |
| F01:PLAN FRAGMENT [RANDOM] hosts=2 instances=2
-| Per-Host Resources: mem-estimate=40.00MB mem-reservation=0B
+| Per-Host Resources: mem-estimate=40.00MB mem-reservation=8.00MB
| 01:SCAN HDFS [tpch_parquet.orders, RANDOM]
| partitions=1/1 files=2 size=54.07MB
| stored statistics:
| table: rows=1500000 size=54.07MB
| columns: all
| extrapolated-rows=disabled
-| mem-estimate=40.00MB mem-reservation=0B
+| mem-estimate=40.00MB mem-reservation=8.00MB
| tuple-ids=1 row-size=8B cardinality=1500000
|
00:SCAN HDFS [tpch_parquet.lineitem, RANDOM]
- partitions=1/1 files=3 size=193.73MB
+ partitions=1/1 files=3 size=193.72MB
stored statistics:
- table: rows=6001215 size=193.73MB
+ table: rows=6001215 size=193.72MB
columns: all
extrapolated-rows=disabled
- mem-estimate=80.00MB mem-reservation=0B
+ mem-estimate=80.00MB mem-reservation=72.00MB
tuple-ids=0 row-size=263B cardinality=6001215
====
# Mid NDV aggregation.
@@ -156,7 +156,7 @@ from tpch_parquet.lineitem
group by 1, 2
having count(*) = 1
---- DISTRIBUTEDPLAN
-Max Per-Host Resource Reservation: Memory=113.00MB
+Max Per-Host Resource Reservation: Memory=137.00MB
Per-Host Resource Estimates: Memory=253.12MB
F04:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
@@ -201,14 +201,14 @@ Per-Host Resources: mem-estimate=86.12MB mem-reservation=66.00MB runtime-filters
| | tuple-ids=1 row-size=25B cardinality=1500000
| |
| F01:PLAN FRAGMENT [RANDOM] hosts=2 instances=2
-| Per-Host Resources: mem-estimate=40.00MB mem-reservation=0B
+| Per-Host Resources: mem-estimate=40.00MB mem-reservation=16.00MB
| 01:SCAN HDFS [tpch_parquet.orders, RANDOM]
| partitions=1/1 files=2 size=54.07MB
| stored statistics:
| table: rows=1500000 size=54.07MB
| columns: all
| extrapolated-rows=disabled
-| mem-estimate=40.00MB mem-reservation=0B
+| mem-estimate=40.00MB mem-reservation=16.00MB
| tuple-ids=1 row-size=25B cardinality=1500000
|
04:EXCHANGE [HASH(l_orderkey)]
@@ -216,15 +216,15 @@ Per-Host Resources: mem-estimate=86.12MB mem-reservation=66.00MB runtime-filters
| tuple-ids=0 row-size=8B cardinality=6001215
|
F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3
-Per-Host Resources: mem-estimate=81.00MB mem-reservation=1.00MB runtime-filters-memory=1.00MB
+Per-Host Resources: mem-estimate=81.00MB mem-reservation=9.00MB runtime-filters-memory=1.00MB
00:SCAN HDFS [tpch_parquet.lineitem, RANDOM]
- partitions=1/1 files=3 size=193.73MB
+ partitions=1/1 files=3 size=193.72MB
runtime filters: RF000[bloom] -> l_orderkey
stored statistics:
- table: rows=6001215 size=193.73MB
+ table: rows=6001215 size=193.72MB
columns: all
extrapolated-rows=disabled
- mem-estimate=80.00MB mem-reservation=0B
+ mem-estimate=80.00MB mem-reservation=8.00MB
tuple-ids=0 row-size=8B cardinality=6001215
====
# High NDV aggregation.
@@ -232,7 +232,7 @@ Per-Host Resources: mem-estimate=81.00MB mem-reservation=1.00MB runtime-filters-
select distinct *
from tpch_parquet.lineitem
---- DISTRIBUTEDPLAN
-Max Per-Host Resource Reservation: Memory=80.00MB
+Max Per-Host Resource Reservation: Memory=152.00MB
Per-Host Resource Estimates: Memory=3.31GB
F02:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
@@ -256,19 +256,19 @@ Per-Host Resources: mem-estimate=1.62GB mem-reservation=46.00MB
| tuple-ids=1 row-size=263B cardinality=6001215
|
F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3
-Per-Host Resources: mem-estimate=1.69GB mem-reservation=34.00MB
+Per-Host Resources: mem-estimate=1.69GB mem-reservation=106.00MB
01:AGGREGATE [STREAMING]
| group by: tpch_parquet.lineitem.l_orderkey, tpch_parquet.lineitem.l_partkey, tpch_parquet.lineitem.l_suppkey, tpch_parquet.lineitem.l_linenumber, tpch_parquet.lineitem.l_quantity, tpch_parquet.lineitem.l_extendedprice, tpch_parquet.lineitem.l_discount, tpch_parquet.lineitem.l_tax, tpch_parquet.lineitem.l_returnflag, tpch_parquet.lineitem.l_linestatus, tpch_parquet.lineitem.l_shipdate, tpch_parquet.lineitem.l_commitdate, tpch_parquet.lineitem.l_receiptdate, tpch_parquet.lineitem.l_shipinstruct, tpch_parquet.lineitem.l_shipmode, tpch_parquet.lineitem.l_comment
| mem-estimate=1.62GB mem-reservation=34.00MB spill-buffer=2.00MB
| tuple-ids=1 row-size=263B cardinality=6001215
|
00:SCAN HDFS [tpch_parquet.lineitem, RANDOM]
- partitions=1/1 files=3 size=193.73MB
+ partitions=1/1 files=3 size=193.72MB
stored statistics:
- table: rows=6001215 size=193.73MB
+ table: rows=6001215 size=193.72MB
columns: all
extrapolated-rows=disabled
- mem-estimate=80.00MB mem-reservation=0B
+ mem-estimate=80.00MB mem-reservation=72.00MB
tuple-ids=0 row-size=263B cardinality=6001215
====
# High NDV aggregation with string aggregation function.
@@ -277,7 +277,7 @@ select l_orderkey, l_partkey, group_concat(l_linestatus, ",")
from tpch_parquet.lineitem
group by 1, 2
---- DISTRIBUTEDPLAN
-Max Per-Host Resource Reservation: Memory=82.00MB
+Max Per-Host Resource Reservation: Memory=106.00MB
Per-Host Resource Estimates: Memory=482.91MB
F02:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
@@ -302,7 +302,7 @@ Per-Host Resources: mem-estimate=201.46MB mem-reservation=48.00MB
| tuple-ids=1 row-size=32B cardinality=6001215
|
F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3
-Per-Host Resources: mem-estimate=281.46MB mem-reservation=34.00MB
+Per-Host Resources: mem-estimate=281.46MB mem-reservation=58.00MB
01:AGGREGATE [STREAMING]
| output: group_concat(l_linestatus, ',')
| group by: l_orderkey, l_partkey
@@ -310,12 +310,12 @@ Per-Host Resources: mem-estimate=281.46MB mem-reservation=34.00MB
| tuple-ids=1 row-size=32B cardinality=6001215
|
00:SCAN HDFS [tpch_parquet.lineitem, RANDOM]
- partitions=1/1 files=3 size=193.73MB
+ partitions=1/1 files=3 size=193.72MB
stored statistics:
- table: rows=6001215 size=193.73MB
+ table: rows=6001215 size=193.72MB
columns: all
extrapolated-rows=disabled
- mem-estimate=80.00MB mem-reservation=0B
+ mem-estimate=80.00MB mem-reservation=24.00MB
tuple-ids=0 row-size=33B cardinality=6001215
====
# Sort + Analytic.
@@ -323,7 +323,7 @@ Per-Host Resources: mem-estimate=281.46MB mem-reservation=34.00MB
select max(tinyint_col) over(partition by int_col)
from functional.alltypes
---- DISTRIBUTEDPLAN
-Max Per-Host Resource Reservation: Memory=40.00MB
+Max Per-Host Resource Reservation: Memory=40.03MB
Per-Host Resource Estimates: Memory=56.00MB
Codegen disabled by planner
@@ -354,7 +354,7 @@ Per-Host Resources: mem-estimate=40.00MB mem-reservation=40.00MB
| tuple-ids=0 row-size=5B cardinality=7300
|
F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3
-Per-Host Resources: mem-estimate=16.00MB mem-reservation=0B
+Per-Host Resources: mem-estimate=16.00MB mem-reservation=32.00KB
00:SCAN HDFS [functional.alltypes, RANDOM]
partitions=24/24 files=24 size=478.45KB
stored statistics:
@@ -362,6 +362,6 @@ Per-Host Resources: mem-estimate=16.00MB mem-reservation=0B
partitions: 24/24 rows=7300
columns: all
extrapolated-rows=disabled
- mem-estimate=16.00MB mem-reservation=0B
+ mem-estimate=16.00MB mem-reservation=32.00KB
tuple-ids=0 row-size=5B cardinality=7300
====
http://git-wip-us.apache.org/repos/asf/impala/blob/8c922a6e/testdata/workloads/functional-planner/queries/PlannerTest/min-max-runtime-filters.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/min-max-runtime-filters.test b/testdata/workloads/functional-planner/queries/PlannerTest/min-max-runtime-filters.test
index 7f6d96b..8dd2593 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/min-max-runtime-filters.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/min-max-runtime-filters.test
@@ -99,7 +99,7 @@ select count(*) from functional_kudu.alltypes a, functional_parquet.alltypes b,
where a.int_col = b.int_col and a.int_col = c.int_col
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=2.02GB mem-reservation=36.94MB runtime-filters-memory=1.00MB
+| Per-Host Resources: mem-estimate=2.02GB mem-reservation=36.95MB runtime-filters-memory=1.00MB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
@@ -127,14 +127,14 @@ PLAN-ROOT SINK
| tuple-ids=0,1 row-size=8B cardinality=7300
|
|--01:SCAN HDFS [functional_parquet.alltypes b]
-| partitions=24/24 files=24 size=174.39KB
+| partitions=24/24 files=24 size=174.62KB
| runtime filters: RF000[bloom] -> b.int_col
| stored statistics:
| table: rows=unavailable size=unavailable
| partitions: 0/24 rows=unavailable
| columns: unavailable
| extrapolated-rows=disabled
-| mem-estimate=16.00MB mem-reservation=0B
+| mem-estimate=16.00MB mem-reservation=8.00KB
| tuple-ids=1 row-size=4B cardinality=unavailable
|
00:SCAN KUDU [functional_kudu.alltypes a]
http://git-wip-us.apache.org/repos/asf/impala/blob/8c922a6e/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test b/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test
index 61d646b..31129ff 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test
@@ -40,7 +40,7 @@ order by cnt, bigint_col
limit 10
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=144.00MB mem-reservation=34.00MB
+| Per-Host Resources: mem-estimate=144.00MB mem-reservation=34.02MB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
@@ -56,7 +56,7 @@ PLAN-ROOT SINK
| tuple-ids=1 row-size=16B cardinality=unavailable
|
00:SCAN HDFS [functional_parquet.alltypes]
- partitions=24/24 files=24 size=179.19KB
+ partitions=24/24 files=24 size=174.39KB
predicates: id < 10
stored statistics:
table: rows=unavailable size=unavailable
@@ -65,7 +65,7 @@ PLAN-ROOT SINK
extrapolated-rows=disabled
parquet statistics predicates: id < 10
parquet dictionary predicates: id < 10
- mem-estimate=16.00MB mem-reservation=0B
+ mem-estimate=16.00MB mem-reservation=24.00KB
tuple-ids=0 row-size=16B cardinality=unavailable
---- PARALLELPLANS
F02:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
@@ -97,7 +97,7 @@ Per-Host Resources: mem-estimate=384.00MB mem-reservation=102.00MB
| tuple-ids=1 row-size=16B cardinality=unavailable
|
F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=9
-Per-Host Resources: mem-estimate=432.00MB mem-reservation=102.00MB
+Per-Host Resources: mem-estimate=432.00MB mem-reservation=102.07MB
01:AGGREGATE [STREAMING]
| output: count(int_col)
| group by: bigint_col
@@ -105,7 +105,7 @@ Per-Host Resources: mem-estimate=432.00MB mem-reservation=102.00MB
| tuple-ids=1 row-size=16B cardinality=unavailable
|
00:SCAN HDFS [functional_parquet.alltypes, RANDOM]
- partitions=24/24 files=24 size=179.19KB
+ partitions=24/24 files=24 size=174.39KB
predicates: id < 10
stored statistics:
table: rows=unavailable size=unavailable
@@ -114,7 +114,7 @@ Per-Host Resources: mem-estimate=432.00MB mem-reservation=102.00MB
extrapolated-rows=disabled
parquet statistics predicates: id < 10
parquet dictionary predicates: id < 10
- mem-estimate=16.00MB mem-reservation=0B
+ mem-estimate=16.00MB mem-reservation=24.00KB
tuple-ids=0 row-size=16B cardinality=unavailable
====
# Single-table scan/filter/analytic should work.
@@ -123,7 +123,7 @@ from functional_parquet.alltypes
where id < 10
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=26.00MB mem-reservation=10.00MB
+| Per-Host Resources: mem-estimate=26.00MB mem-reservation=10.02MB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
@@ -141,7 +141,7 @@ PLAN-ROOT SINK
| tuple-ids=4 row-size=8B cardinality=unavailable
|
00:SCAN HDFS [functional_parquet.alltypes]
- partitions=24/24 files=24 size=179.19KB
+ partitions=24/24 files=24 size=174.39KB
predicates: id < 10
stored statistics:
table: rows=unavailable size=unavailable
@@ -150,7 +150,7 @@ PLAN-ROOT SINK
extrapolated-rows=disabled
parquet statistics predicates: id < 10
parquet dictionary predicates: id < 10
- mem-estimate=16.00MB mem-reservation=0B
+ mem-estimate=16.00MB mem-reservation=16.00KB
tuple-ids=0 row-size=8B cardinality=unavailable
---- PARALLELPLANS
F02:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
@@ -182,9 +182,9 @@ Per-Host Resources: mem-estimate=30.00MB mem-reservation=30.00MB
| tuple-ids=0 row-size=8B cardinality=unavailable
|
F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=9
-Per-Host Resources: mem-estimate=48.00MB mem-reservation=0B
+Per-Host Resources: mem-estimate=48.00MB mem-reservation=48.00KB
00:SCAN HDFS [functional_parquet.alltypes, RANDOM]
- partitions=24/24 files=24 size=179.19KB
+ partitions=24/24 files=24 size=174.39KB
predicates: id < 10
stored statistics:
table: rows=unavailable size=unavailable
@@ -193,7 +193,7 @@ Per-Host Resources: mem-estimate=48.00MB mem-reservation=0B
extrapolated-rows=disabled
parquet statistics predicates: id < 10
parquet dictionary predicates: id < 10
- mem-estimate=16.00MB mem-reservation=0B
+ mem-estimate=16.00MB mem-reservation=16.00KB
tuple-ids=0 row-size=8B cardinality=unavailable
====
# Nested-loop join in a subplan should work.
@@ -202,7 +202,7 @@ from tpch_nested_parquet.customer c, c.c_orders o, o.o_lineitems
where c_custkey < 10 and o_orderkey < 5 and l_linenumber < 3
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=88.00MB mem-reservation=0B
+| Per-Host Resources: mem-estimate=88.00MB mem-reservation=88.00MB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
@@ -257,7 +257,7 @@ PLAN-ROOT SINK
parquet dictionary predicates: c_custkey < 10
parquet dictionary predicates on o: o_orderkey < 5
parquet dictionary predicates on o_lineitems: l_linenumber < 3
- mem-estimate=88.00MB mem-reservation=0B
+ mem-estimate=88.00MB mem-reservation=88.00MB
tuple-ids=0 row-size=254B cardinality=15000
---- PARALLELPLANS
F01:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
@@ -270,7 +270,7 @@ PLAN-ROOT SINK
| tuple-ids=2,1,0 row-size=562B cardinality=1500000
|
F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=9
-Per-Host Resources: mem-estimate=264.00MB mem-reservation=0B
+Per-Host Resources: mem-estimate=264.00MB mem-reservation=264.00MB
01:SUBPLAN
| mem-estimate=0B mem-reservation=0B
| tuple-ids=2,1,0 row-size=562B cardinality=1500000
@@ -322,7 +322,7 @@ Per-Host Resources: mem-estimate=264.00MB mem-reservation=0B
parquet dictionary predicates: c_custkey < 10
parquet dictionary predicates on o: o_orderkey < 5
parquet dictionary predicates on o_lineitems: l_linenumber < 3
- mem-estimate=88.00MB mem-reservation=0B
+ mem-estimate=88.00MB mem-reservation=88.00MB
tuple-ids=0 row-size=254B cardinality=15000
====
# Hash-join in a subplan should work.
@@ -331,7 +331,7 @@ from tpch_nested_parquet.customer c, c.c_orders o1, c.c_orders o2
where o1.o_orderkey = o2.o_orderkey + 2 and o1.o_orderkey < 5
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=89.94MB mem-reservation=1.94MB
+| Per-Host Resources: mem-estimate=89.94MB mem-reservation=81.94MB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
@@ -374,7 +374,7 @@ PLAN-ROOT SINK
extrapolated-rows=disabled
parquet statistics predicates on o1: o1.o_orderkey < 5
parquet dictionary predicates on o1: o1.o_orderkey < 5
- mem-estimate=88.00MB mem-reservation=0B
+ mem-estimate=88.00MB mem-reservation=80.00MB
tuple-ids=0 row-size=270B cardinality=150000
---- PARALLELPLANS
F01:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
@@ -387,7 +387,7 @@ PLAN-ROOT SINK
| tuple-ids=1,0,2 row-size=286B cardinality=1500000
|
F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=9
-Per-Host Resources: mem-estimate=269.81MB mem-reservation=5.81MB
+Per-Host Resources: mem-estimate=269.81MB mem-reservation=245.81MB
01:SUBPLAN
| mem-estimate=0B mem-reservation=0B
| tuple-ids=1,0,2 row-size=286B cardinality=1500000
@@ -427,6 +427,6 @@ Per-Host Resources: mem-estimate=269.81MB mem-reservation=5.81MB
extrapolated-rows=disabled
parquet statistics predicates on o1: o1.o_orderkey < 5
parquet dictionary predicates on o1: o1.o_orderkey < 5
- mem-estimate=88.00MB mem-reservation=0B
+ mem-estimate=88.00MB mem-reservation=80.00MB
tuple-ids=0 row-size=270B cardinality=150000
====
http://git-wip-us.apache.org/repos/asf/impala/blob/8c922a6e/testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering.test b/testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering.test
index 2b602c9..3e812c6 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering.test
@@ -8,7 +8,7 @@ where int_col > 1 and int_col * rand() > 50 and int_col is null
and int_col > tinyint_col;
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=42.00MB mem-reservation=0B
+| Per-Host Resources: mem-estimate=42.00MB mem-reservation=16.00KB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
@@ -18,7 +18,7 @@ PLAN-ROOT SINK
| tuple-ids=1 row-size=8B cardinality=1
|
00:SCAN HDFS [functional_parquet.alltypes]
- partitions=24/24 files=24 size=179.19KB
+ partitions=24/24 files=24 size=174.39KB
predicates: int_col IS NULL, int_col > 1, int_col > tinyint_col, int_col * rand() > 50
stored statistics:
table: rows=unavailable size=unavailable
@@ -27,7 +27,7 @@ PLAN-ROOT SINK
extrapolated-rows=disabled
parquet statistics predicates: int_col > 1
parquet dictionary predicates: int_col > 1
- mem-estimate=32.00MB mem-reservation=0B
+ mem-estimate=32.00MB mem-reservation=16.00KB
tuple-ids=0 row-size=5B cardinality=unavailable
====
# Test a variety of types
@@ -40,7 +40,7 @@ and timestamp_cmp(timestamp_col, '2016-11-20 00:00:00') = 1
and year > 2000 and month < 12;
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=138.00MB mem-reservation=0B
+| Per-Host Resources: mem-estimate=138.00MB mem-reservation=88.00KB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
@@ -50,7 +50,7 @@ PLAN-ROOT SINK
| tuple-ids=1 row-size=8B cardinality=1
|
00:SCAN HDFS [functional_parquet.alltypes]
- partitions=22/24 files=22 size=164.09KB
+ partitions=22/24 files=22 size=159.69KB
predicates: bool_col, bigint_col < 5000, double_col > 100.00, float_col > 50.00, id = 1, tinyint_col < 50, string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (1, 2, 3, 4, 5), mod(int_col, 2) = 1, timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = 1, date_string_col > '1993-10-01'
stored statistics:
table: rows=unavailable size=unavailable
@@ -59,7 +59,7 @@ PLAN-ROOT SINK
extrapolated-rows=disabled
parquet statistics predicates: bigint_col < 5000, double_col > 100.00, float_col > 50.00, id = 1, tinyint_col < 50, string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (1, 2, 3, 4, 5), date_string_col > '1993-10-01'
parquet dictionary predicates: bool_col, bigint_col < 5000, double_col > 100.00, float_col > 50.00, id = 1, tinyint_col < 50, string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (1, 2, 3, 4, 5), mod(int_col, 2) = 1, timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = 1, date_string_col > '1993-10-01'
- mem-estimate=128.00MB mem-reservation=0B
+ mem-estimate=128.00MB mem-reservation=88.00KB
tuple-ids=0 row-size=80B cardinality=unavailable
====
# Test negative cases for IN predicate min/max filtering
@@ -73,7 +73,7 @@ and mod(int_col,50) IN (0,1)
and id IN (int_col);
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=58.00MB mem-reservation=0B
+| Per-Host Resources: mem-estimate=58.00MB mem-reservation=24.00KB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
@@ -83,7 +83,7 @@ PLAN-ROOT SINK
| tuple-ids=1 row-size=8B cardinality=1
|
00:SCAN HDFS [functional_parquet.alltypes]
- partitions=24/24 files=24 size=179.19KB
+ partitions=24/24 files=24 size=174.39KB
predicates: id IN (int_col), id NOT IN (0, 1, 2), string_col IN ('aaaa', 'bbbb', 'cccc', NULL), mod(int_col, 50) IN (0, 1)
stored statistics:
table: rows=unavailable size=unavailable
@@ -91,7 +91,7 @@ PLAN-ROOT SINK
columns: unavailable
extrapolated-rows=disabled
parquet dictionary predicates: id NOT IN (0, 1, 2), string_col IN ('aaaa', 'bbbb', 'cccc', NULL), mod(int_col, 50) IN (0, 1)
- mem-estimate=48.00MB mem-reservation=0B
+ mem-estimate=48.00MB mem-reservation=24.00KB
tuple-ids=0 row-size=24B cardinality=unavailable
====
# Test collection types where all collections on the path are required (inner
@@ -101,7 +101,7 @@ select id from functional_parquet.complextypestbl c, c.nested_struct.c.d cn, cn.
where a.item.e < -10;
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=32.00MB mem-reservation=0B
+| Per-Host Resources: mem-estimate=32.00MB mem-reservation=16.00KB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
@@ -152,7 +152,7 @@ PLAN-ROOT SINK
extrapolated-rows=disabled
parquet statistics predicates on a: a.item.e < -10
parquet dictionary predicates on a: a.item.e < -10
- mem-estimate=32.00MB mem-reservation=0B
+ mem-estimate=32.00MB mem-reservation=16.00KB
tuple-ids=0 row-size=24B cardinality=unavailable
====
# Test collection types where the lower collection in the path is optional
@@ -164,7 +164,7 @@ left outer join cn.item a
where a.item.e < -10;
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=32.00MB mem-reservation=0B
+| Per-Host Resources: mem-estimate=32.00MB mem-reservation=16.00KB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
@@ -213,7 +213,7 @@ PLAN-ROOT SINK
table: rows=unavailable size=unavailable
columns missing stats: id
extrapolated-rows=disabled
- mem-estimate=32.00MB mem-reservation=0B
+ mem-estimate=32.00MB mem-reservation=16.00KB
tuple-ids=0 row-size=24B cardinality=unavailable
====
# Tests collection types where the outer is optional (outer join descent)
@@ -223,7 +223,7 @@ select id from functional_parquet.complextypestbl c
left outer join c.nested_struct.c.d cn, cn.item a where a.item.e < -10;
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=32.00MB mem-reservation=0B
+| Per-Host Resources: mem-estimate=32.00MB mem-reservation=16.00KB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
@@ -270,7 +270,7 @@ PLAN-ROOT SINK
table: rows=unavailable size=unavailable
columns missing stats: id
extrapolated-rows=disabled
- mem-estimate=32.00MB mem-reservation=0B
+ mem-estimate=32.00MB mem-reservation=16.00KB
tuple-ids=0 row-size=24B cardinality=unavailable
====
# Test collections so that each level has a filter applied.
@@ -278,7 +278,7 @@ select c_custkey from tpch_nested_parquet.customer c, c.c_orders o,
o.o_lineitems l where c_custkey > 0 and o.o_orderkey > 0 and l.l_partkey > 0;
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=176.00MB mem-reservation=0B
+| Per-Host Resources: mem-estimate=264.00MB mem-reservation=24.00MB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
@@ -333,7 +333,7 @@ PLAN-ROOT SINK
parquet dictionary predicates: c_custkey > 0
parquet dictionary predicates on o: o.o_orderkey > 0
parquet dictionary predicates on l: l.l_partkey > 0
- mem-estimate=176.00MB mem-reservation=0B
+ mem-estimate=264.00MB mem-reservation=24.00MB
tuple-ids=0 row-size=24B cardinality=15000
====
# Test collections in a way that would incorrectly apply a min-max
@@ -342,7 +342,7 @@ select count(*) from functional_parquet.complextypestbl c left outer join
(select * from c.int_array where item > 10) v;
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=26.00MB mem-reservation=0B
+| Per-Host Resources: mem-estimate=26.00MB mem-reservation=8.00KB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
@@ -376,7 +376,7 @@ PLAN-ROOT SINK
table: rows=unavailable size=unavailable
columns: unavailable
extrapolated-rows=disabled
- mem-estimate=16.00MB mem-reservation=0B
+ mem-estimate=16.00MB mem-reservation=8.00KB
tuple-ids=0 row-size=16B cardinality=unavailable
====
# Multiple nested collection values (at the same nesting level) where dictionary
@@ -388,7 +388,7 @@ l.l_receiptdate = '1994-08-24' and l.l_shipmode = 'RAIL' and l.l_returnflag = 'R
l.l_comment is null;
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=176.00MB mem-reservation=0B
+| Per-Host Resources: mem-estimate=616.00MB mem-reservation=56.00MB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
@@ -439,6 +439,6 @@ PLAN-ROOT SINK
extrapolated-rows=disabled
parquet statistics predicates on l: l.l_shipdate = '1994-08-19', l.l_receiptdate = '1994-08-24', l.l_shipmode = 'RAIL', l.l_returnflag = 'R'
parquet dictionary predicates on l: l.l_shipdate = '1994-08-19', l.l_receiptdate = '1994-08-24', l.l_shipmode = 'RAIL', l.l_returnflag = 'R'
- mem-estimate=176.00MB mem-reservation=0B
+ mem-estimate=616.00MB mem-reservation=56.00MB
tuple-ids=0 row-size=50B cardinality=150000
====
http://git-wip-us.apache.org/repos/asf/impala/blob/8c922a6e/testdata/workloads/functional-planner/queries/PlannerTest/partition-pruning.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/partition-pruning.test b/testdata/workloads/functional-planner/queries/PlannerTest/partition-pruning.test
index 610136d..5cbba7b 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/partition-pruning.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/partition-pruning.test
@@ -4,7 +4,7 @@ select * from functional.stringpartitionkey
where string_col=cast("2009-01-01 00:00:00" as timestamp);
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-| Per-Host Resources: mem-estimate=32.00MB mem-reservation=0B
+| Per-Host Resources: mem-estimate=32.00MB mem-reservation=8.00KB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
@@ -15,6 +15,6 @@ PLAN-ROOT SINK
partitions: 1/1 rows=1
columns: all
extrapolated-rows=disabled
- mem-estimate=32.00MB mem-reservation=0B
+ mem-estimate=32.00MB mem-reservation=8.00KB
tuple-ids=0 row-size=20B cardinality=1
====