You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by bo...@apache.org on 2019/05/09 01:58:55 UTC
[drill] 05/05: DRILL-7240: Catch runtime pruning filter-match
exceptions and do not prune these rowgroups
This is an automated email from the ASF dual-hosted git repository.
boaz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
commit e5e9b35588c06de6ba5b0b072567031daa50e00c
Author: Ben-Zvi <bb...@mapr.com>
AuthorDate: Tue May 7 13:30:21 2019 -0700
DRILL-7240: Catch runtime pruning filter-match exceptions and do not prune these rowgroups
closes #1783
---
.../parquet/AbstractParquetScanBatchCreator.java | 37 +++++++++++++++-------
1 file changed, 26 insertions(+), 11 deletions(-)
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetScanBatchCreator.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetScanBatchCreator.java
index 41f52d3..d95ee6d 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetScanBatchCreator.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetScanBatchCreator.java
@@ -107,6 +107,8 @@ public abstract class AbstractParquetScanBatchCreator {
long totalPruneTime = 0;
long totalRowgroups = rowGroupScan.getRowGroupReadEntries().size();
Stopwatch pruneTimer = Stopwatch.createUnstarted();
+ int countMatchClassCastExceptions = 0; // in case match() hits CCE, count and report these
+ String matchCastErrorMessage = ""; // report the error too (Java insists on initializing this ....)
// If pruning - Prepare the predicate and the columns before the FOR LOOP
if ( doRuntimePruning ) {
@@ -182,22 +184,31 @@ public abstract class AbstractParquetScanBatchCreator {
Map<SchemaPath, ColumnStatistics> columnsStatistics = ParquetTableMetadataUtils.getRowGroupColumnStatistics(tableMetadataV4, rowGroupMetadata);
//
- // Perform the Run-Time Pruning - i.e. Skip this rowgroup if the match fails
+ // Perform the Run-Time Pruning - i.e. Skip/prune this rowgroup if the match fails
//
- RowsMatch match = FilterEvaluatorUtils.matches(filterPredicate, columnsStatistics, footerRowCount);
-
- // collect logging info
- long timeToRead = pruneTimer.elapsed(TimeUnit.MICROSECONDS);
+ RowsMatch matchResult = RowsMatch.ALL; // default (in case of exception) - do not prune this rowgroup
+ try {
+ matchResult = FilterEvaluatorUtils.matches(filterPredicate, columnsStatistics, footerRowCount);
+
+ // collect logging info
+ long timeToRead = pruneTimer.elapsed(TimeUnit.MICROSECONDS);
+ totalPruneTime += timeToRead;
+ logger.trace("Run-time pruning: {} row-group {} (RG index: {} row count: {}), took {} usec", // trace each single rowgroup
+ matchResult == RowsMatch.NONE ? "Excluded" : "Included", rowGroup.getPath(), rowGroupIndex, footerRowCount, timeToRead);
+ } catch (ClassCastException cce) {
+ countMatchClassCastExceptions++; // one more CCE occurred
+ matchCastErrorMessage = cce.getMessage(); // report the (last) error message
+ } catch (Exception e) {
+ // in case some unexpected exception is raised
+ logger.warn("Run-time pruning check failed - {}. Skip pruning rowgroup - {}", e.getMessage(), rowGroup.getPath());
+ }
pruneTimer.stop();
pruneTimer.reset();
- totalPruneTime += timeToRead;
- logger.trace("Run-time pruning: {} row-group {} (RG index: {} row count: {}), took {} usec", // trace each single rowgroup
- match == RowsMatch.NONE ? "Excluded" : "Included", rowGroup.getPath(), rowGroupIndex, footerRowCount, timeToRead);
- // If this rowgroup failed the match - skip it
- if (match == RowsMatch.NONE) {
+ // If this rowgroup failed the match - skip it (i.e., no reader for this rowgroup)
+ if (matchResult == RowsMatch.NONE) {
rowgroupsPruned++; // one more RG was pruned
- if (firstRowGroup == null) { // keep first RG, to be used in case all row groups are pruned
+ if (firstRowGroup == null) { // keep the first RG, to be used in case all row groups are pruned
firstRowGroup = rowGroup;
firstFooter = footer;
}
@@ -214,10 +225,14 @@ public abstract class AbstractParquetScanBatchCreator {
mapWithMaxColumns = createReaderAndImplicitColumns(context, rowGroupScan, oContext, columnExplorer, readers, implicitColumns, mapWithMaxColumns, firstRowGroup, fs,
firstFooter, true);
}
+ // do some logging, if relevant
if ( totalPruneTime > 0 ) {
logger.info("Finished parquet_runtime_pruning in {} usec. Out of given {} rowgroups, {} were pruned. {}", totalPruneTime, totalRowgroups, rowgroupsPruned,
totalRowgroups == rowgroupsPruned ? "ALL_PRUNED !!" : "");
}
+ if ( countMatchClassCastExceptions > 0 ) {
+ logger.info("Run-time pruning skipped for {} out of {} rowgroups due to: {}",countMatchClassCastExceptions, totalRowgroups, matchCastErrorMessage);
+ }
// Update stats (same in every reader - the others would just overwrite the stats)
for (CommonParquetRecordReader rr : readers ) {