You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by bo...@apache.org on 2019/05/09 01:58:55 UTC

[drill] 05/05: DRILL-7240: Catch runtime pruning filter-match exceptions and do not prune these rowgroups

This is an automated email from the ASF dual-hosted git repository.

boaz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git

commit e5e9b35588c06de6ba5b0b072567031daa50e00c
Author: Ben-Zvi <bb...@mapr.com>
AuthorDate: Tue May 7 13:30:21 2019 -0700

    DRILL-7240: Catch runtime pruning filter-match exceptions and do not prune these rowgroups
    
    closes #1783
---
 .../parquet/AbstractParquetScanBatchCreator.java   | 37 +++++++++++++++-------
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetScanBatchCreator.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetScanBatchCreator.java
index 41f52d3..d95ee6d 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetScanBatchCreator.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetScanBatchCreator.java
@@ -107,6 +107,8 @@ public abstract class AbstractParquetScanBatchCreator {
       long totalPruneTime = 0;
       long totalRowgroups = rowGroupScan.getRowGroupReadEntries().size();
       Stopwatch pruneTimer = Stopwatch.createUnstarted();
+      int countMatchClassCastExceptions = 0; // in case match() hits CCE, count and report these
+      String matchCastErrorMessage = ""; // report the error too (Java insists on initializing this ....)
 
       // If pruning - Prepare the predicate and the columns before the FOR LOOP
       if ( doRuntimePruning ) {
@@ -182,22 +184,31 @@ public abstract class AbstractParquetScanBatchCreator {
           Map<SchemaPath, ColumnStatistics> columnsStatistics = ParquetTableMetadataUtils.getRowGroupColumnStatistics(tableMetadataV4, rowGroupMetadata);
 
           //
-          // Perform the Run-Time Pruning - i.e. Skip this rowgroup if the match fails
+          // Perform the Run-Time Pruning - i.e. Skip/prune this rowgroup if the match fails
           //
-          RowsMatch match = FilterEvaluatorUtils.matches(filterPredicate, columnsStatistics, footerRowCount);
-
-          // collect logging info
-          long timeToRead = pruneTimer.elapsed(TimeUnit.MICROSECONDS);
+          RowsMatch matchResult = RowsMatch.ALL; // default (in case of exception) - do not prune this rowgroup
+          try {
+            matchResult = FilterEvaluatorUtils.matches(filterPredicate, columnsStatistics, footerRowCount);
+
+            // collect logging info
+            long timeToRead = pruneTimer.elapsed(TimeUnit.MICROSECONDS);
+            totalPruneTime += timeToRead;
+            logger.trace("Run-time pruning: {} row-group {} (RG index: {} row count: {}), took {} usec", // trace each single rowgroup
+              matchResult == RowsMatch.NONE ? "Excluded" : "Included", rowGroup.getPath(), rowGroupIndex, footerRowCount, timeToRead);
+          } catch (ClassCastException cce) {
+            countMatchClassCastExceptions++; // one more CCE occurred
+            matchCastErrorMessage = cce.getMessage(); // report the (last) error message
+          } catch (Exception e) {
+            // in case some unexpected exception is raised
+            logger.warn("Run-time pruning check failed - {}. Skip pruning rowgroup - {}", e.getMessage(), rowGroup.getPath());
+          }
           pruneTimer.stop();
           pruneTimer.reset();
-          totalPruneTime += timeToRead;
-          logger.trace("Run-time pruning: {} row-group {} (RG index: {} row count: {}), took {} usec", // trace each single rowgroup
-            match == RowsMatch.NONE ? "Excluded" : "Included", rowGroup.getPath(), rowGroupIndex, footerRowCount, timeToRead);
 
-          // If this rowgroup failed the match - skip it
-          if (match == RowsMatch.NONE) {
+          // If this rowgroup failed the match - skip it (i.e., no reader for this rowgroup)
+          if (matchResult == RowsMatch.NONE) {
             rowgroupsPruned++; // one more RG was pruned
-            if (firstRowGroup == null) {  // keep first RG, to be used in case all row groups are pruned
+            if (firstRowGroup == null) {  // keep the first RG, to be used in case all row groups are pruned
               firstRowGroup = rowGroup;
               firstFooter = footer;
             }
@@ -214,10 +225,14 @@ public abstract class AbstractParquetScanBatchCreator {
         mapWithMaxColumns = createReaderAndImplicitColumns(context, rowGroupScan, oContext, columnExplorer, readers, implicitColumns, mapWithMaxColumns, firstRowGroup, fs,
           firstFooter, true);
       }
+      // do some logging, if relevant
       if ( totalPruneTime > 0 ) {
         logger.info("Finished parquet_runtime_pruning in {} usec. Out of given {} rowgroups, {} were pruned. {}", totalPruneTime, totalRowgroups, rowgroupsPruned,
           totalRowgroups == rowgroupsPruned ? "ALL_PRUNED !!" : "");
       }
+      if ( countMatchClassCastExceptions > 0 ) {
+        logger.info("Run-time pruning skipped for {} out of {} rowgroups due to: {}",countMatchClassCastExceptions, totalRowgroups, matchCastErrorMessage);
+      }
 
       // Update stats (same in every reader - the others would just overwrite the stats)
       for (CommonParquetRecordReader rr : readers ) {