Posted to commits@drill.apache.org by pa...@apache.org on 2016/07/20 00:08:44 UTC

[1/2] drill git commit: Fix for DRILL-4759: Drill throws an array index out of bounds exception when reading a Parquet file written by a MapReduce program

Repository: drill
Updated Branches:
  refs/heads/master 4f818d074 -> 34ca63ba1


Fix for DRILL-4759: Drill throws an array index out of bounds exception when reading a Parquet file written by a MapReduce program

Added a unit test case and updated the fix.

As the hunk below shows, the reader previously decoded values through the dictionary value reader even for pages that are not dictionary-encoded; the fix reads through the dictionary only when the page actually uses it, and otherwise delegates to the parent class's readField().


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/e371e18b
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/e371e18b
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/e371e18b

Branch: refs/heads/master
Commit: e371e18b9bd720a862b55f9e2682b39e1f68ce97
Parents: 4f818d0
Author: Padma Penumarthy <pp...@PPENUMARTHY-E653-MPR13.local>
Authored: Tue Jul 5 11:28:11 2016 -0700
Committer: Parth Chandra <pa...@apache.org>
Committed: Tue Jul 19 10:13:25 2016 -0700

----------------------------------------------------------------------
 .../ParquetFixedWidthDictionaryReaders.java      |  18 +++++++++++++-----
 .../columnreaders/TestColumnReaderFactory.java   |  15 +++++++++++++++
 .../resources/parquet/bigIntDictionary.parquet   | Bin 0 -> 1918423 bytes
 3 files changed, 28 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/e371e18b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java
index 00bf5f0..d7b6fbb 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java
@@ -156,12 +156,20 @@ public class ParquetFixedWidthDictionaryReaders {
       recordsReadInThisIteration = Math.min(pageReader.currentPageCount
           - pageReader.valuesRead, recordsToReadInThisPass - valuesReadInCurrentPass);
 
-      for (int i = 0; i < recordsReadInThisIteration; i++){
-        try {
-        valueVec.getMutator().setSafe(valuesReadInCurrentPass + i, pageReader.dictionaryValueReader.readLong());
-        } catch ( Exception ex) {
-          throw ex;
+      if (usingDictionary) {
+      if (usingDictionary) {
+        BigIntVector.Mutator mutator = valueVec.getMutator();
+        for (int i = 0; i < recordsReadInThisIteration; i++) {
+          mutator.setSafe(valuesReadInCurrentPass + i, pageReader.dictionaryValueReader.readLong());
         }
+        // Set the writer index. The next page that gets read might be a page that does not use dictionary encoding
+        // and we will go into the else condition below. The readField method of the parent class requires the
+        // writer index to be set correctly.
+        readLengthInBits = recordsReadInThisIteration * dataTypeLengthInBits;
+        readLength = (int) Math.ceil(readLengthInBits / 8.0);
+        int writerIndex = valueVec.getBuffer().writerIndex();
+        valueVec.getBuffer().setIndex(0, writerIndex + (int)readLength);
+      } else {
+        super.readField(recordsToReadInThisPass);
       }
     }
   }
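
The writer-index bookkeeping in the dictionary branch above is the heart of the fix: BigInt values are 64 bits wide, so after decoding N values from a dictionary-encoded page the vector's buffer must report N * 8 bytes as written, or a following plain-encoded page (handled by super.readField()) would start writing at the wrong offset. A minimal, self-contained sketch of that arithmetic follows; the class name and the sample values are hypothetical, not taken from the commit:

// Standalone sketch of the writer-index arithmetic from the hunk above.
public class WriterIndexSketch {
  public static void main(String[] args) {
    final int recordsReadInThisIteration = 1000; // hypothetical page size
    final int dataTypeLengthInBits = 64;         // BigInt is 8 bytes per value

    final long readLengthInBits = (long) recordsReadInThisIteration * dataTypeLengthInBits;
    final int readLength = (int) Math.ceil(readLengthInBits / 8.0);

    final int writerIndex = 0;                   // hypothetical current offset
    // The vector buffer's writer index is advanced by the bytes just decoded.
    final int newWriterIndex = writerIndex + readLength;
    System.out.println("advance writer index by " + readLength
        + " bytes to " + newWriterIndex);        // prints: advance writer index by 8000 bytes to 8000
  }
}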

http://git-wip-us.apache.org/repos/asf/drill/blob/e371e18b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/columnreaders/TestColumnReaderFactory.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/columnreaders/TestColumnReaderFactory.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/columnreaders/TestColumnReaderFactory.java
index 4dff928..bfd894d 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/columnreaders/TestColumnReaderFactory.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/columnreaders/TestColumnReaderFactory.java
@@ -94,4 +94,19 @@ public class TestColumnReaderFactory extends BaseTestQuery {
     // query parquet file. We shouldn't get any exception
     testNoResult("SELECT * FROM cp.`parquet/decimal_nodictionary.parquet`");
   }
+
+  /**
+   * Check that BigInt is read correctly with dictionary encoding.
+   */
+  @Test
+  public void testBigIntWithDictionary() throws Exception {
+    String query = "select sum(ts) as total from cp.`parquet/bigIntDictionary.parquet`";
+
+    testBuilder()
+    .sqlQuery(query)
+    .ordered()
+    .baselineColumns("total")
+    .baselineValues(190928593476806865L)
+    .build().run();
+  }
 }

http://git-wip-us.apache.org/repos/asf/drill/blob/e371e18b/exec/java-exec/src/test/resources/parquet/bigIntDictionary.parquet
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/parquet/bigIntDictionary.parquet b/exec/java-exec/src/test/resources/parquet/bigIntDictionary.parquet
new file mode 100644
index 0000000..51c59cc
Binary files /dev/null and b/exec/java-exec/src/test/resources/parquet/bigIntDictionary.parquet differ


[2/2] drill git commit: DRILL-4785: Avoid checking for hard affinity scans in limit 0 shortcut cases

Posted by pa...@apache.org.
DRILL-4785: Avoid checking for hard affinity scans in limit 0 shortcut cases


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/34ca63ba
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/34ca63ba
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/34ca63ba

Branch: refs/heads/master
Commit: 34ca63ba188e593c961e0b665ac44cd3de2b6b22
Parents: e371e18
Author: vkorukanti <ve...@dremio.com>
Authored: Mon Jul 18 16:56:25 2016 -0700
Committer: Parth Chandra <pa...@apache.org>
Committed: Tue Jul 19 10:13:32 2016 -0700

----------------------------------------------------------------------
 .../planner/sql/handlers/DefaultSqlHandler.java | 10 ++-
 .../sql/handlers/FindHardDistributionScans.java | 71 ++++++++++++++++++++
 .../planner/sql/handlers/FindLimit0Visitor.java | 45 +------------
 3 files changed, 80 insertions(+), 46 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/34ca63ba/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/DefaultSqlHandler.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/DefaultSqlHandler.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/DefaultSqlHandler.java
index fda0a1d..e5359a3 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/DefaultSqlHandler.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/DefaultSqlHandler.java
@@ -209,13 +209,16 @@ public class DefaultSqlHandler extends AbstractSqlHandler {
     if (context.getOptions().getOption(ExecConstants.EARLY_LIMIT0_OPT) &&
         context.getPlannerSettings().isTypeInferenceEnabled() &&
         FindLimit0Visitor.containsLimit0(relNode)) {
-      // disable distributed mode
-      context.getPlannerSettings().forceSingleMode();
       // if the schema is known, return the schema directly
       final DrillRel shorterPlan;
       if ((shorterPlan = FindLimit0Visitor.getDirectScanRelIfFullySchemaed(relNode)) != null) {
         return shorterPlan;
       }
+
+      if (FindHardDistributionScans.canForceSingleMode(relNode)) {
+        // disable distributed mode
+        context.getPlannerSettings().forceSingleMode();
+      }
     }
 
     try {
@@ -256,7 +259,8 @@ public class DefaultSqlHandler extends AbstractSqlHandler {
       } else {
 
         // If the query contains a limit 0 clause, disable distributed mode since it is overkill for determining schema.
-        if (FindLimit0Visitor.containsLimit0(convertedRelNodeWithSum0)) {
+        if (FindLimit0Visitor.containsLimit0(convertedRelNodeWithSum0) &&
+            FindHardDistributionScans.canForceSingleMode(convertedRelNodeWithSum0)) {
           context.getPlannerSettings().forceSingleMode();
         }
 
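Taken together, the two hunks above reorder the limit-0 shortcut: the schema-only direct-scan plan is attempted first, and single-fragment mode is forced only when no scan in the tree demands hard affinity. A condensed sketch of the resulting flow follows; the helper class and method are hypothetical, the QueryContext parameter type is an assumption, it assumes the org.apache.drill.exec.planner.sql.handlers package (FindHardDistributionScans is package-private), and the accessor calls are the ones shown in the diff:

package org.apache.drill.exec.planner.sql.handlers;

import org.apache.calcite.rel.RelNode;
import org.apache.drill.exec.ExecConstants;
import org.apache.drill.exec.ops.QueryContext;
import org.apache.drill.exec.planner.logical.DrillRel;

class Limit0ShortcutSketch {
  /** Returns a schema-only plan when one can be built, otherwise null. */
  static DrillRel tryLimit0Shortcut(RelNode relNode, QueryContext context) {
    if (context.getOptions().getOption(ExecConstants.EARLY_LIMIT0_OPT)
        && context.getPlannerSettings().isTypeInferenceEnabled()
        && FindLimit0Visitor.containsLimit0(relNode)) {
      // 1. If every column type is already known, return a direct-scan plan
      //    and skip execution entirely.
      final DrillRel shorterPlan = FindLimit0Visitor.getDirectScanRelIfFullySchemaed(relNode);
      if (shorterPlan != null) {
        return shorterPlan;
      }
      // 2. Otherwise force single-fragment mode only when no scan requires
      //    hard affinity; such scans must execute on specific nodes.
      if (FindHardDistributionScans.canForceSingleMode(relNode)) {
        context.getPlannerSettings().forceSingleMode();
      }
    }
    return null; // no shortcut; continue normal planning
  }
}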

http://git-wip-us.apache.org/repos/asf/drill/blob/34ca63ba/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/FindHardDistributionScans.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/FindHardDistributionScans.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/FindHardDistributionScans.java
new file mode 100644
index 0000000..7ad72aa
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/FindHardDistributionScans.java
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.planner.sql.handlers;
+
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.RelShuttleImpl;
+import org.apache.calcite.rel.core.TableScan;
+import org.apache.drill.common.exceptions.DrillRuntimeException;
+import org.apache.drill.exec.planner.fragment.DistributionAffinity;
+import org.apache.drill.exec.planner.logical.DrillTable;
+import org.apache.drill.exec.planner.logical.DrillTranslatableTable;
+
+import java.io.IOException;
+
+/**
+ * Visitor to scan the RelNode tree and find whether it contains any scans with hard distribution requirements.
+ */
+class FindHardDistributionScans extends RelShuttleImpl {
+  private boolean contains;
+
+  /**
+   * Can the given <code>relTree</code> be executed in single fragment mode? For now this returns false when the
+   * <code>relTree</code> contains one or more scans with hard affinity requirements.
+   *
+   * @param relTree rel node tree to check
+   * @return true if the <code>relTree</code> can run in a single fragment, false otherwise
+   */
+  public static boolean canForceSingleMode(final RelNode relTree) {
+    final FindHardDistributionScans hdVisitor = new FindHardDistributionScans();
+    relTree.accept(hdVisitor);
+    // Can't run in single fragment mode if the query contains a table which has a hard distribution requirement.
+    return !hdVisitor.contains();
+  }
+
+  @Override
+  public RelNode visit(TableScan scan) {
+    DrillTable unwrap;
+    unwrap = scan.getTable().unwrap(DrillTable.class);
+    if (unwrap == null) {
+      unwrap = scan.getTable().unwrap(DrillTranslatableTable.class).getDrillTable();
+    }
+
+    try {
+      if (unwrap.getGroupScan().getDistributionAffinity() == DistributionAffinity.HARD) {
+        contains = true;
+      }
+    } catch (final IOException e) {
+      throw new DrillRuntimeException("Failed to get GroupScan from table.");
+    }
+    return scan;
+  }
+
+  public boolean contains() {
+    return contains;
+  }
+}
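
The new visitor follows Calcite's standard RelShuttleImpl pattern: relTree.accept(shuttle) walks the plan tree and calls the shuttle's visit(TableScan) override for every scan it encounters, while other node types fall through to the base-class traversal. As a hedged illustration of the same mechanism (a hypothetical class, not part of the commit), a shuttle that merely counts scans looks like this:

import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.RelShuttleImpl;
import org.apache.calcite.rel.core.TableScan;

// Hypothetical visitor counting TableScan nodes, using the same traversal
// mechanism as FindHardDistributionScans.
class ScanCounter extends RelShuttleImpl {
  private int count;

  @Override
  public RelNode visit(TableScan scan) {
    count++;       // invoked once per TableScan in the tree
    return scan;   // scans are leaves; return them unchanged
  }

  static int countScans(RelNode relTree) {
    final ScanCounter counter = new ScanCounter();
    relTree.accept(counter);
    return counter.count;
  }
}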

http://git-wip-us.apache.org/repos/asf/drill/blob/34ca63ba/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/FindLimit0Visitor.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/FindLimit0Visitor.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/FindLimit0Visitor.java
index e7460b3..6df6e0d 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/FindLimit0Visitor.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/FindLimit0Visitor.java
@@ -23,7 +23,6 @@ import com.google.common.collect.Lists;
 import org.apache.calcite.plan.RelTraitSet;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.RelShuttleImpl;
-import org.apache.calcite.rel.core.TableScan;
 import org.apache.calcite.rel.logical.LogicalAggregate;
 import org.apache.calcite.rel.logical.LogicalIntersect;
 import org.apache.calcite.rel.logical.LogicalJoin;
@@ -35,7 +34,6 @@ import org.apache.calcite.rex.RexLiteral;
 import org.apache.calcite.rex.RexNode;
 import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.sql.type.SqlTypeName;
-import org.apache.drill.common.exceptions.DrillRuntimeException;
 import org.apache.drill.common.exceptions.ExecutionSetupException;
 import org.apache.drill.common.types.TypeProtos;
 import org.apache.drill.exec.exception.SchemaChangeException;
@@ -43,18 +41,14 @@ import org.apache.drill.exec.expr.TypeHelper;
 import org.apache.drill.exec.ops.OperatorContext;
 import org.apache.drill.exec.physical.base.ScanStats;
 import org.apache.drill.exec.physical.impl.OutputMutator;
-import org.apache.drill.exec.planner.fragment.DistributionAffinity;
 import org.apache.drill.exec.planner.logical.DrillDirectScanRel;
 import org.apache.drill.exec.planner.logical.DrillLimitRel;
 import org.apache.drill.exec.planner.logical.DrillRel;
-import org.apache.drill.exec.planner.logical.DrillTable;
-import org.apache.drill.exec.planner.logical.DrillTranslatableTable;
 import org.apache.drill.exec.planner.sql.TypeInferenceUtils;
 import org.apache.drill.exec.record.MaterializedField;
 import org.apache.drill.exec.store.AbstractRecordReader;
 import org.apache.drill.exec.store.direct.DirectGroupScan;
 
-import java.io.IOException;
 import java.util.List;
 
 /**
@@ -117,18 +111,11 @@ public class FindLimit0Visitor extends RelShuttleImpl {
    * @param rel rel node tree
    * @return true if the root portion of the tree contains LIMIT(0)
    */
-  public static boolean containsLimit0(RelNode rel) {
+  public static boolean containsLimit0(final RelNode rel) {
     FindLimit0Visitor visitor = new FindLimit0Visitor();
     rel.accept(visitor);
 
-    if (!visitor.isContains()) {
-      return false;
-    }
-
-    final FindHardDistributionScans hdVisitor = new FindHardDistributionScans();
-    rel.accept(hdVisitor);
-    // Can't optimize limit 0 if the query contains a table which has hard distribution requirement.
-    return !hdVisitor.contains();
+    return visitor.isContains();
   }
 
   private boolean contains = false;
@@ -248,32 +235,4 @@ public class FindLimit0Visitor extends RelShuttleImpl {
     public void close() throws Exception {
     }
   }
-  /**
-   * Visitor to scan the RelNode tree and find if it contains any Scans that require hard distribution requirements.
-   */
-  private static class FindHardDistributionScans extends RelShuttleImpl {
-    private boolean contains;
-
-    @Override
-    public RelNode visit(TableScan scan) {
-      DrillTable unwrap;
-      unwrap = scan.getTable().unwrap(DrillTable.class);
-      if (unwrap == null) {
-        unwrap = scan.getTable().unwrap(DrillTranslatableTable.class).getDrillTable();
-      }
-
-      try {
-        if (unwrap.getGroupScan().getDistributionAffinity() == DistributionAffinity.HARD) {
-          contains = true;
-        }
-      } catch (final IOException e) {
-        throw new DrillRuntimeException("Failed to get GroupScan from table.");
-      }
-      return scan;
-    }
-
-    public boolean contains() {
-      return contains;
-    }
-  }
 }