You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by pa...@apache.org on 2016/07/20 00:08:44 UTC
[1/2] drill git commit: Fix for DRILL-4759: Drill throwing array
index out of bound exception when reading a parquet file written by map
reduce program
Repository: drill
Updated Branches:
refs/heads/master 4f818d074 -> 34ca63ba1
Fix for DRILL-4759: Drill throwing array index out of bound exception when reading a parquet file written by map reduce program
Added unit test case.
Updated fix
Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/e371e18b
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/e371e18b
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/e371e18b
Branch: refs/heads/master
Commit: e371e18b9bd720a862b55f9e2682b39e1f68ce97
Parents: 4f818d0
Author: Padma Penumarthy <pp...@PPENUMARTHY-E653-MPR13.local>
Authored: Tue Jul 5 11:28:11 2016 -0700
Committer: Parth Chandra <pa...@apache.org>
Committed: Tue Jul 19 10:13:25 2016 -0700
----------------------------------------------------------------------
.../ParquetFixedWidthDictionaryReaders.java | 18 +++++++++++++-----
.../columnreaders/TestColumnReaderFactory.java | 15 +++++++++++++++
.../resources/parquet/bigIntDictionary.parquet | Bin 0 -> 1918423 bytes
3 files changed, 28 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/drill/blob/e371e18b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java
index 00bf5f0..d7b6fbb 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java
@@ -156,12 +156,20 @@ public class ParquetFixedWidthDictionaryReaders {
recordsReadInThisIteration = Math.min(pageReader.currentPageCount
- pageReader.valuesRead, recordsToReadInThisPass - valuesReadInCurrentPass);
- for (int i = 0; i < recordsReadInThisIteration; i++){
- try {
- valueVec.getMutator().setSafe(valuesReadInCurrentPass + i, pageReader.dictionaryValueReader.readLong());
- } catch ( Exception ex) {
- throw ex;
+ if (usingDictionary) {
+ BigIntVector.Mutator mutator = valueVec.getMutator();
+ for (int i = 0; i < recordsReadInThisIteration; i++){
+ mutator.setSafe(valuesReadInCurrentPass + i, pageReader.dictionaryValueReader.readLong());
}
+ // Set the write Index. The next page that gets read might be a page that does not use dictionary encoding
+ // and we will go into the else condition below. The readField method of the parent class requires the
+ // writer index to be set correctly.
+ readLengthInBits = recordsReadInThisIteration * dataTypeLengthInBits;
+ readLength = (int) Math.ceil(readLengthInBits / 8.0);
+ int writerIndex = valueVec.getBuffer().writerIndex();
+ valueVec.getBuffer().setIndex(0, writerIndex + (int)readLength);
+ } else {
+ super.readField(recordsToReadInThisPass);
}
}
}
http://git-wip-us.apache.org/repos/asf/drill/blob/e371e18b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/columnreaders/TestColumnReaderFactory.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/columnreaders/TestColumnReaderFactory.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/columnreaders/TestColumnReaderFactory.java
index 4dff928..bfd894d 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/columnreaders/TestColumnReaderFactory.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/columnreaders/TestColumnReaderFactory.java
@@ -94,4 +94,19 @@ public class TestColumnReaderFactory extends BaseTestQuery {
// query parquet file. We shouldn't get any exception
testNoResult("SELECT * FROM cp.`parquet/decimal_nodictionary.parquet`");
}
+
+ /**
+ * check if BigInt is read correctly with dictionary encoding.
+ */
+ @Test
+ public void testBigIntWithDictionary() throws Exception {
+ String query = "select sum(ts) as total from cp.`parquet/bigIntDictionary.parquet`";
+
+ testBuilder()
+ .sqlQuery(query)
+ .ordered()
+ .baselineColumns("total")
+ .baselineValues(190928593476806865L)
+ .build().run();
+ }
}
http://git-wip-us.apache.org/repos/asf/drill/blob/e371e18b/exec/java-exec/src/test/resources/parquet/bigIntDictionary.parquet
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/parquet/bigIntDictionary.parquet b/exec/java-exec/src/test/resources/parquet/bigIntDictionary.parquet
new file mode 100644
index 0000000..51c59cc
Binary files /dev/null and b/exec/java-exec/src/test/resources/parquet/bigIntDictionary.parquet differ
[2/2] drill git commit: DRILL-4785: Avoid checking for hard affinity
scans in limit 0 shortcut cases
Posted by pa...@apache.org.
DRILL-4785: Avoid checking for hard affinity scans in limit 0 shortcut cases
Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/34ca63ba
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/34ca63ba
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/34ca63ba
Branch: refs/heads/master
Commit: 34ca63ba188e593c961e0b665ac44cd3de2b6b22
Parents: e371e18
Author: vkorukanti <ve...@dremio.com>
Authored: Mon Jul 18 16:56:25 2016 -0700
Committer: Parth Chandra <pa...@apache.org>
Committed: Tue Jul 19 10:13:32 2016 -0700
----------------------------------------------------------------------
.../planner/sql/handlers/DefaultSqlHandler.java | 10 ++-
.../sql/handlers/FindHardDistributionScans.java | 71 ++++++++++++++++++++
.../planner/sql/handlers/FindLimit0Visitor.java | 45 +------------
3 files changed, 80 insertions(+), 46 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/drill/blob/34ca63ba/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/DefaultSqlHandler.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/DefaultSqlHandler.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/DefaultSqlHandler.java
index fda0a1d..e5359a3 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/DefaultSqlHandler.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/DefaultSqlHandler.java
@@ -209,13 +209,16 @@ public class DefaultSqlHandler extends AbstractSqlHandler {
if (context.getOptions().getOption(ExecConstants.EARLY_LIMIT0_OPT) &&
context.getPlannerSettings().isTypeInferenceEnabled() &&
FindLimit0Visitor.containsLimit0(relNode)) {
- // disable distributed mode
- context.getPlannerSettings().forceSingleMode();
// if the schema is known, return the schema directly
final DrillRel shorterPlan;
if ((shorterPlan = FindLimit0Visitor.getDirectScanRelIfFullySchemaed(relNode)) != null) {
return shorterPlan;
}
+
+ if (FindHardDistributionScans.canForceSingleMode(relNode)) {
+ // disable distributed mode
+ context.getPlannerSettings().forceSingleMode();
+ }
}
try {
@@ -256,7 +259,8 @@ public class DefaultSqlHandler extends AbstractSqlHandler {
} else {
// If the query contains a limit 0 clause, disable distributed mode since it is overkill for determining schema.
- if (FindLimit0Visitor.containsLimit0(convertedRelNodeWithSum0)) {
+ if (FindLimit0Visitor.containsLimit0(convertedRelNodeWithSum0) &&
+ FindHardDistributionScans.canForceSingleMode(convertedRelNodeWithSum0)) {
context.getPlannerSettings().forceSingleMode();
}
http://git-wip-us.apache.org/repos/asf/drill/blob/34ca63ba/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/FindHardDistributionScans.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/FindHardDistributionScans.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/FindHardDistributionScans.java
new file mode 100644
index 0000000..7ad72aa
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/FindHardDistributionScans.java
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.planner.sql.handlers;
+
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.RelShuttleImpl;
+import org.apache.calcite.rel.core.TableScan;
+import org.apache.drill.common.exceptions.DrillRuntimeException;
+import org.apache.drill.exec.planner.fragment.DistributionAffinity;
+import org.apache.drill.exec.planner.logical.DrillTable;
+import org.apache.drill.exec.planner.logical.DrillTranslatableTable;
+
+import java.io.IOException;
+
+/**
+ * Visitor to scan the RelNode tree and find if it contains any Scans that require hard distribution requirements.
+ */
+class FindHardDistributionScans extends RelShuttleImpl {
+ private boolean contains;
+
+ /**
+ * Can the given <code>relTree</code> be executed in single fragment mode? For now this returns false when the
+ * <code>relTree</code> contains one or more scans with hard affinity requirements.
+ *
+ * @param relTree
+ * @return
+ */
+ public static boolean canForceSingleMode(final RelNode relTree) {
+ final FindHardDistributionScans hdVisitor = new FindHardDistributionScans();
+ relTree.accept(hdVisitor);
+ // Can't run in single fragment mode if the query contains a table which has hard distribution requirement.
+ return !hdVisitor.contains();
+ }
+
+ @Override
+ public RelNode visit(TableScan scan) {
+ DrillTable unwrap;
+ unwrap = scan.getTable().unwrap(DrillTable.class);
+ if (unwrap == null) {
+ unwrap = scan.getTable().unwrap(DrillTranslatableTable.class).getDrillTable();
+ }
+
+ try {
+ if (unwrap.getGroupScan().getDistributionAffinity() == DistributionAffinity.HARD) {
+ contains = true;
+ }
+ } catch (final IOException e) {
+ throw new DrillRuntimeException("Failed to get GroupScan from table.");
+ }
+ return scan;
+ }
+
+ public boolean contains() {
+ return contains;
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/34ca63ba/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/FindLimit0Visitor.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/FindLimit0Visitor.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/FindLimit0Visitor.java
index e7460b3..6df6e0d 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/FindLimit0Visitor.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/FindLimit0Visitor.java
@@ -23,7 +23,6 @@ import com.google.common.collect.Lists;
import org.apache.calcite.plan.RelTraitSet;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.RelShuttleImpl;
-import org.apache.calcite.rel.core.TableScan;
import org.apache.calcite.rel.logical.LogicalAggregate;
import org.apache.calcite.rel.logical.LogicalIntersect;
import org.apache.calcite.rel.logical.LogicalJoin;
@@ -35,7 +34,6 @@ import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.type.SqlTypeName;
-import org.apache.drill.common.exceptions.DrillRuntimeException;
import org.apache.drill.common.exceptions.ExecutionSetupException;
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.exec.exception.SchemaChangeException;
@@ -43,18 +41,14 @@ import org.apache.drill.exec.expr.TypeHelper;
import org.apache.drill.exec.ops.OperatorContext;
import org.apache.drill.exec.physical.base.ScanStats;
import org.apache.drill.exec.physical.impl.OutputMutator;
-import org.apache.drill.exec.planner.fragment.DistributionAffinity;
import org.apache.drill.exec.planner.logical.DrillDirectScanRel;
import org.apache.drill.exec.planner.logical.DrillLimitRel;
import org.apache.drill.exec.planner.logical.DrillRel;
-import org.apache.drill.exec.planner.logical.DrillTable;
-import org.apache.drill.exec.planner.logical.DrillTranslatableTable;
import org.apache.drill.exec.planner.sql.TypeInferenceUtils;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.store.AbstractRecordReader;
import org.apache.drill.exec.store.direct.DirectGroupScan;
-import java.io.IOException;
import java.util.List;
/**
@@ -117,18 +111,11 @@ public class FindLimit0Visitor extends RelShuttleImpl {
* @param rel rel node tree
* @return true if the root portion of the tree contains LIMIT(0)
*/
- public static boolean containsLimit0(RelNode rel) {
+ public static boolean containsLimit0(final RelNode rel) {
FindLimit0Visitor visitor = new FindLimit0Visitor();
rel.accept(visitor);
- if (!visitor.isContains()) {
- return false;
- }
-
- final FindHardDistributionScans hdVisitor = new FindHardDistributionScans();
- rel.accept(hdVisitor);
- // Can't optimize limit 0 if the query contains a table which has hard distribution requirement.
- return !hdVisitor.contains();
+ return visitor.isContains();
}
private boolean contains = false;
@@ -248,32 +235,4 @@ public class FindLimit0Visitor extends RelShuttleImpl {
public void close() throws Exception {
}
}
- /**
- * Visitor to scan the RelNode tree and find if it contains any Scans that require hard distribution requirements.
- */
- private static class FindHardDistributionScans extends RelShuttleImpl {
- private boolean contains;
-
- @Override
- public RelNode visit(TableScan scan) {
- DrillTable unwrap;
- unwrap = scan.getTable().unwrap(DrillTable.class);
- if (unwrap == null) {
- unwrap = scan.getTable().unwrap(DrillTranslatableTable.class).getDrillTable();
- }
-
- try {
- if (unwrap.getGroupScan().getDistributionAffinity() == DistributionAffinity.HARD) {
- contains = true;
- }
- } catch (final IOException e) {
- throw new DrillRuntimeException("Failed to get GroupScan from table.");
- }
- return scan;
- }
-
- public boolean contains() {
- return contains;
- }
- }
}