You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by jn...@apache.org on 2016/08/24 21:56:08 UTC

drill git commit: DRILL-4852: Fix performance regression for COUNT(*) query over large JSON table

Repository: drill
Updated Branches:
  refs/heads/master 0ccc81aee -> 5b15d0efd


DRILL-4852: Fix performance regression for COUNT(*) query over large JSON table

close apache/drill#576


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/5b15d0ef
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/5b15d0ef
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/5b15d0ef

Branch: refs/heads/master
Commit: 5b15d0efd99b8c632efabf249f8f4cc30eabb48e
Parents: 0ccc81a
Author: Arina Ielchiieva <ar...@gmail.com>
Authored: Wed Aug 24 12:27:42 2016 +0000
Committer: Jinfeng Ni <jn...@apache.org>
Committed: Wed Aug 24 13:52:59 2016 -0700

----------------------------------------------------------------------
 .../exec/store/ImplicitColumnExplorer.java      | 14 +++++------
 .../exec/store/dfs/easy/EasyFormatPlugin.java   |  9 ++-----
 .../store/parquet/ParquetScanBatchCreator.java  |  2 +-
 .../exec/store/TestImplicitFileColumns.java     | 26 +++++++++++++++++---
 4 files changed, 33 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/5b15d0ef/exec/java-exec/src/main/java/org/apache/drill/exec/store/ImplicitColumnExplorer.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ImplicitColumnExplorer.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ImplicitColumnExplorer.java
index af74eb7..b67d8b5 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ImplicitColumnExplorer.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ImplicitColumnExplorer.java
@@ -39,7 +39,7 @@ public class ImplicitColumnExplorer {
 
   private final String partitionDesignator;
   private final List<SchemaPath> columns;
-  private final boolean selectAllColumns;
+  private final boolean isStarQuery;
   private final List<Integer> selectedPartitionColumns;
   private final List<SchemaPath> tableColumns;
   private final Map<String, ImplicitFileColumns> allImplicitColumns;
@@ -54,7 +54,7 @@ public class ImplicitColumnExplorer {
   public ImplicitColumnExplorer(FragmentContext context, List<SchemaPath> columns) {
     this.partitionDesignator = context.getOptions().getOption(ExecConstants.FILESYSTEM_PARTITION_COLUMN_LABEL).string_val;
     this.columns = columns;
-    this.selectAllColumns = columns != null && AbstractRecordReader.isStarQuery(columns);
+    this.isStarQuery = columns != null && AbstractRecordReader.isStarQuery(columns);
     this.selectedPartitionColumns = Lists.newArrayList();
     this.tableColumns = Lists.newArrayList();
     this.allImplicitColumns = initImplicitFileColumns(context.getOptions());
@@ -92,7 +92,7 @@ public class ImplicitColumnExplorer {
       if (p.length > r.length) {
         String[] q = ArrayUtils.subarray(p, r.length, p.length - 1);
         for (int a = 0; a < q.length; a++) {
-          if (selectAllColumns || selectedPartitionColumns.contains(a)) {
+          if (isStarQuery || selectedPartitionColumns.contains(a)) {
             implicitValues.put(partitionDesignator + a, q[a]);
           }
         }
@@ -105,8 +105,8 @@ public class ImplicitColumnExplorer {
     return implicitValues;
   }
 
-  public boolean isSelectAllColumns() {
-    return selectAllColumns;
+  public boolean isStarQuery() {
+    return isStarQuery;
   }
 
   public List<SchemaPath> getTableColumns() {
@@ -114,13 +114,13 @@ public class ImplicitColumnExplorer {
   }
 
   /**
-   * If it is not select all query, sorts out columns into three categories:
+   * If it is not a star query, sorts out columns into three categories:
    * 1. table columns
    * 2. partition columns
    * 3. implicit file columns
    */
   private void init() {
-    if (selectAllColumns) {
+    if (isStarQuery) {
       selectedImplicitColumns.putAll(allImplicitColumns);
     } else {
       Pattern pattern = Pattern.compile(String.format("%s[0-9]+", partitionDesignator));

http://git-wip-us.apache.org/repos/asf/drill/blob/5b15d0ef/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyFormatPlugin.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyFormatPlugin.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyFormatPlugin.java
index f56f445..f95a323 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyFormatPlugin.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyFormatPlugin.java
@@ -41,7 +41,6 @@ import org.apache.drill.exec.planner.physical.PlannerSettings;
 import org.apache.drill.exec.record.CloseableRecordBatch;
 import org.apache.drill.exec.record.RecordBatch;
 import org.apache.drill.exec.server.DrillbitContext;
-import org.apache.drill.exec.store.AbstractRecordReader;
 import org.apache.drill.exec.store.ImplicitColumnExplorer;
 import org.apache.drill.exec.store.RecordReader;
 import org.apache.drill.exec.store.RecordWriter;
@@ -126,13 +125,9 @@ public abstract class EasyFormatPlugin<T extends FormatPluginConfig> implements
   CloseableRecordBatch getReaderBatch(FragmentContext context, EasySubScan scan) throws ExecutionSetupException {
     final ImplicitColumnExplorer columnExplorer = new ImplicitColumnExplorer(context, scan.getColumns());
 
-    if (!columnExplorer.isSelectAllColumns()) {
-      // We must make sure to pass a table column (not to be confused with implicit column) to the underlying record reader.
-      List<SchemaPath> tableColumns =
-          columnExplorer.getTableColumns().size() == 0 ?
-              Lists.<SchemaPath>newArrayList(AbstractRecordReader.STAR_COLUMN) : columnExplorer.getTableColumns();
+    if (!columnExplorer.isStarQuery()) {
       scan = new EasySubScan(scan.getUserName(), scan.getWorkUnits(), scan.getFormatPlugin(),
-          tableColumns, scan.getSelectionRoot());
+          columnExplorer.getTableColumns(), scan.getSelectionRoot());
       scan.setOperatorId(scan.getOperatorId());
     }
 

http://git-wip-us.apache.org/repos/asf/drill/blob/5b15d0ef/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java
index 4d4719b..6c7bc41 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java
@@ -65,7 +65,7 @@ public class ParquetScanBatchCreator implements BatchCreator<ParquetRowGroupScan
 
     final ImplicitColumnExplorer columnExplorer = new ImplicitColumnExplorer(context, rowGroupScan.getColumns());
 
-    if (!columnExplorer.isSelectAllColumns()) {
+    if (!columnExplorer.isStarQuery()) {
       rowGroupScan = new ParquetRowGroupScan(rowGroupScan.getUserName(), rowGroupScan.getStorageEngine(),
           rowGroupScan.getRowGroupReadEntries(), columnExplorer.getTableColumns(), rowGroupScan.getSelectionRoot());
       rowGroupScan.setOperatorId(rowGroupScan.getOperatorId());

http://git-wip-us.apache.org/repos/asf/drill/blob/5b15d0ef/exec/java-exec/src/test/java/org/apache/drill/exec/store/TestImplicitFileColumns.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/TestImplicitFileColumns.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/TestImplicitFileColumns.java
index ce0f5e9..3974448 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/TestImplicitFileColumns.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/TestImplicitFileColumns.java
@@ -63,7 +63,7 @@ public class TestImplicitFileColumns extends BaseTestQuery {
   @Test
   public void testImplicitColumns() throws Exception {
     testBuilder()
-        .sqlQuery("select *, filename, suffix, fqn, filepath from dfs.`" + testFolder.getRoot().getPath() + "` order by filename")
+        .sqlQuery("select *, filename, suffix, fqn, filepath from dfs.`%s` order by filename", testFolder.getRoot().getPath())
         .ordered()
         .baselineColumns("columns", "dir0", "filename", "suffix", "fqn", "filepath")
         .baselineValues(mainColumnValues, null, mainFile.getName(), CSV, new Path(mainFile.getPath()).toString(), new Path(mainFile.getParent()).toString())
@@ -84,7 +84,7 @@ public class TestImplicitFileColumns extends BaseTestQuery {
   @Test
   public void testImplicitColumnAlone() throws Exception {
     testBuilder()
-        .sqlQuery("select filename from dfs.`" + nestedFolder.getPath() + "`")
+        .sqlQuery("select filename from dfs.`%s`", nestedFolder.getPath())
         .unOrdered()
         .baselineColumns("filename")
         .baselineValues(nestedFile.getName())
@@ -94,7 +94,7 @@ public class TestImplicitFileColumns extends BaseTestQuery {
   @Test
   public void testImplicitColumnWithTableColumns() throws Exception {
     testBuilder()
-        .sqlQuery("select columns, filename from dfs.`" + nestedFolder.getPath() + "`")
+        .sqlQuery("select columns, filename from dfs.`%s`", nestedFolder.getPath())
         .unOrdered()
         .baselineColumns("columns", "filename")
         .baselineValues(nestedColumnValues, nestedFile.getName())
@@ -102,6 +102,26 @@ public class TestImplicitFileColumns extends BaseTestQuery {
   }
 
   @Test
+  public void testCountStarWithImplicitColumnsInWhereClause() throws Exception {
+    testBuilder()
+        .sqlQuery("select count(*) as cnt from dfs.`%s` where filename = '%s'", nestedFolder.getPath(), nestedFile.getName())
+        .unOrdered()
+        .baselineColumns("cnt")
+        .baselineValues(1L)
+        .go();
+  }
+
+  @Test
+  public void testImplicitAndPartitionColumnsInSelectClause() throws Exception {
+    testBuilder()
+        .sqlQuery("select dir0, filename from dfs.`%s` order by filename", testFolder.getRoot().getPath()).ordered()
+        .baselineColumns("dir0", "filename")
+        .baselineValues(null, mainFile.getName())
+        .baselineValues(NESTED, nestedFile.getName())
+        .go();
+  }
+
+  @Test
   public void testImplicitColumnsForParquet() throws Exception {
     testBuilder()
         .sqlQuery("select filename, suffix from cp.`tpch/region.parquet` limit 1")