You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by jn...@apache.org on 2016/08/24 21:56:08 UTC
drill git commit: DRILL-4852: Fix performance regression for COUNT(*) query over large JSON table
Repository: drill
Updated Branches:
refs/heads/master 0ccc81aee -> 5b15d0efd
DRILL-4852: Fix performance regression for COUNT(*) query over large JSON table
close apache/drill#576
Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/5b15d0ef
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/5b15d0ef
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/5b15d0ef
Branch: refs/heads/master
Commit: 5b15d0efd99b8c632efabf249f8f4cc30eabb48e
Parents: 0ccc81a
Author: Arina Ielchiieva <ar...@gmail.com>
Authored: Wed Aug 24 12:27:42 2016 +0000
Committer: Jinfeng Ni <jn...@apache.org>
Committed: Wed Aug 24 13:52:59 2016 -0700
----------------------------------------------------------------------
.../exec/store/ImplicitColumnExplorer.java | 14 +++++------
.../exec/store/dfs/easy/EasyFormatPlugin.java | 9 ++-----
.../store/parquet/ParquetScanBatchCreator.java | 2 +-
.../exec/store/TestImplicitFileColumns.java | 26 +++++++++++++++++---
4 files changed, 33 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/drill/blob/5b15d0ef/exec/java-exec/src/main/java/org/apache/drill/exec/store/ImplicitColumnExplorer.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ImplicitColumnExplorer.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ImplicitColumnExplorer.java
index af74eb7..b67d8b5 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ImplicitColumnExplorer.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ImplicitColumnExplorer.java
@@ -39,7 +39,7 @@ public class ImplicitColumnExplorer {
private final String partitionDesignator;
private final List<SchemaPath> columns;
- private final boolean selectAllColumns;
+ private final boolean isStarQuery;
private final List<Integer> selectedPartitionColumns;
private final List<SchemaPath> tableColumns;
private final Map<String, ImplicitFileColumns> allImplicitColumns;
@@ -54,7 +54,7 @@ public class ImplicitColumnExplorer {
public ImplicitColumnExplorer(FragmentContext context, List<SchemaPath> columns) {
this.partitionDesignator = context.getOptions().getOption(ExecConstants.FILESYSTEM_PARTITION_COLUMN_LABEL).string_val;
this.columns = columns;
- this.selectAllColumns = columns != null && AbstractRecordReader.isStarQuery(columns);
+ this.isStarQuery = columns != null && AbstractRecordReader.isStarQuery(columns);
this.selectedPartitionColumns = Lists.newArrayList();
this.tableColumns = Lists.newArrayList();
this.allImplicitColumns = initImplicitFileColumns(context.getOptions());
@@ -92,7 +92,7 @@ public class ImplicitColumnExplorer {
if (p.length > r.length) {
String[] q = ArrayUtils.subarray(p, r.length, p.length - 1);
for (int a = 0; a < q.length; a++) {
- if (selectAllColumns || selectedPartitionColumns.contains(a)) {
+ if (isStarQuery || selectedPartitionColumns.contains(a)) {
implicitValues.put(partitionDesignator + a, q[a]);
}
}
@@ -105,8 +105,8 @@ public class ImplicitColumnExplorer {
return implicitValues;
}
- public boolean isSelectAllColumns() {
- return selectAllColumns;
+ public boolean isStarQuery() {
+ return isStarQuery;
}
public List<SchemaPath> getTableColumns() {
@@ -114,13 +114,13 @@ public class ImplicitColumnExplorer {
}
/**
- * If it is not select all query, sorts out columns into three categories:
+ * If it is not star query, sorts out columns into three categories:
* 1. table columns
* 2. partition columns
* 3. implicit file columns
*/
private void init() {
- if (selectAllColumns) {
+ if (isStarQuery) {
selectedImplicitColumns.putAll(allImplicitColumns);
} else {
Pattern pattern = Pattern.compile(String.format("%s[0-9]+", partitionDesignator));
http://git-wip-us.apache.org/repos/asf/drill/blob/5b15d0ef/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyFormatPlugin.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyFormatPlugin.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyFormatPlugin.java
index f56f445..f95a323 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyFormatPlugin.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyFormatPlugin.java
@@ -41,7 +41,6 @@ import org.apache.drill.exec.planner.physical.PlannerSettings;
import org.apache.drill.exec.record.CloseableRecordBatch;
import org.apache.drill.exec.record.RecordBatch;
import org.apache.drill.exec.server.DrillbitContext;
-import org.apache.drill.exec.store.AbstractRecordReader;
import org.apache.drill.exec.store.ImplicitColumnExplorer;
import org.apache.drill.exec.store.RecordReader;
import org.apache.drill.exec.store.RecordWriter;
@@ -126,13 +125,9 @@ public abstract class EasyFormatPlugin<T extends FormatPluginConfig> implements
CloseableRecordBatch getReaderBatch(FragmentContext context, EasySubScan scan) throws ExecutionSetupException {
final ImplicitColumnExplorer columnExplorer = new ImplicitColumnExplorer(context, scan.getColumns());
- if (!columnExplorer.isSelectAllColumns()) {
- // We must make sure to pass a table column (not to be confused with implicit column) to the underlying record reader.
- List<SchemaPath> tableColumns =
- columnExplorer.getTableColumns().size() == 0 ?
- Lists.<SchemaPath>newArrayList(AbstractRecordReader.STAR_COLUMN) : columnExplorer.getTableColumns();
+ if (!columnExplorer.isStarQuery()) {
scan = new EasySubScan(scan.getUserName(), scan.getWorkUnits(), scan.getFormatPlugin(),
- tableColumns, scan.getSelectionRoot());
+ columnExplorer.getTableColumns(), scan.getSelectionRoot());
scan.setOperatorId(scan.getOperatorId());
}
http://git-wip-us.apache.org/repos/asf/drill/blob/5b15d0ef/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java
index 4d4719b..6c7bc41 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java
@@ -65,7 +65,7 @@ public class ParquetScanBatchCreator implements BatchCreator<ParquetRowGroupScan
final ImplicitColumnExplorer columnExplorer = new ImplicitColumnExplorer(context, rowGroupScan.getColumns());
- if (!columnExplorer.isSelectAllColumns()) {
+ if (!columnExplorer.isStarQuery()) {
rowGroupScan = new ParquetRowGroupScan(rowGroupScan.getUserName(), rowGroupScan.getStorageEngine(),
rowGroupScan.getRowGroupReadEntries(), columnExplorer.getTableColumns(), rowGroupScan.getSelectionRoot());
rowGroupScan.setOperatorId(rowGroupScan.getOperatorId());
http://git-wip-us.apache.org/repos/asf/drill/blob/5b15d0ef/exec/java-exec/src/test/java/org/apache/drill/exec/store/TestImplicitFileColumns.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/TestImplicitFileColumns.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/TestImplicitFileColumns.java
index ce0f5e9..3974448 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/TestImplicitFileColumns.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/TestImplicitFileColumns.java
@@ -63,7 +63,7 @@ public class TestImplicitFileColumns extends BaseTestQuery {
@Test
public void testImplicitColumns() throws Exception {
testBuilder()
- .sqlQuery("select *, filename, suffix, fqn, filepath from dfs.`" + testFolder.getRoot().getPath() + "` order by filename")
+ .sqlQuery("select *, filename, suffix, fqn, filepath from dfs.`%s` order by filename", testFolder.getRoot().getPath())
.ordered()
.baselineColumns("columns", "dir0", "filename", "suffix", "fqn", "filepath")
.baselineValues(mainColumnValues, null, mainFile.getName(), CSV, new Path(mainFile.getPath()).toString(), new Path(mainFile.getParent()).toString())
@@ -84,7 +84,7 @@ public class TestImplicitFileColumns extends BaseTestQuery {
@Test
public void testImplicitColumnAlone() throws Exception {
testBuilder()
- .sqlQuery("select filename from dfs.`" + nestedFolder.getPath() + "`")
+ .sqlQuery("select filename from dfs.`%s`", nestedFolder.getPath())
.unOrdered()
.baselineColumns("filename")
.baselineValues(nestedFile.getName())
@@ -94,7 +94,7 @@ public class TestImplicitFileColumns extends BaseTestQuery {
@Test
public void testImplicitColumnWithTableColumns() throws Exception {
testBuilder()
- .sqlQuery("select columns, filename from dfs.`" + nestedFolder.getPath() + "`")
+ .sqlQuery("select columns, filename from dfs.`%s`", nestedFolder.getPath())
.unOrdered()
.baselineColumns("columns", "filename")
.baselineValues(nestedColumnValues, nestedFile.getName())
@@ -102,6 +102,26 @@ public class TestImplicitFileColumns extends BaseTestQuery {
}
@Test
+ public void testCountStarWithImplicitColumnsInWhereClause() throws Exception {
+ testBuilder()
+ .sqlQuery("select count(*) as cnt from dfs.`%s` where filename = '%s'", nestedFolder.getPath(), nestedFile.getName())
+ .unOrdered()
+ .baselineColumns("cnt")
+ .baselineValues(1L)
+ .go();
+ }
+
+ @Test
+ public void testImplicitAndPartitionColumnsInSelectClause() throws Exception {
+ testBuilder()
+ .sqlQuery("select dir0, filename from dfs.`%s` order by filename", testFolder.getRoot().getPath()).ordered()
+ .baselineColumns("dir0", "filename")
+ .baselineValues(null, mainFile.getName())
+ .baselineValues(NESTED, nestedFile.getName())
+ .go();
+ }
+
+ @Test
public void testImplicitColumnsForParquet() throws Exception {
testBuilder()
.sqlQuery("select filename, suffix from cp.`tpch/region.parquet` limit 1")