You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by so...@apache.org on 2019/05/03 22:46:29 UTC
[drill] 03/08: DRILL-7171: Create metadata directories cache file
in the leaf level directories to support ConvertCountToDirectScan
optimization. Closes #1748.
This is an automated email from the ASF dual-hosted git repository.
sorabh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
commit de74eabe013d9249712a14e1f0acabdc8d3ebc16
Author: Venkata Jyothsna Donapati <jy...@gmail.com>
AuthorDate: Thu Apr 11 14:16:36 2019 -0700
DRILL-7171: Create metadata directories cache file in the leaf level directories to support ConvertCountToDirectScan optimization.
Closes #1748.
---
.../exec/store/parquet/metadata/Metadata.java | 18 ++--
.../logical/TestConvertCountToDirectScan.java | 98 ++++++++++++++++++----
2 files changed, 87 insertions(+), 29 deletions(-)
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/metadata/Metadata.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/metadata/Metadata.java
index 59849e7..5459a8a 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/metadata/Metadata.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/metadata/Metadata.java
@@ -352,23 +352,15 @@ public class Metadata {
writeFile(metadataTableWithRelativePaths.fileMetadata, new Path(path, METADATA_FILENAME), fs);
writeFile(metadataTableWithRelativePaths.getSummary(), new Path(path, METADATA_SUMMARY_FILENAME), fs);
Metadata_V4.MetadataSummary metadataSummaryWithRelativePaths = metadataTableWithRelativePaths.getSummary();
-
- if (directoryList.size() > 0 && childFiles.size() == 0) {
- ParquetTableMetadataDirs parquetTableMetadataDirsRelativePaths =
- new ParquetTableMetadataDirs(metadataSummaryWithRelativePaths.directories);
- writeFile(parquetTableMetadataDirsRelativePaths, new Path(path, METADATA_DIRECTORIES_FILENAME), fs);
- if (timer != null) {
- logger.debug("Creating metadata files recursively took {} ms", timer.elapsed(TimeUnit.MILLISECONDS));
- }
- ParquetTableMetadataDirs parquetTableMetadataDirs = new ParquetTableMetadataDirs(directoryList);
- return Pair.of(parquetTableMetadata, parquetTableMetadataDirs);
- }
- List<Path> emptyDirList = new ArrayList<>();
+ // Directories list will be empty at the leaf level directories. For sub-directories with both files and directories,
+ // only the directories will be included in the list.
+ writeFile(new ParquetTableMetadataDirs(metadataSummaryWithRelativePaths.directories),
+ new Path(path, METADATA_DIRECTORIES_FILENAME), fs);
if (timer != null) {
logger.debug("Creating metadata files recursively took {} ms", timer.elapsed(TimeUnit.MILLISECONDS));
timer.stop();
}
- return Pair.of(parquetTableMetadata, new ParquetTableMetadataDirs(emptyDirList));
+ return Pair.of(parquetTableMetadata, new ParquetTableMetadataDirs(directoryList));
}
/**
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/planner/logical/TestConvertCountToDirectScan.java b/exec/java-exec/src/test/java/org/apache/drill/exec/planner/logical/TestConvertCountToDirectScan.java
index 4bd3a0f..eaf9257 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/planner/logical/TestConvertCountToDirectScan.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/planner/logical/TestConvertCountToDirectScan.java
@@ -190,11 +190,11 @@ public class TestConvertCountToDirectScan extends PlanTestBase {
testPlanMatchingPatterns(sql, new String[]{numFilesPattern, usedMetaSummaryPattern, recordReaderPattern});
testBuilder()
- .sqlQuery(sql)
- .unOrdered()
- .baselineColumns("star_count", "int_column_count", "vrchr_column_count")
- .baselineValues(24L, 8L, 12L)
- .go();
+ .sqlQuery(sql)
+ .unOrdered()
+ .baselineColumns("star_count", "int_column_count", "vrchr_column_count")
+ .baselineValues(24L, 8L, 12L)
+ .go();
} finally {
test("drop table if exists %s", tableName);
@@ -222,17 +222,17 @@ public class TestConvertCountToDirectScan extends PlanTestBase {
int expectedNumFiles = 1;
String numFilesPattern = "numFiles = " + expectedNumFiles;
- String usedMetaSummaryPattern = "usedMetadataSummaryFile = false";
+ String usedMetaSummaryPattern = "usedMetadataSummaryFile = true";
String recordReaderPattern = "DynamicPojoRecordReader";
testPlanMatchingPatterns(sql, new String[]{numFilesPattern, usedMetaSummaryPattern, recordReaderPattern});
testBuilder()
- .sqlQuery(sql)
- .unOrdered()
- .baselineColumns("star_count", "int_column_count", "vrchr_column_count")
- .baselineValues(6L, 2L, 3L)
- .go();
+ .sqlQuery(sql)
+ .unOrdered()
+ .baselineColumns("star_count", "int_column_count", "vrchr_column_count")
+ .baselineValues(6L, 2L, 3L)
+ .go();
} finally {
test("drop table if exists %s", tableName);
@@ -264,11 +264,77 @@ public class TestConvertCountToDirectScan extends PlanTestBase {
testPlanMatchingPatterns(sql, new String[]{usedMetaSummaryPattern, recordReaderPattern});
testBuilder()
- .sqlQuery(sql)
- .unOrdered()
- .baselineColumns("star_count")
- .baselineValues(250L)
- .go();
+ .sqlQuery(sql)
+ .unOrdered()
+ .baselineColumns("star_count")
+ .baselineValues(250L)
+ .go();
+
+ } finally {
+ test("drop table if exists %s", tableName);
+ }
+ }
+
+ @Test
+ public void testCountsForLeafDirectories() throws Exception {
+ test("use dfs.tmp");
+ String tableName = "parquet_table_counts";
+
+ try {
+ test("create table `%s/1` as select * from cp.`tpch/nation.parquet`", tableName);
+ test("create table `%s/2` as select * from cp.`tpch/nation.parquet`", tableName);
+ test("create table `%s/3` as select * from cp.`tpch/nation.parquet`", tableName);
+ test("refresh table metadata %s", tableName);
+
+ String sql = String.format("select\n" +
+ "count(*) as star_count\n" +
+ "from `%s/1`", tableName);
+
+ int expectedNumFiles = 1;
+ String numFilesPattern = "numFiles = " + expectedNumFiles;
+ String usedMetaSummaryPattern = "usedMetadataSummaryFile = true";
+ String recordReaderPattern = "DynamicPojoRecordReader";
+
+ testPlanMatchingPatterns(sql, new String[]{numFilesPattern, usedMetaSummaryPattern, recordReaderPattern});
+
+ testBuilder()
+ .sqlQuery(sql)
+ .unOrdered()
+ .baselineColumns("star_count")
+ .baselineValues(25L)
+ .go();
+
+ } finally {
+ test("drop table if exists %s", tableName);
+ }
+ }
+
+ @Test
+ public void testCountsForDirWithFilesAndDir() throws Exception {
+ test("use dfs.tmp");
+ String tableName = "parquet_table_counts";
+
+ try {
+ test("create table `%s/1` as select * from cp.`tpch/nation.parquet`", tableName);
+ test("create table `%s/1/2` as select * from cp.`tpch/nation.parquet`", tableName);
+ test("create table `%s/1/3` as select * from cp.`tpch/nation.parquet`", tableName);
+ test("refresh table metadata %s", tableName);
+
+ String sql = String.format("select count(*) as star_count from `%s/1`", tableName);
+
+ int expectedNumFiles = 1;
+ String numFilesPattern = "numFiles = " + expectedNumFiles;
+ String usedMetaSummaryPattern = "usedMetadataSummaryFile = true";
+ String recordReaderPattern = "DynamicPojoRecordReader";
+
+ testPlanMatchingPatterns(sql, new String[]{numFilesPattern, usedMetaSummaryPattern, recordReaderPattern});
+
+ testBuilder()
+ .sqlQuery(sql)
+ .unOrdered()
+ .baselineColumns("star_count")
+ .baselineValues(75L)
+ .go();
} finally {
test("drop table if exists %s", tableName);