Posted to commits@drill.apache.org by so...@apache.org on 2019/05/03 22:46:29 UTC

[drill] 03/08: DRILL-7171: Create metadata directories cache file in the leaf level directories to support ConvertCountToDirectScan optimization. closes #1748

This is an automated email from the ASF dual-hosted git repository.

sorabh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git

commit de74eabe013d9249712a14e1f0acabdc8d3ebc16
Author: Venkata Jyothsna Donapati <jy...@gmail.com>
AuthorDate: Thu Apr 11 14:16:36 2019 -0700

    DRILL-7171: Create metadata directories cache file in the leaf level directories to support ConvertCountToDirectScan optimization.
    closes #1748
---
 .../exec/store/parquet/metadata/Metadata.java      | 18 ++--
 .../logical/TestConvertCountToDirectScan.java      | 98 ++++++++++++++++++----
 2 files changed, 87 insertions(+), 29 deletions(-)
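
In short, as the patch below shows: REFRESH TABLE METADATA now writes the metadata
directories cache file at every directory level, including leaf directories (where
the directories list is empty), instead of only at directories that contain
sub-directories and no files. This lets a count(*) query over a leaf directory
still be converted to a direct scan via the metadata summary. A rough sketch of the
scenario, mirroring the new tests below (table name and paths are illustrative only):

    use dfs.tmp;
    create table `parquet_table_counts/1` as select * from cp.`tpch/nation.parquet`;
    create table `parquet_table_counts/2` as select * from cp.`tpch/nation.parquet`;
    refresh table metadata parquet_table_counts;
    -- Count over a single leaf directory. Per the new tests, the plan is expected
    -- to show usedMetadataSummaryFile = true and a DynamicPojoRecordReader.
    select count(*) as star_count from `parquet_table_counts/1`;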

diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/metadata/Metadata.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/metadata/Metadata.java
index 59849e7..5459a8a 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/metadata/Metadata.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/metadata/Metadata.java
@@ -352,23 +352,15 @@ public class Metadata {
     writeFile(metadataTableWithRelativePaths.fileMetadata, new Path(path, METADATA_FILENAME), fs);
     writeFile(metadataTableWithRelativePaths.getSummary(), new Path(path, METADATA_SUMMARY_FILENAME), fs);
     Metadata_V4.MetadataSummary metadataSummaryWithRelativePaths = metadataTableWithRelativePaths.getSummary();
-
-    if (directoryList.size() > 0 && childFiles.size() == 0) {
-      ParquetTableMetadataDirs parquetTableMetadataDirsRelativePaths =
-          new ParquetTableMetadataDirs(metadataSummaryWithRelativePaths.directories);
-      writeFile(parquetTableMetadataDirsRelativePaths, new Path(path, METADATA_DIRECTORIES_FILENAME), fs);
-      if (timer != null) {
-        logger.debug("Creating metadata files recursively took {} ms", timer.elapsed(TimeUnit.MILLISECONDS));
-      }
-      ParquetTableMetadataDirs parquetTableMetadataDirs = new ParquetTableMetadataDirs(directoryList);
-      return Pair.of(parquetTableMetadata, parquetTableMetadataDirs);
-    }
-    List<Path> emptyDirList = new ArrayList<>();
+    // The directories list will be empty for leaf-level directories. For sub-directories that contain both files and
+    // directories, only the directories are included in the list.
+    writeFile(new ParquetTableMetadataDirs(metadataSummaryWithRelativePaths.directories),
+        new Path(path, METADATA_DIRECTORIES_FILENAME), fs);
     if (timer != null) {
       logger.debug("Creating metadata files recursively took {} ms", timer.elapsed(TimeUnit.MILLISECONDS));
       timer.stop();
     }
-    return Pair.of(parquetTableMetadata, new ParquetTableMetadataDirs(emptyDirList));
+    return Pair.of(parquetTableMetadata, new ParquetTableMetadataDirs(directoryList));
   }
 
   /**
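
A rough before/after sketch of the cache files written for a table `t` that has one
leaf sub-directory `t/1` holding only parquet files (names illustrative; the actual
cache file names come from METADATA_DIRECTORIES_FILENAME and METADATA_SUMMARY_FILENAME
in the code above):

    before: t/   -> file metadata, summary, directories cache listing t/1
            t/1/ -> file metadata, summary (no directories cache)
    after:  t/   -> file metadata, summary, directories cache listing t/1
            t/1/ -> file metadata, summary, directories cache with an empty list
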
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/planner/logical/TestConvertCountToDirectScan.java b/exec/java-exec/src/test/java/org/apache/drill/exec/planner/logical/TestConvertCountToDirectScan.java
index 4bd3a0f..eaf9257 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/planner/logical/TestConvertCountToDirectScan.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/planner/logical/TestConvertCountToDirectScan.java
@@ -190,11 +190,11 @@ public class TestConvertCountToDirectScan extends PlanTestBase {
       testPlanMatchingPatterns(sql, new String[]{numFilesPattern, usedMetaSummaryPattern, recordReaderPattern});
 
       testBuilder()
-              .sqlQuery(sql)
-              .unOrdered()
-              .baselineColumns("star_count", "int_column_count", "vrchr_column_count")
-              .baselineValues(24L, 8L, 12L)
-              .go();
+          .sqlQuery(sql)
+          .unOrdered()
+          .baselineColumns("star_count", "int_column_count", "vrchr_column_count")
+          .baselineValues(24L, 8L, 12L)
+          .go();
 
     } finally {
       test("drop table if exists %s", tableName);
@@ -222,17 +222,17 @@ public class TestConvertCountToDirectScan extends PlanTestBase {
 
       int expectedNumFiles = 1;
       String numFilesPattern = "numFiles = " + expectedNumFiles;
-      String usedMetaSummaryPattern = "usedMetadataSummaryFile = false";
+      String usedMetaSummaryPattern = "usedMetadataSummaryFile = true";
       String recordReaderPattern = "DynamicPojoRecordReader";
 
       testPlanMatchingPatterns(sql, new String[]{numFilesPattern, usedMetaSummaryPattern, recordReaderPattern});
 
       testBuilder()
-              .sqlQuery(sql)
-              .unOrdered()
-              .baselineColumns("star_count", "int_column_count", "vrchr_column_count")
-              .baselineValues(6L, 2L, 3L)
-              .go();
+          .sqlQuery(sql)
+          .unOrdered()
+          .baselineColumns("star_count", "int_column_count", "vrchr_column_count")
+          .baselineValues(6L, 2L, 3L)
+          .go();
 
     } finally {
       test("drop table if exists %s", tableName);
@@ -264,11 +264,77 @@ public class TestConvertCountToDirectScan extends PlanTestBase {
       testPlanMatchingPatterns(sql, new String[]{usedMetaSummaryPattern, recordReaderPattern});
 
       testBuilder()
-              .sqlQuery(sql)
-              .unOrdered()
-              .baselineColumns("star_count")
-              .baselineValues(250L)
-              .go();
+          .sqlQuery(sql)
+          .unOrdered()
+          .baselineColumns("star_count")
+          .baselineValues(250L)
+          .go();
+
+    } finally {
+      test("drop table if exists %s", tableName);
+    }
+  }
+
+  @Test
+  public void testCountsForLeafDirectories() throws Exception {
+    test("use dfs.tmp");
+    String tableName = "parquet_table_counts";
+
+    try {
+      test("create table `%s/1` as select * from cp.`tpch/nation.parquet`", tableName);
+      test("create table `%s/2` as select * from cp.`tpch/nation.parquet`", tableName);
+      test("create table `%s/3` as select * from cp.`tpch/nation.parquet`", tableName);
+      test("refresh table metadata %s", tableName);
+
+      String sql = String.format("select\n" +
+              "count(*) as star_count\n" +
+              "from `%s/1`", tableName);
+
+      int expectedNumFiles = 1;
+      String numFilesPattern = "numFiles = " + expectedNumFiles;
+      String usedMetaSummaryPattern = "usedMetadataSummaryFile = true";
+      String recordReaderPattern = "DynamicPojoRecordReader";
+
+      testPlanMatchingPatterns(sql, new String[]{numFilesPattern, usedMetaSummaryPattern, recordReaderPattern});
+
+      testBuilder()
+          .sqlQuery(sql)
+          .unOrdered()
+          .baselineColumns("star_count")
+          .baselineValues(25L)
+          .go();
+
+    } finally {
+      test("drop table if exists %s", tableName);
+    }
+  }
+
+  @Test
+  public void testCountsForDirWithFilesAndDir() throws Exception {
+    test("use dfs.tmp");
+    String tableName = "parquet_table_counts";
+
+    try {
+      test("create table `%s/1` as select * from cp.`tpch/nation.parquet`", tableName);
+      test("create table `%s/1/2` as select * from cp.`tpch/nation.parquet`", tableName);
+      test("create table `%s/1/3` as select * from cp.`tpch/nation.parquet`", tableName);
+      test("refresh table metadata %s", tableName);
+
+      String sql = String.format("select count(*) as star_count from `%s/1`", tableName);
+
+      int expectedNumFiles = 1;
+      String numFilesPattern = "numFiles = " + expectedNumFiles;
+      String usedMetaSummaryPattern = "usedMetadataSummaryFile = true";
+      String recordReaderPattern = "DynamicPojoRecordReader";
+
+      testPlanMatchingPatterns(sql, new String[]{numFilesPattern, usedMetaSummaryPattern, recordReaderPattern});
+
+      testBuilder()
+          .sqlQuery(sql)
+          .unOrdered()
+          .baselineColumns("star_count")
+          .baselineValues(75L)
+          .go();
 
     } finally {
       test("drop table if exists %s", tableName);