You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by vi...@apache.org on 2020/08/05 16:19:54 UTC

[hudi] branch master updated: [HUDI-1144] Speedup spark read queries by caching metaclient in HoodieROPathFilter (#1919)

This is an automated email from the ASF dual-hosted git repository.

vinoth pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 9bcd322  [HUDI-1144] Speedup spark read queries by caching metaclient in HoodieROPathFilter (#1919)
9bcd322 is described below

commit 9bcd3221fd440081dbae70e89d08539c3b484862
Author: Balaji Varadarajan <ba...@robinhood.com>
AuthorDate: Wed Aug 5 09:19:10 2020 -0700

    [HUDI-1144] Speedup spark read queries by caching metaclient in HoodieROPathFilter (#1919)
---
 .../apache/hudi/hadoop/HoodieROTablePathFilter.java   | 19 ++++++++++++++++---
 .../hudi/hadoop/TestHoodieROTablePathFilter.java      | 11 ++++++-----
 2 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java
index 86199d2..1e616f8 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java
@@ -18,6 +18,8 @@
 
 package org.apache.hudi.hadoop;
 
+import java.util.Map;
+import java.util.Set;
 import org.apache.hadoop.conf.Configurable;
 import org.apache.hudi.common.config.SerializableConfiguration;
 import org.apache.hudi.common.model.HoodieBaseFile;
@@ -60,12 +62,17 @@ public class HoodieROTablePathFilter implements Configurable, PathFilter, Serial
    * It's quite common to have all files from a given partition path passed into accept(), so cache the check for hoodie
    * metadata for known partition paths and the latest versions of files.
    */
-  private HashMap<String, HashSet<Path>> hoodiePathCache;
+  private Map<String, HashSet<Path>> hoodiePathCache;
 
   /**
    * Paths that are known to be non-hoodie tables.
    */
-  private HashSet<String> nonHoodiePathCache;
+  private Set<String> nonHoodiePathCache;
+
+  /**
+   * Table Meta Client Cache.
+   */
+  Map<String, HoodieTableMetaClient> metaClientCache;
 
   /**
    * Hadoop configurations for the FileSystem.
@@ -82,6 +89,7 @@ public class HoodieROTablePathFilter implements Configurable, PathFilter, Serial
     this.hoodiePathCache = new HashMap<>();
     this.nonHoodiePathCache = new HashSet<>();
     this.conf = new SerializableConfiguration(conf);
+    this.metaClientCache = new HashMap<>();
   }
 
   /**
@@ -149,7 +157,12 @@ public class HoodieROTablePathFilter implements Configurable, PathFilter, Serial
 
       if (baseDir != null) {
         try {
-          HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs.getConf(), baseDir.toString());
+          HoodieTableMetaClient metaClient = metaClientCache.get(baseDir.toString());
+          if (null == metaClient) {
+            metaClient = new HoodieTableMetaClient(fs.getConf(), baseDir.toString(), true);
+            metaClientCache.put(baseDir.toString(), metaClient);
+          }
+
           HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(metaClient,
               metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants(), fs.listStatus(folder));
           List<HoodieBaseFile> latestFiles = fsView.getLatestBaseFiles().collect(Collectors.toList());
diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieROTablePathFilter.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieROTablePathFilter.java
index 18e9afd..f96f6cb 100644
--- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieROTablePathFilter.java
+++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieROTablePathFilter.java
@@ -31,6 +31,7 @@ import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
 
+import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
@@ -55,9 +56,9 @@ public class TestHoodieROTablePathFilter extends HoodieCommonTestHarness {
 
     HoodieTestUtils.createDataFile(basePath, "2017/01/01", "001", "f1");
     HoodieTestUtils.createDataFile(basePath, "2017/01/01", "001", "f2");
-    HoodieTestUtils.createDataFile(basePath, "2017/01/01", "001", "f3");
+    HoodieTestUtils.createDataFile(basePath, "2017/01/02", "001", "f3");
     HoodieTestUtils.createDataFile(basePath, "2017/01/01", "002", "f2");
-    HoodieTestUtils.createDataFile(basePath, "2017/01/01", "003", "f3");
+    HoodieTestUtils.createDataFile(basePath, "2017/01/02", "003", "f3");
 
     HoodieROTablePathFilter pathFilter = new HoodieROTablePathFilter();
     Path partitionPath = new Path("file://" + basePath + File.separator + "2017/01/01");
@@ -68,11 +69,11 @@ public class TestHoodieROTablePathFilter extends HoodieCommonTestHarness {
     assertFalse(
         pathFilter.accept(new Path("file:///" + HoodieTestUtils.getDataFilePath(basePath, "2017/01/01", "001", "f2"))));
     assertTrue(
-        pathFilter.accept(new Path("file:///" + HoodieTestUtils.getDataFilePath(basePath, "2017/01/01", "001", "f3"))));
+        pathFilter.accept(new Path("file:///" + HoodieTestUtils.getDataFilePath(basePath, "2017/01/02", "001", "f3"))));
     assertTrue(
         pathFilter.accept(new Path("file:///" + HoodieTestUtils.getDataFilePath(basePath, "2017/01/01", "002", "f2"))));
     assertFalse(
-        pathFilter.accept(new Path("file:///" + HoodieTestUtils.getDataFilePath(basePath, "2017/01/01", "003", "f3"))));
+        pathFilter.accept(new Path("file:///" + HoodieTestUtils.getDataFilePath(basePath, "2017/01/02", "003", "f3"))));
     assertFalse(pathFilter.accept(new Path("file:///" + HoodieTestUtils.getCommitFilePath(basePath, "001"))));
     assertFalse(pathFilter.accept(new Path("file:///" + HoodieTestUtils.getCommitFilePath(basePath, "002"))));
     assertFalse(pathFilter.accept(new Path("file:///" + HoodieTestUtils.getInflightCommitFilePath(basePath, "003"))));
@@ -83,7 +84,7 @@ public class TestHoodieROTablePathFilter extends HoodieCommonTestHarness {
 
     assertFalse(
         pathFilter.accept(new Path("file:///" + HoodieTestUtils.getDataFilePath(basePath, "2017/01/01", "003", "f3"))));
-
+    assertEquals(1, pathFilter.metaClientCache.size());
   }
 
   @Test