You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by vi...@apache.org on 2020/08/05 16:19:54 UTC
[hudi] branch master updated: [HUDI-1144] Speedup spark read queries by caching metaclient in HoodieROPathFilter (#1919)
This is an automated email from the ASF dual-hosted git repository.
vinoth pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 9bcd322 [HUDI-1144] Speedup spark read queries by caching metaclient in HoodieROPathFilter (#1919)
9bcd322 is described below
commit 9bcd3221fd440081dbae70e89d08539c3b484862
Author: Balaji Varadarajan <ba...@robinhood.com>
AuthorDate: Wed Aug 5 09:19:10 2020 -0700
[HUDI-1144] Speedup spark read queries by caching metaclient in HoodieROPathFilter (#1919)
---
.../apache/hudi/hadoop/HoodieROTablePathFilter.java | 19 ++++++++++++++++---
.../hudi/hadoop/TestHoodieROTablePathFilter.java | 11 ++++++-----
2 files changed, 22 insertions(+), 8 deletions(-)
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java
index 86199d2..1e616f8 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java
@@ -18,6 +18,8 @@
package org.apache.hudi.hadoop;
+import java.util.Map;
+import java.util.Set;
import org.apache.hadoop.conf.Configurable;
import org.apache.hudi.common.config.SerializableConfiguration;
import org.apache.hudi.common.model.HoodieBaseFile;
@@ -60,12 +62,17 @@ public class HoodieROTablePathFilter implements Configurable, PathFilter, Serial
* Its quite common, to have all files from a given partition path be passed into accept(), cache the check for hoodie
* metadata for known partition paths and the latest versions of files.
*/
- private HashMap<String, HashSet<Path>> hoodiePathCache;
+ private Map<String, HashSet<Path>> hoodiePathCache;
/**
* Paths that are known to be non-hoodie tables.
*/
- private HashSet<String> nonHoodiePathCache;
+ private Set<String> nonHoodiePathCache;
+
+ /**
+ * Table Meta Client Cache.
+ */
+ Map<String, HoodieTableMetaClient> metaClientCache;
/**
* Hadoop configurations for the FileSystem.
@@ -82,6 +89,7 @@ public class HoodieROTablePathFilter implements Configurable, PathFilter, Serial
this.hoodiePathCache = new HashMap<>();
this.nonHoodiePathCache = new HashSet<>();
this.conf = new SerializableConfiguration(conf);
+ this.metaClientCache = new HashMap<>();
}
/**
@@ -149,7 +157,12 @@ public class HoodieROTablePathFilter implements Configurable, PathFilter, Serial
if (baseDir != null) {
try {
- HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs.getConf(), baseDir.toString());
+ HoodieTableMetaClient metaClient = metaClientCache.get(baseDir.toString());
+ if (null == metaClient) {
+ metaClient = new HoodieTableMetaClient(fs.getConf(), baseDir.toString(), true);
+ metaClientCache.put(baseDir.toString(), metaClient);
+ }
+
HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(metaClient,
metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants(), fs.listStatus(folder));
List<HoodieBaseFile> latestFiles = fsView.getLatestBaseFiles().collect(Collectors.toList());
diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieROTablePathFilter.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieROTablePathFilter.java
index 18e9afd..f96f6cb 100644
--- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieROTablePathFilter.java
+++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieROTablePathFilter.java
@@ -31,6 +31,7 @@ import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
+import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -55,9 +56,9 @@ public class TestHoodieROTablePathFilter extends HoodieCommonTestHarness {
HoodieTestUtils.createDataFile(basePath, "2017/01/01", "001", "f1");
HoodieTestUtils.createDataFile(basePath, "2017/01/01", "001", "f2");
- HoodieTestUtils.createDataFile(basePath, "2017/01/01", "001", "f3");
+ HoodieTestUtils.createDataFile(basePath, "2017/01/02", "001", "f3");
HoodieTestUtils.createDataFile(basePath, "2017/01/01", "002", "f2");
- HoodieTestUtils.createDataFile(basePath, "2017/01/01", "003", "f3");
+ HoodieTestUtils.createDataFile(basePath, "2017/01/02", "003", "f3");
HoodieROTablePathFilter pathFilter = new HoodieROTablePathFilter();
Path partitionPath = new Path("file://" + basePath + File.separator + "2017/01/01");
@@ -68,11 +69,11 @@ public class TestHoodieROTablePathFilter extends HoodieCommonTestHarness {
assertFalse(
pathFilter.accept(new Path("file:///" + HoodieTestUtils.getDataFilePath(basePath, "2017/01/01", "001", "f2"))));
assertTrue(
- pathFilter.accept(new Path("file:///" + HoodieTestUtils.getDataFilePath(basePath, "2017/01/01", "001", "f3"))));
+ pathFilter.accept(new Path("file:///" + HoodieTestUtils.getDataFilePath(basePath, "2017/01/02", "001", "f3"))));
assertTrue(
pathFilter.accept(new Path("file:///" + HoodieTestUtils.getDataFilePath(basePath, "2017/01/01", "002", "f2"))));
assertFalse(
- pathFilter.accept(new Path("file:///" + HoodieTestUtils.getDataFilePath(basePath, "2017/01/01", "003", "f3"))));
+ pathFilter.accept(new Path("file:///" + HoodieTestUtils.getDataFilePath(basePath, "2017/01/02", "003", "f3"))));
assertFalse(pathFilter.accept(new Path("file:///" + HoodieTestUtils.getCommitFilePath(basePath, "001"))));
assertFalse(pathFilter.accept(new Path("file:///" + HoodieTestUtils.getCommitFilePath(basePath, "002"))));
assertFalse(pathFilter.accept(new Path("file:///" + HoodieTestUtils.getInflightCommitFilePath(basePath, "003"))));
@@ -83,7 +84,7 @@ public class TestHoodieROTablePathFilter extends HoodieCommonTestHarness {
assertFalse(
pathFilter.accept(new Path("file:///" + HoodieTestUtils.getDataFilePath(basePath, "2017/01/01", "003", "f3"))));
-
+ assertEquals(1, pathFilter.metaClientCache.size());
}
@Test