You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by xi...@apache.org on 2020/12/18 18:15:42 UTC

[beam] branch master updated: [BEAM-11329]:HDFS not deduplicating identical configuration paths (#13514)

This is an automated email from the ASF dual-hosted git repository.

xinyu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new 287f77a  [BEAM-11329]:HDFS not deduplicating identical configuration paths (#13514)
287f77a is described below

commit 287f77aebf32a71746a15a885eb8dc72d02aeddd
Author: MabelYC <54...@users.noreply.github.com>
AuthorDate: Fri Dec 18 10:15:04 2020 -0800

    [BEAM-11329]:HDFS not deduplicating identical configuration paths (#13514)
---
 .../beam/sdk/io/hdfs/HadoopFileSystemOptions.java    |  9 +++++++--
 .../sdk/io/hdfs/HadoopFileSystemOptionsTest.java     | 20 ++++++++++++++++++++
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/sdks/java/io/hadoop-file-system/src/main/java/org/apache/beam/sdk/io/hdfs/HadoopFileSystemOptions.java b/sdks/java/io/hadoop-file-system/src/main/java/org/apache/beam/sdk/io/hdfs/HadoopFileSystemOptions.java
index 43a6e7e..d9acf0b 100644
--- a/sdks/java/io/hadoop-file-system/src/main/java/org/apache/beam/sdk/io/hdfs/HadoopFileSystemOptions.java
+++ b/sdks/java/io/hadoop-file-system/src/main/java/org/apache/beam/sdk/io/hdfs/HadoopFileSystemOptions.java
@@ -18,6 +18,7 @@
 package org.apache.beam.sdk.io.hdfs;
 
 import java.io.File;
+import java.nio.file.Paths;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -112,9 +113,13 @@ public interface HadoopFileSystemOptions extends PipelineOptions {
         }
       }
 
-      // Load the configuration from paths found (if exists)
+      // Tracks normalized config paths already seen, to skip duplicates
+      Set<java.nio.file.Path> confPaths = Sets.newHashSet();
+      // Load the configuration from each path found (if it exists and was not loaded yet)
       for (String confDir : explodedConfDirs) {
-        if (new File(confDir).exists()) {
+        java.nio.file.Path path = Paths.get(confDir).normalize();
+        if (new File(confDir).exists() && !confPaths.contains(path)) {
+          confPaths.add(path);
           Configuration conf = new Configuration(false);
           boolean confLoaded = false;
           for (String confName : Lists.newArrayList("core-site.xml", "hdfs-site.xml")) {
diff --git a/sdks/java/io/hadoop-file-system/src/test/java/org/apache/beam/sdk/io/hdfs/HadoopFileSystemOptionsTest.java b/sdks/java/io/hadoop-file-system/src/test/java/org/apache/beam/sdk/io/hdfs/HadoopFileSystemOptionsTest.java
index f2f289f..5af0e8a 100644
--- a/sdks/java/io/hadoop-file-system/src/test/java/org/apache/beam/sdk/io/hdfs/HadoopFileSystemOptionsTest.java
+++ b/sdks/java/io/hadoop-file-system/src/test/java/org/apache/beam/sdk/io/hdfs/HadoopFileSystemOptionsTest.java
@@ -183,6 +183,26 @@ public class HadoopFileSystemOptionsTest {
   }
 
   @Test
+  public void testDefaultSetYarnConfDirAndHadoopConfDirAndSameDir() throws IOException {
+    Files.write(
+        createPropertyData("A"), tmpFolder.newFile("core-site.xml"), StandardCharsets.UTF_8);
+    Files.write(
+        createPropertyData("B"), tmpFolder.newFile("hdfs-site.xml"), StandardCharsets.UTF_8);
+    HadoopFileSystemOptions.ConfigurationLocator configurationLocator =
+        spy(new HadoopFileSystemOptions.ConfigurationLocator());
+    Map<String, String> environment = Maps.newHashMap();
+    environment.put("HADOOP_CONF_DIR", tmpFolder.getRoot().getAbsolutePath());
+    environment.put("YARN_CONF_DIR", tmpFolder.getRoot().getAbsolutePath() + "/");
+    when(configurationLocator.getEnvironment()).thenReturn(environment);
+
+    List<Configuration> configurationList =
+        configurationLocator.create(PipelineOptionsFactory.create());
+    assertEquals(1, configurationList.size());
+    assertThat(configurationList.get(0).get("propertyA"), Matchers.equalTo("A"));
+    assertThat(configurationList.get(0).get("propertyB"), Matchers.equalTo("B"));
+  }
+
+  @Test
   public void testDefaultSetYarnConfDirAndHadoopConfDirMultiPathAndSameConfiguration()
       throws IOException {
     File hadoopConfDir = tmpFolder.newFolder("hadoop");