You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by xi...@apache.org on 2020/12/18 18:15:42 UTC
[beam] branch master updated: [BEAM-11329]: HDFS not deduplicating
identical configuration paths (#13514)
This is an automated email from the ASF dual-hosted git repository.
xinyu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new 287f77a [BEAM-11329]:HDFS not deduplicating identical configuration paths (#13514)
287f77a is described below
commit 287f77aebf32a71746a15a885eb8dc72d02aeddd
Author: MabelYC <54...@users.noreply.github.com>
AuthorDate: Fri Dec 18 10:15:04 2020 -0800
[BEAM-11329]: HDFS not deduplicating identical configuration paths (#13514)
---
.../beam/sdk/io/hdfs/HadoopFileSystemOptions.java | 9 +++++++--
.../sdk/io/hdfs/HadoopFileSystemOptionsTest.java | 20 ++++++++++++++++++++
2 files changed, 27 insertions(+), 2 deletions(-)
diff --git a/sdks/java/io/hadoop-file-system/src/main/java/org/apache/beam/sdk/io/hdfs/HadoopFileSystemOptions.java b/sdks/java/io/hadoop-file-system/src/main/java/org/apache/beam/sdk/io/hdfs/HadoopFileSystemOptions.java
index 43a6e7e..d9acf0b 100644
--- a/sdks/java/io/hadoop-file-system/src/main/java/org/apache/beam/sdk/io/hdfs/HadoopFileSystemOptions.java
+++ b/sdks/java/io/hadoop-file-system/src/main/java/org/apache/beam/sdk/io/hdfs/HadoopFileSystemOptions.java
@@ -18,6 +18,7 @@
package org.apache.beam.sdk.io.hdfs;
import java.io.File;
+import java.nio.file.Paths;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -112,9 +113,13 @@ public interface HadoopFileSystemOptions extends PipelineOptions {
}
}
- // Load the configuration from paths found (if exists)
+ // Set used to dedup same config paths
+ Set<java.nio.file.Path> confPaths = Sets.newHashSet();
+ // Load the configuration from paths found (if exists and not loaded yet)
for (String confDir : explodedConfDirs) {
- if (new File(confDir).exists()) {
+ java.nio.file.Path path = Paths.get(confDir).normalize();
+ if (new File(confDir).exists() && !confPaths.contains(path)) {
+ confPaths.add(path);
Configuration conf = new Configuration(false);
boolean confLoaded = false;
for (String confName : Lists.newArrayList("core-site.xml", "hdfs-site.xml")) {
diff --git a/sdks/java/io/hadoop-file-system/src/test/java/org/apache/beam/sdk/io/hdfs/HadoopFileSystemOptionsTest.java b/sdks/java/io/hadoop-file-system/src/test/java/org/apache/beam/sdk/io/hdfs/HadoopFileSystemOptionsTest.java
index f2f289f..5af0e8a 100644
--- a/sdks/java/io/hadoop-file-system/src/test/java/org/apache/beam/sdk/io/hdfs/HadoopFileSystemOptionsTest.java
+++ b/sdks/java/io/hadoop-file-system/src/test/java/org/apache/beam/sdk/io/hdfs/HadoopFileSystemOptionsTest.java
@@ -183,6 +183,26 @@ public class HadoopFileSystemOptionsTest {
}
@Test
+ public void testDefaultSetYarnConfDirAndHadoopConfDirAndSameDir() throws IOException {
+ Files.write(
+ createPropertyData("A"), tmpFolder.newFile("core-site.xml"), StandardCharsets.UTF_8);
+ Files.write(
+ createPropertyData("B"), tmpFolder.newFile("hdfs-site.xml"), StandardCharsets.UTF_8);
+ HadoopFileSystemOptions.ConfigurationLocator configurationLocator =
+ spy(new HadoopFileSystemOptions.ConfigurationLocator());
+ Map<String, String> environment = Maps.newHashMap();
+ environment.put("HADOOP_CONF_DIR", tmpFolder.getRoot().getAbsolutePath());
+ environment.put("YARN_CONF_DIR", tmpFolder.getRoot().getAbsolutePath() + "/");
+ when(configurationLocator.getEnvironment()).thenReturn(environment);
+
+ List<Configuration> configurationList =
+ configurationLocator.create(PipelineOptionsFactory.create());
+ assertEquals(1, configurationList.size());
+ assertThat(configurationList.get(0).get("propertyA"), Matchers.equalTo("A"));
+ assertThat(configurationList.get(0).get("propertyB"), Matchers.equalTo("B"));
+ }
+
+ @Test
public void testDefaultSetYarnConfDirAndHadoopConfDirMultiPathAndSameConfiguration()
throws IOException {
File hadoopConfDir = tmpFolder.newFolder("hadoop");