You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@falcon.apache.org by ba...@apache.org on 2016/07/01 16:42:41 UTC

falcon git commit: FALCON-2049 Feed Replication with Empty Directories are failing

Repository: falcon
Updated Branches:
  refs/heads/master d0bc18860 -> b135f28f3


FALCON-2049 Feed Replication with Empty Directories are failing

Author: bvellanki <bv...@hortonworks.com>

Reviewers: "Venkat Ranganathan <ve...@hortonworks.com>, Ying Zheng <yz...@hortonworks.com>, Peeyush B <pe...@apache.org>, Pallavi Rao <pa...@inmobi.com>"

Closes #204 from bvellanki/FALCON-2049


Project: http://git-wip-us.apache.org/repos/asf/falcon/repo
Commit: http://git-wip-us.apache.org/repos/asf/falcon/commit/b135f28f
Tree: http://git-wip-us.apache.org/repos/asf/falcon/tree/b135f28f
Diff: http://git-wip-us.apache.org/repos/asf/falcon/diff/b135f28f

Branch: refs/heads/master
Commit: b135f28f314b1ec0a89c2e1c21b033e48e7db602
Parents: d0bc188
Author: bvellanki <bv...@hortonworks.com>
Authored: Fri Jul 1 09:42:31 2016 -0700
Committer: bvellanki <bv...@hortonworks.com>
Committed: Fri Jul 1 09:42:31 2016 -0700

----------------------------------------------------------------------
 replication/pom.xml                             |  5 +++++
 .../falcon/replication/FeedReplicator.java      | 19 ++++++++++++-----
 .../falcon/replication/FeedReplicatorTest.java  | 22 +++++++++++++-------
 3 files changed, 34 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/falcon/blob/b135f28f/replication/pom.xml
----------------------------------------------------------------------
diff --git a/replication/pom.xml b/replication/pom.xml
index ff66e52..3f47226 100644
--- a/replication/pom.xml
+++ b/replication/pom.xml
@@ -52,6 +52,11 @@
             <groupId>org.apache.falcon</groupId>
             <artifactId>falcon-metrics</artifactId>
         </dependency>
+        <dependency>
+            <groupId>org.apache.falcon</groupId>
+            <artifactId>falcon-test-util</artifactId>
+            <scope>test</scope>
+        </dependency>
 
         <dependency>
             <groupId>org.slf4j</groupId>

http://git-wip-us.apache.org/repos/asf/falcon/blob/b135f28f/replication/src/main/java/org/apache/falcon/replication/FeedReplicator.java
----------------------------------------------------------------------
diff --git a/replication/src/main/java/org/apache/falcon/replication/FeedReplicator.java b/replication/src/main/java/org/apache/falcon/replication/FeedReplicator.java
index a8da51d..0906bd5 100644
--- a/replication/src/main/java/org/apache/falcon/replication/FeedReplicator.java
+++ b/replication/src/main/java/org/apache/falcon/replication/FeedReplicator.java
@@ -184,12 +184,13 @@ public class FeedReplicator extends Configured implements Tool {
         return new GnuParser().parse(options, args);
     }
 
-    protected DistCpOptions getDistCpOptions(CommandLine cmd) {
+    protected DistCpOptions getDistCpOptions(CommandLine cmd) throws FalconException, IOException {
         String[] paths = cmd.getOptionValue("sourcePaths").trim().split(",");
         List<Path> srcPaths = getPaths(paths);
-        String trgPath = cmd.getOptionValue("targetPath").trim();
+        String targetPathString = cmd.getOptionValue("targetPath").trim();
+        Path targetPath = new Path(targetPathString);
 
-        DistCpOptions distcpOptions = new DistCpOptions(srcPaths, new Path(trgPath));
+        DistCpOptions distcpOptions = new DistCpOptions(srcPaths, targetPath);
         distcpOptions.setBlocking(true);
         distcpOptions.setMaxMaps(Integer.parseInt(cmd.getOptionValue("maxMaps")));
         distcpOptions.setMapBandwidth(Integer.parseInt(cmd.getOptionValue("mapBandwidth")));
@@ -214,8 +215,16 @@ public class FeedReplicator extends Configured implements Tool {
         // Removing deleted files by default - FALCON-1844
         String removeDeletedFiles = cmd.getOptionValue(
                 ReplicationDistCpOption.DISTCP_OPTION_REMOVE_DELETED_FILES.getName(), "true");
-        distcpOptions.setDeleteMissing(Boolean.parseBoolean(removeDeletedFiles));
-
+        boolean deleteMissing = Boolean.parseBoolean(removeDeletedFiles);
+        distcpOptions.setDeleteMissing(deleteMissing);
+        if (deleteMissing) {
+            // DistCp fails with InvalidInputException when deleteMissing is true and
+            // targetPath does not exist, so create targetPath up front to avoid that failure.
+            FileSystem fs = HadoopClientFactory.get().createProxiedFileSystem(targetPath.toUri(), getConf());
+            if (!fs.exists(targetPath)) {
+                fs.mkdirs(targetPath);
+            }
+        }
 
         String preserveBlockSize = cmd.getOptionValue(
                 ReplicationDistCpOption.DISTCP_OPTION_PRESERVE_BLOCK_SIZE.getName());

http://git-wip-us.apache.org/repos/asf/falcon/blob/b135f28f/replication/src/test/java/org/apache/falcon/replication/FeedReplicatorTest.java
----------------------------------------------------------------------
diff --git a/replication/src/test/java/org/apache/falcon/replication/FeedReplicatorTest.java b/replication/src/test/java/org/apache/falcon/replication/FeedReplicatorTest.java
index e7e177e..2662ade 100644
--- a/replication/src/test/java/org/apache/falcon/replication/FeedReplicatorTest.java
+++ b/replication/src/test/java/org/apache/falcon/replication/FeedReplicatorTest.java
@@ -17,6 +17,7 @@
  */
 package org.apache.falcon.replication;
 
+import org.apache.falcon.cluster.util.EmbeddedCluster;
 import org.apache.commons.cli.CommandLine;
 import org.apache.falcon.entity.Storage;
 import org.apache.hadoop.fs.Path;
@@ -32,6 +33,8 @@ import java.util.List;
  */
 public class FeedReplicatorTest {
 
+    private String defaultPath = "jail://FeedReplicatorTest:00/tmp";
+
     @Test
     public void testArguments() throws Exception {
         /*
@@ -42,21 +45,26 @@ public class FeedReplicatorTest {
          * <arg>-sourcePaths</arg><arg>${distcpSourcePaths}</arg>
          * <arg>-targetPath</arg><arg>${distcpTargetPaths}</arg>
          */
+
+        // Creates a jailed cluster in which the DistCpOptions command can be tested.
+        EmbeddedCluster cluster =  EmbeddedCluster.newCluster("FeedReplicatorTest");
+
         final String[] args = {
             "true",
             "-maxMaps", "3",
             "-mapBandwidth", "4",
-            "-sourcePaths", "hdfs://localhost:8020/tmp/",
-            "-targetPath", "hdfs://localhost1:8020/tmp/",
+            "-sourcePaths", defaultPath,
+            "-targetPath", defaultPath,
             "-falconFeedStorageType", Storage.TYPE.FILESYSTEM.name(),
         };
 
         FeedReplicator replicator = new FeedReplicator();
         CommandLine cmd = replicator.getCommand(args);
+        replicator.setConf(cluster.getConf());
         DistCpOptions options = replicator.getDistCpOptions(cmd);
 
         List<Path> srcPaths = new ArrayList<Path>();
-        srcPaths.add(new Path("hdfs://localhost:8020/tmp/"));
+        srcPaths.add(new Path(defaultPath));
         validateMandatoryArguments(options, srcPaths, true);
         Assert.assertTrue(options.shouldDeleteMissing());
     }
@@ -82,8 +90,8 @@ public class FeedReplicatorTest {
             "true",
             "-maxMaps", "3",
             "-mapBandwidth", "4",
-            "-sourcePaths", "hdfs://localhost:8020/tmp/",
-            "-targetPath", "hdfs://localhost1:8020/tmp/",
+            "-sourcePaths", defaultPath,
+            "-targetPath", defaultPath,
             "-falconFeedStorageType", Storage.TYPE.FILESYSTEM.name(),
             "-overwrite", "true",
             "-ignoreErrors", "false",
@@ -99,7 +107,7 @@ public class FeedReplicatorTest {
         DistCpOptions options = replicator.getDistCpOptions(cmd);
 
         List<Path> srcPaths = new ArrayList<Path>();
-        srcPaths.add(new Path("hdfs://localhost:8020/tmp/"));
+        srcPaths.add(new Path(defaultPath));
         validateMandatoryArguments(options, srcPaths, false);
         validateOptionalArguments(options);
     }
@@ -108,7 +116,7 @@ public class FeedReplicatorTest {
         Assert.assertEquals(options.getMaxMaps(), 3);
         Assert.assertEquals(options.getMapBandwidth(), 4);
         Assert.assertEquals(options.getSourcePaths(), srcPaths);
-        Assert.assertEquals(options.getTargetPath(), new Path("hdfs://localhost1:8020/tmp/"));
+        Assert.assertEquals(options.getTargetPath(), new Path(defaultPath));
         Assert.assertEquals(options.shouldSyncFolder(), shouldSyncFolder);
     }