You are viewing a plain text version of this content. The canonical link to the original was not preserved in this copy.
Posted to common-commits@hadoop.apache.org by st...@apache.org on 2019/03/22 10:36:50 UTC

[hadoop] branch branch-3.2 updated: HADOOP-16147. Allow CopyListing sequence file keys and values to be more easily customized.

This is an automated email from the ASF dual-hosted git repository.

stevel pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
     new ade3af6  HADOOP-16147. Allow CopyListing sequence file keys and values to be more easily customized.
ade3af6 is described below

commit ade3af6ef233319b0a8efe14d212059a86fe8c23
Author: Andrew Olson <an...@cerner.com>
AuthorDate: Fri Mar 22 10:36:34 2019 +0000

    HADOOP-16147. Allow CopyListing sequence file keys and values to be more easily customized.
    
    Author:    Andrew Olson
    (cherry picked from commit faba3591d32f2e4808c2faeb9472348d52619c8a)
---
 .../java/org/apache/hadoop/tools/CopyListing.java  | 23 ++++++++++++++++++++++
 .../org/apache/hadoop/tools/SimpleCopyListing.java |  4 ++--
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java
index e018b0b..6f8aa34 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java
@@ -249,6 +249,29 @@ public abstract class CopyListing extends Configured {
   }
 
   /**
+   * Returns the key for an entry in the copy listing sequence file.
+   * @param sourcePathRoot the root source path for determining the relative
+   *                       target path
+   * @param fileStatus the copy listing file status
+   * @return the key for the sequence file entry
+   */
+  protected Text getFileListingKey(Path sourcePathRoot,
+      CopyListingFileStatus fileStatus) {
+    return new Text(DistCpUtils.getRelativePath(sourcePathRoot,
+        fileStatus.getPath()));
+  }
+
+  /**
+   * Returns the value for an entry in the copy listing sequence file.
+   * @param fileStatus the copy listing file status
+   * @return the value for the sequence file entry
+   */
+  protected CopyListingFileStatus getFileListingValue(
+      CopyListingFileStatus fileStatus) {
+    return fileStatus;
+  }
+
+  /**
    * Public Factory method with which the appropriate CopyListing implementation may be retrieved.
    * @param configuration The input configuration.
    * @param credentials Credentials object on which the FS delegation tokens are cached
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java
index a908e12..7e5a26a 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java
@@ -718,8 +718,8 @@ public class SimpleCopyListing extends CopyListing {
       return;
     }
 
-    fileListWriter.append(new Text(DistCpUtils.getRelativePath(sourcePathRoot,
-        fileStatus.getPath())), fileStatus);
+    fileListWriter.append(getFileListingKey(sourcePathRoot, fileStatus),
+        getFileListingValue(fileStatus));
     fileListWriter.sync();
 
     if (!fileStatus.isDirectory()) {


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org