You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sqoop.apache.org by va...@apache.org on 2018/07/13 07:35:33 UTC

sqoop git commit: SQOOP-3330: Sqoop --append does not work with -Dmapreduce.output.basename

Repository: sqoop
Updated Branches:
  refs/heads/trunk 0cfbf5671 -> a6bedca4b


SQOOP-3330: Sqoop --append does not work with -Dmapreduce.output.basename

(Eric Lin via Szabolcs Vasas)


Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/a6bedca4
Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/a6bedca4
Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/a6bedca4

Branch: refs/heads/trunk
Commit: a6bedca4b272173292ab3aeadedea7283aacd16e
Parents: 0cfbf56
Author: Szabolcs Vasas <va...@apache.org>
Authored: Fri Jul 13 09:34:38 2018 +0200
Committer: Szabolcs Vasas <va...@apache.org>
Committed: Fri Jul 13 09:34:38 2018 +0200

----------------------------------------------------------------------
 src/java/org/apache/sqoop/util/AppendUtils.java | 23 ++++++++++--
 src/test/org/apache/sqoop/TestAppendUtils.java  | 38 +++++++++++++++++---
 2 files changed, 53 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/sqoop/blob/a6bedca4/src/java/org/apache/sqoop/util/AppendUtils.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/util/AppendUtils.java b/src/java/org/apache/sqoop/util/AppendUtils.java
index fa85280..20c0d13 100644
--- a/src/java/org/apache/sqoop/util/AppendUtils.java
+++ b/src/java/org/apache/sqoop/util/AppendUtils.java
@@ -43,7 +43,7 @@ public class AppendUtils {
   private static final String FILEPART_SEPARATOR = "-";
   private static final String FILEEXT_SEPARATOR = ".";
 
-  private static final Pattern DATA_PART_PATTERN = Pattern.compile("part.*-([0-9]{" + PARTITION_DIGITS + "}+).*");
+  public static final String DATA_PART_PATTERN_PREFIX = "part";
 
   private ImportJobContext context = null;
 
@@ -115,7 +115,7 @@ public class AppendUtils {
       for (FileStatus fileStat : existingFiles) {
         if (!fileStat.isDir()) {
           String filename = fileStat.getPath().getName();
-          Matcher mat = DATA_PART_PATTERN.matcher(filename);
+          Matcher mat = getDataFileNamePattern().matcher(filename);
           if (mat.matches()) {
             int thisPart = Integer.parseInt(mat.group(1));
             if (thisPart >= nextPartition) {
@@ -204,7 +204,7 @@ public class AppendUtils {
 
           LOG.debug("Directory: " + sourceFilename + " renamed to: " + destPath.getName());
         }
-      } else if (DATA_PART_PATTERN.matcher(sourceFilename).matches()) {    // move only matching top-level files
+      } else if (getDataFileNamePattern().matcher(sourceFilename).matches()) {    // move only matching top-level files
         do {
           // clear the builder in case this isn't the first iteration
           destFilename.setLength(0);
@@ -276,4 +276,21 @@ public class AppendUtils {
     return new Path(tempDir);
   }
 
+  /**
+   * Builds the regex that recognises data files by name, e.g. "part-m-00000".
+   *
+   * The prefix comes from mapreduce.output.basename (falling back to "part") and is
+   * quoted, so regex metacharacters in a user-supplied basename are matched literally.
+   *
+   * @return Pattern whose group 1 captures the partition number digits
+   */
+  private Pattern getDataFileNamePattern() {
+    String prefix = context.getOptions().getConf().get("mapreduce.output.basename");
+
+    if (null == prefix || prefix.length() == 0) {
+      prefix = DATA_PART_PATTERN_PREFIX;
+    }
+
+    return Pattern.compile(Pattern.quote(prefix) + ".*-([0-9]{" + PARTITION_DIGITS + "}+).*");
+  }
 }

http://git-wip-us.apache.org/repos/asf/sqoop/blob/a6bedca4/src/test/org/apache/sqoop/TestAppendUtils.java
----------------------------------------------------------------------
diff --git a/src/test/org/apache/sqoop/TestAppendUtils.java b/src/test/org/apache/sqoop/TestAppendUtils.java
index f14fc6a..3d66bec 100644
--- a/src/test/org/apache/sqoop/TestAppendUtils.java
+++ b/src/test/org/apache/sqoop/TestAppendUtils.java
@@ -131,10 +131,10 @@ public class TestAppendUtils extends ImportJobTestCase {
   }
 
   /** @return FileStatus for data files only. */
-  private FileStatus[] listFiles(FileSystem fs, Path path) throws IOException {
+  private FileStatus[] listFiles(FileSystem fs, Path path, String prefix) throws IOException {
     FileStatus[] fileStatuses = fs.listStatus(path);
     ArrayList files = new ArrayList();
-    Pattern patt = Pattern.compile("part.*-([0-9][0-9][0-9][0-9][0-9]).*");
+    Pattern patt = Pattern.compile(prefix + ".*-([0-9][0-9][0-9][0-9][0-9]).*");
     for (FileStatus fstat : fileStatuses) {
       String fname = fstat.getPath().getName();
       if (!fstat.isDir()) {
@@ -147,6 +147,10 @@ public class TestAppendUtils extends ImportJobTestCase {
     return (FileStatus[]) files.toArray(new FileStatus[files.size()]);
   }
 
+  private FileStatus[] listFiles(FileSystem fs, Path path) throws IOException {
+    return listFiles(fs, path, AppendUtils.DATA_PART_PATTERN_PREFIX);  // default "part" prefix
+  }
+
   private class StatusPathComparator implements Comparator<FileStatus> {
 
     @Override
@@ -183,13 +187,17 @@ public class TestAppendUtils extends ImportJobTestCase {
     }
   }
 
+  public void runAppendTest(ArrayList args, Path outputPath) throws IOException {
+    runAppendTest(args, outputPath, AppendUtils.DATA_PART_PATTERN_PREFIX);  // default "part" prefix
+  }
+
   /**
    * Test for ouput path file-count increase, current files untouched and new
    * correct partition number.
    *
    * @throws IOException
    */
-  public void runAppendTest(ArrayList args, Path outputPath)
+  public void runAppendTest(ArrayList args, Path outputPath, String prefix)
       throws IOException {
 
     try {
@@ -205,7 +213,7 @@ public class TestAppendUtils extends ImportJobTestCase {
       runUncleanImport(argv);
 
       // get current file count
-      FileStatus[] fileStatuses = listFiles(fs, outputPath);
+      FileStatus[] fileStatuses = listFiles(fs, outputPath, prefix);
       Arrays.sort(fileStatuses, new StatusPathComparator());
       int previousFileCount = fileStatuses.length;
 
@@ -223,7 +231,7 @@ public class TestAppendUtils extends ImportJobTestCase {
       runUncleanImport(argv);
 
       // check directory file increase
-      fileStatuses = listFiles(fs, outputPath);
+      fileStatuses = listFiles(fs, outputPath, prefix);
       Arrays.sort(fileStatuses, new StatusPathComparator());
       int currentFileCount = fileStatuses.length;
       assertTrue("Output directory didn't got increased in file count ",
@@ -311,5 +319,25 @@ public class TestAppendUtils extends ImportJobTestCase {
     utils.append();
   }
 
+  /**
+   * Test that when -Dmapreduce.output.basename=prefix is passed, files still get appended.
+   *
+   * @throws IOException
+   */
+  @Test
+  public void testAppendWithMapreduceOutputBasename() throws IOException {
+    String prefix = "prefix-test";
+
+    ArrayList<String> args = new ArrayList<>();
+    args.add("-D");
+    args.add("mapreduce.output.basename=" + prefix);
+    args.addAll(getOutputlessArgv(false, true, HsqldbTestServer.getFieldNames(), getConf()));
+    String targetDir = getWarehouseDir() + "/tempTargetDirOutputBaseNameTest";
+    args.add("--target-dir");
+    args.add(targetDir);
+
+    Path output = new Path(targetDir);
+    runAppendTest(args, output, prefix);  // data files are listed using the custom prefix
+  }
 }