Posted to commits@hbase.apache.org by ps...@apache.org on 2021/01/05 08:25:00 UTC

[hbase] branch master updated: HBASE-25318 Config option for IntegrationTestImportTsv where to generate HFiles to bulkload (#2777)

This is an automated email from the ASF dual-hosted git repository.

psomogyi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/master by this push:
     new 481662a  HBASE-25318 Config option for IntegrationTestImportTsv where to generate HFiles to bulkload (#2777)
481662a is described below

commit 481662ab39f8803849001b002bc5a8470f667d1e
Author: Mate Szalay-Beko <sz...@gmail.com>
AuthorDate: Tue Jan 5 09:24:24 2021 +0100

    HBASE-25318 Config option for IntegrationTestImportTsv where to generate HFiles to bulkload (#2777)
    
    IntegrationTestImportTsv generates HFiles under the working directory of the
    current HDFS user executing the tool, before bulkloading them into HBase.
    
    Assuming you encrypt the HBase root directory within HDFS (using HDFS
    Transparent Encryption), you can bulkload HFiles only if they sit in the same
    encryption zone in HDFS as the HBase root directory itself.
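
    For illustration, an encryption zone is set up with the Hadoop KMS and the
    hdfs crypto tool; the key name and path below are examples only, not
    something this change introduces:

    ```
    # create an encryption key in the KMS, then turn an empty directory
    # into an encryption zone protected by that key (example names only)
    hadoop key create mykey
    hdfs crypto -createZone -keyName mykey -path /hbase
    ```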
    
    When IntegrationTestImportTsv is executed against a real distributed cluster
    and the working directory of the current user (e.g. /user/hbase) is not in the
    same encryption zone as the HBase root directory (e.g. /hbase/data), then the
    bulkload fails with an exception:
    
    ```
    ERROR org.apache.hadoop.hbase.regionserver.HRegion: There was a partial failure
    due to IO when attempting to load d :
    hdfs://mycluster/user/hbase/test-data/22d8460d-04cc-e032-88ca-2cc20a7dd01c/
    IntegrationTestImportTsv/hfiles/d/74655e3f8da142cb94bc31b64f0475cc
    
    org.apache.hadoop.ipc.RemoteException(java.io.IOException):
    /user/hbase/test-data/22d8460d-04cc-e032-88ca-2cc20a7dd01c/
    IntegrationTestImportTsv/hfiles/d/74655e3f8da142cb94bc31b64f0475cc
    can't be moved into an encryption zone.
    ```
    
    This commit makes the folder where IntegrationTestImportTsv generates the
    HFiles configurable.
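
    The new option is passed as a generic -D option when running the test; a
    minimal sketch (the output path below is only an example; pick a folder
    inside the same encryption zone as the HBase root directory):

    ```
    hbase org.apache.hadoop.hbase.mapreduce.IntegrationTestImportTsv \
      -DIntegrationTestImportTsv.generatedHFileFolder=/hbase/staging/hfiles
    ```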
    
    Co-authored-by: Mate Szalay-Beko <sy...@apache.com>
    Signed-off-by: Peter Somogyi <ps...@apache.org>
---
 .../hbase/mapreduce/IntegrationTestImportTsv.java  | 36 ++++++++++++++++++++--
 1 file changed, 34 insertions(+), 2 deletions(-)

diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestImportTsv.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestImportTsv.java
index c80d61c..28b4ae4 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestImportTsv.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestImportTsv.java
@@ -22,6 +22,7 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
+import java.io.File;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.HashMap;
@@ -29,6 +30,7 @@ import java.util.Iterator;
 import java.util.Map;
 import java.util.Set;
 import java.util.TreeSet;
+import java.util.UUID;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.fs.FileSystem;
@@ -66,6 +68,8 @@ public class IntegrationTestImportTsv extends Configured implements Tool {
 
   private static final String NAME = IntegrationTestImportTsv.class.getSimpleName();
   private static final Logger LOG = LoggerFactory.getLogger(IntegrationTestImportTsv.class);
+  private static final String GENERATED_HFILE_FOLDER_PARAM_KEY =
+    "IntegrationTestImportTsv.generatedHFileFolder";
 
   protected static final String simple_tsv =
       "row1\t1\tc1\tc2\n" +
@@ -190,8 +194,8 @@ public class IntegrationTestImportTsv extends Configured implements Tool {
   void generateAndLoad(final TableName table) throws Exception {
     LOG.info("Running test testGenerateAndLoad.");
     String cf = "d";
-    Path hfiles = new Path(
-        util.getDataTestDirOnTestFS(table.getNameAsString()), "hfiles");
+    Path hfiles = initGeneratedHFilePath(table);
+    LOG.info("The folder where the HFiles will be generated: {}", hfiles.toString());
 
     Map<String, String> args = new HashMap<>();
     args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfiles.toString());
@@ -220,6 +224,12 @@ public class IntegrationTestImportTsv extends Configured implements Tool {
       System.err.println(format("%s [genericOptions]", NAME));
       System.err.println("  Runs ImportTsv integration tests against a distributed cluster.");
       System.err.println();
+      System.err.println("  Use '-D" + GENERATED_HFILE_FOLDER_PARAM_KEY + "=<path>' to define a");
+      System.err.println("  base folder for the generated HFiles. If HDFS Transparent Encryption");
+      System.err.println("  is configured, then make sure to set this parameter to a folder in");
+      System.err.println("  the same encryption zone in HDFS as the HBase root directory,");
+      System.err.println("  otherwise the bulkload will fail.");
+      System.err.println();
       ToolRunner.printGenericCommandUsage(System.err);
       return 1;
     }
@@ -237,6 +247,28 @@ public class IntegrationTestImportTsv extends Configured implements Tool {
     return 0;
   }
 
+  private Path initGeneratedHFilePath(final TableName table) throws IOException {
+    String folderParam = getConf().getTrimmed(GENERATED_HFILE_FOLDER_PARAM_KEY);
+    if (folderParam == null || folderParam.isEmpty()) {
+      // by default, fall back to the test data dir
+      return new Path(util.getDataTestDirOnTestFS(table.getNameAsString()), "hfiles");
+    }
+
+    Path hfiles = new Path(folderParam, UUID.randomUUID().toString());
+    FileSystem fs = util.getTestFileSystem();
+    String shouldPreserve = System.getProperty("hbase.testing.preserve.testdir", "false");
+    if (!Boolean.parseBoolean(shouldPreserve)) {
+      if (fs.getUri().getScheme().equals(FileSystem.getLocal(getConf()).getUri().getScheme())) {
+        File localFolder = new File(hfiles.toString());
+        localFolder.deleteOnExit();
+      } else {
+        fs.deleteOnExit(hfiles);
+      }
+    }
+    return hfiles;
+  }
+
+
   public static void main(String[] args) throws Exception {
     Configuration conf = HBaseConfiguration.create();
     IntegrationTestingUtility.setUseDistributedCluster(conf);