Posted to commits@kudu.apache.org by gr...@apache.org on 2019/06/21 17:35:19 UTC

[kudu] 01/02: [backup] Fix backup tools packaging

This is an automated email from the ASF dual-hosted git repository.

granthenke pushed a commit to branch branch-1.10.x
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 0c9189be0ede1cfe413c8a9898ea3017f5f76722
Author: Grant Henke <gr...@apache.org>
AuthorDate: Thu Jun 20 14:25:59 2019 -0500

    [backup] Fix backup tools packaging
    
    This patch adjusts how we package the kudu-backup-tools
    module. Instead of building a fat jar that contains all of
    the Hadoop classes, the Hadoop classes are now marked
    as provided and therefore not included.
    
    This change was made with two concerns in mind. The
    primary one is that the de facto way to load the
    environment's Hadoop configuration is from the
    classpath via `hadoop classpath`. A secondary benefit
    is that building the classpath this way avoids any
    potential Hadoop compatibility issues.
    
    An example invocation of the jar is now:
    `java -cp $(hadoop classpath):kudu-backup-tools-1.10.0.jar org.apache.kudu.backup.KuduBackupCLI`
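    Here `$(hadoop classpath)` expands to the local Hadoop
    installation's jars and configuration directories, so the tool
    picks up the environment's core-site.xml and hdfs-site.xml
    automatically.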
    
    Additionally, this patch adds integration tests that exercise
    `hdfs` URIs against a hadoop-minicluster.
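
    For reference, here is a condensed sketch of the mini-cluster
    setup those tests use (mirroring the HDFSTestKuduBackupCleaner
    hunk in the diff below; it assumes it runs inside the
    org.apache.kudu.backup test package so that BackupIO is in scope):

        import java.nio.file.Files
        import org.apache.hadoop.conf.Configuration
        import org.apache.hadoop.hdfs.MiniDFSCluster

        // Start a single-node HDFS mini-cluster rooted in a temp directory.
        val baseDir = Files.createTempDirectory("hdfs-test").toFile.getAbsoluteFile
        val conf = new Configuration()
        conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, baseDir.getAbsolutePath)
        val hdfsCluster = new MiniDFSCluster.Builder(conf).build()

        // Point the backup tooling at the mini-cluster's NameNode URI.
        val rootPath = "hdfs://localhost:" + hdfsCluster.getNameNodePort + "/"
        val io = new BackupIO(new Configuration(), rootPath)

        // (The tests delete baseDir via FileUtil.fullyDelete in their @After method.)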
    
    Change-Id: I334a38bb5d7252b5e6d2722c4f1fc1d3b5622db7
    Reviewed-on: http://gerrit.cloudera.org:8080/13691
    Tested-by: Kudu Jenkins
    Reviewed-by: Mike Percy <mp...@apache.org>
    (cherry picked from commit ef921b2c35589e75de4c3fcb2673e38123d9fb94)
    Reviewed-on: http://gerrit.cloudera.org:8080/13705
    Tested-by: Grant Henke <gr...@apache.org>
---
 java/gradle/dependencies.gradle                    |  1 +
 java/gradle/shadow.gradle                          | 18 ------
 java/kudu-backup-common/build.gradle               |  5 +-
 java/kudu-backup-tools/build.gradle                | 21 +------
 .../org/apache/kudu/backup/KuduBackupLister.scala  |  2 +-
 .../apache/kudu/backup/TestKuduBackupCleaner.scala | 59 ++++++++++++-----
 .../apache/kudu/backup/TestKuduBackupLister.scala  | 73 ++++++++++++++--------
 7 files changed, 99 insertions(+), 80 deletions(-)

diff --git a/java/gradle/dependencies.gradle b/java/gradle/dependencies.gradle
index c80763e..221e055 100755
--- a/java/gradle/dependencies.gradle
+++ b/java/gradle/dependencies.gradle
@@ -85,6 +85,7 @@ libs += [
     guava                : "com.google.guava:guava:$versions.guava",
     hadoopClient         : "org.apache.hadoop:hadoop-client:$versions.hadoop",
     hadoopCommon         : "org.apache.hadoop:hadoop-common:$versions.hadoop",
+    hadoopMiniCluster    : "org.apache.hadoop:hadoop-minicluster:$versions.hadoop",
     hadoopMRClientCommon : "org.apache.hadoop:hadoop-mapreduce-client-common:$versions.hadoop",
     hadoopMRClientCore   : "org.apache.hadoop:hadoop-mapreduce-client-core:$versions.hadoop",
     hamcrest             : "org.hamcrest:hamcrest:$versions.hamcrest",
diff --git a/java/gradle/shadow.gradle b/java/gradle/shadow.gradle
index a46e678..bb414e5 100644
--- a/java/gradle/shadow.gradle
+++ b/java/gradle/shadow.gradle
@@ -76,27 +76,9 @@ configurations.create("compileUnshaded")
 configurations.shadow.extendsFrom(configurations.compileUnshaded)
 configurations.compile.extendsFrom(configurations.compileUnshaded)
 
-// Define an overridable property to indicate tool jars that should
-// include all of their dependencies.
-// We use this below to ensure slf4j is included.
-shadow.ext {
-  isToolJar = false
-}
-
 // We use afterEvaluate to add additional configuration once all the definitions
 // in the projects build script have been applied
 afterEvaluate {
-  if (!shadow.isToolJar) {
-    shadowJar {
-      dependencies {
-        // Our shaded library jars always try to pull in the slf4j api from
-        // kudu-client, though we never want it included. Excluding it
-        // here prevents the need to always list it.
-        exclude dependency(libs.slf4jApi)
-      }
-    }
-  }
-  
   // Ensure compileUnshaded dependencies are included in the pom.
   [install, uploadArchives].each { task ->
     task.repositories.each {
diff --git a/java/kudu-backup-common/build.gradle b/java/kudu-backup-common/build.gradle
index 82c4248..65983ec 100644
--- a/java/kudu-backup-common/build.gradle
+++ b/java/kudu-backup-common/build.gradle
@@ -25,10 +25,11 @@ dependencies {
     // Make sure wrong Guava version is not pulled in.
     exclude group: "com.google.guava", module: "guava"
   }
-  compile libs.hadoopCommon
-  compile libs.scalaLibrary
   compile libs.slf4jApi
 
+  provided libs.hadoopCommon
+  provided libs.scalaLibrary
+
   optional libs.yetusAnnotations
 
   testCompile project(path: ":kudu-test-utils", configuration: "shadow")
diff --git a/java/kudu-backup-tools/build.gradle b/java/kudu-backup-tools/build.gradle
index e08fb4e..c1eb612 100644
--- a/java/kudu-backup-tools/build.gradle
+++ b/java/kudu-backup-tools/build.gradle
@@ -18,37 +18,22 @@
 apply plugin: "scala"
 apply from: "$rootDir/gradle/shadow.gradle"
 
-// Mark this as a tool jar so shadow doesn't exclude any dependencies when shading.
-shadow {
-  isToolJar = true
-}
-
-
-// Add the main class to the manifest so the jar can be run via `java -jar`
-jar {
-  manifest {
-    attributes 'Main-Class': 'org.apache.kudu.backup.KuduBackupCLI'
-  }
-}
-
 dependencies {
   compile project(path: ":kudu-backup-common")
   compile project(path: ":kudu-client", configuration: "shadow")
-  compile libs.hadoopCommon
   compile (libs.scopt)  {
     // Make sure wrong Scala version is not pulled in.
     exclude group: "org.scala-lang", module: "scala-library"
   }
   compile libs.scalaLibrary
-
-  // Include the logging implementation given this is a standalone command line tool.
   compile libs.slf4jApi
-  compile libs.log4j
-  compile libs.log4jSlf4jImpl
+
+  provided libs.hadoopClient
 
   optional libs.yetusAnnotations
 
   testCompile project(path: ":kudu-test-utils", configuration: "shadow")
+  testCompile libs.hadoopMiniCluster
   testCompile libs.junit
   testCompile libs.scalatest
 }
diff --git a/java/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/KuduBackupLister.scala b/java/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/KuduBackupLister.scala
index e2e4f34..94ac3f7 100644
--- a/java/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/KuduBackupLister.scala
+++ b/java/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/KuduBackupLister.scala
@@ -34,7 +34,7 @@ object KuduBackupLister {
   // Run the backup CLI tool with the given options. Like a command, returns 0 if successful, or
   // a nonzero error code.
   def run(options: BackupCLIOptions): Int = {
-    Preconditions.checkArgument(options.mode == Mode.LIST);
+    Preconditions.checkArgument(options.mode == Mode.LIST)
 
     // Sort by table name for a consistent ordering (at least if there's no duplicate names).
     val sortedTables = options.tables.sorted
diff --git a/java/kudu-backup-tools/src/test/scala/org/apache/kudu/backup/TestKuduBackupCleaner.scala b/java/kudu-backup-tools/src/test/scala/org/apache/kudu/backup/TestKuduBackupCleaner.scala
index 879d269..1362532 100644
--- a/java/kudu-backup-tools/src/test/scala/org/apache/kudu/backup/TestKuduBackupCleaner.scala
+++ b/java/kudu-backup-tools/src/test/scala/org/apache/kudu/backup/TestKuduBackupCleaner.scala
@@ -16,15 +16,17 @@
 // under the License.
 package org.apache.kudu.backup
 
+import java.io.File
 import java.nio.file.Files
-import java.nio.file.Path
 import java.time.temporal.ChronoUnit
 import java.time.Duration
 import java.time.Instant
 
 import org.apache.commons.io.FileUtils
 import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.FileUtil
 import org.apache.hadoop.fs.{Path => HPath}
+import org.apache.hadoop.hdfs.MiniDFSCluster
 import org.junit.After
 import org.junit.Assert._
 import org.junit.Before
@@ -32,20 +34,10 @@ import org.junit.Test
 import org.slf4j.Logger
 import org.slf4j.LoggerFactory
 
-class TestKuduBackupCleaner {
+abstract class BaseTestKuduBackupCleaner {
   val log: Logger = LoggerFactory.getLogger(getClass)
 
-  var rootPath: Path = _
-
-  @Before
-  def setUp(): Unit = {
-    rootPath = Files.createTempDirectory("backupcli")
-  }
-
-  @After
-  def tearDown(): Unit = {
-    FileUtils.deleteDirectory(rootPath.toFile)
-  }
+  var rootPath: String = _
 
   // Return the epoch time in milliseconds that is 'secsBefore' seconds before 'current'.
   private def epochMillisBefore(current: Instant, secsBefore: Long): Long = {
@@ -54,7 +46,7 @@ class TestKuduBackupCleaner {
 
   @Test
   def testBackupCleaner(): Unit = {
-    val io = new BackupIO(new Configuration(), rootPath.toUri.toString)
+    val io = new BackupIO(new Configuration(), rootPath)
     val expirationAge = Duration.of(15, ChronoUnit.SECONDS)
     val now = Instant.now
     val tableName = "taco"
@@ -123,13 +115,13 @@ class TestKuduBackupCleaner {
   }
 
   def createOptions(
-      rootPath: Path,
+      rootPath: String,
       expirationAge: Duration,
       tables: Seq[String] = Seq(),
       dryRun: Boolean = false,
       verbose: Boolean = false): BackupCLIOptions = {
     new BackupCLIOptions(
-      rootPath.toUri.toString,
+      rootPath,
       Mode.CLEAN,
       tables = tables,
       expirationAge = expirationAge,
@@ -137,3 +129,38 @@ class TestKuduBackupCleaner {
       verbose = verbose)
   }
 }
+
+class LocalTestKuduBackupCleaner extends BaseTestKuduBackupCleaner {
+
+  @Before
+  def setUp(): Unit = {
+    rootPath = Files.createTempDirectory("backupcli").toAbsolutePath.toString
+  }
+
+  @After
+  def tearDown(): Unit = {
+    FileUtils.deleteDirectory(new File(rootPath))
+  }
+}
+
+class HDFSTestKuduBackupCleaner extends BaseTestKuduBackupCleaner {
+  var baseDir: File = _
+
+  @Before
+  def setUp(): Unit = {
+    baseDir = Files.createTempDirectory("hdfs-test").toFile.getAbsoluteFile
+
+    // Create an HDFS mini-cluster.
+    val conf = new Configuration()
+    conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, baseDir.getAbsolutePath)
+    val hdfsCluster = new MiniDFSCluster.Builder(conf).build()
+
+    // Set the root path to use the HDFS URI.
+    rootPath = "hdfs://localhost:" + hdfsCluster.getNameNodePort + "/"
+  }
+
+  @After
+  def tearDown(): Unit = {
+    FileUtil.fullyDelete(baseDir)
+  }
+}
diff --git a/java/kudu-backup-tools/src/test/scala/org/apache/kudu/backup/TestKuduBackupLister.scala b/java/kudu-backup-tools/src/test/scala/org/apache/kudu/backup/TestKuduBackupLister.scala
index cf75ba1..e6dd8a6 100644
--- a/java/kudu-backup-tools/src/test/scala/org/apache/kudu/backup/TestKuduBackupLister.scala
+++ b/java/kudu-backup-tools/src/test/scala/org/apache/kudu/backup/TestKuduBackupLister.scala
@@ -17,13 +17,15 @@
 package org.apache.kudu.backup
 
 import java.io.ByteArrayOutputStream
+import java.io.File
 import java.io.PrintStream
 import java.nio.file.Files
-import java.nio.file.Path
 import java.text.SimpleDateFormat
 
 import org.apache.commons.io.FileUtils
 import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.FileUtil
+import org.apache.hadoop.hdfs.MiniDFSCluster
 import org.junit.Assert._
 import org.junit.After
 import org.junit.Before
@@ -31,20 +33,10 @@ import org.junit.Test
 import org.slf4j.Logger
 import org.slf4j.LoggerFactory
 
-class TestKuduBackupLister {
+abstract class BaseTestKuduBackupLister {
   val log: Logger = LoggerFactory.getLogger(getClass)
 
-  var rootPath: Path = _
-
-  @Before
-  def setUp(): Unit = {
-    rootPath = Files.createTempDirectory("backupcli")
-  }
-
-  @After
-  def tearDown(): Unit = {
-    FileUtils.deleteDirectory(rootPath.toFile)
-  }
+  var rootPath: String = _
 
   // Helper to write a standard collection of backup metadata useful for a few tests.
   private def createStandardTableMetadata(io: BackupIO): Unit = {
@@ -71,7 +63,7 @@ class TestKuduBackupLister {
 
   @Test
   def testListAllBackups(): Unit = {
-    val io = new BackupIO(new Configuration(), rootPath.toUri.toString)
+    val io = new BackupIO(new Configuration(), rootPath)
     createStandardTableMetadata(io)
 
     val options = createOptions(rootPath, ListType.ALL)
@@ -97,7 +89,7 @@ class TestKuduBackupLister {
 
   @Test
   def testListLatestBackups(): Unit = {
-    val io = new BackupIO(new Configuration(), rootPath.toUri.toString)
+    val io = new BackupIO(new Configuration(), rootPath)
     createStandardTableMetadata(io)
 
     val options = createOptions(rootPath, ListType.LATEST)
@@ -117,7 +109,7 @@ class TestKuduBackupLister {
 
   @Test
   def testListRestorePath(): Unit = {
-    val io = new BackupIO(new Configuration(), rootPath.toUri.toString)
+    val io = new BackupIO(new Configuration(), rootPath)
     createStandardTableMetadata(io)
 
     val options = createOptions(rootPath, ListType.RESTORE_SEQUENCE)
@@ -142,7 +134,7 @@ class TestKuduBackupLister {
 
   @Test
   def testTableFilter(): Unit = {
-    val io = new BackupIO(new Configuration(), rootPath.toUri.toString)
+    val io = new BackupIO(new Configuration(), rootPath)
     createStandardTableMetadata(io)
 
     val options = createOptions(rootPath, ListType.ALL, Seq("taco"))
@@ -163,7 +155,7 @@ class TestKuduBackupLister {
 
   @Test
   def testMissingTable(): Unit = {
-    val io = new BackupIO(new Configuration(), rootPath.toUri.toString)
+    val io = new BackupIO(new Configuration(), rootPath)
     createStandardTableMetadata(io)
 
     val options = createOptions(rootPath, ListType.ALL, Seq("pizza", "nope"))
@@ -188,15 +180,46 @@ class TestKuduBackupLister {
   }
 
   def createOptions(
-      rootPath: Path,
+      rootPath: String,
       listType: ListType.Value,
       tables: Seq[String] = Seq(),
       format: Format.Value = Format.CSV): BackupCLIOptions = {
-    new BackupCLIOptions(
-      rootPath.toUri.toString,
-      Mode.LIST,
-      tables = tables,
-      listType = listType,
-      format = format)
+    new BackupCLIOptions(rootPath, Mode.LIST, tables = tables, listType = listType, format = format)
+  }
+}
+
+class LocalTestKuduBackupLister extends BaseTestKuduBackupLister {
+
+  @Before
+  def setUp(): Unit = {
+    rootPath = Files.createTempDirectory("local-test").toAbsolutePath.toString
+  }
+
+  @After
+  def tearDown(): Unit = {
+    FileUtils.deleteDirectory(new File(rootPath))
+  }
+}
+
+class HDFSTestKuduBackupLister extends BaseTestKuduBackupLister {
+
+  var baseDir: File = _
+
+  @Before
+  def setUp(): Unit = {
+    baseDir = Files.createTempDirectory("hdfs-test").toFile.getAbsoluteFile
+
+    // Create an HDFS mini-cluster.
+    val conf = new Configuration()
+    conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, baseDir.getAbsolutePath)
+    val hdfsCluster = new MiniDFSCluster.Builder(conf).build()
+
+    // Set the root path to use the HDFS URI.
+    rootPath = "hdfs://localhost:" + hdfsCluster.getNameNodePort + "/"
+  }
+
+  @After
+  def tearDown(): Unit = {
+    FileUtil.fullyDelete(baseDir)
   }
 }