You are viewing a plain-text version of this content. The canonical link to the original message was a hyperlink and is not preserved in this plain-text copy.
Posted to commits@hudi.apache.org by si...@apache.org on 2020/05/28 00:19:18 UTC

[hudi] 35/40: [HUDI-846][HUDI-848] Enable Incremental cleaning and embedded timeline-server by default (#1634)

This is an automated email from the ASF dual-hosted git repository.

sivabalan pushed a commit to branch release-0.5.3
in repository https://gitbox.apache.org/repos/asf/hudi.git

commit b69bb18fdd3d5ab825d1f59024b10204890968ff
Author: Balaji Varadarajan <va...@uber.com>
AuthorDate: Wed May 20 05:29:43 2020 -0700

    [HUDI-846][HUDI-848] Enable Incremental cleaning and embedded timeline-server by default (#1634)
---
 .../apache/hudi/config/HoodieCompactionConfig.java |  2 +-
 .../org/apache/hudi/config/HoodieWriteConfig.java  |  2 +-
 .../hudi/table/compact/TestHoodieCompactor.java    | 10 ++++-
 hudi-hive/pom.xml                                  |  6 ---
 .../org/apache/hudi/hive/util/HiveTestService.java |  1 +
 hudi-spark/pom.xml                                 | 44 +++++++++++++++++++++-
 hudi-utilities/pom.xml                             |  7 +---
 pom.xml                                            |  9 +----
 8 files changed, 57 insertions(+), 24 deletions(-)

diff --git a/hudi-client/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java b/hudi-client/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java
index 074ea78..5fa2b16 100644
--- a/hudi-client/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java
+++ b/hudi-client/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java
@@ -95,7 +95,7 @@ public class HoodieCompactionConfig extends DefaultHoodieConfig {
   private static final String DEFAULT_CLEANER_POLICY = HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name();
   private static final String DEFAULT_AUTO_CLEAN = "true";
   private static final String DEFAULT_INLINE_COMPACT = "false";
-  private static final String DEFAULT_INCREMENTAL_CLEANER = "false";
+  private static final String DEFAULT_INCREMENTAL_CLEANER = "true";
   private static final String DEFAULT_INLINE_COMPACT_NUM_DELTA_COMMITS = "1";
   private static final String DEFAULT_CLEANER_FILE_VERSIONS_RETAINED = "3";
   private static final String DEFAULT_CLEANER_COMMITS_RETAINED = "10";
diff --git a/hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java b/hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
index 24984db..47c109a 100644
--- a/hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
+++ b/hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
@@ -79,7 +79,7 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
   private static final String DEFAULT_FINALIZE_WRITE_PARALLELISM = DEFAULT_PARALLELISM;
 
   private static final String EMBEDDED_TIMELINE_SERVER_ENABLED = "hoodie.embed.timeline.server";
-  private static final String DEFAULT_EMBEDDED_TIMELINE_SERVER_ENABLED = "false";
+  private static final String DEFAULT_EMBEDDED_TIMELINE_SERVER_ENABLED = "true";
 
   private static final String FAIL_ON_TIMELINE_ARCHIVING_ENABLED_PROP = "hoodie.fail.on.timeline.archiving";
   private static final String DEFAULT_FAIL_ON_TIMELINE_ARCHIVING_ENABLED = "true";
diff --git a/hudi-client/src/test/java/org/apache/hudi/table/compact/TestHoodieCompactor.java b/hudi-client/src/test/java/org/apache/hudi/table/compact/TestHoodieCompactor.java
index 482d721..8fa55ec 100644
--- a/hudi-client/src/test/java/org/apache/hudi/table/compact/TestHoodieCompactor.java
+++ b/hudi-client/src/test/java/org/apache/hudi/table/compact/TestHoodieCompactor.java
@@ -27,8 +27,12 @@ import org.apache.hudi.common.model.HoodieRecord;
 import org.apache.hudi.common.model.HoodieTableType;
 import org.apache.hudi.common.model.HoodieTestUtils;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.common.table.HoodieTimeline;
 import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
 import org.apache.hudi.common.util.FSUtils;
+import org.apache.hudi.common.table.timeline.HoodieInstant;
+import org.apache.hudi.common.table.timeline.HoodieInstant.State;
+import org.apache.hudi.common.util.Option;
 import org.apache.hudi.config.HoodieCompactionConfig;
 import org.apache.hudi.config.HoodieIndexConfig;
 import org.apache.hudi.config.HoodieMemoryConfig;
@@ -148,9 +152,13 @@ public class TestHoodieCompactor extends HoodieClientTestHarness {
       HoodieIndex index = new HoodieBloomIndex<>(config);
       updatedRecords = index.tagLocation(updatedRecordsRDD, jsc, table).collect();
 
-      // Write them to corresponding avro logfiles
+      // Write them to corresponding avro logfiles. Also, set the state transition properly.
       HoodieTestUtils.writeRecordsToLogFiles(fs, metaClient.getBasePath(),
           HoodieTestDataGenerator.AVRO_SCHEMA_WITH_METADATA_FIELDS, updatedRecords);
+      metaClient.getActiveTimeline().transitionRequestedToInflight(new HoodieInstant(State.REQUESTED,
+          HoodieTimeline.DELTA_COMMIT_ACTION, newCommitTime), Option.empty());
+      writeClient.commit(newCommitTime, jsc.emptyRDD(), Option.empty());
+      metaClient.reloadActiveTimeline();
 
       // Verify that all data file has one log file
       metaClient = HoodieTableMetaClient.reload(metaClient);
diff --git a/hudi-hive/pom.xml b/hudi-hive/pom.xml
index 01e6ff1..5ad8708 100644
--- a/hudi-hive/pom.xml
+++ b/hudi-hive/pom.xml
@@ -153,12 +153,6 @@
       <scope>test</scope>
     </dependency>
 
-    <dependency>
-      <groupId>org.eclipse.jetty.aggregate</groupId>
-      <artifactId>jetty-all</artifactId>
-      <scope>test</scope>
-    </dependency>
-
     <!-- Hadoop - Test -->
     <dependency>
       <groupId>org.apache.hadoop</groupId>
diff --git a/hudi-hive/src/test/java/org/apache/hudi/hive/util/HiveTestService.java b/hudi-hive/src/test/java/org/apache/hudi/hive/util/HiveTestService.java
index 0cef82b..ee5b09a 100644
--- a/hudi-hive/src/test/java/org/apache/hudi/hive/util/HiveTestService.java
+++ b/hudi-hive/src/test/java/org/apache/hudi/hive/util/HiveTestService.java
@@ -105,6 +105,7 @@ public class HiveTestService {
     executorService = Executors.newSingleThreadExecutor();
     tServer = startMetaStore(bindIP, metastorePort, serverConf);
 
+    serverConf.set("hive.in.test", "true");
     hiveServer = startHiveServer(serverConf);
 
     String serverHostname;
diff --git a/hudi-spark/pom.xml b/hudi-spark/pom.xml
index 501cd8f..6b90a69 100644
--- a/hudi-spark/pom.xml
+++ b/hudi-spark/pom.xml
@@ -220,6 +220,12 @@
     <dependency>
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-core_${scala.binary.version}</artifactId>
+        <exclusions>
+           <exclusion>
+               <groupId>javax.servlet</groupId>
+               <artifactId>*</artifactId>
+           </exclusion>
+        </exclusions>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
@@ -248,7 +254,17 @@
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-common</artifactId>
-      <scope>provided</scope>
+       <exclusions>
+        <exclusion>
+          <groupId>javax.servlet</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>javax.servlet.jsp</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
+      </exclusions>
+     <scope>provided</scope>
     </dependency>
 
     <!-- Hive -->
@@ -272,16 +288,42 @@
       <groupId>${hive.groupid}</groupId>
       <artifactId>hive-jdbc</artifactId>
       <version>${hive.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>javax.servlet</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>javax.servlet.jsp</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
     <dependency>
       <groupId>${hive.groupid}</groupId>
       <artifactId>hive-metastore</artifactId>
       <version>${hive.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>javax.servlet</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>javax.servlet.jsp</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
     <dependency>
       <groupId>${hive.groupid}</groupId>
       <artifactId>hive-common</artifactId>
       <version>${hive.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.eclipse.jetty.orbit</groupId>
+          <artifactId>javax.servlet</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
 
     <!-- Hoodie - Test -->
diff --git a/hudi-utilities/pom.xml b/hudi-utilities/pom.xml
index b8b71ac..6211311 100644
--- a/hudi-utilities/pom.xml
+++ b/hudi-utilities/pom.xml
@@ -82,13 +82,8 @@
       <!-- Needs to be at the top to ensure we get the correct dependency versions for jetty-server -->
       <groupId>org.eclipse.jetty.aggregate</groupId>
       <artifactId>jetty-all</artifactId>
-      <scope>test</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>org.eclipse.jetty</groupId>
-      <artifactId>jetty-server</artifactId>
       <version>${jetty.version}</version>
+      <classifier>uber</classifier>
       <scope>test</scope>
     </dependency>
 
diff --git a/pom.xml b/pom.xml
index b49c653..8603024 100644
--- a/pom.xml
+++ b/pom.xml
@@ -101,7 +101,7 @@
     <scalatest.version>3.0.1</scalatest.version>
     <surefire-log4j.file>file://${project.basedir}/src/test/resources/log4j-surefire.properties</surefire-log4j.file>
     <thrift.version>0.12.0</thrift.version>
-    <jetty.version>7.6.0.v20120127</jetty.version>
+    <jetty.version>9.4.15.v20190215</jetty.version>
     <hbase.version>1.2.3</hbase.version>
     <codehaus-jackson.version>1.9.13</codehaus-jackson.version>
     <h2.version>1.4.199</h2.version>
@@ -456,13 +456,6 @@
         <artifactId>jersey-container-servlet-core</artifactId>
         <version>${glassfish.version}</version>
       </dependency>
-      <!-- Needed for running HiveServer for Tests -->
-      <dependency>
-        <groupId>org.eclipse.jetty.aggregate</groupId>
-        <artifactId>jetty-all</artifactId>
-        <scope>test</scope>
-        <version>${jetty.version}</version>
-      </dependency>
 
       <!-- Avro -->
       <dependency>