You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by si...@apache.org on 2020/05/28 00:19:18 UTC
[hudi] 35/40: [HUDI-846][HUDI-848] Enable Incremental cleaning and embedded timeline-server by default (#1634)
This is an automated email from the ASF dual-hosted git repository.
sivabalan pushed a commit to branch release-0.5.3
in repository https://gitbox.apache.org/repos/asf/hudi.git
commit b69bb18fdd3d5ab825d1f59024b10204890968ff
Author: Balaji Varadarajan <va...@uber.com>
AuthorDate: Wed May 20 05:29:43 2020 -0700
[HUDI-846][HUDI-848] Enable Incremental cleaning and embedded timeline-server by default (#1634)
---
.../apache/hudi/config/HoodieCompactionConfig.java | 2 +-
.../org/apache/hudi/config/HoodieWriteConfig.java | 2 +-
.../hudi/table/compact/TestHoodieCompactor.java | 10 ++++-
hudi-hive/pom.xml | 6 ---
.../org/apache/hudi/hive/util/HiveTestService.java | 1 +
hudi-spark/pom.xml | 44 +++++++++++++++++++++-
hudi-utilities/pom.xml | 7 +---
pom.xml | 9 +----
8 files changed, 57 insertions(+), 24 deletions(-)
diff --git a/hudi-client/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java b/hudi-client/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java
index 074ea78..5fa2b16 100644
--- a/hudi-client/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java
+++ b/hudi-client/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java
@@ -95,7 +95,7 @@ public class HoodieCompactionConfig extends DefaultHoodieConfig {
private static final String DEFAULT_CLEANER_POLICY = HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name();
private static final String DEFAULT_AUTO_CLEAN = "true";
private static final String DEFAULT_INLINE_COMPACT = "false";
- private static final String DEFAULT_INCREMENTAL_CLEANER = "false";
+ private static final String DEFAULT_INCREMENTAL_CLEANER = "true";
private static final String DEFAULT_INLINE_COMPACT_NUM_DELTA_COMMITS = "1";
private static final String DEFAULT_CLEANER_FILE_VERSIONS_RETAINED = "3";
private static final String DEFAULT_CLEANER_COMMITS_RETAINED = "10";
diff --git a/hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java b/hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
index 24984db..47c109a 100644
--- a/hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
+++ b/hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
@@ -79,7 +79,7 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
private static final String DEFAULT_FINALIZE_WRITE_PARALLELISM = DEFAULT_PARALLELISM;
private static final String EMBEDDED_TIMELINE_SERVER_ENABLED = "hoodie.embed.timeline.server";
- private static final String DEFAULT_EMBEDDED_TIMELINE_SERVER_ENABLED = "false";
+ private static final String DEFAULT_EMBEDDED_TIMELINE_SERVER_ENABLED = "true";
private static final String FAIL_ON_TIMELINE_ARCHIVING_ENABLED_PROP = "hoodie.fail.on.timeline.archiving";
private static final String DEFAULT_FAIL_ON_TIMELINE_ARCHIVING_ENABLED = "true";
diff --git a/hudi-client/src/test/java/org/apache/hudi/table/compact/TestHoodieCompactor.java b/hudi-client/src/test/java/org/apache/hudi/table/compact/TestHoodieCompactor.java
index 482d721..8fa55ec 100644
--- a/hudi-client/src/test/java/org/apache/hudi/table/compact/TestHoodieCompactor.java
+++ b/hudi-client/src/test/java/org/apache/hudi/table/compact/TestHoodieCompactor.java
@@ -27,8 +27,12 @@ import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.model.HoodieTestUtils;
import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.common.table.HoodieTimeline;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.util.FSUtils;
+import org.apache.hudi.common.table.timeline.HoodieInstant;
+import org.apache.hudi.common.table.timeline.HoodieInstant.State;
+import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieCompactionConfig;
import org.apache.hudi.config.HoodieIndexConfig;
import org.apache.hudi.config.HoodieMemoryConfig;
@@ -148,9 +152,13 @@ public class TestHoodieCompactor extends HoodieClientTestHarness {
HoodieIndex index = new HoodieBloomIndex<>(config);
updatedRecords = index.tagLocation(updatedRecordsRDD, jsc, table).collect();
- // Write them to corresponding avro logfiles
+ // Write them to corresponding avro logfiles. Also, set the state transition properly.
HoodieTestUtils.writeRecordsToLogFiles(fs, metaClient.getBasePath(),
HoodieTestDataGenerator.AVRO_SCHEMA_WITH_METADATA_FIELDS, updatedRecords);
+ metaClient.getActiveTimeline().transitionRequestedToInflight(new HoodieInstant(State.REQUESTED,
+ HoodieTimeline.DELTA_COMMIT_ACTION, newCommitTime), Option.empty());
+ writeClient.commit(newCommitTime, jsc.emptyRDD(), Option.empty());
+ metaClient.reloadActiveTimeline();
// Verify that all data file has one log file
metaClient = HoodieTableMetaClient.reload(metaClient);
diff --git a/hudi-hive/pom.xml b/hudi-hive/pom.xml
index 01e6ff1..5ad8708 100644
--- a/hudi-hive/pom.xml
+++ b/hudi-hive/pom.xml
@@ -153,12 +153,6 @@
<scope>test</scope>
</dependency>
- <dependency>
- <groupId>org.eclipse.jetty.aggregate</groupId>
- <artifactId>jetty-all</artifactId>
- <scope>test</scope>
- </dependency>
-
<!-- Hadoop - Test -->
<dependency>
<groupId>org.apache.hadoop</groupId>
diff --git a/hudi-hive/src/test/java/org/apache/hudi/hive/util/HiveTestService.java b/hudi-hive/src/test/java/org/apache/hudi/hive/util/HiveTestService.java
index 0cef82b..ee5b09a 100644
--- a/hudi-hive/src/test/java/org/apache/hudi/hive/util/HiveTestService.java
+++ b/hudi-hive/src/test/java/org/apache/hudi/hive/util/HiveTestService.java
@@ -105,6 +105,7 @@ public class HiveTestService {
executorService = Executors.newSingleThreadExecutor();
tServer = startMetaStore(bindIP, metastorePort, serverConf);
+ serverConf.set("hive.in.test", "true");
hiveServer = startHiveServer(serverConf);
String serverHostname;
diff --git a/hudi-spark/pom.xml b/hudi-spark/pom.xml
index 501cd8f..6b90a69 100644
--- a/hudi-spark/pom.xml
+++ b/hudi-spark/pom.xml
@@ -220,6 +220,12 @@
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
+ <exclusions>
+ <exclusion>
+ <groupId>javax.servlet</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
@@ -248,7 +254,17 @@
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
- <scope>provided</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>javax.servlet</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>javax.servlet.jsp</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
+ </exclusions>
+ <scope>provided</scope>
</dependency>
<!-- Hive -->
@@ -272,16 +288,42 @@
<groupId>${hive.groupid}</groupId>
<artifactId>hive-jdbc</artifactId>
<version>${hive.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>javax.servlet</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>javax.servlet.jsp</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>${hive.groupid}</groupId>
<artifactId>hive-metastore</artifactId>
<version>${hive.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>javax.servlet</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>javax.servlet.jsp</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>${hive.groupid}</groupId>
<artifactId>hive-common</artifactId>
<version>${hive.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.eclipse.jetty.orbit</groupId>
+ <artifactId>javax.servlet</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<!-- Hoodie - Test -->
diff --git a/hudi-utilities/pom.xml b/hudi-utilities/pom.xml
index b8b71ac..6211311 100644
--- a/hudi-utilities/pom.xml
+++ b/hudi-utilities/pom.xml
@@ -82,13 +82,8 @@
<!-- Needs to be at the top to ensure we get the correct dependency versions for jetty-server -->
<groupId>org.eclipse.jetty.aggregate</groupId>
<artifactId>jetty-all</artifactId>
- <scope>test</scope>
- </dependency>
-
- <dependency>
- <groupId>org.eclipse.jetty</groupId>
- <artifactId>jetty-server</artifactId>
<version>${jetty.version}</version>
+ <classifier>uber</classifier>
<scope>test</scope>
</dependency>
diff --git a/pom.xml b/pom.xml
index b49c653..8603024 100644
--- a/pom.xml
+++ b/pom.xml
@@ -101,7 +101,7 @@
<scalatest.version>3.0.1</scalatest.version>
<surefire-log4j.file>file://${project.basedir}/src/test/resources/log4j-surefire.properties</surefire-log4j.file>
<thrift.version>0.12.0</thrift.version>
- <jetty.version>7.6.0.v20120127</jetty.version>
+ <jetty.version>9.4.15.v20190215</jetty.version>
<hbase.version>1.2.3</hbase.version>
<codehaus-jackson.version>1.9.13</codehaus-jackson.version>
<h2.version>1.4.199</h2.version>
@@ -456,13 +456,6 @@
<artifactId>jersey-container-servlet-core</artifactId>
<version>${glassfish.version}</version>
</dependency>
- <!-- Needed for running HiveServer for Tests -->
- <dependency>
- <groupId>org.eclipse.jetty.aggregate</groupId>
- <artifactId>jetty-all</artifactId>
- <scope>test</scope>
- <version>${jetty.version}</version>
- </dependency>
<!-- Avro -->
<dependency>