You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by vi...@apache.org on 2020/11/23 18:06:40 UTC
[hudi] branch master updated: [HUDI-1364] Add
HoodieJavaEngineContext to hudi-java-client (#2222)
This is an automated email from the ASF dual-hosted git repository.
vinoth pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new d9411c3 [HUDI-1364] Add HoodieJavaEngineContext to hudi-java-client (#2222)
d9411c3 is described below
commit d9411c38dbe6684ed2b0b59b27fbb1a46ac16c34
Author: Shen Hong <sh...@126.com>
AuthorDate: Tue Nov 24 02:06:28 2020 +0800
[HUDI-1364] Add HoodieJavaEngineContext to hudi-java-client (#2222)
---
.../client/common/function/FunctionWrapper.java | 0
.../org/apache/hudi/DummyTaskContextSupplier.java | 48 +++++++
hudi-client/hudi-java-client/pom.xml | 152 +++++++++++++++++++++
.../client/common/HoodieJavaEngineContext.java | 86 ++++++++++++
.../src/main/resources/log4j.properties | 23 ++++
.../client/common/TestHoodieJavaEngineContext.java | 85 ++++++++++++
.../src/test/resources/log4j-surefire.properties | 31 +++++
hudi-client/pom.xml | 1 +
8 files changed, 426 insertions(+)
diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/client/common/function/FunctionWrapper.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/common/function/FunctionWrapper.java
similarity index 100%
rename from hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/client/common/function/FunctionWrapper.java
rename to hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/common/function/FunctionWrapper.java
diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/DummyTaskContextSupplier.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/DummyTaskContextSupplier.java
new file mode 100644
index 0000000..0221705
--- /dev/null
+++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/DummyTaskContextSupplier.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi;
+
+import org.apache.hudi.client.common.EngineProperty;
+import org.apache.hudi.client.common.TaskContextSupplier;
+import org.apache.hudi.common.util.Option;
+
+import java.util.function.Supplier;
+
+public class DummyTaskContextSupplier extends TaskContextSupplier { // test stub: every override below returns null
+
+ @Override
+ public Supplier<Integer> getPartitionIdSupplier() {
+ return null; // stub: no partition-id supplier is provided
+ }
+
+ @Override
+ public Supplier<Integer> getStageIdSupplier() {
+ return null; // stub: no stage-id supplier is provided
+ }
+
+ @Override
+ public Supplier<Long> getAttemptIdSupplier() {
+ return null; // stub: no attempt-id supplier is provided
+ }
+
+ @Override
+ public Option<String> getProperty(EngineProperty prop) {
+ return null; // NOTE(review): a null reference, not an empty Option; prop is ignored
+ }
+}
diff --git a/hudi-client/hudi-java-client/pom.xml b/hudi-client/hudi-java-client/pom.xml
new file mode 100644
index 0000000..6429ade
--- /dev/null
+++ b/hudi-client/hudi-java-client/pom.xml
@@ -0,0 +1,152 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <parent>
+ <artifactId>hudi-client</artifactId>
+ <groupId>org.apache.hudi</groupId>
+ <version>0.6.1-SNAPSHOT</version>
+ </parent>
+ <modelVersion>4.0.0</modelVersion>
+
+ <artifactId>hudi-java-client</artifactId>
+ <version>${parent.version}</version>
+
+ <name>hudi-java-client</name>
+ <packaging>jar</packaging>
+
+ <dependencies>
+ <!-- Hudi -->
+ <dependency>
+ <groupId>org.apache.hudi</groupId>
+ <artifactId>hudi-client-common</artifactId>
+ <version>${parent.version}</version>
+ </dependency>
+
+ <!-- Hoodie - Test -->
+ <dependency>
+ <groupId>org.apache.hudi</groupId>
+ <artifactId>hudi-common</artifactId>
+ <version>${project.version}</version>
+ <classifier>tests</classifier>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hudi</groupId>
+ <artifactId>hudi-client-common</artifactId>
+ <version>${project.version}</version>
+ <classifier>tests</classifier>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+
+ <!-- Test -->
+ <dependency>
+ <groupId>org.junit.jupiter</groupId>
+ <artifactId>junit-jupiter-api</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.junit.jupiter</groupId>
+ <artifactId>junit-jupiter-engine</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.junit.vintage</groupId>
+ <artifactId>junit-vintage-engine</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.junit.jupiter</groupId>
+ <artifactId>junit-jupiter-params</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-junit-jupiter</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.junit.platform</groupId>
+ <artifactId>junit-platform-runner</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.junit.platform</groupId>
+ <artifactId>junit-platform-suite-api</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.junit.platform</groupId>
+ <artifactId>junit-platform-commons</artifactId>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.jacoco</groupId>
+ <artifactId>jacoco-maven-plugin</artifactId>
+ </plugin>
+
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <executions>
+ <execution>
+ <phase>compile</phase>
+ <goals>
+ <goal>compile</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-jar-plugin</artifactId>
+ <executions>
+ <execution>
+ <goals>
+ <goal>test-jar</goal>
+ </goals>
+ <phase>test-compile</phase>
+ </execution>
+ </executions>
+ <configuration>
+ <skip>false</skip>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.rat</groupId>
+ <artifactId>apache-rat-plugin</artifactId>
+ </plugin>
+ </plugins>
+
+ <resources>
+ <resource>
+ <directory>src/main/resources</directory>
+ </resource>
+ <resource>
+ <directory>src/test/resources</directory>
+ </resource>
+ </resources>
+ </build>
+
+</project>
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/common/HoodieJavaEngineContext.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/common/HoodieJavaEngineContext.java
new file mode 100644
index 0000000..a04a18b
--- /dev/null
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/common/HoodieJavaEngineContext.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.client.common;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hudi.client.common.function.SerializableConsumer;
+import org.apache.hudi.client.common.function.SerializableFunction;
+import org.apache.hudi.client.common.function.SerializablePairFunction;
+import org.apache.hudi.common.config.SerializableConfiguration;
+import org.apache.hudi.common.util.Option;
+
+import scala.Tuple2;
+
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import static java.util.stream.Collectors.toList;
+import static org.apache.hudi.client.common.function.FunctionWrapper.throwingFlatMapWrapper;
+import static org.apache.hudi.client.common.function.FunctionWrapper.throwingForeachWrapper;
+import static org.apache.hudi.client.common.function.FunctionWrapper.throwingMapToPairWrapper;
+import static org.apache.hudi.client.common.function.FunctionWrapper.throwingMapWrapper;
+
+/**
+ * A java engine implementation of HoodieEngineContext built on java.util.stream; the parallelism arguments are not read.
+ */
+public class HoodieJavaEngineContext extends HoodieEngineContext {
+
+ public HoodieJavaEngineContext(Configuration conf, TaskContextSupplier taskContextSupplier) {
+ super(new SerializableConfiguration(conf), taskContextSupplier); // wrap the Hadoop conf before passing it to the base class
+ }
+
+ @Override
+ public <I, O> List<O> map(List<I> data, SerializableFunction<I, O> func, int parallelism) {
+ return data.stream().parallel().map(throwingMapWrapper(func)).collect(toList()); // parallel stream; parallelism is unused
+ }
+
+ @Override
+ public <I, O> List<O> flatMap(List<I> data, SerializableFunction<I, Stream<O>> func, int parallelism) {
+ return data.stream().parallel().flatMap(throwingFlatMapWrapper(func)).collect(toList()); // parallel stream; parallelism is unused
+ }
+
+ @Override
+ public <I> void foreach(List<I> data, SerializableConsumer<I> consumer, int parallelism) {
+ data.stream().forEach(throwingForeachWrapper(consumer)); // sequential stream (no parallel() call); parallelism is unused
+ }
+
+ @Override
+ public <I, K, V> Map<K, V> mapToPair(List<I> data, SerializablePairFunction<I, K, V> func, Integer parallelism) {
+ return data.stream().map(throwingMapToPairWrapper(func)).collect( // sequential stream; parallelism is unused
+ Collectors.toMap(Tuple2::_1, Tuple2::_2, (oldVal, newVal) -> newVal) // on a duplicate key the later value replaces the earlier one
+ );
+ }
+
+ @Override
+ public void setProperty(EngineProperty key, String value) {
+ // no operation for now
+ }
+
+ @Override
+ public Option<String> getProperty(EngineProperty key) {
+ return Option.empty(); // nothing is stored by setProperty, so every key reads as empty
+ }
+
+ @Override
+ public void setJobStatus(String activeModule, String activityDescription) {
+ // no operation for now
+ }
+}
diff --git a/hudi-client/hudi-java-client/src/main/resources/log4j.properties b/hudi-client/hudi-java-client/src/main/resources/log4j.properties
new file mode 100644
index 0000000..ff268fa
--- /dev/null
+++ b/hudi-client/hudi-java-client/src/main/resources/log4j.properties
@@ -0,0 +1,23 @@
+###
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+###
+log4j.rootLogger=INFO, A1
+# A1 is set to be a ConsoleAppender.
+log4j.appender.A1=org.apache.log4j.ConsoleAppender
+# A1 uses PatternLayout.
+log4j.appender.A1.layout=org.apache.log4j.PatternLayout
+log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
diff --git a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/common/TestHoodieJavaEngineContext.java b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/common/TestHoodieJavaEngineContext.java
new file mode 100644
index 0000000..b81c11b
--- /dev/null
+++ b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/common/TestHoodieJavaEngineContext.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.client.common;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hudi.DummyTaskContextSupplier;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.Assertions;
+
+import scala.Tuple2;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+public class TestHoodieJavaEngineContext { // exercises map, flatMap, foreach and mapToPair of HoodieJavaEngineContext
+ private HoodieJavaEngineContext context =
+ new HoodieJavaEngineContext(new Configuration(), new DummyTaskContextSupplier()); // default Configuration plus the null-returning stub supplier
+
+ @Test
+ public void testMap() {
+ List<Integer> mapList = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ List<Integer> result = context.map(mapList, x -> x + 1, 2); // expected values are 2..11
+ result.removeAll(mapList); // drop everything that also appears in the input 1..10
+
+ Assertions.assertEquals(1, result.size());
+ Assertions.assertEquals(11, result.get(0)); // 11 is the only mapped value not present in the input
+ }
+
+ @Test
+ public void testFlatMap() {
+ List<String> list1 = Arrays.asList("a", "b", "c");
+ List<String> list2 = Arrays.asList("d", "e", "f");
+ List<String> list3 = Arrays.asList("g", "h", "i");
+
+ List<List<String>> inputList = new ArrayList<>();
+ inputList.add(list1);
+ inputList.add(list2);
+ inputList.add(list3);
+
+ List<String> result = context.flatMap(inputList, Collection::stream, 2); // flatten the three inner lists
+
+ Assertions.assertEquals(9, result.size()); // 3 lists x 3 elements each
+ }
+
+ @Test
+ public void testForeach() {
+ List<Integer> mapList = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ List<Integer> result = new ArrayList<>(10);
+ context.foreach(mapList, result::add, 2); // the consumer appends each element to a plain ArrayList
+
+ Assertions.assertEquals(result.size(), mapList.size());
+ Assertions.assertTrue(result.containsAll(mapList));
+ }
+
+ @Test
+ public void testMapToPair() {
+ List<String> mapList = Arrays.asList("hudi_flink", "hudi_spark", "hudi_java");
+
+ Map<String, String> resultMap = context.mapToPair(mapList, x -> {
+ String[] splits = x.split("_");
+ return Tuple2.apply(splits[0], splits[1]); // every input yields the key "hudi", so all three keys collide
+ }, 2);
+
+ Assertions.assertNotNull(resultMap.get("hudi")); // only checks that some value survived under the shared key
+ }
+}
diff --git a/hudi-client/hudi-java-client/src/test/resources/log4j-surefire.properties b/hudi-client/hudi-java-client/src/test/resources/log4j-surefire.properties
new file mode 100644
index 0000000..32af462
--- /dev/null
+++ b/hudi-client/hudi-java-client/src/test/resources/log4j-surefire.properties
@@ -0,0 +1,31 @@
+###
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+###
+log4j.rootLogger=WARN, CONSOLE
+log4j.logger.org.apache=INFO
+log4j.logger.org.apache.hudi=DEBUG
+log4j.logger.org.apache.hadoop.hbase=ERROR
+
+# CONSOLE is set to be a ConsoleAppender.
+log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
+# CONSOLE uses PatternLayout.
+log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
+log4j.appender.CONSOLE.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
+log4j.appender.CONSOLE.filter.a=org.apache.log4j.varia.LevelRangeFilter
+log4j.appender.CONSOLE.filter.a.AcceptOnMatch=true
+log4j.appender.CONSOLE.filter.a.LevelMin=WARN
+log4j.appender.CONSOLE.filter.a.LevelMax=FATAL
diff --git a/hudi-client/pom.xml b/hudi-client/pom.xml
index e8ff9e9..cb83839 100644
--- a/hudi-client/pom.xml
+++ b/hudi-client/pom.xml
@@ -32,6 +32,7 @@
<modules>
<module>hudi-client-common</module>
+ <module>hudi-java-client</module>
<module>hudi-spark-client</module>
<module>hudi-flink-client</module>
</modules>