You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by vi...@apache.org on 2020/11/23 18:06:40 UTC

[hudi] branch master updated: [HUDI-1364] Add HoodieJavaEngineContext to hudi-java-client (#2222)

This is an automated email from the ASF dual-hosted git repository.

vinoth pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new d9411c3  [HUDI-1364] Add HoodieJavaEngineContext to hudi-java-client (#2222)
d9411c3 is described below

commit d9411c38dbe6684ed2b0b59b27fbb1a46ac16c34
Author: Shen Hong <sh...@126.com>
AuthorDate: Tue Nov 24 02:06:28 2020 +0800

    [HUDI-1364] Add HoodieJavaEngineContext to hudi-java-client (#2222)
---
 .../client/common/function/FunctionWrapper.java    |   0
 .../org/apache/hudi/DummyTaskContextSupplier.java  |  48 +++++++
 hudi-client/hudi-java-client/pom.xml               | 152 +++++++++++++++++++++
 .../client/common/HoodieJavaEngineContext.java     |  86 ++++++++++++
 .../src/main/resources/log4j.properties            |  23 ++++
 .../client/common/TestHoodieJavaEngineContext.java |  85 ++++++++++++
 .../src/test/resources/log4j-surefire.properties   |  31 +++++
 hudi-client/pom.xml                                |   1 +
 8 files changed, 426 insertions(+)

diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/client/common/function/FunctionWrapper.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/common/function/FunctionWrapper.java
similarity index 100%
rename from hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/client/common/function/FunctionWrapper.java
rename to hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/common/function/FunctionWrapper.java
diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/DummyTaskContextSupplier.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/DummyTaskContextSupplier.java
new file mode 100644
index 0000000..0221705
--- /dev/null
+++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/DummyTaskContextSupplier.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi;
+
+import org.apache.hudi.client.common.EngineProperty;
+import org.apache.hudi.client.common.TaskContextSupplier;
+import org.apache.hudi.common.util.Option;
+
+import java.util.function.Supplier;
+
+public class DummyTaskContextSupplier extends TaskContextSupplier {
+
+  @Override
+  public Supplier<Integer> getPartitionIdSupplier() {
+    return null;
+  }
+
+  @Override
+  public Supplier<Integer> getStageIdSupplier() {
+    return null;
+  }
+
+  @Override
+  public Supplier<Long> getAttemptIdSupplier() {
+    return null;
+  }
+
+  @Override
+  public Option<String> getProperty(EngineProperty prop) {
+    return null;
+  }
+}
diff --git a/hudi-client/hudi-java-client/pom.xml b/hudi-client/hudi-java-client/pom.xml
new file mode 100644
index 0000000..6429ade
--- /dev/null
+++ b/hudi-client/hudi-java-client/pom.xml
@@ -0,0 +1,152 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>hudi-client</artifactId>
+        <groupId>org.apache.hudi</groupId>
+        <version>0.6.1-SNAPSHOT</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>hudi-java-client</artifactId>
+    <version>${parent.version}</version>
+
+    <name>hudi-java-client</name>
+    <packaging>jar</packaging>
+
+    <dependencies>
+        <!-- Hudi  -->
+        <dependency>
+            <groupId>org.apache.hudi</groupId>
+            <artifactId>hudi-client-common</artifactId>
+            <version>${parent.version}</version>
+        </dependency>
+
+        <!-- Hoodie - Test -->
+        <dependency>
+            <groupId>org.apache.hudi</groupId>
+            <artifactId>hudi-common</artifactId>
+            <version>${project.version}</version>
+            <classifier>tests</classifier>
+            <type>test-jar</type>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hudi</groupId>
+            <artifactId>hudi-client-common</artifactId>
+            <version>${project.version}</version>
+            <classifier>tests</classifier>
+            <type>test-jar</type>
+            <scope>test</scope>
+        </dependency>
+
+        <!-- Test -->
+        <dependency>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-api</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.junit.vintage</groupId>
+            <artifactId>junit-vintage-engine</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-params</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.mockito</groupId>
+            <artifactId>mockito-junit-jupiter</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.junit.platform</groupId>
+            <artifactId>junit-platform-runner</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.junit.platform</groupId>
+            <artifactId>junit-platform-suite-api</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.junit.platform</groupId>
+            <artifactId>junit-platform-commons</artifactId>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.jacoco</groupId>
+                <artifactId>jacoco-maven-plugin</artifactId>
+            </plugin>
+
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <executions>
+                    <execution>
+                        <phase>compile</phase>
+                        <goals>
+                            <goal>compile</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-jar-plugin</artifactId>
+                <executions>
+                    <execution>
+                        <goals>
+                            <goal>test-jar</goal>
+                        </goals>
+                        <phase>test-compile</phase>
+                    </execution>
+                </executions>
+                <configuration>
+                    <skip>false</skip>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.rat</groupId>
+                <artifactId>apache-rat-plugin</artifactId>
+            </plugin>
+        </plugins>
+
+        <resources>
+            <resource>
+                <directory>src/main/resources</directory>
+            </resource>
+            <resource>
+                <directory>src/test/resources</directory>
+            </resource>
+        </resources>
+    </build>
+
+</project>
diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/common/HoodieJavaEngineContext.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/common/HoodieJavaEngineContext.java
new file mode 100644
index 0000000..a04a18b
--- /dev/null
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/common/HoodieJavaEngineContext.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.client.common;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hudi.client.common.function.SerializableConsumer;
+import org.apache.hudi.client.common.function.SerializableFunction;
+import org.apache.hudi.client.common.function.SerializablePairFunction;
+import org.apache.hudi.common.config.SerializableConfiguration;
+import org.apache.hudi.common.util.Option;
+
+import scala.Tuple2;
+
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import static java.util.stream.Collectors.toList;
+import static org.apache.hudi.client.common.function.FunctionWrapper.throwingFlatMapWrapper;
+import static org.apache.hudi.client.common.function.FunctionWrapper.throwingForeachWrapper;
+import static org.apache.hudi.client.common.function.FunctionWrapper.throwingMapToPairWrapper;
+import static org.apache.hudi.client.common.function.FunctionWrapper.throwingMapWrapper;
+
+/**
+ * A java engine implementation of HoodieEngineContext.
+ */
+public class HoodieJavaEngineContext extends HoodieEngineContext {
+
+  public HoodieJavaEngineContext(Configuration conf, TaskContextSupplier taskContextSupplier) {
+    super(new SerializableConfiguration(conf), taskContextSupplier);
+  }
+
+  @Override
+  public <I, O> List<O> map(List<I> data, SerializableFunction<I, O> func, int parallelism) {
+    return data.stream().parallel().map(throwingMapWrapper(func)).collect(toList());
+  }
+
+  @Override
+  public <I, O> List<O> flatMap(List<I> data, SerializableFunction<I, Stream<O>> func, int parallelism) {
+    return data.stream().parallel().flatMap(throwingFlatMapWrapper(func)).collect(toList());
+  }
+
+  @Override
+  public <I> void foreach(List<I> data, SerializableConsumer<I> consumer, int parallelism) {
+    data.stream().forEach(throwingForeachWrapper(consumer));
+  }
+
+  @Override
+  public <I, K, V> Map<K, V> mapToPair(List<I> data, SerializablePairFunction<I, K, V> func, Integer parallelism) {
+    return data.stream().map(throwingMapToPairWrapper(func)).collect(
+        Collectors.toMap(Tuple2::_1, Tuple2::_2, (oldVal, newVal) -> newVal)
+    );
+  }
+
+  @Override
+  public void setProperty(EngineProperty key, String value) {
+    // no operation for now
+  }
+
+  @Override
+  public Option<String> getProperty(EngineProperty key) {
+    return Option.empty();
+  }
+
+  @Override
+  public void setJobStatus(String activeModule, String activityDescription) {
+    // no operation for now
+  }
+}
diff --git a/hudi-client/hudi-java-client/src/main/resources/log4j.properties b/hudi-client/hudi-java-client/src/main/resources/log4j.properties
new file mode 100644
index 0000000..ff268fa
--- /dev/null
+++ b/hudi-client/hudi-java-client/src/main/resources/log4j.properties
@@ -0,0 +1,23 @@
+###
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+###
+log4j.rootLogger=INFO, A1
+# A1 is set to be a ConsoleAppender.
+log4j.appender.A1=org.apache.log4j.ConsoleAppender
+# A1 uses PatternLayout.
+log4j.appender.A1.layout=org.apache.log4j.PatternLayout
+log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
diff --git a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/common/TestHoodieJavaEngineContext.java b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/common/TestHoodieJavaEngineContext.java
new file mode 100644
index 0000000..b81c11b
--- /dev/null
+++ b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/common/TestHoodieJavaEngineContext.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.client.common;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hudi.DummyTaskContextSupplier;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.Assertions;
+
+import scala.Tuple2;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+public class TestHoodieJavaEngineContext {
+  private HoodieJavaEngineContext context =
+      new HoodieJavaEngineContext(new Configuration(), new DummyTaskContextSupplier());
+
+  @Test
+  public void testMap() {
+    List<Integer> mapList = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+    List<Integer> result = context.map(mapList, x -> x + 1, 2);
+    result.removeAll(mapList);
+
+    Assertions.assertEquals(1, result.size());
+    Assertions.assertEquals(11, result.get(0));
+  }
+
+  @Test
+  public void testFlatMap() {
+    List<String> list1 = Arrays.asList("a", "b", "c");
+    List<String> list2 = Arrays.asList("d", "e", "f");
+    List<String> list3 = Arrays.asList("g", "h", "i");
+
+    List<List<String>> inputList = new ArrayList<>();
+    inputList.add(list1);
+    inputList.add(list2);
+    inputList.add(list3);
+
+    List<String> result = context.flatMap(inputList, Collection::stream, 2);
+
+    Assertions.assertEquals(9, result.size());
+  }
+
+  @Test
+  public void testForeach() {
+    List<Integer> mapList = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+    List<Integer> result = new ArrayList<>(10);
+    context.foreach(mapList, result::add, 2);
+
+    Assertions.assertEquals(result.size(), mapList.size());
+    Assertions.assertTrue(result.containsAll(mapList));
+  }
+
+  @Test
+  public void testMapToPair() {
+    List<String> mapList = Arrays.asList("hudi_flink", "hudi_spark", "hudi_java");
+
+    Map<String, String> resultMap = context.mapToPair(mapList, x -> {
+      String[] splits = x.split("_");
+      return Tuple2.apply(splits[0], splits[1]);
+    }, 2);
+
+    Assertions.assertNotNull(resultMap.get("hudi"));
+  }
+}
diff --git a/hudi-client/hudi-java-client/src/test/resources/log4j-surefire.properties b/hudi-client/hudi-java-client/src/test/resources/log4j-surefire.properties
new file mode 100644
index 0000000..32af462
--- /dev/null
+++ b/hudi-client/hudi-java-client/src/test/resources/log4j-surefire.properties
@@ -0,0 +1,31 @@
+###
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+###
+log4j.rootLogger=WARN, CONSOLE
+log4j.logger.org.apache=INFO
+log4j.logger.org.apache.hudi=DEBUG
+log4j.logger.org.apache.hadoop.hbase=ERROR
+
+# A1 is set to be a ConsoleAppender.
+log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
+# A1 uses PatternLayout.
+log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
+log4j.appender.CONSOLE.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
+log4j.appender.CONSOLE.filter.a=org.apache.log4j.varia.LevelRangeFilter
+log4j.appender.CONSOLE.filter.a.AcceptOnMatch=true
+log4j.appender.CONSOLE.filter.a.LevelMin=WARN
+log4j.appender.CONSOLE.filter.a.LevelMax=FATAL
diff --git a/hudi-client/pom.xml b/hudi-client/pom.xml
index e8ff9e9..cb83839 100644
--- a/hudi-client/pom.xml
+++ b/hudi-client/pom.xml
@@ -32,6 +32,7 @@
 
   <modules>
     <module>hudi-client-common</module>
+    <module>hudi-java-client</module>
     <module>hudi-spark-client</module>
     <module>hudi-flink-client</module>
   </modules>