You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by ec...@apache.org on 2018/11/27 15:39:11 UTC

[beam] 01/20: Add an empty spark-structured-streaming runner project targeting spark 2.4.0

This is an automated email from the ASF dual-hosted git repository.

echauchot pushed a commit to branch spark-runner_structured-streaming
in repository https://gitbox.apache.org/repos/asf/beam.git

commit 03d333d62193181e24d8a874068f99796b987d12
Author: Etienne Chauchot <ec...@apache.org>
AuthorDate: Tue Nov 13 17:07:02 2018 +0100

    Add an empty spark-structured-streaming runner project targeting spark 2.4.0
---
 .../org/apache/beam/gradle/BeamModulePlugin.groovy |  2 +
 runners/spark-structured-streaming/build.gradle    | 93 ++++++++++++++++++++++
 settings.gradle                                    |  2 +
 3 files changed, 97 insertions(+)

diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
index ba17983..8888cd2 100644
--- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
+++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
@@ -321,6 +321,7 @@ class BeamModulePlugin implements Plugin<Project> {
     def hadoop_version = "2.7.3"
     def jackson_version = "2.9.5"
     def spark_version = "2.3.2"
+    def spark_structured_streaming_version = "2.4.0"
     def apex_core_version = "3.7.0"
     def apex_malhar_version = "3.4.0"
     def postgres_version = "42.2.2"
@@ -432,6 +433,7 @@ class BeamModulePlugin implements Plugin<Project> {
         slf4j_jdk14                                 : "org.slf4j:slf4j-jdk14:1.7.25",
         slf4j_log4j12                               : "org.slf4j:slf4j-log4j12:1.7.25",
         snappy_java                                 : "org.xerial.snappy:snappy-java:1.1.4",
+        spark_sql                                   : "org.apache.spark:spark-sql_2.11:$spark_structured_streaming_version",
         spark_core                                  : "org.apache.spark:spark-core_2.11:$spark_version",
         spark_network_common                        : "org.apache.spark:spark-network-common_2.11:$spark_version",
         spark_streaming                             : "org.apache.spark:spark-streaming_2.11:$spark_version",
diff --git a/runners/spark-structured-streaming/build.gradle b/runners/spark-structured-streaming/build.gradle
new file mode 100644
index 0000000..b33a2b6
--- /dev/null
+++ b/runners/spark-structured-streaming/build.gradle
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import groovy.json.JsonOutput
+
+apply plugin: org.apache.beam.gradle.BeamModulePlugin
+applyJavaNature()
+
+description = "Apache Beam :: Runners :: Spark-Structured-Streaming"
+
+/*
+ * We need to rely on manually specifying these evaluationDependsOn to ensure that
+ * the following projects are evaluated before we evaluate this project. This is because
+ * we are attempting to reference the "sourceSets.test.output" directly.
+ */
+evaluationDependsOn(":beam-sdks-java-core")
+
+configurations {
+  validatesRunner
+}
+
+test {
+  systemProperty "spark.ui.enabled", "false"
+  systemProperty "spark.ui.showConsoleProgress", "false"
+  forkEvery 1
+  maxParallelForks 4
+  useJUnit {
+    //TODO add test excludes
+  }
+}
+
+dependencies {
+  shadow project(path: ":beam-model-pipeline", configuration: "shadow")
+  shadow project(path: ":beam-sdks-java-core", configuration: "shadow")
+  shadow project(path: ":beam-runners-core-construction-java", configuration: "shadow")
+  shadow project(path: ":beam-runners-core-java", configuration: "shadow")
+  shadow library.java.guava
+  shadow library.java.jackson_annotations
+  shadow library.java.slf4j_api
+  shadow library.java.joda_time
+  shadow "io.dropwizard.metrics:metrics-core:3.1.2"
+  shadow library.java.jackson_module_scala
+  provided library.java.spark_sql
+  provided library.java.hadoop_common
+  provided library.java.hadoop_mapreduce_client_core
+  provided library.java.commons_compress
+  provided library.java.commons_lang3
+  provided library.java.commons_io_2x
+  provided library.java.hamcrest_core
+  provided library.java.hamcrest_library
+  provided "org.apache.zookeeper:zookeeper:3.4.11"
+  provided "org.scala-lang:scala-library:2.11.8"
+  provided "com.esotericsoftware.kryo:kryo:2.21"
+  shadowTest project(path: ":beam-sdks-java-io-kafka", configuration: "shadow")
+  shadowTest project(path: ":beam-sdks-java-core", configuration: "shadowTest")
+  shadowTest project(path: ":beam-runners-core-java", configuration: "shadowTest")
+  shadowTest library.java.avro
+  shadowTest library.java.kafka_clients
+  shadowTest library.java.junit
+  shadowTest library.java.mockito_core
+  shadowTest library.java.jackson_dataformat_yaml
+  shadowTest "org.apache.kafka:kafka_2.11:0.11.0.1"
+  validatesRunner project(path: ":beam-sdks-java-core", configuration: "shadowTest")
+  validatesRunner project(path: project.path, configuration: "shadowTest")
+  validatesRunner project(path: project.path, configuration: "shadow")
+  validatesRunner project(path: project.path, configuration: "provided")
+}
+
+configurations.testRuntimeClasspath {
+  // Testing the Spark runner causes a StackOverflowError if slf4j-jdk14 is on the classpath
+  exclude group: "org.slf4j", module: "slf4j-jdk14"
+}
+
+configurations.validatesRunner {
+  // Testing the Spark runner causes a StackOverflowError if slf4j-jdk14 is on the classpath
+  exclude group: "org.slf4j", module: "slf4j-jdk14"
+}
+
diff --git a/settings.gradle b/settings.gradle
index 7c21cdc..daca389 100644
--- a/settings.gradle
+++ b/settings.gradle
@@ -62,6 +62,8 @@ include "beam-runners-reference-job-server"
 project(":beam-runners-reference-job-server").dir = file("runners/reference/job-server")
 include "beam-runners-spark"
 project(":beam-runners-spark").dir = file("runners/spark")
+include "beam-runners-spark-structured-streaming"
+project(":beam-runners-spark-structured-streaming").dir = file("runners/spark-structured-streaming")
 include "beam-runners-samza"
 project(":beam-runners-samza").dir = file("runners/samza")
 include "beam-sdks-go"