You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2020/06/27 02:44:27 UTC

[spark] branch master updated: [SPARK-32058][BUILD] Use Apache Hadoop 3.2.0 dependency by default

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 9c134b5  [SPARK-32058][BUILD] Use Apache Hadoop 3.2.0 dependency by default
9c134b5 is described below

commit 9c134b57bff5b7e7f9c85aeed2e9539117a5b57d
Author: Dongjoon Hyun <do...@apache.org>
AuthorDate: Fri Jun 26 19:43:29 2020 -0700

    [SPARK-32058][BUILD] Use Apache Hadoop 3.2.0 dependency by default
    
    ### What changes were proposed in this pull request?
    
    According to the dev mailing list discussion, this PR aims to switch the default Apache Hadoop dependency from 2.7.4 to 3.2.0 for Apache Spark 3.1.0 in December 2020.
    
    | Item | Default Hadoop Dependency |
    |------|-----------------------------|
    | Apache Spark Website | 3.2.0 |
    | Apache Download Site | 3.2.0 |
    | Apache Snapshot | 3.2.0 |
    | Maven Central | 3.2.0 |
    | PyPI | 2.7.4 (We will switch later) |
    | CRAN | 2.7.4 (We will switch later) |
    | Homebrew | 3.2.0 (already) |
    
    In the Apache Spark 3.0.0 release, we focused on other features. This PR targets [Apache Spark 3.1.0, scheduled for December 2020](https://spark.apache.org/versioning-policy.html).
    
    ### Why are the changes needed?
    
    Apache Hadoop 3.2 has many fixes and new cloud-friendly features.
    
    **Reference**
    - 2017-08-04: https://hadoop.apache.org/release/2.7.4.html
    - 2019-01-16: https://hadoop.apache.org/release/3.2.0.html
    
    ### Does this PR introduce _any_ user-facing change?
    
    Since the default Hadoop dependency changes, users will get better support in a cloud environment.
    
    ### How was this patch tested?
    
    Pass the Jenkins.
    
    Closes #28897 from dongjoon-hyun/SPARK-32058.
    
    Authored-by: Dongjoon Hyun <do...@apache.org>
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 dev/create-release/release-build.sh                    |  4 ++--
 dev/run-tests.py                                       |  2 +-
 pom.xml                                                | 18 +++++++++---------
 resource-managers/kubernetes/integration-tests/pom.xml |  6 +++---
 4 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh
index eb97258..3163345 100755
--- a/dev/create-release/release-build.sh
+++ b/dev/create-release/release-build.sh
@@ -276,14 +276,14 @@ if [[ "$1" == "package" ]]; then
   # list of packages to be built, so it's ok for things to be missing in BINARY_PKGS_EXTRA.
 
   declare -A BINARY_PKGS_ARGS
-  BINARY_PKGS_ARGS["hadoop2.7"]="-Phadoop-2.7 $HIVE_PROFILES"
+  BINARY_PKGS_ARGS["hadoop3.2"]="-Phadoop-3.2 $HIVE_PROFILES"
   if ! is_dry_run; then
     BINARY_PKGS_ARGS["without-hadoop"]="-Phadoop-provided"
     if [[ $SPARK_VERSION < "3.0." ]]; then
       BINARY_PKGS_ARGS["hadoop2.6"]="-Phadoop-2.6 $HIVE_PROFILES"
     else
       BINARY_PKGS_ARGS["hadoop2.7-hive1.2"]="-Phadoop-2.7 -Phive-1.2 $HIVE_PROFILES"
-      BINARY_PKGS_ARGS["hadoop3.2"]="-Phadoop-3.2 $HIVE_PROFILES"
+      BINARY_PKGS_ARGS["hadoop2.7"]="-Phadoop-2.7 $HIVE_PROFILES"
     fi
   fi
 
diff --git a/dev/run-tests.py b/dev/run-tests.py
index 5255a77..ec04c37 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -574,7 +574,7 @@ def main():
         # if we're on the Amplab Jenkins build servers setup variables
         # to reflect the environment settings
         build_tool = os.environ.get("AMPLAB_JENKINS_BUILD_TOOL", "sbt")
-        hadoop_version = os.environ.get("AMPLAB_JENKINS_BUILD_PROFILE", "hadoop2.7")
+        hadoop_version = os.environ.get("AMPLAB_JENKINS_BUILD_PROFILE", "hadoop3.2")
         hive_version = os.environ.get("AMPLAB_JENKINS_BUILD_HIVE_PROFILE", "hive2.3")
         test_env = "amplab_jenkins"
         # add path for Python3 in Jenkins if we're calling from a Jenkins machine
diff --git a/pom.xml b/pom.xml
index 82c12ae..08ca13b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -119,11 +119,11 @@
     <sbt.project.name>spark</sbt.project.name>
     <slf4j.version>1.7.30</slf4j.version>
     <log4j.version>1.2.17</log4j.version>
-    <hadoop.version>2.7.4</hadoop.version>
+    <hadoop.version>3.2.0</hadoop.version>
     <protobuf.version>2.5.0</protobuf.version>
     <yarn.version>${hadoop.version}</yarn.version>
     <zookeeper.version>3.4.14</zookeeper.version>
-    <curator.version>2.7.1</curator.version>
+    <curator.version>2.13.0</curator.version>
     <hive.group>org.apache.hive</hive.group>
     <hive.classifier>core</hive.classifier>
     <!-- Version used in Maven Hive dependency -->
@@ -170,7 +170,7 @@
     <snappy.version>1.1.7.5</snappy.version>
     <netlib.java.version>1.1.2</netlib.java.version>
     <commons-codec.version>1.10</commons-codec.version>
-    <commons-io.version>2.4</commons-io.version>
+    <commons-io.version>2.5</commons-io.version>
     <!-- org.apache.commons/commons-lang/-->
     <commons-lang2.version>2.6</commons-lang2.version>
     <!-- org.apache.commons/commons-lang3/-->
@@ -3054,16 +3054,16 @@
 
     <profile>
       <id>hadoop-2.7</id>
-      <!-- Default hadoop profile. Uses global properties. -->
+      <properties>
+        <hadoop.version>2.7.4</hadoop.version>
+        <curator.version>2.7.1</curator.version>
+        <commons-io.version>2.4</commons-io.version>
+      </properties>
     </profile>
 
     <profile>
       <id>hadoop-3.2</id>
-      <properties>
-        <hadoop.version>3.2.0</hadoop.version>
-        <curator.version>2.13.0</curator.version>
-        <commons-io.version>2.5</commons-io.version>
-      </properties>
+      <!-- Default hadoop profile. Uses global properties. -->
     </profile>
 
     <profile>
diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml
index 5035404..d1e00cc 100644
--- a/resource-managers/kubernetes/integration-tests/pom.xml
+++ b/resource-managers/kubernetes/integration-tests/pom.xml
@@ -186,9 +186,6 @@
   <profiles>
     <profile>
       <id>hadoop-2.7</id>
-      <activation>
-        <activeByDefault>true</activeByDefault>
-      </activation>
       <dependencies>
         <dependency>
           <groupId>com.amazonaws</groupId>
@@ -200,6 +197,9 @@
     </profile>
     <profile>
       <id>hadoop-3.2</id>
+      <activation>
+        <activeByDefault>true</activeByDefault>
+      </activation>
       <dependencies>
         <dependency>
           <groupId>com.amazonaws</groupId>


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org