You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by jo...@apache.org on 2014/12/23 21:54:27 UTC
spark git commit: [SPARK-4914][Build] Cleans lib_managed before compiling with Hive 0.13.1

Repository: spark
Updated Branches:
  refs/heads/master 9c251c555 -> 395b771fe


[SPARK-4914][Build] Cleans lib_managed before compiling with Hive 0.13.1

This PR tries to fix the Hive test failures encountered in PR #3157 by cleaning `lib_managed` before building the assembly jar against Hive 0.13.1 in `dev/run-tests`. Otherwise, two sets of datanucleus jars would be left in `lib_managed` and may mess up the classpath while executing the Hive test suites. Please refer to [this thread][1] for details. A clean build would be even safer, but we only clean `lib_managed` here to save build time.

This PR also takes the chance to clean up some minor typos and formatting issues in the comments.

[1]: https://github.com/apache/spark/pull/3157#issuecomment-67656488

<!-- Reviewable:start -->
[<img src="https://reviewable.io/review_button.png" height=40 alt="Review on Reviewable"/>](https://reviewable.io/reviews/apache/spark/3756)
<!-- Reviewable:end -->

Author: Cheng Lian <li...@databricks.com>

Closes #3756 from liancheng/clean-lib-managed and squashes the following commits:

e2bd21d [Cheng Lian] Adds lib_managed to clean set
c9f2f3e [Cheng Lian] Cleans lib_managed before compiling with Hive 0.13.1


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/395b771f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/395b771f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/395b771f

Branch: refs/heads/master
Commit: 395b771feed3fc90e5053defbe86dbd673fde582
Parents: 9c251c5
Author: Cheng Lian <li...@databricks.com>
Authored: Tue Dec 23 12:54:20 2014 -0800
Committer: Josh Rosen <jo...@databricks.com>
Committed: Tue Dec 23 12:54:20 2014 -0800

----------------------------------------------------------------------
 dev/run-tests | 26 ++++++++++++++------------
 pom.xml       |  3 +++
 2 files changed, 17 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/395b771f/dev/run-tests
----------------------------------------------------------------------
diff --git a/dev/run-tests b/dev/run-tests
index 328a73b..9192cb7 100755
--- a/dev/run-tests
+++ b/dev/run-tests
@@ -141,20 +141,22 @@ CURRENT_BLOCK=$BLOCK_BUILD
 {
 
   # NOTE: echo "q" is needed because sbt on encountering a build file with failure
-  #+ (either resolution or compilation) prompts the user for input either q, r, etc
-  #+ to quit or retry. This echo is there to make it not block.
+  # (either resolution or compilation) prompts the user for input either q, r, etc
+  # to quit or retry. This echo is there to make it not block.
   # NOTE: Do not quote $BUILD_MVN_PROFILE_ARGS or else it will be interpreted as a
-  #+ single argument!
+  # single argument!
   # QUESTION: Why doesn't 'yes "q"' work?
   # QUESTION: Why doesn't 'grep -v -e "^\[info\] Resolving"' work?
-  # First build with 0.12 to ensure patches do not break the hive 12 build
+  # First build with Hive 0.12.0 to ensure patches do not break the Hive 0.12.0 build
   HIVE_12_BUILD_ARGS="$SBT_MAVEN_PROFILES_ARGS -Phive -Phive-thriftserver -Phive-0.12.0"
-  echo "[info] Compile with hive 0.12"
+  echo "[info] Compile with Hive 0.12.0"
   echo -e "q\n" \
     | sbt/sbt $HIVE_12_BUILD_ARGS clean hive/compile hive-thriftserver/compile \
     | grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"
 
-  # Then build with default version(0.13.1) because tests are based on this version
+  # Then build with default Hive version (0.13.1) because tests are based on this version
+  echo "[info] Compile with Hive 0.13.1"
+  rm -rf lib_managed
   echo "[info] Building Spark with these arguments: $SBT_MAVEN_PROFILES_ARGS"\
     " -Phive -Phive-thriftserver"
   echo -e "q\n" \
@@ -178,7 +180,7 @@ CURRENT_BLOCK=$BLOCK_SPARK_UNIT_TESTS
   
   if [ -n "$_SQL_TESTS_ONLY" ]; then
     # This must be an array of individual arguments. Otherwise, having one long string
-    #+ will be interpreted as a single test, which doesn't work.
+    # will be interpreted as a single test, which doesn't work.
     SBT_MAVEN_TEST_ARGS=("catalyst/test" "sql/test" "hive/test" "mllib/test")
   else
     SBT_MAVEN_TEST_ARGS=("test")
@@ -187,11 +189,11 @@ CURRENT_BLOCK=$BLOCK_SPARK_UNIT_TESTS
   echo "[info] Running Spark tests with these arguments: $SBT_MAVEN_PROFILES_ARGS ${SBT_MAVEN_TEST_ARGS[@]}"
   
   # NOTE: echo "q" is needed because sbt on encountering a build file with failure
-  #+ (either resolution or compilation) prompts the user for input either q, r, etc
-  #+ to quit or retry. This echo is there to make it not block.
+  # (either resolution or compilation) prompts the user for input either q, r, etc
+  # to quit or retry. This echo is there to make it not block.
   # NOTE: Do not quote $SBT_MAVEN_PROFILES_ARGS or else it will be interpreted as a 
-  #+ single argument!
-  #+ "${SBT_MAVEN_TEST_ARGS[@]}" is cool because it's an array.
+  # single argument!
+  # "${SBT_MAVEN_TEST_ARGS[@]}" is cool because it's an array.
   # QUESTION: Why doesn't 'yes "q"' work?
   # QUESTION: Why doesn't 'grep -v -e "^\[info\] Resolving"' work?
   echo -e "q\n" \
@@ -210,7 +212,7 @@ CURRENT_BLOCK=$BLOCK_PYSPARK_UNIT_TESTS
 
 echo ""
 echo "========================================================================="
-echo "Detecting binary incompatibilites with MiMa"
+echo "Detecting binary incompatibilities with MiMa"
 echo "========================================================================="
 
 CURRENT_BLOCK=$BLOCK_MIMA

http://git-wip-us.apache.org/repos/asf/spark/blob/395b771f/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 4be8c22..e4db139 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1010,6 +1010,9 @@
               <fileset>
                 <directory>checkpoint</directory>
               </fileset>
+              <fileset>
+                <directory>lib_managed</directory>
+              </fileset>
             </filesets>
           </configuration>
         </plugin>


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org