You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2021/04/09 01:43:49 UTC

[impala] 01/02: IMPALA-10455: Reorder Maven repositories for cleaner mirror semantics

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 267f4d67f4f9c8b10af539f8f2e0a2abfa4bafd5
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Fri Jan 8 17:16:36 2021 -0800

    IMPALA-10455: Reorder Maven repositories for cleaner mirror semantics
    
    When using a Maven mirror that uses a mirrorOf pattern, the order
    of repositories in the pom.xml has a strong influence on whether the
    build tries the mirror for a particular artifact. If an early
    repository matches the mirrorOf condition, Maven may try the mirror
    for all artifacts, even those that only exist in the s3 bucket.
    This extra check can slow down the build, especially if the mirror
    is slow to respond for unknown artifacts.
    
    For Impala, the common case is for a mirror to cover everything
    except the artifacts that come from the Kudu local repository or
    the s3 bucket. To optimize for that case, this reorders the Maven
    repositories to be in this order:
    1. Local/S3 repositories
    2. Regular repositories
    3. Banned repositories
    The repositories are otherwise unchanged.
    
    Testing:
     - Ran an ordinary build
     - Ran a build with a mirrorOf "external:*,!impala.cdp.repo" and verified
       that the build went directly to the s3 bucket first.
    
    Change-Id: I7046c7ec5391833e98ee6a463fb8c08b6a04cb26
    Reviewed-on: http://gerrit.cloudera.org:8080/17020
    Reviewed-by: Joe McDonnell <jo...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 java/pom.xml | 91 ++++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 54 insertions(+), 37 deletions(-)

diff --git a/java/pom.xml b/java/pom.xml
index f7b80a2..efbecd6 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -75,7 +75,58 @@ under the License.
 
   <repositories>
     <!--
-    Blacklist Apache repositories that tend to cause timeouts - see IMPALA-8516.
+    Repository order matters, especially when using mirrors. This optimizes
+    for the common case where the mirror is intended to catch artifacts
+    that would otherwise come from central and other physical servers.
+    That means putting repositories in this order:
+    1. Local / S3 repositories
+    2. Regular remote repositories
+    3. Banned repositories
+    This allows builds to get artifacts from the local repositories
+    and s3 buckets without trying the mirror first, while still
+    getting other artifacts from the mirror.
+    -->
+    <!-- Local/S3 repositories -->
+    <repository>
+      <id>impala.toolchain.kudu.repo</id>
+      <url>${env.IMPALA_TOOLCHAIN_KUDU_MAVEN_REPOSITORY}</url>
+      <name>Impala Toolchain Kudu Repository</name>
+      <releases>
+        <enabled>${env.IMPALA_TOOLCHAIN_KUDU_MAVEN_REPOSITORY_ENABLED}</enabled>
+      </releases>
+      <!--
+      This repository now uses explicit versions, so snapshots are no longer required.
+      -->
+      <snapshots>
+        <enabled>false</enabled>
+      </snapshots>
+    </repository>
+    <repository>
+      <!--
+      The Maven repository for the CDP build identified by CDP_BUILD_NUMBER.
+      CDP does not use maven SNAPSHOT versions - every build has a version number.
+      -->
+      <id>impala.cdp.repo</id>
+      <url>${env.CDP_MAVEN_REPOSITORY}</url>
+      <name>Impala CDP Repository</name>
+      <snapshots>
+        <enabled>false</enabled>
+      </snapshots>
+    </repository>
+
+    <!-- Regular remote repositories -->
+    <repository>
+      <id>cdh.rcs.releases.repo</id>
+      <url>https://repository.cloudera.com/artifactory/cdh-releases-rcs</url>
+      <name>CDH Releases Repository</name>
+      <snapshots>
+        <enabled>false</enabled>
+      </snapshots>
+    </repository>
+
+    <!-- Banned repositories -->
+    <!--
+    Ban Apache repositories that tend to cause timeouts - see IMPALA-8516.
     These are pulled in via transitive deps, e.g. Sentry. Other repositories contain
     mirrored versions of these dependencies but don't have the same timeout issues.
     -->
@@ -138,8 +189,8 @@ under the License.
       The Impala development bootstrapping depends on CDH Maven snapshots
       which transitively pull dependencies from other repositories which
       can cause the build to be non-reproducible, e.g. IMPALA-7316. This
-      patch makes the build to be reproducible by blacklisting
-      cdh.snapshots.repo so that Maven does not accidentally downloads the
+      patch makes the build reproducible by banning
+      cdh.snapshots.repo so that Maven does not accidentally download the
       latest CDH snapshots when running a build, which can cause
       incompatibility issues.
       -->
@@ -154,40 +205,6 @@ under the License.
       </snapshots>
     </repository>
     <repository>
-      <id>cdh.rcs.releases.repo</id>
-      <url>https://repository.cloudera.com/artifactory/cdh-releases-rcs</url>
-      <name>CDH Releases Repository</name>
-      <snapshots>
-        <enabled>false</enabled>
-      </snapshots>
-    </repository>
-    <repository>
-      <!--
-      The Maven repository for the CDP build identified by CDP_BUILD_NUMBER.
-      CDP does not use maven SNAPSHOT versions - every build has a version number.
-      -->
-      <id>impala.cdp.repo</id>
-      <url>${env.CDP_MAVEN_REPOSITORY}</url>
-      <name>Impala CDP Repository</name>
-      <snapshots>
-        <enabled>false</enabled>
-      </snapshots>
-    </repository>
-    <repository>
-      <id>impala.toolchain.kudu.repo</id>
-      <url>${env.IMPALA_TOOLCHAIN_KUDU_MAVEN_REPOSITORY}</url>
-      <name>Impala Toolchain Kudu Repository</name>
-      <releases>
-        <enabled>${env.IMPALA_TOOLCHAIN_KUDU_MAVEN_REPOSITORY_ENABLED}</enabled>
-      </releases>
-      <!--
-      This repository now uses explicit versions, so snapshots are no longer required.
-      -->
-      <snapshots>
-        <enabled>false</enabled>
-      </snapshots>
-    </repository>
-    <repository>
       <!--
       HWX Nexus is disabled. This is a tombstone to list out why:
       1. Snapshots are disabled because HWX Nexus contains snapshots of artifacts