You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2020/08/29 03:37:37 UTC
[impala] branch master updated: IMPALA-10073: Create shaded dependency for S3A and aws-java-sdk-bundle
This is an automated email from the ASF dual-hosted git repository.
tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 5daff34 IMPALA-10073: Create shaded dependency for S3A and aws-java-sdk-bundle
5daff34 is described below
commit 5daff3472440dc6174f0f31a28bbdafee4f68716
Author: Sahil Takiar <ta...@gmail.com>
AuthorDate: Tue Aug 11 10:36:50 2020 -0700
IMPALA-10073: Create shaded dependency for S3A and aws-java-sdk-bundle
The aws-java-sdk-bundle is one of the largest dependencies in the Impala
Docker images and continues to grow. The jar includes SDKs for
every single AWS service.
This patch removes most of the unnecessary SDKs from the
aws-java-sdk-bundle, thus drastically decreasing the size of the
dependency. The Maven shade plugin is used to do this, and the
implementation is similar to what is currently done for the hive-exec
jar.
This patch takes a conservative approach to removing packages from the
aws-java-sdk-bundle jar, and I ensured no direct dependencies of the S3
SDK were removed. The idea is to only remove dependencies that S3A would
never conceivably need. Given the huge number of AWS services, I only
focused on removing the largest SDKs (the size of each SDK is estimated
by the number of classes in the SDK).
This decreases the size of the Docker images by about 100 MB.
Testing:
* Ran core tests against S3
Change-Id: I0939f73be986f83cc1fd07921563b4d9201780f2
Reviewed-on: http://gerrit.cloudera.org:8080/16342
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
CMakeLists.txt | 3 +-
ext-data-source/CMakeLists.txt | 3 +-
fe/CMakeLists.txt | 4 +-
fe/pom.xml | 14 ++
shaded-deps/.gitignore | 1 -
shaded-deps/{ => hive-exec}/CMakeLists.txt | 2 +-
shaded-deps/{ => hive-exec}/pom.xml | 4 +-
shaded-deps/{ => s3a-aws-sdk}/CMakeLists.txt | 2 +-
shaded-deps/s3a-aws-sdk/pom.xml | 188 +++++++++++++++++++++++++++
9 files changed, 212 insertions(+), 9 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 12f92e4..cc2c8aa 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -413,7 +413,8 @@ add_subdirectory(common/yarn-extras)
add_subdirectory(common/protobuf)
add_subdirectory(be)
add_subdirectory(docker)
-add_subdirectory(shaded-deps)
+add_subdirectory(shaded-deps/hive-exec)
+add_subdirectory(shaded-deps/s3a-aws-sdk)
add_subdirectory(fe)
add_subdirectory(impala-parent)
add_subdirectory(ext-data-source)
diff --git a/ext-data-source/CMakeLists.txt b/ext-data-source/CMakeLists.txt
index 46e437c..0f88dd2 100644
--- a/ext-data-source/CMakeLists.txt
+++ b/ext-data-source/CMakeLists.txt
@@ -17,6 +17,7 @@
# The dependency on shaded-deps is only added to avoid parallel downloads
# of dependencies. For more details see IMPALA-7051, which was a similar issue.
-add_custom_target(ext-data-source ALL DEPENDS gen-deps impala-parent shaded-deps
+add_custom_target(ext-data-source ALL DEPENDS gen-deps impala-parent
+ shaded-deps-hive-exec shaded-deps-s3a-aws-sdk
COMMAND $ENV{IMPALA_HOME}/bin/mvn-quiet.sh -B install -DskipTests
)
diff --git a/fe/CMakeLists.txt b/fe/CMakeLists.txt
index 06ea20c..51f6760 100644
--- a/fe/CMakeLists.txt
+++ b/fe/CMakeLists.txt
@@ -16,7 +16,7 @@
# under the License.
add_custom_target(fe ALL DEPENDS
- shaded-deps thrift-deps fb-deps yarn-extras function-registry ext-data-source
- query-event-hook-api impala-parent
+ shaded-deps-hive-exec shaded-deps-s3a-aws-sdk thrift-deps fb-deps yarn-extras
+ function-registry ext-data-source query-event-hook-api impala-parent
COMMAND ${CMAKE_SOURCE_DIR}/bin/mvn-quiet.sh -B install -DskipTests
)
diff --git a/fe/pom.xml b/fe/pom.xml
index 68fed3a..061a739 100644
--- a/fe/pom.xml
+++ b/fe/pom.xml
@@ -130,6 +130,20 @@ under the License.
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-aws</artifactId>
<version>${hadoop.version}</version>
+ <!-- Exclude the aws-java-sdk-bundle dependency because the Impala minimal
+ version of this dependency is used instead. -->
+ <exclusions>
+ <exclusion>
+ <groupId>com.amazonaws</groupId>
+ <artifactId>aws-java-sdk-bundle</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.impala</groupId>
+ <artifactId>impala-minimal-s3a-aws-sdk</artifactId>
+ <version>${project.version}</version>
</dependency>
<dependency>
diff --git a/shaded-deps/.gitignore b/shaded-deps/.gitignore
deleted file mode 100644
index 916e17c..0000000
--- a/shaded-deps/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-dependency-reduced-pom.xml
diff --git a/shaded-deps/CMakeLists.txt b/shaded-deps/hive-exec/CMakeLists.txt
similarity index 92%
copy from shaded-deps/CMakeLists.txt
copy to shaded-deps/hive-exec/CMakeLists.txt
index 73d353c..7d8b6b1 100644
--- a/shaded-deps/CMakeLists.txt
+++ b/shaded-deps/hive-exec/CMakeLists.txt
@@ -15,6 +15,6 @@
# specific language governing permissions and limitations
# under the License.
-add_custom_target(shaded-deps ALL DEPENDS impala-parent
+add_custom_target(shaded-deps-hive-exec ALL DEPENDS impala-parent
COMMAND $ENV{IMPALA_HOME}/bin/mvn-quiet.sh -B install -DskipTests
)
diff --git a/shaded-deps/pom.xml b/shaded-deps/hive-exec/pom.xml
similarity index 98%
rename from shaded-deps/pom.xml
rename to shaded-deps/hive-exec/pom.xml
index ff6fa25..eadc397 100644
--- a/shaded-deps/pom.xml
+++ b/shaded-deps/hive-exec/pom.xml
@@ -28,7 +28,7 @@ the same dependencies
<groupId>org.apache.impala</groupId>
<artifactId>impala-parent</artifactId>
<version>0.1-SNAPSHOT</version>
- <relativePath>../impala-parent/pom.xml</relativePath>
+ <relativePath>../../impala-parent/pom.xml</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>
<groupId>org.apache.impala</groupId>
@@ -76,7 +76,7 @@ the same dependencies
<include>org/apache/hadoop/hive/conf/**/*</include>
<include>org/apache/hadoop/hive/common/FileUtils*</include>
<include>org/apache/hive/common/util/TxnIdUtils*</include>
- <!-- Needed to support describe formatted command compat with Hive -->
+ <!-- Needed to support describe formatted command compat with Hive -->
<include>org/apache/hadoop/hive/ql/metadata/**/*</include>
<include>org/apache/hadoop/hive/ql/parse/SemanticException.class</include>
<!-- Needed to support Hive udfs -->
diff --git a/shaded-deps/CMakeLists.txt b/shaded-deps/s3a-aws-sdk/CMakeLists.txt
similarity index 92%
rename from shaded-deps/CMakeLists.txt
rename to shaded-deps/s3a-aws-sdk/CMakeLists.txt
index 73d353c..956f5eb 100644
--- a/shaded-deps/CMakeLists.txt
+++ b/shaded-deps/s3a-aws-sdk/CMakeLists.txt
@@ -15,6 +15,6 @@
# specific language governing permissions and limitations
# under the License.
-add_custom_target(shaded-deps ALL DEPENDS impala-parent
+add_custom_target(shaded-deps-s3a-aws-sdk ALL DEPENDS impala-parent
COMMAND $ENV{IMPALA_HOME}/bin/mvn-quiet.sh -B install -DskipTests
)
diff --git a/shaded-deps/s3a-aws-sdk/pom.xml b/shaded-deps/s3a-aws-sdk/pom.xml
new file mode 100644
index 0000000..392ea10
--- /dev/null
+++ b/shaded-deps/s3a-aws-sdk/pom.xml
@@ -0,0 +1,188 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. See accompanying LICENSE file.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
+ http://maven.apache.org/xsd/maven-4.0.0.xsd">
+
+ <!-- This pom creates a minimal version of the aws-java-sdk-bundle jar. The S3A
+dependency is used here to ensure the correct version of the aws-java-sdk-bundle jar is
+used. Only AWS service SDKs are excluded; all third-party jars are still included, even
+though some of them might not be necessary. The exclusions are sorted alphabetically.
+ -->
+ <parent>
+ <groupId>org.apache.impala</groupId>
+ <artifactId>impala-parent</artifactId>
+ <version>0.1-SNAPSHOT</version>
+ <relativePath>../../impala-parent/pom.xml</relativePath>
+ </parent>
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>org.apache.impala</groupId>
+ <artifactId>impala-minimal-s3a-aws-sdk</artifactId>
+ <packaging>jar</packaging>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-aws</artifactId>
+ <version>${hadoop.version}</version>
+ </dependency>
+ </dependencies>
+ <build>
+ <plugins>
+ <plugin>
+ <artifactId>maven-shade-plugin</artifactId>
+ <version>3.2.1</version>
+ <configuration>
+ <artifactSet>
+ <includes>
+ <include>com.amazonaws:aws-java-sdk-bundle</include>
+ </includes>
+ </artifactSet>
+ <filters>
+ <filter>
+ <artifact>com.amazonaws:aws-java-sdk-bundle</artifact>
+ <excludes>
+ <exclude>com/amazonaws/services/alexaforbusiness/**</exclude>
+ <exclude>com/amazonaws/services/amplify/**</exclude>
+ <exclude>com/amazonaws/services/apigateway/**</exclude>
+ <exclude>com/amazonaws/services/apigatewayv2/**</exclude>
+ <exclude>com/amazonaws/services/applicationautoscaling/**</exclude>
+ <exclude>com/amazonaws/services/applicationdiscovery/**</exclude>
+ <exclude>com/amazonaws/services/appmesh/**</exclude>
+ <exclude>com/amazonaws/services/appstream/**</exclude>
+ <exclude>com/amazonaws/services/appsync/**</exclude>
+ <exclude>com/amazonaws/services/athena/**</exclude>
+ <exclude>com/amazonaws/services/autoscaling/**</exclude>
+ <exclude>com/amazonaws/services/autoscalingplans/**</exclude>
+ <exclude>com/amazonaws/services/batch/**</exclude>
+ <exclude>com/amazonaws/services/chime/**</exclude>
+ <exclude>com/amazonaws/services/cloud9/**</exclude>
+ <exclude>com/amazonaws/services/clouddirectory/**</exclude>
+ <exclude>com/amazonaws/services/cloudformation/**</exclude>
+ <exclude>com/amazonaws/services/cloudfront/**</exclude>
+ <exclude>com/amazonaws/services/cloudsearchv2/**</exclude>
+ <exclude>com/amazonaws/services/cloudwatch/**</exclude>
+ <exclude>com/amazonaws/services/cloudwatchevents/**</exclude>
+ <exclude>com/amazonaws/services/codebuild/**</exclude>
+ <exclude>com/amazonaws/services/codecommit/**</exclude>
+ <exclude>com/amazonaws/services/codedeploy/**</exclude>
+ <exclude>com/amazonaws/services/codepipeline/**</exclude>
+ <exclude>com/amazonaws/services/codestar/**</exclude>
+ <exclude>com/amazonaws/services/cognitoidp/**</exclude>
+ <exclude>com/amazonaws/services/cognitosync/**</exclude>
+ <exclude>com/amazonaws/services/comprehend/**</exclude>
+ <exclude>com/amazonaws/services/connect/**</exclude>
+ <exclude>com/amazonaws/services/databasemigrationservice/**</exclude>
+ <exclude>com/amazonaws/services/devicefarm/**</exclude>
+ <exclude>com/amazonaws/services/directory/**</exclude>
+ <exclude>com/amazonaws/services/docdb/**</exclude>
+ <exclude>com/amazonaws/services/ec2/**</exclude>
+ <exclude>com/amazonaws/services/ecr/**</exclude>
+ <exclude>com/amazonaws/services/ecs/**</exclude>
+ <exclude>com/amazonaws/services/eks/**</exclude>
+ <exclude>com/amazonaws/services/elasticache/**</exclude>
+ <exclude>com/amazonaws/services/elasticbeanstalk/**</exclude>
+ <exclude>com/amazonaws/services/elasticfilesystem/**</exclude>
+ <exclude>com/amazonaws/services/elasticloadbalancing/**</exclude>
+ <exclude>com/amazonaws/services/elasticloadbalancingv2/**</exclude>
+ <exclude>com/amazonaws/services/elasticmapreduce/**</exclude>
+ <exclude>com/amazonaws/services/elasticsearch/**</exclude>
+ <exclude>com/amazonaws/services/elastictranscoder/**</exclude>
+ <exclude>com/amazonaws/services/fms/**</exclude>
+ <exclude>com/amazonaws/services/globalaccelerator/**</exclude>
+ <exclude>com/amazonaws/services/glue/**</exclude>
+ <exclude>com/amazonaws/services/greengrass/**</exclude>
+ <exclude>com/amazonaws/services/groundstation/**</exclude>
+ <exclude>com/amazonaws/services/guardduty/**</exclude>
+ <exclude>com/amazonaws/services/inspector/**</exclude>
+ <exclude>com/amazonaws/services/iot/**</exclude>
+ <exclude>com/amazonaws/services/iot1clickdevices/**</exclude>
+ <exclude>com/amazonaws/services/iot1clickprojects/**</exclude>
+ <exclude>com/amazonaws/services/iotanalytics/**</exclude>
+ <exclude>com/amazonaws/services/iotevents/**</exclude>
+ <exclude>com/amazonaws/services/iotthingsgraph/**</exclude>
+ <exclude>com/amazonaws/services/kafka/**</exclude>
+ <exclude>com/amazonaws/services/kinesis/**</exclude>
+ <exclude>com/amazonaws/services/kinesisanalytics/**</exclude>
+ <exclude>com/amazonaws/services/kinesisanalyticsv2/**</exclude>
+ <exclude>com/amazonaws/services/kinesisfirehose/**</exclude>
+ <exclude>com/amazonaws/services/kinesisvideo/**</exclude>
+ <exclude>com/amazonaws/services/lambda/**</exclude>
+ <exclude>com/amazonaws/services/lexmodelbuilding/**</exclude>
+ <exclude>com/amazonaws/services/licensemanager/**</exclude>
+ <exclude>com/amazonaws/services/lightsail/**</exclude>
+ <exclude>com/amazonaws/services/machinelearning/**</exclude>
+ <exclude>com/amazonaws/services/managedblockchain/**</exclude>
+ <exclude>com/amazonaws/services/mediaconnect/**</exclude>
+ <exclude>com/amazonaws/services/mediaconvert/**</exclude>
+ <exclude>com/amazonaws/services/medialive/**</exclude>
+ <exclude>com/amazonaws/services/mediapackage/**</exclude>
+ <exclude>com/amazonaws/services/mediapackagevod/**</exclude>
+ <exclude>com/amazonaws/services/mediastore/**</exclude>
+ <exclude>com/amazonaws/services/migrationhub/**</exclude>
+ <exclude>com/amazonaws/services/mq/**</exclude>
+ <exclude>com/amazonaws/services/mturk/**</exclude>
+ <exclude>com/amazonaws/services/neptune/**</exclude>
+ <exclude>com/amazonaws/services/opsworks/**</exclude>
+ <exclude>com/amazonaws/services/pinpoint/**</exclude>
+ <exclude>com/amazonaws/services/pinpointemail/**</exclude>
+ <exclude>com/amazonaws/services/pinpointsmsvoice/**</exclude>
+ <exclude>com/amazonaws/services/polly/**</exclude>
+ <exclude>com/amazonaws/services/quicksight/**</exclude>
+ <exclude>com/amazonaws/services/rds/**</exclude>
+ <exclude>com/amazonaws/services/redshift/**</exclude>
+ <exclude>com/amazonaws/services/rekognition/**</exclude>
+ <exclude>com/amazonaws/services/robomaker/**</exclude>
+ <exclude>com/amazonaws/services/route53/**</exclude>
+ <exclude>com/amazonaws/services/route53domains/**</exclude>
+ <exclude>com/amazonaws/services/route53resolver/**</exclude>
+ <exclude>com/amazonaws/services/sagemaker/**</exclude>
+ <exclude>com/amazonaws/services/securityhub/**</exclude>
+ <exclude>com/amazonaws/services/serverlessapplicationrepository/**</exclude>
+ <exclude>com/amazonaws/services/servermigration/**</exclude>
+ <exclude>com/amazonaws/services/servicecatalog/**</exclude>
+ <exclude>com/amazonaws/services/servicediscovery/**</exclude>
+ <exclude>com/amazonaws/services/shield/**</exclude>
+ <exclude>com/amazonaws/services/simpledb/**</exclude>
+ <exclude>com/amazonaws/services/simpleemail/**</exclude>
+ <exclude>com/amazonaws/services/simplesystemsmanagement/**</exclude>
+ <exclude>com/amazonaws/services/simpleworkflow/**</exclude>
+ <exclude>com/amazonaws/services/sqs/**</exclude>
+ <exclude>com/amazonaws/services/stepfunctions/**</exclude>
+ <exclude>com/amazonaws/services/support/**</exclude>
+ <exclude>com/amazonaws/services/textract/**</exclude>
+ <exclude>com/amazonaws/services/transcribe/**</exclude>
+ <exclude>com/amazonaws/services/waf/**</exclude>
+ <exclude>com/amazonaws/services/workdocs/**</exclude>
+ <exclude>com/amazonaws/services/worklink/**</exclude>
+ <exclude>com/amazonaws/services/workmail/**</exclude>
+ <exclude>com/amazonaws/services/workspaces/**</exclude>
+ </excludes>
+ </filter>
+ </filters>
+ </configuration>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+</project>