You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@accumulo.apache.org by GitBox <gi...@apache.org> on 2022/09/19 20:52:40 UTC

[GitHub] [accumulo-docker] keith-turner commented on a diff in pull request #23: Refactor Docker image to allow faster rebuilds

keith-turner commented on code in PR #23:
URL: https://github.com/apache/accumulo-docker/pull/23#discussion_r974678307


##########
Dockerfile:
##########
@@ -13,72 +13,110 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-FROM rockylinux:9
 
-ARG ACCUMULO_VERSION=2.1.0
-ARG HADOOP_VERSION=3.3.3
-ARG ZOOKEEPER_VERSION=3.8.0
-ARG HADOOP_USER_NAME=accumulo
-ARG ACCUMULO_FILE=
-ARG HADOOP_FILE=
-ARG ZOOKEEPER_FILE=
+##
+## Base image. Rocky Linux 9 with updates, JRE 11 headless, and updated CA certs.
+##
+FROM rockylinux:9 as base
 
-ENV JAVA_HOME=/usr/lib/jvm/java-11-openjdk HADOOP_USER_NAME=$HADOOP_USER_NAME
+RUN set -eux; \
+  yum install -y ca-certificates java-11-openjdk-headless && \
+  update-ca-trust extract && \
+  yum clean all && \
+  rm -rf /var/cache/yum
 
-ENV APACHE_DIST_URLS \
-  https://www.apache.org/dyn/closer.cgi?action=download&filename= \
-# if the version is outdated (or we're grabbing the .asc file), we might have to pull from the dist/archive :/
-  https://www-us.apache.org/dist/ \
-  https://www.apache.org/dist/ \
-  https://archive.apache.org/dist/
+##
+## Base image for building. Adds wget, JDK and make (for building Accumulo native libs).
+##
+FROM base as buildbase
 
-COPY README.md $ACCUMULO_FILE $HADOOP_FILE $ZOOKEEPER_FILE /tmp/
+RUN set -eux; \
+  yum install -y java-11-openjdk-devel make gcc-c++ wget && \
+  update-ca-trust extract
 
-RUN yum install -y ca-certificates java-11-openjdk-devel make gcc-c++ wget && \
-  update-ca-trust extract && \
-  set -eux; \
-  download() { \
-    local f="$1"; shift; \
-    local distFile="$1"; shift; \
-    local success=; \
-    local distUrl=; \
-    for distUrl in $APACHE_DIST_URLS; do \
-      if wget -nv -O "$f" "$distUrl$distFile"; then \
-        success=1; \
-        break; \
-      fi; \
-    done; \
-    [ -n "$success" ]; \
-  }; \
-  \
-  if [ -z "$HADOOP_FILE" ]; then \
-    download "hadoop.tar.gz" "hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz"; \
-  else \
-    mv "/tmp/$HADOOP_FILE" "hadoop.tar.gz"; \
-  fi; \
-  if [ -z "$ZOOKEEPER_FILE" ]; then \
-    download "zookeeper.tar.gz" "zookeeper/zookeeper-$ZOOKEEPER_VERSION/apache-zookeeper-$ZOOKEEPER_VERSION-bin.tar.gz"; \
-  else \
-    mv "/tmp/$ZOOKEEPER_FILE" "zookeeper.tar.gz"; \
-  fi; \
-  if [ -z "$ACCUMULO_FILE" ]; then \
-    download "accumulo.tar.gz" "accumulo/$ACCUMULO_VERSION/accumulo-$ACCUMULO_VERSION-bin.tar.gz"; \
-  else \
-    mv "/tmp/$ACCUMULO_FILE" "accumulo.tar.gz"; \
-  fi && \
-  tar xzf accumulo.tar.gz -C /tmp/ && \
-  tar xzf hadoop.tar.gz -C /tmp/ && \
-  tar xzf zookeeper.tar.gz -C /tmp/ && \
-  mv /tmp/hadoop-$HADOOP_VERSION /opt/hadoop && \
-  mv /tmp/apache-zookeeper-$ZOOKEEPER_VERSION-bin /opt/zookeeper && \
-  mv /tmp/accumulo-$ACCUMULO_VERSION* /opt/accumulo && \
-  rm -f accumulo.tar.gz hadoop.tar.gz zookeeper.tar.gz && \
-  rm -rf /opt/hadoop/share/doc/hadoop && \
+COPY download.sh /usr/local/bin/
+
+##
+## Hadoop image. Download/copy and extract the Hadoop installation.
+##
+FROM buildbase as hadoop
+
+ARG HADOOP_VERSION=3.3.3 \
+  HADOOP_FILE=_NOT_SET
+
+# Copy a known file along with the optional files (that might not exist).
+# The known file, along with '*' for the optional file allows the command
+# to succeed even if the optional file does not exist. If we used an empty
+# string for the optional file default value, then this command would copy
+# the entire build context, which is not what we want.
+COPY download.sh ${HADOOP_FILE}* /tmp/
+
+RUN set -eux; \
+  download.sh "${HADOOP_FILE}" "hadoop.tar.gz" "hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz"; \
+  tar xzf hadoop.tar.gz -C /tmp/; \
+  mv /tmp/hadoop-$HADOOP_VERSION /opt/hadoop; \
+  rm -rf /opt/hadoop/share/doc/hadoop
+
+##
+## Zookeeper image. Download/copy and extract the Zookeeper installation.
+##
+FROM buildbase as zookeeper
+
+ARG ZOOKEEPER_VERSION=3.8.0 \
+  ZOOKEEPER_FILE=_NOT_SET
+# Copy a known file along with the optional files (that might not exist).
+# The known file, along with '*' for the optional file allows the command
+# to succeed even if the optional file does not exist. If we used an empty
+# string for the optional file default value, then this command would copy
+# the entire build context, which is not what we want.
+COPY download.sh ${ZOOKEEPER_FILE}* /tmp/
+
+RUN set -eux; \
+  download.sh "${ZOOKEEPER_FILE}" "zookeeper.tar.gz" "zookeeper/zookeeper-$ZOOKEEPER_VERSION/apache-zookeeper-$ZOOKEEPER_VERSION-bin.tar.gz"; \
+  tar xzf zookeeper.tar.gz -C /tmp/; \
+  mv /tmp/apache-zookeeper-$ZOOKEEPER_VERSION-bin /opt/zookeeper
+
+##
+## Accumulo image. Download/copy and extract the Accumulo installation, build native libs, and copy in properties.
+##
+FROM buildbase as accumulo
+
+ENV JAVA_HOME=/usr/lib/jvm/java-11-openjdk
+
+ARG ACCUMULO_VERSION=2.1.0 \
+  ACCUMULO_FILE=_NOT_SET
+# Copy a known file along with the optional files (that might not exist).
+# The known file, along with '*' for the optional file allows the command
+# to succeed even if the optional file does not exist. If we used an empty
+# string for the optional file default value, then this command would copy
+# the entire build context, which is not what we want.
+COPY download.sh ${ACCUMULO_FILE}* /tmp/
+
+RUN set -eux; \
+  download.sh "${ACCUMULO_FILE}" "accumulo.tar.gz" "accumulo/$ACCUMULO_VERSION/accumulo-$ACCUMULO_VERSION-bin.tar.gz"; \
+  tar xzf accumulo.tar.gz -C /tmp/; \
+  mv /tmp/accumulo-$ACCUMULO_VERSION*/ /opt/accumulo; \

Review Comment:
   It would be nice if setting the build arg `ACCUMULO_FILE` did not also require setting `ACCUMULO_VERSION`.  Maybe the following would achieve this.
   
   ```suggestion
     mv /tmp/accumulo-*/ /opt/accumulo; \
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@accumulo.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org