Posted to commits@impala.apache.org by ta...@apache.org on 2018/05/07 16:43:04 UTC

[2/3] impala git commit: IMPALA-6949: Add the option to start the minicluster with EC enabled

IMPALA-6949: Add the option to start the minicluster with EC enabled

In this patch we add the "ERASURE_CODING" environment variable. When it is
enabled, a cluster with 5 data nodes is created during data loading and HDFS
is started with erasure coding enabled.
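
A minimal usage sketch (assuming the scripts touched by this patch are the
entry points; the exact workflow may differ):

  export ERASURE_CODING=true
  source bin/impala-config.sh          # exports HDFS_ERASURECODE_POLICY/PATH for an hdfs target
  ./testdata/bin/create-load-data.sh   # data load then runs against a 5-node, EC-enabled minicluster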

Testing:
I ran the core build and verified that erasure coding gets enabled in HDFS.
However, many of our EE tests failed.

Cherry-picks: not for 2.x

Change-Id: I397aed491354be21b0a8441ca671232dca25146c
Reviewed-on: http://gerrit.cloudera.org:8080/10275
Reviewed-by: Taras Bobrovytsky <tb...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/c05696dd
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/c05696dd
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/c05696dd

Branch: refs/heads/master
Commit: c05696dd6abc1fbf9a85f634ae56b3eff1efb348
Parents: 5592ecf
Author: Taras Bobrovytsky <ta...@apache.org>
Authored: Tue May 1 16:36:48 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Sat May 5 01:20:59 2018 +0000

----------------------------------------------------------------------
 bin/impala-config.sh                                | 12 +++++++++++-
 bin/run-all-tests.sh                                |  6 ++++++
 testdata/bin/create-load-data.sh                    | 16 ++++++++--------
 testdata/bin/setup-hdfs-env.sh                      |  6 ++++++
 testdata/cluster/admin                              |  3 +++
 .../common/etc/hadoop/conf/hdfs-site.xml.tmpl       |  7 +++++++
 6 files changed, 41 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/c05696dd/bin/impala-config.sh
----------------------------------------------------------------------
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 941beb1..eede064 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -333,6 +333,7 @@ export HADOOP_LZO="${HADOOP_LZO-$IMPALA_HOME/../hadoop-lzo}"
 export IMPALA_LZO="${IMPALA_LZO-$IMPALA_HOME/../Impala-lzo}"
 export IMPALA_AUX_TEST_HOME="${IMPALA_AUX_TEST_HOME-$IMPALA_HOME/../Impala-auxiliary-tests}"
 export TARGET_FILESYSTEM="${TARGET_FILESYSTEM-hdfs}"
+export ERASURE_CODING="${ERASURE_CODING-false}"
 export FILESYSTEM_PREFIX="${FILESYSTEM_PREFIX-}"
 export S3_BUCKET="${S3_BUCKET-}"
 export azure_tenant_id="${azure_tenant_id-DummyAdlsTenantId}"
@@ -446,7 +447,16 @@ elif [ "${TARGET_FILESYSTEM}" = "local" ]; then
   fi
   export DEFAULT_FS="${LOCAL_FS}"
   export FILESYSTEM_PREFIX="${LOCAL_FS}"
-elif [ "${TARGET_FILESYSTEM}" != "hdfs" ]; then
+elif [ "${TARGET_FILESYSTEM}" = "hdfs" ]; then
+  if [[ "${ERASURE_CODING}" = true ]]; then
+    if [[ "${IMPALA_MINICLUSTER_PROFILE}" -lt 3 ]]; then
+      echo "Hadoop 3 is required for HDFS erasure coding."
+      return 1
+    fi
+    export HDFS_ERASURECODE_POLICY="RS-3-2-1024k"
+    export HDFS_ERASURECODE_PATH="/"
+  fi
+else
   echo "Unsupported filesystem '$TARGET_FILESYSTEM'"
   echo "Valid values are: hdfs, isilon, s3, local"
   return 1
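
RS-3-2-1024k is one of Hadoop 3's built-in Reed-Solomon policies: each block
group is striped across 3 data blocks and 2 parity blocks with a 1024 KiB cell
size. A quick sanity check that the name matches a policy the cluster ships
with (standard Hadoop 3 CLI, shown here only as an illustration):

  hdfs ec -listPolicies   # the built-in list should include RS-3-2-1024k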

http://git-wip-us.apache.org/repos/asf/impala/blob/c05696dd/bin/run-all-tests.sh
----------------------------------------------------------------------
diff --git a/bin/run-all-tests.sh b/bin/run-all-tests.sh
index 7702134..4488f2c 100755
--- a/bin/run-all-tests.sh
+++ b/bin/run-all-tests.sh
@@ -69,6 +69,12 @@ else
   TEST_START_CLUSTER_ARGS="${TEST_START_CLUSTER_ARGS} --cluster_size=3"
 fi
 
+if [[ "${ERASURE_CODING}" = true ]]; then
+  # We do not run FE tests when erasure coding is enabled because planner tests
+  # would fail.
+  FE_TEST=false
+fi
+
 # If KRPC tests are disabled, pass the flag to disable KRPC during cluster start.
 if [[ "${DISABLE_KRPC}" == "true" ]]; then
   TEST_START_CLUSTER_ARGS="${TEST_START_CLUSTER_ARGS} --disable_krpc"
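
As the hunk above notes, FE tests are skipped under erasure coding, presumably
because the planner tests' expected output does not hold for EC block layouts.
A hedged sketch of the equivalent manual invocation, assuming run-all-tests.sh
honors these variables from the environment:

  ERASURE_CODING=true FE_TEST=false ./bin/run-all-tests.sh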

http://git-wip-us.apache.org/repos/asf/impala/blob/c05696dd/testdata/bin/create-load-data.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/create-load-data.sh b/testdata/bin/create-load-data.sh
index fcb7e69..c78ddb9 100755
--- a/testdata/bin/create-load-data.sh
+++ b/testdata/bin/create-load-data.sh
@@ -95,6 +95,14 @@ do
   shift;
 done
 
+# The hdfs environment script sets up kms (encryption) and cache pools (hdfs caching).
+# On a non-hdfs filesystem, we don't test encryption or hdfs caching, so this setup is not
+# needed.
+if [[ "${TARGET_FILESYSTEM}" == "hdfs" ]]; then
+  run-step "Setting up HDFS environment" setup-hdfs-env.log \
+      ${IMPALA_HOME}/testdata/bin/setup-hdfs-env.sh
+fi
+
 if [[ $SKIP_METADATA_LOAD -eq 0  && "$SNAPSHOT_FILE" = "" ]]; then
   if [[ -z "$REMOTE_LOAD" ]]; then
     run-step "Loading Hive Builtins" load-hive-builtins.log \
@@ -504,14 +512,6 @@ if [[ -z "$REMOTE_LOAD" ]]; then
     ${START_CLUSTER_ARGS}
 fi
 
-# The hdfs environment script sets up kms (encryption) and cache pools (hdfs caching).
-# On a non-hdfs filesystem, we don't test encryption or hdfs caching, so this setup is not
-# needed.
-if [[ "${TARGET_FILESYSTEM}" == "hdfs" ]]; then
-  run-step "Setting up HDFS environment" setup-hdfs-env.log \
-      ${IMPALA_HOME}/testdata/bin/setup-hdfs-env.sh
-fi
-
 if [ $SKIP_METADATA_LOAD -eq 0 ]; then
   run-step "Loading custom schemas" load-custom-schemas.log load-custom-schemas
   # Run some steps in parallel, with run-step-backgroundable / run-step-wait-all.

http://git-wip-us.apache.org/repos/asf/impala/blob/c05696dd/testdata/bin/setup-hdfs-env.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/setup-hdfs-env.sh b/testdata/bin/setup-hdfs-env.sh
index ece94de..a07a9dd 100755
--- a/testdata/bin/setup-hdfs-env.sh
+++ b/testdata/bin/setup-hdfs-env.sh
@@ -72,3 +72,9 @@ hdfs cacheadmin -addPool testPool ${CACHEADMIN_ARGS}
 if [ "${PREVIOUS_PRINCIPAL}" != "" ]; then
   kinit -k -t ${KRB5_KTNAME} ${PREVIOUS_PRINCIPAL}
 fi
+
+if [[ -n "${HDFS_ERASURECODE_POLICY:-}" ]]; then
+  hdfs ec -enablePolicy -policy "${HDFS_ERASURECODE_POLICY}"
+  hdfs ec -setPolicy -policy "${HDFS_ERASURECODE_POLICY}" \
+    -path "${HDFS_ERASURECODE_PATH:=/}"
+fi
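
Once setup-hdfs-env.sh has run, the applied policy can be verified with the
standard Hadoop 3 erasure coding CLI:

  hdfs ec -getPolicy -path /   # expected to report RS-3-2-1024k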

http://git-wip-us.apache.org/repos/asf/impala/blob/c05696dd/testdata/cluster/admin
----------------------------------------------------------------------
diff --git a/testdata/cluster/admin b/testdata/cluster/admin
index 74b5a9c..f0a4a81 100755
--- a/testdata/cluster/admin
+++ b/testdata/cluster/admin
@@ -46,6 +46,9 @@ shift $(($OPTIND-1))
 DIR=$(dirname $0)
 NODES_DIR="$DIR/cdh$CDH_MAJOR_VERSION"
 NODE_COUNT=3
+if [[ "$TARGET_FILESYSTEM" == "hdfs" && "$ERASURE_CODING" = true ]]; then
+  NODE_COUNT=5
+fi
 NODE_PREFIX=node-
 COMMON_NODE_TEMPLATE="$DIR/node_templates/common"
 NODE_TEMPLATE="$DIR/node_templates/cdh$CDH_MAJOR_VERSION"
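
The bump from 3 to 5 nodes follows from the RS-3-2 layout: each block group
needs its 3 data stripes and 2 parity stripes placed on distinct datanodes.
One way to confirm the larger cluster came up (illustrative only):

  hdfs dfsadmin -report | grep "Live datanodes"   # expect 5 with ERASURE_CODING=true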

http://git-wip-us.apache.org/repos/asf/impala/blob/c05696dd/testdata/cluster/node_templates/common/etc/hadoop/conf/hdfs-site.xml.tmpl
----------------------------------------------------------------------
diff --git a/testdata/cluster/node_templates/common/etc/hadoop/conf/hdfs-site.xml.tmpl b/testdata/cluster/node_templates/common/etc/hadoop/conf/hdfs-site.xml.tmpl
index c9ee70b..6882fa3 100644
--- a/testdata/cluster/node_templates/common/etc/hadoop/conf/hdfs-site.xml.tmpl
+++ b/testdata/cluster/node_templates/common/etc/hadoop/conf/hdfs-site.xml.tmpl
@@ -22,6 +22,13 @@
     <value>true</value>
   </property>
 
+  <!-- The release of Hadoop we're depending on requires an explicit key to allow erasure
+       coding. -->
+  <property>
+    <name>cloudera.erasure_coding.enabled</name>
+    <value>true</value>
+  </property>
+
   <property>
     <name>dfs.datanode.address</name>
     <value>127.0.0.1:${DATANODE_PORT}</value>