You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/05/07 16:43:04 UTC
[2/3] impala git commit: IMPALA-6949: Add the option to start the
minicluster with EC enabled
IMPALA-6949: Add the option to start the minicluster with EC enabled
In this patch we add the "ERASURE_CODING" environment variable. If it is
set to true, a cluster with 5 data nodes will be created during data
loading and HDFS will be started with erasure coding enabled.
Testing:
I ran the core build, and verified that erasure coding gets enabled in
HDFS. Many of our EE tests failed, however.
Cherry-picks: not for 2.x
Change-Id: I397aed491354be21b0a8441ca671232dca25146c
Reviewed-on: http://gerrit.cloudera.org:8080/10275
Reviewed-by: Taras Bobrovytsky <tb...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/c05696dd
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/c05696dd
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/c05696dd
Branch: refs/heads/master
Commit: c05696dd6abc1fbf9a85f634ae56b3eff1efb348
Parents: 5592ecf
Author: Taras Bobrovytsky <ta...@apache.org>
Authored: Tue May 1 16:36:48 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Sat May 5 01:20:59 2018 +0000
----------------------------------------------------------------------
bin/impala-config.sh | 12 +++++++++++-
bin/run-all-tests.sh | 6 ++++++
testdata/bin/create-load-data.sh | 16 ++++++++--------
testdata/bin/setup-hdfs-env.sh | 6 ++++++
testdata/cluster/admin | 3 +++
.../common/etc/hadoop/conf/hdfs-site.xml.tmpl | 7 +++++++
6 files changed, 41 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/impala/blob/c05696dd/bin/impala-config.sh
----------------------------------------------------------------------
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 941beb1..eede064 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -333,6 +333,7 @@ export HADOOP_LZO="${HADOOP_LZO-$IMPALA_HOME/../hadoop-lzo}"
export IMPALA_LZO="${IMPALA_LZO-$IMPALA_HOME/../Impala-lzo}"
export IMPALA_AUX_TEST_HOME="${IMPALA_AUX_TEST_HOME-$IMPALA_HOME/../Impala-auxiliary-tests}"
export TARGET_FILESYSTEM="${TARGET_FILESYSTEM-hdfs}"
+export ERASURE_CODING="${ERASURE_CODING-false}"
export FILESYSTEM_PREFIX="${FILESYSTEM_PREFIX-}"
export S3_BUCKET="${S3_BUCKET-}"
export azure_tenant_id="${azure_tenant_id-DummyAdlsTenantId}"
@@ -446,7 +447,16 @@ elif [ "${TARGET_FILESYSTEM}" = "local" ]; then
fi
export DEFAULT_FS="${LOCAL_FS}"
export FILESYSTEM_PREFIX="${LOCAL_FS}"
-elif [ "${TARGET_FILESYSTEM}" != "hdfs" ]; then
+elif [ "${TARGET_FILESYSTEM}" = "hdfs" ]; then
+ if [[ "${ERASURE_CODING}" = true ]]; then
+ if [[ "${IMPALA_MINICLUSTER_PROFILE}" -lt 3 ]]; then
+ echo "Hadoop 3 is required for HDFS erasure coding."
+ return 1
+ fi
+ export HDFS_ERASURECODE_POLICY="RS-3-2-1024k"
+ export HDFS_ERASURECODE_PATH="/"
+ fi
+else
echo "Unsupported filesystem '$TARGET_FILESYSTEM'"
echo "Valid values are: hdfs, isilon, s3, local"
return 1
http://git-wip-us.apache.org/repos/asf/impala/blob/c05696dd/bin/run-all-tests.sh
----------------------------------------------------------------------
diff --git a/bin/run-all-tests.sh b/bin/run-all-tests.sh
index 7702134..4488f2c 100755
--- a/bin/run-all-tests.sh
+++ b/bin/run-all-tests.sh
@@ -69,6 +69,12 @@ else
TEST_START_CLUSTER_ARGS="${TEST_START_CLUSTER_ARGS} --cluster_size=3"
fi
+if [[ "${ERASURE_CODING}" = true ]]; then
+ # We do not run FE tests when erasure coding is enabled because planner tests
+ # would fail.
+ FE_TEST=false
+fi
+
# If KRPC tests are disabled, pass the flag to disable KRPC during cluster start.
if [[ "${DISABLE_KRPC}" == "true" ]]; then
TEST_START_CLUSTER_ARGS="${TEST_START_CLUSTER_ARGS} --disable_krpc"
http://git-wip-us.apache.org/repos/asf/impala/blob/c05696dd/testdata/bin/create-load-data.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/create-load-data.sh b/testdata/bin/create-load-data.sh
index fcb7e69..c78ddb9 100755
--- a/testdata/bin/create-load-data.sh
+++ b/testdata/bin/create-load-data.sh
@@ -95,6 +95,14 @@ do
shift;
done
+# The hdfs environment script sets up kms (encryption) and cache pools (hdfs caching).
+# On a non-hdfs filesystem, we don't test encryption or hdfs caching, so this setup is not
+# needed.
+if [[ "${TARGET_FILESYSTEM}" == "hdfs" ]]; then
+ run-step "Setting up HDFS environment" setup-hdfs-env.log \
+ ${IMPALA_HOME}/testdata/bin/setup-hdfs-env.sh
+fi
+
if [[ $SKIP_METADATA_LOAD -eq 0 && "$SNAPSHOT_FILE" = "" ]]; then
if [[ -z "$REMOTE_LOAD" ]]; then
run-step "Loading Hive Builtins" load-hive-builtins.log \
@@ -504,14 +512,6 @@ if [[ -z "$REMOTE_LOAD" ]]; then
${START_CLUSTER_ARGS}
fi
-# The hdfs environment script sets up kms (encryption) and cache pools (hdfs caching).
-# On a non-hdfs filesystem, we don't test encryption or hdfs caching, so this setup is not
-# needed.
-if [[ "${TARGET_FILESYSTEM}" == "hdfs" ]]; then
- run-step "Setting up HDFS environment" setup-hdfs-env.log \
- ${IMPALA_HOME}/testdata/bin/setup-hdfs-env.sh
-fi
-
if [ $SKIP_METADATA_LOAD -eq 0 ]; then
run-step "Loading custom schemas" load-custom-schemas.log load-custom-schemas
# Run some steps in parallel, with run-step-backgroundable / run-step-wait-all.
http://git-wip-us.apache.org/repos/asf/impala/blob/c05696dd/testdata/bin/setup-hdfs-env.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/setup-hdfs-env.sh b/testdata/bin/setup-hdfs-env.sh
index ece94de..a07a9dd 100755
--- a/testdata/bin/setup-hdfs-env.sh
+++ b/testdata/bin/setup-hdfs-env.sh
@@ -72,3 +72,9 @@ hdfs cacheadmin -addPool testPool ${CACHEADMIN_ARGS}
if [ "${PREVIOUS_PRINCIPAL}" != "" ]; then
kinit -k -t ${KRB5_KTNAME} ${PREVIOUS_PRINCIPAL}
fi
+
+if [[ -n "${HDFS_ERASURECODE_POLICY:-}" ]]; then
+ hdfs ec -enablePolicy -policy "${HDFS_ERASURECODE_POLICY}"
+ hdfs ec -setPolicy -policy "${HDFS_ERASURECODE_POLICY}" \
+ -path "${HDFS_ERASURECODE_PATH:=/}"
+fi
http://git-wip-us.apache.org/repos/asf/impala/blob/c05696dd/testdata/cluster/admin
----------------------------------------------------------------------
diff --git a/testdata/cluster/admin b/testdata/cluster/admin
index 74b5a9c..f0a4a81 100755
--- a/testdata/cluster/admin
+++ b/testdata/cluster/admin
@@ -46,6 +46,9 @@ shift $(($OPTIND-1))
DIR=$(dirname $0)
NODES_DIR="$DIR/cdh$CDH_MAJOR_VERSION"
NODE_COUNT=3
+if [[ "$TARGET_FILESYSTEM" == "hdfs" && "$ERASURE_CODING" = true ]]; then
+ NODE_COUNT=5
+fi
NODE_PREFIX=node-
COMMON_NODE_TEMPLATE="$DIR/node_templates/common"
NODE_TEMPLATE="$DIR/node_templates/cdh$CDH_MAJOR_VERSION"
http://git-wip-us.apache.org/repos/asf/impala/blob/c05696dd/testdata/cluster/node_templates/common/etc/hadoop/conf/hdfs-site.xml.tmpl
----------------------------------------------------------------------
diff --git a/testdata/cluster/node_templates/common/etc/hadoop/conf/hdfs-site.xml.tmpl b/testdata/cluster/node_templates/common/etc/hadoop/conf/hdfs-site.xml.tmpl
index c9ee70b..6882fa3 100644
--- a/testdata/cluster/node_templates/common/etc/hadoop/conf/hdfs-site.xml.tmpl
+++ b/testdata/cluster/node_templates/common/etc/hadoop/conf/hdfs-site.xml.tmpl
@@ -22,6 +22,13 @@
<value>true</value>
</property>
+ <!-- The release of Hadoop we're depending on requires an explicit key to allow erasure
+ coding. -->
+ <property>
+ <name>cloudera.erasure_coding.enabled</name>
+ <value>true</value>
+ </property>
+
<property>
<name>dfs.datanode.address</name>
<value>127.0.0.1:${DATANODE_PORT}</value>