You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by el...@apache.org on 2013/12/07 05:10:48 UTC

[1/3] git commit: ACCUMULO-1794 adds hdfs failover to continuous integration test.

Updated Branches:
  refs/heads/1.5.1-SNAPSHOT 072ed6186 -> bc0ce605c


ACCUMULO-1794 adds hdfs failover to continuous integration test.

Signed-off-by: Josh Elser <el...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/872fd1df
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/872fd1df
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/872fd1df

Branch: refs/heads/1.5.1-SNAPSHOT
Commit: 872fd1dfb252e45560b5547aad43399fe433f1a1
Parents: 513f4d2
Author: Sean Busbey <bu...@clouderagovt.com>
Authored: Sat Nov 16 02:46:40 2013 -0600
Committer: Josh Elser <el...@apache.org>
Committed: Fri Dec 6 20:35:28 2013 -0500

----------------------------------------------------------------------
 .../system/continuous/continuous-env.sh.example |   9 +
 test/system/continuous/hdfs-agitator.pl         | 217 +++++++++++++++++++
 test/system/continuous/start-agitator.sh        |   5 +
 test/system/continuous/stop-agitator.sh         |   4 +
 4 files changed, 235 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo/blob/872fd1df/test/system/continuous/continuous-env.sh.example
----------------------------------------------------------------------
diff --git a/test/system/continuous/continuous-env.sh.example b/test/system/continuous/continuous-env.sh.example
index 830ae86..1d39034 100644
--- a/test/system/continuous/continuous-env.sh.example
+++ b/test/system/continuous/continuous-env.sh.example
@@ -89,6 +89,15 @@ MAX_KILL=1
 MASTER_KILL_SLEEP_TIME=60
 MASTER_RESTART_SLEEP_TIME=2
 
+#Do we want to perturb HDFS? Only works on HDFS versions with HA, i.e. Hadoop 2
+# AGITATE_HDFS=true
+#Defaults to false for Accumulo versions that build against Hadoop 1 by default
+AGITATE_HDFS=false
+AGITATE_HDFS_SLEEP_TIME=10
+AGITATE_HDFS_SUPERUSER=hdfs
+AGITATE_HDFS_COMMAND="${HADOOP_PREFIX:/usr/lib/hadoop}/share/hadoop/hdfs/bin/hdfs"
+AGITATE_HDFS_SUDO=`which sudo`
+
 #settings for the verification map reduce job
 VERIFY_OUT=/tmp/continuous_verify
 VERIFY_MAX_MAPS=64

http://git-wip-us.apache.org/repos/asf/accumulo/blob/872fd1df/test/system/continuous/hdfs-agitator.pl
----------------------------------------------------------------------
diff --git a/test/system/continuous/hdfs-agitator.pl b/test/system/continuous/hdfs-agitator.pl
new file mode 100755
index 0000000..85eab32
--- /dev/null
+++ b/test/system/continuous/hdfs-agitator.pl
@@ -0,0 +1,217 @@
+#! /usr/bin/env perl
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+use strict;
+use warnings;
+use POSIX qw(strftime);
+use Getopt::Long;
+use Pod::Usage;
+
+my $help = 0;
+my $man = 0;
+my $sleep = 10;
+my $superuser = 'hdfs';
+my $hdfsCmd;
+if( defined $ENV{'HADOOP_PREFIX'} ){
+  $hdfsCmd = $ENV{'HADOOP_PREFIX'} . '/share/hadoop/hdfs/bin/hdfs';
+}
+my $sudo;
+my $nameservice;
+
+GetOptions('help|?' => \$help, 'man' => \$man, 'sleep=i' => \$sleep, 'nameservice=s' => \$nameservice, 'superuser=s' => \$superuser, 'hdfs-cmd=s' => \$hdfsCmd, 'sudo:s' => \$sudo) or pod2usage(2);
+pod2usage(-exitval => 0, -verbose => 1) if $help;
+pod2usage(-exitval => 0, -verbose => 2) if $man;
+pod2usage(-exitval => 1, -verbose => 1, -message => '$HADOOP_PREFIX not defined and no hdfs-cmd given. please use --hdfs-cmd to specify where your hdfs cli is.') if not defined $hdfsCmd;
+pod2usage(-exitval => 1, -verbose => 1, -message => "Your specified hdfs cli '$hdfsCmd' is not executable.") if not -x $hdfsCmd;
+if( defined $sudo and "" eq $sudo ){
+  $sudo = `which sudo`;
+  pod2usage(-exitval => 1, -verbose => 1, -message => "Error attempting to find the sudo command, please specify it with --sudo /path/to/sudo") if 0 != $?;
+  chomp($sudo);
+}
+if( defined $sudo ){
+  pod2usage(-exitval => 1, -verbose => 1, -message => "Your specified sudo command '$sudo' is not executable.") if not -x $sudo;
+}
+
+my $needsudo = defined $sudo;
+my $haadmin = "$hdfsCmd haadmin";
+if($needsudo) {
+  $haadmin = "$sudo -u $superuser $haadmin";
+  print STDERR "Starting HDFS agitator, configured to fail over every $sleep minutes. will run hdfs command '$hdfsCmd' as user '$superuser' via '$sudo'.\n";
+} else {
+  print STDERR "Starting HDFS agitator, configured to fail over every $sleep minutes. will run hdfs command '$hdfsCmd' as the current user.\n";
+}
+while(1){
+  sleep($sleep * 60);
+  my $t = strftime "%Y%m%d %H:%M:%S", localtime;
+  my @failServices;
+  if( defined $nameservice ){
+    @failServices = ($nameservice);
+  } else {
+    my $nameservicesRaw = `$hdfsCmd getconf -confKey dfs.nameservices`;
+    if(0 != $?) {
+      print STDERR "$t HDFS CLI failed. please see --help to set it correctly\n";
+      exit(1);
+    }
+    chomp($nameservicesRaw);
+    my @nameservices = split(/,/, $nameservicesRaw);
+    if(1 > scalar(@nameservices)) {
+      print STDERR "$t No HDFS NameServices found. Are you sure you're running in HA?\n";
+      exit(1);
+    }
+    if(rand(1) < .5){
+      my $serviceToFail = $nameservices[int(rand(scalar(@nameservices)))];
+      print STDERR "$t Failing over nameservice $serviceToFail\n";
+      @failServices = ($serviceToFail);
+    } else {
+      print STDERR "$t Failing over all nameservices\n";
+      @failServices = @nameservices;
+    }
+  }
+  for my $toFail (@failServices){
+    my $namenodesRaw = `$hdfsCmd getconf -confKey dfs.ha.namenodes.$toFail`;
+    if(0 != $?) {
+      print STDERR "$t HDFS CLI failed to look up namenodes in service $toFail.\n";
+      exit(1);
+    }
+    chomp($namenodesRaw);
+    my @namenodes = split(/,/, $namenodesRaw);
+    if(2 > scalar(@namenodes)) {
+      print STDERR "$t WARN NameService $toFail does not have at least 2 namenodes according to the HDFS configuration, skipping.\n";
+      next;
+    }
+    my $active;
+    for my $namenode (@namenodes){
+      my $status = `$haadmin -ns $toFail -getServiceState $namenode`;
+      if(0 != $?) {
+        if($needsudo) {
+          print STDERR "$t WARN Error while attempting to get the service state of $toFail :: $namenode\n";
+          $status = 'error';
+        } else {
+          print STDERR "$t WARN Current user may not run the HDFS haadmin utility, attempting to sudo to the $superuser user.\n";
+          $needsudo = 1;
+          if(not defined $sudo) {
+            $sudo = `which sudo`;
+            pod2usage(-exitval => 1, -verbose => 1, -message => "Error attempting to find the sudo command, please specify it with --sudo") if 0 != $?;
+            chomp($sudo);
+            pod2usage(-exitval => 1, -verbose => 1, -message => "The sudo command '$sudo' is not executable. please specify sudo with --sudo") if not -x $sudo;
+          }
+          $haadmin = "$sudo -u $superuser $haadmin";
+          redo;
+        }
+      }
+      chomp($status);
+      if( 'active' eq $status ){
+        $active = $namenode;
+        last;
+      }
+    }
+    if( defined $active ){
+      my @standby = grep { $_ ne $active } @namenodes;
+      my $newActive = $standby[int(rand(scalar(@standby)))];
+      print STDERR "$t Transitioning nameservice $toFail from $active to $newActive\n";
+      my $cmd = "$haadmin -ns $toFail -failover $active $newActive";
+      print "$t $cmd\n";
+      system($cmd);
+    } else {
+      my $newActive = $namenodes[int(rand(scalar(@namenodes)))];
+      print STDERR "$t WARN nameservice $toFail did not have an active namenode. Transitioning a random namenode to active. This will fail if HDFS is configured for automatic failover.\n";
+      my $cmd = "$haadmin -ns $toFail -transitionToActive $newActive";
+      print "$t $cmd\n";
+      system($cmd);
+    }
+  }
+}
+__END__
+
+=head1 NAME
+
+hdfs-agitator - causes HDFS to failover
+
+=head1 DESCRIPTION
+
+Sleeps for a configurable amount of time, then causes a NameNode failover in one
+or more HDFS NameServices. If a given NameService does not have an Active
+NameNode when it comes time to failover, a random standby is promoted.
+
+Only works on HDFS versions that support HA configurations and the haadmin
+command. In order to function, the user running this script must be able to
+use the haadmin command. This requires access to an HDFS superuser. By default,
+it runs haadmin as the current user and falls back to sudo if that call fails.
+
+=head1 SYNOPSIS
+
+hdfs-agitator [options]
+
+  Options:
+    --help         Brief help message
+    --man          Full documentation
+    --sleep        Time to sleep between failovers in minutes. Default 10
+    --superuser    HDFS superuser. Default 'hdfs'
+    --hdfs-cmd     hdfs command path. Default '$HADOOP_PREFIX/share/hadoop/hdfs/bin/hdfs'
+    --nameservice  Limit failovers to specified nameservice. Default all nameservices
+    --sudo         command to call to sudo to the HDFS superuser. Default 'sudo' if needed.
+
+=head1 OPTIONS
+
+=over 8
+
+=item B<--sleep>
+
+Sleep the given number of minutes between attempts to fail over nameservices.
+
+=item B<--nameservice>
+
+Limit failover attempts to the given nameservice. By default, we attempt to list
+all known nameservices and choose either one or all of them to failover in a
+given cycle.
+
+=item B<--superuser>
+
+An HDFS superuser capable of running the haadmin command. Defaults to "hdfs".
+
+=item B<--hdfs-cmd>
+
+Path to the HDFS cli. Will be used both for non-administrative commands (e.g.
+listing the nameservices and serviceids in a given nameservice) and admin-only
+actions such as checking status and failing over.
+
+Defaults to using $HADOOP_PREFIX.
+
+=item B<--sudo>
+
+Command to allow us to act as the given HDFS superuser. By default we assume the current user
+can run HDFS administrative commands. When this argument is specified we will attempt
+to use the HDFS superuser instead. If given an argument, it will be called like
+sudo, i.e. "sudo -u $superuser $cmd". Defaults to "sudo" on the shell's path.
+
+=back
+
+=head1 SEE ALSO
+
+See the Apache Hadoop documentation on configuring HDFS HA
+
+=over 8
+
+=item B<HA with QJM>
+
+http://hadoop.apache.org/docs/r2.2.0/hadoop-yarn/hadoop-yarn-site/HDFSHighAvailabilityWithQJM.html#Administrative_commands
+
+=item B<HA with NFS>
+
+http://hadoop.apache.org/docs/r2.2.0/hadoop-yarn/hadoop-yarn-site/HDFSHighAvailabilityWithNFS.html#Administrative_commands
+
+=back

http://git-wip-us.apache.org/repos/asf/accumulo/blob/872fd1df/test/system/continuous/start-agitator.sh
----------------------------------------------------------------------
diff --git a/test/system/continuous/start-agitator.sh b/test/system/continuous/start-agitator.sh
index 52e5a4e..c734943 100755
--- a/test/system/continuous/start-agitator.sh
+++ b/test/system/continuous/start-agitator.sh
@@ -21,3 +21,8 @@ CONTINUOUS_CONF_DIR=${CONTINUOUS_CONF_DIR:-$ACCUMULO_HOME/test/system/continuous
 nohup ./agitator.pl $KILL_SLEEP_TIME $TUP_SLEEP_TIME $MIN_KILL $MAX_KILL >$CONTINUOUS_LOG_DIR/`date +%Y%m%d%H%M%S`_`hostname`_agitator.out 2>$CONTINUOUS_LOG_DIR/`date +%Y%m%d%H%M%S`_`hostname`_agitator.err &
 
 nohup ./magitator.pl $MASTER_KILL_SLEEP_TIME $MASTER_RESTART_SLEEP_TIME >$CONTINUOUS_LOG_DIR/`date +%Y%m%d%H%M%S`_`hostname`_magitator.out 2>$CONTINUOUS_LOG_DIR/`date +%Y%m%d%H%M%S`_`hostname`_magitator.err &
+
+if ${AGITATE_HDFS:-false} ; then
+  AGITATOR_LOG=${CONTINUOUS_LOG_DIR}/`date +%Y%m%d%H%M%S`_`hostname`_hdfs-agitator
+  nohup ./hdfs-agitator.pl --sleep ${AGITATE_HDFS_SLEEP_TIME} --hdfs-cmd ${AGITATE_HDFS_COMMAND} --superuser ${AGITATE_HDFS_SUPERUSER} --sudo ${AGITATE_HDFS_SUDO} >${AGITATOR_LOG}.out 2>${AGITATOR_LOG}.err &
+fi

http://git-wip-us.apache.org/repos/asf/accumulo/blob/872fd1df/test/system/continuous/stop-agitator.sh
----------------------------------------------------------------------
diff --git a/test/system/continuous/stop-agitator.sh b/test/system/continuous/stop-agitator.sh
index b853a55..f26e3b2 100755
--- a/test/system/continuous/stop-agitator.sh
+++ b/test/system/continuous/stop-agitator.sh
@@ -18,5 +18,9 @@
 CONTINUOUS_CONF_DIR=${CONTINUOUS_CONF_DIR:-$ACCUMULO_HOME/test/system/continuous/}
 . $CONTINUOUS_CONF_DIR/continuous-env.sh
 
+if ${AGITATE_HDFS:-false} ; then
+  pkill -f hdfs-agitator.pl
+fi
+
 pkill -f agitator.pl
 


[3/3] git commit: Merge branch '1.5.1-SNAPSHOT' of https://git-wip-us.apache.org/repos/asf/accumulo into 1.5.1-SNAPSHOT

Posted by el...@apache.org.
Merge branch '1.5.1-SNAPSHOT' of https://git-wip-us.apache.org/repos/asf/accumulo into 1.5.1-SNAPSHOT


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/bc0ce605
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/bc0ce605
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/bc0ce605

Branch: refs/heads/1.5.1-SNAPSHOT
Commit: bc0ce605c9d5d950fab80ee6bb4bd0e194708d4d
Parents: 0453987 072ed61
Author: Josh Elser <el...@apache.org>
Authored: Fri Dec 6 23:09:09 2013 -0500
Committer: Josh Elser <el...@apache.org>
Committed: Fri Dec 6 23:09:09 2013 -0500

----------------------------------------------------------------------
 examples/simple/pom.xml | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)
----------------------------------------------------------------------



[2/3] git commit: Merge branch '1.4.5-SNAPSHOT' into 1.5.1-SNAPSHOT

Posted by el...@apache.org.
Merge branch '1.4.5-SNAPSHOT' into 1.5.1-SNAPSHOT

Conflicts:
	test/system/continuous/start-agitator.sh
	test/system/continuous/stop-agitator.sh


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/04539871
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/04539871
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/04539871

Branch: refs/heads/1.5.1-SNAPSHOT
Commit: 0453987164216aaea0ad17765f35eef5b139f366
Parents: b9adbd7 872fd1d
Author: Josh Elser <el...@apache.org>
Authored: Fri Dec 6 23:07:09 2013 -0500
Committer: Josh Elser <el...@apache.org>
Committed: Fri Dec 6 23:07:09 2013 -0500

----------------------------------------------------------------------
 .../system/continuous/continuous-env.sh.example |   9 +
 test/system/continuous/hdfs-agitator.pl         | 217 +++++++++++++++++++
 test/system/continuous/start-agitator.sh        |   5 +
 test/system/continuous/stop-agitator.sh         |   4 +
 4 files changed, 235 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo/blob/04539871/test/system/continuous/continuous-env.sh.example
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/accumulo/blob/04539871/test/system/continuous/start-agitator.sh
----------------------------------------------------------------------
diff --cc test/system/continuous/start-agitator.sh
index 8c2bafb,c734943..e476c8d
--- a/test/system/continuous/start-agitator.sh
+++ b/test/system/continuous/start-agitator.sh
@@@ -17,20 -17,12 +17,25 @@@
  
  CONTINUOUS_CONF_DIR=${CONTINUOUS_CONF_DIR:-$ACCUMULO_HOME/test/system/continuous/}
  . $CONTINUOUS_CONF_DIR/continuous-env.sh
 +export HADOOP_PREFIX
  
 -nohup ./agitator.pl $KILL_SLEEP_TIME $TUP_SLEEP_TIME $MIN_KILL $MAX_KILL >$CONTINUOUS_LOG_DIR/`date +%Y%m%d%H%M%S`_`hostname`_agitator.out 2>$CONTINUOUS_LOG_DIR/`date +%Y%m%d%H%M%S`_`hostname`_agitator.err &
 +mkdir -p $CONTINUOUS_LOG_DIR
  
 -nohup ./magitator.pl $MASTER_KILL_SLEEP_TIME $MASTER_RESTART_SLEEP_TIME >$CONTINUOUS_LOG_DIR/`date +%Y%m%d%H%M%S`_`hostname`_magitator.out 2>$CONTINUOUS_LOG_DIR/`date +%Y%m%d%H%M%S`_`hostname`_magitator.err &
 +# Agitator needs to handle HDFS and Accumulo - can't switch to a single user and expect it to work
 +nohup ./agitator.pl $KILL_SLEEP_TIME $TUP_SLEEP_TIME $HDFS_USER $ACCUMULO_USER $MIN_KILL $MAX_KILL >$CONTINUOUS_LOG_DIR/`date +%Y%m%d%H%M%S`_`hostname`_agitator.out 2>$CONTINUOUS_LOG_DIR/`date +%Y%m%d%H%M%S`_`hostname`_agitator.err &
 +
 +if [[ "`whoami`" == "root" ]];  then
 +  # Change to the correct user if started as root
 +  su -c "nohup $CONTINUOUS_CONF_DIR/magitator.pl $MASTER_KILL_SLEEP_TIME $MASTER_RESTART_SLEEP_TIME >$CONTINUOUS_LOG_DIR/`date +%Y%m%d%H%M%S`_`hostname`_magitator.out 2>$CONTINUOUS_LOG_DIR/`date +%Y%m%d%H%M%S`_`hostname`_magitator.err &" -m - $ACCUMULO_USER
 +elif [[ "`whoami`" == $ACCUMULO_USER ]]; then
 +  # Just run the magitator if we're the accumulo user
 +  nohup $CONTINUOUS_CONF_DIR/magitator.pl $MASTER_KILL_SLEEP_TIME $MASTER_RESTART_SLEEP_TIME >$CONTINUOUS_LOG_DIR/`date +%Y%m%d%H%M%S`_`hostname`_magitator.out 2>$CONTINUOUS_LOG_DIR/`date +%Y%m%d%H%M%S`_`hostname`_magitator.err &
 +else
 +  # Not root, and not the accumulo user, hope you can sudo to it
 +  sudo -m -u $ACCUMULO_USER "nohup $CONTINUOUS_CONF_DIR/magitator.pl $MASTER_KILL_SLEEP_TIME $MASTER_RESTART_SLEEP_TIME >$CONTINUOUS_LOG_DIR/`date +%Y%m%d%H%M%S`_`hostname`_magitator.out 2>$CONTINUOUS_LOG_DIR/`date +%Y%m%d%H%M%S`_`hostname`_magitator.err &"
 +fi
+ 
+ if ${AGITATE_HDFS:-false} ; then
+   AGITATOR_LOG=${CONTINUOUS_LOG_DIR}/`date +%Y%m%d%H%M%S`_`hostname`_hdfs-agitator
+   nohup ./hdfs-agitator.pl --sleep ${AGITATE_HDFS_SLEEP_TIME} --hdfs-cmd ${AGITATE_HDFS_COMMAND} --superuser ${AGITATE_HDFS_SUPERUSER} --sudo ${AGITATE_HDFS_SUDO} >${AGITATOR_LOG}.out 2>${AGITATOR_LOG}.err &
+ fi

http://git-wip-us.apache.org/repos/asf/accumulo/blob/04539871/test/system/continuous/stop-agitator.sh
----------------------------------------------------------------------
diff --cc test/system/continuous/stop-agitator.sh
index 8ce448e,f26e3b2..3d64e2d
--- a/test/system/continuous/stop-agitator.sh
+++ b/test/system/continuous/stop-agitator.sh
@@@ -17,10 -18,9 +17,14 @@@
  CONTINUOUS_CONF_DIR=${CONTINUOUS_CONF_DIR:-$ACCUMULO_HOME/test/system/continuous/}
  . $CONTINUOUS_CONF_DIR/continuous-env.sh
  
 +# Try to use sudo when we wouldn't normally be able to kill the processes
 +if [[ ("`whoami`" != "root") && ("`whoami`" != $ACCUMULO_USER) ]];  then
 +  sudo -u $ACCUMULO_USER pkill -f agitator.pl
 +else
 +  pkill -f agitator.pl
 +fi
 +
+ if ${AGITATE_HDFS:-false} ; then
+   pkill -f hdfs-agitator.pl
+ fi
+ 
 -pkill -f agitator.pl
 -