You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by om...@apache.org on 2011/03/08 07:01:18 UTC
svn commit: r1079259 - in
/hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/system:
./ org/ org/apache/ org/apache/hadoop/ org/apache/hadoop/mapred/
org/apache/hadoop/mapred/gridmix/ org/apache/hadoop/mapred/gridmix/test/
org/apache/h...
Author: omalley
Date: Tue Mar 8 06:01:17 2011
New Revision: 1079259
URL: http://svn.apache.org/viewvc?rev=1079259&view=rev
Log:
commit 0d588f7762d61fc01dbb443710fbc155337c6a8f
Author: Vinay Kumar Thota <vi...@yahoo-inc.com>
Date: Wed Feb 2 02:33:11 2011 -0800
3926020 from
+++ b/YAHOO-CHANGES.txt
+ [MR-2033] : Gridmix generate data test with various submission
+ policies and different user resolver. Patch available at
+ (vinayt)
+
Added:
hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/system/
hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/system/org/
hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/system/org/apache/
hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/system/org/apache/hadoop/
hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/
hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/
hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridMixDataGeneration.java
hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/
hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/
hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/GridMixConfig.java
hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/GridMixRunMode.java
hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/UtilsForGridmix.java
Added: hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridMixDataGeneration.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridMixDataGeneration.java?rev=1079259&view=auto
==============================================================================
--- hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridMixDataGeneration.java (added)
+++ hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridMixDataGeneration.java Tue Mar 8 06:01:17 2011
@@ -0,0 +1,205 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.mapreduce.test.system.MRCluster;
+import org.apache.hadoop.mapreduce.test.system.JTProtocol;
+import org.apache.hadoop.mapreduce.test.system.JTClient;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobStatus;
+import org.apache.hadoop.mapred.gridmix.RoundRobinUserResolver;
+import org.apache.hadoop.mapred.gridmix.EchoUserResolver;
+import org.apache.hadoop.mapred.gridmix.SubmitterUserResolver;
+import org.apache.hadoop.mapred.gridmix.test.system.UtilsForGridmix;
+import org.apache.hadoop.mapred.gridmix.test.system.GridMixRunMode;
+import org.apache.hadoop.mapred.gridmix.test.system.GridMixConfig;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.ContentSummary;
+import org.junit.BeforeClass;
+import org.junit.AfterClass;
+import org.junit.Test;
+import org.junit.Assert;
+import java.io.IOException;
+
+/**
+ * System test that runs Gridmix in data-generation mode on an
+ * {@link MRCluster} and verifies the generated data for several
+ * submission policy and user resolver combinations.
+ */
+public class TestGridMixDataGeneration {
+  private static final Log LOG =
+      LogFactory.getLog(TestGridMixDataGeneration.class);
+  /** Number of bytes in one megabyte, kept as a double for exact sizes. */
+  private static final double BYTES_PER_MB = 1024 * 1024;
+  /** Largest accepted gap, in MB, between requested and generated size. */
+  private static final double SIZE_TOLERANCE_MB = 0.1;
+  private static Configuration conf = new Configuration();
+  private static MRCluster cluster;
+  private static JTClient jtClient;
+  private static JTProtocol rtClient;
+  private static Path gridmixDir;
+  /** Number of task tracker clients reported by the cluster. */
+  private static int cSize;
+
+  /**
+   * Bring the cluster up once for all tests and create the gridmix
+   * working directory.
+   * @throws Exception if the cluster or the directory cannot be set up.
+   */
+  @BeforeClass
+  public static void before() throws Exception {
+    String[] excludeExpList = {"java.net.ConnectException",
+                               "java.io.IOException"};
+    cluster = MRCluster.createCluster(conf);
+    cluster.setExcludeExpList(excludeExpList);
+    cluster.setUp();
+    cSize = cluster.getTTClients().size();
+    jtClient = cluster.getJTClient();
+    rtClient = jtClient.getProxy();
+    gridmixDir = new Path("herriot-gridmix");
+    UtilsForGridmix.createDirs(gridmixDir, rtClient.getDaemonConf());
+  }
+
+  /**
+   * Remove the gridmix working directory and tear the cluster down.
+   * The tear down runs even when the cleanup fails, and both steps are
+   * skipped for anything {@link #before()} did not get to initialize.
+   * @throws Exception if the cleanup or the tear down fails.
+   */
+  @AfterClass
+  public static void after() throws Exception {
+    try {
+      if (gridmixDir != null) {
+        UtilsForGridmix.cleanup(gridmixDir, conf);
+      }
+    } finally {
+      if (cluster != null) {
+        cluster.tearDown();
+      }
+    }
+  }
+
+  /**
+   * Generate the data in a STRESS submission policy with
+   * SubmitterUserResolver mode and verify whether the generated data
+   * matches the given input size or not.
+   * @throws Exception if the gridmix run or the verification fails.
+   */
+  @Test
+  public void testGenerateDataWithSTRESSSubmission() throws Exception {
+    conf = rtClient.getDaemonConf();
+    final long inputSize = cSize * 128;
+    String[] runtimeValues = {"LOADJOB",
+                              SubmitterUserResolver.class.getName(),
+                              "STRESS",
+                              inputSize + "m",
+                              "file:///dev/null"};
+
+    int exitCode = UtilsForGridmix.runGridmixJob(gridmixDir,
+        conf, GridMixRunMode.DATA_GENERATION, runtimeValues);
+    Assert.assertEquals("Data generation has failed.", 0, exitCode);
+    checkGeneratedDataAndJobStatus(inputSize);
+  }
+
+  /**
+   * Generate the data in a REPLAY submission policy with
+   * RoundRobinUserResolver mode and verify whether the generated data
+   * matches the given input size or not.
+   * @throws Exception if the gridmix run or the verification fails.
+   */
+  @Test
+  public void testGenerateDataWithREPLAYSubmission() throws Exception {
+    conf = rtClient.getDaemonConf();
+    final long inputSize = cSize * 300;
+    String[] runtimeValues = {"LOADJOB",
+                              RoundRobinUserResolver.class.getName(),
+                              "REPLAY",
+                              inputSize + "m",
+                              "file://"
+                                  + UtilsForGridmix.getProxyUsersFile(conf),
+                              "file:///dev/null"};
+
+    int exitCode = UtilsForGridmix.runGridmixJob(gridmixDir,
+        conf, GridMixRunMode.DATA_GENERATION, runtimeValues);
+    Assert.assertEquals("Data generation has failed.", 0, exitCode);
+    checkGeneratedDataAndJobStatus(inputSize);
+  }
+
+  /**
+   * Generate the data in a SERIAL submission policy with EchoUserResolver
+   * mode and also set the number of bytes per file. Verify the size of
+   * each directory under the input, the number of files in each of them,
+   * and the overall size of the generated data.
+   * @throws Exception if the gridmix run or the verification fails.
+   */
+  @Test
+  public void testGenerateDataWithSERIALSubmission() throws Exception {
+    conf = rtClient.getDaemonConf();
+    int perNodeSize = 500; // 500 mb per node data
+    final long inputSize = cSize * perNodeSize;
+    String[] runtimeValues = {"LOADJOB",
+                              EchoUserResolver.class.getName(),
+                              "SERIAL",
+                              inputSize + "m",
+                              "file:///dev/null"};
+    int bytesPerFile = 200; // 200 mb per file of data
+    String[] otherArgs = {
+        "-D", GridMixConfig.GRIDMIX_BYTES_PER_FILE
+            + "=" + (bytesPerFile * 1024 * 1024)
+    };
+    int exitCode = UtilsForGridmix.runGridmixJob(gridmixDir,
+        conf, GridMixRunMode.DATA_GENERATION, runtimeValues, otherArgs);
+    Assert.assertEquals("Data generation has failed.", 0, exitCode);
+    LOG.info("Verify the eache file size in a generate data.");
+    Path inputDir = new Path(gridmixDir, "input");
+    verifyEachNodeSize(inputDir, perNodeSize);
+    verifyNumOfFilesGeneratedInEachNode(inputDir, perNodeSize, bytesPerFile);
+    checkGeneratedDataAndJobStatus(inputSize);
+  }
+
+  /**
+   * Check that the data under the input directory has the requested size
+   * and that the first job reported by the job client has succeeded.
+   * @param inputSize - requested size of the generated data in MB.
+   * @throws IOException - if an I/O error occurs.
+   */
+  private void checkGeneratedDataAndJobStatus(long inputSize)
+      throws IOException {
+    LOG.info("Verify the generated data size.");
+    double dataSize = getDataSize(new Path(gridmixDir, "input"));
+    // The earlier form "a + 0.1 > b || a - 0.1 < b" held for every value;
+    // a delta comparison actually bounds the difference.
+    Assert.assertEquals("Generate data has not matched with given size",
+        (double) inputSize, dataSize, SIZE_TOLERANCE_MB);
+
+    JobClient jobClient = jtClient.getClient();
+    LOG.info("Verify the job status after completion of job.");
+    JobStatus[] jobs = jobClient.getAllJobs();
+    Assert.assertTrue("No job has been reported by the job client.",
+        jobs != null && jobs.length > 0);
+    // NOTE(review): assumes index 0 is the data generation job - confirm.
+    Assert.assertEquals("Job has not succeeded.", JobStatus.SUCCEEDED,
+        jobs[0].getRunState());
+  }
+
+  /**
+   * Check the size of every directory found directly under the input
+   * directory; plain files at that level are ignored.
+   * NOTE(review): presumably there is one such directory per node -
+   * confirm against the Gridmix data generator.
+   * @param inputDir - directory holding the generated data.
+   * @param expectedSizeInMB - expected size of each directory in MB.
+   * @throws IOException - if an I/O error occurs.
+   */
+  private void verifyEachNodeSize(Path inputDir, int expectedSizeInMB)
+      throws IOException {
+    FileSystem fs = inputDir.getFileSystem(conf);
+    for (FileStatus fstat : fs.listStatus(inputDir)) {
+      if (fstat.isDir()) {
+        double nodeSize = getDataSize(fstat.getPath());
+        Assert.assertEquals("The Size has not matched with given per node"
+            + " file size(" + expectedSizeInMB + "mb)",
+            (double) expectedSizeInMB, nodeSize, SIZE_TOLERANCE_MB);
+      }
+    }
+  }
+
+  /**
+   * Check that every directory found directly under the input directory
+   * holds as many files as are needed to store nodeSize MB in files of
+   * at most fileSize MB.
+   * @param inputDir - directory holding the generated data.
+   * @param nodeSize - data size per directory in MB.
+   * @param fileSize - maximum size of a single file in MB.
+   * @throws IOException - if an I/O error occurs.
+   */
+  private void verifyNumOfFilesGeneratedInEachNode(Path inputDir,
+      int nodeSize, int fileSize) throws IOException {
+    // Integer ceiling of nodeSize / fileSize.
+    int expFileCount = (nodeSize + fileSize - 1) / fileSize;
+    FileSystem fs = inputDir.getFileSystem(conf);
+    for (FileStatus fstat : fs.listStatus(inputDir)) {
+      if (fstat.isDir()) {
+        FileSystem nodeFs = fstat.getPath().getFileSystem(conf);
+        long actFileCount = nodeFs.getContentSummary(fstat.getPath())
+            .getFileCount();
+        Assert.assertEquals("File count has not matched.",
+            expFileCount, actFileCount);
+      }
+    }
+  }
+
+  /**
+   * Get the total length of the content under a path.
+   * @param inputDir - path to measure.
+   * @return - content length in MB; fractions are kept so that a
+   *           tolerance check is not distorted by truncation.
+   * @throws IOException - if an I/O error occurs.
+   */
+  private static double getDataSize(Path inputDir) throws IOException {
+    FileSystem fs = inputDir.getFileSystem(conf);
+    ContentSummary csmry = fs.getContentSummary(inputDir);
+    return csmry.getLength() / BYTES_PER_MB;
+  }
+}
Added: hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/GridMixConfig.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/GridMixConfig.java?rev=1079259&view=auto
==============================================================================
--- hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/GridMixConfig.java (added)
+++ hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/GridMixConfig.java Tue Mar 8 06:01:17 2011
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix.test.system;
+
+/**
+ * Gridmix configuration property names.
+ */
+public class GridMixConfig {
+
+  /** Name of the property that carries the Gridmix logger mode. */
+  public static final String GRIDMIX_LOG_MODE =
+      "log4j.logger.org.apache.hadoop.mapred.gridmix";
+
+  /** Name of the property that carries the Gridmix output directory. */
+  public static final String GRIDMIX_OUTPUT_DIR = "gridmix.output.directory";
+
+  /** Name of the Gridmix job type property (LOADJOB/SLEEPJOB). */
+  public static final String GRIDMIX_JOB_TYPE = "gridmix.job.type";
+
+  /** Name of the property for using the queue in the trace on submission. */
+  public static final String GRIDMIX_JOB_SUBMISSION_QUEUE_IN_TRACE =
+      "gridmix.job-submission.use-queue-in-trace";
+
+  /**
+   * Name of the Gridmix user resolver property
+   * (RoundRobinUserResolver/SubmitterUserResolver/EchoUserResolver).
+   */
+  public static final String GRIDMIX_USER_RESOLVER =
+      "gridmix.user.resolve.class";
+
+  /** Name of the Gridmix queue depth property. */
+  public static final String GRIDMIX_QUEUE_DEPTH =
+      "gridmix.client.pending.queue.depth";
+
+  /** Name of the property for the bytes generated per file. */
+  public static final String GRIDMIX_BYTES_PER_FILE =
+      "gridmix.gen.bytes.per.file";
+
+  /** Name of the job submission policy property (STRESS/REPLAY/SERIAL). */
+  public static final String GRIDMIX_SUBMISSION_POLICY =
+      "gridmix.job-submission.policy";
+
+  /** Name of the Gridmix minimum file size property. */
+  public static final String GRIDMIX_MINIMUM_FILE_SIZE =
+      "gridmix.min.file.size";
+}
Added: hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/GridMixRunMode.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/GridMixRunMode.java?rev=1079259&view=auto
==============================================================================
--- hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/GridMixRunMode.java (added)
+++ hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/GridMixRunMode.java Tue Mar 8 06:01:17 2011
@@ -0,0 +1,38 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix.test.system;
+/**
+ * Gridmix run modes: integer codes plus a readable label for each code.
+ */
+public class GridMixRunMode {
+  public static final int DATA_GENERATION = 1;
+  public static final int RUN_GRIDMIX = 2;
+  public static final int DATA_GENERATION_AND_RUN_GRIDMIX = 3;
+  // Labels stored at index (mode - 1); final so the reference cannot be
+  // swapped out after class initialization.
+  private static final String[] MODE_STR = {
+      "DATA GENERATION",
+      "RUNNING GRIDMIX",
+      "DATA GENERATION AND RUNNING GRIDMIX"};
+
+  /**
+   * Get the appropriate message against the mode.
+   * @param mode - gridmix run mode either 1 or 2 or 3.
+   * @return - message as string.
+   * @throws IllegalArgumentException - if the mode is not 1, 2 or 3.
+   */
+  public static String getMode(int mode) {
+    if (mode < 1 || mode > MODE_STR.length) {
+      // Name the offending value instead of failing on the array index.
+      throw new IllegalArgumentException(
+          "Unknown gridmix run mode: " + mode);
+    }
+    return MODE_STR[mode - 1];
+  }
+}
Added: hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/UtilsForGridmix.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/UtilsForGridmix.java?rev=1079259&view=auto
==============================================================================
--- hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/UtilsForGridmix.java (added)
+++ hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/UtilsForGridmix.java Tue Mar 8 06:01:17 2011
@@ -0,0 +1,322 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix.test.system;
+
+import java.io.IOException;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.hadoop.mapred.gridmix.Gridmix;
+import org.apache.hadoop.conf.Configuration;
+import java.util.Date;
+import java.util.Hashtable;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.net.URI;
+import java.text.SimpleDateFormat;
+import java.io.OutputStream;
+import java.util.Set;
+import java.util.List;
+import java.util.Iterator;
+import java.util.Map;
+import java.io.File;
+import java.io.FileOutputStream;
+import org.apache.hadoop.test.system.ProxyUserDefinitions;
+import org.apache.hadoop.test.system.ProxyUserDefinitions.GroupsAndHost;
+
+/**
+ * Gridmix utilities.
+ */
+public class UtilsForGridmix {
+  private static final Log LOG = LogFactory.getLog(UtilsForGridmix.class);
+  private static final String ROUND_ROBIN_RESOLVER = "RoundRobinUserResolver";
+  private static final String PROXY_USER_PREFIX = "hadoop.proxyuser.";
+  private static final String GROUPS_SUFFIX = ".groups";
+  private static final String HOSTS_SUFFIX = ".hosts";
+
+  /**
+   * cleanup the folder or file.
+   * @param path - folder or file path.
+   * @param conf - cluster configuration
+   * @throws IOException - If an I/O error occurs.
+   */
+  public static void cleanup(Path path, Configuration conf)
+      throws IOException {
+    FileSystem fs = path.getFileSystem(conf);
+    // The file system is deliberately left open: Path.getFileSystem hands
+    // out a cached instance that other code may share, so closing it here
+    // could break a holder of the same instance.
+    fs.delete(path, true);
+  }
+
+  /**
+   * Get the login user.
+   * @return - login user as string.
+   * @throws IOException - if an I/O error occurs.
+   */
+  public static String getUserName() throws IOException {
+    return UserGroupInformation.getLoginUser().getUserName();
+  }
+
+  /**
+   * Get the argument list for gridmix job. The result holds the "-D"
+   * properties first, then otherArgs (if any), then the class arguments.
+   * @param gridmixDir - gridmix parent directory.
+   * @param gridmixRunMode - gridmix modes either 1,2,3.
+   * @param values - gridmix runtime values: job type, user resolver class
+   *          and submission policy, followed by the data size (only for
+   *          the data generation modes), the users file (only for the
+   *          RoundRobinUserResolver) and the trailing positional argument.
+   * @param otherArgs - gridmix other generic args, may be null.
+   * @return - argument list as string array.
+   */
+  public static String [] getArgsList(Path gridmixDir, int gridmixRunMode,
+      String [] values, String [] otherArgs) {
+    final String inputPath = new Path(gridmixDir, "input").toString();
+    final boolean generatesData =
+        gridmixRunMode == GridMixRunMode.DATA_GENERATION
+        || gridmixRunMode == GridMixRunMode.DATA_GENERATION_AND_RUN_GRIDMIX;
+    // contains() also matches a resolver name that starts at index 0,
+    // which an "indexOf(..) > 0" check would miss.
+    final boolean needsUsersFile =
+        (generatesData || gridmixRunMode == GridMixRunMode.RUN_GRIDMIX)
+        && values[1].contains(ROUND_ROBIN_RESOLVER);
+
+    List<String> args = new ArrayList<String>();
+    addProperty(args, GridMixConfig.GRIDMIX_LOG_MODE, "DEBUG");
+    addProperty(args, GridMixConfig.GRIDMIX_OUTPUT_DIR,
+        new Path(gridmixDir, "gridmix").toString());
+    addProperty(args,
+        GridMixConfig.GRIDMIX_JOB_SUBMISSION_QUEUE_IN_TRACE, "true");
+    addProperty(args, GridMixConfig.GRIDMIX_JOB_TYPE, values[0]);
+    addProperty(args, GridMixConfig.GRIDMIX_USER_RESOLVER, values[1]);
+    addProperty(args, GridMixConfig.GRIDMIX_SUBMISSION_POLICY, values[2]);
+
+    if (otherArgs != null) {
+      args.addAll(Arrays.asList(otherArgs));
+    }
+
+    // Optional entries are consumed from values in order, starting at 3.
+    int next = 3;
+    if (generatesData) {
+      args.add("-generate");
+      args.add(values[next++]);
+    }
+    if (needsUsersFile) {
+      args.add("-users");
+      args.add(values[next++]);
+    }
+    args.add(inputPath);
+    args.add(values[next]);
+    return args.toArray(new String[args.size()]);
+  }
+
+  /** Append a "-D name=value" pair to the argument list. */
+  private static void addProperty(List<String> args, String name,
+      String value) {
+    args.add("-D");
+    args.add(name + "=" + value);
+  }
+
+  /**
+   * Create a zero-filled file with specified size in mb. The file is
+   * named "datafile_" followed by the current time stamp.
+   * @param sizeInMB - file size in mb.
+   * @param inputDir - input directory.
+   * @param conf - cluster configuration.
+   * @throws Exception - if an exception occurs.
+   */
+  public static void createFile(int sizeInMB, Path inputDir,
+      Configuration conf) throws Exception {
+    SimpleDateFormat sdf = new SimpleDateFormat("ddMMyy_HHmmssS");
+    String formatDate = sdf.format(new Date());
+    // Not closed on purpose, see cleanup(): the instance may be shared.
+    FileSystem fs = inputDir.getFileSystem(conf);
+    OutputStream out = fs.create(new Path(inputDir, "datafile_" + formatDate));
+    try {
+      final byte[] b = new byte[1024 * 1024];
+      for (int index = 0; index < sizeInMB; index++) {
+        out.write(b);
+      }
+    } finally {
+      // Release the stream even when a write fails.
+      out.close();
+    }
+  }
+
+  /**
+   * Create directories for a path.
+   * @param path - directories path.
+   * @param conf - cluster configuration.
+   * @throws IOException - if an I/O error occurs or the directories
+   *           could not be created.
+   */
+  public static void createDirs(Path path,Configuration conf)
+      throws IOException {
+    FileSystem fs = path.getFileSystem(conf);
+    if (!fs.exists(path) && !fs.mkdirs(path)) {
+      throw new IOException("Could not create the directory " + path);
+    }
+  }
+
+  /**
+   * Run the Gridmix job with given runtime arguments.
+   * @param gridmixDir - Gridmix parent directory.
+   * @param conf - cluster configuration.
+   * @param gridmixRunMode - gridmix run mode either 1,2,3
+   * @param runtimeValues -gridmix runtime values.
+   * @return - exit code returned by the gridmix run.
+   * @throws Exception - if the gridmix run fails.
+   */
+  public static int runGridmixJob(Path gridmixDir, Configuration conf,
+      int gridmixRunMode, String [] runtimeValues) throws Exception {
+    return runGridmixJob(gridmixDir, conf, gridmixRunMode, runtimeValues, null);
+  }
+
+  /**
+   * Run the Gridmix job with given runtime arguments. The "gridmix"
+   * directory under gridmixDir is removed first; the "input" directory
+   * is removed as well for the two data generation modes.
+   * @param gridmixDir - Gridmix parent directory
+   * @param conf - cluster configuration.
+   * @param gridmixRunMode - gridmix run mode.
+   * @param runtimeValues - gridmix runtime values.
+   * @param otherArgs - gridmix other generic args.
+   * @return - exit code returned by the gridmix run.
+   * @throws Exception - if the cleanup or the gridmix run fails.
+   */
+  public static int runGridmixJob(Path gridmixDir, Configuration conf,
+      int gridmixRunMode, String [] runtimeValues,
+      String [] otherArgs) throws Exception {
+    Path outputDir = new Path(gridmixDir, "gridmix");
+    Path inputDir = new Path(gridmixDir, "input");
+    LOG.info("Cleanup the data if data already exists.");
+    switch (gridmixRunMode) {
+      case GridMixRunMode.DATA_GENERATION :
+      case GridMixRunMode.DATA_GENERATION_AND_RUN_GRIDMIX :
+        cleanup(inputDir, conf);
+        cleanup(outputDir, conf);
+        break;
+      case GridMixRunMode.RUN_GRIDMIX :
+        cleanup(outputDir, conf);
+        break;
+      default :
+        // Nothing is removed for a mode that is not known here.
+        break;
+    }
+
+    final String [] args = UtilsForGridmix.getArgsList(gridmixDir,
+        gridmixRunMode, runtimeValues, otherArgs);
+    Gridmix gridmix = new Gridmix();
+    LOG.info("Submit a Gridmix job in " + runtimeValues[1] +
+        " mode for " + GridMixRunMode.getMode(gridmixRunMode));
+    return ToolRunner.run(conf, gridmix, args);
+  }
+
+  /**
+   * Get the proxy users file.
+   * @param conf - cluster configuration.
+   * @return String - proxy users file.
+   * @throws Exception - if no proxy users found in configuration or the
+   *           file could not be written.
+   */
+  public static String getProxyUsersFile(Configuration conf)
+      throws Exception {
+    ProxyUserDefinitions pud = getProxyUsersData(conf);
+    String fileName = buildProxyUsersFile(pud.getProxyUsers());
+    if (fileName == null) {
+      LOG.error("Proxy users file not found.");
+      throw new Exception("Proxy users file not found.");
+    }
+    return fileName;
+  }
+
+  /**
+   * Write one "user,group,group..." line per proxy user, leaving out the
+   * login user, into a temporary file that is deleted on JVM exit.
+   * @param proxyUserData - proxy users with their groups and hosts.
+   * @return - the file name, or null if there was nothing to write or
+   *           the file could not be written.
+   * @throws Exception - if the login user cannot be determined or the
+   *           file cannot be closed.
+   */
+  private static String buildProxyUsersFile(final Map<String, GroupsAndHost>
+      proxyUserData) throws Exception {
+    final String loginUser =
+        UserGroupInformation.getLoginUser().getShortUserName();
+    StringBuilder input = new StringBuilder();
+    for (Map.Entry<String, GroupsAndHost> entry : proxyUserData.entrySet()) {
+      String user = entry.getKey();
+      if (user.equals(loginUser)) {
+        continue;
+      }
+      input.append(user);
+      for (String group : entry.getValue().getGroups()) {
+        input.append(",");
+        input.append(group);
+      }
+      input.append("\n");
+    }
+    if (input.length() == 0) {
+      return null;
+    }
+
+    File file = null;
+    FileOutputStream fos = null;
+    try {
+      file = File.createTempFile("proxyusers",null);
+      file.deleteOnExit();
+      fos = new FileOutputStream(file);
+      fos.write(input.toString().getBytes());
+    } catch(IOException ioexp) {
+      LOG.warn("Could not write the proxy users file: "
+          + ioexp.getMessage(), ioexp);
+      return null;
+    } finally {
+      // fos stays null when the file could not be created or opened;
+      // closing it blindly would hide the real failure behind an NPE.
+      if (fos != null) {
+        fos.close();
+      }
+    }
+    LOG.info("file.toString():" + file.toString());
+    return file.toString();
+  }
+
+  /**
+   * Collect the proxy users from the "hadoop.proxyuser.USER.groups"
+   * properties of the configuration together with the groups and hosts
+   * configured for each of them.
+   * @param conf - cluster configuration.
+   * @return - proxy user definitions.
+   * @throws Exception - if no proxy users found in configuration.
+   */
+  private static ProxyUserDefinitions getProxyUsersData(Configuration conf)
+      throws Exception {
+    List<String> proxyUsersData = new ArrayList<String>();
+    for (Map.Entry<String, String> property : conf) {
+      String key = property.getKey();
+      if (key.startsWith(PROXY_USER_PREFIX) && key.endsWith(GROUPS_SUFFIX)
+          && key.length()
+              > PROXY_USER_PREFIX.length() + GROUPS_SUFFIX.length()) {
+        // The user name is whatever sits between the prefix and suffix.
+        proxyUsersData.add(key.substring(PROXY_USER_PREFIX.length(),
+            key.length() - GROUPS_SUFFIX.length()));
+      }
+    }
+
+    if (proxyUsersData.size() == 0) {
+      LOG.error("No proxy users found in the configuration.");
+      throw new Exception("No proxy users found in the configuration.");
+    }
+
+    ProxyUserDefinitions pud = new ProxyUserDefinitions() {
+      public boolean writeToFile(URI filePath) throws IOException {
+        throw new UnsupportedOperationException("No such methood exists.");
+      }
+    };
+
+    for (String userName : proxyUsersData) {
+      List<String> groups = splitCommaList(
+          conf.get(PROXY_USER_PREFIX + userName + GROUPS_SUFFIX));
+      List<String> hosts = splitCommaList(
+          conf.get(PROXY_USER_PREFIX + userName + HOSTS_SUFFIX));
+      ProxyUserDefinitions.GroupsAndHost definitions =
+          pud.new GroupsAndHost();
+      definitions.setGroups(groups);
+      definitions.setHosts(hosts);
+      pud.addProxyUser(userName, definitions);
+    }
+    return pud;
+  }
+
+  /**
+   * Split a comma separated value into its trimmed entries.
+   * @param value - comma separated value, may be null.
+   * @return - the entries; empty if the value is null.
+   */
+  private static List<String> splitCommaList(String value) {
+    List<String> entries = new ArrayList<String>();
+    if (value != null) {
+      // A plain "," is the separator; "//," never occurs in the value.
+      for (String entry : value.split(",")) {
+        entries.add(entry.trim());
+      }
+    }
+    return entries;
+  }
+}