Posted to mapreduce-commits@hadoop.apache.org by ss...@apache.org on 2012/10/16 02:03:53 UTC
svn commit: r1398581 [5/9] - in
/hadoop/common/branches/MR-3902/hadoop-mapreduce-project: ./ bin/ conf/
hadoop-mapreduce-client/
hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/
hadoop-mapreduce-client/hadoop-...
Modified: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java?rev=1398581&r1=1398580&r2=1398581&view=diff
==============================================================================
--- hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java (original)
+++ hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java Tue Oct 16 00:02:55 2012
@@ -41,10 +41,10 @@ import org.apache.hadoop.fs.FSDataOutput
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
@@ -80,15 +80,24 @@ public class TestMRJobs {
private static final Log LOG = LogFactory.getLog(TestMRJobs.class);
protected static MiniMRYarnCluster mrCluster;
+ protected static MiniDFSCluster dfsCluster;
private static Configuration conf = new Configuration();
private static FileSystem localFs;
+ private static FileSystem remoteFs;
static {
try {
localFs = FileSystem.getLocal(conf);
} catch (IOException io) {
throw new RuntimeException("problem getting local fs", io);
}
+ try {
+ dfsCluster = new MiniDFSCluster.Builder(conf).numDataNodes(2)
+ .format(true).racks(null).build();
+ remoteFs = dfsCluster.getFileSystem();
+ } catch (IOException io) {
+ throw new RuntimeException("problem starting mini dfs cluster", io);
+ }
}
private static Path TEST_ROOT_DIR = new Path("target",
@@ -107,6 +116,8 @@ public class TestMRJobs {
if (mrCluster == null) {
mrCluster = new MiniMRYarnCluster(TestMRJobs.class.getName(), 3);
Configuration conf = new Configuration();
+ conf.set("fs.defaultFS", remoteFs.getUri().toString()); // use HDFS
+ conf.set(MRJobConfig.MR_AM_STAGING_DIR, "/apps_staging_dir");
mrCluster.init(conf);
mrCluster.start();
}
@@ -123,6 +134,10 @@ public class TestMRJobs {
mrCluster.stop();
mrCluster = null;
}
+ if (dfsCluster != null) {
+ dfsCluster.shutdown();
+ dfsCluster = null;
+ }
}
@Test
@@ -403,7 +418,6 @@ public class TestMRJobs {
Configuration conf = context.getConfiguration();
Path[] files = context.getLocalCacheFiles();
Path[] archives = context.getLocalCacheArchives();
- FileSystem fs = LocalFileSystem.get(conf);
// Check that 4 (2 + appjar + DistributedCacheChecker jar) files
// and 2 archives are present
@@ -411,13 +425,13 @@ public class TestMRJobs {
Assert.assertEquals(2, archives.length);
// Check lengths of the files
- Assert.assertEquals(1, fs.getFileStatus(files[1]).getLen());
- Assert.assertTrue(fs.getFileStatus(files[2]).getLen() > 1);
+ Assert.assertEquals(1, localFs.getFileStatus(files[1]).getLen());
+ Assert.assertTrue(localFs.getFileStatus(files[2]).getLen() > 1);
// Check extraction of the archive
- Assert.assertTrue(fs.exists(new Path(archives[0],
+ Assert.assertTrue(localFs.exists(new Path(archives[0],
"distributed.jar.inside3")));
- Assert.assertTrue(fs.exists(new Path(archives[1],
+ Assert.assertTrue(localFs.exists(new Path(archives[1],
"distributed.jar.inside4")));
// Check the class loaders
@@ -448,8 +462,7 @@ public class TestMRJobs {
}
}
- @Test
- public void testDistributedCache() throws Exception {
+ public void _testDistributedCache(String jobJarPath) throws Exception {
if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
+ " not found. Not running test.");
@@ -470,11 +483,13 @@ public class TestMRJobs {
// Set the job jar to a new "dummy" jar so we can check that it's extracted
// properly
- job.setJar(makeJobJarWithLib(TEST_ROOT_DIR.toUri().toString()));
+ job.setJar(jobJarPath);
// Because the job jar is a "dummy" jar, we need to include the jar with
// DistributedCacheChecker or it won't be able to find it
- job.addFileToClassPath(new Path(
- JarFinder.getJar(DistributedCacheChecker.class)));
+ Path distributedCacheCheckerJar = new Path(
+ JarFinder.getJar(DistributedCacheChecker.class));
+ job.addFileToClassPath(distributedCacheCheckerJar.makeQualified(
+ localFs.getUri(), distributedCacheCheckerJar.getParent()));
job.setMapperClass(DistributedCacheChecker.class);
job.setOutputFormatClass(NullOutputFormat.class);
@@ -484,7 +499,9 @@ public class TestMRJobs {
job.addCacheFile(
new URI(first.toUri().toString() + "#distributed.first.symlink"));
job.addFileToClassPath(second);
- job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
+ // The AppMaster jar itself
+ job.addFileToClassPath(
+ APP_JAR.makeQualified(localFs.getUri(), APP_JAR.getParent()));
job.addArchiveToClassPath(third);
job.addCacheArchive(fourth.toUri());
job.setMaxMapAttempts(1); // speed up failures
@@ -497,6 +514,23 @@ public class TestMRJobs {
" but didn't Match Job ID " + jobId ,
trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));
}
+
+ @Test
+ public void testDistributedCache() throws Exception {
+ // Test with a local (file:///) Job Jar
+ Path localJobJarPath = makeJobJarWithLib(TEST_ROOT_DIR.toUri().toString());
+ _testDistributedCache(localJobJarPath.toUri().toString());
+
+ // Test with a remote (hdfs://) Job Jar
+ Path remoteJobJarPath = new Path(remoteFs.getUri().toString() + "/",
+ localJobJarPath.getName());
+ remoteFs.moveFromLocalFile(localJobJarPath, remoteJobJarPath);
+ File localJobJarFile = new File(localJobJarPath.toUri().toString());
+ if (localJobJarFile.exists()) { // just to make sure
+ localJobJarFile.delete();
+ }
+ _testDistributedCache(remoteJobJarPath.toUri().toString());
+ }
private Path createTempFile(String filename, String contents)
throws IOException {
@@ -522,7 +556,7 @@ public class TestMRJobs {
return p;
}
- private String makeJobJarWithLib(String testDir) throws FileNotFoundException,
+ private Path makeJobJarWithLib(String testDir) throws FileNotFoundException,
IOException{
Path jobJarPath = new Path(testDir, "thejob.jar");
FileOutputStream fos =
@@ -535,7 +569,7 @@ public class TestMRJobs {
new Path(testDir, "lib2.jar").toUri().getPath()));
jos.close();
localFs.setPermission(jobJarPath, new FsPermission("700"));
- return jobJarPath.toUri().toString();
+ return jobJarPath;
}
private void createAndAddJarToJar(JarOutputStream jos, File jarFile)
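
For reference, a minimal sketch of the cluster wiring this change sets up in TestMRJobs: a MiniDFSCluster is started once, and its URI becomes fs.defaultFS for the MiniMRYarnCluster, so job staging flows through HDFS instead of the local file system. Class and configuration names follow the patch above; the stand-alone harness around them is illustrative only.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.hdfs.MiniDFSCluster;
    import org.apache.hadoop.mapreduce.MRJobConfig;
    import org.apache.hadoop.mapreduce.v2.MiniMRYarnCluster;

    public class MiniClusterSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Two datanodes, freshly formatted, default rack topology.
        MiniDFSCluster dfs = new MiniDFSCluster.Builder(conf)
            .numDataNodes(2).format(true).racks(null).build();
        try {
          FileSystem remoteFs = dfs.getFileSystem();
          MiniMRYarnCluster mr = new MiniMRYarnCluster("sketch", 3);
          Configuration mrConf = new Configuration();
          // Point the MR cluster at the mini HDFS so staging is remote.
          mrConf.set("fs.defaultFS", remoteFs.getUri().toString());
          mrConf.set(MRJobConfig.MR_AM_STAGING_DIR, "/apps_staging_dir");
          mr.init(mrConf);
          mr.start();
          // ... submit and verify jobs against remoteFs here ...
          mr.stop();
        } finally {
          dfs.shutdown();
        }
      }
    }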
Modified: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestYARNRunner.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestYARNRunner.java?rev=1398581&r1=1398580&r2=1398581&view=diff
==============================================================================
--- hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestYARNRunner.java (original)
+++ hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestYARNRunner.java Tue Oct 16 00:02:55 2012
@@ -177,8 +177,13 @@ public class TestYARNRunner extends Test
@Test
public void testResourceMgrDelegate() throws Exception {
/* we do not want a mock of ResourceMgrDelegate */
- ClientRMProtocol clientRMProtocol = mock(ClientRMProtocol.class);
- ResourceMgrDelegate delegate = new ResourceMgrDelegate(conf, clientRMProtocol);
+ final ClientRMProtocol clientRMProtocol = mock(ClientRMProtocol.class);
+ ResourceMgrDelegate delegate = new ResourceMgrDelegate(conf) {
+ @Override
+ public synchronized void start() {
+ this.rmClient = clientRMProtocol;
+ }
+ };
/* make sure kill calls finish application master */
when(clientRMProtocol.forceKillApplication(any(KillApplicationRequest.class)))
.thenReturn(null);
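
The hunk above swaps constructor injection for an overridden start(): the delegate never opens a real RPC connection, and the test can drive the mock directly. A hedged sketch of how such a delegate is exercised; the kill-and-verify step and the appId variable are assumptions, not part of this hunk:

    // Inject a mock ClientRMProtocol by overriding start(), as the patch does.
    final ClientRMProtocol clientRMProtocol = mock(ClientRMProtocol.class);
    ResourceMgrDelegate delegate = new ResourceMgrDelegate(conf) {
      @Override
      public synchronized void start() {
        this.rmClient = clientRMProtocol;  // skip the real RPC proxy setup
      }
    };
    // Drive the delegate and confirm kills reach the RM protocol.
    when(clientRMProtocol.forceKillApplication(any(KillApplicationRequest.class)))
        .thenReturn(null);
    delegate.killApplication(appId);  // appId: a hypothetical ApplicationId
    verify(clientRMProtocol).forceKillApplication(any(KillApplicationRequest.class));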
Modified: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml
URL: http://svn.apache.org/viewvc/hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml?rev=1398581&r1=1398580&r2=1398581&view=diff
==============================================================================
--- hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml (original)
+++ hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml Tue Oct 16 00:02:55 2012
@@ -133,7 +133,31 @@
<groupId>org.jboss.netty</groupId>
<artifactId>netty</artifactId>
</dependency>
-
+ <dependency>
+ <groupId>commons-logging</groupId>
+ <artifactId>commons-logging</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-codec</groupId>
+ <artifactId>commons-codec</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-cli</groupId>
+ <artifactId>commons-cli</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-lang</groupId>
+ <artifactId>commons-lang</artifactId>
+ <scope>provided</scope>
+ </dependency>
</dependencies>
<build>
@@ -148,6 +172,18 @@
<effort>Max</effort>
</configuration>
</plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <configuration>
+ <properties>
+ <property>
+ <name>listener</name>
+ <value>org.apache.hadoop.test.TimedOutTestsListener</value>
+ </property>
+ </properties>
+ </configuration>
+ </plugin>
</plugins>
</build>
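
The surefire listener property registers a JUnit RunListener for every forked test run; TimedOutTestsListener in the Hadoop test utilities dumps all thread stacks when a test fails with a timeout, which makes hung mini-cluster tests diagnosable. A rough skeleton of the hook it builds on, assuming JUnit 4; this is not the real implementation:

    import org.junit.runner.notification.Failure;
    import org.junit.runner.notification.RunListener;

    // Skeleton only: the real listener lives in org.apache.hadoop.test and
    // produces a formatted dump of every live thread's stack.
    public class TimeoutDumpListener extends RunListener {
      @Override
      public void testFailure(Failure failure) throws Exception {
        String msg = String.valueOf(failure.getMessage());
        if (msg.contains("timed out")) {  // heuristic, for illustration
          for (StackTraceElement[] stack
              : Thread.getAllStackTraces().values()) {
            for (StackTraceElement frame : stack) {
              System.err.println("  at " + frame);
            }
          }
        }
      }
    }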
Modified: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/DBCountPageView.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/DBCountPageView.java?rev=1398581&r1=1398580&r2=1398581&view=diff
==============================================================================
--- hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/DBCountPageView.java (original)
+++ hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/DBCountPageView.java Tue Oct 16 00:02:55 2012
@@ -27,7 +27,6 @@ import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
-import java.util.Iterator;
import java.util.Random;
import org.apache.commons.logging.Log;
@@ -82,6 +81,7 @@ public class DBCountPageView extends Con
private Connection connection;
private boolean initialized = false;
+ private boolean isOracle = false;
private static final String[] AccessFieldNames = {"url", "referrer", "time"};
private static final String[] PageviewFieldNames = {"url", "pageview"};
@@ -102,7 +102,9 @@ public class DBCountPageView extends Con
private void createConnection(String driverClassName
, String url) throws Exception {
-
+ if(driverClassName.toLowerCase().contains("oracle")) {
+ isOracle = true;
+ }
Class.forName(driverClassName);
connection = DriverManager.getConnection(url);
connection.setAutoCommit(false);
@@ -142,7 +144,7 @@ public class DBCountPageView extends Con
}
private void dropTables() {
- String dropAccess = "DROP TABLE Access";
+ String dropAccess = "DROP TABLE HAccess";
String dropPageview = "DROP TABLE Pageview";
Statement st = null;
try {
@@ -157,18 +159,21 @@ public class DBCountPageView extends Con
}
private void createTables() throws SQLException {
-
+ String dataType = "BIGINT NOT NULL";
+ if(isOracle) {
+ dataType = "NUMBER(19) NOT NULL";
+ }
String createAccess =
"CREATE TABLE " +
- "Access(url VARCHAR(100) NOT NULL," +
+ "HAccess(url VARCHAR(100) NOT NULL," +
" referrer VARCHAR(100)," +
- " time BIGINT NOT NULL, " +
+ " time " + dataType + ", " +
" PRIMARY KEY (url, time))";
String createPageview =
"CREATE TABLE " +
"Pageview(url VARCHAR(100) NOT NULL," +
- " pageview BIGINT NOT NULL, " +
+ " pageview " + dataType + ", " +
" PRIMARY KEY (url))";
Statement st = connection.createStatement();
@@ -189,7 +194,7 @@ public class DBCountPageView extends Con
PreparedStatement statement = null ;
try {
statement = connection.prepareStatement(
- "INSERT INTO Access(url, referrer, time)" +
+ "INSERT INTO HAccess(url, referrer, time)" +
" VALUES (?, ?, ?)");
Random random = new Random();
@@ -248,7 +253,7 @@ public class DBCountPageView extends Con
/**Verifies the results are correct */
private boolean verify() throws SQLException {
//check total num pageview
- String countAccessQuery = "SELECT COUNT(*) FROM Access";
+ String countAccessQuery = "SELECT COUNT(*) FROM HAccess";
String sumPageviewQuery = "SELECT SUM(pageview) FROM Pageview";
Statement st = null;
ResultSet rs = null;
@@ -396,7 +401,7 @@ public class DBCountPageView extends Con
DBConfiguration.configureDB(conf, driverClassName, url);
- Job job = new Job(conf);
+ Job job = Job.getInstance(conf);
job.setJobName("Count Pageviews of URLs");
job.setJarByClass(DBCountPageView.class);
@@ -404,7 +409,7 @@ public class DBCountPageView extends Con
job.setCombinerClass(LongSumReducer.class);
job.setReducerClass(PageviewReducer.class);
- DBInputFormat.setInput(job, AccessRecord.class, "Access"
+ DBInputFormat.setInput(job, AccessRecord.class, "HAccess"
, null, "url", AccessFieldNames);
DBOutputFormat.setOutput(job, "Pageview", PageviewFieldNames);
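
Two constraints drive the table and type changes above: ACCESS is a reserved word in Oracle (hence the rename to HAccess), and Oracle has no BIGINT type, so NUMBER(19) stands in for a 64-bit integer. The same decision, distilled into a hypothetical helper:

    // Hypothetical helper mirroring the patch: NUMBER(19) covers the range
    // of a 64-bit BIGINT on Oracle; every other database keeps BIGINT.
    private static String longColumnType(String driverClassName) {
      return driverClassName.toLowerCase().contains("oracle")
          ? "NUMBER(19) NOT NULL"
          : "BIGINT NOT NULL";
    }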
Modified: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/SecondarySort.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/SecondarySort.java?rev=1398581&r1=1398580&r2=1398581&view=diff
==============================================================================
--- hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/SecondarySort.java (original)
+++ hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/SecondarySort.java Tue Oct 16 00:02:55 2012
@@ -211,7 +211,7 @@ public class SecondarySort {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length != 2) {
- System.err.println("Usage: secondarysrot <in> <out>");
+ System.err.println("Usage: secondarysort <in> <out>");
System.exit(2);
}
Job job = new Job(conf, "secondary sort");
Modified: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordMean.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordMean.java?rev=1398581&r1=1398580&r2=1398581&view=diff
==============================================================================
--- hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordMean.java (original)
+++ hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordMean.java Tue Oct 16 00:02:55 2012
@@ -1,196 +1,196 @@
-package org.apache.hadoop.examples;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.StringTokenizer;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-public class WordMean extends Configured implements Tool {
-
- private double mean = 0;
-
- private final static Text COUNT = new Text("count");
- private final static Text LENGTH = new Text("length");
- private final static LongWritable ONE = new LongWritable(1);
-
- /**
- * Maps words from line of text into 2 key-value pairs; one key-value pair for
- * counting the word, another for counting its length.
- */
- public static class WordMeanMapper extends
- Mapper<Object, Text, Text, LongWritable> {
-
- private LongWritable wordLen = new LongWritable();
-
- /**
- * Emits 2 key-value pairs for counting the word and its length. Outputs are
- * (Text, LongWritable).
- *
- * @param value
- * This will be a line of text coming in from our input file.
- */
- public void map(Object key, Text value, Context context)
- throws IOException, InterruptedException {
- StringTokenizer itr = new StringTokenizer(value.toString());
- while (itr.hasMoreTokens()) {
- String string = itr.nextToken();
- this.wordLen.set(string.length());
- context.write(LENGTH, this.wordLen);
- context.write(COUNT, ONE);
- }
- }
- }
-
- /**
- * Performs integer summation of all the values for each key.
- */
- public static class WordMeanReducer extends
- Reducer<Text, LongWritable, Text, LongWritable> {
-
- private LongWritable sum = new LongWritable();
-
- /**
- * Sums all the individual values within the iterator and writes them to the
- * same key.
- *
- * @param key
- * This will be one of 2 constants: LENGTH_STR or COUNT_STR.
- * @param values
- * This will be an iterator of all the values associated with that
- * key.
- */
- public void reduce(Text key, Iterable<LongWritable> values, Context context)
- throws IOException, InterruptedException {
-
- int theSum = 0;
- for (LongWritable val : values) {
- theSum += val.get();
- }
- sum.set(theSum);
- context.write(key, sum);
- }
- }
-
- /**
- * Reads the output file and parses the summation of lengths, and the word
- * count, to perform a quick calculation of the mean.
- *
- * @param path
- * The path to find the output file in. Set in main to the output
- * directory.
- * @throws IOException
- * If it cannot access the output directory, we throw an exception.
- */
- private double readAndCalcMean(Path path, Configuration conf)
- throws IOException {
- FileSystem fs = FileSystem.get(conf);
- Path file = new Path(path, "part-r-00000");
-
- if (!fs.exists(file))
- throw new IOException("Output not found!");
-
- BufferedReader br = null;
-
- // average = total sum / number of elements;
- try {
- br = new BufferedReader(new InputStreamReader(fs.open(file)));
-
- long count = 0;
- long length = 0;
-
- String line;
- while ((line = br.readLine()) != null) {
- StringTokenizer st = new StringTokenizer(line);
-
- // grab type
- String type = st.nextToken();
-
- // differentiate
- if (type.equals(COUNT.toString())) {
- String countLit = st.nextToken();
- count = Long.parseLong(countLit);
- } else if (type.equals(LENGTH.toString())) {
- String lengthLit = st.nextToken();
- length = Long.parseLong(lengthLit);
- }
- }
-
- double theMean = (((double) length) / ((double) count));
- System.out.println("The mean is: " + theMean);
- return theMean;
- } finally {
- br.close();
- }
- }
-
- public static void main(String[] args) throws Exception {
- ToolRunner.run(new Configuration(), new WordMean(), args);
- }
-
- @Override
- public int run(String[] args) throws Exception {
- if (args.length != 2) {
- System.err.println("Usage: wordmean <in> <out>");
- return 0;
- }
-
- Configuration conf = getConf();
-
- @SuppressWarnings("deprecation")
- Job job = new Job(conf, "word mean");
- job.setJarByClass(WordMean.class);
- job.setMapperClass(WordMeanMapper.class);
- job.setCombinerClass(WordMeanReducer.class);
- job.setReducerClass(WordMeanReducer.class);
- job.setOutputKeyClass(Text.class);
- job.setOutputValueClass(LongWritable.class);
- FileInputFormat.addInputPath(job, new Path(args[0]));
- Path outputpath = new Path(args[1]);
- FileOutputFormat.setOutputPath(job, outputpath);
- boolean result = job.waitForCompletion(true);
- mean = readAndCalcMean(outputpath, conf);
-
- return (result ? 0 : 1);
- }
-
- /**
- * Only valuable after run() called.
- *
- * @return Returns the mean value.
- */
- public double getMean() {
- return mean;
- }
+package org.apache.hadoop.examples;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.StringTokenizer;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+public class WordMean extends Configured implements Tool {
+
+ private double mean = 0;
+
+ private final static Text COUNT = new Text("count");
+ private final static Text LENGTH = new Text("length");
+ private final static LongWritable ONE = new LongWritable(1);
+
+ /**
+ * Maps words from line of text into 2 key-value pairs; one key-value pair for
+ * counting the word, another for counting its length.
+ */
+ public static class WordMeanMapper extends
+ Mapper<Object, Text, Text, LongWritable> {
+
+ private LongWritable wordLen = new LongWritable();
+
+ /**
+ * Emits 2 key-value pairs for counting the word and its length. Outputs are
+ * (Text, LongWritable).
+ *
+ * @param value
+ * This will be a line of text coming in from our input file.
+ */
+ public void map(Object key, Text value, Context context)
+ throws IOException, InterruptedException {
+ StringTokenizer itr = new StringTokenizer(value.toString());
+ while (itr.hasMoreTokens()) {
+ String string = itr.nextToken();
+ this.wordLen.set(string.length());
+ context.write(LENGTH, this.wordLen);
+ context.write(COUNT, ONE);
+ }
+ }
+ }
+
+ /**
+ * Performs integer summation of all the values for each key.
+ */
+ public static class WordMeanReducer extends
+ Reducer<Text, LongWritable, Text, LongWritable> {
+
+ private LongWritable sum = new LongWritable();
+
+ /**
+ * Sums all the individual values within the iterator and writes them to the
+ * same key.
+ *
+ * @param key
+ * This will be one of 2 constants: LENGTH_STR or COUNT_STR.
+ * @param values
+ * This will be an iterator of all the values associated with that
+ * key.
+ */
+ public void reduce(Text key, Iterable<LongWritable> values, Context context)
+ throws IOException, InterruptedException {
+
+ int theSum = 0;
+ for (LongWritable val : values) {
+ theSum += val.get();
+ }
+ sum.set(theSum);
+ context.write(key, sum);
+ }
+ }
+
+ /**
+ * Reads the output file and parses the summation of lengths, and the word
+ * count, to perform a quick calculation of the mean.
+ *
+ * @param path
+ * The path to find the output file in. Set in main to the output
+ * directory.
+ * @throws IOException
+ * If it cannot access the output directory, we throw an exception.
+ */
+ private double readAndCalcMean(Path path, Configuration conf)
+ throws IOException {
+ FileSystem fs = FileSystem.get(conf);
+ Path file = new Path(path, "part-r-00000");
+
+ if (!fs.exists(file))
+ throw new IOException("Output not found!");
+
+ BufferedReader br = null;
+
+ // average = total sum / number of elements;
+ try {
+ br = new BufferedReader(new InputStreamReader(fs.open(file)));
+
+ long count = 0;
+ long length = 0;
+
+ String line;
+ while ((line = br.readLine()) != null) {
+ StringTokenizer st = new StringTokenizer(line);
+
+ // grab type
+ String type = st.nextToken();
+
+ // differentiate
+ if (type.equals(COUNT.toString())) {
+ String countLit = st.nextToken();
+ count = Long.parseLong(countLit);
+ } else if (type.equals(LENGTH.toString())) {
+ String lengthLit = st.nextToken();
+ length = Long.parseLong(lengthLit);
+ }
+ }
+
+ double theMean = (((double) length) / ((double) count));
+ System.out.println("The mean is: " + theMean);
+ return theMean;
+ } finally {
+ br.close();
+ }
+ }
+
+ public static void main(String[] args) throws Exception {
+ ToolRunner.run(new Configuration(), new WordMean(), args);
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+ if (args.length != 2) {
+ System.err.println("Usage: wordmean <in> <out>");
+ return 0;
+ }
+
+ Configuration conf = getConf();
+
+ @SuppressWarnings("deprecation")
+ Job job = new Job(conf, "word mean");
+ job.setJarByClass(WordMean.class);
+ job.setMapperClass(WordMeanMapper.class);
+ job.setCombinerClass(WordMeanReducer.class);
+ job.setReducerClass(WordMeanReducer.class);
+ job.setOutputKeyClass(Text.class);
+ job.setOutputValueClass(LongWritable.class);
+ FileInputFormat.addInputPath(job, new Path(args[0]));
+ Path outputpath = new Path(args[1]);
+ FileOutputFormat.setOutputPath(job, outputpath);
+ boolean result = job.waitForCompletion(true);
+ mean = readAndCalcMean(outputpath, conf);
+
+ return (result ? 0 : 1);
+ }
+
+ /**
+ * Only valuable after run() called.
+ *
+ * @return Returns the mean value.
+ */
+ public double getMean() {
+ return mean;
+ }
}
\ No newline at end of file
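
A quick worked check of readAndCalcMean above, with hypothetical reducer output in part-r-00000:

    count   5
    length  20

The mean word length is then length / count = 20 / 5.0 = 4.0.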
Modified: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordMedian.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordMedian.java?rev=1398581&r1=1398580&r2=1398581&view=diff
==============================================================================
--- hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordMedian.java (original)
+++ hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordMedian.java Tue Oct 16 00:02:55 2012
@@ -1,208 +1,208 @@
-package org.apache.hadoop.examples;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.StringTokenizer;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.TaskCounter;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-public class WordMedian extends Configured implements Tool {
-
- private double median = 0;
- private final static IntWritable ONE = new IntWritable(1);
-
- /**
- * Maps words from line of text into a key-value pair; the length of the word
- * as the key, and 1 as the value.
- */
- public static class WordMedianMapper extends
- Mapper<Object, Text, IntWritable, IntWritable> {
-
- private IntWritable length = new IntWritable();
-
- /**
- * Emits a key-value pair for counting the word. Outputs are (IntWritable,
- * IntWritable).
- *
- * @param value
- * This will be a line of text coming in from our input file.
- */
- public void map(Object key, Text value, Context context)
- throws IOException, InterruptedException {
- StringTokenizer itr = new StringTokenizer(value.toString());
- while (itr.hasMoreTokens()) {
- String string = itr.nextToken();
- length.set(string.length());
- context.write(length, ONE);
- }
- }
- }
-
- /**
- * Performs integer summation of all the values for each key.
- */
- public static class WordMedianReducer extends
- Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
-
- private IntWritable val = new IntWritable();
-
- /**
- * Sums all the individual values within the iterator and writes them to the
- * same key.
- *
- * @param key
- * This will be a length of a word that was read.
- * @param values
- * This will be an iterator of all the values associated with that
- * key.
- */
- public void reduce(IntWritable key, Iterable<IntWritable> values,
- Context context) throws IOException, InterruptedException {
-
- int sum = 0;
- for (IntWritable value : values) {
- sum += value.get();
- }
- val.set(sum);
- context.write(key, val);
- }
- }
-
- /**
- * This is a standard program to read and find a median value based on a file
- * of word counts such as: 1 456, 2 132, 3 56... Where the first values are
- * the word lengths and the following values are the number of times that
- * words of that length appear.
- *
- * @param path
- * The path to read the HDFS file from (part-r-00000...00001...etc).
- * @param medianIndex1
- * The first length value to look for.
- * @param medianIndex2
- * The second length value to look for (will be the same as the first
- * if there are an even number of words total).
- * @throws IOException
- * If file cannot be found, we throw an exception.
- * */
- private double readAndFindMedian(String path, int medianIndex1,
- int medianIndex2, Configuration conf) throws IOException {
- FileSystem fs = FileSystem.get(conf);
- Path file = new Path(path, "part-r-00000");
-
- if (!fs.exists(file))
- throw new IOException("Output not found!");
-
- BufferedReader br = null;
-
- try {
- br = new BufferedReader(new InputStreamReader(fs.open(file)));
- int num = 0;
-
- String line;
- while ((line = br.readLine()) != null) {
- StringTokenizer st = new StringTokenizer(line);
-
- // grab length
- String currLen = st.nextToken();
-
- // grab count
- String lengthFreq = st.nextToken();
-
- int prevNum = num;
- num += Integer.parseInt(lengthFreq);
-
- if (medianIndex2 >= prevNum && medianIndex1 <= num) {
- System.out.println("The median is: " + currLen);
- br.close();
- return Double.parseDouble(currLen);
- } else if (medianIndex2 >= prevNum && medianIndex1 < num) {
- String nextCurrLen = st.nextToken();
- double theMedian = (Integer.parseInt(currLen) + Integer
- .parseInt(nextCurrLen)) / 2.0;
- System.out.println("The median is: " + theMedian);
- br.close();
- return theMedian;
- }
- }
- } finally {
- br.close();
- }
- // error, no median found
- return -1;
- }
-
- public static void main(String[] args) throws Exception {
- ToolRunner.run(new Configuration(), new WordMedian(), args);
- }
-
- @Override
- public int run(String[] args) throws Exception {
- if (args.length != 2) {
- System.err.println("Usage: wordmedian <in> <out>");
- return 0;
- }
-
- setConf(new Configuration());
- Configuration conf = getConf();
-
- @SuppressWarnings("deprecation")
- Job job = new Job(conf, "word median");
- job.setJarByClass(WordMedian.class);
- job.setMapperClass(WordMedianMapper.class);
- job.setCombinerClass(WordMedianReducer.class);
- job.setReducerClass(WordMedianReducer.class);
- job.setOutputKeyClass(IntWritable.class);
- job.setOutputValueClass(IntWritable.class);
- FileInputFormat.addInputPath(job, new Path(args[0]));
- FileOutputFormat.setOutputPath(job, new Path(args[1]));
- boolean result = job.waitForCompletion(true);
-
- // Wait for JOB 1 -- get middle value to check for Median
-
- long totalWords = job.getCounters()
- .getGroup(TaskCounter.class.getCanonicalName())
- .findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue();
- int medianIndex1 = (int) Math.ceil((totalWords / 2.0));
- int medianIndex2 = (int) Math.floor((totalWords / 2.0));
-
- median = readAndFindMedian(args[1], medianIndex1, medianIndex2, conf);
-
- return (result ? 0 : 1);
- }
-
- public double getMedian() {
- return median;
- }
-}
+package org.apache.hadoop.examples;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.StringTokenizer;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.TaskCounter;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+public class WordMedian extends Configured implements Tool {
+
+ private double median = 0;
+ private final static IntWritable ONE = new IntWritable(1);
+
+ /**
+ * Maps words from line of text into a key-value pair; the length of the word
+ * as the key, and 1 as the value.
+ */
+ public static class WordMedianMapper extends
+ Mapper<Object, Text, IntWritable, IntWritable> {
+
+ private IntWritable length = new IntWritable();
+
+ /**
+ * Emits a key-value pair for counting the word. Outputs are (IntWritable,
+ * IntWritable).
+ *
+ * @param value
+ * This will be a line of text coming in from our input file.
+ */
+ public void map(Object key, Text value, Context context)
+ throws IOException, InterruptedException {
+ StringTokenizer itr = new StringTokenizer(value.toString());
+ while (itr.hasMoreTokens()) {
+ String string = itr.nextToken();
+ length.set(string.length());
+ context.write(length, ONE);
+ }
+ }
+ }
+
+ /**
+ * Performs integer summation of all the values for each key.
+ */
+ public static class WordMedianReducer extends
+ Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
+
+ private IntWritable val = new IntWritable();
+
+ /**
+ * Sums all the individual values within the iterator and writes them to the
+ * same key.
+ *
+ * @param key
+ * This will be a length of a word that was read.
+ * @param values
+ * This will be an iterator of all the values associated with that
+ * key.
+ */
+ public void reduce(IntWritable key, Iterable<IntWritable> values,
+ Context context) throws IOException, InterruptedException {
+
+ int sum = 0;
+ for (IntWritable value : values) {
+ sum += value.get();
+ }
+ val.set(sum);
+ context.write(key, val);
+ }
+ }
+
+ /**
+ * This is a standard program to read and find a median value based on a file
+ * of word counts such as: 1 456, 2 132, 3 56... Where the first values are
+ * the word lengths and the following values are the number of times that
+ * words of that length appear.
+ *
+ * @param path
+ * The path to read the HDFS file from (part-r-00000...00001...etc).
+ * @param medianIndex1
+ * The first length value to look for.
+ * @param medianIndex2
+ * The second length value to look for (will be the same as the first
+ * if there are an even number of words total).
+ * @throws IOException
+ * If file cannot be found, we throw an exception.
+ * */
+ private double readAndFindMedian(String path, int medianIndex1,
+ int medianIndex2, Configuration conf) throws IOException {
+ FileSystem fs = FileSystem.get(conf);
+ Path file = new Path(path, "part-r-00000");
+
+ if (!fs.exists(file))
+ throw new IOException("Output not found!");
+
+ BufferedReader br = null;
+
+ try {
+ br = new BufferedReader(new InputStreamReader(fs.open(file)));
+ int num = 0;
+
+ String line;
+ while ((line = br.readLine()) != null) {
+ StringTokenizer st = new StringTokenizer(line);
+
+ // grab length
+ String currLen = st.nextToken();
+
+ // grab count
+ String lengthFreq = st.nextToken();
+
+ int prevNum = num;
+ num += Integer.parseInt(lengthFreq);
+
+ if (medianIndex2 >= prevNum && medianIndex1 <= num) {
+ System.out.println("The median is: " + currLen);
+ br.close();
+ return Double.parseDouble(currLen);
+ } else if (medianIndex2 >= prevNum && medianIndex1 < num) {
+ String nextCurrLen = st.nextToken();
+ double theMedian = (Integer.parseInt(currLen) + Integer
+ .parseInt(nextCurrLen)) / 2.0;
+ System.out.println("The median is: " + theMedian);
+ br.close();
+ return theMedian;
+ }
+ }
+ } finally {
+ br.close();
+ }
+ // error, no median found
+ return -1;
+ }
+
+ public static void main(String[] args) throws Exception {
+ ToolRunner.run(new Configuration(), new WordMedian(), args);
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+ if (args.length != 2) {
+ System.err.println("Usage: wordmedian <in> <out>");
+ return 0;
+ }
+
+ setConf(new Configuration());
+ Configuration conf = getConf();
+
+ @SuppressWarnings("deprecation")
+ Job job = new Job(conf, "word median");
+ job.setJarByClass(WordMedian.class);
+ job.setMapperClass(WordMedianMapper.class);
+ job.setCombinerClass(WordMedianReducer.class);
+ job.setReducerClass(WordMedianReducer.class);
+ job.setOutputKeyClass(IntWritable.class);
+ job.setOutputValueClass(IntWritable.class);
+ FileInputFormat.addInputPath(job, new Path(args[0]));
+ FileOutputFormat.setOutputPath(job, new Path(args[1]));
+ boolean result = job.waitForCompletion(true);
+
+ // Wait for JOB 1 -- get middle value to check for Median
+
+ long totalWords = job.getCounters()
+ .getGroup(TaskCounter.class.getCanonicalName())
+ .findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue();
+ int medianIndex1 = (int) Math.ceil((totalWords / 2.0));
+ int medianIndex2 = (int) Math.floor((totalWords / 2.0));
+
+ median = readAndFindMedian(args[1], medianIndex1, medianIndex2, conf);
+
+ return (result ? 0 : 1);
+ }
+
+ public double getMedian() {
+ return median;
+ }
+}
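
A worked pass through readAndFindMedian above, with hypothetical reducer output of "3 2" and "5 3" (two words of length 3, three of length 5): totalWords = 5, so medianIndex1 = ceil(2.5) = 3 and medianIndex2 = floor(2.5) = 2. After the first line, num = 2 and neither branch fires (3 > 2); after the second, prevNum = 2 and num = 5, so medianIndex2 >= prevNum and medianIndex1 <= num both hold, and the method returns 5, the middle element of the sorted lengths 3, 3, 5, 5, 5.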
Modified: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordStandardDeviation.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordStandardDeviation.java?rev=1398581&r1=1398580&r2=1398581&view=diff
==============================================================================
--- hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordStandardDeviation.java (original)
+++ hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordStandardDeviation.java Tue Oct 16 00:02:55 2012
@@ -1,210 +1,210 @@
-package org.apache.hadoop.examples;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.StringTokenizer;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-public class WordStandardDeviation extends Configured implements Tool {
-
- private double stddev = 0;
-
- private final static Text LENGTH = new Text("length");
- private final static Text SQUARE = new Text("square");
- private final static Text COUNT = new Text("count");
- private final static LongWritable ONE = new LongWritable(1);
-
- /**
- * Maps words from line of text into 3 key-value pairs; one key-value pair for
- * counting the word, one for counting its length, and one for counting the
- * square of its length.
- */
- public static class WordStandardDeviationMapper extends
- Mapper<Object, Text, Text, LongWritable> {
-
- private LongWritable wordLen = new LongWritable();
- private LongWritable wordLenSq = new LongWritable();
-
- /**
- * Emits 3 key-value pairs for counting the word, its length, and the
- * squares of its length. Outputs are (Text, LongWritable).
- *
- * @param value
- * This will be a line of text coming in from our input file.
- */
- public void map(Object key, Text value, Context context)
- throws IOException, InterruptedException {
- StringTokenizer itr = new StringTokenizer(value.toString());
- while (itr.hasMoreTokens()) {
- String string = itr.nextToken();
-
- this.wordLen.set(string.length());
-
- // the square of an integer is an integer...
- this.wordLenSq.set((long) Math.pow(string.length(), 2.0));
-
- context.write(LENGTH, this.wordLen);
- context.write(SQUARE, this.wordLenSq);
- context.write(COUNT, ONE);
- }
- }
- }
-
- /**
- * Performs integer summation of all the values for each key.
- */
- public static class WordStandardDeviationReducer extends
- Reducer<Text, LongWritable, Text, LongWritable> {
-
- private LongWritable val = new LongWritable();
-
- /**
- * Sums all the individual values within the iterator and writes them to the
- * same key.
- *
- * @param key
- * This will be one of 2 constants: LENGTH_STR, COUNT_STR, or
- * SQUARE_STR.
- * @param values
- * This will be an iterator of all the values associated with that
- * key.
- */
- public void reduce(Text key, Iterable<LongWritable> values, Context context)
- throws IOException, InterruptedException {
-
- int sum = 0;
- for (LongWritable value : values) {
- sum += value.get();
- }
- val.set(sum);
- context.write(key, val);
- }
- }
-
- /**
- * Reads the output file and parses the summation of lengths, the word count,
- * and the lengths squared, to perform a quick calculation of the standard
- * deviation.
- *
- * @param path
- * The path to find the output file in. Set in main to the output
- * directory.
- * @throws IOException
- * If it cannot access the output directory, we throw an exception.
- */
- private double readAndCalcStdDev(Path path, Configuration conf)
- throws IOException {
- FileSystem fs = FileSystem.get(conf);
- Path file = new Path(path, "part-r-00000");
-
- if (!fs.exists(file))
- throw new IOException("Output not found!");
-
- double stddev = 0;
- BufferedReader br = null;
- try {
- br = new BufferedReader(new InputStreamReader(fs.open(file)));
- long count = 0;
- long length = 0;
- long square = 0;
- String line;
- while ((line = br.readLine()) != null) {
- StringTokenizer st = new StringTokenizer(line);
-
- // grab type
- String type = st.nextToken();
-
- // differentiate
- if (type.equals(COUNT.toString())) {
- String countLit = st.nextToken();
- count = Long.parseLong(countLit);
- } else if (type.equals(LENGTH.toString())) {
- String lengthLit = st.nextToken();
- length = Long.parseLong(lengthLit);
- } else if (type.equals(SQUARE.toString())) {
- String squareLit = st.nextToken();
- square = Long.parseLong(squareLit);
- }
- }
- // average = total sum / number of elements;
- double mean = (((double) length) / ((double) count));
- // standard deviation = sqrt((sum(lengths ^ 2)/count) - (mean ^ 2))
- mean = Math.pow(mean, 2.0);
- double term = (((double) square / ((double) count)));
- stddev = Math.sqrt((term - mean));
- System.out.println("The standard deviation is: " + stddev);
- } finally {
- br.close();
- }
- return stddev;
- }
-
- public static void main(String[] args) throws Exception {
- ToolRunner.run(new Configuration(), new WordStandardDeviation(),
- args);
- }
-
- @Override
- public int run(String[] args) throws Exception {
- if (args.length != 2) {
- System.err.println("Usage: wordstddev <in> <out>");
- return 0;
- }
-
- Configuration conf = getConf();
-
- @SuppressWarnings("deprecation")
- Job job = new Job(conf, "word stddev");
- job.setJarByClass(WordStandardDeviation.class);
- job.setMapperClass(WordStandardDeviationMapper.class);
- job.setCombinerClass(WordStandardDeviationReducer.class);
- job.setReducerClass(WordStandardDeviationReducer.class);
- job.setOutputKeyClass(Text.class);
- job.setOutputValueClass(LongWritable.class);
- FileInputFormat.addInputPath(job, new Path(args[0]));
- Path outputpath = new Path(args[1]);
- FileOutputFormat.setOutputPath(job, outputpath);
- boolean result = job.waitForCompletion(true);
-
- // read output and calculate standard deviation
- stddev = readAndCalcStdDev(outputpath, conf);
-
- return (result ? 0 : 1);
- }
-
- public double getStandardDeviation() {
- return stddev;
- }
+package org.apache.hadoop.examples;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.StringTokenizer;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+public class WordStandardDeviation extends Configured implements Tool {
+
+ private double stddev = 0;
+
+ private final static Text LENGTH = new Text("length");
+ private final static Text SQUARE = new Text("square");
+ private final static Text COUNT = new Text("count");
+ private final static LongWritable ONE = new LongWritable(1);
+
+ /**
+ * Maps words from line of text into 3 key-value pairs; one key-value pair for
+ * counting the word, one for counting its length, and one for counting the
+ * square of its length.
+ */
+ public static class WordStandardDeviationMapper extends
+ Mapper<Object, Text, Text, LongWritable> {
+
+ private LongWritable wordLen = new LongWritable();
+ private LongWritable wordLenSq = new LongWritable();
+
+ /**
+ * Emits 3 key-value pairs for counting the word, its length, and the
+ * squares of its length. Outputs are (Text, LongWritable).
+ *
+ * @param value
+ * This will be a line of text coming in from our input file.
+ */
+ public void map(Object key, Text value, Context context)
+ throws IOException, InterruptedException {
+ StringTokenizer itr = new StringTokenizer(value.toString());
+ while (itr.hasMoreTokens()) {
+ String string = itr.nextToken();
+
+ this.wordLen.set(string.length());
+
+ // the square of an integer is an integer...
+ this.wordLenSq.set((long) Math.pow(string.length(), 2.0));
+
+ context.write(LENGTH, this.wordLen);
+ context.write(SQUARE, this.wordLenSq);
+ context.write(COUNT, ONE);
+ }
+ }
+ }
+
+ /**
+ * Performs integer summation of all the values for each key.
+ */
+ public static class WordStandardDeviationReducer extends
+ Reducer<Text, LongWritable, Text, LongWritable> {
+
+ private LongWritable val = new LongWritable();
+
+ /**
+ * Sums all the individual values within the iterator and writes them to the
+ * same key.
+ *
+ * @param key
+ * This will be one of 2 constants: LENGTH_STR, COUNT_STR, or
+ * SQUARE_STR.
+ * @param values
+ * This will be an iterator of all the values associated with that
+ * key.
+ */
+ public void reduce(Text key, Iterable<LongWritable> values, Context context)
+ throws IOException, InterruptedException {
+
+ int sum = 0;
+ for (LongWritable value : values) {
+ sum += value.get();
+ }
+ val.set(sum);
+ context.write(key, val);
+ }
+ }
+
+ /**
+ * Reads the output file and parses the summation of lengths, the word count,
+ * and the lengths squared, to perform a quick calculation of the standard
+ * deviation.
+ *
+ * @param path
+ * The path to find the output file in. Set in main to the output
+ * directory.
+ * @throws IOException
+ * If it cannot access the output directory, we throw an exception.
+ */
+ private double readAndCalcStdDev(Path path, Configuration conf)
+ throws IOException {
+ FileSystem fs = FileSystem.get(conf);
+ Path file = new Path(path, "part-r-00000");
+
+ if (!fs.exists(file))
+ throw new IOException("Output not found!");
+
+ double stddev = 0;
+ BufferedReader br = null;
+ try {
+ br = new BufferedReader(new InputStreamReader(fs.open(file)));
+ long count = 0;
+ long length = 0;
+ long square = 0;
+ String line;
+ while ((line = br.readLine()) != null) {
+ StringTokenizer st = new StringTokenizer(line);
+
+ // grab type
+ String type = st.nextToken();
+
+ // differentiate
+ if (type.equals(COUNT.toString())) {
+ String countLit = st.nextToken();
+ count = Long.parseLong(countLit);
+ } else if (type.equals(LENGTH.toString())) {
+ String lengthLit = st.nextToken();
+ length = Long.parseLong(lengthLit);
+ } else if (type.equals(SQUARE.toString())) {
+ String squareLit = st.nextToken();
+ square = Long.parseLong(squareLit);
+ }
+ }
+ // average = total sum / number of elements;
+ double mean = (((double) length) / ((double) count));
+ // standard deviation = sqrt((sum(lengths ^ 2)/count) - (mean ^ 2))
+ mean = Math.pow(mean, 2.0);
+ double term = (((double) square / ((double) count)));
+ stddev = Math.sqrt((term - mean));
+ System.out.println("The standard deviation is: " + stddev);
+ } finally {
+ br.close();
+ }
+ return stddev;
+ }
+
+ public static void main(String[] args) throws Exception {
+ ToolRunner.run(new Configuration(), new WordStandardDeviation(),
+ args);
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+ if (args.length != 2) {
+ System.err.println("Usage: wordstddev <in> <out>");
+ return 0;
+ }
+
+ Configuration conf = getConf();
+
+ @SuppressWarnings("deprecation")
+ Job job = new Job(conf, "word stddev");
+ job.setJarByClass(WordStandardDeviation.class);
+ job.setMapperClass(WordStandardDeviationMapper.class);
+ job.setCombinerClass(WordStandardDeviationReducer.class);
+ job.setReducerClass(WordStandardDeviationReducer.class);
+ job.setOutputKeyClass(Text.class);
+ job.setOutputValueClass(LongWritable.class);
+ FileInputFormat.addInputPath(job, new Path(args[0]));
+ Path outputpath = new Path(args[1]);
+ FileOutputFormat.setOutputPath(job, outputpath);
+ boolean result = job.waitForCompletion(true);
+
+ // read output and calculate standard deviation
+ stddev = readAndCalcStdDev(outputpath, conf);
+
+ return (result ? 0 : 1);
+ }
+
+ public double getStandardDeviation() {
+ return stddev;
+ }
}
\ No newline at end of file
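
A sanity check of the formula in readAndCalcStdDev above, with a hypothetical input of two words of lengths 2 and 4: count = 2, length = 6, square = 4 + 16 = 20. Then mean = 6 / 2 = 3, mean^2 = 9, term = 20 / 2 = 10, and stddev = sqrt(10 - 9) = 1.0, which matches the population standard deviation of {2, 4}.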
Modified: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/dancing/DistributedPentomino.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/dancing/DistributedPentomino.java?rev=1398581&r1=1398580&r2=1398581&view=diff
==============================================================================
--- hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/dancing/DistributedPentomino.java (original)
+++ hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/dancing/DistributedPentomino.java Tue Oct 16 00:02:55 2012
@@ -165,16 +165,30 @@ public class DistributedPentomino extend
}
public int run(String[] args) throws Exception {
+ Configuration conf = getConf();
if (args.length == 0) {
- System.out.println("pentomino <output>");
+ System.out.println("Usage: pentomino <output> [-depth #] [-height #] [-width #]");
ToolRunner.printGenericCommandUsage(System.out);
return 2;
}
-
- Configuration conf = getConf();
- int width = conf.getInt(Pentomino.WIDTH, PENT_WIDTH);
- int height = conf.getInt(Pentomino.HEIGHT, PENT_HEIGHT);
- int depth = conf.getInt(Pentomino.DEPTH, PENT_DEPTH);
+ // check for passed parameters, otherwise use defaults
+ int width = PENT_WIDTH;
+ int height = PENT_HEIGHT;
+ int depth = PENT_DEPTH;
+ for (int i = 0; i < args.length; i++) {
+ if (args[i].equalsIgnoreCase("-depth")) {
+ depth = Integer.parseInt(args[i++].trim());
+ } else if (args[i].equalsIgnoreCase("-height")) {
+ height = Integer.parseInt(args[i++].trim());
+ } else if (args[i].equalsIgnoreCase("-width") ) {
+ width = Integer.parseInt(args[i++].trim());
+ }
+ }
+ // now set the values in conf so the map tasks can read them; this
+ // guarantees the values are always set, preventing MAPREDUCE-4678
+ conf.setInt(Pentomino.WIDTH, width);
+ conf.setInt(Pentomino.HEIGHT, height);
+ conf.setInt(Pentomino.DEPTH, depth);
Class<? extends Pentomino> pentClass = conf.getClass(Pentomino.CLASS,
OneSidedPentomino.class, Pentomino.class);
int numMaps = conf.getInt(MRJobConfig.NUM_MAPS, DEFAULT_MAPS);
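
The option loop above must advance past each flag before parsing its value: args[++i] increments i first, so Integer.parseInt() sees the number, whereas the post-increment form args[i++] would attempt to parse the flag string itself (e.g. "-depth") and throw a NumberFormatException. A standalone sketch of the same pattern, separate from the patch (the defaults below are placeholders, not the Pentomino constants):

public class FlagParseSketch {
  public static void main(String[] args) {
    // example invocation: FlagParseSketch -depth 5 -width 10
    int depth = 1;  // placeholder default
    int width = 1;  // placeholder default
    for (int i = 0; i < args.length; i++) {
      if (args[i].equalsIgnoreCase("-depth")) {
        // pre-increment: move to the value token before parsing it
        depth = Integer.parseInt(args[++i].trim());
      } else if (args[i].equalsIgnoreCase("-width")) {
        width = Integer.parseInt(args[++i].trim());
      }
    }
    System.out.println("depth=" + depth + ", width=" + width);
  }
}
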
Modified: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/test/java/org/apache/hadoop/examples/TestWordStats.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/test/java/org/apache/hadoop/examples/TestWordStats.java?rev=1398581&r1=1398580&r2=1398581&view=diff
==============================================================================
--- hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/test/java/org/apache/hadoop/examples/TestWordStats.java (original)
+++ hadoop/common/branches/MR-3902/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/test/java/org/apache/hadoop/examples/TestWordStats.java Tue Oct 16 00:02:55 2012
@@ -1,272 +1,272 @@
-package org.apache.hadoop.examples;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.StringTokenizer;
-import java.util.TreeMap;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.util.ToolRunner;
-import org.junit.Before;
-import org.junit.Test;
-
-public class TestWordStats {
-
- private final static String INPUT = "src/test/java/org/apache/hadoop/examples/pi/math";
- private final static String MEAN_OUTPUT = "build/data/mean_output";
- private final static String MEDIAN_OUTPUT = "build/data/median_output";
- private final static String STDDEV_OUTPUT = "build/data/stddev_output";
-
- /**
- * Modified internal test class that is designed to read all the files in the
- * input directory, and find the standard deviation between all of the word
- * lengths.
- */
- public static class WordStdDevReader {
- private long wordsRead = 0;
- private long wordLengthsRead = 0;
- private long wordLengthsReadSquared = 0;
-
- public WordStdDevReader() {
- }
-
- public double read(String path) throws IOException {
- FileSystem fs = FileSystem.get(new Configuration());
- FileStatus[] files = fs.listStatus(new Path(path));
-
- for (FileStatus fileStat : files) {
- if (!fileStat.isFile())
- continue;
-
- BufferedReader br = null;
-
- try {
- br = new BufferedReader(new InputStreamReader(fs.open(fileStat.getPath())));
-
- String line;
- while ((line = br.readLine()) != null) {
- StringTokenizer st = new StringTokenizer(line);
- String word;
- while (st.hasMoreTokens()) {
- word = st.nextToken();
- this.wordsRead++;
- this.wordLengthsRead += word.length();
- this.wordLengthsReadSquared += (long) Math.pow(word.length(), 2.0);
- }
- }
-
- } catch (IOException e) {
- System.out.println("Output could not be read!");
- throw e;
- } finally {
- br.close();
- }
- }
-
- double mean = (((double) this.wordLengthsRead) / ((double) this.wordsRead));
- mean = Math.pow(mean, 2.0);
- double term = (((double) this.wordLengthsReadSquared / ((double) this.wordsRead)));
- double stddev = Math.sqrt((term - mean));
- return stddev;
- }
-
- }
-
- /**
- * Modified internal test class that is designed to read all the files in the
- * input directory, and find the median length of all the words.
- */
- public static class WordMedianReader {
- private long wordsRead = 0;
- private TreeMap<Integer, Integer> map = new TreeMap<Integer, Integer>();
-
- public WordMedianReader() {
- }
-
- public double read(String path) throws IOException {
- FileSystem fs = FileSystem.get(new Configuration());
- FileStatus[] files = fs.listStatus(new Path(path));
-
- int num = 0;
-
- for (FileStatus fileStat : files) {
- if (!fileStat.isFile())
- continue;
-
- BufferedReader br = null;
-
- try {
- br = new BufferedReader(new InputStreamReader(fs.open(fileStat.getPath())));
-
- String line;
- while ((line = br.readLine()) != null) {
- StringTokenizer st = new StringTokenizer(line);
- String word;
- while (st.hasMoreTokens()) {
- word = st.nextToken();
- this.wordsRead++;
- if (this.map.get(word.length()) == null) {
- this.map.put(word.length(), 1);
- } else {
- int count = this.map.get(word.length());
- this.map.put(word.length(), count + 1);
- }
- }
- }
- } catch (IOException e) {
- System.out.println("Output could not be read!");
- throw e;
- } finally {
- br.close();
- }
- }
-
- int medianIndex1 = (int) Math.ceil((this.wordsRead / 2.0));
- int medianIndex2 = (int) Math.floor((this.wordsRead / 2.0));
-
- for (Integer key : this.map.navigableKeySet()) {
- int prevNum = num;
- num += this.map.get(key);
-
- if (medianIndex2 >= prevNum && medianIndex1 <= num) {
- return key;
- } else if (medianIndex2 >= prevNum && medianIndex1 < num) {
- Integer nextCurrLen = this.map.navigableKeySet().iterator().next();
- double median = (key + nextCurrLen) / 2.0;
- return median;
- }
- }
- return -1;
- }
-
- }
-
- /**
- * Modified internal test class that is designed to read all the files in the
- * input directory, and find the mean length of all the words.
- */
- public static class WordMeanReader {
- private long wordsRead = 0;
- private long wordLengthsRead = 0;
-
- public WordMeanReader() {
- }
-
- public double read(String path) throws IOException {
- FileSystem fs = FileSystem.get(new Configuration());
- FileStatus[] files = fs.listStatus(new Path(path));
-
- for (FileStatus fileStat : files) {
- if (!fileStat.isFile())
- continue;
-
- BufferedReader br = null;
-
- try {
- br = new BufferedReader(new InputStreamReader(fs.open(fileStat.getPath())));
-
- String line;
- while ((line = br.readLine()) != null) {
- StringTokenizer st = new StringTokenizer(line);
- String word;
- while (st.hasMoreTokens()) {
- word = st.nextToken();
- this.wordsRead++;
- this.wordLengthsRead += word.length();
- }
- }
- } catch (IOException e) {
- System.out.println("Output could not be read!");
- throw e;
- } finally {
- br.close();
- }
- }
-
- double mean = (((double) this.wordLengthsRead) / ((double) this.wordsRead));
- return mean;
- }
-
- }
-
- /**
- * Internal class designed to delete the output directory. Meant solely for
- * use before and after the test is run; this is so next iterations of the
- * test do not encounter a "file already exists" error.
- *
- * @param dir
- * The directory to delete.
- * @return Returns whether the deletion was successful or not.
- */
- public static boolean deleteDir(File dir) {
- if (dir.isDirectory()) {
- String[] children = dir.list();
- for (int i = 0; i < children.length; i++) {
- boolean success = deleteDir(new File(dir, children[i]));
- if (!success) {
- System.out.println("Could not delete directory after test!");
- return false;
- }
- }
- }
-
- // The directory is now empty so delete it
- return dir.delete();
- }
-
- @Before public void setup() throws Exception {
- deleteDir(new File(MEAN_OUTPUT));
- deleteDir(new File(MEDIAN_OUTPUT));
- deleteDir(new File(STDDEV_OUTPUT));
- }
-
- @Test public void testGetTheMean() throws Exception {
- String args[] = new String[2];
- args[0] = INPUT;
- args[1] = MEAN_OUTPUT;
-
- WordMean wm = new WordMean();
- ToolRunner.run(new Configuration(), wm, args);
- double mean = wm.getMean();
-
- // outputs MUST match
- WordMeanReader wr = new WordMeanReader();
- assertEquals(mean, wr.read(INPUT), 0.0);
- }
-
- @Test public void testGetTheMedian() throws Exception {
- String args[] = new String[2];
- args[0] = INPUT;
- args[1] = MEDIAN_OUTPUT;
-
- WordMedian wm = new WordMedian();
- ToolRunner.run(new Configuration(), wm, args);
- double median = wm.getMedian();
-
- // outputs MUST match
- WordMedianReader wr = new WordMedianReader();
- assertEquals(median, wr.read(INPUT), 0.0);
- }
-
- @Test public void testGetTheStandardDeviation() throws Exception {
- String args[] = new String[2];
- args[0] = INPUT;
- args[1] = STDDEV_OUTPUT;
-
- WordStandardDeviation wsd = new WordStandardDeviation();
- ToolRunner.run(new Configuration(), wsd, args);
- double stddev = wsd.getStandardDeviation();
-
- // outputs MUST match
- WordStdDevReader wr = new WordStdDevReader();
- assertEquals(stddev, wr.read(INPUT), 0.0);
- }
-
-}
+package org.apache.hadoop.examples;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.StringTokenizer;
+import java.util.TreeMap;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestWordStats {
+
+ private final static String INPUT = "src/test/java/org/apache/hadoop/examples/pi/math";
+ private final static String MEAN_OUTPUT = "build/data/mean_output";
+ private final static String MEDIAN_OUTPUT = "build/data/median_output";
+ private final static String STDDEV_OUTPUT = "build/data/stddev_output";
+
+ /**
+ * Modified internal test class that reads all the files in the input
+ * directory and computes the standard deviation of the word lengths.
+ */
+ public static class WordStdDevReader {
+ private long wordsRead = 0;
+ private long wordLengthsRead = 0;
+ private long wordLengthsReadSquared = 0;
+
+ public WordStdDevReader() {
+ }
+
+ public double read(String path) throws IOException {
+ FileSystem fs = FileSystem.get(new Configuration());
+ FileStatus[] files = fs.listStatus(new Path(path));
+
+ for (FileStatus fileStat : files) {
+ if (!fileStat.isFile())
+ continue;
+
+ BufferedReader br = null;
+
+ try {
+ br = new BufferedReader(new InputStreamReader(fs.open(fileStat.getPath())));
+
+ String line;
+ while ((line = br.readLine()) != null) {
+ StringTokenizer st = new StringTokenizer(line);
+ String word;
+ while (st.hasMoreTokens()) {
+ word = st.nextToken();
+ this.wordsRead++;
+ this.wordLengthsRead += word.length();
+ this.wordLengthsReadSquared += (long) Math.pow(word.length(), 2.0);
+ }
+ }
+
+ } catch (IOException e) {
+ System.out.println("Output could not be read!");
+ throw e;
+ } finally {
+ if (br != null) {
+ br.close();
+ }
+ }
+ }
+
+ double mean = ((double) this.wordLengthsRead) / ((double) this.wordsRead);
+ double meanSquared = Math.pow(mean, 2.0);
+ double term = ((double) this.wordLengthsReadSquared) / ((double) this.wordsRead);
+ double stddev = Math.sqrt(term - meanSquared);
+ return stddev;
+ }
+
+ }
+
+ /**
+ * Modified internal test class that is designed to read all the files in the
+ * input directory, and find the median length of all the words.
+ */
+ public static class WordMedianReader {
+ private long wordsRead = 0;
+ private TreeMap<Integer, Integer> map = new TreeMap<Integer, Integer>();
+
+ public WordMedianReader() {
+ }
+
+ public double read(String path) throws IOException {
+ FileSystem fs = FileSystem.get(new Configuration());
+ FileStatus[] files = fs.listStatus(new Path(path));
+
+ int num = 0;
+
+ for (FileStatus fileStat : files) {
+ if (!fileStat.isFile())
+ continue;
+
+ BufferedReader br = null;
+
+ try {
+ br = new BufferedReader(new InputStreamReader(fs.open(fileStat.getPath())));
+
+ String line;
+ while ((line = br.readLine()) != null) {
+ StringTokenizer st = new StringTokenizer(line);
+ String word;
+ while (st.hasMoreTokens()) {
+ word = st.nextToken();
+ this.wordsRead++;
+ if (this.map.get(word.length()) == null) {
+ this.map.put(word.length(), 1);
+ } else {
+ int count = this.map.get(word.length());
+ this.map.put(word.length(), count + 1);
+ }
+ }
+ }
+ } catch (IOException e) {
+ System.out.println("Output could not be read!");
+ throw e;
+ } finally {
+ if (br != null) {
+ br.close();
+ }
+ }
+ }
+
+ int medianIndex1 = (int) Math.ceil((this.wordsRead / 2.0));
+ int medianIndex2 = (int) Math.floor((this.wordsRead / 2.0));
+
+ for (Integer key : this.map.navigableKeySet()) {
+ int prevNum = num;
+ num += this.map.get(key);
+
+ if (medianIndex2 >= prevNum && medianIndex1 <= num) {
+ return key;
+ } else if (medianIndex2 >= prevNum && medianIndex1 < num) {
+ Integer nextCurrLen = this.map.navigableKeySet().iterator().next();
+ double median = (key + nextCurrLen) / 2.0;
+ return median;
+ }
+ }
+ return -1;
+ }
+
+ }
+
+ /**
+ * Modified internal test class that is designed to read all the files in the
+ * input directory, and find the mean length of all the words.
+ */
+ public static class WordMeanReader {
+ private long wordsRead = 0;
+ private long wordLengthsRead = 0;
+
+ public WordMeanReader() {
+ }
+
+ public double read(String path) throws IOException {
+ FileSystem fs = FileSystem.get(new Configuration());
+ FileStatus[] files = fs.listStatus(new Path(path));
+
+ for (FileStatus fileStat : files) {
+ if (!fileStat.isFile())
+ continue;
+
+ BufferedReader br = null;
+
+ try {
+ br = new BufferedReader(new InputStreamReader(fs.open(fileStat.getPath())));
+
+ String line;
+ while ((line = br.readLine()) != null) {
+ StringTokenizer st = new StringTokenizer(line);
+ String word;
+ while (st.hasMoreTokens()) {
+ word = st.nextToken();
+ this.wordsRead++;
+ this.wordLengthsRead += word.length();
+ }
+ }
+ } catch (IOException e) {
+ System.out.println("Output could not be read!");
+ throw e;
+ } finally {
+ if (br != null) {
+ br.close();
+ }
+ }
+ }
+
+ double mean = (((double) this.wordLengthsRead) / ((double) this.wordsRead));
+ return mean;
+ }
+
+ }
+
+ /**
+ * Internal helper method that recursively deletes the given directory.
+ * Meant solely for use before and after each test run, so that later
+ * iterations do not encounter a "file already exists" error.
+ *
+ * @param dir
+ * The directory to delete.
+ * @return Whether the deletion succeeded.
+ */
+ public static boolean deleteDir(File dir) {
+ if (dir.isDirectory()) {
+ String[] children = dir.list();
+ for (int i = 0; i < children.length; i++) {
+ boolean success = deleteDir(new File(dir, children[i]));
+ if (!success) {
+ System.out.println("Could not delete directory after test!");
+ return false;
+ }
+ }
+ }
+
+ // The directory is now empty so delete it
+ return dir.delete();
+ }
+
+ @Before public void setup() throws Exception {
+ deleteDir(new File(MEAN_OUTPUT));
+ deleteDir(new File(MEDIAN_OUTPUT));
+ deleteDir(new File(STDDEV_OUTPUT));
+ }
+
+ @Test public void testGetTheMean() throws Exception {
+ String args[] = new String[2];
+ args[0] = INPUT;
+ args[1] = MEAN_OUTPUT;
+
+ WordMean wm = new WordMean();
+ ToolRunner.run(new Configuration(), wm, args);
+ double mean = wm.getMean();
+
+ // outputs MUST match
+ WordMeanReader wr = new WordMeanReader();
+ assertEquals(mean, wr.read(INPUT), 0.0);
+ }
+
+ @Test public void testGetTheMedian() throws Exception {
+ String args[] = new String[2];
+ args[0] = INPUT;
+ args[1] = MEDIAN_OUTPUT;
+
+ WordMedian wm = new WordMedian();
+ ToolRunner.run(new Configuration(), wm, args);
+ double median = wm.getMedian();
+
+ // outputs MUST match
+ WordMedianReader wr = new WordMedianReader();
+ assertEquals(median, wr.read(INPUT), 0.0);
+ }
+
+ @Test public void testGetTheStandardDeviation() throws Exception {
+ String args[] = new String[2];
+ args[0] = INPUT;
+ args[1] = STDDEV_OUTPUT;
+
+ WordStandardDeviation wsd = new WordStandardDeviation();
+ ToolRunner.run(new Configuration(), wsd, args);
+ double stddev = wsd.getStandardDeviation();
+
+ // outputs MUST match
+ WordStdDevReader wr = new WordStdDevReader();
+ assertEquals(stddev, wr.read(INPUT), 0.0);
+ }
+
+}
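
The trickiest of the readers above is WordMedianReader, which walks a TreeMap histogram of word lengths and, for an even word count, may need to average two adjacent lengths. A standalone sketch of that cumulative-walk idea, separate from the patch and deliberately simpler than the test's index arithmetic (illustrative, not a drop-in replacement):

import java.util.Map;
import java.util.TreeMap;

public class MedianSketch {
  // Median word length from a sorted length -> count histogram.
  static double median(TreeMap<Integer, Integer> hist, long totalWords) {
    long loPos = (totalWords + 1) / 2;  // 1-based lower middle position
    long hiPos = totalWords / 2 + 1;    // 1-based upper middle position
    Integer lo = null, hi = null;
    long seen = 0;
    for (Map.Entry<Integer, Integer> e : hist.entrySet()) {
      seen += e.getValue();
      if (lo == null && seen >= loPos) lo = e.getKey();
      if (seen >= hiPos) { hi = e.getKey(); break; }
    }
    return (lo + hi) / 2.0;
  }

  public static void main(String[] args) {
    TreeMap<Integer, Integer> hist = new TreeMap<Integer, Integer>();
    hist.put(3, 2);  // two words of length 3
    hist.put(6, 2);  // two words of length 6
    // middle positions 2 and 3 straddle the buckets: (3 + 6) / 2 = 4.5
    System.out.println(median(hist, 4));
  }
}
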
Modified: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/pom.xml
URL: http://svn.apache.org/viewvc/hadoop/common/branches/MR-3902/hadoop-mapreduce-project/pom.xml?rev=1398581&r1=1398580&r2=1398581&view=diff
==============================================================================
--- hadoop/common/branches/MR-3902/hadoop-mapreduce-project/pom.xml (original)
+++ hadoop/common/branches/MR-3902/hadoop-mapreduce-project/pom.xml Tue Oct 16 00:02:55 2012
@@ -219,6 +219,18 @@
</includes>
</configuration>
</plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <configuration>
+ <properties>
+ <property>
+ <name>listener</name>
+ <value>org.apache.hadoop.test.TimedOutTestsListener</value>
+ </property>
+ </properties>
+ </configuration>
+ </plugin>
</plugins>
</build>
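
The surefire stanza above registers org.apache.hadoop.test.TimedOutTestsListener through surefire's listener property; the listener is a JUnit RunListener intended to add diagnostics for tests that time out. For reference, the same kind of hook can be attached outside Maven with plain JUnit 4 APIs; a minimal sketch with a stand-in listener (the failure-printing behavior here is illustrative, not what TimedOutTestsListener does):

import org.junit.Test;
import org.junit.runner.JUnitCore;
import org.junit.runner.Result;
import org.junit.runner.notification.Failure;
import org.junit.runner.notification.RunListener;
import static org.junit.Assert.assertTrue;

public class ListenerSketch {
  public static class TinyTest {
    @Test public void alwaysPasses() { assertTrue(1 + 1 == 2); }
  }

  public static void main(String[] args) {
    JUnitCore core = new JUnitCore();
    // Stand-in listener: print each failure as it is reported.
    core.addListener(new RunListener() {
      @Override public void testFailure(Failure failure) {
        System.err.println("FAILED: " + failure.getTestHeader());
      }
    });
    Result result = core.run(TinyTest.class);
    System.out.println("tests run: " + result.getRunCount());
  }
}
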
Propchange: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/src/c++/
------------------------------------------------------------------------------
Merged /hadoop/common/branches/HDFS-3077/hadoop-mapreduce-project/src/c++:r1363593-1396941
Propchange: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/src/contrib/
------------------------------------------------------------------------------
Merged /hadoop/common/branches/HDFS-3077/hadoop-mapreduce-project/src/contrib:r1363593-1396941
Propchange: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/src/contrib/block_forensics/
------------------------------------------------------------------------------
Merged /hadoop/common/branches/HDFS-3077/hadoop-mapreduce-project/src/contrib/block_forensics:r1363593-1396941
Propchange: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/src/contrib/build-contrib.xml
------------------------------------------------------------------------------
Merged /hadoop/common/branches/HDFS-3077/hadoop-mapreduce-project/src/contrib/build-contrib.xml:r1363593-1396941
Propchange: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/src/contrib/build.xml
------------------------------------------------------------------------------
Merged /hadoop/common/branches/HDFS-3077/hadoop-mapreduce-project/src/contrib/build.xml:r1363593-1396941
Propchange: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/src/contrib/data_join/
------------------------------------------------------------------------------
Merged /hadoop/common/branches/HDFS-3077/hadoop-mapreduce-project/src/contrib/data_join:r1363593-1396941
Propchange: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/src/contrib/eclipse-plugin/
------------------------------------------------------------------------------
Merged /hadoop/common/branches/HDFS-3077/hadoop-mapreduce-project/src/contrib/eclipse-plugin:r1363593-1396941
Propchange: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/src/contrib/index/
------------------------------------------------------------------------------
Merged /hadoop/common/branches/HDFS-3077/hadoop-mapreduce-project/src/contrib/index:r1363593-1396941
Modified: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/src/contrib/index/sample/data.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/MR-3902/hadoop-mapreduce-project/src/contrib/index/sample/data.txt?rev=1398581&r1=1398580&r2=1398581&view=diff
==============================================================================
--- hadoop/common/branches/MR-3902/hadoop-mapreduce-project/src/contrib/index/sample/data.txt (original)
+++ hadoop/common/branches/MR-3902/hadoop-mapreduce-project/src/contrib/index/sample/data.txt Tue Oct 16 00:02:55 2012
@@ -1,10 +1,10 @@
-0 ins apache dot org
-1 ins apache
-2 ins apache
-3 ins apache
-4 ins apache
-5 ins apache
-6 ins apache
-7 ins apache
-8 ins apache
-9 ins apache
+0 ins apache dot org
+1 ins apache
+2 ins apache
+3 ins apache
+4 ins apache
+5 ins apache
+6 ins apache
+7 ins apache
+8 ins apache
+9 ins apache
Modified: hadoop/common/branches/MR-3902/hadoop-mapreduce-project/src/contrib/index/sample/data2.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/MR-3902/hadoop-mapreduce-project/src/contrib/index/sample/data2.txt?rev=1398581&r1=1398580&r2=1398581&view=diff
==============================================================================
--- hadoop/common/branches/MR-3902/hadoop-mapreduce-project/src/contrib/index/sample/data2.txt (original)
+++ hadoop/common/branches/MR-3902/hadoop-mapreduce-project/src/contrib/index/sample/data2.txt Tue Oct 16 00:02:55 2012
@@ -1,10 +1,10 @@
-0 del
-1 upd hadoop
-2 del
-3 upd hadoop
-4 del
-5 upd hadoop
-6 del
-7 upd hadoop
-8 del
-9 upd hadoop
+0 del
+1 upd hadoop
+2 del
+3 upd hadoop
+4 del
+5 upd hadoop
+6 del
+7 upd hadoop
+8 del
+9 upd hadoop