Posted to mapreduce-commits@hadoop.apache.org by su...@apache.org on 2012/10/12 06:49:55 UTC
svn commit: r1397435 [1/5] - in
/hadoop/common/branches/branch-2/hadoop-mapreduce-project: ./
hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/
hadoop-mapreduce-examples/src/main/java/org/apache/hadoop...
Author: suresh
Date: Fri Oct 12 04:48:40 2012
New Revision: 1397435
URL: http://svn.apache.org/viewvc?rev=1397435&view=rev
Log:
HADOOP-8911. CRLF characters in source and text files (trunk equivalent patch). Contributed by Raja Aluri.
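
For reference, the patch converts Windows-style CRLF line endings to Unix LF in the files listed below. A minimal standalone sketch of that normalization (illustrative only, not part of this commit; tools such as dos2unix, or setting the svn:eol-style property, accomplish the same thing):

    import java.io.IOException;
    import java.nio.charset.StandardCharsets;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.nio.file.Paths;

    /** Illustrative sketch: rewrites a text file in place, replacing CRLF with LF. */
    public class StripCrlf {
      public static void main(String[] args) throws IOException {
        Path file = Paths.get(args[0]);
        String text = new String(Files.readAllBytes(file), StandardCharsets.UTF_8);
        // Replace \r\n pairs with \n; lone \r characters are left untouched.
        Files.write(file, text.replace("\r\n", "\n").getBytes(StandardCharsets.UTF_8));
      }
    }
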
Modified:
hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt
hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/TestClientProtocolProviderImpls.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/TestYarnClientProtocolProvider.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordMean.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordMedian.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordStandardDeviation.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/test/java/org/apache/hadoop/examples/TestWordStats.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/sample/data.txt
hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/sample/data2.txt
hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/HashingDistributionPolicy.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/IdentityLocalAnalysis.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocInputFormat.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocLocalAnalysis.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocRecordReader.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocTextAndOp.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/RoundRobinDistributionPolicy.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/LuceneIndexFileNameFilter.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/LuceneUtil.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/MixedDeletionPolicy.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/MixedDirectory.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/RAMDirectoryUtil.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/ShardWriter.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/main/UpdateIndex.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/DocumentAndOp.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/DocumentID.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IDistributionPolicy.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IIndexUpdater.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/ILocalAnalysis.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IndexUpdateCombiner.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IndexUpdateConfiguration.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IndexUpdateMapper.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IndexUpdatePartitioner.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IndexUpdateReducer.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IntermediateForm.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/test/org/apache/hadoop/contrib/index/lucene/TestMixedDirectory.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/test/org/apache/hadoop/contrib/index/mapred/TestDistributionPolicy.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/test/org/apache/hadoop/contrib/index/mapred/TestIndexUpdater.java
Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt?rev=1397435&r1=1397434&r2=1397435&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt Fri Oct 12 04:48:40 2012
@@ -17,6 +17,9 @@ Release 2.0.3-alpha - Unreleased
MAPREDUCE-4616. Improve javadoc for MultipleOutputs. (Tony Burton via
acmurthy)
+ HADOOP-8911. CRLF characters in source and text files.
+ (Raja Aluri via suresh)
+
OPTIMIZATIONS
BUG FIXES
Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/TestClientProtocolProviderImpls.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/TestClientProtocolProviderImpls.java?rev=1397435&r1=1397434&r2=1397435&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/TestClientProtocolProviderImpls.java (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/TestClientProtocolProviderImpls.java Fri Oct 12 04:48:40 2012
@@ -1,120 +1,120 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.mapreduce;
-
-import java.io.IOException;
-
-import junit.framework.TestCase;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapred.LocalJobRunner;
-import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
-import org.junit.Test;
-
-public class TestClientProtocolProviderImpls extends TestCase {
-
- @Test
- public void testClusterWithLocalClientProvider() throws Exception {
-
- Configuration conf = new Configuration();
-
- try {
- conf.set(MRConfig.FRAMEWORK_NAME, "incorrect");
- new Cluster(conf);
- fail("Cluster should not be initialized with incorrect framework name");
- } catch (IOException e) {
-
- }
-
- try {
- conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.LOCAL_FRAMEWORK_NAME);
- conf.set(JTConfig.JT_IPC_ADDRESS, "127.0.0.1:0");
-
- new Cluster(conf);
- fail("Cluster with Local Framework name should use local JT address");
- } catch (IOException e) {
-
- }
-
- try {
- conf.set(JTConfig.JT_IPC_ADDRESS, "local");
- Cluster cluster = new Cluster(conf);
- assertTrue(cluster.getClient() instanceof LocalJobRunner);
- cluster.close();
- } catch (IOException e) {
-
- }
- }
-
- @Test
- public void testClusterWithJTClientProvider() throws Exception {
-
- Configuration conf = new Configuration();
- try {
- conf.set(MRConfig.FRAMEWORK_NAME, "incorrect");
- new Cluster(conf);
- fail("Cluster should not be initialized with incorrect framework name");
-
- } catch (IOException e) {
-
- }
-
- try {
- conf.set(MRConfig.FRAMEWORK_NAME, "classic");
- conf.set(JTConfig.JT_IPC_ADDRESS, "local");
- new Cluster(conf);
- fail("Cluster with classic Framework name should not use local JT address");
-
- } catch (IOException e) {
-
- }
-
- try {
- conf = new Configuration();
- conf.set(MRConfig.FRAMEWORK_NAME, "classic");
- conf.set(JTConfig.JT_IPC_ADDRESS, "127.0.0.1:0");
- Cluster cluster = new Cluster(conf);
- cluster.close();
- } catch (IOException e) {
-
- }
- }
-
- @Test
- public void testClusterException() {
-
- Configuration conf = new Configuration();
- conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.CLASSIC_FRAMEWORK_NAME);
- conf.set(JTConfig.JT_IPC_ADDRESS, "local");
-
- // initializing a cluster with this conf should throw an error.
- // However the exception thrown should not be specific to either
- // the job tracker client provider or the local provider
- boolean errorThrown = false;
- try {
- Cluster cluster = new Cluster(conf);
- cluster.close();
- fail("Not expected - cluster init should have failed");
- } catch (IOException e) {
- errorThrown = true;
- assert(e.getMessage().contains("Cannot initialize Cluster. Please check"));
- }
- assert(errorThrown);
- }
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapred.LocalJobRunner;
+import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
+import org.junit.Test;
+
+public class TestClientProtocolProviderImpls extends TestCase {
+
+ @Test
+ public void testClusterWithLocalClientProvider() throws Exception {
+
+ Configuration conf = new Configuration();
+
+ try {
+ conf.set(MRConfig.FRAMEWORK_NAME, "incorrect");
+ new Cluster(conf);
+ fail("Cluster should not be initialized with incorrect framework name");
+ } catch (IOException e) {
+
+ }
+
+ try {
+ conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.LOCAL_FRAMEWORK_NAME);
+ conf.set(JTConfig.JT_IPC_ADDRESS, "127.0.0.1:0");
+
+ new Cluster(conf);
+ fail("Cluster with Local Framework name should use local JT address");
+ } catch (IOException e) {
+
+ }
+
+ try {
+ conf.set(JTConfig.JT_IPC_ADDRESS, "local");
+ Cluster cluster = new Cluster(conf);
+ assertTrue(cluster.getClient() instanceof LocalJobRunner);
+ cluster.close();
+ } catch (IOException e) {
+
+ }
+ }
+
+ @Test
+ public void testClusterWithJTClientProvider() throws Exception {
+
+ Configuration conf = new Configuration();
+ try {
+ conf.set(MRConfig.FRAMEWORK_NAME, "incorrect");
+ new Cluster(conf);
+ fail("Cluster should not be initialized with incorrect framework name");
+
+ } catch (IOException e) {
+
+ }
+
+ try {
+ conf.set(MRConfig.FRAMEWORK_NAME, "classic");
+ conf.set(JTConfig.JT_IPC_ADDRESS, "local");
+ new Cluster(conf);
+ fail("Cluster with classic Framework name should not use local JT address");
+
+ } catch (IOException e) {
+
+ }
+
+ try {
+ conf = new Configuration();
+ conf.set(MRConfig.FRAMEWORK_NAME, "classic");
+ conf.set(JTConfig.JT_IPC_ADDRESS, "127.0.0.1:0");
+ Cluster cluster = new Cluster(conf);
+ cluster.close();
+ } catch (IOException e) {
+
+ }
+ }
+
+ @Test
+ public void testClusterException() {
+
+ Configuration conf = new Configuration();
+ conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.CLASSIC_FRAMEWORK_NAME);
+ conf.set(JTConfig.JT_IPC_ADDRESS, "local");
+
+ // initializing a cluster with this conf should throw an error.
+ // However the exception thrown should not be specific to either
+ // the job tracker client provider or the local provider
+ boolean errorThrown = false;
+ try {
+ Cluster cluster = new Cluster(conf);
+ cluster.close();
+ fail("Not expected - cluster init should have failed");
+ } catch (IOException e) {
+ errorThrown = true;
+ assert(e.getMessage().contains("Cannot initialize Cluster. Please check"));
+ }
+ assert(errorThrown);
+ }
+}
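
Context for the test above: org.apache.hadoop.mapreduce.Cluster selects a ClientProtocolProvider based on the MRConfig.FRAMEWORK_NAME key ("local", "classic", or "yarn"), which is exactly what the assertions exercise. A minimal hedged sketch of driving that selection yourself, declared in the same package as the test so that Cluster's non-public getClient() accessor resolves (as the test itself relies on); assumes a Hadoop 2 classpath with the jobclient jar present:

    package org.apache.hadoop.mapreduce;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapred.LocalJobRunner;
    import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;

    /** Hedged sketch: drive Cluster's provider selection by framework name. */
    public class ClusterSelectionDemo {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // "local" resolves to the LocalJobRunner-backed provider; "classic"
        // and "yarn" resolve to the JobTracker and YARN providers instead.
        conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.LOCAL_FRAMEWORK_NAME);
        conf.set(JTConfig.JT_IPC_ADDRESS, "local");  // as in the test above
        Cluster cluster = new Cluster(conf);
        try {
          System.out.println("client: " + cluster.getClient().getClass().getName());
          assert cluster.getClient() instanceof LocalJobRunner;
        } finally {
          cluster.close();
        }
      }
    }
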
Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/TestYarnClientProtocolProvider.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/TestYarnClientProtocolProvider.java?rev=1397435&r1=1397434&r2=1397435&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/TestYarnClientProtocolProvider.java (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/TestYarnClientProtocolProvider.java Fri Oct 12 04:48:40 2012
@@ -1,129 +1,129 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.mapreduce;
-
-import static org.mockito.Matchers.any;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.when;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-
-import junit.framework.TestCase;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.LocalJobRunner;
-import org.apache.hadoop.mapred.ResourceMgrDelegate;
-import org.apache.hadoop.mapred.YARNRunner;
-import org.apache.hadoop.mapreduce.protocol.ClientProtocol;
-import org.apache.hadoop.security.token.Token;
-import org.apache.hadoop.yarn.api.ClientRMProtocol;
-import org.apache.hadoop.yarn.api.protocolrecords.GetDelegationTokenRequest;
-import org.apache.hadoop.yarn.api.protocolrecords.GetDelegationTokenResponse;
-import org.apache.hadoop.yarn.api.records.DelegationToken;
-import org.apache.hadoop.yarn.conf.YarnConfiguration;
-import org.apache.hadoop.yarn.factories.RecordFactory;
-import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
-import org.junit.Test;
-
-public class TestYarnClientProtocolProvider extends TestCase {
-
- private static final RecordFactory recordFactory = RecordFactoryProvider.
- getRecordFactory(null);
-
- @Test
- public void testClusterWithYarnClientProtocolProvider() throws Exception {
-
- Configuration conf = new Configuration(false);
- Cluster cluster = null;
-
- try {
- cluster = new Cluster(conf);
- } catch (Exception e) {
- throw new Exception(
- "Failed to initialize a local runner w/o a cluster framework key", e);
- }
-
- try {
- assertTrue("client is not a LocalJobRunner",
- cluster.getClient() instanceof LocalJobRunner);
- } finally {
- if (cluster != null) {
- cluster.close();
- }
- }
-
- try {
- conf = new Configuration();
- conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME);
- cluster = new Cluster(conf);
- ClientProtocol client = cluster.getClient();
- assertTrue("client is a YARNRunner", client instanceof YARNRunner);
- } catch (IOException e) {
-
- } finally {
- if (cluster != null) {
- cluster.close();
- }
- }
- }
-
-
- @Test
- public void testClusterGetDelegationToken() throws Exception {
-
- Configuration conf = new Configuration(false);
- Cluster cluster = null;
- try {
- conf = new Configuration();
- conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME);
- cluster = new Cluster(conf);
- YARNRunner yrunner = (YARNRunner) cluster.getClient();
- GetDelegationTokenResponse getDTResponse =
- recordFactory.newRecordInstance(GetDelegationTokenResponse.class);
- DelegationToken rmDTToken = recordFactory.newRecordInstance(
- DelegationToken.class);
- rmDTToken.setIdentifier(ByteBuffer.wrap(new byte[2]));
- rmDTToken.setKind("Testclusterkind");
- rmDTToken.setPassword(ByteBuffer.wrap("testcluster".getBytes()));
- rmDTToken.setService("0.0.0.0:8032");
- getDTResponse.setRMDelegationToken(rmDTToken);
- final ClientRMProtocol cRMProtocol = mock(ClientRMProtocol.class);
- when(cRMProtocol.getDelegationToken(any(
- GetDelegationTokenRequest.class))).thenReturn(getDTResponse);
- ResourceMgrDelegate rmgrDelegate = new ResourceMgrDelegate(
- new YarnConfiguration(conf)) {
- @Override
- public synchronized void start() {
- this.rmClient = cRMProtocol;
- }
- };
- yrunner.setResourceMgrDelegate(rmgrDelegate);
- Token t = cluster.getDelegationToken(new Text(" "));
- assertTrue("Token kind is instead " + t.getKind().toString(),
- "Testclusterkind".equals(t.getKind().toString()));
- } finally {
- if (cluster != null) {
- cluster.close();
- }
- }
- }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce;
+
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.LocalJobRunner;
+import org.apache.hadoop.mapred.ResourceMgrDelegate;
+import org.apache.hadoop.mapred.YARNRunner;
+import org.apache.hadoop.mapreduce.protocol.ClientProtocol;
+import org.apache.hadoop.security.token.Token;
+import org.apache.hadoop.yarn.api.ClientRMProtocol;
+import org.apache.hadoop.yarn.api.protocolrecords.GetDelegationTokenRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetDelegationTokenResponse;
+import org.apache.hadoop.yarn.api.records.DelegationToken;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.factories.RecordFactory;
+import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
+import org.junit.Test;
+
+public class TestYarnClientProtocolProvider extends TestCase {
+
+ private static final RecordFactory recordFactory = RecordFactoryProvider.
+ getRecordFactory(null);
+
+ @Test
+ public void testClusterWithYarnClientProtocolProvider() throws Exception {
+
+ Configuration conf = new Configuration(false);
+ Cluster cluster = null;
+
+ try {
+ cluster = new Cluster(conf);
+ } catch (Exception e) {
+ throw new Exception(
+ "Failed to initialize a local runner w/o a cluster framework key", e);
+ }
+
+ try {
+ assertTrue("client is not a LocalJobRunner",
+ cluster.getClient() instanceof LocalJobRunner);
+ } finally {
+ if (cluster != null) {
+ cluster.close();
+ }
+ }
+
+ try {
+ conf = new Configuration();
+ conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME);
+ cluster = new Cluster(conf);
+ ClientProtocol client = cluster.getClient();
+ assertTrue("client is a YARNRunner", client instanceof YARNRunner);
+ } catch (IOException e) {
+
+ } finally {
+ if (cluster != null) {
+ cluster.close();
+ }
+ }
+ }
+
+
+ @Test
+ public void testClusterGetDelegationToken() throws Exception {
+
+ Configuration conf = new Configuration(false);
+ Cluster cluster = null;
+ try {
+ conf = new Configuration();
+ conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME);
+ cluster = new Cluster(conf);
+ YARNRunner yrunner = (YARNRunner) cluster.getClient();
+ GetDelegationTokenResponse getDTResponse =
+ recordFactory.newRecordInstance(GetDelegationTokenResponse.class);
+ DelegationToken rmDTToken = recordFactory.newRecordInstance(
+ DelegationToken.class);
+ rmDTToken.setIdentifier(ByteBuffer.wrap(new byte[2]));
+ rmDTToken.setKind("Testclusterkind");
+ rmDTToken.setPassword(ByteBuffer.wrap("testcluster".getBytes()));
+ rmDTToken.setService("0.0.0.0:8032");
+ getDTResponse.setRMDelegationToken(rmDTToken);
+ final ClientRMProtocol cRMProtocol = mock(ClientRMProtocol.class);
+ when(cRMProtocol.getDelegationToken(any(
+ GetDelegationTokenRequest.class))).thenReturn(getDTResponse);
+ ResourceMgrDelegate rmgrDelegate = new ResourceMgrDelegate(
+ new YarnConfiguration(conf)) {
+ @Override
+ public synchronized void start() {
+ this.rmClient = cRMProtocol;
+ }
+ };
+ yrunner.setResourceMgrDelegate(rmgrDelegate);
+ Token t = cluster.getDelegationToken(new Text(" "));
+ assertTrue("Token kind is instead " + t.getKind().toString(),
+ "Testclusterkind".equals(t.getKind().toString()));
+ } finally {
+ if (cluster != null) {
+ cluster.close();
+ }
+ }
+ }
+
+}
Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordMean.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordMean.java?rev=1397435&r1=1397434&r2=1397435&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordMean.java (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordMean.java Fri Oct 12 04:48:40 2012
@@ -1,196 +1,196 @@
-package org.apache.hadoop.examples;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.StringTokenizer;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-public class WordMean extends Configured implements Tool {
-
- private double mean = 0;
-
- private final static Text COUNT = new Text("count");
- private final static Text LENGTH = new Text("length");
- private final static LongWritable ONE = new LongWritable(1);
-
- /**
- * Maps words from a line of text into 2 key-value pairs; one key-value pair for
- * counting the word, another for counting its length.
- */
- public static class WordMeanMapper extends
- Mapper<Object, Text, Text, LongWritable> {
-
- private LongWritable wordLen = new LongWritable();
-
- /**
- * Emits 2 key-value pairs for counting the word and its length. Outputs are
- * (Text, LongWritable).
- *
- * @param value
- * This will be a line of text coming in from our input file.
- */
- public void map(Object key, Text value, Context context)
- throws IOException, InterruptedException {
- StringTokenizer itr = new StringTokenizer(value.toString());
- while (itr.hasMoreTokens()) {
- String string = itr.nextToken();
- this.wordLen.set(string.length());
- context.write(LENGTH, this.wordLen);
- context.write(COUNT, ONE);
- }
- }
- }
-
- /**
- * Performs integer summation of all the values for each key.
- */
- public static class WordMeanReducer extends
- Reducer<Text, LongWritable, Text, LongWritable> {
-
- private LongWritable sum = new LongWritable();
-
- /**
- * Sums all the individual values within the iterator and writes them to the
- * same key.
- *
- * @param key
- * This will be one of 2 constants: LENGTH_STR or COUNT_STR.
- * @param values
- * This will be an iterator of all the values associated with that
- * key.
- */
- public void reduce(Text key, Iterable<LongWritable> values, Context context)
- throws IOException, InterruptedException {
-
- int theSum = 0;
- for (LongWritable val : values) {
- theSum += val.get();
- }
- sum.set(theSum);
- context.write(key, sum);
- }
- }
-
- /**
- * Reads the output file and parses the summation of lengths, and the word
- * count, to perform a quick calculation of the mean.
- *
- * @param path
- * The path to find the output file in. Set in main to the output
- * directory.
- * @throws IOException
- * If it cannot access the output directory, we throw an exception.
- */
- private double readAndCalcMean(Path path, Configuration conf)
- throws IOException {
- FileSystem fs = FileSystem.get(conf);
- Path file = new Path(path, "part-r-00000");
-
- if (!fs.exists(file))
- throw new IOException("Output not found!");
-
- BufferedReader br = null;
-
- // average = total sum / number of elements;
- try {
- br = new BufferedReader(new InputStreamReader(fs.open(file)));
-
- long count = 0;
- long length = 0;
-
- String line;
- while ((line = br.readLine()) != null) {
- StringTokenizer st = new StringTokenizer(line);
-
- // grab type
- String type = st.nextToken();
-
- // differentiate
- if (type.equals(COUNT.toString())) {
- String countLit = st.nextToken();
- count = Long.parseLong(countLit);
- } else if (type.equals(LENGTH.toString())) {
- String lengthLit = st.nextToken();
- length = Long.parseLong(lengthLit);
- }
- }
-
- double theMean = (((double) length) / ((double) count));
- System.out.println("The mean is: " + theMean);
- return theMean;
- } finally {
- br.close();
- }
- }
-
- public static void main(String[] args) throws Exception {
- ToolRunner.run(new Configuration(), new WordMean(), args);
- }
-
- @Override
- public int run(String[] args) throws Exception {
- if (args.length != 2) {
- System.err.println("Usage: wordmean <in> <out>");
- return 0;
- }
-
- Configuration conf = getConf();
-
- @SuppressWarnings("deprecation")
- Job job = new Job(conf, "word mean");
- job.setJarByClass(WordMean.class);
- job.setMapperClass(WordMeanMapper.class);
- job.setCombinerClass(WordMeanReducer.class);
- job.setReducerClass(WordMeanReducer.class);
- job.setOutputKeyClass(Text.class);
- job.setOutputValueClass(LongWritable.class);
- FileInputFormat.addInputPath(job, new Path(args[0]));
- Path outputpath = new Path(args[1]);
- FileOutputFormat.setOutputPath(job, outputpath);
- boolean result = job.waitForCompletion(true);
- mean = readAndCalcMean(outputpath, conf);
-
- return (result ? 0 : 1);
- }
-
- /**
- * Only valid after run() has been called.
- *
- * @return Returns the mean value.
- */
- public double getMean() {
- return mean;
- }
+package org.apache.hadoop.examples;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.StringTokenizer;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+public class WordMean extends Configured implements Tool {
+
+ private double mean = 0;
+
+ private final static Text COUNT = new Text("count");
+ private final static Text LENGTH = new Text("length");
+ private final static LongWritable ONE = new LongWritable(1);
+
+ /**
+ * Maps words from a line of text into 2 key-value pairs; one key-value pair for
+ * counting the word, another for counting its length.
+ */
+ public static class WordMeanMapper extends
+ Mapper<Object, Text, Text, LongWritable> {
+
+ private LongWritable wordLen = new LongWritable();
+
+ /**
+ * Emits 2 key-value pairs for counting the word and its length. Outputs are
+ * (Text, LongWritable).
+ *
+ * @param value
+ * This will be a line of text coming in from our input file.
+ */
+ public void map(Object key, Text value, Context context)
+ throws IOException, InterruptedException {
+ StringTokenizer itr = new StringTokenizer(value.toString());
+ while (itr.hasMoreTokens()) {
+ String string = itr.nextToken();
+ this.wordLen.set(string.length());
+ context.write(LENGTH, this.wordLen);
+ context.write(COUNT, ONE);
+ }
+ }
+ }
+
+ /**
+ * Performs integer summation of all the values for each key.
+ */
+ public static class WordMeanReducer extends
+ Reducer<Text, LongWritable, Text, LongWritable> {
+
+ private LongWritable sum = new LongWritable();
+
+ /**
+ * Sums all the individual values within the iterator and writes them to the
+ * same key.
+ *
+ * @param key
+ * This will be one of 2 constants: LENGTH_STR or COUNT_STR.
+ * @param values
+ * This will be an iterator of all the values associated with that
+ * key.
+ */
+ public void reduce(Text key, Iterable<LongWritable> values, Context context)
+ throws IOException, InterruptedException {
+
+ int theSum = 0;
+ for (LongWritable val : values) {
+ theSum += val.get();
+ }
+ sum.set(theSum);
+ context.write(key, sum);
+ }
+ }
+
+ /**
+ * Reads the output file and parses the summation of lengths, and the word
+ * count, to perform a quick calculation of the mean.
+ *
+ * @param path
+ * The path to find the output file in. Set in main to the output
+ * directory.
+ * @throws IOException
+ * If it cannot access the output directory, we throw an exception.
+ */
+ private double readAndCalcMean(Path path, Configuration conf)
+ throws IOException {
+ FileSystem fs = FileSystem.get(conf);
+ Path file = new Path(path, "part-r-00000");
+
+ if (!fs.exists(file))
+ throw new IOException("Output not found!");
+
+ BufferedReader br = null;
+
+ // average = total sum / number of elements;
+ try {
+ br = new BufferedReader(new InputStreamReader(fs.open(file)));
+
+ long count = 0;
+ long length = 0;
+
+ String line;
+ while ((line = br.readLine()) != null) {
+ StringTokenizer st = new StringTokenizer(line);
+
+ // grab type
+ String type = st.nextToken();
+
+ // differentiate
+ if (type.equals(COUNT.toString())) {
+ String countLit = st.nextToken();
+ count = Long.parseLong(countLit);
+ } else if (type.equals(LENGTH.toString())) {
+ String lengthLit = st.nextToken();
+ length = Long.parseLong(lengthLit);
+ }
+ }
+
+ double theMean = (((double) length) / ((double) count));
+ System.out.println("The mean is: " + theMean);
+ return theMean;
+ } finally {
+ br.close();
+ }
+ }
+
+ public static void main(String[] args) throws Exception {
+ ToolRunner.run(new Configuration(), new WordMean(), args);
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+ if (args.length != 2) {
+ System.err.println("Usage: wordmean <in> <out>");
+ return 0;
+ }
+
+ Configuration conf = getConf();
+
+ @SuppressWarnings("deprecation")
+ Job job = new Job(conf, "word mean");
+ job.setJarByClass(WordMean.class);
+ job.setMapperClass(WordMeanMapper.class);
+ job.setCombinerClass(WordMeanReducer.class);
+ job.setReducerClass(WordMeanReducer.class);
+ job.setOutputKeyClass(Text.class);
+ job.setOutputValueClass(LongWritable.class);
+ FileInputFormat.addInputPath(job, new Path(args[0]));
+ Path outputpath = new Path(args[1]);
+ FileOutputFormat.setOutputPath(job, outputpath);
+ boolean result = job.waitForCompletion(true);
+ mean = readAndCalcMean(outputpath, conf);
+
+ return (result ? 0 : 1);
+ }
+
+ /**
+ * Only valid after run() has been called.
+ *
+ * @return Returns the mean value.
+ */
+ public double getMean() {
+ return mean;
+ }
}
\ No newline at end of file
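
The statistic WordMean.java computes above is simply total character count divided by word count: the mapper emits (LENGTH, len) and (COUNT, 1) pairs, the reducer sums each key, and readAndCalcMean() divides the two totals. A standalone sketch of the same arithmetic without MapReduce, for comparison (illustrative only):

    import java.util.StringTokenizer;

    /** Standalone sketch of the statistic WordMean computes: mean word length. */
    public class LocalWordMean {
      public static void main(String[] args) {
        String input = "hadoop strips carriage returns from source files";
        long count = 0;
        long length = 0;
        StringTokenizer itr = new StringTokenizer(input);
        while (itr.hasMoreTokens()) {
          String word = itr.nextToken();
          count++;                  // what the COUNT key accumulates
          length += word.length();  // what the LENGTH key accumulates
        }
        // readAndCalcMean() performs this division on the reducer's two totals.
        System.out.println("The mean is: " + ((double) length / count));
      }
    }
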
Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordMedian.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordMedian.java?rev=1397435&r1=1397434&r2=1397435&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordMedian.java (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordMedian.java Fri Oct 12 04:48:40 2012
@@ -1,208 +1,208 @@
-package org.apache.hadoop.examples;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.StringTokenizer;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.TaskCounter;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-public class WordMedian extends Configured implements Tool {
-
- private double median = 0;
- private final static IntWritable ONE = new IntWritable(1);
-
- /**
- * Maps words from a line of text into a key-value pair; the length of the word
- * as the key, and 1 as the value.
- */
- public static class WordMedianMapper extends
- Mapper<Object, Text, IntWritable, IntWritable> {
-
- private IntWritable length = new IntWritable();
-
- /**
- * Emits a key-value pair for counting the word. Outputs are (IntWritable,
- * IntWritable).
- *
- * @param value
- * This will be a line of text coming in from our input file.
- */
- public void map(Object key, Text value, Context context)
- throws IOException, InterruptedException {
- StringTokenizer itr = new StringTokenizer(value.toString());
- while (itr.hasMoreTokens()) {
- String string = itr.nextToken();
- length.set(string.length());
- context.write(length, ONE);
- }
- }
- }
-
- /**
- * Performs integer summation of all the values for each key.
- */
- public static class WordMedianReducer extends
- Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
-
- private IntWritable val = new IntWritable();
-
- /**
- * Sums all the individual values within the iterator and writes them to the
- * same key.
- *
- * @param key
- * This will be a length of a word that was read.
- * @param values
- * This will be an iterator of all the values associated with that
- * key.
- */
- public void reduce(IntWritable key, Iterable<IntWritable> values,
- Context context) throws IOException, InterruptedException {
-
- int sum = 0;
- for (IntWritable value : values) {
- sum += value.get();
- }
- val.set(sum);
- context.write(key, val);
- }
- }
-
- /**
- * This is a standard program to read and find a median value based on a file
- * of word counts such as: 1 456, 2 132, 3 56... Where the first values are
- * the word lengths and the following values are the number of times that
- * words of that length appear.
- *
- * @param path
- * The path to read the HDFS file from (part-r-00000...00001...etc).
- * @param medianIndex1
- * The first length value to look for.
- * @param medianIndex2
- * The second length value to look for (will be the same as the first
- * if there are an even number of words total).
- * @throws IOException
- * If file cannot be found, we throw an exception.
- * */
- private double readAndFindMedian(String path, int medianIndex1,
- int medianIndex2, Configuration conf) throws IOException {
- FileSystem fs = FileSystem.get(conf);
- Path file = new Path(path, "part-r-00000");
-
- if (!fs.exists(file))
- throw new IOException("Output not found!");
-
- BufferedReader br = null;
-
- try {
- br = new BufferedReader(new InputStreamReader(fs.open(file)));
- int num = 0;
-
- String line;
- while ((line = br.readLine()) != null) {
- StringTokenizer st = new StringTokenizer(line);
-
- // grab length
- String currLen = st.nextToken();
-
- // grab count
- String lengthFreq = st.nextToken();
-
- int prevNum = num;
- num += Integer.parseInt(lengthFreq);
-
- if (medianIndex2 >= prevNum && medianIndex1 <= num) {
- System.out.println("The median is: " + currLen);
- br.close();
- return Double.parseDouble(currLen);
- } else if (medianIndex2 >= prevNum && medianIndex1 < num) {
- String nextCurrLen = st.nextToken();
- double theMedian = (Integer.parseInt(currLen) + Integer
- .parseInt(nextCurrLen)) / 2.0;
- System.out.println("The median is: " + theMedian);
- br.close();
- return theMedian;
- }
- }
- } finally {
- br.close();
- }
- // error, no median found
- return -1;
- }
-
- public static void main(String[] args) throws Exception {
- ToolRunner.run(new Configuration(), new WordMedian(), args);
- }
-
- @Override
- public int run(String[] args) throws Exception {
- if (args.length != 2) {
- System.err.println("Usage: wordmedian <in> <out>");
- return 0;
- }
-
- setConf(new Configuration());
- Configuration conf = getConf();
-
- @SuppressWarnings("deprecation")
- Job job = new Job(conf, "word median");
- job.setJarByClass(WordMedian.class);
- job.setMapperClass(WordMedianMapper.class);
- job.setCombinerClass(WordMedianReducer.class);
- job.setReducerClass(WordMedianReducer.class);
- job.setOutputKeyClass(IntWritable.class);
- job.setOutputValueClass(IntWritable.class);
- FileInputFormat.addInputPath(job, new Path(args[0]));
- FileOutputFormat.setOutputPath(job, new Path(args[1]));
- boolean result = job.waitForCompletion(true);
-
- // Wait for JOB 1 -- get middle value to check for Median
-
- long totalWords = job.getCounters()
- .getGroup(TaskCounter.class.getCanonicalName())
- .findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue();
- int medianIndex1 = (int) Math.ceil((totalWords / 2.0));
- int medianIndex2 = (int) Math.floor((totalWords / 2.0));
-
- median = readAndFindMedian(args[1], medianIndex1, medianIndex2, conf);
-
- return (result ? 0 : 1);
- }
-
- public double getMedian() {
- return median;
- }
-}
+package org.apache.hadoop.examples;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.StringTokenizer;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.TaskCounter;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+public class WordMedian extends Configured implements Tool {
+
+ private double median = 0;
+ private final static IntWritable ONE = new IntWritable(1);
+
+ /**
+ * Maps words from a line of text into a key-value pair; the length of the word
+ * as the key, and 1 as the value.
+ */
+ public static class WordMedianMapper extends
+ Mapper<Object, Text, IntWritable, IntWritable> {
+
+ private IntWritable length = new IntWritable();
+
+ /**
+ * Emits a key-value pair for counting the word. Outputs are (IntWritable,
+ * IntWritable).
+ *
+ * @param value
+ * This will be a line of text coming in from our input file.
+ */
+ public void map(Object key, Text value, Context context)
+ throws IOException, InterruptedException {
+ StringTokenizer itr = new StringTokenizer(value.toString());
+ while (itr.hasMoreTokens()) {
+ String string = itr.nextToken();
+ length.set(string.length());
+ context.write(length, ONE);
+ }
+ }
+ }
+
+ /**
+ * Performs integer summation of all the values for each key.
+ */
+ public static class WordMedianReducer extends
+ Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
+
+ private IntWritable val = new IntWritable();
+
+ /**
+ * Sums all the individual values within the iterator and writes them to the
+ * same key.
+ *
+ * @param key
+ * This will be a length of a word that was read.
+ * @param values
+ * This will be an iterator of all the values associated with that
+ * key.
+ */
+ public void reduce(IntWritable key, Iterable<IntWritable> values,
+ Context context) throws IOException, InterruptedException {
+
+ int sum = 0;
+ for (IntWritable value : values) {
+ sum += value.get();
+ }
+ val.set(sum);
+ context.write(key, val);
+ }
+ }
+
+ /**
+ * This is a standard program to read and find a median value based on a file
+ * of word counts such as: 1 456, 2 132, 3 56... Where the first values are
+ * the word lengths and the following values are the number of times that
+ * words of that length appear.
+ *
+ * @param path
+ * The path to read the HDFS file from (part-r-00000...00001...etc).
+ * @param medianIndex1
+ * The first length value to look for.
+ * @param medianIndex2
+ * The second length value to look for (will be the same as the first
+ * if there are an even number of words total).
+ * @throws IOException
+ * If file cannot be found, we throw an exception.
+ * */
+ private double readAndFindMedian(String path, int medianIndex1,
+ int medianIndex2, Configuration conf) throws IOException {
+ FileSystem fs = FileSystem.get(conf);
+ Path file = new Path(path, "part-r-00000");
+
+ if (!fs.exists(file))
+ throw new IOException("Output not found!");
+
+ BufferedReader br = null;
+
+ try {
+ br = new BufferedReader(new InputStreamReader(fs.open(file)));
+ int num = 0;
+
+ String line;
+ while ((line = br.readLine()) != null) {
+ StringTokenizer st = new StringTokenizer(line);
+
+ // grab length
+ String currLen = st.nextToken();
+
+ // grab count
+ String lengthFreq = st.nextToken();
+
+ int prevNum = num;
+ num += Integer.parseInt(lengthFreq);
+
+ if (medianIndex2 >= prevNum && medianIndex1 <= num) {
+ System.out.println("The median is: " + currLen);
+ br.close();
+ return Double.parseDouble(currLen);
+ } else if (medianIndex2 >= prevNum && medianIndex1 < num) {
+ String nextCurrLen = st.nextToken();
+ double theMedian = (Integer.parseInt(currLen) + Integer
+ .parseInt(nextCurrLen)) / 2.0;
+ System.out.println("The median is: " + theMedian);
+ br.close();
+ return theMedian;
+ }
+ }
+ } finally {
+ br.close();
+ }
+ // error, no median found
+ return -1;
+ }
+
+ public static void main(String[] args) throws Exception {
+ ToolRunner.run(new Configuration(), new WordMedian(), args);
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+ if (args.length != 2) {
+ System.err.println("Usage: wordmedian <in> <out>");
+ return 0;
+ }
+
+ setConf(new Configuration());
+ Configuration conf = getConf();
+
+ @SuppressWarnings("deprecation")
+ Job job = new Job(conf, "word median");
+ job.setJarByClass(WordMedian.class);
+ job.setMapperClass(WordMedianMapper.class);
+ job.setCombinerClass(WordMedianReducer.class);
+ job.setReducerClass(WordMedianReducer.class);
+ job.setOutputKeyClass(IntWritable.class);
+ job.setOutputValueClass(IntWritable.class);
+ FileInputFormat.addInputPath(job, new Path(args[0]));
+ FileOutputFormat.setOutputPath(job, new Path(args[1]));
+ boolean result = job.waitForCompletion(true);
+
+ // Wait for JOB 1 -- get middle value to check for Median
+
+ long totalWords = job.getCounters()
+ .getGroup(TaskCounter.class.getCanonicalName())
+ .findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue();
+ int medianIndex1 = (int) Math.ceil((totalWords / 2.0));
+ int medianIndex2 = (int) Math.floor((totalWords / 2.0));
+
+ median = readAndFindMedian(args[1], medianIndex1, medianIndex2, conf);
+
+ return (result ? 0 : 1);
+ }
+
+ public double getMedian() {
+ return median;
+ }
+}
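
WordMedian.java above finds the median by scanning the sorted (length, frequency) reducer output until the cumulative count crosses the middle indices (medianIndex1 = ceil(total/2), medianIndex2 = floor(total/2)). A standalone sketch of that cumulative scan over an in-memory map, using the same crossing test as readAndFindMedian(); the even-split averaging branch of the original is omitted here for brevity:

    import java.util.Map;
    import java.util.TreeMap;

    /** Standalone sketch of WordMedian's cumulative-frequency median scan. */
    public class LocalWordMedian {
      public static void main(String[] args) {
        // word length -> occurrence count (the reducer's sorted output)
        TreeMap<Integer, Integer> freq = new TreeMap<Integer, Integer>();
        freq.put(3, 4);  // four 3-letter words
        freq.put(5, 2);  // two 5-letter words
        freq.put(7, 1);  // one 7-letter word

        long total = 0;
        for (int c : freq.values()) {
          total += c;
        }
        int medianIndex1 = (int) Math.ceil(total / 2.0);   // 4 for total = 7
        int medianIndex2 = (int) Math.floor(total / 2.0);  // 3 for total = 7

        int num = 0;
        for (Map.Entry<Integer, Integer> e : freq.entrySet()) {
          int prevNum = num;
          num += e.getValue();
          if (medianIndex2 >= prevNum && medianIndex1 <= num) {
            System.out.println("The median is: " + e.getKey());  // prints 3
            break;
          }
        }
      }
    }
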
Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordStandardDeviation.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordStandardDeviation.java?rev=1397435&r1=1397434&r2=1397435&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordStandardDeviation.java (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordStandardDeviation.java Fri Oct 12 04:48:40 2012
@@ -1,210 +1,210 @@
-package org.apache.hadoop.examples;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.StringTokenizer;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-public class WordStandardDeviation extends Configured implements Tool {
-
- private double stddev = 0;
-
- private final static Text LENGTH = new Text("length");
- private final static Text SQUARE = new Text("square");
- private final static Text COUNT = new Text("count");
- private final static LongWritable ONE = new LongWritable(1);
-
- /**
- * Maps words from a line of text into 3 key-value pairs; one key-value pair for
- * counting the word, one for counting its length, and one for counting the
- * square of its length.
- */
- public static class WordStandardDeviationMapper extends
- Mapper<Object, Text, Text, LongWritable> {
-
- private LongWritable wordLen = new LongWritable();
- private LongWritable wordLenSq = new LongWritable();
-
- /**
- * Emits 3 key-value pairs for counting the word, its length, and the
- * squares of its length. Outputs are (Text, LongWritable).
- *
- * @param value
- * This will be a line of text coming in from our input file.
- */
- public void map(Object key, Text value, Context context)
- throws IOException, InterruptedException {
- StringTokenizer itr = new StringTokenizer(value.toString());
- while (itr.hasMoreTokens()) {
- String string = itr.nextToken();
-
- this.wordLen.set(string.length());
-
- // the square of an integer is an integer...
- this.wordLenSq.set((long) Math.pow(string.length(), 2.0));
-
- context.write(LENGTH, this.wordLen);
- context.write(SQUARE, this.wordLenSq);
- context.write(COUNT, ONE);
- }
- }
- }
-
- /**
- * Performs integer summation of all the values for each key.
- */
- public static class WordStandardDeviationReducer extends
- Reducer<Text, LongWritable, Text, LongWritable> {
-
- private LongWritable val = new LongWritable();
-
- /**
- * Sums all the individual values within the iterator and writes them to the
- * same key.
- *
- * @param key
- * This will be one of 2 constants: LENGTH_STR, COUNT_STR, or
- * SQUARE_STR.
- * @param values
- * This will be an iterator of all the values associated with that
- * key.
- */
- public void reduce(Text key, Iterable<LongWritable> values, Context context)
- throws IOException, InterruptedException {
-
- int sum = 0;
- for (LongWritable value : values) {
- sum += value.get();
- }
- val.set(sum);
- context.write(key, val);
- }
- }
-
- /**
- * Reads the output file and parses the summation of lengths, the word count,
- * and the lengths squared, to perform a quick calculation of the standard
- * deviation.
- *
- * @param path
- * The path to find the output file in. Set in main to the output
- * directory.
- * @throws IOException
- * If it cannot access the output directory, we throw an exception.
- */
- private double readAndCalcStdDev(Path path, Configuration conf)
- throws IOException {
- FileSystem fs = FileSystem.get(conf);
- Path file = new Path(path, "part-r-00000");
-
- if (!fs.exists(file))
- throw new IOException("Output not found!");
-
- double stddev = 0;
- BufferedReader br = null;
- try {
- br = new BufferedReader(new InputStreamReader(fs.open(file)));
- long count = 0;
- long length = 0;
- long square = 0;
- String line;
- while ((line = br.readLine()) != null) {
- StringTokenizer st = new StringTokenizer(line);
-
- // grab type
- String type = st.nextToken();
-
- // differentiate
- if (type.equals(COUNT.toString())) {
- String countLit = st.nextToken();
- count = Long.parseLong(countLit);
- } else if (type.equals(LENGTH.toString())) {
- String lengthLit = st.nextToken();
- length = Long.parseLong(lengthLit);
- } else if (type.equals(SQUARE.toString())) {
- String squareLit = st.nextToken();
- square = Long.parseLong(squareLit);
- }
- }
- // average = total sum / number of elements;
- double mean = (((double) length) / ((double) count));
- // standard deviation = sqrt((sum(lengths ^ 2)/count) - (mean ^ 2))
- mean = Math.pow(mean, 2.0);
- double term = (((double) square / ((double) count)));
- stddev = Math.sqrt((term - mean));
- System.out.println("The standard deviation is: " + stddev);
- } finally {
- br.close();
- }
- return stddev;
- }
-
- public static void main(String[] args) throws Exception {
- ToolRunner.run(new Configuration(), new WordStandardDeviation(),
- args);
- }
-
- @Override
- public int run(String[] args) throws Exception {
- if (args.length != 2) {
- System.err.println("Usage: wordstddev <in> <out>");
- return 0;
- }
-
- Configuration conf = getConf();
-
- @SuppressWarnings("deprecation")
- Job job = new Job(conf, "word stddev");
- job.setJarByClass(WordStandardDeviation.class);
- job.setMapperClass(WordStandardDeviationMapper.class);
- job.setCombinerClass(WordStandardDeviationReducer.class);
- job.setReducerClass(WordStandardDeviationReducer.class);
- job.setOutputKeyClass(Text.class);
- job.setOutputValueClass(LongWritable.class);
- FileInputFormat.addInputPath(job, new Path(args[0]));
- Path outputpath = new Path(args[1]);
- FileOutputFormat.setOutputPath(job, outputpath);
- boolean result = job.waitForCompletion(true);
-
- // read output and calculate standard deviation
- stddev = readAndCalcStdDev(outputpath, conf);
-
- return (result ? 0 : 1);
- }
-
- public double getStandardDeviation() {
- return stddev;
- }
+package org.apache.hadoop.examples;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.StringTokenizer;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+public class WordStandardDeviation extends Configured implements Tool {
+
+ private double stddev = 0;
+
+ private final static Text LENGTH = new Text("length");
+ private final static Text SQUARE = new Text("square");
+ private final static Text COUNT = new Text("count");
+ private final static LongWritable ONE = new LongWritable(1);
+
+ /**
+ * Maps words from a line of text into 3 key-value pairs; one key-value pair for
+ * counting the word, one for counting its length, and one for counting the
+ * square of its length.
+ */
+ public static class WordStandardDeviationMapper extends
+ Mapper<Object, Text, Text, LongWritable> {
+
+ private LongWritable wordLen = new LongWritable();
+ private LongWritable wordLenSq = new LongWritable();
+
+ /**
+ * Emits 3 key-value pairs for counting the word, its length, and the
+ * square of its length. Outputs are (Text, LongWritable).
+ *
+ * @param value
+ * This will be a line of text coming in from our input file.
+ */
+ public void map(Object key, Text value, Context context)
+ throws IOException, InterruptedException {
+ StringTokenizer itr = new StringTokenizer(value.toString());
+ while (itr.hasMoreTokens()) {
+ String string = itr.nextToken();
+
+ this.wordLen.set(string.length());
+
+ // the square of an integer is an integer...
+ this.wordLenSq.set((long) Math.pow(string.length(), 2.0));
+
+ context.write(LENGTH, this.wordLen);
+ context.write(SQUARE, this.wordLenSq);
+ context.write(COUNT, ONE);
+ }
+ }
+ }
+
+ /**
+ * Performs integer summation of all the values for each key.
+ */
+ public static class WordStandardDeviationReducer extends
+ Reducer<Text, LongWritable, Text, LongWritable> {
+
+ private LongWritable val = new LongWritable();
+
+ /**
+ * Sums all the individual values within the iterator and writes them to the
+ * same key.
+ *
+ * @param key
+ * This will be one of 3 constants: LENGTH, COUNT, or SQUARE.
+ * @param values
+ * This will be an iterator of all the values associated with that
+ * key.
+ */
+ public void reduce(Text key, Iterable<LongWritable> values, Context context)
+ throws IOException, InterruptedException {
+
+ int sum = 0;
+ for (LongWritable value : values) {
+ sum += value.get();
+ }
+ val.set(sum);
+ context.write(key, val);
+ }
+ }
+
+ /**
+ * Reads the output file and parses the summation of lengths, the word count,
+ * and the lengths squared, to perform a quick calculation of the standard
+ * deviation.
+ *
+ * @param path
+ * The path to find the output file in. Set in main to the output
+ * directory.
+ * @throws IOException
+ * If the output file cannot be accessed.
+ */
+ private double readAndCalcStdDev(Path path, Configuration conf)
+ throws IOException {
+ FileSystem fs = FileSystem.get(conf);
+ Path file = new Path(path, "part-r-00000");
+
+ if (!fs.exists(file))
+ throw new IOException("Output not found!");
+
+ double stddev = 0;
+ BufferedReader br = null;
+ try {
+ br = new BufferedReader(new InputStreamReader(fs.open(file)));
+ long count = 0;
+ long length = 0;
+ long square = 0;
+ String line;
+ while ((line = br.readLine()) != null) {
+ StringTokenizer st = new StringTokenizer(line);
+
+ // grab type
+ String type = st.nextToken();
+
+ // differentiate
+ if (type.equals(COUNT.toString())) {
+ String countLit = st.nextToken();
+ count = Long.parseLong(countLit);
+ } else if (type.equals(LENGTH.toString())) {
+ String lengthLit = st.nextToken();
+ length = Long.parseLong(lengthLit);
+ } else if (type.equals(SQUARE.toString())) {
+ String squareLit = st.nextToken();
+ square = Long.parseLong(squareLit);
+ }
+ }
+ // average = total sum / number of elements;
+ double mean = (((double) length) / ((double) count));
+ // standard deviation = sqrt((sum(lengths ^ 2)/count) - (mean ^ 2))
+ mean = Math.pow(mean, 2.0);
+ double term = (((double) square / ((double) count)));
+ stddev = Math.sqrt((term - mean));
+ System.out.println("The standard deviation is: " + stddev);
+ } finally {
+ br.close();
+ }
+ return stddev;
+ }
+
+ public static void main(String[] args) throws Exception {
+ ToolRunner.run(new Configuration(), new WordStandardDeviation(),
+ args);
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+ if (args.length != 2) {
+ System.err.println("Usage: wordstddev <in> <out>");
+ return 0;
+ }
+
+ Configuration conf = getConf();
+
+ @SuppressWarnings("deprecation")
+ Job job = new Job(conf, "word stddev");
+ job.setJarByClass(WordStandardDeviation.class);
+ job.setMapperClass(WordStandardDeviationMapper.class);
+ job.setCombinerClass(WordStandardDeviationReducer.class);
+ job.setReducerClass(WordStandardDeviationReducer.class);
+ job.setOutputKeyClass(Text.class);
+ job.setOutputValueClass(LongWritable.class);
+ FileInputFormat.addInputPath(job, new Path(args[0]));
+ Path outputpath = new Path(args[1]);
+ FileOutputFormat.setOutputPath(job, outputpath);
+ boolean result = job.waitForCompletion(true);
+
+ // read output and calculate standard deviation
+ stddev = readAndCalcStdDev(outputpath, conf);
+
+ return (result ? 0 : 1);
+ }
+
+ public double getStandardDeviation() {
+ return stddev;
+ }
}
\ No newline at end of file
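For reference, readAndCalcStdDev above implements the one-pass population formula stddev = sqrt(sum(len^2)/count - (sum(len)/count)^2), which is exactly what the three counters emitted by the mapper make possible: the reducer only has to ship three summed longs back to the driver. A minimal, self-contained sketch of the same arithmetic over an in-memory word list (illustrative only; the class name and sample words are invented, and nothing here is part of this patch):

import java.util.Arrays;
import java.util.List;

// Illustrative sketch: the same one-pass stddev arithmetic as
// readAndCalcStdDev, but over an in-memory list instead of job output.
public class WordStdDevSketch {
  public static void main(String[] args) {
    List<String> words = Arrays.asList("ins", "apache", "dot", "org");

    long count = 0;   // what the COUNT key accumulates
    long length = 0;  // what the LENGTH key accumulates
    long square = 0;  // what the SQUARE key accumulates
    for (String w : words) {
      count++;
      length += w.length();
      square += (long) w.length() * w.length();
    }

    // stddev = sqrt(sum(len^2)/count - mean^2), with mean = sum(len)/count
    double mean = (double) length / count;
    double stddev = Math.sqrt((double) square / count - mean * mean);
    System.out.println("The standard deviation is: " + stddev);
  }
}

Because only the three aggregated counters cross the wire, neither the job nor the sketch ever needs the full word list in memory at once.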
Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/test/java/org/apache/hadoop/examples/TestWordStats.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/test/java/org/apache/hadoop/examples/TestWordStats.java?rev=1397435&r1=1397434&r2=1397435&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/test/java/org/apache/hadoop/examples/TestWordStats.java (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/test/java/org/apache/hadoop/examples/TestWordStats.java Fri Oct 12 04:48:40 2012
@@ -1,272 +1,272 @@
-package org.apache.hadoop.examples;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.StringTokenizer;
-import java.util.TreeMap;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.util.ToolRunner;
-import org.junit.Before;
-import org.junit.Test;
-
-public class TestWordStats {
-
- private final static String INPUT = "src/test/java/org/apache/hadoop/examples/pi/math";
- private final static String MEAN_OUTPUT = "build/data/mean_output";
- private final static String MEDIAN_OUTPUT = "build/data/median_output";
- private final static String STDDEV_OUTPUT = "build/data/stddev_output";
-
- /**
- * Modified internal test class that is designed to read all the files in the
- * input directory, and find the standard deviation of all the word lengths.
- */
- public static class WordStdDevReader {
- private long wordsRead = 0;
- private long wordLengthsRead = 0;
- private long wordLengthsReadSquared = 0;
-
- public WordStdDevReader() {
- }
-
- public double read(String path) throws IOException {
- FileSystem fs = FileSystem.get(new Configuration());
- FileStatus[] files = fs.listStatus(new Path(path));
-
- for (FileStatus fileStat : files) {
- if (!fileStat.isFile())
- continue;
-
- BufferedReader br = null;
-
- try {
- br = new BufferedReader(new InputStreamReader(fs.open(fileStat.getPath())));
-
- String line;
- while ((line = br.readLine()) != null) {
- StringTokenizer st = new StringTokenizer(line);
- String word;
- while (st.hasMoreTokens()) {
- word = st.nextToken();
- this.wordsRead++;
- this.wordLengthsRead += word.length();
- this.wordLengthsReadSquared += (long) Math.pow(word.length(), 2.0);
- }
- }
-
- } catch (IOException e) {
- System.out.println("Output could not be read!");
- throw e;
- } finally {
- br.close();
- }
- }
-
- double mean = (((double) this.wordLengthsRead) / ((double) this.wordsRead));
- mean = Math.pow(mean, 2.0);
- double term = (((double) this.wordLengthsReadSquared / ((double) this.wordsRead)));
- double stddev = Math.sqrt((term - mean));
- return stddev;
- }
-
- }
-
- /**
- * Modified internal test class that is designed to read all the files in the
- * input directory, and find the median length of all the words.
- */
- public static class WordMedianReader {
- private long wordsRead = 0;
- private TreeMap<Integer, Integer> map = new TreeMap<Integer, Integer>();
-
- public WordMedianReader() {
- }
-
- public double read(String path) throws IOException {
- FileSystem fs = FileSystem.get(new Configuration());
- FileStatus[] files = fs.listStatus(new Path(path));
-
- int num = 0;
-
- for (FileStatus fileStat : files) {
- if (!fileStat.isFile())
- continue;
-
- BufferedReader br = null;
-
- try {
- br = new BufferedReader(new InputStreamReader(fs.open(fileStat.getPath())));
-
- String line;
- while ((line = br.readLine()) != null) {
- StringTokenizer st = new StringTokenizer(line);
- String word;
- while (st.hasMoreTokens()) {
- word = st.nextToken();
- this.wordsRead++;
- if (this.map.get(word.length()) == null) {
- this.map.put(word.length(), 1);
- } else {
- int count = this.map.get(word.length());
- this.map.put(word.length(), count + 1);
- }
- }
- }
- } catch (IOException e) {
- System.out.println("Output could not be read!");
- throw e;
- } finally {
- br.close();
- }
- }
-
- int medianIndex1 = (int) Math.ceil((this.wordsRead / 2.0));
- int medianIndex2 = (int) Math.floor((this.wordsRead / 2.0));
-
- for (Integer key : this.map.navigableKeySet()) {
- int prevNum = num;
- num += this.map.get(key);
-
- if (medianIndex2 >= prevNum && medianIndex1 <= num) {
- return key;
- } else if (medianIndex2 >= prevNum && medianIndex1 < num) {
- Integer nextCurrLen = this.map.navigableKeySet().iterator().next();
- double median = (key + nextCurrLen) / 2.0;
- return median;
- }
- }
- return -1;
- }
-
- }
-
- /**
- * Modified internal test class that is designed to read all the files in the
- * input directory, and find the mean length of all the words.
- */
- public static class WordMeanReader {
- private long wordsRead = 0;
- private long wordLengthsRead = 0;
-
- public WordMeanReader() {
- }
-
- public double read(String path) throws IOException {
- FileSystem fs = FileSystem.get(new Configuration());
- FileStatus[] files = fs.listStatus(new Path(path));
-
- for (FileStatus fileStat : files) {
- if (!fileStat.isFile())
- continue;
-
- BufferedReader br = null;
-
- try {
- br = new BufferedReader(new InputStreamReader(fs.open(fileStat.getPath())));
-
- String line;
- while ((line = br.readLine()) != null) {
- StringTokenizer st = new StringTokenizer(line);
- String word;
- while (st.hasMoreTokens()) {
- word = st.nextToken();
- this.wordsRead++;
- this.wordLengthsRead += word.length();
- }
- }
- } catch (IOException e) {
- System.out.println("Output could not be read!");
- throw e;
- } finally {
- br.close();
- }
- }
-
- double mean = (((double) this.wordLengthsRead) / ((double) this.wordsRead));
- return mean;
- }
-
- }
-
- /**
- * Internal helper method that recursively deletes the given directory.
- * Meant solely for use before and after a test run, so that subsequent
- * iterations do not encounter a "file already exists" error.
- *
- * @param dir
- * The directory to delete.
- * @return Returns whether the deletion was successful or not.
- */
- public static boolean deleteDir(File dir) {
- if (dir.isDirectory()) {
- String[] children = dir.list();
- for (int i = 0; i < children.length; i++) {
- boolean success = deleteDir(new File(dir, children[i]));
- if (!success) {
- System.out.println("Could not delete directory after test!");
- return false;
- }
- }
- }
-
- // The directory is now empty so delete it
- return dir.delete();
- }
-
- @Before public void setup() throws Exception {
- deleteDir(new File(MEAN_OUTPUT));
- deleteDir(new File(MEDIAN_OUTPUT));
- deleteDir(new File(STDDEV_OUTPUT));
- }
-
- @Test public void testGetTheMean() throws Exception {
- String args[] = new String[2];
- args[0] = INPUT;
- args[1] = MEAN_OUTPUT;
-
- WordMean wm = new WordMean();
- ToolRunner.run(new Configuration(), wm, args);
- double mean = wm.getMean();
-
- // outputs MUST match
- WordMeanReader wr = new WordMeanReader();
- assertEquals(mean, wr.read(INPUT), 0.0);
- }
-
- @Test public void testGetTheMedian() throws Exception {
- String args[] = new String[2];
- args[0] = INPUT;
- args[1] = MEDIAN_OUTPUT;
-
- WordMedian wm = new WordMedian();
- ToolRunner.run(new Configuration(), wm, args);
- double median = wm.getMedian();
-
- // outputs MUST match
- WordMedianReader wr = new WordMedianReader();
- assertEquals(median, wr.read(INPUT), 0.0);
- }
-
- @Test public void testGetTheStandardDeviation() throws Exception {
- String args[] = new String[2];
- args[0] = INPUT;
- args[1] = STDDEV_OUTPUT;
-
- WordStandardDeviation wsd = new WordStandardDeviation();
- ToolRunner.run(new Configuration(), wsd, args);
- double stddev = wsd.getStandardDeviation();
-
- // outputs MUST match
- WordStdDevReader wr = new WordStdDevReader();
- assertEquals(stddev, wr.read(INPUT), 0.0);
- }
-
-}
+package org.apache.hadoop.examples;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.StringTokenizer;
+import java.util.TreeMap;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestWordStats {
+
+ private final static String INPUT = "src/test/java/org/apache/hadoop/examples/pi/math";
+ private final static String MEAN_OUTPUT = "build/data/mean_output";
+ private final static String MEDIAN_OUTPUT = "build/data/median_output";
+ private final static String STDDEV_OUTPUT = "build/data/stddev_output";
+
+ /**
+ * Modified internal test class that is designed to read all the files in the
+ * input directory, and find the standard deviation of all the word lengths.
+ */
+ public static class WordStdDevReader {
+ private long wordsRead = 0;
+ private long wordLengthsRead = 0;
+ private long wordLengthsReadSquared = 0;
+
+ public WordStdDevReader() {
+ }
+
+ public double read(String path) throws IOException {
+ FileSystem fs = FileSystem.get(new Configuration());
+ FileStatus[] files = fs.listStatus(new Path(path));
+
+ for (FileStatus fileStat : files) {
+ if (!fileStat.isFile())
+ continue;
+
+ BufferedReader br = null;
+
+ try {
+ br = new BufferedReader(new InputStreamReader(fs.open(fileStat.getPath())));
+
+ String line;
+ while ((line = br.readLine()) != null) {
+ StringTokenizer st = new StringTokenizer(line);
+ String word;
+ while (st.hasMoreTokens()) {
+ word = st.nextToken();
+ this.wordsRead++;
+ this.wordLengthsRead += word.length();
+ this.wordLengthsReadSquared += (long) Math.pow(word.length(), 2.0);
+ }
+ }
+
+ } catch (IOException e) {
+ System.out.println("Output could not be read!");
+ throw e;
+ } finally {
+ br.close();
+ }
+ }
+
+ double mean = (((double) this.wordLengthsRead) / ((double) this.wordsRead));
+ mean = Math.pow(mean, 2.0);
+ double term = (((double) this.wordLengthsReadSquared / ((double) this.wordsRead)));
+ double stddev = Math.sqrt((term - mean));
+ return stddev;
+ }
+
+ }
+
+ /**
+ * Modified internal test class that is designed to read all the files in the
+ * input directory, and find the median length of all the words.
+ */
+ public static class WordMedianReader {
+ private long wordsRead = 0;
+ private TreeMap<Integer, Integer> map = new TreeMap<Integer, Integer>();
+
+ public WordMedianReader() {
+ }
+
+ public double read(String path) throws IOException {
+ FileSystem fs = FileSystem.get(new Configuration());
+ FileStatus[] files = fs.listStatus(new Path(path));
+
+ int num = 0;
+
+ for (FileStatus fileStat : files) {
+ if (!fileStat.isFile())
+ continue;
+
+ BufferedReader br = null;
+
+ try {
+ br = new BufferedReader(new InputStreamReader(fs.open(fileStat.getPath())));
+
+ String line;
+ while ((line = br.readLine()) != null) {
+ StringTokenizer st = new StringTokenizer(line);
+ String word;
+ while (st.hasMoreTokens()) {
+ word = st.nextToken();
+ this.wordsRead++;
+ if (this.map.get(word.length()) == null) {
+ this.map.put(word.length(), 1);
+ } else {
+ int count = this.map.get(word.length());
+ this.map.put(word.length(), count + 1);
+ }
+ }
+ }
+ } catch (IOException e) {
+ System.out.println("Output could not be read!");
+ throw e;
+ } finally {
+ br.close();
+ }
+ }
+
+ int medianIndex1 = (int) Math.ceil((this.wordsRead / 2.0));
+ int medianIndex2 = (int) Math.floor((this.wordsRead / 2.0));
+
+ for (Integer key : this.map.navigableKeySet()) {
+ int prevNum = num;
+ num += this.map.get(key);
+
+ if (medianIndex2 >= prevNum && medianIndex1 <= num) {
+ return key;
+ } else if (medianIndex2 >= prevNum && medianIndex1 < num) {
+ Integer nextCurrLen = this.map.navigableKeySet().iterator().next();
+ double median = (key + nextCurrLen) / 2.0;
+ return median;
+ }
+ }
+ return -1;
+ }
+
+ }
+
+ /**
+ * Modified internal test class that is designed to read all the files in the
+ * input directory, and find the mean length of all the words.
+ */
+ public static class WordMeanReader {
+ private long wordsRead = 0;
+ private long wordLengthsRead = 0;
+
+ public WordMeanReader() {
+ }
+
+ public double read(String path) throws IOException {
+ FileSystem fs = FileSystem.get(new Configuration());
+ FileStatus[] files = fs.listStatus(new Path(path));
+
+ for (FileStatus fileStat : files) {
+ if (!fileStat.isFile())
+ continue;
+
+ BufferedReader br = null;
+
+ try {
+ br = new BufferedReader(new InputStreamReader(fs.open(fileStat.getPath())));
+
+ String line;
+ while ((line = br.readLine()) != null) {
+ StringTokenizer st = new StringTokenizer(line);
+ String word;
+ while (st.hasMoreTokens()) {
+ word = st.nextToken();
+ this.wordsRead++;
+ this.wordLengthsRead += word.length();
+ }
+ }
+ } catch (IOException e) {
+ System.out.println("Output could not be read!");
+ throw e;
+ } finally {
+ br.close();
+ }
+ }
+
+ double mean = (((double) this.wordLengthsRead) / ((double) this.wordsRead));
+ return mean;
+ }
+
+ }
+
+ /**
+ * Internal helper method that recursively deletes the given directory.
+ * Meant solely for use before and after a test run, so that subsequent
+ * iterations do not encounter a "file already exists" error.
+ *
+ * @param dir
+ * The directory to delete.
+ * @return Returns whether the deletion was successful or not.
+ */
+ public static boolean deleteDir(File dir) {
+ if (dir.isDirectory()) {
+ String[] children = dir.list();
+ for (int i = 0; i < children.length; i++) {
+ boolean success = deleteDir(new File(dir, children[i]));
+ if (!success) {
+ System.out.println("Could not delete directory after test!");
+ return false;
+ }
+ }
+ }
+
+ // The directory is now empty so delete it
+ return dir.delete();
+ }
+
+ @Before public void setup() throws Exception {
+ deleteDir(new File(MEAN_OUTPUT));
+ deleteDir(new File(MEDIAN_OUTPUT));
+ deleteDir(new File(STDDEV_OUTPUT));
+ }
+
+ @Test public void testGetTheMean() throws Exception {
+ String args[] = new String[2];
+ args[0] = INPUT;
+ args[1] = MEAN_OUTPUT;
+
+ WordMean wm = new WordMean();
+ ToolRunner.run(new Configuration(), wm, args);
+ double mean = wm.getMean();
+
+ // outputs MUST match
+ WordMeanReader wr = new WordMeanReader();
+ assertEquals(mean, wr.read(INPUT), 0.0);
+ }
+
+ @Test public void testGetTheMedian() throws Exception {
+ String args[] = new String[2];
+ args[0] = INPUT;
+ args[1] = MEDIAN_OUTPUT;
+
+ WordMedian wm = new WordMedian();
+ ToolRunner.run(new Configuration(), wm, args);
+ double median = wm.getMedian();
+
+ // outputs MUST match
+ WordMedianReader wr = new WordMedianReader();
+ assertEquals(median, wr.read(INPUT), 0.0);
+ }
+
+ @Test public void testGetTheStandardDeviation() throws Exception {
+ String args[] = new String[2];
+ args[0] = INPUT;
+ args[1] = STDDEV_OUTPUT;
+
+ WordStandardDeviation wsd = new WordStandardDeviation();
+ ToolRunner.run(new Configuration(), wsd, args);
+ double stddev = wsd.getStandardDeviation();
+
+ // outputs MUST match
+ WordStdDevReader wr = new WordStdDevReader();
+ assertEquals(stddev, wr.read(INPUT), 0.0);
+ }
+
+}
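WordMedianReader above avoids sorting every word by keeping a TreeMap histogram of length -> occurrence count and walking the keys in ascending order until the cumulative count crosses the middle position. A self-contained sketch of that walk on a hypothetical histogram (illustrative only, not part of this patch):

import java.util.TreeMap;

// Illustrative sketch: median word length from a length -> count histogram,
// mirroring the cumulative-count walk in WordMedianReader.
public class WordMedianSketch {
  public static void main(String[] args) {
    TreeMap<Integer, Integer> map = new TreeMap<Integer, Integer>();
    map.put(3, 3); // three words of length 3
    map.put(6, 1); // one word of length 6
    long wordsRead = 4;

    int medianIndex1 = (int) Math.ceil(wordsRead / 2.0);  // 2
    int medianIndex2 = (int) Math.floor(wordsRead / 2.0); // 2

    int num = 0;
    for (Integer key : map.navigableKeySet()) {
      int prevNum = num;
      num += map.get(key);
      if (medianIndex2 >= prevNum && medianIndex1 <= num) {
        System.out.println("median = " + key); // prints 3
        break;
      }
    }
  }
}

The test class additionally carries a branch that averages two lengths when the middle positions straddle different histogram keys; the sketch keeps only the common case where both middle positions land inside one key's count.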
Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/sample/data.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/sample/data.txt?rev=1397435&r1=1397434&r2=1397435&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/sample/data.txt (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/sample/data.txt Fri Oct 12 04:48:40 2012
@@ -1,10 +1,10 @@
-0 ins apache dot org
-1 ins apache
-2 ins apache
-3 ins apache
-4 ins apache
-5 ins apache
-6 ins apache
-7 ins apache
-8 ins apache
-9 ins apache
+0 ins apache dot org
+1 ins apache
+2 ins apache
+3 ins apache
+4 ins apache
+5 ins apache
+6 ins apache
+7 ins apache
+8 ins apache
+9 ins apache
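Each hunk in data.txt replaces a line with a visually identical one because the only difference is the invisible line terminator: the removed lines ended in CRLF, the replacements end in LF. A standalone way to verify which ending a file actually uses, byte by byte (illustrative sketch only; the hard-coded path is hypothetical, and this is not the tooling used to produce the patch):

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

// Illustrative sketch: counts CRLF ("\r\n") and bare LF line endings so a
// line-ending-only change like the hunk above can be confirmed directly.
public class LineEndingCheck {
  public static void main(String[] args) throws IOException {
    InputStream in = new FileInputStream("data.txt"); // hypothetical path
    try {
      int crlf = 0, lf = 0, prev = -1, b;
      while ((b = in.read()) != -1) {
        if (b == '\n') {
          if (prev == '\r') {
            crlf++;
          } else {
            lf++;
          }
        }
        prev = b;
      }
      System.out.println(crlf + " CRLF line(s), " + lf + " LF line(s)");
    } finally {
      in.close();
    }
  }
}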