Posted to mapreduce-commits@hadoop.apache.org by su...@apache.org on 2012/10/22 22:43:30 UTC
svn commit: r1401071 [4/7] - in
/hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project: ./ conf/
hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/
hadoop-mapreduce-client/hadoop-mapreduce-client-app/...
Modified: hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordStandardDeviation.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordStandardDeviation.java?rev=1401071&r1=1401070&r2=1401071&view=diff
==============================================================================
--- hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordStandardDeviation.java (original)
+++ hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordStandardDeviation.java Mon Oct 22 20:43:16 2012
@@ -1,210 +1,210 @@
-package org.apache.hadoop.examples;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.StringTokenizer;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-public class WordStandardDeviation extends Configured implements Tool {
-
- private double stddev = 0;
-
- private final static Text LENGTH = new Text("length");
- private final static Text SQUARE = new Text("square");
- private final static Text COUNT = new Text("count");
- private final static LongWritable ONE = new LongWritable(1);
-
- /**
- * Maps each word from a line of text into 3 key-value pairs: one for
- * counting the word, one for counting its length, and one for counting the
- * square of its length.
- */
- public static class WordStandardDeviationMapper extends
- Mapper<Object, Text, Text, LongWritable> {
-
- private LongWritable wordLen = new LongWritable();
- private LongWritable wordLenSq = new LongWritable();
-
- /**
- * Emits 3 key-value pairs for counting the word, its length, and the
- * squares of its length. Outputs are (Text, LongWritable).
- *
- * @param value
- * This will be a line of text coming in from our input file.
- */
- public void map(Object key, Text value, Context context)
- throws IOException, InterruptedException {
- StringTokenizer itr = new StringTokenizer(value.toString());
- while (itr.hasMoreTokens()) {
- String string = itr.nextToken();
-
- this.wordLen.set(string.length());
-
- // the square of an integer is an integer, so the (long) cast is exact
- this.wordLenSq.set((long) Math.pow(string.length(), 2.0));
-
- context.write(LENGTH, this.wordLen);
- context.write(SQUARE, this.wordLenSq);
- context.write(COUNT, ONE);
- }
- }
- }
-
- /**
- * Performs integer summation of all the values for each key.
- */
- public static class WordStandardDeviationReducer extends
- Reducer<Text, LongWritable, Text, LongWritable> {
-
- private LongWritable val = new LongWritable();
-
- /**
- * Sums all the individual values within the iterator and writes the total
- * under the same key.
- *
- * @param key
- * This will be one of 3 constants: LENGTH, COUNT, or SQUARE.
- * @param values
- * This will be an iterator of all the values associated with that
- * key.
- */
- public void reduce(Text key, Iterable<LongWritable> values, Context context)
- throws IOException, InterruptedException {
-
- // sum into a long: LongWritable.get() returns long and an int could overflow
- long sum = 0;
- for (LongWritable value : values) {
- sum += value.get();
- }
- val.set(sum);
- context.write(key, val);
- }
- }
-
- /**
- * Reads the output file and parses the summation of lengths, the word count,
- * and the lengths squared, to perform a quick calculation of the standard
- * deviation.
- *
- * @param path
- * The path to find the output file in. Set in main to the output
- * directory.
- * @throws IOException
- * If the output file cannot be accessed or read.
- */
- private double readAndCalcStdDev(Path path, Configuration conf)
- throws IOException {
- FileSystem fs = FileSystem.get(conf);
- Path file = new Path(path, "part-r-00000");
-
- if (!fs.exists(file))
- throw new IOException("Output not found!");
-
- double stddev = 0;
- BufferedReader br = null;
- try {
- br = new BufferedReader(new InputStreamReader(fs.open(file)));
- long count = 0;
- long length = 0;
- long square = 0;
- String line;
- while ((line = br.readLine()) != null) {
- StringTokenizer st = new StringTokenizer(line);
-
- // grab type
- String type = st.nextToken();
-
- // differentiate
- if (type.equals(COUNT.toString())) {
- String countLit = st.nextToken();
- count = Long.parseLong(countLit);
- } else if (type.equals(LENGTH.toString())) {
- String lengthLit = st.nextToken();
- length = Long.parseLong(lengthLit);
- } else if (type.equals(SQUARE.toString())) {
- String squareLit = st.nextToken();
- square = Long.parseLong(squareLit);
- }
- }
- // average = total sum / number of elements;
- double mean = (((double) length) / ((double) count));
- // standard deviation = sqrt((sum(lengths ^ 2)/count) - (mean ^ 2))
- mean = Math.pow(mean, 2.0);
- double term = (((double) square / ((double) count)));
- stddev = Math.sqrt((term - mean));
- System.out.println("The standard deviation is: " + stddev);
- } finally {
- if (br != null) { // fs.open may fail before br is ever assigned
- br.close();
- }
- }
- return stddev;
- }
-
- public static void main(String[] args) throws Exception {
- ToolRunner.run(new Configuration(), new WordStandardDeviation(),
- args);
- }
-
- @Override
- public int run(String[] args) throws Exception {
- if (args.length != 2) {
- System.err.println("Usage: wordstddev <in> <out>");
- return 2; // non-zero: invoked with the wrong arguments
- }
-
- Configuration conf = getConf();
-
- @SuppressWarnings("deprecation")
- Job job = new Job(conf, "word stddev");
- job.setJarByClass(WordStandardDeviation.class);
- job.setMapperClass(WordStandardDeviationMapper.class);
- job.setCombinerClass(WordStandardDeviationReducer.class);
- job.setReducerClass(WordStandardDeviationReducer.class);
- job.setOutputKeyClass(Text.class);
- job.setOutputValueClass(LongWritable.class);
- FileInputFormat.addInputPath(job, new Path(args[0]));
- Path outputpath = new Path(args[1]);
- FileOutputFormat.setOutputPath(job, outputpath);
- boolean result = job.waitForCompletion(true);
-
- // read output and calculate standard deviation
- stddev = readAndCalcStdDev(outputpath, conf);
-
- return (result ? 0 : 1);
- }
-
- public double getStandardDeviation() {
- return stddev;
- }
+package org.apache.hadoop.examples;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.StringTokenizer;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+public class WordStandardDeviation extends Configured implements Tool {
+
+ private double stddev = 0;
+
+ private final static Text LENGTH = new Text("length");
+ private final static Text SQUARE = new Text("square");
+ private final static Text COUNT = new Text("count");
+ private final static LongWritable ONE = new LongWritable(1);
+
+ /**
+ * Maps each word from a line of text into 3 key-value pairs: one for
+ * counting the word, one for counting its length, and one for counting the
+ * square of its length.
+ */
+ public static class WordStandardDeviationMapper extends
+ Mapper<Object, Text, Text, LongWritable> {
+
+ private LongWritable wordLen = new LongWritable();
+ private LongWritable wordLenSq = new LongWritable();
+
+ /**
+ * Emits 3 key-value pairs for counting the word, its length, and the
+ * squares of its length. Outputs are (Text, LongWritable).
+ *
+ * @param value
+ * This will be a line of text coming in from our input file.
+ */
+ public void map(Object key, Text value, Context context)
+ throws IOException, InterruptedException {
+ StringTokenizer itr = new StringTokenizer(value.toString());
+ while (itr.hasMoreTokens()) {
+ String string = itr.nextToken();
+
+ this.wordLen.set(string.length());
+
+ // the square of an integer is an integer, so the (long) cast is exact
+ this.wordLenSq.set((long) Math.pow(string.length(), 2.0));
+
+ context.write(LENGTH, this.wordLen);
+ context.write(SQUARE, this.wordLenSq);
+ context.write(COUNT, ONE);
+ }
+ }
+ }
+
+ /**
+ * Performs integer summation of all the values for each key.
+ */
+ public static class WordStandardDeviationReducer extends
+ Reducer<Text, LongWritable, Text, LongWritable> {
+
+ private LongWritable val = new LongWritable();
+
+ /**
+ * Sums all the individual values within the iterator and writes the total
+ * under the same key.
+ *
+ * @param key
+ * This will be one of 3 constants: LENGTH, COUNT, or SQUARE.
+ * @param values
+ * This will be an iterator of all the values associated with that
+ * key.
+ */
+ public void reduce(Text key, Iterable<LongWritable> values, Context context)
+ throws IOException, InterruptedException {
+
+ // sum into a long: LongWritable.get() returns long and an int could overflow
+ long sum = 0;
+ for (LongWritable value : values) {
+ sum += value.get();
+ }
+ val.set(sum);
+ context.write(key, val);
+ }
+ }
+
+ /**
+ * Reads the output file and parses the summation of lengths, the word count,
+ * and the lengths squared, to perform a quick calculation of the standard
+ * deviation.
+ *
+ * @param path
+ * The path to find the output file in. Set in main to the output
+ * directory.
+ * @throws IOException
+ * If the output file cannot be accessed or read.
+ */
+ private double readAndCalcStdDev(Path path, Configuration conf)
+ throws IOException {
+ FileSystem fs = FileSystem.get(conf);
+ Path file = new Path(path, "part-r-00000");
+
+ if (!fs.exists(file))
+ throw new IOException("Output not found!");
+
+ double stddev = 0;
+ BufferedReader br = null;
+ try {
+ br = new BufferedReader(new InputStreamReader(fs.open(file)));
+ long count = 0;
+ long length = 0;
+ long square = 0;
+ String line;
+ while ((line = br.readLine()) != null) {
+ StringTokenizer st = new StringTokenizer(line);
+
+ // grab type
+ String type = st.nextToken();
+
+ // differentiate
+ if (type.equals(COUNT.toString())) {
+ String countLit = st.nextToken();
+ count = Long.parseLong(countLit);
+ } else if (type.equals(LENGTH.toString())) {
+ String lengthLit = st.nextToken();
+ length = Long.parseLong(lengthLit);
+ } else if (type.equals(SQUARE.toString())) {
+ String squareLit = st.nextToken();
+ square = Long.parseLong(squareLit);
+ }
+ }
+ // average = total sum / number of elements;
+ double mean = (((double) length) / ((double) count));
+ // standard deviation = sqrt((sum(lengths ^ 2)/count) - (mean ^ 2))
+ mean = Math.pow(mean, 2.0);
+ double term = (((double) square / ((double) count)));
+ stddev = Math.sqrt((term - mean));
+ System.out.println("The standard deviation is: " + stddev);
+ } finally {
+ if (br != null) { // fs.open may fail before br is ever assigned
+ br.close();
+ }
+ }
+ return stddev;
+ }
+
+ public static void main(String[] args) throws Exception {
+ ToolRunner.run(new Configuration(), new WordStandardDeviation(),
+ args);
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+ if (args.length != 2) {
+ System.err.println("Usage: wordstddev <in> <out>");
+ return 2; // non-zero: invoked with the wrong arguments
+ }
+
+ Configuration conf = getConf();
+
+ @SuppressWarnings("deprecation")
+ Job job = new Job(conf, "word stddev");
+ job.setJarByClass(WordStandardDeviation.class);
+ job.setMapperClass(WordStandardDeviationMapper.class);
+ job.setCombinerClass(WordStandardDeviationReducer.class);
+ job.setReducerClass(WordStandardDeviationReducer.class);
+ job.setOutputKeyClass(Text.class);
+ job.setOutputValueClass(LongWritable.class);
+ FileInputFormat.addInputPath(job, new Path(args[0]));
+ Path outputpath = new Path(args[1]);
+ FileOutputFormat.setOutputPath(job, outputpath);
+ boolean result = job.waitForCompletion(true);
+
+ // read output and calculate standard deviation
+ stddev = readAndCalcStdDev(outputpath, conf);
+
+ return (result ? 0 : 1);
+ }
+
+ public double getStandardDeviation() {
+ return stddev;
+ }
}
\ No newline at end of file
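For reference, readAndCalcStdDev above uses the identity stddev = sqrt(E[x^2] - E[x]^2) over word lengths, which is why the job only ships three running sums (count, length, square). A minimal self-contained sketch of the same arithmetic; the class name and word list are made up for illustration, with no Hadoop dependencies:

import java.util.Arrays;
import java.util.List;

public class StdDevSketch {
  public static void main(String[] args) {
    // made-up input; any word list works
    List<String> words = Arrays.asList("the", "quick", "brown", "fox");

    long count = 0, length = 0, square = 0;
    for (String w : words) {
      count++;                                    // the COUNT sum
      length += w.length();                       // the LENGTH sum
      square += (long) w.length() * w.length();   // the SQUARE sum
    }

    double mean = (double) length / count;
    // population standard deviation: sqrt(E[x^2] - mean^2)
    double stddev = Math.sqrt((double) square / count - mean * mean);
    System.out.println("stddev = " + stddev);     // prints 1.0 for this input
  }
}

Note this is the population standard deviation (divide by n, not n-1), matching the formula in the job; and because all three quantities are plain sums, the same reducer can safely double as the combiner, as run() configures.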
Modified: hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/test/java/org/apache/hadoop/examples/TestWordStats.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/test/java/org/apache/hadoop/examples/TestWordStats.java?rev=1401071&r1=1401070&r2=1401071&view=diff
==============================================================================
--- hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/test/java/org/apache/hadoop/examples/TestWordStats.java (original)
+++ hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/test/java/org/apache/hadoop/examples/TestWordStats.java Mon Oct 22 20:43:16 2012
@@ -1,272 +1,272 @@
-package org.apache.hadoop.examples;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.StringTokenizer;
-import java.util.TreeMap;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.util.ToolRunner;
-import org.junit.Before;
-import org.junit.Test;
-
-public class TestWordStats {
-
- private final static String INPUT = "src/test/java/org/apache/hadoop/examples/pi/math";
- private final static String MEAN_OUTPUT = "build/data/mean_output";
- private final static String MEDIAN_OUTPUT = "build/data/median_output";
- private final static String STDDEV_OUTPUT = "build/data/stddev_output";
-
- /**
- * Modified internal test class that is designed to read all the files in the
- * input directory and find the standard deviation of all the word lengths.
- */
- public static class WordStdDevReader {
- private long wordsRead = 0;
- private long wordLengthsRead = 0;
- private long wordLengthsReadSquared = 0;
-
- public WordStdDevReader() {
- }
-
- public double read(String path) throws IOException {
- FileSystem fs = FileSystem.get(new Configuration());
- FileStatus[] files = fs.listStatus(new Path(path));
-
- for (FileStatus fileStat : files) {
- if (!fileStat.isFile())
- continue;
-
- BufferedReader br = null;
-
- try {
- br = new BufferedReader(new InputStreamReader(fs.open(fileStat.getPath())));
-
- String line;
- while ((line = br.readLine()) != null) {
- StringTokenizer st = new StringTokenizer(line);
- String word;
- while (st.hasMoreTokens()) {
- word = st.nextToken();
- this.wordsRead++;
- this.wordLengthsRead += word.length();
- this.wordLengthsReadSquared += (long) Math.pow(word.length(), 2.0);
- }
- }
-
- } catch (IOException e) {
- System.out.println("Output could not be read!");
- throw e;
- } finally {
- if (br != null) { // fs.open may fail before br is ever assigned
- br.close();
- }
- }
- }
-
- double mean = (((double) this.wordLengthsRead) / ((double) this.wordsRead));
- mean = Math.pow(mean, 2.0);
- double term = (((double) this.wordLengthsReadSquared / ((double) this.wordsRead)));
- double stddev = Math.sqrt((term - mean));
- return stddev;
- }
-
- }
-
- /**
- * Modified internal test class that is designed to read all the files in the
- * input directory, and find the median length of all the words.
- */
- public static class WordMedianReader {
- private long wordsRead = 0;
- private TreeMap<Integer, Integer> map = new TreeMap<Integer, Integer>();
-
- public WordMedianReader() {
- }
-
- public double read(String path) throws IOException {
- FileSystem fs = FileSystem.get(new Configuration());
- FileStatus[] files = fs.listStatus(new Path(path));
-
- int num = 0;
-
- for (FileStatus fileStat : files) {
- if (!fileStat.isFile())
- continue;
-
- BufferedReader br = null;
-
- try {
- br = new BufferedReader(new InputStreamReader(fs.open(fileStat.getPath())));
-
- String line;
- while ((line = br.readLine()) != null) {
- StringTokenizer st = new StringTokenizer(line);
- String word;
- while (st.hasMoreTokens()) {
- word = st.nextToken();
- this.wordsRead++;
- if (this.map.get(word.length()) == null) {
- this.map.put(word.length(), 1);
- } else {
- int count = this.map.get(word.length());
- this.map.put(word.length(), count + 1);
- }
- }
- }
- } catch (IOException e) {
- System.out.println("Output could not be read!");
- throw e;
- } finally {
- if (br != null) { // fs.open may fail before br is ever assigned
- br.close();
- }
- }
- }
-
- int medianIndex1 = (int) Math.ceil((this.wordsRead / 2.0));
- int medianIndex2 = (int) Math.floor((this.wordsRead / 2.0));
-
- for (Integer key : this.map.navigableKeySet()) {
- int prevNum = num;
- num += this.map.get(key);
-
- if (medianIndex2 >= prevNum && medianIndex1 <= num) {
- return key;
- } else if (medianIndex2 >= prevNum && medianIndex1 < num) {
- Integer nextCurrLen = this.map.navigableKeySet().iterator().next();
- double median = (key + nextCurrLen) / 2.0;
- return median;
- }
- }
- return -1;
- }
-
- }
-
- /**
- * Modified internal test class that is designed to read all the files in the
- * input directory, and find the mean length of all the words.
- */
- public static class WordMeanReader {
- private long wordsRead = 0;
- private long wordLengthsRead = 0;
-
- public WordMeanReader() {
- }
-
- public double read(String path) throws IOException {
- FileSystem fs = FileSystem.get(new Configuration());
- FileStatus[] files = fs.listStatus(new Path(path));
-
- for (FileStatus fileStat : files) {
- if (!fileStat.isFile())
- continue;
-
- BufferedReader br = null;
-
- try {
- br = new BufferedReader(new InputStreamReader(fs.open(fileStat.getPath())));
-
- String line;
- while ((line = br.readLine()) != null) {
- StringTokenizer st = new StringTokenizer(line);
- String word;
- while (st.hasMoreTokens()) {
- word = st.nextToken();
- this.wordsRead++;
- this.wordLengthsRead += word.length();
- }
- }
- } catch (IOException e) {
- System.out.println("Output could not be read!");
- throw e;
- } finally {
- if (br != null) { // fs.open may fail before br is ever assigned
- br.close();
- }
- }
- }
-
- double mean = (((double) this.wordLengthsRead) / ((double) this.wordsRead));
- return mean;
- }
-
- }
-
- /**
- * Internal helper that recursively deletes the output directory. Meant solely
- * for use before and after a test runs, so that later iterations do not
- * encounter a "file already exists" error.
- *
- * @param dir
- * The directory to delete.
- * @return Whether the deletion was successful.
- */
- public static boolean deleteDir(File dir) {
- if (dir.isDirectory()) {
- String[] children = dir.list();
- for (int i = 0; i < children.length; i++) {
- boolean success = deleteDir(new File(dir, children[i]));
- if (!success) {
- System.out.println("Could not delete directory after test!");
- return false;
- }
- }
- }
-
- // The directory is now empty so delete it
- return dir.delete();
- }
-
- @Before public void setup() throws Exception {
- deleteDir(new File(MEAN_OUTPUT));
- deleteDir(new File(MEDIAN_OUTPUT));
- deleteDir(new File(STDDEV_OUTPUT));
- }
-
- @Test public void testGetTheMean() throws Exception {
- String args[] = new String[2];
- args[0] = INPUT;
- args[1] = MEAN_OUTPUT;
-
- WordMean wm = new WordMean();
- ToolRunner.run(new Configuration(), wm, args);
- double mean = wm.getMean();
-
- // outputs MUST match
- WordMeanReader wr = new WordMeanReader();
- assertEquals(mean, wr.read(INPUT), 0.0);
- }
-
- @Test public void testGetTheMedian() throws Exception {
- String args[] = new String[2];
- args[0] = INPUT;
- args[1] = MEDIAN_OUTPUT;
-
- WordMedian wm = new WordMedian();
- ToolRunner.run(new Configuration(), wm, args);
- double median = wm.getMedian();
-
- // outputs MUST match
- WordMedianReader wr = new WordMedianReader();
- assertEquals(median, wr.read(INPUT), 0.0);
- }
-
- @Test public void testGetTheStandardDeviation() throws Exception {
- String args[] = new String[2];
- args[0] = INPUT;
- args[1] = STDDEV_OUTPUT;
-
- WordStandardDeviation wsd = new WordStandardDeviation();
- ToolRunner.run(new Configuration(), wsd, args);
- double stddev = wsd.getStandardDeviation();
-
- // outputs MUST match
- WordStdDevReader wr = new WordStdDevReader();
- assertEquals(stddev, wr.read(INPUT), 0.0);
- }
-
-}
+package org.apache.hadoop.examples;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.StringTokenizer;
+import java.util.TreeMap;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestWordStats {
+
+ private final static String INPUT = "src/test/java/org/apache/hadoop/examples/pi/math";
+ private final static String MEAN_OUTPUT = "build/data/mean_output";
+ private final static String MEDIAN_OUTPUT = "build/data/median_output";
+ private final static String STDDEV_OUTPUT = "build/data/stddev_output";
+
+ /**
+ * Modified internal test class that is designed to read all the files in the
+ * input directory and find the standard deviation of all the word lengths.
+ */
+ public static class WordStdDevReader {
+ private long wordsRead = 0;
+ private long wordLengthsRead = 0;
+ private long wordLengthsReadSquared = 0;
+
+ public WordStdDevReader() {
+ }
+
+ public double read(String path) throws IOException {
+ FileSystem fs = FileSystem.get(new Configuration());
+ FileStatus[] files = fs.listStatus(new Path(path));
+
+ for (FileStatus fileStat : files) {
+ if (!fileStat.isFile())
+ continue;
+
+ BufferedReader br = null;
+
+ try {
+ br = new BufferedReader(new InputStreamReader(fs.open(fileStat.getPath())));
+
+ String line;
+ while ((line = br.readLine()) != null) {
+ StringTokenizer st = new StringTokenizer(line);
+ String word;
+ while (st.hasMoreTokens()) {
+ word = st.nextToken();
+ this.wordsRead++;
+ this.wordLengthsRead += word.length();
+ this.wordLengthsReadSquared += (long) Math.pow(word.length(), 2.0);
+ }
+ }
+
+ } catch (IOException e) {
+ System.out.println("Output could not be read!");
+ throw e;
+ } finally {
+ if (br != null) { // fs.open may fail before br is ever assigned
+ br.close();
+ }
+ }
+ }
+
+ double mean = (((double) this.wordLengthsRead) / ((double) this.wordsRead));
+ mean = Math.pow(mean, 2.0);
+ double term = (((double) this.wordLengthsReadSquared / ((double) this.wordsRead)));
+ double stddev = Math.sqrt((term - mean));
+ return stddev;
+ }
+
+ }
+
+ /**
+ * Modified internal test class that is designed to read all the files in the
+ * input directory, and find the median length of all the words.
+ */
+ public static class WordMedianReader {
+ private long wordsRead = 0;
+ private TreeMap<Integer, Integer> map = new TreeMap<Integer, Integer>();
+
+ public WordMedianReader() {
+ }
+
+ public double read(String path) throws IOException {
+ FileSystem fs = FileSystem.get(new Configuration());
+ FileStatus[] files = fs.listStatus(new Path(path));
+
+ int num = 0;
+
+ for (FileStatus fileStat : files) {
+ if (!fileStat.isFile())
+ continue;
+
+ BufferedReader br = null;
+
+ try {
+ br = new BufferedReader(new InputStreamReader(fs.open(fileStat.getPath())));
+
+ String line;
+ while ((line = br.readLine()) != null) {
+ StringTokenizer st = new StringTokenizer(line);
+ String word;
+ while (st.hasMoreTokens()) {
+ word = st.nextToken();
+ this.wordsRead++;
+ if (this.map.get(word.length()) == null) {
+ this.map.put(word.length(), 1);
+ } else {
+ int count = this.map.get(word.length());
+ this.map.put(word.length(), count + 1);
+ }
+ }
+ }
+ } catch (IOException e) {
+ System.out.println("Output could not be read!");
+ throw e;
+ } finally {
+ if (br != null) { // fs.open may fail before br is ever assigned
+ br.close();
+ }
+ }
+ }
+
+ int medianIndex1 = (int) Math.ceil((this.wordsRead / 2.0));
+ int medianIndex2 = (int) Math.floor((this.wordsRead / 2.0));
+
+ for (Integer key : this.map.navigableKeySet()) {
+ int prevNum = num;
+ num += this.map.get(key);
+
+ if (medianIndex2 >= prevNum && medianIndex1 <= num) {
+ return key;
+ } else if (medianIndex2 >= prevNum && medianIndex1 < num) {
+ Integer nextCurrLen = this.map.navigableKeySet().iterator().next();
+ double median = (key + nextCurrLen) / 2.0;
+ return median;
+ }
+ }
+ return -1;
+ }
+
+ }
+
+ /**
+ * Modified internal test class that is designed to read all the files in the
+ * input directory, and find the mean length of all the words.
+ */
+ public static class WordMeanReader {
+ private long wordsRead = 0;
+ private long wordLengthsRead = 0;
+
+ public WordMeanReader() {
+ }
+
+ public double read(String path) throws IOException {
+ FileSystem fs = FileSystem.get(new Configuration());
+ FileStatus[] files = fs.listStatus(new Path(path));
+
+ for (FileStatus fileStat : files) {
+ if (!fileStat.isFile())
+ continue;
+
+ BufferedReader br = null;
+
+ try {
+ br = new BufferedReader(new InputStreamReader(fs.open(fileStat.getPath())));
+
+ String line;
+ while ((line = br.readLine()) != null) {
+ StringTokenizer st = new StringTokenizer(line);
+ String word;
+ while (st.hasMoreTokens()) {
+ word = st.nextToken();
+ this.wordsRead++;
+ this.wordLengthsRead += word.length();
+ }
+ }
+ } catch (IOException e) {
+ System.out.println("Output could not be read!");
+ throw e;
+ } finally {
+ if (br != null) { // fs.open may fail before br is ever assigned
+ br.close();
+ }
+ }
+ }
+
+ double mean = (((double) this.wordLengthsRead) / ((double) this.wordsRead));
+ return mean;
+ }
+
+ }
+
+ /**
+ * Internal helper that recursively deletes the output directory. Meant solely
+ * for use before and after a test runs, so that later iterations do not
+ * encounter a "file already exists" error.
+ *
+ * @param dir
+ * The directory to delete.
+ * @return Whether the deletion was successful.
+ */
+ public static boolean deleteDir(File dir) {
+ if (dir.isDirectory()) {
+ String[] children = dir.list();
+ for (int i = 0; i < children.length; i++) {
+ boolean success = deleteDir(new File(dir, children[i]));
+ if (!success) {
+ System.out.println("Could not delete directory after test!");
+ return false;
+ }
+ }
+ }
+
+ // The directory is now empty so delete it
+ return dir.delete();
+ }
+
+ @Before public void setup() throws Exception {
+ deleteDir(new File(MEAN_OUTPUT));
+ deleteDir(new File(MEDIAN_OUTPUT));
+ deleteDir(new File(STDDEV_OUTPUT));
+ }
+
+ @Test public void testGetTheMean() throws Exception {
+ String args[] = new String[2];
+ args[0] = INPUT;
+ args[1] = MEAN_OUTPUT;
+
+ WordMean wm = new WordMean();
+ ToolRunner.run(new Configuration(), wm, args);
+ double mean = wm.getMean();
+
+ // outputs MUST match
+ WordMeanReader wr = new WordMeanReader();
+ assertEquals(mean, wr.read(INPUT), 0.0);
+ }
+
+ @Test public void testGetTheMedian() throws Exception {
+ String args[] = new String[2];
+ args[0] = INPUT;
+ args[1] = MEDIAN_OUTPUT;
+
+ WordMedian wm = new WordMedian();
+ ToolRunner.run(new Configuration(), wm, args);
+ double median = wm.getMedian();
+
+ // outputs MUST match
+ WordMedianReader wr = new WordMedianReader();
+ assertEquals(median, wr.read(INPUT), 0.0);
+ }
+
+ @Test public void testGetTheStandardDeviation() throws Exception {
+ String args[] = new String[2];
+ args[0] = INPUT;
+ args[1] = STDDEV_OUTPUT;
+
+ WordStandardDeviation wsd = new WordStandardDeviation();
+ ToolRunner.run(new Configuration(), wsd, args);
+ double stddev = wsd.getStandardDeviation();
+
+ // outputs MUST match
+ WordStdDevReader wr = new WordStdDevReader();
+ assertEquals(stddev, wr.read(INPUT), 0.0);
+ }
+
+}
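The WordMedianReader above finds the median by walking a TreeMap histogram of word lengths in sorted-key order. A minimal sketch of that histogram walk under a simpler convention (1-based ranks, averaging the two middle ranks for an even count); it illustrates the idea rather than reproducing WordMedian's exact branch logic, and the class name is made up:

import java.util.Map;
import java.util.TreeMap;

public class MedianSketch {
  // histogram: word length -> number of words with that length
  static double medianFromHistogram(TreeMap<Integer, Integer> hist, long total) {
    long lo = (total + 1) / 2;   // 1-based rank of the lower middle element
    long hi = (total + 2) / 2;   // equals lo when total is odd
    long seen = 0;
    int loVal = -1, hiVal = -1;
    for (Map.Entry<Integer, Integer> e : hist.entrySet()) {
      seen += e.getValue();
      if (loVal < 0 && seen >= lo) loVal = e.getKey();
      if (seen >= hi) { hiVal = e.getKey(); break; }
    }
    return (loVal + hiVal) / 2.0;
  }

  public static void main(String[] args) {
    TreeMap<Integer, Integer> hist = new TreeMap<Integer, Integer>();
    hist.put(3, 2);   // two words of length 3
    hist.put(5, 1);   // one word of length 5
    hist.put(7, 1);   // one word of length 7
    // sorted lengths are 3, 3, 5, 7; ranks 2 and 3 give (3 + 5) / 2 = 4.0
    System.out.println(medianFromHistogram(hist, 4));
  }
}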
Propchange: hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/c++/
------------------------------------------------------------------------------
Merged /hadoop/common/trunk/hadoop-mapreduce-project/src/c++:r1397381-1401062
Propchange: hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/
------------------------------------------------------------------------------
Merged /hadoop/common/trunk/hadoop-mapreduce-project/src/contrib:r1397381-1401062
Propchange: hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/block_forensics/
------------------------------------------------------------------------------
Merged /hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/block_forensics:r1397381-1401062
Propchange: hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/build-contrib.xml
------------------------------------------------------------------------------
Merged /hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/build-contrib.xml:r1397381-1401062
Propchange: hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/build.xml
------------------------------------------------------------------------------
Merged /hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/build.xml:r1397381-1401062
Propchange: hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/data_join/
------------------------------------------------------------------------------
Merged /hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/data_join:r1397381-1401062
Propchange: hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/eclipse-plugin/
------------------------------------------------------------------------------
Merged /hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/eclipse-plugin:r1397381-1401062
Propchange: hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/
------------------------------------------------------------------------------
Merged /hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index:r1397381-1401062
Modified: hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/sample/data.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/sample/data.txt?rev=1401071&r1=1401070&r2=1401071&view=diff
==============================================================================
--- hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/sample/data.txt (original)
+++ hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/sample/data.txt Mon Oct 22 20:43:16 2012
@@ -1,10 +1,10 @@
-0 ins apache dot org
-1 ins apache
-2 ins apache
-3 ins apache
-4 ins apache
-5 ins apache
-6 ins apache
-7 ins apache
-8 ins apache
-9 ins apache
+0 ins apache dot org
+1 ins apache
+2 ins apache
+3 ins apache
+4 ins apache
+5 ins apache
+6 ins apache
+7 ins apache
+8 ins apache
+9 ins apache
Modified: hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/sample/data2.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/sample/data2.txt?rev=1401071&r1=1401070&r2=1401071&view=diff
==============================================================================
--- hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/sample/data2.txt (original)
+++ hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/sample/data2.txt Mon Oct 22 20:43:16 2012
@@ -1,10 +1,10 @@
-0 del
-1 upd hadoop
-2 del
-3 upd hadoop
-4 del
-5 upd hadoop
-6 del
-7 upd hadoop
-8 del
-9 upd hadoop
+0 del
+1 upd hadoop
+2 del
+3 upd hadoop
+4 del
+5 upd hadoop
+6 del
+7 upd hadoop
+8 del
+9 upd hadoop
Modified: hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/HashingDistributionPolicy.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/HashingDistributionPolicy.java?rev=1401071&r1=1401070&r2=1401071&view=diff
==============================================================================
--- hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/HashingDistributionPolicy.java (original)
+++ hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/HashingDistributionPolicy.java Mon Oct 22 20:43:16 2012
@@ -1,56 +1,56 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.example;
-
-import org.apache.hadoop.contrib.index.mapred.DocumentID;
-import org.apache.hadoop.contrib.index.mapred.IDistributionPolicy;
-import org.apache.hadoop.contrib.index.mapred.Shard;
-
-/**
- * Choose a shard for each insert or delete based on document id hashing. Do
- * NOT use this distribution policy when the number of shards changes.
- */
-public class HashingDistributionPolicy implements IDistributionPolicy {
-
- private int numShards;
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#init(org.apache.hadoop.contrib.index.mapred.Shard[])
- */
- public void init(Shard[] shards) {
- numShards = shards.length;
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#chooseShardForInsert(org.apache.hadoop.contrib.index.mapred.DocumentID)
- */
- public int chooseShardForInsert(DocumentID key) {
- int hashCode = key.hashCode();
- return hashCode >= 0 ? hashCode % numShards : (-hashCode) % numShards;
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#chooseShardForDelete(org.apache.hadoop.contrib.index.mapred.DocumentID)
- */
- public int chooseShardForDelete(DocumentID key) {
- int hashCode = key.hashCode();
- return hashCode >= 0 ? hashCode % numShards : (-hashCode) % numShards;
- }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.example;
+
+import org.apache.hadoop.contrib.index.mapred.DocumentID;
+import org.apache.hadoop.contrib.index.mapred.IDistributionPolicy;
+import org.apache.hadoop.contrib.index.mapred.Shard;
+
+/**
+ * Choose a shard for each insert or delete based on document id hashing. Do
+ * NOT use this distribution policy when the number of shards changes.
+ */
+public class HashingDistributionPolicy implements IDistributionPolicy {
+
+ private int numShards;
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#init(org.apache.hadoop.contrib.index.mapred.Shard[])
+ */
+ public void init(Shard[] shards) {
+ numShards = shards.length;
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#chooseShardForInsert(org.apache.hadoop.contrib.index.mapred.DocumentID)
+ */
+ public int chooseShardForInsert(DocumentID key) {
+ int hashCode = key.hashCode();
+ return hashCode >= 0 ? hashCode % numShards : (-hashCode) % numShards;
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#chooseShardForDelete(org.apache.hadoop.contrib.index.mapred.DocumentID)
+ */
+ public int chooseShardForDelete(DocumentID key) {
+ int hashCode = key.hashCode();
+ return hashCode >= 0 ? hashCode % numShards : (-hashCode) % numShards;
+ }
+
+}
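One subtlety in the hashing above: (-hashCode) overflows when hashCode is Integer.MIN_VALUE, since two's-complement negation maps that value to itself, so a key hashing to it can still yield a negative shard index. A sketch of a sign-safe variant (illustrative, with a made-up class name; not what the class above ships):

public class ShardSketch {
  // Sign-safe shard selection: the double-mod idiom keeps the result in
  // [0, numShards) for every int hash, including Integer.MIN_VALUE, where
  // the (-hashCode) trick overflows.
  static int chooseShard(int hashCode, int numShards) {
    return (hashCode % numShards + numShards) % numShards;
  }

  public static void main(String[] args) {
    System.out.println(chooseShard("apache".hashCode(), 4));
    System.out.println(chooseShard(Integer.MIN_VALUE, 4)); // 0, not negative
  }
}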
Modified: hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/IdentityLocalAnalysis.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/IdentityLocalAnalysis.java?rev=1401071&r1=1401070&r2=1401071&view=diff
==============================================================================
--- hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/IdentityLocalAnalysis.java (original)
+++ hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/IdentityLocalAnalysis.java Mon Oct 22 20:43:16 2012
@@ -1,57 +1,57 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.example;
-
-import java.io.IOException;
-
-import org.apache.hadoop.contrib.index.mapred.DocumentAndOp;
-import org.apache.hadoop.contrib.index.mapred.DocumentID;
-import org.apache.hadoop.contrib.index.mapred.ILocalAnalysis;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-
-/**
- * Identity local analysis maps inputs directly into outputs.
- */
-public class IdentityLocalAnalysis implements
- ILocalAnalysis<DocumentID, DocumentAndOp> {
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapred.Mapper#map(java.lang.Object, java.lang.Object, org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)
- */
- public void map(DocumentID key, DocumentAndOp value,
- OutputCollector<DocumentID, DocumentAndOp> output, Reporter reporter)
- throws IOException {
- output.collect(key, value);
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapred.JobConfigurable#configure(org.apache.hadoop.mapred.JobConf)
- */
- public void configure(JobConf job) {
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.io.Closeable#close()
- */
- public void close() throws IOException {
- }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.example;
+
+import java.io.IOException;
+
+import org.apache.hadoop.contrib.index.mapred.DocumentAndOp;
+import org.apache.hadoop.contrib.index.mapred.DocumentID;
+import org.apache.hadoop.contrib.index.mapred.ILocalAnalysis;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+
+/**
+ * Identity local analysis maps inputs directly into outputs.
+ */
+public class IdentityLocalAnalysis implements
+ ILocalAnalysis<DocumentID, DocumentAndOp> {
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.mapred.Mapper#map(java.lang.Object, java.lang.Object, org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)
+ */
+ public void map(DocumentID key, DocumentAndOp value,
+ OutputCollector<DocumentID, DocumentAndOp> output, Reporter reporter)
+ throws IOException {
+ output.collect(key, value);
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.mapred.JobConfigurable#configure(org.apache.hadoop.mapred.JobConf)
+ */
+ public void configure(JobConf job) {
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.io.Closeable#close()
+ */
+ public void close() throws IOException {
+ }
+
+}
Modified: hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocInputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocInputFormat.java?rev=1401071&r1=1401070&r2=1401071&view=diff
==============================================================================
--- hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocInputFormat.java (original)
+++ hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocInputFormat.java Mon Oct 22 20:43:16 2012
@@ -1,46 +1,46 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.example;
-
-import java.io.IOException;
-
-import org.apache.hadoop.contrib.index.mapred.DocumentID;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileSplit;
-import org.apache.hadoop.mapred.InputSplit;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hadoop.mapred.Reporter;
-
-/**
- * An InputFormat for LineDoc: plain text files where each line is a doc.
- */
-public class LineDocInputFormat extends
- FileInputFormat<DocumentID, LineDocTextAndOp> {
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapred.FileInputFormat#getRecordReader(org.apache.hadoop.mapred.InputSplit, org.apache.hadoop.mapred.JobConf, org.apache.hadoop.mapred.Reporter)
- */
- public RecordReader<DocumentID, LineDocTextAndOp> getRecordReader(
- InputSplit split, JobConf job, Reporter reporter) throws IOException {
- reporter.setStatus(split.toString());
- return new LineDocRecordReader(job, (FileSplit) split);
- }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.example;
+
+import java.io.IOException;
+
+import org.apache.hadoop.contrib.index.mapred.DocumentID;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+
+/**
+ * An InputFormat for LineDoc: plain text files where each line is a doc.
+ */
+public class LineDocInputFormat extends
+ FileInputFormat<DocumentID, LineDocTextAndOp> {
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.mapred.FileInputFormat#getRecordReader(org.apache.hadoop.mapred.InputSplit, org.apache.hadoop.mapred.JobConf, org.apache.hadoop.mapred.Reporter)
+ */
+ public RecordReader<DocumentID, LineDocTextAndOp> getRecordReader(
+ InputSplit split, JobConf job, Reporter reporter) throws IOException {
+ reporter.setStatus(split.toString());
+ return new LineDocRecordReader(job, (FileSplit) split);
+ }
+
+}
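For anyone wiring this up, a minimal sketch of plugging the InputFormat into a job via the old mapred API; the job name and input path are hypothetical:

import org.apache.hadoop.contrib.index.example.LineDocInputFormat;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;

public class LineDocJobSketch {
  public static void main(String[] args) {
    JobConf job = new JobConf();
    job.setJobName("line-doc-demo");                 // hypothetical name
    // each input line becomes one (DocumentID, LineDocTextAndOp) record
    job.setInputFormat(LineDocInputFormat.class);
    FileInputFormat.setInputPaths(job, new Path("/tmp/linedocs")); // hypothetical path
  }
}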
Modified: hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocLocalAnalysis.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocLocalAnalysis.java?rev=1401071&r1=1401070&r2=1401071&view=diff
==============================================================================
--- hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocLocalAnalysis.java (original)
+++ hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocLocalAnalysis.java Mon Oct 22 20:43:16 2012
@@ -1,80 +1,80 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.example;
-
-import java.io.IOException;
-
-import org.apache.hadoop.contrib.index.mapred.DocumentAndOp;
-import org.apache.hadoop.contrib.index.mapred.DocumentID;
-import org.apache.hadoop.contrib.index.mapred.ILocalAnalysis;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.index.Term;
-
-/**
- * Convert LineDocTextAndOp to DocumentAndOp as required by ILocalAnalysis.
- */
-public class LineDocLocalAnalysis implements
- ILocalAnalysis<DocumentID, LineDocTextAndOp> {
-
- private static String docidFieldName = "id";
- private static String contentFieldName = "content";
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapred.Mapper#map(java.lang.Object, java.lang.Object, org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)
- */
- public void map(DocumentID key, LineDocTextAndOp value,
- OutputCollector<DocumentID, DocumentAndOp> output, Reporter reporter)
- throws IOException {
-
- DocumentAndOp.Op op = value.getOp();
- Document doc = null;
- Term term = null;
-
- if (op == DocumentAndOp.Op.INSERT || op == DocumentAndOp.Op.UPDATE) {
- doc = new Document();
- doc.add(new Field(docidFieldName, key.getText().toString(),
- Field.Store.YES, Field.Index.UN_TOKENIZED));
- doc.add(new Field(contentFieldName, value.getText().toString(),
- Field.Store.NO, Field.Index.TOKENIZED));
- }
-
- if (op == DocumentAndOp.Op.DELETE || op == DocumentAndOp.Op.UPDATE) {
- term = new Term(docidFieldName, key.getText().toString());
- }
-
- output.collect(key, new DocumentAndOp(op, doc, term));
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapred.JobConfigurable#configure(org.apache.hadoop.mapred.JobConf)
- */
- public void configure(JobConf job) {
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.io.Closeable#close()
- */
- public void close() throws IOException {
- }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.example;
+
+import java.io.IOException;
+
+import org.apache.hadoop.contrib.index.mapred.DocumentAndOp;
+import org.apache.hadoop.contrib.index.mapred.DocumentID;
+import org.apache.hadoop.contrib.index.mapred.ILocalAnalysis;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.Term;
+
+/**
+ * Convert LineDocTextAndOp to DocumentAndOp as required by ILocalAnalysis.
+ */
+public class LineDocLocalAnalysis implements
+ ILocalAnalysis<DocumentID, LineDocTextAndOp> {
+
+ private static String docidFieldName = "id";
+ private static String contentFieldName = "content";
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.mapred.Mapper#map(java.lang.Object, java.lang.Object, org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)
+ */
+ public void map(DocumentID key, LineDocTextAndOp value,
+ OutputCollector<DocumentID, DocumentAndOp> output, Reporter reporter)
+ throws IOException {
+
+ DocumentAndOp.Op op = value.getOp();
+ Document doc = null;
+ Term term = null;
+
+ if (op == DocumentAndOp.Op.INSERT || op == DocumentAndOp.Op.UPDATE) {
+ doc = new Document();
+ doc.add(new Field(docidFieldName, key.getText().toString(),
+ Field.Store.YES, Field.Index.UN_TOKENIZED));
+ doc.add(new Field(contentFieldName, value.getText().toString(),
+ Field.Store.NO, Field.Index.TOKENIZED));
+ }
+
+ if (op == DocumentAndOp.Op.DELETE || op == DocumentAndOp.Op.UPDATE) {
+ term = new Term(docidFieldName, key.getText().toString());
+ }
+
+ output.collect(key, new DocumentAndOp(op, doc, term));
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.mapred.JobConfigurable#configure(org.apache.hadoop.mapred.JobConf)
+ */
+ public void configure(JobConf job) {
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.io.Closeable#close()
+ */
+ public void close() throws IOException {
+ }
+
+}
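The two ifs above encode the op semantics: INSERT carries only a new Document, DELETE only a Term naming the existing doc, and UPDATE both. A Lucene-free sketch of just that rule (the class and enum here are stand-ins, not the contrib types):

public class OpSketch {
  enum Op { INSERT, DELETE, UPDATE }

  public static void main(String[] args) {
    for (Op op : Op.values()) {
      // INSERT and UPDATE carry a document body; DELETE and UPDATE carry a
      // delete term naming the existing doc, mirroring the two ifs above.
      boolean hasDoc = (op == Op.INSERT || op == Op.UPDATE);
      boolean hasTerm = (op == Op.DELETE || op == Op.UPDATE);
      System.out.println(op + ": doc=" + hasDoc + ", term=" + hasTerm);
    }
  }
}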
Modified: hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocRecordReader.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocRecordReader.java?rev=1401071&r1=1401070&r2=1401071&view=diff
==============================================================================
--- hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocRecordReader.java (original)
+++ hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocRecordReader.java Mon Oct 22 20:43:16 2012
@@ -1,231 +1,231 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.example;
-
-import java.io.BufferedInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.contrib.index.mapred.DocumentAndOp;
-import org.apache.hadoop.contrib.index.mapred.DocumentID;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.FileSplit;
-import org.apache.hadoop.mapred.RecordReader;
-
-/**
- * A simple RecordReader for LineDoc: plain text files where each line is a
- * doc. Each line has the form documentID<SPACE>op<SPACE>content<EOL>,
- * where op can be "i", "ins" or "insert" for insert, "d", "del" or "delete"
- * for delete, or "u", "upd" or "update" for update.
- */
-public class LineDocRecordReader implements
- RecordReader<DocumentID, LineDocTextAndOp> {
- private static final char SPACE = ' ';
- private static final char EOL = '\n';
-
- private long start;
- private long pos;
- private long end;
- private BufferedInputStream in;
- private ByteArrayOutputStream buffer = new ByteArrayOutputStream(256);
-
- /**
- * Provide a bridge to get the bytes from the ByteArrayOutputStream without
- * creating a new byte array.
- */
- private static class TextStuffer extends OutputStream {
- public Text target;
-
- public void write(int b) {
- throw new UnsupportedOperationException("write(byte) not supported");
- }
-
- public void write(byte[] data, int offset, int len) throws IOException {
- target.set(data, offset, len);
- }
- }
-
- private TextStuffer bridge = new TextStuffer();
-
- /**
- * Constructor.
- * @param job the job configuration
- * @param split the file split to read
- * @throws IOException if the split's file cannot be opened or read
- */
- public LineDocRecordReader(Configuration job, FileSplit split)
- throws IOException {
- long start = split.getStart();
- long end = start + split.getLength();
- final Path file = split.getPath();
-
- // open the file and seek to the start of the split
- FileSystem fs = file.getFileSystem(job);
- FSDataInputStream fileIn = fs.open(split.getPath());
- InputStream in = fileIn;
- boolean skipFirstLine = false;
- if (start != 0) {
- skipFirstLine = true; // defer the skip until the BufferedInputStream is set up
- --start;
- fileIn.seek(start);
- }
-
- this.in = new BufferedInputStream(in);
- if (skipFirstLine) { // skip first line and re-establish "start".
- start += LineDocRecordReader.readData(this.in, null, EOL);
- }
- this.start = start;
- this.pos = start;
- this.end = end;
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapred.RecordReader#close()
- */
- public void close() throws IOException {
- in.close();
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapred.RecordReader#createKey()
- */
- public DocumentID createKey() {
- return new DocumentID();
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapred.RecordReader#createValue()
- */
- public LineDocTextAndOp createValue() {
- return new LineDocTextAndOp();
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapred.RecordReader#getPos()
- */
- public long getPos() throws IOException {
- return pos;
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapred.RecordReader#getProgress()
- */
- public float getProgress() throws IOException {
- if (start == end) {
- return 0.0f;
- } else {
- return Math.min(1.0f, (pos - start) / (float) (end - start));
- }
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapred.RecordReader#next(java.lang.Object, java.lang.Object)
- */
- public synchronized boolean next(DocumentID key, LineDocTextAndOp value)
- throws IOException {
- if (pos >= end) {
- return false;
- }
-
- // the key is the document id: the bytes up to the first space
- if (!readInto(key.getText(), SPACE)) {
- return false;
- }
-
- // read operation: i/d/u, or ins/del/upd, or insert/delete/update
- Text opText = new Text();
- if (!readInto(opText, SPACE)) {
- return false;
- }
- String opStr = opText.toString();
- DocumentAndOp.Op op;
- if (opStr.equals("i") || opStr.equals("ins") || opStr.equals("insert")) {
- op = DocumentAndOp.Op.INSERT;
- } else if (opStr.equals("d") || opStr.equals("del")
- || opStr.equals("delete")) {
- op = DocumentAndOp.Op.DELETE;
- } else if (opStr.equals("u") || opStr.equals("upd")
- || opStr.equals("update")) {
- op = DocumentAndOp.Op.UPDATE;
- } else {
- // default is insert
- op = DocumentAndOp.Op.INSERT;
- }
- value.setOp(op);
-
- if (op == DocumentAndOp.Op.DELETE) {
- return true;
- } else {
- // read rest of the line
- return readInto(value.getText(), EOL);
- }
- }
-
- private boolean readInto(Text text, char delimiter) throws IOException {
- buffer.reset();
- long bytesRead = readData(in, buffer, delimiter);
- if (bytesRead == 0) {
- return false;
- }
- pos += bytesRead;
- bridge.target = text;
- buffer.writeTo(bridge);
- return true;
- }
-
- private static long readData(InputStream in, OutputStream out, char delimiter)
- throws IOException {
- long bytes = 0;
- while (true) {
-
- int b = in.read();
- if (b == -1) {
- break;
- }
- bytes += 1;
-
- byte c = (byte) b;
- if (c == EOL || c == delimiter) {
- break;
- }
-
- if (c == '\r') {
- in.mark(1);
- byte nextC = (byte) in.read();
- if (nextC != EOL) { // bare CR, no LF follows: unread the lookahead byte
- in.reset();
- } else {
- bytes += 1;
- }
- break;
- }
-
- if (out != null) {
- out.write(c);
- }
- }
- return bytes;
- }
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.example;
+
+import java.io.BufferedInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.contrib.index.mapred.DocumentAndOp;
+import org.apache.hadoop.contrib.index.mapred.DocumentID;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.RecordReader;
+
+/**
+ * A simple RecordReader for LineDoc: plain text files where each line is a
+ * doc. Each line has the form documentID<SPACE>op<SPACE>content<EOL>,
+ * where op can be "i", "ins" or "insert" for insert, "d", "del" or "delete"
+ * for delete, or "u", "upd" or "update" for update.
+ */
+public class LineDocRecordReader implements
+ RecordReader<DocumentID, LineDocTextAndOp> {
+ private static final char SPACE = ' ';
+ private static final char EOL = '\n';
+
+ private long start;
+ private long pos;
+ private long end;
+ private BufferedInputStream in;
+ private ByteArrayOutputStream buffer = new ByteArrayOutputStream(256);
+
+ /**
+ * Provide a bridge to get the bytes from the ByteArrayOutputStream without
+ * creating a new byte array.
+ */
+ private static class TextStuffer extends OutputStream {
+ public Text target;
+
+ public void write(int b) {
+ throw new UnsupportedOperationException("write(byte) not supported");
+ }
+
+ public void write(byte[] data, int offset, int len) throws IOException {
+ target.set(data, offset, len);
+ }
+ }
+
+ private TextStuffer bridge = new TextStuffer();
+
+ /**
+ * Constructor.
+ * @param job the job configuration
+ * @param split the file split to read
+ * @throws IOException if the split's file cannot be opened or read
+ */
+ public LineDocRecordReader(Configuration job, FileSplit split)
+ throws IOException {
+ long start = split.getStart();
+ long end = start + split.getLength();
+ final Path file = split.getPath();
+
+ // open the file and seek to the start of the split
+ FileSystem fs = file.getFileSystem(job);
+ FSDataInputStream fileIn = fs.open(split.getPath());
+ InputStream in = fileIn;
+ boolean skipFirstLine = false;
+ if (start != 0) {
+ skipFirstLine = true; // defer the skip until the BufferedInputStream is set up
+ --start;
+ fileIn.seek(start);
+ }
+
+ this.in = new BufferedInputStream(in);
+ if (skipFirstLine) { // skip first line and re-establish "start".
+ start += LineDocRecordReader.readData(this.in, null, EOL);
+ }
+ this.start = start;
+ this.pos = start;
+ this.end = end;
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.mapred.RecordReader#close()
+ */
+ public void close() throws IOException {
+ in.close();
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.mapred.RecordReader#createKey()
+ */
+ public DocumentID createKey() {
+ return new DocumentID();
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.mapred.RecordReader#createValue()
+ */
+ public LineDocTextAndOp createValue() {
+ return new LineDocTextAndOp();
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.mapred.RecordReader#getPos()
+ */
+ public long getPos() throws IOException {
+ return pos;
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.mapred.RecordReader#getProgress()
+ */
+ public float getProgress() throws IOException {
+ if (start == end) {
+ return 0.0f;
+ } else {
+ return Math.min(1.0f, (pos - start) / (float) (end - start));
+ }
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.mapred.RecordReader#next(java.lang.Object, java.lang.Object)
+ */
+ public synchronized boolean next(DocumentID key, LineDocTextAndOp value)
+ throws IOException {
+ if (pos >= end) {
+ return false;
+ }
+
+ // the key is the document id: the bytes up to the first space
+ if (!readInto(key.getText(), SPACE)) {
+ return false;
+ }
+
+ // read operation: i/d/u, or ins/del/upd, or insert/delete/update
+ Text opText = new Text();
+ if (!readInto(opText, SPACE)) {
+ return false;
+ }
+ String opStr = opText.toString();
+ DocumentAndOp.Op op;
+ if (opStr.equals("i") || opStr.equals("ins") || opStr.equals("insert")) {
+ op = DocumentAndOp.Op.INSERT;
+ } else if (opStr.equals("d") || opStr.equals("del")
+ || opStr.equals("delete")) {
+ op = DocumentAndOp.Op.DELETE;
+ } else if (opStr.equals("u") || opStr.equals("upd")
+ || opStr.equals("update")) {
+ op = DocumentAndOp.Op.UPDATE;
+ } else {
+ // default is insert
+ op = DocumentAndOp.Op.INSERT;
+ }
+ value.setOp(op);
+
+ if (op == DocumentAndOp.Op.DELETE) {
+ return true;
+ } else {
+ // read rest of the line
+ return readInto(value.getText(), EOL);
+ }
+ }
+
+ private boolean readInto(Text text, char delimiter) throws IOException {
+ buffer.reset();
+ long bytesRead = readData(in, buffer, delimiter);
+ if (bytesRead == 0) {
+ return false;
+ }
+ pos += bytesRead;
+ bridge.target = text;
+ buffer.writeTo(bridge);
+ return true;
+ }
+
+ private static long readData(InputStream in, OutputStream out, char delimiter)
+ throws IOException {
+ long bytes = 0;
+ while (true) {
+
+ int b = in.read();
+ if (b == -1) {
+ break;
+ }
+ bytes += 1;
+
+ byte c = (byte) b;
+ if (c == EOL || c == delimiter) {
+ break;
+ }
+
+ if (c == '\r') {
+ in.mark(1);
+ byte nextC = (byte) in.read();
+ if (nextC != EOL) { // bare CR, no LF follows: unread the lookahead byte
+ in.reset();
+ } else {
+ bytes += 1;
+ }
+ break;
+ }
+
+ if (out != null) {
+ out.write(c);
+ }
+ }
+ return bytes;
+ }
+}
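
A hedged usage sketch, not part of this commit: it reads a small local file through LineDocRecordReader as a single split and prints each parsed record. The file name "docs.txt" and the demo class are invented; a bare Configuration defaults to the local file system, which is what the sketch relies on.

    package org.apache.hadoop.contrib.index.example;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.contrib.index.mapred.DocumentAndOp;
    import org.apache.hadoop.contrib.index.mapred.DocumentID;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapred.FileSplit;

    // Hypothetical, for illustration only. Expects a local docs.txt such as:
    //   doc1 insert hello indexing world
    //   doc2 ins another document body
    //   doc1 del
    public class LineDocRecordReaderDemo {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path("docs.txt");
        long len = FileSystem.getLocal(conf).getFileStatus(path).getLen();

        LineDocRecordReader reader = new LineDocRecordReader(
            conf, new FileSplit(path, 0, len, (String[]) null));
        DocumentID key = reader.createKey();
        LineDocTextAndOp value = reader.createValue();
        while (reader.next(key, value)) {
          // the value Text is reused, so only read it for non-delete ops;
          // prints e.g. "doc1 / INSERT / hello indexing world"
          System.out.println(key.getText() + " / " + value.getOp()
              + (value.getOp() == DocumentAndOp.Op.DELETE
                  ? "" : " / " + value.getText()));
        }
        reader.close();
      }
    }

Note the design choice behind TextStuffer: readInto() copies the buffered bytes straight into the target Text via ByteArrayOutputStream.writeTo(), avoiding the extra array that toByteArray() would allocate per record.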
Modified: hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocTextAndOp.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocTextAndOp.java?rev=1401071&r1=1401070&r2=1401071&view=diff
==============================================================================
--- hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocTextAndOp.java (original)
+++ hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocTextAndOp.java Mon Oct 22 20:43:16 2012
@@ -1,92 +1,92 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.example;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.hadoop.contrib.index.mapred.DocumentAndOp;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-
-/**
- * This class represents an indexing operation: an insert, a delete, or an
- * update. For an insert or an update, the (new) document is supplied in
- * the form of text.
- */
-public class LineDocTextAndOp implements Writable {
- private DocumentAndOp.Op op;
- private Text doc;
-
- /**
- * Constructor
- */
- public LineDocTextAndOp() {
- doc = new Text();
- }
-
- /**
- * Set the type of the operation.
- * @param op the type of the operation
- */
- public void setOp(DocumentAndOp.Op op) {
- this.op = op;
- }
-
- /**
- * Get the type of the operation.
- * @return the type of the operation
- */
- public DocumentAndOp.Op getOp() {
- return op;
- }
-
- /**
- * Get the text that represents a document.
- * @return the text that represents a document
- */
- public Text getText() {
- return doc;
- }
-
- /* (non-Javadoc)
- * @see java.lang.Object#toString()
- */
- public String toString() {
- return this.getClass().getName() + "[op=" + op + ", text=" + doc + "]";
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.io.Writable#write(java.io.DataOutput)
- */
- public void write(DataOutput out) throws IOException {
- throw new IOException(this.getClass().getName()
- + ".write should never be called");
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.io.Writable#readFields(java.io.DataInput)
- */
- public void readFields(DataInput in) throws IOException {
- throw new IOException(this.getClass().getName()
- + ".readFields should never be called");
- }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.example;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.contrib.index.mapred.DocumentAndOp;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * This class represents an indexing operation: an insert, a delete, or an
+ * update. For an insert or an update, the (new) document is supplied in
+ * the form of text.
+ */
+public class LineDocTextAndOp implements Writable {
+ private DocumentAndOp.Op op;
+ private Text doc;
+
+ /**
+ * Constructor
+ */
+ public LineDocTextAndOp() {
+ doc = new Text();
+ }
+
+ /**
+ * Set the type of the operation.
+ * @param op the type of the operation
+ */
+ public void setOp(DocumentAndOp.Op op) {
+ this.op = op;
+ }
+
+ /**
+ * Get the type of the operation.
+ * @return the type of the operation
+ */
+ public DocumentAndOp.Op getOp() {
+ return op;
+ }
+
+ /**
+ * Get the text that represents a document.
+ * @return the text that represents a document
+ */
+ public Text getText() {
+ return doc;
+ }
+
+ /* (non-Javadoc)
+ * @see java.lang.Object#toString()
+ */
+ public String toString() {
+ return this.getClass().getName() + "[op=" + op + ", text=" + doc + "]";
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.io.Writable#write(java.io.DataOutput)
+ */
+ public void write(DataOutput out) throws IOException {
+ throw new IOException(this.getClass().getName()
+ + ".write should never be called");
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.io.Writable#readFields(java.io.DataInput)
+ */
+ public void readFields(DataInput in) throws IOException {
+ throw new IOException(this.getClass().getName()
+ + ".readFields should never be called");
+ }
+
+}
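
The write()/readFields() stubs above encode a deliberate contract: the value travels only in memory from the record reader to the mapper and is never serialized by the framework. A tiny hypothetical check, not part of this commit, that exercises that contract:

    package org.apache.hadoop.contrib.index.example;

    import java.io.ByteArrayOutputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;

    // Hypothetical, for illustration only.
    public class LineDocTextAndOpContractDemo {
      public static void main(String[] args) {
        LineDocTextAndOp v = new LineDocTextAndOp();
        try {
          v.write(new DataOutputStream(new ByteArrayOutputStream()));
        } catch (IOException expected) {
          // "...write should never be called", as documented above
          System.out.println(expected.getMessage());
        }
      }
    }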
Modified: hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/RoundRobinDistributionPolicy.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/RoundRobinDistributionPolicy.java?rev=1401071&r1=1401070&r2=1401071&view=diff
==============================================================================
--- hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/RoundRobinDistributionPolicy.java (original)
+++ hadoop/common/branches/branch-trunk-win/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/RoundRobinDistributionPolicy.java Mon Oct 22 20:43:16 2012
@@ -1,58 +1,58 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.example;
-
-import org.apache.hadoop.contrib.index.mapred.DocumentID;
-import org.apache.hadoop.contrib.index.mapred.IDistributionPolicy;
-import org.apache.hadoop.contrib.index.mapred.Shard;
-
-/**
- * Choose a shard for each insert in a round-robin fashion. Send each delete
- * to all the shards, since we don't know which shard stores the document.
- */
-public class RoundRobinDistributionPolicy implements IDistributionPolicy {
-
- private int numShards;
- private int rr; // index of the next shard in the rotation
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#init(org.apache.hadoop.contrib.index.mapred.Shard[])
- */
- public void init(Shard[] shards) {
- numShards = shards.length;
- rr = 0;
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#chooseShardForInsert(org.apache.hadoop.contrib.index.mapred.DocumentID)
- */
- public int chooseShardForInsert(DocumentID key) {
- int chosen = rr;
- rr = (rr + 1) % numShards;
- return chosen;
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#chooseShardForDelete(org.apache.hadoop.contrib.index.mapred.DocumentID)
- */
- public int chooseShardForDelete(DocumentID key) {
- // -1 represents all the shards
- return -1;
- }
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.example;
+
+import org.apache.hadoop.contrib.index.mapred.DocumentID;
+import org.apache.hadoop.contrib.index.mapred.IDistributionPolicy;
+import org.apache.hadoop.contrib.index.mapred.Shard;
+
+/**
+ * Choose a shard for each insert in a round-robin fashion. Send each delete
+ * to all the shards, since we don't know which shard stores the document.
+ */
+public class RoundRobinDistributionPolicy implements IDistributionPolicy {
+
+ private int numShards;
+ private int rr; // index of the next shard in the rotation
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#init(org.apache.hadoop.contrib.index.mapred.Shard[])
+ */
+ public void init(Shard[] shards) {
+ numShards = shards.length;
+ rr = 0;
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#chooseShardForInsert(org.apache.hadoop.contrib.index.mapred.DocumentID)
+ */
+ public int chooseShardForInsert(DocumentID key) {
+ int chosen = rr;
+ rr = (rr + 1) % numShards;
+ return chosen;
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#chooseShardForDelete(org.apache.hadoop.contrib.index.mapred.DocumentID)
+ */
+ public int chooseShardForDelete(DocumentID key) {
+ // -1 represents all the shards
+ return -1;
+ }
+}
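
A quick hypothetical demo, not part of this commit, of the policy's two behaviors: inserts cycle through shard indices 0, 1, 2, 0, ... while every delete returns -1, the "all shards" marker. The demo class is invented; since init() only reads the array length, empty shard slots are enough for the sketch.

    package org.apache.hadoop.contrib.index.example;

    import org.apache.hadoop.contrib.index.mapred.DocumentID;
    import org.apache.hadoop.contrib.index.mapred.Shard;

    // Hypothetical, for illustration only.
    public class RoundRobinDemo {
      public static void main(String[] args) {
        RoundRobinDistributionPolicy policy = new RoundRobinDistributionPolicy();
        policy.init(new Shard[3]);          // only the length is used

        DocumentID key = new DocumentID();  // the key is ignored by this policy
        for (int i = 0; i < 5; i++) {
          System.out.print(policy.chooseShardForInsert(key) + " ");  // 0 1 2 0 1
        }
        System.out.println();
        System.out.println(policy.chooseShardForDelete(key));        // -1
      }
    }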