You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tez.apache.org by bi...@apache.org on 2014/09/29 02:35:19 UTC
[13/50] [abbrv] TEZ-1578. Remove TeraSort from Tez codebase. (hitesh)
http://git-wip-us.apache.org/repos/asf/tez/blob/8e382b34/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/terasort/TeraSort.java
----------------------------------------------------------------------
diff --git a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/terasort/TeraSort.java b/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/terasort/TeraSort.java
deleted file mode 100644
index 5a097f2..0000000
--- a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/terasort/TeraSort.java
+++ /dev/null
@@ -1,335 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tez.mapreduce.examples.terasort;
-
-import java.io.DataInputStream;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.net.URI;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.MRJobConfig;
-import org.apache.hadoop.mapreduce.Partitioner;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-/**
- * Generates the sampled split points, launches the job, and waits for it to
- * finish.
- * <p>
- * To run the program:
- * <b>bin/hadoop jar hadoop-*-examples.jar terasort in-dir out-dir</b>
- */
-public class TeraSort extends Configured implements Tool {
- private static final Log LOG = LogFactory.getLog(TeraSort.class);
- static String SIMPLE_PARTITIONER = "mapreduce.terasort.simplepartitioner";
- static String OUTPUT_REPLICATION = "mapreduce.terasort.output.replication";
-
- /**
- * A partitioner that splits text keys into roughly equal partitions
- * in a global sorted order.
- */
- static class TotalOrderPartitioner extends Partitioner<Text,Text>
- implements Configurable {
- private TrieNode trie;
- private Text[] splitPoints;
- private Configuration conf;
-
- /**
- * A generic trie node
- */
- static abstract class TrieNode {
- private int level;
- TrieNode(int level) {
- this.level = level;
- }
- abstract int findPartition(Text key);
- abstract void print(PrintStream strm) throws IOException;
- int getLevel() {
- return level;
- }
- }
-
- /**
- * An inner trie node that contains 256 children based on the next
- * character.
- */
- static class InnerTrieNode extends TrieNode {
- private TrieNode[] child = new TrieNode[256];
-
- InnerTrieNode(int level) {
- super(level);
- }
- int findPartition(Text key) {
- int level = getLevel();
- if (key.getLength() <= level) {
- return child[0].findPartition(key);
- }
- return child[key.getBytes()[level] & 0xff].findPartition(key);
- }
- void setChild(int idx, TrieNode child) {
- this.child[idx] = child;
- }
- void print(PrintStream strm) throws IOException {
- for(int ch=0; ch < 256; ++ch) {
- for(int i = 0; i < 2*getLevel(); ++i) {
- strm.print(' ');
- }
- strm.print(ch);
- strm.println(" ->");
- if (child[ch] != null) {
- child[ch].print(strm);
- }
- }
- }
- }
-
- /**
- * A leaf trie node that does string compares to figure out where the given
- * key belongs between lower..upper.
- */
- static class LeafTrieNode extends TrieNode {
- int lower;
- int upper;
- Text[] splitPoints;
- LeafTrieNode(int level, Text[] splitPoints, int lower, int upper) {
- super(level);
- this.splitPoints = splitPoints;
- this.lower = lower;
- this.upper = upper;
- }
- int findPartition(Text key) {
- for(int i=lower; i<upper; ++i) {
- if (splitPoints[i].compareTo(key) > 0) {
- return i;
- }
- }
- return upper;
- }
- void print(PrintStream strm) throws IOException {
- for(int i = 0; i < 2*getLevel(); ++i) {
- strm.print(' ');
- }
- strm.print(lower);
- strm.print(", ");
- strm.println(upper);
- }
- }
-
-
- /**
- * Read the cut points from the given sequence file.
- * @param fs the file system
- * @param p the path to read
- * @param job the job config
- * @return the strings to split the partitions on
- * @throws IOException
- */
- private static Text[] readPartitions(FileSystem fs, Path p,
- Configuration conf) throws IOException {
- int reduces = conf.getInt(MRJobConfig.NUM_REDUCES, 1);
- Text[] result = new Text[reduces - 1];
- DataInputStream reader = fs.open(p);
- for(int i=0; i < reduces - 1; ++i) {
- result[i] = new Text();
- result[i].readFields(reader);
- }
- reader.close();
- return result;
- }
-
- /**
- * Given a sorted set of cut points, build a trie that will find the correct
- * partition quickly.
- * @param splits the list of cut points
- * @param lower the lower bound of partitions 0..numPartitions-1
- * @param upper the upper bound of partitions 0..numPartitions-1
- * @param prefix the prefix that we have already checked against
- * @param maxDepth the maximum depth we will build a trie for
- * @return the trie node that will divide the splits correctly
- */
- private static TrieNode buildTrie(Text[] splits, int lower, int upper,
- Text prefix, int maxDepth) {
- int depth = prefix.getLength();
- if (depth >= maxDepth || lower == upper) {
- return new LeafTrieNode(depth, splits, lower, upper);
- }
- InnerTrieNode result = new InnerTrieNode(depth);
- Text trial = new Text(prefix);
- // append an extra byte on to the prefix
- trial.append(new byte[1], 0, 1);
- int currentBound = lower;
- for(int ch = 0; ch < 255; ++ch) {
- trial.getBytes()[depth] = (byte) (ch + 1);
- lower = currentBound;
- while (currentBound < upper) {
- if (splits[currentBound].compareTo(trial) >= 0) {
- break;
- }
- currentBound += 1;
- }
- trial.getBytes()[depth] = (byte) ch;
- result.child[ch] = buildTrie(splits, lower, currentBound, trial,
- maxDepth);
- }
- // pick up the rest
- trial.getBytes()[depth] = (byte) 255;
- result.child[255] = buildTrie(splits, currentBound, upper, trial,
- maxDepth);
- return result;
- }
-
- public void setConf(Configuration conf) {
- try {
- FileSystem fs = FileSystem.getLocal(conf);
- this.conf = conf;
- Path partFile = new Path(TeraInputFormat.PARTITION_FILENAME);
- splitPoints = readPartitions(fs, partFile, conf);
- trie = buildTrie(splitPoints, 0, splitPoints.length, new Text(), 2);
- } catch (IOException ie) {
- throw new IllegalArgumentException("can't read paritions file", ie);
- }
- }
-
- public Configuration getConf() {
- return conf;
- }
-
- public TotalOrderPartitioner() {
- }
-
- public int getPartition(Text key, Text value, int numPartitions) {
- return trie.findPartition(key);
- }
-
- }
-
- /**
- * A total order partitioner that assigns keys based on their first
- * PREFIX_LENGTH bytes, assuming a flat distribution.
- */
- public static class SimplePartitioner extends Partitioner<Text, Text>
- implements Configurable {
- int prefixesPerReduce;
- private static final int PREFIX_LENGTH = 3;
- private Configuration conf = null;
- public void setConf(Configuration conf) {
- this.conf = conf;
- prefixesPerReduce = (int) Math.ceil((1 << (8 * PREFIX_LENGTH)) /
- (float) conf.getInt(MRJobConfig.NUM_REDUCES, 1));
- }
-
- public Configuration getConf() {
- return conf;
- }
-
- @Override
- public int getPartition(Text key, Text value, int numPartitions) {
- byte[] bytes = key.getBytes();
- int len = Math.min(PREFIX_LENGTH, key.getLength());
- int prefix = 0;
- for(int i=0; i < len; ++i) {
- prefix = (prefix << 8) | (0xff & bytes[i]);
- }
- return prefix / prefixesPerReduce;
- }
- }
-
- public static boolean getUseSimplePartitioner(JobContext job) {
- return job.getConfiguration().getBoolean(SIMPLE_PARTITIONER, false);
- }
-
- public static void setUseSimplePartitioner(Job job, boolean value) {
- job.getConfiguration().setBoolean(SIMPLE_PARTITIONER, value);
- }
-
- public static int getOutputReplication(JobContext job) {
- return job.getConfiguration().getInt(OUTPUT_REPLICATION, 1);
- }
-
- public static void setOutputReplication(Job job, int value) {
- job.getConfiguration().setInt(OUTPUT_REPLICATION, value);
- }
-
- public int run(String[] args) throws Exception {
- if (args.length != 2) {
- System.err.println("Invalid no. of arguments provided");
- System.err.println("Usage: terasort <input-dir> <output-dir>");
- return -1;
- }
-
- LOG.info("starting");
- Job job = Job.getInstance(getConf());
- Path inputDir = new Path(args[0]);
- Path outputDir = new Path(args[1]);
- boolean useSimplePartitioner = getUseSimplePartitioner(job);
- TeraInputFormat.setInputPaths(job, inputDir);
- FileOutputFormat.setOutputPath(job, outputDir);
- job.setJobName("TeraSort");
- job.setJarByClass(TeraSort.class);
- job.setOutputKeyClass(Text.class);
- job.setOutputValueClass(Text.class);
- job.setInputFormatClass(TeraInputFormat.class);
- job.setOutputFormatClass(TeraOutputFormat.class);
- if (useSimplePartitioner) {
- job.setPartitionerClass(SimplePartitioner.class);
- } else {
- long start = System.currentTimeMillis();
- Path partitionFile = new Path(outputDir,
- TeraInputFormat.PARTITION_FILENAME);
- URI partitionUri = new URI(partitionFile.toString() +
- "#" + TeraInputFormat.PARTITION_FILENAME);
- try {
- TeraInputFormat.writePartitionFile(job, partitionFile);
- } catch (Throwable e) {
- LOG.error(e.getMessage());
- return -1;
- }
- job.addCacheFile(partitionUri);
- long end = System.currentTimeMillis();
- System.out.println("Spent " + (end - start) + "ms computing partitions.");
- job.setPartitionerClass(TotalOrderPartitioner.class);
- }
-
- job.getConfiguration().setInt("dfs.replication", getOutputReplication(job));
- TeraOutputFormat.setFinalSync(job, true);
- int ret = job.waitForCompletion(true) ? 0 : 1;
- LOG.info("done");
- return ret;
- }
-
- /**
- * @param args
- */
- public static void main(String[] args) throws Exception {
- int res = ToolRunner.run(new Configuration(), new TeraSort(), args);
- System.exit(res);
- }
-
-}
http://git-wip-us.apache.org/repos/asf/tez/blob/8e382b34/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/terasort/TeraValidate.java
----------------------------------------------------------------------
diff --git a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/terasort/TeraValidate.java b/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/terasort/TeraValidate.java
deleted file mode 100644
index 24c3145..0000000
--- a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/terasort/TeraValidate.java
+++ /dev/null
@@ -1,188 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tez.mapreduce.examples.terasort;
-
-import java.io.IOException;
-import java.util.zip.Checksum;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.FileSplit;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.util.PureJavaCrc32;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-/**
- * Generate 1 mapper per a file that checks to make sure the keys
- * are sorted within each file. The mapper also generates
- * "$file:begin", first key and "$file:end", last key. The reduce verifies that
- * all of the start/end items are in order.
- * Any output from the reduce is problem report.
- * <p>
- * To run the program:
- * <b>bin/hadoop jar hadoop-*-examples.jar teravalidate out-dir report-dir</b>
- * <p>
- * If there is any output, something is wrong and the output of the reduce
- * will have the problem report.
- */
-public class TeraValidate extends Configured implements Tool {
- private static final Text ERROR = new Text("error");
- private static final Text CHECKSUM = new Text("checksum");
-
- private static String textifyBytes(Text t) {
- BytesWritable b = new BytesWritable();
- b.set(t.getBytes(), 0, t.getLength());
- return b.toString();
- }
-
- static class ValidateMapper extends Mapper<Text,Text,Text,Text> {
- private Text lastKey;
- private String filename;
- private Unsigned16 checksum = new Unsigned16();
- private Unsigned16 tmp = new Unsigned16();
- private Checksum crc32 = new PureJavaCrc32();
-
- /**
- * Get the final part of the input name
- * @param split the input split
- * @return the "part-r-00000" for the input
- */
- private String getFilename(FileSplit split) {
- return split.getPath().getName();
- }
-
- public void map(Text key, Text value, Context context)
- throws IOException, InterruptedException {
- if (lastKey == null) {
- FileSplit fs = (FileSplit) context.getInputSplit();
- filename = getFilename(fs);
- context.write(new Text(filename + ":begin"), key);
- lastKey = new Text();
- } else {
- if (key.compareTo(lastKey) < 0) {
- context.write(ERROR, new Text("misorder in " + filename +
- " between " + textifyBytes(lastKey) +
- " and " + textifyBytes(key)));
- }
- }
- // compute the crc of the key and value and add it to the sum
- crc32.reset();
- crc32.update(key.getBytes(), 0, key.getLength());
- crc32.update(value.getBytes(), 0, value.getLength());
- tmp.set(crc32.getValue());
- checksum.add(tmp);
- lastKey.set(key);
- }
-
- public void cleanup(Context context)
- throws IOException, InterruptedException {
- if (lastKey != null) {
- context.write(new Text(filename + ":end"), lastKey);
- context.write(CHECKSUM, new Text(checksum.toString()));
- }
- }
- }
-
- /**
- * Check the boundaries between the output files by making sure that the
- * boundary keys are always increasing.
- * Also passes any error reports along intact.
- */
- static class ValidateReducer extends Reducer<Text,Text,Text,Text> {
- private boolean firstKey = true;
- private Text lastKey = new Text();
- private Text lastValue = new Text();
- public void reduce(Text key, Iterable<Text> values,
- Context context) throws IOException, InterruptedException {
- if (ERROR.equals(key)) {
- for (Text val : values) {
- context.write(key, val);
- }
- } else if (CHECKSUM.equals(key)) {
- Unsigned16 tmp = new Unsigned16();
- Unsigned16 sum = new Unsigned16();
- for (Text val : values) {
- tmp.set(val.toString());
- sum.add(tmp);
- }
- context.write(CHECKSUM, new Text(sum.toString()));
- } else {
- Text value = values.iterator().next();
- if (firstKey) {
- firstKey = false;
- } else {
- if (value.compareTo(lastValue) < 0) {
- context.write(ERROR,
- new Text("bad key partitioning:\n file " +
- lastKey + " key " +
- textifyBytes(lastValue) +
- "\n file " + key + " key " +
- textifyBytes(value)));
- }
- }
- lastKey.set(key);
- lastValue.set(value);
- }
- }
-
- }
-
- private static void usage() throws IOException {
- System.err.println("teravalidate <out-dir> <report-dir>");
- }
-
- public int run(String[] args) throws Exception {
- Job job = Job.getInstance(getConf());
- if (args.length != 2) {
- usage();
- return 1;
- }
- TeraInputFormat.setInputPaths(job, new Path(args[0]));
- FileOutputFormat.setOutputPath(job, new Path(args[1]));
- job.setJobName("TeraValidate");
- job.setJarByClass(TeraValidate.class);
- job.setMapperClass(ValidateMapper.class);
- job.setReducerClass(ValidateReducer.class);
- job.setOutputKeyClass(Text.class);
- job.setOutputValueClass(Text.class);
- // force a single reducer
- job.setNumReduceTasks(1);
- // force a single split
- FileInputFormat.setMinInputSplitSize(job, Long.MAX_VALUE);
- job.setInputFormatClass(TeraInputFormat.class);
- return job.waitForCompletion(true) ? 0 : 1;
- }
-
- /**
- * @param args
- */
- public static void main(String[] args) throws Exception {
- int res = ToolRunner.run(new Configuration(), new TeraValidate(), args);
- System.exit(res);
- }
-
-}
http://git-wip-us.apache.org/repos/asf/tez/blob/8e382b34/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/terasort/Unsigned16.java
----------------------------------------------------------------------
diff --git a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/terasort/Unsigned16.java b/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/terasort/Unsigned16.java
deleted file mode 100644
index 1335bf4..0000000
--- a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/terasort/Unsigned16.java
+++ /dev/null
@@ -1,297 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tez.mapreduce.examples.terasort;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.hadoop.io.Writable;
-
-/**
- * An unsigned 16 byte integer class that supports addition, multiplication,
- * and left shifts.
- */
-class Unsigned16 implements Writable {
- private long hi8;
- private long lo8;
-
- public Unsigned16() {
- hi8 = 0;
- lo8 = 0;
- }
-
- public Unsigned16(long l) {
- hi8 = 0;
- lo8 = l;
- }
-
- public Unsigned16(Unsigned16 other) {
- hi8 = other.hi8;
- lo8 = other.lo8;
- }
-
- @Override
- public boolean equals(Object o) {
- if (o instanceof Unsigned16) {
- Unsigned16 other = (Unsigned16) o;
- return other.hi8 == hi8 && other.lo8 == lo8;
- }
- return false;
- }
-
- @Override
- public int hashCode() {
- return (int) lo8;
- }
-
- /**
- * Parse a hex string
- * @param s the hex string
- */
- public Unsigned16(String s) throws NumberFormatException {
- set(s);
- }
-
- /**
- * Set the number from a hex string
- * @param s the number in hexadecimal
- * @throws NumberFormatException if the number is invalid
- */
- public void set(String s) throws NumberFormatException {
- hi8 = 0;
- lo8 = 0;
- final long lastDigit = 0xfl << 60;
- for (int i = 0; i < s.length(); ++i) {
- int digit = getHexDigit(s.charAt(i));
- if ((lastDigit & hi8) != 0) {
- throw new NumberFormatException(s + " overflowed 16 bytes");
- }
- hi8 <<= 4;
- hi8 |= (lo8 & lastDigit) >>> 60;
- lo8 <<= 4;
- lo8 |= digit;
- }
- }
-
- /**
- * Set the number to a given long.
- * @param l the new value, which is treated as an unsigned number
- */
- public void set(long l) {
- lo8 = l;
- hi8 = 0;
- }
-
- /**
- * Map a hexadecimal character into a digit.
- * @param ch the character
- * @return the digit from 0 to 15
- * @throws NumberFormatException
- */
- private static int getHexDigit(char ch) throws NumberFormatException {
- if (ch >= '0' && ch <= '9') {
- return ch - '0';
- }
- if (ch >= 'a' && ch <= 'f') {
- return ch - 'a' + 10;
- }
- if (ch >= 'A' && ch <= 'F') {
- return ch - 'A' + 10;
- }
- throw new NumberFormatException(ch + " is not a valid hex digit");
- }
-
- private static final Unsigned16 TEN = new Unsigned16(10);
-
- public static Unsigned16 fromDecimal(String s) throws NumberFormatException {
- Unsigned16 result = new Unsigned16();
- Unsigned16 tmp = new Unsigned16();
- for(int i=0; i < s.length(); i++) {
- char ch = s.charAt(i);
- if (ch < '0' || ch > '9') {
- throw new NumberFormatException(ch + " not a valid decimal digit");
- }
- int digit = ch - '0';
- result.multiply(TEN);
- tmp.set(digit);
- result.add(tmp);
- }
- return result;
- }
-
- /**
- * Return the number as a hex string.
- */
- public String toString() {
- if (hi8 == 0) {
- return Long.toHexString(lo8);
- } else {
- StringBuilder result = new StringBuilder();
- result.append(Long.toHexString(hi8));
- String loString = Long.toHexString(lo8);
- for(int i=loString.length(); i < 16; ++i) {
- result.append('0');
- }
- result.append(loString);
- return result.toString();
- }
- }
-
- /**
- * Get a given byte from the number.
- * @param b the byte to get with 0 meaning the most significant byte
- * @return the byte or 0 if b is outside of 0..15
- */
- public byte getByte(int b) {
- if (b >= 0 && b < 16) {
- if (b < 8) {
- return (byte) (hi8 >> (56 - 8*b));
- } else {
- return (byte) (lo8 >> (120 - 8*b));
- }
- }
- return 0;
- }
-
- /**
- * Get the hexadecimal digit at the given position.
- * @param p the digit position to get with 0 meaning the most significant
- * @return the character or '0' if p is outside of 0..31
- */
- public char getHexDigit(int p) {
- byte digit = getByte(p / 2);
- if (p % 2 == 0) {
- digit >>>= 4;
- }
- digit &= 0xf;
- if (digit < 10) {
- return (char) ('0' + digit);
- } else {
- return (char) ('A' + digit - 10);
- }
- }
-
- /**
- * Get the high 8 bytes as a long.
- */
- public long getHigh8() {
- return hi8;
- }
-
- /**
- * Get the low 8 bytes as a long.
- */
- public long getLow8() {
- return lo8;
- }
-
- /**
- * Multiple the current number by a 16 byte unsigned integer. Overflow is not
- * detected and the result is the low 16 bytes of the result. The numbers
- * are divided into 32 and 31 bit chunks so that the product of two chucks
- * fits in the unsigned 63 bits of a long.
- * @param b the other number
- */
- void multiply(Unsigned16 b) {
- // divide the left into 4 32 bit chunks
- long[] left = new long[4];
- left[0] = lo8 & 0xffffffffl;
- left[1] = lo8 >>> 32;
- left[2] = hi8 & 0xffffffffl;
- left[3] = hi8 >>> 32;
- // divide the right into 5 31 bit chunks
- long[] right = new long[5];
- right[0] = b.lo8 & 0x7fffffffl;
- right[1] = (b.lo8 >>> 31) & 0x7fffffffl;
- right[2] = (b.lo8 >>> 62) + ((b.hi8 & 0x1fffffffl) << 2);
- right[3] = (b.hi8 >>> 29) & 0x7fffffffl;
- right[4] = (b.hi8 >>> 60);
- // clear the cur value
- set(0);
- Unsigned16 tmp = new Unsigned16();
- for(int l=0; l < 4; ++l) {
- for (int r=0; r < 5; ++r) {
- long prod = left[l] * right[r];
- if (prod != 0) {
- int off = l*32 + r*31;
- tmp.set(prod);
- tmp.shiftLeft(off);
- add(tmp);
- }
- }
- }
- }
-
- /**
- * Add the given number into the current number.
- * @param b the other number
- */
- public void add(Unsigned16 b) {
- long sumHi;
- long sumLo;
- long reshibit, hibit0, hibit1;
-
- sumHi = hi8 + b.hi8;
-
- hibit0 = (lo8 & 0x8000000000000000L);
- hibit1 = (b.lo8 & 0x8000000000000000L);
- sumLo = lo8 + b.lo8;
- reshibit = (sumLo & 0x8000000000000000L);
- if ((hibit0 & hibit1) != 0 | ((hibit0 ^ hibit1) != 0 && reshibit == 0))
- sumHi++; /* add carry bit */
- hi8 = sumHi;
- lo8 = sumLo;
- }
-
- /**
- * Shift the number a given number of bit positions. The number is the low
- * order bits of the result.
- * @param bits the bit positions to shift by
- */
- public void shiftLeft(int bits) {
- if (bits != 0) {
- if (bits < 64) {
- hi8 <<= bits;
- hi8 |= (lo8 >>> (64 - bits));
- lo8 <<= bits;
- } else if (bits < 128) {
- hi8 = lo8 << (bits - 64);
- lo8 = 0;
- } else {
- hi8 = 0;
- lo8 = 0;
- }
- }
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- hi8 = in.readLong();
- lo8 = in.readLong();
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- out.writeLong(hi8);
- out.writeLong(lo8);
- }
-
-
-}
http://git-wip-us.apache.org/repos/asf/tez/blob/8e382b34/tez-tests/src/test/java/org/apache/tez/mapreduce/examples/terasort/TestTeraSort.java
----------------------------------------------------------------------
diff --git a/tez-tests/src/test/java/org/apache/tez/mapreduce/examples/terasort/TestTeraSort.java b/tez-tests/src/test/java/org/apache/tez/mapreduce/examples/terasort/TestTeraSort.java
deleted file mode 100644
index 97b6538..0000000
--- a/tez-tests/src/test/java/org/apache/tez/mapreduce/examples/terasort/TestTeraSort.java
+++ /dev/null
@@ -1,94 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tez.mapreduce.examples.terasort;
-
-import java.io.File;
-import java.io.IOException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.HadoopTestCase;
-import org.apache.hadoop.util.ToolRunner;
-import org.apache.tez.mapreduce.examples.terasort.TeraGen;
-import org.apache.tez.mapreduce.examples.terasort.TeraSort;
-import org.apache.tez.mapreduce.examples.terasort.TeraValidate;
-
-import org.junit.Ignore;
-
-@Ignore
-public class TestTeraSort extends HadoopTestCase {
-
- public TestTeraSort()
- throws IOException {
- super(CLUSTER_MR, DFS_FS, 1, 1);
- }
-
- protected void tearDown() throws Exception {
- getFileSystem().delete(new Path(TEST_DIR), true);
- super.tearDown();
- }
-
- // Input/Output paths for sort
- private static final String TEST_DIR =
- new File(System.getProperty("test.build.data", "/tmp"), "org/apache/tez/mapreduce/examples/terasort")
- .getAbsolutePath();
- private static final Path SORT_INPUT_PATH = new Path(TEST_DIR, "sortin");
- private static final Path SORT_OUTPUT_PATH = new Path(TEST_DIR, "sortout");
- private static final Path TERA_OUTPUT_PATH = new Path(TEST_DIR, "validate");
- private static final String NUM_ROWS = "100";
-
- private void runTeraGen(Configuration conf, Path sortInput)
- throws Exception {
- String[] genArgs = {NUM_ROWS, sortInput.toString()};
-
- // Run TeraGen
- assertEquals(ToolRunner.run(conf, new TeraGen(), genArgs), 0);
- }
-
- private void runTeraSort(Configuration conf,
- Path sortInput, Path sortOutput) throws Exception {
-
- // Setup command-line arguments to 'sort'
- String[] sortArgs = {sortInput.toString(), sortOutput.toString()};
-
- // Run Sort
- assertEquals(ToolRunner.run(conf, new TeraSort(), sortArgs), 0);
- }
-
- private void runTeraValidator(Configuration job,
- Path sortOutput, Path valOutput)
- throws Exception {
- String[] svArgs = {sortOutput.toString(), valOutput.toString()};
-
- // Run Tera-Validator
- assertEquals(ToolRunner.run(job, new TeraValidate(), svArgs), 0);
- }
-
- public void testTeraSort() throws Exception {
- // Run TeraGen to generate input for 'terasort'
- runTeraGen(createJobConf(), SORT_INPUT_PATH);
-
- // Run terasort
- runTeraSort(createJobConf(), SORT_INPUT_PATH, SORT_OUTPUT_PATH);
-
- // Run tera-validator to check if sort worked correctly
- runTeraValidator(createJobConf(), SORT_OUTPUT_PATH,
- TERA_OUTPUT_PATH);
- }
-
-}