You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tez.apache.org by je...@apache.org on 2014/09/18 21:49:58 UTC

[13/25] TEZ-1578. Remove TeraSort from Tez codebase. (hitesh)

http://git-wip-us.apache.org/repos/asf/tez/blob/8e382b34/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/terasort/TeraSort.java
----------------------------------------------------------------------
diff --git a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/terasort/TeraSort.java b/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/terasort/TeraSort.java
deleted file mode 100644
index 5a097f2..0000000
--- a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/terasort/TeraSort.java
+++ /dev/null
@@ -1,335 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tez.mapreduce.examples.terasort;
-
-import java.io.DataInputStream;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.net.URI;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.MRJobConfig;
-import org.apache.hadoop.mapreduce.Partitioner;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-/**
- * Generates the sampled split points, launches the job, and waits for it to
- * finish. 
- * <p>
- * To run the program: 
- * <b>bin/hadoop jar hadoop-*-examples.jar terasort in-dir out-dir</b>
- */
-public class TeraSort extends Configured implements Tool {
-  private static final Log LOG = LogFactory.getLog(TeraSort.class);
-  static String SIMPLE_PARTITIONER = "mapreduce.terasort.simplepartitioner";
-  static String OUTPUT_REPLICATION = "mapreduce.terasort.output.replication";
-
-  /**
-   * A partitioner that splits text keys into roughly equal partitions
-   * in a global sorted order.
-   */
-  static class TotalOrderPartitioner extends Partitioner<Text,Text>
-      implements Configurable {
-    private TrieNode trie;
-    private Text[] splitPoints;
-    private Configuration conf;
-
-    /**
-     * A generic trie node
-     */
-    static abstract class TrieNode {
-      private int level;
-      TrieNode(int level) {
-        this.level = level;
-      }
-      abstract int findPartition(Text key);
-      abstract void print(PrintStream strm) throws IOException;
-      int getLevel() {
-        return level;
-      }
-    }
-
-    /**
-     * An inner trie node that contains 256 children based on the next
-     * character.
-     */
-    static class InnerTrieNode extends TrieNode {
-      private TrieNode[] child = new TrieNode[256];
-      
-      InnerTrieNode(int level) {
-        super(level);
-      }
-      int findPartition(Text key) {
-        int level = getLevel();
-        if (key.getLength() <= level) {
-          return child[0].findPartition(key);
-        }
-        return child[key.getBytes()[level] & 0xff].findPartition(key);
-      }
-      void setChild(int idx, TrieNode child) {
-        this.child[idx] = child;
-      }
-      void print(PrintStream strm) throws IOException {
-        for(int ch=0; ch < 256; ++ch) {
-          for(int i = 0; i < 2*getLevel(); ++i) {
-            strm.print(' ');
-          }
-          strm.print(ch);
-          strm.println(" ->");
-          if (child[ch] != null) {
-            child[ch].print(strm);
-          }
-        }
-      }
-    }
-
-    /**
-     * A leaf trie node that does string compares to figure out where the given
-     * key belongs between lower..upper.
-     */
-    static class LeafTrieNode extends TrieNode {
-      int lower;
-      int upper;
-      Text[] splitPoints;
-      LeafTrieNode(int level, Text[] splitPoints, int lower, int upper) {
-        super(level);
-        this.splitPoints = splitPoints;
-        this.lower = lower;
-        this.upper = upper;
-      }
-      int findPartition(Text key) {
-        for(int i=lower; i<upper; ++i) {
-          if (splitPoints[i].compareTo(key) > 0) {
-            return i;
-          }
-        }
-        return upper;
-      }
-      void print(PrintStream strm) throws IOException {
-        for(int i = 0; i < 2*getLevel(); ++i) {
-          strm.print(' ');
-        }
-        strm.print(lower);
-        strm.print(", ");
-        strm.println(upper);
-      }
-    }
-
-
-    /**
-     * Read the cut points from the given sequence file.
-     * @param fs the file system
-     * @param p the path to read
-     * @param conf the job config
-     * @return the strings to split the partitions on
-     * @throws IOException
-     */
-    private static Text[] readPartitions(FileSystem fs, Path p,
-        Configuration conf) throws IOException {
-      int reduces = conf.getInt(MRJobConfig.NUM_REDUCES, 1);
-      Text[] result = new Text[reduces - 1];
-      DataInputStream reader = fs.open(p);
-      for(int i=0; i < reduces - 1; ++i) {
-        result[i] = new Text();
-        result[i].readFields(reader);
-      }
-      reader.close();
-      return result;
-    }
-
-    /**
-     * Given a sorted set of cut points, build a trie that will find the correct
-     * partition quickly.
-     * @param splits the list of cut points
-     * @param lower the lower bound of partitions 0..numPartitions-1
-     * @param upper the upper bound of partitions 0..numPartitions-1
-     * @param prefix the prefix that we have already checked against
-     * @param maxDepth the maximum depth we will build a trie for
-     * @return the trie node that will divide the splits correctly
-     */
-    private static TrieNode buildTrie(Text[] splits, int lower, int upper, 
-                                      Text prefix, int maxDepth) {
-      int depth = prefix.getLength();
-      if (depth >= maxDepth || lower == upper) {
-        return new LeafTrieNode(depth, splits, lower, upper);
-      }
-      InnerTrieNode result = new InnerTrieNode(depth);
-      Text trial = new Text(prefix);
-      // append an extra byte on to the prefix
-      trial.append(new byte[1], 0, 1);
-      int currentBound = lower;
-      for(int ch = 0; ch < 255; ++ch) {
-        trial.getBytes()[depth] = (byte) (ch + 1);
-        lower = currentBound;
-        while (currentBound < upper) {
-          if (splits[currentBound].compareTo(trial) >= 0) {
-            break;
-          }
-          currentBound += 1;
-        }
-        trial.getBytes()[depth] = (byte) ch;
-        result.child[ch] = buildTrie(splits, lower, currentBound, trial, 
-                                     maxDepth);
-      }
-      // pick up the rest
-      trial.getBytes()[depth] = (byte) 255;
-      result.child[255] = buildTrie(splits, currentBound, upper, trial,
-                                    maxDepth);
-      return result;
-    }
-
-    public void setConf(Configuration conf) {
-      try {
-        FileSystem fs = FileSystem.getLocal(conf);
-        this.conf = conf;
-        Path partFile = new Path(TeraInputFormat.PARTITION_FILENAME);
-        splitPoints = readPartitions(fs, partFile, conf);
-        trie = buildTrie(splitPoints, 0, splitPoints.length, new Text(), 2);
-      } catch (IOException ie) {
-        throw new IllegalArgumentException("can't read paritions file", ie);
-      }
-    }
-
-    public Configuration getConf() {
-      return conf;
-    }
-    
-    public TotalOrderPartitioner() {
-    }
-
-    public int getPartition(Text key, Text value, int numPartitions) {
-      return trie.findPartition(key);
-    }
-    
-  }
-  
-  /**
-   * A total order partitioner that assigns keys based on their first 
-   * PREFIX_LENGTH bytes, assuming a flat distribution.
-   */
-  public static class SimplePartitioner extends Partitioner<Text, Text>
-      implements Configurable {
-    int prefixesPerReduce;
-    private static final int PREFIX_LENGTH = 3;
-    private Configuration conf = null;
-    public void setConf(Configuration conf) {
-      this.conf = conf;
-      prefixesPerReduce = (int) Math.ceil((1 << (8 * PREFIX_LENGTH)) / 
-        (float) conf.getInt(MRJobConfig.NUM_REDUCES, 1));
-    }
-    
-    public Configuration getConf() {
-      return conf;
-    }
-    
-    @Override
-    public int getPartition(Text key, Text value, int numPartitions) {
-      byte[] bytes = key.getBytes();
-      int len = Math.min(PREFIX_LENGTH, key.getLength());
-      int prefix = 0;
-      for(int i=0; i < len; ++i) {
-        prefix = (prefix << 8) | (0xff & bytes[i]);
-      }
-      return prefix / prefixesPerReduce;
-    }
-  }
-
-  public static boolean getUseSimplePartitioner(JobContext job) {
-    return job.getConfiguration().getBoolean(SIMPLE_PARTITIONER, false);
-  }
-
-  public static void setUseSimplePartitioner(Job job, boolean value) {
-    job.getConfiguration().setBoolean(SIMPLE_PARTITIONER, value);
-  }
-
-  public static int getOutputReplication(JobContext job) {
-    return job.getConfiguration().getInt(OUTPUT_REPLICATION, 1);
-  }
-
-  public static void setOutputReplication(Job job, int value) {
-    job.getConfiguration().setInt(OUTPUT_REPLICATION, value);
-  }
-
-  public int run(String[] args) throws Exception {
-    if (args.length != 2) {
-      System.err.println("Invalid no. of arguments provided");
-      System.err.println("Usage: terasort <input-dir> <output-dir>");
-      return -1;
-    }
-
-    LOG.info("starting");
-    Job job = Job.getInstance(getConf());
-    Path inputDir = new Path(args[0]);
-    Path outputDir = new Path(args[1]);
-    boolean useSimplePartitioner = getUseSimplePartitioner(job);
-    TeraInputFormat.setInputPaths(job, inputDir);
-    FileOutputFormat.setOutputPath(job, outputDir);
-    job.setJobName("TeraSort");
-    job.setJarByClass(TeraSort.class);
-    job.setOutputKeyClass(Text.class);
-    job.setOutputValueClass(Text.class);
-    job.setInputFormatClass(TeraInputFormat.class);
-    job.setOutputFormatClass(TeraOutputFormat.class);
-    if (useSimplePartitioner) {
-      job.setPartitionerClass(SimplePartitioner.class);
-    } else {
-      long start = System.currentTimeMillis();
-      Path partitionFile = new Path(outputDir, 
-                                    TeraInputFormat.PARTITION_FILENAME);
-      URI partitionUri = new URI(partitionFile.toString() +
-                                 "#" + TeraInputFormat.PARTITION_FILENAME);
-      try {
-        TeraInputFormat.writePartitionFile(job, partitionFile);
-      } catch (Throwable e) {
-        LOG.error(e.getMessage());
-        return -1;
-      }
-      job.addCacheFile(partitionUri);  
-      long end = System.currentTimeMillis();
-      System.out.println("Spent " + (end - start) + "ms computing partitions.");
-      job.setPartitionerClass(TotalOrderPartitioner.class);
-    }
-    
-    job.getConfiguration().setInt("dfs.replication", getOutputReplication(job));
-    TeraOutputFormat.setFinalSync(job, true);
-    int ret = job.waitForCompletion(true) ? 0 : 1;
-    LOG.info("done");
-    return ret;
-  }
-
-  /**
-   * @param args
-   */
-  public static void main(String[] args) throws Exception {
-    int res = ToolRunner.run(new Configuration(), new TeraSort(), args);
-    System.exit(res);
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/tez/blob/8e382b34/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/terasort/TeraValidate.java
----------------------------------------------------------------------
diff --git a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/terasort/TeraValidate.java b/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/terasort/TeraValidate.java
deleted file mode 100644
index 24c3145..0000000
--- a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/terasort/TeraValidate.java
+++ /dev/null
@@ -1,188 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tez.mapreduce.examples.terasort;
-
-import java.io.IOException;
-import java.util.zip.Checksum;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.FileSplit;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.util.PureJavaCrc32;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-/**
- * Generates one mapper per file that checks to make sure the keys
- * are sorted within each file. The mapper also generates 
- * "$file:begin", first key and "$file:end", last key. The reduce verifies that
- * all of the start/end items are in order.
- * Any output from the reduce is a problem report.
- * <p>
- * To run the program: 
- * <b>bin/hadoop jar hadoop-*-examples.jar teravalidate out-dir report-dir</b>
- * <p>
- * If there is any output, something is wrong and the output of the reduce
- * will have the problem report.
- */
-public class TeraValidate extends Configured implements Tool {
-  private static final Text ERROR = new Text("error");
-  private static final Text CHECKSUM = new Text("checksum");
-  
-  private static String textifyBytes(Text t) {
-    BytesWritable b = new BytesWritable();
-    b.set(t.getBytes(), 0, t.getLength());
-    return b.toString();
-  }
-
-  static class ValidateMapper extends Mapper<Text,Text,Text,Text> {
-    private Text lastKey;
-    private String filename;
-    private Unsigned16 checksum = new Unsigned16();
-    private Unsigned16 tmp = new Unsigned16();
-    private Checksum crc32 = new PureJavaCrc32();
-
-    /**
-     * Get the final part of the input name
-     * @param split the input split
-     * @return the "part-r-00000" for the input
-     */
-    private String getFilename(FileSplit split) {
-      return split.getPath().getName();
-    }
-
-    public void map(Text key, Text value, Context context) 
-        throws IOException, InterruptedException {
-      if (lastKey == null) {
-        FileSplit fs = (FileSplit) context.getInputSplit();
-        filename = getFilename(fs);
-        context.write(new Text(filename + ":begin"), key);
-        lastKey = new Text();
-      } else {
-        if (key.compareTo(lastKey) < 0) {
-          context.write(ERROR, new Text("misorder in " + filename + 
-                                         " between " + textifyBytes(lastKey) + 
-                                         " and " + textifyBytes(key)));
-        }
-      }
-      // compute the crc of the key and value and add it to the sum
-      crc32.reset();
-      crc32.update(key.getBytes(), 0, key.getLength());
-      crc32.update(value.getBytes(), 0, value.getLength());
-      tmp.set(crc32.getValue());
-      checksum.add(tmp);
-      lastKey.set(key);
-    }
-    
-    public void cleanup(Context context) 
-        throws IOException, InterruptedException  {
-      if (lastKey != null) {
-        context.write(new Text(filename + ":end"), lastKey);
-        context.write(CHECKSUM, new Text(checksum.toString()));
-      }
-    }
-  }
-
-  /**
-   * Check the boundaries between the output files by making sure that the
-   * boundary keys are always increasing.
-   * Also passes any error reports along intact.
-   */
-  static class ValidateReducer extends Reducer<Text,Text,Text,Text> {
-    private boolean firstKey = true;
-    private Text lastKey = new Text();
-    private Text lastValue = new Text();
-    public void reduce(Text key, Iterable<Text> values,
-        Context context) throws IOException, InterruptedException  {
-      if (ERROR.equals(key)) {
-        for (Text val : values) {
-          context.write(key, val);
-        }
-      } else if (CHECKSUM.equals(key)) {
-        Unsigned16 tmp = new Unsigned16();
-        Unsigned16 sum = new Unsigned16();
-        for (Text val : values) {
-          tmp.set(val.toString());
-          sum.add(tmp);
-        }
-        context.write(CHECKSUM, new Text(sum.toString()));
-      } else {
-        Text value = values.iterator().next();
-        if (firstKey) {
-          firstKey = false;
-        } else {
-          if (value.compareTo(lastValue) < 0) {
-            context.write(ERROR, 
-                           new Text("bad key partitioning:\n  file " + 
-                                    lastKey + " key " + 
-                                    textifyBytes(lastValue) +
-                                    "\n  file " + key + " key " + 
-                                    textifyBytes(value)));
-          }
-        }
-        lastKey.set(key);
-        lastValue.set(value);
-      }
-    }
-    
-  }
-
-  private static void usage() throws IOException {
-    System.err.println("teravalidate <out-dir> <report-dir>");
-  }
-
-  public int run(String[] args) throws Exception {
-    Job job = Job.getInstance(getConf());
-    if (args.length != 2) {
-      usage();
-      return 1;
-    }
-    TeraInputFormat.setInputPaths(job, new Path(args[0]));
-    FileOutputFormat.setOutputPath(job, new Path(args[1]));
-    job.setJobName("TeraValidate");
-    job.setJarByClass(TeraValidate.class);
-    job.setMapperClass(ValidateMapper.class);
-    job.setReducerClass(ValidateReducer.class);
-    job.setOutputKeyClass(Text.class);
-    job.setOutputValueClass(Text.class);
-    // force a single reducer
-    job.setNumReduceTasks(1);
-    // force a single split 
-    FileInputFormat.setMinInputSplitSize(job, Long.MAX_VALUE);
-    job.setInputFormatClass(TeraInputFormat.class);
-    return job.waitForCompletion(true) ? 0 : 1;
-  }
-
-  /**
-   * @param args
-   */
-  public static void main(String[] args) throws Exception {
-    int res = ToolRunner.run(new Configuration(), new TeraValidate(), args);
-    System.exit(res);
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/tez/blob/8e382b34/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/terasort/Unsigned16.java
----------------------------------------------------------------------
diff --git a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/terasort/Unsigned16.java b/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/terasort/Unsigned16.java
deleted file mode 100644
index 1335bf4..0000000
--- a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/terasort/Unsigned16.java
+++ /dev/null
@@ -1,297 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tez.mapreduce.examples.terasort;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.hadoop.io.Writable;
-
-/**
- * An unsigned 16 byte integer class that supports addition, multiplication,
- * and left shifts.
- */
-class Unsigned16 implements Writable {
-  private long hi8;
-  private long lo8;
-
-  public Unsigned16() {
-    hi8 = 0;
-    lo8 = 0;
-  }
-
-  public Unsigned16(long l) {
-    hi8 = 0;
-    lo8 = l;
-  }
-
-  public Unsigned16(Unsigned16 other) {
-    hi8 = other.hi8;
-    lo8 = other.lo8;
-  }
-  
-  @Override
-  public boolean equals(Object o) {
-    if (o instanceof Unsigned16) {
-      Unsigned16 other = (Unsigned16) o;
-      return other.hi8 == hi8 && other.lo8 == lo8;
-    }
-    return false;
-  }
-
-  @Override
-  public int hashCode() {
-    return (int) lo8;
-  }
-
-  /**
-   * Parse a hex string
-   * @param s the hex string
-   */
-  public Unsigned16(String s) throws NumberFormatException {
-    set(s);
-  }
-
-  /**
-   * Set the number from a hex string
-   * @param s the number in hexadecimal
-   * @throws NumberFormatException if the number is invalid
-   */
-  public void set(String s) throws NumberFormatException {
-    hi8 = 0;
-    lo8 = 0;
-    final long lastDigit = 0xfl << 60;
-    for (int i = 0; i < s.length(); ++i) {
-      int digit = getHexDigit(s.charAt(i));
-      if ((lastDigit & hi8) != 0) {
-        throw new NumberFormatException(s + " overflowed 16 bytes");
-      }
-      hi8 <<= 4;
-      hi8 |= (lo8 & lastDigit) >>> 60;
-      lo8 <<= 4;
-      lo8 |= digit;
-    }    
-  }
-
-  /**
-   * Set the number to a given long.
-   * @param l the new value, which is treated as an unsigned number
-   */
-  public void set(long l) {
-    lo8 = l;
-    hi8 = 0;
-  }
-
-  /**
-   * Map a hexadecimal character into a digit.
-   * @param ch the character
-   * @return the digit from 0 to 15
-   * @throws NumberFormatException
-   */
-  private static int getHexDigit(char ch) throws NumberFormatException {
-    if (ch >= '0' && ch <= '9') {
-      return ch - '0';
-    }
-    if (ch >= 'a' && ch <= 'f') {
-      return ch - 'a' + 10;
-    }
-    if (ch >= 'A' && ch <= 'F') {
-      return ch - 'A' + 10;
-    }
-    throw new NumberFormatException(ch + " is not a valid hex digit");
-  }
-
-  private static final Unsigned16 TEN = new Unsigned16(10);
-
-  public static Unsigned16 fromDecimal(String s) throws NumberFormatException {
-    Unsigned16 result = new Unsigned16();
-    Unsigned16 tmp = new Unsigned16();
-    for(int i=0; i < s.length(); i++) {
-      char ch = s.charAt(i);
-      if (ch < '0' || ch > '9') {
-        throw new NumberFormatException(ch + " not a valid decimal digit");
-      }
-      int digit = ch - '0';
-      result.multiply(TEN);
-      tmp.set(digit);
-      result.add(tmp);
-    }
-    return result;
-  }
-
-  /**
-   * Return the number as a hex string.
-   */
-  public String toString() {
-    if (hi8 == 0) {
-      return Long.toHexString(lo8);
-    } else {
-      StringBuilder result = new StringBuilder();
-      result.append(Long.toHexString(hi8));
-      String loString = Long.toHexString(lo8);
-      for(int i=loString.length(); i < 16; ++i) {
-        result.append('0');
-      }
-      result.append(loString);
-      return result.toString();
-    }
-  }
-
-  /**
-   * Get a given byte from the number.
-   * @param b the byte to get with 0 meaning the most significant byte
-   * @return the byte or 0 if b is outside of 0..15
-   */
-  public byte getByte(int b) {
-    if (b >= 0 && b < 16) {
-      if (b < 8) {
-        return (byte) (hi8 >> (56 - 8*b));
-      } else {
-        return (byte) (lo8 >> (120 - 8*b));
-      }
-    }
-    return 0;
-  }
-
-  /**
-   * Get the hexadecimal digit at the given position.
-   * @param p the digit position to get with 0 meaning the most significant
-   * @return the character or '0' if p is outside of 0..31
-   */
-  public char getHexDigit(int p) {
-    byte digit = getByte(p / 2);
-    if (p % 2 == 0) {
-      digit >>>= 4;
-    }
-    digit &= 0xf;
-    if (digit < 10) {
-      return (char) ('0' + digit);
-    } else {
-      return (char) ('A' + digit - 10);
-    }
-  }
-
-  /**
-   * Get the high 8 bytes as a long.
-   */
-  public long getHigh8() {
-    return hi8;
-  }
-  
-  /**
-   * Get the low 8 bytes as a long.
-   */
-  public long getLow8() {
-    return lo8;
-  }
-
-  /**
-   * Multiply the current number by a 16 byte unsigned integer. Overflow is not
-   * detected and the result is the low 16 bytes of the product. The numbers
-   * are divided into 32 and 31 bit chunks so that the product of two chunks
-   * fits in the unsigned 63 bits of a long.
-   * @param b the other number
-   */
-  void multiply(Unsigned16 b) {
-    // divide the left into 4 32 bit chunks
-    long[] left = new long[4];
-    left[0] = lo8 & 0xffffffffl;
-    left[1] = lo8 >>> 32;
-    left[2] = hi8 & 0xffffffffl;
-    left[3] = hi8 >>> 32;
-    // divide the right into 5 31 bit chunks
-    long[] right = new long[5];
-    right[0] = b.lo8 & 0x7fffffffl;
-    right[1] = (b.lo8 >>> 31) & 0x7fffffffl;
-    right[2] = (b.lo8 >>> 62) + ((b.hi8 & 0x1fffffffl) << 2);
-    right[3] = (b.hi8 >>> 29) & 0x7fffffffl;
-    right[4] = (b.hi8 >>> 60);
-    // clear the cur value
-    set(0);
-    Unsigned16 tmp = new Unsigned16();
-    for(int l=0; l < 4; ++l) {
-      for (int r=0; r < 5; ++r) {
-        long prod = left[l] * right[r];
-        if (prod != 0) {
-          int off = l*32 + r*31;
-          tmp.set(prod);
-          tmp.shiftLeft(off);
-          add(tmp);
-        }
-      }
-    }
-  }
-
-  /**
-   * Add the given number into the current number.
-   * @param b the other number
-   */
-  public void add(Unsigned16 b) {
-    long sumHi;
-    long sumLo;
-    long  reshibit, hibit0, hibit1;
-
-    sumHi = hi8 + b.hi8;
-
-    hibit0 = (lo8 & 0x8000000000000000L);
-    hibit1 = (b.lo8 & 0x8000000000000000L);
-    sumLo = lo8 + b.lo8;
-    reshibit = (sumLo & 0x8000000000000000L);
-    if ((hibit0 & hibit1) != 0 | ((hibit0 ^ hibit1) != 0 && reshibit == 0))
-      sumHi++;  /* add carry bit */
-    hi8 = sumHi;
-    lo8 = sumLo;
-  }
-
-  /**
-   * Shift the number left by a given number of bit positions. Only the low
-   * order 16 bytes of the result are kept.
-   * @param bits the bit positions to shift by
-   */
-  public void shiftLeft(int bits) {
-    if (bits != 0) {
-      if (bits < 64) {
-        hi8 <<= bits;
-        hi8 |= (lo8 >>> (64 - bits));
-        lo8 <<= bits;
-      } else if (bits < 128) {
-        hi8 = lo8 << (bits - 64);
-        lo8 = 0;
-      } else {
-        hi8 = 0;
-        lo8 = 0;
-      }
-    }
-  }
-  
-  @Override
-  public void readFields(DataInput in) throws IOException {
-    hi8 = in.readLong();
-    lo8 = in.readLong();
-  }
-
-  @Override
-  public void write(DataOutput out) throws IOException {
-    out.writeLong(hi8);
-    out.writeLong(lo8);
-  }
-  
-  
-}

http://git-wip-us.apache.org/repos/asf/tez/blob/8e382b34/tez-tests/src/test/java/org/apache/tez/mapreduce/examples/terasort/TestTeraSort.java
----------------------------------------------------------------------
diff --git a/tez-tests/src/test/java/org/apache/tez/mapreduce/examples/terasort/TestTeraSort.java b/tez-tests/src/test/java/org/apache/tez/mapreduce/examples/terasort/TestTeraSort.java
deleted file mode 100644
index 97b6538..0000000
--- a/tez-tests/src/test/java/org/apache/tez/mapreduce/examples/terasort/TestTeraSort.java
+++ /dev/null
@@ -1,94 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tez.mapreduce.examples.terasort;
-
-import java.io.File;
-import java.io.IOException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.HadoopTestCase;
-import org.apache.hadoop.util.ToolRunner;
-import org.apache.tez.mapreduce.examples.terasort.TeraGen;
-import org.apache.tez.mapreduce.examples.terasort.TeraSort;
-import org.apache.tez.mapreduce.examples.terasort.TeraValidate;
-
-import org.junit.Ignore;
-
-@Ignore
-public class TestTeraSort extends HadoopTestCase {
-  
-  public TestTeraSort()
-      throws IOException {
-    super(CLUSTER_MR, DFS_FS, 1, 1);
-  }
-
-  protected void tearDown() throws Exception {
-    getFileSystem().delete(new Path(TEST_DIR), true);
-    super.tearDown();
-  }
-  
-  // Input/Output paths for sort
-  private static final String TEST_DIR = 
-    new File(System.getProperty("test.build.data", "/tmp"), "org/apache/tez/mapreduce/examples/terasort")
-    .getAbsolutePath();
-  private static final Path SORT_INPUT_PATH = new Path(TEST_DIR, "sortin");
-  private static final Path SORT_OUTPUT_PATH = new Path(TEST_DIR, "sortout");
-  private static final Path TERA_OUTPUT_PATH = new Path(TEST_DIR, "validate");
-  private static final String NUM_ROWS = "100"; 
-
-  private void runTeraGen(Configuration conf, Path sortInput) 
-      throws Exception {
-    String[] genArgs = {NUM_ROWS, sortInput.toString()};
-    
-    // Run TeraGen
-    assertEquals(ToolRunner.run(conf, new TeraGen(), genArgs), 0);
-  }
-  
-  private void runTeraSort(Configuration conf,
-      Path sortInput, Path sortOutput) throws Exception {
-
-    // Setup command-line arguments to 'sort'
-    String[] sortArgs = {sortInput.toString(), sortOutput.toString()};
-    
-    // Run Sort
-    assertEquals(ToolRunner.run(conf, new TeraSort(), sortArgs), 0);
-  }
-  
-  private void runTeraValidator(Configuration job, 
-                                       Path sortOutput, Path valOutput) 
-  throws Exception {
-    String[] svArgs = {sortOutput.toString(), valOutput.toString()};
-
-    // Run Tera-Validator
-    assertEquals(ToolRunner.run(job, new TeraValidate(), svArgs), 0);
-  }
-  
-  public void testTeraSort() throws Exception {
-    // Run TeraGen to generate input for 'terasort'
-    runTeraGen(createJobConf(), SORT_INPUT_PATH);
-
-    // Run terasort
-    runTeraSort(createJobConf(), SORT_INPUT_PATH, SORT_OUTPUT_PATH);
-
-    // Run tera-validator to check if sort worked correctly
-    runTeraValidator(createJobConf(), SORT_OUTPUT_PATH,
-      TERA_OUTPUT_PATH);
-  }
-
-}