Posted to commits@crunch.apache.org by jw...@apache.org on 2013/04/23 22:41:03 UTC

[01/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Updated Branches:
  refs/heads/master cbc7c2fb3 -> 890e0086a


http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/types/writable/WritablesTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/types/writable/WritablesTest.java b/crunch/src/test/java/org/apache/crunch/types/writable/WritablesTest.java
deleted file mode 100644
index 5396fba..0000000
--- a/crunch/src/test/java/org/apache/crunch/types/writable/WritablesTest.java
+++ /dev/null
@@ -1,256 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.writable;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNotSame;
-import static org.junit.Assert.assertSame;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.Collection;
-
-import org.apache.crunch.Pair;
-import org.apache.crunch.Tuple3;
-import org.apache.crunch.Tuple4;
-import org.apache.crunch.TupleN;
-import org.apache.crunch.types.PTableType;
-import org.apache.crunch.types.PType;
-import org.apache.hadoop.io.BooleanWritable;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.DoubleWritable;
-import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-import org.junit.Test;
-
-import com.google.common.collect.Lists;
-
-public class WritablesTest {
-
-  @Test
-  public void testNulls() throws Exception {
-    Void n = null;
-    NullWritable nw = NullWritable.get();
-    testInputOutputFn(Writables.nulls(), n, nw);
-  }
-
-  @Test
-  public void testStrings() throws Exception {
-    String s = "abc";
-    Text text = new Text(s);
-    testInputOutputFn(Writables.strings(), s, text);
-  }
-
-  @Test
-  public void testInts() throws Exception {
-    int j = 55;
-    IntWritable w = new IntWritable(j);
-    testInputOutputFn(Writables.ints(), j, w);
-  }
-
-  @Test
-  public void testLongs() throws Exception {
-    long j = 55;
-    LongWritable w = new LongWritable(j);
-    testInputOutputFn(Writables.longs(), j, w);
-  }
-
-  @Test
-  public void testFloats() throws Exception {
-    float j = 55.5f;
-    FloatWritable w = new FloatWritable(j);
-    testInputOutputFn(Writables.floats(), j, w);
-  }
-
-  @Test
-  public void testDoubles() throws Exception {
-    double j = 55.5d;
-    DoubleWritable w = new DoubleWritable(j);
-    testInputOutputFn(Writables.doubles(), j, w);
-  }
-
-  @Test
-  public void testBoolean() throws Exception {
-    boolean j = false;
-    BooleanWritable w = new BooleanWritable(j);
-    testInputOutputFn(Writables.booleans(), j, w);
-  }
-
-  @Test
-  public void testBytes() throws Exception {
-    byte[] bytes = new byte[] { 17, 26, -98 };
-    BytesWritable bw = new BytesWritable(bytes);
-    ByteBuffer bb = ByteBuffer.wrap(bytes);
-    testInputOutputFn(Writables.bytes(), bb, bw);
-  }
-
-  @Test
-  public void testCollections() throws Exception {
-    String s = "abc";
-    Collection<String> j = Lists.newArrayList();
-    j.add(s);
-    GenericArrayWritable<Text> w = new GenericArrayWritable<Text>(Text.class);
-    w.set(new Text[] { new Text(s) });
-    testInputOutputFn(Writables.collections(Writables.strings()), j, w);
-  }
-
-  @Test
-  public void testPairs() throws Exception {
-    Pair<String, String> j = Pair.of("a", "b");
-    TupleWritable w = new TupleWritable(new Text[] { new Text("a"), new Text("b"), });
-    w.setWritten(0);
-    w.setWritten(1);
-    testInputOutputFn(Writables.pairs(Writables.strings(), Writables.strings()), j, w);
-  }
-
-  @Test
-  public void testNestedTables() throws Exception {
-    PTableType<Long, Long> pll = Writables.tableOf(Writables.longs(), Writables.longs());
-    PTableType<Pair<Long, Long>, String> nest = Writables.tableOf(pll, Writables.strings());
-    assertNotNull(nest);
-  }
-
-  @Test
-  public void testPairEquals() throws Exception {
-    PType<Pair<Long, ByteBuffer>> t1 = Writables.pairs(Writables.longs(), Writables.bytes());
-    PType<Pair<Long, ByteBuffer>> t2 = Writables.pairs(Writables.longs(), Writables.bytes());
-    assertEquals(t1, t2);
-    assertEquals(t1.hashCode(), t2.hashCode());
-  }
-
-  @Test
-  @SuppressWarnings("rawtypes")
-  public void testTriples() throws Exception {
-    Tuple3 j = Tuple3.of("a", "b", "c");
-    TupleWritable w = new TupleWritable(new Text[] { new Text("a"), new Text("b"), new Text("c"), });
-    w.setWritten(0);
-    w.setWritten(1);
-    w.setWritten(2);
-    WritableType<?, ?> wt = Writables.triples(Writables.strings(), Writables.strings(), Writables.strings());
-    testInputOutputFn(wt, j, w);
-  }
-
-  @Test
-  @SuppressWarnings("rawtypes")
-  public void testQuads() throws Exception {
-    Tuple4 j = Tuple4.of("a", "b", "c", "d");
-    TupleWritable w = new TupleWritable(new Text[] { new Text("a"), new Text("b"), new Text("c"), new Text("d"), });
-    w.setWritten(0);
-    w.setWritten(1);
-    w.setWritten(2);
-    w.setWritten(3);
-    WritableType<?, ?> wt = Writables.quads(Writables.strings(), Writables.strings(), Writables.strings(),
-        Writables.strings());
-    testInputOutputFn(wt, j, w);
-  }
-
-  @Test
-  public void testTupleN() throws Exception {
-    TupleN j = new TupleN("a", "b", "c", "d", "e");
-    TupleWritable w = new TupleWritable(new Text[] { new Text("a"), new Text("b"), new Text("c"), new Text("d"),
-        new Text("e"), });
-    w.setWritten(0);
-    w.setWritten(1);
-    w.setWritten(2);
-    w.setWritten(3);
-    w.setWritten(4);
-    WritableType<?, ?> wt = Writables.tuples(Writables.strings(), Writables.strings(), Writables.strings(),
-        Writables.strings(), Writables.strings());
-    testInputOutputFn(wt, j, w);
-  }
-
-  protected static class TestWritable implements Writable {
-    String left;
-    int right;
-
-    @Override
-    public void write(DataOutput out) throws IOException {
-      out.writeUTF(left);
-      out.writeInt(right);
-    }
-
-    @Override
-    public void readFields(DataInput in) throws IOException {
-      left = in.readUTF();
-      right = in.readInt();
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-      if (this == obj)
-        return true;
-      if (obj == null)
-        return false;
-      if (getClass() != obj.getClass())
-        return false;
-      TestWritable other = (TestWritable) obj;
-      if (left == null) {
-        if (other.left != null)
-          return false;
-      } else if (!left.equals(other.left))
-        return false;
-      if (right != other.right)
-        return false;
-      return true;
-    }
-
-  }
-
-  @Test
-  public void testRecords() throws Exception {
-    TestWritable j = new TestWritable();
-    j.left = "a";
-    j.right = 1;
-    TestWritable w = new TestWritable();
-    w.left = "a";
-    w.right = 1;
-    WritableType<?, ?> wt = Writables.records(TestWritable.class);
-    testInputOutputFn(wt, j, w);
-  }
-
-  @Test
-  public void testTableOf() throws Exception {
-    Pair<String, String> j = Pair.of("a", "b");
-    Pair<Text, Text> w = Pair.of(new Text("a"), new Text("b"));
-    WritableTableType<String, String> wtt = Writables.tableOf(Writables.strings(), Writables.strings());
-    testInputOutputFn(wtt, j, w);
-  }
-
-  @Test
-  public void testRegister() throws Exception {
-    WritableType<TestWritable, TestWritable> wt = Writables.writables(TestWritable.class);
-    Writables.register(TestWritable.class, wt);
-    assertSame(Writables.records(TestWritable.class), wt);
-  }
-
-  @SuppressWarnings({ "unchecked", "rawtypes" })
-  protected static void testInputOutputFn(PType ptype, Object java, Object writable) {
-    ptype.getInputMapFn().initialize();
-    ptype.getOutputMapFn().initialize();
-    assertEquals(java, ptype.getInputMapFn().map(writable));
-    assertEquals(writable, ptype.getOutputMapFn().map(java));
-  }
-
-}
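
For reference, the register/records pair exercised by testRegister above is the extension point for custom Writable classes. A minimal sketch of the same flow outside of JUnit, reusing the TestWritable class declared in this test (the sketch is placed in the same package so the protected nested class resolves; the wrapper class name is illustrative):

package org.apache.crunch.types.writable;

import org.apache.crunch.types.PType;

public class RegisterTestWritableSketch {
  public static void main(String[] args) {
    // Build a WritableType whose Java class and serialization class are both TestWritable.
    WritableType<WritablesTest.TestWritable, WritablesTest.TestWritable> wt =
        Writables.writables(WritablesTest.TestWritable.class);

    // Register it so that records(TestWritable.class) resolves to this exact instance,
    // which is what testRegister verifies with assertSame.
    Writables.register(WritablesTest.TestWritable.class, wt);

    // Downstream code can now look the type up generically and initialize its map
    // functions, just as testInputOutputFn does above.
    PType<WritablesTest.TestWritable> ptype = Writables.records(WritablesTest.TestWritable.class);
    ptype.getInputMapFn().initialize();
    ptype.getOutputMapFn().initialize();
  }
}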

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/util/DistCacheTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/util/DistCacheTest.java b/crunch/src/test/java/org/apache/crunch/util/DistCacheTest.java
deleted file mode 100644
index 6784f14..0000000
--- a/crunch/src/test/java/org/apache/crunch/util/DistCacheTest.java
+++ /dev/null
@@ -1,156 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.util;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
-
-import org.apache.commons.lang.StringUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TemporaryFolder;
-
-public class DistCacheTest {
-
-  // A temporary folder used to hold files created for the test.
-  @Rule
-  public TemporaryFolder testFolder = new TemporaryFolder();
-
-  // A configuration and lists of paths to use in tests.
-  private Configuration testConf;
-  private String[] testFilePaths;
-  private String[] testFileQualifiedPaths;
-
-  /**
-   * Setup resources for tests. These include:
-   * <ol>
-   * <li>A Hadoop configuration.
-   * <li>A directory of temporary files that includes 3 .jar files and 1 other
-   * file.
-   * <li>Arrays containing the canonical paths and qualified paths to the test
-   * files.
-   * </ol>
-   */
-  @Before
-  public void setup() throws IOException {
-    // Create a configuration for tests.
-    testConf = new Configuration();
-
-    // Create the test files and add their paths to the list of test file paths.
-    testFilePaths = new String[3];
-    testFilePaths[0] = testFolder.newFile("jar1.jar").getCanonicalPath();
-    testFilePaths[1] = testFolder.newFile("jar2.jar").getCanonicalPath();
-    testFilePaths[2] = testFolder.newFile("jar3.jar").getCanonicalPath();
-    testFolder.newFile("notJar.other");
-
-    // Populate a list of qualified paths from the test file paths.
-    testFileQualifiedPaths = new String[3];
-    for (int i = 0; i < testFilePaths.length; i++) {
-      testFileQualifiedPaths[i] = "file:" + testFilePaths[i];
-    }
-  }
-
-  /**
-   * Tests adding jars one-by-one to a job's configuration.
-   * 
-   * @throws IOException
-   *           If there is a problem adding the jars.
-   */
-  @Test
-  public void testAddJar() throws IOException {
-    // Add each valid jar path to the distributed cache configuration, and
-    // verify each was added correctly in turn.
-    for (int i = 0; i < testFilePaths.length; i++) {
-      DistCache.addJarToDistributedCache(testConf, testFilePaths[i]);
-      assertEquals("tmpjars configuration var does not contain expected value.",
-          StringUtils.join(testFileQualifiedPaths, ",", 0, i + 1), testConf.get("tmpjars"));
-    }
-  }
-
-  /**
-   * Tests that attempting to add the path to a jar that does not exist to the
-   * configuration throws an exception.
-   * 
-   * @throws IOException
-   *           If the added jar path does not exist. This exception is expected.
-   */
-  @Test(expected = IOException.class)
-  public void testAddJarThatDoesntExist() throws IOException {
-    DistCache.addJarToDistributedCache(testConf, "/garbage/doesntexist.jar");
-  }
-
-  /**
-   * Tests that adding a directory of jars to the configuration works as
-   * expected. .jar files under the added directory should be added to the
-   * configuration, and all other files should be skipped.
-   * 
-   * @throws IOException
-   *           If there is a problem adding the jar directory to the
-   *           configuration.
-   */
-  @Test
-  public void testAddJarDirectory() throws IOException {
-    DistCache.addJarDirToDistributedCache(testConf, testFolder.getRoot().getCanonicalPath());
-    // Throw the added jar paths in a set to detect duplicates.
-    String[] splitJarPaths = StringUtils.split(testConf.get("tmpjars"), ",");
-    Set<String> addedJarPaths = new HashSet<String>();
-    for (String path : splitJarPaths) {
-      addedJarPaths.add(path);
-    }
-    assertEquals("Incorrect number of jar paths added.", testFilePaths.length, addedJarPaths.size());
-
-    // Ensure all expected paths were added.
-    for (int i = 0; i < testFileQualifiedPaths.length; i++) {
-      assertTrue("Expected jar path missing from jar paths added to tmpjars: " + testFileQualifiedPaths[i],
-          addedJarPaths.contains(testFileQualifiedPaths[i]));
-    }
-  }
-
-  /**
-   * Tests that adding a jar directory that does not exist to the configuration
-   * throws an exception.
-   * 
-   * @throws IOException
-   *           If the added jar directory does not exist. This exception is
-   *           expected.
-   */
-  @Test(expected = IOException.class)
-  public void testAddJarDirectoryThatDoesntExist() throws IOException {
-    DistCache.addJarDirToDistributedCache(testConf, "/garbage/doesntexist");
-  }
-
-  /**
-   * Tests that adding a jar directory that is not a directory to the
-   * configuration throws an exception.
-   * 
-   * @throws IOException
-   *           If the added jar directory is not a directory. This exception is
-   *           expected.
-   */
-  @Test(expected = IOException.class)
-  public void testAddJarDirectoryNotDirectory() throws IOException {
-    DistCache.addJarDirToDistributedCache(testConf, testFilePaths[0]);
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index d5f90f2..71f5e0f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -44,7 +44,7 @@ under the License.
   </prerequisites>
 
   <modules>
-    <module>crunch</module>
+    <module>crunch-core</module>
     <module>crunch-hbase</module>
     <module>crunch-test</module>
     <module>crunch-contrib</module>
@@ -103,7 +103,7 @@ under the License.
     <dependencies>
       <dependency>
         <groupId>org.apache.crunch</groupId>
-        <artifactId>crunch</artifactId>
+        <artifactId>crunch-core</artifactId>
         <version>${project.version}</version>
       </dependency>
 


[04/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/writable/Writables.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/writable/Writables.java b/crunch/src/main/java/org/apache/crunch/types/writable/Writables.java
deleted file mode 100644
index 78cf3ae..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/writable/Writables.java
+++ /dev/null
@@ -1,588 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.writable;
-
-import java.nio.ByteBuffer;
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.crunch.MapFn;
-import org.apache.crunch.Pair;
-import org.apache.crunch.Tuple;
-import org.apache.crunch.Tuple3;
-import org.apache.crunch.Tuple4;
-import org.apache.crunch.TupleN;
-import org.apache.crunch.fn.CompositeMapFn;
-import org.apache.crunch.fn.IdentityFn;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.PTypes;
-import org.apache.crunch.types.TupleFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.BooleanWritable;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.DoubleWritable;
-import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.MapWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.TaskInputOutputContext;
-
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-
-/**
- * Defines static methods that are analogous to the methods defined in
- * {@link WritableTypeFamily} for convenient static importing.
- * 
- */
-public class Writables {
-  private static final MapFn<NullWritable, Void> NULL_WRITABLE_TO_VOID = new MapFn<NullWritable, Void>() {
-    @Override
-    public Void map(NullWritable input) {
-      return null;
-    }
-  };
-
-  private static final MapFn<Void, NullWritable> VOID_TO_NULL_WRITABLE = new MapFn<Void, NullWritable>() {
-    @Override
-    public NullWritable map(Void input) {
-      return NullWritable.get();
-    }
-  };
-
-  private static final MapFn<Text, String> TEXT_TO_STRING = new MapFn<Text, String>() {
-    @Override
-    public String map(Text input) {
-      return input.toString();
-    }
-  };
-
-  private static final MapFn<String, Text> STRING_TO_TEXT = new MapFn<String, Text>() {
-    @Override
-    public Text map(String input) {
-      return new Text(input);
-    }
-  };
-
-  private static final MapFn<IntWritable, Integer> IW_TO_INT = new MapFn<IntWritable, Integer>() {
-    @Override
-    public Integer map(IntWritable input) {
-      return input.get();
-    }
-  };
-
-  private static final MapFn<Integer, IntWritable> INT_TO_IW = new MapFn<Integer, IntWritable>() {
-    @Override
-    public IntWritable map(Integer input) {
-      return new IntWritable(input);
-    }
-  };
-
-  private static final MapFn<LongWritable, Long> LW_TO_LONG = new MapFn<LongWritable, Long>() {
-    @Override
-    public Long map(LongWritable input) {
-      return input.get();
-    }
-  };
-
-  private static final MapFn<Long, LongWritable> LONG_TO_LW = new MapFn<Long, LongWritable>() {
-    @Override
-    public LongWritable map(Long input) {
-      return new LongWritable(input);
-    }
-  };
-
-  private static final MapFn<FloatWritable, Float> FW_TO_FLOAT = new MapFn<FloatWritable, Float>() {
-    @Override
-    public Float map(FloatWritable input) {
-      return input.get();
-    }
-  };
-
-  private static final MapFn<Float, FloatWritable> FLOAT_TO_FW = new MapFn<Float, FloatWritable>() {
-    @Override
-    public FloatWritable map(Float input) {
-      return new FloatWritable(input);
-    }
-  };
-
-  private static final MapFn<DoubleWritable, Double> DW_TO_DOUBLE = new MapFn<DoubleWritable, Double>() {
-    @Override
-    public Double map(DoubleWritable input) {
-      return input.get();
-    }
-  };
-
-  private static final MapFn<Double, DoubleWritable> DOUBLE_TO_DW = new MapFn<Double, DoubleWritable>() {
-    @Override
-    public DoubleWritable map(Double input) {
-      return new DoubleWritable(input);
-    }
-  };
-
-  private static final MapFn<BooleanWritable, Boolean> BW_TO_BOOLEAN = new MapFn<BooleanWritable, Boolean>() {
-    @Override
-    public Boolean map(BooleanWritable input) {
-      return input.get();
-    }
-  };
-
-  private static final BooleanWritable TRUE = new BooleanWritable(true);
-  private static final BooleanWritable FALSE = new BooleanWritable(false);
-  private static final MapFn<Boolean, BooleanWritable> BOOLEAN_TO_BW = new MapFn<Boolean, BooleanWritable>() {
-    @Override
-    public BooleanWritable map(Boolean input) {
-      return Boolean.TRUE.equals(input) ? TRUE : FALSE;
-    }
-  };
-
-  private static final MapFn<BytesWritable, ByteBuffer> BW_TO_BB = new MapFn<BytesWritable, ByteBuffer>() {
-    @Override
-    public ByteBuffer map(BytesWritable input) {
-      return ByteBuffer.wrap(input.getBytes(), 0, input.getLength());
-    }
-  };
-
-  private static final MapFn<ByteBuffer, BytesWritable> BB_TO_BW = new MapFn<ByteBuffer, BytesWritable>() {
-    @Override
-    public BytesWritable map(ByteBuffer input) {
-      BytesWritable bw = new BytesWritable();
-      bw.set(input.array(), input.arrayOffset(), input.limit());
-      return bw;
-    }
-  };
-
-  private static <S, W extends Writable> WritableType<S, W> create(Class<S> typeClass, Class<W> writableClass,
-      MapFn<W, S> inputDoFn, MapFn<S, W> outputDoFn) {
-    return new WritableType<S, W>(typeClass, writableClass, inputDoFn, outputDoFn);
-  }
-
-  private static final WritableType<Void, NullWritable> nulls = create(Void.class, NullWritable.class,
-      NULL_WRITABLE_TO_VOID, VOID_TO_NULL_WRITABLE);
-  private static final WritableType<String, Text> strings = create(String.class, Text.class, TEXT_TO_STRING,
-      STRING_TO_TEXT);
-  private static final WritableType<Long, LongWritable> longs = create(Long.class, LongWritable.class, LW_TO_LONG,
-      LONG_TO_LW);
-  private static final WritableType<Integer, IntWritable> ints = create(Integer.class, IntWritable.class, IW_TO_INT,
-      INT_TO_IW);
-  private static final WritableType<Float, FloatWritable> floats = create(Float.class, FloatWritable.class,
-      FW_TO_FLOAT, FLOAT_TO_FW);
-  private static final WritableType<Double, DoubleWritable> doubles = create(Double.class, DoubleWritable.class,
-      DW_TO_DOUBLE, DOUBLE_TO_DW);
-  private static final WritableType<Boolean, BooleanWritable> booleans = create(Boolean.class, BooleanWritable.class,
-      BW_TO_BOOLEAN, BOOLEAN_TO_BW);
-  private static final WritableType<ByteBuffer, BytesWritable> bytes = create(ByteBuffer.class, BytesWritable.class,
-      BW_TO_BB, BB_TO_BW);
-
-  private static final Map<Class<?>, PType<?>> PRIMITIVES = ImmutableMap.<Class<?>, PType<?>> builder()
-      .put(String.class, strings).put(Long.class, longs).put(Integer.class, ints).put(Float.class, floats)
-      .put(Double.class, doubles).put(Boolean.class, booleans).put(ByteBuffer.class, bytes).build();
-
-  private static final Map<Class<?>, WritableType<?, ?>> EXTENSIONS = Maps.newHashMap();
-
-  public static <T> PType<T> getPrimitiveType(Class<T> clazz) {
-    return (PType<T>) PRIMITIVES.get(clazz);
-  }
-
-  public static <T> void register(Class<T> clazz, WritableType<T, ? extends Writable> ptype) {
-    EXTENSIONS.put(clazz, ptype);
-  }
-
-  public static final WritableType<Void, NullWritable> nulls() {
-    return nulls;
-  }
-
-  public static final WritableType<String, Text> strings() {
-    return strings;
-  }
-
-  public static final WritableType<Long, LongWritable> longs() {
-    return longs;
-  }
-
-  public static final WritableType<Integer, IntWritable> ints() {
-    return ints;
-  }
-
-  public static final WritableType<Float, FloatWritable> floats() {
-    return floats;
-  }
-
-  public static final WritableType<Double, DoubleWritable> doubles() {
-    return doubles;
-  }
-
-  public static final WritableType<Boolean, BooleanWritable> booleans() {
-    return booleans;
-  }
-
-  public static final WritableType<ByteBuffer, BytesWritable> bytes() {
-    return bytes;
-  }
-
-  public static final <T, W extends Writable> WritableType<T, W> records(Class<T> clazz) {
-    if (EXTENSIONS.containsKey(clazz)) {
-      return (WritableType<T, W>) EXTENSIONS.get(clazz);
-    }
-    if (Writable.class.isAssignableFrom(clazz)) {
-      return (WritableType<T, W>) writables(clazz.asSubclass(Writable.class));
-    } else {
-      throw new IllegalArgumentException(
-          "Cannot create Writable records from non-Writable class"+ clazz.getCanonicalName());
-    }
-  }
-
-  public static <W extends Writable> WritableType<W, W> writables(Class<W> clazz) {
-    MapFn wIdentity = IdentityFn.getInstance();
-    return new WritableType<W, W>(clazz, clazz, wIdentity, wIdentity);
-  }
-
-  public static <K, V> WritableTableType<K, V> tableOf(PType<K> key, PType<V> value) {
-    if (key instanceof WritableTableType) {
-      WritableTableType wtt = (WritableTableType) key;
-      key = pairs(wtt.getKeyType(), wtt.getValueType());
-    } else if (!(key instanceof WritableType)) {
-      throw new IllegalArgumentException("Key type must be of class WritableType");
-    }
-    if (value instanceof WritableTableType) {
-      WritableTableType wtt = (WritableTableType) value;
-      value = pairs(wtt.getKeyType(), wtt.getValueType());
-    } else if (!(value instanceof WritableType)) {
-      throw new IllegalArgumentException("Value type must be of class WritableType");
-    }
-    return new WritableTableType((WritableType) key, (WritableType) value);
-  }
-
-  /**
-   * For mapping from {@link TupleWritable} instances to {@link Tuple}s.
-   * 
-   */
-  private static class TWTupleMapFn extends MapFn<TupleWritable, Tuple> {
-    private final TupleFactory<?> tupleFactory;
-    private final List<MapFn> fns;
-
-    private transient Object[] values;
-
-    public TWTupleMapFn(TupleFactory<?> tupleFactory, PType<?>... ptypes) {
-      this.tupleFactory = tupleFactory;
-      this.fns = Lists.newArrayList();
-      for (PType ptype : ptypes) {
-        fns.add(ptype.getInputMapFn());
-      }
-    }
-
-    @Override
-    public void configure(Configuration conf) {
-      for (MapFn fn : fns) {
-        fn.configure(conf);
-      }
-    }
-
-    @Override
-    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
-      for (MapFn fn : fns) {
-        fn.setContext(context);
-      }
-    }
-    
-    @Override
-    public void initialize() {
-      for (MapFn fn : fns) {
-        fn.initialize();
-      }
-      // The rest of the methods allocate new
-      // objects each time. However this one
-      // uses Tuple.tuplify which does a copy
-      this.values = new Object[fns.size()];
-      tupleFactory.initialize();
-    }
-
-    @Override
-    public Tuple map(TupleWritable in) {
-      for (int i = 0; i < values.length; i++) {
-        if (in.has(i)) {
-          values[i] = fns.get(i).map(in.get(i));
-        } else {
-          values[i] = null;
-        }
-      }
-      return tupleFactory.makeTuple(values);
-    }
-  }
-
-  /**
-   * For mapping from {@code Tuple}s to {@code TupleWritable}s.
-   * 
-   */
-  private static class TupleTWMapFn extends MapFn<Tuple, TupleWritable> {
-
-    private transient TupleWritable writable;
-    private transient Writable[] values;
-
-    private final List<MapFn> fns;
-
-    public TupleTWMapFn(PType<?>... ptypes) {
-      this.fns = Lists.newArrayList();
-      for (PType<?> ptype : ptypes) {
-        fns.add(ptype.getOutputMapFn());
-      }
-    }
-
-    @Override
-    public void configure(Configuration conf) {
-      for (MapFn fn : fns) {
-        fn.configure(conf);
-      }
-    }
-
-    @Override
-    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
-      for (MapFn fn : fns) {
-        fn.setContext(context);
-      }
-    }
-    
-    @Override
-    public void initialize() {
-      this.values = new Writable[fns.size()];
-      this.writable = new TupleWritable(values);
-      for (MapFn fn : fns) {
-        fn.initialize();
-      }
-    }
-
-    @Override
-    public TupleWritable map(Tuple input) {
-      writable.clearWritten();
-      for (int i = 0; i < input.size(); i++) {
-        Object value = input.get(i);
-        if (value != null) {
-          writable.setWritten(i);
-          values[i] = (Writable) fns.get(i).map(value);
-        }
-      }
-      return writable;
-    }
-  }
-
-  public static <V1, V2> WritableType<Pair<V1, V2>, TupleWritable> pairs(PType<V1> p1, PType<V2> p2) {
-    TWTupleMapFn input = new TWTupleMapFn(TupleFactory.PAIR, p1, p2);
-    TupleTWMapFn output = new TupleTWMapFn(p1, p2);
-    return new WritableType(Pair.class, TupleWritable.class, input, output, p1, p2);
-  }
-
-  public static <V1, V2, V3> WritableType<Tuple3<V1, V2, V3>, TupleWritable> triples(PType<V1> p1, PType<V2> p2,
-      PType<V3> p3) {
-    TWTupleMapFn input = new TWTupleMapFn(TupleFactory.TUPLE3, p1, p2, p3);
-    TupleTWMapFn output = new TupleTWMapFn(p1, p2, p3);
-    return new WritableType(Tuple3.class, TupleWritable.class, input, output, p1, p2, p3);
-  }
-
-  public static <V1, V2, V3, V4> WritableType<Tuple4<V1, V2, V3, V4>, TupleWritable> quads(PType<V1> p1, PType<V2> p2,
-      PType<V3> p3, PType<V4> p4) {
-    TWTupleMapFn input = new TWTupleMapFn(TupleFactory.TUPLE4, p1, p2, p3, p4);
-    TupleTWMapFn output = new TupleTWMapFn(p1, p2, p3, p4);
-    return new WritableType(Tuple4.class, TupleWritable.class, input, output, p1, p2, p3, p4);
-  }
-
-  public static WritableType<TupleN, TupleWritable> tuples(PType... ptypes) {
-    TWTupleMapFn input = new TWTupleMapFn(TupleFactory.TUPLEN, ptypes);
-    TupleTWMapFn output = new TupleTWMapFn(ptypes);
-    return new WritableType(TupleN.class, TupleWritable.class, input, output, ptypes);
-  }
-
-  public static <T extends Tuple> PType<T> tuples(Class<T> clazz, PType... ptypes) {
-    Class[] typeArgs = new Class[ptypes.length];
-    for (int i = 0; i < typeArgs.length; i++) {
-      typeArgs[i] = ptypes[i].getTypeClass();
-    }
-    TupleFactory<T> factory = TupleFactory.create(clazz, typeArgs);
-    TWTupleMapFn input = new TWTupleMapFn(factory, ptypes);
-    TupleTWMapFn output = new TupleTWMapFn(ptypes);
-    return new WritableType(clazz, TupleWritable.class, input, output, ptypes);
-  }
-
-  public static <S, T> PType<T> derived(Class<T> clazz, MapFn<S, T> inputFn, MapFn<T, S> outputFn, PType<S> base) {
-    WritableType<S, ?> wt = (WritableType<S, ?>) base;
-    MapFn input = new CompositeMapFn(wt.getInputMapFn(), inputFn);
-    MapFn output = new CompositeMapFn(outputFn, wt.getOutputMapFn());
-    return new WritableType(clazz, wt.getSerializationClass(), input, output, base.getSubTypes().toArray(new PType[0]));
-  }
-
-  private static class ArrayCollectionMapFn<T> extends MapFn<GenericArrayWritable, Collection<T>> {
-    private final MapFn<Object, T> mapFn;
-
-    public ArrayCollectionMapFn(MapFn<Object, T> mapFn) {
-      this.mapFn = mapFn;
-    }
-
-    @Override
-    public void configure(Configuration conf) {
-      mapFn.configure(conf);
-    }
-
-    @Override
-    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
-      mapFn.setContext(context);
-    }
-    
-    @Override
-    public void initialize() {
-      mapFn.initialize();
-    }
-
-    @Override
-    public Collection<T> map(GenericArrayWritable input) {
-      Collection<T> collection = Lists.newArrayList();
-      for (Writable writable : input.get()) {
-        collection.add(mapFn.map(writable));
-      }
-      return collection;
-    }
-  }
-
-  private static class CollectionArrayMapFn<T> extends MapFn<Collection<T>, GenericArrayWritable> {
-
-    private final Class<? extends Writable> clazz;
-    private final MapFn<T, Object> mapFn;
-
-    public CollectionArrayMapFn(Class<? extends Writable> clazz, MapFn<T, Object> mapFn) {
-      this.clazz = clazz;
-      this.mapFn = mapFn;
-    }
-
-    @Override
-    public void configure(Configuration conf) {
-      mapFn.configure(conf);
-    }
-
-    @Override
-    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
-      mapFn.setContext(context);
-    }
-    
-    @Override
-    public void initialize() {
-      mapFn.initialize();
-    }
-
-    @Override
-    public GenericArrayWritable map(Collection<T> input) {
-      GenericArrayWritable arrayWritable = new GenericArrayWritable(clazz);
-      Writable[] w = new Writable[input.size()];
-      int index = 0;
-      for (T in : input) {
-        w[index++] = ((Writable) mapFn.map(in));
-      }
-      arrayWritable.set(w);
-      return arrayWritable;
-    }
-  }
-
-  public static <T> WritableType<Collection<T>, GenericArrayWritable<T>> collections(PType<T> ptype) {
-    WritableType<T, ?> wt = (WritableType<T, ?>) ptype;
-    return new WritableType(Collection.class, GenericArrayWritable.class, new ArrayCollectionMapFn(wt.getInputMapFn()),
-        new CollectionArrayMapFn(wt.getSerializationClass(), wt.getOutputMapFn()), ptype);
-  }
-
-  private static class MapInputMapFn<T> extends MapFn<TextMapWritable<Writable>, Map<String, T>> {
-    private final MapFn<Writable, T> mapFn;
-
-    public MapInputMapFn(MapFn<Writable, T> mapFn) {
-      this.mapFn = mapFn;
-    }
-
-    @Override
-    public void configure(Configuration conf) {
-      mapFn.configure(conf);
-    }
-
-    @Override
-    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
-      mapFn.setContext(context);
-    }
-    
-    @Override
-    public void initialize() {
-      mapFn.initialize();
-    }
-
-    @Override
-    public Map<String, T> map(TextMapWritable<Writable> input) {
-      Map<String, T> out = Maps.newHashMap();
-      for (Map.Entry<Text, Writable> e : input.entrySet()) {
-        out.put(e.getKey().toString(), mapFn.map(e.getValue()));
-      }
-      return out;
-    }
-  }
-
-  private static class MapOutputMapFn<T> extends MapFn<Map<String, T>, TextMapWritable<Writable>> {
-
-    private final Class<Writable> clazz;
-    private final MapFn<T, Writable> mapFn;
-
-    public MapOutputMapFn(Class<Writable> clazz, MapFn<T, Writable> mapFn) {
-      this.clazz = clazz;
-      this.mapFn = mapFn;
-    }
-
-    @Override
-    public void configure(Configuration conf) {
-      mapFn.configure(conf);
-    }
-
-    @Override
-    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
-      mapFn.setContext(context);
-    }
-    
-    @Override
-    public void initialize() {
-      mapFn.initialize();
-    }
-
-    @Override
-    public TextMapWritable<Writable> map(Map<String, T> input) {
-      TextMapWritable<Writable> tmw = new TextMapWritable<Writable>(clazz);
-      for (Map.Entry<String, T> e : input.entrySet()) {
-        tmw.put(new Text(e.getKey()), mapFn.map(e.getValue()));
-      }
-      return tmw;
-    }
-  }
-
-  public static <T> WritableType<Map<String, T>, MapWritable> maps(PType<T> ptype) {
-    WritableType<T, ?> wt = (WritableType<T, ?>) ptype;
-    return new WritableType(Map.class, TextMapWritable.class, new MapInputMapFn(wt.getInputMapFn()),
-        new MapOutputMapFn(wt.getSerializationClass(), wt.getOutputMapFn()), ptype);
-  }
-
-  public static <T> PType<T> jsons(Class<T> clazz) {
-    return PTypes.jsonString(clazz, WritableTypeFamily.getInstance());
-  }
-
-  // Not instantiable
-  private Writables() {
-  }
-}
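
To show how the static factories above compose, here is a short, hedged sketch (the class name and the particular field choices are illustrative): a table type keyed by strings whose values are pairs, plus collection and map types, all built from the same primitives and suitable for use as the type arguments of a Crunch pipeline.

import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.Map;

import org.apache.crunch.Pair;
import org.apache.crunch.types.PTableType;
import org.apache.crunch.types.PType;
import org.apache.crunch.types.writable.Writables;

public class WritablesCompositionSketch {
  public static void main(String[] args) {
    // A pair of a long count and a raw byte payload; serialized as a TupleWritable.
    PType<Pair<Long, ByteBuffer>> valueType =
        Writables.pairs(Writables.longs(), Writables.bytes());

    // A table type: String keys become Text on the wire, values become TupleWritables.
    PTableType<String, Pair<Long, ByteBuffer>> tableType =
        Writables.tableOf(Writables.strings(), valueType);

    // Collections and maps wrap an element PType in the same style.
    PType<Collection<String>> namesType = Writables.collections(Writables.strings());
    PType<Map<String, Long>> countsType = Writables.maps(Writables.longs());

    System.out.println(tableType + ", " + namesType + ", " + countsType);
  }
}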

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/writable/package-info.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/writable/package-info.java b/crunch/src/main/java/org/apache/crunch/types/writable/package-info.java
deleted file mode 100644
index 7d54743..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/writable/package-info.java
+++ /dev/null
@@ -1,22 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Business object serialization using Hadoop's Writables framework.
- */
-package org.apache.crunch.types.writable;

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/util/CrunchTool.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/util/CrunchTool.java b/crunch/src/main/java/org/apache/crunch/util/CrunchTool.java
deleted file mode 100644
index ea66291..0000000
--- a/crunch/src/main/java/org/apache/crunch/util/CrunchTool.java
+++ /dev/null
@@ -1,118 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.util;
-
-import java.io.Serializable;
-
-import org.apache.crunch.PCollection;
-import org.apache.crunch.PTable;
-import org.apache.crunch.Pipeline;
-import org.apache.crunch.PipelineExecution;
-import org.apache.crunch.PipelineResult;
-import org.apache.crunch.Source;
-import org.apache.crunch.TableSource;
-import org.apache.crunch.Target;
-import org.apache.crunch.impl.mem.MemPipeline;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.io.At;
-import org.apache.crunch.io.From;
-import org.apache.crunch.io.To;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.util.Tool;
-
-/**
- * An extension of the {@code Tool} interface that creates a {@code Pipeline}
- * instance and provides methods for working with the Pipeline from inside of
- * the Tool's run method.
- * 
- */
-public abstract class CrunchTool extends Configured implements Tool, Serializable {
-
-  protected static final From from = new From();
-  protected static final To to = new To();
-  protected static final At at = new At();
-
-  // Pipeline object itself isn't necessarily serializable.
-  private transient Pipeline pipeline;
-
-  public CrunchTool() {
-    this(false);
-  }
-
-  public CrunchTool(boolean inMemory) {
-    this.pipeline = inMemory ? MemPipeline.getInstance() : new MRPipeline(getClass());
-  }
-
-  @Override
-  public void setConf(Configuration conf) {
-    super.setConf(conf);
-    if (conf != null && pipeline != null) {
-      pipeline.setConfiguration(conf);
-    }
-  }
-
-  @Override
-  public Configuration getConf() {
-    return pipeline.getConfiguration();
-  }
-
-  public void enableDebug() {
-    pipeline.enableDebug();
-  }
-
-  public <T> PCollection<T> read(Source<T> source) {
-    return pipeline.read(source);
-  }
-
-  public <K, V> PTable<K, V> read(TableSource<K, V> tableSource) {
-    return pipeline.read(tableSource);
-  }
-
-  public PCollection<String> readTextFile(String pathName) {
-    return pipeline.readTextFile(pathName);
-  }
-
-  public void write(PCollection<?> pcollection, Target target) {
-    pipeline.write(pcollection, target);
-  }
-
-  public void writeTextFile(PCollection<?> pcollection, String pathName) {
-    pipeline.writeTextFile(pcollection, pathName);
-  }
-  
-  public <T> Iterable<T> materialize(PCollection<T> pcollection) {
-    return pipeline.materialize(pcollection);
-  }
-
-  public PipelineResult run() {
-    return pipeline.run();
-  }
-
-  public PipelineExecution runAsync() {
-    return pipeline.runAsync();
-  }
-
-  public PipelineResult done() {
-    return pipeline.done();
-  }
-
-  protected Pipeline getPipeline() {
-    return pipeline;
-  }
-}
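
For orientation, a minimal sketch of how CrunchTool is meant to be subclassed and launched. The CopyTextTool name and the argument handling are illustrative; only the CrunchTool, PipelineResult, and ToolRunner pieces come from the APIs referenced here.

import org.apache.crunch.PCollection;
import org.apache.crunch.PipelineResult;
import org.apache.crunch.util.CrunchTool;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

// Hypothetical subclass: copies a text file through a Crunch pipeline.
public class CopyTextTool extends CrunchTool {
  @Override
  public int run(String[] args) throws Exception {
    PCollection<String> lines = readTextFile(args[0]); // delegates to pipeline.readTextFile
    writeTextFile(lines, args[1]);                     // delegates to pipeline.writeTextFile
    PipelineResult result = done();                    // runs the pipeline and cleans up
    return result.succeeded() ? 0 : 1;
  }

  public static void main(String[] args) throws Exception {
    System.exit(ToolRunner.run(new Configuration(), new CopyTextTool(), args));
  }
}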

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/util/DistCache.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/util/DistCache.java b/crunch/src/main/java/org/apache/crunch/util/DistCache.java
deleted file mode 100644
index 3e49930..0000000
--- a/crunch/src/main/java/org/apache/crunch/util/DistCache.java
+++ /dev/null
@@ -1,231 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.util;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.ObjectInputStream;
-import java.io.ObjectOutputStream;
-import java.net.URI;
-import java.net.URL;
-import java.net.URLDecoder;
-import java.util.Enumeration;
-
-import org.apache.crunch.CrunchRuntimeException;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.filecache.DistributedCache;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-
-/**
- * Provides functions for working with Hadoop's distributed cache. These
- * include:
- * <ul>
- * <li>
- * Functions for working with a job-specific distributed cache of objects, like
- * the serialized runtime nodes in a MapReduce.</li>
- * <li>
- * Functions for adding library jars to the distributed cache, which will be
- * added to the classpath of MapReduce tasks.</li>
- * </ul>
- */
-public class DistCache {
-
-  // Configuration key holding the paths of jars to export to the distributed
-  // cache.
-  private static final String TMPJARS_KEY = "tmpjars";
-
-  public static void write(Configuration conf, Path path, Object value) throws IOException {
-    ObjectOutputStream oos = new ObjectOutputStream(path.getFileSystem(conf).create(path));
-    oos.writeObject(value);
-    oos.close();
-
-    DistributedCache.addCacheFile(path.toUri(), conf);
-  }
-
-  public static Object read(Configuration conf, Path path) throws IOException {
-    URI target = null;
-    for (URI uri : DistributedCache.getCacheFiles(conf)) {
-      if (uri.toString().equals(path.toString())) {
-        target = uri;
-        break;
-      }
-    }
-    Object value = null;
-    if (target != null) {
-      Path targetPath = new Path(target.toString());
-      ObjectInputStream ois = new ObjectInputStream(targetPath.getFileSystem(conf).open(targetPath));
-      try {
-        value = ois.readObject();
-      } catch (ClassNotFoundException e) {
-        throw new CrunchRuntimeException(e);
-      }
-      ois.close();
-    }
-    return value;
-  }
-
-  public static void addCacheFile(Path path, Configuration conf) {
-    DistributedCache.addCacheFile(path.toUri(), conf);
-  }
-  
-  public static Path getPathToCacheFile(Path path, Configuration conf) {
-    try {
-      for (Path localPath : DistributedCache.getLocalCacheFiles(conf)) {
-        if (localPath.toString().endsWith(path.getName())) {
-          return localPath.makeQualified(FileSystem.getLocal(conf));
-        }
-      }
-    } catch (IOException e) {
-      throw new CrunchRuntimeException(e);
-    }
-    return null;
-  }
-  
-  /**
-   * Adds the specified jar to the distributed cache of jobs using the provided
-   * configuration. The jar will be placed on the classpath of tasks run by the
-   * job.
-   * 
-   * @param conf
-   *          The configuration used to add the jar to the distributed cache.
-   * @param jarFile
-   *          The jar file to add to the distributed cache.
-   * @throws IOException
-   *           If the jar file does not exist or there is a problem accessing
-   *           the file.
-   */
-  public static void addJarToDistributedCache(Configuration conf, File jarFile) throws IOException {
-    if (!jarFile.exists()) {
-      throw new IOException("Jar file: " + jarFile.getCanonicalPath() + " does not exist.");
-    }
-    if (!jarFile.getName().endsWith(".jar")) {
-      throw new IllegalArgumentException("File: " + jarFile.getCanonicalPath() + " is not a .jar " + "file.");
-    }
-    // Get a qualified path for the jar.
-    FileSystem fileSystem = FileSystem.getLocal(conf);
-    Path jarPath = new Path(jarFile.getCanonicalPath());
-    String qualifiedPath = jarPath.makeQualified(fileSystem).toString();
-    // Add the jar to the configuration variable.
-    String jarConfiguration = conf.get(TMPJARS_KEY, "");
-    if (!jarConfiguration.isEmpty()) {
-      jarConfiguration += ",";
-    }
-    jarConfiguration += qualifiedPath;
-    conf.set(TMPJARS_KEY, jarConfiguration);
-  }
-
-  /**
-   * Adds the jar at the specified path to the distributed cache of jobs using
-   * the provided configuration. The jar will be placed on the classpath of
-   * tasks run by the job.
-   * 
-   * @param conf
-   *          The configuration used to add the jar to the distributed cache.
-   * @param jarFile
-   *          The path to the jar file to add to the distributed cache.
-   * @throws IOException
-   *           If the jar file does not exist or there is a problem accessing
-   *           the file.
-   */
-  public static void addJarToDistributedCache(Configuration conf, String jarFile) throws IOException {
-    addJarToDistributedCache(conf, new File(jarFile));
-  }
-
-  /**
-   * Finds the path to a jar that contains the class provided, if any. There is
-   * no guarantee that the jar returned will be the first on the classpath to
-   * contain the file. This method is basically lifted out of Hadoop's
-   * {@link org.apache.hadoop.mapred.JobConf} class.
-   * 
-   * @param jarClass
-   *          The class the jar file should contain.
-   * @return The path to a jar file that contains the class, or
-   *         <code>null</code> if no such jar exists.
-   * @throws IOException
-   *           If there is a problem searching for the jar file.
-   */
-  public static String findContainingJar(Class<?> jarClass) throws IOException {
-    ClassLoader loader = jarClass.getClassLoader();
-    String classFile = jarClass.getName().replaceAll("\\.", "/") + ".class";
-    for (Enumeration<URL> itr = loader.getResources(classFile); itr.hasMoreElements();) {
-      URL url = itr.nextElement();
-      if ("jar".equals(url.getProtocol())) {
-        String toReturn = url.getPath();
-        if (toReturn.startsWith("file:")) {
-          toReturn = toReturn.substring("file:".length());
-        }
-        // URLDecoder is a misnamed class, since it actually decodes
-        // x-www-form-urlencoded MIME type rather than actual
-        // URL encoding (which the file path has). Therefore it would
-        // decode +s to ' 's which is incorrect (spaces are actually
-        // either unencoded or encoded as "%20"). Replace +s first, so
-        // that they are kept sacred during the decoding process.
-        toReturn = toReturn.replaceAll("\\+", "%2B");
-        toReturn = URLDecoder.decode(toReturn, "UTF-8");
-        return toReturn.replaceAll("!.*$", "");
-      }
-    }
-    return null;
-  }
-
-  /**
-   * Adds all jars under the specified directory to the distributed cache of
-   * jobs using the provided configuration. The jars will be placed on the
-   * classpath of tasks run by the job. This method does not descend into
-   * subdirectories when adding jars.
-   * 
-   * @param conf
-   *          The configuration used to add jars to the distributed cache.
-   * @param jarDirectory
-   *          A directory containing jar files to add to the distributed cache.
-   * @throws IOException
-   *           If the directory does not exist or there is a problem accessing
-   *           the directory.
-   */
-  public static void addJarDirToDistributedCache(Configuration conf, File jarDirectory) throws IOException {
-    if (!jarDirectory.exists() || !jarDirectory.isDirectory()) {
-      throw new IOException("Jar directory: " + jarDirectory.getCanonicalPath() + " does not "
-          + "exist or is not a directory.");
-    }
-    for (File file : jarDirectory.listFiles()) {
-      if (!file.isDirectory() && file.getName().endsWith(".jar")) {
-        addJarToDistributedCache(conf, file);
-      }
-    }
-  }
-
-  /**
-   * Adds all jars under the directory at the specified path to the distributed
-   * cache of jobs using the provided configuration. The jars will be placed on
-   * the classpath of the tasks run by the job. This method does not descend
-   * into subdirectories when adding jars.
-   * 
-   * @param conf
-   *          The configuration used to add jars to the distributed cache.
-   * @param jarDirectory
-   *          The path to a directory containing jar files to add to the
-   *          distributed cache.
-   * @throws IOException
-   *           If the directory does not exist or there is a problem accessing
-   *           the directory.
-   */
-  public static void addJarDirToDistributedCache(Configuration conf, String jarDirectory) throws IOException {
-    addJarDirToDistributedCache(conf, new File(jarDirectory));
-  }
-}
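
As a usage sketch, the jar-shipping helpers above are typically invoked while building a job's Configuration, before the pipeline runs. The class name and jar paths below are placeholders.

import org.apache.crunch.util.DistCache;
import org.apache.hadoop.conf.Configuration;

public class DistCacheUsageSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    // Ship a single jar; its qualified path is appended to the "tmpjars" configuration key.
    DistCache.addJarToDistributedCache(conf, "/path/to/lib/my-udfs.jar");   // placeholder path

    // Ship every .jar directly under a directory (no recursion into subdirectories).
    DistCache.addJarDirToDistributedCache(conf, "/path/to/lib");            // placeholder path

    // Locate and ship the jar containing a given class, e.g. the job's own code.
    String jarPath = DistCache.findContainingJar(DistCacheUsageSketch.class);
    if (jarPath != null) {
      DistCache.addJarToDistributedCache(conf, jarPath);
    }
  }
}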

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/util/PartitionUtils.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/util/PartitionUtils.java b/crunch/src/main/java/org/apache/crunch/util/PartitionUtils.java
deleted file mode 100644
index da8db6b..0000000
--- a/crunch/src/main/java/org/apache/crunch/util/PartitionUtils.java
+++ /dev/null
@@ -1,34 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.util;
-
-import org.apache.crunch.PCollection;
-import org.apache.hadoop.conf.Configuration;
-
-/**
- * Helpers for computing a recommended number of reduce partitions for a
- * {@code PCollection}, based on its size and a configurable bytes-per-task threshold.
- */
-public class PartitionUtils {
-  public static final String BYTES_PER_REDUCE_TASK = "crunch.bytes.per.reduce.task";
-  public static final long DEFAULT_BYTES_PER_REDUCE_TASK = 1000L * 1000L * 1000L;
-  
-  public static <T> int getRecommendedPartitions(PCollection<T> pcollection, Configuration conf) {
-    long bytesPerTask = conf.getLong(BYTES_PER_REDUCE_TASK, DEFAULT_BYTES_PER_REDUCE_TASK);
-    return 1 + (int) (pcollection.getSize() / bytesPerTask);
-  }
-}
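
To make the arithmetic concrete: with the default of 1,000,000,000 bytes per reduce task, a PCollection reporting 2,500,000,000 bytes gets 1 + (2,500,000,000 / 1,000,000,000) = 3 recommended partitions. A hedged sketch of overriding the threshold follows; the PCollection is assumed to come from an existing pipeline and the helper name is illustrative.

import org.apache.crunch.PCollection;
import org.apache.crunch.util.PartitionUtils;
import org.apache.hadoop.conf.Configuration;

public class PartitionTuningSketch {
  // Halve the default 1 GB per reducer, roughly doubling the recommended partition count.
  static <T> int partitionsWithSmallerReducers(PCollection<T> pcollection) {
    Configuration conf = new Configuration();
    conf.setLong(PartitionUtils.BYTES_PER_REDUCE_TASK, 500L * 1000L * 1000L);
    return PartitionUtils.getRecommendedPartitions(pcollection, conf);
  }
}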

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/util/Tuples.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/util/Tuples.java b/crunch/src/main/java/org/apache/crunch/util/Tuples.java
deleted file mode 100644
index 9c8d7bd..0000000
--- a/crunch/src/main/java/org/apache/crunch/util/Tuples.java
+++ /dev/null
@@ -1,150 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.util;
-
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.crunch.Pair;
-import org.apache.crunch.Tuple3;
-import org.apache.crunch.Tuple4;
-import org.apache.crunch.TupleN;
-
-import com.google.common.collect.Lists;
-import com.google.common.collect.UnmodifiableIterator;
-
-/**
- * Utilities for working with subclasses of the {@code Tuple} interface.
- * 
- */
-public class Tuples {
-
-  private static abstract class TuplifyIterator<T> extends UnmodifiableIterator<T> {
-    protected List<Iterator<?>> iterators;
-
-    public TuplifyIterator(Iterator<?>... iterators) {
-      this.iterators = Lists.newArrayList(iterators);
-    }
-
-    @Override
-    public boolean hasNext() {
-      for (Iterator<?> iter : iterators) {
-        if (!iter.hasNext()) {
-          return false;
-        }
-      }
-      return true;
-    }
-
-    protected Object next(int index) {
-      return iterators.get(index).next();
-    }
-  }
-
-  public static class PairIterable<S, T> implements Iterable<Pair<S, T>> {
-    private final Iterable<S> first;
-    private final Iterable<T> second;
-
-    public PairIterable(Iterable<S> first, Iterable<T> second) {
-      this.first = first;
-      this.second = second;
-    }
-
-    @Override
-    public Iterator<Pair<S, T>> iterator() {
-      return new TuplifyIterator<Pair<S, T>>(first.iterator(), second.iterator()) {
-        @Override
-        public Pair<S, T> next() {
-          return Pair.of((S) next(0), (T) next(1));
-        }
-      };
-    }
-  }
-
-  public static class TripIterable<A, B, C> implements Iterable<Tuple3<A, B, C>> {
-    private final Iterable<A> first;
-    private final Iterable<B> second;
-    private final Iterable<C> third;
-
-    public TripIterable(Iterable<A> first, Iterable<B> second, Iterable<C> third) {
-      this.first = first;
-      this.second = second;
-      this.third = third;
-    }
-
-    @Override
-    public Iterator<Tuple3<A, B, C>> iterator() {
-      return new TuplifyIterator<Tuple3<A, B, C>>(first.iterator(), second.iterator(), third.iterator()) {
-        @Override
-        public Tuple3<A, B, C> next() {
-          return new Tuple3<A, B, C>((A) next(0), (B) next(1), (C) next(2));
-        }
-      };
-    }
-  }
-
-  public static class QuadIterable<A, B, C, D> implements Iterable<Tuple4<A, B, C, D>> {
-    private final Iterable<A> first;
-    private final Iterable<B> second;
-    private final Iterable<C> third;
-    private final Iterable<D> fourth;
-
-    public QuadIterable(Iterable<A> first, Iterable<B> second, Iterable<C> third, Iterable<D> fourth) {
-      this.first = first;
-      this.second = second;
-      this.third = third;
-      this.fourth = fourth;
-    }
-
-    @Override
-    public Iterator<Tuple4<A, B, C, D>> iterator() {
-      return new TuplifyIterator<Tuple4<A, B, C, D>>(first.iterator(), second.iterator(), third.iterator(),
-          fourth.iterator()) {
-        @Override
-        public Tuple4<A, B, C, D> next() {
-          return new Tuple4<A, B, C, D>((A) next(0), (B) next(1), (C) next(2), (D) next(3));
-        }
-      };
-    }
-  }
-
-  public static class TupleNIterable implements Iterable<TupleN> {
-    private final Iterator<?>[] iters;
-
-    public TupleNIterable(Iterable<?>... iterables) {
-      this.iters = new Iterator[iterables.length];
-      for (int i = 0; i < iters.length; i++) {
-        iters[i] = iterables[i].iterator();
-      }
-    }
-
-    @Override
-    public Iterator<TupleN> iterator() {
-      return new TuplifyIterator<TupleN>(iters) {
-        @Override
-        public TupleN next() {
-          Object[] values = new Object[iters.length];
-          for (int i = 0; i < values.length; i++) {
-            values[i] = next(i);
-          }
-          return new TupleN(values);
-        }
-      };
-    }
-  }
-}
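
The Tuples helpers removed above zip parallel Iterables into a single Iterable of tuples,
stopping as soon as any input runs out of elements. A minimal usage sketch, not part of
this patch; the class and variable names below are illustrative only:

    import com.google.common.collect.ImmutableList;
    import org.apache.crunch.Pair;
    import org.apache.crunch.util.Tuples;

    public class TuplesZipExample {
      public static void main(String[] args) {
        Iterable<String> names = ImmutableList.of("a", "b");
        Iterable<Integer> counts = ImmutableList.of(1, 2, 3);
        // Prints "a -> 1" and "b -> 2"; the third count is dropped once "names" is exhausted.
        for (Pair<String, Integer> p : new Tuples.PairIterable<String, Integer>(names, counts)) {
          System.out.println(p.first() + " -> " + p.second());
        }
      }
    }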

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/util/package-info.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/util/package-info.java b/crunch/src/main/java/org/apache/crunch/util/package-info.java
deleted file mode 100644
index 94d79a1..0000000
--- a/crunch/src/main/java/org/apache/crunch/util/package-info.java
+++ /dev/null
@@ -1,22 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * An assorted set of utilities.
- */
-package org.apache.crunch.util;

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/crunch/src/main/resources/log4j.properties b/crunch/src/main/resources/log4j.properties
deleted file mode 100644
index 506b527..0000000
--- a/crunch/src/main/resources/log4j.properties
+++ /dev/null
@@ -1,24 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# ***** Set the org.apache.crunch logger level to INFO and its only appender to A.
-log4j.logger.org.apache.crunch=info, A
-
-# ***** A is set to be a ConsoleAppender.
-log4j.appender.A=org.apache.log4j.ConsoleAppender
-# ***** A uses PatternLayout.
-log4j.appender.A.layout=org.apache.log4j.PatternLayout
-log4j.appender.A.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/site/site.xml
----------------------------------------------------------------------
diff --git a/crunch/src/site/site.xml b/crunch/src/site/site.xml
deleted file mode 100644
index 73fbd17..0000000
--- a/crunch/src/site/site.xml
+++ /dev/null
@@ -1,34 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one or more
-  contributor license agreements.  See the NOTICE file distributed with
-  this work for additional information regarding copyright ownership.
-  The ASF licenses this file to You under the Apache License, Version 2.0
-  (the "License"); you may not use this file except in compliance with
-  the License.  You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
--->
-<project name="${project.name}"
-  xmlns="http://maven.apache.org/DECORATION/1.3.0"
-  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-  xsi:schemaLocation="http://maven.apache.org/DECORATION/1.3.0
-                      http://maven.apache.org/xsd/decoration-1.3.0.xsd">
-
-  <body>
-    <!-- Note: Breadcrumbs for Doxia's Markdown parser are currently broken,
-               see https://jira.codehaus.org/browse/DOXIA-472 -->
-    <breadcrumbs>
-      <item name="Apache" href="http://www.apache.org/index.html" />
-      <item name="Crunch" href="../index.html"/>
-    </breadcrumbs>
-
-  </body>
-
-</project>

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/avro/employee.avsc
----------------------------------------------------------------------
diff --git a/crunch/src/test/avro/employee.avsc b/crunch/src/test/avro/employee.avsc
deleted file mode 100644
index 35726e1..0000000
--- a/crunch/src/test/avro/employee.avsc
+++ /dev/null
@@ -1,26 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-{
-"namespace": "org.apache.crunch.test",
-"name": "Employee",
-"type": "record",
-"fields": [
-  {"name": "name", "type": ["string", "null"] },
-  {"name": "salary", "type": "int"},
-  {"name": "department", "type": ["string", "null"] } ]
-} 
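
A minimal sketch of how test code can turn this schema into a Crunch type, assuming the
Avro compiler has generated the specific-record class org.apache.crunch.test.Employee from
it; the example class name is illustrative and not part of this patch:

    import org.apache.crunch.test.Employee;
    import org.apache.crunch.types.avro.AvroType;
    import org.apache.crunch.types.avro.Avros;

    public class EmployeeTypeExample {
      // Specific-record Avro type backed by the Employee schema above.
      static final AvroType<Employee> EMPLOYEE_TYPE = Avros.records(Employee.class);
    }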

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/avro/person.avsc
----------------------------------------------------------------------
diff --git a/crunch/src/test/avro/person.avsc b/crunch/src/test/avro/person.avsc
deleted file mode 100644
index babd808..0000000
--- a/crunch/src/test/avro/person.avsc
+++ /dev/null
@@ -1,26 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-{
-"namespace": "org.apache.crunch.test",
-"name": "Person",
-"type": "record",
-"fields": [
-  {"name": "name", "type": ["string", "null"] },
-  {"name": "age", "type": "int"},
-  {"name": "siblingnames", "type": {"type": "array", "items": "string"}} ]
-} 

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/AndFnTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/AndFnTest.java b/crunch/src/test/java/org/apache/crunch/AndFnTest.java
deleted file mode 100644
index 4b00874..0000000
--- a/crunch/src/test/java/org/apache/crunch/AndFnTest.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.verify;
-import static org.mockito.Mockito.when;
-
-import org.apache.crunch.FilterFn.AndFn;
-import org.apache.hadoop.mapreduce.TaskInputOutputContext;
-import org.junit.Before;
-import org.junit.Test;
-
-public class AndFnTest {
-
-  private FilterFn<Integer> fnA;
-  private FilterFn<Integer> fnB;
-  private AndFn<Integer> andFn;
-
-  @Before
-  public void setUp() {
-    fnA = mock(FilterFn.class);
-    fnB = mock(FilterFn.class);
-    andFn = new AndFn(fnA, fnB);
-  }
-
-  @Test
-  public void testSetContext() {
-    TaskInputOutputContext<?, ?, ?, ?> context = mock(TaskInputOutputContext.class);
-    andFn.setContext(context);
-
-    verify(fnA).setContext(context);
-    verify(fnB).setContext(context);
-  }
-
-  @Test
-  public void testAccept_False() {
-    when(fnA.accept(1)).thenReturn(true);
-    when(fnB.accept(1)).thenReturn(false);
-
-    assertFalse(andFn.accept(1));
-  }
-
-  @Test
-  public void testAccept_True() {
-    when(fnA.accept(1)).thenReturn(true);
-    when(fnB.accept(1)).thenReturn(true);
-
-    assertTrue(andFn.accept(1));
-  }
-
-  @Test
-  public void testCleanup() {
-    andFn.cleanup(mock(Emitter.class));
-
-    verify(fnA).cleanup();
-    verify(fnB).cleanup();
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/CombineFnTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/CombineFnTest.java b/crunch/src/test/java/org/apache/crunch/CombineFnTest.java
deleted file mode 100644
index 39548e2..0000000
--- a/crunch/src/test/java/org/apache/crunch/CombineFnTest.java
+++ /dev/null
@@ -1,222 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import static org.apache.crunch.CombineFn.MAX_BIGINTS;
-import static org.apache.crunch.CombineFn.MAX_DOUBLES;
-import static org.apache.crunch.CombineFn.MAX_FLOATS;
-import static org.apache.crunch.CombineFn.MAX_INTS;
-import static org.apache.crunch.CombineFn.MAX_LONGS;
-import static org.apache.crunch.CombineFn.MIN_BIGINTS;
-import static org.apache.crunch.CombineFn.MIN_DOUBLES;
-import static org.apache.crunch.CombineFn.MIN_FLOATS;
-import static org.apache.crunch.CombineFn.MIN_INTS;
-import static org.apache.crunch.CombineFn.MIN_LONGS;
-import static org.apache.crunch.CombineFn.SUM_BIGINTS;
-import static org.apache.crunch.CombineFn.SUM_DOUBLES;
-import static org.apache.crunch.CombineFn.SUM_FLOATS;
-import static org.apache.crunch.CombineFn.SUM_INTS;
-import static org.apache.crunch.CombineFn.SUM_LONGS;
-import static org.junit.Assert.assertEquals;
-
-import java.math.BigInteger;
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.crunch.CombineFn.Aggregator;
-import org.apache.crunch.CombineFn.AggregatorFactory;
-import org.apache.crunch.CombineFn.FirstNAggregator;
-import org.apache.crunch.CombineFn.LastNAggregator;
-import org.apache.crunch.CombineFn.MaxNAggregator;
-import org.apache.crunch.CombineFn.MinNAggregator;
-import org.apache.crunch.CombineFn.PairAggregator;
-import org.apache.crunch.CombineFn.QuadAggregator;
-import org.apache.crunch.CombineFn.StringConcatAggregator;
-import org.apache.crunch.CombineFn.TripAggregator;
-import org.apache.crunch.CombineFn.TupleNAggregator;
-import org.junit.Test;
-
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Iterables;
-
-public class CombineFnTest {
-
-  private <T> Iterable<T> applyAggregator(AggregatorFactory<T> a, Iterable<T> values) {
-    return applyAggregator(a.create(), values);
-  }
-
-  private <T> Iterable<T> applyAggregator(Aggregator<T> a, Iterable<T> values) {
-    a.reset();
-    for (T value : values) {
-      a.update(value);
-    }
-    return a.results();
-  }
-
-  @Test
-  public void testSums() {
-    assertEquals(ImmutableList.of(1775L), applyAggregator(SUM_LONGS, ImmutableList.of(29L, 17L, 1729L)));
-
-    assertEquals(ImmutableList.of(1765L), applyAggregator(SUM_LONGS, ImmutableList.of(29L, 7L, 1729L)));
-
-    assertEquals(ImmutableList.of(1775), applyAggregator(SUM_INTS, ImmutableList.of(29, 17, 1729)));
-
-    assertEquals(ImmutableList.of(1775.0f), applyAggregator(SUM_FLOATS, ImmutableList.of(29f, 17f, 1729f)));
-
-    assertEquals(ImmutableList.of(1775.0), applyAggregator(SUM_DOUBLES, ImmutableList.of(29.0, 17.0, 1729.0)));
-
-    assertEquals(
-        ImmutableList.of(new BigInteger("1775")),
-        applyAggregator(SUM_BIGINTS,
-            ImmutableList.of(new BigInteger("29"), new BigInteger("17"), new BigInteger("1729"))));
-  }
-
-  @Test
-  public void testMax() {
-    assertEquals(ImmutableList.of(1729L), applyAggregator(MAX_LONGS, ImmutableList.of(29L, 17L, 1729L)));
-
-    assertEquals(ImmutableList.of(1729), applyAggregator(MAX_INTS, ImmutableList.of(29, 17, 1729)));
-
-    assertEquals(ImmutableList.of(1729.0f), applyAggregator(MAX_FLOATS, ImmutableList.of(29f, 17f, 1729f)));
-
-    assertEquals(ImmutableList.of(1729.0), applyAggregator(MAX_DOUBLES, ImmutableList.of(29.0, 17.0, 1729.0)));
-
-    assertEquals(ImmutableList.of(1745.0f), applyAggregator(MAX_FLOATS, ImmutableList.of(29f, 1745f, 17f, 1729f)));
-
-    assertEquals(
-        ImmutableList.of(new BigInteger("1729")),
-        applyAggregator(MAX_BIGINTS,
-            ImmutableList.of(new BigInteger("29"), new BigInteger("17"), new BigInteger("1729"))));
-  }
-
-  @Test
-  public void testMin() {
-    assertEquals(ImmutableList.of(17L), applyAggregator(MIN_LONGS, ImmutableList.of(29L, 17L, 1729L)));
-
-    assertEquals(ImmutableList.of(17), applyAggregator(MIN_INTS, ImmutableList.of(29, 17, 1729)));
-
-    assertEquals(ImmutableList.of(17.0f), applyAggregator(MIN_FLOATS, ImmutableList.of(29f, 17f, 1729f)));
-
-    assertEquals(ImmutableList.of(17.0), applyAggregator(MIN_DOUBLES, ImmutableList.of(29.0, 17.0, 1729.0)));
-
-    assertEquals(ImmutableList.of(29), applyAggregator(MIN_INTS, ImmutableList.of(29, 170, 1729)));
-
-    assertEquals(
-        ImmutableList.of(new BigInteger("17")),
-        applyAggregator(MIN_BIGINTS,
-            ImmutableList.of(new BigInteger("29"), new BigInteger("17"), new BigInteger("1729"))));
-  }
-
-  @Test
-  public void testMaxN() {
-    assertEquals(ImmutableList.of(98, 1009),
-        applyAggregator(new MaxNAggregator<Integer>(2), ImmutableList.of(17, 34, 98, 29, 1009)));
-  }
-
-  @Test
-  public void testMinN() {
-    assertEquals(ImmutableList.of(17, 29),
-        applyAggregator(new MinNAggregator<Integer>(2), ImmutableList.of(17, 34, 98, 29, 1009)));
-  }
-
-  @Test
-  public void testFirstN() {
-    assertEquals(ImmutableList.of(17, 34),
-        applyAggregator(new FirstNAggregator<Integer>(2), ImmutableList.of(17, 34, 98, 29, 1009)));
-  }
-
-  @Test
-  public void testLastN() {
-    assertEquals(ImmutableList.of(29, 1009),
-        applyAggregator(new LastNAggregator<Integer>(2), ImmutableList.of(17, 34, 98, 29, 1009)));
-  }
-
-  @Test
-  public void testPairs() {
-    List<Pair<Long, Double>> input = ImmutableList.of(Pair.of(1720L, 17.29), Pair.of(9L, -3.14));
-    Aggregator<Pair<Long, Double>> a = new PairAggregator<Long, Double>(SUM_LONGS.create(), MIN_DOUBLES.create());
-    assertEquals(Pair.of(1729L, -3.14), Iterables.getOnlyElement(applyAggregator(a, input)));
-  }
-
-  @Test
-  public void testPairsTwoLongs() {
-    List<Pair<Long, Long>> input = ImmutableList.of(Pair.of(1720L, 1L), Pair.of(9L, 19L));
-    Aggregator<Pair<Long, Long>> a = new PairAggregator<Long, Long>(SUM_LONGS.create(), SUM_LONGS.create());
-    assertEquals(Pair.of(1729L, 20L), Iterables.getOnlyElement(applyAggregator(a, input)));
-  }
-
-  @Test
-  public void testTrips() {
-    List<Tuple3<Float, Double, Double>> input = ImmutableList.of(Tuple3.of(17.29f, 12.2, 0.1),
-        Tuple3.of(3.0f, 1.2, 3.14), Tuple3.of(-1.0f, 14.5, -0.98));
-    Aggregator<Tuple3<Float, Double, Double>> a = new TripAggregator<Float, Double, Double>(MAX_FLOATS.create(),
-        MAX_DOUBLES.create(), MIN_DOUBLES.create());
-    assertEquals(Tuple3.of(17.29f, 14.5, -0.98), Iterables.getOnlyElement(applyAggregator(a, input)));
-  }
-
-  @Test
-  public void testQuads() {
-    List<Tuple4<Float, Double, Double, Integer>> input = ImmutableList.of(Tuple4.of(17.29f, 12.2, 0.1, 1),
-        Tuple4.of(3.0f, 1.2, 3.14, 2), Tuple4.of(-1.0f, 14.5, -0.98, 3));
-    Aggregator<Tuple4<Float, Double, Double, Integer>> a = new QuadAggregator<Float, Double, Double, Integer>(
-        MAX_FLOATS.create(), MAX_DOUBLES.create(), MIN_DOUBLES.create(), SUM_INTS.create());
-    assertEquals(Tuple4.of(17.29f, 14.5, -0.98, 6), Iterables.getOnlyElement(applyAggregator(a, input)));
-  }
-
-  @Test
-  public void testTupleN() {
-    List<TupleN> input = ImmutableList.of(new TupleN(1, 3.0, 1, 2.0, 4L), new TupleN(4, 17.0, 1, 9.7, 12L));
-    Aggregator<TupleN> a = new TupleNAggregator(MIN_INTS.create(), SUM_DOUBLES.create(), MAX_INTS.create(),
-        MIN_DOUBLES.create(), MAX_LONGS.create());
-    assertEquals(new TupleN(1, 20.0, 1, 2.0, 12L), Iterables.getOnlyElement(applyAggregator(a, input)));
-  }
-
-  @Test
-  public void testConcatenation() {
-    String[] arrayNull = new String[] { null, "" };
-    assertEquals(ImmutableList.of("foofoobarbar"), applyAggregator(
-        new StringConcatAggregator("", true), ImmutableList.of("foo", "foobar", "bar")));
-    assertEquals(ImmutableList.of("foo/foobar/bar"), applyAggregator(
-        new StringConcatAggregator("/", false), ImmutableList.of("foo", "foobar", "bar")));
-    assertEquals(ImmutableList.of("  "), applyAggregator(
-        new StringConcatAggregator(" ", true), ImmutableList.of(" ", "")));
-    assertEquals(ImmutableList.of(""), applyAggregator(
-        new StringConcatAggregator(" ", true), Arrays.asList(arrayNull)));
-    assertEquals(ImmutableList.of("foo bar"), applyAggregator(
-        new StringConcatAggregator(" ", true, 20, 3), ImmutableList.of("foo", "foobar", "bar")));
-    assertEquals(ImmutableList.of("foo foobar"), applyAggregator(
-        new StringConcatAggregator(" ", true, 10, 6), ImmutableList.of("foo", "foobar", "bar")));
-    assertEquals(ImmutableList.of("foo bar"), applyAggregator(
-        new StringConcatAggregator(" ", true, 9, 6), ImmutableList.of("foo", "foobar", "bar")));
-  }
-
-  @Test
-  public void testConcatenationReset() {
-    StringConcatAggregator a = new StringConcatAggregator(" ", true, 10, 6);
-
-    assertEquals(ImmutableList.of("foo foobar"), applyAggregator(a, ImmutableList.of("foo", "foobar", "bar")));
-    assertEquals(ImmutableList.of("foo foobar"), applyAggregator(a, ImmutableList.of("foo", "foobar", "bar")));
-  }
-
-  @Test(expected = NullPointerException.class)
-  public void testConcatenationNullException() {
-    String[] arrayNull = new String[] { null, "" };
-    assertEquals(ImmutableList.of(""), applyAggregator(
-        new StringConcatAggregator(" ", false), Arrays.asList(arrayNull)));
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/NotFnTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/NotFnTest.java b/crunch/src/test/java/org/apache/crunch/NotFnTest.java
deleted file mode 100644
index 8af17a2..0000000
--- a/crunch/src/test/java/org/apache/crunch/NotFnTest.java
+++ /dev/null
@@ -1,72 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.verify;
-import static org.mockito.Mockito.when;
-
-import org.apache.crunch.FilterFn.NotFn;
-import org.apache.hadoop.mapreduce.TaskInputOutputContext;
-import org.junit.Before;
-import org.junit.Test;
-
-public class NotFnTest {
-  
-  private FilterFn<Integer> base;
-  private NotFn<Integer> notFn;
-  
-  @Before
-  public void setUp() {
-    base = mock(FilterFn.class);
-    notFn = new NotFn(base);
-  }
-
-  @Test
-  public void testSetContext() {
-    TaskInputOutputContext<?, ?, ?, ?> context = mock(TaskInputOutputContext.class);
-    
-    notFn.setContext(context);
-    
-    verify(base).setContext(context);
-  }
-
-  @Test
-  public void testAccept_True() {
-    when(base.accept(1)).thenReturn(true);
-    
-    assertFalse(notFn.accept(1));
-  }
-  
-  @Test
-  public void testAccept_False() {
-    when(base.accept(1)).thenReturn(false);
-    
-    assertTrue(notFn.accept(1));
-  }
-
-  @Test
-  public void testCleanupEmitterOfT() {
-    notFn.cleanup(mock(Emitter.class));
-    
-    verify(base).cleanup();
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/OrFnTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/OrFnTest.java b/crunch/src/test/java/org/apache/crunch/OrFnTest.java
deleted file mode 100644
index fde2376..0000000
--- a/crunch/src/test/java/org/apache/crunch/OrFnTest.java
+++ /dev/null
@@ -1,78 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.verify;
-import static org.mockito.Mockito.when;
-
-import org.apache.crunch.FilterFn.OrFn;
-import org.apache.hadoop.mapreduce.TaskInputOutputContext;
-import org.junit.Before;
-import org.junit.Test;
-
-public class OrFnTest {
-
-  private FilterFn<Integer> fnA;
-  private FilterFn<Integer> fnB;
-  private OrFn<Integer> orFn;
-
-  @Before
-  public void setUp() {
-    fnA = mock(FilterFn.class);
-    fnB = mock(FilterFn.class);
-    orFn = new OrFn(fnA, fnB);
-  }
-
-  @Test
-  public void testSetContext() {
-    TaskInputOutputContext<?, ?, ?, ?> context = mock(TaskInputOutputContext.class);
-
-    orFn.setContext(context);
-
-    verify(fnA).setContext(context);
-    verify(fnB).setContext(context);
-  }
-
-  @Test
-  public void testAccept_True() {
-    when(fnA.accept(1)).thenReturn(false);
-    when(fnB.accept(1)).thenReturn(true);
-
-    assertTrue(orFn.accept(1));
-  }
-
-  @Test
-  public void testAccept_False() {
-    when(fnA.accept(1)).thenReturn(false);
-    when(fnB.accept(1)).thenReturn(false);
-
-    assertFalse(orFn.accept(1));
-  }
-
-  @Test
-  public void testCleanupEmitterOfT() {
-    orFn.cleanup(mock(Emitter.class));
-
-    verify(fnA).cleanup();
-    verify(fnB).cleanup();
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/PairTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/PairTest.java b/crunch/src/test/java/org/apache/crunch/PairTest.java
deleted file mode 100644
index 106413c..0000000
--- a/crunch/src/test/java/org/apache/crunch/PairTest.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import org.junit.Test;
-
-public class PairTest {
-
-  @Test
-  public void testPairConstructor() {
-    Pair<String, Integer> pair = new Pair<String, Integer>("brock", 45);
-    test(pair);
-  }
-
-  @Test
-  public void testPairOf() {
-    Pair<String, Integer> pair = Pair.of("brock", 45);
-    test(pair);
-  }
-
-  protected void test(Pair<String, Integer> pair) {
-    assertTrue(pair.size() == 2);
-
-    assertEquals("brock", pair.first());
-    assertEquals(new Integer(45), pair.second());
-    assertEquals(Pair.of("brock", 45), pair);
-
-    assertEquals("brock", pair.get(0));
-    assertEquals(new Integer(45), pair.get(1));
-
-    try {
-      pair.get(-1);
-      fail();
-    } catch (IndexOutOfBoundsException e) {
-      // expected
-    }
-  }
-
-  @Test
-  public void testPairComparisons() {
-    assertEquals(0, Pair.of(null, null).compareTo(Pair.of(null, null)));
-    assertEquals(0, Pair.of(1, 2).compareTo(Pair.of(1, 2)));
-    assertTrue(Pair.of(2, "a").compareTo(Pair.of(1, "a")) > 0);
-    assertTrue(Pair.of("a", 2).compareTo(Pair.of("a", 1)) > 0);
-    assertTrue(Pair.of(null, 17).compareTo(Pair.of(null, 29)) < 0);
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/TupleTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/TupleTest.java b/crunch/src/test/java/org/apache/crunch/TupleTest.java
deleted file mode 100644
index b07ec3f..0000000
--- a/crunch/src/test/java/org/apache/crunch/TupleTest.java
+++ /dev/null
@@ -1,139 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import org.apache.crunch.types.TupleFactory;
-import org.junit.Test;
-
-public class TupleTest {
-  private String first = "foo";
-  private Integer second = 1729;
-  private Double third = 64.2;
-  private Boolean fourth = false;
-  private Float fifth = 17.29f;
-
-  @Test
-  public void testTuple3() {
-    Tuple3<String, Integer, Double> t = new Tuple3<String, Integer, Double>(first, second, third);
-    assertEquals(3, t.size());
-    assertEquals(first, t.first());
-    assertEquals(second, t.second());
-    assertEquals(third, t.third());
-    assertEquals(first, t.get(0));
-    assertEquals(second, t.get(1));
-    assertEquals(third, t.get(2));
-    try {
-      t.get(-1);
-      fail();
-    } catch (IndexOutOfBoundsException e) {
-      // expected
-    }
-  }
-
-  @Test
-  public void testTuple3Equality() {
-    Tuple3<String, Integer, Double> t = new Tuple3<String, Integer, Double>(first, second, third);
-    assertTrue(t.equals(new Tuple3(first, second, third)));
-    assertFalse(t.equals(new Tuple3(first, null, third)));
-    assertFalse((new Tuple3(null, null, null)).equals(t));
-    assertTrue((new Tuple3(first, null, null)).equals(new Tuple3(first, null, null)));
-  }
-
-  @Test
-  public void testTuple4() {
-    Tuple4<String, Integer, Double, Boolean> t = new Tuple4<String, Integer, Double, Boolean>(first, second, third,
-        fourth);
-    assertEquals(4, t.size());
-    assertEquals(first, t.first());
-    assertEquals(second, t.second());
-    assertEquals(third, t.third());
-    assertEquals(fourth, t.fourth());
-    assertEquals(first, t.get(0));
-    assertEquals(second, t.get(1));
-    assertEquals(third, t.get(2));
-    assertEquals(fourth, t.get(3));
-    try {
-      t.get(-1);
-      fail();
-    } catch (IndexOutOfBoundsException e) {
-      // expected
-    }
-  }
-
-  @Test
-  public void testTuple4Equality() {
-    Tuple4<String, Integer, Double, Boolean> t = new Tuple4<String, Integer, Double, Boolean>(first, second, third,
-        fourth);
-    assertFalse(t.equals(new Tuple3(first, second, third)));
-    assertFalse(t.equals(new Tuple4(first, null, third, null)));
-    assertFalse((new Tuple4(null, null, null, null)).equals(t));
-    assertTrue((new Tuple4(first, null, third, null)).equals(new Tuple4(first, null, third, null)));
-  }
-
-  @Test
-  public void testTupleN() {
-    TupleN t = new TupleN(first, second, third, fourth, fifth);
-    assertEquals(5, t.size());
-    assertEquals(first, t.get(0));
-    assertEquals(second, t.get(1));
-    assertEquals(third, t.get(2));
-    assertEquals(fourth, t.get(3));
-    assertEquals(fifth, t.get(4));
-    try {
-      t.get(-1);
-      fail();
-    } catch (IndexOutOfBoundsException e) {
-      // expected
-    }
-  }
-
-  @Test
-  public void testTupleNEquality() {
-    TupleN t = new TupleN(first, second, third, fourth, fifth);
-    assertTrue(t.equals(new TupleN(first, second, third, fourth, fifth)));
-    assertFalse(t.equals(new TupleN(first, null, third, null)));
-    assertFalse((new TupleN(null, null, null, null, null)).equals(t));
-    assertTrue((new TupleN(first, second, third, null, null)).equals(new TupleN(first, second, third, null, null)));
-  }
-
-  @Test
-  public void testTupleFactory() {
-    checkTuple(TupleFactory.PAIR.makeTuple("a", "b"), Pair.class, "a", "b");
-    checkTuple(TupleFactory.TUPLE3.makeTuple("a", "b", "c"), Tuple3.class, "a", "b", "c");
-    checkTuple(TupleFactory.TUPLE4.makeTuple("a", "b", "c", "d"), Tuple4.class, "a", "b", "c", "d");
-    checkTuple(TupleFactory.TUPLEN.makeTuple("a", "b", "c", "d", "e"), TupleN.class, "a", "b", "c", "d", "e");
-
-    checkTuple(TupleFactory.TUPLEN.makeTuple("a", "b"), TupleN.class, "a", "b");
-    checkTuple(TupleFactory.TUPLEN.makeTuple("a", "b", "c"), TupleN.class, "a", "b", "c");
-    checkTuple(TupleFactory.TUPLEN.makeTuple("a", "b", "c", "d"), TupleN.class, "a", "b", "c", "d");
-    checkTuple(TupleFactory.TUPLEN.makeTuple("a", "b", "c", "d", "e"), TupleN.class, "a", "b", "c", "d", "e");
-  }
-
-  private void checkTuple(Tuple t, Class<? extends Tuple> type, Object... values) {
-    assertEquals(type, t.getClass());
-    assertEquals(values.length, t.size());
-    for (int i = 0; i < values.length; i++)
-      assertEquals(values[i], t.get(i));
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/WriteModeTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/WriteModeTest.java b/crunch/src/test/java/org/apache/crunch/WriteModeTest.java
deleted file mode 100644
index e99ac7b..0000000
--- a/crunch/src/test/java/org/apache/crunch/WriteModeTest.java
+++ /dev/null
@@ -1,103 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import static org.junit.Assert.assertEquals;
-
-import org.apache.crunch.Target.WriteMode;
-import org.apache.crunch.impl.mem.MemPipeline;
-import org.apache.crunch.io.To;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.avro.Avros;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.junit.Rule;
-import org.junit.Test;
-
-import com.google.common.collect.ImmutableList;
-
-public class WriteModeTest {
-
-  @Rule
-  public TemporaryPath tmpDir = TemporaryPaths.create();
-
-  @Test(expected=CrunchRuntimeException.class)
-  public void testDefault() throws Exception {
-    run(null, true);
-  }
-
-  @Test(expected=CrunchRuntimeException.class)
-  public void testDefaultNoRun() throws Exception {
-    run(null, false);
-  }
-  
-  @Test
-  public void testOverwrite() throws Exception {
-    Path p = run(WriteMode.OVERWRITE, true);
-    PCollection<String> lines = MemPipeline.getInstance().readTextFile(p.toString());
-    assertEquals(ImmutableList.of("some", "string", "values"), lines.materialize());
-  }
-  
-  @Test(expected=CrunchRuntimeException.class)
-  public void testOverwriteNoRun() throws Exception {
-    run(WriteMode.OVERWRITE, false);
-  }
-  
-  @Test
-  public void testAppend() throws Exception {
-    Path p = run(WriteMode.APPEND, true);
-    PCollection<String> lines = MemPipeline.getInstance().readTextFile(p.toString());
-    assertEquals(ImmutableList.of("some", "string", "values", "some", "string", "values"),
-        lines.materialize());
-  }
-  
-  @Test
-  public void testAppendNoRun() throws Exception {
-    Path p = run(WriteMode.APPEND, false);
-    PCollection<String> lines = MemPipeline.getInstance().readTextFile(p.toString());
-    assertEquals(ImmutableList.of("some", "string", "values", "some", "string", "values"),
-        lines.materialize());
-  }
-  
-  Path run(WriteMode writeMode, boolean doRun) throws Exception {
-    Path output = tmpDir.getPath("existing");
-    FileSystem fs = FileSystem.get(tmpDir.getDefaultConfiguration());
-    if (fs.exists(output)) {
-      fs.delete(output, true);
-    }
-    Pipeline p = MemPipeline.getInstance();
-    PCollection<String> data = MemPipeline.typedCollectionOf(Avros.strings(),
-        ImmutableList.of("some", "string", "values"));
-    data.write(To.textFile(output));
-
-    if (doRun) {
-      p.run();
-    }
-    
-    if (writeMode == null) {
-      data.write(To.textFile(output));
-    } else {
-      data.write(To.textFile(output), writeMode);
-    }
-    
-    p.run();
-    
-    return output;
-  }
-}


[11/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/plan/JobPrototype.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/plan/JobPrototype.java b/crunch/src/main/java/org/apache/crunch/impl/mr/plan/JobPrototype.java
deleted file mode 100644
index f22b5a1..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/plan/JobPrototype.java
+++ /dev/null
@@ -1,245 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.plan;
-
-import java.io.IOException;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import org.apache.crunch.Pipeline;
-import org.apache.crunch.Target;
-import org.apache.crunch.hadoop.mapreduce.lib.jobcontrol.CrunchControlledJob;
-import org.apache.crunch.impl.mr.collect.DoTableImpl;
-import org.apache.crunch.impl.mr.collect.PCollectionImpl;
-import org.apache.crunch.impl.mr.collect.PGroupedTableImpl;
-import org.apache.crunch.impl.mr.exec.CrunchJobHooks;
-import org.apache.crunch.impl.mr.run.CrunchCombiner;
-import org.apache.crunch.impl.mr.run.CrunchInputFormat;
-import org.apache.crunch.impl.mr.run.CrunchMapper;
-import org.apache.crunch.impl.mr.run.CrunchReducer;
-import org.apache.crunch.impl.mr.run.NodeContext;
-import org.apache.crunch.impl.mr.run.RTNode;
-import org.apache.crunch.util.DistCache;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.Job;
-
-import com.google.common.collect.HashMultimap;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableSet;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
-
-class JobPrototype {
-
-  public static JobPrototype createMapReduceJob(int jobID, PGroupedTableImpl<?, ?> group,
-      Set<NodePath> inputs, Path workingPath) {
-    return new JobPrototype(jobID, inputs, group, workingPath);
-  }
-
-  public static JobPrototype createMapOnlyJob(int jobID, HashMultimap<Target, NodePath> mapNodePaths, Path workingPath) {
-    return new JobPrototype(jobID, mapNodePaths, workingPath);
-  }
-
-  private final int jobID; // TODO: consider renaming to stageID
-  private final Set<NodePath> mapNodePaths;
-  private final PGroupedTableImpl<?, ?> group;
-  private final Set<JobPrototype> dependencies = Sets.newHashSet();
-  private final Map<PCollectionImpl<?>, DoNode> nodes = Maps.newHashMap();
-  private final Path workingPath;
-
-  private HashMultimap<Target, NodePath> targetsToNodePaths;
-  private DoTableImpl<?, ?> combineFnTable;
-
-  private CrunchControlledJob job;
-
-  private JobPrototype(int jobID, Set<NodePath> inputs, PGroupedTableImpl<?, ?> group, Path workingPath) {
-    this.jobID = jobID;
-    this.mapNodePaths = ImmutableSet.copyOf(inputs);
-    this.group = group;
-    this.workingPath = workingPath;
-    this.targetsToNodePaths = null;
-  }
-
-  private JobPrototype(int jobID, HashMultimap<Target, NodePath> outputPaths, Path workingPath) {
-    this.jobID = jobID;
-    this.group = null;
-    this.mapNodePaths = null;
-    this.workingPath = workingPath;
-    this.targetsToNodePaths = outputPaths;
-  }
-
-  public int getJobID() {
-    return jobID;
-  }
-
-  public boolean isMapOnly() {
-    return this.group == null;
-  }
-
-  Set<NodePath> getMapNodePaths() {
-    return mapNodePaths;
-  }
-
-  PGroupedTableImpl<?, ?> getGroupingTable() {
-    return group;
-  }
-
-  HashMultimap<Target, NodePath> getTargetsToNodePaths() {
-    return targetsToNodePaths;
-  }
-
-  public void addReducePaths(HashMultimap<Target, NodePath> outputPaths) {
-    if (group == null) {
-      throw new IllegalStateException("Cannot add a reduce phase to a map-only job");
-    }
-    this.targetsToNodePaths = outputPaths;
-  }
-
-  public void addDependency(JobPrototype dependency) {
-    this.dependencies.add(dependency);
-  }
-
-  public CrunchControlledJob getCrunchJob(Class<?> jarClass, Configuration conf, Pipeline pipeline) throws IOException {
-    if (job == null) {
-      job = build(jarClass, conf, pipeline);
-      for (JobPrototype proto : dependencies) {
-        job.addDependingJob(proto.getCrunchJob(jarClass, conf, pipeline));
-      }
-    }
-    return job;
-  }
-
-  private CrunchControlledJob build(Class<?> jarClass, Configuration conf, Pipeline pipeline) throws IOException {
-    Job job = new Job(conf);
-    conf = job.getConfiguration();
-    conf.set(PlanningParameters.CRUNCH_WORKING_DIRECTORY, workingPath.toString());
-    job.setJarByClass(jarClass);
-
-    Set<DoNode> outputNodes = Sets.newHashSet();
-    Set<Target> targets = targetsToNodePaths.keySet();
-    Path outputPath = new Path(workingPath, "output");
-    MSCROutputHandler outputHandler = new MSCROutputHandler(job, outputPath, group == null);
-    for (Target target : targets) {
-      DoNode node = null;
-      for (NodePath nodePath : targetsToNodePaths.get(target)) {
-        if (node == null) {
-          PCollectionImpl<?> collect = nodePath.tail();
-          node = DoNode.createOutputNode(target.toString(), collect.getPType());
-          outputHandler.configureNode(node, target);
-        }
-        outputNodes.add(walkPath(nodePath.descendingIterator(), node));
-      }
-    }
-
-    job.setMapperClass(CrunchMapper.class);
-    List<DoNode> inputNodes;
-    DoNode reduceNode = null;
-    if (group != null) {
-      job.setReducerClass(CrunchReducer.class);
-      List<DoNode> reduceNodes = Lists.newArrayList(outputNodes);
-      serialize(reduceNodes, conf, workingPath, NodeContext.REDUCE);
-      reduceNode = reduceNodes.get(0);
-
-      if (combineFnTable != null) {
-        job.setCombinerClass(CrunchCombiner.class);
-        DoNode combinerInputNode = group.createDoNode();
-        DoNode combineNode = combineFnTable.createDoNode();
-        combineNode.addChild(group.getGroupingNode());
-        combinerInputNode.addChild(combineNode);
-        serialize(ImmutableList.of(combinerInputNode), conf, workingPath, NodeContext.COMBINE);
-      }
-
-      group.configureShuffle(job);
-
-      DoNode mapOutputNode = group.getGroupingNode();
-      Set<DoNode> mapNodes = Sets.newHashSet();
-      for (NodePath nodePath : mapNodePaths) {
-        // Advance these one step, since we've already configured
-        // the grouping node, and the PGroupedTableImpl is the tail
-        // of the NodePath.
-        Iterator<PCollectionImpl<?>> iter = nodePath.descendingIterator();
-        iter.next();
-        mapNodes.add(walkPath(iter, mapOutputNode));
-      }
-      inputNodes = Lists.newArrayList(mapNodes);
-    } else { // No grouping
-      job.setNumReduceTasks(0);
-      inputNodes = Lists.newArrayList(outputNodes);
-    }
-    serialize(inputNodes, conf, workingPath, NodeContext.MAP);
-
-    if (inputNodes.size() == 1) {
-      DoNode inputNode = inputNodes.get(0);
-      inputNode.getSource().configureSource(job, -1);
-    } else {
-      for (int i = 0; i < inputNodes.size(); i++) {
-        DoNode inputNode = inputNodes.get(i);
-        inputNode.getSource().configureSource(job, i);
-      }
-      job.setInputFormatClass(CrunchInputFormat.class);
-    }
-    job.setJobName(createJobName(pipeline.getName(), inputNodes, reduceNode));
-
-    return new CrunchControlledJob(
-        jobID,
-        job,
-        new CrunchJobHooks.PrepareHook(job),
-        new CrunchJobHooks.CompletionHook(job, outputPath, outputHandler.getMultiPaths(), group == null));
-  }
-
-  private void serialize(List<DoNode> nodes, Configuration conf, Path workingPath, NodeContext context)
-      throws IOException {
-    List<RTNode> rtNodes = Lists.newArrayList();
-    for (DoNode node : nodes) {
-      rtNodes.add(node.toRTNode(true, conf, context));
-    }
-    Path path = new Path(workingPath, context.toString());
-    DistCache.write(conf, path, rtNodes);
-  }
-
-  private String createJobName(String pipelineName, List<DoNode> mapNodes, DoNode reduceNode) {
-    JobNameBuilder builder = new JobNameBuilder(pipelineName);
-    builder.visit(mapNodes);
-    if (reduceNode != null) {
-      builder.visit(reduceNode);
-    }
-    return builder.build();
-  }
-
-  private DoNode walkPath(Iterator<PCollectionImpl<?>> iter, DoNode working) {
-    while (iter.hasNext()) {
-      PCollectionImpl<?> collect = iter.next();
-      if (combineFnTable != null && !(collect instanceof PGroupedTableImpl)) {
-        combineFnTable = null;
-      } else if (collect instanceof DoTableImpl && ((DoTableImpl<?, ?>) collect).hasCombineFn()) {
-        combineFnTable = (DoTableImpl<?, ?>) collect;
-      }
-      if (!nodes.containsKey(collect)) {
-        nodes.put(collect, collect.createDoNode());
-      }
-      DoNode parent = nodes.get(collect);
-      parent.addChild(working);
-      working = parent;
-    }
-    return working;
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/plan/MSCROutputHandler.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/plan/MSCROutputHandler.java b/crunch/src/main/java/org/apache/crunch/impl/mr/plan/MSCROutputHandler.java
deleted file mode 100644
index 36c565e..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/plan/MSCROutputHandler.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.plan;
-
-import java.util.Map;
-
-import org.apache.crunch.Target;
-import org.apache.crunch.io.MapReduceTarget;
-import org.apache.crunch.io.OutputHandler;
-import org.apache.crunch.io.PathTarget;
-import org.apache.crunch.types.PType;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.Job;
-
-import com.google.common.collect.Maps;
-
-public class MSCROutputHandler implements OutputHandler {
-
-  private final Job job;
-  private final Path path;
-  private final boolean mapOnlyJob;
-
-  private DoNode workingNode;
-  private Map<Integer, PathTarget> multiPaths;
-  private int jobCount;
-
-  public MSCROutputHandler(Job job, Path outputPath, boolean mapOnlyJob) {
-    this.job = job;
-    this.path = outputPath;
-    this.mapOnlyJob = mapOnlyJob;
-    this.multiPaths = Maps.newHashMap();
-  }
-
-  public void configureNode(DoNode node, Target target) {
-    workingNode = node;
-    target.accept(this, node.getPType());
-  }
-
-  public boolean configure(Target target, PType<?> ptype) {
-    if (target instanceof MapReduceTarget) {
-      if (target instanceof PathTarget) {
-        multiPaths.put(jobCount, (PathTarget) target);
-      }
-
-      String name = PlanningParameters.MULTI_OUTPUT_PREFIX + jobCount;
-      jobCount++;
-      workingNode.setOutputName(name);
-      ((MapReduceTarget) target).configureForMapReduce(job, ptype, path, name);
-      return true;
-    }
-
-    return false;
-  }
-
-  public boolean isMapOnlyJob() {
-    return mapOnlyJob;
-  }
-
-  public Map<Integer, PathTarget> getMultiPaths() {
-    return multiPaths;
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/plan/MSCRPlanner.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/plan/MSCRPlanner.java b/crunch/src/main/java/org/apache/crunch/impl/mr/plan/MSCRPlanner.java
deleted file mode 100644
index 3e1de38..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/plan/MSCRPlanner.java
+++ /dev/null
@@ -1,378 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.plan;
-
-import java.io.IOException;
-import java.util.Comparator;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Set;
-import java.util.TreeMap;
-
-import org.apache.crunch.SourceTarget;
-import org.apache.crunch.Target;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.impl.mr.collect.InputCollection;
-import org.apache.crunch.impl.mr.collect.PCollectionImpl;
-import org.apache.crunch.impl.mr.collect.PGroupedTableImpl;
-import org.apache.crunch.impl.mr.exec.MRExecutor;
-import org.apache.crunch.materialize.MaterializableIterable;
-import org.apache.hadoop.conf.Configuration;
-
-import com.google.common.collect.HashMultimap;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.common.collect.Multimap;
-import com.google.common.collect.Sets;
-
-public class MSCRPlanner {
-
-  private final MRPipeline pipeline;
-  private final Map<PCollectionImpl<?>, Set<Target>> outputs;
-  private final Map<PCollectionImpl<?>, MaterializableIterable> toMaterialize;
-  private int lastJobID = 0;
-
-  public MSCRPlanner(MRPipeline pipeline, Map<PCollectionImpl<?>, Set<Target>> outputs,
-      Map<PCollectionImpl<?>, MaterializableIterable> toMaterialize) {
-    this.pipeline = pipeline;
-    this.outputs = new TreeMap<PCollectionImpl<?>, Set<Target>>(DEPTH_COMPARATOR);
-    this.outputs.putAll(outputs);
-    this.toMaterialize = toMaterialize;
-  }
-
-  // Used to ensure that we always build pipelines starting from the deepest
-  // outputs, which helps ensure that we handle intermediate outputs correctly.
-  private static final Comparator<PCollectionImpl<?>> DEPTH_COMPARATOR = new Comparator<PCollectionImpl<?>>() {
-    @Override
-    public int compare(PCollectionImpl<?> left, PCollectionImpl<?> right) {
-      int cmp = right.getDepth() - left.getDepth();
-      if (cmp == 0) {
-        // Ensure we don't throw away two output collections at the same depth.
-        // Using the collection name would be nicer here, but names aren't
-        // necessarily unique.
-        cmp = new Integer(right.hashCode()).compareTo(left.hashCode());
-      }
-      return cmp;
-    }
-  };  
-
-  public MRExecutor plan(Class<?> jarClass, Configuration conf) throws IOException {
-    Map<PCollectionImpl<?>, Set<SourceTarget<?>>> targetDeps = Maps.newTreeMap(DEPTH_COMPARATOR);
-    for (PCollectionImpl<?> pcollect : outputs.keySet()) {
-      targetDeps.put(pcollect, pcollect.getTargetDependencies());
-    }
-    
-    Multimap<Vertex, JobPrototype> assignments = HashMultimap.create();
-    Multimap<PCollectionImpl<?>, Vertex> protoDependency = HashMultimap.create();
-    while (!targetDeps.isEmpty()) {
-      Set<Target> allTargets = Sets.newHashSet();
-      for (PCollectionImpl<?> pcollect : targetDeps.keySet()) {
-        allTargets.addAll(outputs.get(pcollect));
-      }
-      GraphBuilder graphBuilder = new GraphBuilder();
-      
-      // Walk the current plan tree and build a graph in which the vertices are
-      // sources, targets, and GBK operations.
-      Set<PCollectionImpl<?>> currentStage = Sets.newHashSet();
-      Set<PCollectionImpl<?>> laterStage = Sets.newHashSet();
-      for (PCollectionImpl<?> output : targetDeps.keySet()) {
-        if (Sets.intersection(allTargets, targetDeps.get(output)).isEmpty()) {
-          graphBuilder.visitOutput(output);
-          currentStage.add(output);
-        } else {
-          laterStage.add(output);
-        }
-      }
-      
-      Graph baseGraph = graphBuilder.getGraph();
-      
-      // Create a new graph that splits up dependent GBK nodes.
-      Graph graph = prepareFinalGraph(baseGraph);
-      
-      // Break the graph up into connected components.
-      List<List<Vertex>> components = graph.connectedComponents();
-      
-      // For each component, we will create one or more job prototypes,
-      // depending on its profile.
-      // For dependency handling, we only need to care about which
-      // job prototype a particular GBK is assigned to.
-      for (List<Vertex> component : components) {
-        assignments.putAll(constructJobPrototypes(component));
-      }
-
-      // Add in the job dependency information here.
-      for (Map.Entry<Vertex, JobPrototype> e : assignments.entries()) {
-        JobPrototype current = e.getValue();
-        List<Vertex> parents = graph.getParents(e.getKey());
-        for (Vertex parent : parents) {
-          for (JobPrototype parentJobProto : assignments.get(parent)) {
-            current.addDependency(parentJobProto);
-          }
-        }
-      }
-      
-      // Add cross-stage dependencies.
-      for (PCollectionImpl<?> output : currentStage) {
-        Set<Target> targets = outputs.get(output);
-        Vertex vertex = graph.getVertexAt(output);
-        for (PCollectionImpl<?> later : laterStage) {
-          if (!Sets.intersection(targets, targetDeps.get(later)).isEmpty()) {
-            protoDependency.put(later, vertex);
-          }
-        }
-        targetDeps.remove(output);
-      }
-    }
-    
-    // Cross-job dependencies.
-    for (Entry<PCollectionImpl<?>, Vertex> pd : protoDependency.entries()) {
-      Vertex d = new Vertex(pd.getKey());
-      Vertex dj = pd.getValue();
-      for (JobPrototype parent : assignments.get(dj)) {
-        for (JobPrototype child : assignments.get(d)) {
-          child.addDependency(parent);
-        }
-      }
-    }
-    
-    // Finally, construct the jobs from the prototypes and return.
-    DotfileWriter dotfileWriter = new DotfileWriter();
-    MRExecutor exec = new MRExecutor(jarClass, outputs, toMaterialize);
-    for (JobPrototype proto : Sets.newHashSet(assignments.values())) {
-      dotfileWriter.addJobPrototype(proto);
-      exec.addJob(proto.getCrunchJob(jarClass, conf, pipeline));
-    }
-
-    String planDotFile = dotfileWriter.buildDotfile();
-    exec.setPlanDotFile(planDotFile);
-    conf.set(PlanningParameters.PIPELINE_PLAN_DOTFILE, planDotFile);
-
-    return exec;
-  }
-  
-  private Graph prepareFinalGraph(Graph baseGraph) {
-    Graph graph = new Graph();
-    
-    for (Vertex baseVertex : baseGraph) {
-      // Add all of the vertices in the base graph, but no edges (yet).
-      graph.addVertex(baseVertex.getPCollection(), baseVertex.isOutput());
-    }
-    
-    for (Edge e : baseGraph.getAllEdges()) {
-      // Add back all of the edges where neither vertex is a GBK and we do not
-      // have an output feeding into a GBK.
-      if (!(e.getHead().isGBK() && e.getTail().isGBK()) &&
-          !(e.getHead().isOutput() && e.getTail().isGBK())) {
-        Vertex head = graph.getVertexAt(e.getHead().getPCollection());
-        Vertex tail = graph.getVertexAt(e.getTail().getPCollection());
-        graph.getEdge(head, tail).addAllNodePaths(e.getNodePaths());
-      }
-    }
-    
-    for (Vertex baseVertex : baseGraph) {
-      if (baseVertex.isGBK()) {
-        Vertex vertex = graph.getVertexAt(baseVertex.getPCollection());
-        for (Edge e : baseVertex.getIncomingEdges()) {
-          if (e.getHead().isOutput()) {
-            // Execute an edge split.
-            Vertex splitTail = e.getHead();
-            PCollectionImpl<?> split = splitTail.getPCollection();
-            InputCollection<?> inputNode = handleSplitTarget(split);
-            Vertex splitHead = graph.addVertex(inputNode, false);
-            
-            // Divide up the node paths in the edge between the two GBK nodes so
-            // that each node is either owned by GBK1 -> newTail or newHead -> GBK2.
-            for (NodePath path : e.getNodePaths()) {
-              NodePath headPath = path.splitAt(split, splitHead.getPCollection());
-              graph.getEdge(vertex, splitTail).addNodePath(headPath);
-              graph.getEdge(splitHead, vertex).addNodePath(path);
-            }
-            
-            // Note the dependency between the vertices in the graph.
-            graph.markDependency(splitHead, splitTail);
-          } else if (!e.getHead().isGBK()) {
-            Vertex newHead = graph.getVertexAt(e.getHead().getPCollection());
-            graph.getEdge(newHead, vertex).addAllNodePaths(e.getNodePaths());
-          }
-        }
-        for (Edge e : baseVertex.getOutgoingEdges()) {
-          if (!e.getTail().isGBK()) {
-            Vertex newTail = graph.getVertexAt(e.getTail().getPCollection());
-            graph.getEdge(vertex, newTail).addAllNodePaths(e.getNodePaths());
-          } else {
-            // Execute an Edge split
-            Vertex newGraphTail = graph.getVertexAt(e.getTail().getPCollection());
-            PCollectionImpl split = e.getSplit();
-            InputCollection<?> inputNode = handleSplitTarget(split);
-            Vertex splitTail = graph.addVertex(split, true);
-            Vertex splitHead = graph.addVertex(inputNode, false);
-            
-            // Divide up the node paths in the edge between the two GBK nodes so
-            // that each node is either owned by GBK1 -> newTail or newHead -> GBK2.
-            for (NodePath path : e.getNodePaths()) {
-              NodePath headPath = path.splitAt(split, splitHead.getPCollection());
-              graph.getEdge(vertex, splitTail).addNodePath(headPath);
-              graph.getEdge(splitHead, newGraphTail).addNodePath(path);
-            }
-            
-            // Note the dependency between the vertices in the graph.
-            graph.markDependency(splitHead, splitTail);
-          }
-        }
-      }
-    }
-    
-    return graph;
-  }
-  
-  private Multimap<Vertex, JobPrototype> constructJobPrototypes(List<Vertex> component) {
-    Multimap<Vertex, JobPrototype> assignment = HashMultimap.create();
-    List<Vertex> gbks = Lists.newArrayList();
-    for (Vertex v : component) {
-      if (v.isGBK()) {
-        gbks.add(v);
-      }
-    }
-
-    if (gbks.isEmpty()) {
-      HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
-      for (Vertex v : component) {
-        if (v.isInput()) {
-          for (Edge e : v.getOutgoingEdges()) {
-            for (NodePath nodePath : e.getNodePaths()) {
-              PCollectionImpl target = nodePath.tail();
-              for (Target t : outputs.get(target)) {
-                outputPaths.put(t, nodePath);
-              }
-            }
-          }
-        }
-      }
-      if (outputPaths.isEmpty()) {
-        throw new IllegalStateException("No outputs?");
-      }
-      JobPrototype prototype = JobPrototype.createMapOnlyJob(
-          ++lastJobID, outputPaths, pipeline.createTempPath());
-      for (Vertex v : component) {
-        assignment.put(v, prototype);
-      }
-    } else {
-      Set<Edge> usedEdges = Sets.newHashSet();
-      for (Vertex g : gbks) {
-        Set<NodePath> inputs = Sets.newHashSet();
-        for (Edge e : g.getIncomingEdges()) {
-          inputs.addAll(e.getNodePaths());
-          usedEdges.add(e);
-        }
-        JobPrototype prototype = JobPrototype.createMapReduceJob(
-            ++lastJobID, (PGroupedTableImpl) g.getPCollection(), inputs, pipeline.createTempPath());
-        assignment.put(g, prototype);
-        for (Edge e : g.getIncomingEdges()) {
-          assignment.put(e.getHead(), prototype);
-          usedEdges.add(e);
-        }
-        HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
-        for (Edge e : g.getOutgoingEdges()) {
-          Vertex output = e.getTail();
-          for (Target t : outputs.get(output.getPCollection())) {
-            outputPaths.putAll(t, e.getNodePaths());
-          }
-          assignment.put(output, prototype);
-          usedEdges.add(e);
-        }
-        prototype.addReducePaths(outputPaths);
-      }
-      
-      // Check for any un-assigned vertices, which should be map-side outputs
-      // that we will need to run in a map-only job.
-      HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
-      Set<Vertex> orphans = Sets.newHashSet();
-      for (Vertex v : component) {
-
-        // Check if this vertex has multiple inputs but only a subset of
-        // them have already been assigned
-        boolean vertexHasUnassignedIncomingEdges = false;
-        if (v.isOutput()) {
-          for (Edge e : v.getIncomingEdges()) {
-            if (!usedEdges.contains(e)) {
-              vertexHasUnassignedIncomingEdges = true;
-            }
-          }
-        }
-
-        if (v.isOutput() && (vertexHasUnassignedIncomingEdges || !assignment.containsKey(v))) {
-          orphans.add(v);
-          for (Edge e : v.getIncomingEdges()) {
-            if (vertexHasUnassignedIncomingEdges && usedEdges.contains(e)) {
-              // We've already dealt with this incoming edge
-              continue;
-            }
-            orphans.add(e.getHead());
-            for (NodePath nodePath : e.getNodePaths()) {
-              PCollectionImpl target = nodePath.tail();
-              for (Target t : outputs.get(target)) {
-                outputPaths.put(t, nodePath);
-              }
-            }
-          }
-        }
-
-      }
-      if (!outputPaths.isEmpty()) {
-        JobPrototype prototype = JobPrototype.createMapOnlyJob(
-            ++lastJobID, outputPaths, pipeline.createTempPath());
-        for (Vertex orphan : orphans) {
-          assignment.put(orphan, prototype);
-        }
-      }
-    }
-    
-    return assignment;
-  }
-  
-  private InputCollection<?> handleSplitTarget(PCollectionImpl<?> splitTarget) {
-    if (!outputs.containsKey(splitTarget)) {
-      outputs.put(splitTarget, Sets.<Target> newHashSet());
-    }
-
-    SourceTarget srcTarget = null;
-    Target targetToReplace = null;
-    for (Target t : outputs.get(splitTarget)) {
-      if (t instanceof SourceTarget) {
-        srcTarget = (SourceTarget<?>) t;
-        break;
-      } else {
-        srcTarget = t.asSourceTarget(splitTarget.getPType());
-        if (srcTarget != null) {
-          targetToReplace = t;
-          break;
-        }
-      }
-    }
-    if (targetToReplace != null) {
-      outputs.get(splitTarget).remove(targetToReplace);
-    } else if (srcTarget == null) {
-      srcTarget = pipeline.createIntermediateOutput(splitTarget.getPType());
-    }
-    outputs.get(splitTarget).add(srcTarget);
-    splitTarget.materializeAt(srcTarget);
-
-    return (InputCollection<?>) pipeline.read(srcTarget);
-  }  
-}
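
The DEPTH_COMPARATOR above orders output collections deepest-first and falls back to a hashCode comparison so that a TreeMap keyed by it does not silently drop two distinct outputs at the same depth. A minimal sketch of that ordering idea, using an illustrative Node class rather than Crunch's PCollectionImpl:

    import java.util.Comparator;
    import java.util.TreeMap;

    public class DepthOrderSketch {

      // Illustrative stand-in for PCollectionImpl; only the depth matters here.
      static final class Node {
        final String name;
        final int depth;
        Node(String name, int depth) { this.name = name; this.depth = depth; }
      }

      // Deeper nodes sort first; the hashCode tie-break keeps distinct
      // same-depth nodes from comparing as equal inside a TreeMap.
      static final Comparator<Node> DEPTH_FIRST = new Comparator<Node>() {
        @Override
        public int compare(Node left, Node right) {
          int cmp = right.depth - left.depth;
          if (cmp == 0) {
            cmp = Integer.compare(right.hashCode(), left.hashCode());
          }
          return cmp;
        }
      };

      public static void main(String[] args) {
        TreeMap<Node, String> outputs = new TreeMap<Node, String>(DEPTH_FIRST);
        outputs.put(new Node("a", 2), "deepest output, planned first");
        outputs.put(new Node("b", 1), "shallow output");
        outputs.put(new Node("c", 1), "same depth, still kept");
        System.out.println(outputs.size()); // 3
      }
    }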

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/plan/NodePath.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/plan/NodePath.java b/crunch/src/main/java/org/apache/crunch/impl/mr/plan/NodePath.java
deleted file mode 100644
index a090d93..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/plan/NodePath.java
+++ /dev/null
@@ -1,124 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.plan;
-
-import java.util.Iterator;
-import java.util.LinkedList;
-
-import org.apache.crunch.impl.mr.collect.PCollectionImpl;
-
-import com.google.common.collect.Lists;
-
-class NodePath implements Iterable<PCollectionImpl<?>> {
-  private LinkedList<PCollectionImpl<?>> path;
-
-  public NodePath() {
-    this.path = Lists.newLinkedList();
-  }
-
-  public NodePath(PCollectionImpl<?> tail) {
-    this.path = Lists.newLinkedList();
-    this.path.add(tail);
-  }
-
-  public NodePath(NodePath other) {
-    this.path = Lists.newLinkedList(other.path);
-  }
-
-  public void push(PCollectionImpl<?> stage) {
-    this.path.push((PCollectionImpl<?>) stage);
-  }
-
-  public NodePath close(PCollectionImpl<?> head) {
-    this.path.push(head);
-    return this;
-  }
-
-  public Iterator<PCollectionImpl<?>> iterator() {
-    return path.iterator();
-  }
-
-  public Iterator<PCollectionImpl<?>> descendingIterator() {
-    return path.descendingIterator();
-  }
-
-  public PCollectionImpl<?> get(int index) {
-    return path.get(index);
-  }
-
-  public PCollectionImpl<?> head() {
-    return path.peekFirst();
-  }
-
-  public PCollectionImpl<?> tail() {
-    return path.peekLast();
-  }
-
-  @Override
-  public boolean equals(Object other) {
-    if (other == null || !(other instanceof NodePath)) {
-      return false;
-    }
-    NodePath nodePath = (NodePath) other;
-    return path.equals(nodePath.path);
-  }
-
-  @Override
-  public int hashCode() {
-    return 17 + 37 * path.hashCode();
-  }
-
-  @Override
-  public String toString() {
-    StringBuilder sb = new StringBuilder();
-    for (PCollectionImpl<?> collect : path) {
-      sb.append(collect.getName() + "|");
-    }
-    sb.deleteCharAt(sb.length() - 1);
-    return sb.toString();
-  }
-
-  public NodePath splitAt(int splitIndex, PCollectionImpl<?> newHead) {
-    NodePath top = new NodePath();
-    for (int i = 0; i <= splitIndex; i++) {
-      top.path.add(path.get(i));
-    }
-    LinkedList<PCollectionImpl<?>> nextPath = Lists.newLinkedList();
-    nextPath.add(newHead);
-    nextPath.addAll(path.subList(splitIndex + 1, path.size()));
-    path = nextPath;
-    return top;
-  }
-  
-  public NodePath splitAt(PCollectionImpl split, PCollectionImpl<?> newHead) {
-    NodePath top = new NodePath();
-    int splitIndex = 0;
-    for (PCollectionImpl p : path) {
-      top.path.add(p);
-      if (p == split) {
-        break;
-      }
-      splitIndex++;
-    }
-    LinkedList<PCollectionImpl<?>> nextPath = Lists.newLinkedList();
-    nextPath.add(newHead);
-    nextPath.addAll(path.subList(splitIndex + 1, path.size()));
-    path = nextPath;
-    return top;
-  }
-}
\ No newline at end of file
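
The two splitAt variants above carve a path in two around a split element: the prefix up to and including the split point is returned as a new path, while the remainder is re-headed with a replacement collection. A small standalone sketch of the same list-splitting mechanics, using plain strings instead of PCollectionImpl:

    import java.util.Arrays;
    import java.util.LinkedList;

    public class PathSplitSketch {

      // Returns the prefix up to and including 'split'; rewrites 'path' in place
      // so that it starts at 'newHead'. All names here are illustrative only.
      static LinkedList<String> splitAt(LinkedList<String> path, String split, String newHead) {
        LinkedList<String> top = new LinkedList<String>();
        int splitIndex = 0;
        for (String p : path) {
          top.add(p);
          if (p.equals(split)) {
            break;
          }
          splitIndex++;
        }
        LinkedList<String> next = new LinkedList<String>();
        next.add(newHead);
        next.addAll(path.subList(splitIndex + 1, path.size()));
        path.clear();
        path.addAll(next);
        return top;
      }

      public static void main(String[] args) {
        LinkedList<String> path = new LinkedList<String>(
            Arrays.asList("input", "parse", "gbk", "format", "output"));
        LinkedList<String> head = splitAt(path, "gbk", "gbkReadBack");
        System.out.println(head); // [input, parse, gbk]
        System.out.println(path); // [gbkReadBack, format, output]
      }
    }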

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/plan/PlanningParameters.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/plan/PlanningParameters.java b/crunch/src/main/java/org/apache/crunch/impl/mr/plan/PlanningParameters.java
deleted file mode 100644
index b90a911..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/plan/PlanningParameters.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.plan;
-
-/**
- * Collection of Configuration keys and various constants used when planning MapReduce jobs for a
- * pipeline.
- */
-public class PlanningParameters {
-
-  public static final String MULTI_OUTPUT_PREFIX = "out";
-
-  public static final String CRUNCH_WORKING_DIRECTORY = "crunch.work.dir";
-
-  /**
-   * Configuration key under which a <a href="http://www.graphviz.org">DOT</a> file containing the
-   * pipeline job graph is stored by the planner.
-   */
-  public static final String PIPELINE_PLAN_DOTFILE = "crunch.planner.dotfile";
-
-  private PlanningParameters() {
-  }
-}
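
Since the planner stores its DOT rendering of the job graph under PIPELINE_PLAN_DOTFILE (see the conf.set call in MSCRPlanner above), the plan can be pulled back out of the Configuration after planning and fed to Graphviz. A hedged sketch; the output file name and the point at which the key is populated are assumptions:

    import java.io.FileWriter;
    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;

    public class DumpPlanDotfile {
      public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // ... planning must have run against this conf for the key to be set ...
        String dot = conf.get("crunch.planner.dotfile"); // PlanningParameters.PIPELINE_PLAN_DOTFILE
        if (dot != null) {
          FileWriter w = new FileWriter("pipeline-plan.dot");
          try {
            w.write(dot);
          } finally {
            w.close();
          }
        }
      }
    }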

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/plan/Vertex.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/plan/Vertex.java b/crunch/src/main/java/org/apache/crunch/impl/mr/plan/Vertex.java
deleted file mode 100644
index f4aa668..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/plan/Vertex.java
+++ /dev/null
@@ -1,126 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.plan;
-
-import java.util.List;
-import java.util.Set;
-
-import org.apache.commons.lang.builder.ReflectionToStringBuilder;
-import org.apache.commons.lang.builder.ToStringStyle;
-import org.apache.crunch.Source;
-import org.apache.crunch.impl.mr.collect.InputCollection;
-import org.apache.crunch.impl.mr.collect.PCollectionImpl;
-import org.apache.crunch.impl.mr.collect.PGroupedTableImpl;
-
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
-
-/**
- *
- */
-class Vertex {
-  private final PCollectionImpl impl;
-  
-  private boolean output;
-  private Set<Edge> incoming;
-  private Set<Edge> outgoing;
-  
-  public Vertex(PCollectionImpl impl) {
-    this.impl = impl;
-    this.incoming = Sets.newHashSet();
-    this.outgoing = Sets.newHashSet();
-  }
-  
-  public PCollectionImpl getPCollection() {
-    return impl;
-  }
-  
-  public boolean isInput() {
-    return impl instanceof InputCollection;
-  }
-  
-  public boolean isGBK() {
-    return impl instanceof PGroupedTableImpl;
-  }
-  
-  public void setOutput() {
-    this.output = true;
-  }
-  
-  public boolean isOutput() {
-    return output;
-  }
-  
-  public Source getSource() {
-    if (isInput()) {
-      return ((InputCollection) impl).getSource();
-    }
-    return null;
-  }
-  
-  public void addIncoming(Edge edge) {
-    this.incoming.add(edge);
-  }
-  
-  public void addOutgoing(Edge edge) {
-    this.outgoing.add(edge);
-  }
-  
-  public List<Vertex> getAllNeighbors() {
-    List<Vertex> n = Lists.newArrayList();
-    for (Edge e : incoming) {
-      n.add(e.getHead());
-    }
-    for (Edge e : outgoing) {
-      n.add(e.getTail());
-    }
-    return n;
-  }
-  
-  public Set<Edge> getAllEdges() {
-    return Sets.union(incoming, outgoing);
-  }
-  
-  public Set<Edge> getIncomingEdges() {
-    return incoming;
-  }
-  
-  public Set<Edge> getOutgoingEdges() {
-    return outgoing;
-  }
-  
-  @Override
-  public boolean equals(Object obj) {
-    if (obj == null || !(obj instanceof Vertex)) {
-      return false;
-    }
-    Vertex other = (Vertex) obj;
-    return impl.equals(other.impl);
-  }
-  
-  @Override
-  public int hashCode() {
-    return 17 + 37 * impl.hashCode();
-  }
-
-  @Override
-  public String toString() {
-    return new ReflectionToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE).setExcludeFieldNames(
-        new String[] { "outgoing", "incoming" }).toString();
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/run/CrunchCombiner.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/run/CrunchCombiner.java b/crunch/src/main/java/org/apache/crunch/impl/mr/run/CrunchCombiner.java
deleted file mode 100644
index 47a3ded..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/run/CrunchCombiner.java
+++ /dev/null
@@ -1,27 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.run;
-
-public class CrunchCombiner extends CrunchReducer {
-
-  @Override
-  protected NodeContext getNodeContext() {
-    return NodeContext.COMBINE;
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/run/CrunchInputFormat.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/run/CrunchInputFormat.java b/crunch/src/main/java/org/apache/crunch/impl/mr/run/CrunchInputFormat.java
deleted file mode 100644
index eb5dd8a..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/run/CrunchInputFormat.java
+++ /dev/null
@@ -1,78 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.run;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.crunch.io.CrunchInputs;
-import org.apache.crunch.io.FormatBundle;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.util.ReflectionUtils;
-
-import com.google.common.collect.Lists;
-
-public class CrunchInputFormat<K, V> extends InputFormat<K, V> {
-
-  @Override
-  public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException {
-    List<InputSplit> splits = Lists.newArrayList();
-    Configuration base = job.getConfiguration();
-    Map<FormatBundle, Map<Integer, List<Path>>> formatNodeMap = CrunchInputs.getFormatNodeMap(job);
-
-    // First, build a map of InputFormats to Paths
-    for (Map.Entry<FormatBundle, Map<Integer, List<Path>>> entry : formatNodeMap.entrySet()) {
-      FormatBundle inputBundle = entry.getKey();
-      Configuration conf = new Configuration(base);
-      inputBundle.configure(conf);
-      Job jobCopy = new Job(conf);
-      InputFormat<?, ?> format = (InputFormat<?, ?>) ReflectionUtils.newInstance(inputBundle.getFormatClass(),
-          jobCopy.getConfiguration());
-      for (Map.Entry<Integer, List<Path>> nodeEntry : entry.getValue().entrySet()) {
-        Integer nodeIndex = nodeEntry.getKey();
-        List<Path> paths = nodeEntry.getValue();
-        FileInputFormat.setInputPaths(jobCopy, paths.toArray(new Path[paths.size()]));
-
-        // Get splits for each input path and tag with InputFormat
-        // and Mapper types by wrapping in a TaggedInputSplit.
-        List<InputSplit> pathSplits = format.getSplits(jobCopy);
-        for (InputSplit pathSplit : pathSplits) {
-          splits.add(new CrunchInputSplit(pathSplit, inputBundle.getFormatClass(),
-              nodeIndex, jobCopy.getConfiguration()));
-        }
-      }
-    }
-    return splits;
-  }
-
-  @Override
-  public RecordReader<K, V> createRecordReader(InputSplit inputSplit, TaskAttemptContext context) throws IOException,
-      InterruptedException {
-    return new CrunchRecordReader<K, V>(inputSplit, context);
-  }
-}
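
CrunchInputFormat.getSplits delegates to each bundled InputFormat by cloning the job configuration, pointing the format at the paths registered for it, and then tagging the resulting splits with the node index. A generic sketch of just the delegation step, assuming a /data/input directory exists and using TextInputFormat as the concrete format:

    import java.util.List;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapreduce.InputSplit;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

    public class DelegatedSplitsSketch {
      public static void main(String[] args) throws Exception {
        Configuration base = new Configuration();
        Job jobCopy = new Job(new Configuration(base)); // isolate per-format settings
        FileInputFormat.setInputPaths(jobCopy, new Path("/data/input"));
        TextInputFormat format = new TextInputFormat();
        List<InputSplit> splits = format.getSplits(jobCopy);
        System.out.println("splits: " + splits.size());
      }
    }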

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/run/CrunchInputSplit.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/run/CrunchInputSplit.java b/crunch/src/main/java/org/apache/crunch/impl/mr/run/CrunchInputSplit.java
deleted file mode 100644
index b41062b..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/run/CrunchInputSplit.java
+++ /dev/null
@@ -1,116 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.run;
-
-import java.io.DataInput;
-import java.io.DataInputStream;
-import java.io.DataOutput;
-import java.io.DataOutputStream;
-import java.io.IOException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.serializer.Deserializer;
-import org.apache.hadoop.io.serializer.SerializationFactory;
-import org.apache.hadoop.io.serializer.Serializer;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.util.ReflectionUtils;
-
-class CrunchInputSplit extends InputSplit implements Writable {
-
-  private InputSplit inputSplit;
-  private Class<? extends InputFormat<?, ?>> inputFormatClass;
-  private int nodeIndex;
-  private Configuration conf;
-
-  public CrunchInputSplit() {
-    // default constructor
-  }
-
-  public CrunchInputSplit(
-      InputSplit inputSplit,
-      Class<? extends InputFormat<?, ?>> inputFormatClass,
-      int nodeIndex,
-      Configuration conf) {
-    this.inputSplit = inputSplit;
-    this.inputFormatClass = inputFormatClass;
-    this.nodeIndex = nodeIndex;
-    this.conf = conf;
-  }
-
-  public Configuration getConf() {
-    return conf;
-  }
-  
-  public int getNodeIndex() {
-    return nodeIndex;
-  }
-
-  public InputSplit getInputSplit() {
-    return inputSplit;
-  }
-
-  public Class<? extends InputFormat<?, ?>> getInputFormatClass() {
-    return inputFormatClass;
-  }
-
-  @Override
-  public long getLength() throws IOException, InterruptedException {
-    return inputSplit.getLength();
-  }
-
-  @Override
-  public String[] getLocations() throws IOException, InterruptedException {
-    return inputSplit.getLocations();
-  }
-
-  public void readFields(DataInput in) throws IOException {
-    nodeIndex = in.readInt();
-    conf = new Configuration();
-    conf.readFields(in);
-    inputFormatClass = (Class<? extends InputFormat<?, ?>>) readClass(in);
-    Class<? extends InputSplit> inputSplitClass = (Class<? extends InputSplit>) readClass(in);
-    inputSplit = (InputSplit) ReflectionUtils.newInstance(inputSplitClass, conf);
-    SerializationFactory factory = new SerializationFactory(conf);
-    Deserializer deserializer = factory.getDeserializer(inputSplitClass);
-    deserializer.open((DataInputStream) in);
-    inputSplit = (InputSplit) deserializer.deserialize(inputSplit);
-  }
-
-  private Class<?> readClass(DataInput in) throws IOException {
-    String className = Text.readString(in);
-    try {
-      return conf.getClassByName(className);
-    } catch (ClassNotFoundException e) {
-      throw new RuntimeException("readObject can't find class", e);
-    }
-  }
-
-  public void write(DataOutput out) throws IOException {
-    out.writeInt(nodeIndex);
-    conf.write(out);
-    Text.writeString(out, inputFormatClass.getName());
-    Text.writeString(out, inputSplit.getClass().getName());
-    SerializationFactory factory = new SerializationFactory(conf);
-    Serializer serializer = factory.getSerializer(inputSplit.getClass());
-    serializer.open((DataOutputStream) out);
-    serializer.serialize(inputSplit);
-  }
-}
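
CrunchInputSplit serializes its wrapped split through Hadoop's SerializationFactory, so a write/readFields round trip should recover the node index, configuration, format class, and delegate split. A sketch of that round trip; it assumes it lives in the org.apache.crunch.impl.mr.run package (the class is package-private) and uses FileSplit/TextInputFormat purely as convenient concrete types:

    package org.apache.crunch.impl.mr.run;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.DataInputBuffer;
    import org.apache.hadoop.io.DataOutputBuffer;
    import org.apache.hadoop.mapreduce.lib.input.FileSplit;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

    public class SplitRoundTripSketch {
      public static void main(String[] args) throws Exception {
        FileSplit inner = new FileSplit(new Path("/tmp/part-00000"), 0L, 1024L, new String[0]);
        CrunchInputSplit split =
            new CrunchInputSplit(inner, TextInputFormat.class, 7, new Configuration());

        // Write the split into an in-memory buffer ...
        DataOutputBuffer out = new DataOutputBuffer();
        split.write(out);

        // ... and read it back from the same bytes.
        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), out.getLength());
        CrunchInputSplit copy = new CrunchInputSplit();
        copy.readFields(in);

        System.out.println(copy.getNodeIndex());        // 7
        System.out.println(copy.getLength());           // 1024
        System.out.println(copy.getInputFormatClass()); // class ...TextInputFormat
      }
    }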

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/run/CrunchMapper.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/run/CrunchMapper.java b/crunch/src/main/java/org/apache/crunch/impl/mr/run/CrunchMapper.java
deleted file mode 100644
index 70f0b01..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/run/CrunchMapper.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.run;
-
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.crunch.CrunchRuntimeException;
-import org.apache.hadoop.mapreduce.Mapper;
-
-public class CrunchMapper extends Mapper<Object, Object, Object, Object> {
-
-  private static final Log LOG = LogFactory.getLog(CrunchMapper.class);
-
-  private RTNode node;
-  private CrunchTaskContext ctxt;
-  private boolean debug;
-
-  @Override
-  protected void setup(Mapper<Object, Object, Object, Object>.Context context) {
-    List<RTNode> nodes;
-    this.ctxt = new CrunchTaskContext(context, NodeContext.MAP);
-    try {
-      nodes = ctxt.getNodes();
-    } catch (IOException e) {
-      LOG.info("Crunch deserialization error", e);
-      throw new CrunchRuntimeException(e);
-    }
-    if (nodes.size() == 1) {
-      this.node = nodes.get(0);
-    } else {
-      CrunchInputSplit split = (CrunchInputSplit) context.getInputSplit();
-      this.node = nodes.get(split.getNodeIndex());
-    }
-    this.debug = ctxt.isDebugRun();
-  }
-
-  @Override
-  protected void map(Object k, Object v, Mapper<Object, Object, Object, Object>.Context context) {
-    if (debug) {
-      try {
-        node.process(k, v);
-      } catch (Exception e) {
-        LOG.error("Mapper exception", e);
-      }
-    } else {
-      node.process(k, v);
-    }
-  }
-
-  @Override
-  protected void cleanup(Mapper<Object, Object, Object, Object>.Context context) {
-    node.cleanup();
-    ctxt.cleanup();
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/run/CrunchRecordReader.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/run/CrunchRecordReader.java b/crunch/src/main/java/org/apache/crunch/impl/mr/run/CrunchRecordReader.java
deleted file mode 100644
index fc8fb32..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/run/CrunchRecordReader.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.run;
-
-import java.io.IOException;
-
-import org.apache.crunch.hadoop.mapreduce.TaskAttemptContextFactory;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.util.ReflectionUtils;
-
-class CrunchRecordReader<K, V> extends RecordReader<K, V> {
-
-  private final RecordReader<K, V> delegate;
-
-  public CrunchRecordReader(InputSplit inputSplit, final TaskAttemptContext context) throws IOException,
-      InterruptedException {
-    CrunchInputSplit crunchSplit = (CrunchInputSplit) inputSplit;
-    InputFormat<K, V> inputFormat = (InputFormat<K, V>) ReflectionUtils.newInstance(crunchSplit.getInputFormatClass(),
-        crunchSplit.getConf());
-    this.delegate = inputFormat.createRecordReader(crunchSplit.getInputSplit(),
-        TaskAttemptContextFactory.create(crunchSplit.getConf(), context.getTaskAttemptID()));
-  }
-
-  @Override
-  public void close() throws IOException {
-    delegate.close();
-  }
-
-  @Override
-  public K getCurrentKey() throws IOException, InterruptedException {
-    return delegate.getCurrentKey();
-  }
-
-  @Override
-  public V getCurrentValue() throws IOException, InterruptedException {
-    return delegate.getCurrentValue();
-  }
-
-  @Override
-  public float getProgress() throws IOException, InterruptedException {
-    return delegate.getProgress();
-  }
-
-  @Override
-  public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException {
-    CrunchInputSplit crunchSplit = (CrunchInputSplit) inputSplit;
-    InputSplit delegateSplit = crunchSplit.getInputSplit();
-    delegate.initialize(delegateSplit,
-        TaskAttemptContextFactory.create(crunchSplit.getConf(), context.getTaskAttemptID()));
-  }
-
-  @Override
-  public boolean nextKeyValue() throws IOException, InterruptedException {
-    return delegate.nextKeyValue();
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/run/CrunchReducer.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/run/CrunchReducer.java b/crunch/src/main/java/org/apache/crunch/impl/mr/run/CrunchReducer.java
deleted file mode 100644
index e5ddbd2..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/run/CrunchReducer.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.run;
-
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.crunch.CrunchRuntimeException;
-import org.apache.crunch.impl.SingleUseIterable;
-import org.apache.hadoop.mapreduce.Reducer;
-
-public class CrunchReducer extends Reducer<Object, Object, Object, Object> {
-
-  private static final Log LOG = LogFactory.getLog(CrunchReducer.class);
-
-  private RTNode node;
-  private CrunchTaskContext ctxt;
-  private boolean debug;
-
-  protected NodeContext getNodeContext() {
-    return NodeContext.REDUCE;
-  }
-
-  @Override
-  protected void setup(Reducer<Object, Object, Object, Object>.Context context) {
-    this.ctxt = new CrunchTaskContext(context, getNodeContext());
-    try {
-      List<RTNode> nodes = ctxt.getNodes();
-      this.node = nodes.get(0);
-    } catch (IOException e) {
-      LOG.info("Crunch deserialization error", e);
-      throw new CrunchRuntimeException(e);
-    }
-    this.debug = ctxt.isDebugRun();
-  }
-
-  @Override
-  protected void reduce(Object key, Iterable<Object> values, Reducer<Object, Object, Object, Object>.Context context) {
-    values = new SingleUseIterable<Object>(values);
-    if (debug) {
-      try {
-        node.processIterable(key, values);
-      } catch (Exception e) {
-        LOG.error("Reducer exception", e);
-      }
-    } else {
-      node.processIterable(key, values);
-    }
-  }
-
-  @Override
-  protected void cleanup(Reducer<Object, Object, Object, Object>.Context context) {
-    node.cleanup();
-    ctxt.cleanup();
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/run/CrunchTaskContext.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/run/CrunchTaskContext.java b/crunch/src/main/java/org/apache/crunch/impl/mr/run/CrunchTaskContext.java
deleted file mode 100644
index c4f2873..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/run/CrunchTaskContext.java
+++ /dev/null
@@ -1,86 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.run;
-
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.crunch.CrunchRuntimeException;
-import org.apache.crunch.impl.mr.plan.PlanningParameters;
-import org.apache.crunch.io.CrunchOutputs;
-import org.apache.crunch.util.DistCache;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.TaskInputOutputContext;
-
-class CrunchTaskContext {
-
-  private final TaskInputOutputContext<Object, Object, Object, Object> taskContext;
-  private final NodeContext nodeContext;
-  private CrunchOutputs<Object, Object> multipleOutputs;
-
-  public CrunchTaskContext(TaskInputOutputContext<Object, Object, Object, Object> taskContext, NodeContext nodeContext) {
-    this.taskContext = taskContext;
-    this.nodeContext = nodeContext;
-  }
-
-  public TaskInputOutputContext<Object, Object, Object, Object> getContext() {
-    return taskContext;
-  }
-
-  public NodeContext getNodeContext() {
-    return nodeContext;
-  }
-
-  public List<RTNode> getNodes() throws IOException {
-    Configuration conf = taskContext.getConfiguration();
-    Path path = new Path(new Path(conf.get(PlanningParameters.CRUNCH_WORKING_DIRECTORY)), nodeContext.toString());
-    @SuppressWarnings("unchecked")
-    List<RTNode> nodes = (List<RTNode>) DistCache.read(conf, path);
-    if (nodes != null) {
-      for (RTNode node : nodes) {
-        node.initialize(this);
-      }
-    }
-    return nodes;
-  }
-
-  public boolean isDebugRun() {
-    Configuration conf = taskContext.getConfiguration();
-    return conf.getBoolean(RuntimeParameters.DEBUG, false);
-  }
-
-  public void cleanup() {
-    if (multipleOutputs != null) {
-      try {
-        multipleOutputs.close();
-      } catch (IOException e) {
-        throw new CrunchRuntimeException(e);
-      } catch (InterruptedException e) {
-        throw new CrunchRuntimeException(e);
-      }
-    }
-  }
-
-  public CrunchOutputs<Object, Object> getMultipleOutputs() {
-    if (multipleOutputs == null) {
-      multipleOutputs = new CrunchOutputs<Object, Object>(taskContext);
-    }
-    return multipleOutputs;
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/run/NodeContext.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/run/NodeContext.java b/crunch/src/main/java/org/apache/crunch/impl/mr/run/NodeContext.java
deleted file mode 100644
index ffc9e7c..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/run/NodeContext.java
+++ /dev/null
@@ -1,35 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.run;
-
-import org.apache.crunch.impl.mr.plan.DoNode;
-
-/**
- * Enum that is associated with a serialized {@link DoNode} instance, so we know
- * how to use it within the context of a particular MR job.
- * 
- */
-public enum NodeContext {
-  MAP,
-  REDUCE,
-  COMBINE;
-
-  public String getConfigurationKey() {
-    return "crunch.donode." + toString().toLowerCase();
-  }
-}
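
The configuration keys this enum produces are simply the lower-cased constant names prefixed with "crunch.donode.", for example:

    import org.apache.crunch.impl.mr.run.NodeContext;

    public class NodeContextKeys {
      public static void main(String[] args) {
        for (NodeContext ctx : NodeContext.values()) {
          System.out.println(ctx.getConfigurationKey());
        }
        // Prints: crunch.donode.map, crunch.donode.reduce, crunch.donode.combine
      }
    }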

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/run/RTNode.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/run/RTNode.java b/crunch/src/main/java/org/apache/crunch/impl/mr/run/RTNode.java
deleted file mode 100644
index ce7b795..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/run/RTNode.java
+++ /dev/null
@@ -1,124 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.run;
-
-import java.io.Serializable;
-import java.util.List;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.crunch.CrunchRuntimeException;
-import org.apache.crunch.DoFn;
-import org.apache.crunch.Emitter;
-import org.apache.crunch.impl.mr.emit.IntermediateEmitter;
-import org.apache.crunch.impl.mr.emit.MultipleOutputEmitter;
-import org.apache.crunch.impl.mr.emit.OutputEmitter;
-import org.apache.crunch.types.Converter;
-import org.apache.crunch.types.PType;
-
-public class RTNode implements Serializable {
-
-  private static final Log LOG = LogFactory.getLog(RTNode.class);
-
-  private final String nodeName;
-  private DoFn<Object, Object> fn;
-  private PType<Object> outputPType;
-  private final List<RTNode> children;
-  private final Converter inputConverter;
-  private final Converter outputConverter;
-  private final String outputName;
-
-  private transient Emitter<Object> emitter;
-
-  public RTNode(DoFn<Object, Object> fn, PType<Object> outputPType, String name, List<RTNode> children,
-      Converter inputConverter,
-      Converter outputConverter, String outputName) {
-    this.fn = fn;
-    this.outputPType = outputPType;
-    this.nodeName = name;
-    this.children = children;
-    this.inputConverter = inputConverter;
-    this.outputConverter = outputConverter;
-    this.outputName = outputName;
-  }
-
-  public void initialize(CrunchTaskContext ctxt) {
-    if (emitter != null) {
-      // Already initialized
-      return;
-    }
-
-    fn.setContext(ctxt.getContext());
-    fn.initialize();
-    for (RTNode child : children) {
-      child.initialize(ctxt);
-    }
-
-    if (outputConverter != null) {
-      if (outputName != null) {
-        this.emitter = new MultipleOutputEmitter(outputConverter, ctxt.getMultipleOutputs(),
-            outputName);
-      } else {
-        this.emitter = new OutputEmitter(outputConverter, ctxt.getContext());
-      }
-    } else if (!children.isEmpty()) {
-      this.emitter = new IntermediateEmitter(outputPType, children,
-          ctxt.getContext().getConfiguration());
-    } else {
-      throw new CrunchRuntimeException("Invalid RTNode config: no emitter for: " + nodeName);
-    }
-  }
-
-  public boolean isLeafNode() {
-    return outputConverter != null && children.isEmpty();
-  }
-
-  public void process(Object input) {
-    try {
-      fn.process(input, emitter);
-    } catch (CrunchRuntimeException e) {
-      if (!e.wasLogged()) {
-        LOG.info(String.format("Crunch exception in '%s' for input: %s", nodeName, input.toString()), e);
-        e.markLogged();
-      }
-      throw e;
-    }
-  }
-
-  public void process(Object key, Object value) {
-    process(inputConverter.convertInput(key, value));
-  }
-
-  public void processIterable(Object key, Iterable values) {
-    process(inputConverter.convertIterableInput(key, values));
-  }
-
-  public void cleanup() {
-    fn.cleanup(emitter);
-    emitter.flush();
-    for (RTNode child : children) {
-      child.cleanup();
-    }
-  }
-
-  @Override
-  public String toString() {
-    return "RTNode [nodeName=" + nodeName + ", fn=" + fn + ", children=" + children + ", inputConverter="
-        + inputConverter + ", outputConverter=" + outputConverter + ", outputName=" + outputName + "]";
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/run/RuntimeParameters.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/run/RuntimeParameters.java b/crunch/src/main/java/org/apache/crunch/impl/mr/run/RuntimeParameters.java
deleted file mode 100644
index 604c49c..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/run/RuntimeParameters.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.run;
-
-/**
- * Parameters used during runtime execution.
- */
-public class RuntimeParameters {
-
-  public static final String AGGREGATOR_BUCKETS = "crunch.aggregator.buckets";
-
-  public static final String DEBUG = "crunch.debug";
-
-  public static final String TMP_DIR = "crunch.tmp.dir";
-
-  public static final String LOG_JOB_PROGRESS = "crunch.log.job.progress";
-
-  public static final String CREATE_DIR = "mapreduce.jobcontrol.createdir.ifnotexist";
-
-  // Not instantiated
-  private RuntimeParameters() {
-  }
-}
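
These keys are read off the job Configuration at task time (see CrunchTaskContext.isDebugRun and the mapper/reducer debug handling above), so they can be set on the Configuration handed to a pipeline before it runs. A hedged sketch; the chosen values and the pipeline-constructor comment are illustrative:

    import org.apache.hadoop.conf.Configuration;

    public class RuntimeParametersSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.setBoolean("crunch.debug", true);            // RuntimeParameters.DEBUG: log task exceptions instead of failing
        conf.set("crunch.tmp.dir", "/tmp/crunch-work");   // RuntimeParameters.TMP_DIR
        conf.setBoolean("crunch.log.job.progress", true); // RuntimeParameters.LOG_JOB_PROGRESS
        // Hand 'conf' to the pipeline, e.g. new MRPipeline(MyApp.class, conf).
      }
    }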

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/At.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/At.java b/crunch/src/main/java/org/apache/crunch/io/At.java
deleted file mode 100644
index a6f0782..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/At.java
+++ /dev/null
@@ -1,257 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io;
-
-import org.apache.avro.specific.SpecificRecord;
-import org.apache.crunch.SourceTarget;
-import org.apache.crunch.TableSourceTarget;
-import org.apache.crunch.io.avro.AvroFileSourceTarget;
-import org.apache.crunch.io.seq.SeqFileSourceTarget;
-import org.apache.crunch.io.seq.SeqFileTableSourceTarget;
-import org.apache.crunch.io.text.TextFileSourceTarget;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.PTypeFamily;
-import org.apache.crunch.types.avro.AvroType;
-import org.apache.crunch.types.avro.Avros;
-import org.apache.crunch.types.writable.Writables;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Writable;
-
-/**
- * <p>Static factory methods for creating common {@link SourceTarget} types, which may be treated as both a {@code Source}
- * and a {@code Target}.</p>
- * 
- * <p>The {@code At} methods are analogous to the {@link From} and {@link To} factory methods, but are used for
- * storing intermediate outputs that need to be passed from one run of a MapReduce pipeline to another run. The
- * {@code SourceTarget} object acts as both a {@code Source} and a {@code Target}, which enables it to provide this
- * functionality.
- * 
- * <code>
- *   Pipeline pipeline = new MRPipeline(this.getClass());
- *   // Create our intermediate storage location
- *   SourceTarget<String> intermediate = At.textFile("/temptext");
- *   ...
- *   // Write out the output of the first phase of a pipeline.
- *   pipeline.write(phase1, intermediate);
- *   
- *   // Explicitly call run to kick off the pipeline.
- *   pipeline.run();
- *   
- *   // And then kick off a second phase by consuming the output
- *   // from the first phase.
- *   PCollection<String> phase2Input = pipeline.read(intermediate);
- *   ...
- * </code>
- * </p>
- * 
- * <p>The {@code SourceTarget} abstraction is useful when we care about reading the intermediate
- * outputs of a pipeline as well as the final results.</p>
- */
-public class At {
-
-  /**
-   * Creates a {@code SourceTarget<T>} instance from the Avro file(s) at the given path name.
-   * 
-   * @param pathName The name of the path to the data on the filesystem
-   * @param avroClass The subclass of {@code SpecificRecord} to use for the Avro file
-   * @return A new {@code SourceTarget<T>} instance
-   */
-  public static <T extends SpecificRecord> SourceTarget<T> avroFile(String pathName, Class<T> avroClass) {
-    return avroFile(new Path(pathName), avroClass);  
-  }
-
-  /**
-   * Creates a {@code SourceTarget<T>} instance from the Avro file(s) at the given {@code Path}.
-   * 
-   * @param path The {@code Path} to the data
-   * @param avroClass The subclass of {@code SpecificRecord} to use for the Avro file
-   * @return A new {@code SourceTarget<T>} instance
-   */
-  public static <T extends SpecificRecord> SourceTarget<T> avroFile(Path path, Class<T> avroClass) {
-    return avroFile(path, Avros.specifics(avroClass));  
-  }
-  
-  /**
-   * Creates a {@code SourceTarget<T>} instance from the Avro file(s) at the given path name.
-   * 
-   * @param pathName The name of the path to the data on the filesystem
-   * @param avroType The {@code AvroType} for the Avro records
-   * @return A new {@code SourceTarget<T>} instance
-   */
-  public static <T> SourceTarget<T> avroFile(String pathName, AvroType<T> avroType) {
-    return avroFile(new Path(pathName), avroType);
-  }
-
-  /**
-   * Creates a {@code SourceTarget<T>} instance from the Avro file(s) at the given {@code Path}.
-   * 
-   * @param path The {@code Path} to the data
-   * @param avroType The {@code AvroType} for the Avro records
-   * @return A new {@code SourceTarget<T>} instance
-   */
-  public static <T> SourceTarget<T> avroFile(Path path, AvroType<T> avroType) {
-    return new AvroFileSourceTarget<T>(path, avroType);
-  }
-
-  /**
-   * Creates a {@code SourceTarget<T>} instance from the SequenceFile(s) at the given path name,
-   * using the value field of each key-value pair in the SequenceFile(s).
-   * 
-   * @param pathName The name of the path to the data on the filesystem
-   * @param valueClass The {@code Writable} type for the value of the SequenceFile entry
-   * @return A new {@code SourceTarget<T>} instance
-   */
-  public static <T extends Writable> SourceTarget<T> sequenceFile(String pathName, Class<T> valueClass) {
-    return sequenceFile(new Path(pathName), valueClass);
-  }
-
-  /**
-   * Creates a {@code SourceTarget<T>} instance from the SequenceFile(s) at the given {@code Path},
-   * using the value field of each key-value pair in the SequenceFile(s).
-   * 
-   * @param path The {@code Path} to the data
-   * @param valueClass The {@code Writable} type for the value of the SequenceFile entry
-   * @return A new {@code SourceTarget<T>} instance
-   */
-  public static <T extends Writable> SourceTarget<T> sequenceFile(Path path, Class<T> valueClass) {
-    return sequenceFile(path, Writables.writables(valueClass));
-  }
-  
-  /**
-   * Creates a {@code SourceTarget<T>} instance from the SequenceFile(s) at the given path name,
-   * using the value field of each key-value pair in the SequenceFile(s).
-   * 
-   * @param pathName The name of the path to the data on the filesystem
-   * @param ptype The {@code PType} for the value of the SequenceFile entry
-   * @return A new {@code SourceTarget<T>} instance
-   */
-  public static <T> SourceTarget<T> sequenceFile(String pathName, PType<T> ptype) {
-    return sequenceFile(new Path(pathName), ptype);
-  }
-
-  /**
-   * Creates a {@code SourceTarget<T>} instance from the SequenceFile(s) at the given {@code Path},
-   * using the value field of each key-value pair in the SequenceFile(s).
-   * 
-   * @param path The {@code Path} to the data
-   * @param ptype The {@code PType} for the value of the SequenceFile entry
-   * @return A new {@code SourceTarget<T>} instance
-   */
-  public static <T> SourceTarget<T> sequenceFile(Path path, PType<T> ptype) {
-    return new SeqFileSourceTarget<T>(path, ptype);
-  }
-
-  /**
-   * Creates a {@code TableSourceTarget<K, V>} instance from the key-value pairs in the
-   * SequenceFile(s) at the given path name.
-   * 
-   * @param pathName The name of the path to the data on the filesystem
-   * @param keyClass The {@code Writable} type for the key of the SequenceFile entry
-   * @param valueClass The {@code Writable} type for the value of the SequenceFile entry
-   * @return A new {@code TableSourceTarget<K, V>} instance
-   */
-  public static <K extends Writable, V extends Writable> TableSourceTarget<K, V> sequenceFile(
-      String pathName, Class<K> keyClass, Class<V> valueClass) {
-    return sequenceFile(new Path(pathName), keyClass, valueClass);
-  }
-
-  /**
-   * Creates a {@code TableSourceTarget<K, V>} instance from the key-value pairs in the
-   * SequenceFile(s) at the given {@code Path}.
-   * 
-   * @param path The {@code Path} to the data
-   * @param keyClass The {@code Writable} type for the key of the SequenceFile entry
-   * @param valueClass The {@code Writable} type for the value of the SequenceFile entry
-   * @return A new {@code TableSourceTarget<K, V>} instance
-   */
-  public static <K extends Writable, V extends Writable> TableSourceTarget<K, V> sequenceFile(
-      Path path, Class<K> keyClass, Class<V> valueClass) {
-    return sequenceFile(path, Writables.writables(keyClass), Writables.writables(valueClass));
-  }
-  
-  /**
-   * Creates a {@code TableSourceTarget<K, V>} instance from the key-value pairs in the
-   * SequenceFile(s) at the given path name.
-   * 
-   * @param pathName The name of the path to the data on the filesystem
-   * @param keyType The {@code PType} for the key of the SequenceFile entry
-   * @param valueType The {@code PType} for the value of the SequenceFile entry
-   * @return A new {@code TableSourceTarget<K, V>} instance
-   */
-  public static <K, V> TableSourceTarget<K, V> sequenceFile(String pathName, PType<K> keyType, PType<V> valueType) {
-    return sequenceFile(new Path(pathName), keyType, valueType);
-  }
-
-  /**
-   * Creates a {@code TableSourceTarget<K, V>} instance from the key-value pairs in the
-   * SequenceFile(s) at the given {@code Path}.
-   * 
-   * @param path The {@code Path} to the data
-   * @param keyType The {@code PType} for the key of the SequenceFile entry
-   * @param valueType The {@code PType} for the value of the SequenceFile entry
-   * @return A new {@code TableSourceTarget<K, V>} instance
-   */
-  public static <K, V> TableSourceTarget<K, V> sequenceFile(Path path, PType<K> keyType, PType<V> valueType) {
-    PTypeFamily ptf = keyType.getFamily();
-    return new SeqFileTableSourceTarget<K, V>(path, ptf.tableOf(keyType, valueType));
-  }
-
-  /**
-   * Creates a {@code SourceTarget<String>} instance for the text file(s) at the given path name.
-   * 
-   * @param pathName The name of the path to the data on the filesystem
-   * @return A new {@code SourceTarget<String>} instance
-   */
-  public static SourceTarget<String> textFile(String pathName) {
-    return textFile(new Path(pathName));
-  }
-
-  /**
-   * Creates a {@code SourceTarget<String>} instance for the text file(s) at the given {@code Path}.
-   * 
-   * @param path The {@code Path} to the data
-   * @return A new {@code SourceTarget<String>} instance
-   */
-  public static SourceTarget<String> textFile(Path path) {
-    return textFile(path, Writables.strings());
-  }
-
-  /**
-   * Creates a {@code SourceTarget<T>} instance for the text file(s) at the given path name using
-   * the provided {@code PType<T>} to convert the input text.
-   * 
-   * @param pathName The name of the path to the data on the filesystem
-   * @param ptype The {@code PType<T>} to use to process the input text
-   * @return A new {@code SourceTarget<T>} instance
-   */
-  public static <T> SourceTarget<T> textFile(String pathName, PType<T> ptype) {
-    return textFile(new Path(pathName), ptype);
-  }
-
-  /**
-   * Creates a {@code SourceTarget<T>} instance for the text file(s) at the given {@code Path} using
-   * the provided {@code PType<T>} to convert the input text.
-   * 
-   * @param path The {@code Path} to the data
-   * @param ptype The {@code PType<T>} to use to process the input text
-   * @return A new {@code SourceTarget<T>} instance
-   */
-  public static <T> SourceTarget<T> textFile(Path path, PType<T> ptype) {
-    return new TextFileSourceTarget<T>(path, ptype);
-  }
-}
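
To make the intermediate-storage pattern in the At Javadoc above concrete, here is a self-contained sketch in the same spirit; the input and output paths, the class name, and the choice of textFile for the intermediate location are assumptions made for illustration rather than part of the file being removed.

import org.apache.crunch.PCollection;
import org.apache.crunch.Pipeline;
import org.apache.crunch.SourceTarget;
import org.apache.crunch.impl.mr.MRPipeline;
import org.apache.crunch.io.At;

public class IntermediateOutputSketch {
  public static void main(String[] args) {
    Pipeline pipeline = new MRPipeline(IntermediateOutputSketch.class);

    // Phase 1: read some raw text (placeholder path).
    PCollection<String> phase1 = pipeline.readTextFile("/data/raw");
    // ... phase-1 transformations would be applied to phase1 here ...

    // A SourceTarget acts as a Target for phase 1 and a Source for phase 2.
    SourceTarget<String> intermediate = At.textFile("/data/intermediate");
    pipeline.write(phase1, intermediate);

    // Run the first phase so the intermediate output is materialized on disk.
    pipeline.run();

    // Phase 2: read the intermediate output back as the input of the next stage.
    PCollection<String> phase2Input = pipeline.read(intermediate);
    pipeline.writeTextFile(phase2Input, "/data/final");
    pipeline.done();
  }
}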

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/CompositePathIterable.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/CompositePathIterable.java b/crunch/src/main/java/org/apache/crunch/io/CompositePathIterable.java
deleted file mode 100644
index a4723e9..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/CompositePathIterable.java
+++ /dev/null
@@ -1,102 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io;
-
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.util.Collections;
-import java.util.Iterator;
-
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
-
-import com.google.common.collect.UnmodifiableIterator;
-
-public class CompositePathIterable<T> implements Iterable<T> {
-
-  private final FileStatus[] stati;
-  private final FileSystem fs;
-  private final FileReaderFactory<T> readerFactory;
-
-  private static final PathFilter FILTER = new PathFilter() {
-    @Override
-    public boolean accept(Path path) {
-      return !path.getName().startsWith("_");
-    }
-  };
-
-  public static <S> Iterable<S> create(FileSystem fs, Path path, FileReaderFactory<S> readerFactory) throws IOException {
-
-    if (!fs.exists(path)) {
-      throw new IOException("No files found to materialize at: " + path);
-    }
-
-    FileStatus[] stati = null;
-    try {
-      stati = fs.listStatus(path, FILTER);
-    } catch (FileNotFoundException e) {
-      stati = null;
-    }
-    if (stati == null) {
-      throw new IOException("No files found to materialize at: " + path);
-    }
-
-    if (stati.length == 0) {
-      return Collections.emptyList();
-    } else {
-      return new CompositePathIterable<S>(stati, fs, readerFactory);
-    }
-
-  }
-
-  private CompositePathIterable(FileStatus[] stati, FileSystem fs, FileReaderFactory<T> readerFactory) {
-    this.stati = stati;
-    this.fs = fs;
-    this.readerFactory = readerFactory;
-  }
-
-  @Override
-  public Iterator<T> iterator() {
-
-    return new UnmodifiableIterator<T>() {
-      private int index = 0;
-      private Iterator<T> iter = readerFactory.read(fs, stati[index++].getPath());
-
-      @Override
-      public boolean hasNext() {
-        if (!iter.hasNext()) {
-          while (index < stati.length) {
-            iter = readerFactory.read(fs, stati[index++].getPath());
-            if (iter.hasNext()) {
-              return true;
-            }
-          }
-          return false;
-        }
-        return true;
-      }
-
-      @Override
-      public T next() {
-        return iter.next();
-      }
-    };
-  }
-}
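
The iterator returned above lazily advances to the next file's reader whenever the current reader is exhausted. The same chaining idea, reduced to plain Java iterables so it runs without a FileSystem, looks roughly like the following; the class and method names here are illustrative and not part of Crunch.

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

public class ChainedIteratorSketch<T> implements Iterable<T> {
  // Assumes at least one source, mirroring how create() returns an empty list otherwise.
  private final List<? extends Iterable<T>> sources;

  public ChainedIteratorSketch(List<? extends Iterable<T>> sources) {
    this.sources = sources;
  }

  @Override
  public Iterator<T> iterator() {
    return new Iterator<T>() {
      private int index = 0;
      private Iterator<T> current = sources.get(index++).iterator();

      @Override
      public boolean hasNext() {
        // Advance through the remaining sources until one yields an element.
        while (!current.hasNext() && index < sources.size()) {
          current = sources.get(index++).iterator();
        }
        return current.hasNext();
      }

      @Override
      public T next() {
        return current.next();
      }

      @Override
      public void remove() {
        throw new UnsupportedOperationException();
      }
    };
  }

  public static void main(String[] args) {
    Iterable<String> chained = new ChainedIteratorSketch<String>(
        Arrays.asList(Arrays.asList("a", "b"), Arrays.<String>asList(), Arrays.asList("c")));
    for (String s : chained) {
      System.out.println(s); // prints a, b, c
    }
  }
}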

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/CrunchInputs.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/CrunchInputs.java b/crunch/src/main/java/org/apache/crunch/io/CrunchInputs.java
deleted file mode 100644
index d154db2..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/CrunchInputs.java
+++ /dev/null
@@ -1,71 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io;
-
-import java.util.List;
-import java.util.Map;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.JobContext;
-
-import com.google.common.base.Joiner;
-import com.google.common.base.Splitter;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-
-/**
- * Helper functions for configuring multiple {@code InputFormat} instances within a single
- * Crunch MapReduce job.
- */
-public class CrunchInputs {
-  public static final String CRUNCH_INPUTS = "crunch.inputs.dir";
-
-  private static final char RECORD_SEP = ',';
-  private static final char FIELD_SEP = ';';
-  private static final Joiner JOINER = Joiner.on(FIELD_SEP);
-  private static final Splitter SPLITTER = Splitter.on(FIELD_SEP);
-
-  public static void addInputPath(Job job, Path path, FormatBundle inputBundle, int nodeIndex) {
-    Configuration conf = job.getConfiguration();
-    String inputs = JOINER.join(inputBundle.serialize(), String.valueOf(nodeIndex), path.toString());
-    String existing = conf.get(CRUNCH_INPUTS);
-    conf.set(CRUNCH_INPUTS, existing == null ? inputs : existing + RECORD_SEP + inputs);
-  }
-
-  public static Map<FormatBundle, Map<Integer, List<Path>>> getFormatNodeMap(JobContext job) {
-    Map<FormatBundle, Map<Integer, List<Path>>> formatNodeMap = Maps.newHashMap();
-    Configuration conf = job.getConfiguration();
-    for (String input : Splitter.on(RECORD_SEP).split(conf.get(CRUNCH_INPUTS))) {
-      List<String> fields = Lists.newArrayList(SPLITTER.split(input));
-      FormatBundle<InputFormat> inputBundle = FormatBundle.fromSerialized(fields.get(0), InputFormat.class);
-      if (!formatNodeMap.containsKey(inputBundle)) {
-        formatNodeMap.put(inputBundle, Maps.<Integer, List<Path>> newHashMap());
-      }
-      Integer nodeIndex = Integer.valueOf(fields.get(1));
-      if (!formatNodeMap.get(inputBundle).containsKey(nodeIndex)) {
-        formatNodeMap.get(inputBundle).put(nodeIndex, Lists.<Path> newLinkedList());
-      }
-      formatNodeMap.get(inputBundle).get(nodeIndex).add(new Path(fields.get(2)));
-    }
-    return formatNodeMap;
-  }
-
-}
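
The two helpers above pack one (serialized bundle, node index, path) record per input into a single Configuration value, with ';' separating fields and ',' separating records. A small standalone sketch of that layout follows; the bundle strings and paths are placeholders, since real callers would use FormatBundle.serialize() and actual filesystem paths.

import java.util.List;

import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.Lists;

public class CrunchInputsFormatSketch {
  public static void main(String[] args) {
    // Each record is serializedBundle;nodeIndex;path; records are joined with ','.
    String recordA = Joiner.on(';').join("bundle-a", "0", "/data/in/a");
    String recordB = Joiner.on(';').join("bundle-b", "1", "/data/in/b");
    String confValue = recordA + "," + recordB; // the value stored under crunch.inputs.dir

    // Reading it back mirrors getFormatNodeMap(): split into records, then into fields.
    for (String record : Splitter.on(',').split(confValue)) {
      List<String> fields = Lists.newArrayList(Splitter.on(';').split(record));
      System.out.println("bundle=" + fields.get(0)
          + " node=" + fields.get(1) + " path=" + fields.get(2));
    }
  }
}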


[39/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/resources/maugham.txt
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/resources/maugham.txt b/crunch-core/src/it/resources/maugham.txt
new file mode 100644
index 0000000..16c45e8
--- /dev/null
+++ b/crunch-core/src/it/resources/maugham.txt
@@ -0,0 +1,29112 @@
+The Project Gutenberg EBook of Of Human Bondage, by W. Somerset Maugham
+
+This eBook is for the use of anyone anywhere at no cost and with
+almost no restrictions whatsoever.  You may copy it, give it away or
+re-use it under the terms of the Project Gutenberg License included
+with this eBook or online at www.gutenberg.net
+
+
+Title: Of Human Bondage
+
+Author: W. Somerset Maugham
+
+Release Date: May 6, 2008 [EBook #351]
+
+Language: English
+
+
+*** START OF THIS PROJECT GUTENBERG EBOOK OF HUMAN BONDAGE ***
+
+
+
+
+
+
+
+
+
+
+
+
+OF HUMAN BONDAGE
+
+
+BY
+
+W. SOMERSET MAUGHAM
+
+
+
+
+I
+
+The day broke gray and dull. The clouds hung heavily, and there was a
+rawness in the air that suggested snow. A woman servant came into a room
+in which a child was sleeping and drew the curtains. She glanced
+mechanically at the house opposite, a stucco house with a portico, and
+went to the child's bed.
+
+"Wake up, Philip," she said.
+
+She pulled down the bed-clothes, took him in her arms, and carried him
+downstairs. He was only half awake.
+
+"Your mother wants you," she said.
+
+She opened the door of a room on the floor below and took the child over
+to a bed in which a woman was lying. It was his mother. She stretched out
+her arms, and the child nestled by her side. He did not ask why he had
+been awakened. The woman kissed his eyes, and with thin, small hands felt
+the warm body through his white flannel nightgown. She pressed him closer
+to herself.
+
+"Are you sleepy, darling?" she said.
+
+Her voice was so weak that it seemed to come already from a great
+distance. The child did not answer, but smiled comfortably. He was very
+happy in the large, warm bed, with those soft arms about him. He tried to
+make himself smaller still as he cuddled up against his mother, and he
+kissed her sleepily. In a moment he closed his eyes and was fast asleep.
+The doctor came forwards and stood by the bed-side.
+
+"Oh, don't take him away yet," she moaned.
+
+The doctor, without answering, looked at her gravely. Knowing she would
+not be allowed to keep the child much longer, the woman kissed him again;
+and she passed her hand down his body till she came to his feet; she held
+the right foot in her hand and felt the five small toes; and then slowly
+passed her hand over the left one. She gave a sob.
+
+"What's the matter?" said the doctor. "You're tired."
+
+She shook her head, unable to speak, and the tears rolled down her cheeks.
+The doctor bent down.
+
+"Let me take him."
+
+She was too weak to resist his wish, and she gave the child up. The doctor
+handed him back to his nurse.
+
+"You'd better put him back in his own bed."
+
+"Very well, sir." The little boy, still sleeping, was taken away. His
+mother sobbed now broken-heartedly.
+
+"What will happen to him, poor child?"
+
+The monthly nurse tried to quiet her, and presently, from exhaustion, the
+crying ceased. The doctor walked to a table on the other side of the room,
+upon which, under a towel, lay the body of a still-born child. He lifted
+the towel and looked. He was hidden from the bed by a screen, but the
+woman guessed what he was doing.
+
+"Was it a girl or a boy?" she whispered to the nurse.
+
+"Another boy."
+
+The woman did not answer. In a moment the child's nurse came back. She
+approached the bed.
+
+"Master Philip never woke up," she said. There was a pause. Then the
+doctor felt his patient's pulse once more.
+
+"I don't think there's anything I can do just now," he said. "I'll call
+again after breakfast."
+
+"I'll show you out, sir," said the child's nurse.
+
+They walked downstairs in silence. In the hall the doctor stopped.
+
+"You've sent for Mrs. Carey's brother-in-law, haven't you?"
+
+"Yes, sir."
+
+"D'you know at what time he'll be here?"
+
+"No, sir, I'm expecting a telegram."
+
+"What about the little boy? I should think he'd be better out of the way."
+
+"Miss Watkin said she'd take him, sir."
+
+"Who's she?"
+
+"She's his godmother, sir. D'you think Mrs. Carey will get over it, sir?"
+
+The doctor shook his head.
+
+
+
+II
+
+It was a week later. Philip was sitting on the floor in the drawing-room
+at Miss Watkin's house in Onslow gardens. He was an only child and used to
+amusing himself. The room was filled with massive furniture, and on each
+of the sofas were three big cushions. There was a cushion too in each
+arm-chair. All these he had taken and, with the help of the gilt rout
+chairs, light and easy to move, had made an elaborate cave in which he
+could hide himself from the Red Indians who were lurking behind the
+curtains. He put his ear to the floor and listened to the herd of
+buffaloes that raced across the prairie. Presently, hearing the door open,
+he held his breath so that he might not be discovered; but a violent hand
+piled away a chair and the cushions fell down.
+
+"You naughty boy, Miss Watkin WILL be cross with you."
+
+"Hulloa, Emma!" he said.
+
+The nurse bent down and kissed him, then began to shake out the cushions,
+and put them back in their places.
+
+"Am I to come home?" he asked.
+
+"Yes, I've come to fetch you."
+
+"You've got a new dress on."
+
+It was in eighteen-eighty-five, and she wore a bustle. Her gown was of
+black velvet, with tight sleeves and sloping shoulders, and the skirt had
+three large flounces. She wore a black bonnet with velvet strings. She
+hesitated. The question she had expected did not come, and so she could
+not give the answer she had prepared.
+
+"Aren't you going to ask how your mamma is?" she said at length.
+
+"Oh, I forgot. How is mamma?"
+
+Now she was ready.
+
+"Your mamma is quite well and happy."
+
+"Oh, I am glad."
+
+"Your mamma's gone away. You won't ever see her any more." Philip did not
+know what she meant.
+
+"Why not?"
+
+"Your mamma's in heaven."
+
+She began to cry, and Philip, though he did not quite understand, cried
+too. Emma was a tall, big-boned woman, with fair hair and large features.
+She came from Devonshire and, notwithstanding her many years of service in
+London, had never lost the breadth of her accent. Her tears increased her
+emotion, and she pressed the little boy to her heart. She felt vaguely the
+pity of that child deprived of the only love in the world that is quite
+unselfish. It seemed dreadful that he must be handed over to strangers.
+But in a little while she pulled herself together.
+
+"Your Uncle William is waiting in to see you," she said. "Go and say
+good-bye to Miss Watkin, and we'll go home."
+
+"I don't want to say good-bye," he answered, instinctively anxious to hide
+his tears.
+
+"Very well, run upstairs and get your hat."
+
+He fetched it, and when he came down Emma was waiting for him in the hall.
+He heard the sound of voices in the study behind the dining-room. He
+paused. He knew that Miss Watkin and her sister were talking to friends,
+and it seemed to him--he was nine years old--that if he went in they would
+be sorry for him.
+
+"I think I'll go and say good-bye to Miss Watkin."
+
+"I think you'd better," said Emma.
+
+"Go in and tell them I'm coming," he said.
+
+He wished to make the most of his opportunity. Emma knocked at the door
+and walked in. He heard her speak.
+
+"Master Philip wants to say good-bye to you, miss."
+
+There was a sudden hush of the conversation, and Philip limped in.
+Henrietta Watkin was a stout woman, with a red face and dyed hair. In
+those days to dye the hair excited comment, and Philip had heard much
+gossip at home when his godmother's changed colour. She lived with an
+elder sister, who had resigned herself contentedly to old age. Two ladies,
+whom Philip did not know, were calling, and they looked at him curiously.
+
+"My poor child," said Miss Watkin, opening her arms.
+
+She began to cry. Philip understood now why she had not been in to
+luncheon and why she wore a black dress. She could not speak.
+
+"I've got to go home," said Philip, at last.
+
+He disengaged himself from Miss Watkin's arms, and she kissed him again.
+Then he went to her sister and bade her good-bye too. One of the strange
+ladies asked if she might kiss him, and he gravely gave her permission.
+Though crying, he keenly enjoyed the sensation he was causing; he would
+have been glad to stay a little longer to be made much of, but felt they
+expected him to go, so he said that Emma was waiting for him. He went out
+of the room. Emma had gone downstairs to speak with a friend in the
+basement, and he waited for her on the landing. He heard Henrietta
+Watkin's voice.
+
+"His mother was my greatest friend. I can't bear to think that she's
+dead."
+
+"You oughtn't to have gone to the funeral, Henrietta," said her sister. "I
+knew it would upset you."
+
+Then one of the strangers spoke.
+
+"Poor little boy, it's dreadful to think of him quite alone in the world.
+I see he limps."
+
+"Yes, he's got a club-foot. It was such a grief to his mother."
+
+Then Emma came back. They called a hansom, and she told the driver where
+to go.
+
+
+
+III
+
+
+When they reached the house Mrs. Carey had died in--it was in a dreary,
+respectable street between Notting Hill Gate and High Street,
+Kensington--Emma led Philip into the drawing-room. His uncle was writing
+letters of thanks for the wreaths which had been sent. One of them, which
+had arrived too late for the funeral, lay in its cardboard box on the
+hall-table.
+
+"Here's Master Philip," said Emma.
+
+Mr. Carey stood up slowly and shook hands with the little boy. Then on
+second thoughts he bent down and kissed his forehead. He was a man of
+somewhat less than average height, inclined to corpulence, with his hair,
+worn long, arranged over the scalp so as to conceal his baldness. He was
+clean-shaven. His features were regular, and it was possible to imagine
+that in his youth he had been good-looking. On his watch-chain he wore a
+gold cross.
+
+"You're going to live with me now, Philip," said Mr. Carey. "Shall you
+like that?"
+
+Two years before Philip had been sent down to stay at the vicarage after
+an attack of chicken-pox; but there remained with him a recollection of an
+attic and a large garden rather than of his uncle and aunt.
+
+"Yes."
+
+"You must look upon me and your Aunt Louisa as your father and mother."
+
+The child's mouth trembled a little, he reddened, but did not answer.
+
+"Your dear mother left you in my charge."
+
+Mr. Carey had no great ease in expressing himself. When the news came that
+his sister-in-law was dying, he set off at once for London, but on the way
+thought of nothing but the disturbance in his life that would be caused if
+her death forced him to undertake the care of her son. He was well over
+fifty, and his wife, to whom he had been married for thirty years, was
+childless; he did not look forward with any pleasure to the presence of a
+small boy who might be noisy and rough. He had never much liked his
+sister-in-law.
+
+"I'm going to take you down to Blackstable tomorrow," he said.
+
+"With Emma?"
+
+The child put his hand in hers, and she pressed it.
+
+"I'm afraid Emma must go away," said Mr. Carey.
+
+"But I want Emma to come with me."
+
+Philip began to cry, and the nurse could not help crying too. Mr. Carey
+looked at them helplessly.
+
+"I think you'd better leave me alone with Master Philip for a moment."
+
+"Very good, sir."
+
+Though Philip clung to her, she released herself gently. Mr. Carey took
+the boy on his knee and put his arm round him.
+
+"You mustn't cry," he said. "You're too old to have a nurse now. We must
+see about sending you to school."
+
+"I want Emma to come with me," the child repeated.
+
+"It costs too much money, Philip. Your father didn't leave very much, and
+I don't know what's become of it. You must look at every penny you spend."
+
+Mr. Carey had called the day before on the family solicitor. Philip's
+father was a surgeon in good practice, and his hospital appointments
+suggested an established position; so that it was a surprise on his sudden
+death from blood-poisoning to find that he had left his widow little more
+than his life insurance and what could be got for the lease of their house
+in Bruton Street. This was six months ago; and Mrs. Carey, already in
+delicate health, finding herself with child, had lost her head and
+accepted for the lease the first offer that was made. She stored her
+furniture, and, at a rent which the parson thought outrageous, took a
+furnished house for a year, so that she might suffer from no inconvenience
+till her child was born. But she had never been used to the management of
+money, and was unable to adapt her expenditure to her altered
+circumstances. The little she had slipped through her fingers in one way
+and another, so that now, when all expenses were paid, not much more than
+two thousand pounds remained to support the boy till he was able to earn
+his own living. It was impossible to explain all this to Philip and he was
+sobbing still.
+
+"You'd better go to Emma," Mr. Carey said, feeling that she could console
+the child better than anyone.
+
+Without a word Philip slipped off his uncle's knee, but Mr. Carey stopped
+him.
+
+"We must go tomorrow, because on Saturday I've got to prepare my sermon,
+and you must tell Emma to get your things ready today. You can bring all
+your toys. And if you want anything to remember your father and mother by
+you can take one thing for each of them. Everything else is going to be
+sold."
+
+The boy slipped out of the room. Mr. Carey was unused to work, and he
+turned to his correspondence with resentment. On one side of the desk was
+a bundle of bills, and these filled him with irritation. One especially
+seemed preposterous. Immediately after Mrs. Carey's death Emma had ordered
+from the florist masses of white flowers for the room in which the dead
+woman lay. It was sheer waste of money. Emma took far too much upon
+herself. Even if there had been no financial necessity, he would have
+dismissed her.
+
+But Philip went to her, and hid his face in her bosom, and wept as though
+his heart would break. And she, feeling that he was almost her own
+son--she had taken him when he was a month old--consoled him with soft
+words. She promised that she would come and see him sometimes, and that
+she would never forget him; and she told him about the country he was
+going to and about her own home in Devonshire--her father kept a turnpike
+on the high-road that led to Exeter, and there were pigs in the sty, and
+there was a cow, and the cow had just had a calf--till Philip forgot his
+tears and grew excited at the thought of his approaching journey.
+Presently she put him down, for there was much to be done, and he helped
+her to lay out his clothes on the bed. She sent him into the nursery to
+gather up his toys, and in a little while he was playing happily.
+
+But at last he grew tired of being alone and went back to the bed-room, in
+which Emma was now putting his things into a big tin box; he remembered
+then that his uncle had said he might take something to remember his
+father and mother by. He told Emma and asked her what he should take.
+
+"You'd better go into the drawing-room and see what you fancy."
+
+"Uncle William's there."
+
+"Never mind that. They're your own things now."
+
+Philip went downstairs slowly and found the door open. Mr. Carey had left
+the room. Philip walked slowly round. They had been in the house so short
+a time that there was little in it that had a particular interest to him.
+It was a stranger's room, and Philip saw nothing that struck his fancy.
+But he knew which were his mother's things and which belonged to the
+landlord, and presently fixed on a little clock that he had once heard his
+mother say she liked. With this he walked again rather disconsolately
+upstairs. Outside the door of his mother's bed-room he stopped and
+listened. Though no one had told him not to go in, he had a feeling that
+it would be wrong to do so; he was a little frightened, and his heart beat
+uncomfortably; but at the same time something impelled him to turn the
+handle. He turned it very gently, as if to prevent anyone within from
+hearing, and then slowly pushed the door open. He stood on the threshold
+for a moment before he had the courage to enter. He was not frightened
+now, but it seemed strange. He closed the door behind him. The blinds were
+drawn, and the room, in the cold light of a January afternoon, was dark.
+On the dressing-table were Mrs. Carey's brushes and the hand mirror. In a
+little tray were hairpins. There was a photograph of himself on the
+chimney-piece and one of his father. He had often been in the room when
+his mother was not in it, but now it seemed different. There was something
+curious in the look of the chairs. The bed was made as though someone were
+going to sleep in it that night, and in a case on the pillow was a
+night-dress.
+
+Philip opened a large cupboard filled with dresses and, stepping in, took
+as many of them as he could in his arms and buried his face in them. They
+smelt of the scent his mother used. Then he pulled open the drawers,
+filled with his mother's things, and looked at them: there were lavender
+bags among the linen, and their scent was fresh and pleasant. The
+strangeness of the room left it, and it seemed to him that his mother had
+just gone out for a walk. She would be in presently and would come
+upstairs to have nursery tea with him. And he seemed to feel her kiss on
+his lips.
+
+It was not true that he would never see her again. It was not true simply
+because it was impossible. He climbed up on the bed and put his head on
+the pillow. He lay there quite still.
+
+
+
+IV
+
+
+Philip parted from Emma with tears, but the journey to Blackstable amused
+him, and, when they arrived, he was resigned and cheerful. Blackstable was
+sixty miles from London. Giving their luggage to a porter, Mr. Carey set
+out to walk with Philip to the vicarage; it took them little more than
+five minutes, and, when they reached it, Philip suddenly remembered the
+gate. It was red and five-barred: it swung both ways on easy hinges; and
+it was possible, though forbidden, to swing backwards and forwards on it.
+They walked through the garden to the front-door. This was only used by
+visitors and on Sundays, and on special occasions, as when the Vicar went
+up to London or came back. The traffic of the house took place through a
+side-door, and there was a back door as well for the gardener and for
+beggars and tramps. It was a fairly large house of yellow brick, with a
+red roof, built about five and twenty years before in an ecclesiastical
+style. The front-door was like a church porch, and the drawing-room
+windows were gothic.
+
+Mrs. Carey, knowing by what train they were coming, waited in the
+drawing-room and listened for the click of the gate. When she heard it she
+went to the door.
+
+"There's Aunt Louisa," said Mr. Carey, when he saw her. "Run and give her
+a kiss."
+
+Philip started to run, awkwardly, trailing his club-foot, and then
+stopped. Mrs. Carey was a little, shrivelled woman of the same age as her
+husband, with a face extraordinarily filled with deep wrinkles, and pale
+blue eyes. Her gray hair was arranged in ringlets according to the fashion
+of her youth. She wore a black dress, and her only ornament was a gold
+chain, from which hung a cross. She had a shy manner and a gentle voice.
+
+"Did you walk, William?" she said, almost reproachfully, as she kissed her
+husband.
+
+"I didn't think of it," he answered, with a glance at his nephew.
+
+"It didn't hurt you to walk, Philip, did it?" she asked the child.
+
+"No. I always walk."
+
+He was a little surprised at their conversation. Aunt Louisa told him to
+come in, and they entered the hall. It was paved with red and yellow
+tiles, on which alternately were a Greek Cross and the Lamb of God. An
+imposing staircase led out of the hall. It was of polished pine, with a
+peculiar smell, and had been put in because fortunately, when the church
+was reseated, enough wood remained over. The balusters were decorated with
+emblems of the Four Evangelists.
+
+"I've had the stove lighted as I thought you'd be cold after your
+journey," said Mrs. Carey.
+
+It was a large black stove that stood in the hall and was only lighted if
+the weather was very bad and the Vicar had a cold. It was not lighted if
+Mrs. Carey had a cold. Coal was expensive. Besides, Mary Ann, the maid,
+didn't like fires all over the place. If they wanted all them fires they
+must keep a second girl. In the winter Mr. and Mrs. Carey lived in the
+dining-room so that one fire should do, and in the summer they could not
+get out of the habit, so the drawing-room was used only by Mr. Carey on
+Sunday afternoons for his nap. But every Saturday he had a fire in the
+study so that he could write his sermon.
+
+Aunt Louisa took Philip upstairs and showed him into a tiny bed-room that
+looked out on the drive. Immediately in front of the window was a large
+tree, which Philip remembered now because the branches were so low that it
+was possible to climb quite high up it.
+
+"A small room for a small boy," said Mrs. Carey. "You won't be frightened
+at sleeping alone?"
+
+"Oh, no."
+
+On his first visit to the vicarage he had come with his nurse, and Mrs.
+Carey had had little to do with him. She looked at him now with some
+uncertainty.
+
+"Can you wash your own hands, or shall I wash them for you?"
+
+"I can wash myself," he answered firmly.
+
+"Well, I shall look at them when you come down to tea," said Mrs. Carey.
+
+She knew nothing about children. After it was settled that Philip should
+come down to Blackstable, Mrs. Carey had thought much how she should treat
+him; she was anxious to do her duty; but now he was there she found
+herself just as shy of him as he was of her. She hoped he would not be
+noisy and rough, because her husband did not like rough and noisy boys.
+Mrs. Carey made an excuse to leave Philip alone, but in a moment came back
+and knocked at the door; she asked him, without coming in, if he could
+pour out the water himself. Then she went downstairs and rang the bell for
+tea.
+
+The dining-room, large and well-proportioned, had windows on two sides of
+it, with heavy curtains of red rep; there was a big table in the middle;
+and at one end an imposing mahogany sideboard with a looking-glass in it.
+In one corner stood a harmonium. On each side of the fireplace were chairs
+covered in stamped leather, each with an antimacassar; one had arms and
+was called the husband, and the other had none and was called the wife.
+Mrs. Carey never sat in the arm-chair: she said she preferred a chair that
+was not too comfortable; there was always a lot to do, and if her chair
+had had arms she might not be so ready to leave it.
+
+Mr. Carey was making up the fire when Philip came in, and he pointed out
+to his nephew that there were two pokers. One was large and bright and
+polished and unused, and was called the Vicar; and the other, which was
+much smaller and had evidently passed through many fires, was called the
+Curate.
+
+"What are we waiting for?" said Mr. Carey.
+
+"I told Mary Ann to make you an egg. I thought you'd be hungry after your
+journey."
+
+Mrs. Carey thought the journey from London to Blackstable very tiring. She
+seldom travelled herself, for the living was only three hundred a year,
+and, when her husband wanted a holiday, since there was not money for two,
+he went by himself. He was very fond of Church Congresses and usually
+managed to go up to London once a year; and once he had been to Paris for
+the exhibition, and two or three times to Switzerland. Mary Ann brought in
+the egg, and they sat down. The chair was much too low for Philip, and for
+a moment neither Mr. Carey nor his wife knew what to do.
+
+"I'll put some books under him," said Mary Ann.
+
+She took from the top of the harmonium the large Bible and the prayer-book
+from which the Vicar was accustomed to read prayers, and put them on
+Philip's chair.
+
+"Oh, William, he can't sit on the Bible," said Mrs. Carey, in a shocked
+tone. "Couldn't you get him some books out of the study?"
+
+Mr. Carey considered the question for an instant.
+
+"I don't think it matters this once if you put the prayer-book on the top,
+Mary Ann," he said. "The book of Common Prayer is the composition of men
+like ourselves. It has no claim to divine authorship."
+
+"I hadn't thought of that, William," said Aunt Louisa.
+
+Philip perched himself on the books, and the Vicar, having said grace, cut
+the top off his egg.
+
+"There," he said, handing it to Philip, "you can eat my top if you like."
+
+Philip would have liked an egg to himself, but he was not offered one, so
+took what he could.
+
+"How have the chickens been laying since I went away?" asked the Vicar.
+
+"Oh, they've been dreadful, only one or two a day."
+
+"How did you like that top, Philip?" asked his uncle.
+
+"Very much, thank you."
+
+"You shall have another one on Sunday afternoon."
+
+Mr. Carey always had a boiled egg at tea on Sunday, so that he might be
+fortified for the evening service.
+
+
+
+V
+
+
+Philip came gradually to know the people he was to live with, and by
+fragments of conversation, some of it not meant for his ears, learned a
+good deal both about himself and about his dead parents. Philip's father
+had been much younger than the Vicar of Blackstable. After a brilliant
+career at St. Luke's Hospital he was put on the staff, and presently began
+to earn money in considerable sums. He spent it freely. When the parson
+set about restoring his church and asked his brother for a subscription,
+he was surprised by receiving a couple of hundred pounds: Mr. Carey,
+thrifty by inclination and economical by necessity, accepted it with
+mingled feelings; he was envious of his brother because he could afford to
+give so much, pleased for the sake of his church, and vaguely irritated by
+a generosity which seemed almost ostentatious. Then Henry Carey married a
+patient, a beautiful girl but penniless, an orphan with no near relations,
+but of good family; and there was an array of fine friends at the wedding.
+The parson, on his visits to her when he came to London, held himself with
+reserve. He felt shy with her and in his heart he resented her great
+beauty: she dressed more magnificently than became the wife of a
+hardworking surgeon; and the charming furniture of her house, the flowers
+among which she lived even in winter, suggested an extravagance which he
+deplored. He heard her talk of entertainments she was going to; and, as he
+told his wife on getting home again, it was impossible to accept
+hospitality without making some return. He had seen grapes in the
+dining-room that must have cost at least eight shillings a pound; and at
+luncheon he had been given asparagus two months before it was ready in the
+vicarage garden. Now all he had anticipated was come to pass: the Vicar
+felt the satisfaction of the prophet who saw fire and brimstone consume
+the city which would not mend its way to his warning. Poor Philip was
+practically penniless, and what was the good of his mother's fine friends
+now? He heard that his father's extravagance was really criminal, and it
+was a mercy that Providence had seen fit to take his dear mother to
+itself: she had no more idea of money than a child.
+
+When Philip had been a week at Blackstable an incident happened which
+seemed to irritate his uncle very much. One morning he found on the
+breakfast table a small packet which had been sent on by post from the
+late Mrs. Carey's house in London. It was addressed to her. When the
+parson opened it he found a dozen photographs of Mrs. Carey. They showed
+the head and shoulders only, and her hair was more plainly done than
+usual, low on the forehead, which gave her an unusual look; the face was
+thin and worn, but no illness could impair the beauty of her features.
+There was in the large dark eyes a sadness which Philip did not remember.
+The first sight of the dead woman gave Mr. Carey a little shock, but this
+was quickly followed by perplexity. The photographs seemed quite recent,
+and he could not imagine who had ordered them.
+
+"D'you know anything about these, Philip?" he asked.
+
+"I remember mamma said she'd been taken," he answered. "Miss Watkin
+scolded her.... She said: I wanted the boy to have something to remember
+me by when he grows up."
+
+Mr. Carey looked at Philip for an instant. The child spoke in a clear
+treble. He recalled the words, but they meant nothing to him.
+
+"You'd better take one of the photographs and keep it in your room," said
+Mr. Carey. "I'll put the others away."
+
+He sent one to Miss Watkin, and she wrote and explained how they came to
+be taken.
+
+One day Mrs. Carey was lying in bed, but she was feeling a little better
+than usual, and the doctor in the morning had seemed hopeful; Emma had
+taken the child out, and the maids were downstairs in the basement:
+suddenly Mrs. Carey felt desperately alone in the world. A great fear
+seized her that she would not recover from the confinement which she was
+expecting in a fortnight. Her son was nine years old. How could he be
+expected to remember her? She could not bear to think that he would grow
+up and forget, forget her utterly; and she had loved him so passionately,
+because he was weakly and deformed, and because he was her child. She had
+no photographs of herself taken since her marriage, and that was ten years
+before. She wanted her son to know what she looked like at the end. He
+could not forget her then, not forget utterly. She knew that if she called
+her maid and told her she wanted to get up, the maid would prevent her,
+and perhaps send for the doctor, and she had not the strength now to
+struggle or argue. She got out of bed and began to dress herself. She had
+been on her back so long that her legs gave way beneath her, and then the
+soles of her feet tingled so that she could hardly bear to put them to the
+ground. But she went on. She was unused to doing her own hair and, when
+she raised her arms and began to brush it, she felt faint. She could never
+do it as her maid did. It was beautiful hair, very fine, and of a deep
+rich gold. Her eyebrows were straight and dark. She put on a black skirt,
+but chose the bodice of the evening dress which she liked best: it was of
+a white damask which was fashionable in those days. She looked at herself
+in the glass. Her face was very pale, but her skin was clear: she had
+never had much colour, and this had always made the redness of her
+beautiful mouth emphatic. She could not restrain a sob. But she could not
+afford to be sorry for herself; she was feeling already desperately tired;
+and she put on the furs which Henry had given her the Christmas
+before--she had been so proud of them and so happy then--and slipped
+downstairs with beating heart. She got safely out of the house and drove
+to a photographer. She paid for a dozen photographs. She was obliged to
+ask for a glass of water in the middle of the sitting; and the assistant,
+seeing she was ill, suggested that she should come another day, but she
+insisted on staying till the end. At last it was finished, and she drove
+back again to the dingy little house in Kensington which she hated with
+all her heart. It was a horrible house to die in.
+
+She found the front door open, and when she drove up the maid and Emma ran
+down the steps to help her. They had been frightened when they found her
+room empty. At first they thought she must have gone to Miss Watkin, and
+the cook was sent round. Miss Watkin came back with her and was waiting
+anxiously in the drawing-room. She came downstairs now full of anxiety and
+reproaches; but the exertion had been more than Mrs. Carey was fit for,
+and when the occasion for firmness no longer existed she gave way. She
+fell heavily into Emma's arms and was carried upstairs. She remained
+unconscious for a time that seemed incredibly long to those that watched
+her, and the doctor, hurriedly sent for, did not come. It was next day,
+when she was a little better, that Miss Watkin got some explanation out of
+her. Philip was playing on the floor of his mother's bed-room, and neither
+of the ladies paid attention to him. He only understood vaguely what they
+were talking about, and he could not have said why those words remained in
+his memory.
+
+"I wanted the boy to have something to remember me by when he grows up."
+
+"I can't make out why she ordered a dozen," said Mr. Carey. "Two would
+have done."
+
+
+
+VI
+
+
+One day was very like another at the vicarage.
+
+Soon after breakfast Mary Ann brought in The Times. Mr. Carey shared it
+with two neighbours. He had it from ten till one, when the gardener took
+it over to Mr. Ellis at the Limes, with whom it remained till seven; then
+it was taken to Miss Brooks at the Manor House, who, since she got it
+late, had the advantage of keeping it. In summer Mrs. Carey, when she was
+making jam, often asked her for a copy to cover the pots with. When the
+Vicar settled down to his paper his wife put on her bonnet and went out to
+do the shopping. Philip accompanied her. Blackstable was a fishing
+village. It consisted of a high street in which were the shops, the bank,
+the doctor's house, and the houses of two or three coalship owners; round
+the little harbor were shabby streets in which lived fishermen and poor
+people; but since they went to chapel they were of no account. When Mrs.
+Carey passed the dissenting ministers in the street she stepped over to
+the other side to avoid meeting them, but if there was not time for this
+fixed her eyes on the pavement. It was a scandal to which the Vicar had
+never resigned himself that there were three chapels in the High Street:
+he could not help feeling that the law should have stepped in to prevent
+their erection. Shopping in Blackstable was not a simple matter; for
+dissent, helped by the fact that the parish church was two miles from the
+town, was very common; and it was necessary to deal only with churchgoers;
+Mrs. Carey knew perfectly that the vicarage custom might make all the
+difference to a tradesman's faith. There were two butchers who went to
+church, and they would not understand that the Vicar could not deal with
+both of them at once; nor were they satisfied with his simple plan of
+going for six months to one and for six months to the other. The butcher
+who was not sending meat to the vicarage constantly threatened not to come
+to church, and the Vicar was sometimes obliged to make a threat: it was
+very wrong of him not to come to church, but if he carried iniquity
+further and actually went to chapel, then of course, excellent as his meat
+was, Mr. Carey would be forced to leave him for ever. Mrs. Carey often
+stopped at the bank to deliver a message to Josiah Graves, the manager,
+who was choir-master, treasurer, and churchwarden. He was a tall, thin man
+with a sallow face and a long nose; his hair was very white, and to Philip
+he seemed extremely old. He kept the parish accounts, arranged the treats
+for the choir and the schools; though there was no organ in the parish
+church, it was generally considered (in Blackstable) that the choir he led
+was the best in Kent; and when there was any ceremony, such as a visit
+from the Bishop for confirmation or from the Rural Dean to preach at the
+Harvest Thanksgiving, he made the necessary preparations. But he had no
+hesitation in doing all manner of things without more than a perfunctory
+consultation with the Vicar, and the Vicar, though always ready to be
+saved trouble, much resented the churchwarden's managing ways. He really
+seemed to look upon himself as the most important person in the parish.
+Mr. Carey constantly told his wife that if Josiah Graves did not take care
+he would give him a good rap over the knuckles one day; but Mrs. Carey
+advised him to bear with Josiah Graves: he meant well, and it was not his
+fault if he was not quite a gentleman. The Vicar, finding his comfort in
+the practice of a Christian virtue, exercised forbearance; but he revenged
+himself by calling the churchwarden Bismarck behind his back.
+
+Once there had been a serious quarrel between the pair, and Mrs. Carey
+still thought of that anxious time with dismay. The Conservative candidate
+had announced his intention of addressing a meeting at Blackstable; and
+Josiah Graves, having arranged that it should take place in the Mission
+Hall, went to Mr. Carey and told him that he hoped he would say a few
+words. It appeared that the candidate had asked Josiah Graves to take the
+chair. This was more than Mr. Carey could put up with. He had firm views
+upon the respect which was due to the cloth, and it was ridiculous for a
+churchwarden to take the chair at a meeting when the Vicar was there. He
+reminded Josiah Graves that parson meant person, that is, the vicar was
+the person of the parish. Josiah Graves answered that he was the first to
+recognise the dignity of the church, but this was a matter of politics,
+and in his turn he reminded the Vicar that their Blessed Saviour had
+enjoined upon them to render unto Caesar the things that were Caesar's. To
+this Mr. Carey replied that the devil could quote scripture to his
+purpose, himself had sole authority over the Mission Hall, and if he were
+not asked to be chairman he would refuse the use of it for a political
+meeting. Josiah Graves told Mr. Carey that he might do as he chose, and
+for his part he thought the Wesleyan Chapel would be an equally suitable
+place. Then Mr. Carey said that if Josiah Graves set foot in what was
+little better than a heathen temple he was not fit to be churchwarden in
+a Christian parish. Josiah Graves thereupon resigned all his offices, and
+that very evening sent to the church for his cassock and surplice. His
+sister, Miss Graves, who kept house for him, gave up her secretaryship of
+the Maternity Club, which provided the pregnant poor with flannel, baby
+linen, coals, and five shillings. Mr. Carey said he was at last master in
+his own house. But soon he found that he was obliged to see to all sorts
+of things that he knew nothing about; and Josiah Graves, after the first
+moment of irritation, discovered that he had lost his chief interest in
+life. Mrs. Carey and Miss Graves were much distressed by the quarrel; they
+met after a discreet exchange of letters, and made up their minds to put
+the matter right: they talked, one to her husband, the other to her
+brother, from morning till night; and since they were persuading these
+gentlemen to do what in their hearts they wanted, after three weeks of
+anxiety a reconciliation was effected. It was to both their interests, but
+they ascribed it to a common love for their Redeemer. The meeting was held
+at the Mission Hall, and the doctor was asked to be chairman. Mr. Carey
+and Josiah Graves both made speeches.
+
+When Mrs. Carey had finished her business with the banker, she generally
+went upstairs to have a little chat with his sister; and while the ladies
+talked of parish matters, the curate or the new bonnet of Mrs. Wilson--Mr.
+Wilson was the richest man in Blackstable, he was thought to have at least
+five hundred a year, and he had married his cook--Philip sat demurely in
+the stiff parlour, used only to receive visitors, and busied himself with
+the restless movements of goldfish in a bowl. The windows were never
+opened except to air the room for a few minutes in the morning, and it had
+a stuffy smell which seemed to Philip to have a mysterious connection with
+banking.
+
+Then Mrs. Carey remembered that she had to go to the grocer, and they
+continued their way. When the shopping was done they often went down a
+side street of little houses, mostly of wood, in which fishermen dwelt
+(and here and there a fisherman sat on his doorstep mending his nets, and
+nets hung to dry upon the doors), till they came to a small beach, shut in
+on each side by warehouses, but with a view of the sea. Mrs. Carey stood
+for a few minutes and looked at it, it was turbid and yellow, [and who
+knows what thoughts passed through her mind?] while Philip searched for
+flat stones to play ducks and drakes. Then they walked slowly back. They
+looked into the post office to get the right time, nodded to Mrs. Wigram
+the doctor's wife, who sat at her window sewing, and so got home.
+
+Dinner was at one o'clock; and on Monday, Tuesday, and Wednesday it
+consisted of beef, roast, hashed, and minced, and on Thursday, Friday, and
+Saturday of mutton. On Sunday they ate one of their own chickens. In the
+afternoon Philip did his lessons. He was taught Latin and mathematics by
+his uncle who knew neither, and French and the piano by his aunt. Of
+French she was ignorant, but she knew the piano well enough to accompany
+the old-fashioned songs she had sung for thirty years. Uncle William used
+to tell Philip that when he was a curate his wife had known twelve songs
+by heart, which she could sing at a moment's notice whenever she was
+asked. She often sang still when there was a tea-party at the vicarage.
+There were few people whom the Careys cared to ask there, and their
+parties consisted always of the curate, Josiah Graves with his sister, Dr.
+Wigram and his wife. After tea Miss Graves played one or two of
+Mendelssohn's Songs without Words, and Mrs. Carey sang When the
+Swallows Homeward Fly, or Trot, Trot, My Pony.
+
+But the Careys did not give tea-parties often; the preparations upset
+them, and when their guests were gone they felt themselves exhausted. They
+preferred to have tea by themselves, and after tea they played backgammon.
+Mrs. Carey arranged that her husband should win, because he did not like
+losing. They had cold supper at eight. It was a scrappy meal because Mary
+Ann resented getting anything ready after tea, and Mrs. Carey helped to
+clear away. Mrs. Carey seldom ate more than bread and butter, with a
+little stewed fruit to follow, but the Vicar had a slice of cold meat.
+Immediately after supper Mrs. Carey rang the bell for prayers, and then
+Philip went to bed. He rebelled against being undressed by Mary Ann and
+after a while succeeded in establishing his right to dress and undress
+himself. At nine o'clock Mary Ann brought in the eggs and the plate. Mrs.
+Carey wrote the date on each egg and put the number down in a book. She
+then took the plate-basket on her arm and went upstairs. Mr. Carey
+continued to read one of his old books, but as the clock struck ten he got
+up, put out the lamps, and followed his wife to bed.
+
+When Philip arrived there was some difficulty in deciding on which evening
+he should have his bath. It was never easy to get plenty of hot water,
+since the kitchen boiler did not work, and it was impossible for two
+persons to have a bath on the same day. The only man who had a bathroom in
+Blackstable was Mr. Wilson, and it was thought ostentatious of him. Mary
+Ann had her bath in the kitchen on Monday night, because she liked to
+begin the week clean. Uncle William could not have his on Saturday,
+because he had a heavy day before him and he was always a little tired
+after a bath, so he had it on Friday. Mrs. Carey had hers on Thursday for
+the same reason. It looked as though Saturday were naturally indicated for
+Philip, but Mary Ann said she couldn't keep the fire up on Saturday night:
+what with all the cooking on Sunday, having to make pastry and she didn't
+know what all, she did not feel up to giving the boy his bath on Saturday
+night; and it was quite clear that he could not bath himself. Mrs. Carey
+was shy about bathing a boy, and of course the Vicar had his sermon. But
+the Vicar insisted that Philip should be clean and sweet for the Lord's
+Day. Mary Ann said she would rather go than be put upon--and after
+eighteen years she didn't expect to have more work given her, and they
+might show some consideration--and Philip said he didn't want anyone to
+bath him, but could very well bath himself. This settled it. Mary Ann said
+she was quite sure he wouldn't bath himself properly, and rather than he
+should go dirty--and not because he was going into the presence of the
+Lord, but because she couldn't abide a boy who wasn't properly
+washed--she'd work herself to the bone even if it was Saturday night.
+
+
+
+VII
+
+
+Sunday was a day crowded with incident. Mr. Carey was accustomed to say
+that he was the only man in his parish who worked seven days a week.
+
+The household got up half an hour earlier than usual. No lying abed for a
+poor parson on the day of rest, Mr. Carey remarked as Mary Ann knocked at
+the door punctually at eight. It took Mrs. Carey longer to dress, and she
+got down to breakfast at nine, a little breathless, only just before her
+husband. Mr. Carey's boots stood in front of the fire to warm. Prayers
+were longer than usual, and the breakfast more substantial. After
+breakfast the Vicar cut thin slices of bread for the communion, and Philip
+was privileged to cut off the crust. He was sent to the study to fetch a
+marble paperweight, with which Mr. Carey pressed the bread till it was
+thin and pulpy, and then it was cut into small squares. The amount was
+regulated by the weather. On a very bad day few people came to church, and
+on a very fine one, though many came, few stayed for communion. There were
+most when it was dry enough to make the walk to church pleasant, but not
+so fine that people wanted to hurry away.
+
+Then Mrs. Carey brought the communion plate out of the safe, which stood
+in the pantry, and the Vicar polished it with a chamois leather. At ten
+the fly drove up, and Mr. Carey got into his boots. Mrs. Carey took
+several minutes to put on her bonnet, during which the Vicar, in a
+voluminous cloak, stood in the hall with just such an expression on his
+face as would have become an early Christian about to be led into the
+arena. It was extraordinary that after thirty years of marriage his wife
+could not be ready in time on Sunday morning. At last she came, in black
+satin; the Vicar did not like colours in a clergyman's wife at any time,
+but on Sundays he was determined that she should wear black; now and then,
+in conspiracy with Miss Graves, she ventured a white feather or a pink
+rose in her bonnet, but the Vicar insisted that it should disappear; he
+said he would not go to church with the scarlet woman: Mrs. Carey sighed
+as a woman but obeyed as a wife. They were about to step into the carriage
+when the Vicar remembered that no one had given him his egg. They knew
+that he must have an egg for his voice, there were two women in the house,
+and no one had the least regard for his comfort. Mrs. Carey scolded Mary
+Ann, and Mary Ann answered that she could not think of everything. She
+hurried away to fetch an egg, and Mrs. Carey beat it up in a glass of
+sherry. The Vicar swallowed it at a gulp. The communion plate was stowed
+in the carriage, and they set off.
+
+The fly came from The Red Lion and had a peculiar smell of stale straw.
+They drove with both windows closed so that the Vicar should not catch
+cold. The sexton was waiting at the porch to take the communion plate, and
+while the Vicar went to the vestry Mrs. Carey and Philip settled
+themselves in the vicarage pew. Mrs. Carey placed in front of her the
+sixpenny bit she was accustomed to put in the plate, and gave Philip
+threepence for the same purpose. The church filled up gradually and the
+service began.
+
+Philip grew bored during the sermon, but if he fidgeted Mrs. Carey put a
+gentle hand on his arm and looked at him reproachfully. He regained
+interest when the final hymn was sung and Mr. Graves passed round with the
+plate.
+
+When everyone had gone Mrs. Carey went into Miss Graves' pew to have a few
+words with her while they were waiting for the gentlemen, and Philip went
+to the vestry. His uncle, the curate, and Mr. Graves were still in their
+surplices. Mr. Carey gave him the remains of the consecrated bread and
+told him he might eat it. He had been accustomed to eat it himself, as it
+seemed blasphemous to throw it away, but Philip's keen appetite relieved
+him from the duty. Then they counted the money. It consisted of pennies,
+sixpences and threepenny bits. There were always two single shillings, one
+put in the plate by the Vicar and the other by Mr. Graves; and sometimes
+there was a florin. Mr. Graves told the Vicar who had given this. It was
+always a stranger to Blackstable, and Mr. Carey wondered who he was. But
+Miss Graves had observed the rash act and was able to tell Mrs. Carey that
+the stranger came from London, was married and had children. During the
+drive home Mrs. Carey passed the information on, and the Vicar made up his
+mind to call on him and ask for a subscription to the Additional Curates
+Society. Mr. Carey asked if Philip had behaved properly; and Mrs. Carey
+remarked that Mrs. Wigram had a new mantle, Mr. Cox was not in church, and
+somebody thought that Miss Phillips was engaged. When they reached the
+vicarage they all felt that they deserved a substantial dinner.
+
+When this was over Mrs. Carey went to her room to rest, and Mr. Carey lay
+down on the sofa in the drawing-room for forty winks.
+
+They had tea at five, and the Vicar ate an egg to support himself for
+evensong. Mrs. Carey did not go to this so that Mary Ann might, but she
+read the service through and the hymns. Mr. Carey walked to church in the
+evening, and Philip limped along by his side. The walk through the
+darkness along the country road strangely impressed him, and the church
+with all its lights in the distance, coming gradually nearer, seemed very
+friendly. At first he was shy with his uncle, but little by little grew
+used to him, and he would slip his hand in his uncle's and walk more
+easily for the feeling of protection.
+
+They had supper when they got home. Mr. Carey's slippers were waiting for
+him on a footstool in front of the fire and by their side Philip's, one
+the shoe of a small boy, the other misshapen and odd. He was dreadfully
+tired when he went up to bed, and he did not resist when Mary Ann
+undressed him. She kissed him after she tucked him up, and he began to
+love her.
+
+
+
+VIII
+
+
+Philip had led always the solitary life of an only child, and his
+loneliness at the vicarage was no greater than it had been when his mother
+lived. He made friends with Mary Ann. She was a chubby little person of
+thirty-five, the daughter of a fisherman, and had come to the vicarage at
+eighteen; it was her first place and she had no intention of leaving it;
+but she held a possible marriage as a rod over the timid heads of her
+master and mistress. Her father and mother lived in a little house off
+Harbour Street, and she went to see them on her evenings out. Her stories
+of the sea touched Philip's imagination, and the narrow alleys round the
+harbour grew rich with the romance which his young fancy lent them. One
+evening he asked whether he might go home with her; but his aunt was
+afraid that he might catch something, and his uncle said that evil
+communications corrupted good manners. He disliked the fisher folk, who
+were rough, uncouth, and went to chapel. But Philip was more comfortable
+in the kitchen than in the dining-room, and, whenever he could, he took
+his toys and played there. His aunt was not sorry. She did not like
+disorder, and though she recognised that boys must be expected to be
+untidy she preferred that he should make a mess in the kitchen. If he
+fidgeted his uncle was apt to grow restless and say it was high time he
+went to school. Mrs. Carey thought Philip very young for this, and her
+heart went out to the motherless child; but her attempts to gain his
+affection were awkward, and the boy, feeling shy, received her
+demonstrations with so much sullenness that she was mortified. Sometimes
+she heard his shrill voice raised in laughter in the kitchen, but when she
+went in, he grew suddenly silent, and he flushed darkly when Mary Ann
+explained the joke. Mrs. Carey could not see anything amusing in what she
+heard, and she smiled with constraint.
+
+"He seems happier with Mary Ann than with us, William," she said, when she
+returned to her sewing.
+
+"One can see he's been very badly brought up. He wants licking into
+shape."
+
+On the second Sunday after Philip arrived an unlucky incident occurred.
+Mr. Carey had retired as usual after dinner for a little snooze in the
+drawing-room, but he was in an irritable mood and could not sleep. Josiah
+Graves that morning had objected strongly to some candlesticks with which
+the Vicar had adorned the altar. He had bought them second-hand in
+Tercanbury, and he thought they looked very well. But Josiah Graves said
+they were popish. This was a taunt that always aroused the Vicar. He had
+been at Oxford during the movement which ended in the secession from the
+Established Church of Edward Manning, and he felt a certain sympathy for
+the Church of Rome. He would willingly have made the service more ornate
+than had been usual in the low-church parish of Blackstable, and in his
+secret soul he yearned for processions and lighted candles. He drew the
+line at incense. He hated the word protestant. He called himself a
+Catholic. He was accustomed to say that Papists required an epithet, they
+were Roman Catholic; but the Church of England was Catholic in the best,
+the fullest, and the noblest sense of the term. He was pleased to think
+that his shaven face gave him the look of a priest, and in his youth he
+had possessed an ascetic air which added to the impression. He often
+related that on one of his holidays in Boulogne, one of those holidays
+upon which his wife for economy's sake did not accompany him, when he was
+sitting in a church, the curé had come up to him and invited him to
+preach a sermon. He dismissed his curates when they married, having
+decided views on the celibacy of the unbeneficed clergy. But when at an
+election the Liberals had written on his garden fence in large blue
+letters: This way to Rome, he had been very angry, and threatened to
+prosecute the leaders of the Liberal party in Blackstable. He made up his
+mind now that nothing Josiah Graves said would induce him to remove the
+candlesticks from the altar, and he muttered Bismarck to himself once or
+twice irritably.
+
+Suddenly he heard an unexpected noise. He pulled the handkerchief off his
+face, got up from the sofa on which he was lying, and went into the
+dining-room. Philip was seated on the table with all his bricks around
+him. He had built a monstrous castle, and some defect in the foundation
+had just brought the structure down in noisy ruin.
+
+"What are you doing with those bricks, Philip? You know you're not allowed
+to play games on Sunday."
+
+Philip stared at him for a moment with frightened eyes, and, as his habit
+was, flushed deeply.
+
+"I always used to play at home," he answered.
+
+"I'm sure your dear mamma never allowed you to do such a wicked thing as
+that."
+
+Philip did not know it was wicked; but if it was, he did not wish it to be
+supposed that his mother had consented to it. He hung his head and did not
+answer.
+
+"Don't you know it's very, very wicked to play on Sunday? What d'you
+suppose it's called the day of rest for? You're going to church tonight,
+and how can you face your Maker when you've been breaking one of His laws
+in the afternoon?"
+
+Mr. Carey told him to put the bricks away at once, and stood over him
+while Philip did so.
+
+"You're a very naughty boy," he repeated. "Think of the grief you're
+causing your poor mother in heaven."
+
+Philip felt inclined to cry, but he had an instinctive disinclination to
+letting other people see his tears, and he clenched his teeth to prevent
+the sobs from escaping. Mr. Carey sat down in his arm-chair and began to
+turn over the pages of a book. Philip stood at the window. The vicarage
+was set back from the highroad to Tercanbury, and from the dining-room one
+saw a semicircular strip of lawn and then as far as the horizon green
+fields. Sheep were grazing in them. The sky was forlorn and gray. Philip
+felt infinitely unhappy.
+
+Presently Mary Ann came in to lay the tea, and Aunt Louisa descended the
+stairs.
+
+"Have you had a nice little nap, William?" she asked.
+
+"No," he answered. "Philip made so much noise that I couldn't sleep a
+wink."
+
+This was not quite accurate, for he had been kept awake by his own
+thoughts; and Philip, listening sullenly, reflected that he had only made
+a noise once, and there was no reason why his uncle should not have slept
+before or after. When Mrs. Carey asked for an explanation the Vicar
+narrated the facts.
+
+"He hasn't even said he was sorry," he finished.
+
+"Oh, Philip, I'm sure you're sorry," said Mrs. Carey, anxious that the
+child should not seem wickeder to his uncle than need be.
+
+Philip did not reply. He went on munching his bread and butter. He did not
+know what power it was in him that prevented him from making any
+expression of regret. He felt his ears tingling, he was a little inclined
+to cry, but no word would issue from his lips.
+
+"You needn't make it worse by sulking," said Mr. Carey.
+
+Tea was finished in silence. Mrs. Carey looked at Philip surreptitiously
+now and then, but the Vicar elaborately ignored him. When Philip saw his
+uncle go upstairs to get ready for church he went into the hall and got
+his hat and coat, but when the Vicar came downstairs and saw him, he said:
+
+"I don't wish you to go to church tonight, Philip. I don't think you're in
+a proper frame of mind to enter the House of God."
+
+Philip did not say a word. He felt it was a deep humiliation that was
+placed upon him, and his cheeks reddened. He stood silently watching his
+uncle put on his broad hat and his voluminous cloak. Mrs. Carey as usual
+went to the door to see him off. Then she turned to Philip.
+
+"Never mind, Philip, you won't be a naughty boy next Sunday, will you, and
+then your uncle will take you to church with him in the evening."
+
+She took off his hat and coat, and led him into the dining-room.
+
+"Shall you and I read the service together, Philip, and we'll sing the
+hymns at the harmonium. Would you like that?"
+
+Philip shook his head decidedly. Mrs. Carey was taken aback. If he would
+not read the evening service with her she did not know what to do with
+him.
+
+"Then what would you like to do until your uncle comes back?" she asked
+helplessly.
+
+Philip broke his silence at last.
+
+"I want to be left alone," he said.
+
+"Philip, how can you say anything so unkind? Don't you know that your
+uncle and I only want your good? Don't you love me at all?"
+
+"I hate you. I wish you was dead."
+
+Mrs. Carey gasped. He said the words so savagely that it gave her quite a
+start. She had nothing to say. She sat down in her husband's chair; and as
+she thought of her desire to love the friendless, crippled boy and her
+eager wish that he should love her--she was a barren woman and, even
+though it was clearly God's will that she should be childless, she could
+scarcely bear to look at little children sometimes, her heart ached
+so--the tears rose to her eyes and one by one, slowly, rolled down her
+cheeks. Philip watched her in amazement. She took out her handkerchief,
+and now she cried without restraint. Suddenly Philip realised that she was
+crying because of what he had said, and he was sorry. He went up to her
+silently and kissed her. It was the first kiss he had ever given her
+without being asked. And the poor lady, so small in her black satin,
+shrivelled up and sallow, with her funny corkscrew curls, took the little
+boy on her lap and put her arms around him and wept as though her heart
+would break. But her tears were partly tears of happiness, for she felt
+that the strangeness between them was gone. She loved him now with a new
+love because he had made her suffer.
+
+
+
+IX
+
+
+On the following Sunday, when the Vicar was making his preparations to go
+into the drawing-room for his nap--all the actions of his life were
+conducted with ceremony--and Mrs. Carey was about to go upstairs, Philip
+asked:
+
+"What shall I do if I'm not allowed to play?"
+
+"Can't you sit still for once and be quiet?"
+
+"I can't sit still till tea-time."
+
+Mr. Carey looked out of the window, but it was cold and raw, and he could
+not suggest that Philip should go into the garden.
+
+"I know what you can do. You can learn by heart the collect for the day."
+
+He took the prayer-book which was used for prayers from the harmonium, and
+turned the pages till he came to the place he wanted.
+
+"It's not a long one. If you can say it without a mistake when I come in
+to tea you shall have the top of my egg."
+
+Mrs. Carey drew up Philip's chair to the dining-room table--they had
+bought him a high chair by now--and placed the book in front of him.
+
+"The devil finds work for idle hands to do," said Mr. Carey.
+
+He put some more coals on the fire so that there should be a cheerful
+blaze when he came in to tea, and went into the drawing-room. He loosened
+his collar, arranged the cushions, and settled himself comfortably on the
+sofa. But thinking the drawing-room a little chilly, Mrs. Carey brought
+him a rug from the hall; she put it over his legs and tucked it round his
+feet. She drew the blinds so that the light should not offend his eyes,
+and since he had closed them already went out of the room on tiptoe. The
+Vicar was at peace with himself today, and in ten minutes he was asleep.
+He snored softly.
+
+It was the Sixth Sunday after Epiphany, and the collect began with the
+words: O God, whose blessed Son was manifested that he might destroy the
+works of the devil, and make us the sons of God, and heirs of Eternal
+life. Philip read it through. He could make no sense of it. He began
+saying the words aloud to himself, but many of them were unknown to him,
+and the construction of the sentence was strange. He could not get more
+than two lines in his head. And his attention was constantly wandering:
+there were fruit trees trained on the walls of the vicarage, and a long
+twig beat now and then against the windowpane; sheep grazed stolidly in
+the field beyond the garden. It seemed as though there were knots inside
+his brain. Then panic seized him that he would not know the words by
+tea-time, and he kept on whispering them to himself quickly; he did not
+try to understand, but merely to get them parrot-like into his memory.
+
+Mrs. Carey could not sleep that afternoon, and by four o'clock she was so
+wide awake that she came downstairs. She thought she would hear Philip his
+collect so that he should make no mistakes when he said it to his uncle.
+His uncle then would be pleased; he would see that the boy's heart was in
+the right place. But when Mrs. Carey came to the dining-room and was about
+to go in, she heard a sound that made her stop suddenly. Her heart gave a
+little jump. She turned away and quietly slipped out of the front-door.
+She walked round the house till she came to the dining-room window and
+then cautiously looked in. Philip was still sitting on the chair she had
+put him in, but his head was on the table buried in his arms, and he was
+sobbing desperately. She saw the convulsive movement of his shoulders.
+Mrs. Carey was frightened. A thing that had always struck her about the
+child was that he seemed so collected. She had never seen him cry. And now
+she realised that his calmness was some instinctive shame of showing his
+feelings: he hid himself to weep.
+
+Without thinking that her husband disliked being wakened suddenly, she
+burst into the drawing-room.
+
+"William, William," she said. "The boy's crying as though his heart would
+break."
+
+Mr. Carey sat up and disentangled himself from the rug about his legs.
+
+"What's he got to cry about?"
+
+"I don't know.... Oh, William, we can't let the boy be unhappy. D'you
+think it's our fault? If we'd had children we'd have known what to do."
+
+Mr. Carey looked at her in perplexity. He felt extraordinarily helpless.
+
+"He can't be crying because I gave him the collect to learn. It's not more
+than ten lines."
+
+"Don't you think I might take him some picture books to look at, William?
+There are some of the Holy Land. There couldn't be anything wrong in
+that."
+
+"Very well, I don't mind."
+
+Mrs. Carey went into the study. To collect books was Mr. Carey's only
+passion, and he never went into Tercanbury without spending an hour or two
+in the second-hand shop; he always brought back four or five musty
+volumes. He never read them, for he had long lost the habit of reading,
+but he liked to turn the pages, look at the illustrations if they were
+illustrated, and mend the bindings. He welcomed wet days because on them
+he could stay at home without pangs of conscience and spend the afternoon
+with white of egg and a glue-pot, patching up the Russia leather of some
+battered quarto. He had many volumes of old travels, with steel
+engravings, and Mrs. Carey quickly found two which described Palestine.
+She coughed elaborately at the door so that Philip should have time to
+compose himself, she felt that he would be humiliated if she came upon him
+in the midst of his tears, then she rattled the door handle. When she went
+in Philip was poring over the prayer-book, hiding his eyes with his hands
+so that she might not see he had been crying.
+
+"Do you know the collect yet?" she said.
+
+He did not answer for a moment, and she felt that he did not trust his
+voice. She was oddly embarrassed.
+
+"I can't learn it by heart," he said at last, with a gasp.
+
+"Oh, well, never mind," she said. "You needn't. I've got some picture
+books for you to look at. Come and sit on my lap, and we'll look at them
+together."
+
+Philip slipped off his chair and limped over to her. He looked down so
+that she should not see his eyes. She put her arms round him.
+
+"Look," she said, "that's the place where our blessed Lord was born."
+
+She showed him an Eastern town with flat roofs and cupolas and minarets.
+In the foreground was a group of palm-trees, and under them were resting
+two Arabs and some camels. Philip passed his hand over the picture as if
+he wanted to feel the houses and the loose habiliments of the nomads.
+
+"Read what it says," he asked.
+
+Mrs. Carey in her even voice read the opposite page. It was a romantic
+narrative of some Eastern traveller of the thirties, pompous maybe, but
+fragrant with the emotion with which the East came to the generation that
+followed Byron and Chateaubriand. In a moment or two Philip interrupted
+her.
+
+"I want to see another picture."
+
+When Mary Ann came in and Mrs. Carey rose to help her lay the cloth,
+Philip took the book in his hands and hurried through the illustrations.
+It was with difficulty that his aunt induced him to put the book down for
+tea. He had forgotten his horrible struggle to get the collect by heart;
+he had forgotten his tears. Next day it was raining, and he asked for the
+book again. Mrs. Carey gave it him joyfully. Talking over his future with
+her husband she had found that both desired him to take orders, and this
+eagerness for the book which described places hallowed by the presence of
+Jesus seemed a good sign. It looked as though the boy's mind addressed
+itself naturally to holy things. But in a day or two he asked for more
+books. Mr. Carey took him into his study, showed him the shelf in which he
+kept illustrated works, and chose for him one that dealt with Rome. Philip
+took it greedily. The pictures led him to a new amusement. He began to
+read the page before and the page after each engraving to find out what it
+was about, and soon he lost all interest in his toys.
+
+Then, when no one was near, he took out books for himself; and perhaps
+because the first impression on his mind was made by an Eastern town, he
+found his chief amusement in those which described the Levant. His heart
+beat with excitement at the pictures of mosques and rich palaces; but
+there was one, in a book on Constantinople, which peculiarly stirred his
+imagination. It was called the Hall of the Thousand Columns. It was a
+Byzantine cistern, which the popular fancy had endowed with fantastic
+vastness; and the legend which he read told that a boat was always moored
+at the entrance to tempt the unwary, but no traveller venturing into the
+darkness had ever been seen again. And Philip wondered whether the boat
+went on for ever through one pillared alley after another or came at last
+to some strange mansion.
+
+One day a good fortune befell him, for he hit upon Lane's translation of
+The Thousand Nights and a Night. He was captured first by the
+illustrations, and then he began to read, to start with, the stories that
+dealt with magic, and then the others; and those he liked he read again
+and again. He could think of nothing else. He forgot the life about him.
+He had to be called two or three times before he would come to his dinner.
+Insensibly he formed the most delightful habit in the world, the habit of
+reading: he did not know that thus he was providing himself with a refuge
+from all the distress of life; he did not know either that he was creating
+for himself an unreal world which would make the real world of every day
+a source of bitter disappointment. Presently he began to read other
+things. His brain was precocious. His uncle and aunt, seeing that he
+occupied himself and neither worried nor made a noise, ceased to trouble
+themselves about him. Mr. Carey had so many books that he did not know
+them, and as he read little he forgot the odd lots he had bought at one
+time and another because they were cheap. Haphazard among the sermons and
+homilies, the travels, the lives of the Saints, the Fathers, the histories
+of the church, were old-fashioned novels; and these Philip at last
+discovered. He chose them by their titles, and the first he read was The
+Lancashire Witches, and then he read The Admirable Crichton, and then
+many more. Whenever he started a book with two solitary travellers riding
+along the brink of a desperate ravine he knew he was safe.
+
+The summer was come now, and the gardener, an old sailor, made him a
+hammock and fixed it up for him in the branches of a weeping willow. And
+here for long hours he lay, hidden from anyone who might come to the
+vicarage, reading, reading passionately. Time passed and it was July;
+August came: on Sundays the church was crowded with strangers, and the
+collection at the offertory often amounted to two pounds. Neither the
+Vicar nor Mrs. Carey went out of the garden much during this period; for
+they disliked strange faces, and they looked upon the visitors from London
+with aversion. The house opposite was taken for six weeks by a gentleman
+who had two little boys, and he sent in to ask if Philip would like to go
+and play with them; but Mrs. Carey returned a polite refusal. She was
+afraid that Philip would be corrupted by little boys from London. He was
+going to be a clergyman, and it was necessary that he should be preserved
+from contamination. She liked to see in him an infant Samuel.
+
+
+
+X
+
+
+The Careys made up their minds to send Philip to King's School at
+Tercanbury. The neighbouring clergy sent their sons there. It was united
+by long tradition to the Cathedral: its headmaster was an honorary Canon,
+and a past headmaster was the Archdeacon. Boys were encouraged there to
+aspire to Holy Orders, and the education was such as might prepare an
+honest lad to spend his life in God's service. A preparatory school was
+attached to it, and to this it was arranged that Philip should go. Mr.
+Carey took him into Tercanbury one Thursday afternoon towards the end of
+September. All day Philip had been excited and rather frightened. He knew
+little of school life but what he had read in the stories of The Boy's
+Own Paper. He had also read Eric, or Little by Little.
+
+When they got out of the train at Tercanbury, Philip felt sick with
+apprehension, and during the drive in to the town sat pale and silent. The
+high brick wall in front of the school gave it the look of a prison. There
+was a little door in it, which opened on their ringing; and a clumsy,
+untidy man came out and fetched Philip's tin trunk and his play-box. They
+were shown into the drawing-room; it was filled with massive, ugly
+furniture, and the chairs of the suite were placed round the walls with a
+forbidding rigidity. They waited for the headmaster.
+
+"What's Mr. Watson like?" asked Philip, after a while.
+
+"You'll see for yourself."
+
+There was another pause. Mr. Carey wondered why the headmaster did not
+come. Presently Philip made an effort and spoke again.
+
+"Tell him I've got a club-foot," he said.
+
+Before Mr. Carey could speak the door burst open and Mr. Watson swept into
+the room. To Philip he seemed gigantic. He was a man of over six feet
+high, and broad, with enormous hands and a great red beard; he talked
+loudly in a jovial manner; but his aggressive cheerfulness struck terror
+in Philip's heart. He shook hands with Mr. Carey, and then took Philip's
+small hand in his.
+
+"Well, young fellow, are you glad to come to school?" he shouted.
+
+Philip reddened and found no word to answer.
+
+"How old are you?"
+
+"Nine," said Philip.
+
+"You must say sir," said his uncle.
+
+"I expect you've got a good lot to learn," the headmaster bellowed
+cheerily.
+
+To give the boy confidence he began to tickle him with rough fingers.
+Philip, feeling shy and uncomfortable, squirmed under his touch.
+
+"I've put him in the small dormitory for the present.... You'll like that,
+won't you?" he added to Philip. "Only eight of you in there. You won't
+feel so strange."
+
+Then the door opened, and Mrs. Watson came in. She was a dark woman with
+black hair, neatly parted in the middle. She had curiously thick lips and
+a small round nose. Her eyes were large and black. There was a singular
+coldness in her appearance. She seldom spoke and smiled more seldom still.
+Her husband introduced Mr. Carey to her, and then gave Philip a friendly
+push towards her.
+
+"This is a new boy, Helen, His name's Carey."
+
+Without a word she shook hands with Philip and then sat down, not
+speaking, while the headmaster asked Mr. Carey how much Philip knew and
+what books he had been working with. The Vicar of Blackstable was a little
+embarrassed by Mr. Watson's boisterous heartiness, and in a moment or two
+got up.
+
+"I think I'd better leave Philip with you now."
+
+"That's all right," said Mr. Watson. "He'll be safe with me. He'll get on
+like a house on fire. Won't you, young fellow?"
+
+Without waiting for an answer from Philip the big man burst into a great
+bellow of laughter. Mr. Carey kissed Philip on the forehead and went away.
+
+"Come along, young fellow," shouted Mr. Watson. "I'll show you the
+school-room."
+
+He swept out of the drawing-room with giant strides, and Philip hurriedly
+limped behind him. He was taken into a long, bare room with two tables
+that ran along its whole length; on each side of them were wooden forms.
+
+"Nobody much here yet," said Mr. Watson. "I'll just show you the
+playground, and then I'll leave you to shift for yourself."
+
+Mr. Watson led the way. Philip found himself in a large play-ground with
+high brick walls on three sides of it. On the fourth side was an iron
+railing through which you saw a vast lawn and beyond this some of the
+buildings of King's School. One small boy was wandering disconsolately,
+kicking up the gravel as he walked.
+
+"Hulloa, Venning," shouted Mr. Watson. "When did you turn up?"
+
+The small boy came forward and shook hands.
+
+"Here's a new boy. He's older and bigger than you, so don't you bully
+him."
+
+The headmaster glared amicably at the two children, filling them with fear
+by the roar of his voice, and then with a guffaw left them.
+
+"What's your name?"
+
+"Carey."
+
+"What's your father?"
+
+"He's dead."
+
+"Oh! Does your mother wash?"
+
+"My mother's dead, too."
+
+Philip thought this answer would cause the boy a certain awkwardness, but
+Venning was not to be turned from his facetiousness for so little.
+
+"Well, did she wash?" he went on.
+
+"Yes," said Philip indignantly.
+
+"She was a washerwoman then?"
+
+"No, she wasn't."
+
+"Then she didn't wash."
+
+The little boy crowed with delight at the success of his dialectic. Then
+he caught sight of Philip's feet.
+
+"What's the matter with your foot?"
+
+Philip instinctively tried to withdraw it from sight. He hid it behind the
+one which was whole.
+
+"I've got a club-foot," he answered.
+
+"How did you get it?"
+
+"I've always had it."
+
+"Let's have a look."
+
+"No."
+
+"Don't then."
+
+The little boy accompanied the words with a sharp kick on Philip's shin,
+which Philip did not expect and thus could not guard against. The pain was
+so great that it made him gasp, but greater than the pain was the
+surprise. He did not know why Venning kicked him. He had not the presence
+of mind to give him a black eye. Besides, the boy was smaller than he, and
+he had read in The Boy's Own Paper that it was a mean thing to hit
+anyone smaller than yourself. While Philip was nursing his shin a third
+boy appeared, and his tormentor left him. In a little while he noticed
+that the pair were talking about him, and he felt they were looking at his
+feet. He grew hot and uncomfortable.
+
+But others arrived, a dozen together, and then more, and they began to
+talk about their doings during the holidays, where they had been, and what
+wonderful cricket they had played. A few new boys appeared, and with these
+presently Philip found himself talking. He was shy and nervous. He was
+anxious to make himself pleasant, but he could not think of anything to
+say. He was asked a great many questions and answered them all quite
+willingly. One boy asked him whether he could play cricket.
+
+"No," answered Philip. "I've got a club-foot."
+
+The boy looked down quickly and reddened. Philip saw that he felt he had
+asked an unseemly question. He was too shy to apologise and looked at
+Philip awkwardly.
+
+
+
+XI
+
+
+Next morning when the clanging of a bell awoke Philip he looked round his
+cubicle in astonishment. Then a voice sang out, and he remembered where he
+was.
+
+"Are you awake, Singer?"
+
+The partitions of the cubicle were of polished pitch-pine, and there was
+a green curtain in front. In those days there was little thought of
+ventilation, and the windows were closed except when the dormitory was
+aired in the morning.
+
+Philip got up and knelt down to say his prayers. It was a cold morning,
+and he shivered a little; but he had been taught by his uncle that his
+prayers were more acceptable to God if he said them in his nightshirt than
+if he waited till he was dressed. This did not surprise him, for he was
+beginning to realise that he was the creature of a God who appreciated the
+discomfort of his worshippers. Then he washed. There were two baths for
+the fifty boarders, and each boy had a bath once a week. The rest of his
+washing was done in a small basin on a wash-stand, which with the bed and
+a chair, made up the furniture of each cubicle. The boys chatted gaily
+while they dressed. Philip was all ears. Then another bell sounded, and
+they ran downstairs. They took their seats on the forms on each side of
+the two long tables in the school-room; and Mr. Watson, followed by his
+wife and the servants, came in and sat down. Mr. Watson read prayers in an
+impressive manner, and the supplications thundered out in his loud voice
+as though they were threats personally addressed to each boy. Philip
+listened with anxiety. Then Mr. Watson read a chapter from the Bible, and
+the servants trooped out. In a moment the untidy youth brought in two
+large pots of tea and on a second journey immense dishes of bread and
+butter.
+
+Philip had a squeamish appetite, and the thick slabs of poor butter on the
+bread turned his stomach, but he saw other boys scraping it off and
+followed their example. They all had potted meats and such like, which
+they had brought in their play-boxes; and some had 'extras,' eggs or
+bacon, upon which Mr. Watson made a profit. When he had asked Mr. Carey
+whether Philip was to have these, Mr. Carey replied that he did not think
+boys should be spoilt. Mr. Watson quite agreed with him--he considered
+nothing was better than bread and butter for growing lads--but some
+parents, unduly pampering their offspring, insisted on it.
+
+Philip noticed that 'extras' gave boys a certain consideration and made up
+his mind, when he wrote to Aunt Louisa, to ask for them.
+
+After breakfast the boys wandered out into the play-ground. Here the
+day-boys were gradually assembling. They were sons of the local clergy, of
+the officers at the Depot, and of such manufacturers or men of business as
+the old town possessed. Presently a bell rang, and they all trooped into
+school. This consisted of a large, long room at opposite ends of which two
+under-masters conducted the second and third forms, and of a smaller one,
+leading out of it, used by Mr. Watson, who taught the first form. To
+attach the preparatory to the senior school these three classes were known
+officially, on speech days and in reports, as upper, middle, and lower
+second. Philip was put in the last. The master, a red-faced man with a
+pleasant voice, was called Rice; he had a jolly manner with boys, and the
+time passed quickly. Philip was surprised when it was a quarter to eleven
+and they were let out for ten minutes' rest.
+
+The whole school rushed noisily into the play-ground. The new boys were
+told to go into the middle, while the others stationed themselves along
+opposite walls. They began to play Pig in the Middle. The old boys ran
+from wall to wall while the new boys tried to catch them: when one was
+seized and the mystic words said--one, two, three, and a pig for me--he
+became a prisoner and, turning sides, helped to catch those who were still
+free. Philip saw a boy running past and tried to catch him, but his limp
+gave him no chance; and the runners, taking their opportunity, made
+straight for the ground he covered. Then one of them had the brilliant
+idea of imitating Philip's clumsy run. Other boys saw it and began to
+laugh; then they all copied the first; and they ran round Philip, limping
+grotesquely, screaming in their treble voices with shrill laughter. They
+lost their heads with the delight of their new amusement, and choked with
+helpless merriment. One of them tripped Philip up and he fell, heavily as
+he always fell, and cut his knee. They laughed all the louder when he got
+up. A boy pushed him from behind, and he would have fallen again if
+another had not caught him. The game was forgotten in the entertainment of
+Philip's deformity. One of them invented an odd, rolling limp that struck
+the rest as supremely ridiculous, and several of the boys lay down on the
+ground and rolled about in laughter: Philip was completely scared. He
+could not make out why they were laughing at him. His heart beat so that
+he could hardly breathe, and he was more frightened than he had ever been
+in his life. He stood still stupidly while the boys ran round him,
+mimicking and laughing; they shouted to him to try and catch them; but he
+did not move. He did not want them to see him run any more. He was using
+all his strength to prevent himself from crying.
+
+Suddenly the bell rang, and they all trooped back to school. Philip's knee
+was bleeding, and he was dusty and dishevelled. For some minutes Mr. Rice
+could not control his form. They were excited still by the strange
+novelty, and Philip saw one or two of them furtively looking down at his
+feet. He tucked them under the bench.
+
+In the afternoon they went up to play football, but Mr. Watson stopped
+Philip on the way out after dinner.
+
+"I suppose you can't play football, Carey?" he asked him.
+
+Philip blushed self-consciously.
+
+"No, sir."
+
+"Very well. You'd better go up to the field. You can walk as far as that,
+can't you?"
+
+Philip had no idea where the field was, but he answered all the same.
+
+"Yes, sir."
+
+The boys went in charge of Mr. Rice, who glanced at Philip and seeing he
+had not changed, asked why he was not going to play.
+
+"Mr. Watson said I needn't, sir," said Philip.
+
+"Why?"
+
+There were boys all round him, looking at him curiously, and a feeling of
+shame came over Philip. He looked down without answering. Others gave the
+reply.
+
+"He's got a club-foot, sir."
+
+"Oh, I see."
+
+Mr. Rice was quite young; he had only taken his degree a year before; and
+he was suddenly embarrassed. His instinct was to beg the boy's pardon, but
+he was too shy to do so. He made his voice gruff and loud.
+
+"Now then, you boys, what are you waiting about for? Get on with you."
+
+Some of them had already started and those that were left now set off, in
+groups of two or three.
+
+"You'd better come along with me, Carey," said the master "You don't know
+the way, do you?"
+
+Philip guessed the kindness, and a sob came to his throat.
+
+"I can't go very fast, sir."
+
+"Then I'll go very slow," said the master, with a smile.
+
+Philip's heart went out to the red-faced, commonplace young man who said
+a gentle word to him. He suddenly felt less unhappy.
+
+But at night when they went up to bed and were undressing, the boy who was
+called Singer came out of his cubicle and put his head in Philip's.
+
+"I say, let's look at your foot," he said.
+
+"No," answered Philip.
+
+He jumped into bed quickly.
+
+"Don't say no to me," said Singer. "Come on, Mason."
+
+The boy in the next cubicle was looking round the corner, and at the words
+he slipped in. They made for Philip and tried to tear the bed-clothes off
+him, but he held them tightly.
+
+"Why can't you leave me alone?" he cried.
+
+Singer seized a brush and with the back of it beat Philip's hands clenched
+on the blanket. Philip cried out.
+
+"Why don't you show us your foot quietly?"
+
+"I won't."
+
+In desperation Philip clenched his fist and hit the boy who tormented him,
+but he was at a disadvantage, and the boy seized his arm. He began to turn
+it.
+
+"Oh, don't, don't," said Philip. "You'll break my arm."
+
+"Stop still then and put out your foot."
+
+Philip gave a sob and a gasp. The boy gave the arm another wrench. The
+pain was unendurable.
+
+"All right. I'll do it," said Philip.
+
+He put out his foot. Singer still kept his hand on Philip's wrist. He
+looked curiously at the deformity.
+
+"Isn't it beastly?" said Mason.
+
+Another came in and looked too.
+
+"Ugh," he said, in disgust.
+
+"My word, it is rum," said Singer, making a face. "Is it hard?"
+
+He touched it with the tip of his forefinger, cautiously

<TRUNCATED>
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/resources/orders.txt
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/resources/orders.txt b/crunch-core/src/it/resources/orders.txt
new file mode 100644
index 0000000..2f1383f
--- /dev/null
+++ b/crunch-core/src/it/resources/orders.txt
@@ -0,0 +1,4 @@
+222|Toilet plunger
+333|Toilet brush
+222|Toilet paper
+111|Corn flakes
\ No newline at end of file
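
The orders.txt fixture added above is a four-line, pipe-delimited file pairing a customer id with an item name. Purely as an illustrative sketch, and not part of this commit, such a file could be parsed into a Crunch PTable using the parallelDo overload that takes a PTableType and the count() helper, both of which are declared on PCollectionImpl later in this patch series; the OrdersExample class name, the literal input and output paths, and the per-customer tally are assumptions made up for the example.

import org.apache.crunch.DoFn;
import org.apache.crunch.Emitter;
import org.apache.crunch.PCollection;
import org.apache.crunch.PTable;
import org.apache.crunch.Pair;
import org.apache.crunch.Pipeline;
import org.apache.crunch.impl.mr.MRPipeline;
import org.apache.crunch.types.writable.Writables;

public class OrdersExample {
  public static void main(String[] args) {
    Pipeline pipeline = new MRPipeline(OrdersExample.class);
    PCollection<String> lines = pipeline.readTextFile("orders.txt");

    // Split each "customerId|item" line into a (customerId, item) pair.
    PTable<String, String> orders = lines.parallelDo(
        new DoFn<String, Pair<String, String>>() {
          @Override
          public void process(String line, Emitter<Pair<String, String>> emitter) {
            String[] fields = line.split("\\|", 2);
            if (fields.length == 2) {
              emitter.emit(Pair.of(fields[0], fields[1]));
            }
          }
        },
        Writables.tableOf(Writables.strings(), Writables.strings()));

    // Count how many order lines each customer id has and write the result out.
    PTable<String, Long> ordersPerCustomer = orders.keys().count();
    pipeline.writeTextFile(ordersPerCustomer, "orders-per-customer");
    pipeline.done();
  }
}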


[17/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/resources/org/apache/crunch/UnionITData/src1.txt
----------------------------------------------------------------------
diff --git a/crunch/src/it/resources/org/apache/crunch/UnionITData/src1.txt b/crunch/src/it/resources/org/apache/crunch/UnionITData/src1.txt
deleted file mode 100644
index a92974b..0000000
--- a/crunch/src/it/resources/org/apache/crunch/UnionITData/src1.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-a1
-b2
-a1
-a1
-b2

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/resources/org/apache/crunch/UnionITData/src2.txt
----------------------------------------------------------------------
diff --git a/crunch/src/it/resources/org/apache/crunch/UnionITData/src2.txt b/crunch/src/it/resources/org/apache/crunch/UnionITData/src2.txt
deleted file mode 100644
index 9363398..0000000
--- a/crunch/src/it/resources/org/apache/crunch/UnionITData/src2.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-c3
-a1
-c3

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/resources/org/apache/crunch/fn/AggregatorsITData/ints.txt
----------------------------------------------------------------------
diff --git a/crunch/src/it/resources/org/apache/crunch/fn/AggregatorsITData/ints.txt b/crunch/src/it/resources/org/apache/crunch/fn/AggregatorsITData/ints.txt
deleted file mode 100644
index 680cb09..0000000
--- a/crunch/src/it/resources/org/apache/crunch/fn/AggregatorsITData/ints.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-a	1	2
-a	3	4
-b	2	3
-a	5	6
-b	9	10

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/resources/org/apache/crunch/lib/CogroupITData/src1.txt
----------------------------------------------------------------------
diff --git a/crunch/src/it/resources/org/apache/crunch/lib/CogroupITData/src1.txt b/crunch/src/it/resources/org/apache/crunch/lib/CogroupITData/src1.txt
deleted file mode 100644
index 9f38eb9..0000000
--- a/crunch/src/it/resources/org/apache/crunch/lib/CogroupITData/src1.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-a,1-1
-b,1-2
-c,1-3
-a,1-4

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/resources/org/apache/crunch/lib/CogroupITData/src2.txt
----------------------------------------------------------------------
diff --git a/crunch/src/it/resources/org/apache/crunch/lib/CogroupITData/src2.txt b/crunch/src/it/resources/org/apache/crunch/lib/CogroupITData/src2.txt
deleted file mode 100644
index ed9524e..0000000
--- a/crunch/src/it/resources/org/apache/crunch/lib/CogroupITData/src2.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-b,2-1
-c,2-2
-c,2-3
-d,2-4

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/resources/secondary_sort_input.txt
----------------------------------------------------------------------
diff --git a/crunch/src/it/resources/secondary_sort_input.txt b/crunch/src/it/resources/secondary_sort_input.txt
deleted file mode 100644
index 3c7be93..0000000
--- a/crunch/src/it/resources/secondary_sort_input.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-one,1,1 
-one,2,-3 
-two,4,5 
-two,2,6 
-two,1,7,9 
-three,0,-1 
-one,-5,10 

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/resources/set1.txt
----------------------------------------------------------------------
diff --git a/crunch/src/it/resources/set1.txt b/crunch/src/it/resources/set1.txt
deleted file mode 100644
index 3b67f57..0000000
--- a/crunch/src/it/resources/set1.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-b
-c
-a
-e
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/resources/set2.txt
----------------------------------------------------------------------
diff --git a/crunch/src/it/resources/set2.txt b/crunch/src/it/resources/set2.txt
deleted file mode 100644
index 8169ab5..0000000
--- a/crunch/src/it/resources/set2.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-c
-d
-a
\ No newline at end of file
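
set1.txt and set2.txt, deleted above as part of the crunch -> crunch-core rename, are small one-element-per-line fixtures. As a hedged sketch only, here is one way such files could be combined using the union() and count() methods visible in the PCollectionImpl.java diff below; the SetsExample class name and the input and output paths are invented for illustration.

import org.apache.crunch.PCollection;
import org.apache.crunch.PTable;
import org.apache.crunch.Pipeline;
import org.apache.crunch.impl.mr.MRPipeline;

public class SetsExample {
  public static void main(String[] args) {
    Pipeline pipeline = new MRPipeline(SetsExample.class);
    PCollection<String> set1 = pipeline.readTextFile("set1.txt");
    PCollection<String> set2 = pipeline.readTextFile("set2.txt");

    // union() concatenates the two collections; count() then tallies how often
    // each line occurs, so an element present once in each file gets a count of 2.
    PTable<String, Long> occurrences = set1.union(set2).count();

    pipeline.writeTextFile(occurrences, "set-counts");
    pipeline.done();
  }
}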


[33/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/PCollectionImpl.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/PCollectionImpl.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/PCollectionImpl.java
new file mode 100644
index 0000000..6ea9c4c
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/PCollectionImpl.java
@@ -0,0 +1,295 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.collect;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.crunch.DoFn;
+import org.apache.crunch.FilterFn;
+import org.apache.crunch.MapFn;
+import org.apache.crunch.PCollection;
+import org.apache.crunch.PObject;
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pair;
+import org.apache.crunch.ParallelDoOptions;
+import org.apache.crunch.Pipeline;
+import org.apache.crunch.SourceTarget;
+import org.apache.crunch.Target;
+import org.apache.crunch.fn.ExtractKeyFn;
+import org.apache.crunch.fn.IdentityFn;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.impl.mr.plan.DoNode;
+import org.apache.crunch.lib.Aggregate;
+import org.apache.crunch.materialize.pobject.CollectionPObject;
+import org.apache.crunch.types.PTableType;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.PTypeFamily;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+
+public abstract class PCollectionImpl<S> implements PCollection<S> {
+
+  private static final Log LOG = LogFactory.getLog(PCollectionImpl.class);
+
+  private final String name;
+  protected MRPipeline pipeline;
+  protected SourceTarget<S> materializedAt;
+  private final ParallelDoOptions options;
+  
+  public PCollectionImpl(String name) {
+    this(name, ParallelDoOptions.builder().build());
+  }
+  
+  public PCollectionImpl(String name, ParallelDoOptions options) {
+    this.name = name;
+    this.options = options;
+  }
+
+  @Override
+  public String getName() {
+    return name;
+  }
+
+  @Override
+  public String toString() {
+    return getName();
+  }
+
+  @Override
+  public PCollection<S> union(PCollection<S> other) {
+    return union(new PCollection[] { other });
+  }
+  
+  @Override
+  public PCollection<S> union(PCollection<S>... collections) {
+    List<PCollectionImpl<S>> internal = Lists.newArrayList();
+    internal.add(this);
+    for (PCollection<S> collection : collections) {
+      internal.add((PCollectionImpl<S>) collection.parallelDo(IdentityFn.<S>getInstance(), collection.getPType()));
+    }
+    return new UnionCollection<S>(internal);
+  }
+
+  @Override
+  public <T> PCollection<T> parallelDo(DoFn<S, T> fn, PType<T> type) {
+    MRPipeline pipeline = (MRPipeline) getPipeline();
+    return parallelDo("S" + pipeline.getNextAnonymousStageId(), fn, type);
+  }
+
+  @Override
+  public <T> PCollection<T> parallelDo(String name, DoFn<S, T> fn, PType<T> type) {
+    return new DoCollectionImpl<T>(name, getChainingCollection(), fn, type);
+  }
+  
+  @Override
+  public <T> PCollection<T> parallelDo(String name, DoFn<S, T> fn, PType<T> type,
+      ParallelDoOptions options) {
+    return new DoCollectionImpl<T>(name, getChainingCollection(), fn, type, options);
+  }
+  
+  @Override
+  public <K, V> PTable<K, V> parallelDo(DoFn<S, Pair<K, V>> fn, PTableType<K, V> type) {
+    MRPipeline pipeline = (MRPipeline) getPipeline();
+    return parallelDo("S" + pipeline.getNextAnonymousStageId(), fn, type);
+  }
+
+  @Override
+  public <K, V> PTable<K, V> parallelDo(String name, DoFn<S, Pair<K, V>> fn, PTableType<K, V> type) {
+    return new DoTableImpl<K, V>(name, getChainingCollection(), fn, type);
+  }
+
+  @Override
+  public <K, V> PTable<K, V> parallelDo(String name, DoFn<S, Pair<K, V>> fn, PTableType<K, V> type,
+      ParallelDoOptions options) {
+    return new DoTableImpl<K, V>(name, getChainingCollection(), fn, type, options);
+  }
+
+  public PCollection<S> write(Target target) {
+    if (materializedAt != null) {
+      getPipeline().write(new InputCollection<S>(materializedAt, (MRPipeline) getPipeline()), target);
+    } else {
+      getPipeline().write(this, target);
+    }
+    return this;
+  }
+
+  @Override
+  public PCollection<S> write(Target target, Target.WriteMode writeMode) {
+    if (materializedAt != null) {
+      getPipeline().write(new InputCollection<S>(materializedAt, (MRPipeline) getPipeline()), target,
+          writeMode);
+    } else {
+      getPipeline().write(this, target, writeMode);
+    }
+    return this;
+  }
+  
+  @Override
+  public Iterable<S> materialize() {
+    if (getSize() == 0) {
+      LOG.warn("Materializing an empty PCollection: " + this.getName());
+      return Collections.emptyList();
+    }
+    return getPipeline().materialize(this);
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public PObject<Collection<S>> asCollection() {
+    return new CollectionPObject<S>(this);
+  }
+
+  public SourceTarget<S> getMaterializedAt() {
+    return materializedAt;
+  }
+
+  public void materializeAt(SourceTarget<S> sourceTarget) {
+    this.materializedAt = sourceTarget;
+  }
+
+  @Override
+  public PCollection<S> filter(FilterFn<S> filterFn) {
+    return parallelDo(filterFn, getPType());
+  }
+
+  @Override
+  public PCollection<S> filter(String name, FilterFn<S> filterFn) {
+    return parallelDo(name, filterFn, getPType());
+  }
+
+  @Override
+  public <K> PTable<K, S> by(MapFn<S, K> mapFn, PType<K> keyType) {
+    return parallelDo(new ExtractKeyFn<K, S>(mapFn), getTypeFamily().tableOf(keyType, getPType()));
+  }
+
+  @Override
+  public <K> PTable<K, S> by(String name, MapFn<S, K> mapFn, PType<K> keyType) {
+    return parallelDo(name, new ExtractKeyFn<K, S>(mapFn), getTypeFamily().tableOf(keyType, getPType()));
+  }
+
+  @Override
+  public PTable<S, Long> count() {
+    return Aggregate.count(this);
+  }
+
+  @Override
+  public PObject<Long> length() {
+    return Aggregate.length(this);
+  }
+
+  @Override
+  public PObject<S> max() {
+    return Aggregate.max(this);
+  }
+
+  @Override
+  public PObject<S> min() {
+    return Aggregate.min(this);
+  }
+
+  @Override
+  public PTypeFamily getTypeFamily() {
+    return getPType().getFamily();
+  }
+
+  public abstract DoNode createDoNode();
+
+  public abstract List<PCollectionImpl<?>> getParents();
+
+  public PCollectionImpl<?> getOnlyParent() {
+    List<PCollectionImpl<?>> parents = getParents();
+    if (parents.size() != 1) {
+      throw new IllegalArgumentException("Expected exactly one parent PCollection");
+    }
+    return parents.get(0);
+  }
+
+  @Override
+  public Pipeline getPipeline() {
+    if (pipeline == null) {
+      pipeline = (MRPipeline) getParents().get(0).getPipeline();
+    }
+    return pipeline;
+  }
+  
+  public Set<SourceTarget<?>> getTargetDependencies() {
+    Set<SourceTarget<?>> targetDeps = options.getSourceTargets();
+    for (PCollectionImpl<?> parent : getParents()) {
+      targetDeps = Sets.union(targetDeps, parent.getTargetDependencies());
+    }
+    return targetDeps;
+  }
+  
+  public int getDepth() {
+    int parentMax = 0;
+    for (PCollectionImpl parent : getParents()) {
+      parentMax = Math.max(parent.getDepth(), parentMax);
+    }
+    return 1 + parentMax;
+  }
+
+  public interface Visitor {
+    void visitInputCollection(InputCollection<?> collection);
+
+    void visitUnionCollection(UnionCollection<?> collection);
+
+    void visitDoFnCollection(DoCollectionImpl<?> collection);
+
+    void visitDoTable(DoTableImpl<?, ?> collection);
+
+    void visitGroupedTable(PGroupedTableImpl<?, ?> collection);
+  }
+
+  public void accept(Visitor visitor) {
+    if (materializedAt != null) {
+      visitor.visitInputCollection(new InputCollection<S>(materializedAt, (MRPipeline) getPipeline()));
+    } else {
+      acceptInternal(visitor);
+    }
+  }
+
+  protected abstract void acceptInternal(Visitor visitor);
+
+  @Override
+  public long getSize() {
+    if (materializedAt != null) {
+      long sz = materializedAt.getSize(getPipeline().getConfiguration());
+      if (sz > 0) {
+        return sz;
+      }
+    }
+    return getSizeInternal();
+  }
+
+  protected abstract long getSizeInternal();
+  
+  /**
+   * Returns the PCollectionImpl that PCollectionImpls further down the pipeline should chain from.
+   * @return The PCollectionImpl instance to be chained
+   */
+  protected PCollectionImpl<S> getChainingCollection() {
+    return this;
+  }
+  
+}
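
As a rough illustration of how the Visitor interface above gets used, here is a hypothetical plan printer (not part of this patch) that dispatches on node type and walks upstream through getParents(); it is placed in the same package so that the concrete collection classes are visible:

package org.apache.crunch.impl.mr.collect;

// Hypothetical helper: prints each node in the plan and recurses toward the inputs.
public class PlanPrinter implements PCollectionImpl.Visitor {

  public void dump(PCollectionImpl<?> collection) {
    collection.accept(this);           // dispatches to the visit* method for this node type
    for (PCollectionImpl<?> parent : collection.getParents()) {
      dump(parent);                    // recursion stops at collections with no parents (the inputs)
    }
  }

  @Override
  public void visitInputCollection(InputCollection<?> c) { System.out.println("input: " + c.getName()); }

  @Override
  public void visitUnionCollection(UnionCollection<?> c) { System.out.println("union: " + c.getName()); }

  @Override
  public void visitDoFnCollection(DoCollectionImpl<?> c) { System.out.println("parallelDo: " + c.getName()); }

  @Override
  public void visitDoTable(DoTableImpl<?, ?> c) { System.out.println("parallelDo(table): " + c.getName()); }

  @Override
  public void visitGroupedTable(PGroupedTableImpl<?, ?> c) { System.out.println("groupByKey: " + c.getName()); }
}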

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/PGroupedTableImpl.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/PGroupedTableImpl.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/PGroupedTableImpl.java
new file mode 100644
index 0000000..ccac5d5
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/PGroupedTableImpl.java
@@ -0,0 +1,144 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.collect;
+
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.crunch.Aggregator;
+import org.apache.crunch.CombineFn;
+import org.apache.crunch.DoFn;
+import org.apache.crunch.Emitter;
+import org.apache.crunch.GroupingOptions;
+import org.apache.crunch.PGroupedTable;
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pair;
+import org.apache.crunch.SourceTarget;
+import org.apache.crunch.fn.Aggregators;
+import org.apache.crunch.impl.mr.plan.DoNode;
+import org.apache.crunch.types.PGroupedTableType;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.util.PartitionUtils;
+import org.apache.hadoop.mapreduce.Job;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Sets;
+
+public class PGroupedTableImpl<K, V> extends PCollectionImpl<Pair<K, Iterable<V>>> implements PGroupedTable<K, V> {
+
+  private static final Log LOG = LogFactory.getLog(PGroupedTableImpl.class);
+
+  private final PTableBase<K, V> parent;
+  private final GroupingOptions groupingOptions;
+  private final PGroupedTableType<K, V> ptype;
+  
+  PGroupedTableImpl(PTableBase<K, V> parent) {
+    this(parent, null);
+  }
+
+  PGroupedTableImpl(PTableBase<K, V> parent, GroupingOptions groupingOptions) {
+    super("GBK");
+    this.parent = parent;
+    this.groupingOptions = groupingOptions;
+    this.ptype = parent.getPTableType().getGroupedTableType();
+  }
+
+  public void configureShuffle(Job job) {
+    ptype.configureShuffle(job, groupingOptions);
+    if (groupingOptions == null || groupingOptions.getNumReducers() <= 0) {
+      int numReduceTasks = PartitionUtils.getRecommendedPartitions(this, getPipeline().getConfiguration());
+      if (numReduceTasks > 0) {
+        job.setNumReduceTasks(numReduceTasks);
+        LOG.info(String.format("Setting num reduce tasks to %d", numReduceTasks));
+      } else {
+        LOG.warn("Attempted to set a negative number of reduce tasks");
+      }
+    }
+  }
+
+  @Override
+  protected long getSizeInternal() {
+    return parent.getSizeInternal();
+  }
+
+  @Override
+  public PType<Pair<K, Iterable<V>>> getPType() {
+    return ptype;
+  }
+
+  @Override
+  public PTable<K, V> combineValues(CombineFn<K, V> combineFn) {
+    return new DoTableImpl<K, V>("combine", getChainingCollection(), combineFn, parent.getPTableType());
+  }
+
+  @Override
+  public PTable<K, V> combineValues(Aggregator<V> agg) {
+    return combineValues(Aggregators.<K, V>toCombineFn(agg));
+  }
+
+  private static class Ungroup<K, V> extends DoFn<Pair<K, Iterable<V>>, Pair<K, V>> {
+    @Override
+    public void process(Pair<K, Iterable<V>> input, Emitter<Pair<K, V>> emitter) {
+      for (V v : input.second()) {
+        emitter.emit(Pair.of(input.first(), v));
+      }
+    }
+  }
+
+  public PTable<K, V> ungroup() {
+    return parallelDo("ungroup", new Ungroup<K, V>(), parent.getPTableType());
+  }
+
+  @Override
+  protected void acceptInternal(PCollectionImpl.Visitor visitor) {
+    visitor.visitGroupedTable(this);
+  }
+
+  @Override
+  public Set<SourceTarget<?>> getTargetDependencies() {
+    Set<SourceTarget<?>> td = Sets.newHashSet(super.getTargetDependencies());
+    if (groupingOptions != null) {
+      td.addAll(groupingOptions.getSourceTargets());
+    }
+    return ImmutableSet.copyOf(td);
+  }
+  
+  @Override
+  public List<PCollectionImpl<?>> getParents() {
+    return ImmutableList.<PCollectionImpl<?>> of(parent);
+  }
+
+  @Override
+  public DoNode createDoNode() {
+    return DoNode.createFnNode(getName(), ptype.getInputMapFn(), ptype);
+  }
+
+  public DoNode getGroupingNode() {
+    return DoNode.createGroupingNode("", ptype);
+  }
+  
+  @Override
+  protected PCollectionImpl<Pair<K, Iterable<V>>> getChainingCollection() {
+    // Use a copy for chaining to allow sending the output of a single grouped table to multiple outputs
+    // TODO This should be implemented in a cleaner way in the planner
+    return new PGroupedTableImpl<K, V>(parent, groupingOptions);
+  }
+}
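
For context, a minimal word-count-style sketch of the grouping API these classes back; the WordOnePairs DoFn and the input/output paths are placeholders, not part of the patch:

import org.apache.crunch.PCollection;
import org.apache.crunch.PTable;
import org.apache.crunch.Pipeline;
import org.apache.crunch.fn.Aggregators;
import org.apache.crunch.impl.mr.MRPipeline;
import org.apache.crunch.types.writable.Writables;

public class GroupingSketch {
  public static void main(String[] args) {
    Pipeline pipeline = new MRPipeline(GroupingSketch.class);
    PCollection<String> lines = pipeline.readTextFile("/tmp/input");   // placeholder path
    // WordOnePairs is a hypothetical DoFn<String, Pair<String, Long>> that emits (word, 1L)
    PTable<String, Long> ones = lines.parallelDo(new WordOnePairs(),
        Writables.tableOf(Writables.strings(), Writables.longs()));
    // groupByKey() hands back the PGroupedTableImpl above; combineValues chains a
    // "combine" DoTableImpl onto the copy returned by getChainingCollection()
    PTable<String, Long> counts = ones.groupByKey().combineValues(Aggregators.SUM_LONGS());
    pipeline.writeTextFile(counts, "/tmp/counts");                     // placeholder path
    pipeline.done();
  }
}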

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/PTableBase.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/PTableBase.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/PTableBase.java
new file mode 100644
index 0000000..3c2393d
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/PTableBase.java
@@ -0,0 +1,169 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.collect;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.crunch.FilterFn;
+import org.apache.crunch.GroupingOptions;
+import org.apache.crunch.PCollection;
+import org.apache.crunch.PObject;
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pair;
+import org.apache.crunch.ParallelDoOptions;
+import org.apache.crunch.TableSource;
+import org.apache.crunch.Target;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.lib.Aggregate;
+import org.apache.crunch.lib.Cogroup;
+import org.apache.crunch.lib.Join;
+import org.apache.crunch.lib.PTables;
+import org.apache.crunch.materialize.MaterializableMap;
+import org.apache.crunch.materialize.pobject.MapPObject;
+import org.apache.crunch.types.PType;
+
+import com.google.common.collect.Lists;
+
+abstract class PTableBase<K, V> extends PCollectionImpl<Pair<K, V>> implements PTable<K, V> {
+
+  public PTableBase(String name) {
+    super(name);
+  }
+
+  public PTableBase(String name, ParallelDoOptions options) {
+    super(name, options);
+  }
+  
+  public PType<K> getKeyType() {
+    return getPTableType().getKeyType();
+  }
+
+  public PType<V> getValueType() {
+    return getPTableType().getValueType();
+  }
+
+  public PGroupedTableImpl<K, V> groupByKey() {
+    return new PGroupedTableImpl<K, V>(this);
+  }
+
+  public PGroupedTableImpl<K, V> groupByKey(int numReduceTasks) {
+    return new PGroupedTableImpl<K, V>(this, GroupingOptions.builder().numReducers(numReduceTasks).build());
+  }
+
+  public PGroupedTableImpl<K, V> groupByKey(GroupingOptions groupingOptions) {
+    return new PGroupedTableImpl<K, V>(this, groupingOptions);
+  }
+
+  @Override
+  public PTable<K, V> union(PTable<K, V> other) {
+    return union(new PTable[] { other });
+  }
+  
+  @Override
+  public PTable<K, V> union(PTable<K, V>... others) {
+    List<PTableBase<K, V>> internal = Lists.newArrayList();
+    internal.add(this);
+    for (PTable<K, V> table : others) {
+      internal.add((PTableBase<K, V>) table);
+    }
+    return new UnionTable<K, V>(internal);
+  }
+
+  @Override
+  public PTable<K, V> write(Target target) {
+    if (getMaterializedAt() != null) {
+      getPipeline().write(new InputTable<K, V>(
+          (TableSource<K, V>) getMaterializedAt(), (MRPipeline) getPipeline()), target);
+    } else {
+      getPipeline().write(this, target);
+    }
+    return this;
+  }
+
+  @Override
+  public PTable<K, V> write(Target target, Target.WriteMode writeMode) {
+    if (getMaterializedAt() != null) {
+      getPipeline().write(new InputTable<K, V>(
+          (TableSource<K, V>) getMaterializedAt(), (MRPipeline) getPipeline()), target, writeMode);
+    } else {
+      getPipeline().write(this, target, writeMode);
+    }
+    return this;
+  }
+  
+  @Override
+  public PTable<K, V> filter(FilterFn<Pair<K, V>> filterFn) {
+    return parallelDo(filterFn, getPTableType());
+  }
+  
+  @Override
+  public PTable<K, V> filter(String name, FilterFn<Pair<K, V>> filterFn) {
+    return parallelDo(name, filterFn, getPTableType());
+  }
+  
+  @Override
+  public PTable<K, V> top(int count) {
+    return Aggregate.top(this, count, true);
+  }
+
+  @Override
+  public PTable<K, V> bottom(int count) {
+    return Aggregate.top(this, count, false);
+  }
+
+  @Override
+  public PTable<K, Collection<V>> collectValues() {
+    return Aggregate.collectValues(this);
+  }
+
+  @Override
+  public <U> PTable<K, Pair<V, U>> join(PTable<K, U> other) {
+    return Join.join(this, other);
+  }
+
+  @Override
+  public <U> PTable<K, Pair<Collection<V>, Collection<U>>> cogroup(PTable<K, U> other) {
+    return Cogroup.cogroup(this, other);
+  }
+
+  @Override
+  public PCollection<K> keys() {
+    return PTables.keys(this);
+  }
+
+  @Override
+  public PCollection<V> values() {
+    return PTables.values(this);
+  }
+
+  /**
+   * Returns a Map<K, V> made up of the keys and values in this PTable.
+   */
+  @Override
+  public Map<K, V> materializeToMap() {
+    return new MaterializableMap<K, V>(this.materialize());
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public PObject<Map<K, V>> asMap() {
+    return new MapPObject<K, V>(this);
+  }
+}
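
A small sketch of the two client-side views provided at the bottom of this class, assuming the table is small enough to pull back to the client:

import java.util.Map;

import org.apache.crunch.PObject;
import org.apache.crunch.PTable;

public class ClientViewSketch {
  // Materializes the table into a client-side Map; reading it triggers a
  // pipeline run if the data has not been materialized yet.
  static Map<String, Long> toMap(PTable<String, Long> counts) {
    return counts.materializeToMap();
  }

  // Deferred alternative: a PObject whose getValue() materializes on demand.
  static PObject<Map<String, Long>> toDeferredMap(PTable<String, Long> counts) {
    return counts.asMap();
  }
}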

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/UnionCollection.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/UnionCollection.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/UnionCollection.java
new file mode 100644
index 0000000..7b3dd7b
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/UnionCollection.java
@@ -0,0 +1,80 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.collect;
+
+import java.util.List;
+
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.impl.mr.plan.DoNode;
+import org.apache.crunch.types.PType;
+
+import com.google.common.collect.ImmutableList;
+
+public class UnionCollection<S> extends PCollectionImpl<S> {
+
+  private List<PCollectionImpl<S>> parents;
+  private long size = 0;
+
+  private static <S> String flatName(List<PCollectionImpl<S>> collections) {
+    StringBuilder sb = new StringBuilder("union(");
+    for (int i = 0; i < collections.size(); i++) {
+      if (i != 0) {
+        sb.append(',');
+      }
+      sb.append(collections.get(i).getName());
+    }
+    return sb.append(')').toString();
+  }
+
+  UnionCollection(List<PCollectionImpl<S>> collections) {
+    super(flatName(collections));
+    this.parents = ImmutableList.copyOf(collections);
+    this.pipeline = (MRPipeline) parents.get(0).getPipeline();
+    for (PCollectionImpl<S> parent : parents) {
+      if (this.pipeline != parent.getPipeline()) {
+        throw new IllegalStateException("Cannot union PCollections from different Pipeline instances");
+      }
+      size += parent.getSize();
+    }
+  }
+
+  @Override
+  protected long getSizeInternal() {
+    return size;
+  }
+
+  @Override
+  protected void acceptInternal(PCollectionImpl.Visitor visitor) {
+    visitor.visitUnionCollection(this);
+  }
+
+  @Override
+  public PType<S> getPType() {
+    return parents.get(0).getPType();
+  }
+
+  @Override
+  public List<PCollectionImpl<?>> getParents() {
+    return ImmutableList.<PCollectionImpl<?>> copyOf(parents);
+  }
+
+  @Override
+  public DoNode createDoNode() {
+    throw new UnsupportedOperationException("Unioned collection does not support DoNodes");
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/UnionTable.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/UnionTable.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/UnionTable.java
new file mode 100644
index 0000000..a369432
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/UnionTable.java
@@ -0,0 +1,92 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.collect;
+
+import java.util.List;
+
+import org.apache.crunch.Pair;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.impl.mr.plan.DoNode;
+import org.apache.crunch.types.PTableType;
+import org.apache.crunch.types.PType;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+
+public class UnionTable<K, V> extends PTableBase<K, V> {
+
+  private PTableType<K, V> ptype;
+  private List<PCollectionImpl<Pair<K, V>>> parents;
+  private long size;
+
+  private static <K, V> String flatName(List<PTableBase<K, V>> tables) {
+    StringBuilder sb = new StringBuilder("union(");
+    for (int i = 0; i < tables.size(); i++) {
+      if (i != 0) {
+        sb.append(',');
+      }
+      sb.append(tables.get(i).getName());
+    }
+    return sb.append(')').toString();
+  }
+
+  public UnionTable(List<PTableBase<K, V>> tables) {
+    super(flatName(tables));
+    this.ptype = tables.get(0).getPTableType();
+    this.pipeline = (MRPipeline) tables.get(0).getPipeline();
+    this.parents = Lists.newArrayList();
+    for (PTableBase<K, V> parent : tables) {
+      if (pipeline != parent.getPipeline()) {
+        throw new IllegalStateException("Cannot union PTables from different Pipeline instances");
+      }
+      this.parents.add(parent);
+      size += parent.getSize();
+    }
+  }
+
+  @Override
+  protected long getSizeInternal() {
+    return size;
+  }
+
+  @Override
+  public PTableType<K, V> getPTableType() {
+    return ptype;
+  }
+
+  @Override
+  public PType<Pair<K, V>> getPType() {
+    return ptype;
+  }
+
+  @Override
+  public List<PCollectionImpl<?>> getParents() {
+    return ImmutableList.<PCollectionImpl<?>> copyOf(parents);
+  }
+
+  @Override
+  protected void acceptInternal(PCollectionImpl.Visitor visitor) {
+    visitor.visitUnionCollection(new UnionCollection<Pair<K, V>>(parents));
+  }
+
+  @Override
+  public DoNode createDoNode() {
+    throw new UnsupportedOperationException("Unioned table does not support do nodes");
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/emit/IntermediateEmitter.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/emit/IntermediateEmitter.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/emit/IntermediateEmitter.java
new file mode 100644
index 0000000..b6df98b
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/emit/IntermediateEmitter.java
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.emit;
+
+import java.util.List;
+
+import org.apache.crunch.DoFn;
+import org.apache.crunch.Emitter;
+import org.apache.crunch.impl.mr.run.RTNode;
+import org.apache.crunch.types.PType;
+import org.apache.hadoop.conf.Configuration;
+
+import com.google.common.collect.ImmutableList;
+
+/**
+ * An {@link Emitter} implementation that links the output of one {@link DoFn} to the input of
+ * another {@code DoFn}.
+ * 
+ */
+public class IntermediateEmitter implements Emitter<Object> {
+
+  private final List<RTNode> children;
+  private final Configuration conf;
+  private final PType<Object> outputPType;
+  private final boolean needDetachedValues;
+
+  public IntermediateEmitter(PType<Object> outputPType, List<RTNode> children, Configuration conf) {
+    this.outputPType = outputPType;
+    this.children = ImmutableList.copyOf(children);
+    this.conf = conf;
+
+    outputPType.initialize(conf);
+    needDetachedValues = this.children.size() > 1;
+  }
+
+  public void emit(Object emitted) {
+    for (RTNode child : children) {
+      Object value = emitted;
+      if (needDetachedValues) {
+        value = this.outputPType.getDetachedValue(emitted);
+      }
+      child.process(value);
+    }
+  }
+
+  public void flush() {
+    // No-op
+  }
+}
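
The detach step above only matters when a DoFn's output fans out to more than one child and the PType's deserialized objects may be reused between records (Avro records are the usual case). A rough sketch of the call involved, using a writable pair type purely for illustration:

import org.apache.crunch.Pair;
import org.apache.crunch.types.PType;
import org.apache.crunch.types.writable.Writables;
import org.apache.hadoop.conf.Configuration;

public class DetachSketch {
  public static void main(String[] args) {
    PType<Pair<String, Long>> ptype = Writables.pairs(Writables.strings(), Writables.longs());
    ptype.initialize(new Configuration());  // same initialization the emitter performs in its constructor
    Pair<String, Long> original = Pair.of("key", 1L);
    // Copies the value so a second child RTNode never sees mutations made by the first.
    Pair<String, Long> detached = ptype.getDetachedValue(original);
    System.out.println(detached);
  }
}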

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/emit/MultipleOutputEmitter.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/emit/MultipleOutputEmitter.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/emit/MultipleOutputEmitter.java
new file mode 100644
index 0000000..2e58fed
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/emit/MultipleOutputEmitter.java
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.emit;
+
+import java.io.IOException;
+
+import org.apache.crunch.CrunchRuntimeException;
+import org.apache.crunch.Emitter;
+import org.apache.crunch.io.CrunchOutputs;
+import org.apache.crunch.types.Converter;
+
+public class MultipleOutputEmitter<T, K, V> implements Emitter<T> {
+
+  private final Converter converter;
+  private final CrunchOutputs<K, V> outputs;
+  private final String outputName;
+
+  public MultipleOutputEmitter(Converter converter, CrunchOutputs<K, V> outputs,
+      String outputName) {
+    this.converter = converter;
+    this.outputs = outputs;
+    this.outputName = outputName;
+  }
+
+  @Override
+  public void emit(T emitted) {
+    try {
+      this.outputs.write(outputName,
+          (K) converter.outputKey(emitted),
+          (V) converter.outputValue(emitted));
+    } catch (Exception e) {
+      throw new CrunchRuntimeException(e);
+    }
+  }
+
+  @Override
+  public void flush() {
+    // No-op
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/emit/OutputEmitter.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/emit/OutputEmitter.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/emit/OutputEmitter.java
new file mode 100644
index 0000000..bc3ae0d
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/emit/OutputEmitter.java
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.emit;
+
+import java.io.IOException;
+
+import org.apache.crunch.CrunchRuntimeException;
+import org.apache.crunch.Emitter;
+import org.apache.crunch.types.Converter;
+import org.apache.hadoop.mapreduce.TaskInputOutputContext;
+
+public class OutputEmitter<T, K, V> implements Emitter<T> {
+
+  private final Converter<K, V, Object, Object> converter;
+  private final TaskInputOutputContext<?, ?, K, V> context;
+
+  public OutputEmitter(Converter<K, V, Object, Object> converter, TaskInputOutputContext<?, ?, K, V> context) {
+    this.converter = converter;
+    this.context = context;
+  }
+
+  public void emit(T emitted) {
+    try {
+      K key = converter.outputKey(emitted);
+      V value = converter.outputValue(emitted);
+      this.context.write(key, value);
+    } catch (IOException e) {
+      throw new CrunchRuntimeException(e);
+    } catch (InterruptedException e) {
+      throw new CrunchRuntimeException(e);
+    }
+  }
+
+  public void flush() {
+    // No-op
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/exec/CappedExponentialCounter.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/exec/CappedExponentialCounter.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/exec/CappedExponentialCounter.java
new file mode 100644
index 0000000..d90f2e8
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/exec/CappedExponentialCounter.java
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.exec;
+
+/**
+ * Generates an exponentially growing series of values, capped at a fixed limit.
+ *
+ * It is used for creating retry and polling intervals. It is NOT thread-safe.
+ */
+public class CappedExponentialCounter {
+
+  private long current;
+  private final long limit;
+
+  public CappedExponentialCounter(long start, long limit) {
+    this.current = start;
+    this.limit = limit;
+  }
+
+  public long get() {
+    long result = current;
+    current = Math.min(current * 2, limit);
+    return result;
+  }
+}
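
For example, with the local-mode values used by MRExecutor later in this patch (start 50ms, cap 1000ms), successive calls double until the cap is reached:

import org.apache.crunch.impl.mr.exec.CappedExponentialCounter;

public class PollIntervalSketch {
  public static void main(String[] args) {
    CappedExponentialCounter poll = new CappedExponentialCounter(50, 1000);
    for (int i = 0; i < 7; i++) {
      System.out.print(poll.get() + " ");  // prints: 50 100 200 400 800 1000 1000
    }
  }
}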

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/exec/CrunchJobHooks.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/exec/CrunchJobHooks.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/exec/CrunchJobHooks.java
new file mode 100644
index 0000000..74bc9ac
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/exec/CrunchJobHooks.java
@@ -0,0 +1,153 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.exec;
+
+import java.io.IOException;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.crunch.hadoop.mapreduce.lib.jobcontrol.CrunchControlledJob;
+import org.apache.crunch.impl.mr.plan.PlanningParameters;
+import org.apache.crunch.impl.mr.run.RuntimeParameters;
+import org.apache.crunch.io.FileNamingScheme;
+import org.apache.crunch.io.PathTarget;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+
+public final class CrunchJobHooks {
+
+  private CrunchJobHooks() {}
+
+  /** Creates missing input directories before the job is submitted. */
+  public static final class PrepareHook implements CrunchControlledJob.Hook {
+    private final Job job;
+
+    public PrepareHook(Job job) {
+      this.job = job;
+    }
+
+    @Override
+    public void run() throws IOException {
+      Configuration conf = job.getConfiguration();
+      if (conf.getBoolean(RuntimeParameters.CREATE_DIR, false)) {
+        Path[] inputPaths = FileInputFormat.getInputPaths(job);
+        for (Path inputPath : inputPaths) {
+          FileSystem fs = inputPath.getFileSystem(conf);
+          if (!fs.exists(inputPath)) {
+            try {
+              fs.mkdirs(inputPath);
+            } catch (IOException e) {
+              // Ignore: creating missing input directories is best-effort.
+            }
+          }
+        }
+      }
+    }
+  }
+
+  /** Moves output files produced by the MapReduce job to their target output directories. */
+  public static final class CompletionHook implements CrunchControlledJob.Hook {
+    private final Job job;
+    private final Path workingPath;
+    private final Map<Integer, PathTarget> multiPaths;
+    private final boolean mapOnlyJob;
+
+    public CompletionHook(Job job, Path workingPath, Map<Integer, PathTarget> multiPaths, boolean mapOnlyJob) {
+      this.job = job;
+      this.workingPath = workingPath;
+      this.multiPaths = multiPaths;
+      this.mapOnlyJob = mapOnlyJob;
+    }
+
+    @Override
+    public void run() throws IOException {
+      handleMultiPaths();
+    }
+
+    private synchronized void handleMultiPaths() throws IOException {
+      if (!multiPaths.isEmpty()) {
+        // Need to handle moving the data from the output directory of the
+        // job to the output locations specified in the paths.
+        FileSystem srcFs = workingPath.getFileSystem(job.getConfiguration());
+        for (Map.Entry<Integer, PathTarget> entry : multiPaths.entrySet()) {
+          final int i = entry.getKey();
+          final Path dst = entry.getValue().getPath();
+          FileNamingScheme fileNamingScheme = entry.getValue().getFileNamingScheme();
+
+          Path src = new Path(workingPath, PlanningParameters.MULTI_OUTPUT_PREFIX + i + "-*");
+          Path[] srcs = FileUtil.stat2Paths(srcFs.globStatus(src), src);
+          Configuration conf = job.getConfiguration();
+          FileSystem dstFs = dst.getFileSystem(conf);
+          if (!dstFs.exists(dst)) {
+            dstFs.mkdirs(dst);
+          }
+          boolean sameFs = isCompatible(srcFs, dst);
+          for (Path s : srcs) {
+            Path d = getDestFile(conf, s, dst, fileNamingScheme);
+            if (sameFs) {
+              srcFs.rename(s, d);
+            } else {
+              FileUtil.copy(srcFs, s, dstFs, d, true, true, job.getConfiguration());
+            }
+          }
+        }
+      }
+    }
+
+    private boolean isCompatible(FileSystem fs, Path path) {
+      try {
+        fs.makeQualified(path);
+        return true;
+      } catch (IllegalArgumentException e) {
+        return false;
+      }
+    }
+    private Path getDestFile(Configuration conf, Path src, Path dir, FileNamingScheme fileNamingScheme)
+        throws IOException {
+      String outputFilename = null;
+      if (mapOnlyJob) {
+        outputFilename = fileNamingScheme.getMapOutputName(conf, dir);
+      } else {
+        outputFilename = fileNamingScheme.getReduceOutputName(conf, dir, extractPartitionNumber(src.getName()));
+      }
+      if (src.getName().endsWith(org.apache.avro.mapred.AvroOutputFormat.EXT)) {
+        outputFilename += org.apache.avro.mapred.AvroOutputFormat.EXT;
+      }
+      return new Path(dir, outputFilename);
+    }
+  }
+
+  /**
+   * Extract the partition number from a raw reducer output filename.
+   *
+   * @param reduceOutputFileName The raw reducer output file name
+   * @return The partition number encoded in the filename
+   */
+  static int extractPartitionNumber(String reduceOutputFileName) {
+    Matcher matcher = Pattern.compile(".*-r-(\\d{5})").matcher(reduceOutputFileName);
+    if (matcher.find()) {
+      return Integer.parseInt(matcher.group(1), 10);
+    } else {
+      throw new IllegalArgumentException("Reducer output name '" + reduceOutputFileName + "' cannot be parsed");
+    }
+  }
+}
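
Since extractPartitionNumber is package-private, here is a standalone sketch of the same regex applied to a standard reducer output file name:

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class PartitionNumberSketch {
  public static void main(String[] args) {
    // Mirrors the pattern used above: the trailing five digits of "*-r-NNNNN"
    Matcher m = Pattern.compile(".*-r-(\\d{5})").matcher("part-r-00007");
    if (m.find()) {
      System.out.println(Integer.parseInt(m.group(1), 10));  // prints 7
    }
  }
}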

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/exec/MRExecutor.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/exec/MRExecutor.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/exec/MRExecutor.java
new file mode 100644
index 0000000..4c7b7ea
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/exec/MRExecutor.java
@@ -0,0 +1,198 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.exec;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicReference;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.crunch.PipelineExecution;
+import org.apache.crunch.PipelineResult;
+import org.apache.crunch.SourceTarget;
+import org.apache.crunch.Target;
+import org.apache.crunch.hadoop.mapreduce.lib.jobcontrol.CrunchControlledJob;
+import org.apache.crunch.hadoop.mapreduce.lib.jobcontrol.CrunchJobControl;
+import org.apache.crunch.impl.mr.collect.PCollectionImpl;
+import org.apache.crunch.materialize.MaterializableIterable;
+import org.apache.hadoop.conf.Configuration;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Provides clients with APIs for controlling jobs at runtime.
+ *
+ * This class has a thread that submits jobs when they become ready, monitors
+ * the states of the running jobs, and updates the state of each job based on
+ * the state changes of the jobs it depends on.
+ *
+ * It is thread-safe.
+ */
+public class MRExecutor implements PipelineExecution {
+
+  private static final Log LOG = LogFactory.getLog(MRExecutor.class);
+
+  private final CrunchJobControl control;
+  private final Map<PCollectionImpl<?>, Set<Target>> outputTargets;
+  private final Map<PCollectionImpl<?>, MaterializableIterable> toMaterialize;
+  private final CountDownLatch doneSignal = new CountDownLatch(1);
+  private final CountDownLatch killSignal = new CountDownLatch(1);
+  private final CappedExponentialCounter pollInterval;
+  private AtomicReference<Status> status = new AtomicReference<Status>(Status.READY);
+  private PipelineResult result;
+  private Thread monitorThread;
+
+  private String planDotFile;
+  
+  public MRExecutor(Class<?> jarClass, Map<PCollectionImpl<?>, Set<Target>> outputTargets,
+      Map<PCollectionImpl<?>, MaterializableIterable> toMaterialize) {
+    this.control = new CrunchJobControl(jarClass.toString());
+    this.outputTargets = outputTargets;
+    this.toMaterialize = toMaterialize;
+    this.monitorThread = new Thread(new Runnable() {
+      @Override
+      public void run() {
+        monitorLoop();
+      }
+    });
+    this.pollInterval = isLocalMode()
+      ? new CappedExponentialCounter(50, 1000)
+      : new CappedExponentialCounter(500, 10000);
+  }
+
+  public void addJob(CrunchControlledJob job) {
+    this.control.addJob(job);
+  }
+
+  public void setPlanDotFile(String planDotFile) {
+    this.planDotFile = planDotFile;
+  }
+  
+  public PipelineExecution execute() {
+    monitorThread.start();
+    return this;
+  }
+
+  /** Polls job status and starts new jobs until everything finishes or a kill is requested; runs on the monitor thread. */
+  private void monitorLoop() {
+    try {
+      while (killSignal.getCount() > 0 && !control.allFinished()) {
+        control.pollJobStatusAndStartNewOnes();
+        killSignal.await(pollInterval.get(), TimeUnit.MILLISECONDS);
+      }
+      control.killAllRunningJobs();
+
+      List<CrunchControlledJob> failures = control.getFailedJobList();
+      if (!failures.isEmpty()) {
+        System.err.println(failures.size() + " job failure(s) occurred:");
+        for (CrunchControlledJob job : failures) {
+          System.err.println(job.getJobName() + "(" + job.getJobID() + "): " + job.getMessage());
+        }
+      }
+      List<PipelineResult.StageResult> stages = Lists.newArrayList();
+      for (CrunchControlledJob job : control.getSuccessfulJobList()) {
+        stages.add(new PipelineResult.StageResult(job.getJobName(), job.getJob().getCounters()));
+      }
+
+      for (PCollectionImpl<?> c : outputTargets.keySet()) {
+        if (toMaterialize.containsKey(c)) {
+          MaterializableIterable iter = toMaterialize.get(c);
+          if (iter.isSourceTarget()) {
+            iter.materialize();
+            c.materializeAt((SourceTarget) iter.getSource());
+          }
+        } else {
+          boolean materialized = false;
+          for (Target t : outputTargets.get(c)) {
+            if (!materialized) {
+              if (t instanceof SourceTarget) {
+                c.materializeAt((SourceTarget) t);
+                materialized = true;
+              } else {
+                SourceTarget st = t.asSourceTarget(c.getPType());
+                if (st != null) {
+                  c.materializeAt(st);
+                  materialized = true;
+                }
+              }
+            }
+          }
+        }
+      }
+
+      synchronized (this) {
+        result = new PipelineResult(stages);
+        if (killSignal.getCount() == 0) {
+          status.set(Status.KILLED);
+        } else {
+          status.set(result.succeeded() ? Status.SUCCEEDED : Status.FAILED);
+        }
+      }
+    } catch (InterruptedException e) {
+      throw new AssertionError(e); // Nobody should interrupt us.
+    } catch (IOException e) {
+      LOG.error("Pipeline failed due to exception", e);
+      status.set(Status.FAILED);
+    } finally {
+      doneSignal.countDown();
+    }
+  }
+
+  @Override
+  public String getPlanDotFile() {
+    return planDotFile;
+  }
+
+  @Override
+  public void waitFor(long timeout, TimeUnit timeUnit) throws InterruptedException {
+    doneSignal.await(timeout, timeUnit);
+  }
+
+  @Override
+  public void waitUntilDone() throws InterruptedException {
+    doneSignal.await();
+  }
+
+  @Override
+  public synchronized Status getStatus() {
+    return status.get();
+  }
+
+  @Override
+  public synchronized PipelineResult getResult() {
+    return result;
+  }
+
+  @Override
+  public void kill() throws InterruptedException {
+    killSignal.countDown();
+  }
+
+  private static boolean isLocalMode() {
+    Configuration conf = new Configuration();
+    // Try to handle MapReduce version 0.20 or 0.22
+    String jobTrackerAddress = conf.get("mapreduce.jobtracker.address",
+        conf.get("mapred.job.tracker", "local"));
+    return "local".equals(jobTrackerAddress);
+  }
+}
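
A hedged sketch of how a client drives this executor through the PipelineExecution interface, assuming the pipeline exposes it via an asynchronous run method such as MRPipeline's runAsync():

import java.util.concurrent.TimeUnit;

import org.apache.crunch.PipelineExecution;
import org.apache.crunch.PipelineResult;
import org.apache.crunch.impl.mr.MRPipeline;

public class AsyncRunSketch {
  public static void main(String[] args) throws InterruptedException {
    MRPipeline pipeline = new MRPipeline(AsyncRunSketch.class);
    // ... wire up reads, parallelDo calls, and writes here ...
    PipelineExecution exec = pipeline.runAsync();      // assumed entry point returning the MRExecutor
    exec.waitFor(30, TimeUnit.MINUTES);                // bounded wait on doneSignal
    if (exec.getStatus() != PipelineExecution.Status.SUCCEEDED) {
      exec.kill();                                     // trips killSignal; monitorLoop kills running jobs
      exec.waitUntilDone();
    }
    PipelineResult result = exec.getResult();
    System.out.println("succeeded: " + (result != null && result.succeeded()));
  }
}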

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/package-info.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/package-info.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/package-info.java
new file mode 100644
index 0000000..7e403c3
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/package-info.java
@@ -0,0 +1,22 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A Pipeline implementation that runs on Hadoop MapReduce.
+ */
+package org.apache.crunch.impl.mr;

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/DoNode.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/DoNode.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/DoNode.java
new file mode 100644
index 0000000..865369c
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/DoNode.java
@@ -0,0 +1,163 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.plan;
+
+import java.util.List;
+
+import org.apache.commons.lang.builder.HashCodeBuilder;
+import org.apache.crunch.DoFn;
+import org.apache.crunch.Source;
+import org.apache.crunch.impl.mr.run.NodeContext;
+import org.apache.crunch.impl.mr.run.RTNode;
+import org.apache.crunch.types.Converter;
+import org.apache.crunch.types.PGroupedTableType;
+import org.apache.crunch.types.PType;
+import org.apache.hadoop.conf.Configuration;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+
+public class DoNode {
+
+  private static final List<DoNode> NO_CHILDREN = ImmutableList.of();
+
+  private final DoFn fn;
+  private final String name;
+  private final PType<?> ptype;
+  private final List<DoNode> children;
+  private final Converter outputConverter;
+  private final Source<?> source;
+  private String outputName;
+
+  private DoNode(DoFn fn, String name, PType<?> ptype, List<DoNode> children, Converter outputConverter,
+      Source<?> source) {
+    this.fn = fn;
+    this.name = name;
+    this.ptype = ptype;
+    this.children = children;
+    this.outputConverter = outputConverter;
+    this.source = source;
+  }
+
+  private static List<DoNode> allowsChildren() {
+    return Lists.newArrayList();
+  }
+
+  public static <K, V> DoNode createGroupingNode(String name, PGroupedTableType<K, V> ptype) {
+    DoFn<?, ?> fn = ptype.getOutputMapFn();
+    return new DoNode(fn, name, ptype, NO_CHILDREN, ptype.getGroupingConverter(), null);
+  }
+
+  public static <S> DoNode createOutputNode(String name, PType<S> ptype) {
+    Converter outputConverter = ptype.getConverter();
+    DoFn<?, ?> fn = ptype.getOutputMapFn();
+    return new DoNode(fn, name, ptype, NO_CHILDREN, outputConverter, null);
+  }
+
+  public static DoNode createFnNode(String name, DoFn<?, ?> function, PType<?> ptype) {
+    return new DoNode(function, name, ptype, allowsChildren(), null, null);
+  }
+
+  public static <S> DoNode createInputNode(Source<S> source) {
+    PType<?> ptype = source.getType();
+    DoFn<?, ?> fn = ptype.getInputMapFn();
+    return new DoNode(fn, source.toString(), ptype, allowsChildren(), null, source);
+  }
+
+  public boolean isInputNode() {
+    return source != null;
+  }
+
+  public boolean isOutputNode() {
+    return outputConverter != null;
+  }
+
+  public String getName() {
+    return name;
+  }
+
+  public List<DoNode> getChildren() {
+    return children;
+  }
+
+  public Source<?> getSource() {
+    return source;
+  }
+
+  public PType<?> getPType() {
+    return ptype;
+  }
+
+  public DoNode addChild(DoNode node) {
+    // TODO: This is sort of terrible, refactor the code to make this make more sense.
+    boolean exists = false;
+    for (DoNode child : children) {
+      if (node == child) {
+        exists = true;
+        break;
+      }
+    }
+    if (!exists) {
+      children.add(node);
+    }
+    return this;
+  }
+
+  public void setOutputName(String outputName) {
+    if (outputConverter == null) {
+      throw new IllegalStateException("Cannot set output name w/o output converter: " + outputName);
+    }
+    this.outputName = outputName;
+  }
+
+  public RTNode toRTNode(boolean inputNode, Configuration conf, NodeContext nodeContext) {
+    List<RTNode> childRTNodes = Lists.newArrayList();
+    fn.configure(conf);
+    for (DoNode child : children) {
+      childRTNodes.add(child.toRTNode(false, conf, nodeContext));
+    }
+
+    Converter inputConverter = null;
+    if (inputNode) {
+      if (nodeContext == NodeContext.MAP) {
+        inputConverter = ptype.getConverter();
+      } else {
+        inputConverter = ((PGroupedTableType<?, ?>) ptype).getGroupingConverter();
+      }
+    }
+    return new RTNode(fn, (PType<Object>) getPType(), name, childRTNodes, inputConverter, outputConverter, outputName);
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    if (other == null || !(other instanceof DoNode)) {
+      return false;
+    }
+    if (this == other) {
+      return true;
+    }
+    DoNode o = (DoNode) other;
+    return (name.equals(o.name) && fn.equals(o.fn) && source == o.source && outputConverter == o.outputConverter);
+  }
+
+  @Override
+  public int hashCode() {
+    HashCodeBuilder hcb = new HashCodeBuilder();
+    return hcb.append(name).append(fn).append(source).append(outputConverter).toHashCode();
+  }
+}
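
A rough sketch of how the factory methods above assemble a map-side chain; MyDoFn and the paths are placeholders, and in practice the planner does this wiring itself:

import org.apache.crunch.impl.mr.plan.DoNode;
import org.apache.crunch.impl.mr.run.NodeContext;
import org.apache.crunch.impl.mr.run.RTNode;
import org.apache.crunch.io.From;
import org.apache.crunch.types.writable.Writables;
import org.apache.hadoop.conf.Configuration;

public class DoNodeSketch {
  public static RTNode buildMapChain() {
    DoNode input = DoNode.createInputNode(From.textFile("/tmp/in"));               // placeholder path
    DoNode fn = DoNode.createFnNode("tokenize", new MyDoFn(), Writables.strings()); // MyDoFn is hypothetical
    DoNode output = DoNode.createOutputNode("text-out", Writables.strings());
    output.setOutputName("out0");  // requires the output converter set by createOutputNode
    input.addChild(fn);
    fn.addChild(output);
    // Convert the planning-time tree into runtime nodes for the map phase.
    return input.toRTNode(true, new Configuration(), NodeContext.MAP);
  }
}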

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/DotfileWriter.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/DotfileWriter.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/DotfileWriter.java
new file mode 100644
index 0000000..46d8c53
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/DotfileWriter.java
@@ -0,0 +1,238 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.plan;
+
+import java.util.List;
+import java.util.Set;
+
+import org.apache.crunch.Pair;
+import org.apache.crunch.Target;
+import org.apache.crunch.impl.mr.collect.InputCollection;
+import org.apache.crunch.impl.mr.collect.PCollectionImpl;
+import org.apache.crunch.impl.mr.collect.PGroupedTableImpl;
+
+import com.google.common.base.Joiner;
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+
+/**
+ * Writes <a href="http://www.graphviz.org">Graphviz</a> dot files to illustrate
+ * the topology of Crunch pipelines.
+ */
+public class DotfileWriter {
+  
+  /** The types of tasks within a MapReduce job. */
+  enum MRTaskType { MAP, REDUCE };
+
+  private Set<JobPrototype> jobPrototypes = Sets.newHashSet();
+  private HashMultimap<Pair<JobPrototype, MRTaskType>, String> jobNodeDeclarations = HashMultimap.create();
+  private Set<String> globalNodeDeclarations = Sets.newHashSet();
+  private Set<String> nodePathChains = Sets.newHashSet();
+
+  /**
+   * Format the declaration of a node based on a PCollection.
+   * 
+   * @param pcollectionImpl PCollection for which a node will be declared
+   * @param jobPrototype The job containing the PCollection
+   * @return The node declaration
+   */
+  String formatPCollectionNodeDeclaration(PCollectionImpl<?> pcollectionImpl, JobPrototype jobPrototype) {
+    String shape = "box";
+    if (pcollectionImpl instanceof InputCollection) {
+      shape = "folder";
+    }
+    return String.format("%s [label=\"%s\" shape=%s];",
+        formatPCollection(pcollectionImpl, jobPrototype), pcollectionImpl.getName(), shape);
+  }
+
+  /**
+   * Format a Target as a node declaration.
+   * 
+   * @param target A Target used within a MapReduce pipeline
+   * @return The global node declaration for the Target
+   */
+  String formatTargetNodeDeclaration(Target target) {
+    return String.format("\"%s\" [label=\"%s\" shape=folder];", target.toString(), target.toString());
+  }
+
+  /**
+   * Format a PCollectionImpl into a format to be used for dot files.
+   * 
+   * @param pcollectionImpl The PCollectionImpl to be formatted
+   * @param jobPrototype The job containing the PCollection
+   * @return The dot file formatted representation of the PCollectionImpl
+   */
+  String formatPCollection(PCollectionImpl<?> pcollectionImpl, JobPrototype jobPrototype) {
+    if (pcollectionImpl instanceof InputCollection) {
+      InputCollection<?> inputCollection = (InputCollection<?>) pcollectionImpl;
+      return String.format("\"%s\"", inputCollection.getSource());
+    }
+    return String.format("\"%s@%d@%d\"", pcollectionImpl.getName(), pcollectionImpl.hashCode(), jobPrototype.hashCode());
+  }
+
+  /**
+   * Format a collection of node strings into dot file syntax.
+   * 
+   * @param nodeCollection Collection of chained node strings
+   * @return The dot-formatted chain of nodes
+   */
+  String formatNodeCollection(List<String> nodeCollection) {
+    return String.format("%s;", Joiner.on(" -> ").join(nodeCollection));
+  }
+
+  /**
+   * Format a NodePath in dot file syntax.
+   * 
+   * @param nodePath The node path to be formatted
+   * @param jobPrototype The job containing the NodePath
+   * @return The dot file representation of the node path
+   */
+  List<String> formatNodePath(NodePath nodePath, JobPrototype jobPrototype) {
+    List<String> formattedNodePaths = Lists.newArrayList();
+    
+    List<PCollectionImpl<?>> pcollections = Lists.newArrayList(nodePath);
+    for (int collectionIndex = 1; collectionIndex < pcollections.size(); collectionIndex++){
+      String fromNode = formatPCollection(pcollections.get(collectionIndex - 1), jobPrototype);
+      String toNode = formatPCollection(pcollections.get(collectionIndex), jobPrototype);
+      formattedNodePaths.add(formatNodeCollection(Lists.newArrayList(fromNode, toNode)));
+    }
+    return formattedNodePaths;
+  }
+
+  /**
+   * Add a NodePath to be formatted as a list of node declarations within a
+   * single job.
+   * 
+   * @param jobPrototype The job containing the node path
+   * @param nodePath The node path to be formatted
+   */
+  void addNodePathDeclarations(JobPrototype jobPrototype, NodePath nodePath) {
+    boolean groupingEncountered = false;
+    for (PCollectionImpl<?> pcollectionImpl : nodePath) {
+      if (pcollectionImpl instanceof InputCollection) {
+        globalNodeDeclarations.add(formatPCollectionNodeDeclaration(pcollectionImpl, jobPrototype));
+      } else {
+        if (!groupingEncountered){
+          groupingEncountered = (pcollectionImpl instanceof PGroupedTableImpl);
+        }
+
+        MRTaskType taskType = groupingEncountered ? MRTaskType.REDUCE : MRTaskType.MAP;
+        jobNodeDeclarations.put(Pair.of(jobPrototype, taskType), formatPCollectionNodeDeclaration(pcollectionImpl, jobPrototype));
+      }
+    }
+  }
+
+  /**
+   * Add the chaining of a NodePath to the graph.
+   * 
+   * @param nodePath The path to be formatted as a node chain in the dot file
+   * @param jobPrototype The job containing the NodePath
+   */
+  void addNodePathChain(NodePath nodePath, JobPrototype jobPrototype) {
+    for (String nodePathChain : formatNodePath(nodePath, jobPrototype)){
+      this.nodePathChains.add(nodePathChain);
+    }
+  }
+
+  /**
+   * Get the graph attributes for a task-specific subgraph.
+   * 
+   * @param taskType The type of task in the subgraph
+   * @return Graph attributes
+   */
+  String getTaskGraphAttributes(MRTaskType taskType) {
+    if (taskType == MRTaskType.MAP) {
+      return "label = Map; color = blue;";
+    } else {
+      return "label = Reduce; color = red;";
+    }
+  }
+
+  /**
+   * Add the contents of a {@link JobPrototype} to the graph describing a
+   * pipeline.
+   * 
+   * @param jobPrototype A JobPrototype representing a portion of a MapReduce
+   *          pipeline
+   */
+  public void addJobPrototype(JobPrototype jobPrototype) {
+    jobPrototypes.add(jobPrototype);
+    if (!jobPrototype.isMapOnly()) {
+      for (NodePath nodePath : jobPrototype.getMapNodePaths()) {
+        addNodePathDeclarations(jobPrototype, nodePath);
+        addNodePathChain(nodePath, jobPrototype);
+      }
+    }
+
+    HashMultimap<Target, NodePath> targetsToNodePaths = jobPrototype.getTargetsToNodePaths();
+    for (Target target : targetsToNodePaths.keySet()) {
+      globalNodeDeclarations.add(formatTargetNodeDeclaration(target));
+      for (NodePath nodePath : targetsToNodePaths.get(target)) {
+        addNodePathDeclarations(jobPrototype, nodePath);
+        addNodePathChain(nodePath, jobPrototype);
+        nodePathChains.add(formatNodeCollection(Lists.newArrayList(formatPCollection(nodePath.descendingIterator()
+            .next(), jobPrototype), String.format("\"%s\"", target.toString()))));
+      }
+    }
+  }
+
+  /**
+   * Build up the full dot file containing the description of a MapReduce
+   * pipeline.
+   * 
+   * @return Graphviz dot file contents
+   */
+  public String buildDotfile() {
+    StringBuilder stringBuilder = new StringBuilder();
+    stringBuilder.append("digraph G {\n");
+    int clusterIndex = 0;
+
+    for (String globalDeclaration : globalNodeDeclarations) {
+      stringBuilder.append(String.format("  %s\n", globalDeclaration));
+    }
+
+    for (JobPrototype jobPrototype : jobPrototypes){
+      StringBuilder jobProtoStringBuilder = new StringBuilder();
+      jobProtoStringBuilder.append(String.format("  subgraph cluster%d {\n", clusterIndex++));
+      for (MRTaskType taskType : MRTaskType.values()){
+        Pair<JobPrototype,MRTaskType> jobTaskKey = Pair.of(jobPrototype, taskType);
+        if (jobNodeDeclarations.containsKey(jobTaskKey)){
+          jobProtoStringBuilder.append(String.format("    subgraph cluster%d {\n", clusterIndex++));
+          jobProtoStringBuilder.append(String.format("      %s\n", getTaskGraphAttributes(taskType)));
+          for (String declarationEntry : jobNodeDeclarations.get(jobTaskKey)){
+            jobProtoStringBuilder.append(String.format("      %s\n", declarationEntry));
+          }
+          jobProtoStringBuilder.append("    }\n");
+        }
+      }
+      jobProtoStringBuilder.append("  }\n");
+      stringBuilder.append(jobProtoStringBuilder.toString());
+    }
+    
+    for (String nodePathChain : nodePathChains) {
+      stringBuilder.append(String.format("  %s\n", nodePathChain));
+    }
+
+    stringBuilder.append("}\n");
+    return stringBuilder.toString();
+  }
+
+}
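
For reference, a minimal sketch of how the writer above could be driven once the planner has produced its JobPrototype instances (the planner wiring is not part of this file); the wrapper class and method name here are hypothetical:

    package org.apache.crunch.impl.mr.plan;

    // Illustrative sketch only: JobPrototype instances are created by the planner.
    class DotfileExample {
      static String describePipeline(Iterable<JobPrototype> prototypes) {
        DotfileWriter writer = new DotfileWriter();
        for (JobPrototype prototype : prototypes) {
          writer.addJobPrototype(prototype);  // declare nodes and edges for this job
        }
        return writer.buildDotfile();         // "digraph G { ... }" ready for Graphviz
      }
    }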

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/Edge.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/Edge.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/Edge.java
new file mode 100644
index 0000000..1e59df0
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/Edge.java
@@ -0,0 +1,125 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.plan;
+
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.lang.builder.HashCodeBuilder;
+import org.apache.commons.lang.builder.ReflectionToStringBuilder;
+import org.apache.commons.lang.builder.ToStringStyle;
+import org.apache.crunch.impl.mr.collect.PCollectionImpl;
+import org.apache.crunch.impl.mr.collect.PGroupedTableImpl;
+
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+
+/**
+ * An edge in the plan {@code Graph}, connecting a head {@code Vertex} to a tail
+ * {@code Vertex} and tracking the {@code NodePath}s that run between them.
+ */
+class Edge {
+  private final Vertex head;
+  private final Vertex tail;
+  private final Set<NodePath> paths;
+  
+  public Edge(Vertex head, Vertex tail) {
+    this.head = head;
+    this.tail = tail;
+    this.paths = Sets.newHashSet();
+  }
+  
+  public Vertex getHead() {
+    return head;
+  }
+  
+  public Vertex getTail() {
+    return tail;
+  }
+
+  public void addNodePath(NodePath path) {
+    this.paths.add(path);
+  }
+  
+  public void addAllNodePaths(Collection<NodePath> paths) {
+    this.paths.addAll(paths);
+  }
+  
+  public Set<NodePath> getNodePaths() {
+    return paths;
+  }
+  
+  public PCollectionImpl getSplit() {
+    List<Iterator<PCollectionImpl<?>>> iters = Lists.newArrayList();
+    for (NodePath nodePath : paths) {
+      Iterator<PCollectionImpl<?>> iter = nodePath.iterator();
+      iter.next(); // prime this past the initial PGroupedTableImpl
+      iters.add(iter);
+    }
+
+    // Find the lowest point with the lowest cost to use as the split point
+    // for all of the dependent paths.
+    boolean end = false;
+    int splitIndex = -1;
+    while (!end) {
+      splitIndex++;
+      PCollectionImpl<?> current = null;
+      for (Iterator<PCollectionImpl<?>> iter : iters) {
+        if (iter.hasNext()) {
+          PCollectionImpl<?> next = iter.next();
+          if (next instanceof PGroupedTableImpl) {
+            end = true;
+            break;
+          } else if (current == null) {
+            current = next;
+          } else if (current != next) {
+            end = true;
+            break;
+          }
+        } else {
+          end = true;
+          break;
+        }
+      }
+    }
+    // TODO: Add costing calcs here.
+    
+    return Iterables.getFirst(paths, null).get(splitIndex);
+  }
+  
+  @Override
+  public boolean equals(Object other) {
+    if (other == null || !(other instanceof Edge)) {
+      return false;
+    }
+    Edge e = (Edge) other;
+    return head.equals(e.head) && tail.equals(e.tail) && paths.equals(e.paths);
+  }
+  
+  @Override
+  public int hashCode() {
+    return new HashCodeBuilder().append(head).append(tail).toHashCode();
+  }
+  
+  @Override
+  public String toString() {
+    return ReflectionToStringBuilder.toString(this, ToStringStyle.SHORT_PREFIX_STYLE);
+  }
+}
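
getSplit() above advances an iterator over every NodePath in lockstep and stops at the first position where the paths diverge, hit another grouping, or run out, then returns the last collection that all of the paths share. A self-contained sketch of the same scan over plain string paths (the grouping check is omitted; none of this is Crunch API):

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Iterator;
    import java.util.List;

    // Standalone sketch of the lockstep scan in Edge#getSplit(); illustrative only.
    public class LockstepSplitSketch {

      // Index of the last element shared by every path (element 0 is assumed shared).
      static int splitIndex(List<List<String>> paths) {
        List<Iterator<String>> iters = new ArrayList<Iterator<String>>();
        for (List<String> path : paths) {
          Iterator<String> iter = path.iterator();
          iter.next(); // prime past the shared starting element
          iters.add(iter);
        }
        boolean end = false;
        int index = -1;
        while (!end) {
          index++;
          String current = null;
          for (Iterator<String> iter : iters) {
            if (!iter.hasNext()) {
              end = true;
              break;
            }
            String next = iter.next();
            if (current == null) {
              current = next;
            } else if (!current.equals(next)) {
              end = true;
              break;
            }
          }
        }
        return index;
      }

      public static void main(String[] args) {
        List<List<String>> paths = Arrays.asList(
            Arrays.asList("gbk", "a", "b", "x"),
            Arrays.asList("gbk", "a", "b", "y"));
        System.out.println(splitIndex(paths)); // prints 2: "b" is the last shared element
      }
    }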

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/Graph.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/Graph.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/Graph.java
new file mode 100644
index 0000000..ce0a847
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/Graph.java
@@ -0,0 +1,133 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.plan;
+
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.crunch.Pair;
+import org.apache.crunch.impl.mr.collect.PCollectionImpl;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+
+/**
+ * The graph of {@code Vertex} and {@code Edge} instances used to model a
+ * pipeline plan, with support for tracking dependencies between vertices and
+ * finding connected components.
+ */
+class Graph implements Iterable<Vertex> {
+
+  private final Map<PCollectionImpl, Vertex> vertices;
+  private final Map<Pair<Vertex, Vertex>, Edge> edges;  
+  private final Map<Vertex, List<Vertex>> dependencies;
+  
+  public Graph() {
+    this.vertices = Maps.newHashMap();
+    this.edges = Maps.newHashMap();
+    this.dependencies = Maps.newHashMap();
+  }
+  
+  public Vertex getVertexAt(PCollectionImpl impl) {
+    return vertices.get(impl);
+  }
+  
+  public Vertex addVertex(PCollectionImpl impl, boolean output) {
+    if (vertices.containsKey(impl)) {
+      Vertex v = vertices.get(impl);
+      if (output) {
+        v.setOutput();
+      }
+      return v;
+    }
+    Vertex v = new Vertex(impl);
+    vertices.put(impl, v);
+    if (output) {
+      v.setOutput();
+    }
+    return v;
+  }
+  
+  public Edge getEdge(Vertex head, Vertex tail) {
+    Pair<Vertex, Vertex> p = Pair.of(head, tail);
+    if (edges.containsKey(p)) {
+      return edges.get(p);
+    }
+    
+    Edge e = new Edge(head, tail);
+    edges.put(p, e);
+    tail.addIncoming(e);
+    head.addOutgoing(e);
+    return e;
+  }
+  
+  @Override
+  public Iterator<Vertex> iterator() {
+    return Sets.newHashSet(vertices.values()).iterator();
+  }
+
+  public Set<Edge> getAllEdges() {
+    return Sets.newHashSet(edges.values());
+  }
+  
+  public void markDependency(Vertex child, Vertex parent) {
+    List<Vertex> parents = dependencies.get(child);
+    if (parents == null) {
+      parents = Lists.newArrayList();
+      dependencies.put(child, parents);
+    }
+    parents.add(parent);
+  }
+  
+  public List<Vertex> getParents(Vertex child) {
+    if (dependencies.containsKey(child)) {
+      return dependencies.get(child);
+    }
+    return ImmutableList.of();
+  }
+  
+  public List<List<Vertex>> connectedComponents() {
+    List<List<Vertex>> components = Lists.newArrayList();
+    Set<Vertex> unassigned = Sets.newHashSet(vertices.values());
+    while (!unassigned.isEmpty()) {
+      Vertex base = unassigned.iterator().next();
+      List<Vertex> component = Lists.newArrayList();
+      component.add(base);
+      unassigned.remove(base);
+      Set<Vertex> working = Sets.newHashSet(base.getAllNeighbors());
+      while (!working.isEmpty()) {
+        Vertex n = working.iterator().next();
+        working.remove(n);
+        if (unassigned.contains(n)) {
+          component.add(n);
+          unassigned.remove(n);
+          for (Vertex n2 : n.getAllNeighbors()) {
+            if (unassigned.contains(n2)) {
+              working.add(n2);
+            }
+          }
+        }
+      }
+      components.add(component);
+    }
+    
+    return components;
+  }  
+}
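
connectedComponents() above is a breadth-first sweep over the undirected neighbor relation. The same sweep, reduced to integer node ids so it can run standalone (illustrative only, not Crunch API):

    import java.util.ArrayDeque;
    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Deque;
    import java.util.HashMap;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Map;
    import java.util.Set;

    // Standalone sketch of the sweep in Graph#connectedComponents().
    public class ComponentsSketch {

      static List<List<Integer>> components(Map<Integer, Set<Integer>> neighbors) {
        List<List<Integer>> components = new ArrayList<List<Integer>>();
        Set<Integer> unassigned = new HashSet<Integer>(neighbors.keySet());
        while (!unassigned.isEmpty()) {
          Integer base = unassigned.iterator().next();
          List<Integer> component = new ArrayList<Integer>();
          component.add(base);
          unassigned.remove(base);
          Deque<Integer> working = new ArrayDeque<Integer>(neighbors.get(base));
          while (!working.isEmpty()) {
            Integer node = working.poll();
            if (unassigned.remove(node)) {          // not yet assigned to a component
              component.add(node);
              for (Integer next : neighbors.get(node)) {
                if (unassigned.contains(next)) {
                  working.add(next);
                }
              }
            }
          }
          components.add(component);
        }
        return components;
      }

      public static void main(String[] args) {
        Map<Integer, Set<Integer>> g = new HashMap<Integer, Set<Integer>>();
        g.put(1, new HashSet<Integer>(Arrays.asList(2)));
        g.put(2, new HashSet<Integer>(Arrays.asList(1)));
        g.put(3, new HashSet<Integer>());
        System.out.println(components(g)); // two components, e.g. [[1, 2], [3]]
      }
    }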

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/GraphBuilder.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/GraphBuilder.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/GraphBuilder.java
new file mode 100644
index 0000000..925c39a
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/GraphBuilder.java
@@ -0,0 +1,92 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.plan;
+
+import org.apache.crunch.impl.mr.collect.DoCollectionImpl;
+import org.apache.crunch.impl.mr.collect.DoTableImpl;
+import org.apache.crunch.impl.mr.collect.InputCollection;
+import org.apache.crunch.impl.mr.collect.PCollectionImpl;
+import org.apache.crunch.impl.mr.collect.PGroupedTableImpl;
+import org.apache.crunch.impl.mr.collect.UnionCollection;
+
+/**
+ * A {@code PCollectionImpl.Visitor} that walks backwards from each output
+ * collection to build the plan {@code Graph}.
+ */
+class GraphBuilder implements PCollectionImpl.Visitor {
+
+  private Graph graph = new Graph();
+  private Vertex workingVertex;
+  private NodePath workingPath;
+  
+  public Graph getGraph() {
+    return graph;
+  }
+  
+  public void visitOutput(PCollectionImpl<?> output) {
+    workingVertex = graph.addVertex(output, true);
+    workingPath = new NodePath();
+    output.accept(this);
+  }
+  
+  @Override
+  public void visitInputCollection(InputCollection<?> collection) {
+    Vertex v = graph.addVertex(collection, false);
+    graph.getEdge(v, workingVertex).addNodePath(workingPath.close(collection));
+  }
+
+  @Override
+  public void visitUnionCollection(UnionCollection<?> collection) {
+    Vertex baseVertex = workingVertex;
+    NodePath basePath = workingPath;
+    for (PCollectionImpl<?> parent : collection.getParents()) {
+      workingPath = new NodePath(basePath);
+      workingVertex = baseVertex;
+      processParent(parent);
+    }
+  }
+
+  @Override
+  public void visitDoFnCollection(DoCollectionImpl<?> collection) {
+    workingPath.push(collection);
+    processParent(collection.getOnlyParent());
+  }
+
+  @Override
+  public void visitDoTable(DoTableImpl<?, ?> collection) {
+    workingPath.push(collection);
+    processParent(collection.getOnlyParent());
+  }
+
+  @Override
+  public void visitGroupedTable(PGroupedTableImpl<?, ?> collection) {
+    Vertex v = graph.addVertex(collection, false);
+    graph.getEdge(v, workingVertex).addNodePath(workingPath.close(collection));
+    workingVertex = v;
+    workingPath = new NodePath(collection);
+    processParent(collection.getOnlyParent());
+  }
+  
+  private void processParent(PCollectionImpl<?> parent) {
+    Vertex v = graph.getVertexAt(parent);
+    if (v == null) {
+      parent.accept(this);
+    } else {
+      graph.getEdge(v, workingVertex).addNodePath(workingPath.close(parent));
+    }
+  }
+}
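
A rough sketch of how the visitor above would be driven, assuming the planner supplies the output collections (that wiring is not part of this diff); the wrapper class here is hypothetical:

    package org.apache.crunch.impl.mr.plan;

    import org.apache.crunch.impl.mr.collect.PCollectionImpl;

    // Illustrative only: output collections are provided by the MR planner.
    class GraphBuilderExample {
      static Graph buildPlanGraph(Iterable<PCollectionImpl<?>> outputs) {
        GraphBuilder builder = new GraphBuilder();
        for (PCollectionImpl<?> output : outputs) {
          builder.visitOutput(output); // walk back from each output toward its inputs
        }
        return builder.getGraph();     // vertices, edges and NodePaths for the plan
      }
    }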

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobNameBuilder.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobNameBuilder.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobNameBuilder.java
new file mode 100644
index 0000000..9ad7300
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobNameBuilder.java
@@ -0,0 +1,79 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.plan;
+
+import java.util.List;
+
+import com.google.common.base.Joiner;
+import com.google.common.collect.Lists;
+
+/**
+ * Visitor that traverses the {@code DoNode} instances in a job and builds a
+ * String that identifies the stages of the pipeline that belong to this job.
+ */
+class JobNameBuilder {
+
+  private static final Joiner JOINER = Joiner.on("+");
+  private static final Joiner CHILD_JOINER = Joiner.on("/");
+
+  private String pipelineName;
+  List<String> rootStack = Lists.newArrayList();
+
+  public JobNameBuilder(final String pipelineName) {
+    this.pipelineName = pipelineName;
+  }
+
+  public void visit(DoNode node) {
+    visit(node, rootStack);
+  }
+
+  public void visit(List<DoNode> nodes) {
+    visit(nodes, rootStack);
+  }
+
+  private void visit(List<DoNode> nodes, List<String> stack) {
+    if (nodes.size() == 1) {
+      visit(nodes.get(0), stack);
+    } else {
+      List<String> childStack = Lists.newArrayList();
+      for (int i = 0; i < nodes.size(); i++) {
+        DoNode node = nodes.get(i);
+        List<String> subStack = Lists.newArrayList();
+        visit(node, subStack);
+        if (!subStack.isEmpty()) {
+          childStack.add("[" + JOINER.join(subStack) + "]");
+        }
+      }
+      if (!childStack.isEmpty()) {
+        stack.add("[" + CHILD_JOINER.join(childStack) + "]");
+      }
+    }
+  }
+
+  private void visit(DoNode node, List<String> stack) {
+    String name = node.getName();
+    if (!name.isEmpty()) {
+      stack.add(node.getName());
+    }
+    visit(node.getChildren(), stack);
+  }
+
+  public String build() {
+    return String.format("%s: %s", pipelineName, JOINER.join(rootStack));
+  }
+}
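
As a hedged illustration of how the builder above is meant to be used (the DoNode trees come from the planner and are not constructed here; the wrapper class is hypothetical):

    package org.apache.crunch.impl.mr.plan;

    import java.util.List;

    // Sketch only: DoNode trees are produced elsewhere by the planner.
    class JobNameExample {
      static String nameFor(String pipelineName, List<DoNode> rootNodes) {
        JobNameBuilder builder = new JobNameBuilder(pipelineName);
        builder.visit(rootNodes);  // collects the non-empty node names in order
        return builder.build();    // e.g. "WordCount: Tokenize+Sum"
      }
    }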


[41/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/fn/AggregatorsIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/fn/AggregatorsIT.java b/crunch-core/src/it/java/org/apache/crunch/fn/AggregatorsIT.java
new file mode 100644
index 0000000..c9584a1
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/fn/AggregatorsIT.java
@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.fn;
+
+import static org.apache.crunch.fn.Aggregators.SUM_INTS;
+import static org.apache.crunch.fn.Aggregators.pairAggregator;
+import static org.apache.crunch.types.writable.Writables.ints;
+import static org.apache.crunch.types.writable.Writables.pairs;
+import static org.apache.crunch.types.writable.Writables.strings;
+import static org.apache.crunch.types.writable.Writables.tableOf;
+import static org.hamcrest.Matchers.is;
+import static org.junit.Assert.assertThat;
+
+import java.util.Collection;
+import java.util.Map;
+
+import org.apache.crunch.MapFn;
+import org.apache.crunch.PCollection;
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pair;
+import org.apache.crunch.Pipeline;
+import org.apache.crunch.test.Tests;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+
+@RunWith(Parameterized.class)
+public class AggregatorsIT {
+  private Pipeline pipeline;
+
+  @Parameters
+  public static Collection<Object[]> params() {
+    return Tests.pipelinesParams(AggregatorsIT.class);
+  }
+
+  public AggregatorsIT(Pipeline pipeline) {
+    this.pipeline = pipeline;
+  }
+
+  @Test
+  public void testPairAggregator() {
+    PCollection<String> lines = pipeline.readTextFile(Tests.pathTo(this, "ints.txt"));
+
+    PTable<String, Pair<Integer, Integer>> table = lines.parallelDo(new SplitLine(),
+        tableOf(strings(), pairs(ints(), ints())));
+
+    PTable<String, Pair<Integer, Integer>> combinedTable = table.groupByKey().combineValues(
+        pairAggregator(SUM_INTS(), SUM_INTS()));
+
+    Map<String, Pair<Integer, Integer>> result = combinedTable.asMap().getValue();
+
+    assertThat(result.size(), is(2));
+    assertThat(result.get("a"), is(Pair.of(9,  12)));
+    assertThat(result.get("b"), is(Pair.of(11,  13)));
+  }
+
+  private static final class SplitLine extends MapFn<String, Pair<String, Pair<Integer, Integer>>> {
+    @Override
+    public Pair<String, Pair<Integer, Integer>> map(String input) {
+      String[] split = input.split("\t");
+      return Pair.of(split[0],
+          Pair.of(Integer.parseInt(split[1]), Integer.parseInt(split[2])));
+    }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/impl/mem/MemPipelineFileWritingIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/impl/mem/MemPipelineFileWritingIT.java b/crunch-core/src/it/java/org/apache/crunch/impl/mem/MemPipelineFileWritingIT.java
new file mode 100644
index 0000000..976a43e
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/impl/mem/MemPipelineFileWritingIT.java
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mem;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.util.List;
+
+import org.apache.crunch.PCollection;
+import org.apache.crunch.Pipeline;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.junit.Rule;
+import org.junit.Test;
+
+import com.google.common.base.Charsets;
+import com.google.common.collect.ImmutableList;
+import com.google.common.io.Files;
+
+public class MemPipelineFileWritingIT {
+  @Rule
+  public TemporaryPath baseTmpDir = TemporaryPaths.create();
+
+  @Test
+  public void testMemPipelineFileWriter() throws Exception {
+    File tmpDir = baseTmpDir.getFile("mempipe");
+    Pipeline p = MemPipeline.getInstance();
+    PCollection<String> lines = MemPipeline.collectionOf("hello", "world");
+    p.writeTextFile(lines, tmpDir.toString());
+    p.done();
+    assertTrue(tmpDir.exists());
+    File[] files = tmpDir.listFiles();
+    assertTrue(files != null && files.length > 0);
+    for (File f : files) {
+      if (!f.getName().startsWith(".")) {
+        List<String> txt = Files.readLines(f, Charsets.UTF_8);
+        assertEquals(ImmutableList.of("hello", "world"), txt);
+      }
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/impl/mr/collect/UnionCollectionIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/impl/mr/collect/UnionCollectionIT.java b/crunch-core/src/it/java/org/apache/crunch/impl/mr/collect/UnionCollectionIT.java
new file mode 100644
index 0000000..f9f73b2
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/impl/mr/collect/UnionCollectionIT.java
@@ -0,0 +1,154 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.collect;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.crunch.PCollection;
+import org.apache.crunch.PTableKeyValueIT;
+import org.apache.crunch.Pipeline;
+import org.apache.crunch.impl.mem.MemPipeline;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.io.At;
+import org.apache.crunch.io.To;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.PTypeFamily;
+import org.apache.crunch.types.avro.AvroTypeFamily;
+import org.apache.crunch.types.avro.Avros;
+import org.apache.crunch.types.writable.WritableTypeFamily;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+import com.google.common.collect.Lists;
+
+@RunWith(value = Parameterized.class)
+public class UnionCollectionIT {
+  @Rule
+  public TemporaryPath tmpDir = TemporaryPaths.create();
+
+  private static final Log LOG = LogFactory.getLog(UnionCollectionIT.class);
+
+  private PTypeFamily typeFamily;
+  private Pipeline pipeline;
+  private PCollection<String> union;
+
+  private ArrayList<String> EXPECTED = Lists.newArrayList("a", "a", "b", "c", "c", "d", "e");
+
+  private Class pipelineClass;
+
+  @Before
+  @SuppressWarnings("unchecked")
+  public void setUp() throws IOException {
+    String inputFile1 = tmpDir.copyResourceFileName("set1.txt");
+    String inputFile2 = tmpDir.copyResourceFileName("set2.txt");
+    if (pipelineClass == null) {
+      pipeline = MemPipeline.getInstance();
+    } else {
+      pipeline = new MRPipeline(pipelineClass, tmpDir.getDefaultConfiguration());
+    }
+    PCollection<String> firstCollection = pipeline.read(At.textFile(inputFile1, typeFamily.strings()));
+    PCollection<String> secondCollection = pipeline.read(At.textFile(inputFile2, typeFamily.strings()));
+
+    LOG.info("Test fixture: [" + pipeline.getClass().getSimpleName() + " : " + typeFamily.getClass().getSimpleName()
+        + "]  First: " + Lists.newArrayList(firstCollection.materialize().iterator()) + ", Second: "
+        + Lists.newArrayList(secondCollection.materialize().iterator()));
+
+    union = secondCollection.union(firstCollection);
+  }
+
+  @Parameters
+  public static Collection<Object[]> data() throws IOException {
+    Object[][] data = new Object[][] { { WritableTypeFamily.getInstance(), PTableKeyValueIT.class },
+        { WritableTypeFamily.getInstance(), null }, { AvroTypeFamily.getInstance(), PTableKeyValueIT.class },
+        { AvroTypeFamily.getInstance(), null } };
+    return Arrays.asList(data);
+  }
+
+  public UnionCollectionIT(PTypeFamily typeFamily, Class pipelineClass) {
+    this.typeFamily = typeFamily;
+    this.pipelineClass = pipelineClass;
+  }
+
+  @Test
+  public void unionMaterializeShouldNotThrowNPE() throws Exception {
+    checkMaterialized(union.materialize());
+    checkMaterialized(pipeline.materialize(union));
+  }
+
+  private void checkMaterialized(Iterable<String> materialized) {
+    List<String> materializedValues = Lists.newArrayList(materialized.iterator());
+    Collections.sort(materializedValues);
+    LOG.info("Materialized union: " + materializedValues);
+    assertEquals(EXPECTED, materializedValues);
+  }
+
+  @Test
+  public void unionWriteShouldNotThrowNPE() throws IOException {
+    String outputPath1 = tmpDir.getFileName("output1");
+    String outputPath2 = tmpDir.getFileName("output2");
+    String outputPath3 = tmpDir.getFileName("output3");
+
+    if (typeFamily == AvroTypeFamily.getInstance()) {
+      union.write(To.avroFile(outputPath1));
+      pipeline.write(union, To.avroFile(outputPath2));
+
+      pipeline.run();
+
+      checkFileContents(outputPath1);
+      checkFileContents(outputPath2);
+
+    } else {
+
+      union.write(To.textFile(outputPath1));
+      pipeline.write(union, To.textFile(outputPath2));
+      pipeline.writeTextFile(union, outputPath3);
+
+      pipeline.run();
+
+      checkFileContents(outputPath1);
+      checkFileContents(outputPath2);
+      checkFileContents(outputPath3);
+    }
+  }
+
+  private void checkFileContents(String filePath) throws IOException {
+
+    List<String> fileContentValues;
+    if (typeFamily == AvroTypeFamily.getInstance() && pipeline instanceof MRPipeline) {
+      fileContentValues = Lists.newArrayList(
+          pipeline.read(At.avroFile(filePath, Avros.strings())).materialize().iterator());
+    } else {
+      fileContentValues = Lists.newArrayList(
+          pipeline.read(At.textFile(filePath, typeFamily.strings())).materialize().iterator());
+    }
+
+    Collections.sort(fileContentValues);
+
+    LOG.info("Saved Union: " + fileContentValues);
+    assertEquals(EXPECTED, fileContentValues);
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/io/CompositePathIterableIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/io/CompositePathIterableIT.java b/crunch-core/src/it/java/org/apache/crunch/io/CompositePathIterableIT.java
new file mode 100644
index 0000000..08d226d
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/io/CompositePathIterableIT.java
@@ -0,0 +1,84 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.crunch.io.text.TextFileReaderFactory;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.writable.Writables;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.Path;
+import org.junit.Rule;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+public class CompositePathIterableIT {
+  @Rule
+  public TemporaryPath tmpDir = TemporaryPaths.create();
+
+  @Test
+  public void testCreate_FilePresent() throws IOException {
+    String inputFilePath = tmpDir.copyResourceFileName("set1.txt");
+    Configuration conf = new Configuration();
+    LocalFileSystem local = FileSystem.getLocal(conf);
+
+    Iterable<String> iterable = CompositePathIterable.create(local, new Path(inputFilePath),
+        new TextFileReaderFactory<String>(Writables.strings()));
+
+    assertEquals(Lists.newArrayList("b", "c", "a", "e"), Lists.newArrayList(iterable));
+
+  }
+
+  @Test
+  public void testCreate_DirectoryPresentButNoFiles() throws IOException {
+    Path emptyInputDir = tmpDir.getRootPath();
+
+    Configuration conf = new Configuration();
+    LocalFileSystem local = FileSystem.getLocal(conf);
+
+    Iterable<String> iterable = CompositePathIterable.create(local, emptyInputDir,
+        new TextFileReaderFactory<String>(Writables.strings()));
+
+    assertTrue(Lists.newArrayList(iterable).isEmpty());
+  }
+
+  @Test(expected = IOException.class)
+  public void testCreate_DirectoryNotPresent() throws IOException {
+    File nonExistentDir = tmpDir.getFile("not-there");
+
+    // Sanity check
+    assertFalse(nonExistentDir.exists());
+
+    Configuration conf = new Configuration();
+    LocalFileSystem local = FileSystem.getLocal(conf);
+
+    CompositePathIterable.create(local, new Path(nonExistentDir.getAbsolutePath()), new TextFileReaderFactory<String>(
+        Writables.strings()));
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/io/NLineInputIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/io/NLineInputIT.java b/crunch-core/src/it/java/org/apache/crunch/io/NLineInputIT.java
new file mode 100644
index 0000000..52b8ff5
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/io/NLineInputIT.java
@@ -0,0 +1,72 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.crunch.DoFn;
+import org.apache.crunch.Emitter;
+import org.apache.crunch.PCollection;
+import org.apache.crunch.Pipeline;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.io.text.NLineFileSource;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.writable.Writables;
+import org.apache.crunch.types.avro.Avros;
+import org.apache.hadoop.conf.Configuration;
+import org.junit.Rule;
+import org.junit.Test;
+
+public class NLineInputIT {
+
+  @Rule
+  public TemporaryPath tmpDir = TemporaryPaths.create();
+  
+  @Test
+  public void testNLine() throws Exception {
+    String urlsInputPath = tmpDir.copyResourceFileName("urls.txt");
+    Configuration conf = new Configuration(tmpDir.getDefaultConfiguration());
+    conf.setInt("io.sort.mb", 10);
+    Pipeline pipeline = new MRPipeline(NLineInputIT.class, conf);
+    PCollection<String> urls = pipeline.read(new NLineFileSource<String>(urlsInputPath,
+        Writables.strings(), 2));
+    assertEquals(new Integer(2),
+        urls.parallelDo(new LineCountFn(), Avros.ints()).max().getValue());
+  }
+  
+  private static class LineCountFn extends DoFn<String, Integer> {
+
+    private int lineCount = 0;
+    
+    @Override
+    public void initialize() {
+      this.lineCount = 0;
+    }
+    
+    @Override
+    public void process(String input, Emitter<Integer> emitter) {
+      lineCount++;
+    }
+    
+    @Override
+    public void cleanup(Emitter<Integer> emitter) {
+      emitter.emit(lineCount);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/io/TextFileTableIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/io/TextFileTableIT.java b/crunch-core/src/it/java/org/apache/crunch/io/TextFileTableIT.java
new file mode 100644
index 0000000..bddc0b5
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/io/TextFileTableIT.java
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io;
+
+import static org.apache.crunch.types.writable.Writables.*;
+import static org.junit.Assert.assertEquals;
+
+import java.util.Set;
+
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pair;
+import org.apache.crunch.Pipeline;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.io.text.TextFileTableSource;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.junit.Rule;
+import org.junit.Test;
+
+import com.google.common.collect.ImmutableSet;
+
+/**
+ * Integration test for reading key/value pairs from text files via
+ * {@code TextFileTableSource}.
+ */
+public class TextFileTableIT {
+
+  @Rule
+  public TemporaryPath tmpDir = TemporaryPaths.create();
+  
+  @Test
+  public void testTextFileTable() throws Exception {
+    String urlsFile = tmpDir.copyResourceFileName("urls.txt");
+    Pipeline pipeline = new MRPipeline(TextFileTableIT.class, tmpDir.getDefaultConfiguration());
+    PTable<String, String> urls = pipeline.read(
+        new TextFileTableSource<String, String>(urlsFile, tableOf(strings(), strings())));
+    Set<Pair<String, Long>> cnts = ImmutableSet.copyOf(urls.keys().count().materialize());
+    assertEquals(ImmutableSet.of(Pair.of("www.A.com", 4L), Pair.of("www.B.com", 2L),
+        Pair.of("www.C.com", 1L), Pair.of("www.D.com", 1L), Pair.of("www.E.com", 1L),
+        Pair.of("www.F.com", 2L)), cnts);
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/io/avro/AvroFileSourceTargetIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/io/avro/AvroFileSourceTargetIT.java b/crunch-core/src/it/java/org/apache/crunch/io/avro/AvroFileSourceTargetIT.java
new file mode 100644
index 0000000..671b920
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/io/avro/AvroFileSourceTargetIT.java
@@ -0,0 +1,140 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.avro;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.List;
+
+import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericData.Record;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.reflect.ReflectData;
+import org.apache.crunch.PCollection;
+import org.apache.crunch.Pipeline;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.io.At;
+import org.apache.crunch.test.Person;
+import org.apache.crunch.test.StringWrapper;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.avro.Avros;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+@SuppressWarnings("serial")
+public class AvroFileSourceTargetIT implements Serializable {
+
+  private transient File avroFile;
+  @Rule
+  public transient TemporaryPath tmpDir = TemporaryPaths.create();
+
+  @Before
+  public void setUp() throws IOException {
+    avroFile = tmpDir.getFile("test.avro");
+  }
+
+  private void populateGenericFile(List<GenericRecord> genericRecords, Schema schema) throws IOException {
+    FileOutputStream outputStream = new FileOutputStream(this.avroFile);
+    GenericDatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<GenericRecord>(schema);
+
+    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(genericDatumWriter);
+    dataFileWriter.create(schema, outputStream);
+
+    for (GenericRecord record : genericRecords) {
+      dataFileWriter.append(record);
+    }
+
+    dataFileWriter.close();
+    outputStream.close();
+
+  }
+
+  @Test
+  public void testSpecific() throws IOException {
+    GenericRecord savedRecord = new GenericData.Record(Person.SCHEMA$);
+    savedRecord.put("name", "John Doe");
+    savedRecord.put("age", 42);
+    savedRecord.put("siblingnames", Lists.newArrayList("Jimmy", "Jane"));
+    populateGenericFile(Lists.newArrayList(savedRecord), Person.SCHEMA$);
+
+    Pipeline pipeline = new MRPipeline(AvroFileSourceTargetIT.class, tmpDir.getDefaultConfiguration());
+    PCollection<Person> genericCollection = pipeline.read(At.avroFile(avroFile.getAbsolutePath(),
+        Avros.records(Person.class)));
+
+    List<Person> personList = Lists.newArrayList(genericCollection.materialize());
+
+    Person expectedPerson = new Person();
+    expectedPerson.name = "John Doe";
+    expectedPerson.age = 42;
+
+    List<CharSequence> siblingNames = Lists.newArrayList();
+    siblingNames.add("Jimmy");
+    siblingNames.add("Jane");
+    expectedPerson.siblingnames = siblingNames;
+
+    assertEquals(Lists.newArrayList(expectedPerson), Lists.newArrayList(personList));
+  }
+
+  @Test
+  public void testGeneric() throws IOException {
+    String genericSchemaJson = Person.SCHEMA$.toString().replace("Person", "GenericPerson");
+    Schema genericPersonSchema = new Schema.Parser().parse(genericSchemaJson);
+    GenericRecord savedRecord = new GenericData.Record(genericPersonSchema);
+    savedRecord.put("name", "John Doe");
+    savedRecord.put("age", 42);
+    savedRecord.put("siblingnames", Lists.newArrayList("Jimmy", "Jane"));
+    populateGenericFile(Lists.newArrayList(savedRecord), genericPersonSchema);
+
+    Pipeline pipeline = new MRPipeline(AvroFileSourceTargetIT.class, tmpDir.getDefaultConfiguration());
+    PCollection<Record> genericCollection = pipeline.read(At.avroFile(avroFile.getAbsolutePath(),
+        Avros.generics(genericPersonSchema)));
+
+    List<Record> recordList = Lists.newArrayList(genericCollection.materialize());
+
+    assertEquals(Lists.newArrayList(savedRecord), Lists.newArrayList(recordList));
+  }
+
+  @Test
+  public void testReflect() throws IOException {
+    Schema pojoPersonSchema = ReflectData.get().getSchema(StringWrapper.class);
+    GenericRecord savedRecord = new GenericData.Record(pojoPersonSchema);
+    savedRecord.put("value", "stringvalue");
+    populateGenericFile(Lists.newArrayList(savedRecord), pojoPersonSchema);
+
+    Pipeline pipeline = new MRPipeline(AvroFileSourceTargetIT.class, tmpDir.getDefaultConfiguration());
+    PCollection<StringWrapper> stringValueCollection = pipeline.read(At.avroFile(avroFile.getAbsolutePath(),
+        Avros.reflects(StringWrapper.class)));
+
+    List<StringWrapper> recordList = Lists.newArrayList(stringValueCollection.materialize());
+
+    assertEquals(1, recordList.size());
+    StringWrapper stringWrapper = recordList.get(0);
+    assertEquals("stringvalue", stringWrapper.getValue());
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/io/avro/AvroPipelineIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/io/avro/AvroPipelineIT.java b/crunch-core/src/it/java/org/apache/crunch/io/avro/AvroPipelineIT.java
new file mode 100644
index 0000000..29bf4f5
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/io/avro/AvroPipelineIT.java
@@ -0,0 +1,95 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.crunch.io.avro;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.List;
+
+import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.commons.io.FileUtils;
+import org.apache.crunch.PCollection;
+import org.apache.crunch.Pipeline;
+import org.apache.crunch.Target;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.io.At;
+import org.apache.crunch.io.To;
+import org.apache.crunch.test.Person;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.avro.Avros;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+public class AvroPipelineIT implements Serializable {
+
+  private transient File avroFile;
+  @Rule
+  public transient TemporaryPath tmpDir = TemporaryPaths.create();
+
+  @Before
+  public void setUp() throws IOException {
+    avroFile = tmpDir.getFile("test.avro");
+  }
+
+  private void populateGenericFile(List<GenericRecord> genericRecords, Schema schema) throws IOException {
+    FileOutputStream outputStream = new FileOutputStream(this.avroFile);
+    GenericDatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<GenericRecord>(schema);
+
+    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(genericDatumWriter);
+    dataFileWriter.create(schema, outputStream);
+
+    for (GenericRecord record : genericRecords) {
+      dataFileWriter.append(record);
+    }
+
+    dataFileWriter.close();
+    outputStream.close();
+
+  }
+
+  @Test
+  public void toTextShouldWriteAvroDataAsDatumText() throws Exception {
+    GenericRecord savedRecord = new GenericData.Record(Person.SCHEMA$);
+    savedRecord.put("name", "John Doe");
+    savedRecord.put("age", 42);
+    savedRecord.put("siblingnames", Lists.newArrayList("Jimmy", "Jane"));
+    populateGenericFile(Lists.newArrayList(savedRecord), Person.SCHEMA$);
+
+    Pipeline pipeline = new MRPipeline(AvroPipelineIT.class, tmpDir.getDefaultConfiguration());
+    PCollection<Person> genericCollection = pipeline.read(At.avroFile(avroFile.getAbsolutePath(),
+        Avros.records(Person.class)));
+    File outputFile = tmpDir.getFile("output");
+    Target textFile = To.textFile(outputFile.getAbsolutePath());
+    pipeline.write(genericCollection, textFile);
+    pipeline.run();
+    Person person = genericCollection.materialize().iterator().next();
+    String outputString = FileUtils.readFileToString(new File(outputFile, "part-m-00000"));
+    assertTrue(outputString.contains(person.toString()));
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/io/avro/AvroReflectIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/io/avro/AvroReflectIT.java b/crunch-core/src/it/java/org/apache/crunch/io/avro/AvroReflectIT.java
new file mode 100644
index 0000000..7a90517
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/io/avro/AvroReflectIT.java
@@ -0,0 +1,109 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.avro;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.crunch.MapFn;
+import org.apache.crunch.PCollection;
+import org.apache.crunch.Pair;
+import org.apache.crunch.Pipeline;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.lib.Aggregate;
+import org.apache.crunch.test.Person;
+import org.apache.crunch.test.StringWrapper;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.avro.Avros;
+import org.junit.Assume;
+import org.junit.Rule;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+public class AvroReflectIT implements Serializable {
+
+  @Rule
+  public transient TemporaryPath tmpDir = TemporaryPaths.create();
+
+  @Test
+  public void testReflection() throws IOException {
+    Pipeline pipeline = new MRPipeline(AvroReflectIT.class, tmpDir.getDefaultConfiguration());
+    PCollection<StringWrapper> stringWrapperCollection = pipeline.readTextFile(tmpDir.copyResourceFileName("set1.txt"))
+        .parallelDo(new MapFn<String, StringWrapper>() {
+
+          @Override
+          public StringWrapper map(String input) {
+            StringWrapper stringWrapper = new StringWrapper();
+            stringWrapper.setValue(input);
+            return stringWrapper;
+          }
+        }, Avros.reflects(StringWrapper.class));
+
+    List<StringWrapper> stringWrappers = Lists.newArrayList(stringWrapperCollection.materialize());
+
+    pipeline.done();
+
+    assertEquals(Lists.newArrayList(new StringWrapper("b"), new StringWrapper("c"), new StringWrapper("a"),
+        new StringWrapper("e")), stringWrappers);
+
+  }
+
+  // Verify that running with a combination of reflect and specific schema
+  // doesn't crash
+  @Test
+  public void testCombinationOfReflectionAndSpecific() throws IOException {
+    Assume.assumeTrue(Avros.CAN_COMBINE_SPECIFIC_AND_REFLECT_SCHEMAS);
+    Pipeline pipeline = new MRPipeline(AvroReflectIT.class, tmpDir.getDefaultConfiguration());
+    PCollection<Pair<StringWrapper, Person>> hybridPairCollection = pipeline.readTextFile(
+        tmpDir.copyResourceFileName("set1.txt")).parallelDo(new MapFn<String, Pair<StringWrapper, Person>>() {
+
+      @Override
+      public Pair<StringWrapper, Person> map(String input) {
+        Person person = new Person();
+        person.name = input;
+        person.age = 42;
+        person.siblingnames = Lists.<CharSequence> newArrayList(input);
+
+        return Pair.of(new StringWrapper(input), person);
+      }
+    }, Avros.pairs(Avros.reflects(StringWrapper.class), Avros.records(Person.class)));
+
+    PCollection<Pair<String, Long>> countCollection = Aggregate.count(hybridPairCollection).parallelDo(
+        new MapFn<Pair<Pair<StringWrapper, Person>, Long>, Pair<String, Long>>() {
+
+          @Override
+          public Pair<String, Long> map(Pair<Pair<StringWrapper, Person>, Long> input) {
+            return Pair.of(input.first().first().getValue(), input.second());
+          }
+        }, Avros.pairs(Avros.strings(), Avros.longs()));
+
+    List<Pair<String, Long>> materialized = Lists.newArrayList(countCollection.materialize());
+    List<Pair<String, Long>> expected = Lists.newArrayList(Pair.of("a", 1L), Pair.of("b", 1L), Pair.of("c", 1L),
+        Pair.of("e", 1L));
+    Collections.sort(materialized);
+
+    assertEquals(expected, materialized);
+    pipeline.done();
+  }
+}
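As background for the Avros.reflects calls above: Avro reflect-based serialization generally expects a bean-style class with a no-argument constructor and mutable state. A minimal sketch of such a class (hypothetical LabelWrapper, not part of this commit; StringWrapper in the test sources plays the same role):

    // Hypothetical reflect-friendly bean, analogous to StringWrapper used in these tests.
    public class LabelWrapper {
      private String label = "";                 // non-null default for Avro reflect

      public LabelWrapper() { }                  // no-arg constructor used during deserialization

      public LabelWrapper(String label) { this.label = label; }

      public String getLabel() { return label; }
      public void setLabel(String label) { this.label = label; }

      @Override
      public boolean equals(Object other) {
        return other instanceof LabelWrapper && label.equals(((LabelWrapper) other).label);
      }

      @Override
      public int hashCode() { return label.hashCode(); }
    }

A collection of such beans can then be declared with Avros.reflects(LabelWrapper.class), exactly as the test does for StringWrapper.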

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/io/avro/AvroWritableIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/io/avro/AvroWritableIT.java b/crunch-core/src/it/java/org/apache/crunch/io/avro/AvroWritableIT.java
new file mode 100644
index 0000000..cbb7fde
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/io/avro/AvroWritableIT.java
@@ -0,0 +1,89 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.avro;
+
+import static org.apache.crunch.types.avro.Avros.ints;
+import static org.apache.crunch.types.avro.Avros.tableOf;
+import static org.apache.crunch.types.avro.Avros.writables;
+import static org.junit.Assert.assertEquals;
+
+import java.io.Serializable;
+import java.util.Map;
+
+import org.apache.crunch.CombineFn;
+import org.apache.crunch.Emitter;
+import org.apache.crunch.MapFn;
+import org.apache.crunch.PCollection;
+import org.apache.crunch.Pair;
+import org.apache.crunch.Pipeline;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.hadoop.io.DoubleWritable;
+import org.junit.Rule;
+import org.junit.Test;
+
+import com.google.common.collect.Maps;
+
+/**
+ * Verify handling of both a ByteBuffer and a byte array as input from an Avro job
+ * (depending on the version of Avro being used).
+ */
+public class AvroWritableIT implements Serializable {
+
+  @Rule
+  public transient TemporaryPath tmpDir = TemporaryPaths.create();
+  
+  @Test
+  public void testAvroBasedWritablePipeline() throws Exception {
+    String customersInputPath = tmpDir.copyResourceFileName("customers.txt");
+    Pipeline pipeline = new MRPipeline(AvroWritableIT.class, tmpDir.getDefaultConfiguration());
+    pipeline.enableDebug();
+    PCollection<String> customerLines = pipeline.readTextFile(customersInputPath);
+    Map<Integer, DoubleWritable> outputMap = customerLines.parallelDo(
+        new MapFn<String, Pair<Integer, DoubleWritable>>() {
+          @Override
+          public Pair<Integer, DoubleWritable> map(String input) {
+            int len = input.length();
+            return Pair.of(len, new DoubleWritable(len));
+          }
+        }, tableOf(ints(), writables(DoubleWritable.class)))
+    .groupByKey()
+    .combineValues(new CombineFn<Integer, DoubleWritable>() {
+      @Override
+      public void process(Pair<Integer, Iterable<DoubleWritable>> input,
+          Emitter<Pair<Integer, DoubleWritable>> emitter) {
+        double sum = 0.0;
+        for (DoubleWritable dw : input.second()) {
+          sum += dw.get();
+        }
+        emitter.emit(Pair.of(input.first(), new DoubleWritable(sum)));
+      }
+    })
+    .materializeToMap();
+    
+    Map<Integer, DoubleWritable> expectedMap = Maps.newHashMap();
+    expectedMap.put(17, new DoubleWritable(17.0));
+    expectedMap.put(16, new DoubleWritable(16.0));
+    expectedMap.put(12, new DoubleWritable(24.0));
+   
+    assertEquals(expectedMap, outputMap);
+    
+    pipeline.done();
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/lib/AggregateIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/lib/AggregateIT.java b/crunch-core/src/it/java/org/apache/crunch/lib/AggregateIT.java
new file mode 100644
index 0000000..56ee3ac
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/lib/AggregateIT.java
@@ -0,0 +1,231 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib;
+
+import static org.apache.crunch.types.writable.Writables.strings;
+import static org.apache.crunch.types.writable.Writables.tableOf;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Map;
+
+import org.apache.crunch.MapFn;
+import org.apache.crunch.PCollection;
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pair;
+import org.apache.crunch.Pipeline;
+import org.apache.crunch.impl.mem.MemPipeline;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.test.Employee;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.PTableType;
+import org.apache.crunch.types.PTypeFamily;
+import org.apache.crunch.types.avro.AvroTypeFamily;
+import org.apache.crunch.types.avro.Avros;
+import org.apache.crunch.types.writable.WritableTypeFamily;
+import org.apache.crunch.types.writable.Writables;
+import org.apache.hadoop.io.Text;
+import org.junit.Rule;
+import org.junit.Test;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+
+public class AggregateIT {
+  @Rule
+  public TemporaryPath tmpDir = TemporaryPaths.create();
+
+  @Test
+  public void testWritables() throws Exception {
+    Pipeline pipeline = new MRPipeline(AggregateIT.class, tmpDir.getDefaultConfiguration());
+    String shakesInputPath = tmpDir.copyResourceFileName("shakes.txt");
+    PCollection<String> shakes = pipeline.readTextFile(shakesInputPath);
+    runMinMax(shakes, WritableTypeFamily.getInstance());
+    pipeline.done();
+  }
+
+  @Test
+  public void testAvro() throws Exception {
+    Pipeline pipeline = new MRPipeline(AggregateIT.class, tmpDir.getDefaultConfiguration());
+    String shakesInputPath = tmpDir.copyResourceFileName("shakes.txt");
+    PCollection<String> shakes = pipeline.readTextFile(shakesInputPath);
+    runMinMax(shakes, AvroTypeFamily.getInstance());
+    pipeline.done();
+  }
+
+  @Test
+  public void testInMemoryAvro() throws Exception {
+    PCollection<String> someText = MemPipeline.collectionOf("first line", "second line", "third line");
+    runMinMax(someText, AvroTypeFamily.getInstance());
+  }
+
+  public static void runMinMax(PCollection<String> shakes, PTypeFamily family) throws Exception {
+    PCollection<Integer> lengths = shakes.parallelDo(new MapFn<String, Integer>() {
+      @Override
+      public Integer map(String input) {
+        return input.length();
+      }
+    }, family.ints());
+    PCollection<Integer> negLengths = lengths.parallelDo(new MapFn<Integer, Integer>() {
+      @Override
+      public Integer map(Integer input) {
+        return -input;
+      }
+    }, family.ints());
+    Integer maxLengths = Aggregate.max(lengths).getValue();
+    Integer minLengths = Aggregate.min(negLengths).getValue();
+    assertTrue(maxLengths != null);
+    assertTrue(minLengths != null);
+    assertEquals(maxLengths.intValue(), -minLengths.intValue());
+  }
+
+  private static class SplitFn extends MapFn<String, Pair<String, String>> {
+    @Override
+    public Pair<String, String> map(String input) {
+      String[] p = input.split("\\s+");
+      return Pair.of(p[0], p[1]);
+    }
+  }
+
+  @Test
+  public void testCollectUrls() throws Exception {
+    Pipeline p = new MRPipeline(AggregateIT.class, tmpDir.getDefaultConfiguration());
+    String urlsInputPath = tmpDir.copyResourceFileName("urls.txt");
+    PTable<String, Collection<String>> urls = Aggregate.collectValues(p.readTextFile(urlsInputPath).parallelDo(
+        new SplitFn(), tableOf(strings(), strings())));
+    for (Pair<String, Collection<String>> e : urls.materialize()) {
+      String key = e.first();
+      int expectedSize = 0;
+      if ("www.A.com".equals(key)) {
+        expectedSize = 4;
+      } else if ("www.B.com".equals(key) || "www.F.com".equals(key)) {
+        expectedSize = 2;
+      } else if ("www.C.com".equals(key) || "www.D.com".equals(key) || "www.E.com".equals(key)) {
+        expectedSize = 1;
+      }
+      assertEquals("Checking key = " + key, expectedSize, e.second().size());
+    }
+    p.done();
+  }
+
+  @Test
+  public void testTopN() throws Exception {
+    PTableType<String, Integer> ptype = Avros.tableOf(Avros.strings(), Avros.ints());
+    PTable<String, Integer> counts = MemPipeline.typedTableOf(ptype, "foo", 12, "bar", 17, "baz", 29);
+
+    PTable<String, Integer> top2 = Aggregate.top(counts, 2, true);
+    assertEquals(ImmutableList.of(Pair.of("baz", 29), Pair.of("bar", 17)), top2.materialize());
+
+    PTable<String, Integer> bottom2 = Aggregate.top(counts, 2, false);
+    assertEquals(ImmutableList.of(Pair.of("foo", 12), Pair.of("bar", 17)), bottom2.materialize());
+  }
+
+  @Test
+  public void testCollectValues_Writables() throws IOException {
+    Pipeline pipeline = new MRPipeline(AggregateIT.class, tmpDir.getDefaultConfiguration());
+    Map<Integer, Collection<Text>> collectionMap = pipeline.readTextFile(tmpDir.copyResourceFileName("set2.txt"))
+        .parallelDo(new MapStringToTextPair(), Writables.tableOf(Writables.ints(), Writables.writables(Text.class)))
+        .collectValues().materializeToMap();
+
+    assertEquals(1, collectionMap.size());
+
+    assertTrue(collectionMap.get(1).containsAll(Lists.newArrayList(new Text("c"), new Text("d"), new Text("a"))));
+  }
+
+  @Test
+  public void testCollectValues_Avro() throws IOException {
+
+    MapStringToEmployeePair mapFn = new MapStringToEmployeePair();
+    Pipeline pipeline = new MRPipeline(AggregateIT.class, tmpDir.getDefaultConfiguration());
+    Map<Integer, Collection<Employee>> collectionMap = pipeline.readTextFile(tmpDir.copyResourceFileName("set2.txt"))
+        .parallelDo(mapFn, Avros.tableOf(Avros.ints(), Avros.records(Employee.class))).collectValues()
+        .materializeToMap();
+
+    assertEquals(1, collectionMap.size());
+
+    Employee empC = mapFn.map("c").second();
+    Employee empD = mapFn.map("d").second();
+    Employee empA = mapFn.map("a").second();
+
+    assertTrue(collectionMap.get(1).containsAll(Lists.newArrayList(empC, empD, empA)));
+  }
+
+  private static class MapStringToTextPair extends MapFn<String, Pair<Integer, Text>> {
+    @Override
+    public Pair<Integer, Text> map(String input) {
+      return Pair.of(1, new Text(input));
+    }
+  }
+
+  private static class MapStringToEmployeePair extends MapFn<String, Pair<Integer, Employee>> {
+    @Override
+    public Pair<Integer, Employee> map(String input) {
+      Employee emp = new Employee();
+      emp.name = input;
+      emp.salary = 0;
+      emp.department = "";
+      return Pair.of(1, emp);
+    }
+  }
+
+  public static class PojoText {
+    private String value;
+
+    public PojoText() {
+      this("");
+    }
+
+    public PojoText(String value) {
+      this.value = value;
+    }
+
+    public String getValue() {
+      return value;
+    }
+
+    public void setValue(String value) {
+      this.value = value;
+    }
+
+    @Override
+    public String toString() {
+      return String.format("PojoText<%s>", this.value);
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+      if (this == obj)
+        return true;
+      if (obj == null)
+        return false;
+      if (getClass() != obj.getClass())
+        return false;
+      PojoText other = (PojoText) obj;
+      if (value == null) {
+        if (other.value != null)
+          return false;
+      } else if (!value.equals(other.value))
+        return false;
+      return true;
+    }
+
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/lib/AvroTypeSortIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/lib/AvroTypeSortIT.java b/crunch-core/src/it/java/org/apache/crunch/lib/AvroTypeSortIT.java
new file mode 100644
index 0000000..a832a5d
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/lib/AvroTypeSortIT.java
@@ -0,0 +1,145 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib;
+
+import static junit.framework.Assert.assertEquals;
+import static org.apache.crunch.types.avro.Avros.ints;
+import static org.apache.crunch.types.avro.Avros.records;
+import static org.apache.crunch.types.avro.Avros.strings;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.List;
+
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.specific.SpecificDatumWriter;
+import org.apache.crunch.MapFn;
+import org.apache.crunch.PCollection;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.io.At;
+import org.apache.crunch.test.Person;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Test sorting Avro types by a selected inner field.
+ */
+public class AvroTypeSortIT implements Serializable {
+
+  private static final long serialVersionUID = 1344118240353796561L;
+
+  private transient File avroFile;
+  @Rule
+  public transient TemporaryPath tmpDir = TemporaryPaths.create();
+
+  @Before
+  public void setUp() throws IOException {
+    avroFile = File.createTempFile("avrotest", ".avro");
+  }
+
+  @After
+  public void tearDown() {
+    avroFile.delete();
+  }
+
+  @Test
+  public void testSortAvroTypesBySelectedFields() throws Exception {
+
+    MRPipeline pipeline = new MRPipeline(AvroTypeSortIT.class, tmpDir.getDefaultConfiguration());
+
+    Person ccc10 = createPerson("CCC", 10);
+    Person bbb20 = createPerson("BBB", 20);
+    Person aaa30 = createPerson("AAA", 30);
+
+    writeAvroFile(Lists.newArrayList(ccc10, bbb20, aaa30), avroFile);
+
+    PCollection<Person> unsorted = pipeline.read(At.avroFile(avroFile.getAbsolutePath(), records(Person.class)));
+
+    // Sort by Name
+    MapFn<Person, String> nameExtractor = new MapFn<Person, String>() {
+
+      @Override
+      public String map(Person input) {
+        return input.name.toString();
+      }
+    };
+
+    PCollection<Person> sortedByName = unsorted.by(nameExtractor, strings()).groupByKey().ungroup().values();
+
+    List<Person> sortedByNameList = Lists.newArrayList(sortedByName.materialize());
+
+    assertEquals(3, sortedByNameList.size());
+    assertEquals(aaa30, sortedByNameList.get(0));
+    assertEquals(bbb20, sortedByNameList.get(1));
+    assertEquals(ccc10, sortedByNameList.get(2));
+
+    // Sort by Age
+
+    MapFn<Person, Integer> ageExtractor = new MapFn<Person, Integer>() {
+
+      @Override
+      public Integer map(Person input) {
+        return input.age;
+      }
+    };
+
+    PCollection<Person> sortedByAge = unsorted.by(ageExtractor, ints()).groupByKey().ungroup().values();
+
+    List<Person> sortedByAgeList = Lists.newArrayList(sortedByAge.materialize());
+
+    assertEquals(3, sortedByAgeList.size());
+    assertEquals(ccc10, sortedByAgeList.get(0));
+    assertEquals(bbb20, sortedByAgeList.get(1));
+    assertEquals(aaa30, sortedByAgeList.get(2));
+
+    pipeline.done();
+  }
+
+  private void writeAvroFile(List<Person> people, File avroFile) throws IOException {
+
+    FileOutputStream outputStream = new FileOutputStream(avroFile);
+    SpecificDatumWriter<Person> writer = new SpecificDatumWriter<Person>(Person.class);
+
+    DataFileWriter<Person> dataFileWriter = new DataFileWriter<Person>(writer);
+    dataFileWriter.create(Person.SCHEMA$, outputStream);
+    for (Person person : people) {
+      dataFileWriter.append(person);
+    }
+    dataFileWriter.close();
+    outputStream.close();
+  }
+
+  private Person createPerson(String name, int age) throws IOException {
+
+    Person person = new Person();
+    person.age = age;
+    person.name = name;
+    List<CharSequence> siblingNames = Lists.newArrayList();
+    person.siblingnames = siblingNames;
+
+    return person;
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/lib/CogroupIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/lib/CogroupIT.java b/crunch-core/src/it/java/org/apache/crunch/lib/CogroupIT.java
new file mode 100644
index 0000000..4b28da7
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/lib/CogroupIT.java
@@ -0,0 +1,112 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib;
+
+import static org.hamcrest.Matchers.is;
+import static org.junit.Assert.assertThat;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Map;
+
+import org.apache.crunch.DoFn;
+import org.apache.crunch.Emitter;
+import org.apache.crunch.PCollection;
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pair;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.test.Tests;
+import org.apache.crunch.types.PTableType;
+import org.apache.crunch.types.PTypeFamily;
+import org.apache.crunch.types.avro.AvroTypeFamily;
+import org.apache.crunch.types.writable.WritableTypeFamily;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+
+
+public class CogroupIT {
+  @Rule
+  public TemporaryPath tmpDir = TemporaryPaths.create();
+  private MRPipeline pipeline;
+  private PCollection<String> lines1;
+  private PCollection<String> lines2;
+
+
+  @Before
+  public void setUp() throws IOException {
+    pipeline = new MRPipeline(CogroupIT.class, tmpDir.getDefaultConfiguration());
+    lines1 = pipeline.readTextFile(tmpDir.copyResourceFileName(Tests.resource(this, "src1.txt")));
+    lines2 = pipeline.readTextFile(tmpDir.copyResourceFileName(Tests.resource(this, "src2.txt")));
+  }
+
+  @After
+  public void tearDown() {
+    pipeline.done();
+  }
+
+  @Test
+  public void testCogroupWritables() {
+    runCogroup(WritableTypeFamily.getInstance());
+  }
+
+  @Test
+  public void testCogroupAvro() {
+    runCogroup(AvroTypeFamily.getInstance());
+  }
+
+  public void runCogroup(PTypeFamily ptf) {
+    PTableType<String, String> tt = ptf.tableOf(ptf.strings(), ptf.strings());
+
+    PTable<String, String> kv1 = lines1.parallelDo("kv1", new KeyValueSplit(), tt);
+    PTable<String, String> kv2 = lines2.parallelDo("kv2", new KeyValueSplit(), tt);
+
+    PTable<String, Pair<Collection<String>, Collection<String>>> cg = Cogroup.cogroup(kv1, kv2);
+
+    Map<String, Pair<Collection<String>, Collection<String>>> actual = cg.materializeToMap();
+
+    Map<String, Pair<Collection<String>, Collection<String>>> expected = ImmutableMap.of(
+        "a", Pair.of(coll("1-1", "1-4"), coll()),
+        "b", Pair.of(coll("1-2"), coll("2-1")),
+        "c", Pair.of(coll("1-3"), coll("2-2", "2-3")),
+        "d", Pair.of(coll(), coll("2-4"))
+    );
+
+    assertThat(actual, is(expected));
+  }
+
+
+  private static class KeyValueSplit extends DoFn<String, Pair<String, String>> {
+    @Override
+    public void process(String input, Emitter<Pair<String, String>> emitter) {
+      String[] fields = input.split(",");
+      emitter.emit(Pair.of(fields[0], fields[1]));
+    }
+  }
+
+  private static Collection<String> coll(String... values) {
+    return ImmutableList.copyOf(values);
+  }
+  
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/lib/SecondarySortIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/lib/SecondarySortIT.java b/crunch-core/src/it/java/org/apache/crunch/lib/SecondarySortIT.java
new file mode 100644
index 0000000..242f621
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/lib/SecondarySortIT.java
@@ -0,0 +1,65 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib;
+
+import static org.apache.crunch.types.avro.Avros.*;
+import static org.junit.Assert.assertEquals;
+
+import java.io.Serializable;
+
+import org.apache.crunch.MapFn;
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pair;
+import org.apache.crunch.Pipeline;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.io.From;
+import org.apache.crunch.test.CrunchTestSupport;
+import org.junit.Test;
+
+import com.google.common.base.Joiner;
+import com.google.common.collect.ImmutableList;
+
+
+public class SecondarySortIT extends CrunchTestSupport implements Serializable {
+
+  @Test
+  public void testSecondarySort() throws Exception {
+    Pipeline p = new MRPipeline(SecondarySortIT.class, tempDir.getDefaultConfiguration());
+    String inputFile = tempDir.copyResourceFileName("secondary_sort_input.txt");
+    
+    PTable<String, Pair<Integer, Integer>> in = p.read(From.textFile(inputFile))
+        .parallelDo(new MapFn<String, Pair<String, Pair<Integer, Integer>>>() {
+          @Override
+          public Pair<String, Pair<Integer, Integer>> map(String input) {
+            String[] pieces = input.split(",");
+            return Pair.of(pieces[0],
+                Pair.of(Integer.valueOf(pieces[1].trim()), Integer.valueOf(pieces[2].trim())));
+          }
+        }, tableOf(strings(), pairs(ints(), ints())));
+    Iterable<String> lines = SecondarySort.sortAndApply(in, new MapFn<Pair<String, Iterable<Pair<Integer, Integer>>>, String>() {
+      @Override
+      public String map(Pair<String, Iterable<Pair<Integer, Integer>>> input) {
+        Joiner j = Joiner.on(',');
+        return j.join(input.first(), j.join(input.second()));
+      }
+    }, strings()).materialize();
+    assertEquals(ImmutableList.of("one,[-5,10],[1,1],[2,-3]", "three,[0,-1]", "two,[1,7],[2,6],[4,5]"),
+        ImmutableList.copyOf(lines));
+    p.done();
+  }
+}
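The expected output above ("one,[-5,10],[1,1],[2,-3]", ...) shows the SecondarySort contract: values are grouped by key and handed to the function sorted by the first element of the value pair. A hedged variation on the same call, reusing the in table and static Avros imports from the test, that sums the second field per key instead of joining the values:

    // Sketch only: same grouped-and-sorted input shape, different reduce function.
    Iterable<Pair<String, Integer>> totals = SecondarySort.sortAndApply(in,
        new MapFn<Pair<String, Iterable<Pair<Integer, Integer>>>, Pair<String, Integer>>() {
          @Override
          public Pair<String, Integer> map(Pair<String, Iterable<Pair<Integer, Integer>>> input) {
            int total = 0;
            for (Pair<Integer, Integer> value : input.second()) {
              total += value.second();
            }
            return Pair.of(input.first(), total);
          }
        }, pairs(strings(), ints())).materialize();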

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/lib/SetIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/lib/SetIT.java b/crunch-core/src/it/java/org/apache/crunch/lib/SetIT.java
new file mode 100644
index 0000000..d1300d2
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/lib/SetIT.java
@@ -0,0 +1,114 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Iterator;
+
+import org.apache.crunch.PCollection;
+import org.apache.crunch.Pipeline;
+import org.apache.crunch.Tuple3;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.io.At;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.PTypeFamily;
+import org.apache.crunch.types.avro.AvroTypeFamily;
+import org.apache.crunch.types.writable.WritableTypeFamily;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+import com.google.common.collect.Lists;
+
+@RunWith(value = Parameterized.class)
+public class SetIT {
+
+  private PTypeFamily typeFamily;
+
+  private Pipeline pipeline;
+  private PCollection<String> set1;
+  private PCollection<String> set2;
+
+  public SetIT(PTypeFamily typeFamily) {
+    this.typeFamily = typeFamily;
+  }
+  
+  @Rule
+  public TemporaryPath tmpDir = TemporaryPaths.create();
+
+  @Parameters
+  public static Collection<Object[]> data() {
+    Object[][] data = new Object[][] { { WritableTypeFamily.getInstance() }, { AvroTypeFamily.getInstance() } };
+    return Arrays.asList(data);
+  }
+
+  @Before
+  public void setUp() throws IOException {
+    String set1InputPath = tmpDir.copyResourceFileName("set1.txt");
+    String set2InputPath = tmpDir.copyResourceFileName("set2.txt");
+    pipeline = new MRPipeline(SetIT.class, tmpDir.getDefaultConfiguration());
+    set1 = pipeline.read(At.textFile(set1InputPath, typeFamily.strings()));
+    set2 = pipeline.read(At.textFile(set2InputPath, typeFamily.strings()));
+  }
+
+  @After
+  public void tearDown() {
+    pipeline.done();
+  }
+
+  @Test
+  public void testDifference() throws Exception {
+    PCollection<String> difference = Set.difference(set1, set2);
+    assertEquals(Lists.newArrayList("b", "e"), Lists.newArrayList(difference.materialize()));
+  }
+
+  @Test
+  public void testIntersection() throws Exception {
+    PCollection<String> intersection = Set.intersection(set1, set2);
+    assertEquals(Lists.newArrayList("a", "c"), Lists.newArrayList(intersection.materialize()));
+  }
+
+  @Test
+  public void testComm() throws Exception {
+    PCollection<Tuple3<String, String, String>> comm = Set.comm(set1, set2);
+    Iterator<Tuple3<String, String, String>> i = comm.materialize().iterator();
+    checkEquals(null, null, "a", i.next());
+    checkEquals("b", null, null, i.next());
+    checkEquals(null, null, "c", i.next());
+    checkEquals(null, "d", null, i.next());
+    checkEquals("e", null, null, i.next());
+    assertFalse(i.hasNext());
+  }
+
+  private void checkEquals(String s1, String s2, String s3, Tuple3<String, String, String> tuple) {
+    assertEquals("first string", s1, tuple.first());
+    assertEquals("second string", s2, tuple.second());
+    assertEquals("third string", s3, tuple.third());
+  }
+
+}
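As testComm above illustrates, Set.comm mirrors Unix comm(1): each emitted Tuple3 carries the element in the first position if it occurs only in the first collection, in the second position if it occurs only in the second, and in the third position if it occurs in both. A small in-memory sketch of the same contract (assuming MemPipeline.typedCollectionOf, which is used elsewhere in this module's tests):

    // Sketch only: comm over two tiny in-memory collections.
    PCollection<String> left = MemPipeline.typedCollectionOf(Avros.strings(), "a", "b");
    PCollection<String> right = MemPipeline.typedCollectionOf(Avros.strings(), "b", "c");
    for (Tuple3<String, String, String> t : Set.comm(left, right).materialize()) {
      // Expected rows: ("a", null, null), (null, null, "b"), (null, "c", null)
      System.out.println(t.first() + "\t" + t.second() + "\t" + t.third());
    }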

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/lib/SortByValueIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/lib/SortByValueIT.java b/crunch-core/src/it/java/org/apache/crunch/lib/SortByValueIT.java
new file mode 100644
index 0000000..e19c7d3
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/lib/SortByValueIT.java
@@ -0,0 +1,84 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.crunch.MapFn;
+import org.apache.crunch.PCollection;
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pair;
+import org.apache.crunch.Pipeline;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.io.From;
+import org.apache.crunch.lib.Sort.ColumnOrder;
+import org.apache.crunch.lib.Sort.Order;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.PTypeFamily;
+import org.apache.crunch.types.avro.AvroTypeFamily;
+import org.apache.crunch.types.writable.WritableTypeFamily;
+import org.junit.Rule;
+import org.junit.Test;
+
+import com.google.common.collect.ImmutableList;
+
+/**
+ * Tests sorting (letter, count) pairs by value descending, then key ascending, via Sort.sortPairs.
+ */
+public class SortByValueIT {
+  @Rule
+  public transient TemporaryPath tmpDir = TemporaryPaths.create();
+  
+  private static class SplitFn extends MapFn<String, Pair<String, Long>> {
+    private String sep;
+    
+    public SplitFn(String sep) {
+      this.sep = sep;
+    }
+    
+    @Override
+    public Pair<String, Long> map(String input) {
+      String[] pieces = input.split(sep);
+      return Pair.of(pieces[0], Long.valueOf(pieces[1]));
+    }
+  }
+  
+  @Test
+  public void testSortByValueWritables() throws Exception {
+    run(new MRPipeline(SortByValueIT.class), WritableTypeFamily.getInstance());
+  }
+  
+  @Test
+  public void testSortByValueAvro() throws Exception {
+    run(new MRPipeline(SortByValueIT.class), AvroTypeFamily.getInstance());
+  }
+  
+  public void run(Pipeline pipeline, PTypeFamily ptf) throws Exception {
+    String sbv = tmpDir.copyResourceFileName("sort_by_value.txt");
+    PTable<String, Long> letterCounts = pipeline.read(From.textFile(sbv)).parallelDo(new SplitFn("\t"),
+        ptf.tableOf(ptf.strings(), ptf.longs()));
+    PCollection<Pair<String, Long>> sorted = Sort.sortPairs(
+        letterCounts,
+        new ColumnOrder(2, Order.DESCENDING),
+        new ColumnOrder(1, Order.ASCENDING));
+    assertEquals(
+        ImmutableList.of(Pair.of("C", 3L), Pair.of("A", 2L), Pair.of("D", 2L), Pair.of("B", 1L), Pair.of("E", 1L)),
+        ImmutableList.copyOf(sorted.materialize()));
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/lib/SortIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/lib/SortIT.java b/crunch-core/src/it/java/org/apache/crunch/lib/SortIT.java
new file mode 100644
index 0000000..bad4864
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/lib/SortIT.java
@@ -0,0 +1,327 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib;
+
+import static org.apache.crunch.lib.Sort.ColumnOrder.by;
+import static org.apache.crunch.lib.Sort.Order.ASCENDING;
+import static org.apache.crunch.lib.Sort.Order.DESCENDING;
+import static org.apache.crunch.test.StringWrapper.wrap;
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.crunch.DoFn;
+import org.apache.crunch.Emitter;
+import org.apache.crunch.MapFn;
+import org.apache.crunch.PCollection;
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pair;
+import org.apache.crunch.Pipeline;
+import org.apache.crunch.Tuple3;
+import org.apache.crunch.Tuple4;
+import org.apache.crunch.TupleN;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.lib.Sort.ColumnOrder;
+import org.apache.crunch.lib.Sort.Order;
+import org.apache.crunch.test.StringWrapper;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.PTypeFamily;
+import org.apache.crunch.types.avro.AvroTypeFamily;
+import org.apache.crunch.types.avro.Avros;
+import org.apache.crunch.types.writable.WritableTypeFamily;
+import org.junit.Rule;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+public class SortIT implements Serializable {
+  @Rule
+  public transient TemporaryPath tmpDir = TemporaryPaths.create();
+
+  @Test
+  public void testWritableSortAsc() throws Exception {
+    runSingle(new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance(), Order.ASCENDING,
+        "A\tand this text as well");
+  }
+
+  @Test
+  public void testWritableSortDesc() throws Exception {
+    runSingle(new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance(), Order.DESCENDING,
+        "B\tthis doc has some text");
+  }
+
+  @Test
+  public void testWritableSortAscDesc() throws Exception {
+    runPair(new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance(), by(1, ASCENDING), by(2, DESCENDING), "A",
+        "this doc has this text");
+  }
+
+  @Test
+  public void testWritableSortSecondDescFirstAsc() throws Exception {
+    runPair(new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance(), by(2, DESCENDING), by(1, ASCENDING), "A",
+        "this doc has this text");
+  }
+
+  @Test
+  public void testWritableSortTripleAscDescAsc() throws Exception {
+    runTriple(new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance(), by(1, ASCENDING), by(2, DESCENDING),
+        by(3, ASCENDING), "A", "this", "doc");
+  }
+
+  @Test
+  public void testWritableSortQuadAscDescAscDesc() throws Exception {
+    runQuad(new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance(), by(1, ASCENDING), by(2, DESCENDING),
+        by(3, ASCENDING), by(4, DESCENDING), "A", "this", "doc", "has");
+  }
+
+  @Test
+  public void testWritableSortTupleNAscDesc() throws Exception {
+    runTupleN(new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance(),
+        new ColumnOrder[] { by(1, ASCENDING), by(2, DESCENDING) }, new String[] { "A", "this doc has this text" });
+  }
+
+  @Test
+  public void testWritableSortTable() throws Exception {
+    runTable(new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance(), "A");
+  }
+
+  @Test
+  public void testAvroSortAsc() throws Exception {
+    runSingle(new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration()), AvroTypeFamily.getInstance(), Order.ASCENDING, "A\tand this text as well");
+  }
+
+  @Test
+  public void testAvroSortDesc() throws Exception {
+    runSingle(new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration()), AvroTypeFamily.getInstance(), Order.DESCENDING, "B\tthis doc has some text");
+  }
+
+  @Test
+  public void testAvroSortPairAscDesc() throws Exception {
+    runPair(new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration()), AvroTypeFamily.getInstance(), by(1, ASCENDING), by(2, DESCENDING), "A",
+        "this doc has this text");
+  }
+
+  @Test
+  public void testAvroSortPairSecondDescFirstAsc() throws Exception {
+    runPair(new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration()), AvroTypeFamily.getInstance(), by(2, DESCENDING), by(1, ASCENDING), "A",
+        "this doc has this text");
+  }
+
+  @Test
+  public void testAvroSortTripleAscDescAsc() throws Exception {
+    runTriple(new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration()), AvroTypeFamily.getInstance(), by(1, ASCENDING), by(2, DESCENDING),
+        by(3, ASCENDING), "A", "this", "doc");
+  }
+
+  @Test
+  public void testAvroSortQuadAscDescAscDesc() throws Exception {
+    runQuad(new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration()), AvroTypeFamily.getInstance(), by(1, ASCENDING), by(2, DESCENDING),
+        by(3, ASCENDING), by(4, DESCENDING), "A", "this", "doc", "has");
+  }
+
+  @Test
+  public void testAvroSortTupleNAscDesc() throws Exception {
+    runTupleN(new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration()), AvroTypeFamily.getInstance(),
+        new ColumnOrder[] { by(1, ASCENDING), by(2, DESCENDING) }, new String[] { "A", "this doc has this text" });
+  }
+
+  @Test
+  public void testAvroReflectSortPair() throws IOException {
+    Pipeline pipeline = new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration());
+    pipeline.enableDebug();
+    String rsrc = tmpDir.copyResourceFileName("set2.txt");
+    PCollection<Pair<String, StringWrapper>> in = pipeline.readTextFile(rsrc)
+        .parallelDo(new MapFn<String, Pair<String, StringWrapper>>() {
+
+          @Override
+          public Pair<String, StringWrapper> map(String input) {
+            return Pair.of(input, wrap(input));
+          }
+        }, Avros.pairs(Avros.strings(), Avros.reflects(StringWrapper.class)));
+    PCollection<Pair<String, StringWrapper>> sorted = Sort.sort(in, Order.ASCENDING);
+    
+    List<Pair<String, StringWrapper>> expected = Lists.newArrayList();
+    expected.add(Pair.of("a", wrap("a")));
+    expected.add(Pair.of("c", wrap("c")));
+    expected.add(Pair.of("d", wrap("d")));
+
+    assertEquals(expected, Lists.newArrayList(sorted.materialize()));
+  }
+
+  @Test
+  public void testAvroReflectSortTable() throws IOException {
+    Pipeline pipeline = new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration());
+    PTable<String, StringWrapper> unsorted = pipeline.readTextFile(tmpDir.copyResourceFileName("set2.txt")).parallelDo(
+        new MapFn<String, Pair<String, StringWrapper>>() {
+
+          @Override
+          public Pair<String, StringWrapper> map(String input) {
+            return Pair.of(input, wrap(input));
+          }
+        }, Avros.tableOf(Avros.strings(), Avros.reflects(StringWrapper.class)));
+
+    PTable<String, StringWrapper> sorted = Sort.sort(unsorted);
+
+    List<Pair<String, StringWrapper>> expected = Lists.newArrayList();
+    expected.add(Pair.of("a", wrap("a")));
+    expected.add(Pair.of("c", wrap("c")));
+    expected.add(Pair.of("d", wrap("d")));
+
+    assertEquals(expected, Lists.newArrayList(sorted.materialize()));
+  }
+
+  @Test
+  public void testAvroSortTable() throws Exception {
+    runTable(new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration()), AvroTypeFamily.getInstance(), "A");
+  }
+
+  private void runSingle(Pipeline pipeline, PTypeFamily typeFamily, Order order, String firstLine) throws IOException {
+    String inputPath = tmpDir.copyResourceFileName("docs.txt");
+
+    PCollection<String> input = pipeline.readTextFile(inputPath);
+    // The following converts the input from Writables into the required type family
+    PCollection<String> input2 = input.parallelDo(new DoFn<String, String>() {
+      @Override
+      public void process(String input, Emitter<String> emitter) {
+        emitter.emit(input);
+      }
+    }, typeFamily.strings());
+    PCollection<String> sorted = Sort.sort(input2, order);
+    Iterable<String> lines = sorted.materialize();
+
+    assertEquals(firstLine, lines.iterator().next());
+    pipeline.done(); // TODO: finally
+  }
+
+  private void runPair(Pipeline pipeline, PTypeFamily typeFamily, ColumnOrder first, ColumnOrder second,
+      String firstField, String secondField) throws IOException {
+    String inputPath = tmpDir.copyResourceFileName("docs.txt");
+
+    PCollection<String> input = pipeline.readTextFile(inputPath);
+    PTable<String, String> kv = input.parallelDo(new DoFn<String, Pair<String, String>>() {
+      @Override
+      public void process(String input, Emitter<Pair<String, String>> emitter) {
+        String[] split = input.split("[\t]+");
+        emitter.emit(Pair.of(split[0], split[1]));
+      }
+    }, typeFamily.tableOf(typeFamily.strings(), typeFamily.strings()));
+    PCollection<Pair<String, String>> sorted = Sort.sortPairs(kv, first, second);
+    List<Pair<String, String>> lines = Lists.newArrayList(sorted.materialize());
+    Pair<String, String> l = lines.iterator().next();
+    assertEquals(firstField, l.first());
+    assertEquals(secondField, l.second());
+    pipeline.done();
+  }
+
+  private void runTriple(Pipeline pipeline, PTypeFamily typeFamily, ColumnOrder first, ColumnOrder second,
+      ColumnOrder third, String firstField, String secondField, String thirdField) throws IOException {
+    String inputPath = tmpDir.copyResourceFileName("docs.txt");
+
+    PCollection<String> input = pipeline.readTextFile(inputPath);
+    PCollection<Tuple3<String, String, String>> kv = input.parallelDo(
+        new DoFn<String, Tuple3<String, String, String>>() {
+          @Override
+          public void process(String input, Emitter<Tuple3<String, String, String>> emitter) {
+            String[] split = input.split("[\t ]+");
+            int len = split.length;
+            emitter.emit(Tuple3.of(split[0], split[1 % len], split[2 % len]));
+          }
+        }, typeFamily.triples(typeFamily.strings(), typeFamily.strings(), typeFamily.strings()));
+    PCollection<Tuple3<String, String, String>> sorted = Sort.sortTriples(kv, first, second, third);
+    List<Tuple3<String, String, String>> lines = Lists.newArrayList(sorted.materialize());
+    Tuple3<String, String, String> l = lines.iterator().next();
+    assertEquals(firstField, l.first());
+    assertEquals(secondField, l.second());
+    assertEquals(thirdField, l.third());
+    pipeline.done();
+  }
+
+  private void runQuad(Pipeline pipeline, PTypeFamily typeFamily, ColumnOrder first, ColumnOrder second,
+      ColumnOrder third, ColumnOrder fourth, String firstField, String secondField, String thirdField,
+      String fourthField) throws IOException {
+    String inputPath = tmpDir.copyResourceFileName("docs.txt");
+
+    PCollection<String> input = pipeline.readTextFile(inputPath);
+    PCollection<Tuple4<String, String, String, String>> kv = input.parallelDo(
+        new DoFn<String, Tuple4<String, String, String, String>>() {
+          @Override
+          public void process(String input, Emitter<Tuple4<String, String, String, String>> emitter) {
+            String[] split = input.split("[\t ]+");
+            int len = split.length;
+            emitter.emit(Tuple4.of(split[0], split[1 % len], split[2 % len], split[3 % len]));
+          }
+        }, typeFamily.quads(typeFamily.strings(), typeFamily.strings(), typeFamily.strings(), typeFamily.strings()));
+    PCollection<Tuple4<String, String, String, String>> sorted = Sort.sortQuads(kv, first, second, third, fourth);
+    Iterable<Tuple4<String, String, String, String>> lines = sorted.materialize();
+    Tuple4<String, String, String, String> l = lines.iterator().next();
+    assertEquals(firstField, l.first());
+    assertEquals(secondField, l.second());
+    assertEquals(thirdField, l.third());
+    assertEquals(fourthField, l.fourth());
+    pipeline.done();
+  }
+
+  private void runTupleN(Pipeline pipeline, PTypeFamily typeFamily, ColumnOrder[] orders, String[] fields)
+      throws IOException {
+    String inputPath = tmpDir.copyResourceFileName("docs.txt");
+
+    PCollection<String> input = pipeline.readTextFile(inputPath);
+    PType[] types = new PType[orders.length];
+    Arrays.fill(types, typeFamily.strings());
+    PCollection<TupleN> kv = input.parallelDo(new DoFn<String, TupleN>() {
+      @Override
+      public void process(String input, Emitter<TupleN> emitter) {
+        String[] split = input.split("[\t]+");
+        emitter.emit(new TupleN(split));
+      }
+    }, typeFamily.tuples(types));
+    PCollection<TupleN> sorted = Sort.sortTuples(kv, orders);
+    Iterable<TupleN> lines = sorted.materialize();
+    TupleN l = lines.iterator().next();
+    int i = 0;
+    for (String field : fields) {
+      assertEquals(field, l.get(i++));
+    }
+    pipeline.done();
+  }
+
+  private void runTable(Pipeline pipeline, PTypeFamily typeFamily, String firstKey) throws IOException {
+    String inputPath = tmpDir.copyResourceFileName("docs.txt");
+
+    PCollection<String> input = pipeline.readTextFile(inputPath);
+    PTable<String, String> table = input.parallelDo(new DoFn<String, Pair<String, String>>() {
+      @Override
+      public void process(String input, Emitter<Pair<String, String>> emitter) {
+        String[] split = input.split("[\t]+");
+        emitter.emit(Pair.of(split[0], split[1]));
+      }
+    }, typeFamily.tableOf(typeFamily.strings(), typeFamily.strings()));
+
+    PTable<String, String> sorted = Sort.sort(table);
+    Iterable<Pair<String, String>> lines = sorted.materialize();
+    Pair<String, String> l = lines.iterator().next();
+    assertEquals(firstKey, l.first());
+    pipeline.done();
+  }
+
+}


[20/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/io/avro/AvroFileSourceTargetIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/io/avro/AvroFileSourceTargetIT.java b/crunch/src/it/java/org/apache/crunch/io/avro/AvroFileSourceTargetIT.java
deleted file mode 100644
index 671b920..0000000
--- a/crunch/src/it/java/org/apache/crunch/io/avro/AvroFileSourceTargetIT.java
+++ /dev/null
@@ -1,140 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.avro;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.List;
-
-import org.apache.avro.Schema;
-import org.apache.avro.file.DataFileWriter;
-import org.apache.avro.generic.GenericData;
-import org.apache.avro.generic.GenericData.Record;
-import org.apache.avro.generic.GenericDatumWriter;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.reflect.ReflectData;
-import org.apache.crunch.PCollection;
-import org.apache.crunch.Pipeline;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.io.At;
-import org.apache.crunch.test.Person;
-import org.apache.crunch.test.StringWrapper;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.avro.Avros;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-
-import com.google.common.collect.Lists;
-
-@SuppressWarnings("serial")
-public class AvroFileSourceTargetIT implements Serializable {
-
-  private transient File avroFile;
-  @Rule
-  public transient TemporaryPath tmpDir = TemporaryPaths.create();
-
-  @Before
-  public void setUp() throws IOException {
-    avroFile = tmpDir.getFile("test.avro");
-  }
-
-  private void populateGenericFile(List<GenericRecord> genericRecords, Schema schema) throws IOException {
-    FileOutputStream outputStream = new FileOutputStream(this.avroFile);
-    GenericDatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<GenericRecord>(schema);
-
-    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(genericDatumWriter);
-    dataFileWriter.create(schema, outputStream);
-
-    for (GenericRecord record : genericRecords) {
-      dataFileWriter.append(record);
-    }
-
-    dataFileWriter.close();
-    outputStream.close();
-
-  }
-
-  @Test
-  public void testSpecific() throws IOException {
-    GenericRecord savedRecord = new GenericData.Record(Person.SCHEMA$);
-    savedRecord.put("name", "John Doe");
-    savedRecord.put("age", 42);
-    savedRecord.put("siblingnames", Lists.newArrayList("Jimmy", "Jane"));
-    populateGenericFile(Lists.newArrayList(savedRecord), Person.SCHEMA$);
-
-    Pipeline pipeline = new MRPipeline(AvroFileSourceTargetIT.class, tmpDir.getDefaultConfiguration());
-    PCollection<Person> genericCollection = pipeline.read(At.avroFile(avroFile.getAbsolutePath(),
-        Avros.records(Person.class)));
-
-    List<Person> personList = Lists.newArrayList(genericCollection.materialize());
-
-    Person expectedPerson = new Person();
-    expectedPerson.name = "John Doe";
-    expectedPerson.age = 42;
-
-    List<CharSequence> siblingNames = Lists.newArrayList();
-    siblingNames.add("Jimmy");
-    siblingNames.add("Jane");
-    expectedPerson.siblingnames = siblingNames;
-
-    assertEquals(Lists.newArrayList(expectedPerson), Lists.newArrayList(personList));
-  }
-
-  @Test
-  public void testGeneric() throws IOException {
-    String genericSchemaJson = Person.SCHEMA$.toString().replace("Person", "GenericPerson");
-    Schema genericPersonSchema = new Schema.Parser().parse(genericSchemaJson);
-    GenericRecord savedRecord = new GenericData.Record(genericPersonSchema);
-    savedRecord.put("name", "John Doe");
-    savedRecord.put("age", 42);
-    savedRecord.put("siblingnames", Lists.newArrayList("Jimmy", "Jane"));
-    populateGenericFile(Lists.newArrayList(savedRecord), genericPersonSchema);
-
-    Pipeline pipeline = new MRPipeline(AvroFileSourceTargetIT.class, tmpDir.getDefaultConfiguration());
-    PCollection<Record> genericCollection = pipeline.read(At.avroFile(avroFile.getAbsolutePath(),
-        Avros.generics(genericPersonSchema)));
-
-    List<Record> recordList = Lists.newArrayList(genericCollection.materialize());
-
-    assertEquals(Lists.newArrayList(savedRecord), Lists.newArrayList(recordList));
-  }
-
-  @Test
-  public void testReflect() throws IOException {
-    Schema pojoPersonSchema = ReflectData.get().getSchema(StringWrapper.class);
-    GenericRecord savedRecord = new GenericData.Record(pojoPersonSchema);
-    savedRecord.put("value", "stringvalue");
-    populateGenericFile(Lists.newArrayList(savedRecord), pojoPersonSchema);
-
-    Pipeline pipeline = new MRPipeline(AvroFileSourceTargetIT.class, tmpDir.getDefaultConfiguration());
-    PCollection<StringWrapper> stringValueCollection = pipeline.read(At.avroFile(avroFile.getAbsolutePath(),
-        Avros.reflects(StringWrapper.class)));
-
-    List<StringWrapper> recordList = Lists.newArrayList(stringValueCollection.materialize());
-
-    assertEquals(1, recordList.size());
-    StringWrapper stringWrapper = recordList.get(0);
-    assertEquals("stringvalue", stringWrapper.getValue());
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/io/avro/AvroPipelineIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/io/avro/AvroPipelineIT.java b/crunch/src/it/java/org/apache/crunch/io/avro/AvroPipelineIT.java
deleted file mode 100644
index 29bf4f5..0000000
--- a/crunch/src/it/java/org/apache/crunch/io/avro/AvroPipelineIT.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with this
- * work for additional information regarding copyright ownership. The ASF
- * licenses this file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package org.apache.crunch.io.avro;
-
-import static org.junit.Assert.assertTrue;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.List;
-
-import org.apache.avro.Schema;
-import org.apache.avro.file.DataFileWriter;
-import org.apache.avro.generic.GenericData;
-import org.apache.avro.generic.GenericDatumWriter;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.commons.io.FileUtils;
-import org.apache.crunch.PCollection;
-import org.apache.crunch.Pipeline;
-import org.apache.crunch.Target;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.io.At;
-import org.apache.crunch.io.To;
-import org.apache.crunch.test.Person;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.avro.Avros;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-
-import com.google.common.collect.Lists;
-
-public class AvroPipelineIT implements Serializable {
-
-  private transient File avroFile;
-  @Rule
-  public transient TemporaryPath tmpDir = TemporaryPaths.create();
-
-  @Before
-  public void setUp() throws IOException {
-    avroFile = tmpDir.getFile("test.avro");
-  }
-
-  private void populateGenericFile(List<GenericRecord> genericRecords, Schema schema) throws IOException {
-    FileOutputStream outputStream = new FileOutputStream(this.avroFile);
-    GenericDatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<GenericRecord>(schema);
-
-    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(genericDatumWriter);
-    dataFileWriter.create(schema, outputStream);
-
-    for (GenericRecord record : genericRecords) {
-      dataFileWriter.append(record);
-    }
-
-    dataFileWriter.close();
-    outputStream.close();
-
-  }
-
-  @Test
-  public void toTextShouldWriteAvroDataAsDatumText() throws Exception {
-    GenericRecord savedRecord = new GenericData.Record(Person.SCHEMA$);
-    savedRecord.put("name", "John Doe");
-    savedRecord.put("age", 42);
-    savedRecord.put("siblingnames", Lists.newArrayList("Jimmy", "Jane"));
-    populateGenericFile(Lists.newArrayList(savedRecord), Person.SCHEMA$);
-
-    Pipeline pipeline = new MRPipeline(AvroFileSourceTargetIT.class, tmpDir.getDefaultConfiguration());
-    PCollection<Person> genericCollection = pipeline.read(At.avroFile(avroFile.getAbsolutePath(),
-        Avros.records(Person.class)));
-    File outputFile = tmpDir.getFile("output");
-    Target textFile = To.textFile(outputFile.getAbsolutePath());
-    pipeline.write(genericCollection, textFile);
-    pipeline.run();
-    Person person = genericCollection.materialize().iterator().next();
-    String outputString = FileUtils.readFileToString(new File(outputFile, "part-m-00000"));
-    assertTrue(outputString.contains(person.toString()));
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/io/avro/AvroReflectIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/io/avro/AvroReflectIT.java b/crunch/src/it/java/org/apache/crunch/io/avro/AvroReflectIT.java
deleted file mode 100644
index 7a90517..0000000
--- a/crunch/src/it/java/org/apache/crunch/io/avro/AvroReflectIT.java
+++ /dev/null
@@ -1,109 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.avro;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.Collections;
-import java.util.List;
-
-import org.apache.crunch.MapFn;
-import org.apache.crunch.PCollection;
-import org.apache.crunch.Pair;
-import org.apache.crunch.Pipeline;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.lib.Aggregate;
-import org.apache.crunch.test.Person;
-import org.apache.crunch.test.StringWrapper;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.avro.Avros;
-import org.junit.Assume;
-import org.junit.Rule;
-import org.junit.Test;
-
-import com.google.common.collect.Lists;
-
-public class AvroReflectIT implements Serializable {
-
-  @Rule
-  public transient TemporaryPath tmpDir = TemporaryPaths.create();
-
-  @Test
-  public void testReflection() throws IOException {
-    Pipeline pipeline = new MRPipeline(AvroReflectIT.class, tmpDir.getDefaultConfiguration());
-    PCollection<StringWrapper> stringWrapperCollection = pipeline.readTextFile(tmpDir.copyResourceFileName("set1.txt"))
-        .parallelDo(new MapFn<String, StringWrapper>() {
-
-          @Override
-          public StringWrapper map(String input) {
-            StringWrapper stringWrapper = new StringWrapper();
-            stringWrapper.setValue(input);
-            return stringWrapper;
-          }
-        }, Avros.reflects(StringWrapper.class));
-
-    List<StringWrapper> stringWrappers = Lists.newArrayList(stringWrapperCollection.materialize());
-
-    pipeline.done();
-
-    assertEquals(Lists.newArrayList(new StringWrapper("b"), new StringWrapper("c"), new StringWrapper("a"),
-        new StringWrapper("e")), stringWrappers);
-
-  }
-
-  // Verify that running with a combination of reflect and specific schema
-  // doesn't crash
-  @Test
-  public void testCombinationOfReflectionAndSpecific() throws IOException {
-    Assume.assumeTrue(Avros.CAN_COMBINE_SPECIFIC_AND_REFLECT_SCHEMAS);
-    Pipeline pipeline = new MRPipeline(AvroReflectIT.class, tmpDir.getDefaultConfiguration());
-    PCollection<Pair<StringWrapper, Person>> hybridPairCollection = pipeline.readTextFile(
-        tmpDir.copyResourceFileName("set1.txt")).parallelDo(new MapFn<String, Pair<StringWrapper, Person>>() {
-
-      @Override
-      public Pair<StringWrapper, Person> map(String input) {
-        Person person = new Person();
-        person.name = input;
-        person.age = 42;
-        person.siblingnames = Lists.<CharSequence> newArrayList(input);
-
-        return Pair.of(new StringWrapper(input), person);
-      }
-    }, Avros.pairs(Avros.reflects(StringWrapper.class), Avros.records(Person.class)));
-
-    PCollection<Pair<String, Long>> countCollection = Aggregate.count(hybridPairCollection).parallelDo(
-        new MapFn<Pair<Pair<StringWrapper, Person>, Long>, Pair<String, Long>>() {
-
-          @Override
-          public Pair<String, Long> map(Pair<Pair<StringWrapper, Person>, Long> input) {
-            return Pair.of(input.first().first().getValue(), input.second());
-          }
-        }, Avros.pairs(Avros.strings(), Avros.longs()));
-
-    List<Pair<String, Long>> materialized = Lists.newArrayList(countCollection.materialize());
-    List<Pair<String, Long>> expected = Lists.newArrayList(Pair.of("a", 1L), Pair.of("b", 1L), Pair.of("c", 1L),
-        Pair.of("e", 1L));
-    Collections.sort(materialized);
-
-    assertEquals(expected, materialized);
-    pipeline.done();
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/io/avro/AvroWritableIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/io/avro/AvroWritableIT.java b/crunch/src/it/java/org/apache/crunch/io/avro/AvroWritableIT.java
deleted file mode 100644
index cbb7fde..0000000
--- a/crunch/src/it/java/org/apache/crunch/io/avro/AvroWritableIT.java
+++ /dev/null
@@ -1,89 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.avro;
-
-import static org.apache.crunch.types.avro.Avros.ints;
-import static org.apache.crunch.types.avro.Avros.tableOf;
-import static org.apache.crunch.types.avro.Avros.writables;
-import static org.junit.Assert.assertEquals;
-
-import java.io.Serializable;
-import java.util.Map;
-
-import org.apache.crunch.CombineFn;
-import org.apache.crunch.Emitter;
-import org.apache.crunch.MapFn;
-import org.apache.crunch.PCollection;
-import org.apache.crunch.Pair;
-import org.apache.crunch.Pipeline;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.hadoop.io.DoubleWritable;
-import org.junit.Rule;
-import org.junit.Test;
-
-import com.google.common.collect.Maps;
-
-/**
- * Verify handling of both a ByteBuffer and byte array as input from an Avro job (depending
- * on the version of Avro being used).
- */
-public class AvroWritableIT implements Serializable {
-
-  @Rule
-  public transient TemporaryPath tmpDir = TemporaryPaths.create();
-  
-  @Test
-  public void testAvroBasedWritablePipeline() throws Exception {
-    String customersInputPath = tmpDir.copyResourceFileName("customers.txt");
-    Pipeline pipeline = new MRPipeline(AvroWritableIT.class, tmpDir.getDefaultConfiguration());
-    pipeline.enableDebug();
-    PCollection<String> customerLines = pipeline.readTextFile(customersInputPath);
-    Map<Integer, DoubleWritable> outputMap = customerLines.parallelDo(
-        new MapFn<String, Pair<Integer, DoubleWritable>>() {
-          @Override
-          public Pair<Integer, DoubleWritable> map(String input) {
-            int len = input.length();
-            return Pair.of(len, new DoubleWritable(len));
-          }
-        }, tableOf(ints(), writables(DoubleWritable.class)))
-    .groupByKey()
-    .combineValues(new CombineFn<Integer, DoubleWritable>() {
-      @Override
-      public void process(Pair<Integer, Iterable<DoubleWritable>> input,
-          Emitter<Pair<Integer, DoubleWritable>> emitter) {
-        double sum = 0.0;
-        for (DoubleWritable dw : input.second()) {
-          sum += dw.get();
-        }
-        emitter.emit(Pair.of(input.first(), new DoubleWritable(sum)));
-      }
-    })
-    .materializeToMap();
-    
-    Map<Integer, DoubleWritable> expectedMap = Maps.newHashMap();
-    expectedMap.put(17, new DoubleWritable(17.0));
-    expectedMap.put(16, new DoubleWritable(16.0));
-    expectedMap.put(12, new DoubleWritable(24.0));
-   
-    assertEquals(expectedMap, outputMap);
-    
-    pipeline.done();
-  }
-}
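
The pipeline above deliberately mixes type families: the table type comes from Avros, while the values are Hadoop Writables wrapped via Avros.writables(). Stripped of the pipeline setup and assertions, the pattern reduces to the sketch below; AvroWritableSketch is a placeholder name and lines stands for any PCollection<String>, such as the readTextFile output in the test:

import static org.apache.crunch.types.avro.Avros.ints;
import static org.apache.crunch.types.avro.Avros.tableOf;
import static org.apache.crunch.types.avro.Avros.writables;

import java.util.Map;

import org.apache.crunch.CombineFn;
import org.apache.crunch.Emitter;
import org.apache.crunch.MapFn;
import org.apache.crunch.PCollection;
import org.apache.crunch.Pair;
import org.apache.hadoop.io.DoubleWritable;

public class AvroWritableSketch {

  // Sums line lengths per length bucket, carrying the values as DoubleWritables
  // inside an Avro-typed table, exactly as the test above does.
  static Map<Integer, DoubleWritable> sumLengths(PCollection<String> lines) {
    return lines
        .parallelDo(new MapFn<String, Pair<Integer, DoubleWritable>>() {
          @Override
          public Pair<Integer, DoubleWritable> map(String input) {
            int len = input.length();
            return Pair.of(len, new DoubleWritable(len));
          }
        }, tableOf(ints(), writables(DoubleWritable.class)))
        .groupByKey()
        .combineValues(new CombineFn<Integer, DoubleWritable>() {
          @Override
          public void process(Pair<Integer, Iterable<DoubleWritable>> input,
              Emitter<Pair<Integer, DoubleWritable>> emitter) {
            double sum = 0.0;
            for (DoubleWritable dw : input.second()) {
              sum += dw.get();
            }
            emitter.emit(Pair.of(input.first(), new DoubleWritable(sum)));
          }
        })
        .materializeToMap();
  }
}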

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/lib/AggregateIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/lib/AggregateIT.java b/crunch/src/it/java/org/apache/crunch/lib/AggregateIT.java
deleted file mode 100644
index 56ee3ac..0000000
--- a/crunch/src/it/java/org/apache/crunch/lib/AggregateIT.java
+++ /dev/null
@@ -1,231 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib;
-
-import static org.apache.crunch.types.writable.Writables.strings;
-import static org.apache.crunch.types.writable.Writables.tableOf;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import java.io.IOException;
-import java.util.Collection;
-import java.util.Map;
-
-import org.apache.crunch.MapFn;
-import org.apache.crunch.PCollection;
-import org.apache.crunch.PTable;
-import org.apache.crunch.Pair;
-import org.apache.crunch.Pipeline;
-import org.apache.crunch.impl.mem.MemPipeline;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.test.Employee;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.PTableType;
-import org.apache.crunch.types.PTypeFamily;
-import org.apache.crunch.types.avro.AvroTypeFamily;
-import org.apache.crunch.types.avro.Avros;
-import org.apache.crunch.types.writable.WritableTypeFamily;
-import org.apache.crunch.types.writable.Writables;
-import org.apache.hadoop.io.Text;
-import org.junit.Rule;
-import org.junit.Test;
-
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Lists;
-
-public class AggregateIT {
-  @Rule
-  public TemporaryPath tmpDir = TemporaryPaths.create();
-
-  @Test
-  public void testWritables() throws Exception {
-    Pipeline pipeline = new MRPipeline(AggregateIT.class, tmpDir.getDefaultConfiguration());
-    String shakesInputPath = tmpDir.copyResourceFileName("shakes.txt");
-    PCollection<String> shakes = pipeline.readTextFile(shakesInputPath);
-    runMinMax(shakes, WritableTypeFamily.getInstance());
-    pipeline.done();
-  }
-
-  @Test
-  public void testAvro() throws Exception {
-    Pipeline pipeline = new MRPipeline(AggregateIT.class, tmpDir.getDefaultConfiguration());
-    String shakesInputPath = tmpDir.copyResourceFileName("shakes.txt");
-    PCollection<String> shakes = pipeline.readTextFile(shakesInputPath);
-    runMinMax(shakes, AvroTypeFamily.getInstance());
-    pipeline.done();
-  }
-
-  @Test
-  public void testInMemoryAvro() throws Exception {
-    PCollection<String> someText = MemPipeline.collectionOf("first line", "second line", "third line");
-    runMinMax(someText, AvroTypeFamily.getInstance());
-  }
-
-  public static void runMinMax(PCollection<String> shakes, PTypeFamily family) throws Exception {
-    PCollection<Integer> lengths = shakes.parallelDo(new MapFn<String, Integer>() {
-      @Override
-      public Integer map(String input) {
-        return input.length();
-      }
-    }, family.ints());
-    PCollection<Integer> negLengths = lengths.parallelDo(new MapFn<Integer, Integer>() {
-      @Override
-      public Integer map(Integer input) {
-        return -input;
-      }
-    }, family.ints());
-    Integer maxLengths = Aggregate.max(lengths).getValue();
-    Integer minLengths = Aggregate.min(negLengths).getValue();
-    assertTrue(maxLengths != null);
-    assertTrue(minLengths != null);
-    assertEquals(maxLengths.intValue(), -minLengths.intValue());
-  }
-
-  private static class SplitFn extends MapFn<String, Pair<String, String>> {
-    @Override
-    public Pair<String, String> map(String input) {
-      String[] p = input.split("\\s+");
-      return Pair.of(p[0], p[1]);
-    }
-  }
-
-  @Test
-  public void testCollectUrls() throws Exception {
-    Pipeline p = new MRPipeline(AggregateIT.class, tmpDir.getDefaultConfiguration());
-    String urlsInputPath = tmpDir.copyResourceFileName("urls.txt");
-    PTable<String, Collection<String>> urls = Aggregate.collectValues(p.readTextFile(urlsInputPath).parallelDo(
-        new SplitFn(), tableOf(strings(), strings())));
-    for (Pair<String, Collection<String>> e : urls.materialize()) {
-      String key = e.first();
-      int expectedSize = 0;
-      if ("www.A.com".equals(key)) {
-        expectedSize = 4;
-      } else if ("www.B.com".equals(key) || "www.F.com".equals(key)) {
-        expectedSize = 2;
-      } else if ("www.C.com".equals(key) || "www.D.com".equals(key) || "www.E.com".equals(key)) {
-        expectedSize = 1;
-      }
-      assertEquals("Checking key = " + key, expectedSize, e.second().size());
-    }
-    p.done();
-  }
-
-  @Test
-  public void testTopN() throws Exception {
-    PTableType<String, Integer> ptype = Avros.tableOf(Avros.strings(), Avros.ints());
-    PTable<String, Integer> counts = MemPipeline.typedTableOf(ptype, "foo", 12, "bar", 17, "baz", 29);
-
-    PTable<String, Integer> top2 = Aggregate.top(counts, 2, true);
-    assertEquals(ImmutableList.of(Pair.of("baz", 29), Pair.of("bar", 17)), top2.materialize());
-
-    PTable<String, Integer> bottom2 = Aggregate.top(counts, 2, false);
-    assertEquals(ImmutableList.of(Pair.of("foo", 12), Pair.of("bar", 17)), bottom2.materialize());
-  }
-
-  @Test
-  public void testCollectValues_Writables() throws IOException {
-    Pipeline pipeline = new MRPipeline(AggregateIT.class, tmpDir.getDefaultConfiguration());
-    Map<Integer, Collection<Text>> collectionMap = pipeline.readTextFile(tmpDir.copyResourceFileName("set2.txt"))
-        .parallelDo(new MapStringToTextPair(), Writables.tableOf(Writables.ints(), Writables.writables(Text.class)))
-        .collectValues().materializeToMap();
-
-    assertEquals(1, collectionMap.size());
-
-    assertTrue(collectionMap.get(1).containsAll(Lists.newArrayList(new Text("c"), new Text("d"), new Text("a"))));
-  }
-
-  @Test
-  public void testCollectValues_Avro() throws IOException {
-
-    MapStringToEmployeePair mapFn = new MapStringToEmployeePair();
-    Pipeline pipeline = new MRPipeline(AggregateIT.class, tmpDir.getDefaultConfiguration());
-    Map<Integer, Collection<Employee>> collectionMap = pipeline.readTextFile(tmpDir.copyResourceFileName("set2.txt"))
-        .parallelDo(mapFn, Avros.tableOf(Avros.ints(), Avros.records(Employee.class))).collectValues()
-        .materializeToMap();
-
-    assertEquals(1, collectionMap.size());
-
-    Employee empC = mapFn.map("c").second();
-    Employee empD = mapFn.map("d").second();
-    Employee empA = mapFn.map("a").second();
-
-    assertTrue(collectionMap.get(1).containsAll(Lists.newArrayList(empC, empD, empA)));
-  }
-
-  private static class MapStringToTextPair extends MapFn<String, Pair<Integer, Text>> {
-    @Override
-    public Pair<Integer, Text> map(String input) {
-      return Pair.of(1, new Text(input));
-    }
-  }
-
-  private static class MapStringToEmployeePair extends MapFn<String, Pair<Integer, Employee>> {
-    @Override
-    public Pair<Integer, Employee> map(String input) {
-      Employee emp = new Employee();
-      emp.name = input;
-      emp.salary = 0;
-      emp.department = "";
-      return Pair.of(1, emp);
-    }
-  }
-
-  public static class PojoText {
-    private String value;
-
-    public PojoText() {
-      this("");
-    }
-
-    public PojoText(String value) {
-      this.value = value;
-    }
-
-    public String getValue() {
-      return value;
-    }
-
-    public void setValue(String value) {
-      this.value = value;
-    }
-
-    @Override
-    public String toString() {
-      return String.format("PojoText<%s>", this.value);
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-      if (this == obj)
-        return true;
-      if (obj == null)
-        return false;
-      if (getClass() != obj.getClass())
-        return false;
-      PojoText other = (PojoText) obj;
-      if (value == null) {
-        if (other.value != null)
-          return false;
-      } else if (!value.equals(other.value))
-        return false;
-      return true;
-    }
-
-  }
-}
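
Several of the aggregations exercised above can also be tried entirely in memory, the way testTopN does, since MemPipeline.typedTableOf builds a PTable without running any MapReduce job. A small sketch with invented counts; AggregateSketch is a placeholder name:

import java.util.Collection;

import org.apache.crunch.PCollection;
import org.apache.crunch.PTable;
import org.apache.crunch.impl.mem.MemPipeline;
import org.apache.crunch.lib.Aggregate;
import org.apache.crunch.types.avro.Avros;

import com.google.common.collect.Lists;

public class AggregateSketch {
  public static void main(String[] args) {
    PTable<String, Integer> counts = MemPipeline.typedTableOf(
        Avros.tableOf(Avros.strings(), Avros.ints()),
        "foo", 12, "bar", 17, "baz", 29, "foo", 1);

    // The two pairs with the largest counts.
    PTable<String, Integer> top2 = Aggregate.top(counts, 2, true);

    // Every value observed for each key, collected into a bag.
    PTable<String, Collection<Integer>> byKey = Aggregate.collectValues(counts);

    // Largest single value across the whole collection.
    PCollection<Integer> values = counts.values();
    Integer max = Aggregate.max(values).getValue();

    System.out.println(Lists.newArrayList(top2.materialize()));
    System.out.println(byKey.materializeToMap());
    System.out.println(max);
  }
}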

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/lib/AvroTypeSortIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/lib/AvroTypeSortIT.java b/crunch/src/it/java/org/apache/crunch/lib/AvroTypeSortIT.java
deleted file mode 100644
index a832a5d..0000000
--- a/crunch/src/it/java/org/apache/crunch/lib/AvroTypeSortIT.java
+++ /dev/null
@@ -1,145 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib;
-
-import static junit.framework.Assert.assertEquals;
-import static org.apache.crunch.types.avro.Avros.ints;
-import static org.apache.crunch.types.avro.Avros.records;
-import static org.apache.crunch.types.avro.Avros.strings;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.List;
-
-import org.apache.avro.file.DataFileWriter;
-import org.apache.avro.specific.SpecificDatumWriter;
-import org.apache.crunch.MapFn;
-import org.apache.crunch.PCollection;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.io.At;
-import org.apache.crunch.test.Person;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-
-import com.google.common.collect.Lists;
-
-/**
- * Test sorting Avro types by selected inner field
- */
-public class AvroTypeSortIT implements Serializable {
-
-  private static final long serialVersionUID = 1344118240353796561L;
-
-  private transient File avroFile;
-  @Rule
-  public transient TemporaryPath tmpDir = TemporaryPaths.create();
-
-  @Before
-  public void setUp() throws IOException {
-    avroFile = File.createTempFile("avrotest", ".avro");
-  }
-
-  @After
-  public void tearDown() {
-    avroFile.delete();
-  }
-
-  @Test
-  public void testSortAvroTypesBySelectedFields() throws Exception {
-
-    MRPipeline pipeline = new MRPipeline(AvroTypeSortIT.class, tmpDir.getDefaultConfiguration());
-
-    Person ccc10 = createPerson("CCC", 10);
-    Person bbb20 = createPerson("BBB", 20);
-    Person aaa30 = createPerson("AAA", 30);
-
-    writeAvroFile(Lists.newArrayList(ccc10, bbb20, aaa30), avroFile);
-
-    PCollection<Person> unsorted = pipeline.read(At.avroFile(avroFile.getAbsolutePath(), records(Person.class)));
-
-    // Sort by Name
-    MapFn<Person, String> nameExtractor = new MapFn<Person, String>() {
-
-      @Override
-      public String map(Person input) {
-        return input.name.toString();
-      }
-    };
-
-    PCollection<Person> sortedByName = unsorted.by(nameExtractor, strings()).groupByKey().ungroup().values();
-
-    List<Person> sortedByNameList = Lists.newArrayList(sortedByName.materialize());
-
-    assertEquals(3, sortedByNameList.size());
-    assertEquals(aaa30, sortedByNameList.get(0));
-    assertEquals(bbb20, sortedByNameList.get(1));
-    assertEquals(ccc10, sortedByNameList.get(2));
-
-    // Sort by Age
-
-    MapFn<Person, Integer> ageExtractor = new MapFn<Person, Integer>() {
-
-      @Override
-      public Integer map(Person input) {
-        return input.age;
-      }
-    };
-
-    PCollection<Person> sortedByAge = unsorted.by(ageExtractor, ints()).groupByKey().ungroup().values();
-
-    List<Person> sortedByAgeList = Lists.newArrayList(sortedByAge.materialize());
-
-    assertEquals(3, sortedByAgeList.size());
-    assertEquals(ccc10, sortedByAgeList.get(0));
-    assertEquals(bbb20, sortedByAgeList.get(1));
-    assertEquals(aaa30, sortedByAgeList.get(2));
-
-    pipeline.done();
-  }
-
-  private void writeAvroFile(List<Person> people, File avroFile) throws IOException {
-
-    FileOutputStream outputStream = new FileOutputStream(avroFile);
-    SpecificDatumWriter<Person> writer = new SpecificDatumWriter<Person>(Person.class);
-
-    DataFileWriter<Person> dataFileWriter = new DataFileWriter<Person>(writer);
-    dataFileWriter.create(Person.SCHEMA$, outputStream);
-    for (Person person : people) {
-      dataFileWriter.append(person);
-    }
-    dataFileWriter.close();
-    outputStream.close();
-  }
-
-  private Person createPerson(String name, int age) throws IOException {
-
-    Person person = new Person();
-    person.age = age;
-    person.name = name;
-    List<CharSequence> siblingNames = Lists.newArrayList();
-    person.siblingnames = siblingNames;
-
-    return person;
-  }
-}
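
The sorting approach above boils down to one chain: key each record by the field to sort on with by(), group so the keys come out ordered, then ungroup and drop the keys. A condensed sketch of the sort-by-name variant, assuming the same Person test record; SortByFieldSketch is a placeholder name:

import static org.apache.crunch.types.avro.Avros.strings;

import org.apache.crunch.MapFn;
import org.apache.crunch.PCollection;
import org.apache.crunch.test.Person;

public class SortByFieldSketch {

  // people: any PCollection<Person>, e.g. read with At.avroFile as in the test above.
  static PCollection<Person> sortByName(PCollection<Person> people) {
    return people
        .by(new MapFn<Person, String>() {     // key each record by the field to sort on
          @Override
          public String map(Person input) {
            return input.name.toString();
          }
        }, strings())
        .groupByKey()                         // grouping orders the keys
        .ungroup()                            // back to (name, person) pairs
        .values();                            // drop the key, keep the ordered records
  }
}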

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/lib/CogroupIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/lib/CogroupIT.java b/crunch/src/it/java/org/apache/crunch/lib/CogroupIT.java
deleted file mode 100644
index 4b28da7..0000000
--- a/crunch/src/it/java/org/apache/crunch/lib/CogroupIT.java
+++ /dev/null
@@ -1,112 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib;
-
-import static org.hamcrest.Matchers.is;
-import static org.junit.Assert.assertThat;
-
-import java.io.IOException;
-import java.util.Collection;
-import java.util.Map;
-
-import org.apache.crunch.DoFn;
-import org.apache.crunch.Emitter;
-import org.apache.crunch.PCollection;
-import org.apache.crunch.PTable;
-import org.apache.crunch.Pair;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.test.Tests;
-import org.apache.crunch.types.PTableType;
-import org.apache.crunch.types.PTypeFamily;
-import org.apache.crunch.types.avro.AvroTypeFamily;
-import org.apache.crunch.types.writable.WritableTypeFamily;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableMap;
-
-
-public class CogroupIT {
-  @Rule
-  public TemporaryPath tmpDir = TemporaryPaths.create();
-  private MRPipeline pipeline;
-  private PCollection<String> lines1;
-  private PCollection<String> lines2;
-
-
-  @Before
-  public void setUp() throws IOException {
-    pipeline = new MRPipeline(CogroupIT.class, tmpDir.getDefaultConfiguration());
-    lines1 = pipeline.readTextFile(tmpDir.copyResourceFileName(Tests.resource(this, "src1.txt")));
-    lines2 = pipeline.readTextFile(tmpDir.copyResourceFileName(Tests.resource(this, "src2.txt")));
-  }
-
-  @After
-  public void tearDown() {
-    pipeline.done();
-  }
-
-  @Test
-  public void testCogroupWritables() {
-    runCogroup(WritableTypeFamily.getInstance());
-  }
-
-  @Test
-  public void testCogroupAvro() {
-    runCogroup(AvroTypeFamily.getInstance());
-  }
-
-  public void runCogroup(PTypeFamily ptf) {
-    PTableType<String, String> tt = ptf.tableOf(ptf.strings(), ptf.strings());
-
-    PTable<String, String> kv1 = lines1.parallelDo("kv1", new KeyValueSplit(), tt);
-    PTable<String, String> kv2 = lines2.parallelDo("kv2", new KeyValueSplit(), tt);
-
-    PTable<String, Pair<Collection<String>, Collection<String>>> cg = Cogroup.cogroup(kv1, kv2);
-
-    Map<String, Pair<Collection<String>, Collection<String>>> actual = cg.materializeToMap();
-
-    Map<String, Pair<Collection<String>, Collection<String>>> expected = ImmutableMap.of(
-        "a", Pair.of(coll("1-1", "1-4"), coll()),
-        "b", Pair.of(coll("1-2"), coll("2-1")),
-        "c", Pair.of(coll("1-3"), coll("2-2", "2-3")),
-        "d", Pair.of(coll(), coll("2-4"))
-    );
-
-    assertThat(actual, is(expected));
-  }
-
-
-  private static class KeyValueSplit extends DoFn<String, Pair<String, String>> {
-    @Override
-    public void process(String input, Emitter<Pair<String, String>> emitter) {
-      String[] fields = input.split(",");
-      emitter.emit(Pair.of(fields[0], fields[1]));
-    }
-  }
-
-  private static Collection<String> coll(String... values) {
-    return ImmutableList.copyOf(values);
-  }
-  
-}
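
Cogroup.cogroup is the entire API surface this test exercises: for every key seen in either table it pairs the collection of values from the first table with the collection from the second, leaving an empty bag where a key is missing (as the expected entries for "a" and "d" above show). A minimal sketch; CogroupSketch is a placeholder name:

import java.util.Collection;

import org.apache.crunch.PTable;
import org.apache.crunch.Pair;
import org.apache.crunch.lib.Cogroup;

public class CogroupSketch {

  // kv1, kv2: key/value tables such as the ones built by KeyValueSplit above.
  static PTable<String, Pair<Collection<String>, Collection<String>>> bothSides(
      PTable<String, String> kv1, PTable<String, String> kv2) {
    // One output entry per distinct key, pairing kv1's values with kv2's values.
    return Cogroup.cogroup(kv1, kv2);
  }
}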

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/lib/SecondarySortIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/lib/SecondarySortIT.java b/crunch/src/it/java/org/apache/crunch/lib/SecondarySortIT.java
deleted file mode 100644
index 242f621..0000000
--- a/crunch/src/it/java/org/apache/crunch/lib/SecondarySortIT.java
+++ /dev/null
@@ -1,65 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib;
-
-import static org.apache.crunch.types.avro.Avros.*;
-import static org.junit.Assert.assertEquals;
-
-import java.io.Serializable;
-
-import org.apache.crunch.MapFn;
-import org.apache.crunch.PTable;
-import org.apache.crunch.Pair;
-import org.apache.crunch.Pipeline;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.io.From;
-import org.apache.crunch.test.CrunchTestSupport;
-import org.junit.Test;
-
-import com.google.common.base.Joiner;
-import com.google.common.collect.ImmutableList;
-
-
-public class SecondarySortIT extends CrunchTestSupport implements Serializable {
-
-  @Test
-  public void testSecondarySort() throws Exception {
-    Pipeline p = new MRPipeline(SecondarySortIT.class, tempDir.getDefaultConfiguration());
-    String inputFile = tempDir.copyResourceFileName("secondary_sort_input.txt");
-    
-    PTable<String, Pair<Integer, Integer>> in = p.read(From.textFile(inputFile))
-        .parallelDo(new MapFn<String, Pair<String, Pair<Integer, Integer>>>() {
-          @Override
-          public Pair<String, Pair<Integer, Integer>> map(String input) {
-            String[] pieces = input.split(",");
-            return Pair.of(pieces[0],
-                Pair.of(Integer.valueOf(pieces[1].trim()), Integer.valueOf(pieces[2].trim())));
-          }
-        }, tableOf(strings(), pairs(ints(), ints())));
-    Iterable<String> lines = SecondarySort.sortAndApply(in, new MapFn<Pair<String, Iterable<Pair<Integer, Integer>>>, String>() {
-      @Override
-      public String map(Pair<String, Iterable<Pair<Integer, Integer>>> input) {
-        Joiner j = Joiner.on(',');
-        return j.join(input.first(), j.join(input.second()));
-      }
-    }, strings()).materialize();
-    assertEquals(ImmutableList.of("one,[-5,10],[1,1],[2,-3]", "three,[0,-1]", "two,[1,7],[2,6],[4,5]"),
-        ImmutableList.copyOf(lines));
-    p.done();
-  }
-}
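
SecondarySort.sortAndApply groups the table by its key and presents each group's (first, second) value pairs to the supplied function already sorted by the first element, which is how the test obtains "one,[-5,10],[1,1],[2,-3]". The call shape, pulled out into a helper; SecondarySortSketch is a placeholder name:

import static org.apache.crunch.types.avro.Avros.strings;

import org.apache.crunch.MapFn;
import org.apache.crunch.PCollection;
import org.apache.crunch.PTable;
import org.apache.crunch.Pair;
import org.apache.crunch.lib.SecondarySort;

import com.google.common.base.Joiner;

public class SecondarySortSketch {

  // in: rows keyed by a group id, valued by (sort field, payload) pairs,
  // like the table built from secondary_sort_input.txt in the test above.
  static PCollection<String> joinSortedGroups(PTable<String, Pair<Integer, Integer>> in) {
    return SecondarySort.sortAndApply(in,
        new MapFn<Pair<String, Iterable<Pair<Integer, Integer>>>, String>() {
          @Override
          public String map(Pair<String, Iterable<Pair<Integer, Integer>>> group) {
            // group.second() arrives sorted by the first element of each pair
            Joiner j = Joiner.on(',');
            return j.join(group.first(), j.join(group.second()));
          }
        }, strings());
  }
}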

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/lib/SetIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/lib/SetIT.java b/crunch/src/it/java/org/apache/crunch/lib/SetIT.java
deleted file mode 100644
index d1300d2..0000000
--- a/crunch/src/it/java/org/apache/crunch/lib/SetIT.java
+++ /dev/null
@@ -1,114 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Iterator;
-
-import org.apache.crunch.PCollection;
-import org.apache.crunch.Pipeline;
-import org.apache.crunch.Tuple3;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.io.At;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.PTypeFamily;
-import org.apache.crunch.types.avro.AvroTypeFamily;
-import org.apache.crunch.types.writable.WritableTypeFamily;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-import org.junit.runners.Parameterized.Parameters;
-
-import com.google.common.collect.Lists;
-
-@RunWith(value = Parameterized.class)
-public class SetIT {
-
-  private PTypeFamily typeFamily;
-
-  private Pipeline pipeline;
-  private PCollection<String> set1;
-  private PCollection<String> set2;
-
-  public SetIT(PTypeFamily typeFamily) {
-    this.typeFamily = typeFamily;
-  }
-  
-  @Rule
-  public TemporaryPath tmpDir = TemporaryPaths.create();
-
-  @Parameters
-  public static Collection<Object[]> data() {
-    Object[][] data = new Object[][] { { WritableTypeFamily.getInstance() }, { AvroTypeFamily.getInstance() } };
-    return Arrays.asList(data);
-  }
-
-  @Before
-  public void setUp() throws IOException {
-    String set1InputPath = tmpDir.copyResourceFileName("set1.txt");
-    String set2InputPath = tmpDir.copyResourceFileName("set2.txt");
-    pipeline = new MRPipeline(SetIT.class, tmpDir.getDefaultConfiguration());
-    set1 = pipeline.read(At.textFile(set1InputPath, typeFamily.strings()));
-    set2 = pipeline.read(At.textFile(set2InputPath, typeFamily.strings()));
-  }
-
-  @After
-  public void tearDown() {
-    pipeline.done();
-  }
-
-  @Test
-  public void testDifference() throws Exception {
-    PCollection<String> difference = Set.difference(set1, set2);
-    assertEquals(Lists.newArrayList("b", "e"), Lists.newArrayList(difference.materialize()));
-  }
-
-  @Test
-  public void testIntersection() throws Exception {
-    PCollection<String> intersection = Set.intersection(set1, set2);
-    assertEquals(Lists.newArrayList("a", "c"), Lists.newArrayList(intersection.materialize()));
-  }
-
-  @Test
-  public void testComm() throws Exception {
-    PCollection<Tuple3<String, String, String>> comm = Set.comm(set1, set2);
-    Iterator<Tuple3<String, String, String>> i = comm.materialize().iterator();
-    checkEquals(null, null, "a", i.next());
-    checkEquals("b", null, null, i.next());
-    checkEquals(null, null, "c", i.next());
-    checkEquals(null, "d", null, i.next());
-    checkEquals("e", null, null, i.next());
-    assertFalse(i.hasNext());
-  }
-
-  private void checkEquals(String s1, String s2, String s3, Tuple3<String, String, String> tuple) {
-    assertEquals("first string", s1, tuple.first());
-    assertEquals("second string", s2, tuple.second());
-    assertEquals("third string", s3, tuple.third());
-  }
-
-}
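
The three Set operations behave like comm(1) over two collections: difference keeps elements that appear only in the first input, intersection keeps elements common to both, and comm emits a Tuple3 whose first, second, or third slot is filled depending on where each element occurs, matching the checkEquals calls above. A minimal sketch; SetSketch is a placeholder name:

import org.apache.crunch.PCollection;
import org.apache.crunch.Tuple3;
import org.apache.crunch.lib.Set;

public class SetSketch {

  // set1, set2: any two PCollections of the same PType, e.g. the text files read in SetIT.
  static PCollection<Tuple3<String, String, String>> compare(PCollection<String> set1,
      PCollection<String> set2) {
    PCollection<String> onlyInFirst = Set.difference(set1, set2);  // in set1 but not set2
    PCollection<String> inBoth = Set.intersection(set1, set2);     // in both inputs
    // comm(1)-style: slot 1 = only in set1, slot 2 = only in set2, slot 3 = in both.
    return Set.comm(set1, set2);
  }
}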

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/lib/SortByValueIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/lib/SortByValueIT.java b/crunch/src/it/java/org/apache/crunch/lib/SortByValueIT.java
deleted file mode 100644
index e19c7d3..0000000
--- a/crunch/src/it/java/org/apache/crunch/lib/SortByValueIT.java
+++ /dev/null
@@ -1,84 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib;
-
-import static org.junit.Assert.assertEquals;
-
-import org.apache.crunch.MapFn;
-import org.apache.crunch.PCollection;
-import org.apache.crunch.PTable;
-import org.apache.crunch.Pair;
-import org.apache.crunch.Pipeline;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.io.From;
-import org.apache.crunch.lib.Sort.ColumnOrder;
-import org.apache.crunch.lib.Sort.Order;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.PTypeFamily;
-import org.apache.crunch.types.avro.AvroTypeFamily;
-import org.apache.crunch.types.writable.WritableTypeFamily;
-import org.junit.Rule;
-import org.junit.Test;
-
-import com.google.common.collect.ImmutableList;
-
-/**
- * Tests {@link Sort#sortPairs} with per-column orderings: count descending, then key ascending.
- */
-public class SortByValueIT {
-  @Rule
-  public transient TemporaryPath tmpDir = TemporaryPaths.create();
-  
-  private static class SplitFn extends MapFn<String, Pair<String, Long>> {
-    private String sep;
-    
-    public SplitFn(String sep) {
-      this.sep = sep;
-    }
-    
-    @Override
-    public Pair<String, Long> map(String input) {
-      String[] pieces = input.split(sep);
-      return Pair.of(pieces[0], Long.valueOf(pieces[1]));
-    }
-  }
-  
-  @Test
-  public void testSortByValueWritables() throws Exception {
-    run(new MRPipeline(SortByValueIT.class), WritableTypeFamily.getInstance());
-  }
-  
-  @Test
-  public void testSortByValueAvro() throws Exception {
-    run(new MRPipeline(SortByValueIT.class), AvroTypeFamily.getInstance());
-  }
-  
-  public void run(Pipeline pipeline, PTypeFamily ptf) throws Exception {
-    String sbv = tmpDir.copyResourceFileName("sort_by_value.txt");
-    PTable<String, Long> letterCounts = pipeline.read(From.textFile(sbv)).parallelDo(new SplitFn("\t"),
-        ptf.tableOf(ptf.strings(), ptf.longs()));
-    PCollection<Pair<String, Long>> sorted = Sort.sortPairs(
-        letterCounts,
-        new ColumnOrder(2, Order.DESCENDING),
-        new ColumnOrder(1, Order.ASCENDING));
-    assertEquals(
-        ImmutableList.of(Pair.of("C", 3L), Pair.of("A", 2L), Pair.of("D", 2L), Pair.of("B", 1L), Pair.of("E", 1L)),
-        ImmutableList.copyOf(sorted.materialize()));
-  }
-}
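
Sort.sortPairs takes one ColumnOrder per 1-based tuple position and applies them in the order given, so the call above orders by the count (column 2) descending and breaks ties on the key (column 1) ascending. The same call pulled out on its own; SortPairsSketch is a placeholder name:

import org.apache.crunch.PCollection;
import org.apache.crunch.PTable;
import org.apache.crunch.Pair;
import org.apache.crunch.lib.Sort;
import org.apache.crunch.lib.Sort.ColumnOrder;
import org.apache.crunch.lib.Sort.Order;

public class SortPairsSketch {

  // counts: e.g. the letter/count table built by SplitFn in the test above.
  static PCollection<Pair<String, Long>> byCountThenKey(PTable<String, Long> counts) {
    return Sort.sortPairs(
        counts,
        new ColumnOrder(2, Order.DESCENDING),   // primary: count, largest first
        new ColumnOrder(1, Order.ASCENDING));   // tie-break: key, alphabetical
  }
}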

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/lib/SortIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/lib/SortIT.java b/crunch/src/it/java/org/apache/crunch/lib/SortIT.java
deleted file mode 100644
index bad4864..0000000
--- a/crunch/src/it/java/org/apache/crunch/lib/SortIT.java
+++ /dev/null
@@ -1,327 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib;
-
-import static org.apache.crunch.lib.Sort.ColumnOrder.by;
-import static org.apache.crunch.lib.Sort.Order.ASCENDING;
-import static org.apache.crunch.lib.Sort.Order.DESCENDING;
-import static org.apache.crunch.test.StringWrapper.wrap;
-import static org.junit.Assert.assertEquals;
-
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.crunch.DoFn;
-import org.apache.crunch.Emitter;
-import org.apache.crunch.MapFn;
-import org.apache.crunch.PCollection;
-import org.apache.crunch.PTable;
-import org.apache.crunch.Pair;
-import org.apache.crunch.Pipeline;
-import org.apache.crunch.Tuple3;
-import org.apache.crunch.Tuple4;
-import org.apache.crunch.TupleN;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.lib.Sort.ColumnOrder;
-import org.apache.crunch.lib.Sort.Order;
-import org.apache.crunch.test.StringWrapper;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.PTypeFamily;
-import org.apache.crunch.types.avro.AvroTypeFamily;
-import org.apache.crunch.types.avro.Avros;
-import org.apache.crunch.types.writable.WritableTypeFamily;
-import org.junit.Rule;
-import org.junit.Test;
-
-import com.google.common.collect.Lists;
-
-public class SortIT implements Serializable {
-  @Rule
-  public transient TemporaryPath tmpDir = TemporaryPaths.create();
-
-  @Test
-  public void testWritableSortAsc() throws Exception {
-    runSingle(new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance(), Order.ASCENDING,
-        "A\tand this text as well");
-  }
-
-  @Test
-  public void testWritableSortDesc() throws Exception {
-    runSingle(new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance(), Order.DESCENDING,
-        "B\tthis doc has some text");
-  }
-
-  @Test
-  public void testWritableSortAscDesc() throws Exception {
-    runPair(new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance(), by(1, ASCENDING), by(2, DESCENDING), "A",
-        "this doc has this text");
-  }
-
-  @Test
-  public void testWritableSortSecondDescFirstAsc() throws Exception {
-    runPair(new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance(), by(2, DESCENDING), by(1, ASCENDING), "A",
-        "this doc has this text");
-  }
-
-  @Test
-  public void testWritableSortTripleAscDescAsc() throws Exception {
-    runTriple(new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance(), by(1, ASCENDING), by(2, DESCENDING),
-        by(3, ASCENDING), "A", "this", "doc");
-  }
-
-  @Test
-  public void testWritableSortQuadAscDescAscDesc() throws Exception {
-    runQuad(new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance(), by(1, ASCENDING), by(2, DESCENDING),
-        by(3, ASCENDING), by(4, DESCENDING), "A", "this", "doc", "has");
-  }
-
-  @Test
-  public void testWritableSortTupleNAscDesc() throws Exception {
-    runTupleN(new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance(),
-        new ColumnOrder[] { by(1, ASCENDING), by(2, DESCENDING) }, new String[] { "A", "this doc has this text" });
-  }
-
-  @Test
-  public void testWritableSortTable() throws Exception {
-    runTable(new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance(), "A");
-  }
-
-  @Test
-  public void testAvroSortAsc() throws Exception {
-    runSingle(new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration()), AvroTypeFamily.getInstance(), Order.ASCENDING, "A\tand this text as well");
-  }
-
-  @Test
-  public void testAvroSortDesc() throws Exception {
-    runSingle(new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration()), AvroTypeFamily.getInstance(), Order.DESCENDING, "B\tthis doc has some text");
-  }
-
-  @Test
-  public void testAvroSortPairAscDesc() throws Exception {
-    runPair(new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration()), AvroTypeFamily.getInstance(), by(1, ASCENDING), by(2, DESCENDING), "A",
-        "this doc has this text");
-  }
-
-  @Test
-  public void testAvroSortPairSecondDescFirstAsc() throws Exception {
-    runPair(new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration()), AvroTypeFamily.getInstance(), by(2, DESCENDING), by(1, ASCENDING), "A",
-        "this doc has this text");
-  }
-
-  @Test
-  public void testAvroSortTripleAscDescAsc() throws Exception {
-    runTriple(new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration()), AvroTypeFamily.getInstance(), by(1, ASCENDING), by(2, DESCENDING),
-        by(3, ASCENDING), "A", "this", "doc");
-  }
-
-  @Test
-  public void testAvroSortQuadAscDescAscDesc() throws Exception {
-    runQuad(new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration()), AvroTypeFamily.getInstance(), by(1, ASCENDING), by(2, DESCENDING),
-        by(3, ASCENDING), by(4, DESCENDING), "A", "this", "doc", "has");
-  }
-
-  @Test
-  public void testAvroSortTupleNAscDesc() throws Exception {
-    runTupleN(new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration()), AvroTypeFamily.getInstance(),
-        new ColumnOrder[] { by(1, ASCENDING), by(2, DESCENDING) }, new String[] { "A", "this doc has this text" });
-  }
-
-  @Test
-  public void testAvroReflectSortPair() throws IOException {
-    Pipeline pipeline = new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration());
-    pipeline.enableDebug();
-    String rsrc = tmpDir.copyResourceFileName("set2.txt");
-    PCollection<Pair<String, StringWrapper>> in = pipeline.readTextFile(rsrc)
-        .parallelDo(new MapFn<String, Pair<String, StringWrapper>>() {
-
-          @Override
-          public Pair<String, StringWrapper> map(String input) {
-            return Pair.of(input, wrap(input));
-          }
-        }, Avros.pairs(Avros.strings(), Avros.reflects(StringWrapper.class)));
-    PCollection<Pair<String, StringWrapper>> sorted = Sort.sort(in, Order.ASCENDING);
-    
-    List<Pair<String, StringWrapper>> expected = Lists.newArrayList();
-    expected.add(Pair.of("a", wrap("a")));
-    expected.add(Pair.of("c", wrap("c")));
-    expected.add(Pair.of("d", wrap("d")));
-
-    assertEquals(expected, Lists.newArrayList(sorted.materialize()));
-  }
-
-  @Test
-  public void testAvroReflectSortTable() throws IOException {
-    Pipeline pipeline = new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration());
-    PTable<String, StringWrapper> unsorted = pipeline.readTextFile(tmpDir.copyResourceFileName("set2.txt")).parallelDo(
-        new MapFn<String, Pair<String, StringWrapper>>() {
-
-          @Override
-          public Pair<String, StringWrapper> map(String input) {
-            return Pair.of(input, wrap(input));
-          }
-        }, Avros.tableOf(Avros.strings(), Avros.reflects(StringWrapper.class)));
-
-    PTable<String, StringWrapper> sorted = Sort.sort(unsorted);
-
-    List<Pair<String, StringWrapper>> expected = Lists.newArrayList();
-    expected.add(Pair.of("a", wrap("a")));
-    expected.add(Pair.of("c", wrap("c")));
-    expected.add(Pair.of("d", wrap("d")));
-
-    assertEquals(expected, Lists.newArrayList(sorted.materialize()));
-  }
-
-  @Test
-  public void testAvroSortTable() throws Exception {
-    runTable(new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration()), AvroTypeFamily.getInstance(), "A");
-  }
-
-  private void runSingle(Pipeline pipeline, PTypeFamily typeFamily, Order order, String firstLine) throws IOException {
-    String inputPath = tmpDir.copyResourceFileName("docs.txt");
-
-    PCollection<String> input = pipeline.readTextFile(inputPath);
-    // the following converts the input from Writables to the required type family
-    PCollection<String> input2 = input.parallelDo(new DoFn<String, String>() {
-      @Override
-      public void process(String input, Emitter<String> emitter) {
-        emitter.emit(input);
-      }
-    }, typeFamily.strings());
-    PCollection<String> sorted = Sort.sort(input2, order);
-    Iterable<String> lines = sorted.materialize();
-
-    assertEquals(firstLine, lines.iterator().next());
-    pipeline.done(); // TODO: finally
-  }
-
-  private void runPair(Pipeline pipeline, PTypeFamily typeFamily, ColumnOrder first, ColumnOrder second,
-      String firstField, String secondField) throws IOException {
-    String inputPath = tmpDir.copyResourceFileName("docs.txt");
-
-    PCollection<String> input = pipeline.readTextFile(inputPath);
-    PTable<String, String> kv = input.parallelDo(new DoFn<String, Pair<String, String>>() {
-      @Override
-      public void process(String input, Emitter<Pair<String, String>> emitter) {
-        String[] split = input.split("[\t]+");
-        emitter.emit(Pair.of(split[0], split[1]));
-      }
-    }, typeFamily.tableOf(typeFamily.strings(), typeFamily.strings()));
-    PCollection<Pair<String, String>> sorted = Sort.sortPairs(kv, first, second);
-    List<Pair<String, String>> lines = Lists.newArrayList(sorted.materialize());
-    Pair<String, String> l = lines.iterator().next();
-    assertEquals(firstField, l.first());
-    assertEquals(secondField, l.second());
-    pipeline.done();
-  }
-
-  private void runTriple(Pipeline pipeline, PTypeFamily typeFamily, ColumnOrder first, ColumnOrder second,
-      ColumnOrder third, String firstField, String secondField, String thirdField) throws IOException {
-    String inputPath = tmpDir.copyResourceFileName("docs.txt");
-
-    PCollection<String> input = pipeline.readTextFile(inputPath);
-    PCollection<Tuple3<String, String, String>> kv = input.parallelDo(
-        new DoFn<String, Tuple3<String, String, String>>() {
-          @Override
-          public void process(String input, Emitter<Tuple3<String, String, String>> emitter) {
-            String[] split = input.split("[\t ]+");
-            int len = split.length;
-            emitter.emit(Tuple3.of(split[0], split[1 % len], split[2 % len]));
-          }
-        }, typeFamily.triples(typeFamily.strings(), typeFamily.strings(), typeFamily.strings()));
-    PCollection<Tuple3<String, String, String>> sorted = Sort.sortTriples(kv, first, second, third);
-    List<Tuple3<String, String, String>> lines = Lists.newArrayList(sorted.materialize());
-    Tuple3<String, String, String> l = lines.iterator().next();
-    assertEquals(firstField, l.first());
-    assertEquals(secondField, l.second());
-    assertEquals(thirdField, l.third());
-    pipeline.done();
-  }
-
-  private void runQuad(Pipeline pipeline, PTypeFamily typeFamily, ColumnOrder first, ColumnOrder second,
-      ColumnOrder third, ColumnOrder fourth, String firstField, String secondField, String thirdField,
-      String fourthField) throws IOException {
-    String inputPath = tmpDir.copyResourceFileName("docs.txt");
-
-    PCollection<String> input = pipeline.readTextFile(inputPath);
-    PCollection<Tuple4<String, String, String, String>> kv = input.parallelDo(
-        new DoFn<String, Tuple4<String, String, String, String>>() {
-          @Override
-          public void process(String input, Emitter<Tuple4<String, String, String, String>> emitter) {
-            String[] split = input.split("[\t ]+");
-            int len = split.length;
-            emitter.emit(Tuple4.of(split[0], split[1 % len], split[2 % len], split[3 % len]));
-          }
-        }, typeFamily.quads(typeFamily.strings(), typeFamily.strings(), typeFamily.strings(), typeFamily.strings()));
-    PCollection<Tuple4<String, String, String, String>> sorted = Sort.sortQuads(kv, first, second, third, fourth);
-    Iterable<Tuple4<String, String, String, String>> lines = sorted.materialize();
-    Tuple4<String, String, String, String> l = lines.iterator().next();
-    assertEquals(firstField, l.first());
-    assertEquals(secondField, l.second());
-    assertEquals(thirdField, l.third());
-    assertEquals(fourthField, l.fourth());
-    pipeline.done();
-  }
-
-  private void runTupleN(Pipeline pipeline, PTypeFamily typeFamily, ColumnOrder[] orders, String[] fields)
-      throws IOException {
-    String inputPath = tmpDir.copyResourceFileName("docs.txt");
-
-    PCollection<String> input = pipeline.readTextFile(inputPath);
-    PType[] types = new PType[orders.length];
-    Arrays.fill(types, typeFamily.strings());
-    PCollection<TupleN> kv = input.parallelDo(new DoFn<String, TupleN>() {
-      @Override
-      public void process(String input, Emitter<TupleN> emitter) {
-        String[] split = input.split("[\t]+");
-        emitter.emit(new TupleN(split));
-      }
-    }, typeFamily.tuples(types));
-    PCollection<TupleN> sorted = Sort.sortTuples(kv, orders);
-    Iterable<TupleN> lines = sorted.materialize();
-    TupleN l = lines.iterator().next();
-    int i = 0;
-    for (String field : fields) {
-      assertEquals(field, l.get(i++));
-    }
-    pipeline.done();
-  }
-
-  private void runTable(Pipeline pipeline, PTypeFamily typeFamily, String firstKey) throws IOException {
-    String inputPath = tmpDir.copyResourceFileName("docs.txt");
-
-    PCollection<String> input = pipeline.readTextFile(inputPath);
-    PTable<String, String> table = input.parallelDo(new DoFn<String, Pair<String, String>>() {
-      @Override
-      public void process(String input, Emitter<Pair<String, String>> emitter) {
-        String[] split = input.split("[\t]+");
-        emitter.emit(Pair.of(split[0], split[1]));
-      }
-    }, typeFamily.tableOf(typeFamily.strings(), typeFamily.strings()));
-
-    PTable<String, String> sorted = Sort.sort(table);
-    Iterable<Pair<String, String>> lines = sorted.materialize();
-    Pair<String, String> l = lines.iterator().next();
-    assertEquals(firstKey, l.first());
-    pipeline.done();
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/lib/SpecificAvroGroupByIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/lib/SpecificAvroGroupByIT.java b/crunch/src/it/java/org/apache/crunch/lib/SpecificAvroGroupByIT.java
deleted file mode 100644
index 5292353..0000000
--- a/crunch/src/it/java/org/apache/crunch/lib/SpecificAvroGroupByIT.java
+++ /dev/null
@@ -1,119 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib;
-
-import static junit.framework.Assert.assertEquals;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.List;
-
-import org.apache.avro.file.DataFileWriter;
-import org.apache.avro.specific.SpecificDatumWriter;
-import org.apache.crunch.MapFn;
-import org.apache.crunch.PCollection;
-import org.apache.crunch.PTable;
-import org.apache.crunch.Pair;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.io.At;
-import org.apache.crunch.test.Person;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.avro.Avros;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-
-import com.google.common.collect.Lists;
-
-/**
- * Test {@link org.apache.crunch.types.avro.SafeAvroSerialization} with Specific Avro types
- */
-public class SpecificAvroGroupByIT implements Serializable {
-
-  private static final long serialVersionUID = 1344118240353796561L;
-
-  private transient File avroFile;
-  @Rule
-  public transient TemporaryPath tmpDir = TemporaryPaths.create();
-
-
-  @Before
-  public void setUp() throws IOException {
-    avroFile = File.createTempFile("avrotest", ".avro");
-  }
-
-  @After
-  public void tearDown() {
-    avroFile.delete();
-  }
-
-  @Test
-  public void testGrouByWithSpecificAvroType() throws Exception {
-    MRPipeline pipeline = new MRPipeline(SpecificAvroGroupByIT.class, tmpDir.getDefaultConfiguration());
-    testSpecificAvro(pipeline);
-  }
-
-  public void testSpecificAvro(MRPipeline pipeline) throws Exception {
-
-    createPersonAvroFile(avroFile);
-
-    PCollection<Person> unsorted = pipeline.read(At.avroFile(avroFile.getAbsolutePath(), Avros.records(Person.class)));
-
-    PTable<String, Person> sorted = unsorted.parallelDo(new MapFn<Person, Pair<String, Person>>() {
-
-      @Override
-      public Pair<String, Person> map(Person input) {
-        String key = input.name.toString();
-        return Pair.of(key, input);
-
-      }
-    }, Avros.tableOf(Avros.strings(), Avros.records(Person.class))).groupByKey().ungroup();
-
-    List<Pair<String, Person>> outputPersonList = Lists.newArrayList(sorted.materialize());
-
-    assertEquals(1, outputPersonList.size());
-    assertEquals(String.class, outputPersonList.get(0).first().getClass());
-    assertEquals(Person.class, outputPersonList.get(0).second().getClass());
-
-    pipeline.done();
-  }
-
-  private void createPersonAvroFile(File avroFile) throws IOException {
-
-    Person person = new Person();
-    person.age = 40;
-    person.name = "Bob";
-    List<CharSequence> siblingNames = Lists.newArrayList();
-    siblingNames.add("Bob" + "1");
-    siblingNames.add("Bob" + "2");
-    person.siblingnames = siblingNames;
-
-    FileOutputStream outputStream = new FileOutputStream(avroFile);
-    SpecificDatumWriter<Person> writer = new SpecificDatumWriter<Person>(Person.class);
-
-    DataFileWriter<Person> dataFileWriter = new DataFileWriter<Person>(writer);
-    dataFileWriter.create(Person.SCHEMA$, outputStream);
-    dataFileWriter.append(person);
-    dataFileWriter.close();
-    outputStream.close();
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/lib/join/FullOuterJoinIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/lib/join/FullOuterJoinIT.java b/crunch/src/it/java/org/apache/crunch/lib/join/FullOuterJoinIT.java
deleted file mode 100644
index 63d594d..0000000
--- a/crunch/src/it/java/org/apache/crunch/lib/join/FullOuterJoinIT.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib.join;
-
-import static org.junit.Assert.assertTrue;
-
-import org.apache.crunch.Pair;
-import org.apache.crunch.types.PTypeFamily;
-
-public class FullOuterJoinIT extends JoinTester {
-  @Override
-  public void assertPassed(Iterable<Pair<String, Long>> lines) {
-    boolean passed1 = false;
-    boolean passed2 = false;
-    boolean passed3 = false;
-    for (Pair<String, Long> line : lines) {
-      if ("wretched".equals(line.first()) && 24 == line.second()) {
-        passed1 = true;
-      }
-      if ("againe".equals(line.first()) && 10 == line.second()) {
-        passed2 = true;
-      }
-      if ("Montparnasse.".equals(line.first()) && 2 == line.second()) {
-        passed3 = true;
-      }
-    }
-    assertTrue(passed1);
-    assertTrue(passed2);
-    assertTrue(passed3);
-  }
-
-  @Override
-  protected JoinFn<String, Long, Long> getJoinFn(PTypeFamily typeFamily) {
-    return new FullOuterJoinFn<String, Long, Long>(typeFamily.strings(), typeFamily.longs());
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/lib/join/InnerJoinIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/lib/join/InnerJoinIT.java b/crunch/src/it/java/org/apache/crunch/lib/join/InnerJoinIT.java
deleted file mode 100644
index 4759050..0000000
--- a/crunch/src/it/java/org/apache/crunch/lib/join/InnerJoinIT.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib.join;
-
-import static org.junit.Assert.assertTrue;
-
-import org.apache.crunch.Pair;
-import org.apache.crunch.types.PTypeFamily;
-
-public class InnerJoinIT extends JoinTester {
-  @Override
-  public void assertPassed(Iterable<Pair<String, Long>> lines) {
-    boolean passed1 = false;
-    boolean passed2 = true;
-    boolean passed3 = true;
-    for (Pair<String, Long> line : lines) {
-      if ("wretched".equals(line.first()) && 24 == line.second()) {
-        passed1 = true;
-      }
-      if ("againe".equals(line.first())) {
-        passed2 = false;
-      }
-      if ("Montparnasse.".equals(line.first())) {
-        passed3 = false;
-      }
-    }
-    assertTrue(passed1);
-    assertTrue(passed2);
-    assertTrue(passed3);
-  }
-
-  @Override
-  protected JoinFn<String, Long, Long> getJoinFn(PTypeFamily typeFamily) {
-    return new InnerJoinFn<String, Long, Long>(typeFamily.strings(), typeFamily.longs());
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/lib/join/JoinTester.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/lib/join/JoinTester.java b/crunch/src/it/java/org/apache/crunch/lib/join/JoinTester.java
deleted file mode 100644
index 3e8ffda..0000000
--- a/crunch/src/it/java/org/apache/crunch/lib/join/JoinTester.java
+++ /dev/null
@@ -1,108 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib.join;
-
-import java.io.IOException;
-import java.io.Serializable;
-
-import org.apache.crunch.DoFn;
-import org.apache.crunch.Emitter;
-import org.apache.crunch.PCollection;
-import org.apache.crunch.PTable;
-import org.apache.crunch.Pair;
-import org.apache.crunch.Pipeline;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.lib.Aggregate;
-import org.apache.crunch.lib.Join;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.PTableType;
-import org.apache.crunch.types.PTypeFamily;
-import org.apache.crunch.types.avro.AvroTypeFamily;
-import org.apache.crunch.types.writable.WritableTypeFamily;
-import org.junit.Rule;
-import org.junit.Test;
-
-public abstract class JoinTester implements Serializable {
-  private static class WordSplit extends DoFn<String, String> {
-    @Override
-    public void process(String input, Emitter<String> emitter) {
-      for (String word : input.split("\\s+")) {
-        emitter.emit(word);
-      }
-    }
-  }
-
-  protected PTable<String, Long> join(PCollection<String> w1, PCollection<String> w2, PTypeFamily ptf) {
-    PTableType<String, Long> ntt = ptf.tableOf(ptf.strings(), ptf.longs());
-    PTable<String, Long> ws1 = Aggregate.count(w1.parallelDo("ws1", new WordSplit(), ptf.strings()));
-    PTable<String, Long> ws2 = Aggregate.count(w2.parallelDo("ws2", new WordSplit(), ptf.strings()));
-
-    PTable<String, Pair<Long, Long>> join = Join.join(ws1, ws2, getJoinFn(ptf));
-
-    PTable<String, Long> sums = join.parallelDo("cnt", new DoFn<Pair<String, Pair<Long, Long>>, Pair<String, Long>>() {
-      @Override
-      public void process(Pair<String, Pair<Long, Long>> input, Emitter<Pair<String, Long>> emitter) {
-        Pair<Long, Long> pair = input.second();
-        long sum = (pair.first() != null ? pair.first() : 0) + (pair.second() != null ? pair.second() : 0);
-        emitter.emit(Pair.of(input.first(), sum));
-      }
-    }, ntt);
-
-    return sums;
-  }
-
-  protected void run(Pipeline pipeline, PTypeFamily typeFamily) throws IOException {
-    String shakesInputPath = tmpDir.copyResourceFileName("shakes.txt");
-    String maughamInputPath = tmpDir.copyResourceFileName("maugham.txt");
-
-    PCollection<String> shakespeare = pipeline.readTextFile(shakesInputPath);
-    PCollection<String> maugham = pipeline.readTextFile(maughamInputPath);
-    PTable<String, Long> joined = join(shakespeare, maugham, typeFamily);
-    Iterable<Pair<String, Long>> lines = joined.materialize();
-
-    assertPassed(lines);
-
-    pipeline.done();
-  }
-  @Rule
-  public transient TemporaryPath tmpDir = TemporaryPaths.create();
-
-  @Test
-  public void testWritableJoin() throws Exception {
-    run(new MRPipeline(InnerJoinIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance());
-  }
-
-  @Test
-  public void testAvroJoin() throws Exception {
-    run(new MRPipeline(InnerJoinIT.class, tmpDir.getDefaultConfiguration()), AvroTypeFamily.getInstance());
-  }
-
-  /**
-   * Used to check that the result of the join makes sense.
-   * 
-   * @param lines
-   *          The result of the join.
-   */
-  public abstract void assertPassed(Iterable<Pair<String, Long>> lines);
-
-  /**
-   * @return The JoinFn to use.
-   */
-  protected abstract JoinFn<String, Long, Long> getJoinFn(PTypeFamily typeFamily);
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/lib/join/LeftOuterJoinIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/lib/join/LeftOuterJoinIT.java b/crunch/src/it/java/org/apache/crunch/lib/join/LeftOuterJoinIT.java
deleted file mode 100644
index 4ad2a81..0000000
--- a/crunch/src/it/java/org/apache/crunch/lib/join/LeftOuterJoinIT.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib.join;
-
-import static org.junit.Assert.assertTrue;
-
-import org.apache.crunch.Pair;
-import org.apache.crunch.types.PTypeFamily;
-
-public class LeftOuterJoinIT extends JoinTester {
-  @Override
-  public void assertPassed(Iterable<Pair<String, Long>> lines) {
-    boolean passed1 = false;
-    boolean passed2 = false;
-    boolean passed3 = true;
-    for (Pair<String, Long> line : lines) {
-      if ("wretched".equals(line.first()) && 24 == line.second()) {
-        passed1 = true;
-      }
-      if ("againe".equals(line.first()) && 10 == line.second()) {
-        passed2 = true;
-      }
-      if ("Montparnasse.".equals(line.first())) {
-        passed3 = false;
-      }
-    }
-    assertTrue(passed1);
-    assertTrue(passed2);
-    assertTrue(passed3);
-  }
-
-  @Override
-  protected JoinFn<String, Long, Long> getJoinFn(PTypeFamily typeFamily) {
-    return new LeftOuterJoinFn<String, Long, Long>(typeFamily.strings(), typeFamily.longs());
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/lib/join/MapsideJoinIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/lib/join/MapsideJoinIT.java b/crunch/src/it/java/org/apache/crunch/lib/join/MapsideJoinIT.java
deleted file mode 100644
index 8bb5586..0000000
--- a/crunch/src/it/java/org/apache/crunch/lib/join/MapsideJoinIT.java
+++ /dev/null
@@ -1,158 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib.join;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import java.io.IOException;
-import java.util.Collections;
-import java.util.List;
-
-import org.apache.crunch.MapFn;
-import org.apache.crunch.PTable;
-import org.apache.crunch.Pair;
-import org.apache.crunch.Pipeline;
-import org.apache.crunch.PipelineResult;
-import org.apache.crunch.fn.FilterFns;
-import org.apache.crunch.fn.MapValuesFn;
-import org.apache.crunch.impl.mem.MemPipeline;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.writable.Writables;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-
-import com.google.common.collect.Lists;
-
-public class MapsideJoinIT {
-  
-  private static String saveTempDir;
-  
-  @BeforeClass
-  public static void setUpClass(){
-    
-    // Ensure a consistent temporary directory for use of the DistributedCache.
-    
-    // The DistributedCache technically isn't supported when running in local mode, and the default
-    // temporary directory "/tmp" is used as its location. This typically only causes an issue when
-    // running integration tests on Mac OS X, as OS X doesn't use "/tmp" as its default temporary
-    // directory. The following call ensures that "/tmp" is used as the temporary directory on all platforms.
-    saveTempDir = System.setProperty("java.io.tmpdir", "/tmp");
-  }
-  
-  @AfterClass
-  public static void tearDownClass(){
-    System.setProperty("java.io.tmpdir", saveTempDir);
-  }
-
-  private static class LineSplitter extends MapFn<String, Pair<Integer, String>> {
-    @Override
-    public Pair<Integer, String> map(String input) {
-      String[] fields = input.split("\\|");
-      return Pair.of(Integer.parseInt(fields[0]), fields[1]);
-    }
-  }
-
-  private static class CapOrdersFn extends MapValuesFn<Integer, String, String> {
-    @Override
-    public String map(String v) {
-      return v.toUpperCase();
-    }
-  }
-  
-  private static class ConcatValuesFn extends MapValuesFn<Integer, Pair<String, String>, String> {
-    @Override
-    public String map(Pair<String, String> v) {
-      return v.toString();
-    }
-  }
-  
-  @Rule
-  public TemporaryPath tmpDir = TemporaryPaths.create();
-
-  @Test
-  public void testMapSideJoin_MemPipeline() {
-    runMapsideJoin(MemPipeline.getInstance(), true);
-  }
-
-  @Test
-  public void testMapsideJoin_RightSideIsEmpty() throws IOException {
-    MRPipeline pipeline = new MRPipeline(MapsideJoinIT.class, tmpDir.getDefaultConfiguration());
-    PTable<Integer, String> customerTable = readTable(pipeline, "customers.txt");
-    PTable<Integer, String> orderTable = readTable(pipeline, "orders.txt");
-
-    PTable<Integer, String> filteredOrderTable = orderTable
-        .parallelDo(FilterFns.<Pair<Integer, String>>REJECT_ALL(), orderTable.getPTableType());
-
-    PTable<Integer, Pair<String, String>> joined = MapsideJoin.join(customerTable, filteredOrderTable);
-
-    List<Pair<Integer, Pair<String, String>>> materializedJoin = Lists.newArrayList(joined.materialize());
-
-    assertTrue(materializedJoin.isEmpty());
-  }
-
-  @Test
-  public void testMapsideJoin() throws IOException {
-    runMapsideJoin(new MRPipeline(MapsideJoinIT.class, tmpDir.getDefaultConfiguration()), false);
-  }
-
-  private void runMapsideJoin(Pipeline pipeline, boolean inMemory) {
-    PTable<Integer, String> customerTable = readTable(pipeline, "customers.txt");
-    PTable<Integer, String> orderTable = readTable(pipeline, "orders.txt");
-    
-    PTable<Integer, String> custOrders = MapsideJoin.join(customerTable, orderTable)
-        .parallelDo("concat", new ConcatValuesFn(), Writables.tableOf(Writables.ints(), Writables.strings()));
-
-    PTable<Integer, String> ORDER_TABLE = orderTable.parallelDo(new CapOrdersFn(), orderTable.getPTableType());
-    
-    PTable<Integer, Pair<String, String>> joined = MapsideJoin.join(custOrders, ORDER_TABLE);
-
-    List<Pair<Integer, Pair<String, String>>> expectedJoinResult = Lists.newArrayList();
-    expectedJoinResult.add(Pair.of(111, Pair.of("[John Doe,Corn flakes]", "CORN FLAKES")));
-    expectedJoinResult.add(Pair.of(222, Pair.of("[Jane Doe,Toilet paper]", "TOILET PAPER")));
-    expectedJoinResult.add(Pair.of(222, Pair.of("[Jane Doe,Toilet paper]", "TOILET PLUNGER")));
-    expectedJoinResult.add(Pair.of(222, Pair.of("[Jane Doe,Toilet plunger]", "TOILET PAPER")));
-    expectedJoinResult.add(Pair.of(222, Pair.of("[Jane Doe,Toilet plunger]", "TOILET PLUNGER")));
-    expectedJoinResult.add(Pair.of(333, Pair.of("[Someone Else,Toilet brush]", "TOILET BRUSH")));
-    Iterable<Pair<Integer, Pair<String, String>>> iter = joined.materialize();
-    
-    PipelineResult res = pipeline.run();
-    if (!inMemory) {
-      assertEquals(2, res.getStageResults().size());
-    }
-     
-    List<Pair<Integer, Pair<String, String>>> joinedResultList = Lists.newArrayList(iter);
-    Collections.sort(joinedResultList);
-
-    assertEquals(expectedJoinResult, joinedResultList);
-  }
-
-  private PTable<Integer, String> readTable(Pipeline pipeline, String filename) {
-    try {
-      return pipeline.readTextFile(tmpDir.copyResourceFileName(filename)).parallelDo("asTable", new LineSplitter(),
-          Writables.tableOf(Writables.ints(), Writables.strings()));
-    } catch (IOException e) {
-      throw new RuntimeException(e);
-    }
-  }
-
-}


[10/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/CrunchOutputs.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/CrunchOutputs.java b/crunch/src/main/java/org/apache/crunch/io/CrunchOutputs.java
deleted file mode 100644
index ccf4fb5..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/CrunchOutputs.java
+++ /dev/null
@@ -1,184 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io;
-
-import org.apache.crunch.CrunchRuntimeException;
-import org.apache.crunch.hadoop.mapreduce.TaskAttemptContextFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.OutputFormat;
-import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.TaskInputOutputContext;
-import org.apache.hadoop.util.ReflectionUtils;
-
-import com.google.common.base.Joiner;
-import com.google.common.base.Splitter;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-
-/**
- * An analogue of {@link CrunchInputs} for handling multiple {@code OutputFormat} instances
- * writing to multiple files within a single MapReduce job.
- */
-public class CrunchOutputs<K, V> {
-  public static final String CRUNCH_OUTPUTS = "crunch.outputs.dir";
-  
-  private static final char RECORD_SEP = ',';
-  private static final char FIELD_SEP = ';';
-  private static final Joiner JOINER = Joiner.on(FIELD_SEP);
-  private static final Splitter SPLITTER = Splitter.on(FIELD_SEP);
-
-  public static void addNamedOutput(Job job, String name,
-      Class<? extends OutputFormat> outputFormatClass,
-      Class keyClass, Class valueClass) {
-    addNamedOutput(job, name, FormatBundle.forOutput(outputFormatClass), keyClass, valueClass);
-  }
-  
-  public static void addNamedOutput(Job job, String name,
-      FormatBundle<? extends OutputFormat> outputBundle,
-      Class keyClass, Class valueClass) {
-    Configuration conf = job.getConfiguration();
-    String inputs = JOINER.join(name, outputBundle.serialize(), keyClass.getName(), valueClass.getName());
-    String existing = conf.get(CRUNCH_OUTPUTS);
-    conf.set(CRUNCH_OUTPUTS, existing == null ? inputs : existing + RECORD_SEP + inputs);
-  }
-  
-  private static class OutputConfig<K, V> {
-    public FormatBundle<OutputFormat<K, V>> bundle;
-    public Class<K> keyClass;
-    public Class<V> valueClass;
-    
-    public OutputConfig(FormatBundle<OutputFormat<K, V>> bundle,
-        Class<K> keyClass, Class<V> valueClass) {
-      this.bundle = bundle;
-      this.keyClass = keyClass;
-      this.valueClass = valueClass;
-    }
-  }
-  
-  private static Map<String, OutputConfig> getNamedOutputs(
-      TaskInputOutputContext<?, ?, ?, ?> context) {
-    Map<String, OutputConfig> out = Maps.newHashMap();
-    Configuration conf = context.getConfiguration();
-    for (String input : Splitter.on(RECORD_SEP).split(conf.get(CRUNCH_OUTPUTS))) {
-      List<String> fields = Lists.newArrayList(SPLITTER.split(input));
-      String name = fields.get(0);
-      FormatBundle<OutputFormat> bundle = FormatBundle.fromSerialized(fields.get(1),
-          OutputFormat.class);
-      try {
-        Class<?> keyClass = Class.forName(fields.get(2));
-        Class<?> valueClass = Class.forName(fields.get(3));
-        out.put(name, new OutputConfig(bundle, keyClass, valueClass));
-      } catch (ClassNotFoundException e) {
-        throw new CrunchRuntimeException(e);
-      }
-    }
-    return out;
-  }
-  
-  private static final String BASE_OUTPUT_NAME = "mapreduce.output.basename";
-  private static final String COUNTERS_GROUP = CrunchOutputs.class.getName();
-
-  private TaskInputOutputContext<?, ?, K, V> baseContext;
-  private Map<String, OutputConfig> namedOutputs;
-  private Map<String, RecordWriter<K, V>> recordWriters;
-  private Map<String, TaskAttemptContext> taskContextCache;
-  
-  /**
-   * Creates and initializes multiple-outputs support; it should be
-   * instantiated in the Mapper/Reducer setup method.
-   *
-   * @param context the TaskInputOutputContext object
-   */
-  public CrunchOutputs(TaskInputOutputContext<?, ?, K, V> context) {
-    this.baseContext = context;
-    namedOutputs = getNamedOutputs(context);
-    recordWriters = Maps.newHashMap();
-    taskContextCache = Maps.newHashMap();
-  }
-  
-  @SuppressWarnings("unchecked")
-  public void write(String namedOutput, K key, V value)
-      throws IOException, InterruptedException {
-    if (!namedOutputs.containsKey(namedOutput)) {
-      throw new IllegalArgumentException("Undefined named output '" +
-        namedOutput + "'");
-    }
-    TaskAttemptContext taskContext = getContext(namedOutput);
-    baseContext.getCounter(COUNTERS_GROUP, namedOutput).increment(1);
-    getRecordWriter(taskContext, namedOutput).write(key, value);
-  }
-  
-  public void close() throws IOException, InterruptedException {
-    for (RecordWriter<?, ?> writer : recordWriters.values()) {
-      writer.close(baseContext);
-    }
-  }
-  
-  private TaskAttemptContext getContext(String nameOutput) throws IOException {
-    TaskAttemptContext taskContext = taskContextCache.get(nameOutput);
-    if (taskContext != null) {
-      return taskContext;
-    }
-
-    // The following trick leverages the instantiation of a record writer via
-    // the job, thus supporting arbitrary output formats.
-    OutputConfig outConfig = namedOutputs.get(nameOutput);
-    Configuration conf = new Configuration(baseContext.getConfiguration());
-    Job job = new Job(conf);
-    job.getConfiguration().set("crunch.namedoutput", nameOutput);
-    job.setOutputFormatClass(outConfig.bundle.getFormatClass());
-    job.setOutputKeyClass(outConfig.keyClass);
-    job.setOutputValueClass(outConfig.valueClass);
-    outConfig.bundle.configure(job.getConfiguration());
-    taskContext = TaskAttemptContextFactory.create(
-      job.getConfiguration(), baseContext.getTaskAttemptID());
-
-    taskContextCache.put(nameOutput, taskContext);
-    return taskContext;
-  }
-  
-  private synchronized RecordWriter<K, V> getRecordWriter(
-      TaskAttemptContext taskContext, String namedOutput) 
-      throws IOException, InterruptedException {
-    // look for record-writer in the cache
-    RecordWriter<K, V> writer = recordWriters.get(namedOutput);
-    
-    // If not in cache, create a new one
-    if (writer == null) {
-      // get the record writer from context output format
-      taskContext.getConfiguration().set(BASE_OUTPUT_NAME, namedOutput);
-      try {
-        OutputFormat format = ReflectionUtils.newInstance(
-            taskContext.getOutputFormatClass(),
-            taskContext.getConfiguration());
-        writer = format.getRecordWriter(taskContext);
-      } catch (ClassNotFoundException e) {
-        throw new IOException(e);
-      }
-      recordWriters.put(namedOutput, writer);
-    }
-    
-    return writer;
-  }
-}
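
For reference, the class removed above pairs a driver-side addNamedOutput call with a
task-side CrunchOutputs instance built from the TaskInputOutputContext. The following is
a minimal sketch of that wiring; it is not part of this patch, and the class names and
the "counts" output name are illustrative only.

import java.io.IOException;

import org.apache.crunch.io.CrunchOutputs;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

public class NamedOutputSketch {

  // Driver side: register a named output on the Job before submitting it.
  static void configureJob(Job job) {
    CrunchOutputs.addNamedOutput(job, "counts",
        SequenceFileOutputFormat.class, Text.class, LongWritable.class);
  }

  // Task side: create CrunchOutputs in setup(), write to the named output, close in cleanup().
  static class CountReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

    private CrunchOutputs<Text, LongWritable> outputs;

    @Override
    protected void setup(Context context) {
      outputs = new CrunchOutputs<Text, LongWritable>(context);
    }

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
        throws IOException, InterruptedException {
      long sum = 0;
      for (LongWritable value : values) {
        sum += value.get();
      }
      // Routed to the RecordWriter that CrunchOutputs caches for the "counts" output.
      outputs.write("counts", key, new LongWritable(sum));
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
      outputs.close();
    }
  }
}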

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/FileNamingScheme.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/FileNamingScheme.java b/crunch/src/main/java/org/apache/crunch/io/FileNamingScheme.java
deleted file mode 100644
index cf93651..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/FileNamingScheme.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io;
-
-import java.io.IOException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-
-/**
- * Encapsulates rules for naming output files. It is the responsibility of
- * implementors to avoid file name collisions.
- */
-public interface FileNamingScheme {
-
-  /**
-   * Get the output file name for a map task. Note that the implementation is
-   * responsible for avoiding naming collisions.
-   * 
-   * @param configuration The configuration of the job for which the map output
-   *          is being written
-   * @param outputDirectory The directory where the output will be written
-   * @return The filename for the output of the map task
-   * @throws IOException if an exception occurs while accessing the output file
-   *           system
-   */
-  String getMapOutputName(Configuration configuration, Path outputDirectory) throws IOException;
-
-  /**
-   * Get the output file name for a reduce task. Note that the implementation is
-   * responsible for avoiding naming collisions.
-   * 
-   * @param configuration The configuration of the job for which output is being
-   *          written
-   * @param outputDirectory The directory where the file will be written
-   * @param partitionId The partition of the reduce task being output
-   * @return The filename for the output of the reduce task
-   * @throws IOException if an exception occurs while accessing the output file
-   *           system
-   */
-  String getReduceOutputName(Configuration configuration, Path outputDirectory, int partitionId) throws IOException;
-
-}
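
The interface removed above only states the naming contract; the sketch below is a
hypothetical implementation (UuidFileNamingScheme is not a Crunch class), shown purely to
illustrate that the implementor, not the framework, is responsible for avoiding file name
collisions.

import java.io.IOException;
import java.util.UUID;

import org.apache.crunch.io.FileNamingScheme;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class UuidFileNamingScheme implements FileNamingScheme {

  @Override
  public String getMapOutputName(Configuration configuration, Path outputDirectory)
      throws IOException {
    // A random UUID keeps map outputs unique without consulting the output directory.
    return "part-m-" + UUID.randomUUID().toString();
  }

  @Override
  public String getReduceOutputName(Configuration configuration, Path outputDirectory,
      int partitionId) throws IOException {
    // Reduce outputs are already disambiguated by their partition id.
    return String.format("part-r-%05d", partitionId);
  }
}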

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/FileReaderFactory.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/FileReaderFactory.java b/crunch/src/main/java/org/apache/crunch/io/FileReaderFactory.java
deleted file mode 100644
index 5cccb7b..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/FileReaderFactory.java
+++ /dev/null
@@ -1,27 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io;
-
-import java.util.Iterator;
-
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-
-public interface FileReaderFactory<T> {
-  Iterator<T> read(FileSystem fs, Path path);
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/FormatBundle.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/FormatBundle.java b/crunch/src/main/java/org/apache/crunch/io/FormatBundle.java
deleted file mode 100644
index d969009..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/FormatBundle.java
+++ /dev/null
@@ -1,121 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.ObjectInputStream;
-import java.io.ObjectOutputStream;
-import java.io.Serializable;
-import java.util.Map;
-
-import org.apache.commons.codec.binary.Base64;
-import org.apache.commons.lang.builder.HashCodeBuilder;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.OutputFormat;
-
-import com.google.common.collect.Maps;
-
-/**
- * A combination of an {@link InputFormat} or {@link OutputFormat} and any extra 
- * configuration information that the format class needs to run.
- * 
- * <p>The {@code FormatBundle} allows different formats to act as
- * if each were the only format that exists in a particular MapReduce job, even
- * when we have multiple types of inputs and outputs within a single job.
- */
-public class FormatBundle<K> implements Serializable {
-
-  private Class<K> formatClass;
-  private Map<String, String> extraConf;
-
-  public static <T> FormatBundle<T> fromSerialized(String serialized, Class<T> clazz) {
-    ByteArrayInputStream bais = new ByteArrayInputStream(Base64.decodeBase64(serialized));
-    try {
-      ObjectInputStream ois = new ObjectInputStream(bais);
-      FormatBundle<T> bundle = (FormatBundle<T>) ois.readObject();
-      ois.close();
-      return bundle;
-    } catch (IOException e) {
-      throw new RuntimeException(e);
-    } catch (ClassNotFoundException e) {
-      throw new RuntimeException(e);
-    }
-  }
-
-  public static <T extends InputFormat<?, ?>> FormatBundle<T> forInput(Class<T> inputFormatClass) {
-    return new FormatBundle<T>(inputFormatClass);
-  }
-  
-  public static <T extends OutputFormat<?, ?>> FormatBundle<T> forOutput(Class<T> inputFormatClass) {
-    return new FormatBundle<T>(inputFormatClass);
-  }
-  
-  private FormatBundle(Class<K> formatClass) {
-    this.formatClass = formatClass;
-    this.extraConf = Maps.newHashMap();
-  }
-
-  public FormatBundle<K> set(String key, String value) {
-    this.extraConf.put(key, value);
-    return this;
-  }
-
-  public Class<K> getFormatClass() {
-    return formatClass;
-  }
-
-  public Configuration configure(Configuration conf) {
-    for (Map.Entry<String, String> e : extraConf.entrySet()) {
-      conf.set(e.getKey(), e.getValue());
-    }
-    return conf;
-  }
-
-  public String serialize() {
-    ByteArrayOutputStream baos = new ByteArrayOutputStream();
-    try {
-      ObjectOutputStream oos = new ObjectOutputStream(baos);
-      oos.writeObject(this);
-      oos.close();
-      return Base64.encodeBase64String(baos.toByteArray());
-    } catch (IOException e) {
-      throw new RuntimeException(e);
-    }
-  }
-
-  public String getName() {
-    return formatClass.getSimpleName();
-  }
-
-  @Override
-  public int hashCode() {
-    return new HashCodeBuilder().append(formatClass).append(extraConf).toHashCode();
-  }
-
-  @Override
-  public boolean equals(Object other) {
-    if (other == null || !(other instanceof FormatBundle)) {
-      return false;
-    }
-    FormatBundle<K> oib = (FormatBundle<K>) other;
-    return formatClass.equals(oib.formatClass) && extraConf.equals(oib.extraConf);
-  }
-}
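
To make the serialize/fromSerialized round trip of the removed class concrete, here is a
short sketch; it is not part of this patch, and CountsOutputFormat as well as the
compression key are illustrative only.

import org.apache.crunch.io.FormatBundle;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class FormatBundleSketch {

  // A concrete OutputFormat subtype keeps the generic bound on forOutput satisfied.
  public static class CountsOutputFormat extends TextOutputFormat<Text, LongWritable> {
  }

  public static void main(String[] args) {
    // Bundle the format class with any extra configuration it needs at runtime.
    FormatBundle<CountsOutputFormat> bundle = FormatBundle.forOutput(CountsOutputFormat.class)
        .set("mapred.output.compress", "true");

    // The serialized form is what ends up in the job configuration
    // (for example under crunch.outputs.dir via CrunchOutputs.addNamedOutput).
    String serialized = bundle.serialize();

    // On the task side the bundle is rebuilt and its settings applied to a Configuration.
    FormatBundle<CountsOutputFormat> rebuilt =
        FormatBundle.fromSerialized(serialized, CountsOutputFormat.class);
    Configuration conf = rebuilt.configure(new Configuration());

    System.out.println(rebuilt.getName() + ": mapred.output.compress="
        + conf.get("mapred.output.compress"));
  }
}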

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/From.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/From.java b/crunch/src/main/java/org/apache/crunch/io/From.java
deleted file mode 100644
index e4cfb6a..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/From.java
+++ /dev/null
@@ -1,324 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io;
-
-import org.apache.avro.specific.SpecificRecord;
-import org.apache.crunch.Source;
-import org.apache.crunch.TableSource;
-import org.apache.crunch.io.avro.AvroFileSource;
-import org.apache.crunch.io.impl.FileTableSourceImpl;
-import org.apache.crunch.io.seq.SeqFileSource;
-import org.apache.crunch.io.seq.SeqFileTableSource;
-import org.apache.crunch.io.text.TextFileSource;
-import org.apache.crunch.types.PTableType;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.PTypeFamily;
-import org.apache.crunch.types.avro.AvroType;
-import org.apache.crunch.types.avro.Avros;
-import org.apache.crunch.types.writable.Writables;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-
-/**
- * <p>Static factory methods for creating common {@link Source} types.</p>
- * 
- * <p>The {@code From} class is intended to provide a literate API for creating
- * Crunch pipelines from common input file types.
- * 
- * <code>
- *   Pipeline pipeline = new MRPipeline(this.getClass());
- *   
- *   // Reference the lines of a text file by wrapping the TextInputFormat class.
- *   PCollection<String> lines = pipeline.read(From.textFile("/path/to/myfiles"));
- *   
- *   // Reference entries from a sequence file where the key is a LongWritable and the
- *   // value is a custom Writable class.
- *   PTable<LongWritable, MyWritable> table = pipeline.read(From.sequenceFile(
- *       "/path/to/seqfiles", LongWritable.class, MyWritable.class));
- *   
- *   // Reference the records from an Avro file, where MyAvroObject implements Avro's
- *   // SpecificRecord interface.
- *   PCollection<MyAvroObject> myObjects = pipeline.read(From.avroFile("/path/to/avrofiles",
- *       MyAvroObject.class));
- *       
- *   // References the key-value pairs from a custom extension of FileInputFormat:
- *   PTable<KeyWritable, ValueWritable> custom = pipeline.read(From.formattedFile(
- *       "/custom", MyFileInputFormat.class, KeyWritable.class, ValueWritable.class));
- * </code>
- * </p>
- */
-public class From {
-
-  /**
-   * Creates a {@code TableSource<K, V>} for reading data from files that have custom
-   * {@code FileInputFormat<K, V>} implementations not covered by the provided {@code TableSource}
-   * and {@code Source} factory methods.
-   * 
-   * @param pathName The name of the path to the data on the filesystem
-   * @param formatClass The {@code FileInputFormat} implementation
-   * @param keyClass The {@code Writable} to use for the key
-   * @param valueClass The {@code Writable} to use for the value
-   * @return A new {@code TableSource<K, V>} instance
-   */
-  public static <K extends Writable, V extends Writable> TableSource<K, V> formattedFile(
-      String pathName, Class<? extends FileInputFormat<K, V>> formatClass,
-      Class<K> keyClass, Class<V> valueClass) {
-    return formattedFile(new Path(pathName), formatClass, keyClass, valueClass);
-  }
-
-  /**
-   * Creates a {@code TableSource<K, V>} for reading data from files that have custom
-   * {@code FileInputFormat<K, V>} implementations not covered by the provided {@code TableSource}
-   * and {@code Source} factory methods.
-   * 
-   * @param path The {@code Path} to the data
-   * @param formatClass The {@code FileInputFormat} implementation
-   * @param keyClass The {@code Writable} to use for the key
-   * @param valueClass The {@code Writable} to use for the value
-   * @return A new {@code TableSource<K, V>} instance
-   */
-  public static <K extends Writable, V extends Writable> TableSource<K, V> formattedFile(
-      Path path, Class<? extends FileInputFormat<K, V>> formatClass,
-      Class<K> keyClass, Class<V> valueClass) {
-    return formattedFile(path, formatClass, Writables.writables(keyClass),
-        Writables.writables(valueClass));
-  }
-
-  /**
-   * Creates a {@code TableSource<K, V>} for reading data from files that have custom
-   * {@code FileInputFormat} implementations not covered by the provided {@code TableSource}
-   * and {@code Source} factory methods.
-   * 
-   * @param pathName The name of the path to the data on the filesystem
-   * @param formatClass The {@code FileInputFormat} implementation
-   * @param keyType The {@code PType} to use for the key
-   * @param valueType The {@code PType} to use for the value
-   * @return A new {@code TableSource<K, V>} instance
-   */
-  public static <K, V> TableSource<K, V> formattedFile(String pathName,
-      Class<? extends FileInputFormat<?, ?>> formatClass,
-      PType<K> keyType, PType<V> valueType) {
-    return formattedFile(new Path(pathName), formatClass, keyType, valueType);
-  }
-
-  /**
-   * Creates a {@code TableSource<K, V>} for reading data from files that have custom
-   * {@code FileInputFormat} implementations not covered by the provided {@code TableSource}
-   * and {@code Source} factory methods.
-   * 
-   * @param path The {@code Path} to the data
-   * @param formatClass The {@code FileInputFormat} implementation
-   * @param keyType The {@code PType} to use for the key
-   * @param valueType The {@code PType} to use for the value
-   * @return A new {@code TableSource<K, V>} instance
-   */
-  public static <K, V> TableSource<K, V> formattedFile(Path path,
-      Class<? extends FileInputFormat<?, ?>> formatClass,
-      PType<K> keyType, PType<V> valueType) {
-    PTableType<K, V> tableType = keyType.getFamily().tableOf(keyType, valueType);
-    return new FileTableSourceImpl<K, V>(path, tableType, formatClass);
-  }
-
-  /**
-   * Creates a {@code Source<T>} instance from the Avro file(s) at the given path name.
-   * 
-   * @param pathName The name of the path to the data on the filesystem
-   * @param avroClass The subclass of {@code SpecificRecord} to use for the Avro file
-   * @return A new {@code Source<T>} instance
-   */
-  public static <T extends SpecificRecord> Source<T> avroFile(String pathName, Class<T> avroClass) {
-    return avroFile(new Path(pathName), avroClass);  
-  }
-
-  /**
-   * Creates a {@code Source<T>} instance from the Avro file(s) at the given {@code Path}.
-   * 
-   * @param path The {@code Path} to the data
-   * @param avroClass The subclass of {@code SpecificRecord} to use for the Avro file
-   * @return A new {@code Source<T>} instance
-   */
-  public static <T extends SpecificRecord> Source<T> avroFile(Path path, Class<T> avroClass) {
-    return avroFile(path, Avros.specifics(avroClass));  
-  }
-  
-  /**
-   * Creates a {@code Source<T>} instance from the Avro file(s) at the given path name.
-   * 
-   * @param pathName The name of the path to the data on the filesystem
-   * @param avroType The {@code AvroType} for the Avro records
-   * @return A new {@code Source<T>} instance
-   */
-  public static <T> Source<T> avroFile(String pathName, AvroType<T> avroType) {
-    return avroFile(new Path(pathName), avroType);
-  }
-
-  /**
-   * Creates a {@code Source<T>} instance from the Avro file(s) at the given {@code Path}.
-   * 
-   * @param path The {@code Path} to the data
-   * @param avroType The {@code AvroType} for the Avro records
-   * @return A new {@code Source<T>} instance
-   */
-  public static <T> Source<T> avroFile(Path path, AvroType<T> avroType) {
-    return new AvroFileSource<T>(path, avroType);
-  }
-
-  /**
-   * Creates a {@code Source<T>} instance from the SequenceFile(s) at the given path name
-   * from the value field of each key-value pair in the SequenceFile(s).
-   * 
-   * @param pathName The name of the path to the data on the filesystem
-   * @param valueClass The {@code Writable} type for the value of the SequenceFile entry
-   * @return A new {@code Source<T>} instance
-   */
-  public static <T extends Writable> Source<T> sequenceFile(String pathName, Class<T> valueClass) {
-    return sequenceFile(new Path(pathName), valueClass);
-  }
-  
-  /**
-   * Creates a {@code Source<T>} instance from the SequenceFile(s) at the given {@code Path}
-   * from the value field of each key-value pair in the SequenceFile(s).
-   * 
-   * @param path The {@code Path} to the data
-   * @param valueClass The {@code Writable} type for the value of the SequenceFile entry
-   * @return A new {@code Source<T>} instance
-   */
-  public static <T extends Writable> Source<T> sequenceFile(Path path, Class<T> valueClass) {
-    return sequenceFile(path, Writables.writables(valueClass));
-  }
-  
-  /**
-   * Creates a {@code Source<T>} instance from the SequenceFile(s) at the given path name
-   * from the value field of each key-value pair in the SequenceFile(s).
-   * 
-   * @param pathName The name of the path to the data on the filesystem
-   * @param ptype The {@code PType} for the value of the SequenceFile entry
-   * @return A new {@code Source<T>} instance
-   */
-  public static <T> Source<T> sequenceFile(String pathName, PType<T> ptype) {
-    return sequenceFile(new Path(pathName), ptype);
-  }
-
-  /**
-   * Creates a {@code Source<T>} instance from the SequenceFile(s) at the given {@code Path}
-   * from the value field of each key-value pair in the SequenceFile(s).
-   * 
-   * @param path The {@code Path} to the data
-   * @param ptype The {@code PType} for the value of the SequenceFile entry
-   * @return A new {@code Source<T>} instance
-   */
-  public static <T> Source<T> sequenceFile(Path path, PType<T> ptype) {
-    return new SeqFileSource<T>(path, ptype);
-  }
-
-  /**
-   * Creates a {@code TableSource<K, V>} instance for the SequenceFile(s) at the given path name.
-   * 
-   * @param pathName The name of the path to the data on the filesystem
-   * @param keyClass The {@code Writable} subclass for the key of the SequenceFile entry
-   * @param valueClass The {@code Writable} subclass for the value of the SequenceFile entry
-   * @return A new {@code TableSource<K, V>} instance
-   */
-  public static <K extends Writable, V extends Writable> TableSource<K, V> sequenceFile(
-      String pathName, Class<K> keyClass, Class<V> valueClass) {
-    return sequenceFile(new Path(pathName), keyClass, valueClass);
-  }
-
-  /**
-   * Creates a {@code TableSource<K, V>} instance for the SequenceFile(s) at the given {@code Path}.
-   * 
-   * @param path The {@code Path} to the data
-   * @param keyClass The {@code Writable} subclass for the key of the SequenceFile entry
-   * @param valueClass The {@code Writable} subclass for the value of the SequenceFile entry
-   * @return A new {@code TableSource<K, V>} instance
-   */
-  public static <K extends Writable, V extends Writable> TableSource<K, V> sequenceFile(
-      Path path, Class<K> keyClass, Class<V> valueClass) {
-    return sequenceFile(path, Writables.writables(keyClass), Writables.writables(valueClass));
-  }
-  
-  /**
-   * Creates a {@code TableSource<K, V>} instance for the SequenceFile(s) at the given path name.
-   * 
-   * @param pathName The name of the path to the data on the filesystem
-   * @param keyType The {@code PType} for the key of the SequenceFile entry
-   * @param valueType The {@code PType} for the value of the SequenceFile entry
-   * @return A new {@code TableSource<K, V>} instance
-   */
-  public static <K, V> TableSource<K, V> sequenceFile(String pathName, PType<K> keyType, PType<V> valueType) {
-    return sequenceFile(new Path(pathName), keyType, valueType);
-  }
-
-  /**
-   * Creates a {@code TableSource<K, V>} instance for the SequenceFile(s) at the given {@code Path}.
-   * 
-   * @param path The {@code Path} to the data
-   * @param keyType The {@code PType} for the key of the SequenceFile entry
-   * @param valueType The {@code PType} for the value of the SequenceFile entry
-   * @return A new {@code TableSource<K, V>} instance
-   */
-  public static <K, V> TableSource<K, V> sequenceFile(Path path, PType<K> keyType, PType<V> valueType) {
-    PTypeFamily ptf = keyType.getFamily();
-    return new SeqFileTableSource<K, V>(path, ptf.tableOf(keyType, valueType));
-  }
-
-  /**
-   * Creates a {@code Source<String>} instance for the text file(s) at the given path name.
-   * 
-   * @param pathName The name of the path to the data on the filesystem
-   * @return A new {@code Source<String>} instance
-   */
-  public static Source<String> textFile(String pathName) {
-    return textFile(new Path(pathName));
-  }
-
-  /**
-   * Creates a {@code Source<String>} instance for the text file(s) at the given {@code Path}.
-   * 
-   * @param path The {@code Path} to the data
-   * @return A new {@code Source<String>} instance
-   */
-  public static Source<String> textFile(Path path) {
-    return textFile(path, Writables.strings());
-  }
-
-  /**
-   * Creates a {@code Source<T>} instance for the text file(s) at the given path name using
-   * the provided {@code PType<T>} to convert the input text.
-   * 
-   * @param pathName The name of the path to the data on the filesystem
-   * @param ptype The {@code PType<T>} to use to process the input text
-   * @return A new {@code Source<T>} instance
-   */
-  public static <T> Source<T> textFile(String pathName, PType<T> ptype) {
-    return textFile(new Path(pathName), ptype);
-  }
-
-  /**
-   * Creates a {@code Source<T>} instance for the text file(s) at the given {@code Path} using
-   * the provided {@code PType<T>} to convert the input text.
-   * 
-   * @param path The {@code Path} to the data
-   * @param ptype The {@code PType<T>} to use to process the input text
-   * @return A new {@code Source<T>} instance
-   */
-  public static <T> Source<T> textFile(Path path, PType<T> ptype) {
-    return new TextFileSource<T>(path, ptype);
-  }
-}
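
The class-level example in the removed file covers the Writable- and Avro-class overloads;
as a complement, here is a brief sketch of the PType-based sequenceFile factories. It is
not part of this patch and the paths are placeholders.

import org.apache.crunch.PCollection;
import org.apache.crunch.PTable;
import org.apache.crunch.Pipeline;
import org.apache.crunch.impl.mr.MRPipeline;
import org.apache.crunch.io.From;
import org.apache.crunch.types.writable.Writables;

public class FromSketch {
  public static void main(String[] args) {
    Pipeline pipeline = new MRPipeline(FromSketch.class);

    // Read only the value field of each SequenceFile entry, described by a PType.
    PCollection<String> values =
        pipeline.read(From.sequenceFile("/path/to/seqfiles", Writables.strings()));

    // Read key-value pairs as a PTable, again described by PTypes.
    PTable<String, Long> table = pipeline.read(
        From.sequenceFile("/path/to/seqfiles", Writables.strings(), Writables.longs()));

    // A real pipeline would transform or write these collections before finishing.
    pipeline.done();
  }
}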

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/MapReduceTarget.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/MapReduceTarget.java b/crunch/src/main/java/org/apache/crunch/io/MapReduceTarget.java
deleted file mode 100644
index b484103..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/MapReduceTarget.java
+++ /dev/null
@@ -1,27 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io;
-
-import org.apache.crunch.Target;
-import org.apache.crunch.types.PType;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.Job;
-
-public interface MapReduceTarget extends Target {
-  void configureForMapReduce(Job job, PType<?> ptype, Path outputPath, String name);
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/OutputHandler.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/OutputHandler.java b/crunch/src/main/java/org/apache/crunch/io/OutputHandler.java
deleted file mode 100644
index 01d7f99..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/OutputHandler.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io;
-
-import org.apache.crunch.Target;
-import org.apache.crunch.types.PType;
-
-public interface OutputHandler {
-  boolean configure(Target target, PType<?> ptype);
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/PathTarget.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/PathTarget.java b/crunch/src/main/java/org/apache/crunch/io/PathTarget.java
deleted file mode 100644
index 7a35209..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/PathTarget.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io;
-
-import org.apache.hadoop.fs.Path;
-
-/**
- * A target whose output goes to a given path on a file system.
- */
-public interface PathTarget extends MapReduceTarget {
-
-  Path getPath();
-
-  /**
-   * Get the naming scheme to be used for outputs being written to an output
-   * path.
-   * 
-   * @return the naming scheme to be used
-   */
-  FileNamingScheme getFileNamingScheme();
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/PathTargetImpl.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/PathTargetImpl.java b/crunch/src/main/java/org/apache/crunch/io/PathTargetImpl.java
deleted file mode 100644
index 0be3f9a..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/PathTargetImpl.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io;
-
-import org.apache.crunch.types.PType;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.OutputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-
-public abstract class PathTargetImpl implements PathTarget {
-
-  private final Path path;
-  private final Class<OutputFormat> outputFormatClass;
-  private final Class keyClass;
-  private final Class valueClass;
-
-  public PathTargetImpl(String path, Class<OutputFormat> outputFormatClass, Class keyClass, Class valueClass) {
-    this(new Path(path), outputFormatClass, keyClass, valueClass);
-  }
-
-  public PathTargetImpl(Path path, Class<OutputFormat> outputFormatClass, Class keyClass, Class valueClass) {
-    this.path = path;
-    this.outputFormatClass = outputFormatClass;
-    this.keyClass = keyClass;
-    this.valueClass = valueClass;
-  }
-
-  @Override
-  public void configureForMapReduce(Job job, PType<?> ptype, Path outputPath, String name) {
-    try {
-      FileOutputFormat.setOutputPath(job, path);
-    } catch (Exception e) {
-      throw new RuntimeException(e);
-    }
-    if (name == null) {
-      job.setOutputFormatClass(outputFormatClass);
-      job.setOutputKeyClass(keyClass);
-      job.setOutputValueClass(valueClass);
-    } else {
-      CrunchOutputs.addNamedOutput(job, name, outputFormatClass, keyClass, valueClass);
-    }
-  }
-
-  @Override
-  public Path getPath() {
-    return path;
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/ReadableSource.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/ReadableSource.java b/crunch/src/main/java/org/apache/crunch/io/ReadableSource.java
deleted file mode 100644
index 0407167..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/ReadableSource.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io;
-
-import java.io.IOException;
-
-import org.apache.crunch.Source;
-import org.apache.hadoop.conf.Configuration;
-
-/**
- * An extension of the {@code Source} interface that indicates that a
- * {@code Source} instance may be read as a series of records by the client
- * code. This is used to determine whether a {@code PCollection} instance can be
- * materialized.
- */
-public interface ReadableSource<T> extends Source<T> {
-
-  /**
-   * Returns an {@code Iterable} that contains the contents of this source.
-   * 
-   * @param conf The current {@code Configuration} instance
-   * @return the contents of this {@code Source} as an {@code Iterable} instance
-   * @throws IOException if an error occurs while reading the data
-   */
-  Iterable<T> read(Configuration conf) throws IOException;
-}
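
A minimal sketch of materializing a ReadableSource directly in the client, without running a MapReduce job (assuming TextFileSource, used by the textFile factories above, implements ReadableSource as the Crunch text sources do; the path is a placeholder):

import org.apache.crunch.io.ReadableSource;
import org.apache.crunch.io.text.TextFileSource;
import org.apache.crunch.types.writable.Writables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class ReadableSourceSketch {
  public static void main(String[] args) throws Exception {
    ReadableSource<String> source =
        new TextFileSource<String>(new Path("/path/to/text"), Writables.strings());
    // Iterate over the records in the client JVM.
    for (String line : source.read(new Configuration())) {
      System.out.println(line);
    }
  }
}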

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/ReadableSourceTarget.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/ReadableSourceTarget.java b/crunch/src/main/java/org/apache/crunch/io/ReadableSourceTarget.java
deleted file mode 100644
index 95c90aa..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/ReadableSourceTarget.java
+++ /dev/null
@@ -1,30 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io;
-
-import org.apache.crunch.SourceTarget;
-
-/**
- * An interface that indicates that a {@code SourceTarget} instance can be read
- * into the local client.
- * 
- * @param <T>
- *          The type of data read.
- */
-public interface ReadableSourceTarget<T> extends ReadableSource<T>, SourceTarget<T> {
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/SequentialFileNamingScheme.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/SequentialFileNamingScheme.java b/crunch/src/main/java/org/apache/crunch/io/SequentialFileNamingScheme.java
deleted file mode 100644
index bdda8e6..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/SequentialFileNamingScheme.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io;
-
-import java.io.IOException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-
-/**
- * Default {@link FileNamingScheme} that uses an incrementing sequence number in
- * order to generate unique file names.
- */
-public class SequentialFileNamingScheme implements FileNamingScheme {
-
-  @Override
-  public String getMapOutputName(Configuration configuration, Path outputDirectory) throws IOException {
-    return getSequentialFileName(configuration, outputDirectory, "m");
-  }
-
-  @Override
-  public String getReduceOutputName(Configuration configuration, Path outputDirectory, int partitionId)
-      throws IOException {
-    return getSequentialFileName(configuration, outputDirectory, "r");
-  }
-
-  private String getSequentialFileName(Configuration configuration, Path outputDirectory, String jobTypeName)
-      throws IOException {
-    FileSystem fileSystem = outputDirectory.getFileSystem(configuration);
-    int fileSequenceNumber = fileSystem.listStatus(outputDirectory).length;
-
-    return String.format("part-%s-%05d", jobTypeName, fileSequenceNumber);
-  }
-
-}
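
A small sketch of the names this scheme produces (assuming the output directory exists and already contains two files, so the next sequence number is 2; the path is a placeholder):

import org.apache.crunch.io.SequentialFileNamingScheme;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class NamingSchemeSketch {
  public static void main(String[] args) throws Exception {
    SequentialFileNamingScheme scheme = new SequentialFileNamingScheme();
    Configuration conf = new Configuration();
    Path outputDir = new Path("/tmp/crunch-output");
    // Prints "part-m-00002": the sequence number is the current file count.
    System.out.println(scheme.getMapOutputName(conf, outputDir));
    // Prints "part-r-00002"; the partition id does not affect the name.
    System.out.println(scheme.getReduceOutputName(conf, outputDir, 0));
  }
}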

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/SourceTargetHelper.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/SourceTargetHelper.java b/crunch/src/main/java/org/apache/crunch/io/SourceTargetHelper.java
deleted file mode 100644
index f4400de..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/SourceTargetHelper.java
+++ /dev/null
@@ -1,48 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io;
-
-import java.io.IOException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-
-/**
- * Functions for configuring the inputs/outputs of MapReduce jobs.
- * 
- */
-public class SourceTargetHelper {
-
-  public static long getPathSize(Configuration conf, Path path) throws IOException {
-    return getPathSize(path.getFileSystem(conf), path);
-  }
-
-  public static long getPathSize(FileSystem fs, Path path) throws IOException {
-    FileStatus[] stati = fs.globStatus(path);
-    if (stati == null || stati.length == 0) {
-      return -1L;
-    }
-    long size = 0;
-    for (FileStatus status : stati) {
-      size += fs.getContentSummary(status.getPath()).getLength();
-    }
-    return size;
-  }
-}
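
A minimal sketch of using this helper to compute the total size of the files matched by a glob (the path is a placeholder):

import org.apache.crunch.io.SourceTargetHelper;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class PathSizeSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Sums the lengths of all files matching the glob, or returns -1 if nothing matches.
    long bytes = SourceTargetHelper.getPathSize(conf, new Path("/data/events/part-*"));
    System.out.println("Total input size: " + bytes + " bytes");
  }
}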

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/To.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/To.java b/crunch/src/main/java/org/apache/crunch/io/To.java
deleted file mode 100644
index d62d294..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/To.java
+++ /dev/null
@@ -1,153 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io;
-
-import org.apache.crunch.Target;
-import org.apache.crunch.io.avro.AvroFileTarget;
-import org.apache.crunch.io.impl.FileTargetImpl;
-import org.apache.crunch.io.seq.SeqFileTarget;
-import org.apache.crunch.io.text.TextFileTarget;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-
-/**
- * <p>Static factory methods for creating common {@link Target} types.</p>
- * 
- * <p>The {@code To} class is intended to be used as part of a literate API
- * for writing the output of Crunch pipelines to common file types. We can use
- * the {@code Target} objects created by the factory methods in the {@code To}
- * class with either the {@code write} method on the {@code Pipeline} class or
- * the convenience {@code write} method on {@code PCollection} and {@code PTable}
- * instances.
- * 
- * <code>
- *   Pipeline pipeline = new MRPipeline(this.getClass());
- *   ...
- *   // Write a PCollection<String> to a text file:
- *   PCollection<String> words = ...;
- *   pipeline.write(words, To.textFile("/put/my/words/here"));
- *   
- *   // Write a PTable<Text, Text> to a sequence file:
- *   PTable<Text, Text> textToText = ...;
- *   textToText.write(To.sequenceFile("/words/to/words"));
- *   
- *   // Write a PCollection<MyAvroObject> to an Avro data file:
- *   PCollection<MyAvroObject> objects = ...;
- *   objects.write(To.avroFile("/my/avro/files"));
- *   
- *   // Write a PTable to a custom FileOutputFormat:
- *   PTable<KeyWritable, ValueWritable> custom = ...;
- *   pipeline.write(custom, To.formattedFile("/custom", MyFileFormat.class));
- * </code>
- * </p>
- */
-public class To {
-
-  /**
-   * Creates a {@code Target} at the given path name that writes data to
-   * a custom {@code FileOutputFormat}.
-   * 
-   * @param pathName The name of the path to write the data to on the filesystem
-   * @param formatClass The {@code FileOutputFormat<K, V>} to write the data to
-   * @return A new {@code Target} instance
-   */
-  public static <K extends Writable, V extends Writable> Target formattedFile(
-      String pathName, Class<? extends FileOutputFormat<K, V>> formatClass) {
-    return formattedFile(new Path(pathName), formatClass);
-  }
-
-  /**
-   * Creates a {@code Target} at the given {@code Path} that writes data to
-   * a custom {@code FileOutputFormat}.
-   * 
-   * @param path The {@code Path} to write the data to
-   * @param formatClass The {@code FileOutputFormat} to write the data to
-   * @return A new {@code Target} instance
-   */
-  public static <K extends Writable, V extends Writable> Target formattedFile(
-      Path path, Class<? extends FileOutputFormat<K, V>> formatClass) {
-    return new FileTargetImpl(path, formatClass, new SequentialFileNamingScheme());
-  }
-
-  /**
-   * Creates a {@code Target} at the given path name that writes data to
-   * Avro files. The {@code PType} for the written data must be for Avro records.
-   * 
-   * @param pathName The name of the path to write the data to on the filesystem
-   * @return A new {@code Target} instance
-   */
-  public static Target avroFile(String pathName) {
-    return avroFile(new Path(pathName));
-  }
-
-  /**
-   * Creates a {@code Target} at the given {@code Path} that writes data to
-   * Avro files. The {@code PType} for the written data must be for Avro records.
-   * 
-   * @param path The {@code Path} to write the data to
-   * @return A new {@code Target} instance
-   */
-  public static Target avroFile(Path path) {
-    return new AvroFileTarget(path);
-  }
-
-  /**
-   * Creates a {@code Target} at the given path name that writes data to
-   * SequenceFiles.
-   * 
-   * @param pathName The name of the path to write the data to on the filesystem
-   * @return A new {@code Target} instance
-   */
-  public static Target sequenceFile(String pathName) {
-    return sequenceFile(new Path(pathName));
-  }
-
-  /**
-   * Creates a {@code Target} at the given {@code Path} that writes data to
-   * SequenceFiles.
-   * 
-   * @param path The {@code Path} to write the data to
-   * @return A new {@code Target} instance
-   */
-  public static Target sequenceFile(Path path) {
-    return new SeqFileTarget(path);
-  }
-
-  /**
-   * Creates a {@code Target} at the given path name that writes data to
-   * text files.
-   * 
-   * @param pathName The name of the path to write the data to on the filesystem
-   * @return A new {@code Target} instance
-   */
-  public static Target textFile(String pathName) {
-    return textFile(new Path(pathName));
-  }
-
-  /**
-   * Creates a {@code Target} at the given {@code Path} that writes data to
-   * text files.
-   * 
-   * @param path The {@code Path} to write the data to
-   * @return A new {@code Target} instance
-   */
-  public static Target textFile(Path path) {
-    return new TextFileTarget(path);
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/avro/AvroFileReaderFactory.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/avro/AvroFileReaderFactory.java b/crunch/src/main/java/org/apache/crunch/io/avro/AvroFileReaderFactory.java
deleted file mode 100644
index c8fe23a..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/avro/AvroFileReaderFactory.java
+++ /dev/null
@@ -1,96 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.avro;
-
-import java.io.IOException;
-import java.util.Iterator;
-
-import org.apache.avro.Schema;
-import org.apache.avro.file.DataFileReader;
-import org.apache.avro.generic.GenericDatumReader;
-import org.apache.avro.io.DatumReader;
-import org.apache.avro.mapred.FsInput;
-import org.apache.avro.reflect.ReflectDatumReader;
-import org.apache.avro.specific.SpecificDatumReader;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.crunch.MapFn;
-import org.apache.crunch.fn.IdentityFn;
-import org.apache.crunch.io.FileReaderFactory;
-import org.apache.crunch.io.impl.AutoClosingIterator;
-import org.apache.crunch.types.avro.AvroType;
-import org.apache.crunch.types.avro.Avros;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-
-import com.google.common.collect.Iterators;
-import com.google.common.collect.UnmodifiableIterator;
-
-public class AvroFileReaderFactory<T> implements FileReaderFactory<T> {
-
-  private static final Log LOG = LogFactory.getLog(AvroFileReaderFactory.class);
-
-  private final DatumReader<T> recordReader;
-  private final MapFn<T, T> mapFn;
-
-  public AvroFileReaderFactory(AvroType<T> atype) {
-    this.recordReader = createDatumReader(atype);
-    this.mapFn = (MapFn<T, T>) atype.getInputMapFn();
-  }
-
-  public AvroFileReaderFactory(Schema schema) {
-    this.recordReader = new GenericDatumReader<T>(schema);
-    this.mapFn = IdentityFn.<T>getInstance();
-  }
-  
-  static <T> DatumReader<T> createDatumReader(AvroType<T> avroType) {
-    if (avroType.hasReflect()) {
-      if (avroType.hasSpecific()) {
-        Avros.checkCombiningSpecificAndReflectionSchemas();
-      }
-      return new ReflectDatumReader<T>(avroType.getSchema());
-    } else if (avroType.hasSpecific()) {
-      return new SpecificDatumReader<T>(avroType.getSchema());
-    } else {
-      return new GenericDatumReader<T>(avroType.getSchema());
-    }
-  }
-
-  @Override
-  public Iterator<T> read(FileSystem fs, final Path path) {
-    this.mapFn.initialize();
-    try {
-      FsInput fsi = new FsInput(path, fs.getConf());
-      final DataFileReader<T> reader = new DataFileReader<T>(fsi, recordReader);
-      return new AutoClosingIterator<T>(reader, new UnmodifiableIterator<T>() {
-        @Override
-        public boolean hasNext() {
-          return reader.hasNext();
-        }
-
-        @Override
-        public T next() {
-          return mapFn.map(reader.next());
-        }
-      });
-    } catch (IOException e) {
-      LOG.info("Could not read avro file at path: " + path, e);
-      return Iterators.emptyIterator();
-    }
-  }
-}
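
A sketch of reading an Avro data file with this factory via its generic-schema constructor (the record schema and path are placeholders):

import java.util.Iterator;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.crunch.io.avro.AvroFileReaderFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class AvroReaderSketch {
  public static void main(String[] args) throws Exception {
    Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"Event\",\"fields\":"
        + "[{\"name\":\"id\",\"type\":\"string\"}]}");
    FileSystem fs = FileSystem.get(new Configuration());
    // Returns an empty iterator (and logs the error) if the file cannot be read.
    Iterator<GenericRecord> records = new AvroFileReaderFactory<GenericRecord>(schema)
        .read(fs, new Path("/data/events.avro"));
    while (records.hasNext()) {
      System.out.println(records.next());
    }
  }
}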

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/avro/AvroFileSource.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/avro/AvroFileSource.java b/crunch/src/main/java/org/apache/crunch/io/avro/AvroFileSource.java
deleted file mode 100644
index 15792bf..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/avro/AvroFileSource.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.avro;
-
-import java.io.IOException;
-
-import org.apache.avro.mapred.AvroJob;
-import org.apache.crunch.io.CompositePathIterable;
-import org.apache.crunch.io.FormatBundle;
-import org.apache.crunch.io.ReadableSource;
-import org.apache.crunch.io.impl.FileSourceImpl;
-import org.apache.crunch.types.avro.AvroInputFormat;
-import org.apache.crunch.types.avro.AvroType;
-import org.apache.crunch.types.avro.Avros;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-
-public class AvroFileSource<T> extends FileSourceImpl<T> implements ReadableSource<T> {
-
-  private static <S> FormatBundle getBundle(AvroType<S> ptype) {
-    FormatBundle bundle = FormatBundle.forInput(AvroInputFormat.class)
-        .set(AvroJob.INPUT_IS_REFLECT, String.valueOf(ptype.hasReflect()))
-        .set(AvroJob.INPUT_SCHEMA, ptype.getSchema().toString())
-        .set(Avros.REFLECT_DATA_FACTORY_CLASS, Avros.REFLECT_DATA_FACTORY.getClass().getName());
-    return bundle;
-  }
-  
-  public AvroFileSource(Path path, AvroType<T> ptype) {
-    super(path, ptype, getBundle(ptype));
-  }
-
-  @Override
-  public String toString() {
-    return "Avro(" + path.toString() + ")";
-  }
-
-  @Override
-  public Iterable<T> read(Configuration conf) throws IOException {
-    FileSystem fs = path.getFileSystem(conf);
-    return CompositePathIterable.create(fs, path, new AvroFileReaderFactory<T>((AvroType<T>) ptype));
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/avro/AvroFileSourceTarget.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/avro/AvroFileSourceTarget.java b/crunch/src/main/java/org/apache/crunch/io/avro/AvroFileSourceTarget.java
deleted file mode 100644
index 76103e5..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/avro/AvroFileSourceTarget.java
+++ /dev/null
@@ -1,39 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.avro;
-
-import org.apache.crunch.io.FileNamingScheme;
-import org.apache.crunch.io.SequentialFileNamingScheme;
-import org.apache.crunch.io.impl.ReadableSourcePathTargetImpl;
-import org.apache.crunch.types.avro.AvroType;
-import org.apache.hadoop.fs.Path;
-
-public class AvroFileSourceTarget<T> extends ReadableSourcePathTargetImpl<T> {
-  public AvroFileSourceTarget(Path path, AvroType<T> atype) {
-    this(path, atype, new SequentialFileNamingScheme());
-  }
-
-  public AvroFileSourceTarget(Path path, AvroType<T> atype, FileNamingScheme fileNamingScheme) {
-    super(new AvroFileSource<T>(path, atype), new AvroFileTarget(path), fileNamingScheme);
-  }
-
-  @Override
-  public String toString() {
-    return target.toString();
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/avro/AvroFileTarget.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/avro/AvroFileTarget.java b/crunch/src/main/java/org/apache/crunch/io/avro/AvroFileTarget.java
deleted file mode 100644
index 3a9e42c..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/avro/AvroFileTarget.java
+++ /dev/null
@@ -1,91 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.avro;
-
-import org.apache.avro.mapred.AvroWrapper;
-import org.apache.crunch.SourceTarget;
-import org.apache.crunch.io.FileNamingScheme;
-import org.apache.crunch.io.OutputHandler;
-import org.apache.crunch.io.SequentialFileNamingScheme;
-import org.apache.crunch.io.impl.FileTargetImpl;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.avro.AvroOutputFormat;
-import org.apache.crunch.types.avro.AvroType;
-import org.apache.crunch.types.avro.Avros;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.Job;
-
-public class AvroFileTarget extends FileTargetImpl {
-
-  public AvroFileTarget(String path) {
-    this(new Path(path));
-  }
-
-  public AvroFileTarget(Path path) {
-    this(path, new SequentialFileNamingScheme());
-  }
-
-  public AvroFileTarget(Path path, FileNamingScheme fileNamingScheme) {
-    super(path, AvroOutputFormat.class, fileNamingScheme);
-  }
-
-  @Override
-  public String toString() {
-    return "Avro(" + path.toString() + ")";
-  }
-
-  @Override
-  public boolean accept(OutputHandler handler, PType<?> ptype) {
-    if (!(ptype instanceof AvroType)) {
-      return false;
-    }
-    handler.configure(this, ptype);
-    return true;
-  }
-
-  @Override
-  public void configureForMapReduce(Job job, PType<?> ptype, Path outputPath, String name) {
-    AvroType<?> atype = (AvroType<?>) ptype;
-    Configuration conf = job.getConfiguration();
-    String schemaParam = null;
-    if (name == null) {
-      schemaParam = "avro.output.schema";
-    } else {
-      schemaParam = "avro.output.schema." + name;
-    }
-    String outputSchema = conf.get(schemaParam);
-    if (outputSchema == null) {
-      conf.set(schemaParam, atype.getSchema().toString());
-    } else if (!outputSchema.equals(atype.getSchema().toString())) {
-      throw new IllegalStateException("Avro targets must use the same output schema");
-    }
-    Avros.configureReflectDataFactory(conf);
-    configureForMapReduce(job, AvroWrapper.class, NullWritable.class, AvroOutputFormat.class,
-        outputPath, name);
-  }
-
-  @Override
-  public <T> SourceTarget<T> asSourceTarget(PType<T> ptype) {
-    if (ptype instanceof AvroType) {
-      return new AvroFileSourceTarget<T>(path, (AvroType<T>) ptype);
-    }
-    return null;
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/impl/AutoClosingIterator.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/impl/AutoClosingIterator.java b/crunch/src/main/java/org/apache/crunch/io/impl/AutoClosingIterator.java
deleted file mode 100644
index 3bd802e..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/impl/AutoClosingIterator.java
+++ /dev/null
@@ -1,62 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.impl;
-
-import java.io.Closeable;
-import java.io.IOException;
-import java.util.Iterator;
-
-import com.google.common.collect.UnmodifiableIterator;
-import com.google.common.io.Closeables;
-
-/**
- * Closes the wrapped {@code Closeable} when {@link #hasNext()} returns false. As long as a client loops through to
- * completion (i.e., does not abort early due to an exception, short circuit, etc.), resources will be closed automatically.
- */
-public class AutoClosingIterator<T> extends UnmodifiableIterator<T> implements Closeable {
-  private final Iterator<T> iter;
-  private Closeable closeable;
-
-  public AutoClosingIterator(Closeable closeable, Iterator<T> iter) {
-    this.closeable = closeable;
-    this.iter = iter;
-  }
-
-  @Override
-  public boolean hasNext() {
-    if (!iter.hasNext()) {
-      Closeables.closeQuietly(this);
-      return false;
-    } else {
-      return true;
-    }
-  }
-
-  @Override
-  public T next() {
-    return iter.next();
-  }
-
-  @Override
-  public void close() throws IOException {
-    if (closeable != null) {
-      closeable.close();
-      closeable = null;
-    }
-  }
-}
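
A small sketch of wrapping a reader-backed iterator so the underlying stream is released once iteration completes (the file path is a placeholder; the line iterator uses Guava's AbstractIterator):

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.Iterator;

import org.apache.crunch.io.impl.AutoClosingIterator;

import com.google.common.collect.AbstractIterator;

public class AutoClosingSketch {
  public static void main(String[] args) throws Exception {
    final BufferedReader reader = new BufferedReader(new FileReader("/tmp/lines.txt"));
    Iterator<String> lines = new AbstractIterator<String>() {
      @Override
      protected String computeNext() {
        try {
          String line = reader.readLine();
          return line != null ? line : endOfData();
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
      }
    };
    // The reader is closed automatically when hasNext() first returns false.
    Iterator<String> autoClosing = new AutoClosingIterator<String>(reader, lines);
    while (autoClosing.hasNext()) {
      System.out.println(autoClosing.next());
    }
  }
}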

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/impl/FileSourceImpl.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/impl/FileSourceImpl.java b/crunch/src/main/java/org/apache/crunch/io/impl/FileSourceImpl.java
deleted file mode 100644
index 688c801..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/impl/FileSourceImpl.java
+++ /dev/null
@@ -1,104 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.impl;
-
-import java.io.IOException;
-
-import org.apache.commons.lang.builder.HashCodeBuilder;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.crunch.Source;
-import org.apache.crunch.io.CrunchInputs;
-import org.apache.crunch.io.FormatBundle;
-import org.apache.crunch.io.SourceTargetHelper;
-import org.apache.crunch.types.PType;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-
-public class FileSourceImpl<T> implements Source<T> {
-
-  private static final Log LOG = LogFactory.getLog(FileSourceImpl.class);
-
-  protected final Path path;
-  protected final PType<T> ptype;
-  protected final FormatBundle<? extends InputFormat> inputBundle;
-
-  public FileSourceImpl(Path path, PType<T> ptype, Class<? extends InputFormat> inputFormatClass) {
-    this.path = path;
-    this.ptype = ptype;
-    this.inputBundle = FormatBundle.forInput(inputFormatClass);
-  }
-
-  public FileSourceImpl(Path path, PType<T> ptype, FormatBundle<? extends InputFormat> inputBundle) {
-    this.path = path;
-    this.ptype = ptype;
-    this.inputBundle = inputBundle;
-  }
-
-  public Path getPath() {
-    return path;
-  }
-  
-  @Override
-  public void configureSource(Job job, int inputId) throws IOException {
-    if (inputId == -1) {
-      FileInputFormat.addInputPath(job, path);
-      job.setInputFormatClass(inputBundle.getFormatClass());
-      inputBundle.configure(job.getConfiguration());
-    } else {
-      CrunchInputs.addInputPath(job, path, inputBundle, inputId);
-    }
-  }
-
-  @Override
-  public PType<T> getType() {
-    return ptype;
-  }
-
-  @Override
-  public long getSize(Configuration configuration) {
-    try {
-      return SourceTargetHelper.getPathSize(configuration, path);
-    } catch (IOException e) {
-      LOG.warn(String.format("Exception thrown looking up size of: %s", path), e);
-      throw new IllegalStateException("Failed to get the file size of: " + path, e);
-    }
-  }
-
-  @Override
-  public boolean equals(Object other) {
-    if (other == null || !getClass().equals(other.getClass())) {
-      return false;
-    }
-    FileSourceImpl o = (FileSourceImpl) other;
-    return ptype.equals(o.ptype) && path.equals(o.path) && inputBundle.equals(o.inputBundle);
-  }
-
-  @Override
-  public int hashCode() {
-    return new HashCodeBuilder().append(ptype).append(path).append(inputBundle).toHashCode();
-  }
-
-  @Override
-  public String toString() {
-    return new StringBuilder().append(inputBundle.getName()).append("(").append(path).append(")").toString();
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/impl/FileTableSourceImpl.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/impl/FileTableSourceImpl.java b/crunch/src/main/java/org/apache/crunch/io/impl/FileTableSourceImpl.java
deleted file mode 100644
index 295edb5..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/impl/FileTableSourceImpl.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.impl;
-
-import org.apache.crunch.Pair;
-import org.apache.crunch.TableSource;
-import org.apache.crunch.io.FormatBundle;
-import org.apache.crunch.types.PTableType;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-
-public class FileTableSourceImpl<K, V> extends FileSourceImpl<Pair<K, V>> implements TableSource<K, V> {
-
-  public FileTableSourceImpl(Path path, PTableType<K, V> tableType, Class<? extends FileInputFormat> formatClass) {
-    super(path, tableType, formatClass);
-  }
-
-  public FileTableSourceImpl(Path path, PTableType<K, V> tableType, FormatBundle bundle) {
-    super(path, tableType, bundle);
-  }
-  
-  @Override
-  public PTableType<K, V> getTableType() {
-    return (PTableType<K, V>) getType();
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/impl/FileTargetImpl.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/impl/FileTargetImpl.java b/crunch/src/main/java/org/apache/crunch/io/impl/FileTargetImpl.java
deleted file mode 100644
index c1c29e4..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/impl/FileTargetImpl.java
+++ /dev/null
@@ -1,162 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.impl;
-
-import java.io.IOException;
-
-import org.apache.commons.lang.builder.HashCodeBuilder;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.crunch.CrunchRuntimeException;
-import org.apache.crunch.SourceTarget;
-import org.apache.crunch.io.CrunchOutputs;
-import org.apache.crunch.io.FileNamingScheme;
-import org.apache.crunch.io.OutputHandler;
-import org.apache.crunch.io.PathTarget;
-import org.apache.crunch.types.Converter;
-import org.apache.crunch.types.PType;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-
-public class FileTargetImpl implements PathTarget {
-
-  private static final Log LOG = LogFactory.getLog(FileTargetImpl.class);
-  
-  protected final Path path;
-  private final Class<? extends FileOutputFormat> outputFormatClass;
-  private final FileNamingScheme fileNamingScheme;
-
-  public FileTargetImpl(Path path, Class<? extends FileOutputFormat> outputFormatClass,
-      FileNamingScheme fileNamingScheme) {
-    this.path = path;
-    this.outputFormatClass = outputFormatClass;
-    this.fileNamingScheme = fileNamingScheme;
-  }
-
-  @Override
-  public void configureForMapReduce(Job job, PType<?> ptype, Path outputPath, String name) {
-    Converter converter = ptype.getConverter();
-    Class keyClass = converter.getKeyClass();
-    Class valueClass = converter.getValueClass();
-    configureForMapReduce(job, keyClass, valueClass, outputFormatClass, outputPath, name);
-  }
-
-  protected void configureForMapReduce(Job job, Class keyClass, Class valueClass,
-      Class outputFormatClass, Path outputPath, String name) {
-    try {
-      FileOutputFormat.setOutputPath(job, outputPath);
-    } catch (Exception e) {
-      throw new RuntimeException(e);
-    }
-    if (name == null) {
-      job.setOutputFormatClass(outputFormatClass);
-      job.setOutputKeyClass(keyClass);
-      job.setOutputValueClass(valueClass);
-    } else {
-      CrunchOutputs.addNamedOutput(job, name, outputFormatClass, keyClass, valueClass);
-    }
-  }
-
-  @Override
-  public boolean accept(OutputHandler handler, PType<?> ptype) {
-    handler.configure(this, ptype);
-    return true;
-  }
-
-  @Override
-  public Path getPath() {
-    return path;
-  }
-
-  @Override
-  public FileNamingScheme getFileNamingScheme() {
-    return fileNamingScheme;
-  }
-
-  @Override
-  public boolean equals(Object other) {
-    if (other == null || !getClass().equals(other.getClass())) {
-      return false;
-    }
-    FileTargetImpl o = (FileTargetImpl) other;
-    return path.equals(o.path);
-  }
-
-  @Override
-  public int hashCode() {
-    return new HashCodeBuilder().append(path).toHashCode();
-  }
-
-  @Override
-  public String toString() {
-    return new StringBuilder().append(outputFormatClass.getSimpleName()).append("(").append(path).append(")")
-        .toString();
-  }
-
-  @Override
-  public <T> SourceTarget<T> asSourceTarget(PType<T> ptype) {
-    // By default, assume that we cannot do this.
-    return null;
-  }
-
-  @Override
-  public void handleExisting(WriteMode strategy, Configuration conf) {
-    FileSystem fs = null;
-    try {
-      fs = FileSystem.get(conf);
-    } catch (IOException e) {
-      LOG.error("Could not retrieve FileSystem object to check for existing path", e);
-      throw new CrunchRuntimeException(e);
-    }
-    
-    boolean exists = false;
-    try {
-      exists = fs.exists(path);
-    } catch (IOException e) {
-      LOG.error("Exception checking existence of path: " + path, e);
-      throw new CrunchRuntimeException(e);
-    }
-    
-    if (exists) {
-      switch (strategy) {
-      case DEFAULT:
-        LOG.error("Path " + path + " already exists!");
-        throw new CrunchRuntimeException("Path already exists: " + path);
-      case OVERWRITE:
-        LOG.info("Removing data at existing path: " + path);
-        try {
-          fs.delete(path, true);
-        } catch (IOException e) {
-          LOG.error("Exception thrown removing data at path: " + path, e);
-        }
-        break;
-      case APPEND:
-        LOG.info("Adding output files to existing path: " + path);
-        break;
-      default:
-        throw new CrunchRuntimeException("Unknown WriteMode:  " + strategy);
-      }
-    } else {
-      LOG.info("Will write output files to new path: " + path);
-    }
-  }
-}
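
A short sketch of how the write modes handled above behave against an existing output path (assuming the PCollection.write overload that accepts a Target.WriteMode, which is what drives handleExisting; the paths are placeholders):

import org.apache.crunch.PCollection;
import org.apache.crunch.Pipeline;
import org.apache.crunch.Target;
import org.apache.crunch.impl.mr.MRPipeline;
import org.apache.crunch.io.From;
import org.apache.crunch.io.To;

public class WriteModeSketch {
  public static void main(String[] args) {
    Pipeline pipeline = new MRPipeline(WriteModeSketch.class);
    PCollection<String> lines = pipeline.read(From.textFile("/data/in"));
    // With the default mode (plain write()), an existing /data/out raises a
    // CrunchRuntimeException. OVERWRITE deletes the existing output first,
    // and APPEND adds new part files alongside the ones already there.
    lines.write(To.textFile("/data/out"), Target.WriteMode.OVERWRITE);
    pipeline.done();
  }
}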

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/impl/ReadableSourcePathTargetImpl.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/impl/ReadableSourcePathTargetImpl.java b/crunch/src/main/java/org/apache/crunch/io/impl/ReadableSourcePathTargetImpl.java
deleted file mode 100644
index 6506816..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/impl/ReadableSourcePathTargetImpl.java
+++ /dev/null
@@ -1,39 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.impl;
-
-import java.io.IOException;
-
-import org.apache.crunch.io.FileNamingScheme;
-import org.apache.crunch.io.PathTarget;
-import org.apache.crunch.io.ReadableSource;
-import org.apache.crunch.io.ReadableSourceTarget;
-import org.apache.hadoop.conf.Configuration;
-
-public class ReadableSourcePathTargetImpl<T> extends SourcePathTargetImpl<T> implements ReadableSourceTarget<T> {
-
-  public ReadableSourcePathTargetImpl(ReadableSource<T> source, PathTarget target, FileNamingScheme fileNamingScheme) {
-    super(source, target, fileNamingScheme);
-  }
-
-  @Override
-  public Iterable<T> read(Configuration conf) throws IOException {
-    return ((ReadableSource<T>) source).read(conf);
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/impl/ReadableSourceTargetImpl.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/impl/ReadableSourceTargetImpl.java b/crunch/src/main/java/org/apache/crunch/io/impl/ReadableSourceTargetImpl.java
deleted file mode 100644
index f435b3b..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/impl/ReadableSourceTargetImpl.java
+++ /dev/null
@@ -1,37 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.impl;
-
-import java.io.IOException;
-
-import org.apache.crunch.Target;
-import org.apache.crunch.io.ReadableSource;
-import org.apache.crunch.io.ReadableSourceTarget;
-import org.apache.hadoop.conf.Configuration;
-
-public class ReadableSourceTargetImpl<T> extends SourceTargetImpl<T> implements ReadableSourceTarget<T> {
-
-  public ReadableSourceTargetImpl(ReadableSource<T> source, Target target) {
-    super(source, target);
-  }
-
-  @Override
-  public Iterable<T> read(Configuration conf) throws IOException {
-    return ((ReadableSource<T>) source).read(conf);
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/impl/SourcePathTargetImpl.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/impl/SourcePathTargetImpl.java b/crunch/src/main/java/org/apache/crunch/io/impl/SourcePathTargetImpl.java
deleted file mode 100644
index c0d7ce0..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/impl/SourcePathTargetImpl.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.impl;
-
-import org.apache.crunch.Source;
-import org.apache.crunch.io.FileNamingScheme;
-import org.apache.crunch.io.PathTarget;
-import org.apache.crunch.types.PType;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.Job;
-
-public class SourcePathTargetImpl<T> extends SourceTargetImpl<T> implements PathTarget {
-
-  private final FileNamingScheme fileNamingScheme;
-
-  public SourcePathTargetImpl(Source<T> source, PathTarget target, FileNamingScheme fileNamingScheme) {
-    super(source, target);
-    this.fileNamingScheme = fileNamingScheme;
-  }
-
-  @Override
-  public void configureForMapReduce(Job job, PType<?> ptype, Path outputPath, String name) {
-    ((PathTarget) target).configureForMapReduce(job, ptype, outputPath, name);
-  }
-
-  @Override
-  public Path getPath() {
-    return ((PathTarget) target).getPath();
-  }
-
-  @Override
-  public FileNamingScheme getFileNamingScheme() {
-    return fileNamingScheme;
-  }
-}


[19/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/lib/join/MultiAvroSchemaJoinIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/lib/join/MultiAvroSchemaJoinIT.java b/crunch/src/it/java/org/apache/crunch/lib/join/MultiAvroSchemaJoinIT.java
deleted file mode 100644
index f1ca770..0000000
--- a/crunch/src/it/java/org/apache/crunch/lib/join/MultiAvroSchemaJoinIT.java
+++ /dev/null
@@ -1,121 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib.join;
-
-import static org.apache.crunch.types.avro.Avros.records;
-import static org.apache.crunch.types.avro.Avros.strings;
-import static org.junit.Assert.assertEquals;
-
-import java.io.File;
-import java.util.List;
-
-import org.apache.avro.Schema;
-import org.apache.avro.file.DataFileWriter;
-import org.apache.avro.io.DatumWriter;
-import org.apache.avro.specific.SpecificDatumWriter;
-import org.apache.avro.specific.SpecificRecord;
-import org.apache.crunch.MapFn;
-import org.apache.crunch.PCollection;
-import org.apache.crunch.Pair;
-import org.apache.crunch.Pipeline;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.io.From;
-import org.apache.crunch.test.Employee;
-import org.apache.crunch.test.Person;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Lists;
-
-public class MultiAvroSchemaJoinIT {
-
-  private File personFile;
-  private File employeeFile;
-  @Rule
-  public TemporaryPath tmpDir = TemporaryPaths.create();
-
-  @Before
-  public void setUp() throws Exception {
-    this.personFile = File.createTempFile("person", ".avro");
-    this.employeeFile = File.createTempFile("employee", ".avro");
-
-    DatumWriter<Person> pdw = new SpecificDatumWriter<Person>();
-    DataFileWriter<Person> pfw = new DataFileWriter<Person>(pdw);
-    pfw.create(Person.SCHEMA$, personFile);
-    Person p1 = new Person();
-    p1.name = "Josh";
-    p1.age = 19;
-    p1.siblingnames = ImmutableList.<CharSequence> of("Kate", "Mike");
-    pfw.append(p1);
-    Person p2 = new Person();
-    p2.name = "Kate";
-    p2.age = 17;
-    p2.siblingnames = ImmutableList.<CharSequence> of("Josh", "Mike");
-    pfw.append(p2);
-    Person p3 = new Person();
-    p3.name = "Mike";
-    p3.age = 12;
-    p3.siblingnames = ImmutableList.<CharSequence> of("Josh", "Kate");
-    pfw.append(p3);
-    pfw.close();
-
-    DatumWriter<Employee> edw = new SpecificDatumWriter<Employee>();
-    DataFileWriter<Employee> efw = new DataFileWriter<Employee>(edw);
-    efw.create(Employee.SCHEMA$, employeeFile);
-    Employee e1 = new Employee();
-    e1.name = "Kate";
-    e1.salary = 100000;
-    e1.department = "Marketing";
-    efw.append(e1);
-    efw.close();
-  }
-
-  @After
-  public void tearDown() throws Exception {
-    personFile.delete();
-    employeeFile.delete();
-  }
-
-  public static class NameFn<K extends SpecificRecord> extends MapFn<K, String> {
-    @Override
-    public String map(K input) {
-      Schema s = input.getSchema();
-      Schema.Field f = s.getField("name");
-      return input.get(f.pos()).toString();
-    }
-  }
-
-  @Test
-  public void testJoin() throws Exception {
-    Pipeline p = new MRPipeline(MultiAvroSchemaJoinIT.class, tmpDir.getDefaultConfiguration());
-    PCollection<Person> people = p.read(From.avroFile(personFile.getAbsolutePath(), records(Person.class)));
-    PCollection<Employee> employees = p.read(From.avroFile(employeeFile.getAbsolutePath(), records(Employee.class)));
-
-    Iterable<Pair<Person, Employee>> result = people.by(new NameFn<Person>(), strings())
-        .join(employees.by(new NameFn<Employee>(), strings())).values().materialize();
-    List<Pair<Person, Employee>> v = Lists.newArrayList(result);
-    assertEquals(1, v.size());
-    assertEquals("Kate", v.get(0).first().name.toString());
-    assertEquals("Kate", v.get(0).second().name.toString());
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/lib/join/RightOuterJoinIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/lib/join/RightOuterJoinIT.java b/crunch/src/it/java/org/apache/crunch/lib/join/RightOuterJoinIT.java
deleted file mode 100644
index d889b61..0000000
--- a/crunch/src/it/java/org/apache/crunch/lib/join/RightOuterJoinIT.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib.join;
-
-import static org.junit.Assert.assertTrue;
-
-import org.apache.crunch.Pair;
-import org.apache.crunch.types.PTypeFamily;
-
-public class RightOuterJoinIT extends JoinTester {
-  @Override
-  public void assertPassed(Iterable<Pair<String, Long>> lines) {
-    boolean passed1 = false;
-    boolean passed2 = true;
-    boolean passed3 = false;
-    for (Pair<String, Long> line : lines) {
-      if ("wretched".equals(line.first()) && 24 == line.second()) {
-        passed1 = true;
-      }
-      if ("againe".equals(line.first())) {
-        passed2 = false;
-      }
-      if ("Montparnasse.".equals(line.first()) && 2 == line.second()) {
-        passed3 = true;
-      }
-    }
-    assertTrue(passed1);
-    assertTrue(passed2);
-    assertTrue(passed3);
-  }
-
-  @Override
-  protected JoinFn<String, Long, Long> getJoinFn(PTypeFamily typeFamily) {
-    return new RightOuterJoinFn<String, Long, Long>(typeFamily.strings(), typeFamily.longs());
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/test/TemporaryPaths.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/test/TemporaryPaths.java b/crunch/src/it/java/org/apache/crunch/test/TemporaryPaths.java
deleted file mode 100644
index 97cf0de..0000000
--- a/crunch/src/it/java/org/apache/crunch/test/TemporaryPaths.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.test;
-
-import org.apache.crunch.impl.mr.run.RuntimeParameters;
-import org.apache.hadoop.conf.Configuration;
-
-
-/**
- * Utilities for working with {@link TemporaryPath}.
- */
-public final class TemporaryPaths {
-
-  /**
-   * Static factory returning a {@link TemporaryPath} with adjusted
-   * {@link Configuration} properties.
-   */
-  public static TemporaryPath create() {
-    return new TemporaryPath(RuntimeParameters.TMP_DIR, "hadoop.tmp.dir");
-  }
-
-  private TemporaryPaths() {
-    // nothing
-  }
-}
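
For reference, a minimal sketch of how an integration test in this module consumes the factory above, mirroring the usage in MultiAvroSchemaJoinIT elsewhere in this patch (the test class name is hypothetical):

import org.apache.crunch.impl.mr.MRPipeline;
import org.apache.crunch.test.TemporaryPath;
import org.apache.crunch.test.TemporaryPaths;
import org.junit.Rule;
import org.junit.Test;

public class ExampleIT {  // hypothetical test class, for illustration only

  // TemporaryPaths.create() returns a rule whose configuration points the
  // Crunch and Hadoop temp directories at a per-test directory.
  @Rule
  public TemporaryPath tmpDir = TemporaryPaths.create();

  @Test
  public void runsWithAdjustedConfiguration() throws Exception {
    MRPipeline pipeline = new MRPipeline(ExampleIT.class, tmpDir.getDefaultConfiguration());
    // ... read, transform, and write data under tmpDir ...
    pipeline.done();
  }
}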

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/test/Tests.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/test/Tests.java b/crunch/src/it/java/org/apache/crunch/test/Tests.java
deleted file mode 100644
index e381c1a..0000000
--- a/crunch/src/it/java/org/apache/crunch/test/Tests.java
+++ /dev/null
@@ -1,124 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.test;
-
-import static com.google.common.base.Preconditions.checkArgument;
-import static com.google.common.base.Preconditions.checkNotNull;
-
-import java.io.IOException;
-import java.util.Collection;
-
-import org.apache.crunch.Pipeline;
-import org.apache.crunch.impl.mem.MemPipeline;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.hadoop.io.Writable;
-import org.junit.runners.Parameterized.Parameters;
-
-import com.google.common.collect.ImmutableList;
-import com.google.common.io.ByteArrayDataOutput;
-import com.google.common.io.ByteStreams;
-import com.google.common.io.Resources;
-
-
-/**
- * Utilities for integration tests.
- */
-public final class Tests {
-
-  private Tests() {
-    // nothing
-  }
-
-  /**
-   * Get the path to an integration test resource file, as per naming convention.
-   *
-   * @param testCase The executing test case instance
-   * @param resourceName The file name of the resource
-   * @return The path to the resource (never null)
-   * @throws IllegalArgumentException Thrown if the resource doesn't exist
-   */
-  public static String pathTo(Object testCase, String resourceName) {
-    String qualifiedName = resource(testCase, resourceName);
-    return Resources.getResource(qualifiedName).getFile();
-  }
-
-  /**
-   * Get the classpath-relative name of a test resource, without checking whether it exists.
-   *
-   * @param testCase The executing test case instance
-   * @param resourceName The file name of the resource
-   * @return The qualified resource name (never null)
-   */
-  public static String resource(Object testCase, String resourceName) {
-    checkNotNull(testCase);
-    checkNotNull(resourceName);
-
-    // Note: We append "Data" because otherwise Eclipse would complain about
-    //       the case's class name clashing with the resource directory's name.
-    return testCase.getClass().getName().replaceAll("\\.", "/") + "Data/" + resourceName;
-  }
-
-  /**
-   * Return our two types of {@link Pipeline}s for a JUnit Parameterized test.
-   *
-   * @param testCase The executing test case's class
-   * @return The collection to return from a {@link Parameters} provider method
-   */
-  public static Collection<Object[]> pipelinesParams(Class<?> testCase) {
-    return ImmutableList.copyOf(
-        new Object[][] { { MemPipeline.getInstance() }, { new MRPipeline(testCase) }
-    });
-  }
-
-  /**
-   * Serialize the given Writable into a byte array.
-   *
-   * @param value The instance to serialize
-   * @return The serialized data
-   */
-  public static byte[] serialize(Writable value) {
-    checkNotNull(value);
-    try {
-      ByteArrayDataOutput out = ByteStreams.newDataOutput();
-      value.write(out);
-      return out.toByteArray();
-    } catch (IOException e) {
-      throw new IllegalStateException("cannot serialize", e);
-    }
-  }
-
-  /**
-   * Serialize the src Writable into a byte array, then deserialize it into dest.
-   * @param src The instance to serialize
-   * @param dest The instance to deserialize into
-   * @return dest, for convenience
-   */
-  public static <T extends Writable> T roundtrip(Writable src, T dest) {
-    checkNotNull(src);
-    checkNotNull(dest);
-    checkArgument(src != dest, "src and dest may not be the same instance");
-
-    try {
-      byte[] data = serialize(src);
-      dest.readFields(ByteStreams.newDataInput(data));
-    } catch (IOException e) {
-      throw new IllegalStateException("cannot deserialize", e);
-    }
-    return dest;
-  }
-}
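
As a usage note for the helpers above, a minimal sketch (the class name and the resource file mentioned in the comment are hypothetical):

import org.apache.crunch.test.Tests;
import org.apache.hadoop.io.IntWritable;

public class TestsUsageExample {  // illustrative only
  public static void main(String[] args) {
    // Serialize an IntWritable and read it back into a separate instance;
    // roundtrip() returns the destination for convenient use in assertions.
    IntWritable copy = Tests.roundtrip(new IntWritable(42), new IntWritable());
    System.out.println(copy.get());  // prints 42

    // resource()/pathTo() follow the "<TestClassName>Data/<file>" convention,
    // e.g. org.apache.crunch.SomeIT -> org/apache/crunch/SomeITData/input.txt
  }
}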

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/resources/customers.txt
----------------------------------------------------------------------
diff --git a/crunch/src/it/resources/customers.txt b/crunch/src/it/resources/customers.txt
deleted file mode 100644
index 98f3f3d..0000000
--- a/crunch/src/it/resources/customers.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-111|John Doe
-222|Jane Doe
-333|Someone Else
-444|Has No Orders
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/resources/docs.txt
----------------------------------------------------------------------
diff --git a/crunch/src/it/resources/docs.txt b/crunch/src/it/resources/docs.txt
deleted file mode 100644
index 90a3f65..0000000
--- a/crunch/src/it/resources/docs.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-A	this doc has this text
-A	and this text as well
-A	but also this
-B	this doc has some text
-B	but not as much as the last
-B	doc

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/resources/emptyTextFile.txt
----------------------------------------------------------------------
diff --git a/crunch/src/it/resources/emptyTextFile.txt b/crunch/src/it/resources/emptyTextFile.txt
deleted file mode 100644
index e69de29..0000000

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/resources/letters.txt
----------------------------------------------------------------------
diff --git a/crunch/src/it/resources/letters.txt b/crunch/src/it/resources/letters.txt
deleted file mode 100644
index 916bfc9..0000000
--- a/crunch/src/it/resources/letters.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-a
-bb
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/crunch/src/it/resources/log4j.properties b/crunch/src/it/resources/log4j.properties
deleted file mode 100644
index 5d144a0..0000000
--- a/crunch/src/it/resources/log4j.properties
+++ /dev/null
@@ -1,29 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# ***** Set the org.apache.crunch logger level to INFO and its only appender to A.
-log4j.logger.org.apache.crunch=info, A
-
-# Log warnings on Hadoop for the local runner when testing
-log4j.logger.org.apache.hadoop=warn, A
-# Except for Configuration, which is chatty.
-log4j.logger.org.apache.hadoop.conf.Configuration=error, A
-
-# ***** A is set to be a ConsoleAppender.
-log4j.appender.A=org.apache.log4j.ConsoleAppender
-# ***** A uses PatternLayout.
-log4j.appender.A.layout=org.apache.log4j.PatternLayout
-log4j.appender.A.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n


[26/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/avro/Avros.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/avro/Avros.java b/crunch-core/src/main/java/org/apache/crunch/types/avro/Avros.java
new file mode 100644
index 0000000..fc30eaf
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/avro/Avros.java
@@ -0,0 +1,709 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.avro;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+
+import org.apache.avro.Schema;
+import org.apache.avro.Schema.Type;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.reflect.ReflectData;
+import org.apache.avro.specific.SpecificRecord;
+import org.apache.avro.util.Utf8;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.crunch.MapFn;
+import org.apache.crunch.Pair;
+import org.apache.crunch.Tuple;
+import org.apache.crunch.Tuple3;
+import org.apache.crunch.Tuple4;
+import org.apache.crunch.TupleN;
+import org.apache.crunch.fn.CompositeMapFn;
+import org.apache.crunch.fn.IdentityFn;
+import org.apache.crunch.types.CollectionDeepCopier;
+import org.apache.crunch.types.DeepCopier;
+import org.apache.crunch.types.MapDeepCopier;
+import org.apache.crunch.types.PTableType;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.PTypes;
+import org.apache.crunch.types.TupleDeepCopier;
+import org.apache.crunch.types.TupleFactory;
+import org.apache.crunch.types.writable.WritableDeepCopier;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.TaskInputOutputContext;
+import org.apache.hadoop.util.ReflectionUtils;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+
+/**
+ * Defines static methods that are analogous to the methods defined in
+ * {@link AvroTypeFamily} for convenient static importing.
+ * 
+ */
+public class Avros {
+
+  /**
+   * Older versions of Avro (i.e., before 1.7.0) do not support schemas that are
+   * composed of a mix of specific and reflection-based schemas. This bit
+   * controls whether or not we allow Crunch jobs to be created that involve
+   * mixing specific and reflection-based schemas and can be overridden by the
+   * client developer.
+   */
+  public static final boolean CAN_COMBINE_SPECIFIC_AND_REFLECT_SCHEMAS;
+
+  static {
+    CAN_COMBINE_SPECIFIC_AND_REFLECT_SCHEMAS = AvroCapabilities.canDecodeSpecificSchemaWithReflectDatumReader();
+  }
+
+  /**
+   * The instance we use for generating reflected schemas. May be modified by
+   * clients (e.g., Scrunch.)
+   */
+  public static ReflectDataFactory REFLECT_DATA_FACTORY = new ReflectDataFactory();
+
+  /**
+   * The name of the configuration parameter that tracks which reflection
+   * factory to use.
+   */
+  public static final String REFLECT_DATA_FACTORY_CLASS = "crunch.reflectdatafactory";
+
+  public static void configureReflectDataFactory(Configuration conf) {
+    conf.setClass(REFLECT_DATA_FACTORY_CLASS, REFLECT_DATA_FACTORY.getClass(), ReflectDataFactory.class);
+  }
+
+  public static ReflectDataFactory getReflectDataFactory(Configuration conf) {
+    return (ReflectDataFactory) ReflectionUtils.newInstance(
+        conf.getClass(REFLECT_DATA_FACTORY_CLASS, ReflectDataFactory.class), conf);
+  }
+
+  public static void checkCombiningSpecificAndReflectionSchemas() {
+    if (!CAN_COMBINE_SPECIFIC_AND_REFLECT_SCHEMAS) {
+      throw new IllegalStateException("Crunch does not support running jobs that"
+          + " contain a mixture of reflection-based and avro-generated data types."
+          + " Please consider turning your reflection-based type into an avro-generated"
+          + " type and using that generated type instead."
+          + " If the version of Avro you are using is 1.7.0 or greater, you can enable"
+          + " combined schemas by setting the Avros.CAN_COMBINE_SPECIFIC_AND_REFLECT_SCHEMAS" + " field to 'true'.");
+    }
+  }
+
+  public static MapFn<CharSequence, String> UTF8_TO_STRING = new MapFn<CharSequence, String>() {
+    @Override
+    public String map(CharSequence input) {
+      return input.toString();
+    }
+  };
+
+  public static MapFn<String, Utf8> STRING_TO_UTF8 = new MapFn<String, Utf8>() {
+    @Override
+    public Utf8 map(String input) {
+      return new Utf8(input);
+    }
+  };
+
+  public static MapFn<Object, ByteBuffer> BYTES_IN = new MapFn<Object, ByteBuffer>() {
+    @Override
+    public ByteBuffer map(Object input) {
+      if (input instanceof ByteBuffer) {
+        return (ByteBuffer) input;
+      }
+      return ByteBuffer.wrap((byte[]) input);
+    }
+  };
+
+  private static final AvroType<String> strings = new AvroType<String>(String.class, Schema.create(Schema.Type.STRING),
+      UTF8_TO_STRING, STRING_TO_UTF8, new DeepCopier.NoOpDeepCopier<String>());
+  private static final AvroType<Void> nulls = create(Void.class, Schema.Type.NULL);
+  private static final AvroType<Long> longs = create(Long.class, Schema.Type.LONG);
+  private static final AvroType<Integer> ints = create(Integer.class, Schema.Type.INT);
+  private static final AvroType<Float> floats = create(Float.class, Schema.Type.FLOAT);
+  private static final AvroType<Double> doubles = create(Double.class, Schema.Type.DOUBLE);
+  private static final AvroType<Boolean> booleans = create(Boolean.class, Schema.Type.BOOLEAN);
+  private static final AvroType<ByteBuffer> bytes = new AvroType<ByteBuffer>(ByteBuffer.class,
+      Schema.create(Schema.Type.BYTES), BYTES_IN, IdentityFn.getInstance(), new DeepCopier.NoOpDeepCopier<ByteBuffer>());
+
+  private static final Map<Class<?>, PType<?>> PRIMITIVES = ImmutableMap.<Class<?>, PType<?>> builder()
+      .put(String.class, strings).put(Long.class, longs).put(Integer.class, ints).put(Float.class, floats)
+      .put(Double.class, doubles).put(Boolean.class, booleans).put(ByteBuffer.class, bytes).build();
+
+  private static final Map<Class<?>, AvroType<?>> EXTENSIONS = Maps.newHashMap();
+
+  public static <T> void register(Class<T> clazz, AvroType<T> ptype) {
+    EXTENSIONS.put(clazz, ptype);
+  }
+
+  public static <T> PType<T> getPrimitiveType(Class<T> clazz) {
+    return (PType<T>) PRIMITIVES.get(clazz);
+  }
+
+  static <T> boolean isPrimitive(AvroType<T> avroType) {
+    return avroType.getTypeClass().isPrimitive() || PRIMITIVES.containsKey(avroType.getTypeClass());
+  }
+
+  private static <T> AvroType<T> create(Class<T> clazz, Schema.Type schemaType) {
+    return new AvroType<T>(clazz, Schema.create(schemaType), new DeepCopier.NoOpDeepCopier<T>());
+  }
+
+  public static final AvroType<Void> nulls() {
+    return nulls;
+  }
+
+  public static final AvroType<String> strings() {
+    return strings;
+  }
+
+  public static final AvroType<Long> longs() {
+    return longs;
+  }
+
+  public static final AvroType<Integer> ints() {
+    return ints;
+  }
+
+  public static final AvroType<Float> floats() {
+    return floats;
+  }
+
+  public static final AvroType<Double> doubles() {
+    return doubles;
+  }
+
+  public static final AvroType<Boolean> booleans() {
+    return booleans;
+  }
+
+  public static final AvroType<ByteBuffer> bytes() {
+    return bytes;
+  }
+
+  public static final <T> AvroType<T> records(Class<T> clazz) {
+    if (EXTENSIONS.containsKey(clazz)) {
+      return (AvroType<T>) EXTENSIONS.get(clazz);
+    }
+    return containers(clazz);
+  }
+
+  public static final AvroType<GenericData.Record> generics(Schema schema) {
+    return new AvroType<GenericData.Record>(GenericData.Record.class, schema, new AvroDeepCopier.AvroGenericDeepCopier(
+        schema));
+  }
+
+  public static final <T> AvroType<T> containers(Class<T> clazz) {
+    if (SpecificRecord.class.isAssignableFrom(clazz)) {
+      return (AvroType<T>) specifics((Class<SpecificRecord>) clazz);
+    }
+    return reflects(clazz);
+  }
+
+  public static final <T extends SpecificRecord> AvroType<T> specifics(Class<T> clazz) {
+    T t = ReflectionUtils.newInstance(clazz, null);
+    Schema schema = t.getSchema();
+    return new AvroType<T>(clazz, schema, new AvroDeepCopier.AvroSpecificDeepCopier<T>(clazz, schema));
+  }
+
+  public static final <T> AvroType<T> reflects(Class<T> clazz) {
+    Schema schema = REFLECT_DATA_FACTORY.getReflectData().getSchema(clazz);
+    return new AvroType<T>(clazz, schema, new AvroDeepCopier.AvroReflectDeepCopier<T>(clazz, schema));
+  }
+
+  private static class BytesToWritableMapFn<T extends Writable> extends MapFn<Object, T> {
+    private static final Log LOG = LogFactory.getLog(BytesToWritableMapFn.class);
+
+    private final Class<T> writableClazz;
+
+    public BytesToWritableMapFn(Class<T> writableClazz) {
+      this.writableClazz = writableClazz;
+    }
+
+    @Override
+    public T map(Object input) {
+      ByteBuffer byteBuffer = BYTES_IN.map(input);
+      T instance = ReflectionUtils.newInstance(writableClazz, null);
+      try {
+        instance.readFields(new DataInputStream(new ByteArrayInputStream(byteBuffer.array(),
+            byteBuffer.arrayOffset(), byteBuffer.limit())));
+      } catch (IOException e) {
+        LOG.error("Exception thrown reading instance of: " + writableClazz, e);
+      }
+      return instance;
+    }
+  }
+
+  private static class WritableToBytesMapFn<T extends Writable> extends MapFn<T, ByteBuffer> {
+    private static final Log LOG = LogFactory.getLog(WritableToBytesMapFn.class);
+
+    @Override
+    public ByteBuffer map(T input) {
+      ByteArrayOutputStream baos = new ByteArrayOutputStream();
+      DataOutputStream das = new DataOutputStream(baos);
+      try {
+        input.write(das);
+      } catch (IOException e) {
+        LOG.error("Exception thrown converting Writable to bytes", e);
+      }
+      return ByteBuffer.wrap(baos.toByteArray());
+    }
+  }
+
+  public static final <T extends Writable> AvroType<T> writables(Class<T> clazz) {
+    return new AvroType<T>(clazz, Schema.create(Schema.Type.BYTES), new BytesToWritableMapFn<T>(clazz),
+        new WritableToBytesMapFn<T>(), new WritableDeepCopier<T>(clazz));
+  }
+
+  private static class GenericDataArrayToCollection<T> extends MapFn<Object, Collection<T>> {
+
+    private final MapFn<Object, T> mapFn;
+
+    public GenericDataArrayToCollection(MapFn<Object, T> mapFn) {
+      this.mapFn = mapFn;
+    }
+
+    @Override
+    public void configure(Configuration conf) {
+      mapFn.configure(conf);
+    }
+
+    @Override
+    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
+      mapFn.setContext(context);
+    }
+    
+    @Override
+    public void initialize() {
+      mapFn.initialize();
+    }
+
+    @Override
+    public Collection<T> map(Object input) {
+      Collection<T> ret = Lists.newArrayList();
+      if (input instanceof Collection) {
+        for (Object in : (Collection<Object>) input) {
+          ret.add(mapFn.map(in));
+        }
+      } else {
+        // Assume it is an array
+        Object[] arr = (Object[]) input;
+        for (Object in : arr) {
+          ret.add(mapFn.map(in));
+        }
+      }
+      return ret;
+    }
+  }
+
+  private static class CollectionToGenericDataArray extends MapFn<Collection<?>, GenericData.Array<?>> {
+
+    private final MapFn mapFn;
+    private final String jsonSchema;
+    private transient Schema schema;
+
+    public CollectionToGenericDataArray(Schema schema, MapFn mapFn) {
+      this.mapFn = mapFn;
+      this.jsonSchema = schema.toString();
+    }
+
+    @Override
+    public void configure(Configuration conf) {
+      mapFn.configure(conf);
+    }
+
+    @Override
+    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
+      mapFn.setContext(context);
+    }
+    
+    @Override
+    public void initialize() {
+      mapFn.initialize();
+    }
+
+    @Override
+    public GenericData.Array<?> map(Collection<?> input) {
+      if (schema == null) {
+        schema = new Schema.Parser().parse(jsonSchema);
+      }
+      GenericData.Array array = new GenericData.Array(input.size(), schema);
+      for (Object in : input) {
+        array.add(mapFn.map(in));
+      }
+      return array;
+    }
+  }
+
+  public static final <T> AvroType<Collection<T>> collections(PType<T> ptype) {
+    AvroType<T> avroType = (AvroType<T>) ptype;
+    Schema collectionSchema = Schema.createArray(allowNulls(avroType.getSchema()));
+    GenericDataArrayToCollection<T> input = new GenericDataArrayToCollection<T>(avroType.getInputMapFn());
+    CollectionToGenericDataArray output = new CollectionToGenericDataArray(collectionSchema, avroType.getOutputMapFn());
+    return new AvroType(Collection.class, collectionSchema, input, output, new CollectionDeepCopier<T>(ptype), ptype);
+  }
+
+  private static class AvroMapToMap<T> extends MapFn<Map<CharSequence, Object>, Map<String, T>> {
+    private final MapFn<Object, T> mapFn;
+
+    public AvroMapToMap(MapFn<Object, T> mapFn) {
+      this.mapFn = mapFn;
+    }
+
+    @Override
+    public void configure(Configuration conf) {
+      mapFn.configure(conf);
+    }
+
+    @Override
+    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
+      mapFn.setContext(context);
+    }
+    
+    @Override
+    public void initialize() {
+      mapFn.initialize();
+    }
+
+    @Override
+    public Map<String, T> map(Map<CharSequence, Object> input) {
+      Map<String, T> out = Maps.newHashMap();
+      for (Map.Entry<CharSequence, Object> e : input.entrySet()) {
+        out.put(e.getKey().toString(), mapFn.map(e.getValue()));
+      }
+      return out;
+    }
+  }
+
+  private static class MapToAvroMap<T> extends MapFn<Map<String, T>, Map<Utf8, Object>> {
+    private final MapFn<T, Object> mapFn;
+
+    public MapToAvroMap(MapFn<T, Object> mapFn) {
+      this.mapFn = mapFn;
+    }
+
+    @Override
+    public void configure(Configuration conf) {
+      mapFn.configure(conf);
+    }
+
+    @Override
+    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
+      mapFn.setContext(context);
+    }
+    
+    @Override
+    public void initialize() {
+      this.mapFn.initialize();
+    }
+
+    @Override
+    public Map<Utf8, Object> map(Map<String, T> input) {
+      Map<Utf8, Object> out = Maps.newHashMap();
+      for (Map.Entry<String, T> e : input.entrySet()) {
+        out.put(new Utf8(e.getKey()), mapFn.map(e.getValue()));
+      }
+      return out;
+    }
+  }
+
+  public static final <T> AvroType<Map<String, T>> maps(PType<T> ptype) {
+    AvroType<T> avroType = (AvroType<T>) ptype;
+    Schema mapSchema = Schema.createMap(allowNulls(avroType.getSchema()));
+    AvroMapToMap<T> inputFn = new AvroMapToMap<T>(avroType.getInputMapFn());
+    MapToAvroMap<T> outputFn = new MapToAvroMap<T>(avroType.getOutputMapFn());
+    return new AvroType(Map.class, mapSchema, inputFn, outputFn, new MapDeepCopier<T>(ptype), ptype);
+  }
+
+  private static class GenericRecordToTuple extends MapFn<GenericRecord, Tuple> {
+    private final TupleFactory<?> tupleFactory;
+    private final List<MapFn> fns;
+
+    private transient Object[] values;
+
+    public GenericRecordToTuple(TupleFactory<?> tupleFactory, PType<?>... ptypes) {
+      this.tupleFactory = tupleFactory;
+      this.fns = Lists.newArrayList();
+      for (PType<?> ptype : ptypes) {
+        AvroType atype = (AvroType) ptype;
+        fns.add(atype.getInputMapFn());
+      }
+    }
+
+    @Override
+    public void configure(Configuration conf) {
+      for (MapFn fn : fns) {
+        fn.configure(conf);
+      }
+    }
+
+    @Override
+    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
+      for (MapFn fn : fns) {
+        fn.setContext(context);
+      }
+    }
+    
+    @Override
+    public void initialize() {
+      for (MapFn fn : fns) {
+        fn.initialize();
+      }
+      this.values = new Object[fns.size()];
+      tupleFactory.initialize();
+    }
+
+    @Override
+    public Tuple map(GenericRecord input) {
+      for (int i = 0; i < values.length; i++) {
+        Object v = input.get(i);
+        if (v == null) {
+          values[i] = null;
+        } else {
+          values[i] = fns.get(i).map(v);
+        }
+      }
+      return tupleFactory.makeTuple(values);
+    }
+  }
+
+  private static class TupleToGenericRecord extends MapFn<Tuple, GenericRecord> {
+    private final List<MapFn> fns;
+    private final List<AvroType> avroTypes;
+    private final String jsonSchema;
+    private final boolean isReflect;
+    private transient Schema schema;
+
+    public TupleToGenericRecord(Schema schema, PType<?>... ptypes) {
+      this.fns = Lists.newArrayList();
+      this.avroTypes = Lists.newArrayList();
+      this.jsonSchema = schema.toString();
+      boolean reflectFound = false;
+      boolean specificFound = false;
+      for (PType ptype : ptypes) {
+        AvroType atype = (AvroType) ptype;
+        fns.add(atype.getOutputMapFn());
+        avroTypes.add(atype);
+        if (atype.hasReflect()) {
+          reflectFound = true;
+        }
+        if (atype.hasSpecific()) {
+          specificFound = true;
+        }
+      }
+      if (specificFound && reflectFound) {
+        checkCombiningSpecificAndReflectionSchemas();
+      }
+      this.isReflect = reflectFound;
+    }
+
+    @Override
+    public void configure(Configuration conf) {
+      for (MapFn fn : fns) {
+        fn.configure(conf);
+      }
+    }
+ 
+    @Override
+    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
+      for (MapFn fn : fns) {
+        fn.setContext(context);
+      }
+    }
+    
+    @Override
+    public void initialize() {
+      this.schema = new Schema.Parser().parse(jsonSchema);
+      for (MapFn fn : fns) {
+        fn.initialize();
+      }
+    }
+
+    private GenericRecord createRecord() {
+      if (isReflect) {
+        return new ReflectGenericRecord(schema);
+      } else {
+        return new GenericData.Record(schema);
+      }
+    }
+
+    @Override
+    public GenericRecord map(Tuple input) {
+      GenericRecord record = createRecord();
+      for (int i = 0; i < input.size(); i++) {
+        Object v = input.get(i);
+        if (v == null) {
+          record.put(i, null);
+        } else {
+          record.put(i, fns.get(i).map(v));
+        }
+      }
+      return record;
+    }
+  }
+
+  public static final <V1, V2> AvroType<Pair<V1, V2>> pairs(PType<V1> p1, PType<V2> p2) {
+    Schema schema = createTupleSchema(p1, p2);
+    GenericRecordToTuple input = new GenericRecordToTuple(TupleFactory.PAIR, p1, p2);
+    TupleToGenericRecord output = new TupleToGenericRecord(schema, p1, p2);
+    return new AvroType(Pair.class, schema, input, output, new TupleDeepCopier(Pair.class, p1, p2), p1, p2);
+  }
+
+  public static final <V1, V2, V3> AvroType<Tuple3<V1, V2, V3>> triples(PType<V1> p1, PType<V2> p2, PType<V3> p3) {
+    Schema schema = createTupleSchema(p1, p2, p3);
+    return new AvroType(Tuple3.class, schema, new GenericRecordToTuple(TupleFactory.TUPLE3, p1, p2, p3),
+        new TupleToGenericRecord(schema, p1, p2, p3), new TupleDeepCopier(Tuple3.class, p1, p2, p3), p1, p2, p3);
+  }
+
+  public static final <V1, V2, V3, V4> AvroType<Tuple4<V1, V2, V3, V4>> quads(PType<V1> p1, PType<V2> p2, PType<V3> p3,
+      PType<V4> p4) {
+    Schema schema = createTupleSchema(p1, p2, p3, p4);
+    return new AvroType(Tuple4.class, schema, new GenericRecordToTuple(TupleFactory.TUPLE4, p1, p2, p3, p4),
+        new TupleToGenericRecord(schema, p1, p2, p3, p4), new TupleDeepCopier(Tuple4.class, p1, p2, p3, p4), p1, p2,
+        p3, p4);
+  }
+
+  public static final AvroType<TupleN> tuples(PType... ptypes) {
+    Schema schema = createTupleSchema(ptypes);
+    return new AvroType(TupleN.class, schema, new GenericRecordToTuple(TupleFactory.TUPLEN, ptypes),
+        new TupleToGenericRecord(schema, ptypes), new TupleDeepCopier(TupleN.class, ptypes), ptypes);
+  }
+
+  public static <T extends Tuple> AvroType<T> tuples(Class<T> clazz, PType... ptypes) {
+    Schema schema = createTupleSchema(ptypes);
+    Class[] typeArgs = new Class[ptypes.length];
+    for (int i = 0; i < typeArgs.length; i++) {
+      typeArgs[i] = ptypes[i].getTypeClass();
+    }
+    TupleFactory<T> factory = TupleFactory.create(clazz, typeArgs);
+    return new AvroType<T>(clazz, schema, new GenericRecordToTuple(factory, ptypes), new TupleToGenericRecord(schema,
+        ptypes), new TupleDeepCopier(clazz, ptypes), ptypes);
+  }
+
+  private static Schema createTupleSchema(PType<?>... ptypes) {
+    // Guarantee each tuple schema has a globally unique name
+    String tupleName = "tuple" + UUID.randomUUID().toString().replace('-', 'x');
+    Schema schema = Schema.createRecord(tupleName, "", "crunch", false);
+    List<Schema.Field> fields = Lists.newArrayList();
+    for (int i = 0; i < ptypes.length; i++) {
+      AvroType atype = (AvroType) ptypes[i];
+      Schema fieldSchema = allowNulls(atype.getSchema());
+      fields.add(new Schema.Field("v" + i, fieldSchema, "", null));
+    }
+    schema.setFields(fields);
+    return schema;
+  }
+
+  public static final <S, T> AvroType<T> derived(Class<T> clazz, MapFn<S, T> inputFn, MapFn<T, S> outputFn,
+      PType<S> base) {
+    AvroType<S> abase = (AvroType<S>) base;
+    return new AvroType<T>(clazz, abase.getSchema(), new CompositeMapFn(abase.getInputMapFn(), inputFn),
+        new CompositeMapFn(outputFn, abase.getOutputMapFn()), new DeepCopier.NoOpDeepCopier<T>(), base.getSubTypes()
+            .toArray(new PType[0]));
+  }
+
+  public static <T> PType<T> jsons(Class<T> clazz) {
+    return PTypes.jsonString(clazz, AvroTypeFamily.getInstance());
+  }
+
+  public static final <K, V> AvroTableType<K, V> tableOf(PType<K> key, PType<V> value) {
+    if (key instanceof PTableType) {
+      PTableType ptt = (PTableType) key;
+      key = Avros.pairs(ptt.getKeyType(), ptt.getValueType());
+    }
+    if (value instanceof PTableType) {
+      PTableType ptt = (PTableType) value;
+      value = Avros.pairs(ptt.getKeyType(), ptt.getValueType());
+    }
+    AvroType<K> avroKey = (AvroType<K>) key;
+    AvroType<V> avroValue = (AvroType<V>) value;
+    return new AvroTableType(avroKey, avroValue, Pair.class);
+  }
+
+  private static final Schema NULL_SCHEMA = Schema.create(Type.NULL);
+
+  private static Schema allowNulls(Schema base) {
+    if (NULL_SCHEMA.equals(base)) {
+      return base;
+    }
+    return Schema.createUnion(ImmutableList.of(base, NULL_SCHEMA));
+  }
+
+  private static class ReflectGenericRecord extends GenericData.Record {
+
+    public ReflectGenericRecord(Schema schema) {
+      super(schema);
+    }
+
+    @Override
+    public int hashCode() {
+      return reflectAwareHashCode(this, getSchema());
+    }
+  }
+
+  /*
+   * TODO: Remove this once we no longer have to support 1.5.4.
+   */
+  private static int reflectAwareHashCode(Object o, Schema s) {
+    if (o == null)
+      return 0; // incomplete datum
+    int hashCode = 1;
+    switch (s.getType()) {
+    case RECORD:
+      for (Schema.Field f : s.getFields()) {
+        if (f.order() == Schema.Field.Order.IGNORE)
+          continue;
+        hashCode = hashCodeAdd(hashCode, ReflectData.get().getField(o, f.name(), f.pos()), f.schema());
+      }
+      return hashCode;
+    case ARRAY:
+      Collection<?> a = (Collection<?>) o;
+      Schema elementType = s.getElementType();
+      for (Object e : a)
+        hashCode = hashCodeAdd(hashCode, e, elementType);
+      return hashCode;
+    case UNION:
+      return reflectAwareHashCode(o, s.getTypes().get(ReflectData.get().resolveUnion(s, o)));
+    case ENUM:
+      return s.getEnumOrdinal(o.toString());
+    case NULL:
+      return 0;
+    case STRING:
+      return (o instanceof Utf8 ? o : new Utf8(o.toString())).hashCode();
+    default:
+      return o.hashCode();
+    }
+  }
+
+  /** Add the hash code for an object into an accumulated hash code. */
+  private static int hashCodeAdd(int hashCode, Object o, Schema s) {
+    return 31 * hashCode + reflectAwareHashCode(o, s);
+  }
+
+  private Avros() {
+  }
+}
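
As a usage note for the class above: these factory methods are intended to be statically imported at client call sites. A minimal, illustrative sketch (the class name is hypothetical):

import static org.apache.crunch.types.avro.Avros.ints;
import static org.apache.crunch.types.avro.Avros.pairs;
import static org.apache.crunch.types.avro.Avros.strings;
import static org.apache.crunch.types.avro.Avros.tableOf;

import org.apache.crunch.Pair;
import org.apache.crunch.types.PTableType;
import org.apache.crunch.types.PType;

public class AvrosUsageExample {  // illustrative only
  public static void main(String[] args) {
    // Primitive Avro-backed PTypes.
    PType<String> names = strings();

    // A composite type for (int, int) pairs.
    PType<Pair<Integer, Integer>> coords = pairs(ints(), ints());

    // A table type keyed by String, usable when creating PTables.
    PTableType<String, Pair<Integer, Integer>> table = tableOf(names, coords);
    System.out.println(table);
  }
}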

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/avro/ReflectDataFactory.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/avro/ReflectDataFactory.java b/crunch-core/src/main/java/org/apache/crunch/types/avro/ReflectDataFactory.java
new file mode 100644
index 0000000..e973cca
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/avro/ReflectDataFactory.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.avro;
+
+import org.apache.avro.Schema;
+import org.apache.avro.reflect.ReflectData;
+import org.apache.avro.reflect.ReflectDatumReader;
+import org.apache.avro.reflect.ReflectDatumWriter;
+
+/**
+ * A Factory class for constructing Avro reflection-related objects.
+ */
+public class ReflectDataFactory {
+
+  public ReflectData getReflectData() {
+    return ReflectData.AllowNull.get();
+  }
+
+  public <T> ReflectDatumReader<T> getReader(Schema schema) {
+    return new ReflectDatumReader<T>(schema);
+  }
+
+  public <T> ReflectDatumWriter<T> getWriter(Schema schema) {
+    return new ReflectDatumWriter<T>(schema);
+  }
+}
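
The default factory above can be swapped out through the hooks defined in Avros (REFLECT_DATA_FACTORY and configureReflectDataFactory). A hedged sketch, with a hypothetical subclass name:

import org.apache.avro.reflect.ReflectData;
import org.apache.crunch.types.avro.Avros;
import org.apache.crunch.types.avro.ReflectDataFactory;
import org.apache.hadoop.conf.Configuration;

// Hypothetical factory that does not wrap reflected fields in nullable unions.
public class StrictReflectDataFactory extends ReflectDataFactory {
  @Override
  public ReflectData getReflectData() {
    return ReflectData.get();  // the default factory uses ReflectData.AllowNull.get()
  }

  public static void main(String[] args) {
    // Install the factory globally and record its class in the job configuration
    // so tasks can recover it via Avros.getReflectDataFactory(conf).
    Avros.REFLECT_DATA_FACTORY = new StrictReflectDataFactory();
    Configuration conf = new Configuration();
    Avros.configureReflectDataFactory(conf);
  }
}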

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/avro/SafeAvroSerialization.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/avro/SafeAvroSerialization.java b/crunch-core/src/main/java/org/apache/crunch/types/avro/SafeAvroSerialization.java
new file mode 100644
index 0000000..8bd18b0
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/avro/SafeAvroSerialization.java
@@ -0,0 +1,145 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.avro;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+import org.apache.avro.Schema;
+import org.apache.avro.io.BinaryDecoder;
+import org.apache.avro.io.BinaryEncoder;
+import org.apache.avro.io.DatumReader;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.io.DecoderFactory;
+import org.apache.avro.io.EncoderFactory;
+import org.apache.avro.mapred.AvroJob;
+import org.apache.avro.mapred.AvroKey;
+import org.apache.avro.mapred.AvroValue;
+import org.apache.avro.mapred.AvroWrapper;
+import org.apache.avro.mapred.Pair;
+import org.apache.avro.reflect.ReflectDatumWriter;
+import org.apache.avro.specific.SpecificDatumReader;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.serializer.Deserializer;
+import org.apache.hadoop.io.serializer.Serialization;
+import org.apache.hadoop.io.serializer.Serializer;
+import org.apache.hadoop.util.ReflectionUtils;
+
+/** The {@link Serialization} used by jobs configured with {@link AvroJob}. */
+class SafeAvroSerialization<T> extends Configured implements Serialization<AvroWrapper<T>> {
+
+  public boolean accept(Class<?> c) {
+    return AvroWrapper.class.isAssignableFrom(c);
+  }
+
+  /**
+   * Returns the specified map output deserializer. Defaults to the final output
+   * deserializer if no map output schema was specified.
+   */
+  public Deserializer<AvroWrapper<T>> getDeserializer(Class<AvroWrapper<T>> c) {
+    boolean isKey = AvroKey.class.isAssignableFrom(c);
+    Configuration conf = getConf();
+    Schema schema = isKey ? Pair.getKeySchema(AvroJob.getMapOutputSchema(conf)) : Pair.getValueSchema(AvroJob
+        .getMapOutputSchema(conf));
+
+    DatumReader<T> datumReader = null;
+    if (conf.getBoolean(AvroJob.MAP_OUTPUT_IS_REFLECT, false)) {
+      ReflectDataFactory factory = (ReflectDataFactory) ReflectionUtils.newInstance(
+          conf.getClass("crunch.reflectdatafactory", ReflectDataFactory.class), conf);
+      datumReader = factory.getReader(schema);
+    } else {
+      datumReader = new SpecificDatumReader<T>(schema);
+    }
+    return new AvroWrapperDeserializer(datumReader, isKey);
+  }
+
+  private static final DecoderFactory FACTORY = DecoderFactory.get();
+
+  private class AvroWrapperDeserializer implements Deserializer<AvroWrapper<T>> {
+
+    private DatumReader<T> reader;
+    private BinaryDecoder decoder;
+    private boolean isKey;
+
+    public AvroWrapperDeserializer(DatumReader<T> reader, boolean isKey) {
+      this.reader = reader;
+      this.isKey = isKey;
+    }
+
+    public void open(InputStream in) {
+      this.decoder = FACTORY.directBinaryDecoder(in, decoder);
+    }
+
+    public AvroWrapper<T> deserialize(AvroWrapper<T> wrapper) throws IOException {
+      T datum = reader.read(wrapper == null ? null : wrapper.datum(), decoder);
+      if (wrapper == null) {
+        wrapper = isKey ? new AvroKey<T>(datum) : new AvroValue<T>(datum);
+      } else {
+        wrapper.datum(datum);
+      }
+      return wrapper;
+    }
+
+    public void close() throws IOException {
+      decoder.inputStream().close();
+    }
+  }
+
+  /** Returns the specified output serializer. */
+  public Serializer<AvroWrapper<T>> getSerializer(Class<AvroWrapper<T>> c) {
+    // AvroWrapper used for final output, AvroKey or AvroValue for map output
+    boolean isFinalOutput = c.equals(AvroWrapper.class);
+    Configuration conf = getConf();
+    Schema schema = isFinalOutput ? AvroJob.getOutputSchema(conf) : (AvroKey.class.isAssignableFrom(c) ? Pair
+        .getKeySchema(AvroJob.getMapOutputSchema(conf)) : Pair.getValueSchema(AvroJob.getMapOutputSchema(conf)));
+
+    ReflectDataFactory factory = Avros.getReflectDataFactory(conf);
+    ReflectDatumWriter<T> writer = factory.getWriter(schema);
+    return new AvroWrapperSerializer(writer);
+  }
+
+  private class AvroWrapperSerializer implements Serializer<AvroWrapper<T>> {
+    private DatumWriter<T> writer;
+    private OutputStream out;
+    private BinaryEncoder encoder;
+
+    public AvroWrapperSerializer(DatumWriter<T> writer) {
+      this.writer = writer;
+    }
+
+    public void open(OutputStream out) {
+      this.out = out;
+      this.encoder = new EncoderFactory().configureBlockSize(512).binaryEncoder(out, null);
+    }
+
+    public void serialize(AvroWrapper<T> wrapper) throws IOException {
+      writer.write(wrapper.datum(), encoder);
+      // would be a lot faster if the Serializer interface had a flush()
+      // method and the Hadoop framework called it when needed rather
+      // than for every record.
+      encoder.flush();
+    }
+
+    public void close() throws IOException {
+      out.close();
+    }
+  }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/avro/package-info.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/avro/package-info.java b/crunch-core/src/main/java/org/apache/crunch/types/avro/package-info.java
new file mode 100644
index 0000000..abaf60f
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/avro/package-info.java
@@ -0,0 +1,22 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Business object serialization using Apache Avro.
+ */
+package org.apache.crunch.types.avro;

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/package-info.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/package-info.java b/crunch-core/src/main/java/org/apache/crunch/types/package-info.java
new file mode 100644
index 0000000..b420b03
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/package-info.java
@@ -0,0 +1,22 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Common functionality for business object serialization.
+ */
+package org.apache.crunch.types;

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/writable/GenericArrayWritable.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/writable/GenericArrayWritable.java b/crunch-core/src/main/java/org/apache/crunch/types/writable/GenericArrayWritable.java
new file mode 100644
index 0000000..8b54008
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/writable/GenericArrayWritable.java
@@ -0,0 +1,135 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.writable;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.commons.lang.builder.HashCodeBuilder;
+import org.apache.crunch.CrunchRuntimeException;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableFactories;
+import org.apache.hadoop.io.WritableUtils;
+
+/**
+ * A {@link Writable} for marshalling/unmarshalling Collections. Note that
+ * element order is <em>undefined</em>!
+ *
+ * @param <T> The value type
+ */
+class GenericArrayWritable<T> implements Writable {
+  private Writable[] values;
+  private Class<? extends Writable> valueClass;
+
+  public GenericArrayWritable(Class<? extends Writable> valueClass) {
+    this.valueClass = valueClass;
+  }
+
+  public GenericArrayWritable() {
+    // for deserialization
+  }
+
+  public void set(Writable[] values) {
+    this.values = values;
+  }
+
+  public Writable[] get() {
+    return values;
+  }
+
+  public void readFields(DataInput in) throws IOException {
+    values = new Writable[WritableUtils.readVInt(in)]; // construct values
+    if (values.length > 0) {
+      int nulls = WritableUtils.readVInt(in);
+      if (nulls == values.length) {
+        return;
+      }
+      String valueType = Text.readString(in);
+      setValueType(valueType);
+      for (int i = 0; i < values.length - nulls; i++) {
+        Writable value = WritableFactories.newInstance(valueClass);
+        value.readFields(in); // read a value
+        values[i] = value; // store it in values
+      }
+    }
+  }
+
+  protected void setValueType(String valueType) {
+    if (valueClass == null) {
+      try {
+        valueClass = Class.forName(valueType).asSubclass(Writable.class);
+      } catch (ClassNotFoundException e) {
+        throw new CrunchRuntimeException(e);
+      }
+    } else if (!valueType.equals(valueClass.getName())) {
+      throw new IllegalStateException("Incoming " + valueType + " is not " + valueClass);
+    }
+  }
+
+  public void write(DataOutput out) throws IOException {
+    WritableUtils.writeVInt(out, values.length);
+    if (values.length > 0) {
+      int nulls = 0;
+      for (int i = 0; i < values.length; i++) {
+        if (values[i] == null) {
+          nulls++;
+        }
+      }
+      WritableUtils.writeVInt(out, nulls);
+      if (values.length - nulls > 0) {
+        if (valueClass == null) {
+          throw new IllegalStateException("Value class not set by constructor or read");
+        }
+        Text.writeString(out, valueClass.getName());
+        for (int i = 0; i < values.length; i++) {
+          if (values[i] != null) {
+            values[i].write(out);
+          }
+        }
+      }
+    }
+  }
+
+  @Override
+  public int hashCode() {
+    HashCodeBuilder hcb = new HashCodeBuilder();
+    return hcb.append(values).toHashCode();
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj)
+      return true;
+    if (obj == null)
+      return false;
+    if (getClass() != obj.getClass())
+      return false;
+    GenericArrayWritable other = (GenericArrayWritable) obj;
+    if (!Arrays.equals(values, other.values))
+      return false;
+    return true;
+  }
+
+  @Override
+  public String toString() {
+    return Arrays.toString(values);
+  }
+}

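A note on the wire format used above: write() emits the element count, the number of nulls, the value class name (only when at least one element is non-null), and then the non-null elements, so readFields() packs the surviving values at the front of the array. The round-trip sketch below is not part of this commit; it assumes a hypothetical class placed in the same package, since GenericArrayWritable is package-private:

    package org.apache.crunch.types.writable;

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;

    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.Writable;

    // Hypothetical illustration: round-trip a GenericArrayWritable through its own
    // write/readFields methods and observe how nulls are handled.
    public class GenericArrayWritableRoundTrip {
      public static void main(String[] args) throws IOException {
        GenericArrayWritable<Text> src = new GenericArrayWritable<Text>(Text.class);
        src.set(new Writable[] { new Text("a"), null, new Text("b") });

        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        src.write(new DataOutputStream(bytes));   // count=3, nulls=1, class name, "a", "b"

        GenericArrayWritable<Text> copy = new GenericArrayWritable<Text>();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

        // Non-null elements come back first; element order is not guaranteed in general.
        System.out.println(copy);   // prints [a, b, null]
      }
    }
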
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/writable/TextMapWritable.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/writable/TextMapWritable.java b/crunch-core/src/main/java/org/apache/crunch/types/writable/TextMapWritable.java
new file mode 100644
index 0000000..1ab51df
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/writable/TextMapWritable.java
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.writable;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableUtils;
+
+import com.google.common.collect.Maps;
+
+class TextMapWritable<T extends Writable> implements Writable {
+
+  private Class<T> valueClazz;
+  private final Map<Text, T> instance;
+
+  public TextMapWritable() {
+    this.instance = Maps.newHashMap();
+  }
+
+  public TextMapWritable(Class<T> valueClazz) {
+    this.valueClazz = valueClazz;
+    this.instance = Maps.newHashMap();
+  }
+
+  public void put(Text txt, T value) {
+    instance.put(txt, value);
+  }
+
+  public Set<Map.Entry<Text, T>> entrySet() {
+    return instance.entrySet();
+  }
+
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    instance.clear();
+    try {
+      this.valueClazz = (Class<T>) Class.forName(Text.readString(in));
+    } catch (ClassNotFoundException e) {
+      throw (IOException) new IOException("Failed map init").initCause(e);
+    }
+    int entries = WritableUtils.readVInt(in);
+    try {
+      for (int i = 0; i < entries; i++) {
+        Text txt = new Text();
+        txt.readFields(in);
+        T value = valueClazz.newInstance();
+        value.readFields(in);
+        instance.put(txt, value);
+      }
+    } catch (IllegalAccessException e) {
+      throw (IOException) new IOException("Failed map init").initCause(e);
+    } catch (InstantiationException e) {
+      throw (IOException) new IOException("Failed map init").initCause(e);
+    }
+  }
+
+  @Override
+  public void write(DataOutput out) throws IOException {
+    Text.writeString(out, valueClazz.getName());
+    WritableUtils.writeVInt(out, instance.size());
+    for (Map.Entry<Text, T> e : instance.entrySet()) {
+      e.getKey().write(out);
+      e.getValue().write(out);
+    }
+  }
+
+}

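TextMapWritable above writes the value class name once, then the entry count, then each Text key followed by its value; readFields() rebuilds values reflectively, so the value class needs a public no-arg constructor. A small round-trip sketch under the same same-package assumption (the class is package-private), with a hypothetical class name:

    package org.apache.crunch.types.writable;

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;
    import java.util.Map;

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;

    // Hypothetical illustration: serialize and deserialize a TextMapWritable.
    public class TextMapWritableRoundTrip {
      public static void main(String[] args) throws IOException {
        TextMapWritable<LongWritable> src = new TextMapWritable<LongWritable>(LongWritable.class);
        src.put(new Text("apples"), new LongWritable(3));
        src.put(new Text("pears"), new LongWritable(7));

        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        src.write(new DataOutputStream(bytes));   // class name, entry count, key/value pairs

        TextMapWritable<LongWritable> copy = new TextMapWritable<LongWritable>();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        for (Map.Entry<Text, LongWritable> e : copy.entrySet()) {
          System.out.println(e.getKey() + " -> " + e.getValue());
        }
      }
    }
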
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/writable/TupleWritable.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/writable/TupleWritable.java b/crunch-core/src/main/java/org/apache/crunch/types/writable/TupleWritable.java
new file mode 100644
index 0000000..1c3536b
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/writable/TupleWritable.java
@@ -0,0 +1,224 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.writable;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.commons.lang.builder.HashCodeBuilder;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.WritableUtils;
+
+/**
+ * A straight copy of the TupleWritable implementation in the join package,
+ * added here because of its package visibility restrictions.
+ * 
+ */
+public class TupleWritable implements WritableComparable<TupleWritable> {
+
+  private long written;
+  private Writable[] values;
+
+  /**
+   * Create an empty tuple with no allocated storage for writables.
+   */
+  public TupleWritable() {
+  }
+
+  /**
+   * Initialize tuple with storage; unknown whether any of them contain
+   * &quot;written&quot; values.
+   */
+  public TupleWritable(Writable[] vals) {
+    written = 0L;
+    values = vals;
+  }
+
+  /**
+   * Return true if tuple has an element at the position provided.
+   */
+  public boolean has(int i) {
+    return 0 != ((1 << i) & written);
+  }
+
+  /**
+   * Get ith Writable from Tuple.
+   */
+  public Writable get(int i) {
+    return values[i];
+  }
+
+  /**
+   * The number of children in this Tuple.
+   */
+  public int size() {
+    return values.length;
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  public boolean equals(Object other) {
+    if (other instanceof TupleWritable) {
+      TupleWritable that = (TupleWritable) other;
+      if (this.size() != that.size() || this.written != that.written) {
+        return false;
+      }
+      for (int i = 0; i < values.length; ++i) {
+        if (!has(i))
+          continue;
+        if (!values[i].equals(that.get(i))) {
+          return false;
+        }
+      }
+      return true;
+    }
+    return false;
+  }
+
+  public int hashCode() {
+    HashCodeBuilder builder = new HashCodeBuilder();
+    builder.append(written);
+    for (Writable v : values) {
+      builder.append(v);
+    }
+    return builder.toHashCode();
+  }
+
+  /**
+   * Convert Tuple to String as in the following.
+   * <tt>[<child1>,<child2>,...,<childn>]</tt>
+   */
+  public String toString() {
+    StringBuffer buf = new StringBuffer("[");
+    for (int i = 0; i < values.length; ++i) {
+      buf.append(has(i) ? values[i].toString() : "");
+      buf.append(",");
+    }
+    if (values.length != 0)
+      buf.setCharAt(buf.length() - 1, ']');
+    else
+      buf.append(']');
+    return buf.toString();
+  }
+
+  /**
+   * Writes each Writable to <code>out</code>. TupleWritable format:
+   * {@code
+   *  <count><type1><type2>...<typen><obj1><obj2>...<objn>
+   * }
+   */
+  public void write(DataOutput out) throws IOException {
+    WritableUtils.writeVInt(out, values.length);
+    WritableUtils.writeVLong(out, written);
+    for (int i = 0; i < values.length; ++i) {
+      if (has(i)) {
+        Text.writeString(out, values[i].getClass().getName());
+      }
+    }
+    for (int i = 0; i < values.length; ++i) {
+      if (has(i)) {
+        values[i].write(out);
+      }
+    }
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @SuppressWarnings("unchecked")
+  // No static typeinfo on Tuples
+  public void readFields(DataInput in) throws IOException {
+    int card = WritableUtils.readVInt(in);
+    values = new Writable[card];
+    written = WritableUtils.readVLong(in);
+    Class<? extends Writable>[] cls = new Class[card];
+    try {
+      for (int i = 0; i < card; ++i) {
+        if (has(i)) {
+          cls[i] = Class.forName(Text.readString(in)).asSubclass(Writable.class);
+        }
+      }
+      for (int i = 0; i < card; ++i) {
+        if (has(i)) {
+          values[i] = cls[i].newInstance();
+          values[i].readFields(in);
+        }
+      }
+    } catch (ClassNotFoundException e) {
+      throw (IOException) new IOException("Failed tuple init").initCause(e);
+    } catch (IllegalAccessException e) {
+      throw (IOException) new IOException("Failed tuple init").initCause(e);
+    } catch (InstantiationException e) {
+      throw (IOException) new IOException("Failed tuple init").initCause(e);
+    }
+  }
+
+  /**
+   * Record that the tuple contains an element at the position provided.
+   */
+  public void setWritten(int i) {
+    written |= 1 << i;
+  }
+
+  /**
+   * Record that the tuple does not contain an element at the position provided.
+   */
+  public void clearWritten(int i) {
+    written &= -1 ^ (1 << i);
+  }
+
+  /**
+   * Clear any record of which writables have been written to, without releasing
+   * storage.
+   */
+  public void clearWritten() {
+    written = 0L;
+  }
+
+  @Override
+  public int compareTo(TupleWritable o) {
+    for (int i = 0; i < values.length; ++i) {
+      if (has(i) && !o.has(i)) {
+        return 1;
+      } else if (!has(i) && o.has(i)) {
+        return -1;
+      } else {
+        Writable v1 = values[i];
+        Writable v2 = o.values[i];
+        if (v1 != v2 && (v1 != null && !v1.equals(v2))) {
+          if (v1 instanceof WritableComparable && v2 instanceof WritableComparable) {
+            int cmp = ((WritableComparable) v1).compareTo((WritableComparable) v2);
+            if (cmp != 0) {
+              return cmp;
+            }
+          } else {
+            int cmp = v1.hashCode() - v2.hashCode();
+            if (cmp != 0) {
+              return cmp;
+            }
+          }
+        }
+      }
+    }
+    return values.length - o.values.length;
+  }
+}
\ No newline at end of file

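In TupleWritable the long "written" field is a bit set of the positions that actually hold values; write() emits the arity, the bit set, the class names of the set positions, and then only those values. A short sketch (hypothetical class name, not part of this commit) showing that unset positions survive the round trip as gaps:

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;

    import org.apache.crunch.types.writable.TupleWritable;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.Writable;

    // Hypothetical illustration: only positions flagged via setWritten() are serialized.
    public class TupleWritableRoundTrip {
      public static void main(String[] args) throws IOException {
        TupleWritable src = new TupleWritable(
            new Writable[] { new Text("key"), new IntWritable(42), new Text("ignored") });
        src.setWritten(0);
        src.setWritten(1);   // position 2 is left unset and will not be written

        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        src.write(new DataOutputStream(bytes));

        TupleWritable copy = new TupleWritable();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(copy);          // [key,42,]
        System.out.println(copy.has(2));   // false
      }
    }
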
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/writable/WritableDeepCopier.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/writable/WritableDeepCopier.java b/crunch-core/src/main/java/org/apache/crunch/types/writable/WritableDeepCopier.java
new file mode 100644
index 0000000..7b6e11b
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/writable/WritableDeepCopier.java
@@ -0,0 +1,70 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.writable;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+
+import org.apache.crunch.CrunchRuntimeException;
+import org.apache.crunch.types.DeepCopier;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * Performs deep copies of Writable values.
+ * 
+ * @param <T> The type of Writable that can be copied
+ */
+public class WritableDeepCopier<T extends Writable> implements DeepCopier<T> {
+
+  private Class<T> writableClass;
+
+  public WritableDeepCopier(Class<T> writableClass) {
+    this.writableClass = writableClass;
+  }
+
+  @Override
+  public void initialize(Configuration conf) {
+  }
+
+  @Override
+  public T deepCopy(T source) {
+    
+    if (source == null) {
+      return null;
+    }
+    
+    ByteArrayOutputStream byteOutStream = new ByteArrayOutputStream();
+    DataOutputStream dataOut = new DataOutputStream(byteOutStream);
+    T copiedValue = null;
+    try {
+      source.write(dataOut);
+      dataOut.flush();
+      ByteArrayInputStream byteInStream = new ByteArrayInputStream(byteOutStream.toByteArray());
+      DataInput dataInput = new DataInputStream(byteInStream);
+      copiedValue = writableClass.newInstance();
+      copiedValue.readFields(dataInput);
+    } catch (Exception e) {
+      throw new CrunchRuntimeException("Error while deep copying " + source, e);
+    }
+    return copiedValue;
+  }
+}

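WritableDeepCopier copies a value by serializing it to an in-memory byte array and reading it back into a fresh instance created reflectively, so the Writable class must have a no-arg constructor. A brief usage sketch with hypothetical values:

    import org.apache.crunch.types.writable.WritableDeepCopier;
    import org.apache.hadoop.io.Text;

    // Hypothetical illustration: the copy is a distinct object with equal contents.
    public class DeepCopyExample {
      public static void main(String[] args) {
        WritableDeepCopier<Text> copier = new WritableDeepCopier<Text>(Text.class);
        Text original = new Text("hello");
        Text copy = copier.deepCopy(original);

        original.set("mutated");                 // mutating the original...
        System.out.println(copy);                // ...leaves the copy untouched: hello
        System.out.println(copy != original);    // true
      }
    }
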
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/writable/WritableGroupedTableType.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/writable/WritableGroupedTableType.java b/crunch-core/src/main/java/org/apache/crunch/types/writable/WritableGroupedTableType.java
new file mode 100644
index 0000000..84318d3
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/writable/WritableGroupedTableType.java
@@ -0,0 +1,85 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.writable;
+
+import org.apache.crunch.GroupingOptions;
+import org.apache.crunch.MapFn;
+import org.apache.crunch.Pair;
+import org.apache.crunch.lib.PTables;
+import org.apache.crunch.types.Converter;
+import org.apache.crunch.types.PGroupedTableType;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.Job;
+
+class WritableGroupedTableType<K, V> extends PGroupedTableType<K, V> {
+
+  private final MapFn inputFn;
+  private final MapFn outputFn;
+  private final Converter converter;
+
+  public WritableGroupedTableType(WritableTableType<K, V> tableType) {
+    super(tableType);
+    WritableType keyType = (WritableType) tableType.getKeyType();
+    WritableType valueType = (WritableType) tableType.getValueType();
+    this.inputFn = new PairIterableMapFn(keyType.getInputMapFn(), valueType.getInputMapFn());
+    this.outputFn = tableType.getOutputMapFn();
+    this.converter = new WritablePairConverter(keyType.getSerializationClass(),
+        valueType.getSerializationClass());
+  }
+
+  @Override
+  public Class<Pair<K, Iterable<V>>> getTypeClass() {
+    return (Class<Pair<K, Iterable<V>>>) Pair.of(null, null).getClass();
+  }
+
+  @Override
+  public Converter getGroupingConverter() {
+    return converter;
+  }
+
+  @Override
+  public MapFn getInputMapFn() {
+    return inputFn;
+  }
+
+  @Override
+  public MapFn getOutputMapFn() {
+    return outputFn;
+  }
+
+  @Override
+  public void initialize(Configuration conf) {
+    this.tableType.initialize(conf);
+  }
+
+  @Override
+  public Pair<K, Iterable<V>> getDetachedValue(Pair<K, Iterable<V>> value) {
+    return PTables.getGroupedDetachedValue(this, value);
+  }
+
+  @Override
+  public void configureShuffle(Job job, GroupingOptions options) {
+    if (options != null) {
+      options.configure(job);
+    }
+    WritableType keyType = (WritableType) tableType.getKeyType();
+    WritableType valueType = (WritableType) tableType.getValueType();
+    job.setMapOutputKeyClass(keyType.getSerializationClass());
+    job.setMapOutputValueClass(valueType.getSerializationClass());
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/writable/WritablePairConverter.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/writable/WritablePairConverter.java b/crunch-core/src/main/java/org/apache/crunch/types/writable/WritablePairConverter.java
new file mode 100644
index 0000000..2db0238
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/writable/WritablePairConverter.java
@@ -0,0 +1,62 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.writable;
+
+import org.apache.crunch.Pair;
+import org.apache.crunch.types.Converter;
+
+class WritablePairConverter<K, V> implements Converter<K, V, Pair<K, V>, Pair<K, Iterable<V>>> {
+
+  private final Class<K> keyClass;
+  private final Class<V> valueClass;
+
+  public WritablePairConverter(Class<K> keyClass, Class<V> valueClass) {
+    this.keyClass = keyClass;
+    this.valueClass = valueClass;
+  }
+
+  @Override
+  public Pair<K, V> convertInput(K key, V value) {
+    return Pair.of(key, value);
+  }
+
+  @Override
+  public K outputKey(Pair<K, V> value) {
+    return value.first();
+  }
+
+  @Override
+  public V outputValue(Pair<K, V> value) {
+    return value.second();
+  }
+
+  @Override
+  public Class<K> getKeyClass() {
+    return keyClass;
+  }
+
+  @Override
+  public Class<V> getValueClass() {
+    return valueClass;
+  }
+
+  @Override
+  public Pair<K, Iterable<V>> convertIterableInput(K key, Iterable<V> value) {
+    return Pair.of(key, value);
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/writable/WritableTableType.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/writable/WritableTableType.java b/crunch-core/src/main/java/org/apache/crunch/types/writable/WritableTableType.java
new file mode 100644
index 0000000..93e0fd6
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/writable/WritableTableType.java
@@ -0,0 +1,130 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.writable;
+
+import java.util.List;
+
+import org.apache.commons.lang.builder.HashCodeBuilder;
+import org.apache.crunch.MapFn;
+import org.apache.crunch.Pair;
+import org.apache.crunch.fn.PairMapFn;
+import org.apache.crunch.io.ReadableSourceTarget;
+import org.apache.crunch.io.seq.SeqFileTableSourceTarget;
+import org.apache.crunch.lib.PTables;
+import org.apache.crunch.types.Converter;
+import org.apache.crunch.types.PGroupedTableType;
+import org.apache.crunch.types.PTableType;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.PTypeFamily;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Writable;
+
+import com.google.common.collect.ImmutableList;
+
+class WritableTableType<K, V> implements PTableType<K, V> {
+
+  private final WritableType<K, Writable> keyType;
+  private final WritableType<V, Writable> valueType;
+  private final MapFn inputFn;
+  private final MapFn outputFn;
+  private final Converter converter;
+
+  public WritableTableType(WritableType<K, Writable> keyType, WritableType<V, Writable> valueType) {
+    this.keyType = keyType;
+    this.valueType = valueType;
+    this.inputFn = new PairMapFn(keyType.getInputMapFn(), valueType.getInputMapFn());
+    this.outputFn = new PairMapFn(keyType.getOutputMapFn(), valueType.getOutputMapFn());
+    this.converter = new WritablePairConverter(keyType.getSerializationClass(),
+        valueType.getSerializationClass());
+  }
+
+  @Override
+  public Class<Pair<K, V>> getTypeClass() {
+    return (Class<Pair<K, V>>) Pair.of(null, null).getClass();
+  }
+
+  @Override
+  public List<PType> getSubTypes() {
+    return ImmutableList.<PType> of(keyType, valueType);
+  }
+
+  @Override
+  public MapFn getInputMapFn() {
+    return inputFn;
+  }
+
+  @Override
+  public MapFn getOutputMapFn() {
+    return outputFn;
+  }
+
+  @Override
+  public Converter getConverter() {
+    return converter;
+  }
+
+  @Override
+  public PTypeFamily getFamily() {
+    return WritableTypeFamily.getInstance();
+  }
+
+  public PType<K> getKeyType() {
+    return keyType;
+  }
+
+  public PType<V> getValueType() {
+    return valueType;
+  }
+
+  @Override
+  public PGroupedTableType<K, V> getGroupedTableType() {
+    return new WritableGroupedTableType<K, V>(this);
+  }
+
+  @Override
+  public ReadableSourceTarget<Pair<K, V>> getDefaultFileSource(Path path) {
+    return new SeqFileTableSourceTarget<K, V>(path, this);
+  }
+
+  @Override
+  public void initialize(Configuration conf) {
+    keyType.initialize(conf);
+    valueType.initialize(conf);
+  }
+
+  @Override
+  public Pair<K, V> getDetachedValue(Pair<K, V> value) {
+    return PTables.getDetachedValue(this, value);
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (obj == null || !(obj instanceof WritableTableType)) {
+      return false;
+    }
+    WritableTableType that = (WritableTableType) obj;
+    return keyType.equals(that.keyType) && valueType.equals(that.valueType);
+  }
+
+  @Override
+  public int hashCode() {
+    HashCodeBuilder hcb = new HashCodeBuilder();
+    return hcb.append(keyType).append(valueType).toHashCode();
+  }
+}
\ No newline at end of file

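WritableTableType composes two WritableTypes and derives its pair converter and grouped form from them. A hedged sketch, assuming the public Writables factory class that the WritableTypeFamily later in this patch delegates to:

    import org.apache.crunch.types.PGroupedTableType;
    import org.apache.crunch.types.PTableType;
    import org.apache.crunch.types.writable.Writables;

    // Hypothetical illustration: composing a Writable-backed table type.
    public class TableTypeExample {
      public static void main(String[] args) {
        PTableType<String, Long> table = Writables.tableOf(Writables.strings(), Writables.longs());

        // The key and value component types remain individually accessible...
        System.out.println(table.getKeyType().getTypeClass());     // class java.lang.String
        System.out.println(table.getValueType().getTypeClass());   // class java.lang.Long

        // ...and the grouped form used after a groupByKey is derived from the same type.
        PGroupedTableType<String, Long> grouped = table.getGroupedTableType();
        System.out.println(grouped.getTypeClass());                // class org.apache.crunch.Pair
      }
    }
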
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/writable/WritableType.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/writable/WritableType.java b/crunch-core/src/main/java/org/apache/crunch/types/writable/WritableType.java
new file mode 100644
index 0000000..734946c
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/writable/WritableType.java
@@ -0,0 +1,133 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.writable;
+
+import java.util.List;
+
+import org.apache.commons.lang.builder.HashCodeBuilder;
+import org.apache.crunch.MapFn;
+import org.apache.crunch.io.ReadableSourceTarget;
+import org.apache.crunch.io.seq.SeqFileSourceTarget;
+import org.apache.crunch.types.Converter;
+import org.apache.crunch.types.DeepCopier;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.PTypeFamily;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Writable;
+
+import com.google.common.collect.ImmutableList;
+
+public class WritableType<T, W extends Writable> implements PType<T> {
+
+  private final Class<T> typeClass;
+  private final Class<W> writableClass;
+  private final Converter converter;
+  private final MapFn<W, T> inputFn;
+  private final MapFn<T, W> outputFn;
+  private final DeepCopier<W> deepCopier;
+  private final List<PType> subTypes;
+  private boolean initialized = false;
+
+  public WritableType(Class<T> typeClass, Class<W> writableClass, MapFn<W, T> inputDoFn,
+      MapFn<T, W> outputDoFn, PType... subTypes) {
+    this.typeClass = typeClass;
+    this.writableClass = writableClass;
+    this.inputFn = inputDoFn;
+    this.outputFn = outputDoFn;
+    this.converter = new WritableValueConverter(writableClass);
+    this.deepCopier = new WritableDeepCopier<W>(writableClass);
+    this.subTypes = ImmutableList.<PType> builder().add(subTypes).build();
+  }
+
+  @Override
+  public PTypeFamily getFamily() {
+    return WritableTypeFamily.getInstance();
+  }
+
+  @Override
+  public Class<T> getTypeClass() {
+    return typeClass;
+  }
+
+  @Override
+  public Converter getConverter() {
+    return converter;
+  }
+
+  @Override
+  public MapFn getInputMapFn() {
+    return inputFn;
+  }
+
+  @Override
+  public MapFn getOutputMapFn() {
+    return outputFn;
+  }
+
+  @Override
+  public List<PType> getSubTypes() {
+    return subTypes;
+  }
+
+  public Class<W> getSerializationClass() {
+    return writableClass;
+  }
+
+  @Override
+  public ReadableSourceTarget<T> getDefaultFileSource(Path path) {
+    return new SeqFileSourceTarget<T>(path, this);
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (obj == null || !(obj instanceof WritableType)) {
+      return false;
+    }
+    WritableType wt = (WritableType) obj;
+    return (typeClass.equals(wt.typeClass) && writableClass.equals(wt.writableClass) && subTypes
+        .equals(wt.subTypes));
+  }
+
+  @Override
+  public void initialize(Configuration conf) {
+    this.inputFn.initialize();
+    this.outputFn.initialize();
+    for (PType subType : subTypes) {
+      subType.initialize(conf);
+    }
+    this.initialized = true;
+  }
+
+  @Override
+  public T getDetachedValue(T value) {
+    if (!initialized) {
+      throw new IllegalStateException("Cannot call getDetachedValue on an uninitialized PType");
+    }
+    W writableValue = outputFn.map(value);
+    W deepCopy = this.deepCopier.deepCopy(writableValue);
+    return inputFn.map(deepCopy);
+  }
+
+  @Override
+  public int hashCode() {
+    HashCodeBuilder hcb = new HashCodeBuilder();
+    hcb.append(typeClass).append(writableClass).append(subTypes);
+    return hcb.toHashCode();
+  }
+}
\ No newline at end of file

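WritableType.getDetachedValue maps a value into its Writable form, deep-copies that, and maps it back, which is what protects materialized values from Hadoop's object reuse; note that it refuses to run before initialize() has been called. A minimal sketch, assuming the public Writables factory used by the type family below:

    import org.apache.crunch.types.PType;
    import org.apache.crunch.types.writable.Writables;
    import org.apache.hadoop.conf.Configuration;

    // Hypothetical illustration: a PType must be initialized before detaching values.
    public class DetachedValueExample {
      public static void main(String[] args) {
        PType<String> strings = Writables.strings();
        strings.initialize(new Configuration());   // skipping this would raise IllegalStateException

        String detached = strings.getDetachedValue("some value");
        System.out.println(detached);
      }
    }
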
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/writable/WritableTypeFamily.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/writable/WritableTypeFamily.java b/crunch-core/src/main/java/org/apache/crunch/types/writable/WritableTypeFamily.java
new file mode 100644
index 0000000..a94db96
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/writable/WritableTypeFamily.java
@@ -0,0 +1,147 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.writable;
+
+import java.nio.ByteBuffer;
+import java.util.Collection;
+import java.util.Map;
+
+import org.apache.crunch.MapFn;
+import org.apache.crunch.Pair;
+import org.apache.crunch.Tuple;
+import org.apache.crunch.Tuple3;
+import org.apache.crunch.Tuple4;
+import org.apache.crunch.TupleN;
+import org.apache.crunch.types.PGroupedTableType;
+import org.apache.crunch.types.PTableType;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.PTypeFamily;
+import org.apache.crunch.types.PTypeUtils;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * The {@link Writable}-based implementation of the
+ * {@link org.apache.crunch.types.PTypeFamily} interface.
+ */
+public class WritableTypeFamily implements PTypeFamily {
+
+  private static final WritableTypeFamily INSTANCE = new WritableTypeFamily();
+
+  public static WritableTypeFamily getInstance() {
+    return INSTANCE;
+  }
+
+  // Disallow construction
+  private WritableTypeFamily() {
+  }
+
+  public PType<Void> nulls() {
+    return Writables.nulls();
+  }
+
+  public PType<String> strings() {
+    return Writables.strings();
+  }
+
+  public PType<Long> longs() {
+    return Writables.longs();
+  }
+
+  public PType<Integer> ints() {
+    return Writables.ints();
+  }
+
+  public PType<Float> floats() {
+    return Writables.floats();
+  }
+
+  public PType<Double> doubles() {
+    return Writables.doubles();
+  }
+
+  public PType<Boolean> booleans() {
+    return Writables.booleans();
+  }
+
+  public PType<ByteBuffer> bytes() {
+    return Writables.bytes();
+  }
+
+  public <T> PType<T> records(Class<T> clazz) {
+    return Writables.records(clazz);
+  }
+
+  public <W extends Writable> PType<W> writables(Class<W> clazz) {
+    return Writables.writables(clazz);
+  }
+
+  public <K, V> PTableType<K, V> tableOf(PType<K> key, PType<V> value) {
+    return Writables.tableOf(key, value);
+  }
+
+  public <V1, V2> PType<Pair<V1, V2>> pairs(PType<V1> p1, PType<V2> p2) {
+    return Writables.pairs(p1, p2);
+  }
+
+  public <V1, V2, V3> PType<Tuple3<V1, V2, V3>> triples(PType<V1> p1, PType<V2> p2, PType<V3> p3) {
+    return Writables.triples(p1, p2, p3);
+  }
+
+  public <V1, V2, V3, V4> PType<Tuple4<V1, V2, V3, V4>> quads(PType<V1> p1, PType<V2> p2, PType<V3> p3, PType<V4> p4) {
+    return Writables.quads(p1, p2, p3, p4);
+  }
+
+  public PType<TupleN> tuples(PType<?>... ptypes) {
+    return Writables.tuples(ptypes);
+  }
+
+  public <T> PType<Collection<T>> collections(PType<T> ptype) {
+    return Writables.collections(ptype);
+  }
+
+  public <T> PType<Map<String, T>> maps(PType<T> ptype) {
+    return Writables.maps(ptype);
+  }
+
+  @Override
+  public <T> PType<T> as(PType<T> ptype) {
+    if (ptype instanceof WritableType || ptype instanceof WritableTableType
+        || ptype instanceof WritableGroupedTableType) {
+      return ptype;
+    }
+    if (ptype instanceof PGroupedTableType) {
+      PTableType ptt = ((PGroupedTableType) ptype).getTableType();
+      return new WritableGroupedTableType((WritableTableType) as(ptt));
+    }
+    PType<T> prim = Writables.getPrimitiveType(ptype.getTypeClass());
+    if (prim != null) {
+      return prim;
+    }
+    return PTypeUtils.convert(ptype, this);
+  }
+
+  @Override
+  public <T extends Tuple> PType<T> tuples(Class<T> clazz, PType<?>... ptypes) {
+    return Writables.tuples(clazz, ptypes);
+  }
+
+  @Override
+  public <S, T> PType<T> derived(Class<T> clazz, MapFn<S, T> inputFn, MapFn<T, S> outputFn, PType<S> base) {
+    return Writables.derived(clazz, inputFn, outputFn, base);
+  }
+}

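WritableTypeFamily is a singleton whose factory methods delegate to the static Writables API, and as() coerces PTypes built in another family into their Writable-backed equivalents. A short sketch of requesting types through the family:

    import org.apache.crunch.types.PTableType;
    import org.apache.crunch.types.writable.WritableTypeFamily;

    // Hypothetical illustration: obtain Writable-backed types through the family singleton.
    public class TypeFamilyExample {
      public static void main(String[] args) {
        WritableTypeFamily tf = WritableTypeFamily.getInstance();
        PTableType<String, Long> counts = tf.tableOf(tf.strings(), tf.longs());
        System.out.println(counts.getFamily() == tf);   // true: the family is shared
      }
    }
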
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/writable/WritableValueConverter.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/writable/WritableValueConverter.java b/crunch-core/src/main/java/org/apache/crunch/types/writable/WritableValueConverter.java
new file mode 100644
index 0000000..3670b90
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/writable/WritableValueConverter.java
@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.writable;
+
+import org.apache.crunch.types.Converter;
+import org.apache.hadoop.io.NullWritable;
+
+class WritableValueConverter<W> implements Converter<Object, W, W, Iterable<W>> {
+
+  private final Class<W> serializationClass;
+
+  public WritableValueConverter(Class<W> serializationClass) {
+    this.serializationClass = serializationClass;
+  }
+
+  @Override
+  public W convertInput(Object key, W value) {
+    return value;
+  }
+
+  @Override
+  public Object outputKey(W value) {
+    return NullWritable.get();
+  }
+
+  @Override
+  public W outputValue(W value) {
+    return value;
+  }
+
+  @Override
+  public Class<Object> getKeyClass() {
+    return (Class<Object>) (Class<?>) NullWritable.class;
+  }
+
+  @Override
+  public Class<W> getValueClass() {
+    return serializationClass;
+  }
+
+  @Override
+  public Iterable<W> convertIterableInput(Object key, Iterable<W> value) {
+    return value;
+  }
+}
\ No newline at end of file


[21/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/PObjectsIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/PObjectsIT.java b/crunch/src/it/java/org/apache/crunch/PObjectsIT.java
deleted file mode 100644
index 6ee849f..0000000
--- a/crunch/src/it/java/org/apache/crunch/PObjectsIT.java
+++ /dev/null
@@ -1,99 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.IOException;
-import java.lang.Integer;
-import java.lang.Iterable;
-import java.lang.String;
-import java.util.Iterator;
-
-import org.apache.crunch.PCollection;
-import org.apache.crunch.impl.mem.MemPipeline;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.materialize.pobject.PObjectImpl;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.junit.Rule;
-import org.junit.Test;
-
-@SuppressWarnings("serial")
-public class PObjectsIT {
-
-  private static final Integer LINES_IN_SHAKES = 3667;
-
-  @Rule
-  public TemporaryPath tmpDir = TemporaryPaths.create();
-
-  /**
-   * A mock PObject that should map PCollections of strings to an integer count of the number of
-   * elements in the underlying PCollection.
-   */
-  public static class MockPObjectImpl extends PObjectImpl<String, Integer> {
-    private int numProcessCalls;
-
-    public MockPObjectImpl(PCollection<String> collect) {
-      super(collect);
-      numProcessCalls = 0;
-    }
-
-    @Override
-    public Integer process(Iterable<String> input) {
-      numProcessCalls++;
-      int i = 0;
-      Iterator<String> itr = input.iterator();
-      while (itr.hasNext()) {
-        i++;
-        itr.next();
-      }
-      return i;
-    }
-
-    public int getNumProcessCalls() {
-      return numProcessCalls;
-    }
-  }
-
-  @Test
-  public void testMRPipeline() throws IOException {
-    run(new MRPipeline(PObjectsIT.class, tmpDir.getDefaultConfiguration()));
-  }
-
-  @Test
-  public void testInMemoryPipeline() throws IOException {
-    run(MemPipeline.getInstance());
-  }
-
-  public void run(Pipeline pipeline) throws IOException {
-    String shakesInputPath = tmpDir.copyResourceFileName("shakes.txt");
-    PCollection<String> shakespeare = pipeline.readTextFile(shakesInputPath);
-    MockPObjectImpl lineCount = new MockPObjectImpl(shakespeare);
-    // Get the line count once and verify its correctness.
-    assertEquals("Incorrect number of lines counted from PCollection.", LINES_IN_SHAKES,
-        lineCount.getValue());
-    // And do it again.
-    assertEquals("Incorrect number of lines counted from PCollection.", LINES_IN_SHAKES,
-        lineCount.getValue());
-    // Make sure process was called only once because the PObject's value was cached after the
-    // first call.
-    assertEquals("Process on PObject not called exactly 1 times.", 1,
-        lineCount.getNumProcessCalls());
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/PTableKeyValueIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/PTableKeyValueIT.java b/crunch/src/it/java/org/apache/crunch/PTableKeyValueIT.java
deleted file mode 100644
index d56e122..0000000
--- a/crunch/src/it/java/org/apache/crunch/PTableKeyValueIT.java
+++ /dev/null
@@ -1,103 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-
-import junit.framework.Assert;
-
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.io.At;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.PTypeFamily;
-import org.apache.crunch.types.avro.AvroTypeFamily;
-import org.apache.crunch.types.writable.WritableTypeFamily;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-import org.junit.runners.Parameterized.Parameters;
-
-import com.google.common.collect.Lists;
-
-@RunWith(value = Parameterized.class)
-public class PTableKeyValueIT implements Serializable {
-
-  private static final long serialVersionUID = 4374227704751746689L;
-
-  private transient PTypeFamily typeFamily;
-  private transient MRPipeline pipeline;
-  private transient String inputFile;
-  @Rule
-  public transient TemporaryPath tmpDir = TemporaryPaths.create();
-
-  @Before
-  public void setUp() throws IOException {
-    pipeline = new MRPipeline(PTableKeyValueIT.class, tmpDir.getDefaultConfiguration());
-    inputFile = tmpDir.copyResourceFileName("set1.txt");
-  }
-
-  @After
-  public void tearDown() {
-    pipeline.done();
-  }
-
-  public PTableKeyValueIT(PTypeFamily typeFamily) {
-    this.typeFamily = typeFamily;
-  }
-
-  @Parameters
-  public static Collection<Object[]> data() {
-    Object[][] data = new Object[][] { { WritableTypeFamily.getInstance() }, { AvroTypeFamily.getInstance() } };
-    return Arrays.asList(data);
-  }
-
-  @Test
-  public void testKeysAndValues() throws Exception {
-
-    PCollection<String> collection = pipeline.read(At.textFile(inputFile, typeFamily.strings()));
-
-    PTable<String, String> table = collection.parallelDo(new DoFn<String, Pair<String, String>>() {
-
-      @Override
-      public void process(String input, Emitter<Pair<String, String>> emitter) {
-        emitter.emit(Pair.of(input.toUpperCase(), input));
-
-      }
-    }, typeFamily.tableOf(typeFamily.strings(), typeFamily.strings()));
-
-    PCollection<String> keys = table.keys();
-    PCollection<String> values = table.values();
-
-    ArrayList<String> keyList = Lists.newArrayList(keys.materialize().iterator());
-    ArrayList<String> valueList = Lists.newArrayList(values.materialize().iterator());
-
-    Assert.assertEquals(keyList.size(), valueList.size());
-    for (int i = 0; i < keyList.size(); i++) {
-      Assert.assertEquals(keyList.get(i), valueList.get(i).toUpperCase());
-    }
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/PageRankIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/PageRankIT.java b/crunch/src/it/java/org/apache/crunch/PageRankIT.java
deleted file mode 100644
index 6291ef8..0000000
--- a/crunch/src/it/java/org/apache/crunch/PageRankIT.java
+++ /dev/null
@@ -1,168 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import static org.junit.Assert.assertEquals;
-
-import java.util.Collection;
-import java.util.List;
-
-import org.apache.crunch.impl.mem.MemPipeline;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.lib.Aggregate;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.PTypeFamily;
-import org.apache.crunch.types.PTypes;
-import org.apache.crunch.types.avro.AvroTypeFamily;
-import org.apache.crunch.types.avro.Avros;
-import org.apache.crunch.types.writable.WritableTypeFamily;
-import org.junit.Rule;
-import org.junit.Test;
-
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Lists;
-
-public class PageRankIT {
-
-  public static class PageRankData {
-    public float score;
-    public float lastScore;
-    public List<String> urls;
-
-    public PageRankData() {
-    }
-
-    public PageRankData(float score, float lastScore, Iterable<String> urls) {
-      this.score = score;
-      this.lastScore = lastScore;
-      this.urls = Lists.newArrayList(urls);
-    }
-
-    public PageRankData next(float newScore) {
-      return new PageRankData(newScore, score, urls);
-    }
-
-    public float propagatedScore() {
-      return score / urls.size();
-    }
-
-    @Override
-    public String toString() {
-      return score + " " + lastScore + " " + urls;
-    }
-  }
-
-  @Rule
-  public TemporaryPath tmpDir = TemporaryPaths.create();
-
-  @Test
-  public void testAvroReflect() throws Exception {
-    PTypeFamily tf = AvroTypeFamily.getInstance();
-    PType<PageRankData> prType = Avros.reflects(PageRankData.class);
-    String urlInput = tmpDir.copyResourceFileName("urls.txt");
-    run(new MRPipeline(PageRankIT.class, tmpDir.getDefaultConfiguration()),
-        urlInput, prType, tf);
-  }
-
-  @Test
-  public void testAvroReflectInMemory() throws Exception {
-    PTypeFamily tf = AvroTypeFamily.getInstance();
-    PType<PageRankData> prType = Avros.reflects(PageRankData.class);
-    String urlInput = tmpDir.copyResourceFileName("urls.txt");
-    run(MemPipeline.getInstance(), urlInput, prType, tf);
-  }
-
-  @Test
-  public void testAvroJSON() throws Exception {
-    PTypeFamily tf = AvroTypeFamily.getInstance();
-    PType<PageRankData> prType = PTypes.jsonString(PageRankData.class, tf);
-    String urlInput = tmpDir.copyResourceFileName("urls.txt");
-    run(new MRPipeline(PageRankIT.class, tmpDir.getDefaultConfiguration()),
-        urlInput, prType, tf);
-  }
-
-  @Test
-  public void testWritablesJSON() throws Exception {
-    PTypeFamily tf = WritableTypeFamily.getInstance();
-    PType<PageRankData> prType = PTypes.jsonString(PageRankData.class, tf);
-    String urlInput = tmpDir.copyResourceFileName("urls.txt");
-    run(new MRPipeline(PageRankIT.class, tmpDir.getDefaultConfiguration()),
-        urlInput, prType, tf);
-  }
-
-  public static PTable<String, PageRankData> pageRank(PTable<String, PageRankData> input, final float d) {
-    PTypeFamily ptf = input.getTypeFamily();
-    PTable<String, Float> outbound = input.parallelDo(new DoFn<Pair<String, PageRankData>, Pair<String, Float>>() {
-      @Override
-      public void process(Pair<String, PageRankData> input, Emitter<Pair<String, Float>> emitter) {
-        PageRankData prd = input.second();
-        for (String link : prd.urls) {
-          emitter.emit(Pair.of(link, prd.propagatedScore()));
-        }
-      }
-    }, ptf.tableOf(ptf.strings(), ptf.floats()));
-
-    return input.cogroup(outbound).parallelDo(
-        new MapFn<Pair<String, Pair<Collection<PageRankData>, Collection<Float>>>, Pair<String, PageRankData>>() {
-          @Override
-          public Pair<String, PageRankData> map(Pair<String, Pair<Collection<PageRankData>, Collection<Float>>> input) {
-            PageRankData prd = Iterables.getOnlyElement(input.second().first());
-            Collection<Float> propagatedScores = input.second().second();
-            float sum = 0.0f;
-            for (Float s : propagatedScores) {
-              sum += s;
-            }
-            return Pair.of(input.first(), prd.next(d + (1.0f - d) * sum));
-          }
-        }, input.getPTableType());
-  }
-
-  public static void run(Pipeline pipeline, String urlInput,
-      PType<PageRankData> prType, PTypeFamily ptf) throws Exception {
-    PTable<String, PageRankData> scores = pipeline.readTextFile(urlInput)
-        .parallelDo(new MapFn<String, Pair<String, String>>() {
-          @Override
-          public Pair<String, String> map(String input) {
-            String[] urls = input.split("\\t");
-            return Pair.of(urls[0], urls[1]);
-          }
-        }, ptf.tableOf(ptf.strings(), ptf.strings())).groupByKey()
-        .parallelDo(new MapFn<Pair<String, Iterable<String>>, Pair<String, PageRankData>>() {
-          @Override
-          public Pair<String, PageRankData> map(Pair<String, Iterable<String>> input) {
-            return Pair.of(input.first(), new PageRankData(1.0f, 0.0f, input.second()));
-          }
-        }, ptf.tableOf(ptf.strings(), prType));
-
-    Float delta = 1.0f;
-    while (delta > 0.01) {
-      scores = pageRank(scores, 0.5f);
-      scores.materialize().iterator(); // force the write
-      delta = Aggregate.max(scores.parallelDo(new MapFn<Pair<String, PageRankData>, Float>() {
-        @Override
-        public Float map(Pair<String, PageRankData> input) {
-          PageRankData prd = input.second();
-          return Math.abs(prd.score - prd.lastScore);
-        }
-      }, ptf.floats())).getValue();
-    }
-    assertEquals(0.0048, delta, 0.001);
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/StageResultsCountersIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/StageResultsCountersIT.java b/crunch/src/it/java/org/apache/crunch/StageResultsCountersIT.java
deleted file mode 100644
index 19fc302..0000000
--- a/crunch/src/it/java/org/apache/crunch/StageResultsCountersIT.java
+++ /dev/null
@@ -1,135 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import static junit.framework.Assert.assertEquals;
-import static junit.framework.Assert.assertTrue;
-
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.commons.lang.StringUtils;
-import org.apache.crunch.PipelineResult.StageResult;
-import org.apache.crunch.impl.mem.MemPipeline;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.io.From;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.PTypeFamily;
-import org.apache.crunch.types.avro.AvroTypeFamily;
-import org.apache.crunch.types.writable.WritableTypeFamily;
-import org.apache.hadoop.mapreduce.Counter;
-import org.junit.After;
-import org.junit.Rule;
-import org.junit.Test;
-
-import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
-
-public class StageResultsCountersIT {
-
-  @Rule
-  public TemporaryPath tmpDir = TemporaryPaths.create();
-
-  public static HashSet<String> SPECIAL_KEYWORDS = Sets.newHashSet("AND", "OR", "NOT");
-
-  public static String KEYWORDS_COUNTER_GROUP = "KEYWORDS_COUNTER_GROUP";
-
-  @After
-  public void after() {
-    MemPipeline.clearCounters();
-  }
-  
-  @Test
-  public void testStageResultsCountersMRWritables() throws Exception {
-    testSpecialKeywordCount(new MRPipeline(StageResultsCountersIT.class, tmpDir.getDefaultConfiguration()),
-        WritableTypeFamily.getInstance());
-  }
-
-  @Test
-  public void testStageResultsCountersMRAvro() throws Exception {
-    testSpecialKeywordCount(new MRPipeline(StageResultsCountersIT.class, tmpDir.getDefaultConfiguration()),
-        AvroTypeFamily.getInstance());
-  }
-
-  @Test
-  public void testStageResultsCountersMemWritables() throws Exception {
-    testSpecialKeywordCount(MemPipeline.getInstance(), WritableTypeFamily.getInstance());
-  }
-
-  @Test
-  public void testStageResultsCountersMemAvro() throws Exception {
-    testSpecialKeywordCount(MemPipeline.getInstance(), AvroTypeFamily.getInstance());
-  }
-
-  public void testSpecialKeywordCount(Pipeline pipeline, PTypeFamily tf) throws Exception {
-
-    String rowsInputPath = tmpDir.copyResourceFileName("shakes.txt");
-
-    PipelineResult result = countSpecialKeywords(pipeline, rowsInputPath, tf);
-
-    assertTrue(result.succeeded());
-
-    Map<String, Long> keywordsMap = countersToMap(result.getStageResults(), KEYWORDS_COUNTER_GROUP);
-
-    assertEquals(3, keywordsMap.size());
-
-    assertEquals("{NOT=157, AND=596, OR=81}", keywordsMap.toString());
-  }
-
-  private static PipelineResult countSpecialKeywords(Pipeline pipeline, String inputFileName, PTypeFamily tf) {
-
-    pipeline.read(From.textFile(inputFileName)).parallelDo(new DoFn<String, Void>() {
-
-      @Override
-      public void process(String text, Emitter<Void> emitter) {
-
-        if (!StringUtils.isBlank(text)) {
-
-          String[] tokens = text.toUpperCase().split("\\s");
-
-          for (String token : tokens) {
-            if (SPECIAL_KEYWORDS.contains(token)) {
-              getCounter(KEYWORDS_COUNTER_GROUP, token).increment(1);
-            }
-          }
-        }
-      }
-    }, tf.nulls()).materialize(); // TODO can we avoid the materialize ?
-
-    return pipeline.done();
-  }
-
-  private static Map<String, Long> countersToMap(List<StageResult> stages, String counterGroupName) {
-
-    Map<String, Long> countersMap = Maps.newHashMap();
-
-    for (StageResult sr : stages) {
-      Iterator<Counter> iterator = sr.getCounters().getGroup(counterGroupName).iterator();
-      while (iterator.hasNext()) {
-        Counter counter = (Counter) iterator.next();
-        countersMap.put(counter.getDisplayName(), counter.getValue());
-      }
-    }
-
-    return countersMap;
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/TermFrequencyIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/TermFrequencyIT.java b/crunch/src/it/java/org/apache/crunch/TermFrequencyIT.java
deleted file mode 100644
index ca66aa8..0000000
--- a/crunch/src/it/java/org/apache/crunch/TermFrequencyIT.java
+++ /dev/null
@@ -1,135 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.Serializable;
-
-import org.apache.crunch.impl.mem.MemPipeline;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.io.At;
-import org.apache.crunch.io.ReadableSourceTarget;
-import org.apache.crunch.lib.Aggregate;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.PTypeFamily;
-import org.apache.crunch.types.writable.WritableTypeFamily;
-import org.junit.Rule;
-import org.junit.Test;
-
-@SuppressWarnings("serial")
-public class TermFrequencyIT implements Serializable {
-  @Rule
-  public transient TemporaryPath tmpDir = TemporaryPaths.create();
-
-  @Test
-  public void testTermFrequencyWithNoTransform() throws IOException {
-    run(new MRPipeline(TermFrequencyIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance(), false);
-  }
-
-  @Test
-  public void testTermFrequencyWithTransform() throws IOException {
-    run(new MRPipeline(TermFrequencyIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance(), true);
-  }
-
-  @Test
-  public void testTermFrequencyNoTransformInMemory() throws IOException {
-    run(MemPipeline.getInstance(), WritableTypeFamily.getInstance(), false);
-  }
-
-  @Test
-  public void testTermFrequencyWithTransformInMemory() throws IOException {
-    run(MemPipeline.getInstance(), WritableTypeFamily.getInstance(), true);
-  }
-
-  public void run(Pipeline pipeline, PTypeFamily typeFamily, boolean transformTF) throws IOException {
-    String input = tmpDir.copyResourceFileName("docs.txt");
-
-    File transformedOutput = tmpDir.getFile("transformed-output");
-    File tfOutput = tmpDir.getFile("tf-output");
-
-    PCollection<String> docs = pipeline.readTextFile(input);
-
-    PTypeFamily ptf = docs.getTypeFamily();
-
-    /*
-     * Input: String - one document per line, tab-separated as "title<TAB>text"
-     *
-     * Output: PTable<Pair<String, String>, Long> - Pair<Pair<word, title>,
-     * count of word in title>
-     */
-    PTable<Pair<String, String>, Long> tf = Aggregate.count(docs.parallelDo("term document frequency",
-        new DoFn<String, Pair<String, String>>() {
-          @Override
-          public void process(String doc, Emitter<Pair<String, String>> emitter) {
-            String[] kv = doc.split("\t");
-            String title = kv[0];
-            String text = kv[1];
-            for (String word : text.split("\\W+")) {
-              if (word.length() > 0) {
-                Pair<String, String> pair = Pair.of(word.toLowerCase(), title);
-                emitter.emit(pair);
-              }
-            }
-          }
-        }, ptf.pairs(ptf.strings(), ptf.strings())));
-
-    if (transformTF) {
-      /*
-       * Input: Pair<Pair<String, String>, Long> Pair<Pair<word, title>, count
-       * in title>
-       * 
-       * Output: PTable<String, Pair<String, Long>> PTable<word, Pair<title,
-       * count in title>>
-       */
-      PTable<String, Pair<String, Long>> wordDocumentCountPair = tf.parallelDo("transform wordDocumentPairCount",
-          new MapFn<Pair<Pair<String, String>, Long>, Pair<String, Pair<String, Long>>>() {
-            @Override
-            public Pair<String, Pair<String, Long>> map(Pair<Pair<String, String>, Long> input) {
-              Pair<String, String> wordDocumentPair = input.first();
-              return Pair.of(wordDocumentPair.first(), Pair.of(wordDocumentPair.second(), input.second()));
-            }
-          }, ptf.tableOf(ptf.strings(), ptf.pairs(ptf.strings(), ptf.longs())));
-
-      pipeline.writeTextFile(wordDocumentCountPair, transformedOutput.getAbsolutePath());
-    }
-
-    SourceTarget<String> st = At.textFile(tfOutput.getAbsolutePath());
-    pipeline.write(tf, st);
-
-    pipeline.run();
-
-    // verify the expected term-frequency line is present (and the incorrect one is not)
-    Iterable<String> lines = ((ReadableSourceTarget<String>) st).read(pipeline.getConfiguration());
-    boolean passed = false;
-    for (String line : lines) {
-      if ("[well,A]\t0".equals(line)) {
-        fail("Found " + line + " but well is in Document A 1 time");
-      }
-      if ("[well,A]\t1".equals(line)) {
-        passed = true;
-      }
-    }
-    assertTrue(passed);
-    pipeline.done();
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/TextPairIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/TextPairIT.java b/crunch/src/it/java/org/apache/crunch/TextPairIT.java
deleted file mode 100644
index 55d9af9..0000000
--- a/crunch/src/it/java/org/apache/crunch/TextPairIT.java
+++ /dev/null
@@ -1,72 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import static org.junit.Assert.assertTrue;
-
-import java.io.IOException;
-
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.io.From;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.writable.Writables;
-import org.junit.Rule;
-import org.junit.Test;
-
-public class TextPairIT {
-  @Rule
-  public TemporaryPath tmpDir = TemporaryPaths.create();
-
-  @Test
-  public void testWritables() throws IOException {
-    run(new MRPipeline(TextPairIT.class, tmpDir.getDefaultConfiguration()));
-  }
-
-  private static final String CANARY = "Writables.STRING_TO_TEXT";
-
-  public static PCollection<Pair<String, String>> wordDuplicate(PCollection<String> words) {
-    return words.parallelDo("my word duplicator", new DoFn<String, Pair<String, String>>() {
-      public void process(String line, Emitter<Pair<String, String>> emitter) {
-        for (String word : line.split("\\W+")) {
-          if (word.length() > 0) {
-            Pair<String, String> pair = Pair.of(CANARY, word);
-            emitter.emit(pair);
-          }
-        }
-      }
-    }, Writables.pairs(Writables.strings(), Writables.strings()));
-  }
-
-  public void run(Pipeline pipeline) throws IOException {
-    String input = tmpDir.copyResourceFileName("shakes.txt");
-
-    PCollection<String> shakespeare = pipeline.read(From.textFile(input));
-    Iterable<Pair<String, String>> lines = pipeline.materialize(wordDuplicate(shakespeare));
-    boolean passed = false;
-    for (Pair<String, String> line : lines) {
-      if (line.first().contains(CANARY)) {
-        passed = true;
-        break;
-      }
-    }
-
-    pipeline.done();
-    assertTrue(passed);
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/TfIdfIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/TfIdfIT.java b/crunch/src/it/java/org/apache/crunch/TfIdfIT.java
deleted file mode 100644
index 218f538..0000000
--- a/crunch/src/it/java/org/apache/crunch/TfIdfIT.java
+++ /dev/null
@@ -1,224 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import static org.junit.Assert.assertTrue;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.Serializable;
-import java.nio.charset.Charset;
-import java.util.Collection;
-import java.util.List;
-
-import org.apache.crunch.fn.MapKeysFn;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.io.seq.SeqFileSourceTarget;
-import org.apache.crunch.lib.Aggregate;
-import org.apache.crunch.lib.Join;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.PTypeFamily;
-import org.apache.crunch.types.writable.WritableTypeFamily;
-import org.apache.hadoop.fs.Path;
-import org.junit.Rule;
-import org.junit.Test;
-
-import com.google.common.collect.Lists;
-import com.google.common.io.Files;
-
-@SuppressWarnings("serial")
-public class TfIdfIT implements Serializable {
-  @Rule
-  public transient TemporaryPath tmpDir = TemporaryPaths.create();
-
-  // total number of documents; ideally this would be calculated from the input rather than hard-coded
-  protected static final double N = 2;
-
-  @Test
-  public void testWritablesSingleRun() throws IOException {
-    run(new MRPipeline(TfIdfIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance(), true);
-  }
-
-  @Test
-  public void testWritablesMultiRun() throws IOException {
-    run(new MRPipeline(TfIdfIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance(), false);
-  }
-
-  /**
-   * Generates a tf-idf score for each (word, document) pair in the input documents.
-   */
-  public PTable<String, Collection<Pair<String, Double>>> generateTFIDF(PCollection<String> docs, Path termFreqPath,
-      PTypeFamily ptf) throws IOException {
-
-    /*
-     * Input: String - one document per line, tab-separated as "title<TAB>text"
-     *
-     * Output: PTable<Pair<String, String>, Long> - Pair<Pair<word, title>,
-     * count of word in title>
-     */
-    PTable<Pair<String, String>, Long> tf = Aggregate.count(docs.parallelDo("term document frequency",
-        new DoFn<String, Pair<String, String>>() {
-          @Override
-          public void process(String doc, Emitter<Pair<String, String>> emitter) {
-            String[] kv = doc.split("\t");
-            String title = kv[0];
-            String text = kv[1];
-            for (String word : text.split("\\W+")) {
-              if (word.length() > 0) {
-                Pair<String, String> pair = Pair.of(word.toLowerCase(), title);
-                emitter.emit(pair);
-              }
-            }
-          }
-        }, ptf.pairs(ptf.strings(), ptf.strings())));
-
-    tf.write(new SeqFileSourceTarget<Pair<Pair<String, String>, Long>>(termFreqPath, tf.getPType()));
-
-    /*
-     * Input: Pair<Pair<String, String>, Long> Pair<Pair<word, title>, count in
-     * title>
-     * 
-     * Output: PTable<String, Long> PTable<word, # of docs containing word>
-     */
-    PTable<String, Long> n = Aggregate.count(tf.parallelDo("little n (# of docs contain word)",
-        new DoFn<Pair<Pair<String, String>, Long>, String>() {
-          @Override
-          public void process(Pair<Pair<String, String>, Long> input, Emitter<String> emitter) {
-            emitter.emit(input.first().first());
-          }
-        }, ptf.strings()));
-
-    /*
-     * Input: Pair<Pair<String, String>, Long> - Pair<Pair<word, title>,
-     * count of word in title>
-     *
-     * Output: PTable<String, Collection<Pair<String, Long>>> - PTable<word,
-     * Collection<Pair<title, count of word in title>>>
-     */
-    PTable<String, Collection<Pair<String, Long>>> wordDocumentCountPair = tf.parallelDo(
-        "transform wordDocumentPairCount",
-        new DoFn<Pair<Pair<String, String>, Long>, Pair<String, Collection<Pair<String, Long>>>>() {
-          Collection<Pair<String, Long>> buffer;
-          String key;
-
-          @Override
-          public void process(Pair<Pair<String, String>, Long> input,
-              Emitter<Pair<String, Collection<Pair<String, Long>>>> emitter) {
-            Pair<String, String> wordDocumentPair = input.first();
-            if (!wordDocumentPair.first().equals(key)) {
-              flush(emitter);
-              key = wordDocumentPair.first();
-              buffer = Lists.newArrayList();
-            }
-            buffer.add(Pair.of(wordDocumentPair.second(), input.second()));
-          }
-
-          protected void flush(Emitter<Pair<String, Collection<Pair<String, Long>>>> emitter) {
-            if (buffer != null) {
-              emitter.emit(Pair.of(key, buffer));
-              buffer = null;
-            }
-          }
-
-          @Override
-          public void cleanup(Emitter<Pair<String, Collection<Pair<String, Long>>>> emitter) {
-            flush(emitter);
-          }
-        }, ptf.tableOf(ptf.strings(), ptf.collections(ptf.pairs(ptf.strings(), ptf.longs()))));
-
-    PTable<String, Pair<Long, Collection<Pair<String, Long>>>> joinedResults = Join.join(n, wordDocumentCountPair);
-
-    /*
-     * Input: Pair<String, Pair<Long, Collection<Pair<String, Long>>>> -
-     * Pair<word, Pair<# of docs containing word, Collection<Pair<title, term frequency>>>>
-     *
-     * Output: Pair<String, Collection<Pair<String, Double>>> - Pair<word,
-     * Collection<Pair<title, tfidf>>>
-     */
-    return joinedResults
-        .parallelDo(
-            "calculate tfidf",
-            new MapFn<Pair<String, Pair<Long, Collection<Pair<String, Long>>>>, Pair<String, Collection<Pair<String, Double>>>>() {
-              @Override
-              public Pair<String, Collection<Pair<String, Double>>> map(
-                  Pair<String, Pair<Long, Collection<Pair<String, Long>>>> input) {
-                Collection<Pair<String, Double>> tfidfs = Lists.newArrayList();
-                String word = input.first();
-                double n = input.second().first();
-                double idf = Math.log(N / n);
-                for (Pair<String, Long> tf : input.second().second()) {
-                  double tfidf = tf.second() * idf;
-                  tfidfs.add(Pair.of(tf.first(), tfidf));
-                }
-                return Pair.of(word, tfidfs);
-              }
-
-            }, ptf.tableOf(ptf.strings(), ptf.collections(ptf.pairs(ptf.strings(), ptf.doubles()))));
-  }
-
-  public void run(Pipeline pipeline, PTypeFamily typeFamily, boolean singleRun) throws IOException {
-    String inputFile = tmpDir.copyResourceFileName("docs.txt");
-    String outputPath1 = tmpDir.getFileName("output1");
-    String outputPath2 = tmpDir.getFileName("output2");
-
-    Path tfPath = tmpDir.getPath("termfreq");
-
-    PCollection<String> docs = pipeline.readTextFile(inputFile);
-
-    PTable<String, Collection<Pair<String, Double>>> results = generateTFIDF(docs, tfPath, typeFamily);
-    pipeline.writeTextFile(results, outputPath1);
-    if (!singleRun) {
-      pipeline.run();
-    }
-
-    PTable<String, Collection<Pair<String, Double>>> uppercased = results.parallelDo(
-        new MapKeysFn<String, String, Collection<Pair<String, Double>>>() {
-          @Override
-          public String map(String k1) {
-            return k1.toUpperCase();
-          }
-        }, results.getPTableType());
-    pipeline.writeTextFile(uppercased, outputPath2);
-    pipeline.done();
-
-    // Check the lowercase version...
-    File outputFile = new File(outputPath1, "part-r-00000");
-    List<String> lines = Files.readLines(outputFile, Charset.defaultCharset());
-    boolean passed = false;
-    for (String line : lines) {
-      if (line.startsWith("[the") && line.contains("B,0.6931471805599453")) {
-        passed = true;
-        break;
-      }
-    }
-    assertTrue(passed);
-
-    // ...and the uppercase version
-    outputFile = new File(outputPath2, "part-r-00000");
-    lines = Files.readLines(outputFile, Charset.defaultCharset());
-    passed = false;
-    for (String line : lines) {
-      if (line.startsWith("[THE") && line.contains("B,0.6931471805599453")) {
-        passed = true;
-        break;
-      }
-    }
-    assertTrue(passed);
-  }
-}
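
A note on the constant asserted in run() above: with N = 2 documents, the expected
tf-idf for "the" in document B is 0.6931471805599453, which is exactly ln(2)
(Math.log(2) in Java). Under the formula implemented in generateTFIDF,

    idf(word)        = ln(N / n_word)
    tfidf(word, doc) = tf(word, doc) * idf(word)

a result of ln(2) corresponds to n_word = 1 and tf = 1. That implies "the" occurs
exactly once, and only in document B (an assumption about docs.txt inferred from
the assertion rather than stated in the test); if the word appeared in both
documents, idf would be ln(2/2) = 0 and the check could not match.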

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/TupleNClassCastBugIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/TupleNClassCastBugIT.java b/crunch/src/it/java/org/apache/crunch/TupleNClassCastBugIT.java
deleted file mode 100644
index e49f4d5..0000000
--- a/crunch/src/it/java/org/apache/crunch/TupleNClassCastBugIT.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.File;
-import java.io.IOException;
-import java.nio.charset.Charset;
-import java.util.List;
-
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.PTypeFamily;
-import org.apache.crunch.types.avro.AvroTypeFamily;
-import org.apache.crunch.types.writable.WritableTypeFamily;
-import org.junit.Rule;
-import org.junit.Test;
-
-import com.google.common.io.Files;
-
-
-public class TupleNClassCastBugIT {
-  @Rule
-  public TemporaryPath tmpDir = TemporaryPaths.create();
-
-  public static PCollection<TupleN> mapGroupDo(PCollection<String> lines, PTypeFamily ptf) {
-    PTable<String, TupleN> mapped = lines.parallelDo(new MapFn<String, Pair<String, TupleN>>() {
-
-      @Override
-      public Pair<String, TupleN> map(String line) {
-        String[] columns = line.split("\\t");
-        String docId = columns[0];
-        String docLine = columns[1];
-        return Pair.of(docId, new TupleN(docId, docLine));
-      }
-    }, ptf.tableOf(ptf.strings(), ptf.tuples(ptf.strings(), ptf.strings())));
-    return mapped.groupByKey().parallelDo(new DoFn<Pair<String, Iterable<TupleN>>, TupleN>() {
-      @Override
-      public void process(Pair<String, Iterable<TupleN>> input, Emitter<TupleN> tupleNEmitter) {
-        for (TupleN tuple : input.second()) {
-          tupleNEmitter.emit(tuple);
-        }
-      }
-    }, ptf.tuples(ptf.strings(), ptf.strings()));
-  }
-
-  @Test
-  public void testWritables() throws IOException {
-    run(new MRPipeline(TupleNClassCastBugIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance());
-  }
-
-  @Test
-  public void testAvro() throws IOException {
-    run(new MRPipeline(TupleNClassCastBugIT.class, tmpDir.getDefaultConfiguration()), AvroTypeFamily.getInstance());
-  }
-
-  public void run(Pipeline pipeline, PTypeFamily typeFamily) throws IOException {
-    String inputPath = tmpDir.copyResourceFileName("docs.txt");
-    String outputPath = tmpDir.getFileName("output");
-
-    PCollection<String> docLines = pipeline.readTextFile(inputPath);
-    pipeline.writeTextFile(mapGroupDo(docLines, typeFamily), outputPath);
-    pipeline.done();
-
-    // *** We are not directly testing the output; we are looking for a
-    // *** ClassCastException, which is thrown in a different thread during the
-    // *** reduce phase.
-    // *** If all is well, the file will exist and have six lines. Otherwise
-    // *** the bug is present.
-    File outputFile = new File(outputPath, "part-r-00000");
-    List<String> lines = Files.readLines(outputFile, Charset.defaultCharset());
-    int lineCount = 0;
-    for (String line : lines) {
-      lineCount++;
-    }
-    assertEquals(6, lineCount);
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/UnionFromSameSourceIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/UnionFromSameSourceIT.java b/crunch/src/it/java/org/apache/crunch/UnionFromSameSourceIT.java
deleted file mode 100644
index 501a944..0000000
--- a/crunch/src/it/java/org/apache/crunch/UnionFromSameSourceIT.java
+++ /dev/null
@@ -1,132 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.IOException;
-
-import org.apache.crunch.fn.IdentityFn;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.PTableType;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.writable.Writables;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-
-/**
- * Collection of tests re-using the same PCollection in various unions.
- */
-public class UnionFromSameSourceIT {
-
-  private static final int NUM_ELEMENTS = 4;
-
-  @Rule
-  public transient TemporaryPath tmpDir = TemporaryPaths.create();
-
-  private Pipeline pipeline;
-  private PType<String> elementType = Writables.strings();
-  private PTableType<String, String> tableType = Writables.tableOf(Writables.strings(),
-    Writables.strings());
-
-  @Before
-  public void setUp() {
-    pipeline = new MRPipeline(UnionFromSameSourceIT.class, tmpDir.getDefaultConfiguration());
-  }
-
-  @Test
-  public void testUnion_SingleRead() throws IOException {
-    PCollection<String> strings = pipeline.readTextFile(tmpDir.copyResourceFileName("set1.txt"));
-    PCollection<String> union = strings.union(strings.parallelDo(IdentityFn.<String> getInstance(),
-      strings.getPType()));
-
-    assertEquals(NUM_ELEMENTS * 2, getCount(union));
-  }
-
-  @Test
-  public void testUnion_TwoReads() throws IOException {
-    PCollection<String> stringsA = pipeline.readTextFile(tmpDir.copyResourceFileName("set1.txt"));
-    PCollection<String> stringsB = pipeline.readTextFile(tmpDir.copyResourceFileName("set1.txt"));
-
-    PCollection<String> union = stringsA.union(stringsB);
-
-    assertEquals(NUM_ELEMENTS * 2, getCount(union));
-  }
-
-  @Test
-  public void testDoubleUnion_EndingWithGBK() throws IOException {
-    runDoubleUnionPipeline(true);
-  }
-
-  @Test
-  public void testDoubleUnion_EndingWithoutGBK() throws IOException {
-    runDoubleUnionPipeline(false);
-  }
-
-  private void runDoubleUnionPipeline(boolean endWithGBK) throws IOException {
-    PCollection<String> strings = pipeline.readTextFile(tmpDir.copyResourceFileName("set1.txt"));
-    PTable<String, String> tableA = strings.parallelDo("to table A", new ToTableFn(), tableType);
-    PTable<String, String> tableB = strings.parallelDo("to table B", new ToTableFn(), tableType);
-
-    PGroupedTable<String, String> groupedTable = tableA.union(tableB).groupByKey();
-    PCollection<String> ungrouped = groupedTable.parallelDo("ungroup before union",
-      new FromGroupedTableFn(), elementType).union(
-      strings.parallelDo("fake id", IdentityFn.<String> getInstance(), elementType));
-
-    PTable<String, String> table = ungrouped.parallelDo("union back to table", new ToTableFn(),
-      tableType);
-
-    if (endWithGBK) {
-      table = table.groupByKey().ungroup();
-    }
-
-    assertEquals(3 * NUM_ELEMENTS, getCount(table));
-  }
-
-  private int getCount(PCollection<?> pcollection) {
-    int cnt = 0;
-    for (Object v : pcollection.materialize()) {
-      cnt++;
-    }
-    return cnt;
-  }
-
-  private static class ToTableFn extends MapFn<String, Pair<String, String>> {
-
-    @Override
-    public Pair<String, String> map(String input) {
-      return Pair.of(input, input);
-    }
-
-  }
-
-  private static class FromGroupedTableFn extends DoFn<Pair<String, Iterable<String>>, String> {
-
-    @Override
-    public void process(Pair<String, Iterable<String>> input, Emitter<String> emitter) {
-      for (String value : input.second()) {
-        emitter.emit(value);
-      }
-    }
-
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/UnionIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/UnionIT.java b/crunch/src/it/java/org/apache/crunch/UnionIT.java
deleted file mode 100644
index 1c60a1b..0000000
--- a/crunch/src/it/java/org/apache/crunch/UnionIT.java
+++ /dev/null
@@ -1,136 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import static org.hamcrest.Matchers.is;
-import static org.junit.Assert.assertThat;
-
-import java.io.IOException;
-import java.util.Map;
-
-import org.apache.crunch.fn.Aggregators;
-import org.apache.crunch.fn.IdentityFn;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.test.Tests;
-import org.apache.crunch.types.avro.Avros;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.ImmutableMultiset;
-
-
-public class UnionIT {
-
-  @Rule
-  public TemporaryPath tmpDir = TemporaryPaths.create();
-  private MRPipeline pipeline;
-  private PCollection<String> words1;
-  private PCollection<String> words2;
-
-  @Before
-  public void setUp() throws IOException {
-    pipeline = new MRPipeline(UnionIT.class, tmpDir.getDefaultConfiguration());
-    words1 = pipeline.readTextFile(tmpDir.copyResourceFileName(Tests.resource(this, "src1.txt")));
-    words2 = pipeline.readTextFile(tmpDir.copyResourceFileName(Tests.resource(this, "src2.txt")));
-  }
-
-  @After
-  public void tearDown() {
-    pipeline.done();
-  }
-
-  @Test
-  public void testUnion() throws Exception {
-    IdentityFn<String> identity = IdentityFn.getInstance();
-    words1 = words1.parallelDo(identity, Avros.strings());
-    words2 = words2.parallelDo(identity, Avros.strings());
-
-    PCollection<String> union = words1.union(words2);
-
-    ImmutableMultiset<String> actual = ImmutableMultiset.copyOf(union.materialize());
-    assertThat(actual.elementSet().size(), is(3));
-    assertThat(actual.count("a1"), is(4));
-    assertThat(actual.count("b2"), is(2));
-    assertThat(actual.count("c3"), is(2));
-  }
-
-  @Test
-  public void testTableUnion() throws IOException {
-    PTable<String, String> words1ByFirstLetter = byFirstLetter(words1);
-    PTable<String, String> words2ByFirstLetter = byFirstLetter(words2);
-
-    PTable<String, String> union = words1ByFirstLetter.union(words2ByFirstLetter);
-
-    ImmutableMultiset<Pair<String, String>> actual = ImmutableMultiset.copyOf(union.materialize());
-
-    assertThat(actual.elementSet().size(), is(3));
-    assertThat(actual.count(Pair.of("a", "1")), is(4));
-    assertThat(actual.count(Pair.of("b", "2")), is(2));
-    assertThat(actual.count(Pair.of("c", "3")), is(2));
-  }
-
-  @Test
-  public void testUnionThenGroupByKey() throws IOException {
-    PCollection<String> union = words1.union(words2);
-
-    PGroupedTable<String, String> grouped = byFirstLetter(union).groupByKey();
-
-    Map<String, String> actual = grouped.combineValues(Aggregators.STRING_CONCAT("", true))
-        .materializeToMap();
-
-    Map<String, String> expected = ImmutableMap.of("a", "1111", "b", "22", "c", "33");
-    assertThat(actual, is(expected));
-  }
-
-  @Test
-  public void testTableUnionThenGroupByKey() throws IOException {
-    PTable<String, String> words1ByFirstLetter = byFirstLetter(words1);
-    PTable<String, String> words2ByFirstLetter = byFirstLetter(words2);
-
-    PTable<String, String> union = words1ByFirstLetter.union(words2ByFirstLetter);
-
-    PGroupedTable<String, String> grouped = union.groupByKey();
-
-    Map<String, String> actual = grouped.combineValues(Aggregators.STRING_CONCAT("", true))
-        .materializeToMap();
-
-    Map<String, String> expected = ImmutableMap.of("a", "1111", "b", "22", "c", "33");
-    assertThat(actual, is(expected));
-  }
-
-
-  private static PTable<String, String> byFirstLetter(PCollection<String> values) {
-    return values.parallelDo("byFirstLetter", new FirstLetterKeyFn(),
-        Avros.tableOf(Avros.strings(), Avros.strings()));
-  }
-
-  private static class FirstLetterKeyFn extends DoFn<String, Pair<String, String>> {
-    @Override
-    public void process(String input, Emitter<Pair<String, String>> emitter) {
-      if (input.length() > 1) {
-        emitter.emit(Pair.of(input.substring(0, 1), input.substring(1)));
-      }
-    }
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/UnionResultsIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/UnionResultsIT.java b/crunch/src/it/java/org/apache/crunch/UnionResultsIT.java
deleted file mode 100644
index df0511a..0000000
--- a/crunch/src/it/java/org/apache/crunch/UnionResultsIT.java
+++ /dev/null
@@ -1,80 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.io.At;
-import org.apache.crunch.test.CrunchTestSupport;
-import org.apache.crunch.types.writable.Writables;
-import org.junit.Test;
-
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
-
-public class UnionResultsIT extends CrunchTestSupport implements Serializable {
-
-  static class StringLengthMapFn extends MapFn<String, Pair<String, Long>> {
-
-    @Override
-    public Pair<String, Long> map(String input) {
-      return new Pair<String, Long>(input, 10L);
-    }
-  }
-
-
-  /**
-   * Tests combining a GBK output with a map-only job output into a single
-   * unioned collection.
-   */
-  @Test
-  public void testUnionOfGroupedOutputAndNonGroupedOutput() throws IOException {
-    String inputPath = tempDir.copyResourceFileName("set1.txt");
-    String inputPath2 = tempDir.copyResourceFileName("set2.txt");
-
-    Pipeline pipeline = new MRPipeline(UnionResultsIT.class);
-
-    PCollection<String> set1Lines = pipeline.read(At.textFile(inputPath, Writables.strings()));
-    PCollection<Pair<String, Long>> set1Lengths = set1Lines.parallelDo(new StringLengthMapFn(),
-        Writables.pairs(Writables.strings(), Writables.longs()));
-    PCollection<Pair<String, Long>> set2Counts = pipeline.read(At.textFile(inputPath2, Writables.strings())).count();
-
-    PCollection<Pair<String, Long>> union = set1Lengths.union(set2Counts);
-
-    List<Pair<String, Long>> unionValues = Lists.newArrayList(union.materialize());
-    assertEquals(7, unionValues.size());
-
-    Set<Pair<String, Long>> expectedPairs = Sets.newHashSet();
-    expectedPairs.add(Pair.of("b", 10L));
-    expectedPairs.add(Pair.of("c", 10L));
-    expectedPairs.add(Pair.of("a", 10L));
-    expectedPairs.add(Pair.of("e", 10L));
-    expectedPairs.add(Pair.of("a", 1L));
-    expectedPairs.add(Pair.of("c", 1L));
-    expectedPairs.add(Pair.of("d", 1L));
-
-    assertEquals(expectedPairs, Sets.newHashSet(unionValues));
-
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/WordCountIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/WordCountIT.java b/crunch/src/it/java/org/apache/crunch/WordCountIT.java
deleted file mode 100644
index c646663..0000000
--- a/crunch/src/it/java/org/apache/crunch/WordCountIT.java
+++ /dev/null
@@ -1,171 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import java.io.File;
-import java.io.IOException;
-import java.nio.charset.Charset;
-import java.util.List;
-
-import org.apache.crunch.fn.Aggregators;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.io.At;
-import org.apache.crunch.io.To;
-import org.apache.crunch.lib.Aggregate;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.PTypeFamily;
-import org.apache.crunch.types.avro.AvroTypeFamily;
-import org.apache.crunch.types.writable.WritableTypeFamily;
-import org.junit.Rule;
-import org.junit.Test;
-
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Lists;
-import com.google.common.io.Files;
-
-public class WordCountIT {
-  @Rule
-  public TemporaryPath tmpDir = TemporaryPaths.create();
-
-  enum WordCountStats {
-    ANDS
-  };
-
-  public static PTable<String, Long> wordCount(PCollection<String> words, PTypeFamily typeFamily) {
-    return Aggregate.count(words.parallelDo(new DoFn<String, String>() {
-
-      @Override
-      public void process(String line, Emitter<String> emitter) {
-        for (String word : line.split("\\s+")) {
-          emitter.emit(word);
-          if ("and".equals(word)) {
-            increment(WordCountStats.ANDS);
-          }
-        }
-      }
-    }, typeFamily.strings()));
-  }
-
-  public static PTable<String, Long> substr(PTable<String, Long> ptable) {
-    return ptable.parallelDo(new DoFn<Pair<String, Long>, Pair<String, Long>>() {
-
-      public void process(Pair<String, Long> input, Emitter<Pair<String, Long>> emitter) {
-        if (input.first().length() > 0) {
-          emitter.emit(Pair.of(input.first().substring(0, 1), input.second()));
-        }
-      }
-    }, ptable.getPTableType());
-  }
-
-  private boolean runSecond = false;
-  private boolean useToOutput = false;
-
-  @Test
-  public void testWritables() throws IOException {
-    run(new MRPipeline(WordCountIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance());
-  }
-
-  @Test
-  public void testWritablesWithSecond() throws IOException {
-    runSecond = true;
-    run(new MRPipeline(WordCountIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance());
-  }
-
-  @Test
-  public void testWritablesWithSecondUseToOutput() throws IOException {
-    runSecond = true;
-    useToOutput = true;
-    run(new MRPipeline(WordCountIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance());
-  }
-
-  @Test
-  public void testAvro() throws IOException {
-    run(new MRPipeline(WordCountIT.class, tmpDir.getDefaultConfiguration()), AvroTypeFamily.getInstance());
-  }
-
-  @Test
-  public void testAvroWithSecond() throws IOException {
-    runSecond = true;
-    run(new MRPipeline(WordCountIT.class, tmpDir.getDefaultConfiguration()), AvroTypeFamily.getInstance());
-  }
-
-  @Test
-  public void testWithTopWritable() throws IOException {
-    runWithTop(WritableTypeFamily.getInstance());
-  }
-
-  @Test
-  public void testWithTopAvro() throws IOException {
-    runWithTop(AvroTypeFamily.getInstance());
-  }
-
-  public void runWithTop(PTypeFamily tf) throws IOException {
-    Pipeline pipeline = new MRPipeline(WordCountIT.class, tmpDir.getDefaultConfiguration());
-    String inputPath = tmpDir.copyResourceFileName("shakes.txt");
-
-    PCollection<String> shakespeare = pipeline.read(At.textFile(inputPath, tf.strings()));
-    PTable<String, Long> wordCount = wordCount(shakespeare, tf);
-    List<Pair<String, Long>> top5 = Lists.newArrayList(Aggregate.top(wordCount, 5, true).materialize());
-    assertEquals(
-        ImmutableList.of(Pair.of("", 1470L), Pair.of("the", 620L), Pair.of("and", 427L), Pair.of("of", 396L),
-            Pair.of("to", 367L)), top5);
-  }
-
-  public void run(Pipeline pipeline, PTypeFamily typeFamily) throws IOException {
-    String inputPath = tmpDir.copyResourceFileName("shakes.txt");
-    String outputPath = tmpDir.getFileName("output");
-
-    PCollection<String> shakespeare = pipeline.read(At.textFile(inputPath, typeFamily.strings()));
-    PTable<String, Long> wordCount = wordCount(shakespeare, typeFamily);
-    if (useToOutput) {
-      wordCount.write(To.textFile(outputPath));
-    } else {
-      pipeline.writeTextFile(wordCount, outputPath);
-    }
-
-    if (runSecond) {
-      String substrPath = tmpDir.getFileName("substr");
-      PTable<String, Long> we = substr(wordCount).groupByKey().combineValues(Aggregators.SUM_LONGS());
-      pipeline.writeTextFile(we, substrPath);
-    }
-    PipelineResult res = pipeline.done();
-    assertTrue(res.succeeded());
-    List<PipelineResult.StageResult> stageResults = res.getStageResults();
-    if (runSecond) {
-      assertEquals(2, stageResults.size());
-    } else {
-      assertEquals(1, stageResults.size());
-      assertEquals(427, stageResults.get(0).getCounterValue(WordCountStats.ANDS));
-    }
-
-    File outputFile = new File(outputPath, "part-r-00000");
-    List<String> lines = Files.readLines(outputFile, Charset.defaultCharset());
-    boolean passed = false;
-    for (String line : lines) {
-      if (line.startsWith("Macbeth\t28") || line.startsWith("[Macbeth,28]")) {
-        passed = true;
-        break;
-      }
-    }
-    assertTrue(passed);
-  }
-}
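
One detail in runWithTop() above that can look like a bug: the expected top-5 list
contains Pair.of("", 1470L), i.e. the empty string as the most frequent "word".
This is presumably because many lines of shakes.txt begin with whitespace, and the
DoFn in wordCount() splits each line with line.split("\\s+"), which keeps a leading
empty token when the input starts with a delimiter match. A minimal, self-contained
sketch of that behaviour (the sample line below is illustrative, not taken from the
test data):

public class LeadingEmptyTokenExample {
  public static void main(String[] args) {
    // An indented line, the way play text is often laid out.
    String line = "    To be, or not to be";
    String[] words = line.split("\\s+");
    // Prints 7: the tokens are "", "To", "be,", "or", "not", "to", "be"
    System.out.println(words.length);
    // The first token is the empty string, which is what wordCount() ends up counting.
    System.out.println("first token = [" + words[0] + "]");
  }
}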

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/fn/AggregatorsIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/fn/AggregatorsIT.java b/crunch/src/it/java/org/apache/crunch/fn/AggregatorsIT.java
deleted file mode 100644
index c9584a1..0000000
--- a/crunch/src/it/java/org/apache/crunch/fn/AggregatorsIT.java
+++ /dev/null
@@ -1,83 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.fn;
-
-import static org.apache.crunch.fn.Aggregators.SUM_INTS;
-import static org.apache.crunch.fn.Aggregators.pairAggregator;
-import static org.apache.crunch.types.writable.Writables.ints;
-import static org.apache.crunch.types.writable.Writables.pairs;
-import static org.apache.crunch.types.writable.Writables.strings;
-import static org.apache.crunch.types.writable.Writables.tableOf;
-import static org.hamcrest.Matchers.is;
-import static org.junit.Assert.assertThat;
-
-import java.util.Collection;
-import java.util.Map;
-
-import org.apache.crunch.MapFn;
-import org.apache.crunch.PCollection;
-import org.apache.crunch.PTable;
-import org.apache.crunch.Pair;
-import org.apache.crunch.Pipeline;
-import org.apache.crunch.test.Tests;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-import org.junit.runners.Parameterized.Parameters;
-
-
-@RunWith(Parameterized.class)
-public class AggregatorsIT {
-  private Pipeline pipeline;
-
-  @Parameters
-  public static Collection<Object[]> params() {
-    return Tests.pipelinesParams(AggregatorsIT.class);
-  }
-
-  public AggregatorsIT(Pipeline pipeline) {
-    this.pipeline = pipeline;
-  }
-
-  @Test
-  public void testPairAggregator() {
-    PCollection<String> lines = pipeline.readTextFile(Tests.pathTo(this, "ints.txt"));
-
-    PTable<String, Pair<Integer, Integer>> table = lines.parallelDo(new SplitLine(),
-        tableOf(strings(), pairs(ints(), ints())));
-
-    PTable<String, Pair<Integer, Integer>> combinedTable = table.groupByKey().combineValues(
-        pairAggregator(SUM_INTS(), SUM_INTS()));
-
-    Map<String, Pair<Integer, Integer>> result = combinedTable.asMap().getValue();
-
-    assertThat(result.size(), is(2));
-    assertThat(result.get("a"), is(Pair.of(9,  12)));
-    assertThat(result.get("b"), is(Pair.of(11,  13)));
-  }
-
-  private static final class SplitLine extends MapFn<String, Pair<String, Pair<Integer, Integer>>> {
-    @Override
-    public Pair<String, Pair<Integer, Integer>> map(String input) {
-      String[] split = input.split("\t");
-      return Pair.of(split[0],
-          Pair.of(Integer.parseInt(split[1]), Integer.parseInt(split[2])));
-    }
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/impl/mem/MemPipelineFileWritingIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/impl/mem/MemPipelineFileWritingIT.java b/crunch/src/it/java/org/apache/crunch/impl/mem/MemPipelineFileWritingIT.java
deleted file mode 100644
index 976a43e..0000000
--- a/crunch/src/it/java/org/apache/crunch/impl/mem/MemPipelineFileWritingIT.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mem;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import java.io.File;
-import java.util.List;
-
-import org.apache.crunch.PCollection;
-import org.apache.crunch.Pipeline;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.junit.Rule;
-import org.junit.Test;
-
-import com.google.common.base.Charsets;
-import com.google.common.collect.ImmutableList;
-import com.google.common.io.Files;
-
-public class MemPipelineFileWritingIT {
-  @Rule
-  public TemporaryPath baseTmpDir = TemporaryPaths.create();
-
-  @Test
-  public void testMemPipelineFileWriter() throws Exception {
-    File tmpDir = baseTmpDir.getFile("mempipe");
-    Pipeline p = MemPipeline.getInstance();
-    PCollection<String> lines = MemPipeline.collectionOf("hello", "world");
-    p.writeTextFile(lines, tmpDir.toString());
-    p.done();
-    assertTrue(tmpDir.exists());
-    File[] files = tmpDir.listFiles();
-    assertTrue(files != null && files.length > 0);
-    for (File f : files) {
-      if (!f.getName().startsWith(".")) {
-        List<String> txt = Files.readLines(f, Charsets.UTF_8);
-        assertEquals(ImmutableList.of("hello", "world"), txt);
-      }
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/impl/mr/collect/UnionCollectionIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/impl/mr/collect/UnionCollectionIT.java b/crunch/src/it/java/org/apache/crunch/impl/mr/collect/UnionCollectionIT.java
deleted file mode 100644
index f9f73b2..0000000
--- a/crunch/src/it/java/org/apache/crunch/impl/mr/collect/UnionCollectionIT.java
+++ /dev/null
@@ -1,154 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.collect;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.List;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.crunch.PCollection;
-import org.apache.crunch.PTableKeyValueIT;
-import org.apache.crunch.Pipeline;
-import org.apache.crunch.impl.mem.MemPipeline;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.io.At;
-import org.apache.crunch.io.To;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.PTypeFamily;
-import org.apache.crunch.types.avro.AvroTypeFamily;
-import org.apache.crunch.types.avro.Avros;
-import org.apache.crunch.types.writable.WritableTypeFamily;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-import org.junit.runners.Parameterized.Parameters;
-
-import com.google.common.collect.Lists;
-
-@RunWith(value = Parameterized.class)
-public class UnionCollectionIT {
-  @Rule
-  public TemporaryPath tmpDir = TemporaryPaths.create();
-
-  private static final Log LOG = LogFactory.getLog(UnionCollectionIT.class);
-
-  private PTypeFamily typeFamily;
-  private Pipeline pipeline;
-  private PCollection<String> union;
-
-  private ArrayList<String> EXPECTED = Lists.newArrayList("a", "a", "b", "c", "c", "d", "e");
-
-  private Class pipelineClass;
-
-  @Before
-  @SuppressWarnings("unchecked")
-  public void setUp() throws IOException {
-    String inputFile1 = tmpDir.copyResourceFileName("set1.txt");
-    String inputFile2 = tmpDir.copyResourceFileName("set2.txt");
-    if (pipelineClass == null) {
-      pipeline = MemPipeline.getInstance();
-    } else {
-      pipeline = new MRPipeline(pipelineClass, tmpDir.getDefaultConfiguration());
-    }
-    PCollection<String> firstCollection = pipeline.read(At.textFile(inputFile1, typeFamily.strings()));
-    PCollection<String> secondCollection = pipeline.read(At.textFile(inputFile2, typeFamily.strings()));
-
-    LOG.info("Test fixture: [" + pipeline.getClass().getSimpleName() + " : " + typeFamily.getClass().getSimpleName()
-        + "]  First: " + Lists.newArrayList(firstCollection.materialize().iterator()) + ", Second: "
-        + Lists.newArrayList(secondCollection.materialize().iterator()));
-
-    union = secondCollection.union(firstCollection);
-  }
-
-  @Parameters
-  public static Collection<Object[]> data() throws IOException {
-    Object[][] data = new Object[][] { { WritableTypeFamily.getInstance(), PTableKeyValueIT.class },
-        { WritableTypeFamily.getInstance(), null }, { AvroTypeFamily.getInstance(), PTableKeyValueIT.class },
-        { AvroTypeFamily.getInstance(), null } };
-    return Arrays.asList(data);
-  }
-
-  public UnionCollectionIT(PTypeFamily typeFamily, Class pipelineClass) {
-    this.typeFamily = typeFamily;
-    this.pipelineClass = pipelineClass;
-  }
-
-  @Test
-  public void unionMaterializeShouldNotThrowNPE() throws Exception {
-    checkMaterialized(union.materialize());
-    checkMaterialized(pipeline.materialize(union));
-  }
-
-  private void checkMaterialized(Iterable<String> materialized) {
-    List<String> materializedValues = Lists.newArrayList(materialized.iterator());
-    Collections.sort(materializedValues);
-    LOG.info("Materialized union: " + materializedValues);
-    assertEquals(EXPECTED, materializedValues);
-  }
-
-  @Test
-  public void unionWriteShouldNotThrowNPE() throws IOException {
-    String outputPath1 = tmpDir.getFileName("output1");
-    String outputPath2 = tmpDir.getFileName("output2");
-    String outputPath3 = tmpDir.getFileName("output3");
-
-    if (typeFamily == AvroTypeFamily.getInstance()) {
-      union.write(To.avroFile(outputPath1));
-      pipeline.write(union, To.avroFile(outputPath2));
-
-      pipeline.run();
-
-      checkFileContents(outputPath1);
-      checkFileContents(outputPath2);
-
-    } else {
-
-      union.write(To.textFile(outputPath1));
-      pipeline.write(union, To.textFile(outputPath2));
-      pipeline.writeTextFile(union, outputPath3);
-
-      pipeline.run();
-
-      checkFileContents(outputPath1);
-      checkFileContents(outputPath2);
-      checkFileContents(outputPath3);
-    }
-  }
-
-  private void checkFileContents(String filePath) throws IOException {
-
-    List<String> fileContentValues = (typeFamily != AvroTypeFamily.getInstance() || !(pipeline instanceof MRPipeline)) ? Lists
-        .newArrayList(pipeline.read(At.textFile(filePath, typeFamily.strings())).materialize().iterator()) : Lists
-        .newArrayList(pipeline.read(At.avroFile(filePath, Avros.strings())).materialize().iterator());
-
-    Collections.sort(fileContentValues);
-
-    LOG.info("Saved Union: " + fileContentValues);
-    assertEquals(EXPECTED, fileContentValues);
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/io/CompositePathIterableIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/io/CompositePathIterableIT.java b/crunch/src/it/java/org/apache/crunch/io/CompositePathIterableIT.java
deleted file mode 100644
index 08d226d..0000000
--- a/crunch/src/it/java/org/apache/crunch/io/CompositePathIterableIT.java
+++ /dev/null
@@ -1,84 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-
-import java.io.File;
-import java.io.IOException;
-
-import org.apache.crunch.io.text.TextFileReaderFactory;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.writable.Writables;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.LocalFileSystem;
-import org.apache.hadoop.fs.Path;
-import org.junit.Rule;
-import org.junit.Test;
-
-import com.google.common.collect.Lists;
-
-public class CompositePathIterableIT {
-  @Rule
-  public TemporaryPath tmpDir = TemporaryPaths.create();
-
-  @Test
-  public void testCreate_FilePresent() throws IOException {
-    String inputFilePath = tmpDir.copyResourceFileName("set1.txt");
-    Configuration conf = new Configuration();
-    LocalFileSystem local = FileSystem.getLocal(conf);
-
-    Iterable<String> iterable = CompositePathIterable.create(local, new Path(inputFilePath),
-        new TextFileReaderFactory<String>(Writables.strings()));
-
-    assertEquals(Lists.newArrayList("b", "c", "a", "e"), Lists.newArrayList(iterable));
-
-  }
-
-  @Test
-  public void testCreate_DirectoryPresentButNoFiles() throws IOException {
-    Path emptyInputDir = tmpDir.getRootPath();
-
-    Configuration conf = new Configuration();
-    LocalFileSystem local = FileSystem.getLocal(conf);
-
-    Iterable<String> iterable = CompositePathIterable.create(local, emptyInputDir,
-        new TextFileReaderFactory<String>(Writables.strings()));
-
-    assertTrue(Lists.newArrayList(iterable).isEmpty());
-  }
-
-  @Test(expected = IOException.class)
-  public void testCreate_DirectoryNotPresent() throws IOException {
-    File nonExistentDir = tmpDir.getFile("not-there");
-
-    // Sanity check
-    assertFalse(nonExistentDir.exists());
-
-    Configuration conf = new Configuration();
-    LocalFileSystem local = FileSystem.getLocal(conf);
-
-    CompositePathIterable.create(local, new Path(nonExistentDir.getAbsolutePath()), new TextFileReaderFactory<String>(
-        Writables.strings()));
-  }
-
-}
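
Not part of the patch, just orientation for the removed CompositePathIterableIT: a short sketch of reading every file under a directory via CompositePathIterable, using the same reader factory and PType the test used. The input directory is an assumed placeholder.

import org.apache.crunch.io.CompositePathIterable;
import org.apache.crunch.io.text.TextFileReaderFactory;
import org.apache.crunch.types.writable.Writables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CompositePathSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem local = FileSystem.getLocal(conf);

    // Iterates over the lines of every file found under the given path; as the
    // deleted IT shows, an empty directory yields an empty iterable and a
    // missing path makes create() throw an IOException.
    Iterable<String> lines = CompositePathIterable.create(local, new Path("/tmp/input-dir"),
        new TextFileReaderFactory<String>(Writables.strings()));
    for (String line : lines) {
      System.out.println(line);
    }
  }
}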

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/io/NLineInputIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/io/NLineInputIT.java b/crunch/src/it/java/org/apache/crunch/io/NLineInputIT.java
deleted file mode 100644
index 52b8ff5..0000000
--- a/crunch/src/it/java/org/apache/crunch/io/NLineInputIT.java
+++ /dev/null
@@ -1,72 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io;
-
-import static org.junit.Assert.assertEquals;
-
-import org.apache.crunch.DoFn;
-import org.apache.crunch.Emitter;
-import org.apache.crunch.PCollection;
-import org.apache.crunch.Pipeline;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.io.text.NLineFileSource;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.writable.Writables;
-import org.apache.crunch.types.avro.Avros;
-import org.apache.hadoop.conf.Configuration;
-import org.junit.Rule;
-import org.junit.Test;
-
-public class NLineInputIT {
-
-  @Rule
-  public TemporaryPath tmpDir = TemporaryPaths.create();
-  
-  @Test
-  public void testNLine() throws Exception {
-    String urlsInputPath = tmpDir.copyResourceFileName("urls.txt");
-    Configuration conf = new Configuration(tmpDir.getDefaultConfiguration());
-    conf.setInt("io.sort.mb", 10);
-    Pipeline pipeline = new MRPipeline(NLineInputIT.class, conf);
-    PCollection<String> urls = pipeline.read(new NLineFileSource<String>(urlsInputPath,
-        Writables.strings(), 2));
-    assertEquals(new Integer(2),
-        urls.parallelDo(new LineCountFn(), Avros.ints()).max().getValue());
-  }
-  
-  private static class LineCountFn extends DoFn<String, Integer> {
-
-    private int lineCount = 0;
-    
-    @Override
-    public void initialize() {
-      this.lineCount = 0;
-    }
-    
-    @Override
-    public void process(String input, Emitter<Integer> emitter) {
-      lineCount++;
-    }
-    
-    @Override
-    public void cleanup(Emitter<Integer> emitter) {
-      emitter.emit(lineCount);
-    }
-  }
-}
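
A stand-alone sketch (not from the patch) of the NLineFileSource usage the removed NLineInputIT covered: the third constructor argument caps how many lines each input split receives, which is what the test's per-split line count asserts. The file paths are assumed placeholders.

import org.apache.crunch.PCollection;
import org.apache.crunch.Pipeline;
import org.apache.crunch.impl.mr.MRPipeline;
import org.apache.crunch.io.text.NLineFileSource;
import org.apache.crunch.types.writable.Writables;
import org.apache.hadoop.conf.Configuration;

public class NLineSketch {
  public static void main(String[] args) {
    Pipeline pipeline = new MRPipeline(NLineSketch.class, new Configuration());
    // Each map task is handed at most 2 lines of the input file.
    PCollection<String> urls = pipeline.read(
        new NLineFileSource<String>("/tmp/urls.txt", Writables.strings(), 2));
    pipeline.writeTextFile(urls, "/tmp/urls-copy");
    pipeline.done();
  }
}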

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/io/TextFileTableIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/io/TextFileTableIT.java b/crunch/src/it/java/org/apache/crunch/io/TextFileTableIT.java
deleted file mode 100644
index bddc0b5..0000000
--- a/crunch/src/it/java/org/apache/crunch/io/TextFileTableIT.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io;
-
-import static org.apache.crunch.types.writable.Writables.*;
-import static org.junit.Assert.assertEquals;
-
-import java.util.Set;
-
-import org.apache.crunch.PTable;
-import org.apache.crunch.Pair;
-import org.apache.crunch.Pipeline;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.io.text.TextFileTableSource;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.junit.Rule;
-import org.junit.Test;
-
-import com.google.common.collect.ImmutableSet;
-
-/**
- *
- */
-public class TextFileTableIT {
-
-  @Rule
-  public TemporaryPath tmpDir = TemporaryPaths.create();
-  
-  @Test
-  public void testTextFileTable() throws Exception {
-    String urlsFile = tmpDir.copyResourceFileName("urls.txt");
-    Pipeline pipeline = new MRPipeline(TextFileTableIT.class, tmpDir.getDefaultConfiguration());
-    PTable<String, String> urls = pipeline.read(
-        new TextFileTableSource<String, String>(urlsFile, tableOf(strings(), strings())));
-    Set<Pair<String, Long>> cnts = ImmutableSet.copyOf(urls.keys().count().materialize());
-    assertEquals(ImmutableSet.of(Pair.of("www.A.com", 4L), Pair.of("www.B.com", 2L),
-        Pair.of("www.C.com", 1L), Pair.of("www.D.com", 1L), Pair.of("www.E.com", 1L),
-        Pair.of("www.F.com", 2L)), cnts);
-  }
-}
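
Finally, a sketch (not from the patch) for the removed TextFileTableIT: TextFileTableSource parses each line of a text file into a key/value pair according to the supplied table PType, after which ordinary PTable operations such as keys().count() apply. The input and output paths are assumed placeholders.

import static org.apache.crunch.types.writable.Writables.*;

import org.apache.crunch.PTable;
import org.apache.crunch.Pipeline;
import org.apache.crunch.impl.mr.MRPipeline;
import org.apache.crunch.io.text.TextFileTableSource;
import org.apache.hadoop.conf.Configuration;

public class TextTableSketch {
  public static void main(String[] args) {
    Pipeline pipeline = new MRPipeline(TextTableSketch.class, new Configuration());
    PTable<String, String> urls = pipeline.read(
        new TextFileTableSource<String, String>("/tmp/urls.txt", tableOf(strings(), strings())));
    // Count how often each key (e.g. a URL) occurs across the file.
    PTable<String, Long> counts = urls.keys().count();
    pipeline.writeTextFile(counts, "/tmp/url-counts");
    pipeline.done();
  }
}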


[37/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/resources/shakes.txt
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/resources/shakes.txt b/crunch-core/src/it/resources/shakes.txt
new file mode 100644
index 0000000..63acf18
--- /dev/null
+++ b/crunch-core/src/it/resources/shakes.txt
@@ -0,0 +1,3667 @@
+***The Project Gutenberg's Etext of Shakespeare's First Folio***
+********************The Tragedie of Macbeth*********************
+
+This is our 3rd edition of most of these plays.  See the index.
+
+
+Copyright laws are changing all over the world, be sure to check
+the copyright laws for your country before posting these files!!
+
+Please take a look at the important information in this header.
+We encourage you to keep this file on your own disk, keeping an
+electronic path open for the next readers.  Do not remove this.
+
+
+**Welcome To The World of Free Plain Vanilla Electronic Texts**
+
+**Etexts Readable By Both Humans and By Computers, Since 1971**
+
+*These Etexts Prepared By Hundreds of Volunteers and Donations*
+
+Information on contacting Project Gutenberg to get Etexts, and
+further information is included below.  We need your donations.
+
+
+The Tragedie of Macbeth
+
+by William Shakespeare
+
+July, 2000  [Etext #2264]
+
+
+***The Project Gutenberg's Etext of Shakespeare's First Folio***
+********************The Tragedie of Macbeth*********************
+
+*****This file should be named 0ws3410.txt or 0ws3410.zip******
+
+Corrected EDITIONS of our etexts get a new NUMBER, 0ws3411.txt
+VERSIONS based on separate sources get new LETTER, 0ws3410a.txt
+
+
+Project Gutenberg Etexts are usually created from multiple editions,
+all of which are in the Public Domain in the United States, unless a
+copyright notice is included.  Therefore, we usually do NOT keep any
+of these books in compliance with any particular paper edition.
+
+
+We are now trying to release all our books one month in advance
+of the official release dates, leaving time for better editing.
+
+Please note:  neither this list nor its contents are final till
+midnight of the last day of the month of any such announcement.
+The official release date of all Project Gutenberg Etexts is at
+Midnight, Central Time, of the last day of the stated month.  A
+preliminary version may often be posted for suggestion, comment
+and editing by those who wish to do so.  To be sure you have an
+up to date first edition [xxxxx10x.xxx] please check file sizes
+in the first week of the next month.  Since our ftp program has
+a bug in it that scrambles the date [tried to fix and failed] a
+look at the file size will have to do, but we will try to see a
+new copy has at least one byte more or less.
+
+
+Information about Project Gutenberg (one page)
+
+We produce about two million dollars for each hour we work.  The
+time it takes us, a rather conservative estimate, is fifty hours
+to get any etext selected, entered, proofread, edited, copyright
+searched and analyzed, the copyright letters written, etc.  This
+projected audience is one hundred million readers.  If our value
+per text is nominally estimated at one dollar then we produce $2
+million dollars per hour this year as we release thirty-six text
+files per month, or 432 more Etexts in 1999 for a total of 2000+
+If these reach just 10% of the computerized population, then the
+total should reach over 200 billion Etexts given away this year.
+
+The Goal of Project Gutenberg is to Give Away One Trillion Etext
+Files by December 31, 2001.  [10,000 x 100,000,000 = 1 Trillion]
+This is ten thousand titles each to one hundred million readers,
+which is only ~5% of the present number of computer users.
+
+At our revised rates of production, we will reach only one-third
+of that goal by the end of 2001, or about 3,333 Etexts unless we
+manage to get some real funding; currently our funding is mostly
+from Michael Hart's salary at Carnegie-Mellon University, and an
+assortment of sporadic gifts; this salary is only good for a few
+more years, so we are looking for something to replace it, as we
+don't want Project Gutenberg to be so dependent on one person.
+
+We need your donations more than ever!
+
+
+All donations should be made to "Project Gutenberg/CMU": and are
+tax deductible to the extent allowable by law.  (CMU = Carnegie-
+Mellon University).
+
+For these and other matters, please mail to:
+
+Project Gutenberg
+P. O. Box  2782
+Champaign, IL 61825
+
+When all other email fails. . .try our Executive Director:
+Michael S. Hart <ha...@pobox.com>
+hart@pobox.com forwards to hart@prairienet.org and archive.org
+if your mail bounces from archive.org, I will still see it, if
+it bounces from prairienet.org, better resend later on. . . .
+
+We would prefer to send you this information by email.
+
+******
+
+To access Project Gutenberg etexts, use any Web browser
+to view http://promo.net/pg.  This site lists Etexts by
+author and by title, and includes information about how
+to get involved with Project Gutenberg.  You could also
+download our past Newsletters, or subscribe here.  This
+is one of our major sites, please email hart@pobox.com,
+for a more complete list of our various sites.
+
+To go directly to the etext collections, use FTP or any
+Web browser to visit a Project Gutenberg mirror (mirror
+sites are available on 7 continents; mirrors are listed
+at http://promo.net/pg).
+
+Mac users, do NOT point and click, typing works better.
+
+Example FTP session:
+
+ftp sunsite.unc.edu
+login: anonymous
+password: your@login
+cd pub/docs/books/gutenberg
+cd etext90 through etext99
+dir [to see files]
+get or mget [to get files. . .set bin for zip files]
+GET GUTINDEX.??  [to get a year's listing of books, e.g., GUTINDEX.99]
+GET GUTINDEX.ALL [to get a listing of ALL books]
+
+***
+
+**Information prepared by the Project Gutenberg legal advisor**
+
+(Three Pages)
+
+
+***START**THE SMALL PRINT!**FOR PUBLIC DOMAIN ETEXTS**START***
+Why is this "Small Print!" statement here?  You know: lawyers.
+They tell us you might sue us if there is something wrong with
+your copy of this etext, even if you got it for free from
+someone other than us, and even if what's wrong is not our
+fault.  So, among other things, this "Small Print!" statement
+disclaims most of our liability to you.  It also tells you how
+you can distribute copies of this etext if you want to.
+
+*BEFORE!* YOU USE OR READ THIS ETEXT
+By using or reading any part of this PROJECT GUTENBERG-tm
+etext, you indicate that you understand, agree to and accept
+this "Small Print!" statement.  If you do not, you can receive
+a refund of the money (if any) you paid for this etext by
+sending a request within 30 days of receiving it to the person
+you got it from.  If you received this etext on a physical
+medium (such as a disk), you must return it with your request.
+
+ABOUT PROJECT GUTENBERG-TM ETEXTS
+This PROJECT GUTENBERG-tm etext, like most PROJECT GUTENBERG-
+tm etexts, is a "public domain" work distributed by Professor
+Michael S. Hart through the Project Gutenberg Association at
+Carnegie-Mellon University (the "Project").  Among other
+things, this means that no one owns a United States copyright
+on or for this work, so the Project (and you!) can copy and
+distribute it in the United States without permission and
+without paying copyright royalties.  Special rules, set forth
+below, apply if you wish to copy and distribute this etext
+under the Project's "PROJECT GUTENBERG" trademark.
+
+To create these etexts, the Project expends considerable
+efforts to identify, transcribe and proofread public domain
+works.  Despite these efforts, the Project's etexts and any
+medium they may be on may contain "Defects".  Among other
+things, Defects may take the form of incomplete, inaccurate or
+corrupt data, transcription errors, a copyright or other
+intellectual property infringement, a defective or damaged
+disk or other etext medium, a computer virus, or computer
+codes that damage or cannot be read by your equipment.
+
+LIMITED WARRANTY; DISCLAIMER OF DAMAGES
+But for the "Right of Replacement or Refund" described below,
+[1] the Project (and any other party you may receive this
+etext from as a PROJECT GUTENBERG-tm etext) disclaims all
+liability to you for damages, costs and expenses, including
+legal fees, and [2] YOU HAVE NO REMEDIES FOR NEGLIGENCE OR
+UNDER STRICT LIABILITY, OR FOR BREACH OF WARRANTY OR CONTRACT,
+INCLUDING BUT NOT LIMITED TO INDIRECT, CONSEQUENTIAL, PUNITIVE
+OR INCIDENTAL DAMAGES, EVEN IF YOU GIVE NOTICE OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+If you discover a Defect in this etext within 90 days of
+receiving it, you can receive a refund of the money (if any)
+you paid for it by sending an explanatory note within that
+time to the person you received it from.  If you received it
+on a physical medium, you must return it with your note, and
+such person may choose to alternatively give you a replacement
+copy.  If you received it electronically, such person may
+choose to alternatively give you a second opportunity to
+receive it electronically.
+
+THIS ETEXT IS OTHERWISE PROVIDED TO YOU "AS-IS".  NO OTHER
+WARRANTIES OF ANY KIND, EXPRESS OR IMPLIED, ARE MADE TO YOU AS
+TO THE ETEXT OR ANY MEDIUM IT MAY BE ON, INCLUDING BUT NOT
+LIMITED TO WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+PARTICULAR PURPOSE.
+
+Some states do not allow disclaimers of implied warranties or
+the exclusion or limitation of consequential damages, so the
+above disclaimers and exclusions may not apply to you, and you
+may have other legal rights.
+
+INDEMNITY
+You will indemnify and hold the Project, its directors,
+officers, members and agents harmless from all liability, cost
+and expense, including legal fees, that arise directly or
+indirectly from any of the following that you do or cause:
+[1] distribution of this etext, [2] alteration, modification,
+or addition to the etext, or [3] any Defect.
+
+DISTRIBUTION UNDER "PROJECT GUTENBERG-tm"
+You may distribute copies of this etext electronically, or by
+disk, book or any other medium if you either delete this
+"Small Print!" and all other references to Project Gutenberg,
+or:
+
+[1]  Only give exact copies of it.  Among other things, this
+     requires that you do not remove, alter or modify the
+     etext or this "small print!" statement.  You may however,
+     if you wish, distribute this etext in machine readable
+     binary, compressed, mark-up, or proprietary form,
+     including any form resulting from conversion by word pro-
+     cessing or hypertext software, but only so long as
+     *EITHER*:
+
+     [*]  The etext, when displayed, is clearly readable, and
+          does *not* contain characters other than those
+          intended by the author of the work, although tilde
+          (~), asterisk (*) and underline (_) characters may
+          be used to convey punctuation intended by the
+          author, and additional characters may be used to
+          indicate hypertext links; OR
+
+     [*]  The etext may be readily converted by the reader at
+          no expense into plain ASCII, EBCDIC or equivalent
+          form by the program that displays the etext (as is
+          the case, for instance, with most word processors);
+          OR
+
+     [*]  You provide, or agree to also provide on request at
+          no additional cost, fee or expense, a copy of the
+          etext in its original plain ASCII form (or in EBCDIC
+          or other equivalent proprietary form).
+
+[2]  Honor the etext refund and replacement provisions of this
+     "Small Print!" statement.
+
+[3]  Pay a trademark license fee to the Project of 20% of the
+     net profits you derive calculated using the method you
+     already use to calculate your applicable taxes.  If you
+     don't derive profits, no royalty is due.  Royalties are
+     payable to "Project Gutenberg Association/Carnegie-Mellon
+     University" within the 60 days following each
+     date you prepare (or were legally required to prepare)
+     your annual (or equivalent periodic) tax return.
+
+WHAT IF YOU *WANT* TO SEND MONEY EVEN IF YOU DON'T HAVE TO?
+The Project gratefully accepts contributions in money, time,
+scanning machines, OCR software, public domain etexts, royalty
+free copyright licenses, and every other sort of contribution
+you can think of.  Money should be paid to "Project Gutenberg
+Association / Carnegie-Mellon University".
+
+*END*THE SMALL PRINT! FOR PUBLIC DOMAIN ETEXTS*Ver.04.29.93*END*
+
+
+
+
+
+Project Gutenberg's Etext of Shakespeare's The Tragedie of Macbeth
+
+
+
+
+
+Executive Director's Notes:
+
+In addition to the notes below, and so you will *NOT* think all
+the spelling errors introduced by the printers of the time have
+been corrected, here are the first few lines of Hamlet, as they
+are presented herein:
+
+  Barnardo. Who's there?
+  Fran. Nay answer me: Stand & vnfold
+your selfe
+
+   Bar. Long liue the King
+
+***
+
+As I understand it, the printers often ran out of certain words
+or letters they had often packed into a "cliche". . .this is the
+original meaning of the term cliche. . .and thus, being unwilling
+to unpack the cliches, and thus you will see some substitutions
+that look very odd. . .such as the exchanges of u for v, v for u,
+above. . .and you may wonder why they did it this way, presuming
+Shakespeare did not actually write the play in this manner. . . .
+
+The answer is that they MAY have packed "liue" into a cliche at a
+time when they were out of "v"'s. . .possibly having used "vv" in
+place of some "w"'s, etc.  This was a common practice of the day,
+as print was still quite expensive, and they didn't want to spend
+more on a wider selection of characters than they had to.
+
+You will find a lot of these kinds of "errors" in this text, as I
+have mentioned in other times and places, many "scholars" have an
+extreme attachment to these errors, and many have accorded them a
+very high place in the "canon" of Shakespeare.  My father read an
+assortment of these made available to him by Cambridge University
+in England for several months in a glass room constructed for the
+purpose.  To the best of my knowledge he read ALL those available
+. . .in great detail. . .and determined from the various changes,
+that Shakespeare most likely did not write in nearly as many of a
+variety of errors we credit him for, even though he was in/famous
+for signing his name with several different spellings.
+
+So, please take this into account when reading the comments below
+made by our volunteer who prepared this file:  you may see errors
+that are "not" errors. . . .
+
+So. . .with this caveat. . .we have NOT changed the canon errors,
+here is the Project Gutenberg Etext of Shakespeare's The Tragedie 
+of Macbeth.
+
+Michael S. Hart
+Project Gutenberg
+Executive Director
+
+
+***
+
+
+Scanner's Notes: What this is and isn't.  This was taken from
+a copy of Shakespeare's first folio and it is as close as I can
+come in ASCII to the printed text.
+
+The elongated S's have been changed to small s's and the
+conjoined ae have been changed to ae.  I have left the spelling,
+punctuation, capitalization as close as possible to the
+printed text.  I have corrected some spelling mistakes (I have put
+together a spelling dictionary devised from the spellings of the
+Geneva Bible and Shakespeare's First Folio and have unified
+spellings according to this template), typo's and expanded
+abbreviations as I have come across them.  Everything within
+brackets [] is what I have added.  So if you don't like that
+you can delete everything within the brackets if you want a
+purer Shakespeare.
+
+Another thing that you should be aware of is that there are textual
+differences between various copies of the first folio.  So there may
+be differences (other than what I have mentioned above) between
+this and other first folio editions.  This is due to the printer's
+habit of setting the type and running off a number of copies and
+then proofing the printed copy and correcting the type and then
+continuing the printing run.  The proof run wasn't thrown away but
+incorporated into the printed copies.  This is just the way it is.
+The text I have used was a composite of more than 30 different
+First Folio editions' best pages.
+
+If you find any scanning errors, out and out typos, punctuation
+errors, or if you disagree with my spelling choices please feel
+free to email me those errors.  I wish to make this the best
+etext possible.  My email address for right now are haradda@aol.com
+and davidr@inconnect.com.  I hope that you enjoy this.
+
+David Reed
+
+The Tragedie of Macbeth
+
+Actus Primus. Scoena Prima.
+
+Thunder and Lightning. Enter three Witches.
+
+  1. When shall we three meet againe?
+In Thunder, Lightning, or in Raine?
+  2. When the Hurley-burley's done,
+When the Battaile's lost, and wonne
+
+   3. That will be ere the set of Sunne
+
+   1. Where the place?
+  2. Vpon the Heath
+
+   3. There to meet with Macbeth
+
+   1. I come, Gray-Malkin
+
+   All. Padock calls anon: faire is foule, and foule is faire,
+Houer through the fogge and filthie ayre.
+
+Exeunt.
+
+
+Scena Secunda.
+
+Alarum within. Enter King Malcome, Donalbaine, Lenox, with
+attendants,
+meeting a bleeding Captaine.
+
+  King. What bloody man is that? he can report,
+As seemeth by his plight, of the Reuolt
+The newest state
+
+   Mal. This is the Serieant,
+Who like a good and hardie Souldier fought
+'Gainst my Captiuitie: Haile braue friend;
+Say to the King, the knowledge of the Broyle,
+As thou didst leaue it
+
+   Cap. Doubtfull it stood,
+As two spent Swimmers, that doe cling together,
+And choake their Art: The mercilesse Macdonwald
+(Worthie to be a Rebell, for to that
+The multiplying Villanies of Nature
+Doe swarme vpon him) from the Westerne Isles
+Of Kernes and Gallowgrosses is supply'd,
+And Fortune on his damned Quarry smiling,
+Shew'd like a Rebells Whore: but all's too weake:
+For braue Macbeth (well hee deserues that Name)
+Disdayning Fortune, with his brandisht Steele,
+Which smoak'd with bloody execution
+(Like Valours Minion) caru'd out his passage,
+Till hee fac'd the Slaue:
+Which neu'r shooke hands, nor bad farwell to him,
+Till he vnseam'd him from the Naue toth' Chops,
+And fix'd his Head vpon our Battlements
+
+   King. O valiant Cousin, worthy Gentleman
+
+   Cap. As whence the Sunne 'gins his reflection,
+Shipwracking Stormes, and direfull Thunders:
+So from that Spring, whence comfort seem'd to come,
+Discomfort swells: Marke King of Scotland, marke,
+No sooner Iustice had, with Valour arm'd,
+Compell'd these skipping Kernes to trust their heeles,
+But the Norweyan Lord, surueying vantage,
+With furbusht Armes, and new supplyes of men,
+Began a fresh assault
+
+   King. Dismay'd not this our Captaines, Macbeth and
+Banquoh?
+  Cap. Yes, as Sparrowes, Eagles;
+Or the Hare, the Lyon:
+If I say sooth, I must report they were
+As Cannons ouer-charg'd with double Cracks,
+So they doubly redoubled stroakes vpon the Foe:
+Except they meant to bathe in reeking Wounds,
+Or memorize another Golgotha,
+I cannot tell: but I am faint,
+My Gashes cry for helpe
+
+   King. So well thy words become thee, as thy wounds,
+They smack of Honor both: Goe get him Surgeons.
+Enter Rosse and Angus.
+
+Who comes here?
+  Mal. The worthy Thane of Rosse
+
+   Lenox. What a haste lookes through his eyes?
+So should he looke, that seemes to speake things strange
+
+   Rosse. God saue the King
+
+   King. Whence cam'st thou, worthy Thane?
+  Rosse. From Fiffe, great King,
+Where the Norweyan Banners flowt the Skie,
+And fanne our people cold.
+Norway himselfe, with terrible numbers,
+Assisted by that most disloyall Traytor,
+The Thane of Cawdor, began a dismall Conflict,
+Till that Bellona's Bridegroome, lapt in proofe,
+Confronted him with selfe-comparisons,
+Point against Point, rebellious Arme 'gainst Arme,
+Curbing his lauish spirit: and to conclude,
+The Victorie fell on vs
+
+   King. Great happinesse
+
+   Rosse. That now Sweno, the Norwayes King,
+Craues composition:
+Nor would we deigne him buriall of his men,
+Till he disbursed, at Saint Colmes ynch,
+Ten thousand Dollars, to our generall vse
+
+   King. No more that Thane of Cawdor shall deceiue
+Our Bosome interest: Goe pronounce his present death,
+And with his former Title greet Macbeth
+
+   Rosse. Ile see it done
+
+   King. What he hath lost, Noble Macbeth hath wonne.
+
+Exeunt.
+
+
+Scena Tertia.
+
+Thunder. Enter the three Witches.
+
+  1. Where hast thou beene, Sister?
+  2. Killing Swine
+
+   3. Sister, where thou?
+  1. A Saylors Wife had Chestnuts in her Lappe,
+And mouncht, & mouncht, and mouncht:
+Giue me, quoth I.
+Aroynt thee, Witch, the rumpe-fed Ronyon cryes.
+Her Husband's to Aleppo gone, Master o'th' Tiger:
+But in a Syue Ile thither sayle,
+And like a Rat without a tayle,
+Ile doe, Ile doe, and Ile doe
+
+   2. Ile giue thee a Winde
+
+   1. Th'art kinde
+
+   3. And I another
+
+   1. I my selfe haue all the other,
+And the very Ports they blow,
+All the Quarters that they know,
+I'th' Ship-mans Card.
+Ile dreyne him drie as Hay:
+Sleepe shall neyther Night nor Day
+Hang vpon his Pent-house Lid:
+He shall liue a man forbid:
+Wearie Seu'nights, nine times nine,
+Shall he dwindle, peake, and pine:
+Though his Barke cannot be lost,
+Yet it shall be Tempest-tost.
+Looke what I haue
+
+   2. Shew me, shew me
+
+   1. Here I haue a Pilots Thumbe,
+Wrackt, as homeward he did come.
+
+Drum within.
+
+  3. A Drumme, a Drumme:
+Macbeth doth come
+
+   All. The weyward Sisters, hand in hand,
+Posters of the Sea and Land,
+Thus doe goe, about, about,
+Thrice to thine, and thrice to mine,
+And thrice againe, to make vp nine.
+Peace, the Charme's wound vp.
+Enter Macbeth and Banquo.
+
+  Macb. So foule and faire a day I haue not seene
+
+   Banquo. How farre is't call'd to Soris? What are these,
+So wither'd, and so wilde in their attyre,
+That looke not like th' Inhabitants o'th' Earth,
+And yet are on't? Liue you, or are you aught
+That man may question? you seeme to vnderstand me,
+By each at once her choppie finger laying
+Vpon her skinnie Lips: you should be Women,
+And yet your Beards forbid me to interprete
+That you are so
+
+   Mac. Speake if you can: what are you?
+  1. All haile Macbeth, haile to thee Thane of Glamis
+
+   2. All haile Macbeth, haile to thee Thane of Cawdor
+
+   3. All haile Macbeth, that shalt be King hereafter
+
+   Banq. Good Sir, why doe you start, and seeme to feare
+Things that doe sound so faire? i'th' name of truth
+Are ye fantasticall, or that indeed
+Which outwardly ye shew? My Noble Partner
+You greet with present Grace, and great prediction
+Of Noble hauing, and of Royall hope,
+That he seemes wrapt withall: to me you speake not.
+If you can looke into the Seedes of Time,
+And say, which Graine will grow, and which will not,
+Speake then to me, who neyther begge, nor feare
+Your fauors, nor your hate
+
+   1. Hayle
+
+   2. Hayle
+
+   3. Hayle
+
+   1. Lesser than Macbeth, and greater
+
+   2. Not so happy, yet much happyer
+
+   3. Thou shalt get Kings, though thou be none:
+So all haile Macbeth, and Banquo
+
+   1. Banquo, and Macbeth, all haile
+
+   Macb. Stay you imperfect Speakers, tell me more:
+By Sinells death, I know I am Thane of Glamis,
+But how, of Cawdor? the Thane of Cawdor liues
+A prosperous Gentleman: And to be King,
+Stands not within the prospect of beleefe,
+No more then to be Cawdor. Say from whence
+You owe this strange Intelligence, or why
+Vpon this blasted Heath you stop our way
+With such Prophetique greeting?
+Speake, I charge you.
+
+Witches vanish.
+
+  Banq. The Earth hath bubbles, as the Water ha's,
+And these are of them: whither are they vanish'd?
+  Macb. Into the Ayre: and what seem'd corporall,
+Melted, as breath into the Winde.
+Would they had stay'd
+
+   Banq. Were such things here, as we doe speake about?
+Or haue we eaten on the insane Root,
+That takes the Reason Prisoner?
+  Macb. Your Children shall be Kings
+
+   Banq. You shall be King
+
+   Macb. And Thane of Cawdor too: went it not so?
+  Banq. Toth' selfe-same tune and words: who's here?
+Enter Rosse and Angus.
+
+  Rosse. The King hath happily receiu'd, Macbeth,
+The newes of thy successe: and when he reades
+Thy personall Venture in the Rebels sight,
+His Wonders and his Prayses doe contend,
+Which should be thine, or his: silenc'd with that,
+In viewing o're the rest o'th' selfe-same day,
+He findes thee in the stout Norweyan Rankes,
+Nothing afeard of what thy selfe didst make
+Strange Images of death, as thick as Tale
+Can post with post, and euery one did beare
+Thy prayses in his Kingdomes great defence,
+And powr'd them downe before him
+
+   Ang. Wee are sent,
+To giue thee from our Royall Master thanks,
+Onely to harrold thee into his sight,
+Not pay thee
+
+   Rosse. And for an earnest of a greater Honor,
+He bad me, from him, call thee Thane of Cawdor:
+In which addition, haile most worthy Thane,
+For it is thine
+
+   Banq. What, can the Deuill speake true?
+  Macb. The Thane of Cawdor liues:
+Why doe you dresse me in borrowed Robes?
+  Ang. Who was the Thane, liues yet,
+But vnder heauie Iudgement beares that Life,
+Which he deserues to loose.
+Whether he was combin'd with those of Norway,
+Or did lyne the Rebell with hidden helpe,
+And vantage; or that with both he labour'd
+In his Countreyes wracke, I know not:
+But Treasons Capitall, confess'd, and prou'd,
+Haue ouerthrowne him
+
+   Macb. Glamys, and Thane of Cawdor:
+The greatest is behinde. Thankes for your paines.
+Doe you not hope your Children shall be Kings,
+When those that gaue the Thane of Cawdor to me,
+Promis'd no lesse to them
+
+   Banq. That trusted home,
+Might yet enkindle you vnto the Crowne,
+Besides the Thane of Cawdor. But 'tis strange:
+And oftentimes, to winne vs to our harme,
+The Instruments of Darknesse tell vs Truths,
+Winne vs with honest Trifles, to betray's
+In deepest consequence.
+Cousins, a word, I pray you
+
+   Macb. Two Truths are told,
+As happy Prologues to the swelling Act
+Of the Imperiall Theame. I thanke you Gentlemen:
+This supernaturall solliciting
+Cannot be ill; cannot be good.
+If ill? why hath it giuen me earnest of successe,
+Commencing in a Truth? I am Thane of Cawdor.
+If good? why doe I yeeld to that suggestion,
+Whose horrid Image doth vnfixe my Heire,
+And make my seated Heart knock at my Ribbes,
+Against the vse of Nature? Present Feares
+Are lesse then horrible Imaginings:
+My Thought, whose Murther yet is but fantasticall,
+Shakes so my single state of Man,
+That Function is smother'd in surmise,
+And nothing is, but what is not
+
+   Banq. Looke how our Partner's rapt
+
+   Macb. If Chance will haue me King,
+Why Chance may Crowne me,
+Without my stirre
+
+   Banq. New Honors come vpon him
+Like our strange Garments, cleaue not to their mould,
+But with the aid of vse
+
+   Macb. Come what come may,
+Time, and the Houre, runs through the roughest Day
+
+   Banq. Worthy Macbeth, wee stay vpon your leysure
+
+   Macb. Giue me your fauour:
+My dull Braine was wrought with things forgotten.
+Kinde Gentlemen, your paines are registred,
+Where euery day I turne the Leafe,
+To reade them.
+Let vs toward the King: thinke vpon
+What hath chanc'd: and at more time,
+The Interim hauing weigh'd it, let vs speake
+Our free Hearts each to other
+
+   Banq. Very gladly
+
+   Macb. Till then enough:
+Come friends.
+
+Exeunt.
+
+
+Scena Quarta.
+
+Flourish. Enter King, Lenox, Malcolme, Donalbaine, and
+Attendants.
+
+  King. Is execution done on Cawdor?
+Or not those in Commission yet return'd?
+  Mal. My Liege, they are not yet come back.
+But I haue spoke with one that saw him die:
+Who did report, that very frankly hee
+Confess'd his Treasons, implor'd your Highnesse Pardon,
+And set forth a deepe Repentance:
+Nothing in his Life became him,
+Like the leauing it. Hee dy'de,
+As one that had beene studied in his death,
+To throw away the dearest thing he ow'd,
+As 'twere a carelesse Trifle
+
+   King. There's no Art,
+To finde the Mindes construction in the Face.
+He was a Gentleman, on whom I built
+An absolute Trust.
+Enter Macbeth, Banquo, Rosse, and Angus.
+
+O worthyest Cousin,
+The sinne of my Ingratitude euen now
+Was heauie on me. Thou art so farre before,
+That swiftest Wing of Recompence is slow,
+To ouertake thee. Would thou hadst lesse deseru'd,
+That the proportion both of thanks, and payment,
+Might haue beene mine: onely I haue left to say,
+More is thy due, then more then all can pay
+
+   Macb. The seruice, and the loyaltie I owe,
+In doing it, payes it selfe.
+Your Highnesse part, is to receiue our Duties:
+And our Duties are to your Throne, and State,
+Children, and Seruants; which doe but what they should,
+By doing euery thing safe toward your Loue
+And Honor
+
+   King. Welcome hither:
+I haue begun to plant thee, and will labour
+To make thee full of growing. Noble Banquo,
+That hast no lesse deseru'd, nor must be knowne
+No lesse to haue done so: Let me enfold thee,
+And hold thee to my Heart
+
+   Banq. There if I grow,
+The Haruest is your owne
+
+   King. My plenteous Ioyes,
+Wanton in fulnesse, seeke to hide themselues
+In drops of sorrow. Sonnes, Kinsmen, Thanes,
+And you whose places are the nearest, know,
+We will establish our Estate vpon
+Our eldest, Malcolme, whom we name hereafter,
+The Prince of Cumberland: which Honor must
+Not vnaccompanied, inuest him onely,
+But signes of Noblenesse, like Starres, shall shine
+On all deseruers. From hence to Envernes,
+And binde vs further to you
+
+   Macb. The Rest is Labor, which is not vs'd for you:
+Ile be my selfe the Herbenger, and make ioyfull
+The hearing of my Wife, with your approach:
+So humbly take my leaue
+
+   King. My worthy Cawdor
+
+   Macb. The Prince of Cumberland: that is a step,
+On which I must fall downe, or else o're-leape,
+For in my way it lyes. Starres hide your fires,
+Let not Light see my black and deepe desires:
+The Eye winke at the Hand: yet let that bee,
+Which the Eye feares, when it is done to see.
+Enter.
+
+  King. True worthy Banquo: he is full so valiant,
+And in his commendations, I am fed:
+It is a Banquet to me. Let's after him,
+Whose care is gone before, to bid vs welcome:
+It is a peerelesse Kinsman.
+
+Flourish. Exeunt.
+
+
+Scena Quinta.
+
+Enter Macbeths Wife alone with a Letter.
+
+  Lady. They met me in the day of successe: and I haue
+learn'd by the perfect'st report, they haue more in them, then
+mortall knowledge. When I burnt in desire to question them
+further, they made themselues Ayre, into which they vanish'd.
+Whiles I stood rapt in the wonder of it, came Missiues from
+the King, who all-hail'd me Thane of Cawdor, by which Title
+before, these weyward Sisters saluted me, and referr'd me to
+the comming on of time, with haile King that shalt be. This
+haue I thought good to deliuer thee (my dearest Partner of
+Greatnesse) that thou might'st not loose the dues of reioycing
+by being ignorant of what Greatnesse is promis'd thee. Lay
+it to thy heart and farewell.
+Glamys thou art, and Cawdor, and shalt be
+What thou art promis'd: yet doe I feare thy Nature,
+It is too full o'th' Milke of humane kindnesse,
+To catch the neerest way. Thou would'st be great,
+Art not without Ambition, but without
+The illnesse should attend it. What thou would'st highly,
+That would'st thou holily: would'st not play false,
+And yet would'st wrongly winne.
+Thould'st haue, great Glamys, that which cryes,
+Thus thou must doe, if thou haue it;
+And that which rather thou do'st feare to doe,
+Then wishest should be vndone. High thee hither,
+That I may powre my Spirits in thine Eare,
+And chastise with the valour of my Tongue
+All that impeides thee from the Golden Round,
+Which Fate and Metaphysicall ayde doth seeme
+To haue thee crown'd withall.
+Enter Messenger.
+
+What is your tidings?
+  Mess. The King comes here to Night
+
+   Lady. Thou'rt mad to say it.
+Is not thy Master with him? who, wer't so,
+Would haue inform'd for preparation
+
+   Mess. So please you, it is true: our Thane is comming:
+One of my fellowes had the speed of him;
+Who almost dead for breath, had scarcely more
+Then would make vp his Message
+
+   Lady. Giue him tending,
+He brings great newes,
+
+Exit Messenger.
+
+The Rauen himselfe is hoarse,
+That croakes the fatall entrance of Duncan
+Vnder my Battlements. Come you Spirits,
+That tend on mortall thoughts, vnsex me here,
+And fill me from the Crowne to the Toe, top-full
+Of direst Crueltie: make thick my blood,
+Stop vp th' accesse, and passage to Remorse,
+That no compunctious visitings of Nature
+Shake my fell purpose, nor keepe peace betweene
+Th' effect, and hit. Come to my Womans Brests,
+And take my Milke for Gall, you murth'ring Ministers,
+Where-euer, in your sightlesse substances,
+You wait on Natures Mischiefe. Come thick Night,
+And pall thee in the dunnest smoake of Hell,
+
+That my keene Knife see not the Wound it makes,
+Nor Heauen peepe through the Blanket of the darke,
+To cry, hold, hold.
+Enter Macbeth.
+
+Great Glamys, worthy Cawdor,
+Greater then both, by the all-haile hereafter,
+Thy Letters haue transported me beyond
+This ignorant present, and I feele now
+The future in the instant
+
+   Macb. My dearest Loue,
+Duncan comes here to Night
+
+   Lady. And when goes hence?
+  Macb. To morrow, as he purposes
+
+   Lady. O neuer,
+Shall Sunne that Morrow see.
+Your Face, my Thane, is as a Booke, where men
+May reade strange matters, to beguile the time.
+Looke like the time, beare welcome in your Eye,
+Your Hand, your Tongue: looke like th' innocent flower,
+But be the Serpent vnder't. He that's comming,
+Must be prouided for: and you shall put
+This Nights great Businesse into my dispatch,
+Which shall to all our Nights, and Dayes to come,
+Giue solely soueraigne sway, and Masterdome
+
+   Macb. We will speake further,
+  Lady. Onely looke vp cleare:
+To alter fauor, euer is to feare:
+Leaue all the rest to me.
+
+Exeunt.
+
+
+Scena Sexta.
+
+Hoboyes, and Torches. Enter King, Malcolme, Donalbaine,
+Banquo, Lenox,
+Macduff, Rosse, Angus, and Attendants.
+
+  King. This Castle hath a pleasant seat,
+The ayre nimbly and sweetly recommends it selfe
+Vnto our gentle sences
+
+   Banq. This Guest of Summer,
+The Temple-haunting Barlet does approue,
+By his loued Mansonry, that the Heauens breath
+Smells wooingly here: no Iutty frieze,
+Buttrice, nor Coigne of Vantage, but this Bird
+Hath made his pendant Bed, and procreant Cradle,
+Where they must breed, and haunt: I haue obseru'd
+The ayre is delicate.
+Enter Lady.
+
+  King. See, see our honor'd Hostesse:
+The Loue that followes vs, sometime is our trouble,
+Which still we thanke as Loue. Herein I teach you,
+How you shall bid God-eyld vs for your paines,
+And thanke vs for your trouble
+
+   Lady. All our seruice,
+In euery point twice done, and then done double,
+Were poore, and single Businesse, to contend
+Against those Honors deepe, and broad,
+Wherewith your Maiestie loades our House:
+For those of old, and the late Dignities,
+Heap'd vp to them, we rest your Ermites
+
+   King. Where's the Thane of Cawdor?
+We courst him at the heeles, and had a purpose
+To be his Purueyor: But he rides well,
+And his great Loue (sharpe as his Spurre) hath holp him
+To his home before vs: Faire and Noble Hostesse
+We are your guest to night
+
+   La. Your Seruants euer,
+Haue theirs, themselues, and what is theirs in compt,
+To make their Audit at your Highnesse pleasure,
+Still to returne your owne
+
+   King. Giue me your hand:
+Conduct me to mine Host we loue him highly,
+And shall continue, our Graces towards him.
+By your leaue Hostesse.
+
+Exeunt.
+
+Scena Septima.
+
+Hoboyes. Torches. Enter a Sewer, and diuers Seruants with Dishes
+and
+Seruice ouer the Stage. Then enter Macbeth
+
+   Macb. If it were done, when 'tis done, then 'twer well,
+It were done quickly: If th' Assassination
+Could trammell vp the Consequence, and catch
+With his surcease, Successe: that but this blow
+Might be the be all, and the end all. Heere,
+But heere, vpon this Banke and Schoole of time,
+Wee'ld iumpe the life to come. But in these Cases,
+We still haue iudgement heere, that we but teach
+Bloody Instructions, which being taught, returne
+To plague th' Inuenter, this euen-handed Iustice
+Commends th' Ingredience of our poyson'd Challice
+To our owne lips. Hee's heere in double trust;
+First, as I am his Kinsman, and his Subiect,
+Strong both against the Deed: Then, as his Host,
+Who should against his Murtherer shut the doore,
+Not beare the knife my selfe. Besides, this Duncane
+Hath borne his Faculties so meeke; hath bin
+So cleere in his great Office, that his Vertues
+Will pleade like Angels, Trumpet-tongu'd against
+The deepe damnation of his taking off:
+And Pitty, like a naked New-borne-Babe,
+Striding the blast, or Heauens Cherubin, hors'd
+Vpon the sightlesse Curriors of the Ayre,
+Shall blow the horrid deed in euery eye,
+That teares shall drowne the winde. I haue no Spurre
+To pricke the sides of my intent, but onely
+Vaulting Ambition, which ore-leapes it selfe,
+And falles on th' other.
+Enter Lady.
+
+How now? What Newes?
+  La. He has almost supt: why haue you left the chamber?
+  Mac. Hath he ask'd for me?
+  La. Know you not, he ha's?
+  Mac. We will proceed no further in this Businesse:
+He hath Honour'd me of late, and I haue bought
+Golden Opinions from all sorts of people,
+Which would be worne now in their newest glosse,
+Not cast aside so soone
+
+   La. Was the hope drunke,
+Wherein you drest your selfe? Hath it slept since?
+And wakes it now to looke so greene, and pale,
+At what it did so freely? From this time,
+Such I account thy loue. Art thou affear'd
+To be the same in thine owne Act, and Valour,
+As thou art in desire? Would'st thou haue that
+Which thou esteem'st the Ornament of Life,
+And liue a Coward in thine owne Esteeme?
+Letting I dare not, wait vpon I would,
+Like the poore Cat i'th' Addage
+
+   Macb. Prythee peace:
+I dare do all that may become a man,
+Who dares do more, is none
+
+   La. What Beast was't then
+That made you breake this enterprize to me?
+When you durst do it, then you were a man:
+And to be more then what you were, you would
+Be so much more the man. Nor time, nor place
+Did then adhere, and yet you would make both:
+They haue made themselues, and that their fitnesse now
+Do's vnmake you. I haue giuen Sucke, and know
+How tender 'tis to loue the Babe that milkes me,
+I would, while it was smyling in my Face,
+Haue pluckt my Nipple from his Bonelesse Gummes,
+And dasht the Braines out, had I so sworne
+As you haue done to this
+
+   Macb. If we should faile?
+  Lady. We faile?
+But screw your courage to the sticking place,
+And wee'le not fayle: when Duncan is asleepe,
+(Whereto the rather shall his dayes hard Iourney
+Soundly inuite him) his two Chamberlaines
+Will I with Wine, and Wassell, so conuince,
+That Memorie, the Warder of the Braine,
+Shall be a Fume, and the Receit of Reason
+A Lymbeck onely: when in Swinish sleepe,
+Their drenched Natures lyes as in a Death,
+What cannot you and I performe vpon
+Th' vnguarded Duncan? What not put vpon
+His spungie Officers? who shall beare the guilt
+Of our great quell
+
+   Macb. Bring forth Men-Children onely:
+For thy vndaunted Mettle should compose
+Nothing but Males. Will it not be receiu'd,
+When we haue mark'd with blood those sleepie two
+Of his owne Chamber, and vs'd their very Daggers,
+That they haue don't?
+  Lady. Who dares receiue it other,
+As we shall make our Griefes and Clamor rore,
+Vpon his Death?
+  Macb. I am settled, and bend vp
+Each corporall Agent to this terrible Feat.
+Away, and mock the time with fairest show,
+False Face must hide what the false Heart doth know.
+
+Exeunt.
+
+
+Actus Secundus. Scena Prima.
+
+Enter Banquo, and Fleance, with a Torch before him.
+
+  Banq. How goes the Night, Boy?
+  Fleance. The Moone is downe: I haue not heard the
+Clock
+
+   Banq. And she goes downe at Twelue
+
+   Fleance. I take't, 'tis later, Sir
+
+   Banq. Hold, take my Sword:
+There's Husbandry in Heauen,
+Their Candles are all out: take thee that too.
+A heauie Summons lyes like Lead vpon me,
+And yet I would not sleepe:
+Mercifull Powers, restraine in me the cursed thoughts
+That Nature giues way to in repose.
+Enter Macbeth, and a Seruant with a Torch.
+
+Giue me my Sword: who's there?
+  Macb. A Friend
+
+   Banq. What Sir, not yet at rest? the King's a bed.
+He hath beene in vnusuall Pleasure,
+And sent forth great Largesse to your Offices.
+This Diamond he greetes your Wife withall,
+By the name of most kind Hostesse,
+And shut vp in measurelesse content
+
+   Mac. Being vnprepar'd,
+Our will became the seruant to defect,
+Which else should free haue wrought
+
+   Banq. All's well.
+I dreamt last Night of the three weyward Sisters:
+To you they haue shew'd some truth
+
+   Macb. I thinke not of them:
+Yet when we can entreat an houre to serue,
+We would spend it in some words vpon that Businesse,
+If you would graunt the time
+
+   Banq. At your kind'st leysure
+
+   Macb. If you shall cleaue to my consent,
+When 'tis, it shall make Honor for you
+
+   Banq. So I lose none,
+In seeking to augment it, but still keepe
+My Bosome franchis'd, and Allegeance cleare,
+I shall be counsail'd
+
+   Macb. Good repose the while
+
+   Banq. Thankes Sir: the like to you.
+
+Exit Banquo.
+
+  Macb. Goe bid thy Mistresse, when my drinke is ready,
+She strike vpon the Bell. Get thee to bed.
+Enter.
+
+Is this a Dagger, which I see before me,
+The Handle toward my Hand? Come, let me clutch thee:
+I haue thee not, and yet I see thee still.
+Art thou not fatall Vision, sensible
+To feeling, as to sight? or art thou but
+A Dagger of the Minde, a false Creation,
+Proceeding from the heat-oppressed Braine?
+I see thee yet, in forme as palpable,
+As this which now I draw.
+Thou marshall'st me the way that I was going,
+And such an Instrument I was to vse.
+Mine Eyes are made the fooles o'th' other Sences,
+Or else worth all the rest: I see thee still;
+And on thy Blade, and Dudgeon, Gouts of Blood,
+Which was not so before. There's no such thing:
+It is the bloody Businesse, which informes
+Thus to mine Eyes. Now o're the one halfe World
+Nature seemes dead, and wicked Dreames abuse
+The Curtain'd sleepe: Witchcraft celebrates
+Pale Heccats Offrings: and wither'd Murther,
+Alarum'd by his Centinell, the Wolfe,
+Whose howle's his Watch, thus with his stealthy pace,
+With Tarquins rauishing sides, towards his designe
+Moues like a Ghost. Thou sowre and firme-set Earth
+Heare not my steps, which they may walke, for feare
+Thy very stones prate of my where-about,
+And take the present horror from the time,
+Which now sutes with it. Whiles I threat, he liues:
+Words to the heat of deedes too cold breath giues.
+
+A Bell rings.
+
+I goe, and it is done: the Bell inuites me.
+Heare it not, Duncan, for it is a Knell,
+That summons thee to Heauen, or to Hell.
+Enter.
+
+
+Scena Secunda.
+
+Enter Lady.
+
+  La. That which hath made the[m] drunk, hath made me bold:
+What hath quench'd them, hath giuen me fire.
+Hearke, peace: it was the Owle that shriek'd,
+The fatall Bell-man, which giues the stern'st good-night.
+He is about it, the Doores are open:
+And the surfeted Groomes doe mock their charge
+With Snores. I haue drugg'd their Possets,
+That Death and Nature doe contend about them,
+Whether they liue, or dye.
+Enter Macbeth.
+
+  Macb. Who's there? what hoa?
+  Lady. Alack, I am afraid they haue awak'd,
+And 'tis not done: th' attempt, and not the deed,
+Confounds vs: hearke: I lay'd their Daggers ready,
+He could not misse 'em. Had he not resembled
+My Father as he slept, I had don't.
+My Husband?
+  Macb. I haue done the deed:
+Didst thou not heare a noyse?
+  Lady. I heard the Owle schreame, and the Crickets cry.
+Did not you speake?
+  Macb. When?
+  Lady. Now
+
+   Macb. As I descended?
+  Lady. I
+
+   Macb. Hearke, who lyes i'th' second Chamber?
+  Lady. Donalbaine
+
+   Mac. This is a sorry sight
+
+   Lady. A foolish thought, to say a sorry sight
+
+   Macb. There's one did laugh in's sleepe,
+And one cry'd Murther, that they did wake each other:
+I stood, and heard them: But they did say their Prayers,
+And addrest them againe to sleepe
+
+   Lady. There are two lodg'd together
+
+   Macb. One cry'd God blesse vs, and Amen the other,
+As they had seene me with these Hangmans hands:
+Listning their feare, I could not say Amen,
+When they did say God blesse vs
+
+   Lady. Consider it not so deepely
+
+   Mac. But wherefore could not I pronounce Amen?
+I had most need of Blessing, and Amen stuck in my throat
+
+   Lady. These deeds must not be thought
+After these wayes: so, it will make vs mad
+
+   Macb. Me thought I heard a voyce cry, Sleep no more:
+Macbeth does murther Sleepe, the innocent Sleepe,
+Sleepe that knits vp the rauel'd Sleeue of Care,
+The death of each dayes Life, sore Labors Bath,
+Balme of hurt Mindes, great Natures second Course,
+Chiefe nourisher in Life's Feast
+
+   Lady. What doe you meane?
+  Macb. Still it cry'd, Sleepe no more to all the House:
+Glamis hath murther'd Sleepe, and therefore Cawdor
+Shall sleepe no more: Macbeth shall sleepe no more
+
+   Lady. Who was it, that thus cry'd? why worthy Thane,
+You doe vnbend your Noble strength, to thinke
+So braine-sickly of things: Goe get some Water,
+And wash this filthie Witnesse from your Hand.
+Why did you bring these Daggers from the place?
+They must lye there: goe carry them, and smeare
+The sleepie Groomes with blood
+
+   Macb. Ile goe no more:
+I am afraid, to thinke what I haue done:
+Looke on't againe, I dare not
+
+   Lady. Infirme of purpose:
+Giue me the Daggers: the sleeping, and the dead,
+Are but as Pictures: 'tis the Eye of Childhood,
+That feares a painted Deuill. If he doe bleed,
+Ile guild the Faces of the Groomes withall,
+For it must seeme their Guilt.
+Enter.
+
+Knocke within.
+
+  Macb. Whence is that knocking?
+How is't with me, when euery noyse appalls me?
+What Hands are here? hah: they pluck out mine Eyes.
+Will all great Neptunes Ocean wash this blood
+Cleane from my Hand? no: this my Hand will rather
+The multitudinous Seas incarnardine,
+Making the Greene one, Red.
+Enter Lady.
+
+  Lady. My Hands are of your colour: but I shame
+To weare a Heart so white.
+
+Knocke.
+
+I heare a knocking at the South entry:
+Retyre we to our Chamber:
+A little Water cleares vs of this deed.
+How easie is it then? your Constancie
+Hath left you vnattended.
+
+Knocke.
+
+Hearke, more knocking.
+Get on your Night-Gowne, least occasion call vs,
+And shew vs to be Watchers: be not lost
+So poorely in your thoughts
+
+   Macb. To know my deed,
+
+Knocke.
+
+'Twere best not know my selfe.
+Wake Duncan with thy knocking:
+I would thou could'st.
+
+Exeunt.
+
+
+Scena Tertia.
+
+Enter a Porter. Knocking within.
+
+  Porter. Here's a knocking indeede: if a man were
+Porter of Hell Gate, hee should haue old turning the
+Key.
+
+Knock.
+
+Knock, Knock, Knock. Who's there
+i'th' name of Belzebub? Here's a Farmer, that hang'd
+himselfe on th' expectation of Plentie: Come in time, haue
+Napkins enow about you, here you'le sweat for't.
+
+Knock.
+
+Knock, knock. Who's there in th' other Deuils Name?
+Faith here's an Equiuocator, that could sweare in both
+the Scales against eyther Scale, who committed Treason
+enough for Gods sake, yet could not equiuocate to Heauen:
+oh come in, Equiuocator.
+
+Knock.
+
+Knock, Knock, Knock. Who's there? 'Faith here's an English
+Taylor come hither, for stealing out of a French Hose:
+Come in Taylor, here you may rost your Goose.
+Knock.
+
+Knock, Knock. Neuer at quiet: What are you? but this
+place is too cold for Hell. Ile Deuill-Porter it no further:
+I had thought to haue let in some of all Professions, that
+goe the Primrose way to th' euerlasting Bonfire.
+
+Knock.
+
+Anon, anon, I pray you remember the Porter.
+Enter Macduff, and Lenox.
+
+  Macd. Was it so late, friend, ere you went to Bed,
+That you doe lye so late?
+  Port. Faith Sir, we were carowsing till the second Cock:
+And Drinke, Sir, is a great prouoker of three things
+
+   Macd. What three things does Drinke especially
+prouoke?
+  Port. Marry, Sir, Nose-painting, Sleepe, and Vrine.
+Lecherie, Sir, it prouokes, and vnprouokes: it prouokes
+the desire, but it takes away the performance. Therefore
+much Drinke may be said to be an Equiuocator with Lecherie:
+it makes him, and it marres him; it sets him on,
+and it takes him off; it perswades him, and dis-heartens
+him; makes him stand too, and not stand too: in conclusion,
+equiuocates him in a sleepe, and giuing him the Lye,
+leaues him
+
+   Macd. I beleeue, Drinke gaue thee the Lye last Night
+
+   Port. That it did, Sir, i'the very Throat on me: but I
+requited him for his Lye, and (I thinke) being too strong
+for him, though he tooke vp my Legges sometime, yet I
+made a Shift to cast him.
+Enter Macbeth.
+
+  Macd. Is thy Master stirring?
+Our knocking ha's awak'd him: here he comes
+
+   Lenox. Good morrow, Noble Sir
+
+   Macb. Good morrow both
+
+   Macd. Is the King stirring, worthy Thane?
+  Macb. Not yet
+
+   Macd. He did command me to call timely on him,
+I haue almost slipt the houre
+
+   Macb. Ile bring you to him
+
+   Macd. I know this is a ioyfull trouble to you:
+But yet 'tis one
+
+   Macb. The labour we delight in, Physicks paine:
+This is the Doore
+
+   Macd. Ile make so bold to call, for 'tis my limitted
+seruice.
+
+Exit Macduffe.
+
+  Lenox. Goes the King hence to day?
+  Macb. He does: he did appoint so
+
+   Lenox. The Night ha's been vnruly:
+Where we lay, our Chimneys were blowne downe,
+And (as they say) lamentings heard i'th' Ayre;
+Strange Schreemes of Death,
+And Prophecying, with Accents terrible,
+Of dyre Combustion, and confus'd Euents,
+New hatch'd toth' wofull time.
+The obscure Bird clamor'd the liue-long Night.
+Some say, the Earth was Feuorous,
+And did shake
+
+   Macb. 'Twas a rough Night
+
+   Lenox. My young remembrance cannot paralell
+A fellow to it.
+Enter Macduff.
+
+  Macd. O horror, horror, horror,
+Tongue nor Heart cannot conceiue, nor name thee
+
+   Macb. and Lenox. What's the matter?
+  Macd. Confusion now hath made his Master-peece:
+Most sacrilegious Murther hath broke ope
+The Lords anoynted Temple, and stole thence
+The Life o'th' Building
+
+   Macb. What is't you say, the Life?
+  Lenox. Meane you his Maiestie?
+  Macd. Approch the Chamber, and destroy your sight
+With a new Gorgon. Doe not bid me speake:
+See, and then speake your selues: awake, awake,
+
+Exeunt. Macbeth and Lenox.
+
+Ring the Alarum Bell: Murther, and Treason,
+Banquo, and Donalbaine: Malcolme awake,
+Shake off this Downey sleepe, Deaths counterfeit,
+And looke on Death it selfe: vp, vp, and see
+The great Doomes Image: Malcolme, Banquo,
+As from your Graues rise vp, and walke like Sprights,
+To countenance this horror. Ring the Bell.
+
+Bell rings. Enter Lady.
+
+  Lady. What's the Businesse?
+That such a hideous Trumpet calls to parley
+The sleepers of the House? speake, speake
+
+   Macd. O gentle Lady,
+'Tis not for you to heare what I can speake:
+The repetition in a Womans eare,
+Would murther as it fell.
+Enter Banquo.
+
+O Banquo, Banquo, Our Royall Master's murther'd
+
+   Lady. Woe, alas:
+What, in our House?
+  Ban. Too cruell, any where.
+Deare Duff, I prythee contradict thy selfe,
+And say, it is not so.
+Enter Macbeth, Lenox, and Rosse.
+
+  Macb. Had I but dy'd an houre before this chance,
+I had liu'd a blessed time: for from this instant,
+There's nothing serious in Mortalitie:
+All is but Toyes: Renowne and Grace is dead,
+The Wine of Life is drawne, and the meere Lees
+Is left this Vault, to brag of.
+Enter Malcolme and Donalbaine.
+
+  Donal. What is amisse?
+  Macb. You are, and doe not know't:
+The Spring, the Head, the Fountaine of your Blood
+Is stopt, the very Source of it is stopt
+
+   Macd. Your Royall Father's murther'd
+
+   Mal. Oh, by whom?
+  Lenox. Those of his Chamber, as it seem'd, had don't:
+Their Hands and Faces were all badg'd with blood,
+So were their Daggers, which vnwip'd, we found
+Vpon their Pillowes: they star'd, and were distracted,
+No mans Life was to be trusted with them
+
+   Macb. O, yet I doe repent me of my furie,
+That I did kill them
+
+   Macd. Wherefore did you so?
+  Macb. Who can be wise, amaz'd, temp'rate, & furious,
+Loyall, and Neutrall, in a moment? No man:
+Th' expedition of my violent Loue
+Out-run the pawser, Reason. Here lay Duncan,
+His Siluer skinne, lac'd with His Golden Blood,
+And his gash'd Stabs, look'd like a Breach in Nature,
+For Ruines wastfull entrance: there the Murtherers,
+Steep'd in the Colours of their Trade; their Daggers
+Vnmannerly breech'd with gore: who could refraine,
+That had a heart to loue; and in that heart,
+Courage, to make's loue knowne?
+  Lady. Helpe me hence, hoa
+
+   Macd. Looke to the Lady
+
+   Mal. Why doe we hold our tongues,
+That most may clayme this argument for ours?
+  Donal. What should be spoken here,
+Where our Fate hid in an augure hole,
+May rush, and seize vs? Let's away,
+Our Teares are not yet brew'd
+
+   Mal. Nor our strong Sorrow
+Vpon the foot of Motion
+
+   Banq. Looke to the Lady:
+And when we haue our naked Frailties hid,
+That suffer in exposure; let vs meet,
+And question this most bloody piece of worke,
+To know it further. Feares and scruples shake vs:
+In the great Hand of God I stand, and thence,
+Against the vndivulg'd pretence, I fight
+Of Treasonous Mallice
+
+   Macd. And so doe I
+
+   All. So all
+
+   Macb. Let's briefely put on manly readinesse,
+And meet i'th' Hall together
+
+   All. Well contented.
+
+Exeunt.
+
+  Malc. What will you doe?
+Let's not consort with them:
+To shew an vnfelt Sorrow, is an Office
+Which the false man do's easie.
+Ile to England
+
+   Don. To Ireland, I:
+Our seperated fortune shall keepe vs both the safer:
+Where we are, there's Daggers in mens smiles;
+The neere in blood, the neerer bloody
+
+   Malc. This murtherous Shaft that's shot,
+Hath not yet lighted: and our safest way,
+Is to auoid the ayme. Therefore to Horse,
+And let vs not be daintie of leaue-taking,
+But shift away: there's warrant in that Theft,
+Which steales it selfe, when there's no mercie left.
+
+Exeunt.
+
+
+
+Scena Quarta.
+
+Enter Rosse, with an Old man.
+
+  Old man. Threescore and ten I can remember well,
+Within the Volume of which Time, I haue seene
+Houres dreadfull, and things strange: but this sore Night
+Hath trifled former knowings
+
+   Rosse. Ha, good Father,
+Thou seest the Heauens, as troubled with mans Act,
+Threatens his bloody Stage: byth' Clock 'tis Day,
+And yet darke Night strangles the trauailing Lampe:
+Is't Nights predominance, or the Dayes shame,
+That Darknesse does the face of Earth intombe,
+When liuing Light should kisse it?
+  Old man. 'Tis vnnaturall,
+Euen like the deed that's done: On Tuesday last,
+A Faulcon towring in her pride of place,
+Was by a Mowsing Owle hawkt at, and kill'd
+
+   Rosse. And Duncans Horses,
+(A thing most strange, and certaine)
+Beauteous, and swift, the Minions of their Race,
+Turn'd wilde in nature, broke their stalls, flong out,
+Contending 'gainst Obedience, as they would
+Make Warre with Mankinde
+
+   Old man. 'Tis said, they eate each other
+
+   Rosse. They did so:
+To th' amazement of mine eyes that look'd vpon't.
+Enter Macduffe.
+
+Heere comes the good Macduffe.
+How goes the world Sir, now?
+  Macd. Why see you not?
+  Ross. Is't known who did this more then bloody deed?
+  Macd. Those that Macbeth hath slaine
+
+   Ross. Alas the day,
+What good could they pretend?
+  Macd. They were subborned,
+Malcolme, and Donalbaine the Kings two Sonnes
+Are stolne away and fled, which puts vpon them
+Suspition of the deed
+
+   Rosse. 'Gainst Nature still,
+Thriftlesse Ambition, that will rauen vp
+Thine owne liues meanes: Then 'tis most like,
+The Soueraignty will fall vpon Macbeth
+
+   Macd. He is already nam'd, and gone to Scone
+To be inuested
+
+   Rosse. Where is Duncans body?
+  Macd. Carried to Colmekill,
+The Sacred Store-house of his Predecessors,
+And Guardian of their Bones
+
+   Rosse. Will you to Scone?
+  Macd. No Cosin, Ile to Fife
+
+   Rosse. Well, I will thither
+
+   Macd. Well may you see things wel done there: Adieu
+Least our old Robes sit easier then our new
+
+   Rosse. Farewell, Father
+
+   Old M. Gods benyson go with you, and with those
+That would make good of bad, and Friends of Foes.
+
+Exeunt. omnes
+
+Actus Tertius. Scena Prima.
+
+Enter Banquo.
+
+  Banq. Thou hast it now, King, Cawdor, Glamis, all,
+As the weyard Women promis'd, and I feare
+Thou playd'st most fowly for't: yet it was saide
+It should not stand in thy Posterity,
+But that my selfe should be the Roote, and Father
+Of many Kings. If there come truth from them,
+As vpon thee Macbeth, their Speeches shine,
+Why by the verities on thee made good,
+May they not be my Oracles as well,
+And set me vp in hope. But hush, no more.
+
+Senit sounded. Enter Macbeth as King, Lady Lenox, Rosse, Lords,
+and
+Attendants.
+
+  Macb. Heere's our chiefe Guest
+
+   La. If he had beene forgotten,
+It had bene as a gap in our great Feast,
+And all-thing vnbecomming
+
+   Macb. To night we hold a solemne Supper sir,
+And Ile request your presence
+
+   Banq. Let your Highnesse
+Command vpon me, to the which my duties
+Are with a most indissoluble tye
+For euer knit
+
+   Macb. Ride you this afternoone?
+  Ban. I, my good Lord
+
+   Macb. We should haue else desir'd your good aduice
+(Which still hath been both graue, and prosperous)
+In this dayes Councell: but wee'le take to morrow.
+Is't farre you ride?
+  Ban. As farre, my Lord, as will fill vp the time
+'Twixt this, and Supper. Goe not my Horse the better,
+I must become a borrower of the Night,
+For a darke houre, or twaine
+
+   Macb. Faile not our Feast
+
+   Ban. My Lord, I will not
+
+   Macb. We heare our bloody Cozens are bestow'd
+In England, and in Ireland, not confessing
+Their cruell Parricide, filling their hearers
+With strange inuention. But of that to morrow,
+When therewithall, we shall haue cause of State,
+Crauing vs ioyntly. Hye you to Horse:
+Adieu, till you returne at Night.
+Goes Fleance with you?
+  Ban. I, my good Lord: our time does call vpon's
+
+   Macb. I wish your Horses swift, and sure of foot:
+And so I doe commend you to their backs.
+Farwell.
+
+Exit Banquo.
+
+Let euery man be master of his time,
+Till seuen at Night, to make societie
+The sweeter welcome:
+We will keepe our selfe till Supper time alone:
+While then, God be with you.
+
+Exeunt. Lords.
+
+Sirrha, a word with you: Attend those men
+Our pleasure?
+  Seruant. They are, my Lord, without the Pallace
+Gate
+
+   Macb. Bring them before vs.
+
+Exit Seruant.
+
+To be thus, is nothing, but to be safely thus
+Our feares in Banquo sticke deepe,
+And in his Royaltie of Nature reignes that
+Which would be fear'd. 'Tis much he dares,
+And to that dauntlesse temper of his Minde,
+He hath a Wisdome, that doth guide his Valour,
+To act in safetie. There is none but he,
+Whose being I doe feare: and vnder him,
+My Genius is rebuk'd, as it is said
+Mark Anthonies was by Caesar. He chid the Sisters,
+When first they put the Name of King vpon me,
+And bad them speake to him. Then Prophet-like,
+They hayl'd him Father to a Line of Kings.
+Vpon my Head they plac'd a fruitlesse Crowne,
+And put a barren Scepter in my Gripe,
+Thence to be wrencht with an vnlineall Hand,
+No Sonne of mine succeeding: if't be so,
+For Banquo's Issue haue I fil'd my Minde,
+For them, the gracious Duncan haue I murther'd,
+Put Rancours in the Vessell of my Peace
+Onely for them, and mine eternall Iewell
+Giuen to the common Enemie of Man,
+To make them Kings, the Seedes of Banquo Kings.
+Rather then so, come Fate into the Lyst,
+And champion me to th' vtterance.
+Who's there?
+Enter Seruant, and two Murtherers.
+
+Now goe to the Doore, and stay there till we call.
+
+Exit Seruant.
+
+Was it not yesterday we spoke together?
+  Murth. It was, so please your Highnesse
+
+   Macb. Well then,
+Now haue you consider'd of my speeches:
+Know, that it was he, in the times past,
+Which held you so vnder fortune,
+Which you thought had been our innocent selfe.
+This I made good to you, in our last conference,
+Past in probation with you:
+How you were borne in hand, how crost:
+The Instruments: who wrought with them:
+And all things else, that might
+To halfe a Soule, and to a Notion craz'd,
+Say, Thus did Banquo
+
+   1.Murth. You made it knowne to vs
+
+   Macb. I did so:
+And went further, which is now
+Our point of second meeting.
+Doe you finde your patience so predominant,
+In your nature, that you can let this goe?
+Are you so Gospell'd, to pray for this good man,
+And for his Issue, whose heauie hand
+Hath bow'd you to the Graue, and begger'd
+Yours for euer?
+  1.Murth. We are men, my Liege
+
+   Macb. I, in the Catalogue ye goe for men,
+As Hounds, and Greyhounds, Mungrels, Spaniels, Curres,
+Showghes, Water-Rugs, and Demy-Wolues are clipt
+All by the Name of Dogges: the valued file
+Distinguishes the swift, the slow, the subtle,
+The House-keeper, the Hunter, euery one
+According to the gift, which bounteous Nature
+Hath in him clos'd: whereby he does receiue
+Particular addition, from the Bill,
+That writes them all alike: and so of men.
+Now, if you haue a station in the file,
+Not i'th' worst ranke of Manhood, say't,
+And I will put that Businesse in your Bosomes,
+Whose execution takes your Enemie off,
+Grapples you to the heart; and loue of vs,
+Who weare our Health but sickly in his Life,
+Which in his Death were perfect
+
+   2.Murth. I am one, my Liege,
+Whom the vile Blowes and Buffets of the World
+Hath so incens'd, that I am recklesse what I doe,
+To spight the World
+
+   1.Murth. And I another,
+So wearie with Disasters, tugg'd with Fortune,
+That I would set my Life on any Chance,
+To mend it, or be rid on't
+
+   Macb. Both of you know Banquo was your Enemie
+
+   Murth. True, my Lord
+
+   Macb. So is he mine: and in such bloody distance,
+That euery minute of his being, thrusts
+Against my neer'st of Life: and though I could
+With bare-fac'd power sweepe him from my sight,
+And bid my will auouch it; yet I must not,
+For certaine friends that are both his, and mine,
+Whose loues I may not drop, but wayle his fall,
+Who I my selfe struck downe: and thence it is,
+That I to your assistance doe make loue,
+Masking the Businesse from the common Eye,
+For sundry weightie Reasons
+
+   2.Murth. We shall, my Lord,
+Performe what you command vs
+
+   1.Murth. Though our Liues-
+  Macb. Your Spirits shine through you.
+Within this houre, at most,
+I will aduise you where to plant your selues,
+Acquaint you with the perfect Spy o'th' time,
+The moment on't, for't must be done to Night,
+And something from the Pallace: alwayes thought,
+That I require a clearenesse; and with him,
+To leaue no Rubs nor Botches in the Worke:
+  Fleans , his Sonne, that keepes him companie,
+Whose absence is no lesse materiall to me,
+Then is his Fathers, must embrace the fate
+Of that darke houre: resolue your selues apart,
+Ile come to you anon
+
+   Murth. We are resolu'd, my Lord
+
+   Macb. Ile call vpon you straight: abide within,
+It is concluded: Banquo, thy Soules flight,
+If it finde Heauen, must finde it out to Night.
+
+Exeunt.
+
+
+Scena Secunda.
+
+Enter Macbeths Lady, and a Seruant.
+
+  Lady. Is Banquo gone from Court?
+  Seruant. I, Madame, but returnes againe to Night
+
+   Lady. Say to the King, I would attend his leysure,
+For a few words
+
+   Seruant. Madame, I will.
+Enter.
+
+  Lady. Nought's had, all's spent.
+Where our desire is got without content:
+'Tis safer, to be that which we destroy,
+Then by destruction dwell in doubtfull ioy.
+Enter Macbeth.
+
+How now, my Lord, why doe you keepe alone?
+Of sorryest Fancies your Companions making,
+Vsing those Thoughts, which should indeed haue dy'd
+With them they thinke on: things without all remedie
+Should be without regard: what's done, is done
+
+   Macb. We haue scorch'd the Snake, not kill'd it:
+Shee'le close, and be her selfe, whilest our poore Mallice
+Remaines in danger of her former Tooth.
+But let the frame of things dis-ioynt,
+Both the Worlds suffer,
+Ere we will eate our Meale in feare, and sleepe
+In the affliction of these terrible Dreames,
+That shake vs Nightly: Better be with the dead,
+Whom we, to gayne our peace, haue sent to peace,
+Then on the torture of the Minde to lye
+In restlesse extasie.
+Duncane is in his Graue:
+After Lifes fitfull Feuer, he sleepes well,
+Treason ha's done his worst: nor Steele, nor Poyson,
+Mallice domestique, forraine Leuie, nothing,
+Can touch him further
+
+   Lady. Come on:
+Gentle my Lord, sleeke o're your rugged Lookes,
+Be bright and Iouiall among your Guests to Night
+
+   Macb. So shall I Loue, and so I pray be you:
+Let your remembrance apply to Banquo,
+Present him Eminence, both with Eye and Tongue:
+Vnsafe the while, that wee must laue
+Our Honors in these flattering streames,
+And make our Faces Vizards to our Hearts,
+Disguising what they are
+
+   Lady. You must leaue this
+
+   Macb. O, full of Scorpions is my Minde, deare Wife:
+Thou know'st, that Banquo and his Fleans liues
+
+   Lady. But in them, Natures Coppie's not eterne
+
+   Macb. There's comfort yet, they are assaileable,
+Then be thou iocund: ere the Bat hath flowne
+His Cloyster'd flight, ere to black Heccats summons
+The shard-borne Beetle, with his drowsie hums,
+Hath rung Nights yawning Peale,
+There shall be done a deed of dreadfull note
+
+   Lady. What's to be done?
+  Macb. Be innocent of the knowledge, dearest Chuck,
+Till thou applaud the deed: Come, seeling Night,
+Skarfe vp the tender Eye of pittifull Day,
+And with thy bloodie and inuisible Hand
+Cancell and teare to pieces that great Bond,
+Which keepes me pale. Light thickens,
+And the Crow makes Wing toth' Rookie Wood:
+Good things of Day begin to droope, and drowse,
+Whiles Nights black Agents to their Prey's doe rowse.
+Thou maruell'st at my words: but hold thee still,
+Things bad begun, make strong themselues by ill:
+So prythee goe with me.
+
+Exeunt.
+
+
+Scena Tertia.
+
+Enter three Murtherers.
+
+  1. But who did bid thee ioyne with vs?
+  3. Macbeth
+
+   2. He needes not our mistrust, since he deliuers
+Our Offices, and what we haue to doe,
+To the direction iust
+
+   1. Then stand with vs:
+The West yet glimmers with some streakes of Day.
+Now spurres the lated Traueller apace,
+To gayne the timely Inne, and neere approches
+The subiect of our Watch
+
+   3. Hearke, I heare Horses
+
+   Banquo within. Giue vs a Light there, hoa
+
+   2. Then 'tis hee:
+The rest, that are within the note of expectation,
+Alreadie are i'th' Court
+
+   1. His Horses goe about
+
+   3. Almost a mile: but he does vsually,
+So all men doe, from hence toth' Pallace Gate
+Make it their Walke.
+Enter Banquo and Fleans, with a Torch.
+
+  2. A Light, a Light
+
+   3. 'Tis hee
+
+   1. Stand too't
+
+   Ban. It will be Rayne to Night
+
+   1. Let it come downe
+
+   Ban. O, Trecherie!
+Flye good Fleans, flye, flye, flye,
+Thou may'st reuenge. O Slaue!
+  3. Who did strike out the Light?
+  1. Was't not the way?
+  3. There's but one downe: the Sonne is fled
+
+   2. We haue lost
+Best halfe of our Affaire
+
+   1. Well, let's away, and say how much is done.
+
+Exeunt.
+
+
+Scaena Quarta.
+
+Banquet prepar'd. Enter Macbeth, Lady, Rosse, Lenox, Lords, and
+Attendants.
+
+  Macb. You know your owne degrees, sit downe:
+At first and last, the hearty welcome
+
+   Lords. Thankes to your Maiesty
+
+   Macb. Our selfe will mingle with Society,
+And play the humble Host:
+Our Hostesse keepes her State, but in best time
+We will require her welcome
+
+   La. Pronounce it for me Sir, to all our Friends,
+For my heart speakes, they are welcome.
+Enter first Murtherer.
+
+  Macb. See they encounter thee with their harts thanks
+Both sides are euen: heere Ile sit i'th' mid'st,
+Be large in mirth, anon wee'l drinke a Measure
+The Table round. There's blood vpon thy face
+
+   Mur. 'Tis Banquo's then
+
+   Macb. 'Tis better thee without, then he within.
+Is he dispatch'd?
+  Mur. My Lord his throat is cut, that I did for him
+
+   Mac. Thou art the best o'th' Cut-throats,
+Yet hee's good that did the like for Fleans:
+If thou did'st it, thou art the Non-pareill
+
+   Mur. Most Royall Sir
+Fleans is scap'd
+
+   Macb. Then comes my Fit againe:
+I had else beene perfect;
+Whole as the Marble, founded as the Rocke,
+As broad, and generall, as the casing Ayre:
+But now I am cabin'd, crib'd, confin'd, bound in
+To sawcy doubts, and feares. But Banquo's safe?
+  Mur. I, my good Lord: safe in a ditch he bides,
+With twenty trenched gashes on his head;
+The least a Death to Nature
+
+   Macb. Thankes for that:
+There the growne Serpent lyes, the worme that's fled
+Hath Nature that in time will Venom breed,
+No teeth for th' present. Get thee gone, to morrow
+Wee'l heare our selues againe.
+
+Exit Murderer.
+
+  Lady. My Royall Lord,
+You do not giue the Cheere, the Feast is sold
+That is not often vouch'd, while 'tis a making:
+'Tis giuen, with welcome: to feede were best at home:
+From thence, the sawce to meate is Ceremony,
+Meeting were bare without it.
+Enter the Ghost of Banquo, and sits in Macbeths place.
+
+  Macb. Sweet Remembrancer:
+Now good digestion waite on Appetite,
+And health on both
+
+   Lenox. May't please your Highnesse sit
+
+   Macb. Here had we now our Countries Honor, roof'd,
+Were the grac'd person of our Banquo present:
+Who, may I rather challenge for vnkindnesse,
+Then pitty for Mischance
+
+   Rosse. His absence (Sir)
+Layes blame vpon his promise. Pleas't your Highnesse
+To grace vs with your Royall Company?
+  Macb. The Table's full
+
+   Lenox. Heere is a place reseru'd Sir
+
+   Macb. Where?
+  Lenox. Heere my good Lord.
+What is't that moues your Highnesse?
+  Macb. Which of you haue done this?
+  Lords. What, my good Lord?
+  Macb. Thou canst not say I did it: neuer shake
+Thy goary lockes at me
+
+   Rosse. Gentlemen rise, his Highnesse is not well
+
+   Lady. Sit worthy Friends: my Lord is often thus,
+And hath beene from his youth. Pray you keepe Seat,
+The fit is momentary, vpon a thought
+He will againe be well. If much you note him
+You shall offend him, and extend his Passion,
+Feed, and regard him not. Are you a man?
+  Macb. I, and a bold one, that dare looke on that
+Which might appall the Diuell
+
+   La. O proper stuffe:
+This is the very painting of your feare:
+This is the Ayre-drawne-Dagger which you said
+Led you to Duncan. O, these flawes and starts
+(Impostors to true feare) would well become
+A womans story, at a Winters fire
+Authoriz'd by her Grandam: shame it selfe,
+Why do you make such faces? When all's done
+You looke but on a stoole
+
+   Macb. Prythee see there:
+Behold, looke, loe, how say you:
+Why what care I, if thou canst nod, speake too.
+If Charnell houses, and our Graues must send
+Those that we bury, backe; our Monuments
+Shall be the Mawes of Kytes
+
+   La. What? quite vnmann'd in folly
+
+   Macb. If I stand heere, I saw him
+
+   La. Fie for shame
+
+   Macb. Blood hath bene shed ere now, i'th' olden time
+Ere humane Statute purg'd the gentle Weale:
+I, and since too, Murthers haue bene perform'd
+Too terrible for the eare. The times has bene,
+That when the Braines were out, the man would dye,
+And there an end: But now they rise againe
+With twenty mortall murthers on their crownes,
+And push vs from our stooles. This is more strange
+Then such a murther is
+
+   La. My worthy Lord
+Your Noble Friends do lacke you
+
+   Macb. I do forget:
+Do not muse at me my most worthy Friends,
+I haue a strange infirmity, which is nothing
+To those that know me. Come, loue and health to all,
+Then Ile sit downe: Giue me some Wine, fill full:
+Enter Ghost.
+
+I drinke to th' generall ioy o'th' whole Table,
+And to our deere Friend Banquo, whom we misse:
+Would he were heere: to all, and him we thirst,
+And all to all
+
+   Lords. Our duties, and the pledge
+
+   Mac. Auant, & quit my sight, let the earth hide thee:
+Thy bones are marrowlesse, thy blood is cold:
+Thou hast no speculation in those eyes
+Which thou dost glare with
+
+   La. Thinke of this good Peeres
+But as a thing of Custome: 'Tis no other,
+Onely it spoyles the pleasure of the time
+
+   Macb. What man dare, I dare:
+Approach thou like the rugged Russian Beare,
+The arm'd Rhinoceros, or th' Hircan Tiger,
+Take any shape but that, and my firme Nerues
+Shall neuer tremble. Or be aliue againe,
+And dare me to the Desart with thy Sword:
+If trembling I inhabit then, protest mee
+The Baby of a Girle. Hence horrible shadow,
+Vnreall mock'ry hence. Why so, being gone
+I am a man againe: pray you sit still
+
+   La. You haue displac'd the mirth,
+Broke the good meeting, with most admir'd disorder
+
+   Macb. Can such things be,
+And ouercome vs like a Summers Clowd,
+Without our speciall wonder? You make me strange
+Euen to the disposition that I owe,
+When now I thinke you can behold such sights,
+And keepe the naturall Rubie of your Cheekes,
+When mine is blanch'd with feare
+
+   Rosse. What sights, my Lord?
+  La. I pray you speake not: he growes worse & worse
+Question enrages him: at once, goodnight.
+Stand not vpon the order of your going,
+But go at once
+
+   Len. Good night, and better health
+Attend his Maiesty
+
+   La. A kinde goodnight to all.
+
+Exit Lords.
+
+  Macb. It will haue blood they say:
+Blood will haue Blood:
+Stones haue beene knowne to moue, & Trees to speake:
+Augures, and vnderstood Relations, haue
+By Maggot Pyes, & Choughes, & Rookes brought forth
+The secret'st man of Blood. What is the night?
+  La. Almost at oddes with morning, which is which
+
+   Macb. How say'st thou that Macduff denies his person
+At our great bidding
+
+   La. Did you send to him Sir?
+  Macb. I heare it by the way: But I will send:
+There's not a one of them but in his house
+I keepe a Seruant Feed. I will to morrow
+(And betimes I will) to the weyard Sisters.
+More shall they speake: for now I am bent to know
+By the worst meanes, the worst, for mine owne good,
+All causes shall giue way. I am in blood
+Stept in so farre, that should I wade no more,
+Returning were as tedious as go ore:
+Strange things I haue in head, that will to hand,
+Which must be acted, ere they may be scand
+
+   La. You lacke the season of all Natures, sleepe
+
+   Macb. Come, wee'l to sleepe: My strange & self-abuse
+Is the initiate feare, that wants hard vse:
+We are yet but yong indeed.
+
+Exeunt.
+
+
+Scena Quinta.
+
+Thunder. Enter the three Witches, meeting Hecat.
+
+  1. Why how now Hecat, you looke angerly?
+  Hec. Haue I not reason (Beldams) as you are?
+Sawcy, and ouer-bold, how did you dare
+To Trade, and Trafficke with Macbeth,
+In Riddles, and Affaires of death;
+And I the Mistris of your Charmes,
+The close contriuer of all harmes,
+Was neuer call'd to beare my part,
+Or shew the glory of our Art?
+And which is worse, all you haue done
+Hath bene but for a wayward Sonne,
+Spightfull, and wrathfull, who (as others do)
+Loues for his owne ends, not for you.
+But make amends now: Get you gon,
+And at the pit of Acheron
+Meete me i'th' Morning: thither he
+Will come, to know his Destinie.
+Your Vessels, and your Spels prouide,
+Your Charmes, and euery thing beside;
+I am for th' Ayre: This night Ile spend
+Vnto a dismall, and a Fatall end.
+Great businesse must be wrought ere Noone.
+Vpon the Corner of the Moone
+There hangs a vap'rous drop, profound,
+Ile catch it ere it come to ground;
+And that distill'd by Magicke slights,
+Shall raise such Artificiall Sprights,
+As by the strength of their illusion,
+Shall draw him on to his Confusion.
+He shall spurne Fate, scorne Death, and beare
+His hopes 'boue Wisedome, Grace, and Feare:
+And you all know, Security
+Is Mortals cheefest Enemie.
+
+Musicke, and a Song.
+
+Hearke, I am call'd: my little Spirit see
+Sits in Foggy cloud, and stayes for me.
+
+Sing within. Come away, come away, &c.
+
+  1 Come, let's make hast, shee'l soone be
+Backe againe.
+
+Exeunt.
+
+
+Scaena Sexta.
+
+Enter Lenox, and another Lord.
+
+  Lenox. My former Speeches,
+Haue but hit your Thoughts
+Which can interpret farther: Onely I say
+Things haue bin strangely borne. The gracious Duncan
+Was pittied of Macbeth: marry he was dead:
+And the right valiant Banquo walk'd too late,
+Whom you may say (if't please you) Fleans kill'd,
+For Fleans fled: Men must not walke too late.
+Who cannot want the thought, how monstrous
+It was for Malcolme, and for Donalbane
+To kill their gracious Father? Damned Fact,
+How it did greeue Macbeth? Did he not straight
+In pious rage, the two delinquents teare,
+That were the Slaues of drinke, and thralles of sleepe?
+Was not that Nobly done? I, and wisely too:
+For 'twould haue anger'd any heart aliue
+To heare the men deny't. So that I say,
+He ha's borne all things well, and I do thinke,
+That had he Duncans Sonnes vnder his Key,
+(As, and't please Heauen he shall not) they should finde
+What 'twere to kill a Father: So should Fleans.
+But peace; for from broad words, and cause he fayl'd
+His presence at the Tyrants Feast, I heare
+Macduffe liues in disgrace. Sir, can you tell
+Where he bestowes himselfe?
+  Lord. The Sonnes of Duncane
+(From whom this Tyrant holds the due of Birth)
+Liues in the English Court, and is receyu'd
+Of the most Pious Edward, with such grace,
+That the maleuolence of Fortune, nothing
+Takes from his high respect. Thither Macduffe
+Is gone, to pray the Holy King, vpon his ayd
+To wake Northumberland, and warlike Seyward,
+That by the helpe of these (with him aboue)
+To ratifie the Worke) we may againe
+Giue to our Tables meate, sleepe to our Nights:
+Free from our Feasts, and Banquets bloody kniues;
+Do faithfull Homage, and receiue free Honors,
+All which we pine for now. And this report
+Hath so exasperate their King, that hee
+Prepares for some attempt of Warre
+
+   Len. Sent he to Macduffe?
+  Lord. He did: and with an absolute Sir, not I
+The clowdy Messenger turnes me his backe,
+And hums; as who should say, you'l rue the time
+That clogges me with this Answer
+
+   Lenox. And that well might
+Aduise him to a Caution, t' hold what distance
+His wisedome can prouide. Some holy Angell
+Flye to the Court of England, and vnfold
+His Message ere he come, that a swift blessing
+May soone returne to this our suffering Country,
+Vnder a hand accurs'd
+
+   Lord. Ile send my Prayers with him.
+
+Exeunt.
+
+Actus Quartus. Scena Prima.
+
+Thunder. Enter the three Witches.
+
+  1 Thrice the brinded Cat hath mew'd
+
+   2 Thrice, and once the Hedge-Pigge whin'd
+
+   3 Harpier cries, 'tis time, 'tis time
+
+   1 Round about the Caldron go:
+In the poysond Entrailes throw
+Toad, that vnder cold stone,
+Dayes and Nights, ha's thirty one:
+Sweltred Venom sleeping got,
+Boyle thou first i'th' charmed pot
+
+   All. Double, double, toile and trouble;
+Fire burne, and Cauldron bubble
+
+   2 Fillet of a Fenny Snake,
+In the Cauldron boyle and bake:
+Eye of Newt, and Toe of Frogge,
+Wooll of Bat, and Tongue of Dogge:
+Adders Forke, and Blinde-wormes Sting,
+Lizards legge, and Howlets wing:
+For a Charme of powrefull trouble,
+Like a Hell-broth, boyle and bubble
+
+   All. Double, double, toyle and trouble,
+Fire burne, and Cauldron bubble
+
+   3 Scale of Dragon, Tooth of Wolfe,
+Witches Mummey, Maw, and Gulfe
+Of the rauin'd salt Sea sharke:
+Roote of Hemlocke, digg'd i'th' darke:
+Liuer of Blaspheming Iew,
+Gall of Goate, and Slippes of Yew,
+Sliuer'd in the Moones Ecclipse:
+Nose of Turke, and Tartars lips:
+Finger of Birth-strangled Babe,
+Ditch-deliuer'd by a Drab,
+Make the Grewell thicke, and slab.
+Adde thereto a Tigers Chawdron,
+For th' Ingredience of our Cawdron
+
+   All. Double, double, toyle and trouble,
+Fire burne, and Cauldron bubble
+
+   2 Coole it with a Baboones blood,
+Then the Charme is firme and good.
+Enter Hecat, and the other three Witches.
+
+  Hec. O well done: I commend your paines,
+And euery one shall share i'th' gaines:
+And now about the Cauldron sing
+Like Elues and Fairies in a Ring,
+Inchanting all that you put in.
+
+Musicke and a Song. Blacke Spirits, &c.
+
+  2 By the pricking of my Thumbes,
+Something wicked this way comes:
+Open Lockes, who euer knockes.
+Enter Macbeth.
+
+  Macb. How now you secret, black, & midnight Hags?
+What is't you do?
+  All. A deed without a name
+
+   Macb. I coniure you, by that which you Professe,
+(How ere you come to know it) answer me:
+Though you vntye the Windes, and let them fight
+Against the Churches: Though the yesty Waues
+Confound and swallow Nauigation vp:
+Though bladed Corne be lodg'd, & Trees blown downe,
+Though Castles topple on their Warders heads:
+Though Pallaces, and Pyramids do slope
+Their heads to their Foundations: Though the treasure
+Of Natures Germaine, tumble altogether,
+Euen till destruction sicken: Answer me
+To what I aske you
+
+   1 Speake
+
+   2 Demand
+
+   3 Wee'l answer
+
+   1 Say, if th'hadst rather heare it from our mouthes,
+Or from our Masters
+
+   Macb. Call 'em: let me see 'em
+
+   1 Powre in Sowes blood, that hath eaten
+Her nine Farrow: Greaze that's sweaten
+From the Murderers Gibbet, throw
+Into the Flame
+
+   All. Come high or low:
+Thy Selfe and Office deaftly show.
+Thunder. 1. Apparation, an Armed Head.
+
+  Macb. Tell me, thou vnknowne power
+
+   1 He knowes thy thought:
+Heare his speech, but say thou nought
+
+   1 Appar. Macbeth, Macbeth, Macbeth:
+Beware Macduffe,
+Beware the Thane of Fife: dismisse me. Enough.
+
+He Descends.
+
+  Macb. What ere thou art, for thy good caution, thanks
+Thou hast harp'd my feare aright. But one word more
+
+   1 He will not be commanded: heere's another
+More potent then the first.
+
+Thunder. 2 Apparition, a Bloody Childe.
+
+  2 Appar. Macbeth, Macbeth, Macbeth
+
+   Macb. Had I three eares, Il'd heare thee
+
+   Appar. Be bloody, bold, & resolute:
+Laugh to scorne
+The powre of man: For none of woman borne
+Shall harme Macbeth.
+
+Descends.
+
+  Mac. Then liue Macduffe: what need I feare of thee?
+But yet Ile make assurance: double sure,
+And take a Bond of Fate: thou shalt not liue,
+That I may tell pale-hearted Feare, it lies;
+And sleepe in spight of Thunder.
+
+Thunder 3 Apparation, a Childe Crowned, with a Tree in his hand.
+
+What is this, that rises like the issue of a King,
+And weares vpon his Baby-brow, the round
+And top of Soueraignty?
+  All. Listen, but speake not too't
+
+   3 Appar. Be Lyon metled, proud, and take no care:
+Who chafes, who frets, or where Conspirers are:
+Macbeth shall neuer vanquish'd be, vntill
+Great Byrnam Wood, to high Dunsmane Hill
+Shall come against him.
+
+Descend.
+
+  Macb. That will neuer bee:
+Who can impresse the Forrest, bid the Tree
+Vnfixe his earth-bound Root? Sweet boadments, good:
+Rebellious dead, rise neuer till the Wood
+Of Byrnan rise, and our high plac'd Macbeth
+Shall liue the Lease of Nature, pay his breath
+To time, and mortall Custome. Yet my Hart
+Throbs to know one thing: Tell me, if your Art
+Can tell so much: Shall Banquo's issue euer
+Reigne in this Kingdome?
+  All. Seeke to know no more
+
+   Macb. I will be satisfied. Deny me this,
+And an eternall Curse fall on you: Let me know.
+Why sinkes that Caldron? & what noise is this?
+
+Hoboyes
+
+  1 Shew
+
+   2 Shew
+
+   3 Shew
+
+   All. Shew his Eyes, and greeue his Hart,
+Come like shadowes, so depart.
+
+A shew of eight Kings, and Banquo last, with a glasse in his hand.
+
+  Macb. Thou art too like the Spirit of Banquo: Down:
+Thy Crowne do's seare mine Eye-bals. And thy haire
+Thou other Gold-bound-brow, is like the first:
+A third, is like the former. Filthy Hagges,
+Why do you shew me this? - A fourth? Start eyes!
+What will the Line stretch out to'th' cracke of Doome?
+Another yet? A seauenth? Ile see no more:
+And yet the eighth appeares, who beares a glasse,
+Which shewes me many more: and some I see,
+That two-fold Balles, and trebble Scepters carry.
+Horrible sight: Now I see 'tis true,
+For the Blood-bolter'd Banquo smiles vpon me,
+And points at them for his. What? is this so?
+  1 I Sir, all this is so. But why
+Stands Macbeth thus amazedly?
+Come Sisters, cheere we vp his sprights,
+And shew the best of our delights.
+Ile Charme the Ayre to giue a sound,
+While you performe your Antique round:
+That this great King may kindly say,
+Our duties, did his welcome pay.
+
+Musicke. The Witches Dance, and vanish.
+
+  Macb. Where are they? Gone?
+Let this pernitious houre,
+Stand aye accursed in the Kalender.
+Come in, without there.
+Enter Lenox.
+
+  Lenox. What's your Graces will
+
+   Macb. Saw you the Weyard Sisters?
+  Lenox. No my Lord
+
+   Macb. Came they not by you?
+  Lenox. No indeed my Lord
+
+   Macb. Infected be the Ayre whereon they ride,
+And damn'd all those that trust them. I did heare
+The gallopping of Horse. Who was't came by?
+  Len. 'Tis two or three my Lord, that bring you word:
+Macduff is fled to England
+
+   Macb. Fled to England?
+  Len. I, my good Lord
+
+   Macb. Time, thou anticipat'st my dread exploits:
+The flighty purpose neuer is o're-tooke
+Vnlesse the deed go with it. From this moment,
+The very firstlings of my heart shall be
+The firstlings of my hand. And euen now
+To Crown my thoughts with Acts: be it thoght & done:
+The Castle of Macduff, I will surprize.
+Seize vpon Fife; giue to th' edge o'th' Sword
+His Wife, his Babes, and all vnfortunate Soules
+That trace him in his Line. No boasting like a Foole,
+This deed Ile do, before this purpose coole,
+But no more sights. Where are these Gentlemen?
+Come bring me where they are.
+
+Exeunt.
+
+Scena Secunda.
+
+Enter Macduffes Wife, her Son, and Rosse.
+
+  Wife. What had he done, to make him fly the Land?
+  Rosse. You must haue patience Madam
+
+   Wife. He had none:
+His flight was madnesse: when our Actions do not,
+Our feares do make vs Traitors
+
+   Rosse. You know not
+Whether it was his wisedome, or his feare
+
+   Wife. Wisedom? to leaue his wife, to leaue his Babes,
+His Mansion, and his Titles, in a place
+From whence himselfe do's flye? He loues vs not,
+He wants the naturall touch. For the poore Wren
+(The most diminitiue of Birds) will fight,
+Her yong ones in her N

<TRUNCATED>
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/resources/sort_by_value.txt
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/resources/sort_by_value.txt b/crunch-core/src/it/resources/sort_by_value.txt
new file mode 100644
index 0000000..73f7d11
--- /dev/null
+++ b/crunch-core/src/it/resources/sort_by_value.txt
@@ -0,0 +1,5 @@
+A	2
+B	1
+C	3
+D	2
+E	1


[42/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/MapsIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/MapsIT.java b/crunch-core/src/it/java/org/apache/crunch/MapsIT.java
new file mode 100644
index 0000000..5b3187b
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/MapsIT.java
@@ -0,0 +1,101 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import static org.hamcrest.Matchers.is;
+import static org.junit.Assert.assertThat;
+
+import java.util.Map;
+
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.PTypeFamily;
+import org.apache.crunch.types.avro.AvroTypeFamily;
+import org.apache.crunch.types.writable.WritableTypeFamily;
+import org.junit.Rule;
+import org.junit.Test;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Maps;
+
+public class MapsIT {
+  @Rule
+  public TemporaryPath tmpDir = TemporaryPaths.create();
+
+  @Test
+  public void testWritables() throws Exception {
+    run(WritableTypeFamily.getInstance(), tmpDir);
+  }
+
+  @Test
+  public void testAvros() throws Exception {
+    run(AvroTypeFamily.getInstance(), tmpDir);
+  }
+
+  public static void run(PTypeFamily typeFamily, TemporaryPath tmpDir) throws Exception {
+    Pipeline pipeline = new MRPipeline(MapsIT.class, tmpDir.getDefaultConfiguration());
+    String shakesInputPath = tmpDir.copyResourceFileName("shakes.txt");
+    PCollection<String> shakespeare = pipeline.readTextFile(shakesInputPath);
+    Iterable<Pair<String, Map<String, Long>>> output = shakespeare
+        .parallelDo(new DoFn<String, Pair<String, Map<String, Long>>>() {
+          @Override
+          public void process(String input, Emitter<Pair<String, Map<String, Long>>> emitter) {
+            String last = null;
+            for (String word : input.toLowerCase().split("\\W+")) {
+              if (!word.isEmpty()) {
+                String firstChar = word.substring(0, 1);
+                if (last != null) {
+                  Map<String, Long> cc = ImmutableMap.of(firstChar, 1L);
+                  emitter.emit(Pair.of(last, cc));
+                }
+                last = firstChar;
+              }
+            }
+          }
+        }, typeFamily.tableOf(typeFamily.strings(), typeFamily.maps(typeFamily.longs()))).groupByKey()
+        .combineValues(new CombineFn<String, Map<String, Long>>() {
+          @Override
+          public void process(Pair<String, Iterable<Map<String, Long>>> input,
+              Emitter<Pair<String, Map<String, Long>>> emitter) {
+            Map<String, Long> agg = Maps.newHashMap();
+            for (Map<String, Long> in : input.second()) {
+              for (Map.Entry<String, Long> e : in.entrySet()) {
+                if (!agg.containsKey(e.getKey())) {
+                  agg.put(e.getKey(), e.getValue());
+                } else {
+                  agg.put(e.getKey(), e.getValue() + agg.get(e.getKey()));
+                }
+              }
+            }
+            emitter.emit(Pair.of(input.first(), agg));
+          }
+        }).materialize();
+
+    boolean passed = false;
+    for (Pair<String, Map<String, Long>> v : output) {
+      if (v.first().equals("k") && v.second().get("n") == 8L) {
+        passed = true;
+        break;
+      }
+    }
+    pipeline.done();
+
+    assertThat(passed, is(true));
+  }
+}
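
For context on the hunk above: the CombineFn in MapsIT folds per-record Map<String, Long> counts into a single map per key. A standalone sketch of just that merge step, in plain Java and Guava (the class name MergeCountsSketch and the example inputs are illustrative, not part of this patch):

    import java.util.Arrays;
    import java.util.Map;

    import com.google.common.collect.ImmutableMap;
    import com.google.common.collect.Maps;

    public class MergeCountsSketch {

      // Same merge the CombineFn applies per key: sum counts across partial maps.
      static Map<String, Long> merge(Iterable<Map<String, Long>> partials) {
        Map<String, Long> agg = Maps.newHashMap();
        for (Map<String, Long> in : partials) {
          for (Map.Entry<String, Long> e : in.entrySet()) {
            Long prev = agg.get(e.getKey());
            agg.put(e.getKey(), prev == null ? e.getValue() : prev + e.getValue());
          }
        }
        return agg;
      }

      public static void main(String[] args) {
        Map<String, Long> merged = merge(Arrays.asList(
            ImmutableMap.of("n", 1L), ImmutableMap.of("n", 1L, "o", 1L)));
        System.out.println(merged); // expected counts: n=2, o=1 (iteration order may vary)
      }
    }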

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/MaterializeIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/MaterializeIT.java b/crunch-core/src/it/java/org/apache/crunch/MaterializeIT.java
new file mode 100644
index 0000000..d064993
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/MaterializeIT.java
@@ -0,0 +1,139 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import static junit.framework.Assert.assertEquals;
+import static junit.framework.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.crunch.fn.FilterFns;
+import org.apache.crunch.impl.mem.MemPipeline;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.test.Person;
+import org.apache.crunch.test.StringWrapper;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.PTypeFamily;
+import org.apache.crunch.types.avro.AvroTypeFamily;
+import org.apache.crunch.types.avro.Avros;
+import org.apache.crunch.types.writable.WritableTypeFamily;
+import org.junit.Assume;
+import org.junit.Rule;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+public class MaterializeIT {
+
+  @Rule
+  public TemporaryPath tmpDir = TemporaryPaths.create();
+
+  @Test
+  public void testMaterializeInput_Writables() throws IOException {
+    runMaterializeInput(new MRPipeline(MaterializeIT.class, tmpDir.getDefaultConfiguration()),
+        WritableTypeFamily.getInstance());
+  }
+
+  @Test
+  public void testMaterializeInput_Avro() throws IOException {
+    runMaterializeInput(new MRPipeline(MaterializeIT.class, tmpDir.getDefaultConfiguration()),
+        AvroTypeFamily.getInstance());
+  }
+
+  @Test
+  public void testMaterializeInput_InMemoryWritables() throws IOException {
+    runMaterializeInput(MemPipeline.getInstance(), WritableTypeFamily.getInstance());
+  }
+
+  @Test
+  public void testMaterializeInput_InMemoryAvro() throws IOException {
+    runMaterializeInput(MemPipeline.getInstance(), AvroTypeFamily.getInstance());
+  }
+
+  @Test
+  public void testMaterializeEmptyIntermediate_Writables() throws IOException {
+    runMaterializeEmptyIntermediate(
+        new MRPipeline(MaterializeIT.class, tmpDir.getDefaultConfiguration()),
+        WritableTypeFamily.getInstance());
+  }
+
+  @Test
+  public void testMaterializeEmptyIntermediate_Avro() throws IOException {
+    runMaterializeEmptyIntermediate(
+        new MRPipeline(MaterializeIT.class, tmpDir.getDefaultConfiguration()),
+        AvroTypeFamily.getInstance());
+  }
+
+  @Test
+  public void testMaterializeEmptyIntermediate_InMemoryWritables() throws IOException {
+    runMaterializeEmptyIntermediate(MemPipeline.getInstance(), WritableTypeFamily.getInstance());
+  }
+
+  @Test
+  public void testMaterializeEmptyIntermediate_InMemoryAvro() throws IOException {
+    runMaterializeEmptyIntermediate(MemPipeline.getInstance(), AvroTypeFamily.getInstance());
+  }
+
+  public void runMaterializeInput(Pipeline pipeline, PTypeFamily typeFamily) throws IOException {
+    List<String> expectedContent = Lists.newArrayList("b", "c", "a", "e");
+    String inputPath = tmpDir.copyResourceFileName("set1.txt");
+
+    PCollection<String> lines = pipeline.readTextFile(inputPath);
+    assertEquals(expectedContent, Lists.newArrayList(lines.materialize()));
+    pipeline.done();
+  }
+
+  public void runMaterializeEmptyIntermediate(Pipeline pipeline, PTypeFamily typeFamily)
+      throws IOException {
+    String inputPath = tmpDir.copyResourceFileName("set1.txt");
+    PCollection<String> empty = pipeline.readTextFile(inputPath).filter(FilterFns.<String>REJECT_ALL());
+
+    assertTrue(Lists.newArrayList(empty.materialize()).isEmpty());
+    pipeline.done();
+  }
+
+  static class StringToStringWrapperPersonPairMapFn extends MapFn<String, Pair<StringWrapper, Person>> {
+
+    @Override
+    public Pair<StringWrapper, Person> map(String input) {
+      Person person = new Person();
+      person.name = input;
+      person.age = 42;
+      person.siblingnames = Lists.<CharSequence> newArrayList();
+      return Pair.of(new StringWrapper(input), person);
+    }
+
+  }
+
+  @Test
+  public void testMaterializeAvroPersonAndReflectsPair_GroupedTable() throws IOException {
+    Assume.assumeTrue(Avros.CAN_COMBINE_SPECIFIC_AND_REFLECT_SCHEMAS);
+    Pipeline pipeline = new MRPipeline(MaterializeIT.class);
+    List<Pair<StringWrapper, Person>> pairList = Lists.newArrayList(pipeline
+        .readTextFile(tmpDir.copyResourceFileName("set1.txt"))
+        .parallelDo(new StringToStringWrapperPersonPairMapFn(),
+            Avros.pairs(Avros.reflects(StringWrapper.class), Avros.records(Person.class)))
+        .materialize());
+    
+    // We just need to make sure this doesn't crash
+    assertEquals(4, pairList.size());
+
+  }
+}
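
A minimal sketch of the materialize() contract the tests above verify, run on the in-memory pipeline so no cluster is needed. The class name is illustrative and MemPipeline.typedCollectionOf is an assumption not shown in this hunk; FilterFns.REJECT_ALL, filter(), and materialize() are the calls exercised by MaterializeIT itself:

    import org.apache.crunch.PCollection;
    import org.apache.crunch.fn.FilterFns;
    import org.apache.crunch.impl.mem.MemPipeline;
    import org.apache.crunch.types.writable.Writables;

    import com.google.common.collect.Lists;

    public class MaterializeSketch {
      public static void main(String[] args) {
        PCollection<String> lines = MemPipeline.typedCollectionOf(
            Writables.strings(), "b", "c", "a", "e");
        // materialize() exposes the contents as a client-side Iterable;
        // for the in-memory pipeline nothing is launched on a cluster.
        System.out.println(Lists.newArrayList(lines.materialize()));      // expected: [b, c, a, e]
        // An intermediate collection that filters everything out materializes empty,
        // which is the second behaviour the tests above check.
        PCollection<String> empty = lines.filter(FilterFns.<String>REJECT_ALL());
        System.out.println(Lists.newArrayList(empty.materialize()).isEmpty()); // expected: true
      }
    }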

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/MaterializeToMapIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/MaterializeToMapIT.java b/crunch-core/src/it/java/org/apache/crunch/MaterializeToMapIT.java
new file mode 100644
index 0000000..7fef30e
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/MaterializeToMapIT.java
@@ -0,0 +1,81 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import static junit.framework.Assert.assertEquals;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.crunch.impl.mem.MemPipeline;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.PTypeFamily;
+import org.junit.Rule;
+import org.junit.Test;
+
+import com.google.common.collect.ImmutableList;
+
+public class MaterializeToMapIT {
+
+  static final ImmutableList<Pair<Integer, String>> kvPairs = ImmutableList.of(Pair.of(0, "a"), Pair.of(1, "b"),
+      Pair.of(2, "c"), Pair.of(3, "e"));
+
+  public void assertMatches(Map<Integer, String> m) {
+    for (Integer k : m.keySet()) {
+      assertEquals(kvPairs.get(k).second(), m.get(k));
+    }
+  }
+
+  @Test
+  public void testMemMaterializeToMap() {
+    assertMatches(MemPipeline.tableOf(kvPairs).materializeToMap());
+  }
+
+  private static class Set1Mapper extends MapFn<String, Pair<Integer, String>> {
+    @Override
+    public Pair<Integer, String> map(String input) {
+
+      int k = -1;
+      if (input.equals("a"))
+        k = 0;
+      else if (input.equals("b"))
+        k = 1;
+      else if (input.equals("c"))
+        k = 2;
+      else if (input.equals("e"))
+        k = 3;
+      return Pair.of(k, input);
+    }
+  }
+  @Rule
+  public TemporaryPath tmpDir = TemporaryPaths.create();
+
+  @Test
+  public void testMRMaterializeToMap() throws IOException {
+    Pipeline p = new MRPipeline(MaterializeToMapIT.class, tmpDir.getDefaultConfiguration());
+    String inputFile = tmpDir.copyResourceFileName("set1.txt");
+    PCollection<String> c = p.readTextFile(inputFile);
+    PTypeFamily tf = c.getTypeFamily();
+    PTable<Integer, String> t = c.parallelDo(new Set1Mapper(), tf.tableOf(tf.ints(), tf.strings()));
+    Map<Integer, String> m = t.materializeToMap();
+    assertMatches(m);
+  }
+
+}
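
The same call, isolated: a short sketch (not part of the patch; the class name is illustrative) of PTable#materializeToMap on an in-memory table, mirroring testMemMaterializeToMap above. MemPipeline.tableOf, Pair.of, and materializeToMap are used exactly as in the test:

    import java.util.Map;

    import org.apache.crunch.PTable;
    import org.apache.crunch.Pair;
    import org.apache.crunch.impl.mem.MemPipeline;

    import com.google.common.collect.ImmutableList;

    public class MaterializeToMapSketch {
      public static void main(String[] args) {
        PTable<Integer, String> table = MemPipeline.tableOf(
            ImmutableList.of(Pair.of(0, "a"), Pair.of(1, "b"), Pair.of(2, "c")));
        // materializeToMap() pulls the table down to the client as a java.util.Map,
        // which is what both the in-memory and the MRPipeline test cases assert against.
        Map<Integer, String> asMap = table.materializeToMap();
        System.out.println(asMap.get(1)); // prints "b"
      }
    }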

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/MultipleOutputIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/MultipleOutputIT.java b/crunch-core/src/it/java/org/apache/crunch/MultipleOutputIT.java
new file mode 100644
index 0000000..1a85b6a
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/MultipleOutputIT.java
@@ -0,0 +1,175 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.io.At;
+import org.apache.crunch.test.StringWrapper;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.PTypeFamily;
+import org.apache.crunch.types.avro.AvroTypeFamily;
+import org.apache.crunch.types.avro.Avros;
+import org.apache.crunch.types.writable.WritableTypeFamily;
+import org.apache.crunch.types.writable.Writables;
+import org.junit.Rule;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+import com.google.common.io.Files;
+
+public class MultipleOutputIT {
+  @Rule
+  public TemporaryPath tmpDir = TemporaryPaths.create();
+
+  public static PCollection<String> evenCountLetters(PCollection<String> words, PTypeFamily typeFamily) {
+    return words.parallelDo("even", new FilterFn<String>() {
+
+      @Override
+      public boolean accept(String input) {
+        return input.length() % 2 == 0;
+      }
+    }, typeFamily.strings());
+  }
+
+  public static PCollection<String> oddCountLetters(PCollection<String> words, PTypeFamily typeFamily) {
+    return words.parallelDo("odd", new FilterFn<String>() {
+
+      @Override
+      public boolean accept(String input) {
+        return input.length() % 2 != 0;
+      }
+    }, typeFamily.strings());
+
+  }
+
+  public static PTable<String, Long> substr(PTable<String, Long> ptable) {
+    return ptable.parallelDo(new DoFn<Pair<String, Long>, Pair<String, Long>>() {
+      public void process(Pair<String, Long> input, Emitter<Pair<String, Long>> emitter) {
+        if (input.first().length() > 0) {
+          emitter.emit(Pair.of(input.first().substring(0, 1), input.second()));
+        }
+      }
+    }, ptable.getPTableType());
+  }
+
+  @Test
+  public void testWritables() throws IOException {
+    run(new MRPipeline(MultipleOutputIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance());
+  }
+
+  @Test
+  public void testAvro() throws IOException {
+    run(new MRPipeline(MultipleOutputIT.class, tmpDir.getDefaultConfiguration()), AvroTypeFamily.getInstance());
+  }
+
+  @Test
+  public void testParallelDosFused() throws IOException {
+
+    PipelineResult result = run(new MRPipeline(MultipleOutputIT.class, tmpDir.getDefaultConfiguration()),
+        WritableTypeFamily.getInstance());
+
+    // Ensure our multiple outputs were fused into a single job.
+    assertEquals("parallel Dos not fused into a single job", 1, result.getStageResults().size());
+  }
+
+  public PipelineResult run(Pipeline pipeline, PTypeFamily typeFamily) throws IOException {
+    String inputPath = tmpDir.copyResourceFileName("letters.txt");
+    String outputPathEven = tmpDir.getFileName("even");
+    String outputPathOdd = tmpDir.getFileName("odd");
+
+    PCollection<String> words = pipeline.read(At.textFile(inputPath, typeFamily.strings()));
+
+    PCollection<String> evenCountWords = evenCountLetters(words, typeFamily);
+    PCollection<String> oddCountWords = oddCountLetters(words, typeFamily);
+    pipeline.writeTextFile(evenCountWords, outputPathEven);
+    pipeline.writeTextFile(oddCountWords, outputPathOdd);
+
+    PipelineResult result = pipeline.done();
+
+    checkFileContents(outputPathEven, Arrays.asList("bb"));
+    checkFileContents(outputPathOdd, Arrays.asList("a"));
+
+    return result;
+  }
+
+  /**
+   * Mutates the state of an input and then emits the mutated object.
+   */
+  static class AppendFn extends DoFn<StringWrapper, StringWrapper> {
+
+    private String value;
+
+    public AppendFn(String value) {
+      this.value = value;
+    }
+
+    @Override
+    public void process(StringWrapper input, Emitter<StringWrapper> emitter) {
+      input.setValue(input.getValue() + value);
+      emitter.emit(input);
+    }
+
+  }
+
+  /**
+   * Fusing multiple pipelines has a risk of running into object reuse bugs.
+   * This test verifies that mutating the state of an object that is passed
+   * through multiple streams of a pipeline doesn't allow one stream to affect
+   * another.
+   */
+  @Test
+  public void testFusedMappersObjectReuseBug() throws IOException {
+    Pipeline pipeline = new MRPipeline(MultipleOutputIT.class, tmpDir.getDefaultConfiguration());
+    PCollection<StringWrapper> stringWrappers = pipeline.readTextFile(tmpDir.copyResourceFileName("set2.txt"))
+        .parallelDo(new StringWrapper.StringToStringWrapperMapFn(), Avros.reflects(StringWrapper.class));
+
+    PCollection<String> stringsA = stringWrappers.parallelDo(new AppendFn("A"), stringWrappers.getPType())
+        .parallelDo(new StringWrapper.StringWrapperToStringMapFn(), Writables.strings());
+    PCollection<String> stringsB = stringWrappers.parallelDo(new AppendFn("B"), stringWrappers.getPType())
+        .parallelDo(new StringWrapper.StringWrapperToStringMapFn(), Writables.strings());
+
+    String outputA = tmpDir.getFileName("stringsA");
+    String outputB = tmpDir.getFileName("stringsB");
+
+    pipeline.writeTextFile(stringsA, outputA);
+    pipeline.writeTextFile(stringsB, outputB);
+    PipelineResult pipelineResult = pipeline.done();
+
+    // Make sure fusing did actually occur
+    assertEquals(1, pipelineResult.getStageResults().size());
+
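+    // Each fused stream should see only its own suffix; if the shared StringWrapper instances
+    // leaked between streams, the outputs would contain mixed "A"/"B" values.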
+    checkFileContents(outputA, Lists.newArrayList("cA", "dA", "aA"));
+    checkFileContents(outputB, Lists.newArrayList("cB", "dB", "aB"));
+
+  }
+
+  private void checkFileContents(String filePath, List<String> expected) throws IOException {
+    File outputFile = new File(filePath, "part-m-00000");
+    List<String> lines = Files.readLines(outputFile, Charset.defaultCharset());
+    assertEquals(expected, lines);
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/PCollectionGetSizeIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/PCollectionGetSizeIT.java b/crunch-core/src/it/java/org/apache/crunch/PCollectionGetSizeIT.java
new file mode 100644
index 0000000..44eb897
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/PCollectionGetSizeIT.java
@@ -0,0 +1,151 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import static com.google.common.collect.Lists.newArrayList;
+import static org.apache.crunch.io.At.sequenceFile;
+import static org.apache.crunch.io.At.textFile;
+import static org.apache.crunch.types.writable.Writables.strings;
+import static org.hamcrest.Matchers.is;
+import static org.junit.Assert.assertThat;
+
+import java.io.IOException;
+
+import org.apache.crunch.fn.FilterFns;
+import org.apache.crunch.impl.mem.MemPipeline;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.junit.Before;
+import org.junit.Ignore;
+import org.junit.Rule;
+import org.junit.Test;
+
+public class PCollectionGetSizeIT {
+  @Rule
+  public TemporaryPath tmpDir = TemporaryPaths.create();
+
+  private String emptyInputPath;
+  private String nonEmptyInputPath;
+  private String outputPath;
+
+  @Before
+  public void setUp() throws IOException {
+    emptyInputPath = tmpDir.copyResourceFileName("emptyTextFile.txt");
+    nonEmptyInputPath = tmpDir.copyResourceFileName("set1.txt");
+    outputPath = tmpDir.getFileName("output");
+  }
+
+  @Test
+  public void testGetSizeOfEmptyInput_MRPipeline() throws IOException {
+    testCollectionGetSizeOfEmptyInput(new MRPipeline(this.getClass(), tmpDir.getDefaultConfiguration()));
+  }
+
+  @Test
+  public void testGetSizeOfEmptyInput_MemPipeline() throws IOException {
+    testCollectionGetSizeOfEmptyInput(MemPipeline.getInstance());
+  }
+
+  private void testCollectionGetSizeOfEmptyInput(Pipeline pipeline) throws IOException {
+
+    assertThat(pipeline.read(textFile(emptyInputPath)).getSize(), is(0L));
+  }
+
+  @Test
+  public void testMaterializeEmptyInput_MRPipeline() throws IOException {
+    testMaterializeEmptyInput(new MRPipeline(this.getClass(), tmpDir.getDefaultConfiguration()));
+  }
+
+  @Test
+  public void testMaterializeEmptyInput_MemPipeline() throws IOException {
+    testMaterializeEmptyInput(MemPipeline.getInstance());
+  }
+
+  private void testMaterializeEmptyInput(Pipeline pipeline) throws IOException {
+    assertThat(newArrayList(pipeline.readTextFile(emptyInputPath).materialize().iterator()).size(), is(0));
+  }
+
+  @Test
+  public void testGetSizeOfEmptyIntermediatePCollection_MRPipeline() throws IOException {
+
+    PCollection<String> emptyIntermediate = createPersistentEmptyIntermediate(
+        new MRPipeline(this.getClass(), tmpDir.getDefaultConfiguration()));
+
+    assertThat(emptyIntermediate.getSize(), is(0L));
+  }
+
+  @Test
+  @Ignore("GetSize of a DoCollection is only an estimate based on scale factor, so we can't count on it being reported as 0")
+  public void testGetSizeOfEmptyIntermediatePCollection_NoSave_MRPipeline() throws IOException {
+
+    PCollection<String> data = new MRPipeline(this.getClass(), tmpDir.getDefaultConfiguration())
+      .readTextFile(nonEmptyInputPath);
+
+    PCollection<String> emptyPCollection = data.filter(FilterFns.<String>REJECT_ALL());
+
+    assertThat(emptyPCollection.getSize(), is(0L));
+  }
+
+  @Test
+  public void testGetSizeOfEmptyIntermediatePCollection_MemPipeline() {
+
+    PCollection<String> emptyIntermediate = createPersistentEmptyIntermediate(MemPipeline.getInstance());
+
+    assertThat(emptyIntermediate.getSize(), is(0L));
+  }
+
+  @Test
+  public void testMaterializeOfEmptyIntermediatePCollection_MRPipeline() throws IOException {
+
+    PCollection<String> emptyIntermediate = createPersistentEmptyIntermediate(
+        new MRPipeline(this.getClass(), tmpDir.getDefaultConfiguration()));
+
+    assertThat(newArrayList(emptyIntermediate.materialize()).size(), is(0));
+  }
+
+  @Test
+  public void testMaterializeOfEmptyIntermediatePCollection_MemPipeline() {
+
+    PCollection<String> emptyIntermediate = createPersistentEmptyIntermediate(MemPipeline.getInstance());
+
+    assertThat(newArrayList(emptyIntermediate.materialize()).size(), is(0));
+  }
+
+  private PCollection<String> createPersistentEmptyIntermediate(Pipeline pipeline) {
+
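+    // Filter everything out, persist the empty result, and read it back so that getSize()
+    // reflects an actual materialized output rather than a scale-factor estimate.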
+    PCollection<String> data = pipeline.readTextFile(nonEmptyInputPath);
+
+    PCollection<String> emptyPCollection = data.filter(FilterFns.<String>REJECT_ALL());
+
+    emptyPCollection.write(sequenceFile(outputPath, strings()));
+
+    pipeline.run();
+
+    return pipeline.read(sequenceFile(outputPath, strings()));
+  }
+
+  @Test(expected = IllegalStateException.class)
+  public void testExpectExceptionForGettingSizeOfNonExistingFile_MRPipeline() throws IOException {
+    new MRPipeline(this.getClass(), tmpDir.getDefaultConfiguration()).readTextFile("non_existing.file").getSize();
+  }
+
+  @Test(expected = IllegalStateException.class)
+  public void testExpectExceptionForGettingSizeOfNonExistingFile_MemPipeline() {
+    MemPipeline.getInstance().readTextFile("non_existing.file").getSize();
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/PObjectsIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/PObjectsIT.java b/crunch-core/src/it/java/org/apache/crunch/PObjectsIT.java
new file mode 100644
index 0000000..6ee849f
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/PObjectsIT.java
@@ -0,0 +1,99 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.crunch.impl.mem.MemPipeline;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.materialize.pobject.PObjectImpl;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.junit.Rule;
+import org.junit.Test;
+
+@SuppressWarnings("serial")
+public class PObjectsIT {
+
+  private static final Integer LINES_IN_SHAKES = 3667;
+
+  @Rule
+  public TemporaryPath tmpDir = TemporaryPaths.create();
+
+  /**
+   * A mock PObject that maps a PCollection of strings to an integer count of the number of
+   * elements in the underlying PCollection.
+   */
+  public static class MockPObjectImpl extends PObjectImpl<String, Integer> {
+    private int numProcessCalls;
+
+    public MockPObjectImpl(PCollection<String> collect) {
+      super(collect);
+      numProcessCalls = 0;
+    }
+
+    @Override
+    public Integer process(Iterable<String> input) {
+      numProcessCalls++;
+      int i = 0;
+      Iterator<String> itr = input.iterator();
+      while (itr.hasNext()) {
+        i++;
+        itr.next();
+      }
+      return i;
+    }
+
+    public int getNumProcessCalls() {
+      return numProcessCalls;
+    }
+  }
+
+  @Test
+  public void testMRPipeline() throws IOException {
+    run(new MRPipeline(PObjectsIT.class, tmpDir.getDefaultConfiguration()));
+  }
+
+  @Test
+  public void testInMemoryPipeline() throws IOException {
+    run(MemPipeline.getInstance());
+  }
+
+  public void run(Pipeline pipeline) throws IOException {
+    String shakesInputPath = tmpDir.copyResourceFileName("shakes.txt");
+    PCollection<String> shakespeare = pipeline.readTextFile(shakesInputPath);
+    MockPObjectImpl lineCount = new MockPObjectImpl(shakespeare);
+    // Get the line count once and verify its correctness.
+    assertEquals("Incorrect number of lines counted from PCollection.", LINES_IN_SHAKES,
+        lineCount.getValue());
+    // And do it again.
+    assertEquals("Incorrect number of lines counted from PCollection.", LINES_IN_SHAKES,
+        lineCount.getValue());
+    // Make sure process was called only once because the PObject's value was cached after the
+    // first call.
+    assertEquals("Process on PObject not called exactly once.", 1,
+        lineCount.getNumProcessCalls());
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/PTableKeyValueIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/PTableKeyValueIT.java b/crunch-core/src/it/java/org/apache/crunch/PTableKeyValueIT.java
new file mode 100644
index 0000000..d56e122
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/PTableKeyValueIT.java
@@ -0,0 +1,103 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+
+import junit.framework.Assert;
+
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.io.At;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.PTypeFamily;
+import org.apache.crunch.types.avro.AvroTypeFamily;
+import org.apache.crunch.types.writable.WritableTypeFamily;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+import com.google.common.collect.Lists;
+
+@RunWith(value = Parameterized.class)
+public class PTableKeyValueIT implements Serializable {
+
+  private static final long serialVersionUID = 4374227704751746689L;
+
+  private transient PTypeFamily typeFamily;
+  private transient MRPipeline pipeline;
+  private transient String inputFile;
+  @Rule
+  public transient TemporaryPath tmpDir = TemporaryPaths.create();
+
+  @Before
+  public void setUp() throws IOException {
+    pipeline = new MRPipeline(PTableKeyValueIT.class, tmpDir.getDefaultConfiguration());
+    inputFile = tmpDir.copyResourceFileName("set1.txt");
+  }
+
+  @After
+  public void tearDown() {
+    pipeline.done();
+  }
+
+  public PTableKeyValueIT(PTypeFamily typeFamily) {
+    this.typeFamily = typeFamily;
+  }
+
+  @Parameters
+  public static Collection<Object[]> data() {
+    Object[][] data = new Object[][] { { WritableTypeFamily.getInstance() }, { AvroTypeFamily.getInstance() } };
+    return Arrays.asList(data);
+  }
+
+  @Test
+  public void testKeysAndValues() throws Exception {
+
+    PCollection<String> collection = pipeline.read(At.textFile(inputFile, typeFamily.strings()));
+
+    PTable<String, String> table = collection.parallelDo(new DoFn<String, Pair<String, String>>() {
+
+      @Override
+      public void process(String input, Emitter<Pair<String, String>> emitter) {
+        emitter.emit(Pair.of(input.toUpperCase(), input));
+
+      }
+    }, typeFamily.tableOf(typeFamily.strings(), typeFamily.strings()));
+
+    PCollection<String> keys = table.keys();
+    PCollection<String> values = table.values();
+
+    ArrayList<String> keyList = Lists.newArrayList(keys.materialize().iterator());
+    ArrayList<String> valueList = Lists.newArrayList(values.materialize().iterator());
+
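+    // The keys and values should line up element-for-element: each key is the uppercased
+    // form of the value at the same position.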
+    Assert.assertEquals(keyList.size(), valueList.size());
+    for (int i = 0; i < keyList.size(); i++) {
+      Assert.assertEquals(keyList.get(i), valueList.get(i).toUpperCase());
+    }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/PageRankIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/PageRankIT.java b/crunch-core/src/it/java/org/apache/crunch/PageRankIT.java
new file mode 100644
index 0000000..6291ef8
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/PageRankIT.java
@@ -0,0 +1,168 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.crunch.impl.mem.MemPipeline;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.lib.Aggregate;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.PTypeFamily;
+import org.apache.crunch.types.PTypes;
+import org.apache.crunch.types.avro.AvroTypeFamily;
+import org.apache.crunch.types.avro.Avros;
+import org.apache.crunch.types.writable.WritableTypeFamily;
+import org.junit.Rule;
+import org.junit.Test;
+
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+
+public class PageRankIT {
+
+  public static class PageRankData {
+    public float score;
+    public float lastScore;
+    public List<String> urls;
+
+    public PageRankData() {
+    }
+
+    public PageRankData(float score, float lastScore, Iterable<String> urls) {
+      this.score = score;
+      this.lastScore = lastScore;
+      this.urls = Lists.newArrayList(urls);
+    }
+
+    public PageRankData next(float newScore) {
+      return new PageRankData(newScore, score, urls);
+    }
+
+    public float propagatedScore() {
+      return score / urls.size();
+    }
+
+    @Override
+    public String toString() {
+      return score + " " + lastScore + " " + urls;
+    }
+  }
+
+  @Rule
+  public TemporaryPath tmpDir = TemporaryPaths.create();
+
+  @Test
+  public void testAvroReflect() throws Exception {
+    PTypeFamily tf = AvroTypeFamily.getInstance();
+    PType<PageRankData> prType = Avros.reflects(PageRankData.class);
+    String urlInput = tmpDir.copyResourceFileName("urls.txt");
+    run(new MRPipeline(PageRankIT.class, tmpDir.getDefaultConfiguration()),
+        urlInput, prType, tf);
+  }
+
+  @Test
+  public void testAvroReflectInMemory() throws Exception {
+    PTypeFamily tf = AvroTypeFamily.getInstance();
+    PType<PageRankData> prType = Avros.reflects(PageRankData.class);
+    String urlInput = tmpDir.copyResourceFileName("urls.txt");
+    run(MemPipeline.getInstance(), urlInput, prType, tf);
+  }
+
+  @Test
+  public void testAvroJSON() throws Exception {
+    PTypeFamily tf = AvroTypeFamily.getInstance();
+    PType<PageRankData> prType = PTypes.jsonString(PageRankData.class, tf);
+    String urlInput = tmpDir.copyResourceFileName("urls.txt");
+    run(new MRPipeline(PageRankIT.class, tmpDir.getDefaultConfiguration()),
+        urlInput, prType, tf);
+  }
+
+  @Test
+  public void testWritablesJSON() throws Exception {
+    PTypeFamily tf = WritableTypeFamily.getInstance();
+    PType<PageRankData> prType = PTypes.jsonString(PageRankData.class, tf);
+    String urlInput = tmpDir.copyResourceFileName("urls.txt");
+    run(new MRPipeline(PageRankIT.class, tmpDir.getDefaultConfiguration()),
+        urlInput, prType, tf);
+  }
+
+  public static PTable<String, PageRankData> pageRank(PTable<String, PageRankData> input, final float d) {
+    PTypeFamily ptf = input.getTypeFamily();
+    PTable<String, Float> outbound = input.parallelDo(new DoFn<Pair<String, PageRankData>, Pair<String, Float>>() {
+      @Override
+      public void process(Pair<String, PageRankData> input, Emitter<Pair<String, Float>> emitter) {
+        PageRankData prd = input.second();
+        for (String link : prd.urls) {
+          emitter.emit(Pair.of(link, prd.propagatedScore()));
+        }
+      }
+    }, ptf.tableOf(ptf.strings(), ptf.floats()));
+
+    return input.cogroup(outbound).parallelDo(
+        new MapFn<Pair<String, Pair<Collection<PageRankData>, Collection<Float>>>, Pair<String, PageRankData>>() {
+          @Override
+          public Pair<String, PageRankData> map(Pair<String, Pair<Collection<PageRankData>, Collection<Float>>> input) {
+            PageRankData prd = Iterables.getOnlyElement(input.second().first());
+            Collection<Float> propagatedScores = input.second().second();
+            float sum = 0.0f;
+            for (Float s : propagatedScores) {
+              sum += s;
+            }
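+            // Damped update as used in this test: newScore = d + (1 - d) * (sum of propagated scores).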
+            return Pair.of(input.first(), prd.next(d + (1.0f - d) * sum));
+          }
+        }, input.getPTableType());
+  }
+
+  public static void run(Pipeline pipeline, String urlInput,
+      PType<PageRankData> prType, PTypeFamily ptf) throws Exception {
+    PTable<String, PageRankData> scores = pipeline.readTextFile(urlInput)
+        .parallelDo(new MapFn<String, Pair<String, String>>() {
+          @Override
+          public Pair<String, String> map(String input) {
+            String[] urls = input.split("\\t");
+            return Pair.of(urls[0], urls[1]);
+          }
+        }, ptf.tableOf(ptf.strings(), ptf.strings())).groupByKey()
+        .parallelDo(new MapFn<Pair<String, Iterable<String>>, Pair<String, PageRankData>>() {
+          @Override
+          public Pair<String, PageRankData> map(Pair<String, Iterable<String>> input) {
+            return Pair.of(input.first(), new PageRankData(1.0f, 0.0f, input.second()));
+          }
+        }, ptf.tableOf(ptf.strings(), prType));
+
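+    // Iterate PageRank until the largest per-page score change (delta) drops below 0.01.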
+    Float delta = 1.0f;
+    while (delta > 0.01) {
+      scores = pageRank(scores, 0.5f);
+      scores.materialize().iterator(); // force the write
+      delta = Aggregate.max(scores.parallelDo(new MapFn<Pair<String, PageRankData>, Float>() {
+        @Override
+        public Float map(Pair<String, PageRankData> input) {
+          PageRankData prd = input.second();
+          return Math.abs(prd.score - prd.lastScore);
+        }
+      }, ptf.floats())).getValue();
+    }
+    assertEquals(0.0048, delta, 0.001);
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/StageResultsCountersIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/StageResultsCountersIT.java b/crunch-core/src/it/java/org/apache/crunch/StageResultsCountersIT.java
new file mode 100644
index 0000000..19fc302
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/StageResultsCountersIT.java
@@ -0,0 +1,135 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import static junit.framework.Assert.assertEquals;
+import static junit.framework.Assert.assertTrue;
+
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.crunch.PipelineResult.StageResult;
+import org.apache.crunch.impl.mem.MemPipeline;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.io.From;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.PTypeFamily;
+import org.apache.crunch.types.avro.AvroTypeFamily;
+import org.apache.crunch.types.writable.WritableTypeFamily;
+import org.apache.hadoop.mapreduce.Counter;
+import org.junit.After;
+import org.junit.Rule;
+import org.junit.Test;
+
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+
+public class StageResultsCountersIT {
+
+  @Rule
+  public TemporaryPath tmpDir = TemporaryPaths.create();
+
+  public static HashSet<String> SPECIAL_KEYWORDS = Sets.newHashSet("AND", "OR", "NOT");
+
+  public static String KEYWORDS_COUNTER_GROUP = "KEYWORDS_COUNTER_GROUP";
+
+  @After
+  public void after() {
+    MemPipeline.clearCounters();
+  }
+  
+  @Test
+  public void testStageResultsCountersMRWritables() throws Exception {
+    testSpecialKeywordCount(new MRPipeline(StageResultsCountersIT.class, tmpDir.getDefaultConfiguration()),
+        WritableTypeFamily.getInstance());
+  }
+
+  @Test
+  public void testStageResultsCountersMRAvro() throws Exception {
+    testSpecialKeywordCount(new MRPipeline(StageResultsCountersIT.class, tmpDir.getDefaultConfiguration()),
+        AvroTypeFamily.getInstance());
+  }
+
+  @Test
+  public void testStageResultsCountersMemWritables() throws Exception {
+    testSpecialKeywordCount(MemPipeline.getInstance(), WritableTypeFamily.getInstance());
+  }
+
+  @Test
+  public void testStageResultsCountersMemAvro() throws Exception {
+    testSpecialKeywordCount(MemPipeline.getInstance(), AvroTypeFamily.getInstance());
+  }
+
+  public void testSpecialKeywordCount(Pipeline pipeline, PTypeFamily tf) throws Exception {
+
+    String rowsInputPath = tmpDir.copyResourceFileName("shakes.txt");
+
+    PipelineResult result = countSpecialKeywords(pipeline, rowsInputPath, tf);
+
+    assertTrue(result.succeeded());
+
+    Map<String, Long> keywordsMap = countersToMap(result.getStageResults(), KEYWORDS_COUNTER_GROUP);
+
+    assertEquals(3, keywordsMap.size());
+
+    assertEquals("{NOT=157, AND=596, OR=81}", keywordsMap.toString());
+  }
+
+  private static PipelineResult countSpecialKeywords(Pipeline pipeline, String inputFileName, PTypeFamily tf) {
+
+    pipeline.read(From.textFile(inputFileName)).parallelDo(new DoFn<String, Void>() {
+
+      @Override
+      public void process(String text, Emitter<Void> emitter) {
+
+        if (!StringUtils.isBlank(text)) {
+
+          String[] tokens = text.toUpperCase().split("\\s");
+
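+          // Increment a named counter for each special keyword; these counters surface in the
+          // StageResult objects inspected after the pipeline completes.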
+          for (String token : tokens) {
+            if (SPECIAL_KEYWORDS.contains(token)) {
+              getCounter(KEYWORDS_COUNTER_GROUP, token).increment(1);
+            }
+          }
+        }
+      }
+    }, tf.nulls()).materialize(); // TODO can we avoid the materialize ?
+
+    return pipeline.done();
+  }
+
+  private static Map<String, Long> countersToMap(List<StageResult> stages, String counterGroupName) {
+
+    Map<String, Long> countersMap = Maps.newHashMap();
+
+    for (StageResult sr : stages) {
+      Iterator<Counter> iterator = sr.getCounters().getGroup(counterGroupName).iterator();
+      while (iterator.hasNext()) {
+        Counter counter = iterator.next();
+        countersMap.put(counter.getDisplayName(), counter.getValue());
+      }
+    }
+
+    return countersMap;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/TermFrequencyIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/TermFrequencyIT.java b/crunch-core/src/it/java/org/apache/crunch/TermFrequencyIT.java
new file mode 100644
index 0000000..ca66aa8
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/TermFrequencyIT.java
@@ -0,0 +1,135 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.Serializable;
+
+import org.apache.crunch.impl.mem.MemPipeline;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.io.At;
+import org.apache.crunch.io.ReadableSourceTarget;
+import org.apache.crunch.lib.Aggregate;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.PTypeFamily;
+import org.apache.crunch.types.writable.WritableTypeFamily;
+import org.junit.Rule;
+import org.junit.Test;
+
+@SuppressWarnings("serial")
+public class TermFrequencyIT implements Serializable {
+  @Rule
+  public transient TemporaryPath tmpDir = TemporaryPaths.create();
+
+  @Test
+  public void testTermFrequencyWithNoTransform() throws IOException {
+    run(new MRPipeline(TermFrequencyIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance(), false);
+  }
+
+  @Test
+  public void testTermFrequencyWithTransform() throws IOException {
+    run(new MRPipeline(TermFrequencyIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance(), true);
+  }
+
+  @Test
+  public void testTermFrequencyNoTransformInMemory() throws IOException {
+    run(MemPipeline.getInstance(), WritableTypeFamily.getInstance(), false);
+  }
+
+  @Test
+  public void testTermFrequencyWithTransformInMemory() throws IOException {
+    run(MemPipeline.getInstance(), WritableTypeFamily.getInstance(), true);
+  }
+
+  public void run(Pipeline pipeline, PTypeFamily typeFamily, boolean transformTF) throws IOException {
+    String input = tmpDir.copyResourceFileName("docs.txt");
+
+    File transformedOutput = tmpDir.getFile("transformed-output");
+    File tfOutput = tmpDir.getFile("tf-output");
+
+    PCollection<String> docs = pipeline.readTextFile(input);
+
+    PTypeFamily ptf = docs.getTypeFamily();
+
+    /*
+     * Input: String Input title text
+     * 
+     * Output: PTable<Pair<String, String>, Long> Pair<Pair<word, title>, count
+     * in title>
+     */
+    PTable<Pair<String, String>, Long> tf = Aggregate.count(docs.parallelDo("term document frequency",
+        new DoFn<String, Pair<String, String>>() {
+          @Override
+          public void process(String doc, Emitter<Pair<String, String>> emitter) {
+            String[] kv = doc.split("\t");
+            String title = kv[0];
+            String text = kv[1];
+            for (String word : text.split("\\W+")) {
+              if (word.length() > 0) {
+                Pair<String, String> pair = Pair.of(word.toLowerCase(), title);
+                emitter.emit(pair);
+              }
+            }
+          }
+        }, ptf.pairs(ptf.strings(), ptf.strings())));
+
+    if (transformTF) {
+      /*
+       * Input: Pair<Pair<String, String>, Long> Pair<Pair<word, title>, count
+       * in title>
+       * 
+       * Output: PTable<String, Pair<String, Long>> PTable<word, Pair<title,
+       * count in title>>
+       */
+      PTable<String, Pair<String, Long>> wordDocumentCountPair = tf.parallelDo("transform wordDocumentPairCount",
+          new MapFn<Pair<Pair<String, String>, Long>, Pair<String, Pair<String, Long>>>() {
+            @Override
+            public Pair<String, Pair<String, Long>> map(Pair<Pair<String, String>, Long> input) {
+              Pair<String, String> wordDocumentPair = input.first();
+              return Pair.of(wordDocumentPair.first(), Pair.of(wordDocumentPair.second(), input.second()));
+            }
+          }, ptf.tableOf(ptf.strings(), ptf.pairs(ptf.strings(), ptf.longs())));
+
+      pipeline.writeTextFile(wordDocumentCountPair, transformedOutput.getAbsolutePath());
+    }
+
+    SourceTarget<String> st = At.textFile(tfOutput.getAbsolutePath());
+    pipeline.write(tf, st);
+
+    pipeline.run();
+
+    // Verify the expected term-frequency line is present and the incorrect count is absent.
+    Iterable<String> lines = ((ReadableSourceTarget<String>) st).read(pipeline.getConfiguration());
+    boolean passed = false;
+    for (String line : lines) {
+      if ("[well,A]\t0".equals(line)) {
+        fail("Found " + line + " but 'well' appears once in document A");
+      }
+      if ("[well,A]\t1".equals(line)) {
+        passed = true;
+      }
+    }
+    assertTrue(passed);
+    pipeline.done();
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/TextPairIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/TextPairIT.java b/crunch-core/src/it/java/org/apache/crunch/TextPairIT.java
new file mode 100644
index 0000000..55d9af9
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/TextPairIT.java
@@ -0,0 +1,72 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.io.From;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.writable.Writables;
+import org.junit.Rule;
+import org.junit.Test;
+
+public class TextPairIT {
+  @Rule
+  public TemporaryPath tmpDir = TemporaryPaths.create();
+
+  @Test
+  public void testWritables() throws IOException {
+    run(new MRPipeline(TextPairIT.class, tmpDir.getDefaultConfiguration()));
+  }
+
+  private static final String CANARY = "Writables.STRING_TO_TEXT";
+
+  public static PCollection<Pair<String, String>> wordDuplicate(PCollection<String> words) {
+    return words.parallelDo("my word duplicator", new DoFn<String, Pair<String, String>>() {
+      public void process(String line, Emitter<Pair<String, String>> emitter) {
+        for (String word : line.split("\\W+")) {
+          if (word.length() > 0) {
+            Pair<String, String> pair = Pair.of(CANARY, word);
+            emitter.emit(pair);
+          }
+        }
+      }
+    }, Writables.pairs(Writables.strings(), Writables.strings()));
+  }
+
+  public void run(Pipeline pipeline) throws IOException {
+    String input = tmpDir.copyResourceFileName("shakes.txt");
+
+    PCollection<String> shakespeare = pipeline.read(From.textFile(input));
+    Iterable<Pair<String, String>> lines = pipeline.materialize(wordDuplicate(shakespeare));
+    boolean passed = false;
+    for (Pair<String, String> line : lines) {
+      if (line.first().contains(CANARY)) {
+        passed = true;
+        break;
+      }
+    }
+
+    pipeline.done();
+    assertTrue(passed);
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/TfIdfIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/TfIdfIT.java b/crunch-core/src/it/java/org/apache/crunch/TfIdfIT.java
new file mode 100644
index 0000000..218f538
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/TfIdfIT.java
@@ -0,0 +1,224 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.Serializable;
+import java.nio.charset.Charset;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.crunch.fn.MapKeysFn;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.io.seq.SeqFileSourceTarget;
+import org.apache.crunch.lib.Aggregate;
+import org.apache.crunch.lib.Join;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.PTypeFamily;
+import org.apache.crunch.types.writable.WritableTypeFamily;
+import org.apache.hadoop.fs.Path;
+import org.junit.Rule;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+import com.google.common.io.Files;
+
+@SuppressWarnings("serial")
+public class TfIdfIT implements Serializable {
+  @Rule
+  public transient TemporaryPath tmpDir = TemporaryPaths.create();
+
+  // Total number of documents; ideally this would be derived from the input rather than hardcoded.
+  protected static final double N = 2;
+
+  @Test
+  public void testWritablesSingleRun() throws IOException {
+    run(new MRPipeline(TfIdfIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance(), true);
+  }
+
+  @Test
+  public void testWritablesMultiRun() throws IOException {
+    run(new MRPipeline(TfIdfIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance(), false);
+  }
+
+  /**
+   * Generates TF-IDF scores for the input documents.
+   */
+  public PTable<String, Collection<Pair<String, Double>>> generateTFIDF(PCollection<String> docs, Path termFreqPath,
+      PTypeFamily ptf) throws IOException {
+
+    /*
+     * Input: String Input title text
+     * 
+     * Output: PTable<Pair<String, String>, Long> Pair<Pair<word, title>, count
+     * in title>
+     */
+    PTable<Pair<String, String>, Long> tf = Aggregate.count(docs.parallelDo("term document frequency",
+        new DoFn<String, Pair<String, String>>() {
+          @Override
+          public void process(String doc, Emitter<Pair<String, String>> emitter) {
+            String[] kv = doc.split("\t");
+            String title = kv[0];
+            String text = kv[1];
+            for (String word : text.split("\\W+")) {
+              if (word.length() > 0) {
+                Pair<String, String> pair = Pair.of(word.toLowerCase(), title);
+                emitter.emit(pair);
+              }
+            }
+          }
+        }, ptf.pairs(ptf.strings(), ptf.strings())));
+
+    tf.write(new SeqFileSourceTarget<Pair<Pair<String, String>, Long>>(termFreqPath, tf.getPType()));
+
+    /*
+     * Input: Pair<Pair<String, String>, Long> Pair<Pair<word, title>, count in
+     * title>
+     * 
+     * Output: PTable<String, Long> PTable<word, # of docs containing word>
+     */
+    PTable<String, Long> n = Aggregate.count(tf.parallelDo("little n (# of docs contain word)",
+        new DoFn<Pair<Pair<String, String>, Long>, String>() {
+          @Override
+          public void process(Pair<Pair<String, String>, Long> input, Emitter<String> emitter) {
+            emitter.emit(input.first().first());
+          }
+        }, ptf.strings()));
+
+    /*
+     * Input: Pair<Pair<String, String>, Long> Pair<Pair<word, title>, count in
+     * title>
+     * 
+     * Output: PTable<String, Pair<String, Long>> PTable<word, Pair<title, count
+     * in title>>
+     */
+    PTable<String, Collection<Pair<String, Long>>> wordDocumentCountPair = tf.parallelDo(
+        "transform wordDocumentPairCount",
+        new DoFn<Pair<Pair<String, String>, Long>, Pair<String, Collection<Pair<String, Long>>>>() {
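+          // Buffers (title, count) pairs for the current word and flushes them as a single
+          // collection whenever the word changes; cleanup() flushes the last buffer.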
+          Collection<Pair<String, Long>> buffer;
+          String key;
+
+          @Override
+          public void process(Pair<Pair<String, String>, Long> input,
+              Emitter<Pair<String, Collection<Pair<String, Long>>>> emitter) {
+            Pair<String, String> wordDocumentPair = input.first();
+            if (!wordDocumentPair.first().equals(key)) {
+              flush(emitter);
+              key = wordDocumentPair.first();
+              buffer = Lists.newArrayList();
+            }
+            buffer.add(Pair.of(wordDocumentPair.second(), input.second()));
+          }
+
+          protected void flush(Emitter<Pair<String, Collection<Pair<String, Long>>>> emitter) {
+            if (buffer != null) {
+              emitter.emit(Pair.of(key, buffer));
+              buffer = null;
+            }
+          }
+
+          @Override
+          public void cleanup(Emitter<Pair<String, Collection<Pair<String, Long>>>> emitter) {
+            flush(emitter);
+          }
+        }, ptf.tableOf(ptf.strings(), ptf.collections(ptf.pairs(ptf.strings(), ptf.longs()))));
+
+    PTable<String, Pair<Long, Collection<Pair<String, Long>>>> joinedResults = Join.join(n, wordDocumentCountPair);
+
+    /*
+     * Input: Pair<String, Pair<Long, Collection<Pair<String, Long>>> Pair<word,
+     * Pair<# of docs containing word, Collection<Pair<title, term frequency>>>
+     * 
+     * Output: Pair<String, Collection<Pair<String, Double>>> Pair<word,
+     * Collection<Pair<title, tfidf>>>
+     */
+    return joinedResults
+        .parallelDo(
+            "calculate tfidf",
+            new MapFn<Pair<String, Pair<Long, Collection<Pair<String, Long>>>>, Pair<String, Collection<Pair<String, Double>>>>() {
+              @Override
+              public Pair<String, Collection<Pair<String, Double>>> map(
+                  Pair<String, Pair<Long, Collection<Pair<String, Long>>>> input) {
+                Collection<Pair<String, Double>> tfidfs = Lists.newArrayList();
+                String word = input.first();
+                double n = input.second().first();
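+                // idf = ln(N / n); with N = 2 documents and a word present in one of them,
+                // idf = ln(2) = 0.6931471805599453, the value checked in the assertions below.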
+                double idf = Math.log(N / n);
+                for (Pair<String, Long> tf : input.second().second()) {
+                  double tfidf = tf.second() * idf;
+                  tfidfs.add(Pair.of(tf.first(), tfidf));
+                }
+                return Pair.of(word, tfidfs);
+              }
+
+            }, ptf.tableOf(ptf.strings(), ptf.collections(ptf.pairs(ptf.strings(), ptf.doubles()))));
+  }
+
+  public void run(Pipeline pipeline, PTypeFamily typeFamily, boolean singleRun) throws IOException {
+    String inputFile = tmpDir.copyResourceFileName("docs.txt");
+    String outputPath1 = tmpDir.getFileName("output1");
+    String outputPath2 = tmpDir.getFileName("output2");
+
+    Path tfPath = tmpDir.getPath("termfreq");
+
+    PCollection<String> docs = pipeline.readTextFile(inputFile);
+
+    PTable<String, Collection<Pair<String, Double>>> results = generateTFIDF(docs, tfPath, typeFamily);
+    pipeline.writeTextFile(results, outputPath1);
+    if (!singleRun) {
+      pipeline.run();
+    }
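+    // In the multi-run case the lowercase output is materialized by the run() above; in the
+    // single-run case both outputs are produced by the final done() call.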
+
+    PTable<String, Collection<Pair<String, Double>>> uppercased = results.parallelDo(
+        new MapKeysFn<String, String, Collection<Pair<String, Double>>>() {
+          @Override
+          public String map(String k1) {
+            return k1.toUpperCase();
+          }
+        }, results.getPTableType());
+    pipeline.writeTextFile(uppercased, outputPath2);
+    pipeline.done();
+
+    // Check the lowercase version...
+    File outputFile = new File(outputPath1, "part-r-00000");
+    List<String> lines = Files.readLines(outputFile, Charset.defaultCharset());
+    boolean passed = false;
+    for (String line : lines) {
+      if (line.startsWith("[the") && line.contains("B,0.6931471805599453")) {
+        passed = true;
+        break;
+      }
+    }
+    assertTrue(passed);
+
+    // ...and the uppercase version
+    outputFile = new File(outputPath2, "part-r-00000");
+    lines = Files.readLines(outputFile, Charset.defaultCharset());
+    passed = false;
+    for (String line : lines) {
+      if (line.startsWith("[THE") && line.contains("B,0.6931471805599453")) {
+        passed = true;
+        break;
+      }
+    }
+    assertTrue(passed);
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/TupleNClassCastBugIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/TupleNClassCastBugIT.java b/crunch-core/src/it/java/org/apache/crunch/TupleNClassCastBugIT.java
new file mode 100644
index 0000000..e49f4d5
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/TupleNClassCastBugIT.java
@@ -0,0 +1,95 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.util.List;
+
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.PTypeFamily;
+import org.apache.crunch.types.avro.AvroTypeFamily;
+import org.apache.crunch.types.writable.WritableTypeFamily;
+import org.junit.Rule;
+import org.junit.Test;
+
+import com.google.common.io.Files;
+
+
+public class TupleNClassCastBugIT {
+  @Rule
+  public TemporaryPath tmpDir = TemporaryPaths.create();
+
+  public static PCollection<TupleN> mapGroupDo(PCollection<String> lines, PTypeFamily ptf) {
+    PTable<String, TupleN> mapped = lines.parallelDo(new MapFn<String, Pair<String, TupleN>>() {
+
+      @Override
+      public Pair<String, TupleN> map(String line) {
+        String[] columns = line.split("\\t");
+        String docId = columns[0];
+        String docLine = columns[1];
+        return Pair.of(docId, new TupleN(docId, docLine));
+      }
+    }, ptf.tableOf(ptf.strings(), ptf.tuples(ptf.strings(), ptf.strings())));
+    return mapped.groupByKey().parallelDo(new DoFn<Pair<String, Iterable<TupleN>>, TupleN>() {
+      @Override
+      public void process(Pair<String, Iterable<TupleN>> input, Emitter<TupleN> tupleNEmitter) {
+        for (TupleN tuple : input.second()) {
+          tupleNEmitter.emit(tuple);
+        }
+      }
+    }, ptf.tuples(ptf.strings(), ptf.strings()));
+  }
+
+  @Test
+  public void testWritables() throws IOException {
+    run(new MRPipeline(TupleNClassCastBugIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance());
+  }
+
+  @Test
+  public void testAvro() throws IOException {
+    run(new MRPipeline(TupleNClassCastBugIT.class, tmpDir.getDefaultConfiguration()), AvroTypeFamily.getInstance());
+  }
+
+  public void run(Pipeline pipeline, PTypeFamily typeFamily) throws IOException {
+    String inputPath = tmpDir.copyResourceFileName("docs.txt");
+    String outputPath = tmpDir.getFileName("output");
+
+    PCollection<String> docLines = pipeline.readTextFile(inputPath);
+    pipeline.writeTextFile(mapGroupDo(docLines, typeFamily), outputPath);
+    pipeline.done();
+
+    // We are not directly testing the output; we are looking for a ClassCastException,
+    // which is thrown in a different thread during the reduce phase. If all is well,
+    // the file will exist and have six lines. Otherwise the bug is present.
+    File outputFile = new File(outputPath, "part-r-00000");
+    List<String> lines = Files.readLines(outputFile, Charset.defaultCharset());
+    assertEquals(6, lines.size());
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/UnionFromSameSourceIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/UnionFromSameSourceIT.java b/crunch-core/src/it/java/org/apache/crunch/UnionFromSameSourceIT.java
new file mode 100644
index 0000000..501a944
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/UnionFromSameSourceIT.java
@@ -0,0 +1,132 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+
+import org.apache.crunch.fn.IdentityFn;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.PTableType;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.writable.Writables;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+
+/**
+ * Collection of tests re-using the same PCollection in various unions.
+ */
+public class UnionFromSameSourceIT {
+
+  private static final int NUM_ELEMENTS = 4;
+
+  @Rule
+  public transient TemporaryPath tmpDir = TemporaryPaths.create();
+
+  private Pipeline pipeline;
+  private PType<String> elementType = Writables.strings();
+  private PTableType<String, String> tableType = Writables.tableOf(Writables.strings(),
+    Writables.strings());
+
+  @Before
+  public void setUp() {
+    pipeline = new MRPipeline(UnionFromSameSourceIT.class, tmpDir.getDefaultConfiguration());
+  }
+
+  @Test
+  public void testUnion_SingleRead() throws IOException {
+    PCollection<String> strings = pipeline.readTextFile(tmpDir.copyResourceFileName("set1.txt"));
+    PCollection<String> union = strings.union(strings.parallelDo(IdentityFn.<String> getInstance(),
+      strings.getPType()));
+
+    assertEquals(NUM_ELEMENTS * 2, getCount(union));
+  }
+
+  @Test
+  public void testUnion_TwoReads() throws IOException {
+    PCollection<String> stringsA = pipeline.readTextFile(tmpDir.copyResourceFileName("set1.txt"));
+    PCollection<String> stringsB = pipeline.readTextFile(tmpDir.copyResourceFileName("set1.txt"));
+
+    PCollection<String> union = stringsA.union(stringsB);
+
+    assertEquals(NUM_ELEMENTS * 2, getCount(union));
+  }
+
+  @Test
+  public void testDoubleUnion_EndingWithGBK() throws IOException {
+    runDoubleUnionPipeline(true);
+  }
+
+  @Test
+  public void testDoubleUnion_EndingWithoutGBK() throws IOException {
+    runDoubleUnionPipeline(false);
+  }
+
+  private void runDoubleUnionPipeline(boolean endWithGBK) throws IOException {
+    PCollection<String> strings = pipeline.readTextFile(tmpDir.copyResourceFileName("set1.txt"));
+    PTable<String, String> tableA = strings.parallelDo("to table A", new ToTableFn(), tableType);
+    PTable<String, String> tableB = strings.parallelDo("to table B", new ToTableFn(), tableType);
+
+    PGroupedTable<String, String> groupedTable = tableA.union(tableB).groupByKey();
+    PCollection<String> ungrouped = groupedTable.parallelDo("ungroup before union",
+      new FromGroupedTableFn(), elementType).union(
+      strings.parallelDo("fake id", IdentityFn.<String> getInstance(), elementType));
+
+    PTable<String, String> table = ungrouped.parallelDo("union back to table", new ToTableFn(),
+      tableType);
+
+    if (endWithGBK) {
+      table = table.groupByKey().ungroup();
+    }
+
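+    // tableA and tableB each contribute NUM_ELEMENTS rows to the grouped union, and the
+    // identity branch adds another NUM_ELEMENTS, giving 3 * NUM_ELEMENTS rows in total.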
+    assertEquals(3 * NUM_ELEMENTS, getCount(table));
+  }
+
+  private int getCount(PCollection<?> pcollection) {
+    int cnt = 0;
+    for (Object v : pcollection.materialize()) {
+      cnt++;
+    }
+    return cnt;
+  }
+
+  private static class ToTableFn extends MapFn<String, Pair<String, String>> {
+
+    @Override
+    public Pair<String, String> map(String input) {
+      return Pair.of(input, input);
+    }
+
+  }
+
+  private static class FromGroupedTableFn extends DoFn<Pair<String, Iterable<String>>, String> {
+
+    @Override
+    public void process(Pair<String, Iterable<String>> input, Emitter<String> emitter) {
+      for (String value : input.second()) {
+        emitter.emit(value);
+      }
+    }
+
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/UnionIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/UnionIT.java b/crunch-core/src/it/java/org/apache/crunch/UnionIT.java
new file mode 100644
index 0000000..1c60a1b
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/UnionIT.java
@@ -0,0 +1,136 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import static org.hamcrest.Matchers.is;
+import static org.junit.Assert.assertThat;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.crunch.fn.Aggregators;
+import org.apache.crunch.fn.IdentityFn;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.test.Tests;
+import org.apache.crunch.types.avro.Avros;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableMultiset;
+
+
+public class UnionIT {
+
+  @Rule
+  public TemporaryPath tmpDir = TemporaryPaths.create();
+  private MRPipeline pipeline;
+  private PCollection<String> words1;
+  private PCollection<String> words2;
+
+  @Before
+  public void setUp() throws IOException {
+    pipeline = new MRPipeline(UnionIT.class, tmpDir.getDefaultConfiguration());
+    words1 = pipeline.readTextFile(tmpDir.copyResourceFileName(Tests.resource(this, "src1.txt")));
+    words2 = pipeline.readTextFile(tmpDir.copyResourceFileName(Tests.resource(this, "src2.txt")));
+  }
+
+  @After
+  public void tearDown() {
+    pipeline.done();
+  }
+
+  @Test
+  public void testUnion() throws Exception {
+    IdentityFn<String> identity = IdentityFn.getInstance();
+    words1 = words1.parallelDo(identity, Avros.strings());
+    words2 = words2.parallelDo(identity, Avros.strings());
+
+    PCollection<String> union = words1.union(words2);
+
+    ImmutableMultiset<String> actual = ImmutableMultiset.copyOf(union.materialize());
+    assertThat(actual.elementSet().size(), is(3));
+    assertThat(actual.count("a1"), is(4));
+    assertThat(actual.count("b2"), is(2));
+    assertThat(actual.count("c3"), is(2));
+  }
+
+  @Test
+  public void testTableUnion() throws IOException {
+    PTable<String, String> words1ByFirstLetter = byFirstLetter(words1);
+    PTable<String, String> words2ByFirstLetter = byFirstLetter(words2);
+
+    PTable<String, String> union = words1ByFirstLetter.union(words2ByFirstLetter);
+
+    ImmutableMultiset<Pair<String, String>> actual = ImmutableMultiset.copyOf(union.materialize());
+
+    assertThat(actual.elementSet().size(), is(3));
+    assertThat(actual.count(Pair.of("a", "1")), is(4));
+    assertThat(actual.count(Pair.of("b", "2")), is(2));
+    assertThat(actual.count(Pair.of("c", "3")), is(2));
+  }
+
+  @Test
+  public void testUnionThenGroupByKey() throws IOException {
+    PCollection<String> union = words1.union(words2);
+
+    PGroupedTable<String, String> grouped = byFirstLetter(union).groupByKey();
+
+    Map<String, String> actual = grouped.combineValues(Aggregators.STRING_CONCAT("", true))
+        .materializeToMap();
+
+    Map<String, String> expected = ImmutableMap.of("a", "1111", "b", "22", "c", "33");
+    assertThat(actual, is(expected));
+  }
+
+  @Test
+  public void testTableUnionThenGroupByKey() throws IOException {
+    PTable<String, String> words1ByFirstLetter = byFirstLetter(words1);
+    PTable<String, String> words2ByFirstLetter = byFirstLetter(words2);
+
+    PTable<String, String> union = words1ByFirstLetter.union(words2ByFirstLetter);
+
+    PGroupedTable<String, String> grouped = union.groupByKey();
+
+    Map<String, String> actual = grouped.combineValues(Aggregators.STRING_CONCAT("", true))
+        .materializeToMap();
+
+    Map<String, String> expected = ImmutableMap.of("a", "1111", "b", "22", "c", "33");
+    assertThat(actual, is(expected));
+  }
+
+
+  private static PTable<String, String> byFirstLetter(PCollection<String> values) {
+    return values.parallelDo("byFirstLetter", new FirstLetterKeyFn(),
+        Avros.tableOf(Avros.strings(), Avros.strings()));
+  }
+
+  private static class FirstLetterKeyFn extends DoFn<String, Pair<String, String>> {
+    @Override
+    public void process(String input, Emitter<Pair<String, String>> emitter) {
+      if (input.length() > 1) {
+        emitter.emit(Pair.of(input.substring(0, 1), input.substring(1)));
+      }
+    }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/UnionResultsIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/UnionResultsIT.java b/crunch-core/src/it/java/org/apache/crunch/UnionResultsIT.java
new file mode 100644
index 0000000..df0511a
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/UnionResultsIT.java
@@ -0,0 +1,80 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.io.At;
+import org.apache.crunch.test.CrunchTestSupport;
+import org.apache.crunch.types.writable.Writables;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+
+public class UnionResultsIT extends CrunchTestSupport implements Serializable {
+
+  static class StringLengthMapFn extends MapFn<String, Pair<String, Long>> {
+
+    @Override
+    public Pair<String, Long> map(String input) {
+      return new Pair<String, Long>(input, 10L);
+    }
+  }
+
+
+  /**
+   * Tests combining a GBK output with a map-only job output into a single
+   * unioned collection.
+   */
+  @Test
+  public void testUnionOfGroupedOutputAndNonGroupedOutput() throws IOException {
+    String inputPath = tempDir.copyResourceFileName("set1.txt");
+    String inputPath2 = tempDir.copyResourceFileName("set2.txt");
+
+    Pipeline pipeline = new MRPipeline(UnionResultsIT.class);
+
+    PCollection<String> set1Lines = pipeline.read(At.textFile(inputPath, Writables.strings()));
+    PCollection<Pair<String, Long>> set1Lengths = set1Lines.parallelDo(new StringLengthMapFn(),
+        Writables.pairs(Writables.strings(), Writables.longs()));
+    PCollection<Pair<String, Long>> set2Counts = pipeline.read(At.textFile(inputPath2, Writables.strings())).count();
+
+    PCollection<Pair<String, Long>> union = set1Lengths.union(set2Counts);
+
+    List<Pair<String, Long>> unionValues = Lists.newArrayList(union.materialize());
+    assertEquals(7, unionValues.size());
+
+    Set<Pair<String, Long>> expectedPairs = Sets.newHashSet();
+    expectedPairs.add(Pair.of("b", 10L));
+    expectedPairs.add(Pair.of("c", 10L));
+    expectedPairs.add(Pair.of("a", 10L));
+    expectedPairs.add(Pair.of("e", 10L));
+    expectedPairs.add(Pair.of("a", 1L));
+    expectedPairs.add(Pair.of("c", 1L));
+    expectedPairs.add(Pair.of("d", 1L));
+
+    assertEquals(expectedPairs, Sets.newHashSet(unionValues));
+
+  }
+}
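
The Javadoc on testUnionOfGroupedOutputAndNonGroupedOutput above describes the pattern being
exercised: a collection produced by a group-by-key stage (count() groups by key internally) is
unioned with one produced by a map-only parallelDo. A minimal sketch of the same pattern outside
the test harness, assuming hypothetical input paths and class names and using only the APIs that
appear in the test:

    import org.apache.crunch.MapFn;
    import org.apache.crunch.PCollection;
    import org.apache.crunch.Pair;
    import org.apache.crunch.Pipeline;
    import org.apache.crunch.impl.mr.MRPipeline;
    import org.apache.crunch.io.At;
    import org.apache.crunch.types.writable.Writables;

    public class UnionOfGroupedAndMapOnlySketch {

      // Map-only transformation: one (line, length) pair per input line.
      static class LineToLengthFn extends MapFn<String, Pair<String, Long>> {
        @Override
        public Pair<String, Long> map(String input) {
          return Pair.of(input, (long) input.length());
        }
      }

      public static void main(String[] args) {
        Pipeline pipeline = new MRPipeline(UnionOfGroupedAndMapOnlySketch.class);

        // Map-only branch: parallelDo never forces a shuffle.
        PCollection<Pair<String, Long>> lengths = pipeline
            .read(At.textFile("/data/set1", Writables.strings()))
            .parallelDo(new LineToLengthFn(), Writables.pairs(Writables.strings(), Writables.longs()));

        // Grouped branch: count() runs a groupByKey under the hood.
        PCollection<Pair<String, Long>> counts =
            pipeline.read(At.textFile("/data/set2", Writables.strings())).count();

        // Both branches materialize into a single unioned collection.
        for (Pair<String, Long> pair : lengths.union(counts).materialize()) {
          System.out.println(pair);
        }
        pipeline.done();
      }
    }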

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/WordCountIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/WordCountIT.java b/crunch-core/src/it/java/org/apache/crunch/WordCountIT.java
new file mode 100644
index 0000000..c646663
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/WordCountIT.java
@@ -0,0 +1,171 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.util.List;
+
+import org.apache.crunch.fn.Aggregators;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.io.At;
+import org.apache.crunch.io.To;
+import org.apache.crunch.lib.Aggregate;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.PTypeFamily;
+import org.apache.crunch.types.avro.AvroTypeFamily;
+import org.apache.crunch.types.writable.WritableTypeFamily;
+import org.junit.Rule;
+import org.junit.Test;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+import com.google.common.io.Files;
+
+public class WordCountIT {
+  @Rule
+  public TemporaryPath tmpDir = TemporaryPaths.create();
+
+  enum WordCountStats {
+    ANDS
+  };
+
+  public static PTable<String, Long> wordCount(PCollection<String> words, PTypeFamily typeFamily) {
+    return Aggregate.count(words.parallelDo(new DoFn<String, String>() {
+
+      @Override
+      public void process(String line, Emitter<String> emitter) {
+        for (String word : line.split("\\s+")) {
+          emitter.emit(word);
+          if ("and".equals(word)) {
+            increment(WordCountStats.ANDS);
+          }
+        }
+      }
+    }, typeFamily.strings()));
+  }
+
+  public static PTable<String, Long> substr(PTable<String, Long> ptable) {
+    return ptable.parallelDo(new DoFn<Pair<String, Long>, Pair<String, Long>>() {
+
+      public void process(Pair<String, Long> input, Emitter<Pair<String, Long>> emitter) {
+        if (input.first().length() > 0) {
+          emitter.emit(Pair.of(input.first().substring(0, 1), input.second()));
+        }
+      }
+    }, ptable.getPTableType());
+  }
+
+  private boolean runSecond = false;
+  private boolean useToOutput = false;
+
+  @Test
+  public void testWritables() throws IOException {
+    run(new MRPipeline(WordCountIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance());
+  }
+
+  @Test
+  public void testWritablesWithSecond() throws IOException {
+    runSecond = true;
+    run(new MRPipeline(WordCountIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance());
+  }
+
+  @Test
+  public void testWritablesWithSecondUseToOutput() throws IOException {
+    runSecond = true;
+    useToOutput = true;
+    run(new MRPipeline(WordCountIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance());
+  }
+
+  @Test
+  public void testAvro() throws IOException {
+    run(new MRPipeline(WordCountIT.class, tmpDir.getDefaultConfiguration()), AvroTypeFamily.getInstance());
+  }
+
+  @Test
+  public void testAvroWithSecond() throws IOException {
+    runSecond = true;
+    run(new MRPipeline(WordCountIT.class, tmpDir.getDefaultConfiguration()), AvroTypeFamily.getInstance());
+  }
+
+  @Test
+  public void testWithTopWritable() throws IOException {
+    runWithTop(WritableTypeFamily.getInstance());
+  }
+
+  @Test
+  public void testWithTopAvro() throws IOException {
+    runWithTop(AvroTypeFamily.getInstance());
+  }
+
+  public void runWithTop(PTypeFamily tf) throws IOException {
+    Pipeline pipeline = new MRPipeline(WordCountIT.class, tmpDir.getDefaultConfiguration());
+    String inputPath = tmpDir.copyResourceFileName("shakes.txt");
+
+    PCollection<String> shakespeare = pipeline.read(At.textFile(inputPath, tf.strings()));
+    PTable<String, Long> wordCount = wordCount(shakespeare, tf);
+    List<Pair<String, Long>> top5 = Lists.newArrayList(Aggregate.top(wordCount, 5, true).materialize());
+    assertEquals(
+        ImmutableList.of(Pair.of("", 1470L), Pair.of("the", 620L), Pair.of("and", 427L), Pair.of("of", 396L),
+            Pair.of("to", 367L)), top5);
+  }
+
+  public void run(Pipeline pipeline, PTypeFamily typeFamily) throws IOException {
+    String inputPath = tmpDir.copyResourceFileName("shakes.txt");
+    String outputPath = tmpDir.getFileName("output");
+
+    PCollection<String> shakespeare = pipeline.read(At.textFile(inputPath, typeFamily.strings()));
+    PTable<String, Long> wordCount = wordCount(shakespeare, typeFamily);
+    if (useToOutput) {
+      wordCount.write(To.textFile(outputPath));
+    } else {
+      pipeline.writeTextFile(wordCount, outputPath);
+    }
+
+    if (runSecond) {
+      String substrPath = tmpDir.getFileName("substr");
+      PTable<String, Long> we = substr(wordCount).groupByKey().combineValues(Aggregators.SUM_LONGS());
+      pipeline.writeTextFile(we, substrPath);
+    }
+    PipelineResult res = pipeline.done();
+    assertTrue(res.succeeded());
+    List<PipelineResult.StageResult> stageResults = res.getStageResults();
+    if (runSecond) {
+      assertEquals(2, stageResults.size());
+    } else {
+      assertEquals(1, stageResults.size());
+      assertEquals(427, stageResults.get(0).getCounterValue(WordCountStats.ANDS));
+    }
+
+    File outputFile = new File(outputPath, "part-r-00000");
+    List<String> lines = Files.readLines(outputFile, Charset.defaultCharset());
+    boolean passed = false;
+    for (String line : lines) {
+      if (line.startsWith("Macbeth\t28") || line.startsWith("[Macbeth,28]")) {
+        passed = true;
+        break;
+      }
+    }
+    assertTrue(passed);
+  }
+}


[05/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/avro/Avros.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/avro/Avros.java b/crunch/src/main/java/org/apache/crunch/types/avro/Avros.java
deleted file mode 100644
index fc30eaf..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/avro/Avros.java
+++ /dev/null
@@ -1,709 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.avro;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
-import java.util.UUID;
-
-import org.apache.avro.Schema;
-import org.apache.avro.Schema.Type;
-import org.apache.avro.generic.GenericData;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.reflect.ReflectData;
-import org.apache.avro.specific.SpecificRecord;
-import org.apache.avro.util.Utf8;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.crunch.MapFn;
-import org.apache.crunch.Pair;
-import org.apache.crunch.Tuple;
-import org.apache.crunch.Tuple3;
-import org.apache.crunch.Tuple4;
-import org.apache.crunch.TupleN;
-import org.apache.crunch.fn.CompositeMapFn;
-import org.apache.crunch.fn.IdentityFn;
-import org.apache.crunch.types.CollectionDeepCopier;
-import org.apache.crunch.types.DeepCopier;
-import org.apache.crunch.types.MapDeepCopier;
-import org.apache.crunch.types.PTableType;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.PTypes;
-import org.apache.crunch.types.TupleDeepCopier;
-import org.apache.crunch.types.TupleFactory;
-import org.apache.crunch.types.writable.WritableDeepCopier;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.TaskInputOutputContext;
-import org.apache.hadoop.util.ReflectionUtils;
-
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-
-/**
- * Defines static methods that are analogous to the methods defined in
- * {@link AvroTypeFamily} for convenient static importing.
- * 
- */
-public class Avros {
-
-  /**
-   * Older versions of Avro (i.e., before 1.7.0) do not support schemas that are
-   * composed of a mix of specific and reflection-based schemas. This bit
-   * controls whether or not we allow Crunch jobs to be created that involve
-   * mixing specific and reflection-based schemas and can be overridden by the
-   * client developer.
-   */
-  public static final boolean CAN_COMBINE_SPECIFIC_AND_REFLECT_SCHEMAS;
-
-  static {
-    CAN_COMBINE_SPECIFIC_AND_REFLECT_SCHEMAS = AvroCapabilities.canDecodeSpecificSchemaWithReflectDatumReader();
-  }
-
-  /**
-   * The instance we use for generating reflected schemas. May be modified by
-   * clients (e.g., Scrunch.)
-   */
-  public static ReflectDataFactory REFLECT_DATA_FACTORY = new ReflectDataFactory();
-
-  /**
-   * The name of the configuration parameter that tracks which reflection
-   * factory to use.
-   */
-  public static final String REFLECT_DATA_FACTORY_CLASS = "crunch.reflectdatafactory";
-
-  public static void configureReflectDataFactory(Configuration conf) {
-    conf.setClass(REFLECT_DATA_FACTORY_CLASS, REFLECT_DATA_FACTORY.getClass(), ReflectDataFactory.class);
-  }
-
-  public static ReflectDataFactory getReflectDataFactory(Configuration conf) {
-    return (ReflectDataFactory) ReflectionUtils.newInstance(
-        conf.getClass(REFLECT_DATA_FACTORY_CLASS, ReflectDataFactory.class), conf);
-  }
-
-  public static void checkCombiningSpecificAndReflectionSchemas() {
-    if (!CAN_COMBINE_SPECIFIC_AND_REFLECT_SCHEMAS) {
-      throw new IllegalStateException("Crunch does not support running jobs that"
-          + " contain a mixture of reflection-based and avro-generated data types."
-          + " Please consider turning your reflection-based type into an avro-generated"
-          + " type and using that generated type instead."
-          + " If the version of Avro you are using is 1.7.0 or greater, you can enable"
-          + " combined schemas by setting the Avros.CAN_COMBINE_SPECIFIC_AND_REFLECT_SCHEMAS" + " field to 'true'.");
-    }
-  }
-
-  public static MapFn<CharSequence, String> UTF8_TO_STRING = new MapFn<CharSequence, String>() {
-    @Override
-    public String map(CharSequence input) {
-      return input.toString();
-    }
-  };
-
-  public static MapFn<String, Utf8> STRING_TO_UTF8 = new MapFn<String, Utf8>() {
-    @Override
-    public Utf8 map(String input) {
-      return new Utf8(input);
-    }
-  };
-
-  public static MapFn<Object, ByteBuffer> BYTES_IN = new MapFn<Object, ByteBuffer>() {
-    @Override
-    public ByteBuffer map(Object input) {
-      if (input instanceof ByteBuffer) {
-        return (ByteBuffer) input;
-      }
-      return ByteBuffer.wrap((byte[]) input);
-    }
-  };
-
-  private static final AvroType<String> strings = new AvroType<String>(String.class, Schema.create(Schema.Type.STRING),
-      UTF8_TO_STRING, STRING_TO_UTF8, new DeepCopier.NoOpDeepCopier<String>());
-  private static final AvroType<Void> nulls = create(Void.class, Schema.Type.NULL);
-  private static final AvroType<Long> longs = create(Long.class, Schema.Type.LONG);
-  private static final AvroType<Integer> ints = create(Integer.class, Schema.Type.INT);
-  private static final AvroType<Float> floats = create(Float.class, Schema.Type.FLOAT);
-  private static final AvroType<Double> doubles = create(Double.class, Schema.Type.DOUBLE);
-  private static final AvroType<Boolean> booleans = create(Boolean.class, Schema.Type.BOOLEAN);
-  private static final AvroType<ByteBuffer> bytes = new AvroType<ByteBuffer>(ByteBuffer.class,
-      Schema.create(Schema.Type.BYTES), BYTES_IN, IdentityFn.getInstance(), new DeepCopier.NoOpDeepCopier<ByteBuffer>());
-
-  private static final Map<Class<?>, PType<?>> PRIMITIVES = ImmutableMap.<Class<?>, PType<?>> builder()
-      .put(String.class, strings).put(Long.class, longs).put(Integer.class, ints).put(Float.class, floats)
-      .put(Double.class, doubles).put(Boolean.class, booleans).put(ByteBuffer.class, bytes).build();
-
-  private static final Map<Class<?>, AvroType<?>> EXTENSIONS = Maps.newHashMap();
-
-  public static <T> void register(Class<T> clazz, AvroType<T> ptype) {
-    EXTENSIONS.put(clazz, ptype);
-  }
-
-  public static <T> PType<T> getPrimitiveType(Class<T> clazz) {
-    return (PType<T>) PRIMITIVES.get(clazz);
-  }
-
-  static <T> boolean isPrimitive(AvroType<T> avroType) {
-    return avroType.getTypeClass().isPrimitive() || PRIMITIVES.containsKey(avroType.getTypeClass());
-  }
-
-  private static <T> AvroType<T> create(Class<T> clazz, Schema.Type schemaType) {
-    return new AvroType<T>(clazz, Schema.create(schemaType), new DeepCopier.NoOpDeepCopier<T>());
-  }
-
-  public static final AvroType<Void> nulls() {
-    return nulls;
-  }
-
-  public static final AvroType<String> strings() {
-    return strings;
-  }
-
-  public static final AvroType<Long> longs() {
-    return longs;
-  }
-
-  public static final AvroType<Integer> ints() {
-    return ints;
-  }
-
-  public static final AvroType<Float> floats() {
-    return floats;
-  }
-
-  public static final AvroType<Double> doubles() {
-    return doubles;
-  }
-
-  public static final AvroType<Boolean> booleans() {
-    return booleans;
-  }
-
-  public static final AvroType<ByteBuffer> bytes() {
-    return bytes;
-  }
-
-  public static final <T> AvroType<T> records(Class<T> clazz) {
-    if (EXTENSIONS.containsKey(clazz)) {
-      return (AvroType<T>) EXTENSIONS.get(clazz);
-    }
-    return containers(clazz);
-  }
-
-  public static final AvroType<GenericData.Record> generics(Schema schema) {
-    return new AvroType<GenericData.Record>(GenericData.Record.class, schema, new AvroDeepCopier.AvroGenericDeepCopier(
-        schema));
-  }
-
-  public static final <T> AvroType<T> containers(Class<T> clazz) {
-    if (SpecificRecord.class.isAssignableFrom(clazz)) {
-      return (AvroType<T>) specifics((Class<SpecificRecord>) clazz);
-    }
-    return reflects(clazz);
-  }
-
-  public static final <T extends SpecificRecord> AvroType<T> specifics(Class<T> clazz) {
-    T t = ReflectionUtils.newInstance(clazz, null);
-    Schema schema = t.getSchema();
-    return new AvroType<T>(clazz, schema, new AvroDeepCopier.AvroSpecificDeepCopier<T>(clazz, schema));
-  }
-
-  public static final <T> AvroType<T> reflects(Class<T> clazz) {
-    Schema schema = REFLECT_DATA_FACTORY.getReflectData().getSchema(clazz);
-    return new AvroType<T>(clazz, schema, new AvroDeepCopier.AvroReflectDeepCopier<T>(clazz, schema));
-  }
-
-  private static class BytesToWritableMapFn<T extends Writable> extends MapFn<Object, T> {
-    private static final Log LOG = LogFactory.getLog(BytesToWritableMapFn.class);
-
-    private final Class<T> writableClazz;
-
-    public BytesToWritableMapFn(Class<T> writableClazz) {
-      this.writableClazz = writableClazz;
-    }
-
-    @Override
-    public T map(Object input) {
-      ByteBuffer byteBuffer = BYTES_IN.map(input);
-      T instance = ReflectionUtils.newInstance(writableClazz, null);
-      try {
-        instance.readFields(new DataInputStream(new ByteArrayInputStream(byteBuffer.array(),
-            byteBuffer.arrayOffset(), byteBuffer.limit())));
-      } catch (IOException e) {
-        LOG.error("Exception thrown reading instance of: " + writableClazz, e);
-      }
-      return instance;
-    }
-  }
-
-  private static class WritableToBytesMapFn<T extends Writable> extends MapFn<T, ByteBuffer> {
-    private static final Log LOG = LogFactory.getLog(WritableToBytesMapFn.class);
-
-    @Override
-    public ByteBuffer map(T input) {
-      ByteArrayOutputStream baos = new ByteArrayOutputStream();
-      DataOutputStream das = new DataOutputStream(baos);
-      try {
-        input.write(das);
-      } catch (IOException e) {
-        LOG.error("Exception thrown converting Writable to bytes", e);
-      }
-      return ByteBuffer.wrap(baos.toByteArray());
-    }
-  }
-
-  public static final <T extends Writable> AvroType<T> writables(Class<T> clazz) {
-    return new AvroType<T>(clazz, Schema.create(Schema.Type.BYTES), new BytesToWritableMapFn<T>(clazz),
-        new WritableToBytesMapFn<T>(), new WritableDeepCopier<T>(clazz));
-  }
-
-  private static class GenericDataArrayToCollection<T> extends MapFn<Object, Collection<T>> {
-
-    private final MapFn<Object, T> mapFn;
-
-    public GenericDataArrayToCollection(MapFn<Object, T> mapFn) {
-      this.mapFn = mapFn;
-    }
-
-    @Override
-    public void configure(Configuration conf) {
-      mapFn.configure(conf);
-    }
-
-    @Override
-    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
-      mapFn.setContext(context);
-    }
-    
-    @Override
-    public void initialize() {
-      mapFn.initialize();
-    }
-
-    @Override
-    public Collection<T> map(Object input) {
-      Collection<T> ret = Lists.newArrayList();
-      if (input instanceof Collection) {
-        for (Object in : (Collection<Object>) input) {
-          ret.add(mapFn.map(in));
-        }
-      } else {
-        // Assume it is an array
-        Object[] arr = (Object[]) input;
-        for (Object in : arr) {
-          ret.add(mapFn.map(in));
-        }
-      }
-      return ret;
-    }
-  }
-
-  private static class CollectionToGenericDataArray extends MapFn<Collection<?>, GenericData.Array<?>> {
-
-    private final MapFn mapFn;
-    private final String jsonSchema;
-    private transient Schema schema;
-
-    public CollectionToGenericDataArray(Schema schema, MapFn mapFn) {
-      this.mapFn = mapFn;
-      this.jsonSchema = schema.toString();
-    }
-
-    @Override
-    public void configure(Configuration conf) {
-      mapFn.configure(conf);
-    }
-
-    @Override
-    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
-      mapFn.setContext(context);
-    }
-    
-    @Override
-    public void initialize() {
-      mapFn.initialize();
-    }
-
-    @Override
-    public GenericData.Array<?> map(Collection<?> input) {
-      if (schema == null) {
-        schema = new Schema.Parser().parse(jsonSchema);
-      }
-      GenericData.Array array = new GenericData.Array(input.size(), schema);
-      for (Object in : input) {
-        array.add(mapFn.map(in));
-      }
-      return array;
-    }
-  }
-
-  public static final <T> AvroType<Collection<T>> collections(PType<T> ptype) {
-    AvroType<T> avroType = (AvroType<T>) ptype;
-    Schema collectionSchema = Schema.createArray(allowNulls(avroType.getSchema()));
-    GenericDataArrayToCollection<T> input = new GenericDataArrayToCollection<T>(avroType.getInputMapFn());
-    CollectionToGenericDataArray output = new CollectionToGenericDataArray(collectionSchema, avroType.getOutputMapFn());
-    return new AvroType(Collection.class, collectionSchema, input, output, new CollectionDeepCopier<T>(ptype), ptype);
-  }
-
-  private static class AvroMapToMap<T> extends MapFn<Map<CharSequence, Object>, Map<String, T>> {
-    private final MapFn<Object, T> mapFn;
-
-    public AvroMapToMap(MapFn<Object, T> mapFn) {
-      this.mapFn = mapFn;
-    }
-
-    @Override
-    public void configure(Configuration conf) {
-      mapFn.configure(conf);
-    }
-
-    @Override
-    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
-      mapFn.setContext(context);
-    }
-    
-    @Override
-    public void initialize() {
-      mapFn.initialize();
-    }
-
-    @Override
-    public Map<String, T> map(Map<CharSequence, Object> input) {
-      Map<String, T> out = Maps.newHashMap();
-      for (Map.Entry<CharSequence, Object> e : input.entrySet()) {
-        out.put(e.getKey().toString(), mapFn.map(e.getValue()));
-      }
-      return out;
-    }
-  }
-
-  private static class MapToAvroMap<T> extends MapFn<Map<String, T>, Map<Utf8, Object>> {
-    private final MapFn<T, Object> mapFn;
-
-    public MapToAvroMap(MapFn<T, Object> mapFn) {
-      this.mapFn = mapFn;
-    }
-
-    @Override
-    public void configure(Configuration conf) {
-      mapFn.configure(conf);
-    }
-
-    @Override
-    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
-      mapFn.setContext(context);
-    }
-    
-    @Override
-    public void initialize() {
-      this.mapFn.initialize();
-    }
-
-    @Override
-    public Map<Utf8, Object> map(Map<String, T> input) {
-      Map<Utf8, Object> out = Maps.newHashMap();
-      for (Map.Entry<String, T> e : input.entrySet()) {
-        out.put(new Utf8(e.getKey()), mapFn.map(e.getValue()));
-      }
-      return out;
-    }
-  }
-
-  public static final <T> AvroType<Map<String, T>> maps(PType<T> ptype) {
-    AvroType<T> avroType = (AvroType<T>) ptype;
-    Schema mapSchema = Schema.createMap(allowNulls(avroType.getSchema()));
-    AvroMapToMap<T> inputFn = new AvroMapToMap<T>(avroType.getInputMapFn());
-    MapToAvroMap<T> outputFn = new MapToAvroMap<T>(avroType.getOutputMapFn());
-    return new AvroType(Map.class, mapSchema, inputFn, outputFn, new MapDeepCopier<T>(ptype), ptype);
-  }
-
-  private static class GenericRecordToTuple extends MapFn<GenericRecord, Tuple> {
-    private final TupleFactory<?> tupleFactory;
-    private final List<MapFn> fns;
-
-    private transient Object[] values;
-
-    public GenericRecordToTuple(TupleFactory<?> tupleFactory, PType<?>... ptypes) {
-      this.tupleFactory = tupleFactory;
-      this.fns = Lists.newArrayList();
-      for (PType<?> ptype : ptypes) {
-        AvroType atype = (AvroType) ptype;
-        fns.add(atype.getInputMapFn());
-      }
-    }
-
-    @Override
-    public void configure(Configuration conf) {
-      for (MapFn fn : fns) {
-        fn.configure(conf);
-      }
-    }
-
-    @Override
-    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
-      for (MapFn fn : fns) {
-        fn.setContext(context);
-      }
-    }
-    
-    @Override
-    public void initialize() {
-      for (MapFn fn : fns) {
-        fn.initialize();
-      }
-      this.values = new Object[fns.size()];
-      tupleFactory.initialize();
-    }
-
-    @Override
-    public Tuple map(GenericRecord input) {
-      for (int i = 0; i < values.length; i++) {
-        Object v = input.get(i);
-        if (v == null) {
-          values[i] = null;
-        } else {
-          values[i] = fns.get(i).map(v);
-        }
-      }
-      return tupleFactory.makeTuple(values);
-    }
-  }
-
-  private static class TupleToGenericRecord extends MapFn<Tuple, GenericRecord> {
-    private final List<MapFn> fns;
-    private final List<AvroType> avroTypes;
-    private final String jsonSchema;
-    private final boolean isReflect;
-    private transient Schema schema;
-
-    public TupleToGenericRecord(Schema schema, PType<?>... ptypes) {
-      this.fns = Lists.newArrayList();
-      this.avroTypes = Lists.newArrayList();
-      this.jsonSchema = schema.toString();
-      boolean reflectFound = false;
-      boolean specificFound = false;
-      for (PType ptype : ptypes) {
-        AvroType atype = (AvroType) ptype;
-        fns.add(atype.getOutputMapFn());
-        avroTypes.add(atype);
-        if (atype.hasReflect()) {
-          reflectFound = true;
-        }
-        if (atype.hasSpecific()) {
-          specificFound = true;
-        }
-      }
-      if (specificFound && reflectFound) {
-        checkCombiningSpecificAndReflectionSchemas();
-      }
-      this.isReflect = reflectFound;
-    }
-
-    @Override
-    public void configure(Configuration conf) {
-      for (MapFn fn : fns) {
-        fn.configure(conf);
-      }
-    }
- 
-    @Override
-    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
-      for (MapFn fn : fns) {
-        fn.setContext(getContext());
-      }
-    }
-    
-    @Override
-    public void initialize() {
-      this.schema = new Schema.Parser().parse(jsonSchema);
-      for (MapFn fn : fns) {
-        fn.initialize();
-      }
-    }
-
-    private GenericRecord createRecord() {
-      if (isReflect) {
-        return new ReflectGenericRecord(schema);
-      } else {
-        return new GenericData.Record(schema);
-      }
-    }
-
-    @Override
-    public GenericRecord map(Tuple input) {
-      GenericRecord record = createRecord();
-      for (int i = 0; i < input.size(); i++) {
-        Object v = input.get(i);
-        if (v == null) {
-          record.put(i, null);
-        } else {
-          record.put(i, fns.get(i).map(v));
-        }
-      }
-      return record;
-    }
-  }
-
-  public static final <V1, V2> AvroType<Pair<V1, V2>> pairs(PType<V1> p1, PType<V2> p2) {
-    Schema schema = createTupleSchema(p1, p2);
-    GenericRecordToTuple input = new GenericRecordToTuple(TupleFactory.PAIR, p1, p2);
-    TupleToGenericRecord output = new TupleToGenericRecord(schema, p1, p2);
-    return new AvroType(Pair.class, schema, input, output, new TupleDeepCopier(Pair.class, p1, p2), p1, p2);
-  }
-
-  public static final <V1, V2, V3> AvroType<Tuple3<V1, V2, V3>> triples(PType<V1> p1, PType<V2> p2, PType<V3> p3) {
-    Schema schema = createTupleSchema(p1, p2, p3);
-    return new AvroType(Tuple3.class, schema, new GenericRecordToTuple(TupleFactory.TUPLE3, p1, p2, p3),
-        new TupleToGenericRecord(schema, p1, p2, p3), new TupleDeepCopier(Tuple3.class, p1, p2, p3), p1, p2, p3);
-  }
-
-  public static final <V1, V2, V3, V4> AvroType<Tuple4<V1, V2, V3, V4>> quads(PType<V1> p1, PType<V2> p2, PType<V3> p3,
-      PType<V4> p4) {
-    Schema schema = createTupleSchema(p1, p2, p3, p4);
-    return new AvroType(Tuple4.class, schema, new GenericRecordToTuple(TupleFactory.TUPLE4, p1, p2, p3, p4),
-        new TupleToGenericRecord(schema, p1, p2, p3, p4), new TupleDeepCopier(Tuple4.class, p1, p2, p3, p4), p1, p2,
-        p3, p4);
-  }
-
-  public static final AvroType<TupleN> tuples(PType... ptypes) {
-    Schema schema = createTupleSchema(ptypes);
-    return new AvroType(TupleN.class, schema, new GenericRecordToTuple(TupleFactory.TUPLEN, ptypes),
-        new TupleToGenericRecord(schema, ptypes), new TupleDeepCopier(TupleN.class, ptypes), ptypes);
-  }
-
-  public static <T extends Tuple> AvroType<T> tuples(Class<T> clazz, PType... ptypes) {
-    Schema schema = createTupleSchema(ptypes);
-    Class[] typeArgs = new Class[ptypes.length];
-    for (int i = 0; i < typeArgs.length; i++) {
-      typeArgs[i] = ptypes[i].getTypeClass();
-    }
-    TupleFactory<T> factory = TupleFactory.create(clazz, typeArgs);
-    return new AvroType<T>(clazz, schema, new GenericRecordToTuple(factory, ptypes), new TupleToGenericRecord(schema,
-        ptypes), new TupleDeepCopier(clazz, ptypes), ptypes);
-  }
-
-  private static Schema createTupleSchema(PType<?>... ptypes) {
-    // Guarantee each tuple schema has a globally unique name
-    String tupleName = "tuple" + UUID.randomUUID().toString().replace('-', 'x');
-    Schema schema = Schema.createRecord(tupleName, "", "crunch", false);
-    List<Schema.Field> fields = Lists.newArrayList();
-    for (int i = 0; i < ptypes.length; i++) {
-      AvroType atype = (AvroType) ptypes[i];
-      Schema fieldSchema = allowNulls(atype.getSchema());
-      fields.add(new Schema.Field("v" + i, fieldSchema, "", null));
-    }
-    schema.setFields(fields);
-    return schema;
-  }
-
-  public static final <S, T> AvroType<T> derived(Class<T> clazz, MapFn<S, T> inputFn, MapFn<T, S> outputFn,
-      PType<S> base) {
-    AvroType<S> abase = (AvroType<S>) base;
-    return new AvroType<T>(clazz, abase.getSchema(), new CompositeMapFn(abase.getInputMapFn(), inputFn),
-        new CompositeMapFn(outputFn, abase.getOutputMapFn()), new DeepCopier.NoOpDeepCopier<T>(), base.getSubTypes()
-            .toArray(new PType[0]));
-  }
-
-  public static <T> PType<T> jsons(Class<T> clazz) {
-    return PTypes.jsonString(clazz, AvroTypeFamily.getInstance());
-  }
-
-  public static final <K, V> AvroTableType<K, V> tableOf(PType<K> key, PType<V> value) {
-    if (key instanceof PTableType) {
-      PTableType ptt = (PTableType) key;
-      key = Avros.pairs(ptt.getKeyType(), ptt.getValueType());
-    }
-    if (value instanceof PTableType) {
-      PTableType ptt = (PTableType) value;
-      value = Avros.pairs(ptt.getKeyType(), ptt.getValueType());
-    }
-    AvroType<K> avroKey = (AvroType<K>) key;
-    AvroType<V> avroValue = (AvroType<V>) value;
-    return new AvroTableType(avroKey, avroValue, Pair.class);
-  }
-
-  private static final Schema NULL_SCHEMA = Schema.create(Type.NULL);
-
-  private static Schema allowNulls(Schema base) {
-    if (NULL_SCHEMA.equals(base)) {
-      return base;
-    }
-    return Schema.createUnion(ImmutableList.of(base, NULL_SCHEMA));
-  }
-
-  private static class ReflectGenericRecord extends GenericData.Record {
-
-    public ReflectGenericRecord(Schema schema) {
-      super(schema);
-    }
-
-    @Override
-    public int hashCode() {
-      return reflectAwareHashCode(this, getSchema());
-    }
-  }
-
-  /*
-   * TODO: Remove this once we no longer have to support 1.5.4.
-   */
-  private static int reflectAwareHashCode(Object o, Schema s) {
-    if (o == null)
-      return 0; // incomplete datum
-    int hashCode = 1;
-    switch (s.getType()) {
-    case RECORD:
-      for (Schema.Field f : s.getFields()) {
-        if (f.order() == Schema.Field.Order.IGNORE)
-          continue;
-        hashCode = hashCodeAdd(hashCode, ReflectData.get().getField(o, f.name(), f.pos()), f.schema());
-      }
-      return hashCode;
-    case ARRAY:
-      Collection<?> a = (Collection<?>) o;
-      Schema elementType = s.getElementType();
-      for (Object e : a)
-        hashCode = hashCodeAdd(hashCode, e, elementType);
-      return hashCode;
-    case UNION:
-      return reflectAwareHashCode(o, s.getTypes().get(ReflectData.get().resolveUnion(s, o)));
-    case ENUM:
-      return s.getEnumOrdinal(o.toString());
-    case NULL:
-      return 0;
-    case STRING:
-      return (o instanceof Utf8 ? o : new Utf8(o.toString())).hashCode();
-    default:
-      return o.hashCode();
-    }
-  }
-
-  /** Add the hash code for an object into an accumulated hash code. */
-  private static int hashCodeAdd(int hashCode, Object o, Schema s) {
-    return 31 * hashCode + reflectAwareHashCode(o, s);
-  }
-
-  private Avros() {
-  }
-}
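
The class comment above notes that Avros mirrors AvroTypeFamily so its factory methods can be
statically imported. A minimal usage sketch (the class name is hypothetical; every factory method
shown is defined in the file above):

    import static org.apache.crunch.types.avro.Avros.collections;
    import static org.apache.crunch.types.avro.Avros.doubles;
    import static org.apache.crunch.types.avro.Avros.ints;
    import static org.apache.crunch.types.avro.Avros.longs;
    import static org.apache.crunch.types.avro.Avros.maps;
    import static org.apache.crunch.types.avro.Avros.pairs;
    import static org.apache.crunch.types.avro.Avros.strings;
    import static org.apache.crunch.types.avro.Avros.tableOf;

    import java.util.Collection;
    import java.util.Map;

    import org.apache.crunch.Pair;
    import org.apache.crunch.types.avro.AvroTableType;
    import org.apache.crunch.types.avro.AvroType;

    public class AvrosUsageSketch {
      public static void main(String[] args) {
        // Composite PTypes assembled from the statically imported factory methods.
        AvroType<Pair<String, Long>> pairType = pairs(strings(), longs());
        AvroType<Collection<Double>> collectionType = collections(doubles());
        AvroType<Map<String, Integer>> mapType = maps(ints());
        AvroTableType<String, Long> tableType = tableOf(strings(), longs());

        // Each AvroType carries the Avro schema used for (de)serialization.
        System.out.println(pairType.getSchema());
      }
    }

The records(), specifics(), and reflects() methods in the same file cover Avro record types; as the
CAN_COMBINE_SPECIFIC_AND_REFLECT_SCHEMAS comment explains, mixing specific and reflection-based
types in a single job requires Avro 1.7.0 or later.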

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/avro/ReflectDataFactory.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/avro/ReflectDataFactory.java b/crunch/src/main/java/org/apache/crunch/types/avro/ReflectDataFactory.java
deleted file mode 100644
index e973cca..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/avro/ReflectDataFactory.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.avro;
-
-import org.apache.avro.Schema;
-import org.apache.avro.reflect.ReflectData;
-import org.apache.avro.reflect.ReflectDatumReader;
-import org.apache.avro.reflect.ReflectDatumWriter;
-
-/**
- * A Factory class for constructing Avro reflection-related objects.
- */
-public class ReflectDataFactory {
-
-  public ReflectData getReflectData() {
-    return ReflectData.AllowNull.get();
-  }
-
-  public <T> ReflectDatumReader<T> getReader(Schema schema) {
-    return new ReflectDatumReader<T>(schema);
-  }
-
-  public <T> ReflectDatumWriter<T> getWriter(Schema schema) {
-    return new ReflectDatumWriter<T>(schema);
-  }
-}
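
The factory above is the hook that lets clients (the Javadoc on Avros.REFLECT_DATA_FACTORY mentions
Scrunch) swap in their own ReflectData handling. A minimal sketch of a hypothetical subclass,
registered through the same "crunch.reflectdatafactory" configuration key that
Avros.configureReflectDataFactory() and Avros.getReflectDataFactory() use:

    import org.apache.avro.reflect.ReflectData;
    import org.apache.crunch.types.avro.Avros;
    import org.apache.crunch.types.avro.ReflectDataFactory;
    import org.apache.hadoop.conf.Configuration;

    // Hypothetical subclass: hand back plain ReflectData instead of the AllowNull
    // variant, so reflected fields are not automatically made nullable.
    public class PlainReflectDataFactory extends ReflectDataFactory {

      @Override
      public ReflectData getReflectData() {
        return ReflectData.get();
      }

      public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.setClass(Avros.REFLECT_DATA_FACTORY_CLASS, PlainReflectDataFactory.class,
            ReflectDataFactory.class);

        // Crunch resolves the configured factory reflectively from the Configuration.
        ReflectDataFactory factory = Avros.getReflectDataFactory(conf);
        System.out.println(factory.getClass().getSimpleName());
      }
    }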

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/avro/SafeAvroSerialization.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/avro/SafeAvroSerialization.java b/crunch/src/main/java/org/apache/crunch/types/avro/SafeAvroSerialization.java
deleted file mode 100644
index 8bd18b0..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/avro/SafeAvroSerialization.java
+++ /dev/null
@@ -1,145 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.avro;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-
-import org.apache.avro.Schema;
-import org.apache.avro.io.BinaryDecoder;
-import org.apache.avro.io.BinaryEncoder;
-import org.apache.avro.io.DatumReader;
-import org.apache.avro.io.DatumWriter;
-import org.apache.avro.io.DecoderFactory;
-import org.apache.avro.io.EncoderFactory;
-import org.apache.avro.mapred.AvroJob;
-import org.apache.avro.mapred.AvroKey;
-import org.apache.avro.mapred.AvroValue;
-import org.apache.avro.mapred.AvroWrapper;
-import org.apache.avro.mapred.Pair;
-import org.apache.avro.reflect.ReflectDatumWriter;
-import org.apache.avro.specific.SpecificDatumReader;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.io.serializer.Deserializer;
-import org.apache.hadoop.io.serializer.Serialization;
-import org.apache.hadoop.io.serializer.Serializer;
-import org.apache.hadoop.util.ReflectionUtils;
-
-/** The {@link Serialization} used by jobs configured with {@link AvroJob}. */
-class SafeAvroSerialization<T> extends Configured implements Serialization<AvroWrapper<T>> {
-
-  public boolean accept(Class<?> c) {
-    return AvroWrapper.class.isAssignableFrom(c);
-  }
-
-  /**
-   * Returns the specified map output deserializer. Defaults to the final output
-   * deserializer if no map output schema was specified.
-   */
-  public Deserializer<AvroWrapper<T>> getDeserializer(Class<AvroWrapper<T>> c) {
-    boolean isKey = AvroKey.class.isAssignableFrom(c);
-    Configuration conf = getConf();
-    Schema schema = isKey ? Pair.getKeySchema(AvroJob.getMapOutputSchema(conf)) : Pair.getValueSchema(AvroJob
-        .getMapOutputSchema(conf));
-
-    DatumReader<T> datumReader = null;
-    if (conf.getBoolean(AvroJob.MAP_OUTPUT_IS_REFLECT, false)) {
-      ReflectDataFactory factory = (ReflectDataFactory) ReflectionUtils.newInstance(
-          conf.getClass("crunch.reflectdatafactory", ReflectDataFactory.class), conf);
-      datumReader = factory.getReader(schema);
-    } else {
-      datumReader = new SpecificDatumReader<T>(schema);
-    }
-    return new AvroWrapperDeserializer(datumReader, isKey);
-  }
-
-  private static final DecoderFactory FACTORY = DecoderFactory.get();
-
-  private class AvroWrapperDeserializer implements Deserializer<AvroWrapper<T>> {
-
-    private DatumReader<T> reader;
-    private BinaryDecoder decoder;
-    private boolean isKey;
-
-    public AvroWrapperDeserializer(DatumReader<T> reader, boolean isKey) {
-      this.reader = reader;
-      this.isKey = isKey;
-    }
-
-    public void open(InputStream in) {
-      this.decoder = FACTORY.directBinaryDecoder(in, decoder);
-    }
-
-    public AvroWrapper<T> deserialize(AvroWrapper<T> wrapper) throws IOException {
-      T datum = reader.read(wrapper == null ? null : wrapper.datum(), decoder);
-      if (wrapper == null) {
-        wrapper = isKey ? new AvroKey<T>(datum) : new AvroValue<T>(datum);
-      } else {
-        wrapper.datum(datum);
-      }
-      return wrapper;
-    }
-
-    public void close() throws IOException {
-      decoder.inputStream().close();
-    }
-  }
-
-  /** Returns the specified output serializer. */
-  public Serializer<AvroWrapper<T>> getSerializer(Class<AvroWrapper<T>> c) {
-    // AvroWrapper used for final output, AvroKey or AvroValue for map output
-    boolean isFinalOutput = c.equals(AvroWrapper.class);
-    Configuration conf = getConf();
-    Schema schema = isFinalOutput ? AvroJob.getOutputSchema(conf) : (AvroKey.class.isAssignableFrom(c) ? Pair
-        .getKeySchema(AvroJob.getMapOutputSchema(conf)) : Pair.getValueSchema(AvroJob.getMapOutputSchema(conf)));
-
-    ReflectDataFactory factory = Avros.getReflectDataFactory(conf);
-    ReflectDatumWriter<T> writer = factory.getWriter(schema);
-    return new AvroWrapperSerializer(writer);
-  }
-
-  private class AvroWrapperSerializer implements Serializer<AvroWrapper<T>> {
-    private DatumWriter<T> writer;
-    private OutputStream out;
-    private BinaryEncoder encoder;
-
-    public AvroWrapperSerializer(DatumWriter<T> writer) {
-      this.writer = writer;
-    }
-
-    public void open(OutputStream out) {
-      this.out = out;
-      this.encoder = new EncoderFactory().configureBlockSize(512).binaryEncoder(out, null);
-    }
-
-    public void serialize(AvroWrapper<T> wrapper) throws IOException {
-      writer.write(wrapper.datum(), encoder);
-      // would be a lot faster if the Serializer interface had a flush()
-      // method and the Hadoop framework called it when needed rather
-      // than for every record.
-      encoder.flush();
-    }
-
-    public void close() throws IOException {
-      out.close();
-    }
-  }
-
-}
\ No newline at end of file
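
SafeAvroSerialization wires Avro's binary encoding into Hadoop's Serialization API: getSerializer()
wraps a ReflectDatumWriter behind a BinaryEncoder, and getDeserializer() wraps a datum reader behind
a direct BinaryDecoder. A minimal sketch of that per-record round trip in isolation (schema, class
name, and datum are illustrative; assumes an Avro 1.7.x classpath):

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;

    import org.apache.avro.Schema;
    import org.apache.avro.io.BinaryDecoder;
    import org.apache.avro.io.BinaryEncoder;
    import org.apache.avro.io.DecoderFactory;
    import org.apache.avro.io.EncoderFactory;
    import org.apache.avro.reflect.ReflectDatumWriter;
    import org.apache.avro.specific.SpecificDatumReader;
    import org.apache.avro.util.Utf8;

    public class BinaryRoundTripSketch {
      public static void main(String[] args) throws Exception {
        Schema schema = Schema.create(Schema.Type.STRING);

        // Encode one datum, as AvroWrapperSerializer does for every record.
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        BinaryEncoder encoder = new EncoderFactory().configureBlockSize(512).binaryEncoder(out, null);
        new ReflectDatumWriter<CharSequence>(schema).write(new Utf8("hello crunch"), encoder);
        encoder.flush();

        // Decode it back with a direct binary decoder, as AvroWrapperDeserializer does.
        BinaryDecoder decoder =
            DecoderFactory.get().directBinaryDecoder(new ByteArrayInputStream(out.toByteArray()), null);
        CharSequence datum = new SpecificDatumReader<CharSequence>(schema).read(null, decoder);
        System.out.println(datum);
      }
    }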

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/avro/package-info.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/avro/package-info.java b/crunch/src/main/java/org/apache/crunch/types/avro/package-info.java
deleted file mode 100644
index abaf60f..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/avro/package-info.java
+++ /dev/null
@@ -1,22 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Business object serialization using Apache Avro.
- */
-package org.apache.crunch.types.avro;

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/package-info.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/package-info.java b/crunch/src/main/java/org/apache/crunch/types/package-info.java
deleted file mode 100644
index b420b03..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/package-info.java
+++ /dev/null
@@ -1,22 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Common functionality for business object serialization.
- */
-package org.apache.crunch.types;

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/writable/GenericArrayWritable.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/writable/GenericArrayWritable.java b/crunch/src/main/java/org/apache/crunch/types/writable/GenericArrayWritable.java
deleted file mode 100644
index 8b54008..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/writable/GenericArrayWritable.java
+++ /dev/null
@@ -1,135 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.writable;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.Arrays;
-
-import org.apache.commons.lang.builder.HashCodeBuilder;
-import org.apache.crunch.CrunchRuntimeException;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.WritableFactories;
-import org.apache.hadoop.io.WritableUtils;
-
-/**
- * A {@link Writable} for marshalling/unmarshalling Collections. Note that
- * element order is <em>undefined</em>!
- *
- * @param <T> The value type
- */
-class GenericArrayWritable<T> implements Writable {
-  private Writable[] values;
-  private Class<? extends Writable> valueClass;
-
-  public GenericArrayWritable(Class<? extends Writable> valueClass) {
-    this.valueClass = valueClass;
-  }
-
-  public GenericArrayWritable() {
-    // for deserialization
-  }
-
-  public void set(Writable[] values) {
-    this.values = values;
-  }
-
-  public Writable[] get() {
-    return values;
-  }
-
-  public void readFields(DataInput in) throws IOException {
-    values = new Writable[WritableUtils.readVInt(in)]; // construct values
-    if (values.length > 0) {
-      int nulls = WritableUtils.readVInt(in);
-      if (nulls == values.length) {
-        return;
-      }
-      String valueType = Text.readString(in);
-      setValueType(valueType);
-      for (int i = 0; i < values.length - nulls; i++) {
-        Writable value = WritableFactories.newInstance(valueClass);
-        value.readFields(in); // read a value
-        values[i] = value; // store it in values
-      }
-    }
-  }
-
-  protected void setValueType(String valueType) {
-    if (valueClass == null) {
-      try {
-        valueClass = Class.forName(valueType).asSubclass(Writable.class);
-      } catch (ClassNotFoundException e) {
-        throw new CrunchRuntimeException(e);
-      }
-    } else if (!valueType.equals(valueClass.getName())) {
-      throw new IllegalStateException("Incoming " + valueType + " is not " + valueClass);
-    }
-  }
-
-  public void write(DataOutput out) throws IOException {
-    WritableUtils.writeVInt(out, values.length);
-    if (values.length > 0) {
-      int nulls = 0;
-      for (int i = 0; i < values.length; i++) {
-        if (values[i] == null) {
-          nulls++;
-        }
-      }
-      WritableUtils.writeVInt(out, nulls);
-      if (values.length - nulls > 0) {
-        if (valueClass == null) {
-          throw new IllegalStateException("Value class not set by constructor or read");
-        }
-        Text.writeString(out, valueClass.getName());
-        for (int i = 0; i < values.length; i++) {
-          if (values[i] != null) {
-            values[i].write(out);
-          }
-        }
-      }
-    }
-  }
-
-  @Override
-  public int hashCode() {
-    HashCodeBuilder hcb = new HashCodeBuilder();
-    return hcb.append(values).toHashCode();
-  }
-
-  @Override
-  public boolean equals(Object obj) {
-    if (this == obj)
-      return true;
-    if (obj == null)
-      return false;
-    if (getClass() != obj.getClass())
-      return false;
-    GenericArrayWritable other = (GenericArrayWritable) obj;
-    if (!Arrays.equals(values, other.values))
-      return false;
-    return true;
-  }
-
-  @Override
-  public String toString() {
-    return Arrays.toString(values);
-  }
-}
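
The class comment above warns that element order is undefined; one visible reason is in the
write()/readFields() pair, where nulls are counted and skipped on write, and on read the non-null
values are filled in from the front, leaving the null slots at the end. A minimal round-trip sketch
(class name hypothetical; it would have to live in org.apache.crunch.types.writable because
GenericArrayWritable is package-private):

    package org.apache.crunch.types.writable;

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.util.Arrays;

    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.Writable;

    public class GenericArrayWritableSketch {
      public static void main(String[] args) throws Exception {
        GenericArrayWritable<Text> src = new GenericArrayWritable<Text>(Text.class);
        src.set(new Writable[] { new Text("a"), null, new Text("b") });

        // write() records the total length, the null count, the value class name,
        // and then only the non-null elements.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        src.write(new DataOutputStream(bytes));

        GenericArrayWritable<Text> copy = new GenericArrayWritable<Text>();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

        // Prints [a, b, null]: non-null values are packed at the front and the
        // null slot trails, hence the "order is undefined" caveat.
        System.out.println(Arrays.toString(copy.get()));
      }
    }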

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/writable/TextMapWritable.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/writable/TextMapWritable.java b/crunch/src/main/java/org/apache/crunch/types/writable/TextMapWritable.java
deleted file mode 100644
index 1ab51df..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/writable/TextMapWritable.java
+++ /dev/null
@@ -1,88 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.writable;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.Map;
-import java.util.Set;
-
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.WritableUtils;
-
-import com.google.common.collect.Maps;
-
-class TextMapWritable<T extends Writable> implements Writable {
-
-  private Class<T> valueClazz;
-  private final Map<Text, T> instance;
-
-  public TextMapWritable() {
-    this.instance = Maps.newHashMap();
-  }
-
-  public TextMapWritable(Class<T> valueClazz) {
-    this.valueClazz = valueClazz;
-    this.instance = Maps.newHashMap();
-  }
-
-  public void put(Text txt, T value) {
-    instance.put(txt, value);
-  }
-
-  public Set<Map.Entry<Text, T>> entrySet() {
-    return instance.entrySet();
-  }
-
-  @Override
-  public void readFields(DataInput in) throws IOException {
-    instance.clear();
-    try {
-      this.valueClazz = (Class<T>) Class.forName(Text.readString(in));
-    } catch (ClassNotFoundException e) {
-      throw (IOException) new IOException("Failed map init").initCause(e);
-    }
-    int entries = WritableUtils.readVInt(in);
-    try {
-      for (int i = 0; i < entries; i++) {
-        Text txt = new Text();
-        txt.readFields(in);
-        T value = valueClazz.newInstance();
-        value.readFields(in);
-        instance.put(txt, value);
-      }
-    } catch (IllegalAccessException e) {
-      throw (IOException) new IOException("Failed map init").initCause(e);
-    } catch (InstantiationException e) {
-      throw (IOException) new IOException("Failed map init").initCause(e);
-    }
-  }
-
-  @Override
-  public void write(DataOutput out) throws IOException {
-    Text.writeString(out, valueClazz.getName());
-    WritableUtils.writeVInt(out, instance.size());
-    for (Map.Entry<Text, T> e : instance.entrySet()) {
-      e.getKey().write(out);
-      e.getValue().write(out);
-    }
-  }
-
-}
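
TextMapWritable is package-private, so the round trip below assumes it runs from a class in the same package, and the value type must have a public no-arg constructor because readFields instantiates it reflectively. The sketch's class name is hypothetical.

    package org.apache.crunch.types.writable;

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;

    public class TextMapWritableRoundTrip {
      public static void main(String[] args) throws IOException {
        TextMapWritable<LongWritable> src = new TextMapWritable<LongWritable>(LongWritable.class);
        src.put(new Text("count"), new LongWritable(42L));

        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        src.write(new DataOutputStream(bytes));

        // The value class name travels on the wire and is reloaded via
        // Class.forName in readFields, so the no-arg constructor suffices here.
        TextMapWritable<LongWritable> copy = new TextMapWritable<LongWritable>();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(copy.entrySet());
      }
    }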

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/writable/TupleWritable.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/writable/TupleWritable.java b/crunch/src/main/java/org/apache/crunch/types/writable/TupleWritable.java
deleted file mode 100644
index 1c3536b..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/writable/TupleWritable.java
+++ /dev/null
@@ -1,224 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.writable;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.commons.lang.builder.HashCodeBuilder;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.io.WritableUtils;
-
-/**
- * A copy of the TupleWritable implementation from the Hadoop join package,
- * duplicated here because the original's package visibility prevents reuse.
- * 
- */
-public class TupleWritable implements WritableComparable<TupleWritable> {
-
-  private long written;
-  private Writable[] values;
-
-  /**
-   * Create an empty tuple with no allocated storage for writables.
-   */
-  public TupleWritable() {
-  }
-
-  /**
-   * Initialize the tuple with the given storage; it is not known whether any
-   * of the positions already contain &quot;written&quot; values.
-   */
-  public TupleWritable(Writable[] vals) {
-    written = 0L;
-    values = vals;
-  }
-
-  /**
-   * Return true if tuple has an element at the position provided.
-   */
-  public boolean has(int i) {
-    return 0 != ((1 << i) & written);
-  }
-
-  /**
-   * Get ith Writable from Tuple.
-   */
-  public Writable get(int i) {
-    return values[i];
-  }
-
-  /**
-   * The number of children in this Tuple.
-   */
-  public int size() {
-    return values.length;
-  }
-
-  /**
-   * {@inheritDoc}
-   */
-  public boolean equals(Object other) {
-    if (other instanceof TupleWritable) {
-      TupleWritable that = (TupleWritable) other;
-      if (this.size() != that.size() || this.written != that.written) {
-        return false;
-      }
-      for (int i = 0; i < values.length; ++i) {
-        if (!has(i))
-          continue;
-        if (!values[i].equals(that.get(i))) {
-          return false;
-        }
-      }
-      return true;
-    }
-    return false;
-  }
-
-  public int hashCode() {
-    HashCodeBuilder builder = new HashCodeBuilder();
-    builder.append(written);
-    for (Writable v : values) {
-      builder.append(v);
-    }
-    return builder.toHashCode();
-  }
-
-  /**
-   * Convert Tuple to String as in the following.
-   * <tt>[<child1>,<child2>,...,<childn>]</tt>
-   */
-  public String toString() {
-    StringBuffer buf = new StringBuffer("[");
-    for (int i = 0; i < values.length; ++i) {
-      buf.append(has(i) ? values[i].toString() : "");
-      buf.append(",");
-    }
-    if (values.length != 0)
-      buf.setCharAt(buf.length() - 1, ']');
-    else
-      buf.append(']');
-    return buf.toString();
-  }
-
-  /**
-   * Writes each written Writable to <code>out</code>. TupleWritable format:
-   * {@code
-   *  <count><written-bitmask><type_i for written i><obj_i for written i>
-   * }
-   */
-  public void write(DataOutput out) throws IOException {
-    WritableUtils.writeVInt(out, values.length);
-    WritableUtils.writeVLong(out, written);
-    for (int i = 0; i < values.length; ++i) {
-      if (has(i)) {
-        Text.writeString(out, values[i].getClass().getName());
-      }
-    }
-    for (int i = 0; i < values.length; ++i) {
-      if (has(i)) {
-        values[i].write(out);
-      }
-    }
-  }
-
-  /**
-   * {@inheritDoc}
-   */
-  @SuppressWarnings("unchecked")
-  // No static typeinfo on Tuples
-  public void readFields(DataInput in) throws IOException {
-    int card = WritableUtils.readVInt(in);
-    values = new Writable[card];
-    written = WritableUtils.readVLong(in);
-    Class<? extends Writable>[] cls = new Class[card];
-    try {
-      for (int i = 0; i < card; ++i) {
-        if (has(i)) {
-          cls[i] = Class.forName(Text.readString(in)).asSubclass(Writable.class);
-        }
-      }
-      for (int i = 0; i < card; ++i) {
-        if (has(i)) {
-          values[i] = cls[i].newInstance();
-          values[i].readFields(in);
-        }
-      }
-    } catch (ClassNotFoundException e) {
-      throw (IOException) new IOException("Failed tuple init").initCause(e);
-    } catch (IllegalAccessException e) {
-      throw (IOException) new IOException("Failed tuple init").initCause(e);
-    } catch (InstantiationException e) {
-      throw (IOException) new IOException("Failed tuple init").initCause(e);
-    }
-  }
-
-  /**
-   * Record that the tuple contains an element at the position provided.
-   */
-  public void setWritten(int i) {
-    written |= 1 << i;
-  }
-
-  /**
-   * Record that the tuple does not contain an element at the position provided.
-   */
-  public void clearWritten(int i) {
-    written &= -1 ^ (1 << i);
-  }
-
-  /**
-   * Clear any record of which writables have been written to, without releasing
-   * storage.
-   */
-  public void clearWritten() {
-    written = 0L;
-  }
-
-  @Override
-  public int compareTo(TupleWritable o) {
-    for (int i = 0; i < values.length; ++i) {
-      if (has(i) && !o.has(i)) {
-        return 1;
-      } else if (!has(i) && o.has(i)) {
-        return -1;
-      } else {
-        Writable v1 = values[i];
-        Writable v2 = o.values[i];
-        if (v1 != v2 && (v1 != null && !v1.equals(v2))) {
-          if (v1 instanceof WritableComparable && v2 instanceof WritableComparable) {
-            int cmp = ((WritableComparable) v1).compareTo((WritableComparable) v2);
-            if (cmp != 0) {
-              return cmp;
-            }
-          } else {
-            int cmp = v1.hashCode() - v2.hashCode();
-            if (cmp != 0) {
-              return cmp;
-            }
-          }
-        }
-      }
-    }
-    return values.length - o.values.length;
-  }
-}
\ No newline at end of file
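
TupleWritable is public, so a round trip can be shown directly. The key detail is the written bitmask: positions must be flagged with setWritten() or write() silently skips their types and values. The wrapper class name below is hypothetical.

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;

    import org.apache.crunch.types.writable.TupleWritable;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.Writable;

    public class TupleWritableRoundTrip {
      public static void main(String[] args) throws IOException {
        TupleWritable src = new TupleWritable(new Writable[] { new Text("a"), new IntWritable(1) });
        src.setWritten(0);  // without these flags, write() emits no types or values
        src.setWritten(1);

        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        src.write(new DataOutputStream(bytes));

        TupleWritable copy = new TupleWritable();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(copy);  // prints [a,1]
      }
    }

Note that setWritten() and has() shift an int literal (1 << i), so only the first 32 positions are tracked reliably even though the bitmask field is a long.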

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/writable/WritableDeepCopier.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/writable/WritableDeepCopier.java b/crunch/src/main/java/org/apache/crunch/types/writable/WritableDeepCopier.java
deleted file mode 100644
index 7b6e11b..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/writable/WritableDeepCopier.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.writable;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.DataInput;
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-
-import org.apache.crunch.CrunchRuntimeException;
-import org.apache.crunch.types.DeepCopier;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.Writable;
-
-/**
- * Performs deep copies of Writable values.
- * 
- * @param <T> The type of Writable that can be copied
- */
-public class WritableDeepCopier<T extends Writable> implements DeepCopier<T> {
-
-  private Class<T> writableClass;
-
-  public WritableDeepCopier(Class<T> writableClass) {
-    this.writableClass = writableClass;
-  }
-
-  @Override
-  public void initialize(Configuration conf) {
-  }
-
-  @Override
-  public T deepCopy(T source) {
-    
-    if (source == null) {
-      return null;
-    }
-    
-    ByteArrayOutputStream byteOutStream = new ByteArrayOutputStream();
-    DataOutputStream dataOut = new DataOutputStream(byteOutStream);
-    T copiedValue = null;
-    try {
-      source.write(dataOut);
-      dataOut.flush();
-      ByteArrayInputStream byteInStream = new ByteArrayInputStream(byteOutStream.toByteArray());
-      DataInput dataInput = new DataInputStream(byteInStream);
-      copiedValue = writableClass.newInstance();
-      copiedValue.readFields(dataInput);
-    } catch (Exception e) {
-      throw new CrunchRuntimeException("Error while deep copying " + source, e);
-    }
-    return copiedValue;
-  }
-}
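
A small usage sketch: deepCopy serializes the value to bytes and reads it back into a fresh instance, which is what protects materialized values from Hadoop's object reuse. The class name below is hypothetical; Text is used because it has the public no-arg constructor that deepCopy requires.

    import org.apache.crunch.types.writable.WritableDeepCopier;
    import org.apache.hadoop.io.Text;

    public class DeepCopyExample {
      public static void main(String[] args) {
        WritableDeepCopier<Text> copier = new WritableDeepCopier<Text>(Text.class);
        Text original = new Text("hello");
        Text copy = copier.deepCopy(original);

        original.set("mutated");               // simulates Hadoop reusing the instance
        System.out.println(copy);              // still prints "hello"
        System.out.println(copy != original);  // true: a distinct instance
      }
    }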

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/writable/WritableGroupedTableType.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/writable/WritableGroupedTableType.java b/crunch/src/main/java/org/apache/crunch/types/writable/WritableGroupedTableType.java
deleted file mode 100644
index 84318d3..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/writable/WritableGroupedTableType.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.writable;
-
-import org.apache.crunch.GroupingOptions;
-import org.apache.crunch.MapFn;
-import org.apache.crunch.Pair;
-import org.apache.crunch.lib.PTables;
-import org.apache.crunch.types.Converter;
-import org.apache.crunch.types.PGroupedTableType;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.Job;
-
-class WritableGroupedTableType<K, V> extends PGroupedTableType<K, V> {
-
-  private final MapFn inputFn;
-  private final MapFn outputFn;
-  private final Converter converter;
-
-  public WritableGroupedTableType(WritableTableType<K, V> tableType) {
-    super(tableType);
-    WritableType keyType = (WritableType) tableType.getKeyType();
-    WritableType valueType = (WritableType) tableType.getValueType();
-    this.inputFn = new PairIterableMapFn(keyType.getInputMapFn(), valueType.getInputMapFn());
-    this.outputFn = tableType.getOutputMapFn();
-    this.converter = new WritablePairConverter(keyType.getSerializationClass(),
-        valueType.getSerializationClass());
-  }
-
-  @Override
-  public Class<Pair<K, Iterable<V>>> getTypeClass() {
-    return (Class<Pair<K, Iterable<V>>>) Pair.of(null, null).getClass();
-  }
-
-  @Override
-  public Converter getGroupingConverter() {
-    return converter;
-  }
-
-  @Override
-  public MapFn getInputMapFn() {
-    return inputFn;
-  }
-
-  @Override
-  public MapFn getOutputMapFn() {
-    return outputFn;
-  }
-
-  @Override
-  public void initialize(Configuration conf) {
-    this.tableType.initialize(conf);
-  }
-
-  @Override
-  public Pair<K, Iterable<V>> getDetachedValue(Pair<K, Iterable<V>> value) {
-    return PTables.getGroupedDetachedValue(this, value);
-  }
-
-  @Override
-  public void configureShuffle(Job job, GroupingOptions options) {
-    if (options != null) {
-      options.configure(job);
-    }
-    WritableType keyType = (WritableType) tableType.getKeyType();
-    WritableType valueType = (WritableType) tableType.getValueType();
-    job.setMapOutputKeyClass(keyType.getSerializationClass());
-    job.setMapOutputValueClass(valueType.getSerializationClass());
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/writable/WritablePairConverter.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/writable/WritablePairConverter.java b/crunch/src/main/java/org/apache/crunch/types/writable/WritablePairConverter.java
deleted file mode 100644
index 2db0238..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/writable/WritablePairConverter.java
+++ /dev/null
@@ -1,62 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.writable;
-
-import org.apache.crunch.Pair;
-import org.apache.crunch.types.Converter;
-
-class WritablePairConverter<K, V> implements Converter<K, V, Pair<K, V>, Pair<K, Iterable<V>>> {
-
-  private final Class<K> keyClass;
-  private final Class<V> valueClass;
-
-  public WritablePairConverter(Class<K> keyClass, Class<V> valueClass) {
-    this.keyClass = keyClass;
-    this.valueClass = valueClass;
-  }
-
-  @Override
-  public Pair<K, V> convertInput(K key, V value) {
-    return Pair.of(key, value);
-  }
-
-  @Override
-  public K outputKey(Pair<K, V> value) {
-    return value.first();
-  }
-
-  @Override
-  public V outputValue(Pair<K, V> value) {
-    return value.second();
-  }
-
-  @Override
-  public Class<K> getKeyClass() {
-    return keyClass;
-  }
-
-  @Override
-  public Class<V> getValueClass() {
-    return valueClass;
-  }
-
-  @Override
-  public Pair<K, Iterable<V>> convertIterableInput(K key, Iterable<V> value) {
-    return Pair.of(key, value);
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/writable/WritableTableType.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/writable/WritableTableType.java b/crunch/src/main/java/org/apache/crunch/types/writable/WritableTableType.java
deleted file mode 100644
index 93e0fd6..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/writable/WritableTableType.java
+++ /dev/null
@@ -1,130 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.writable;
-
-import java.util.List;
-
-import org.apache.commons.lang.builder.HashCodeBuilder;
-import org.apache.crunch.MapFn;
-import org.apache.crunch.Pair;
-import org.apache.crunch.fn.PairMapFn;
-import org.apache.crunch.io.ReadableSourceTarget;
-import org.apache.crunch.io.seq.SeqFileTableSourceTarget;
-import org.apache.crunch.lib.PTables;
-import org.apache.crunch.types.Converter;
-import org.apache.crunch.types.PGroupedTableType;
-import org.apache.crunch.types.PTableType;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.PTypeFamily;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Writable;
-
-import com.google.common.collect.ImmutableList;
-
-class WritableTableType<K, V> implements PTableType<K, V> {
-
-  private final WritableType<K, Writable> keyType;
-  private final WritableType<V, Writable> valueType;
-  private final MapFn inputFn;
-  private final MapFn outputFn;
-  private final Converter converter;
-
-  public WritableTableType(WritableType<K, Writable> keyType, WritableType<V, Writable> valueType) {
-    this.keyType = keyType;
-    this.valueType = valueType;
-    this.inputFn = new PairMapFn(keyType.getInputMapFn(), valueType.getInputMapFn());
-    this.outputFn = new PairMapFn(keyType.getOutputMapFn(), valueType.getOutputMapFn());
-    this.converter = new WritablePairConverter(keyType.getSerializationClass(),
-        valueType.getSerializationClass());
-  }
-
-  @Override
-  public Class<Pair<K, V>> getTypeClass() {
-    return (Class<Pair<K, V>>) Pair.of(null, null).getClass();
-  }
-
-  @Override
-  public List<PType> getSubTypes() {
-    return ImmutableList.<PType> of(keyType, valueType);
-  }
-
-  @Override
-  public MapFn getInputMapFn() {
-    return inputFn;
-  }
-
-  @Override
-  public MapFn getOutputMapFn() {
-    return outputFn;
-  }
-
-  @Override
-  public Converter getConverter() {
-    return converter;
-  }
-
-  @Override
-  public PTypeFamily getFamily() {
-    return WritableTypeFamily.getInstance();
-  }
-
-  public PType<K> getKeyType() {
-    return keyType;
-  }
-
-  public PType<V> getValueType() {
-    return valueType;
-  }
-
-  @Override
-  public PGroupedTableType<K, V> getGroupedTableType() {
-    return new WritableGroupedTableType<K, V>(this);
-  }
-
-  @Override
-  public ReadableSourceTarget<Pair<K, V>> getDefaultFileSource(Path path) {
-    return new SeqFileTableSourceTarget<K, V>(path, this);
-  }
-
-  @Override
-  public void initialize(Configuration conf) {
-    keyType.initialize(conf);
-    valueType.initialize(conf);
-  }
-
-  @Override
-  public Pair<K, V> getDetachedValue(Pair<K, V> value) {
-    return PTables.getDetachedValue(this, value);
-  }
-
-  @Override
-  public boolean equals(Object obj) {
-    if (obj == null || !(obj instanceof WritableTableType)) {
-      return false;
-    }
-    WritableTableType that = (WritableTableType) obj;
-    return keyType.equals(that.keyType) && valueType.equals(that.valueType);
-  }
-
-  @Override
-  public int hashCode() {
-    HashCodeBuilder hcb = new HashCodeBuilder();
-    return hcb.append(keyType).append(valueType).toHashCode();
-  }
-}
\ No newline at end of file
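
WritableTableType is package-private; user code reaches it through Writables.tableOf(), which assembles the key and value PTypes seen above. A brief sketch, assuming the standard Writables factory methods (the class name is illustrative):

    import org.apache.crunch.types.PTableType;
    import org.apache.crunch.types.writable.Writables;

    public class TableTypeExample {
      public static void main(String[] args) {
        // Builds the package-private table type above; callers only ever
        // see it through the PTableType interface.
        PTableType<String, Long> type = Writables.tableOf(Writables.strings(), Writables.longs());
        System.out.println(type.getKeyType().getTypeClass());   // class java.lang.String
        System.out.println(type.getValueType().getTypeClass()); // class java.lang.Long
      }
    }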

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/writable/WritableType.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/writable/WritableType.java b/crunch/src/main/java/org/apache/crunch/types/writable/WritableType.java
deleted file mode 100644
index 734946c..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/writable/WritableType.java
+++ /dev/null
@@ -1,133 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.writable;
-
-import java.util.List;
-
-import org.apache.commons.lang.builder.HashCodeBuilder;
-import org.apache.crunch.MapFn;
-import org.apache.crunch.io.ReadableSourceTarget;
-import org.apache.crunch.io.seq.SeqFileSourceTarget;
-import org.apache.crunch.types.Converter;
-import org.apache.crunch.types.DeepCopier;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.PTypeFamily;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Writable;
-
-import com.google.common.collect.ImmutableList;
-
-public class WritableType<T, W extends Writable> implements PType<T> {
-
-  private final Class<T> typeClass;
-  private final Class<W> writableClass;
-  private final Converter converter;
-  private final MapFn<W, T> inputFn;
-  private final MapFn<T, W> outputFn;
-  private final DeepCopier<W> deepCopier;
-  private final List<PType> subTypes;
-  private boolean initialized = false;
-
-  public WritableType(Class<T> typeClass, Class<W> writableClass, MapFn<W, T> inputDoFn,
-      MapFn<T, W> outputDoFn, PType... subTypes) {
-    this.typeClass = typeClass;
-    this.writableClass = writableClass;
-    this.inputFn = inputDoFn;
-    this.outputFn = outputDoFn;
-    this.converter = new WritableValueConverter(writableClass);
-    this.deepCopier = new WritableDeepCopier<W>(writableClass);
-    this.subTypes = ImmutableList.<PType> builder().add(subTypes).build();
-  }
-
-  @Override
-  public PTypeFamily getFamily() {
-    return WritableTypeFamily.getInstance();
-  }
-
-  @Override
-  public Class<T> getTypeClass() {
-    return typeClass;
-  }
-
-  @Override
-  public Converter getConverter() {
-    return converter;
-  }
-
-  @Override
-  public MapFn getInputMapFn() {
-    return inputFn;
-  }
-
-  @Override
-  public MapFn getOutputMapFn() {
-    return outputFn;
-  }
-
-  @Override
-  public List<PType> getSubTypes() {
-    return subTypes;
-  }
-
-  public Class<W> getSerializationClass() {
-    return writableClass;
-  }
-
-  @Override
-  public ReadableSourceTarget<T> getDefaultFileSource(Path path) {
-    return new SeqFileSourceTarget<T>(path, this);
-  }
-
-  @Override
-  public boolean equals(Object obj) {
-    if (obj == null || !(obj instanceof WritableType)) {
-      return false;
-    }
-    WritableType wt = (WritableType) obj;
-    return (typeClass.equals(wt.typeClass) && writableClass.equals(wt.writableClass) && subTypes
-        .equals(wt.subTypes));
-  }
-
-  @Override
-  public void initialize(Configuration conf) {
-    this.inputFn.initialize();
-    this.outputFn.initialize();
-    for (PType subType : subTypes) {
-      subType.initialize(conf);
-    }
-    this.initialized = true;
-  }
-
-  @Override
-  public T getDetachedValue(T value) {
-    if (!initialized) {
-      throw new IllegalStateException("Cannot call getDetachedValue on an uninitialized PType");
-    }
-    W writableValue = outputFn.map(value);
-    W deepCopy = this.deepCopier.deepCopy(writableValue);
-    return inputFn.map(deepCopy);
-  }
-
-  @Override
-  public int hashCode() {
-    HashCodeBuilder hcb = new HashCodeBuilder();
-    hcb.append(typeClass).append(writableClass).append(subTypes);
-    return hcb.toHashCode();
-  }
-}
\ No newline at end of file
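
The initialize()/getDetachedValue() pair above is the part most visible to users: a PType must be initialized with a Configuration before it can detach a value from Hadoop's reused objects. A minimal sketch, assuming the public Writables factory (the class name is illustrative):

    import org.apache.crunch.types.PType;
    import org.apache.crunch.types.writable.Writables;
    import org.apache.hadoop.conf.Configuration;

    public class DetachedValueExample {
      public static void main(String[] args) {
        PType<String> strings = Writables.strings();
        strings.initialize(new Configuration()); // skipping this triggers the IllegalStateException above
        String detached = strings.getDetachedValue("hello");
        System.out.println(detached); // "hello", produced via a Text round trip and deep copy
      }
    }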

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/writable/WritableTypeFamily.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/writable/WritableTypeFamily.java b/crunch/src/main/java/org/apache/crunch/types/writable/WritableTypeFamily.java
deleted file mode 100644
index a94db96..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/writable/WritableTypeFamily.java
+++ /dev/null
@@ -1,147 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.writable;
-
-import java.nio.ByteBuffer;
-import java.util.Collection;
-import java.util.Map;
-
-import org.apache.crunch.MapFn;
-import org.apache.crunch.Pair;
-import org.apache.crunch.Tuple;
-import org.apache.crunch.Tuple3;
-import org.apache.crunch.Tuple4;
-import org.apache.crunch.TupleN;
-import org.apache.crunch.types.PGroupedTableType;
-import org.apache.crunch.types.PTableType;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.PTypeFamily;
-import org.apache.crunch.types.PTypeUtils;
-import org.apache.hadoop.io.Writable;
-
-/**
- * The {@link Writable}-based implementation of the
- * {@link org.apache.crunch.types.PTypeFamily} interface.
- */
-public class WritableTypeFamily implements PTypeFamily {
-
-  private static final WritableTypeFamily INSTANCE = new WritableTypeFamily();
-
-  public static WritableTypeFamily getInstance() {
-    return INSTANCE;
-  }
-
-  // Disallow construction
-  private WritableTypeFamily() {
-  }
-
-  public PType<Void> nulls() {
-    return Writables.nulls();
-  }
-
-  public PType<String> strings() {
-    return Writables.strings();
-  }
-
-  public PType<Long> longs() {
-    return Writables.longs();
-  }
-
-  public PType<Integer> ints() {
-    return Writables.ints();
-  }
-
-  public PType<Float> floats() {
-    return Writables.floats();
-  }
-
-  public PType<Double> doubles() {
-    return Writables.doubles();
-  }
-
-  public PType<Boolean> booleans() {
-    return Writables.booleans();
-  }
-
-  public PType<ByteBuffer> bytes() {
-    return Writables.bytes();
-  }
-
-  public <T> PType<T> records(Class<T> clazz) {
-    return Writables.records(clazz);
-  }
-
-  public <W extends Writable> PType<W> writables(Class<W> clazz) {
-    return Writables.writables(clazz);
-  }
-
-  public <K, V> PTableType<K, V> tableOf(PType<K> key, PType<V> value) {
-    return Writables.tableOf(key, value);
-  }
-
-  public <V1, V2> PType<Pair<V1, V2>> pairs(PType<V1> p1, PType<V2> p2) {
-    return Writables.pairs(p1, p2);
-  }
-
-  public <V1, V2, V3> PType<Tuple3<V1, V2, V3>> triples(PType<V1> p1, PType<V2> p2, PType<V3> p3) {
-    return Writables.triples(p1, p2, p3);
-  }
-
-  public <V1, V2, V3, V4> PType<Tuple4<V1, V2, V3, V4>> quads(PType<V1> p1, PType<V2> p2, PType<V3> p3, PType<V4> p4) {
-    return Writables.quads(p1, p2, p3, p4);
-  }
-
-  public PType<TupleN> tuples(PType<?>... ptypes) {
-    return Writables.tuples(ptypes);
-  }
-
-  public <T> PType<Collection<T>> collections(PType<T> ptype) {
-    return Writables.collections(ptype);
-  }
-
-  public <T> PType<Map<String, T>> maps(PType<T> ptype) {
-    return Writables.maps(ptype);
-  }
-
-  @Override
-  public <T> PType<T> as(PType<T> ptype) {
-    if (ptype instanceof WritableType || ptype instanceof WritableTableType
-        || ptype instanceof WritableGroupedTableType) {
-      return ptype;
-    }
-    if (ptype instanceof PGroupedTableType) {
-      PTableType ptt = ((PGroupedTableType) ptype).getTableType();
-      return new WritableGroupedTableType((WritableTableType) as(ptt));
-    }
-    PType<T> prim = Writables.getPrimitiveType(ptype.getTypeClass());
-    if (prim != null) {
-      return prim;
-    }
-    return PTypeUtils.convert(ptype, this);
-  }
-
-  @Override
-  public <T extends Tuple> PType<T> tuples(Class<T> clazz, PType<?>... ptypes) {
-    return Writables.tuples(clazz, ptypes);
-  }
-
-  @Override
-  public <S, T> PType<T> derived(Class<T> clazz, MapFn<S, T> inputFn, MapFn<T, S> outputFn, PType<S> base) {
-    return Writables.derived(clazz, inputFn, outputFn, base);
-  }
-}
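
Because every method simply delegates to Writables, the family object is mainly useful for library code that stays serialization-agnostic: accept a PTypeFamily parameter and let the caller pass WritableTypeFamily.getInstance() (or the Avro family). A brief sketch with an illustrative class name:

    import org.apache.crunch.Pair;
    import org.apache.crunch.types.PType;
    import org.apache.crunch.types.PTypeFamily;
    import org.apache.crunch.types.writable.WritableTypeFamily;

    public class TypeFamilyExample {
      // Family-agnostic helper: the same code works for the Writable or Avro family.
      static PType<Pair<String, Long>> wordCountType(PTypeFamily tf) {
        return tf.pairs(tf.strings(), tf.longs());
      }

      public static void main(String[] args) {
        PType<Pair<String, Long>> type = wordCountType(WritableTypeFamily.getInstance());
        System.out.println(type.getFamily() == WritableTypeFamily.getInstance()); // true
      }
    }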

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/writable/WritableValueConverter.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/writable/WritableValueConverter.java b/crunch/src/main/java/org/apache/crunch/types/writable/WritableValueConverter.java
deleted file mode 100644
index 3670b90..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/writable/WritableValueConverter.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.writable;
-
-import org.apache.crunch.types.Converter;
-import org.apache.hadoop.io.NullWritable;
-
-class WritableValueConverter<W> implements Converter<Object, W, W, Iterable<W>> {
-
-  private final Class<W> serializationClass;
-
-  public WritableValueConverter(Class<W> serializationClass) {
-    this.serializationClass = serializationClass;
-  }
-
-  @Override
-  public W convertInput(Object key, W value) {
-    return value;
-  }
-
-  @Override
-  public Object outputKey(W value) {
-    return NullWritable.get();
-  }
-
-  @Override
-  public W outputValue(W value) {
-    return value;
-  }
-
-  @Override
-  public Class<Object> getKeyClass() {
-    return (Class<Object>) (Class<?>) NullWritable.class;
-  }
-
-  @Override
-  public Class<W> getValueClass() {
-    return serializationClass;
-  }
-
-  @Override
-  public Iterable<W> convertIterableInput(Object key, Iterable<W> value) {
-    return value;
-  }
-}
\ No newline at end of file


[03/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/fn/AggregatorsTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/fn/AggregatorsTest.java b/crunch/src/test/java/org/apache/crunch/fn/AggregatorsTest.java
deleted file mode 100644
index 6ee1972..0000000
--- a/crunch/src/test/java/org/apache/crunch/fn/AggregatorsTest.java
+++ /dev/null
@@ -1,239 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.fn;
-
-import static org.apache.crunch.fn.Aggregators.MAX_BIGINTS;
-import static org.apache.crunch.fn.Aggregators.MAX_DOUBLES;
-import static org.apache.crunch.fn.Aggregators.MAX_FLOATS;
-import static org.apache.crunch.fn.Aggregators.MAX_INTS;
-import static org.apache.crunch.fn.Aggregators.MAX_LONGS;
-import static org.apache.crunch.fn.Aggregators.MAX_N;
-import static org.apache.crunch.fn.Aggregators.MIN_BIGINTS;
-import static org.apache.crunch.fn.Aggregators.MIN_DOUBLES;
-import static org.apache.crunch.fn.Aggregators.MIN_FLOATS;
-import static org.apache.crunch.fn.Aggregators.MIN_INTS;
-import static org.apache.crunch.fn.Aggregators.MIN_LONGS;
-import static org.apache.crunch.fn.Aggregators.MIN_N;
-import static org.apache.crunch.fn.Aggregators.STRING_CONCAT;
-import static org.apache.crunch.fn.Aggregators.SUM_BIGINTS;
-import static org.apache.crunch.fn.Aggregators.SUM_DOUBLES;
-import static org.apache.crunch.fn.Aggregators.SUM_FLOATS;
-import static org.apache.crunch.fn.Aggregators.SUM_INTS;
-import static org.apache.crunch.fn.Aggregators.SUM_LONGS;
-import static org.hamcrest.Matchers.closeTo;
-import static org.hamcrest.Matchers.is;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertThat;
-
-import java.math.BigInteger;
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.crunch.Aggregator;
-import org.apache.crunch.CombineFn;
-import org.apache.crunch.Pair;
-import org.apache.crunch.Tuple3;
-import org.apache.crunch.Tuple4;
-import org.apache.crunch.TupleN;
-import org.apache.crunch.impl.mem.emit.InMemoryEmitter;
-import org.junit.Test;
-
-import com.google.common.base.Function;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableSet;
-import com.google.common.collect.Iterables;
-
-
-public class AggregatorsTest {
-
-  @Test
-  public void testSums2() {
-    assertThat(sapply(SUM_INTS(), 1, 2, 3, -4), is(2));
-    assertThat(sapply(SUM_LONGS(), 1L, 2L, 3L, -4L, 5000000000L), is(5000000002L));
-    assertThat(sapply(SUM_FLOATS(), 1f, 2f, 3f, -4f), is(2f));
-    assertThat(sapply(SUM_DOUBLES(), 0.1, 0.2, 0.3), is(closeTo(0.6, 0.00001)));
-    assertThat(sapply(SUM_BIGINTS(), bigInt("7"), bigInt("3")), is(bigInt("10")));
-  }
-
-  @Test
-  public void testSums() {
-    assertThat(sapply(SUM_LONGS(), 29L, 17L, 1729L), is(1775L));
-    assertThat(sapply(SUM_LONGS(), 29L, 7L, 1729L), is(1765L));
-    assertThat(sapply(SUM_INTS(), 29, 17, 1729), is(1775));
-    assertThat(sapply(SUM_FLOATS(), 29f, 17f, 1729f), is(1775.0f));
-    assertThat(sapply(SUM_DOUBLES(), 29.0, 17.0, 1729.0), is(1775.0));
-    assertThat(sapply(SUM_BIGINTS(), bigInt("29"), bigInt("17"), bigInt("1729")), is(bigInt("1775")));
-  }
-
-  @Test
-  public void testMax() {
-    assertThat(sapply(MAX_LONGS(), 29L, 17L, 1729L), is(1729L));
-    assertThat(sapply(MAX_INTS(), 29, 17, 1729), is(1729));
-    assertThat(sapply(MAX_FLOATS(), 29f, 17f, 1729f), is(1729.0f));
-    assertThat(sapply(MAX_DOUBLES(), 29.0, 17.0, 1729.0), is(1729.0));
-    assertThat(sapply(MAX_FLOATS(), 29f, 1745f, 17f, 1729f), is(1745.0f));
-    assertThat(sapply(MAX_BIGINTS(), bigInt("29"), bigInt("17"), bigInt("1729")), is(bigInt("1729")));
-  }
-
-  @Test
-  public void testMin() {
-    assertThat(sapply(MIN_LONGS(), 29L, 17L, 1729L), is(17L));
-    assertThat(sapply(MIN_INTS(), 29, 17, 1729), is(17));
-    assertThat(sapply(MIN_FLOATS(), 29f, 17f, 1729f), is(17.0f));
-    assertThat(sapply(MIN_DOUBLES(), 29.0, 17.0, 1729.0), is(17.0));
-    assertThat(sapply(MIN_INTS(), 29, 170, 1729), is(29));
-    assertThat(sapply(MIN_BIGINTS(), bigInt("29"), bigInt("17"), bigInt("1729")), is(bigInt("17")));
-  }
-
-  @Test
-  public void testMaxN() {
-    assertThat(apply(MAX_INTS(2), 17, 34, 98, 29, 1009), is(ImmutableList.of(98, 1009)));
-    assertThat(apply(MAX_N(1, String.class), "b", "a"), is(ImmutableList.of("b")));
-    assertThat(apply(MAX_N(3, String.class), "b", "a", "d", "c"), is(ImmutableList.of("b", "c", "d")));
-  }
-
-  @Test
-  public void testMinN() {
-    assertThat(apply(MIN_INTS(2), 17, 34, 98, 29, 1009), is(ImmutableList.of(17, 29)));
-    assertThat(apply(MIN_N(1, String.class), "b", "a"), is(ImmutableList.of("a")));
-    assertThat(apply(MIN_N(3, String.class), "b", "a", "d", "c"), is(ImmutableList.of("a", "b", "c")));
-  }
-
-  @Test
-  public void testFirstN() {
-    assertThat(apply(Aggregators.<Integer>FIRST_N(2), 17, 34, 98, 29, 1009), is(ImmutableList.of(17, 34)));
-  }
-
-  @Test
-  public void testLastN() {
-    assertThat(apply(Aggregators.<Integer>LAST_N(2), 17, 34, 98, 29, 1009), is(ImmutableList.of(29, 1009)));
-  }
-  
-  @Test
-  public void testUniqueElements() {
-    assertThat(ImmutableSet.copyOf(apply(Aggregators.<Integer>UNIQUE_ELEMENTS(), 17, 29, 29, 16, 17)),
-        is(ImmutableSet.of(17, 29, 16)));
-    
-    Iterable<Integer> samp = apply(Aggregators.<Integer>SAMPLE_UNIQUE_ELEMENTS(2), 17, 29, 16, 17, 29, 16);
-    assertThat(Iterables.size(samp), is(2));
-    assertThat(ImmutableSet.copyOf(samp).size(), is(2)); // check that the two elements are unique
-  }
-  
-  @Test
-  public void testPairs() {
-    List<Pair<Long, Double>> input = ImmutableList.of(Pair.of(1720L, 17.29), Pair.of(9L, -3.14));
-    Aggregator<Pair<Long, Double>> a = Aggregators.pairAggregator(SUM_LONGS(), MIN_DOUBLES());
-
-    assertThat(sapply(a, input), is(Pair.of(1729L, -3.14)));
-  }
-
-  @Test
-  public void testPairsTwoLongs() {
-    List<Pair<Long, Long>> input = ImmutableList.of(Pair.of(1720L, 1L), Pair.of(9L, 19L));
-    Aggregator<Pair<Long, Long>> a = Aggregators.pairAggregator(SUM_LONGS(), SUM_LONGS());
-
-    assertThat(sapply(a, input), is(Pair.of(1729L, 20L)));
-  }
-
-  @Test
-  public void testTrips() {
-    List<Tuple3<Float, Double, Double>> input = ImmutableList.of(Tuple3.of(17.29f, 12.2, 0.1),
-        Tuple3.of(3.0f, 1.2, 3.14), Tuple3.of(-1.0f, 14.5, -0.98));
-    Aggregator<Tuple3<Float, Double, Double>> a = Aggregators.tripAggregator(
-        MAX_FLOATS(), MAX_DOUBLES(), MIN_DOUBLES());
-
-    assertThat(sapply(a, input), is(Tuple3.of(17.29f, 14.5, -0.98)));
-  }
-
-  @Test
-  public void testQuads() {
-    List<Tuple4<Float, Double, Double, Integer>> input = ImmutableList.of(Tuple4.of(17.29f, 12.2, 0.1, 1),
-        Tuple4.of(3.0f, 1.2, 3.14, 2), Tuple4.of(-1.0f, 14.5, -0.98, 3));
-    Aggregator<Tuple4<Float, Double, Double, Integer>> a = Aggregators.quadAggregator(
-        MAX_FLOATS(), MAX_DOUBLES(), MIN_DOUBLES(), SUM_INTS());
-
-    assertThat(sapply(a, input), is(Tuple4.of(17.29f, 14.5, -0.98, 6)));
-  }
-
-  @Test
-  public void testTupleN() {
-    List<TupleN> input = ImmutableList.of(new TupleN(1, 3.0, 1, 2.0, 4L), new TupleN(4, 17.0, 1, 9.7, 12L));
-    Aggregator<TupleN> a = Aggregators.tupleAggregator(
-        MIN_INTS(), SUM_DOUBLES(), MAX_INTS(), MIN_DOUBLES(), MAX_LONGS());
-
-    assertThat(sapply(a, input), is(new TupleN(1, 20.0, 1, 2.0, 12L)));
-  }
-
-  @Test
-  public void testConcatenation() {
-    assertThat(sapply(STRING_CONCAT("", true), "foo", "foobar", "bar"), is("foofoobarbar"));
-    assertThat(sapply(STRING_CONCAT("/", false), "foo", "foobar", "bar"), is("foo/foobar/bar"));
-    assertThat(sapply(STRING_CONCAT(" ", true), " ", ""), is("  "));
-    assertThat(sapply(STRING_CONCAT(" ", true), Arrays.asList(null, "")), is(""));
-    assertThat(sapply(STRING_CONCAT(" ", true, 20, 3), "foo", "foobar", "bar"), is("foo bar"));
-    assertThat(sapply(STRING_CONCAT(" ", true, 10, 6), "foo", "foobar", "bar"), is("foo foobar"));
-    assertThat(sapply(STRING_CONCAT(" ", true, 9, 6), "foo", "foobar", "bar"), is("foo bar"));
-  }
-
-  @Test(expected = NullPointerException.class)
-  public void testConcatenationNullException() {
-    sapply(STRING_CONCAT(" ", false), Arrays.asList(null, "" ));
-  }
-
-
-  private static <T> T sapply(Aggregator<T> a, T... values) {
-    return sapply(a, ImmutableList.copyOf(values));
-  }
-
-  private static <T> T sapply(Aggregator<T> a, Iterable<T> values) {
-    return Iterables.getOnlyElement(apply(a, values));
-  }
-
-  private static <T> ImmutableList<T> apply(Aggregator<T> a, T... values) {
-    return apply(a, ImmutableList.copyOf(values));
-  }
-
-  private static <T> ImmutableList<T> apply(Aggregator<T> a, Iterable<T> values) {
-    CombineFn<String, T> fn = Aggregators.toCombineFn(a);
-
-    InMemoryEmitter<Pair<String, T>> e1 = new InMemoryEmitter<Pair<String,T>>();
-    fn.process(Pair.of("", values), e1);
-
-    // and a second time to make sure Aggregator.reset() works
-    InMemoryEmitter<Pair<String, T>> e2 = new InMemoryEmitter<Pair<String,T>>();
-    fn.process(Pair.of("", values), e2);
-
-    assertEquals(getValues(e1), getValues(e2));
-
-    return getValues(e1);
-  }
-
-  private static <K, V> ImmutableList<V> getValues(InMemoryEmitter<Pair<K, V>> emitter) {
-    return ImmutableList.copyOf(
-        Iterables.transform(emitter.getOutput(), new Function<Pair<K, V>, V>() {
-      @Override
-      public V apply(Pair<K, V> input) {
-        return input.second();
-      }
-    }));
-  }
-
-  private static BigInteger bigInt(String value) {
-    return new BigInteger(value);
-  }
-}
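
Outside of tests, these aggregators are normally attached to a grouped table. A compile-only sketch against the public API, using the same Aggregators.toCombineFn bridge the test exercises; the class and method names are illustrative, and groupByKey/combineValues are assumed to have their usual Crunch signatures:

    import org.apache.crunch.PTable;
    import org.apache.crunch.fn.Aggregators;

    public class WordCountCombine {
      // Sums per-word counts by converting the SUM_LONGS aggregator to a CombineFn.
      public static PTable<String, Long> sumCounts(PTable<String, Long> counts) {
        return counts.groupByKey().combineValues(
            Aggregators.<String, Long>toCombineFn(Aggregators.SUM_LONGS()));
      }
    }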

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/fn/ExtractKeyFnTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/fn/ExtractKeyFnTest.java b/crunch/src/test/java/org/apache/crunch/fn/ExtractKeyFnTest.java
deleted file mode 100644
index b5b2a1b..0000000
--- a/crunch/src/test/java/org/apache/crunch/fn/ExtractKeyFnTest.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.fn;
-
-import static org.junit.Assert.assertEquals;
-
-import org.apache.crunch.MapFn;
-import org.apache.crunch.Pair;
-import org.junit.Test;
-
-@SuppressWarnings("serial")
-public class ExtractKeyFnTest {
-
-  protected static final MapFn<String, Integer> mapFn = new MapFn<String, Integer>() {
-    @Override
-    public Integer map(String input) {
-      return input.hashCode();
-    }
-  };
-
-  protected static final ExtractKeyFn<Integer, String> one = new ExtractKeyFn<Integer, String>(mapFn);
-
-  @Test
-  public void test() {
-    StoreLastEmitter<Pair<Integer, String>> emitter = StoreLastEmitter.create();
-    one.process("boza", emitter);
-    assertEquals(Pair.of("boza".hashCode(), "boza"), emitter.getLast());
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/fn/FilterFnTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/fn/FilterFnTest.java b/crunch/src/test/java/org/apache/crunch/fn/FilterFnTest.java
deleted file mode 100644
index a649f99..0000000
--- a/crunch/src/test/java/org/apache/crunch/fn/FilterFnTest.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.fn;
-
-import static org.hamcrest.Matchers.is;
-import static org.junit.Assert.assertThat;
-
-import org.apache.crunch.FilterFn;
-import org.junit.Test;
-
-import com.google.common.base.Predicates;
-
-
-public class FilterFnTest {
-
-  private static final FilterFn<String> TRUE = FilterFns.<String>ACCEPT_ALL();
-  private static final FilterFn<String> FALSE = FilterFns.<String>REJECT_ALL();
-
-  @Test
-  public void testAcceptAll() {
-    assertThat(TRUE.accept(""), is(true));
-    assertThat(TRUE.accept("foo"), is(true));
-  }
-
-  @Test
-  public void testRejectAll() {
-    assertThat(FALSE.accept(""), is(false));
-    assertThat(FALSE.accept("foo"), is(false));
-
-    Predicates.or(Predicates.alwaysFalse(), Predicates.alwaysTrue());
-  }
-
-  @Test
-  public void testAnd() {
-    assertThat(FilterFns.and(TRUE, TRUE).accept("foo"), is(true));
-    assertThat(FilterFns.and(TRUE, FALSE).accept("foo"), is(false));
-  }
-
-  @Test
-  @SuppressWarnings("unchecked")
-  public void testGeneric() {
-    assertThat(FilterFns.and(TRUE).accept("foo"), is(true));
-    assertThat(FilterFns.and(FALSE).accept("foo"), is(false));
-    assertThat(FilterFns.and(FALSE, FALSE, FALSE).accept("foo"), is(false));
-    assertThat(FilterFns.and(TRUE, TRUE, FALSE).accept("foo"), is(false));
-    assertThat(FilterFns.and(FALSE, FALSE, FALSE, FALSE).accept("foo"), is(false));
-  }
-
-  @Test
-  public void testOr() {
-    assertThat(FilterFns.or(FALSE, TRUE).accept("foo"), is(true));
-    assertThat(FilterFns.or(TRUE, FALSE).accept("foo"), is(true));
-  }
-
-  @Test
-  @SuppressWarnings("unchecked")
-  public void testOrGeneric() {
-    assertThat(FilterFns.or(TRUE).accept("foo"), is(true));
-    assertThat(FilterFns.or(FALSE).accept("foo"), is(false));
-    assertThat(FilterFns.or(TRUE, FALSE, TRUE).accept("foo"), is(true));
-    assertThat(FilterFns.or(FALSE, FALSE, TRUE).accept("foo"), is(true));
-    assertThat(FilterFns.or(FALSE, FALSE, FALSE).accept("foo"), is(false));
-  }
-
-  @Test
-  public void testNot() {
-    assertThat(FilterFns.not(TRUE).accept("foo"), is(false));
-    assertThat(FilterFns.not(FALSE).accept("foo"), is(true));
-  }
-}
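
In pipeline code the composed filters are passed to PCollection.filter(). A short sketch of the pattern the test covers; the predicates and class name are made up for illustration, and PCollection.filter is assumed to have its usual Crunch signature:

    import org.apache.crunch.FilterFn;
    import org.apache.crunch.PCollection;
    import org.apache.crunch.fn.FilterFns;

    public class FilterComposition {
      // Keeps lines that are non-empty and not comments by AND-ing two FilterFns.
      public static PCollection<String> keepInteresting(PCollection<String> lines) {
        FilterFn<String> nonEmpty = new FilterFn<String>() {
          @Override
          public boolean accept(String input) {
            return !input.isEmpty();
          }
        };
        FilterFn<String> notComment = new FilterFn<String>() {
          @Override
          public boolean accept(String input) {
            return !input.startsWith("#");
          }
        };
        return lines.filter(FilterFns.and(nonEmpty, notComment));
      }
    }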

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/fn/MapKeysTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/fn/MapKeysTest.java b/crunch/src/test/java/org/apache/crunch/fn/MapKeysTest.java
deleted file mode 100644
index 6b73700..0000000
--- a/crunch/src/test/java/org/apache/crunch/fn/MapKeysTest.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.fn;
-
-import static org.junit.Assert.assertEquals;
-
-import org.apache.crunch.Pair;
-import org.junit.Test;
-
-@SuppressWarnings("serial")
-public class MapKeysTest {
-
-  protected static final MapKeysFn<String, Integer, Integer> one = new MapKeysFn<String, Integer, Integer>() {
-    @Override
-    public Integer map(String input) {
-      return 1;
-    }
-  };
-
-  protected static final MapKeysFn<String, Integer, Integer> two = new MapKeysFn<String, Integer, Integer>() {
-    @Override
-    public Integer map(String input) {
-      return 2;
-    }
-  };
-
-  @Test
-  public void test() {
-    StoreLastEmitter<Pair<Integer, Integer>> emitter = StoreLastEmitter.create();
-    one.process(Pair.of("k", Integer.MAX_VALUE), emitter);
-    assertEquals(Pair.of(1, Integer.MAX_VALUE), emitter.getLast());
-    two.process(Pair.of("k", Integer.MAX_VALUE), emitter);
-    assertEquals(Pair.of(2, Integer.MAX_VALUE), emitter.getLast());
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/fn/MapValuesTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/fn/MapValuesTest.java b/crunch/src/test/java/org/apache/crunch/fn/MapValuesTest.java
deleted file mode 100644
index 097b008..0000000
--- a/crunch/src/test/java/org/apache/crunch/fn/MapValuesTest.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.fn;
-
-import static org.junit.Assert.assertEquals;
-
-import org.apache.crunch.Pair;
-import org.junit.Test;
-
-@SuppressWarnings("serial")
-public class MapValuesTest {
-
-  static final MapValuesFn<String, String, Integer> one = new MapValuesFn<String, String, Integer>() {
-    @Override
-    public Integer map(String input) {
-      return 1;
-    }
-  };
-
-  static final MapValuesFn<String, String, Integer> two = new MapValuesFn<String, String, Integer>() {
-    @Override
-    public Integer map(String input) {
-      return 2;
-    }
-  };
-
-  @Test
-  public void test() {
-    StoreLastEmitter<Pair<String, Integer>> emitter = StoreLastEmitter.create();
-    one.process(Pair.of("k", "v"), emitter);
-    assertEquals(Pair.of("k", 1), emitter.getLast());
-    two.process(Pair.of("k", "v"), emitter);
-    assertEquals(Pair.of("k", 2), emitter.getLast());
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/fn/PairMapTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/fn/PairMapTest.java b/crunch/src/test/java/org/apache/crunch/fn/PairMapTest.java
deleted file mode 100644
index bef6c85..0000000
--- a/crunch/src/test/java/org/apache/crunch/fn/PairMapTest.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.fn;
-
-import static org.junit.Assert.assertTrue;
-
-import org.apache.crunch.MapFn;
-import org.apache.crunch.Pair;
-import org.junit.Test;
-
-@SuppressWarnings("serial")
-public class PairMapTest {
-
-  static final MapFn<String, Integer> one = new MapFn<String, Integer>() {
-    @Override
-    public Integer map(String input) {
-      return 1;
-    }
-  };
-
-  static final MapFn<String, Integer> two = new MapFn<String, Integer>() {
-    @Override
-    public Integer map(String input) {
-      return 2;
-    }
-  };
-
-  @Test
-  public void testPairMap() {
-    StoreLastEmitter<Pair<Integer, Integer>> emitter = StoreLastEmitter.create();
-    PairMapFn<String, String, Integer, Integer> fn = new PairMapFn<String, String, Integer, Integer>(one, two);
-    fn.process(Pair.of("a", "b"), emitter);
-    Pair<Integer, Integer> pair = emitter.getLast();
-    assertTrue(pair.first() == 1);
-    assertTrue(pair.second() == 2);
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/fn/StoreLastEmitter.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/fn/StoreLastEmitter.java b/crunch/src/test/java/org/apache/crunch/fn/StoreLastEmitter.java
deleted file mode 100644
index cdd8754..0000000
--- a/crunch/src/test/java/org/apache/crunch/fn/StoreLastEmitter.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.fn;
-
-import org.apache.crunch.Emitter;
-
-class StoreLastEmitter<T> implements Emitter<T> {
-  private T last;
-
-  @Override
-  public void emit(T emitted) {
-    last = emitted;
-  }
-
-  public T getLast() {
-    return last;
-  }
-
-  @Override
-  public void flush() {
-  }
-
-  public static <T> StoreLastEmitter<T> create() {
-    return new StoreLastEmitter<T>();
-  }
-}
\ No newline at end of file
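
StoreLastEmitter is a single-slot Emitter test double: emit() simply overwrites the stored value and flush() is a no-op, so a test can drive a DoFn directly and assert on its most recent output without building a pipeline. A minimal sketch of the pattern, placed in the same package so the package-private class is visible; UpperCaseFn and its test are hypothetical names used only for illustration:

    package org.apache.crunch.fn;

    import static org.junit.Assert.assertEquals;

    import org.apache.crunch.DoFn;
    import org.apache.crunch.Emitter;
    import org.junit.Test;

    public class UpperCaseFnTest {

      // Hypothetical DoFn, present only to show the StoreLastEmitter pattern.
      static class UpperCaseFn extends DoFn<String, String> {
        @Override
        public void process(String input, Emitter<String> emitter) {
          emitter.emit(input.toUpperCase());
        }
      }

      @Test
      public void testUpperCase() {
        StoreLastEmitter<String> emitter = StoreLastEmitter.create();
        new UpperCaseFn().process("crunch", emitter);
        assertEquals("CRUNCH", emitter.getLast());
      }
    }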

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/impl/SingleUseIterableTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/impl/SingleUseIterableTest.java b/crunch/src/test/java/org/apache/crunch/impl/SingleUseIterableTest.java
deleted file mode 100644
index 811a0a3..0000000
--- a/crunch/src/test/java/org/apache/crunch/impl/SingleUseIterableTest.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl;
-
-import static org.junit.Assert.assertEquals;
-
-import java.util.List;
-
-import org.junit.Test;
-
-import com.google.common.collect.Lists;
-
-public class SingleUseIterableTest {
-
-  @Test
-  public void testIterator() {
-    List<Integer> values = Lists.newArrayList(1,2,3);
-    
-    SingleUseIterable<Integer> iterable = new SingleUseIterable<Integer>(values);
-
-    List<Integer> retrievedValues = Lists.newArrayList(iterable);
-    
-    assertEquals(values, retrievedValues);
-  }
-  
-  @Test(expected=IllegalStateException.class)
-  public void testIterator_MultipleCalls() {
-    List<Integer> values = Lists.newArrayList(1,2,3);
-    
-    SingleUseIterable<Integer> iterable = new SingleUseIterable<Integer>(values);
-
-    List<Integer> retrievedValues = Lists.newArrayList(iterable);
-
-    for (Integer n : iterable) {
-      // a second traversal of the iterable must throw IllegalStateException
-    }
-  }
-
-}
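
The two tests above pin down the SingleUseIterable contract: the wrapped values can be traversed exactly once, and a second traversal fails with IllegalStateException. A minimal sketch of that contract from calling code, assuming (as the test does) that the failure surfaces when iteration is attempted a second time:

    package org.apache.crunch.impl;

    import java.util.List;

    import com.google.common.collect.Lists;

    public class SingleUseIterableSketch {
      public static void main(String[] args) {
        List<Integer> values = Lists.newArrayList(1, 2, 3);
        SingleUseIterable<Integer> once = new SingleUseIterable<Integer>(values);

        // First traversal behaves like a normal Iterable.
        for (int n : once) {
          System.out.println(n);
        }

        // Second traversal is rejected.
        try {
          once.iterator();
          System.out.println("unexpected: second iteration succeeded");
        } catch (IllegalStateException expected) {
          System.out.println("second iteration rejected as expected");
        }
      }
    }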

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/impl/mr/MRPipelineTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/impl/mr/MRPipelineTest.java b/crunch/src/test/java/org/apache/crunch/impl/mr/MRPipelineTest.java
deleted file mode 100644
index 9ed7a46..0000000
--- a/crunch/src/test/java/org/apache/crunch/impl/mr/MRPipelineTest.java
+++ /dev/null
@@ -1,86 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr;
-
-import static org.junit.Assert.assertEquals;
-import static org.mockito.Mockito.doReturn;
-import static org.mockito.Mockito.spy;
-import static org.mockito.Mockito.when;
-
-import java.io.IOException;
-
-import org.apache.crunch.SourceTarget;
-import org.apache.crunch.impl.mr.collect.PCollectionImpl;
-import org.apache.crunch.impl.mr.run.RuntimeParameters;
-import org.apache.crunch.io.ReadableSourceTarget;
-import org.apache.crunch.types.avro.Avros;
-import org.apache.hadoop.conf.Configuration;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TemporaryFolder;
-import org.junit.runner.RunWith;
-import org.mockito.Mock;
-import org.mockito.runners.MockitoJUnitRunner;
-
-
-@RunWith(MockitoJUnitRunner.class)
-public class MRPipelineTest {
-  @Rule
-  public TemporaryFolder tempDir = new TemporaryFolder();
-  @Mock
-  private PCollectionImpl<String> pcollection;
-  @Mock
-  private ReadableSourceTarget<String> readableSourceTarget;
-  @Mock
-  private SourceTarget<String> nonReadableSourceTarget;
-  private MRPipeline pipeline;
-
-  @Before
-  public void setUp() throws IOException {
-    Configuration conf = new Configuration();
-    conf.set(RuntimeParameters.TMP_DIR, tempDir.getRoot().getAbsolutePath());
-    pipeline = spy(new MRPipeline(MRPipelineTest.class, conf));
-  }
-
-  @Test
-  public void testGetMaterializeSourceTarget_AlreadyMaterialized() {
-    when(pcollection.getMaterializedAt()).thenReturn(readableSourceTarget);
-
-    assertEquals(readableSourceTarget, pipeline.getMaterializeSourceTarget(pcollection));
-  }
-
-  @Test
-  public void testGetMaterializeSourceTarget_NotMaterialized_HasOutput() {
-    when(pcollection.getPType()).thenReturn(Avros.strings());
-    doReturn(readableSourceTarget).when(pipeline).createIntermediateOutput(Avros.strings());
-    when(pcollection.getMaterializedAt()).thenReturn(null);
-
-    assertEquals(readableSourceTarget, pipeline.getMaterializeSourceTarget(pcollection));
-  }
-
-  @Test(expected = IllegalArgumentException.class)
-  public void testGetMaterializeSourceTarget_NotMaterialized_NotReadableSourceTarget() {
-    when(pcollection.getPType()).thenReturn(Avros.strings());
-    doReturn(nonReadableSourceTarget).when(pipeline).createIntermediateOutput(Avros.strings());
-    when(pcollection.getMaterializedAt()).thenReturn(null);
-
-    pipeline.getMaterializeSourceTarget(pcollection);
-  }
-
-}
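
The stubbing above uses the doReturn(...).when(spy).method(...) form rather than when(spy.method(...)) because the pipeline object is a Mockito spy: the when(...) form would invoke the real createIntermediateOutput while the stub is being set up. A generic sketch of that difference, using a plain java.util.List rather than any Crunch class:

    import static org.mockito.Mockito.doReturn;
    import static org.mockito.Mockito.spy;

    import java.util.ArrayList;
    import java.util.List;

    public class SpyStubbingSketch {
      public static void main(String[] args) {
        List<String> spyList = spy(new ArrayList<String>());

        // doReturn(...).when(spy) registers the stub without executing the
        // real size() method on the underlying list.
        doReturn(99).when(spyList).size();

        System.out.println(spyList.size()); // prints 99
      }
    }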

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/impl/mr/collect/DoCollectionImplTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/impl/mr/collect/DoCollectionImplTest.java b/crunch/src/test/java/org/apache/crunch/impl/mr/collect/DoCollectionImplTest.java
deleted file mode 100644
index fd582bc..0000000
--- a/crunch/src/test/java/org/apache/crunch/impl/mr/collect/DoCollectionImplTest.java
+++ /dev/null
@@ -1,112 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.collect;
-
-import static org.junit.Assert.assertEquals;
-
-import java.util.List;
-
-import org.apache.crunch.DoFn;
-import org.apache.crunch.Emitter;
-import org.apache.crunch.impl.mr.plan.DoNode;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.writable.Writables;
-import org.junit.Test;
-
-public class DoCollectionImplTest {
-
-  @Test
-  public void testGetSizeInternal_NoScaleFactor() {
-    runScaleTest(100L, 1.0f, 100L);
-  }
-
-  @Test
-  public void testGetSizeInternal_ScaleFactorBelowOne() {
-    runScaleTest(100L, 0.5f, 50L);
-  }
-
-  @Test
-  public void testGetSizeInternal_ScaleFactorAboveOne() {
-    runScaleTest(100L, 1.5f, 150L);
-  }
-
-  private void runScaleTest(long inputSize, float scaleFactor, long expectedScaledSize) {
-    PCollectionImpl<String> parentCollection = new SizedPCollectionImpl("Sized collection", inputSize);
-
-    DoCollectionImpl<String> doCollectionImpl = new DoCollectionImpl<String>("Scaled collection", parentCollection,
-        new ScaledFunction(scaleFactor), Writables.strings());
-
-    assertEquals(expectedScaledSize, doCollectionImpl.getSizeInternal());
-  }
-
-  static class ScaledFunction extends DoFn<String, String> {
-
-    private float scaleFactor;
-
-    public ScaledFunction(float scaleFactor) {
-      this.scaleFactor = scaleFactor;
-    }
-
-    @Override
-    public void process(String input, Emitter<String> emitter) {
-      emitter.emit(input);
-    }
-
-    @Override
-    public float scaleFactor() {
-      return scaleFactor;
-    }
-
-  }
-
-  static class SizedPCollectionImpl extends PCollectionImpl<String> {
-
-    private long internalSize;
-
-    public SizedPCollectionImpl(String name, long internalSize) {
-      super(name);
-      this.internalSize = internalSize;
-    }
-
-    @Override
-    public PType getPType() {
-      return null;
-    }
-
-    @Override
-    public DoNode createDoNode() {
-      return null;
-    }
-
-    @Override
-    public List getParents() {
-      return null;
-    }
-
-    @Override
-    protected void acceptInternal(Visitor visitor) {
-    }
-
-    @Override
-    protected long getSizeInternal() {
-      return internalSize;
-    }
-
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/impl/mr/collect/DoTableImplTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/impl/mr/collect/DoTableImplTest.java b/crunch/src/test/java/org/apache/crunch/impl/mr/collect/DoTableImplTest.java
deleted file mode 100644
index 89b9944..0000000
--- a/crunch/src/test/java/org/apache/crunch/impl/mr/collect/DoTableImplTest.java
+++ /dev/null
@@ -1,86 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.collect;
-
-import static org.apache.crunch.types.writable.Writables.strings;
-import static org.apache.crunch.types.writable.Writables.tableOf;
-import static org.junit.Assert.assertEquals;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.verify;
-import static org.mockito.Mockito.verifyNoMoreInteractions;
-import static org.mockito.Mockito.when;
-
-import org.apache.crunch.DoFn;
-import org.apache.crunch.Emitter;
-import org.apache.crunch.Pair;
-import org.junit.Test;
-
-public class DoTableImplTest {
-
-  @Test
-  public void testGetSizeInternal_NoScaleFactor() {
-    runScaleTest(100L, 1.0f, 100L);
-  }
-
-  @Test
-  public void testGetSizeInternal_ScaleFactorBelowOne() {
-    runScaleTest(100L, 0.5f, 50L);
-  }
-
-  @Test
-  public void testGetSizeInternal_ScaleFactorAboveOne() {
-    runScaleTest(100L, 1.5f, 150L);
-  }
-
-  private void runScaleTest(long inputSize, float scaleFactor, long expectedScaledSize) {
-
-    @SuppressWarnings("unchecked")
-    PCollectionImpl<String> parentCollection = (PCollectionImpl<String>) mock(PCollectionImpl.class);
-
-    when(parentCollection.getSize()).thenReturn(inputSize);
-
-    DoTableImpl<String, String> doTableImpl = new DoTableImpl<String, String>("Scaled table collection",
-        parentCollection, new TableScaledFunction(scaleFactor), tableOf(strings(), strings()));
-
-    assertEquals(expectedScaledSize, doTableImpl.getSizeInternal());
-
-    verify(parentCollection).getSize();
-
-    verifyNoMoreInteractions(parentCollection);
-  }
-
-  static class TableScaledFunction extends DoFn<String, Pair<String, String>> {
-
-    private float scaleFactor;
-
-    public TableScaledFunction(float scaleFactor) {
-      this.scaleFactor = scaleFactor;
-    }
-
-    @Override
-    public float scaleFactor() {
-      return scaleFactor;
-    }
-
-    @Override
-    public void process(String input, Emitter<Pair<String, String>> emitter) {
-      emitter.emit(Pair.of(input, input));
-
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/impl/mr/emit/IntermediateEmitterTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/impl/mr/emit/IntermediateEmitterTest.java b/crunch/src/test/java/org/apache/crunch/impl/mr/emit/IntermediateEmitterTest.java
deleted file mode 100644
index dd72364..0000000
--- a/crunch/src/test/java/org/apache/crunch/impl/mr/emit/IntermediateEmitterTest.java
+++ /dev/null
@@ -1,83 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.emit;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotSame;
-import static org.junit.Assert.assertSame;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.spy;
-import static org.mockito.Mockito.verify;
-
-import org.apache.crunch.impl.mr.run.RTNode;
-import org.apache.crunch.test.StringWrapper;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.avro.Avros;
-import org.apache.hadoop.conf.Configuration;
-import org.junit.Before;
-import org.junit.Test;
-import org.mockito.ArgumentCaptor;
-
-import com.google.common.collect.Lists;
-
-public class IntermediateEmitterTest {
-
-  private StringWrapper stringWrapper;
-  private PType ptype;
-
-  @Before
-  public void setUp() {
-    stringWrapper = new StringWrapper("test");
-    ptype = spy(Avros.reflects(StringWrapper.class));
-  }
-
-  @Test
-  public void testEmit_SingleChild() {
-    RTNode singleChild = mock(RTNode.class);
-    IntermediateEmitter emitter = new IntermediateEmitter(ptype, Lists.newArrayList(singleChild),
-        new Configuration());
-    emitter.emit(stringWrapper);
-
-    ArgumentCaptor<StringWrapper> argumentCaptor = ArgumentCaptor.forClass(StringWrapper.class);
-    verify(singleChild).process(argumentCaptor.capture());
-    assertSame(stringWrapper, argumentCaptor.getValue());
-  }
-
-  @Test
-  public void testEmit_MultipleChildren() {
-    RTNode childA = mock(RTNode.class);
-    RTNode childB = mock(RTNode.class);
-    IntermediateEmitter emitter = new IntermediateEmitter(ptype, Lists.newArrayList(childA, childB),
-        new Configuration());
-    emitter.emit(stringWrapper);
-
-    ArgumentCaptor<StringWrapper> argumentCaptorA = ArgumentCaptor.forClass(StringWrapper.class);
-    ArgumentCaptor<StringWrapper> argumentCaptorB = ArgumentCaptor.forClass(StringWrapper.class);
-
-    verify(childA).process(argumentCaptorA.capture());
-    verify(childB).process(argumentCaptorB.capture());
-
-    assertEquals(stringWrapper, argumentCaptorA.getValue());
-    assertEquals(stringWrapper, argumentCaptorB.getValue());
-
-    // Make sure that multiple children means deep copies are performed
-    assertNotSame(stringWrapper, argumentCaptorA.getValue());
-    assertNotSame(stringWrapper, argumentCaptorB.getValue());
-  }
-
-}
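
The assertions above use Mockito's ArgumentCaptor to inspect what each child RTNode actually received: the same instance when there is a single child, and a per-child deep copy when there are several. A generic sketch of the captor idiom itself, independent of the Crunch classes:

    import static org.junit.Assert.assertEquals;
    import static org.mockito.Mockito.mock;
    import static org.mockito.Mockito.verify;

    import java.util.List;

    import org.mockito.ArgumentCaptor;

    public class CaptorSketch {
      @SuppressWarnings("unchecked")
      public static void main(String[] args) {
        List<String> mockList = (List<String>) mock(List.class);
        mockList.add("hello");

        // The captor records the argument the mock was called with so it can
        // be inspected after verify().
        ArgumentCaptor<String> captor = ArgumentCaptor.forClass(String.class);
        verify(mockList).add(captor.capture());
        assertEquals("hello", captor.getValue());
      }
    }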

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/impl/mr/exec/CappedExponentialCounterTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/impl/mr/exec/CappedExponentialCounterTest.java b/crunch/src/test/java/org/apache/crunch/impl/mr/exec/CappedExponentialCounterTest.java
deleted file mode 100644
index 958df12..0000000
--- a/crunch/src/test/java/org/apache/crunch/impl/mr/exec/CappedExponentialCounterTest.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.exec;
-
-import static org.junit.Assert.assertEquals;
-
-import org.junit.Test;
-
-public class CappedExponentialCounterTest {
-
-  @Test
-  public void testGet() {
-    CappedExponentialCounter c = new CappedExponentialCounter(1L, Long.MAX_VALUE);
-    assertEquals(1L, c.get());
-    assertEquals(2L, c.get());
-    assertEquals(4L, c.get());
-    assertEquals(8L, c.get());
-  }
-
-  @Test
-  public void testCap() {
-    CappedExponentialCounter c = new CappedExponentialCounter(1L, 2);
-    assertEquals(1L, c.get());
-    assertEquals(2L, c.get());
-    assertEquals(2L, c.get());
-  }
-}
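
Per the tests above, CappedExponentialCounter starts at its initial value, doubles on every get(), and never exceeds the cap. A sketch of how such a counter could drive a polling backoff; the Callable-based condition is hypothetical, and the sketch sits in the same package in case the class is not public:

    package org.apache.crunch.impl.mr.exec;

    import java.util.concurrent.Callable;

    public class BackoffSketch {

      // Sleeps for an exponentially growing interval between checks:
      // 500, 1000, 2000, ... ms, capped at 10000 ms.
      public static void waitUntil(Callable<Boolean> condition) throws Exception {
        CappedExponentialCounter backoff = new CappedExponentialCounter(500L, 10000L);
        while (!condition.call()) {
          Thread.sleep(backoff.get());
        }
      }
    }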

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/impl/mr/exec/CrunchJobHooksTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/impl/mr/exec/CrunchJobHooksTest.java b/crunch/src/test/java/org/apache/crunch/impl/mr/exec/CrunchJobHooksTest.java
deleted file mode 100644
index f03c3e2..0000000
--- a/crunch/src/test/java/org/apache/crunch/impl/mr/exec/CrunchJobHooksTest.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.exec;
-
-import static org.junit.Assert.assertEquals;
-
-import org.junit.Test;
-
-public class CrunchJobHooksTest {
-
-  @Test
-  public void testExtractPartitionNumber() {
-    assertEquals(0, CrunchJobHooks.extractPartitionNumber("out1-r-00000"));
-    assertEquals(10, CrunchJobHooks.extractPartitionNumber("out2-r-00010"));
-    assertEquals(99999, CrunchJobHooks.extractPartitionNumber("out3-r-99999"));
-  }
-
-  @Test
-  public void testExtractPartitionNumber_WithSuffix() {
-    assertEquals(10, CrunchJobHooks.extractPartitionNumber("out2-r-00010.avro"));
-  }
-
-  @Test(expected = IllegalArgumentException.class)
-  public void testExtractPartitionNumber_MapOutputFile() {
-    CrunchJobHooks.extractPartitionNumber("out1-m-00000");
-  }
-}
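
The tests above document the reducer-output naming convention that extractPartitionNumber parses: names of the form <output>-r-<NNNNN>, optionally followed by a suffix such as .avro, while map-output names (-m-) are rejected with an IllegalArgumentException. A small sketch in the same package (the helper's accessibility is assumed from the fact that the test lives alongside it):

    package org.apache.crunch.impl.mr.exec;

    public class PartitionNumberSketch {
      public static void main(String[] args) {
        // "<name>-r-<NNNNN>" -> partition number, per the tests above.
        System.out.println(CrunchJobHooks.extractPartitionNumber("out2-r-00010"));      // 10
        System.out.println(CrunchJobHooks.extractPartitionNumber("out2-r-00010.avro")); // 10
      }
    }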

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/impl/mr/plan/DotfileWriterTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/impl/mr/plan/DotfileWriterTest.java b/crunch/src/test/java/org/apache/crunch/impl/mr/plan/DotfileWriterTest.java
deleted file mode 100644
index 562238d..0000000
--- a/crunch/src/test/java/org/apache/crunch/impl/mr/plan/DotfileWriterTest.java
+++ /dev/null
@@ -1,132 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.plan;
-
-import static org.junit.Assert.assertEquals;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.when;
-
-import java.util.List;
-
-import org.apache.crunch.Source;
-import org.apache.crunch.Target;
-import org.apache.crunch.impl.mr.collect.InputCollection;
-import org.apache.crunch.impl.mr.collect.PCollectionImpl;
-import org.apache.crunch.impl.mr.plan.DotfileWriter.MRTaskType;
-import org.junit.Before;
-import org.junit.Test;
-import org.mockito.Mockito;
-
-import com.google.common.collect.Lists;
-
-public class DotfileWriterTest {
-
-  private DotfileWriter dotfileWriter;
-
-  @Before
-  public void setUp() {
-    dotfileWriter = new DotfileWriter();
-  }
-
-  @Test
-  public void testFormatPCollectionNodeDeclaration() {
-    PCollectionImpl<?> pcollectionImpl = mock(PCollectionImpl.class);
-    JobPrototype jobPrototype = mock(JobPrototype.class);
-    when(pcollectionImpl.getName()).thenReturn("collection");
-
-    assertEquals("\"collection@" + pcollectionImpl.hashCode() + "@" + jobPrototype.hashCode()
-        + "\" [label=\"collection\" shape=box];",
-        dotfileWriter.formatPCollectionNodeDeclaration(pcollectionImpl, jobPrototype));
-  }
-
-  @Test
-  public void testFormatPCollectionNodeDeclaration_InputPCollection() {
-    InputCollection<?> inputCollection = mock(InputCollection.class, Mockito.RETURNS_DEEP_STUBS);
-    JobPrototype jobPrototype = mock(JobPrototype.class);
-    when(inputCollection.getName()).thenReturn("input");
-    when(inputCollection.getSource().toString()).thenReturn("source");
-
-    assertEquals("\"source\" [label=\"input\" shape=folder];",
-        dotfileWriter.formatPCollectionNodeDeclaration(inputCollection, jobPrototype));
-  }
-
-  @Test
-  public void testFormatTargetNodeDeclaration() {
-    Target target = mock(Target.class);
-    when(target.toString()).thenReturn("target/path");
-
-    assertEquals("\"target/path\" [label=\"target/path\" shape=folder];",
-        dotfileWriter.formatTargetNodeDeclaration(target));
-  }
-
-  @Test
-  public void testFormatPCollection() {
-    PCollectionImpl<?> pcollectionImpl = mock(PCollectionImpl.class);
-    JobPrototype jobPrototype = mock(JobPrototype.class);
-    when(pcollectionImpl.getName()).thenReturn("collection");
-
-    assertEquals("\"collection@" + pcollectionImpl.hashCode() + "@" + jobPrototype.hashCode() + "\"",
-        dotfileWriter.formatPCollection(pcollectionImpl, jobPrototype));
-  }
-
-  @Test
-  public void testFormatPCollection_InputCollection() {
-    InputCollection<Object> inputCollection = mock(InputCollection.class);
-    Source<Object> source = mock(Source.class);
-    JobPrototype jobPrototype = mock(JobPrototype.class);
-    when(source.toString()).thenReturn("mocksource");
-    when(inputCollection.getSource()).thenReturn(source);
-
-    assertEquals("\"mocksource\"", dotfileWriter.formatPCollection(inputCollection, jobPrototype));
-  }
-
-  @Test
-  public void testFormatNodeCollection() {
-    List<String> nodeCollection = Lists.newArrayList("one", "two", "three");
-    assertEquals("one -> two -> three;", dotfileWriter.formatNodeCollection(nodeCollection));
-  }
-
-  @Test
-  public void testFormatNodePath() {
-    PCollectionImpl<?> tail = mock(PCollectionImpl.class);
-    PCollectionImpl<?> head = mock(PCollectionImpl.class);
-    JobPrototype jobPrototype = mock(JobPrototype.class);
-
-    when(tail.getName()).thenReturn("tail");
-    when(head.getName()).thenReturn("head");
-
-    NodePath nodePath = new NodePath(tail);
-    nodePath.close(head);
-
-    assertEquals(
-        Lists.newArrayList("\"head@" + head.hashCode() + "@" + jobPrototype.hashCode() + "\" -> \"tail@"
-            + tail.hashCode() + "@" + jobPrototype.hashCode() + "\";"),
-        dotfileWriter.formatNodePath(nodePath, jobPrototype));
-  }
-
-  @Test
-  public void testGetTaskGraphAttributes_Map() {
-    assertEquals("label = Map; color = blue;", dotfileWriter.getTaskGraphAttributes(MRTaskType.MAP));
-  }
-
-  @Test
-  public void testGetTaskGraphAttributes_Reduce() {
-    assertEquals("label = Reduce; color = red;", dotfileWriter.getTaskGraphAttributes(MRTaskType.REDUCE));
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/impl/mr/plan/JobNameBuilderTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/impl/mr/plan/JobNameBuilderTest.java b/crunch/src/test/java/org/apache/crunch/impl/mr/plan/JobNameBuilderTest.java
deleted file mode 100644
index 7963c83..0000000
--- a/crunch/src/test/java/org/apache/crunch/impl/mr/plan/JobNameBuilderTest.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.plan;
-
-import static org.junit.Assert.assertEquals;
-
-import org.apache.crunch.types.writable.Writables;
-import org.junit.Test;
-
-import com.google.common.collect.Lists;
-
-public class JobNameBuilderTest {
-
-  @Test
-  public void testBuild() {
-    final String pipelineName = "PipelineName";
-    final String nodeName = "outputNode";
-    DoNode doNode = DoNode.createOutputNode(nodeName, Writables.strings());
-    JobNameBuilder jobNameBuilder = new JobNameBuilder(pipelineName);
-    jobNameBuilder.visit(Lists.newArrayList(doNode));
-    String jobName = jobNameBuilder.build();
-
-    assertEquals(String.format("%s: %s", pipelineName, nodeName), jobName);
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/io/SequentialFileNamingSchemeTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/io/SequentialFileNamingSchemeTest.java b/crunch/src/test/java/org/apache/crunch/io/SequentialFileNamingSchemeTest.java
deleted file mode 100644
index 467da15..0000000
--- a/crunch/src/test/java/org/apache/crunch/io/SequentialFileNamingSchemeTest.java
+++ /dev/null
@@ -1,84 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.File;
-import java.io.IOException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TemporaryFolder;
-
-public class SequentialFileNamingSchemeTest {
-
-  // The partition id used for testing. This partition id should be ignored by
-  // the SequentialFileNamingScheme.
-  private static final int PARTITION_ID = 42;
-
-  private SequentialFileNamingScheme namingScheme;
-  private Configuration configuration;
-
-  @Rule
-  public TemporaryFolder tmpOutputDir = new TemporaryFolder();
-
-  @Before
-  public void setUp() throws IOException {
-    configuration = new Configuration();
-    namingScheme = new SequentialFileNamingScheme();
-  }
-
-  @Test
-  public void testGetMapOutputName_EmptyDirectory() throws IOException {
-    assertEquals("part-m-00000",
-        namingScheme.getMapOutputName(configuration, new Path(tmpOutputDir.getRoot().getAbsolutePath())));
-  }
-
-  @Test
-  public void testGetMapOutputName_NonEmptyDirectory() throws IOException {
-    File outputDirectory = tmpOutputDir.getRoot();
-
-    new File(outputDirectory, "existing-1").createNewFile();
-    new File(outputDirectory, "existing-2").createNewFile();
-
-    assertEquals("part-m-00002",
-        namingScheme.getMapOutputName(configuration, new Path(outputDirectory.getAbsolutePath())));
-  }
-
-  @Test
-  public void testGetReduceOutputName_EmptyDirectory() throws IOException {
-    assertEquals("part-r-00000", namingScheme.getReduceOutputName(configuration, new Path(tmpOutputDir.getRoot()
-        .getAbsolutePath()), PARTITION_ID));
-  }
-
-  @Test
-  public void testGetReduceOutputName_NonEmptyDirectory() throws IOException {
-    File outputDirectory = tmpOutputDir.getRoot();
-
-    new File(outputDirectory, "existing-1").createNewFile();
-    new File(outputDirectory, "existing-2").createNewFile();
-
-    assertEquals("part-r-00002",
-        namingScheme.getReduceOutputName(configuration, new Path(outputDirectory.getAbsolutePath()), PARTITION_ID));
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/io/SourceTargetHelperTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/io/SourceTargetHelperTest.java b/crunch/src/test/java/org/apache/crunch/io/SourceTargetHelperTest.java
deleted file mode 100644
index 5b0ea55..0000000
--- a/crunch/src/test/java/org/apache/crunch/io/SourceTargetHelperTest.java
+++ /dev/null
@@ -1,59 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.File;
-import java.io.IOException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.LocalFileSystem;
-import org.apache.hadoop.fs.Path;
-import org.junit.Test;
-
-public class SourceTargetHelperTest {
-
-  @Test
-  public void testGetNonexistentPathSize() throws Exception {
-    File tmp = File.createTempFile("pathsize", "");
-    Path tmpPath = new Path(tmp.getAbsolutePath());
-    tmp.delete();
-    FileSystem fs = FileSystem.getLocal(new Configuration());
-    assertEquals(-1L, SourceTargetHelper.getPathSize(fs, tmpPath));
-  }
-
-  @Test
-  public void testGetNonExistentPathSize_NonExistentPath() throws IOException {
-    FileSystem mockFs = new MockFileSystem();
-    assertEquals(-1L, SourceTargetHelper.getPathSize(mockFs, new Path("does/not/exist")));
-  }
-
-  /**
-   * Mock FileSystem that returns null for {@link FileSystem#listStatus(Path)}.
-   */
-  static class MockFileSystem extends LocalFileSystem {
-
-    @Override
-    public FileStatus[] listStatus(Path f) throws IOException {
-      return null;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/io/avro/AvroFileReaderFactoryTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/io/avro/AvroFileReaderFactoryTest.java b/crunch/src/test/java/org/apache/crunch/io/avro/AvroFileReaderFactoryTest.java
deleted file mode 100644
index 62085f8..0000000
--- a/crunch/src/test/java/org/apache/crunch/io/avro/AvroFileReaderFactoryTest.java
+++ /dev/null
@@ -1,184 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.avro;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.avro.Schema;
-import org.apache.avro.file.DataFileWriter;
-import org.apache.avro.generic.GenericData;
-import org.apache.avro.generic.GenericData.Record;
-import org.apache.avro.generic.GenericDatumReader;
-import org.apache.avro.generic.GenericDatumWriter;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.io.DatumReader;
-import org.apache.avro.reflect.ReflectData;
-import org.apache.avro.reflect.ReflectDatumReader;
-import org.apache.avro.specific.SpecificDatumReader;
-import org.apache.crunch.Pair;
-import org.apache.crunch.test.Person;
-import org.apache.crunch.test.StringWrapper;
-import org.apache.crunch.types.avro.AvroType;
-import org.apache.crunch.types.avro.Avros;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.junit.After;
-import org.junit.Assume;
-import org.junit.Before;
-import org.junit.Test;
-
-import com.google.common.collect.Lists;
-
-public class AvroFileReaderFactoryTest {
-
-  private File avroFile;
-
-  @Before
-  public void setUp() throws IOException {
-    avroFile = File.createTempFile("test", ".av");
-  }
-
-  @After
-  public void tearDown() {
-    avroFile.delete();
-  }
-
-  private void populateGenericFile(List<GenericRecord> genericRecords, Schema outputSchema) throws IOException {
-    FileOutputStream outputStream = new FileOutputStream(this.avroFile);
-    GenericDatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<GenericRecord>(outputSchema);
-
-    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(genericDatumWriter);
-    dataFileWriter.create(outputSchema, outputStream);
-
-    for (GenericRecord record : genericRecords) {
-      dataFileWriter.append(record);
-    }
-
-    dataFileWriter.close();
-    outputStream.close();
-
-  }
-
-  private <T> AvroFileReaderFactory<T> createFileReaderFactory(AvroType<T> avroType) {
-    return new AvroFileReaderFactory<T>(avroType);
-  }
-
-  @Test
-  public void testRead_GenericReader() throws IOException {
-    GenericRecord savedRecord = new GenericData.Record(Person.SCHEMA$);
-    savedRecord.put("name", "John Doe");
-    savedRecord.put("age", 42);
-    savedRecord.put("siblingnames", Lists.newArrayList("Jimmy", "Jane"));
-    populateGenericFile(Lists.newArrayList(savedRecord), Person.SCHEMA$);
-
-    AvroFileReaderFactory<GenericData.Record> genericReader = createFileReaderFactory(Avros.generics(Person.SCHEMA$));
-    Iterator<GenericData.Record> recordIterator = genericReader.read(FileSystem.getLocal(new Configuration()),
-        new Path(this.avroFile.getAbsolutePath()));
-
-    GenericRecord genericRecord = recordIterator.next();
-    assertEquals(savedRecord, genericRecord);
-    assertFalse(recordIterator.hasNext());
-  }
-
-  @Test
-  public void testRead_SpecificReader() throws IOException {
-    GenericRecord savedRecord = new GenericData.Record(Person.SCHEMA$);
-    savedRecord.put("name", "John Doe");
-    savedRecord.put("age", 42);
-    savedRecord.put("siblingnames", Lists.newArrayList("Jimmy", "Jane"));
-    populateGenericFile(Lists.newArrayList(savedRecord), Person.SCHEMA$);
-
-    AvroFileReaderFactory<Person> genericReader = createFileReaderFactory(Avros.records(Person.class));
-    Iterator<Person> recordIterator = genericReader.read(FileSystem.getLocal(new Configuration()), new Path(
-        this.avroFile.getAbsolutePath()));
-
-    Person expectedPerson = new Person();
-    expectedPerson.age = 42;
-    expectedPerson.name = "John Doe";
-    List<CharSequence> siblingNames = Lists.newArrayList();
-    siblingNames.add("Jimmy");
-    siblingNames.add("Jane");
-    expectedPerson.siblingnames = siblingNames;
-
-    Person person = recordIterator.next();
-
-    assertEquals(expectedPerson, person);
-    assertFalse(recordIterator.hasNext());
-  }
-
-  @Test
-  public void testRead_ReflectReader() throws IOException {
-    Schema reflectSchema = ReflectData.get().getSchema(StringWrapper.class);
-    GenericRecord savedRecord = new GenericData.Record(reflectSchema);
-    savedRecord.put("value", "stringvalue");
-    populateGenericFile(Lists.newArrayList(savedRecord), reflectSchema);
-
-    AvroFileReaderFactory<StringWrapper> genericReader = createFileReaderFactory(Avros.reflects(StringWrapper.class));
-    Iterator<StringWrapper> recordIterator = genericReader.read(FileSystem.getLocal(new Configuration()), new Path(
-        this.avroFile.getAbsolutePath()));
-
-    StringWrapper stringWrapper = recordIterator.next();
-
-    assertEquals("stringvalue", stringWrapper.getValue());
-    assertFalse(recordIterator.hasNext());
-  }
-
-  @Test
-  public void testCreateDatumReader_Generic() {
-    DatumReader<Record> datumReader = AvroFileReaderFactory.createDatumReader(Avros.generics(Person.SCHEMA$));
-    assertEquals(GenericDatumReader.class, datumReader.getClass());
-  }
-
-  @Test
-  public void testCreateDatumReader_Reflect() {
-    DatumReader<StringWrapper> datumReader = AvroFileReaderFactory.createDatumReader(Avros
-        .reflects(StringWrapper.class));
-    assertEquals(ReflectDatumReader.class, datumReader.getClass());
-  }
-
-  @Test
-  public void testCreateDatumReader_Specific() {
-    DatumReader<Person> datumReader = AvroFileReaderFactory.createDatumReader(Avros.records(Person.class));
-    assertEquals(SpecificDatumReader.class, datumReader.getClass());
-  }
-
-  @Test
-  public void testCreateDatumReader_ReflectAndSpecific() {
-    Assume.assumeTrue(Avros.CAN_COMBINE_SPECIFIC_AND_REFLECT_SCHEMAS);
-
-    DatumReader<Pair<Person, StringWrapper>> datumReader = AvroFileReaderFactory.createDatumReader(Avros.pairs(
-        Avros.records(Person.class), Avros.reflects(StringWrapper.class)));
-    assertEquals(ReflectDatumReader.class, datumReader.getClass());
-  }
-
-  @Test(expected = IllegalStateException.class)
-  public void testCreateDatumReader_ReflectAndSpecific_NotSupported() {
-    Assume.assumeTrue(!Avros.CAN_COMBINE_SPECIFIC_AND_REFLECT_SCHEMAS);
-    AvroFileReaderFactory.createDatumReader(Avros.pairs(Avros.records(Person.class),
-        Avros.reflects(StringWrapper.class)));
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/io/avro/AvroFileSourceTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/io/avro/AvroFileSourceTest.java b/crunch/src/test/java/org/apache/crunch/io/avro/AvroFileSourceTest.java
deleted file mode 100644
index ceef2b2..0000000
--- a/crunch/src/test/java/org/apache/crunch/io/avro/AvroFileSourceTest.java
+++ /dev/null
@@ -1,91 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.avro;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-
-import java.io.File;
-import java.io.IOException;
-
-import org.apache.avro.generic.GenericData.Record;
-import org.apache.avro.mapred.AvroJob;
-import org.apache.crunch.test.Person;
-import org.apache.crunch.test.StringWrapper;
-import org.apache.crunch.types.avro.AvroType;
-import org.apache.crunch.types.avro.Avros;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.Job;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
-
-public class AvroFileSourceTest {
-
-  private Job job;
-  File tempFile;
-
-  @Before
-  public void setUp() throws IOException {
-    job = new Job();
-    tempFile = File.createTempFile("test", ".avr");
-  }
-
-  @After
-  public void tearDown() {
-    tempFile.delete();
-  }
-
-  @Test
-  public void testConfigureJob_SpecificData() throws IOException {
-    AvroType<Person> avroSpecificType = Avros.records(Person.class);
-    AvroFileSource<Person> personFileSource = new AvroFileSource<Person>(new Path(tempFile.getAbsolutePath()),
-        avroSpecificType);
-
-    personFileSource.configureSource(job, -1);
-
-    assertFalse(job.getConfiguration().getBoolean(AvroJob.INPUT_IS_REFLECT, true));
-    assertEquals(Person.SCHEMA$.toString(), job.getConfiguration().get(AvroJob.INPUT_SCHEMA));
-  }
-
-  @Test
-  public void testConfigureJob_GenericData() throws IOException {
-    AvroType<Record> avroGenericType = Avros.generics(Person.SCHEMA$);
-    AvroFileSource<Record> personFileSource = new AvroFileSource<Record>(new Path(tempFile.getAbsolutePath()),
-        avroGenericType);
-
-    personFileSource.configureSource(job, -1);
-
-    assertFalse(job.getConfiguration().getBoolean(AvroJob.INPUT_IS_REFLECT, true));
-
-  }
-
-  @Test
-  public void testConfigureJob_ReflectData() throws IOException {
-    AvroType<StringWrapper> avroReflectType = Avros.reflects(StringWrapper.class);
-    AvroFileSource<StringWrapper> personFileSource = new AvroFileSource<StringWrapper>(new Path(
-        tempFile.getAbsolutePath()), avroReflectType);
-
-    personFileSource.configureSource(job, -1);
-
-    assertTrue(job.getConfiguration().getBoolean(AvroJob.INPUT_IS_REFLECT, false));
-
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/lib/AvroIndexedRecordPartitionerTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/lib/AvroIndexedRecordPartitionerTest.java b/crunch/src/test/java/org/apache/crunch/lib/AvroIndexedRecordPartitionerTest.java
deleted file mode 100644
index 0dfed32..0000000
--- a/crunch/src/test/java/org/apache/crunch/lib/AvroIndexedRecordPartitionerTest.java
+++ /dev/null
@@ -1,98 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib;
-
-import static org.junit.Assert.assertEquals;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.IndexedRecord;
-import org.apache.avro.mapred.AvroKey;
-import org.apache.avro.mapred.AvroValue;
-import org.apache.crunch.lib.join.JoinUtils.AvroIndexedRecordPartitioner;
-import org.junit.Before;
-import org.junit.Test;
-
-public class AvroIndexedRecordPartitionerTest {
-
-  private AvroIndexedRecordPartitioner avroPartitioner;
-
-  @Before
-  public void setUp() {
-    avroPartitioner = new AvroIndexedRecordPartitioner();
-  }
-
-  @Test
-  public void testGetPartition() {
-    IndexedRecord indexedRecord = new MockIndexedRecord(3);
-    AvroKey<IndexedRecord> avroKey = new AvroKey<IndexedRecord>(indexedRecord);
-
-    assertEquals(3, avroPartitioner.getPartition(avroKey, new AvroValue<Object>(), 5));
-    assertEquals(1, avroPartitioner.getPartition(avroKey, new AvroValue<Object>(), 2));
-  }
-
-  @Test
-  public void testGetPartition_NegativeHashValue() {
-    IndexedRecord indexedRecord = new MockIndexedRecord(-3);
-    AvroKey<IndexedRecord> avroKey = new AvroKey<IndexedRecord>(indexedRecord);
-
-    assertEquals(3, avroPartitioner.getPartition(avroKey, new AvroValue<Object>(), 5));
-    assertEquals(1, avroPartitioner.getPartition(avroKey, new AvroValue<Object>(), 2));
-  }
-
-  @Test
-  public void testGetPartition_IntegerMinValue() {
-    IndexedRecord indexedRecord = new MockIndexedRecord(Integer.MIN_VALUE);
-    AvroKey<IndexedRecord> avroKey = new AvroKey<IndexedRecord>(indexedRecord);
-
-    assertEquals(0, avroPartitioner.getPartition(avroKey, new AvroValue<Object>(), Integer.MAX_VALUE));
-  }
-
-  /**
-   * Mock implementation of IndexedRecord to give us control over the hashCode.
-   */
-  static class MockIndexedRecord implements IndexedRecord {
-
-    private Integer value;
-
-    public MockIndexedRecord(Integer value) {
-      this.value = value;
-    }
-
-    @Override
-    public int hashCode() {
-      return value.hashCode();
-    }
-
-    @Override
-    public Schema getSchema() {
-      throw new UnsupportedOperationException();
-    }
-
-    @Override
-    public Object get(int arg0) {
-      return this.value;
-    }
-
-    @Override
-    public void put(int arg0, Object arg1) {
-      throw new UnsupportedOperationException();
-    }
-
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/lib/CartesianTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/lib/CartesianTest.java b/crunch/src/test/java/org/apache/crunch/lib/CartesianTest.java
deleted file mode 100644
index b19097c..0000000
--- a/crunch/src/test/java/org/apache/crunch/lib/CartesianTest.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib;
-
-import static org.junit.Assert.assertEquals;
-
-import java.util.Collections;
-import java.util.List;
-
-import org.apache.crunch.PCollection;
-import org.apache.crunch.PTable;
-import org.apache.crunch.Pair;
-import org.apache.crunch.impl.mem.MemPipeline;
-import org.apache.crunch.types.writable.Writables;
-import org.junit.Test;
-
-import com.google.common.collect.Lists;
-
-public class CartesianTest {
-
-  @Test
-  public void testCartesianCollection_SingleValues() {
-
-    PCollection<String> letters = MemPipeline.typedCollectionOf(Writables.strings(), "a", "b");
-    PCollection<Integer> ints = MemPipeline.typedCollectionOf(Writables.ints(), 1, 2);
-
-    PCollection<Pair<String, Integer>> cartesianProduct = Cartesian.cross(letters, ints);
-
-    @SuppressWarnings("unchecked")
-    List<Pair<String, Integer>> expectedResults = Lists.newArrayList(Pair.of("a", 1), Pair.of("a", 2), Pair.of("b", 1),
-        Pair.of("b", 2));
-    List<Pair<String, Integer>> actualResults = Lists.newArrayList(cartesianProduct.materialize());
-    Collections.sort(actualResults);
-
-    assertEquals(expectedResults, actualResults);
-  }
-
-  @Test
-  public void testCartesianCollection_Tables() {
-
-    PTable<String, Integer> leftTable = MemPipeline.typedTableOf(
-        Writables.tableOf(Writables.strings(), Writables.ints()), "a", 1, "b", 2);
-    PTable<String, Float> rightTable = MemPipeline.typedTableOf(
-        Writables.tableOf(Writables.strings(), Writables.floats()), "A", 1.0f, "B", 2.0f);
-
-    PTable<Pair<String, String>, Pair<Integer, Float>> cartesianProduct = Cartesian.cross(leftTable, rightTable);
-
-    List<Pair<Pair<String, String>, Pair<Integer, Float>>> expectedResults = Lists.newArrayList();
-    expectedResults.add(Pair.of(Pair.of("a", "A"), Pair.of(1, 1.0f)));
-    expectedResults.add(Pair.of(Pair.of("a", "B"), Pair.of(1, 2.0f)));
-    expectedResults.add(Pair.of(Pair.of("b", "A"), Pair.of(2, 1.0f)));
-    expectedResults.add(Pair.of(Pair.of("b", "B"), Pair.of(2, 2.0f)));
-
-    List<Pair<Pair<String, String>, Pair<Integer, Float>>> actualResults = Lists.newArrayList(cartesianProduct
-        .materialize());
-    Collections.sort(actualResults);
-
-    assertEquals(expectedResults, actualResults);
-
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/lib/DistinctTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/lib/DistinctTest.java b/crunch/src/test/java/org/apache/crunch/lib/DistinctTest.java
deleted file mode 100644
index 8c0b3bf..0000000
--- a/crunch/src/test/java/org/apache/crunch/lib/DistinctTest.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib;
-
-import static org.junit.Assert.assertEquals;
-
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.crunch.PCollection;
-import org.apache.crunch.impl.mem.MemPipeline;
-import org.apache.crunch.types.avro.Avros;
-import org.junit.Test;
-
-import com.google.common.collect.ImmutableSet;
-
-public class DistinctTest {
-  private static final List<Integer> DATA = Arrays.asList(
-      17, 29, 17, 29, 17, 29, 36, 45, 17, 45, 36, 29
-  );
-
-  @Test
-  public void testDistinct() {
-    PCollection<Integer> input = MemPipeline.typedCollectionOf(Avros.ints(), DATA);
-    Iterable<Integer> unique = Distinct.distinct(input).materialize();
-
-    assertEquals(ImmutableSet.copyOf(DATA), ImmutableSet.copyOf(unique));
-  }
-
-  @Test
-  public void testDistinctFlush() {
-    PCollection<Integer> input = MemPipeline.typedCollectionOf(Avros.ints(), DATA);
-    Iterable<Integer> unique = Distinct.distinct(input, 2).materialize();
-
-    assertEquals(ImmutableSet.copyOf(DATA), ImmutableSet.copyOf(unique));
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/lib/SampleTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/lib/SampleTest.java b/crunch/src/test/java/org/apache/crunch/lib/SampleTest.java
deleted file mode 100644
index bd6fd81..0000000
--- a/crunch/src/test/java/org/apache/crunch/lib/SampleTest.java
+++ /dev/null
@@ -1,71 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib;
-
-import static org.junit.Assert.assertEquals;
-
-import java.util.List;
-import java.util.Map;
-
-import org.apache.crunch.PCollection;
-import org.apache.crunch.Pair;
-import org.apache.crunch.impl.mem.MemPipeline;
-import org.apache.crunch.types.writable.Writables;
-import org.junit.Test;
-
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Maps;
-
-public class SampleTest {
-  private PCollection<Pair<String, Double>> values = MemPipeline.typedCollectionOf(
-      Writables.pairs(Writables.strings(), Writables.doubles()),
-      ImmutableList.of(
-        Pair.of("foo", 200.0),
-        Pair.of("bar", 400.0),
-        Pair.of("baz", 100.0),
-        Pair.of("biz", 100.0)));
-  
-  @Test
-  public void testWRS() throws Exception {
-    Map<String, Integer> histogram = Maps.newHashMap();
-    
-    for (int i = 0; i < 100; i++) {
-      PCollection<String> sample = Sample.weightedReservoirSample(values, 1, 1729L + i);
-      for (String s : sample.materialize()) {
-        if (!histogram.containsKey(s)) {
-          histogram.put(s, 1);
-        } else {
-          histogram.put(s, 1 + histogram.get(s));
-        }
-      }
-    }
-    
-    Map<String, Integer> expected = ImmutableMap.of(
-        "foo", 24, "bar", 51, "baz", 13, "biz", 12);
-    assertEquals(expected, histogram);
-  }
-
-  @Test
-  public void testSample() {
-    PCollection<Integer> pcollect = MemPipeline.collectionOf(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
-    Iterable<Integer> sample = Sample.sample(pcollect, 123998L, 0.2).materialize();
-    List<Integer> sampleValues = ImmutableList.copyOf(sample);
-    assertEquals(ImmutableList.of(6, 7), sampleValues);
-  }
-}

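The deleted SampleTest above drives Sample.weightedReservoirSample across 100 seeds and asserts a histogram that tracks the input weights (200/400/100/100 giving counts near 25/50/12.5/12.5). Below is a minimal standalone sketch of the standard weighted reservoir sampling technique (Efraimidis-Spirakis "A-Res") that yields that kind of distribution; the class and method names are illustrative, not the Crunch implementation:

import java.util.AbstractMap.SimpleEntry;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Random;

// Weighted reservoir sampling (A-Res): each item draws the key u^(1/weight) for
// a uniform random u in (0,1); the k items with the largest keys are the sample.
public class WeightedReservoirSketch {

  public static <T> List<T> sample(List<Map.Entry<T, Double>> weighted, int k, long seed) {
    Random rand = new Random(seed);
    // Min-heap on the key, so the smallest surviving key is always on top.
    PriorityQueue<Map.Entry<Double, T>> reservoir =
        new PriorityQueue<>((a, b) -> Double.compare(a.getKey(), b.getKey()));
    for (Map.Entry<T, Double> entry : weighted) {
      double key = Math.pow(rand.nextDouble(), 1.0 / entry.getValue());
      if (reservoir.size() < k) {
        reservoir.add(new SimpleEntry<>(key, entry.getKey()));
      } else if (key > reservoir.peek().getKey()) {
        reservoir.poll(); // evict the current smallest key
        reservoir.add(new SimpleEntry<>(key, entry.getKey()));
      }
    }
    List<T> result = new ArrayList<>();
    for (Map.Entry<Double, T> kept : reservoir) {
      result.add(kept.getValue());
    }
    return result;
  }
}

Doubling an item's weight roughly doubles how often it survives a size-1 reservoir, which is why "bar" (weight 400.0) is drawn about twice as often as "foo" (weight 200.0) in the test's expected counts.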
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/lib/SecondarySortTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/lib/SecondarySortTest.java b/crunch/src/test/java/org/apache/crunch/lib/SecondarySortTest.java
deleted file mode 100644
index 933b986..0000000
--- a/crunch/src/test/java/org/apache/crunch/lib/SecondarySortTest.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib;
-
-import static org.apache.crunch.types.avro.Avros.*;
-import static org.junit.Assert.assertEquals;
-
-import org.apache.crunch.DoFn;
-import org.apache.crunch.Emitter;
-import org.apache.crunch.PCollection;
-import org.apache.crunch.PTable;
-import org.apache.crunch.Pair;
-import org.apache.crunch.impl.mem.MemPipeline;
-import org.junit.Test;
-
-import com.google.common.collect.ImmutableList;
-
-
-public class SecondarySortTest {
-  @Test
-  public void testInMemory() throws Exception {
-    PTable<Long, Pair<Long, String>> input = MemPipeline.typedTableOf(tableOf(longs(), pairs(longs(), strings())),
-        1729L, Pair.of(17L, "a"), 100L, Pair.of(29L, "b"), 1729L, Pair.of(29L, "c"));
-    PCollection<String> letters = SecondarySort.sortAndApply(input, new StringifyFn(), strings());
-    assertEquals(ImmutableList.of("b", "ac"), letters.materialize());
-  }
-  
-  private static class StringifyFn extends DoFn<Pair<Long, Iterable<Pair<Long, String>>>, String> {
-    @Override
-    public void process(Pair<Long, Iterable<Pair<Long, String>>> input, Emitter<String> emitter) {
-      StringBuilder sb = new StringBuilder();
-      for (Pair<Long, String> p : input.second()) {
-        sb.append(p.second());
-      }
-      emitter.emit(sb.toString());
-    }
-  }
-}

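The deleted SecondarySortTest above depends on two properties: records are grouped by a primary key, and each group's values reach the DoFn ordered by a secondary key. A small in-memory illustration of that pattern with plain Java collections (nothing here is Crunch API), reproducing the test's expected output of "b" for key 100 and "ac" for key 1729:

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

// Group by primary key, sort each group by secondary key, then fold the group.
public class SecondarySortSketch {

  static class Record {
    final long primary;
    final long secondary;
    final String value;

    Record(long primary, long secondary, String value) {
      this.primary = primary;
      this.secondary = secondary;
      this.value = value;
    }
  }

  public static void main(String[] args) {
    List<Record> input = new ArrayList<>();
    input.add(new Record(1729L, 17L, "a"));
    input.add(new Record(100L, 29L, "b"));
    input.add(new Record(1729L, 29L, "c"));

    // TreeMap keeps the groups in primary-key order.
    Map<Long, List<Record>> grouped = new TreeMap<>();
    for (Record record : input) {
      grouped.computeIfAbsent(record.primary, k -> new ArrayList<>()).add(record);
    }

    for (Map.Entry<Long, List<Record>> group : grouped.entrySet()) {
      group.getValue().sort(Comparator.comparingLong(r -> r.secondary)); // secondary-key order
      StringBuilder sb = new StringBuilder();
      for (Record record : group.getValue()) {
        sb.append(record.value);
      }
      System.out.println(group.getKey() + " -> " + sb); // 100 -> b, 1729 -> ac
    }
  }
}

On a real cluster the same effect comes from partitioning and grouping on the primary key while sorting on the composite key; the contract the test asserts is just this grouping plus in-group ordering.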
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/lib/TupleWritablePartitionerTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/lib/TupleWritablePartitionerTest.java b/crunch/src/test/java/org/apache/crunch/lib/TupleWritablePartitionerTest.java
deleted file mode 100644
index 35ccc11..0000000
--- a/crunch/src/test/java/org/apache/crunch/lib/TupleWritablePartitionerTest.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib;
-
-import static org.junit.Assert.assertEquals;
-
-import org.apache.crunch.lib.join.JoinUtils.TupleWritablePartitioner;
-import org.apache.crunch.types.writable.TupleWritable;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Writable;
-import org.junit.Before;
-import org.junit.Test;
-
-public class TupleWritablePartitionerTest {
-
-  private TupleWritablePartitioner tupleWritablePartitioner;
-
-  @Before
-  public void setUp() {
-    tupleWritablePartitioner = new TupleWritablePartitioner();
-  }
-
-  @Test
-  public void testGetPartition() {
-    IntWritable intWritable = new IntWritable(3);
-    TupleWritable key = new TupleWritable(new Writable[] { intWritable });
-    assertEquals(3, tupleWritablePartitioner.getPartition(key, NullWritable.get(), 5));
-    assertEquals(1, tupleWritablePartitioner.getPartition(key, NullWritable.get(), 2));
-  }
-
-  @Test
-  public void testGetPartition_NegativeHashValue() {
-    IntWritable intWritable = new IntWritable(-3);
-    // Sanity check: if this doesn't work, the premise of this test is wrong
-    assertEquals(-3, intWritable.hashCode());
-
-    TupleWritable key = new TupleWritable(new Writable[] { intWritable });
-    assertEquals(3, tupleWritablePartitioner.getPartition(key, NullWritable.get(), 5));
-    assertEquals(1, tupleWritablePartitioner.getPartition(key, NullWritable.get(), 2));
-  }
-
-  @Test
-  public void testGetPartition_IntegerMinValue() {
-    IntWritable intWritable = new IntWritable(Integer.MIN_VALUE);
-    // Sanity check: if this doesn't work, the premise of this test is wrong
-    assertEquals(Integer.MIN_VALUE, intWritable.hashCode());
-
-    TupleWritable key = new TupleWritable(new Writable[] { intWritable });
-    assertEquals(0, tupleWritablePartitioner.getPartition(key, NullWritable.get(), Integer.MAX_VALUE));
-  }
-
-}

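The partitioner test above pins down how a key's hash code, including negative values and Integer.MIN_VALUE, must map to a non-negative partition index. One mapping that satisfies every expectation in the test (3 -> 3 of 5, -3 -> 3 of 5 and 1 of 2, Integer.MIN_VALUE -> 0 of Integer.MAX_VALUE) is sketched below; it is an illustration consistent with those assertions, not necessarily the exact Crunch code:

// A partition function consistent with the deleted test's expectations.
// Math.abs(Integer.MIN_VALUE) overflows back to Integer.MIN_VALUE, so the
// bitmask is what keeps the result non-negative in that corner case.
public final class PartitionSketch {

  public static int partition(int hashCode, int numPartitions) {
    return (Math.abs(hashCode) & Integer.MAX_VALUE) % numPartitions;
  }

  public static void main(String[] args) {
    System.out.println(partition(3, 5));  // 3
    System.out.println(partition(-3, 5)); // 3
    System.out.println(partition(-3, 2)); // 1
    System.out.println(partition(Integer.MIN_VALUE, Integer.MAX_VALUE)); // 0
  }
}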

[02/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/lib/join/BrokenLeftAndOuterJoinTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/lib/join/BrokenLeftAndOuterJoinTest.java b/crunch/src/test/java/org/apache/crunch/lib/join/BrokenLeftAndOuterJoinTest.java
deleted file mode 100644
index 7e2e444..0000000
--- a/crunch/src/test/java/org/apache/crunch/lib/join/BrokenLeftAndOuterJoinTest.java
+++ /dev/null
@@ -1,90 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib.join;
-
-import static org.apache.crunch.test.StringWrapper.wrap;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.verify;
-import static org.mockito.Mockito.verifyNoMoreInteractions;
-
-import java.util.List;
-
-import org.apache.crunch.Emitter;
-import org.apache.crunch.Pair;
-import org.apache.crunch.test.CrunchTestSupport;
-import org.apache.crunch.test.StringWrapper;
-import org.apache.crunch.types.avro.Avros;
-import org.apache.hadoop.conf.Configuration;
-import org.junit.Test;
-
-import com.google.common.collect.Lists;
-
-public class BrokenLeftAndOuterJoinTest {
-
-  List<Pair<StringWrapper, String>> createValuePairList(StringWrapper leftValue, String rightValue) {
-    Pair<StringWrapper, String> valuePair = Pair.of(leftValue, rightValue);
-    List<Pair<StringWrapper, String>> valuePairList = Lists.newArrayList();
-    valuePairList.add(valuePair);
-    return valuePairList;
-  }
-  
-  @Test
-  public void testOuterJoin() {
-    JoinFn<StringWrapper, StringWrapper, String> joinFn = new LeftOuterJoinFn<StringWrapper, StringWrapper, String>(
-        Avros.reflects(StringWrapper.class),
-        Avros.reflects(StringWrapper.class));
-    joinFn.setContext(CrunchTestSupport.getTestContext(new Configuration()));
-    joinFn.initialize();
-    Emitter<Pair<StringWrapper, Pair<StringWrapper, String>>> emitter = mock(Emitter.class);
-    
-    StringWrapper key = new StringWrapper();
-    StringWrapper leftValue = new StringWrapper();
-    key.setValue("left-only");
-    leftValue.setValue("left-only-left");
-    joinFn.join(key, 0, createValuePairList(leftValue, null), emitter);
-
-    key.setValue("right-only");
-    joinFn.join(key, 1, createValuePairList(null, "right-only-right"), emitter);
-
-    verify(emitter).emit(Pair.of(wrap("left-only"), Pair.of(wrap("left-only-left"), (String) null)));
-    verifyNoMoreInteractions(emitter);
-  }
-  
-  @Test
-  public void testFullJoin() {
-    JoinFn<StringWrapper, StringWrapper, String> joinFn = new FullOuterJoinFn<StringWrapper, StringWrapper, String>(
-        Avros.reflects(StringWrapper.class),
-        Avros.reflects(StringWrapper.class));
-    joinFn.setContext(CrunchTestSupport.getTestContext(new Configuration()));
-    joinFn.initialize();
-    Emitter<Pair<StringWrapper, Pair<StringWrapper, String>>> emitter = mock(Emitter.class);
-    
-    StringWrapper key = new StringWrapper();
-    StringWrapper leftValue = new StringWrapper();
-    key.setValue("left-only");
-    leftValue.setValue("left-only-left");
-    joinFn.join(key, 0, createValuePairList(leftValue, null), emitter);
-
-    key.setValue("right-only");
-    joinFn.join(key, 1, createValuePairList(null, "right-only-right"), emitter);
-
-    verify(emitter).emit(Pair.of(wrap("left-only"), Pair.of(wrap("left-only-left"), (String) null)));
-    verify(emitter).emit(Pair.of(wrap("right-only"), Pair.of((StringWrapper)null, "right-only-right")));
-    verifyNoMoreInteractions(emitter);
-  }
-}

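Taken together, the two tests above pin down the usual outer-join semantics: a left outer join keeps unmatched left keys and drops unmatched right keys, while a full outer join keeps unmatched keys from both sides, pairing each with a null partner. A plain-Java illustration of those semantics over two tiny maps (this sketches the contract only, not the Crunch join implementation):

import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;

// Outer-join semantics over two in-memory maps; illustrative only.
public class OuterJoinSketch {
  public static void main(String[] args) {
    Map<String, String> left = new LinkedHashMap<>();
    left.put("left-only", "left-only-left");
    Map<String, String> right = new LinkedHashMap<>();
    right.put("right-only", "right-only-right");

    // Left outer join: every left key is emitted; unmatched right keys are dropped.
    for (Map.Entry<String, String> e : left.entrySet()) {
      System.out.println(e.getKey() + " -> (" + e.getValue() + ", " + right.get(e.getKey()) + ")");
    }
    // prints: left-only -> (left-only-left, null)

    // Full outer join: unmatched keys from either side are emitted with a null partner.
    Set<String> allKeys = new LinkedHashSet<>(left.keySet());
    allKeys.addAll(right.keySet());
    for (String key : allKeys) {
      System.out.println(key + " -> (" + left.get(key) + ", " + right.get(key) + ")");
    }
    // prints: left-only -> (left-only-left, null)
    //         right-only -> (null, right-only-right)
  }
}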
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/lib/join/FullOuterJoinFnTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/lib/join/FullOuterJoinFnTest.java b/crunch/src/test/java/org/apache/crunch/lib/join/FullOuterJoinFnTest.java
deleted file mode 100644
index 5cf4f51..0000000
--- a/crunch/src/test/java/org/apache/crunch/lib/join/FullOuterJoinFnTest.java
+++ /dev/null
@@ -1,48 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib.join;
-
-import static org.apache.crunch.test.StringWrapper.wrap;
-import static org.mockito.Mockito.verify;
-import static org.mockito.Mockito.verifyNoMoreInteractions;
-
-import org.apache.crunch.Emitter;
-import org.apache.crunch.Pair;
-import org.apache.crunch.test.StringWrapper;
-import org.apache.crunch.types.avro.Avros;
-
-public class FullOuterJoinFnTest extends JoinFnTestBase {
-
-  @Override
-  protected void checkOutput(Emitter<Pair<StringWrapper, Pair<StringWrapper, String>>> emitter) {
-    verify(emitter)
-        .emit(Pair.of(wrap("left-only"), Pair.of(wrap("left-only-left"), (String) null)));
-    verify(emitter).emit(Pair.of(wrap("both"), Pair.of(wrap("both-left"), "both-right")));
-    verify(emitter).emit(
-        Pair.of(wrap("right-only"), Pair.of((StringWrapper) null, "right-only-right")));
-    verifyNoMoreInteractions(emitter);
-  }
-
-  @Override
-  protected JoinFn<StringWrapper, StringWrapper, String> getJoinFn() {
-    return new FullOuterJoinFn<StringWrapper, StringWrapper, String>(
-        Avros.reflects(StringWrapper.class),
-        Avros.reflects(StringWrapper.class));
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/lib/join/InnerJoinFnTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/lib/join/InnerJoinFnTest.java b/crunch/src/test/java/org/apache/crunch/lib/join/InnerJoinFnTest.java
deleted file mode 100644
index d2347de..0000000
--- a/crunch/src/test/java/org/apache/crunch/lib/join/InnerJoinFnTest.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib.join;
-
-import static org.apache.crunch.test.StringWrapper.wrap;
-import static org.mockito.Mockito.verify;
-import static org.mockito.Mockito.verifyNoMoreInteractions;
-
-import org.apache.crunch.Emitter;
-import org.apache.crunch.Pair;
-import org.apache.crunch.test.StringWrapper;
-import org.apache.crunch.types.avro.Avros;
-
-public class InnerJoinFnTest extends JoinFnTestBase {
-
-  protected void checkOutput(Emitter<Pair<StringWrapper, Pair<StringWrapper, String>>> joinEmitter) {
-    verify(joinEmitter).emit(Pair.of(wrap("both"), Pair.of(wrap("both-left"), "both-right")));
-    verifyNoMoreInteractions(joinEmitter);
-  }
-
-  @Override
-  protected JoinFn<StringWrapper, StringWrapper, String> getJoinFn() {
-    return new InnerJoinFn<StringWrapper, StringWrapper, String>(
-        Avros.reflects(StringWrapper.class),
-        Avros.reflects(StringWrapper.class));
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/lib/join/JoinFnTestBase.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/lib/join/JoinFnTestBase.java b/crunch/src/test/java/org/apache/crunch/lib/join/JoinFnTestBase.java
deleted file mode 100644
index 9e4337f..0000000
--- a/crunch/src/test/java/org/apache/crunch/lib/join/JoinFnTestBase.java
+++ /dev/null
@@ -1,82 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib.join;
-
-import static org.mockito.Mockito.mock;
-
-import java.util.List;
-
-import org.apache.crunch.Emitter;
-import org.apache.crunch.Pair;
-import org.apache.crunch.test.CrunchTestSupport;
-import org.apache.crunch.test.StringWrapper;
-import org.apache.hadoop.conf.Configuration;
-import org.junit.Before;
-import org.junit.Test;
-
-import com.google.common.collect.Lists;
-
-public abstract class JoinFnTestBase {
-
-  private JoinFn<StringWrapper, StringWrapper, String> joinFn;
-
-  private Emitter<Pair<StringWrapper, Pair<StringWrapper, String>>> emitter;
-
-  // Avoid warnings on generic Emitter mock
-  @SuppressWarnings("unchecked")
-  @Before
-  public void setUp() {
-    joinFn = getJoinFn();
-    joinFn.setContext(CrunchTestSupport.getTestContext(new Configuration()));
-    joinFn.initialize();
-    emitter = mock(Emitter.class);
-  }
-
-  @Test
-  public void testJoin() {
-
-    StringWrapper key = new StringWrapper();
-    StringWrapper leftValue = new StringWrapper();
-    key.setValue("left-only");
-    leftValue.setValue("left-only-left");
-    joinFn.join(key, 0, createValuePairList(leftValue, null), emitter);
-
-    key.setValue("both");
-    leftValue.setValue("both-left");
-    joinFn.join(key, 0, createValuePairList(leftValue, null), emitter);
-    joinFn.join(key, 1, createValuePairList(null, "both-right"), emitter);
-
-    key.setValue("right-only");
-    joinFn.join(key, 1, createValuePairList(null, "right-only-right"), emitter);
-
-    checkOutput(emitter);
-
-  }
-
-  protected abstract void checkOutput(Emitter<Pair<StringWrapper, Pair<StringWrapper, String>>> emitter);
-
-  protected abstract JoinFn<StringWrapper, StringWrapper, String> getJoinFn();
-
-  protected List<Pair<StringWrapper, String>> createValuePairList(StringWrapper leftValue, String rightValue) {
-    Pair<StringWrapper, String> valuePair = Pair.of(leftValue, rightValue);
-    List<Pair<StringWrapper, String>> valuePairList = Lists.newArrayList();
-    valuePairList.add(valuePair);
-    return valuePairList;
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/lib/join/LeftOuterJoinTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/lib/join/LeftOuterJoinTest.java b/crunch/src/test/java/org/apache/crunch/lib/join/LeftOuterJoinTest.java
deleted file mode 100644
index a90457e..0000000
--- a/crunch/src/test/java/org/apache/crunch/lib/join/LeftOuterJoinTest.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib.join;
-
-import static org.apache.crunch.test.StringWrapper.wrap;
-import static org.mockito.Mockito.verify;
-import static org.mockito.Mockito.verifyNoMoreInteractions;
-
-import org.apache.crunch.Emitter;
-import org.apache.crunch.Pair;
-import org.apache.crunch.test.StringWrapper;
-import org.apache.crunch.types.avro.Avros;
-
-public class LeftOuterJoinTest extends JoinFnTestBase {
-
-  @Override
-  protected void checkOutput(Emitter<Pair<StringWrapper, Pair<StringWrapper, String>>> emitter) {
-    verify(emitter)
-        .emit(Pair.of(wrap("left-only"), Pair.of(wrap("left-only-left"), (String) null)));
-    verify(emitter).emit(Pair.of(wrap("both"), Pair.of(wrap("both-left"), "both-right")));
-    verifyNoMoreInteractions(emitter);
-  }
-
-  @Override
-  protected JoinFn<StringWrapper, StringWrapper, String> getJoinFn() {
-    return new LeftOuterJoinFn<StringWrapper, StringWrapper, String>(
-        Avros.reflects(StringWrapper.class),
-        Avros.reflects(StringWrapper.class));
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/lib/join/RightOuterJoinFnTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/lib/join/RightOuterJoinFnTest.java b/crunch/src/test/java/org/apache/crunch/lib/join/RightOuterJoinFnTest.java
deleted file mode 100644
index 7e41284..0000000
--- a/crunch/src/test/java/org/apache/crunch/lib/join/RightOuterJoinFnTest.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib.join;
-
-import static org.apache.crunch.test.StringWrapper.wrap;
-import static org.mockito.Mockito.verify;
-import static org.mockito.Mockito.verifyNoMoreInteractions;
-
-import org.apache.crunch.Emitter;
-import org.apache.crunch.Pair;
-import org.apache.crunch.test.StringWrapper;
-import org.apache.crunch.types.avro.Avros;
-
-public class RightOuterJoinFnTest extends JoinFnTestBase {
-
-  @Override
-  protected void checkOutput(Emitter<Pair<StringWrapper, Pair<StringWrapper, String>>> emitter) {
-    verify(emitter).emit(Pair.of(wrap("both"), Pair.of(wrap("both-left"), "both-right")));
-    verify(emitter).emit(
-        Pair.of(wrap("right-only"), Pair.of((StringWrapper) null, "right-only-right")));
-    verifyNoMoreInteractions(emitter);
-  }
-
-  @Override
-  protected JoinFn<StringWrapper, StringWrapper, String> getJoinFn() {
-    return new RightOuterJoinFn<StringWrapper, StringWrapper, String>(
-        Avros.reflects(StringWrapper.class),
-        Avros.reflects(StringWrapper.class));
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/test/CountersTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/test/CountersTest.java b/crunch/src/test/java/org/apache/crunch/test/CountersTest.java
deleted file mode 100644
index 66f854e..0000000
--- a/crunch/src/test/java/org/apache/crunch/test/CountersTest.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.test;
-
-import static org.junit.Assert.assertEquals;
-
-import org.apache.crunch.DoFn;
-import org.apache.crunch.Emitter;
-import org.apache.hadoop.conf.Configuration;
-import org.junit.Test;
-
-/**
- * A test to verify that using counters inside a unit test works. :)
- */
-public class CountersTest {
-
-  public enum CT {
-    ONE,
-    TWO,
-    THREE
-  };
-
-  public static class CTFn extends DoFn<String, String> {
-    CTFn() {
-      setContext(CrunchTestSupport.getTestContext(new Configuration()));
-    }
-
-    @Override
-    public void process(String input, Emitter<String> emitter) {
-      getCounter(CT.ONE).increment(1);
-      getCounter(CT.TWO).increment(4);
-      getCounter(CT.THREE).increment(7);
-    }
-  }
-
-  @Test
-  public void test() {
-    CTFn fn = new CTFn();
-    fn.process("foo", null);
-    fn.process("bar", null);
-    assertEquals(2L, TestCounters.getCounter(CT.ONE).getValue());
-    assertEquals(8L, TestCounters.getCounter(CT.TWO).getValue());
-    assertEquals(14L, TestCounters.getCounter(CT.THREE).getValue());
-  }
-
-  @Test
-  public void secondTest() {
-    CTFn fn = new CTFn();
-    fn.process("foo", null);
-    fn.process("bar", null);
-    assertEquals(2L, TestCounters.getCounter(CT.ONE).getValue());
-    assertEquals(8L, TestCounters.getCounter(CT.TWO).getValue());
-    assertEquals(14L, TestCounters.getCounter(CT.THREE).getValue());
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/test/StringWrapper.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/test/StringWrapper.java b/crunch/src/test/java/org/apache/crunch/test/StringWrapper.java
deleted file mode 100644
index 34302b5..0000000
--- a/crunch/src/test/java/org/apache/crunch/test/StringWrapper.java
+++ /dev/null
@@ -1,102 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.test;
-
-import org.apache.crunch.MapFn;
-
-/**
- * Simple String wrapper for testing with Avro reflection.
- */
-public class StringWrapper implements Comparable<StringWrapper> {
-
-  public static class StringToStringWrapperMapFn extends MapFn<String, StringWrapper> {
-
-    @Override
-    public StringWrapper map(String input) {
-      return wrap(input);
-    }
-
-  }
-
-  public static class StringWrapperToStringMapFn extends MapFn<StringWrapper, String> {
-
-    @Override
-    public String map(StringWrapper input) {
-      return input.getValue();
-    }
-
-  }
-
-  private String value;
-
-  public StringWrapper() {
-    this("");
-  }
-
-  public StringWrapper(String value) {
-    this.value = value;
-  }
-
-  @Override
-  public int compareTo(StringWrapper o) {
-    return this.value.compareTo(o.value);
-  }
-
-  public String getValue() {
-    return value;
-  }
-
-  public void setValue(String value) {
-    this.value = value;
-  }
-
-  @Override
-  public int hashCode() {
-    final int prime = 31;
-    int result = 1;
-    result = prime * result + ((value == null) ? 0 : value.hashCode());
-    return result;
-  }
-
-  @Override
-  public boolean equals(Object obj) {
-    if (this == obj)
-      return true;
-    if (obj == null)
-      return false;
-    if (getClass() != obj.getClass())
-      return false;
-    StringWrapper other = (StringWrapper) obj;
-    if (value == null) {
-      if (other.value != null)
-        return false;
-    } else if (!value.equals(other.value))
-      return false;
-    return true;
-  }
-
-  @Override
-  public String toString() {
-    return "StringWrapper [value=" + value + "]";
-  }
-
-  public static StringWrapper wrap(String value) {
-    return new StringWrapper(value);
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/types/CollectionDeepCopierTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/types/CollectionDeepCopierTest.java b/crunch/src/test/java/org/apache/crunch/types/CollectionDeepCopierTest.java
deleted file mode 100644
index bd7fcd7..0000000
--- a/crunch/src/test/java/org/apache/crunch/types/CollectionDeepCopierTest.java
+++ /dev/null
@@ -1,61 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotSame;
-import static org.junit.Assert.assertNull;
-
-import java.util.Collection;
-
-import org.apache.crunch.test.Person;
-import org.apache.crunch.types.avro.Avros;
-import org.apache.hadoop.conf.Configuration;
-import org.junit.Test;
-
-import com.google.common.collect.Lists;
-
-public class CollectionDeepCopierTest {
-
-  @Test
-  public void testDeepCopy() {
-    Person person = new Person();
-    person.age = 42;
-    person.name = "John Smith";
-    person.siblingnames = Lists.<CharSequence> newArrayList();
-
-    Collection<Person> personCollection = Lists.newArrayList(person);
-    CollectionDeepCopier<Person> collectionDeepCopier = new CollectionDeepCopier<Person>(
-        Avros.records(Person.class));
-    collectionDeepCopier.initialize(new Configuration());
-
-    Collection<Person> deepCopyCollection = collectionDeepCopier.deepCopy(personCollection);
-
-    assertEquals(personCollection, deepCopyCollection);
-    assertNotSame(personCollection.iterator().next(), deepCopyCollection.iterator().next());
-  }
-
-  @Test
-  public void testNullDeepCopy() {
-    CollectionDeepCopier<Person> collectionDeepCopier = new CollectionDeepCopier<Person>(
-        Avros.records(Person.class));
-    collectionDeepCopier.initialize(new Configuration());
-    Collection<Person> nullCollection = null;
-    assertNull(collectionDeepCopier.deepCopy(nullCollection));
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/types/MapDeepCopierTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/types/MapDeepCopierTest.java b/crunch/src/test/java/org/apache/crunch/types/MapDeepCopierTest.java
deleted file mode 100644
index c13e4a2..0000000
--- a/crunch/src/test/java/org/apache/crunch/types/MapDeepCopierTest.java
+++ /dev/null
@@ -1,63 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotSame;
-import static org.junit.Assert.assertNull;
-
-import java.util.Map;
-
-import org.apache.crunch.test.StringWrapper;
-import org.apache.crunch.types.avro.Avros;
-import org.apache.hadoop.conf.Configuration;
-import org.junit.Test;
-
-import com.google.common.collect.Maps;
-
-public class MapDeepCopierTest {
-
-  @Test
-  public void testDeepCopy() {
-    StringWrapper stringWrapper = new StringWrapper("value");
-    String key = "key";
-    Map<String, StringWrapper> map = Maps.newHashMap();
-    map.put(key, stringWrapper);
-
-    MapDeepCopier<StringWrapper> deepCopier = new MapDeepCopier<StringWrapper>(
-        Avros.reflects(StringWrapper.class));
-    deepCopier.initialize(new Configuration());
-    Map<String, StringWrapper> deepCopy = deepCopier.deepCopy(map);
-
-    assertEquals(map, deepCopy);
-    assertNotSame(map.get(key), deepCopy.get(key));
-  }
-  
-  @Test
-  public void testDeepCopy_Null() {
-    Map<String, StringWrapper> map = null;
-
-    MapDeepCopier<StringWrapper> deepCopier = new MapDeepCopier<StringWrapper>(
-        Avros.reflects(StringWrapper.class));
-    deepCopier.initialize(new Configuration());
-    Map<String, StringWrapper> deepCopy = deepCopier.deepCopy(map);
-
-    assertNull(deepCopy);
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/types/PTypeUtilsTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/types/PTypeUtilsTest.java b/crunch/src/test/java/org/apache/crunch/types/PTypeUtilsTest.java
deleted file mode 100644
index e6fd90c..0000000
--- a/crunch/src/test/java/org/apache/crunch/types/PTypeUtilsTest.java
+++ /dev/null
@@ -1,89 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-
-import java.util.Collection;
-
-import org.apache.avro.Schema;
-import org.apache.avro.util.Utf8;
-import org.apache.crunch.Tuple3;
-import org.apache.crunch.TupleN;
-import org.apache.crunch.types.avro.AvroType;
-import org.apache.crunch.types.avro.AvroTypeFamily;
-import org.apache.crunch.types.avro.Avros;
-import org.apache.crunch.types.writable.WritableTypeFamily;
-import org.apache.crunch.types.writable.Writables;
-import org.apache.hadoop.io.Text;
-import org.junit.Assert;
-import org.junit.Test;
-
-public class PTypeUtilsTest {
-  @Test
-  public void testPrimitives() {
-    assertEquals(Avros.strings(), AvroTypeFamily.getInstance().as(Writables.strings()));
-    Assert.assertEquals(Writables.doubles(), WritableTypeFamily.getInstance().as(Avros.doubles()));
-  }
-
-  @Test
-  public void testTuple3() {
-    PType<Tuple3<String, Float, Integer>> t = Writables.triples(Writables.strings(), Writables.floats(),
-        Writables.ints());
-    PType<Tuple3<String, Float, Integer>> at = AvroTypeFamily.getInstance().as(t);
-    assertEquals(Avros.strings(), at.getSubTypes().get(0));
-    assertEquals(Avros.floats(), at.getSubTypes().get(1));
-    assertEquals(Avros.ints(), at.getSubTypes().get(2));
-  }
-
-  @Test
-  public void testTupleN() {
-    PType<TupleN> t = Avros.tuples(Avros.strings(), Avros.floats(), Avros.ints());
-    PType<TupleN> wt = WritableTypeFamily.getInstance().as(t);
-    assertEquals(Writables.strings(), wt.getSubTypes().get(0));
-    assertEquals(Writables.floats(), wt.getSubTypes().get(1));
-    assertEquals(Writables.ints(), wt.getSubTypes().get(2));
-  }
-
-  @Test
-  public void testWritableCollections() {
-    PType<Collection<String>> t = Avros.collections(Avros.strings());
-    t = WritableTypeFamily.getInstance().as(t);
-    assertEquals(Writables.strings(), t.getSubTypes().get(0));
-  }
-
-  @Test
-  public void testAvroCollections() {
-    PType<Collection<Double>> t = Writables.collections(Writables.doubles());
-    t = AvroTypeFamily.getInstance().as(t);
-    assertEquals(Avros.doubles(), t.getSubTypes().get(0));
-  }
-
-  @Test
-  public void testAvroRegistered() {
-    AvroType<Utf8> at = new AvroType<Utf8>(Utf8.class, Schema.create(Schema.Type.STRING), new DeepCopier.NoOpDeepCopier<Utf8>());
-    Avros.register(Utf8.class, at);
-    assertEquals(at, Avros.records(Utf8.class));
-  }
-
-  @Test
-  public void testWritableBuiltin() {
-    assertNotNull(Writables.records(Text.class));
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/types/PTypesTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/types/PTypesTest.java b/crunch/src/test/java/org/apache/crunch/types/PTypesTest.java
deleted file mode 100644
index d7c8811..0000000
--- a/crunch/src/test/java/org/apache/crunch/types/PTypesTest.java
+++ /dev/null
@@ -1,34 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types;
-
-import static org.junit.Assert.assertEquals;
-
-import java.util.UUID;
-
-import org.apache.crunch.types.avro.AvroTypeFamily;
-import org.junit.Test;
-
-public class PTypesTest {
-  @Test
-  public void testUUID() throws Exception {
-    UUID uuid = UUID.randomUUID();
-    PType<UUID> ptype = PTypes.uuid(AvroTypeFamily.getInstance());
-    assertEquals(uuid, ptype.getInputMapFn().map(ptype.getOutputMapFn().map(uuid)));
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/types/TupleDeepCopierTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/types/TupleDeepCopierTest.java b/crunch/src/test/java/org/apache/crunch/types/TupleDeepCopierTest.java
deleted file mode 100644
index e46a680..0000000
--- a/crunch/src/test/java/org/apache/crunch/types/TupleDeepCopierTest.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotSame;
-import static org.junit.Assert.assertNull;
-
-import org.apache.crunch.Pair;
-import org.apache.crunch.test.Person;
-import org.apache.crunch.types.avro.Avros;
-import org.apache.hadoop.conf.Configuration;
-import org.junit.Test;
-
-import com.google.common.collect.Lists;
-
-public class TupleDeepCopierTest {
-
-  @Test
-  public void testDeepCopy_Pair() {
-    Person person = new Person();
-    person.name = "John Doe";
-    person.age = 42;
-    person.siblingnames = Lists.<CharSequence> newArrayList();
-
-    Pair<Integer, Person> inputPair = Pair.of(1, person);
-    DeepCopier<Pair> deepCopier = new TupleDeepCopier<Pair>(Pair.class, Avros.ints(),
-        Avros.records(Person.class));
-
-    deepCopier.initialize(new Configuration());
-    Pair<Integer, Person> deepCopyPair = deepCopier.deepCopy(inputPair);
-
-    assertEquals(inputPair, deepCopyPair);
-    assertNotSame(inputPair.second(), deepCopyPair.second());
-  }
-  
-  @Test
-  public void testDeepCopy_PairContainingNull() {
-
-    Pair<Integer, Person> inputPair = Pair.of(1, null);
-    DeepCopier<Pair> deepCopier = new TupleDeepCopier<Pair>(Pair.class, Avros.ints(),
-        Avros.records(Person.class));
-
-    deepCopier.initialize(new Configuration());
-    Pair<Integer, Person> deepCopyPair = deepCopier.deepCopy(inputPair);
-
-    assertEquals(inputPair, deepCopyPair);
-  }
-  
-  @Test
-  public void testDeepCopy_NullPair() {
-    Pair<Integer, Person> inputPair = null;
-    DeepCopier<Pair> deepCopier = new TupleDeepCopier<Pair>(Pair.class, Avros.ints(),
-        Avros.records(Person.class));
-
-    deepCopier.initialize(new Configuration());
-    Pair<Integer, Person> deepCopyPair = deepCopier.deepCopy(inputPair);
-
-    assertNull(deepCopyPair);
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/types/TupleFactoryTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/types/TupleFactoryTest.java b/crunch/src/test/java/org/apache/crunch/types/TupleFactoryTest.java
deleted file mode 100644
index 0726be2..0000000
--- a/crunch/src/test/java/org/apache/crunch/types/TupleFactoryTest.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types;
-
-import static org.junit.Assert.assertEquals;
-
-import org.apache.crunch.Pair;
-import org.apache.crunch.Tuple;
-import org.apache.crunch.Tuple3;
-import org.apache.crunch.Tuple4;
-import org.apache.crunch.TupleN;
-import org.junit.Test;
-
-public class TupleFactoryTest {
-
-  @Test
-  public void testGetTupleFactory_Pair() {
-    assertEquals(TupleFactory.PAIR, TupleFactory.getTupleFactory(Pair.class));
-  }
-
-  @Test
-  public void testGetTupleFactory_Tuple3() {
-    assertEquals(TupleFactory.TUPLE3, TupleFactory.getTupleFactory(Tuple3.class));
-  }
-
-  @Test
-  public void testGetTupleFactory_Tuple4() {
-    assertEquals(TupleFactory.TUPLE4, TupleFactory.getTupleFactory(Tuple4.class));
-  }
-
-  @Test
-  public void testGetTupleFactory_TupleN() {
-    assertEquals(TupleFactory.TUPLEN, TupleFactory.getTupleFactory(TupleN.class));
-  }
-
-  @Test
-  public void testGetTupleFactory_CustomTupleClass() {
-    TupleFactory<CustomTupleImplementation> customTupleFactory = TupleFactory.create(CustomTupleImplementation.class);
-    assertEquals(customTupleFactory, TupleFactory.getTupleFactory(CustomTupleImplementation.class));
-  }
-
-  private static class CustomTupleImplementation implements Tuple {
-
-    @Override
-    public Object get(int index) {
-      return null;
-    }
-
-    @Override
-    public int size() {
-      return 0;
-    }
-
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/types/avro/AvroDeepCopierTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/types/avro/AvroDeepCopierTest.java b/crunch/src/test/java/org/apache/crunch/types/avro/AvroDeepCopierTest.java
deleted file mode 100644
index 37c13c0..0000000
--- a/crunch/src/test/java/org/apache/crunch/types/avro/AvroDeepCopierTest.java
+++ /dev/null
@@ -1,107 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.avro;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotSame;
-import static org.junit.Assert.assertNull;
-
-import java.util.List;
-
-import org.apache.avro.generic.GenericData.Record;
-import org.apache.crunch.test.Person;
-import org.apache.crunch.types.avro.AvroDeepCopier.AvroSpecificDeepCopier;
-import org.apache.hadoop.conf.Configuration;
-import org.junit.Test;
-
-import com.google.common.collect.Lists;
-
-public class AvroDeepCopierTest {
-  
-  @Test
-  public void testDeepCopySpecific() {
-    Person person = new Person();
-    person.name = "John Doe";
-    person.age = 42;
-    person.siblingnames = Lists.<CharSequence> newArrayList();
-
-    Person deepCopyPerson = new AvroSpecificDeepCopier<Person>(Person.class, Person.SCHEMA$)
-        .deepCopy(person);
-
-    assertEquals(person, deepCopyPerson);
-    assertNotSame(person, deepCopyPerson);
-  }
-
-  @Test
-  public void testDeepCopyGeneric() {
-    Record record = new Record(Person.SCHEMA$);
-    record.put("name", "John Doe");
-    record.put("age", 42);
-    record.put("siblingnames", Lists.newArrayList());
-
-    Record deepCopyRecord = new AvroDeepCopier.AvroGenericDeepCopier(Person.SCHEMA$)
-        .deepCopy(record);
-
-    assertEquals(record, deepCopyRecord);
-    assertNotSame(record, deepCopyRecord);
-  }
-
-  static class ReflectedPerson {
-    String name;
-    int age;
-    List<String> siblingnames;
-
-    @Override
-    public boolean equals(Object other) {
-      if (other == null || !(other instanceof ReflectedPerson)) {
-        return false;
-      }
-      ReflectedPerson that = (ReflectedPerson) other;
-      return name.equals(that.name) && age == that.age && siblingnames.equals(that.siblingnames);
-    }
-  }
-
-  @Test
-  public void testDeepCopyReflect() {
-    ReflectedPerson person = new ReflectedPerson();
-    person.name = "John Doe";
-    person.age = 42;
-    person.siblingnames = Lists.newArrayList();
-
-    AvroDeepCopier<ReflectedPerson> avroDeepCopier = new AvroDeepCopier.AvroReflectDeepCopier<ReflectedPerson>(
-        ReflectedPerson.class, Avros.reflects(ReflectedPerson.class).getSchema());
-    avroDeepCopier.initialize(new Configuration());
-
-    ReflectedPerson deepCopyPerson = avroDeepCopier.deepCopy(person);
-
-    assertEquals(person, deepCopyPerson);
-    assertNotSame(person, deepCopyPerson);
-
-  }
-  
-  @Test
-  public void testDeepCopy_Null() {
-    Person person = null;
-
-    Person deepCopyPerson = new AvroSpecificDeepCopier<Person>(Person.class, Person.SCHEMA$)
-        .deepCopy(person);
-
-    assertNull(deepCopyPerson);
-  }
-
-}

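The deep-copier tests above (and the Collection, Map, and Tuple variants earlier in this patch) all revolve around the same idea: copy a value by serializing it and deserializing it again, so the copy shares no state with the original. A minimal standalone sketch of that round trip for a reflection-mapped class using stock Avro APIs; this shows the general technique, not the Crunch AvroDeepCopier itself:

import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.reflect.ReflectData;
import org.apache.avro.reflect.ReflectDatumReader;
import org.apache.avro.reflect.ReflectDatumWriter;

// Deep copy by Avro serialization round trip: write the value to bytes with a
// DatumWriter, then read those bytes back into a brand-new object.
public class AvroRoundTripCopy {

  public static <T> T deepCopy(T value, Class<T> clazz) throws IOException {
    if (value == null) {
      return null; // mirrors the null-handling the tests above expect
    }
    Schema schema = ReflectData.get().getSchema(clazz);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
    new ReflectDatumWriter<T>(schema).write(value, encoder);
    encoder.flush();
    BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(out.toByteArray(), null);
    return new ReflectDatumReader<T>(schema).read(null, decoder);
  }
}

The specific and generic variants exercised by the test presumably swap in SpecificDatumWriter/Reader and GenericDatumWriter/Reader, but the round-trip structure stays the same.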
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/types/avro/AvroGroupedTableTypeTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/types/avro/AvroGroupedTableTypeTest.java b/crunch/src/test/java/org/apache/crunch/types/avro/AvroGroupedTableTypeTest.java
deleted file mode 100644
index db9ebdc..0000000
--- a/crunch/src/test/java/org/apache/crunch/types/avro/AvroGroupedTableTypeTest.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.avro;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotSame;
-import static org.junit.Assert.assertSame;
-
-import java.util.List;
-
-import org.apache.crunch.Pair;
-import org.apache.crunch.test.Person;
-import org.apache.crunch.types.PGroupedTableType;
-import org.apache.hadoop.conf.Configuration;
-import org.junit.Test;
-
-import com.google.common.collect.Lists;
-
-public class AvroGroupedTableTypeTest {
-
-  @Test
-  public void testGetDetachedValue() {
-    Integer integerValue = 42;
-    Person person = new Person();
-    person.name = "John Doe";
-    person.age = 42;
-    person.siblingnames = Lists.<CharSequence> newArrayList();
-
-    Iterable<Person> inputPersonIterable = Lists.newArrayList(person);
-    Pair<Integer, Iterable<Person>> pair = Pair.of(integerValue, inputPersonIterable);
-
-    PGroupedTableType<Integer, Person> groupedTableType = Avros.tableOf(Avros.ints(),
-        Avros.specifics(Person.class)).getGroupedTableType();
-    groupedTableType.initialize(new Configuration());
-
-    Pair<Integer, Iterable<Person>> detachedPair = groupedTableType.getDetachedValue(pair);
-
-    assertSame(integerValue, detachedPair.first());
-    List<Person> personList = Lists.newArrayList(detachedPair.second());
-    assertEquals(inputPersonIterable, personList);
-    assertNotSame(person, personList.get(0));
-
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/types/avro/AvroTableTypeTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/types/avro/AvroTableTypeTest.java b/crunch/src/test/java/org/apache/crunch/types/avro/AvroTableTypeTest.java
deleted file mode 100644
index 35d4e5b..0000000
--- a/crunch/src/test/java/org/apache/crunch/types/avro/AvroTableTypeTest.java
+++ /dev/null
@@ -1,72 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.avro;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotSame;
-import static org.junit.Assert.assertSame;
-import static org.junit.Assert.assertTrue;
-
-import org.apache.crunch.Pair;
-import org.apache.crunch.test.Person;
-import org.apache.crunch.test.StringWrapper;
-import org.apache.hadoop.conf.Configuration;
-import org.junit.Test;
-
-import com.google.common.collect.Lists;
-
-public class AvroTableTypeTest {
-
-  @Test
-  public void testGetDetachedValue() {
-    Integer integerValue = 42;
-    Person person = new Person();
-    person.name = "John Doe";
-    person.age = 42;
-    person.siblingnames = Lists.<CharSequence> newArrayList();
-
-    Pair<Integer, Person> pair = Pair.of(integerValue, person);
-
-    AvroTableType<Integer, Person> tableType = Avros.tableOf(Avros.ints(),
-        Avros.specifics(Person.class));
-    tableType.initialize(new Configuration());
-
-    Pair<Integer, Person> detachedPair = tableType.getDetachedValue(pair);
-
-    assertSame(integerValue, detachedPair.first());
-    assertEquals(person, detachedPair.second());
-    assertNotSame(person, detachedPair.second());
-  }
-
-  @Test
-  public void testIsReflect_ContainsReflectKey() {
-    assertTrue(Avros.tableOf(Avros.reflects(StringWrapper.class), Avros.ints()).hasReflect());
-  }
-
-  @Test
-  public void testIsReflect_ContainsReflectValue() {
-    assertTrue(Avros.tableOf(Avros.ints(), Avros.reflects(StringWrapper.class)).hasReflect());
-  }
-
-  @Test
-  public void testReflect_NoReflectKeyOrValue() {
-    assertFalse(Avros.tableOf(Avros.ints(), Avros.ints()).hasReflect());
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/types/avro/AvroTypeTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/types/avro/AvroTypeTest.java b/crunch/src/test/java/org/apache/crunch/types/avro/AvroTypeTest.java
deleted file mode 100644
index a874c63..0000000
--- a/crunch/src/test/java/org/apache/crunch/types/avro/AvroTypeTest.java
+++ /dev/null
@@ -1,279 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.avro;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotSame;
-import static org.junit.Assert.assertSame;
-import static org.junit.Assert.assertTrue;
-
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.avro.generic.GenericData;
-import org.apache.avro.generic.GenericData.Record;
-import org.apache.crunch.Pair;
-import org.apache.crunch.TupleN;
-import org.apache.crunch.test.Person;
-import org.apache.crunch.test.StringWrapper;
-import org.apache.hadoop.conf.Configuration;
-import org.junit.Test;
-
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-
-public class AvroTypeTest {
-
-  @Test
-  public void testIsSpecific_SpecificData() {
-    assertTrue(Avros.records(Person.class).hasSpecific());
-  }
-
-  @Test
-  public void testIsGeneric_SpecificData() {
-    assertFalse(Avros.records(Person.class).isGeneric());
-  }
-
-  @Test
-  public void testIsSpecific_GenericData() {
-    assertFalse(Avros.generics(Person.SCHEMA$).hasSpecific());
-  }
-
-  @Test
-  public void testIsGeneric_GenericData() {
-    assertTrue(Avros.generics(Person.SCHEMA$).isGeneric());
-  }
-
-  @Test
-  public void testIsSpecific_NonAvroClass() {
-    assertFalse(Avros.ints().hasSpecific());
-  }
-
-  @Test
-  public void testIsGeneric_NonAvroClass() {
-    assertFalse(Avros.ints().isGeneric());
-  }
-
-  @Test
-  public void testIsSpecific_SpecificAvroTable() {
-    assertTrue(Avros.tableOf(Avros.strings(), Avros.records(Person.class)).hasSpecific());
-  }
-
-  @Test
-  public void testIsGeneric_SpecificAvroTable() {
-    assertFalse(Avros.tableOf(Avros.strings(), Avros.records(Person.class)).isGeneric());
-  }
-
-  @Test
-  public void testIsSpecific_GenericAvroTable() {
-    assertFalse(Avros.tableOf(Avros.strings(), Avros.generics(Person.SCHEMA$)).hasSpecific());
-  }
-
-  @Test
-  public void testIsGeneric_GenericAvroTable() {
-    assertFalse(Avros.tableOf(Avros.strings(), Avros.generics(Person.SCHEMA$)).isGeneric());
-  }
-
-  @Test
-  public void testIsReflect_GenericType() {
-    assertFalse(Avros.generics(Person.SCHEMA$).hasReflect());
-  }
-
-  @Test
-  public void testIsReflect_SpecificType() {
-    assertFalse(Avros.records(Person.class).hasReflect());
-  }
-
-  @Test
-  public void testIsReflect_ReflectSimpleType() {
-    assertTrue(Avros.reflects(StringWrapper.class).hasReflect());
-  }
-
-  @Test
-  public void testIsReflect_NonReflectSubType() {
-    assertFalse(Avros.pairs(Avros.ints(), Avros.ints()).hasReflect());
-  }
-
-  @Test
-  public void testIsReflect_ReflectSubType() {
-    assertTrue(Avros.pairs(Avros.ints(), Avros.reflects(StringWrapper.class)).hasReflect());
-  }
-
-  @Test
-  public void testIsReflect_TableOfNonReflectTypes() {
-    assertFalse(Avros.tableOf(Avros.ints(), Avros.strings()).hasReflect());
-  }
-
-  @Test
-  public void testIsReflect_TableWithReflectKey() {
-    assertTrue(Avros.tableOf(Avros.reflects(StringWrapper.class), Avros.ints()).hasReflect());
-  }
-
-  @Test
-  public void testIsReflect_TableWithReflectValue() {
-    assertTrue(Avros.tableOf(Avros.ints(), Avros.reflects(StringWrapper.class)).hasReflect());
-  }
-
-  @Test
-  public void testReflect_CollectionContainingReflectValue() {
-    assertTrue(Avros.collections(Avros.reflects(StringWrapper.class)).hasReflect());
-  }
-
-  @Test
-  public void testReflect_CollectionNotContainingReflectValue() {
-    assertFalse(Avros.collections(Avros.generics(Person.SCHEMA$)).hasReflect());
-  }
-
-  @Test
-  public void testGetDetachedValue_AlreadyMappedAvroType() {
-    Integer value = 42;
-    AvroType<Integer> intType = Avros.ints();
-    intType.initialize(new Configuration());
-    Integer detachedValue = intType.getDetachedValue(value);
-    assertSame(value, detachedValue);
-  }
-
-  @Test
-  public void testGetDetachedValue_GenericAvroType() {
-    AvroType<Record> genericType = Avros.generics(Person.SCHEMA$);
-    genericType.initialize(new Configuration());
-    GenericData.Record record = new GenericData.Record(Person.SCHEMA$);
-    record.put("name", "name value");
-    record.put("age", 42);
-    record.put("siblingnames", Lists.newArrayList());
-
-    Record detachedRecord = genericType.getDetachedValue(record);
-    assertEquals(record, detachedRecord);
-    assertNotSame(record, detachedRecord);
-  }
-
-  private Person createPerson() {
-    Person person = new Person();
-    person.name = "name value";
-    person.age = 42;
-    person.siblingnames = Lists.<CharSequence> newArrayList();
-    return person;
-  }
-
-  @Test
-  public void testGetDetachedValue_SpecificAvroType() {
-    AvroType<Person> specificType = Avros.specifics(Person.class);
-    specificType.initialize(new Configuration());
-    Person person = createPerson();
-    Person detachedPerson = specificType.getDetachedValue(person);
-    assertEquals(person, detachedPerson);
-    assertNotSame(person, detachedPerson);
-  }
-
-  @Test(expected = IllegalStateException.class)
-  public void testGetDetachedValue_NotInitialized() {
-    AvroType<Person> specificType = Avros.specifics(Person.class);
-    Person person = createPerson();
-    specificType.getDetachedValue(person);
-  }
-
-  static class ReflectedPerson {
-    String name;
-    int age;
-    List<String> siblingnames;
-
-    @Override
-    public boolean equals(Object other) {
-      if (other == null || !(other instanceof ReflectedPerson)) {
-        return false;
-      }
-      ReflectedPerson that = (ReflectedPerson) other;
-      return name.equals(that.name) && age == that.age && siblingnames.equals(that.siblingnames);
-    }
-  }
-
-  @Test
-  public void testGetDetachedValue_ReflectAvroType() {
-    AvroType<ReflectedPerson> reflectType = Avros.reflects(ReflectedPerson.class);
-    reflectType.initialize(new Configuration());
-    ReflectedPerson rp = new ReflectedPerson();
-    rp.name = "josh";
-    rp.age = 32;
-    rp.siblingnames = Lists.newArrayList();
-    ReflectedPerson detached = reflectType.getDetachedValue(rp);
-    assertEquals(rp, detached);
-    assertNotSame(rp, detached);
-  }
-
-  @Test
-  public void testGetDetachedValue_Pair() {
-    Person person = createPerson();
-    AvroType<Pair<Integer, Person>> pairType = Avros.pairs(Avros.ints(),
-        Avros.records(Person.class));
-    pairType.initialize(new Configuration());
-
-    Pair<Integer, Person> inputPair = Pair.of(1, person);
-    Pair<Integer, Person> detachedPair = pairType.getDetachedValue(inputPair);
-
-    assertEquals(inputPair, detachedPair);
-    assertNotSame(inputPair.second(), detachedPair.second());
-  }
-
-  @Test
-  public void testGetDetachedValue_Collection() {
-    Person person = createPerson();
-    List<Person> personList = Lists.newArrayList(person);
-
-    AvroType<Collection<Person>> collectionType = Avros.collections(Avros.records(Person.class));
-    collectionType.initialize(new Configuration());
-
-    Collection<Person> detachedCollection = collectionType.getDetachedValue(personList);
-
-    assertEquals(personList, detachedCollection);
-    Person detachedPerson = detachedCollection.iterator().next();
-
-    assertNotSame(person, detachedPerson);
-  }
-
-  @Test
-  public void testGetDetachedValue_Map() {
-    String key = "key";
-    Person value = createPerson();
-
-    Map<String, Person> stringPersonMap = Maps.newHashMap();
-    stringPersonMap.put(key, value);
-
-    AvroType<Map<String, Person>> mapType = Avros.maps(Avros.records(Person.class));
-    mapType.initialize(new Configuration());
-
-    Map<String, Person> detachedMap = mapType.getDetachedValue(stringPersonMap);
-
-    assertEquals(stringPersonMap, detachedMap);
-    assertNotSame(value, detachedMap.get(key));
-  }
-
-  @Test
-  public void testGetDetachedValue_TupleN() {
-    Person person = createPerson();
-    AvroType<TupleN> ptype = Avros.tuples(Avros.records(Person.class));
-    ptype.initialize(new Configuration());
-    TupleN tuple = new TupleN(person);
-    TupleN detachedTuple = ptype.getDetachedValue(tuple);
-
-    assertEquals(tuple, detachedTuple);
-    assertNotSame(person, detachedTuple.get(0));
-  }
-
-}
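
Much of the class above checks how the reflect/specific/generic flags propagate
through composite Avro types. A minimal sketch of that kind of inspection,
assuming a hypothetical reflection-mapped POJO (SomePojo) and an invented class
name (AvroTypeFlagsSketch); it relies only on the factory and query methods the
tests exercise:

    import org.apache.crunch.Pair;
    import org.apache.crunch.types.avro.AvroType;
    import org.apache.crunch.types.avro.Avros;

    public class AvroTypeFlagsSketch {

      // Hypothetical POJO handled via Avro reflection.
      static class SomePojo {
        String label;
        int count;
      }

      public static void main(String[] args) {
        // A reflection-based component anywhere in the type makes hasReflect() true.
        AvroType<Pair<Integer, SomePojo>> pairType =
            Avros.pairs(Avros.ints(), Avros.reflects(SomePojo.class));

        System.out.println(pairType.hasReflect());  // true: contains a reflect component
        System.out.println(pairType.hasSpecific()); // false: no specific-record component
        System.out.println(pairType.isGeneric());   // false: only Avros.generics types report true
      }
    }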

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/types/avro/AvrosTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/types/avro/AvrosTest.java b/crunch/src/test/java/org/apache/crunch/types/avro/AvrosTest.java
deleted file mode 100644
index 5622a56..0000000
--- a/crunch/src/test/java/org/apache/crunch/types/avro/AvrosTest.java
+++ /dev/null
@@ -1,325 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.avro;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNotSame;
-import static org.junit.Assert.assertTrue;
-
-import java.nio.ByteBuffer;
-import java.util.Collection;
-import java.util.Collections;
-
-import org.apache.avro.Schema;
-import org.apache.avro.Schema.Type;
-import org.apache.avro.generic.GenericData;
-import org.apache.avro.generic.GenericData.Record;
-import org.apache.avro.reflect.ReflectData;
-import org.apache.avro.util.Utf8;
-import org.apache.crunch.Pair;
-import org.apache.crunch.Tuple3;
-import org.apache.crunch.Tuple4;
-import org.apache.crunch.TupleN;
-import org.apache.crunch.test.CrunchTestSupport;
-import org.apache.crunch.test.Person;
-import org.apache.crunch.test.StringWrapper;
-import org.apache.crunch.types.DeepCopier;
-import org.apache.crunch.types.PTableType;
-import org.apache.crunch.types.PType;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.mapreduce.TaskInputOutputContext;
-import org.junit.Test;
-
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Lists;
-
-/**
- * TODO test Avros.register and Avros.containers
- */
-public class AvrosTest {
-
-  @Test
-  public void testNulls() throws Exception {
-    Void n = null;
-    testInputOutputFn(Avros.nulls(), n, n);
-  }
-
-  @Test
-  public void testStrings() throws Exception {
-    String s = "abc";
-    Utf8 w = new Utf8(s);
-    testInputOutputFn(Avros.strings(), s, w);
-  }
-
-  @Test
-  public void testInts() throws Exception {
-    int j = 55;
-    testInputOutputFn(Avros.ints(), j, j);
-  }
-
-  @Test
-  public void testLongs() throws Exception {
-    long j = Long.MAX_VALUE;
-    testInputOutputFn(Avros.longs(), j, j);
-  }
-
-  @Test
-  public void testFloats() throws Exception {
-    float j = Float.MIN_VALUE;
-    testInputOutputFn(Avros.floats(), j, j);
-  }
-
-  @Test
-  public void testDoubles() throws Exception {
-    double j = Double.MIN_VALUE;
-    testInputOutputFn(Avros.doubles(), j, j);
-  }
-
-  @Test
-  public void testBooleans() throws Exception {
-    boolean j = true;
-    testInputOutputFn(Avros.booleans(), j, j);
-  }
-
-  @Test
-  public void testBytes() throws Exception {
-    byte[] bytes = new byte[] { 17, 26, -98 };
-    ByteBuffer bb = ByteBuffer.wrap(bytes);
-    testInputOutputFn(Avros.bytes(), bb, bb);
-  }
-
-  @Test
-  public void testCollections() throws Exception {
-    Collection<String> j = Lists.newArrayList();
-    j.add("a");
-    j.add("b");
-    Schema collectionSchema = Schema.createArray(Schema.createUnion(ImmutableList.of(Avros.strings().getSchema(),
-        Schema.create(Type.NULL))));
-    GenericData.Array<Utf8> w = new GenericData.Array<Utf8>(2, collectionSchema);
-    w.add(new Utf8("a"));
-    w.add(new Utf8("b"));
-    testInputOutputFn(Avros.collections(Avros.strings()), j, w);
-  }
-
-  @Test
-  public void testNestedTables() throws Exception {
-    PTableType<Long, Long> pll = Avros.tableOf(Avros.longs(), Avros.longs());
-    String schema = Avros.tableOf(pll, Avros.strings()).getSchema().toString();
-    assertNotNull(schema);
-  }
-
-  @Test
-  public void testPairs() throws Exception {
-    AvroType<Pair<String, String>> at = Avros.pairs(Avros.strings(), Avros.strings());
-    Pair<String, String> j = Pair.of("a", "b");
-    GenericData.Record w = new GenericData.Record(at.getSchema());
-    w.put(0, new Utf8("a"));
-    w.put(1, new Utf8("b"));
-    testInputOutputFn(at, j, w);
-  }
-
-  @Test
-  public void testPairEquals() throws Exception {
-    AvroType<Pair<Long, ByteBuffer>> at1 = Avros.pairs(Avros.longs(), Avros.bytes());
-    AvroType<Pair<Long, ByteBuffer>> at2 = Avros.pairs(Avros.longs(), Avros.bytes());
-    assertEquals(at1, at2);
-    assertEquals(at1.hashCode(), at2.hashCode());
-  }
-
-  @Test
-  @SuppressWarnings("rawtypes")
-  public void testTriples() throws Exception {
-    AvroType at = Avros.triples(Avros.strings(), Avros.strings(), Avros.strings());
-    Tuple3 j = Tuple3.of("a", "b", "c");
-    GenericData.Record w = new GenericData.Record(at.getSchema());
-    w.put(0, new Utf8("a"));
-    w.put(1, new Utf8("b"));
-    w.put(2, new Utf8("c"));
-    testInputOutputFn(at, j, w);
-  }
-
-  @Test
-  @SuppressWarnings("rawtypes")
-  public void testQuads() throws Exception {
-    AvroType at = Avros.quads(Avros.strings(), Avros.strings(), Avros.strings(), Avros.strings());
-    Tuple4 j = Tuple4.of("a", "b", "c", "d");
-    GenericData.Record w = new GenericData.Record(at.getSchema());
-    w.put(0, new Utf8("a"));
-    w.put(1, new Utf8("b"));
-    w.put(2, new Utf8("c"));
-    w.put(3, new Utf8("d"));
-    testInputOutputFn(at, j, w);
-  }
-
-  @Test
-  @SuppressWarnings("rawtypes")
-  public void testTupleN() throws Exception {
-    AvroType at = Avros.tuples(Avros.strings(), Avros.strings(), Avros.strings(), Avros.strings(), Avros.strings());
-    TupleN j = new TupleN("a", "b", "c", "d", "e");
-    GenericData.Record w = new GenericData.Record(at.getSchema());
-    w.put(0, new Utf8("a"));
-    w.put(1, new Utf8("b"));
-    w.put(2, new Utf8("c"));
-    w.put(3, new Utf8("d"));
-    w.put(4, new Utf8("e"));
-    testInputOutputFn(at, j, w);
-
-  }
-
-  @Test
-  @SuppressWarnings("rawtypes")
-  public void testWritables() throws Exception {
-    AvroType at = Avros.writables(LongWritable.class);
-    
-    TaskInputOutputContext<?, ?, ?, ?> testContext = CrunchTestSupport.getTestContext(new Configuration());
-    at.getInputMapFn().setContext(testContext);
-    at.getInputMapFn().initialize();
-    at.getOutputMapFn().setContext(testContext);
-    at.getOutputMapFn().initialize();
-    
-    LongWritable lw = new LongWritable(1729L);
-    assertEquals(lw, at.getInputMapFn().map(at.getOutputMapFn().map(lw)));
-  }
-
-  @Test
-  @SuppressWarnings("rawtypes")
-  public void testTableOf() throws Exception {
-    AvroType at = Avros.tableOf(Avros.strings(), Avros.strings());
-    Pair<String, String> j = Pair.of("a", "b");
-    org.apache.avro.mapred.Pair w = new org.apache.avro.mapred.Pair(at.getSchema());
-    w.put(0, new Utf8("a"));
-    w.put(1, new Utf8("b"));
-    // TODO update this after resolving the o.a.a.m.Pair.equals issue
-    initialize(at);
-    assertEquals(j, at.getInputMapFn().map(w));
-    org.apache.avro.mapred.Pair converted = (org.apache.avro.mapred.Pair) at.getOutputMapFn().map(j);
-    assertEquals(w.key(), converted.key());
-    assertEquals(w.value(), converted.value());
-  }
-
-  private static void initialize(PType ptype) {
-    ptype.getInputMapFn().initialize();
-    ptype.getOutputMapFn().initialize();
-  }
-
-  @SuppressWarnings({ "unchecked", "rawtypes" })
-  protected static void testInputOutputFn(PType ptype, Object java, Object avro) {
-    initialize(ptype);
-    assertEquals(java, ptype.getInputMapFn().map(avro));
-    assertEquals(avro, ptype.getOutputMapFn().map(java));
-  }
-
-  @Test
-  public void testIsPrimitive_PrimitiveMappedType() {
-    assertTrue(Avros.isPrimitive(Avros.ints()));
-  }
-
-  @Test
-  public void testIsPrimitive_TruePrimitiveValue() {
-    AvroType truePrimitiveAvroType = new AvroType(int.class, Schema.create(Type.INT), new DeepCopier.NoOpDeepCopier());
-    assertTrue(Avros.isPrimitive(truePrimitiveAvroType));
-  }
-
-  @Test
-  public void testIsPrimitive_False() {
-    assertFalse(Avros.isPrimitive(Avros.reflects(Person.class)));
-  }
-
-  @Test
-  public void testPairs_Generic() {
-    Schema schema = ReflectData.get().getSchema(IntWritable.class);
-
-    GenericData.Record recordA = new GenericData.Record(schema);
-    GenericData.Record recordB = new GenericData.Record(schema);
-
-    AvroType<Pair<Record, Record>> pairType = Avros.pairs(Avros.generics(schema), Avros.generics(schema));
-    Pair<Record, Record> pair = Pair.of(recordA, recordB);
-    pairType.getOutputMapFn().initialize();
-    pairType.getInputMapFn().initialize();
-    Object mapped = pairType.getOutputMapFn().map(pair);
-    Pair<Record, Record> doubleMappedPair = pairType.getInputMapFn().map(mapped);
-
-    assertEquals(pair, doubleMappedPair);
-    mapped.hashCode();
-  }
-
-  @Test
-  public void testPairs_Reflect() {
-    IntWritable intWritableA = new IntWritable(1);
-    IntWritable intWritableB = new IntWritable(2);
-
-    AvroType<Pair<IntWritable, IntWritable>> pairType = Avros.pairs(Avros.reflects(IntWritable.class),
-        Avros.reflects(IntWritable.class));
-    Pair<IntWritable, IntWritable> pair = Pair.of(intWritableA, intWritableB);
-    pairType.getOutputMapFn().initialize();
-    pairType.getInputMapFn().initialize();
-    Object mapped = pairType.getOutputMapFn().map(pair);
-
-    Pair<IntWritable, IntWritable> doubleMappedPair = pairType.getInputMapFn().map(mapped);
-
-    assertEquals(pair, doubleMappedPair);
-  }
-
-  @Test
-  public void testPairs_Specific() {
-    Person personA = new Person();
-    Person personB = new Person();
-
-    personA.age = 1;
-    personA.name = "A";
-    personA.siblingnames = Collections.<CharSequence> emptyList();
-
-    personB.age = 2;
-    personB.name = "B";
-    personB.siblingnames = Collections.<CharSequence> emptyList();
-
-    AvroType<Pair<Person, Person>> pairType = Avros.pairs(Avros.records(Person.class), Avros.records(Person.class));
-
-    Pair<Person, Person> pair = Pair.of(personA, personB);
-    pairType.getOutputMapFn().initialize();
-    pairType.getInputMapFn().initialize();
-
-    Object mapped = pairType.getOutputMapFn().map(pair);
-    Pair<Person, Person> doubleMappedPair = pairType.getInputMapFn().map(mapped);
-
-    assertEquals(pair, doubleMappedPair);
-
-  }
-
-  @Test
-  public void testPairOutputMapFn_VerifyNoObjectReuse() {
-    StringWrapper stringWrapper = new StringWrapper("Test");
-
-    Pair<Integer, StringWrapper> pair = Pair.of(1, stringWrapper);
-
-    AvroType<Pair<Integer, StringWrapper>> pairType = Avros.pairs(Avros.ints(), Avros.reflects(StringWrapper.class));
-
-    pairType.getOutputMapFn().initialize();
-
-    Object outputMappedValueA = pairType.getOutputMapFn().map(pair);
-    Object outputMappedValueB = pairType.getOutputMapFn().map(pair);
-
-    assertEquals(outputMappedValueA, outputMappedValueB);
-    assertNotSame(outputMappedValueA, outputMappedValueB);
-  }
-
-}
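
The tests above round-trip each PType through its input and output map functions;
the same Avros factory methods can also be combined just to build a composite type
and inspect the single Avro schema Crunch derives for it. A minimal sketch (the
class name AvroSchemaSketch and the particular key/value combination are invented
for illustration):

    import org.apache.avro.Schema;
    import org.apache.crunch.types.avro.Avros;

    public class AvroSchemaSketch {
      public static void main(String[] args) {
        // A table of string keys to (int, string) pairs; getSchema() exposes the
        // Avro schema used to (de)serialize the composite type.
        Schema schema = Avros.tableOf(
            Avros.strings(),
            Avros.pairs(Avros.ints(), Avros.strings())).getSchema();

        System.out.println(schema.toString(true)); // pretty-printed schema
      }
    }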

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/types/writable/GenericArrayWritableTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/types/writable/GenericArrayWritableTest.java b/crunch/src/test/java/org/apache/crunch/types/writable/GenericArrayWritableTest.java
deleted file mode 100644
index c807a90..0000000
--- a/crunch/src/test/java/org/apache/crunch/types/writable/GenericArrayWritableTest.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.writable;
-
-import static org.hamcrest.Matchers.hasItems;
-import static org.hamcrest.Matchers.is;
-import static org.hamcrest.Matchers.not;
-import static org.hamcrest.Matchers.sameInstance;
-import static org.junit.Assert.assertThat;
-
-import java.util.Arrays;
-
-import org.apache.crunch.test.Tests;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-import org.junit.Test;
-
-
-public class GenericArrayWritableTest {
-
-  @Test
-  public void testEmpty() {
-    GenericArrayWritable<Text> src = new GenericArrayWritable<Text>(Text.class);
-    src.set(new Text[0]);
-
-    GenericArrayWritable<Text> dest = Tests.roundtrip(src, new GenericArrayWritable<Text>());
-
-    assertThat(dest.get().length, is(0));
-  }
-
-  @Test
-  public void testNonEmpty() {
-    GenericArrayWritable<Text> src = new GenericArrayWritable<Text>(Text.class);
-    src.set(new Text[] { new Text("foo"), new Text("bar") });
-
-    GenericArrayWritable<Text> dest = Tests.roundtrip(src, new GenericArrayWritable<Text>());
-
-    assertThat(src.get(), not(sameInstance(dest.get())));
-    assertThat(dest.get().length, is(2));
-    assertThat(Arrays.asList(dest.get()), hasItems((Writable) new Text("foo"), new Text("bar")));
-  }
-
-  @Test
-  public void testNulls() {
-    GenericArrayWritable<Text> src = new GenericArrayWritable<Text>(Text.class);
-    src.set(new Text[] { new Text("a"), null, new Text("b") });
-
-    GenericArrayWritable<Text> dest = Tests.roundtrip(src, new GenericArrayWritable<Text>());
-
-    assertThat(src.get(), not(sameInstance(dest.get())));
-    assertThat(dest.get().length, is(3));
-    assertThat(Arrays.asList(dest.get()), hasItems((Writable) new Text("a"), new Text("b"), null));
-  }
-
-}
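
These cases rely on the Tests.roundtrip helper to push a Writable through Hadoop
serialization and back; the helper is assumed here to be roughly equivalent to the
manual round trip below, which uses only stock Hadoop classes (the class name
WritableRoundTripSketch is invented for illustration):

    import java.io.IOException;

    import org.apache.hadoop.io.DataInputBuffer;
    import org.apache.hadoop.io.DataOutputBuffer;
    import org.apache.hadoop.io.Text;

    public class WritableRoundTripSketch {
      public static void main(String[] args) throws IOException {
        Text src = new Text("foo");

        // Serialize the source Writable into an in-memory buffer.
        DataOutputBuffer out = new DataOutputBuffer();
        src.write(out);

        // Deserialize the same bytes into a fresh instance.
        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), out.getLength());
        Text dest = new Text();
        dest.readFields(in);

        System.out.println(src.equals(dest) && src != dest); // expected: true
      }
    }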

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/types/writable/WritableDeepCopierTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/types/writable/WritableDeepCopierTest.java b/crunch/src/test/java/org/apache/crunch/types/writable/WritableDeepCopierTest.java
deleted file mode 100644
index c49491b..0000000
--- a/crunch/src/test/java/org/apache/crunch/types/writable/WritableDeepCopierTest.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.writable;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotSame;
-import static org.junit.Assert.assertNull;
-
-import org.apache.hadoop.io.Text;
-import org.junit.Before;
-import org.junit.Test;
-
-public class WritableDeepCopierTest {
-
-  private WritableDeepCopier<Text> deepCopier;
-
-  @Before
-  public void setUp() {
-    deepCopier = new WritableDeepCopier<Text>(Text.class);
-  }
-
-  @Test
-  public void testDeepCopy() {
-    Text text = new Text("value");
-    Text deepCopy = deepCopier.deepCopy(text);
-
-    assertEquals(text, deepCopy);
-    assertNotSame(text, deepCopy);
-  }
-  
-  @Test
-  public void testDeepCopy_Null() {
-    Text text = null;
-    Text deepCopy = deepCopier.deepCopy(text);
-    
-    assertNull(deepCopy);
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/types/writable/WritableGroupedTableTypeTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/types/writable/WritableGroupedTableTypeTest.java b/crunch/src/test/java/org/apache/crunch/types/writable/WritableGroupedTableTypeTest.java
deleted file mode 100644
index f6c201b..0000000
--- a/crunch/src/test/java/org/apache/crunch/types/writable/WritableGroupedTableTypeTest.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.writable;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotSame;
-import static org.junit.Assert.assertSame;
-
-import java.util.List;
-
-import org.apache.crunch.Pair;
-import org.apache.crunch.types.PGroupedTableType;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.Text;
-import org.junit.Test;
-
-import com.google.common.collect.Lists;
-
-public class WritableGroupedTableTypeTest {
-
-  @Test
-  public void testGetDetachedValue() {
-    Integer integerValue = 42;
-    Text textValue = new Text("forty-two");
-    Iterable<Text> inputTextIterable = Lists.newArrayList(textValue);
-    Pair<Integer, Iterable<Text>> pair = Pair.of(integerValue, inputTextIterable);
-
-    PGroupedTableType<Integer, Text> groupedTableType = Writables.tableOf(Writables.ints(),
-        Writables.writables(Text.class)).getGroupedTableType();
-    groupedTableType.initialize(new Configuration());
-
-    Pair<Integer, Iterable<Text>> detachedPair = groupedTableType.getDetachedValue(pair);
-
-    assertSame(integerValue, detachedPair.first());
-    List<Text> textList = Lists.newArrayList(detachedPair.second());
-    assertEquals(inputTextIterable, textList);
-    assertNotSame(textValue, textList.get(0));
-
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/types/writable/WritableTableTypeTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/types/writable/WritableTableTypeTest.java b/crunch/src/test/java/org/apache/crunch/types/writable/WritableTableTypeTest.java
deleted file mode 100644
index 697a28c..0000000
--- a/crunch/src/test/java/org/apache/crunch/types/writable/WritableTableTypeTest.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.writable;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotSame;
-import static org.junit.Assert.assertSame;
-
-import org.apache.crunch.Pair;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.Text;
-import org.junit.Test;
-
-public class WritableTableTypeTest {
-
-  @Test
-  public void testGetDetachedValue() {
-    Integer integerValue = 42;
-    Text textValue = new Text("forty-two");
-    Pair<Integer, Text> pair = Pair.of(integerValue, textValue);
-
-    WritableTableType<Integer, Text> tableType = Writables.tableOf(Writables.ints(),
-        Writables.writables(Text.class));
-    tableType.initialize(new Configuration());
-    Pair<Integer, Text> detachedPair = tableType.getDetachedValue(pair);
-
-    assertSame(integerValue, detachedPair.first());
-    assertEquals(textValue, detachedPair.second());
-    assertNotSame(textValue, detachedPair.second());
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/test/java/org/apache/crunch/types/writable/WritableTypeTest.java
----------------------------------------------------------------------
diff --git a/crunch/src/test/java/org/apache/crunch/types/writable/WritableTypeTest.java b/crunch/src/test/java/org/apache/crunch/types/writable/WritableTypeTest.java
deleted file mode 100644
index 65e946b..0000000
--- a/crunch/src/test/java/org/apache/crunch/types/writable/WritableTypeTest.java
+++ /dev/null
@@ -1,97 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.writable;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotSame;
-
-import java.util.Collection;
-import java.util.Map;
-
-import org.apache.crunch.Pair;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.MapWritable;
-import org.apache.hadoop.io.Text;
-import org.junit.Test;
-
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-
-public class WritableTypeTest {
-
-  @Test(expected = IllegalStateException.class)
-  public void testGetDetachedValue_NotInitialized() {
-    WritableType<Text, Text> textWritableType = Writables.writables(Text.class);
-    Text value = new Text("test");
-
-    // Calling getDetachedValue without first calling initialize should throw an
-    // exception
-    textWritableType.getDetachedValue(value);
-  }
-
-  @Test
-  public void testGetDetachedValue_CustomWritable() {
-    WritableType<Text, Text> textWritableType = Writables.writables(Text.class);
-    textWritableType.initialize(new Configuration());
-    Text value = new Text("test");
-
-    Text detachedValue = textWritableType.getDetachedValue(value);
-    assertEquals(value, detachedValue);
-    assertNotSame(value, detachedValue);
-  }
-
-  @Test
-  public void testGetDetachedValue_Collection() {
-    Collection<Text> textCollection = Lists.newArrayList(new Text("value"));
-    WritableType<Collection<Text>, GenericArrayWritable<Text>> ptype = Writables
-        .collections(Writables.writables(Text.class));
-    ptype.initialize(new Configuration());
-
-    Collection<Text> detachedCollection = ptype.getDetachedValue(textCollection);
-    assertEquals(textCollection, detachedCollection);
-    assertNotSame(textCollection.iterator().next(), detachedCollection.iterator().next());
-  }
-
-  @Test
-  public void testGetDetachedValue_Tuple() {
-    Pair<Text, Text> textPair = Pair.of(new Text("one"), new Text("two"));
-    WritableType<Pair<Text, Text>, TupleWritable> ptype = Writables.pairs(
-        Writables.writables(Text.class), Writables.writables(Text.class));
-    ptype.initialize(new Configuration());
-
-    Pair<Text, Text> detachedPair = ptype.getDetachedValue(textPair);
-    assertEquals(textPair, detachedPair);
-    assertNotSame(textPair.first(), detachedPair.first());
-    assertNotSame(textPair.second(), detachedPair.second());
-  }
-
-  @Test
-  public void testGetDetachedValue_Map() {
-    Map<String, Text> stringTextMap = Maps.newHashMap();
-    stringTextMap.put("key", new Text("value"));
-
-    WritableType<Map<String, Text>, MapWritable> ptype = Writables.maps(Writables
-        .writables(Text.class));
-    ptype.initialize(new Configuration());
-    Map<String, Text> detachedMap = ptype.getDetachedValue(stringTextMap);
-
-    assertEquals(stringTextMap, detachedMap);
-    assertNotSame(stringTextMap.get("key"), detachedMap.get("key"));
-  }
-
-}
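
The detached-value tests in WritableTypeTest above and in WritableTableTypeTest
describe the behavior users depend on when holding on to values from materialized
or grouped collections: getDetachedValue() copies mutable Writables while returning
immutable keys such as Integer as-is, and it throws IllegalStateException if the
type has not been initialized. A minimal sketch using only the factory methods
shown above (the class name DetachedValueSketch is invented for illustration):

    import org.apache.crunch.Pair;
    import org.apache.crunch.types.writable.WritableTableType;
    import org.apache.crunch.types.writable.Writables;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.io.Text;

    public class DetachedValueSketch {
      public static void main(String[] args) {
        WritableTableType<Integer, Text> tableType =
            Writables.tableOf(Writables.ints(), Writables.writables(Text.class));
        // getDetachedValue() requires the type to be initialized first.
        tableType.initialize(new Configuration());

        Pair<Integer, Text> original = Pair.of(42, new Text("forty-two"));
        Pair<Integer, Text> detached = tableType.getDetachedValue(original);

        System.out.println(original.first() == detached.first());        // true: key reused
        System.out.println(original.second() != detached.second());      // true: value deep-copied
        System.out.println(original.second().equals(detached.second())); // true: same contents
      }
    }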


[43/43] git commit: CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
CRUNCH-196: crunch -> crunch-core rename to fix build issues


Project: http://git-wip-us.apache.org/repos/asf/crunch/repo
Commit: http://git-wip-us.apache.org/repos/asf/crunch/commit/890e0086
Tree: http://git-wip-us.apache.org/repos/asf/crunch/tree/890e0086
Diff: http://git-wip-us.apache.org/repos/asf/crunch/diff/890e0086

Branch: refs/heads/master
Commit: 890e0086a12df5006a23cfdd86f3703f929cb147
Parents: cbc7c2f
Author: Josh Wills <jw...@cloudera.com>
Authored: Tue Apr 23 13:37:16 2013 -0700
Committer: Josh Wills <jw...@cloudera.com>
Committed: Tue Apr 23 13:39:00 2013 -0700

----------------------------------------------------------------------
 crunch-contrib/pom.xml                             |    2 +-
 crunch-core/pom.xml                                |  182 +
 .../it/java/org/apache/crunch/CancelJobsIT.java    |   84 +
 .../src/it/java/org/apache/crunch/CleanTextIT.java |   82 +
 .../org/apache/crunch/CollectionPObjectIT.java     |   98 +
 .../it/java/org/apache/crunch/CollectionsIT.java   |  117 +
 .../org/apache/crunch/CollectionsLengthIT.java     |   70 +
 .../org/apache/crunch/DeepCopyCustomTuplesIT.java  |   79 +
 .../src/it/java/org/apache/crunch/EnumPairIT.java  |   59 +
 .../org/apache/crunch/FirstElementPObjectIT.java   |   61 +
 .../apache/crunch/IterableReuseProtectionIT.java   |   89 +
 .../it/java/org/apache/crunch/MRPipelineIT.java    |   78 +
 .../it/java/org/apache/crunch/MapPObjectIT.java    |  101 +
 .../src/it/java/org/apache/crunch/MapsIT.java      |  101 +
 .../it/java/org/apache/crunch/MaterializeIT.java   |  139 +
 .../java/org/apache/crunch/MaterializeToMapIT.java |   81 +
 .../java/org/apache/crunch/MultipleOutputIT.java   |  175 +
 .../org/apache/crunch/PCollectionGetSizeIT.java    |  151 +
 .../src/it/java/org/apache/crunch/PObjectsIT.java  |   99 +
 .../java/org/apache/crunch/PTableKeyValueIT.java   |  103 +
 .../src/it/java/org/apache/crunch/PageRankIT.java  |  168 +
 .../org/apache/crunch/StageResultsCountersIT.java  |  135 +
 .../it/java/org/apache/crunch/TermFrequencyIT.java |  135 +
 .../src/it/java/org/apache/crunch/TextPairIT.java  |   72 +
 .../src/it/java/org/apache/crunch/TfIdfIT.java     |  224 +
 .../org/apache/crunch/TupleNClassCastBugIT.java    |   95 +
 .../org/apache/crunch/UnionFromSameSourceIT.java   |  132 +
 .../src/it/java/org/apache/crunch/UnionIT.java     |  136 +
 .../it/java/org/apache/crunch/UnionResultsIT.java  |   80 +
 .../src/it/java/org/apache/crunch/WordCountIT.java |  171 +
 .../java/org/apache/crunch/fn/AggregatorsIT.java   |   83 +
 .../crunch/impl/mem/MemPipelineFileWritingIT.java  |   58 +
 .../crunch/impl/mr/collect/UnionCollectionIT.java  |  154 +
 .../apache/crunch/io/CompositePathIterableIT.java  |   84 +
 .../it/java/org/apache/crunch/io/NLineInputIT.java |   72 +
 .../java/org/apache/crunch/io/TextFileTableIT.java |   56 +
 .../crunch/io/avro/AvroFileSourceTargetIT.java     |  140 +
 .../org/apache/crunch/io/avro/AvroPipelineIT.java  |   95 +
 .../org/apache/crunch/io/avro/AvroReflectIT.java   |  109 +
 .../org/apache/crunch/io/avro/AvroWritableIT.java  |   89 +
 .../it/java/org/apache/crunch/lib/AggregateIT.java |  231 +
 .../java/org/apache/crunch/lib/AvroTypeSortIT.java |  145 +
 .../it/java/org/apache/crunch/lib/CogroupIT.java   |  112 +
 .../org/apache/crunch/lib/SecondarySortIT.java     |   65 +
 .../src/it/java/org/apache/crunch/lib/SetIT.java   |  114 +
 .../java/org/apache/crunch/lib/SortByValueIT.java  |   84 +
 .../src/it/java/org/apache/crunch/lib/SortIT.java  |  327 +
 .../apache/crunch/lib/SpecificAvroGroupByIT.java   |  119 +
 .../apache/crunch/lib/join/FullOuterJoinIT.java    |   51 +
 .../org/apache/crunch/lib/join/InnerJoinIT.java    |   51 +
 .../org/apache/crunch/lib/join/JoinTester.java     |  108 +
 .../apache/crunch/lib/join/LeftOuterJoinIT.java    |   51 +
 .../org/apache/crunch/lib/join/MapsideJoinIT.java  |  158 +
 .../crunch/lib/join/MultiAvroSchemaJoinIT.java     |  121 +
 .../apache/crunch/lib/join/RightOuterJoinIT.java   |   51 +
 .../org/apache/crunch/test/TemporaryPaths.java     |   40 +
 .../src/it/java/org/apache/crunch/test/Tests.java  |  124 +
 crunch-core/src/it/resources/customers.txt         |    4 +
 crunch-core/src/it/resources/docs.txt              |    6 +
 crunch-core/src/it/resources/letters.txt           |    2 +
 crunch-core/src/it/resources/log4j.properties      |   29 +
 crunch-core/src/it/resources/maugham.txt           |29112 +++++++++++++++
 crunch-core/src/it/resources/orders.txt            |    4 +
 .../org/apache/crunch/UnionITData/src1.txt         |    5 +
 .../org/apache/crunch/UnionITData/src2.txt         |    3 +
 .../apache/crunch/fn/AggregatorsITData/ints.txt    |    5 +
 .../org/apache/crunch/lib/CogroupITData/src1.txt   |    4 +
 .../org/apache/crunch/lib/CogroupITData/src2.txt   |    4 +
 .../src/it/resources/secondary_sort_input.txt      |    7 +
 crunch-core/src/it/resources/set1.txt              |    4 +
 crunch-core/src/it/resources/set2.txt              |    3 +
 crunch-core/src/it/resources/shakes.txt            | 3667 ++
 crunch-core/src/it/resources/sort_by_value.txt     |    5 +
 crunch-core/src/it/resources/urls.txt              |   11 +
 .../main/java/org/apache/crunch/Aggregator.java    |   86 +
 .../src/main/java/org/apache/crunch/CombineFn.java | 1211 +
 .../org/apache/crunch/CrunchRuntimeException.java  |   54 +
 .../src/main/java/org/apache/crunch/DoFn.java      |  162 +
 .../src/main/java/org/apache/crunch/Emitter.java   |   37 +
 .../src/main/java/org/apache/crunch/FilterFn.java  |  244 +
 .../java/org/apache/crunch/GroupingOptions.java    |  167 +
 .../src/main/java/org/apache/crunch/MapFn.java     |   41 +
 .../main/java/org/apache/crunch/PCollection.java   |  245 +
 .../main/java/org/apache/crunch/PGroupedTable.java |   53 +
 .../src/main/java/org/apache/crunch/PObject.java   |   36 +
 .../src/main/java/org/apache/crunch/PTable.java    |  181 +
 .../src/main/java/org/apache/crunch/Pair.java      |  105 +
 .../java/org/apache/crunch/ParallelDoOptions.java  |   62 +
 .../src/main/java/org/apache/crunch/Pipeline.java  |  138 +
 .../java/org/apache/crunch/PipelineExecution.java  |   54 +
 .../java/org/apache/crunch/PipelineResult.java     |   76 +
 .../src/main/java/org/apache/crunch/Source.java    |   52 +
 .../main/java/org/apache/crunch/SourceTarget.java  |   26 +
 .../main/java/org/apache/crunch/TableSource.java   |   28 +
 .../java/org/apache/crunch/TableSourceTarget.java  |   25 +
 .../src/main/java/org/apache/crunch/Target.java    |   83 +
 .../src/main/java/org/apache/crunch/Tuple.java     |   36 +
 .../src/main/java/org/apache/crunch/Tuple3.java    |   96 +
 .../src/main/java/org/apache/crunch/Tuple4.java    |  105 +
 .../src/main/java/org/apache/crunch/TupleN.java    |   73 +
 .../java/org/apache/crunch/fn/Aggregators.java     | 1111 +
 .../java/org/apache/crunch/fn/CompositeMapFn.java  |   71 +
 .../java/org/apache/crunch/fn/ExtractKeyFn.java    |   50 +
 .../main/java/org/apache/crunch/fn/FilterFns.java  |  112 +
 .../main/java/org/apache/crunch/fn/IdentityFn.java |   39 +
 .../main/java/org/apache/crunch/fn/MapKeysFn.java  |   32 +
 .../java/org/apache/crunch/fn/MapValuesFn.java     |   32 +
 .../main/java/org/apache/crunch/fn/PairMapFn.java  |   65 +
 .../java/org/apache/crunch/fn/package-info.java    |   22 +
 .../mapreduce/TaskAttemptContextFactory.java       |   70 +
 .../lib/jobcontrol/CrunchControlledJob.java        |  325 +
 .../mapreduce/lib/jobcontrol/CrunchJobControl.java |  211 +
 .../org/apache/crunch/impl/SingleUseIterable.java  |   49 +
 .../org/apache/crunch/impl/mem/MemPipeline.java    |  275 +
 .../crunch/impl/mem/collect/MemCollection.java     |  295 +
 .../crunch/impl/mem/collect/MemGroupedTable.java   |  113 +
 .../apache/crunch/impl/mem/collect/MemTable.java   |  177 +
 .../apache/crunch/impl/mem/collect/Shuffler.java   |  149 +
 .../crunch/impl/mem/emit/InMemoryEmitter.java      |   57 +
 .../org/apache/crunch/impl/mem/package-info.java   |   22 +
 .../java/org/apache/crunch/impl/mr/MRPipeline.java |  396 +
 .../crunch/impl/mr/collect/DoCollectionImpl.java   |   74 +
 .../apache/crunch/impl/mr/collect/DoTableImpl.java |   84 +
 .../crunch/impl/mr/collect/InputCollection.java    |   85 +
 .../apache/crunch/impl/mr/collect/InputTable.java  |   86 +
 .../crunch/impl/mr/collect/PCollectionImpl.java    |  295 +
 .../crunch/impl/mr/collect/PGroupedTableImpl.java  |  144 +
 .../apache/crunch/impl/mr/collect/PTableBase.java  |  169 +
 .../crunch/impl/mr/collect/UnionCollection.java    |   80 +
 .../apache/crunch/impl/mr/collect/UnionTable.java  |   92 +
 .../crunch/impl/mr/emit/IntermediateEmitter.java   |   64 +
 .../crunch/impl/mr/emit/MultipleOutputEmitter.java |   56 +
 .../apache/crunch/impl/mr/emit/OutputEmitter.java  |   52 +
 .../impl/mr/exec/CappedExponentialCounter.java     |   40 +
 .../apache/crunch/impl/mr/exec/CrunchJobHooks.java |  153 +
 .../org/apache/crunch/impl/mr/exec/MRExecutor.java |  198 +
 .../org/apache/crunch/impl/mr/package-info.java    |   22 +
 .../org/apache/crunch/impl/mr/plan/DoNode.java     |  163 +
 .../apache/crunch/impl/mr/plan/DotfileWriter.java  |  238 +
 .../java/org/apache/crunch/impl/mr/plan/Edge.java  |  125 +
 .../java/org/apache/crunch/impl/mr/plan/Graph.java |  133 +
 .../apache/crunch/impl/mr/plan/GraphBuilder.java   |   92 +
 .../apache/crunch/impl/mr/plan/JobNameBuilder.java |   79 +
 .../apache/crunch/impl/mr/plan/JobPrototype.java   |  245 +
 .../crunch/impl/mr/plan/MSCROutputHandler.java     |   77 +
 .../apache/crunch/impl/mr/plan/MSCRPlanner.java    |  378 +
 .../org/apache/crunch/impl/mr/plan/NodePath.java   |  124 +
 .../crunch/impl/mr/plan/PlanningParameters.java    |   38 +
 .../org/apache/crunch/impl/mr/plan/Vertex.java     |  126 +
 .../apache/crunch/impl/mr/run/CrunchCombiner.java  |   27 +
 .../crunch/impl/mr/run/CrunchInputFormat.java      |   78 +
 .../crunch/impl/mr/run/CrunchInputSplit.java       |  116 +
 .../apache/crunch/impl/mr/run/CrunchMapper.java    |   73 +
 .../crunch/impl/mr/run/CrunchRecordReader.java     |   75 +
 .../apache/crunch/impl/mr/run/CrunchReducer.java   |   73 +
 .../crunch/impl/mr/run/CrunchTaskContext.java      |   86 +
 .../org/apache/crunch/impl/mr/run/NodeContext.java |   35 +
 .../java/org/apache/crunch/impl/mr/run/RTNode.java |  124 +
 .../crunch/impl/mr/run/RuntimeParameters.java      |   38 +
 .../src/main/java/org/apache/crunch/io/At.java     |  257 +
 .../apache/crunch/io/CompositePathIterable.java    |  102 +
 .../java/org/apache/crunch/io/CrunchInputs.java    |   71 +
 .../java/org/apache/crunch/io/CrunchOutputs.java   |  184 +
 .../org/apache/crunch/io/FileNamingScheme.java     |   58 +
 .../org/apache/crunch/io/FileReaderFactory.java    |   27 +
 .../java/org/apache/crunch/io/FormatBundle.java    |  121 +
 .../src/main/java/org/apache/crunch/io/From.java   |  324 +
 .../java/org/apache/crunch/io/MapReduceTarget.java |   27 +
 .../java/org/apache/crunch/io/OutputHandler.java   |   25 +
 .../main/java/org/apache/crunch/io/PathTarget.java |   36 +
 .../java/org/apache/crunch/io/PathTargetImpl.java  |   64 +
 .../java/org/apache/crunch/io/ReadableSource.java  |   41 +
 .../org/apache/crunch/io/ReadableSourceTarget.java |   30 +
 .../crunch/io/SequentialFileNamingScheme.java      |   51 +
 .../org/apache/crunch/io/SourceTargetHelper.java   |   48 +
 .../src/main/java/org/apache/crunch/io/To.java     |  153 +
 .../crunch/io/avro/AvroFileReaderFactory.java      |   96 +
 .../org/apache/crunch/io/avro/AvroFileSource.java  |   58 +
 .../crunch/io/avro/AvroFileSourceTarget.java       |   39 +
 .../org/apache/crunch/io/avro/AvroFileTarget.java  |   91 +
 .../apache/crunch/io/impl/AutoClosingIterator.java |   62 +
 .../org/apache/crunch/io/impl/FileSourceImpl.java  |  104 +
 .../apache/crunch/io/impl/FileTableSourceImpl.java |   41 +
 .../org/apache/crunch/io/impl/FileTargetImpl.java  |  162 +
 .../io/impl/ReadableSourcePathTargetImpl.java      |   39 +
 .../crunch/io/impl/ReadableSourceTargetImpl.java   |   37 +
 .../crunch/io/impl/SourcePathTargetImpl.java       |   50 +
 .../apache/crunch/io/impl/SourceTargetImpl.java    |   89 +
 .../crunch/io/impl/TableSourcePathTargetImpl.java  |   41 +
 .../crunch/io/impl/TableSourceTargetImpl.java      |   35 +
 .../java/org/apache/crunch/io/package-info.java    |   22 +
 .../org/apache/crunch/io/seq/SeqFileHelper.java    |   35 +
 .../apache/crunch/io/seq/SeqFileReaderFactory.java |  112 +
 .../org/apache/crunch/io/seq/SeqFileSource.java    |   47 +
 .../apache/crunch/io/seq/SeqFileSourceTarget.java  |   44 +
 .../apache/crunch/io/seq/SeqFileTableSource.java   |   57 +
 .../crunch/io/seq/SeqFileTableSourceTarget.java    |   54 +
 .../org/apache/crunch/io/seq/SeqFileTarget.java    |   55 +
 .../crunch/io/text/BZip2TextInputFormat.java       |  235 +
 .../apache/crunch/io/text/CBZip2InputStream.java   |  980 +
 .../java/org/apache/crunch/io/text/LineParser.java |  125 +
 .../org/apache/crunch/io/text/NLineFileSource.java |   77 +
 .../crunch/io/text/TextFileReaderFactory.java      |   83 +
 .../org/apache/crunch/io/text/TextFileSource.java  |   73 +
 .../crunch/io/text/TextFileSourceTarget.java       |   44 +
 .../apache/crunch/io/text/TextFileTableSource.java |   81 +
 .../crunch/io/text/TextFileTableSourceTarget.java  |   63 +
 .../org/apache/crunch/io/text/TextFileTarget.java  |  109 +
 .../main/java/org/apache/crunch/lib/Aggregate.java |  272 +
 .../main/java/org/apache/crunch/lib/Cartesian.java |  216 +
 .../main/java/org/apache/crunch/lib/Cogroup.java   |  106 +
 .../main/java/org/apache/crunch/lib/Distinct.java  |  126 +
 .../src/main/java/org/apache/crunch/lib/Join.java  |  181 +
 .../main/java/org/apache/crunch/lib/PTables.java   |  117 +
 .../main/java/org/apache/crunch/lib/Sample.java    |  217 +
 .../java/org/apache/crunch/lib/SampleUtils.java    |  168 +
 .../java/org/apache/crunch/lib/SecondarySort.java  |  118 +
 .../src/main/java/org/apache/crunch/lib/Set.java   |  118 +
 .../src/main/java/org/apache/crunch/lib/Sort.java  |  294 +
 .../apache/crunch/lib/join/FullOuterJoinFn.java    |  102 +
 .../org/apache/crunch/lib/join/InnerJoinFn.java    |   78 +
 .../java/org/apache/crunch/lib/join/JoinFn.java    |   81 +
 .../java/org/apache/crunch/lib/join/JoinUtils.java |  126 +
 .../apache/crunch/lib/join/LeftOuterJoinFn.java    |   98 +
 .../org/apache/crunch/lib/join/MapsideJoin.java    |  164 +
 .../apache/crunch/lib/join/RightOuterJoinFn.java   |   83 +
 .../org/apache/crunch/lib/join/package-info.java   |   22 +
 .../java/org/apache/crunch/lib/package-info.java   |   22 +
 .../org/apache/crunch/lib/sort/Comparators.java    |  187 +
 .../java/org/apache/crunch/lib/sort/SortFns.java   |  210 +
 .../crunch/lib/sort/TotalOrderPartitioner.java     |  145 +
 .../crunch/materialize/MaterializableIterable.java |   81 +
 .../crunch/materialize/MaterializableMap.java      |   50 +
 .../materialize/pobject/CollectionPObject.java     |   55 +
 .../materialize/pobject/FirstElementPObject.java   |   50 +
 .../crunch/materialize/pobject/MapPObject.java     |   62 +
 .../crunch/materialize/pobject/PObjectImpl.java    |   85 +
 .../main/java/org/apache/crunch/package-info.java  |   25 +
 .../apache/crunch/types/CollectionDeepCopier.java  |   57 +
 .../java/org/apache/crunch/types/Converter.java    |   41 +
 .../java/org/apache/crunch/types/DeepCopier.java   |   60 +
 .../org/apache/crunch/types/MapDeepCopier.java     |   54 +
 .../org/apache/crunch/types/PGroupedTableType.java |  141 +
 .../java/org/apache/crunch/types/PTableType.java   |   44 +
 .../main/java/org/apache/crunch/types/PType.java   |   86 +
 .../java/org/apache/crunch/types/PTypeFamily.java  |   77 +
 .../java/org/apache/crunch/types/PTypeUtils.java   |   66 +
 .../main/java/org/apache/crunch/types/PTypes.java  |  252 +
 .../main/java/org/apache/crunch/types/Protos.java  |  173 +
 .../org/apache/crunch/types/TupleDeepCopier.java   |   65 +
 .../java/org/apache/crunch/types/TupleFactory.java |  134 +
 .../apache/crunch/types/avro/AvroCapabilities.java |  106 +
 .../apache/crunch/types/avro/AvroDeepCopier.java   |  209 +
 .../crunch/types/avro/AvroGroupedTableType.java    |  114 +
 .../apache/crunch/types/avro/AvroInputFormat.java  |   41 +
 .../apache/crunch/types/avro/AvroKeyConverter.java |   65 +
 .../apache/crunch/types/avro/AvroOutputFormat.java |   87 +
 .../crunch/types/avro/AvroPairConverter.java       |  108 +
 .../apache/crunch/types/avro/AvroRecordReader.java |  114 +
 .../apache/crunch/types/avro/AvroTableType.java    |  151 +
 .../crunch/types/avro/AvroTextOutputFormat.java    |   60 +
 .../org/apache/crunch/types/avro/AvroType.java     |  199 +
 .../apache/crunch/types/avro/AvroTypeFamily.java   |  164 +
 .../crunch/types/avro/AvroUtf8InputFormat.java     |   98 +
 .../java/org/apache/crunch/types/avro/Avros.java   |  709 +
 .../crunch/types/avro/ReflectDataFactory.java      |   41 +
 .../crunch/types/avro/SafeAvroSerialization.java   |  145 +
 .../org/apache/crunch/types/avro/package-info.java |   22 +
 .../java/org/apache/crunch/types/package-info.java |   22 +
 .../types/writable/GenericArrayWritable.java       |  135 +
 .../crunch/types/writable/TextMapWritable.java     |   88 +
 .../crunch/types/writable/TupleWritable.java       |  224 +
 .../crunch/types/writable/WritableDeepCopier.java  |   70 +
 .../types/writable/WritableGroupedTableType.java   |   85 +
 .../types/writable/WritablePairConverter.java      |   62 +
 .../crunch/types/writable/WritableTableType.java   |  130 +
 .../apache/crunch/types/writable/WritableType.java |  133 +
 .../crunch/types/writable/WritableTypeFamily.java  |  147 +
 .../types/writable/WritableValueConverter.java     |   60 +
 .../apache/crunch/types/writable/Writables.java    |  588 +
 .../apache/crunch/types/writable/package-info.java |   22 +
 .../java/org/apache/crunch/util/CrunchTool.java    |  118 +
 .../java/org/apache/crunch/util/DistCache.java     |  231 +
 .../org/apache/crunch/util/PartitionUtils.java     |   34 +
 .../main/java/org/apache/crunch/util/Tuples.java   |  150 +
 .../java/org/apache/crunch/util/package-info.java  |   22 +
 crunch-core/src/main/resources/log4j.properties    |   24 +
 crunch-core/src/site/site.xml                      |   34 +
 crunch-core/src/test/avro/employee.avsc            |   26 +
 crunch-core/src/test/avro/person.avsc              |   26 +
 .../src/test/java/org/apache/crunch/AndFnTest.java |   77 +
 .../test/java/org/apache/crunch/CombineFnTest.java |  222 +
 .../src/test/java/org/apache/crunch/NotFnTest.java |   72 +
 .../src/test/java/org/apache/crunch/OrFnTest.java  |   78 +
 .../src/test/java/org/apache/crunch/PairTest.java  |   66 +
 .../src/test/java/org/apache/crunch/TupleTest.java |  139 +
 .../test/java/org/apache/crunch/WriteModeTest.java |  103 +
 .../java/org/apache/crunch/fn/AggregatorsTest.java |  239 +
 .../org/apache/crunch/fn/ExtractKeyFnTest.java     |   44 +
 .../java/org/apache/crunch/fn/FilterFnTest.java    |   85 +
 .../java/org/apache/crunch/fn/MapKeysTest.java     |   51 +
 .../java/org/apache/crunch/fn/MapValuesTest.java   |   50 +
 .../java/org/apache/crunch/fn/PairMapTest.java     |   52 +
 .../org/apache/crunch/fn/StoreLastEmitter.java     |   41 +
 .../apache/crunch/impl/SingleUseIterableTest.java  |   54 +
 .../org/apache/crunch/impl/mr/MRPipelineTest.java  |   86 +
 .../impl/mr/collect/DoCollectionImplTest.java      |  112 +
 .../crunch/impl/mr/collect/DoTableImplTest.java    |   86 +
 .../impl/mr/emit/IntermediateEmitterTest.java      |   83 +
 .../impl/mr/exec/CappedExponentialCounterTest.java |   42 +
 .../crunch/impl/mr/exec/CrunchJobHooksTest.java    |   42 +
 .../crunch/impl/mr/plan/DotfileWriterTest.java     |  132 +
 .../crunch/impl/mr/plan/JobNameBuilderTest.java    |   41 +
 .../crunch/io/SequentialFileNamingSchemeTest.java  |   84 +
 .../apache/crunch/io/SourceTargetHelperTest.java   |   59 +
 .../crunch/io/avro/AvroFileReaderFactoryTest.java  |  184 +
 .../apache/crunch/io/avro/AvroFileSourceTest.java  |   91 +
 .../lib/AvroIndexedRecordPartitionerTest.java      |   98 +
 .../java/org/apache/crunch/lib/CartesianTest.java  |   77 +
 .../java/org/apache/crunch/lib/DistinctTest.java   |   52 +
 .../java/org/apache/crunch/lib/SampleTest.java     |   71 +
 .../org/apache/crunch/lib/SecondarySortTest.java   |   53 +
 .../crunch/lib/TupleWritablePartitionerTest.java   |   68 +
 .../lib/join/BrokenLeftAndOuterJoinTest.java       |   90 +
 .../crunch/lib/join/FullOuterJoinFnTest.java       |   48 +
 .../apache/crunch/lib/join/InnerJoinFnTest.java    |   42 +
 .../org/apache/crunch/lib/join/JoinFnTestBase.java |   82 +
 .../apache/crunch/lib/join/LeftOuterJoinTest.java  |   46 +
 .../crunch/lib/join/RightOuterJoinFnTest.java      |   46 +
 .../java/org/apache/crunch/test/CountersTest.java  |   70 +
 .../java/org/apache/crunch/test/StringWrapper.java |  102 +
 .../crunch/types/CollectionDeepCopierTest.java     |   61 +
 .../org/apache/crunch/types/MapDeepCopierTest.java |   63 +
 .../org/apache/crunch/types/PTypeUtilsTest.java    |   89 +
 .../java/org/apache/crunch/types/PTypesTest.java   |   34 +
 .../apache/crunch/types/TupleDeepCopierTest.java   |   77 +
 .../org/apache/crunch/types/TupleFactoryTest.java  |   69 +
 .../crunch/types/avro/AvroDeepCopierTest.java      |  107 +
 .../types/avro/AvroGroupedTableTypeTest.java       |   60 +
 .../crunch/types/avro/AvroTableTypeTest.java       |   72 +
 .../org/apache/crunch/types/avro/AvroTypeTest.java |  279 +
 .../org/apache/crunch/types/avro/AvrosTest.java    |  325 +
 .../types/writable/GenericArrayWritableTest.java   |   70 +
 .../types/writable/WritableDeepCopierTest.java     |   54 +
 .../writable/WritableGroupedTableTypeTest.java     |   56 +
 .../types/writable/WritableTableTypeTest.java      |   47 +
 .../crunch/types/writable/WritableTypeTest.java    |   97 +
 .../crunch/types/writable/WritablesTest.java       |  256 +
 .../java/org/apache/crunch/util/DistCacheTest.java |  156 +
 crunch-dist/pom.xml                                |    2 +-
 crunch-examples/pom.xml                            |    2 +-
 crunch-hbase/pom.xml                               |    2 +-
 crunch-scrunch/pom.xml                             |    2 +-
 crunch/pom.xml                                     |  182 -
 .../it/java/org/apache/crunch/CancelJobsIT.java    |   84 -
 .../src/it/java/org/apache/crunch/CleanTextIT.java |   82 -
 .../org/apache/crunch/CollectionPObjectIT.java     |   98 -
 .../it/java/org/apache/crunch/CollectionsIT.java   |  117 -
 .../org/apache/crunch/CollectionsLengthIT.java     |   70 -
 .../org/apache/crunch/DeepCopyCustomTuplesIT.java  |   79 -
 .../src/it/java/org/apache/crunch/EnumPairIT.java  |   59 -
 .../org/apache/crunch/FirstElementPObjectIT.java   |   61 -
 .../apache/crunch/IterableReuseProtectionIT.java   |   89 -
 .../it/java/org/apache/crunch/MRPipelineIT.java    |   78 -
 .../it/java/org/apache/crunch/MapPObjectIT.java    |  101 -
 crunch/src/it/java/org/apache/crunch/MapsIT.java   |  101 -
 .../it/java/org/apache/crunch/MaterializeIT.java   |  139 -
 .../java/org/apache/crunch/MaterializeToMapIT.java |   81 -
 .../java/org/apache/crunch/MultipleOutputIT.java   |  175 -
 .../org/apache/crunch/PCollectionGetSizeIT.java    |  151 -
 .../src/it/java/org/apache/crunch/PObjectsIT.java  |   99 -
 .../java/org/apache/crunch/PTableKeyValueIT.java   |  103 -
 .../src/it/java/org/apache/crunch/PageRankIT.java  |  168 -
 .../org/apache/crunch/StageResultsCountersIT.java  |  135 -
 .../it/java/org/apache/crunch/TermFrequencyIT.java |  135 -
 .../src/it/java/org/apache/crunch/TextPairIT.java  |   72 -
 crunch/src/it/java/org/apache/crunch/TfIdfIT.java  |  224 -
 .../org/apache/crunch/TupleNClassCastBugIT.java    |   95 -
 .../org/apache/crunch/UnionFromSameSourceIT.java   |  132 -
 crunch/src/it/java/org/apache/crunch/UnionIT.java  |  136 -
 .../it/java/org/apache/crunch/UnionResultsIT.java  |   80 -
 .../src/it/java/org/apache/crunch/WordCountIT.java |  171 -
 .../java/org/apache/crunch/fn/AggregatorsIT.java   |   83 -
 .../crunch/impl/mem/MemPipelineFileWritingIT.java  |   58 -
 .../crunch/impl/mr/collect/UnionCollectionIT.java  |  154 -
 .../apache/crunch/io/CompositePathIterableIT.java  |   84 -
 .../it/java/org/apache/crunch/io/NLineInputIT.java |   72 -
 .../java/org/apache/crunch/io/TextFileTableIT.java |   56 -
 .../crunch/io/avro/AvroFileSourceTargetIT.java     |  140 -
 .../org/apache/crunch/io/avro/AvroPipelineIT.java  |   95 -
 .../org/apache/crunch/io/avro/AvroReflectIT.java   |  109 -
 .../org/apache/crunch/io/avro/AvroWritableIT.java  |   89 -
 .../it/java/org/apache/crunch/lib/AggregateIT.java |  231 -
 .../java/org/apache/crunch/lib/AvroTypeSortIT.java |  145 -
 .../it/java/org/apache/crunch/lib/CogroupIT.java   |  112 -
 .../org/apache/crunch/lib/SecondarySortIT.java     |   65 -
 .../src/it/java/org/apache/crunch/lib/SetIT.java   |  114 -
 .../java/org/apache/crunch/lib/SortByValueIT.java  |   84 -
 .../src/it/java/org/apache/crunch/lib/SortIT.java  |  327 -
 .../apache/crunch/lib/SpecificAvroGroupByIT.java   |  119 -
 .../apache/crunch/lib/join/FullOuterJoinIT.java    |   51 -
 .../org/apache/crunch/lib/join/InnerJoinIT.java    |   51 -
 .../org/apache/crunch/lib/join/JoinTester.java     |  108 -
 .../apache/crunch/lib/join/LeftOuterJoinIT.java    |   51 -
 .../org/apache/crunch/lib/join/MapsideJoinIT.java  |  158 -
 .../crunch/lib/join/MultiAvroSchemaJoinIT.java     |  121 -
 .../apache/crunch/lib/join/RightOuterJoinIT.java   |   51 -
 .../org/apache/crunch/test/TemporaryPaths.java     |   40 -
 .../src/it/java/org/apache/crunch/test/Tests.java  |  124 -
 crunch/src/it/resources/customers.txt              |    4 -
 crunch/src/it/resources/docs.txt                   |    6 -
 crunch/src/it/resources/letters.txt                |    2 -
 crunch/src/it/resources/log4j.properties           |   29 -
 crunch/src/it/resources/maugham.txt                |29112 ---------------
 crunch/src/it/resources/orders.txt                 |    4 -
 .../org/apache/crunch/UnionITData/src1.txt         |    5 -
 .../org/apache/crunch/UnionITData/src2.txt         |    3 -
 .../apache/crunch/fn/AggregatorsITData/ints.txt    |    5 -
 .../org/apache/crunch/lib/CogroupITData/src1.txt   |    4 -
 .../org/apache/crunch/lib/CogroupITData/src2.txt   |    4 -
 crunch/src/it/resources/secondary_sort_input.txt   |    7 -
 crunch/src/it/resources/set1.txt                   |    4 -
 crunch/src/it/resources/set2.txt                   |    3 -
 crunch/src/it/resources/shakes.txt                 | 3667 --
 crunch/src/it/resources/sort_by_value.txt          |    5 -
 crunch/src/it/resources/urls.txt                   |   11 -
 .../main/java/org/apache/crunch/Aggregator.java    |   86 -
 .../src/main/java/org/apache/crunch/CombineFn.java | 1211 -
 .../org/apache/crunch/CrunchRuntimeException.java  |   54 -
 crunch/src/main/java/org/apache/crunch/DoFn.java   |  162 -
 .../src/main/java/org/apache/crunch/Emitter.java   |   37 -
 .../src/main/java/org/apache/crunch/FilterFn.java  |  244 -
 .../java/org/apache/crunch/GroupingOptions.java    |  167 -
 crunch/src/main/java/org/apache/crunch/MapFn.java  |   41 -
 .../main/java/org/apache/crunch/PCollection.java   |  245 -
 .../main/java/org/apache/crunch/PGroupedTable.java |   53 -
 .../src/main/java/org/apache/crunch/PObject.java   |   36 -
 crunch/src/main/java/org/apache/crunch/PTable.java |  181 -
 crunch/src/main/java/org/apache/crunch/Pair.java   |  105 -
 .../java/org/apache/crunch/ParallelDoOptions.java  |   62 -
 .../src/main/java/org/apache/crunch/Pipeline.java  |  138 -
 .../java/org/apache/crunch/PipelineExecution.java  |   54 -
 .../java/org/apache/crunch/PipelineResult.java     |   76 -
 crunch/src/main/java/org/apache/crunch/Source.java |   52 -
 .../main/java/org/apache/crunch/SourceTarget.java  |   26 -
 .../main/java/org/apache/crunch/TableSource.java   |   28 -
 .../java/org/apache/crunch/TableSourceTarget.java  |   25 -
 crunch/src/main/java/org/apache/crunch/Target.java |   83 -
 crunch/src/main/java/org/apache/crunch/Tuple.java  |   36 -
 crunch/src/main/java/org/apache/crunch/Tuple3.java |   96 -
 crunch/src/main/java/org/apache/crunch/Tuple4.java |  105 -
 crunch/src/main/java/org/apache/crunch/TupleN.java |   73 -
 .../java/org/apache/crunch/fn/Aggregators.java     | 1111 -
 .../java/org/apache/crunch/fn/CompositeMapFn.java  |   71 -
 .../java/org/apache/crunch/fn/ExtractKeyFn.java    |   50 -
 .../main/java/org/apache/crunch/fn/FilterFns.java  |  112 -
 .../main/java/org/apache/crunch/fn/IdentityFn.java |   39 -
 .../main/java/org/apache/crunch/fn/MapKeysFn.java  |   32 -
 .../java/org/apache/crunch/fn/MapValuesFn.java     |   32 -
 .../main/java/org/apache/crunch/fn/PairMapFn.java  |   65 -
 .../java/org/apache/crunch/fn/package-info.java    |   22 -
 .../mapreduce/TaskAttemptContextFactory.java       |   70 -
 .../lib/jobcontrol/CrunchControlledJob.java        |  325 -
 .../mapreduce/lib/jobcontrol/CrunchJobControl.java |  211 -
 .../org/apache/crunch/impl/SingleUseIterable.java  |   49 -
 .../org/apache/crunch/impl/mem/MemPipeline.java    |  275 -
 .../crunch/impl/mem/collect/MemCollection.java     |  295 -
 .../crunch/impl/mem/collect/MemGroupedTable.java   |  113 -
 .../apache/crunch/impl/mem/collect/MemTable.java   |  177 -
 .../apache/crunch/impl/mem/collect/Shuffler.java   |  149 -
 .../crunch/impl/mem/emit/InMemoryEmitter.java      |   57 -
 .../org/apache/crunch/impl/mem/package-info.java   |   22 -
 .../java/org/apache/crunch/impl/mr/MRPipeline.java |  396 -
 .../crunch/impl/mr/collect/DoCollectionImpl.java   |   74 -
 .../apache/crunch/impl/mr/collect/DoTableImpl.java |   84 -
 .../crunch/impl/mr/collect/InputCollection.java    |   85 -
 .../apache/crunch/impl/mr/collect/InputTable.java  |   86 -
 .../crunch/impl/mr/collect/PCollectionImpl.java    |  295 -
 .../crunch/impl/mr/collect/PGroupedTableImpl.java  |  144 -
 .../apache/crunch/impl/mr/collect/PTableBase.java  |  169 -
 .../crunch/impl/mr/collect/UnionCollection.java    |   80 -
 .../apache/crunch/impl/mr/collect/UnionTable.java  |   92 -
 .../crunch/impl/mr/emit/IntermediateEmitter.java   |   64 -
 .../crunch/impl/mr/emit/MultipleOutputEmitter.java |   56 -
 .../apache/crunch/impl/mr/emit/OutputEmitter.java  |   52 -
 .../impl/mr/exec/CappedExponentialCounter.java     |   40 -
 .../apache/crunch/impl/mr/exec/CrunchJobHooks.java |  153 -
 .../org/apache/crunch/impl/mr/exec/MRExecutor.java |  198 -
 .../org/apache/crunch/impl/mr/package-info.java    |   22 -
 .../org/apache/crunch/impl/mr/plan/DoNode.java     |  163 -
 .../apache/crunch/impl/mr/plan/DotfileWriter.java  |  238 -
 .../java/org/apache/crunch/impl/mr/plan/Edge.java  |  125 -
 .../java/org/apache/crunch/impl/mr/plan/Graph.java |  133 -
 .../apache/crunch/impl/mr/plan/GraphBuilder.java   |   92 -
 .../apache/crunch/impl/mr/plan/JobNameBuilder.java |   79 -
 .../apache/crunch/impl/mr/plan/JobPrototype.java   |  245 -
 .../crunch/impl/mr/plan/MSCROutputHandler.java     |   77 -
 .../apache/crunch/impl/mr/plan/MSCRPlanner.java    |  378 -
 .../org/apache/crunch/impl/mr/plan/NodePath.java   |  124 -
 .../crunch/impl/mr/plan/PlanningParameters.java    |   38 -
 .../org/apache/crunch/impl/mr/plan/Vertex.java     |  126 -
 .../apache/crunch/impl/mr/run/CrunchCombiner.java  |   27 -
 .../crunch/impl/mr/run/CrunchInputFormat.java      |   78 -
 .../crunch/impl/mr/run/CrunchInputSplit.java       |  116 -
 .../apache/crunch/impl/mr/run/CrunchMapper.java    |   73 -
 .../crunch/impl/mr/run/CrunchRecordReader.java     |   75 -
 .../apache/crunch/impl/mr/run/CrunchReducer.java   |   73 -
 .../crunch/impl/mr/run/CrunchTaskContext.java      |   86 -
 .../org/apache/crunch/impl/mr/run/NodeContext.java |   35 -
 .../java/org/apache/crunch/impl/mr/run/RTNode.java |  124 -
 .../crunch/impl/mr/run/RuntimeParameters.java      |   38 -
 crunch/src/main/java/org/apache/crunch/io/At.java  |  257 -
 .../apache/crunch/io/CompositePathIterable.java    |  102 -
 .../java/org/apache/crunch/io/CrunchInputs.java    |   71 -
 .../java/org/apache/crunch/io/CrunchOutputs.java   |  184 -
 .../org/apache/crunch/io/FileNamingScheme.java     |   58 -
 .../org/apache/crunch/io/FileReaderFactory.java    |   27 -
 .../java/org/apache/crunch/io/FormatBundle.java    |  121 -
 .../src/main/java/org/apache/crunch/io/From.java   |  324 -
 .../java/org/apache/crunch/io/MapReduceTarget.java |   27 -
 .../java/org/apache/crunch/io/OutputHandler.java   |   25 -
 .../main/java/org/apache/crunch/io/PathTarget.java |   36 -
 .../java/org/apache/crunch/io/PathTargetImpl.java  |   64 -
 .../java/org/apache/crunch/io/ReadableSource.java  |   41 -
 .../org/apache/crunch/io/ReadableSourceTarget.java |   30 -
 .../crunch/io/SequentialFileNamingScheme.java      |   51 -
 .../org/apache/crunch/io/SourceTargetHelper.java   |   48 -
 crunch/src/main/java/org/apache/crunch/io/To.java  |  153 -
 .../crunch/io/avro/AvroFileReaderFactory.java      |   96 -
 .../org/apache/crunch/io/avro/AvroFileSource.java  |   58 -
 .../crunch/io/avro/AvroFileSourceTarget.java       |   39 -
 .../org/apache/crunch/io/avro/AvroFileTarget.java  |   91 -
 .../apache/crunch/io/impl/AutoClosingIterator.java |   62 -
 .../org/apache/crunch/io/impl/FileSourceImpl.java  |  104 -
 .../apache/crunch/io/impl/FileTableSourceImpl.java |   41 -
 .../org/apache/crunch/io/impl/FileTargetImpl.java  |  162 -
 .../io/impl/ReadableSourcePathTargetImpl.java      |   39 -
 .../crunch/io/impl/ReadableSourceTargetImpl.java   |   37 -
 .../crunch/io/impl/SourcePathTargetImpl.java       |   50 -
 .../apache/crunch/io/impl/SourceTargetImpl.java    |   89 -
 .../crunch/io/impl/TableSourcePathTargetImpl.java  |   41 -
 .../crunch/io/impl/TableSourceTargetImpl.java      |   35 -
 .../java/org/apache/crunch/io/package-info.java    |   22 -
 .../org/apache/crunch/io/seq/SeqFileHelper.java    |   35 -
 .../apache/crunch/io/seq/SeqFileReaderFactory.java |  112 -
 .../org/apache/crunch/io/seq/SeqFileSource.java    |   47 -
 .../apache/crunch/io/seq/SeqFileSourceTarget.java  |   44 -
 .../apache/crunch/io/seq/SeqFileTableSource.java   |   57 -
 .../crunch/io/seq/SeqFileTableSourceTarget.java    |   54 -
 .../org/apache/crunch/io/seq/SeqFileTarget.java    |   55 -
 .../crunch/io/text/BZip2TextInputFormat.java       |  235 -
 .../apache/crunch/io/text/CBZip2InputStream.java   |  980 -
 .../java/org/apache/crunch/io/text/LineParser.java |  125 -
 .../org/apache/crunch/io/text/NLineFileSource.java |   77 -
 .../crunch/io/text/TextFileReaderFactory.java      |   83 -
 .../org/apache/crunch/io/text/TextFileSource.java  |   73 -
 .../crunch/io/text/TextFileSourceTarget.java       |   44 -
 .../apache/crunch/io/text/TextFileTableSource.java |   81 -
 .../crunch/io/text/TextFileTableSourceTarget.java  |   63 -
 .../org/apache/crunch/io/text/TextFileTarget.java  |  109 -
 .../main/java/org/apache/crunch/lib/Aggregate.java |  272 -
 .../main/java/org/apache/crunch/lib/Cartesian.java |  216 -
 .../main/java/org/apache/crunch/lib/Cogroup.java   |  106 -
 .../main/java/org/apache/crunch/lib/Distinct.java  |  126 -
 .../src/main/java/org/apache/crunch/lib/Join.java  |  181 -
 .../main/java/org/apache/crunch/lib/PTables.java   |  117 -
 .../main/java/org/apache/crunch/lib/Sample.java    |  217 -
 .../java/org/apache/crunch/lib/SampleUtils.java    |  168 -
 .../java/org/apache/crunch/lib/SecondarySort.java  |  118 -
 .../src/main/java/org/apache/crunch/lib/Set.java   |  118 -
 .../src/main/java/org/apache/crunch/lib/Sort.java  |  294 -
 .../apache/crunch/lib/join/FullOuterJoinFn.java    |  102 -
 .../org/apache/crunch/lib/join/InnerJoinFn.java    |   78 -
 .../java/org/apache/crunch/lib/join/JoinFn.java    |   81 -
 .../java/org/apache/crunch/lib/join/JoinUtils.java |  126 -
 .../apache/crunch/lib/join/LeftOuterJoinFn.java    |   98 -
 .../org/apache/crunch/lib/join/MapsideJoin.java    |  164 -
 .../apache/crunch/lib/join/RightOuterJoinFn.java   |   83 -
 .../org/apache/crunch/lib/join/package-info.java   |   22 -
 .../java/org/apache/crunch/lib/package-info.java   |   22 -
 .../org/apache/crunch/lib/sort/Comparators.java    |  187 -
 .../java/org/apache/crunch/lib/sort/SortFns.java   |  210 -
 .../crunch/lib/sort/TotalOrderPartitioner.java     |  145 -
 .../crunch/materialize/MaterializableIterable.java |   81 -
 .../crunch/materialize/MaterializableMap.java      |   50 -
 .../materialize/pobject/CollectionPObject.java     |   55 -
 .../materialize/pobject/FirstElementPObject.java   |   50 -
 .../crunch/materialize/pobject/MapPObject.java     |   62 -
 .../crunch/materialize/pobject/PObjectImpl.java    |   85 -
 .../main/java/org/apache/crunch/package-info.java  |   25 -
 .../apache/crunch/types/CollectionDeepCopier.java  |   57 -
 .../java/org/apache/crunch/types/Converter.java    |   41 -
 .../java/org/apache/crunch/types/DeepCopier.java   |   60 -
 .../org/apache/crunch/types/MapDeepCopier.java     |   54 -
 .../org/apache/crunch/types/PGroupedTableType.java |  141 -
 .../java/org/apache/crunch/types/PTableType.java   |   44 -
 .../main/java/org/apache/crunch/types/PType.java   |   86 -
 .../java/org/apache/crunch/types/PTypeFamily.java  |   77 -
 .../java/org/apache/crunch/types/PTypeUtils.java   |   66 -
 .../main/java/org/apache/crunch/types/PTypes.java  |  252 -
 .../main/java/org/apache/crunch/types/Protos.java  |  173 -
 .../org/apache/crunch/types/TupleDeepCopier.java   |   65 -
 .../java/org/apache/crunch/types/TupleFactory.java |  134 -
 .../apache/crunch/types/avro/AvroCapabilities.java |  106 -
 .../apache/crunch/types/avro/AvroDeepCopier.java   |  209 -
 .../crunch/types/avro/AvroGroupedTableType.java    |  114 -
 .../apache/crunch/types/avro/AvroInputFormat.java  |   41 -
 .../apache/crunch/types/avro/AvroKeyConverter.java |   65 -
 .../apache/crunch/types/avro/AvroOutputFormat.java |   87 -
 .../crunch/types/avro/AvroPairConverter.java       |  108 -
 .../apache/crunch/types/avro/AvroRecordReader.java |  114 -
 .../apache/crunch/types/avro/AvroTableType.java    |  151 -
 .../crunch/types/avro/AvroTextOutputFormat.java    |   60 -
 .../org/apache/crunch/types/avro/AvroType.java     |  199 -
 .../apache/crunch/types/avro/AvroTypeFamily.java   |  164 -
 .../crunch/types/avro/AvroUtf8InputFormat.java     |   98 -
 .../java/org/apache/crunch/types/avro/Avros.java   |  709 -
 .../crunch/types/avro/ReflectDataFactory.java      |   41 -
 .../crunch/types/avro/SafeAvroSerialization.java   |  145 -
 .../org/apache/crunch/types/avro/package-info.java |   22 -
 .../java/org/apache/crunch/types/package-info.java |   22 -
 .../types/writable/GenericArrayWritable.java       |  135 -
 .../crunch/types/writable/TextMapWritable.java     |   88 -
 .../crunch/types/writable/TupleWritable.java       |  224 -
 .../crunch/types/writable/WritableDeepCopier.java  |   70 -
 .../types/writable/WritableGroupedTableType.java   |   85 -
 .../types/writable/WritablePairConverter.java      |   62 -
 .../crunch/types/writable/WritableTableType.java   |  130 -
 .../apache/crunch/types/writable/WritableType.java |  133 -
 .../crunch/types/writable/WritableTypeFamily.java  |  147 -
 .../types/writable/WritableValueConverter.java     |   60 -
 .../apache/crunch/types/writable/Writables.java    |  588 -
 .../apache/crunch/types/writable/package-info.java |   22 -
 .../java/org/apache/crunch/util/CrunchTool.java    |  118 -
 .../java/org/apache/crunch/util/DistCache.java     |  231 -
 .../org/apache/crunch/util/PartitionUtils.java     |   34 -
 .../main/java/org/apache/crunch/util/Tuples.java   |  150 -
 .../java/org/apache/crunch/util/package-info.java  |   22 -
 crunch/src/main/resources/log4j.properties         |   24 -
 crunch/src/site/site.xml                           |   34 -
 crunch/src/test/avro/employee.avsc                 |   26 -
 crunch/src/test/avro/person.avsc                   |   26 -
 .../src/test/java/org/apache/crunch/AndFnTest.java |   77 -
 .../test/java/org/apache/crunch/CombineFnTest.java |  222 -
 .../src/test/java/org/apache/crunch/NotFnTest.java |   72 -
 .../src/test/java/org/apache/crunch/OrFnTest.java  |   78 -
 .../src/test/java/org/apache/crunch/PairTest.java  |   66 -
 .../src/test/java/org/apache/crunch/TupleTest.java |  139 -
 .../test/java/org/apache/crunch/WriteModeTest.java |  103 -
 .../java/org/apache/crunch/fn/AggregatorsTest.java |  239 -
 .../org/apache/crunch/fn/ExtractKeyFnTest.java     |   44 -
 .../java/org/apache/crunch/fn/FilterFnTest.java    |   85 -
 .../java/org/apache/crunch/fn/MapKeysTest.java     |   51 -
 .../java/org/apache/crunch/fn/MapValuesTest.java   |   50 -
 .../java/org/apache/crunch/fn/PairMapTest.java     |   52 -
 .../org/apache/crunch/fn/StoreLastEmitter.java     |   41 -
 .../apache/crunch/impl/SingleUseIterableTest.java  |   54 -
 .../org/apache/crunch/impl/mr/MRPipelineTest.java  |   86 -
 .../impl/mr/collect/DoCollectionImplTest.java      |  112 -
 .../crunch/impl/mr/collect/DoTableImplTest.java    |   86 -
 .../impl/mr/emit/IntermediateEmitterTest.java      |   83 -
 .../impl/mr/exec/CappedExponentialCounterTest.java |   42 -
 .../crunch/impl/mr/exec/CrunchJobHooksTest.java    |   42 -
 .../crunch/impl/mr/plan/DotfileWriterTest.java     |  132 -
 .../crunch/impl/mr/plan/JobNameBuilderTest.java    |   41 -
 .../crunch/io/SequentialFileNamingSchemeTest.java  |   84 -
 .../apache/crunch/io/SourceTargetHelperTest.java   |   59 -
 .../crunch/io/avro/AvroFileReaderFactoryTest.java  |  184 -
 .../apache/crunch/io/avro/AvroFileSourceTest.java  |   91 -
 .../lib/AvroIndexedRecordPartitionerTest.java      |   98 -
 .../java/org/apache/crunch/lib/CartesianTest.java  |   77 -
 .../java/org/apache/crunch/lib/DistinctTest.java   |   52 -
 .../java/org/apache/crunch/lib/SampleTest.java     |   71 -
 .../org/apache/crunch/lib/SecondarySortTest.java   |   53 -
 .../crunch/lib/TupleWritablePartitionerTest.java   |   68 -
 .../lib/join/BrokenLeftAndOuterJoinTest.java       |   90 -
 .../crunch/lib/join/FullOuterJoinFnTest.java       |   48 -
 .../apache/crunch/lib/join/InnerJoinFnTest.java    |   42 -
 .../org/apache/crunch/lib/join/JoinFnTestBase.java |   82 -
 .../apache/crunch/lib/join/LeftOuterJoinTest.java  |   46 -
 .../crunch/lib/join/RightOuterJoinFnTest.java      |   46 -
 .../java/org/apache/crunch/test/CountersTest.java  |   70 -
 .../java/org/apache/crunch/test/StringWrapper.java |  102 -
 .../crunch/types/CollectionDeepCopierTest.java     |   61 -
 .../org/apache/crunch/types/MapDeepCopierTest.java |   63 -
 .../org/apache/crunch/types/PTypeUtilsTest.java    |   89 -
 .../java/org/apache/crunch/types/PTypesTest.java   |   34 -
 .../apache/crunch/types/TupleDeepCopierTest.java   |   77 -
 .../org/apache/crunch/types/TupleFactoryTest.java  |   69 -
 .../crunch/types/avro/AvroDeepCopierTest.java      |  107 -
 .../types/avro/AvroGroupedTableTypeTest.java       |   60 -
 .../crunch/types/avro/AvroTableTypeTest.java       |   72 -
 .../org/apache/crunch/types/avro/AvroTypeTest.java |  279 -
 .../org/apache/crunch/types/avro/AvrosTest.java    |  325 -
 .../types/writable/GenericArrayWritableTest.java   |   70 -
 .../types/writable/WritableDeepCopierTest.java     |   54 -
 .../writable/WritableGroupedTableTypeTest.java     |   56 -
 .../types/writable/WritableTableTypeTest.java      |   47 -
 .../crunch/types/writable/WritableTypeTest.java    |   97 -
 .../crunch/types/writable/WritablesTest.java       |  256 -
 .../java/org/apache/crunch/util/DistCacheTest.java |  156 -
 pom.xml                                            |    4 +-
 702 files changed, 70421 insertions(+), 70421 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-contrib/pom.xml
----------------------------------------------------------------------
diff --git a/crunch-contrib/pom.xml b/crunch-contrib/pom.xml
index 12f9a13..e5a35c5 100644
--- a/crunch-contrib/pom.xml
+++ b/crunch-contrib/pom.xml
@@ -32,7 +32,7 @@ under the License.
   
     <dependency>
       <groupId>org.apache.crunch</groupId>
-      <artifactId>crunch</artifactId>
+      <artifactId>crunch-core</artifactId>
     </dependency>
     
     <dependency>

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/pom.xml
----------------------------------------------------------------------
diff --git a/crunch-core/pom.xml b/crunch-core/pom.xml
new file mode 100644
index 0000000..d365c3d
--- /dev/null
+++ b/crunch-core/pom.xml
@@ -0,0 +1,182 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.crunch</groupId>
+    <artifactId>crunch-parent</artifactId>
+    <version>0.6.0-SNAPSHOT</version>
+  </parent>
+
+  <artifactId>crunch-core</artifactId>
+  <name>Apache Crunch Core</name>
+
+  <dependencies>
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.avro</groupId>
+      <artifactId>avro</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.avro</groupId>
+      <artifactId>avro-mapred</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>org.javassist</groupId>
+      <artifactId>javassist</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-client</artifactId>
+      <scope>provided</scope>
+    </dependency>
+
+    <!-- Override the slf4j dependency from Avro, which is incompatible with
+         Hadoop's. -->
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+      <scope>provided</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>commons-codec</groupId>
+      <artifactId>commons-codec</artifactId>
+      <scope>provided</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.codehaus.jackson</groupId>
+      <artifactId>jackson-core-asl</artifactId>
+      <scope>provided</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.codehaus.jackson</groupId>
+      <artifactId>jackson-mapper-asl</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    
+    <!-- Both Protobufs and Thrift are supported as
+         derived serialization types, and you can use
+         (almost) any version of them you like; Crunch
+         only relies on the stable public APIs, not the
+         structure of the files themselves.
+
+         Both dependencies are scoped as provided, in
+         order to not expand the size of the assembly jars
+         unnecessarily.
+    -->
+
+    <dependency>
+      <groupId>com.google.protobuf</groupId>
+      <artifactId>protobuf-java</artifactId>
+      <scope>provided</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.thrift</groupId>
+      <artifactId>libthrift</artifactId>
+      <scope>provided</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>commons-logging</groupId>
+      <artifactId>commons-logging</artifactId>
+      <scope>provided</scope>
+    </dependency>
+   
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-log4j12</artifactId>
+      <scope>provided</scope>
+    </dependency>
+
+    <!-- Used by LocalJobRunner in integration tests -->
+    <dependency>
+      <groupId>commons-httpclient</groupId>
+      <artifactId>commons-httpclient</artifactId>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.crunch</groupId>
+      <artifactId>crunch-test</artifactId>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-all</artifactId>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.hamcrest</groupId>
+      <artifactId>hamcrest-all</artifactId>
+      <scope>test</scope>
+    </dependency>
+
+  </dependencies>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>build-helper-maven-plugin</artifactId>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-failsafe-plugin</artifactId>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.avro</groupId>
+        <artifactId>avro-maven-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>schemas</id>
+            <phase>generate-sources</phase>
+            <goals>
+              <goal>schema</goal>
+            </goals>
+            <configuration>
+              <testSourceDirectory>${project.basedir}/src/test/avro/</testSourceDirectory>
+              <testOutputDirectory>target/generated-test-sources/</testOutputDirectory>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+
+</project>

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/CancelJobsIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/CancelJobsIT.java b/crunch-core/src/it/java/org/apache/crunch/CancelJobsIT.java
new file mode 100644
index 0000000..ff01a2f
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/CancelJobsIT.java
@@ -0,0 +1,84 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.io.To;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.junit.Rule;
+import org.junit.Test;
+
+/**
+ * Integration tests for running, killing, and re-killing asynchronous pipeline executions.
+ */
+public class CancelJobsIT {
+
+  @Rule
+  public TemporaryPath tmpDir = TemporaryPaths.create();
+
+  @Test
+  public void testRun() throws Exception {
+    PipelineExecution pe = run();
+    pe.waitUntilDone();
+    PipelineResult pr = pe.getResult();
+    assertEquals(PipelineExecution.Status.SUCCEEDED, pe.getStatus());
+    assertEquals(2, pr.getStageResults().size());
+  }
+  
+  @Test
+  public void testKill() throws Exception {
+    PipelineExecution pe = run();
+    pe.kill();
+    pe.waitUntilDone();
+    assertEquals(PipelineExecution.Status.KILLED, pe.getStatus());
+  }
+
+  @Test
+  public void testKillMultipleTimes() throws Exception {
+    PipelineExecution pe = run();
+    for (int i = 0; i < 10; i++) {
+      pe.kill();
+    }
+    pe.waitUntilDone();
+    assertEquals(PipelineExecution.Status.KILLED, pe.getStatus());
+  }
+
+  @Test
+  public void testKillAfterDone() throws Exception {
+    PipelineExecution pe = run();
+    pe.waitUntilDone();
+    assertEquals(PipelineExecution.Status.SUCCEEDED, pe.getStatus());
+    pe.kill(); // expect no-op
+    assertEquals(PipelineExecution.Status.SUCCEEDED, pe.getStatus());
+  }
+  
+  public PipelineExecution run() throws IOException {
+    String shakes = tmpDir.copyResourceFileName("shakes.txt");
+    String out = tmpDir.getFileName("cancel");
+    Pipeline p = new MRPipeline(CancelJobsIT.class, tmpDir.getDefaultConfiguration());
+    PCollection<String> words = p.readTextFile(shakes);
+    p.write(words.count().top(20), To.textFile(out));
+    return p.runAsync(); // may need to slow down job start-up here if this test becomes flaky.
+  }
+}
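
As a point of reference for the asynchronous execution API that CancelJobsIT exercises, the sketch below shows how application code might put a wall-clock budget on a pipeline. It is a minimal sketch, not part of this change set: the class name, the input/output arguments, and the ten-minute timeout are illustrative assumptions, and only methods that appear above (runAsync, kill, waitUntilDone, getStatus) are used.

    import org.apache.crunch.PCollection;
    import org.apache.crunch.Pipeline;
    import org.apache.crunch.PipelineExecution;
    import org.apache.crunch.impl.mr.MRPipeline;
    import org.apache.crunch.io.To;

    public class CancelAfterTimeout {
      public static void main(String[] args) throws Exception {
        Pipeline p = new MRPipeline(CancelAfterTimeout.class);
        PCollection<String> lines = p.readTextFile(args[0]);
        p.write(lines.count().top(20), To.textFile(args[1]));

        // Submit the jobs without blocking the calling thread.
        final PipelineExecution exec = p.runAsync();

        // Watchdog: kill the execution if it is still going after ten minutes.
        // kill() is safe to call repeatedly and after completion, as the tests
        // above demonstrate.
        Thread watchdog = new Thread(new Runnable() {
          @Override
          public void run() {
            try {
              Thread.sleep(10 * 60 * 1000L);
              exec.kill();
            } catch (Exception e) {
              // interrupted or already shut down; nothing to do
            }
          }
        });
        watchdog.setDaemon(true); // do not keep the JVM alive after the pipeline finishes
        watchdog.start();

        exec.waitUntilDone();
        System.out.println("Final status: " + exec.getStatus());
      }
    }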

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/CleanTextIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/CleanTextIT.java b/crunch-core/src/it/java/org/apache/crunch/CleanTextIT.java
new file mode 100644
index 0000000..2f4004e
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/CleanTextIT.java
@@ -0,0 +1,82 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.nio.charset.Charset;
+import java.util.List;
+
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.io.To;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.avro.Avros;
+import org.junit.Rule;
+import org.junit.Test;
+
+import com.google.common.io.Files;
+
+/**
+ * Integration test that cleans an input text file and counts its words, writing both outputs in a single run.
+ */
+public class CleanTextIT {
+
+  private static final int LINES_IN_SHAKES = 3667;
+  
+  @Rule
+  public TemporaryPath tmpDir = TemporaryPaths.create();
+  
+  static DoFn<String, String> CLEANER = new DoFn<String, String>() {
+    @Override
+    public void process(String input, Emitter<String> emitter) {
+      emitter.emit(input.toLowerCase());
+    }
+  };
+  
+  static DoFn<String, String> SPLIT = new DoFn<String, String>() {
+    @Override
+    public void process(String input, Emitter<String> emitter) {
+      for (String word : input.split("\\s+")) {
+        if (!word.isEmpty()) {
+          emitter.emit(word);
+        }
+      }
+    }
+  };
+  
+  @Test
+  public void testMapSideOutputs() throws Exception {
+    Pipeline pipeline = new MRPipeline(CleanTextIT.class, tmpDir.getDefaultConfiguration());
+    String shakesInputPath = tmpDir.copyResourceFileName("shakes.txt");
+    PCollection<String> shakespeare = pipeline.readTextFile(shakesInputPath);
+    
+    PCollection<String> cleanShakes = shakespeare.parallelDo(CLEANER, Avros.strings());
+    File cso = tmpDir.getFile("cleanShakes");
+    cleanShakes.write(To.textFile(cso.getAbsolutePath()));
+    
+    File wc = tmpDir.getFile("wordCounts");
+    cleanShakes.parallelDo(SPLIT, Avros.strings()).count().write(To.textFile(wc.getAbsolutePath()));
+    pipeline.done();
+    
+    File cleanFile = new File(cso, "part-m-00000");
+    List<String> lines = Files.readLines(cleanFile, Charset.defaultCharset());
+    assertEquals(LINES_IN_SHAKES, lines.size());
+  }
+}
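
The test above also illustrates a general fan-out shape: one intermediate PCollection is written directly (which can be satisfied map-side) and is also fed into a further-derived output, with a single pipeline.done() planning both. A minimal sketch of that shape for application code follows; the class name and arguments are illustrative assumptions, and the two anonymous DoFns stand in for the CLEANER and SPLIT functions above.

    import org.apache.crunch.DoFn;
    import org.apache.crunch.Emitter;
    import org.apache.crunch.PCollection;
    import org.apache.crunch.Pipeline;
    import org.apache.crunch.impl.mr.MRPipeline;
    import org.apache.crunch.io.To;
    import org.apache.crunch.types.avro.Avros;

    public class CleanAndCount {
      public static void main(String[] args) {
        Pipeline pipeline = new MRPipeline(CleanAndCount.class);
        PCollection<String> raw = pipeline.readTextFile(args[0]);

        // One intermediate collection feeds two outputs.
        PCollection<String> cleaned = raw.parallelDo(new DoFn<String, String>() {
          @Override
          public void process(String input, Emitter<String> emitter) {
            emitter.emit(input.toLowerCase());
          }
        }, Avros.strings());

        cleaned.write(To.textFile(args[1])); // the cleaned text itself

        cleaned.parallelDo(new DoFn<String, String>() {
          @Override
          public void process(String input, Emitter<String> emitter) {
            for (String word : input.split("\\s+")) {
              if (!word.isEmpty()) {
                emitter.emit(word);
              }
            }
          }
        }, Avros.strings()).count().write(To.textFile(args[2])); // word counts

        pipeline.done(); // plans and runs both writes
      }
    }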

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/CollectionPObjectIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/CollectionPObjectIT.java b/crunch-core/src/it/java/org/apache/crunch/CollectionPObjectIT.java
new file mode 100644
index 0000000..7e0c75c
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/CollectionPObjectIT.java
@@ -0,0 +1,98 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.lang.String;
+import java.util.Collection;
+
+import org.apache.crunch.PCollection;
+import org.apache.crunch.PObject;
+import org.apache.crunch.Pipeline;
+import org.apache.crunch.impl.mem.MemPipeline;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.materialize.pobject.CollectionPObject;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.junit.Rule;
+import org.junit.Test;
+
+@SuppressWarnings("serial")
+public class CollectionPObjectIT {
+
+  private static final int LINES_IN_SHAKES = 3667;
+
+  private static final String FIRST_SHAKESPEARE_LINE =
+      "***The Project Gutenberg's Etext of Shakespeare's First Folio***";
+
+  private static final String LAST_SHAKESPEARE_LINE =
+      "FINIS. THE TRAGEDIE OF MACBETH.";
+
+  @Rule
+  public TemporaryPath tmpDir = TemporaryPaths.create();
+
+  @Test
+  public void testPObjectMRPipeline() throws IOException {
+    runPObject(new MRPipeline(CollectionPObjectIT.class, tmpDir.getDefaultConfiguration()));
+  }
+
+  @Test
+  public void testAsCollectionMRPipeline() throws IOException {
+    runAsCollection(new MRPipeline(CollectionPObjectIT.class, tmpDir.getDefaultConfiguration()));
+  }
+
+  @Test
+  public void testPObjectMemPipeline() throws IOException {
+    runPObject(MemPipeline.getInstance());
+  }
+
+  @Test
+  public void testAsCollectionMemPipeline() throws IOException {
+    runAsCollection(MemPipeline.getInstance());
+  }
+
+  private PCollection<String> getPCollection(Pipeline pipeline) throws IOException {
+    String shakesInputPath = tmpDir.copyResourceFileName("shakes.txt");
+    PCollection<String> shakespeare = pipeline.readTextFile(shakesInputPath);
+    return shakespeare;
+  }
+
+  private void verifyLines(String[] lines) {
+    assertEquals("Not enough lines in Shakespeare.", LINES_IN_SHAKES, lines.length);
+    assertEquals("First line in Shakespeare is wrong.", FIRST_SHAKESPEARE_LINE, lines[0]);
+    assertEquals("Last line in Shakespeare is wrong.", LAST_SHAKESPEARE_LINE,
+        lines[lines.length - 1]);
+  }
+
+  public void runPObject(Pipeline pipeline) throws IOException {
+    PCollection<String> shakespeare = getPCollection(pipeline);
+    PObject<Collection<String>> linesP = new CollectionPObject<String>(shakespeare);
+    String[] lines = new String[LINES_IN_SHAKES];
+    lines = linesP.getValue().toArray(lines);
+    verifyLines(lines);
+  }
+
+  public void runAsCollection(Pipeline pipeline) throws IOException {
+    PCollection<String> shakespeare = getPCollection(pipeline);
+    String[] lines = new String[LINES_IN_SHAKES];
+    lines = shakespeare.asCollection().getValue().toArray(lines);
+    verifyLines(lines);
+  }
+}
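
For application code, the PObject pattern above boils down to pulling a small PCollection into client memory once the pipeline has produced it. Below is a minimal, hypothetical sketch; the class name and arguments are illustrative, and it relies only on asCollection()/getValue() as used in this commit. Note that materialize() is the lighter-weight alternative when an Iterable view is enough and no client-side Collection copy is needed.

    import java.util.Collection;

    import org.apache.crunch.PCollection;
    import org.apache.crunch.Pipeline;
    import org.apache.crunch.impl.mr.MRPipeline;

    public class CollectTopics {
      public static void main(String[] args) {
        Pipeline pipeline = new MRPipeline(CollectTopics.class);
        PCollection<String> topics = pipeline.readTextFile(args[0]);

        // Blocks, runs whatever jobs are needed, and copies the result to the client.
        // Only do this for collections known to be small enough to fit in memory.
        Collection<String> inMemory = topics.asCollection().getValue();

        for (String topic : inMemory) {
          System.out.println(topic);
        }
        pipeline.done();
      }
    }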

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/CollectionsIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/CollectionsIT.java b/crunch-core/src/it/java/org/apache/crunch/CollectionsIT.java
new file mode 100644
index 0000000..17d0cae
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/CollectionsIT.java
@@ -0,0 +1,117 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.Collection;
+
+import org.apache.crunch.fn.Aggregators.SimpleAggregator;
+import org.apache.crunch.impl.mem.MemPipeline;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.PTypeFamily;
+import org.apache.crunch.types.avro.AvroTypeFamily;
+import org.apache.crunch.types.writable.WritableTypeFamily;
+import org.junit.Rule;
+import org.junit.Test;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+
+@SuppressWarnings("serial")
+public class CollectionsIT {
+
+  private static class AggregateStringListFn extends SimpleAggregator<Collection<String>> {
+    private final Collection<String> rtn = Lists.newArrayList();
+
+    @Override
+    public void reset() {
+      rtn.clear();
+    }
+
+    @Override
+    public void update(Collection<String> values) {
+      rtn.addAll(values);
+    }
+
+    @Override
+    public Iterable<Collection<String>> results() {
+      return ImmutableList.of(rtn);
+    }
+  }
+
+  private static PTable<String, Collection<String>> listOfCharacters(PCollection<String> lines, PTypeFamily typeFamily) {
+
+    return lines.parallelDo(new DoFn<String, Pair<String, Collection<String>>>() {
+      @Override
+      public void process(String line, Emitter<Pair<String, Collection<String>>> emitter) {
+        for (String word : line.split("\\s+")) {
+          Collection<String> characters = Lists.newArrayList();
+          for (char c : word.toCharArray()) {
+            characters.add(String.valueOf(c));
+          }
+          emitter.emit(Pair.of(word, characters));
+        }
+      }
+    }, typeFamily.tableOf(typeFamily.strings(), typeFamily.collections(typeFamily.strings())))
+        .groupByKey().combineValues(new AggregateStringListFn());
+  }
+
+  @Rule
+  public TemporaryPath tmpDir = TemporaryPaths.create();
+
+  @Test
+  public void testWritables() throws IOException {
+    run(new MRPipeline(CollectionsIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance());
+  }
+
+  @Test
+  public void testAvro() throws IOException {
+    run(new MRPipeline(CollectionsIT.class, tmpDir.getDefaultConfiguration()), AvroTypeFamily.getInstance());
+  }
+
+  @Test
+  public void testInMemoryWritables() throws IOException {
+    run(MemPipeline.getInstance(), WritableTypeFamily.getInstance());
+  }
+
+  @Test
+  public void testInMemoryAvro() throws IOException {
+    run(MemPipeline.getInstance(), AvroTypeFamily.getInstance());
+  }
+
+  public void run(Pipeline pipeline, PTypeFamily typeFamily) throws IOException {
+    String shakesInputPath = tmpDir.copyResourceFileName("shakes.txt");
+
+    PCollection<String> shakespeare = pipeline.readTextFile(shakesInputPath);
+    Iterable<Pair<String, Collection<String>>> lines = listOfCharacters(shakespeare, typeFamily).materialize();
+
+    boolean passed = false;
+    for (Pair<String, Collection<String>> line : lines) {
+      if (line.first().startsWith("yellow")) {
+        passed = true;
+        break;
+      }
+    }
+    pipeline.done();
+    assertTrue(passed);
+  }
+}
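
AggregateStringListFn above is the general recipe for a custom combiner: extend SimpleAggregator, accumulate state in update(), and emit it from results(). The sketch below applies the same recipe to a per-key sum of longs; the class and method names are illustrative assumptions, and only API that appears in this commit (SimpleAggregator, groupByKey, combineValues) is used.

    import org.apache.crunch.PGroupedTable;
    import org.apache.crunch.PTable;
    import org.apache.crunch.fn.Aggregators.SimpleAggregator;

    import com.google.common.collect.ImmutableList;

    /** Sums the long values of each key, in the style of AggregateStringListFn above. */
    public class SumValues {

      static class SumLongs extends SimpleAggregator<Long> {
        private long sum;

        @Override
        public void reset() {
          sum = 0L;
        }

        @Override
        public void update(Long value) {
          sum += value;
        }

        @Override
        public Iterable<Long> results() {
          return ImmutableList.of(sum);
        }
      }

      public static PTable<String, Long> totalsByKey(PTable<String, Long> counts) {
        PGroupedTable<String, Long> grouped = counts.groupByKey();
        return grouped.combineValues(new SumLongs());
      }
    }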

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/CollectionsLengthIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/CollectionsLengthIT.java b/crunch-core/src/it/java/org/apache/crunch/CollectionsLengthIT.java
new file mode 100644
index 0000000..3a38b92
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/CollectionsLengthIT.java
@@ -0,0 +1,70 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.lang.Long;
+
+import org.apache.crunch.impl.mem.MemPipeline;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.PTypeFamily;
+import org.apache.crunch.types.avro.AvroTypeFamily;
+import org.apache.crunch.types.writable.WritableTypeFamily;
+import org.junit.Rule;
+import org.junit.Test;
+
+@SuppressWarnings("serial")
+public class CollectionsLengthIT {
+
+  public static final Long LINES_IN_SHAKESPEARE = 3667L;
+
+  @Rule
+  public TemporaryPath tmpDir = TemporaryPaths.create();
+
+  @Test
+  public void testWritables() throws IOException {
+    run(new MRPipeline(CollectionsLengthIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance());
+  }
+
+  @Test
+  public void testAvro() throws IOException {
+    run(new MRPipeline(CollectionsLengthIT.class, tmpDir.getDefaultConfiguration()), AvroTypeFamily.getInstance());
+  }
+
+  @Test
+  public void testInMemoryWritables() throws IOException {
+    run(MemPipeline.getInstance(), WritableTypeFamily.getInstance());
+  }
+
+  @Test
+  public void testInMemoryAvro() throws IOException {
+    run(MemPipeline.getInstance(), AvroTypeFamily.getInstance());
+  }
+
+  public void run(Pipeline pipeline, PTypeFamily typeFamily) throws IOException {
+    String shakesInputPath = tmpDir.copyResourceFileName("shakes.txt");
+
+    PCollection<String> shakespeare = pipeline.readTextFile(shakesInputPath);
+    Long length = shakespeare.length().getValue();
+    assertEquals("Incorrect length for shakespear PCollection.", LINES_IN_SHAKESPEARE, length);
+  }
+}
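
Because length() hands back a PObject, its value can drive client-side control flow before any further stages are planned. A small sketch under that assumption follows; the helper name and the empty-input policy are illustrative, and only length()/getValue() as used in the test above are relied on.

    import org.apache.crunch.PCollection;

    public class NonEmptyCheck {
      /**
       * Returns the input unchanged, failing fast when it is empty.
       * Calling getValue() here runs whatever jobs are needed to compute the count.
       */
      public static <T> PCollection<T> requireNonEmpty(PCollection<T> data) {
        Long n = data.length().getValue();
        if (n == null || n == 0L) {
          throw new IllegalStateException("expected a non-empty PCollection");
        }
        return data;
      }
    }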

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/DeepCopyCustomTuplesIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/DeepCopyCustomTuplesIT.java b/crunch-core/src/it/java/org/apache/crunch/DeepCopyCustomTuplesIT.java
new file mode 100644
index 0000000..f1323ca
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/DeepCopyCustomTuplesIT.java
@@ -0,0 +1,79 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import static org.apache.crunch.types.avro.Avros.*;
+import static org.junit.Assert.assertEquals;
+
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.PType;
+import org.junit.Rule;
+import org.junit.Test;
+
+import com.google.common.collect.Iterables;
+
+/**
+ * Integration test verifying that custom Tuple subclasses (here a Pair subclass) are deep-copied correctly.
+ */
+public class DeepCopyCustomTuplesIT {
+  @Rule
+  public TemporaryPath tmpDir = TemporaryPaths.create();
+  
+  public static class PID extends Pair<Integer, String> {
+    public PID(Integer first, String second) {
+      super(first, second);
+    }
+  }
+  
+  private static PType<PID> pids = tuples(PID.class, ints(), strings());
+  
+  @Test
+  public void testDeepCopyCustomTuple() throws Exception {
+    Pipeline p = new MRPipeline(DeepCopyCustomTuplesIT.class, tmpDir.getDefaultConfiguration());
+    String shakesInputPath = tmpDir.copyResourceFileName("shakes.txt");
+    PCollection<String> shakes = p.readTextFile(shakesInputPath);
+    Iterable<String> out = shakes
+        .parallelDo(new PreProcFn(), tableOf(ints(), pairs(ints(), pids)))
+        .groupByKey()
+        .parallelDo(new PostProcFn(), strings())
+        .materialize();
+    assertEquals(65, Iterables.size(out));
+    p.done();
+  }
+  
+  private static class PreProcFn extends MapFn<String, Pair<Integer, Pair<Integer, PID>>> {
+    private int counter = 0;
+    @Override
+    public Pair<Integer, Pair<Integer, PID>> map(String input) {
+      return Pair.of(counter++, Pair.of(counter++, new PID(input.length(), input)));
+    }
+  };
+  
+  private static class PostProcFn extends DoFn<Pair<Integer, Iterable<Pair<Integer, PID>>>, String> {
+    @Override
+    public void process(Pair<Integer, Iterable<Pair<Integer, PID>>> input, Emitter<String> emitter) {
+      for (Pair<Integer, PID> p : input.second()) {
+        if (p.second().first() > 0 && p.second().first() < 10) {
+          emitter.emit(p.second().second());
+        }
+      }
+    }
+  }
+}
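
The PID idiom above, a named Pair subclass with its own PType built by Avros.tuples, also reads well in application code. A hypothetical example with illustrative names; only the tuples(), strings(), and doubles() factories from Avros and the first()/second() accessors of Pair are assumed, all of which appear in this commit.

    import static org.apache.crunch.types.avro.Avros.doubles;
    import static org.apache.crunch.types.avro.Avros.strings;
    import static org.apache.crunch.types.avro.Avros.tuples;

    import org.apache.crunch.Pair;
    import org.apache.crunch.types.PType;

    /** A named (city, temperature) pair; the names are for illustration only. */
    public class CityTemp extends Pair<String, Double> {

      /** PType that (de)serializes CityTemp like an ordinary (string, double) pair. */
      public static final PType<CityTemp> PTYPE = tuples(CityTemp.class, strings(), doubles());

      public CityTemp(String city, Double temp) {
        super(city, temp);
      }

      public String city() {
        return first();
      }

      public Double temp() {
        return second();
      }
    }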

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/EnumPairIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/EnumPairIT.java b/crunch-core/src/it/java/org/apache/crunch/EnumPairIT.java
new file mode 100644
index 0000000..1d0974e
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/EnumPairIT.java
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.io.Serializable;
+
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.PTypes;
+import org.apache.crunch.types.writable.Writables;
+import org.junit.Rule;
+import org.junit.Test;
+
+public class EnumPairIT implements Serializable {
+  @Rule
+  public transient TemporaryPath tmpDir = TemporaryPaths.create();
+
+  static enum etypes {
+    type1,
+  }
+
+  @Test
+  public void testEnumPTypes() throws IOException {
+    String inputFile1 = tmpDir.copyResourceFileName("set1.txt");
+    Pipeline pipeline = new MRPipeline(EnumPairIT.class);
+    PCollection<String> set1 = pipeline.readTextFile(inputFile1);
+    PTable<String, etypes> data = set1.parallelDo(new DoFn<String, Pair<String, etypes>>() {
+      @Override
+      public void process(String input, Emitter<Pair<String, etypes>> emitter) {
+        emitter.emit(new Pair<String, etypes>(input, etypes.type1));
+      }
+    }, Writables.tableOf(Writables.strings(), PTypes.enums(etypes.class, set1.getTypeFamily())));
+
+    Iterable<Pair<String, etypes>> materialized = data.materialize();
+    pipeline.run();
+    for (Pair<String, etypes> pair : materialized) {
+      assertEquals(etypes.type1, pair.second());
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/FirstElementPObjectIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/FirstElementPObjectIT.java b/crunch-core/src/it/java/org/apache/crunch/FirstElementPObjectIT.java
new file mode 100644
index 0000000..d985e10
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/FirstElementPObjectIT.java
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.lang.String;
+
+import org.apache.crunch.PCollection;
+import org.apache.crunch.PObject;
+import org.apache.crunch.impl.mem.MemPipeline;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.materialize.pobject.FirstElementPObject;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.junit.Rule;
+import org.junit.Test;
+
+@SuppressWarnings("serial")
+public class FirstElementPObjectIT {
+
+  private static final String FIRST_SHAKESPEARE_LINE =
+      "***The Project Gutenberg's Etext of Shakespeare's First Folio***";
+
+  @Rule
+  public TemporaryPath tmpDir = TemporaryPaths.create();
+
+  @Test
+  public void testMRPipeline() throws IOException {
+    run(new MRPipeline(FirstElementPObjectIT.class, tmpDir.getDefaultConfiguration()));
+  }
+
+  @Test
+  public void testInMemoryPipeline() throws IOException {
+    run(MemPipeline.getInstance());
+  }
+
+  public void run(Pipeline pipeline) throws IOException {
+    String shakesInputPath = tmpDir.copyResourceFileName("shakes.txt");
+    PCollection<String> shakespeare = pipeline.readTextFile(shakesInputPath);
+    PObject<String> firstLine = new FirstElementPObject<String>(shakespeare);
+    String first = firstLine.getValue();
+    assertEquals("First line in Shakespeare is wrong.", FIRST_SHAKESPEARE_LINE, first);
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/IterableReuseProtectionIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/IterableReuseProtectionIT.java b/crunch-core/src/it/java/org/apache/crunch/IterableReuseProtectionIT.java
new file mode 100644
index 0000000..da487eb
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/IterableReuseProtectionIT.java
@@ -0,0 +1,89 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.crunch.fn.IdentityFn;
+import org.apache.crunch.impl.mem.MemPipeline;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.writable.Writables;
+import org.junit.Rule;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Verify that calling the iterator method more than once on a Reducer-based
+ * Iterable is forcefully disallowed.
+ */
+public class IterableReuseProtectionIT {
+
+  @Rule
+  public TemporaryPath tmpDir = TemporaryPaths.create();
+  
+  
+  public void checkIteratorReuse(Pipeline pipeline) throws IOException {
+    Iterable<String> values = pipeline.readTextFile(tmpDir.copyResourceFileName("set1.txt"))
+        .by(IdentityFn.<String>getInstance(), Writables.strings())
+        .groupByKey()
+        .combineValues(new TestIterableReuseFn())
+        .values().materialize();
+    
+    List<String> valueList = Lists.newArrayList(values);
+    Collections.sort(valueList);
+    assertEquals(Lists.newArrayList("a", "b", "c", "e"), valueList);
+  }
+  
+  @Test
+  public void testIteratorReuse_MRPipeline() throws IOException {
+    checkIteratorReuse(new MRPipeline(IterableReuseProtectionIT.class, tmpDir.getDefaultConfiguration()));
+  }
+  
+  @Test
+  public void testIteratorReuse_InMemoryPipeline() throws IOException {
+    checkIteratorReuse(MemPipeline.getInstance());
+  }
+  
+  static class TestIterableReuseFn extends CombineFn<String, String> {
+
+    @Override
+    public void process(Pair<String, Iterable<String>> input, Emitter<Pair<String, String>> emitter) {
+      StringBuilder combinedBuilder = new StringBuilder();
+      for (String v : input.second()) {
+        combinedBuilder.append(v);
+      }
+      
+      try {
+        input.second().iterator();
+        throw new RuntimeException("Second call to iterator should throw an exception");
+      } catch (IllegalStateException e) {
+        // Expected situation
+      }
+      emitter.emit(Pair.of(input.first(), combinedBuilder.toString()));
+    }
+    
+  }
+  
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/MRPipelineIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/MRPipelineIT.java b/crunch-core/src/it/java/org/apache/crunch/MRPipelineIT.java
new file mode 100644
index 0000000..7670e88
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/MRPipelineIT.java
@@ -0,0 +1,78 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.Serializable;
+
+import org.apache.crunch.fn.FilterFns;
+import org.apache.crunch.fn.IdentityFn;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.io.To;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.writable.Writables;
+import org.junit.Rule;
+import org.junit.Test;
+
+public class MRPipelineIT implements Serializable {
+  @Rule
+  public transient TemporaryPath tmpDir = TemporaryPaths.create();
+
+  @Test
+  public void materializedColShouldBeWritten() throws Exception {
+    File textFile = tmpDir.copyResourceFile("shakes.txt");
+    Pipeline pipeline = new MRPipeline(MRPipelineIT.class, tmpDir.getDefaultConfiguration());
+    PCollection<String> genericCollection = pipeline.readTextFile(textFile.getAbsolutePath());
+    pipeline.run();
+    PCollection<String> filter = genericCollection.filter("Filtering data", FilterFns.<String>ACCEPT_ALL());
+    filter.materialize();
+    pipeline.run();
+    File file = tmpDir.getFile("output.txt");
+    Target outFile = To.textFile(file.getAbsolutePath());
+    PCollection<String> write = filter.write(outFile);
+    write.materialize();
+    pipeline.run();
+  }
+  
+  
+  
+  @Test
+  public void testPGroupedTableToMultipleOutputs() throws IOException{
+    Pipeline pipeline = new MRPipeline(MRPipelineIT.class, tmpDir.getDefaultConfiguration());
+    PGroupedTable<String, String> groupedLineTable = pipeline.readTextFile(tmpDir.copyResourceFileName("set1.txt")).by(IdentityFn.<String>getInstance(), Writables.strings()).groupByKey();
+    
+    PTable<String, String> ungroupedTableA = groupedLineTable.ungroup();
+    PTable<String, String> ungroupedTableB = groupedLineTable.ungroup();
+    
+    File outputDirA = tmpDir.getFile("output_a");
+    File outputDirB = tmpDir.getFile("output_b");
+    
+    pipeline.writeTextFile(ungroupedTableA, outputDirA.getAbsolutePath());
+    pipeline.writeTextFile(ungroupedTableB, outputDirB.getAbsolutePath());
+    pipeline.done();
+
+    // Verify that output from a single PGroupedTable can be sent to multiple collections
+    assertTrue(new File(outputDirA, "part-r-00000").exists());
+    assertTrue(new File(outputDirB, "part-r-00000").exists());
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/MapPObjectIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/MapPObjectIT.java b/crunch-core/src/it/java/org/apache/crunch/MapPObjectIT.java
new file mode 100644
index 0000000..c48284f
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/MapPObjectIT.java
@@ -0,0 +1,101 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import static junit.framework.Assert.assertEquals;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.crunch.impl.mem.MemPipeline;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.materialize.pobject.MapPObject;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.PTypeFamily;
+import org.junit.Rule;
+import org.junit.Test;
+
+import com.google.common.collect.ImmutableList;
+
+public class MapPObjectIT {
+
+  static final ImmutableList<Pair<Integer, String>> kvPairs = ImmutableList.of(Pair.of(0, "a"), Pair.of(1, "b"),
+      Pair.of(2, "c"), Pair.of(3, "e"));
+
+  public void assertMatches(Map<Integer, String> m) {
+    for (Integer k : m.keySet()) {
+      assertEquals(kvPairs.get(k).second(), m.get(k));
+    }
+  }
+
+  private static class Set1Mapper extends MapFn<String, Pair<Integer, String>> {
+    @Override
+    public Pair<Integer, String> map(String input) {
+
+      int k = -1;
+      if (input.equals("a"))
+        k = 0;
+      else if (input.equals("b"))
+        k = 1;
+      else if (input.equals("c"))
+        k = 2;
+      else if (input.equals("e"))
+        k = 3;
+      return Pair.of(k, input);
+    }
+  }
+  @Rule
+  public TemporaryPath tmpDir = TemporaryPaths.create();
+
+  @Test
+  public void testMemMapPObject() {
+    PTable<Integer, String> table = MemPipeline.tableOf(kvPairs);
+    PObject<Map<Integer, String>> map = new MapPObject<Integer, String>(table);
+    assertMatches(map.getValue());
+  }
+
+  @Test
+  public void testMemAsMap() {
+    PTable<Integer, String> table = MemPipeline.tableOf(kvPairs);
+    assertMatches(table.asMap().getValue());
+  }
+
+  private PTable<Integer, String> getMRPTable() throws IOException {
+    Pipeline p = new MRPipeline(MaterializeToMapIT.class, tmpDir.getDefaultConfiguration());
+    String inputFile = tmpDir.copyResourceFileName("set1.txt");
+    PCollection<String> c = p.readTextFile(inputFile);
+    PTypeFamily tf = c.getTypeFamily();
+    PTable<Integer, String> table = c.parallelDo(new Set1Mapper(), tf.tableOf(tf.ints(),
+        tf.strings()));
+    return table;
+  }
+
+  @Test
+  public void testMRMapPObject() throws IOException {
+    PTable<Integer, String> table = getMRPTable();
+    PObject<Map<Integer, String>> map = new MapPObject<Integer, String>(table);
+    assertMatches(map.getValue());
+  }
+
+  @Test
+  public void testMRAsMap() throws IOException {
+    PTable<Integer, String> table = getMRPTable();
+    assertMatches(table.asMap().getValue());
+  }
+}


[28/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/lib/join/InnerJoinFn.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/lib/join/InnerJoinFn.java b/crunch-core/src/main/java/org/apache/crunch/lib/join/InnerJoinFn.java
new file mode 100644
index 0000000..a3d30d2
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/lib/join/InnerJoinFn.java
@@ -0,0 +1,78 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib.join;
+
+import java.util.List;
+
+import org.apache.crunch.Emitter;
+import org.apache.crunch.Pair;
+import org.apache.crunch.types.PType;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Used to perform the last step of an inner join.
+ * 
+ * @param <K> Type of the keys.
+ * @param <U> Type of the first {@link org.apache.crunch.PTable}'s values
+ * @param <V> Type of the second {@link org.apache.crunch.PTable}'s values
+ */
+public class InnerJoinFn<K, U, V> extends JoinFn<K, U, V> {
+
+  private transient K lastKey;
+  private transient List<U> leftValues;
+
+  public InnerJoinFn(PType<K> keyType, PType<U> leftValueType) {
+    super(keyType, leftValueType);
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public void initialize() {
+    super.initialize();
+    lastKey = null;
+    this.leftValues = Lists.newArrayList();
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public void join(K key, int id, Iterable<Pair<U, V>> pairs, Emitter<Pair<K, Pair<U, V>>> emitter) {
+    if (!key.equals(lastKey)) {
+      lastKey = keyType.getDetachedValue(key);
+      leftValues.clear();
+    }
+    if (id == 0) { // from left
+      for (Pair<U, V> pair : pairs) {
+        if (pair.first() != null)
+          leftValues.add(leftValueType.getDetachedValue(pair.first()));
+      }
+    } else { // from right
+      for (Pair<U, V> pair : pairs) {
+        for (U u : leftValues) {
+          emitter.emit(Pair.of(lastKey, Pair.of(u, pair.second())));
+        }
+      }
+    }
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public String getJoinType() {
+    return "innerJoin";
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/lib/join/JoinFn.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/lib/join/JoinFn.java b/crunch-core/src/main/java/org/apache/crunch/lib/join/JoinFn.java
new file mode 100644
index 0000000..99aea5a
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/lib/join/JoinFn.java
@@ -0,0 +1,81 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib.join;
+
+import org.apache.crunch.DoFn;
+import org.apache.crunch.Emitter;
+import org.apache.crunch.Pair;
+import org.apache.crunch.types.PType;
+
+/**
+ * Represents a {@link org.apache.crunch.DoFn} for performing joins.
+ * 
+ * @param <K> Type of the keys.
+ * @param <U> Type of the first {@link org.apache.crunch.PTable}'s values
+ * @param <V> Type of the second {@link org.apache.crunch.PTable}'s values
+ */
+public abstract class JoinFn<K, U, V> extends
+    DoFn<Pair<Pair<K, Integer>, Iterable<Pair<U, V>>>, Pair<K, Pair<U, V>>> {
+
+  protected PType<K> keyType;
+  protected PType<U> leftValueType;
+
+  /**
+   * Instantiate with the PType of the value of the left side of the join (used for creating deep
+   * copies of values).
+   * 
+   * @param keyType The PType of the value used as the key of the join
+   * @param leftValueType The PType of the value type of the left side of the join
+   */
+  public JoinFn(PType<K> keyType, PType<U> leftValueType) {
+    this.keyType = keyType;
+    this.leftValueType = leftValueType;
+  }
+
+  @Override
+  public void initialize() {
+    this.keyType.initialize(getConfiguration());
+    this.leftValueType.initialize(getConfiguration());
+  }
+
+  /** @return The name of this join type (e.g. innerJoin, leftOuterJoin). */
+  public abstract String getJoinType();
+
+  /**
+   * Performs the actual joining.
+   * 
+   * @param key The key for this grouping of values.
+   * @param id The side that this group of values is from (0 -> left, 1 -> right).
+   * @param pairs The group of values associated with this key and id pair.
+   * @param emitter The emitter to send the output to.
+   */
+  public abstract void join(K key, int id, Iterable<Pair<U, V>> pairs,
+      Emitter<Pair<K, Pair<U, V>>> emitter);
+
+  /**
+   * Split up the input record to make coding a bit more manageable.
+   * 
+   * @param input The input record.
+   * @param emitter The emitter to send the output to.
+   */
+  @Override
+  public void process(Pair<Pair<K, Integer>, Iterable<Pair<U, V>>> input,
+      Emitter<Pair<K, Pair<U, V>>> emitter) {
+    join(input.first().first(), input.first().second(), input.second(), emitter);
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/lib/join/JoinUtils.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/lib/join/JoinUtils.java b/crunch-core/src/main/java/org/apache/crunch/lib/join/JoinUtils.java
new file mode 100644
index 0000000..6efeccb
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/lib/join/JoinUtils.java
@@ -0,0 +1,126 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib.join;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.IndexedRecord;
+import org.apache.avro.io.BinaryData;
+import org.apache.avro.mapred.AvroJob;
+import org.apache.avro.mapred.AvroKey;
+import org.apache.avro.mapred.AvroValue;
+import org.apache.avro.mapred.AvroWrapper;
+import org.apache.avro.reflect.ReflectData;
+import org.apache.crunch.types.PTypeFamily;
+import org.apache.crunch.types.writable.TupleWritable;
+import org.apache.crunch.types.writable.WritableTypeFamily;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.DataInputBuffer;
+import org.apache.hadoop.io.RawComparator;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Partitioner;
+
+/**
+ * Utilities that are useful in joining multiple data sets via a MapReduce.
+ * 
+ */
+public class JoinUtils {
+
+  public static Class<? extends Partitioner> getPartitionerClass(PTypeFamily typeFamily) {
+    if (typeFamily == WritableTypeFamily.getInstance()) {
+      return TupleWritablePartitioner.class;
+    } else {
+      return AvroIndexedRecordPartitioner.class;
+    }
+  }
+
+  public static Class<? extends RawComparator> getGroupingComparator(PTypeFamily typeFamily) {
+    if (typeFamily == WritableTypeFamily.getInstance()) {
+      return TupleWritableComparator.class;
+    } else {
+      return AvroPairGroupingComparator.class;
+    }
+  }
+
+  public static class TupleWritablePartitioner extends Partitioner<TupleWritable, Writable> {
+    @Override
+    public int getPartition(TupleWritable key, Writable value, int numPartitions) {
+      return (Math.abs(key.get(0).hashCode()) & Integer.MAX_VALUE) % numPartitions;
+    }
+  }
+
+  public static class TupleWritableComparator implements RawComparator<TupleWritable> {
+
+    private DataInputBuffer buffer = new DataInputBuffer();
+    private TupleWritable key1 = new TupleWritable();
+    private TupleWritable key2 = new TupleWritable();
+
+    @Override
+    public int compare(TupleWritable o1, TupleWritable o2) {
+      return ((WritableComparable) o1.get(0)).compareTo((WritableComparable) o2.get(0));
+    }
+
+    @Override
+    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+      try {
+        buffer.reset(b1, s1, l1);
+        key1.readFields(buffer);
+
+        buffer.reset(b2, s2, l2);
+        key2.readFields(buffer);
+      } catch (Exception e) {
+        throw new RuntimeException(e);
+      }
+
+      return compare(key1, key2);
+    }
+  }
+
+  public static class AvroIndexedRecordPartitioner<K, V> extends Partitioner<AvroKey<K>, AvroValue<V>> {
+    @Override
+    public int getPartition(AvroKey<K> key, AvroValue<V> value, int numPartitions) {
+      IndexedRecord record = (IndexedRecord) key.datum();
+      return (Math.abs(record.get(0).hashCode()) & Integer.MAX_VALUE) % numPartitions;
+    }
+  }
+
+  public static class AvroPairGroupingComparator<T> extends Configured implements RawComparator<AvroWrapper<T>> {
+    private Schema schema;
+
+    @Override
+    public void setConf(Configuration conf) {
+      super.setConf(conf);
+      if (conf != null) {
+        Schema mapOutputSchema = AvroJob.getMapOutputSchema(conf);
+        Schema keySchema = org.apache.avro.mapred.Pair.getKeySchema(mapOutputSchema);
+        schema = keySchema.getFields().get(0).schema();
+      }
+    }
+
+    @Override
+    public int compare(AvroWrapper<T> x, AvroWrapper<T> y) {
+      return ReflectData.get().compare(x.datum(), y.datum(), schema);
+    }
+
+    @Override
+    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+      return BinaryData.compare(b1, s1, l1, b2, s2, l2, schema);
+    }
+  }
+}
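
As a rough illustration (not part of this patch): the two lookup methods above only choose classes, and the Crunch planner normally applies them itself when it configures the shuffle for a join. A hand-built Hadoop job showing what that configuration amounts to might look like the sketch below; the job and configuration objects are assumptions for the sketch only.

import org.apache.crunch.lib.join.JoinUtils;
import org.apache.crunch.types.PTypeFamily;
import org.apache.crunch.types.writable.WritableTypeFamily;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class JoinShuffleSetupSketch {
  public static void main(String[] args) throws Exception {
    // For Writable-based join keys we get the TupleWritable-aware classes;
    // an Avro type family would select the AvroKey-based ones instead.
    PTypeFamily tf = WritableTypeFamily.getInstance();

    Job job = new Job(new Configuration());  // illustrative job, not one built by Crunch
    job.setPartitionerClass(JoinUtils.getPartitionerClass(tf));
    job.setGroupingComparatorClass(JoinUtils.getGroupingComparator(tf));
  }
}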

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/lib/join/LeftOuterJoinFn.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/lib/join/LeftOuterJoinFn.java b/crunch-core/src/main/java/org/apache/crunch/lib/join/LeftOuterJoinFn.java
new file mode 100644
index 0000000..731c496
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/lib/join/LeftOuterJoinFn.java
@@ -0,0 +1,98 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib.join;
+
+import java.util.List;
+
+import org.apache.crunch.Emitter;
+import org.apache.crunch.Pair;
+import org.apache.crunch.types.PType;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Used to perform the last step of a left outer join.
+ * 
+ * @param <K> Type of the keys.
+ * @param <U> Type of the first {@link org.apache.crunch.PTable}'s values
+ * @param <V> Type of the second {@link org.apache.crunch.PTable}'s values
+ */
+public class LeftOuterJoinFn<K, U, V> extends JoinFn<K, U, V> {
+
+  private transient int lastId;
+  private transient K lastKey;
+  private transient List<U> leftValues;
+
+  public LeftOuterJoinFn(PType<K> keyType, PType<U> leftValueType) {
+    super(keyType, leftValueType);
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public void initialize() {
+    super.initialize();
+    lastId = 1;
+    lastKey = null;
+    this.leftValues = Lists.newArrayList();
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public void join(K key, int id, Iterable<Pair<U, V>> pairs, Emitter<Pair<K, Pair<U, V>>> emitter) {
+    if (!key.equals(lastKey)) {
+      // Make sure that left side always gets emitted.
+      if (0 == lastId) {
+        for (U u : leftValues) {
+          emitter.emit(Pair.of(lastKey, Pair.of(u, (V) null)));
+        }
+      }
+      lastKey = keyType.getDetachedValue(key);
+      leftValues.clear();
+    }
+    if (id == 0) {
+      for (Pair<U, V> pair : pairs) {
+        if (pair.first() != null)
+          leftValues.add(leftValueType.getDetachedValue(pair.first()));
+      }
+    } else {
+      for (Pair<U, V> pair : pairs) {
+        for (U u : leftValues) {
+          emitter.emit(Pair.of(lastKey, Pair.of(u, pair.second())));
+        }
+      }
+    }
+
+    lastId = id;
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public void cleanup(Emitter<Pair<K, Pair<U, V>>> emitter) {
+    if (0 == lastId) {
+      for (U u : leftValues) {
+        emitter.emit(Pair.of(lastKey, Pair.of(u, (V) null)));
+      }
+    }
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public String getJoinType() {
+    return "leftOuterJoin";
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/lib/join/MapsideJoin.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/lib/join/MapsideJoin.java b/crunch-core/src/main/java/org/apache/crunch/lib/join/MapsideJoin.java
new file mode 100644
index 0000000..56476c1
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/lib/join/MapsideJoin.java
@@ -0,0 +1,164 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib.join;
+
+import java.io.IOException;
+
+import org.apache.crunch.CrunchRuntimeException;
+import org.apache.crunch.DoFn;
+import org.apache.crunch.Emitter;
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pair;
+import org.apache.crunch.ParallelDoOptions;
+import org.apache.crunch.SourceTarget;
+import org.apache.crunch.io.ReadableSourceTarget;
+import org.apache.crunch.materialize.MaterializableIterable;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.PTypeFamily;
+import org.apache.crunch.util.DistCache;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+
+import com.google.common.collect.ArrayListMultimap;
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.Multimap;
+
+/**
+ * Utility for doing map side joins on a common key between two {@link PTable}s.
+ * <p>
+ * A map side join is an optimized join which doesn't use a reducer; instead,
+ * the right side of the join is loaded into memory and the join is performed in
+ * a mapper. An important implication of this style of join is that the output
+ * of the join is not sorted, unlike the output of a conventional
+ * (reducer-based) join.
+ * <p>
+ * <b>Note:</b> This utility is only supported when running with an
+ * {@link org.apache.crunch.impl.mr.MRPipeline MRPipeline} as the pipeline.
+ */
+public class MapsideJoin {
+
+  /**
+   * Join two tables using a map side join. The right-side table will be loaded
+   * fully in memory, so this method should only be used if the right side
+   * table's contents can fit in the memory allocated to mappers. The join
+   * performed by this method is an inner join.
+   * 
+   * @param left
+   *          The left-side table of the join
+   * @param right
+   *          The right-side table of the join, whose contents will be fully
+   *          read into memory
+   * @return A table keyed on the join key, containing pairs of joined values
+   */
+  public static <K, U, V> PTable<K, Pair<U, V>> join(PTable<K, U> left, PTable<K, V> right) {
+    PTypeFamily tf = left.getTypeFamily();
+    Iterable<Pair<K, V>> iterable = right.materialize();
+
+    if (iterable instanceof MaterializableIterable) {
+      MaterializableIterable<Pair<K, V>> mi = (MaterializableIterable<Pair<K, V>>) iterable;
+      MapsideJoinDoFn<K, U, V> mapJoinDoFn = new MapsideJoinDoFn<K, U, V>(mi.getPath().toString(),
+          right.getPType());
+      ParallelDoOptions.Builder optionsBuilder = ParallelDoOptions.builder();
+      if (mi.isSourceTarget()) {
+        optionsBuilder.sourceTargets((SourceTarget) mi.getSource());
+      }
+      return left.parallelDo("mapjoin", mapJoinDoFn,
+          tf.tableOf(left.getKeyType(), tf.pairs(left.getValueType(), right.getValueType())),
+          optionsBuilder.build());
+    } else { // in-memory pipeline
+      return left.parallelDo(new InMemoryJoinFn<K, U, V>(iterable),
+          tf.tableOf(left.getKeyType(), tf.pairs(left.getValueType(), right.getValueType())));
+    }
+  }
+
+  static class InMemoryJoinFn<K, U, V> extends DoFn<Pair<K, U>, Pair<K, Pair<U, V>>> {
+
+    private Multimap<K, V> joinMap;
+    
+    public InMemoryJoinFn(Iterable<Pair<K, V>> iterable) {
+      joinMap = HashMultimap.create();
+      for (Pair<K, V> joinPair : iterable) {
+        joinMap.put(joinPair.first(), joinPair.second());
+      }
+    }
+    
+    @Override
+    public void process(Pair<K, U> input, Emitter<Pair<K, Pair<U, V>>> emitter) {
+      K key = input.first();
+      U value = input.second();
+      for (V joinValue : joinMap.get(key)) {
+        Pair<U, V> valuePair = Pair.of(value, joinValue);
+        emitter.emit(Pair.of(key, valuePair));
+      }
+    }
+  }
+  
+  static class MapsideJoinDoFn<K, U, V> extends DoFn<Pair<K, U>, Pair<K, Pair<U, V>>> {
+
+    private String inputPath;
+    private PType<Pair<K, V>> ptype;
+    private Multimap<K, V> joinMap;
+
+    public MapsideJoinDoFn(String inputPath, PType<Pair<K, V>> ptype) {
+      this.inputPath = inputPath;
+      this.ptype = ptype;
+    }
+
+    private Path getCacheFilePath() {
+      Path local = DistCache.getPathToCacheFile(new Path(inputPath), getConfiguration());
+      if (local == null) {
+        throw new CrunchRuntimeException("Can't find local cache file for '" + inputPath + "'");
+      }
+      return local;
+    }
+
+    @Override
+    public void configure(Configuration conf) {
+      DistCache.addCacheFile(new Path(inputPath), conf);
+    }
+    
+    @Override
+    public void initialize() {
+      super.initialize();
+
+      ReadableSourceTarget<Pair<K, V>> sourceTarget = ptype.getDefaultFileSource(
+          getCacheFilePath());
+      Iterable<Pair<K, V>> iterable = null;
+      try {
+        iterable = sourceTarget.read(getConfiguration());
+      } catch (IOException e) {
+        throw new CrunchRuntimeException("Error reading right-side of map side join: ", e);
+      }
+
+      joinMap = ArrayListMultimap.create();
+      for (Pair<K, V> joinPair : iterable) {
+        joinMap.put(joinPair.first(), joinPair.second());
+      }
+    }
+
+    @Override
+    public void process(Pair<K, U> input, Emitter<Pair<K, Pair<U, V>>> emitter) {
+      K key = input.first();
+      U value = input.second();
+      for (V joinValue : joinMap.get(key)) {
+        Pair<U, V> valuePair = Pair.of(value, joinValue);
+        emitter.emit(Pair.of(key, valuePair));
+      }
+    }
+  }
+}
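
A rough usage sketch (not part of this patch): because in-memory tables are not MaterializableIterable instances, the InMemoryJoinFn branch above is taken when the join runs in memory, which makes the inner-join semantics easy to see with a couple of tiny tables. MemPipeline.typedTableOf and the table contents below are illustrative assumptions.

import org.apache.crunch.PTable;
import org.apache.crunch.Pair;
import org.apache.crunch.impl.mem.MemPipeline;
import org.apache.crunch.lib.join.MapsideJoin;
import org.apache.crunch.types.writable.Writables;

import com.google.common.collect.ImmutableList;

public class MapsideJoinSketch {
  public static void main(String[] args) {
    // Left: user id -> name; right: user id -> city.
    PTable<Integer, String> names = MemPipeline.typedTableOf(
        Writables.tableOf(Writables.ints(), Writables.strings()),
        ImmutableList.of(Pair.of(1, "alice"), Pair.of(2, "bob")));
    PTable<Integer, String> cities = MemPipeline.typedTableOf(
        Writables.tableOf(Writables.ints(), Writables.strings()),
        ImmutableList.of(Pair.of(1, "paris"), Pair.of(3, "oslo")));

    // Inner join semantics: only key 1 is present on both sides, so the output
    // is a single entry, (1, (alice, paris)).
    PTable<Integer, Pair<String, String>> joined = MapsideJoin.join(names, cities);
    for (Pair<Integer, Pair<String, String>> entry : joined.materialize()) {
      System.out.println(entry);
    }
  }
}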

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/lib/join/RightOuterJoinFn.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/lib/join/RightOuterJoinFn.java b/crunch-core/src/main/java/org/apache/crunch/lib/join/RightOuterJoinFn.java
new file mode 100644
index 0000000..2789d40
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/lib/join/RightOuterJoinFn.java
@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib.join;
+
+import java.util.List;
+
+import org.apache.crunch.Emitter;
+import org.apache.crunch.Pair;
+import org.apache.crunch.types.PType;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Used to perform the last step of a right outer join.
+ * 
+ * @param <K> Type of the keys.
+ * @param <U> Type of the first {@link org.apache.crunch.PTable}'s values
+ * @param <V> Type of the second {@link org.apache.crunch.PTable}'s values
+ */
+public class RightOuterJoinFn<K, U, V> extends JoinFn<K, U, V> {
+
+  private transient K lastKey;
+  private transient List<U> leftValues;
+
+  public RightOuterJoinFn(PType<K> keyType, PType<U> leftValueType) {
+    super(keyType, leftValueType);
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public void initialize() {
+    super.initialize();
+    lastKey = null;
+    this.leftValues = Lists.newArrayList();
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public void join(K key, int id, Iterable<Pair<U, V>> pairs, Emitter<Pair<K, Pair<U, V>>> emitter) {
+    if (!key.equals(lastKey)) {
+      lastKey = keyType.getDetachedValue(key);
+      leftValues.clear();
+    }
+    if (id == 0) {
+      for (Pair<U, V> pair : pairs) {
+        if (pair.first() != null)
+          leftValues.add(leftValueType.getDetachedValue(pair.first()));
+      }
+    } else {
+      for (Pair<U, V> pair : pairs) {
+        // Make sure that right side gets emitted.
+        if (leftValues.isEmpty()) {
+          leftValues.add(null);
+        }
+
+        for (U u : leftValues) {
+          emitter.emit(Pair.of(lastKey, Pair.of(u, pair.second())));
+        }
+      }
+    }
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public String getJoinType() {
+    return "rightOuterJoin";
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/lib/join/package-info.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/lib/join/package-info.java b/crunch-core/src/main/java/org/apache/crunch/lib/join/package-info.java
new file mode 100644
index 0000000..f1ad9f1
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/lib/join/package-info.java
@@ -0,0 +1,22 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Inner and outer joins on collections.
+ */
+package org.apache.crunch.lib.join;

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/lib/package-info.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/lib/package-info.java b/crunch-core/src/main/java/org/apache/crunch/lib/package-info.java
new file mode 100644
index 0000000..2695787
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/lib/package-info.java
@@ -0,0 +1,22 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Joining, sorting, aggregating, and other commonly used functionality.
+ */
+package org.apache.crunch.lib;

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/lib/sort/Comparators.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/lib/sort/Comparators.java b/crunch-core/src/main/java/org/apache/crunch/lib/sort/Comparators.java
new file mode 100644
index 0000000..ae7f49a
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/lib/sort/Comparators.java
@@ -0,0 +1,187 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib.sort;
+
+import java.util.Arrays;
+
+import org.apache.avro.Schema;
+import org.apache.avro.io.BinaryData;
+import org.apache.avro.mapred.AvroKey;
+import org.apache.avro.reflect.ReflectData;
+import org.apache.crunch.lib.Sort.ColumnOrder;
+import org.apache.crunch.lib.Sort.Order;
+import org.apache.crunch.types.writable.TupleWritable;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.RawComparator;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.WritableComparator;
+import org.apache.hadoop.mapred.JobConf;
+
+import com.google.common.base.Function;
+import com.google.common.base.Joiner;
+import com.google.common.collect.Iterables;
+
+/**
+ * A collection of {@code RawComparator<T>} implementations that are used by Crunch's {@code Sort} library.
+ */
+public class Comparators {
+  
+  public static class ReverseWritableComparator<T> extends Configured implements RawComparator<T> {
+
+    private RawComparator<T> comparator;
+
+    @SuppressWarnings("unchecked")
+    @Override
+    public void setConf(Configuration conf) {
+      super.setConf(conf);
+      if (conf != null) {
+        JobConf jobConf = new JobConf(conf);
+        comparator = WritableComparator.get(jobConf.getMapOutputKeyClass().asSubclass(WritableComparable.class));
+      }
+    }
+
+    @Override
+    public int compare(byte[] arg0, int arg1, int arg2, byte[] arg3, int arg4, int arg5) {
+      return -comparator.compare(arg0, arg1, arg2, arg3, arg4, arg5);
+    }
+
+    @Override
+    public int compare(T o1, T o2) {
+      return -comparator.compare(o1, o2);
+    }
+  }
+
+  public static class ReverseAvroComparator<T> extends Configured implements RawComparator<AvroKey<T>> {
+
+    private Schema schema;
+
+    @Override
+    public void setConf(Configuration conf) {
+      super.setConf(conf);
+      if (conf != null) {
+        schema = (new Schema.Parser()).parse(conf.get("crunch.schema"));
+      }
+    }
+
+    @Override
+    public int compare(AvroKey<T> o1, AvroKey<T> o2) {
+      return -ReflectData.get().compare(o1.datum(), o2.datum(), schema);
+    }
+
+    @Override
+    public int compare(byte[] arg0, int arg1, int arg2, byte[] arg3, int arg4, int arg5) {
+      return -BinaryData.compare(arg0, arg1, arg2, arg3, arg4, arg5, schema);
+    }
+  }
+
+  public static class TupleWritableComparator extends WritableComparator implements Configurable {
+
+    private static final String CRUNCH_ORDERING_PROPERTY = "crunch.ordering";
+
+    private Configuration conf;
+    private ColumnOrder[] columnOrders;
+
+    public TupleWritableComparator() {
+      super(TupleWritable.class, true);
+    }
+
+    public static void configureOrdering(Configuration conf, Order... orders) {
+      conf.set(CRUNCH_ORDERING_PROPERTY,
+          Joiner.on(",").join(Iterables.transform(Arrays.asList(orders), new Function<Order, String>() {
+            @Override
+            public String apply(Order o) {
+              return o.name();
+            }
+          })));
+    }
+
+    public static void configureOrdering(Configuration conf, ColumnOrder... columnOrders) {
+      conf.set(CRUNCH_ORDERING_PROPERTY,
+          Joiner.on(",").join(Iterables.transform(Arrays.asList(columnOrders), new Function<ColumnOrder, String>() {
+            @Override
+            public String apply(ColumnOrder o) {
+              return o.column() + ";" + o.order().name();
+            }
+          })));
+    }
+
+    @Override
+    public int compare(WritableComparable a, WritableComparable b) {
+      TupleWritable ta = (TupleWritable) a;
+      TupleWritable tb = (TupleWritable) b;
+      for (int index = 0; index < columnOrders.length; index++) {
+        int order = 1;
+        if (columnOrders[index].order() == Order.ASCENDING) {
+          order = 1;
+        } else if (columnOrders[index].order() == Order.DESCENDING) {
+          order = -1;
+        } else { // ignore
+          continue;
+        }
+        if (!ta.has(index) && !tb.has(index)) {
+          continue;
+        } else if (ta.has(index) && !tb.has(index)) {
+          return order;
+        } else if (!ta.has(index) && tb.has(index)) {
+          return -order;
+        } else {
+          Writable v1 = ta.get(index);
+          Writable v2 = tb.get(index);
+          if (v1 != v2 && (v1 != null && !v1.equals(v2))) {
+            if (v1 instanceof WritableComparable && v2 instanceof WritableComparable) {
+              int cmp = ((WritableComparable) v1).compareTo((WritableComparable) v2);
+              if (cmp != 0) {
+                return order * cmp;
+              }
+            } else {
+              int cmp = v1.hashCode() - v2.hashCode();
+              if (cmp != 0) {
+                return order * cmp;
+              }
+            }
+          }
+        }
+      }
+      return 0; // ordering using specified cols found no differences
+    }
+
+    @Override
+    public Configuration getConf() {
+      return conf;
+    }
+
+    @Override
+    public void setConf(Configuration conf) {
+      this.conf = conf;
+      if (conf != null) {
+        String ordering = conf.get(CRUNCH_ORDERING_PROPERTY);
+        String[] columnOrderNames = ordering.split(",");
+        columnOrders = new ColumnOrder[columnOrderNames.length];
+        for (int i = 0; i < columnOrders.length; i++) {
+          String[] split = columnOrderNames[i].split(";");
+          int column = Integer.parseInt(split[0]);
+          Order order = Order.valueOf(split[1]);
+          columnOrders[i] = ColumnOrder.by(column, order);
+        }
+      }
+    }
+  }
+}
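
To make the configuration round trip above concrete: configureOrdering serializes the column orders into the "crunch.ordering" property as column;order pairs joined by commas, and setConf parses that string back into a ColumnOrder[]. A small, self-contained illustration follows; the class name is just for the sketch.

import org.apache.crunch.lib.Sort.ColumnOrder;
import org.apache.crunch.lib.Sort.Order;
import org.apache.crunch.lib.sort.Comparators.TupleWritableComparator;
import org.apache.hadoop.conf.Configuration;

public class OrderingConfigSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();

    // Sort column 1 ascending and column 2 descending.
    TupleWritableComparator.configureOrdering(conf,
        ColumnOrder.by(1, Order.ASCENDING), ColumnOrder.by(2, Order.DESCENDING));

    // The encoded form read back by TupleWritableComparator.setConf:
    // prints "1;ASCENDING,2;DESCENDING"
    System.out.println(conf.get("crunch.ordering"));
  }
}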

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/lib/sort/SortFns.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/lib/sort/SortFns.java b/crunch-core/src/main/java/org/apache/crunch/lib/sort/SortFns.java
new file mode 100644
index 0000000..be218f6
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/lib/sort/SortFns.java
@@ -0,0 +1,210 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib.sort;
+
+import java.util.List;
+import java.util.UUID;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.crunch.MapFn;
+import org.apache.crunch.Tuple;
+import org.apache.crunch.lib.Sort.ColumnOrder;
+import org.apache.crunch.lib.Sort.Order;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.PTypeFamily;
+import org.apache.crunch.types.TupleFactory;
+import org.apache.crunch.types.avro.AvroType;
+import org.apache.crunch.types.avro.AvroTypeFamily;
+import org.apache.crunch.types.avro.Avros;
+
+import com.google.common.collect.Lists;
+
+/**
+ * A set of {@code DoFn}s that are used by Crunch's {@code Sort} library.
+ */
+public class SortFns {
+
+  /**
+   * Extracts a single indexed key from a {@code Tuple} instance.
+   */
+  public static class SingleKeyFn<V extends Tuple, K> extends MapFn<V, K> {
+    private final int index;
+    
+    public SingleKeyFn(int index) {
+      this.index = index;
+    }
+
+    @Override
+    public K map(V input) {
+      return (K) input.get(index);
+    }
+  }
+
+  /**
+   * Extracts a composite key from a {@code Tuple} instance.
+   */
+  public static class TupleKeyFn<V extends Tuple, K extends Tuple> extends MapFn<V, K> {
+    private final int[] indices;
+    private final TupleFactory tupleFactory;
+    
+    public TupleKeyFn(int[] indices, TupleFactory tupleFactory) {
+      this.indices = indices;
+      this.tupleFactory = tupleFactory;
+    }
+    
+    @Override
+    public K map(V input) {
+      Object[] values = new Object[indices.length];
+      for (int i = 0; i < indices.length; i++) {
+        values[i] = input.get(indices[i]);
+      }
+      return (K) tupleFactory.makeTuple(values);
+    }
+  }
+  
+  /**
+   * Pulls a composite set of keys from an Avro {@code GenericRecord} instance.
+   */
+  public static class AvroGenericFn<V extends Tuple> extends MapFn<V, GenericRecord> {
+
+    private final int[] indices;
+    private final String schemaJson;
+    private transient Schema schema;
+    
+    public AvroGenericFn(int[] indices, Schema schema) {
+      this.indices = indices;
+      this.schemaJson = schema.toString();
+    }
+    
+    @Override
+    public void initialize() {
+      this.schema = (new Schema.Parser()).parse(schemaJson);
+    }
+    
+    @Override
+    public GenericRecord map(V input) {
+      GenericRecord rec = new GenericData.Record(schema);
+      for (int i = 0; i < indices.length; i++) {
+        rec.put(i, input.get(indices[i]));
+      }
+      return rec;
+    }
+  }
+  
+  /**
+   * Constructs an Avro schema for the given {@code PType<S>} that respects the given column
+   * orderings.
+   */
+  public static <S> Schema createOrderedTupleSchema(PType<S> ptype, ColumnOrder[] orders) {
+    // Guarantee each tuple schema has a globally unique name
+    String tupleName = "tuple" + UUID.randomUUID().toString().replace('-', 'x');
+    Schema schema = Schema.createRecord(tupleName, "", "crunch", false);
+    List<Schema.Field> fields = Lists.newArrayList();
+    AvroType<S> parentAvroType = (AvroType<S>) ptype;
+    Schema parentAvroSchema = parentAvroType.getSchema();
+
+    for (int index = 0; index < orders.length; index++) {
+      ColumnOrder columnOrder = orders[index];
+      AvroType<?> atype = (AvroType<?>) ptype.getSubTypes().get(index);
+      Schema fieldSchema = atype.getSchema();
+      String fieldName = parentAvroSchema.getFields().get(index).name();
+      // Note: avro sorting of strings is inverted relative to how sorting works for WritableComparable
+      // Text instances: making this consistent
+      Schema.Field.Order order = columnOrder.order() == Order.DESCENDING ? Schema.Field.Order.DESCENDING :
+        Schema.Field.Order.ASCENDING;
+      fields.add(new Schema.Field(fieldName, fieldSchema, "", null, order));
+    }
+    schema.setFields(fields);
+    return schema;
+  }
+
+  /**
+   * Utility class for encapsulating key extraction logic and serialization information about
+   * key extraction.
+   */
+  public static class KeyExtraction<V extends Tuple> {
+
+    private PType<V> ptype;
+    private final ColumnOrder[] columnOrder;
+    private final int[] cols;
+    
+    private MapFn<V, Object> byFn;
+    private PType<Object> keyPType;
+    
+    public KeyExtraction(PType<V> ptype, ColumnOrder[] columnOrder) {
+      this.ptype = ptype;
+      this.columnOrder = columnOrder;
+      this.cols = new int[columnOrder.length];
+      for (int i = 0; i < columnOrder.length; i++) {
+        cols[i] = columnOrder[i].column() - 1;
+      }
+      init();
+    }
+    
+    private void init() {
+      List<PType> pt = ptype.getSubTypes();
+      PTypeFamily ptf = ptype.getFamily();
+      if (cols.length == 1) {
+        byFn = new SingleKeyFn(cols[0]);
+        keyPType = pt.get(cols[0]);
+      } else {
+        TupleFactory tf = null;
+        switch (cols.length) {
+        case 2:
+          tf = TupleFactory.PAIR;
+          keyPType = ptf.pairs(pt.get(cols[0]), pt.get(cols[1]));
+          break;
+        case 3:
+          tf = TupleFactory.TUPLE3;
+          keyPType = ptf.triples(pt.get(cols[0]), pt.get(cols[1]), pt.get(cols[2]));
+          break;
+        case 4:
+          tf = TupleFactory.TUPLE4;
+          keyPType = ptf.quads(pt.get(cols[0]), pt.get(cols[1]), pt.get(cols[2]), pt.get(cols[3]));
+          break;
+        default:
+          PType[] pts = new PType[cols.length];
+          for (int i = 0; i < pts.length; i++) {
+            pts[i] = pt.get(cols[i]);
+          }
+          tf = TupleFactory.TUPLEN;
+          keyPType = (PType<Object>) (PType<?>) ptf.tuples(pts);
+        }
+        
+        if (ptf == AvroTypeFamily.getInstance()) {
+          Schema s = createOrderedTupleSchema(keyPType, columnOrder);
+          keyPType = (PType<Object>) (PType<?>) Avros.generics(s);
+          byFn = new AvroGenericFn(cols, s);
+        } else {
+          byFn = new TupleKeyFn(cols, tf);
+        }
+      }
+      
+    }
+
+    public MapFn<V, Object> getByFn() {
+      return byFn;
+    }
+    
+    public PType<Object> getKeyType() {
+      return keyPType;
+    }
+  }
+}
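
A minimal sketch of how KeyExtraction might be wired up for a by-column sort. This is not part of the committed code; it assumes the Sort.ColumnOrder/Order helpers referenced above (ColumnOrder.by is an assumed factory) and the Avros type methods:

  // Extract a sort key on the second column (columns are 1-based), descending.
  PType<Pair<String, Long>> ptype = Avros.pairs(Avros.strings(), Avros.longs());
  ColumnOrder[] orders = new ColumnOrder[] { ColumnOrder.by(2, Order.DESCENDING) }; // assumed factory
  KeyExtraction<Pair<String, Long>> ke = new KeyExtraction<Pair<String, Long>>(ptype, orders);
  MapFn<Pair<String, Long>, Object> byFn = ke.getByFn(); // maps each tuple to its sort key
  PType<Object> keyType = ke.getKeyType();               // PType describing that key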

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/lib/sort/TotalOrderPartitioner.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/lib/sort/TotalOrderPartitioner.java b/crunch-core/src/main/java/org/apache/crunch/lib/sort/TotalOrderPartitioner.java
new file mode 100644
index 0000000..94fbdbe
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/lib/sort/TotalOrderPartitioner.java
@@ -0,0 +1,145 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib.sort;
+
+import java.io.IOException;
+import java.lang.reflect.Array;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+
+import org.apache.avro.Schema;
+import org.apache.avro.mapred.AvroKey;
+import org.apache.crunch.io.CompositePathIterable;
+import org.apache.crunch.io.avro.AvroFileReaderFactory;
+import org.apache.crunch.io.seq.SeqFileReaderFactory;
+import org.apache.crunch.types.writable.WritableDeepCopier;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.RawComparator;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Partitioner;
+
+/**
+ * A total-order {@code Partitioner} that assigns keys to partitions using a pre-computed set of
+ * split points, and that works with either Avro- or Writable-formatted keys.
+ */
+public class TotalOrderPartitioner<K, V> extends Partitioner<K, V> implements Configurable {
+
+  public static final String DEFAULT_PATH = "_partition.lst";
+  public static final String PARTITIONER_PATH = 
+    "crunch.totalorderpartitioner.path";
+  
+  private Configuration conf;
+  private Node<K> partitions;
+  
+  @Override
+  public Configuration getConf() {
+    return conf;
+  }
+
+  @Override
+  public void setConf(Configuration conf) {
+    try {
+      this.conf = conf;
+      String parts = getPartitionFile(conf);
+      final Path partFile = new Path(parts);
+      final FileSystem fs = (DEFAULT_PATH.equals(parts))
+        ? FileSystem.getLocal(conf)     // assume in DistributedCache
+        : partFile.getFileSystem(conf);
+
+      Job job = new Job(conf);
+      Class<K> keyClass = (Class<K>)job.getMapOutputKeyClass();
+      RawComparator<K> comparator =
+          (RawComparator<K>) job.getSortComparator();
+      K[] splitPoints = readPartitions(fs, partFile, keyClass, conf, comparator);
+      int numReduceTasks = job.getNumReduceTasks();
+      if (splitPoints.length != numReduceTasks - 1) {
+        throw new IOException("Wrong number of partitions in keyset");
+      }
+      partitions = new BinarySearchNode(splitPoints, comparator);
+    } catch (IOException e) {
+      throw new IllegalArgumentException("Can't read partitions file", e);
+    }
+  }
+
+  @Override
+  public int getPartition(K key, V value, int modulo) {
+    return partitions.findPartition(key);
+  }
+
+  public static void setPartitionFile(Configuration conf, Path p) {
+    conf.set(PARTITIONER_PATH, p.toString());
+  }
+
+  public static String getPartitionFile(Configuration conf) {
+    return conf.get(PARTITIONER_PATH, DEFAULT_PATH);
+  }
+  
+  @SuppressWarnings("unchecked") // map output key class
+  private K[] readPartitions(FileSystem fs, Path p, Class<K> keyClass,
+      Configuration conf, final RawComparator<K> comparator) throws IOException {
+    ArrayList<K> parts = new ArrayList<K>();
+    String schema = conf.get("crunch.schema");
+    if (schema != null) {
+      Schema s = (new Schema.Parser()).parse(schema);
+      AvroFileReaderFactory<K> a = new AvroFileReaderFactory<K>(s);
+      Iterator<K> iter = CompositePathIterable.create(fs, p, a).iterator();
+      while (iter.hasNext()) {
+        parts.add((K) new AvroKey<K>(iter.next()));
+      }
+    } else {
+      WritableDeepCopier wdc = new WritableDeepCopier(keyClass);
+      SeqFileReaderFactory<K> s = new SeqFileReaderFactory<K>(keyClass);
+      Iterator<K> iter = CompositePathIterable.create(fs, p, s).iterator();
+      while (iter.hasNext()) {
+        parts.add((K) wdc.deepCopy((Writable) iter.next()));
+      }
+    }
+    Collections.sort(parts, comparator);
+    return parts.toArray((K[])Array.newInstance(keyClass, parts.size()));
+  }
+  
+  /**
+   * Interface to the partitioner to locate a key in the partition keyset.
+   */
+  interface Node<T> {
+    /**
+     * Locate the partition for a key in keyset K, such that [Ki..Ki+1) defines a partition,
+     * with implicit K0 = -inf, Kn = +inf, and |K| = #partitions - 1.
+     */
+    int findPartition(T key);
+  }
+  
+  class BinarySearchNode implements Node<K> {
+    private final K[] splitPoints;
+    private final RawComparator<K> comparator;
+    BinarySearchNode(K[] splitPoints, RawComparator<K> comparator) {
+      this.splitPoints = splitPoints;
+      this.comparator = comparator;
+    }
+    public int findPartition(K key) {
+      final int pos = Arrays.binarySearch(splitPoints, key, comparator) + 1;
+      return (pos < 0) ? -pos : pos;
+    }
+  }
+}
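
A minimal configuration sketch, not part of the committed code; the partition file path is hypothetical, and the "crunch.schema" handling follows readPartitions() above:

  Job job = new Job(conf);
  // The file must contain exactly numReduceTasks - 1 sorted split keys.
  TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("/tmp/_partition.lst"));
  job.setPartitionerClass(TotalOrderPartitioner.class);
  // For Avro keys, readPartitions() expects the key schema under "crunch.schema":
  // job.getConfiguration().set("crunch.schema", keySchema.toString());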

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/materialize/MaterializableIterable.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/materialize/MaterializableIterable.java b/crunch-core/src/main/java/org/apache/crunch/materialize/MaterializableIterable.java
new file mode 100644
index 0000000..2dcc64f
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/materialize/MaterializableIterable.java
@@ -0,0 +1,81 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.materialize;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.crunch.CrunchRuntimeException;
+import org.apache.crunch.Pipeline;
+import org.apache.crunch.SourceTarget;
+import org.apache.crunch.io.PathTarget;
+import org.apache.crunch.io.ReadableSource;
+import org.apache.crunch.io.impl.FileSourceImpl;
+import org.apache.hadoop.fs.Path;
+
+public class MaterializableIterable<E> implements Iterable<E> {
+
+  private static final Log LOG = LogFactory.getLog(MaterializableIterable.class);
+
+  private final Pipeline pipeline;
+  private final ReadableSource<E> source;
+  private Iterable<E> materialized;
+
+  public MaterializableIterable(Pipeline pipeline, ReadableSource<E> source) {
+    this.pipeline = pipeline;
+    this.source = source;
+    this.materialized = null;
+  }
+
+  public ReadableSource<E> getSource() {
+    return source;
+  }
+
+  public boolean isSourceTarget() {
+    return (source instanceof SourceTarget);
+  }
+  
+  public Path getPath() {
+    if (source instanceof FileSourceImpl) {
+      return ((FileSourceImpl) source).getPath();
+    } else if (source instanceof PathTarget) {
+      return ((PathTarget) source).getPath();
+    }
+    return null;
+  }
+  
+  @Override
+  public Iterator<E> iterator() {
+    if (materialized == null) {
+      pipeline.run();
+      materialize();
+    }
+    return materialized.iterator();
+  }
+
+  public void materialize() {
+    try {
+      materialized = source.read(pipeline.getConfiguration());
+    } catch (IOException e) {
+      LOG.error("Could not materialize: " + source, e);
+      throw new CrunchRuntimeException(e);
+    }
+  }
+}
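
A usage sketch of the lazy behaviour above (assumes an existing Pipeline and a hypothetical input path; PCollection#materialize() is typically backed by this class):

  PCollection<String> lines = pipeline.readTextFile("/path/to/input"); // hypothetical path
  Iterable<String> materialized = lines.materialize(); // no job runs yet
  for (String line : materialized) {                   // first iterator() call triggers pipeline.run()
    System.out.println(line);
  }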

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/materialize/MaterializableMap.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/materialize/MaterializableMap.java b/crunch-core/src/main/java/org/apache/crunch/materialize/MaterializableMap.java
new file mode 100644
index 0000000..69082e2
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/materialize/MaterializableMap.java
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.materialize;
+
+import java.util.AbstractMap;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.crunch.Pair;
+
+public class MaterializableMap<K, V> extends AbstractMap<K, V> {
+
+  private Iterable<Pair<K, V>> iterable;
+  private Set<Map.Entry<K, V>> entrySet;
+
+  public MaterializableMap(Iterable<Pair<K, V>> iterable) {
+    this.iterable = iterable;
+  }
+
+  private Set<Map.Entry<K, V>> toMapEntries(Iterable<Pair<K, V>> xs) {
+    HashMap<K, V> m = new HashMap<K, V>();
+    for (Pair<K, V> x : xs)
+      m.put(x.first(), x.second());
+    return m.entrySet();
+  }
+
+  @Override
+  public Set<Map.Entry<K, V>> entrySet() {
+    if (entrySet == null)
+      entrySet = toMapEntries(iterable);
+    return entrySet;
+  }
+
+}
\ No newline at end of file
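
A usage sketch (assumes a PCollection<String> named words produced elsewhere in the pipeline; the lookup key is hypothetical):

  PTable<String, Long> counts = Aggregate.count(words);
  Map<String, Long> asMap = new MaterializableMap<String, Long>(counts.materialize());
  Long crunchCount = asMap.get("crunch"); // entrySet() is built once from the materialized pairs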

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/materialize/pobject/CollectionPObject.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/materialize/pobject/CollectionPObject.java b/crunch-core/src/main/java/org/apache/crunch/materialize/pobject/CollectionPObject.java
new file mode 100644
index 0000000..60e64b1
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/materialize/pobject/CollectionPObject.java
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.materialize.pobject;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Iterator;
+
+import org.apache.crunch.PCollection;
+
+/**
+ * A concrete implementation of {@link org.apache.crunch.materialize.pobject.PObjectImpl} whose
+ * value is a Java {@link java.util.Collection} containing the elements of the underlying {@link
+ * PCollection} for this {@link org.apache.crunch.PObject}.
+ *
+ * @param <S> The value type for elements contained in the {@code Collection} value encapsulated
+ * by this {@code PObject}.
+ */
+public class CollectionPObject<S> extends PObjectImpl<S, Collection<S>> {
+
+  /**
+   * Constructs a new instance of this {@code PObject} implementation.
+   *
+   * @param collect The backing {@code PCollection} for this {@code PObject}.
+   */
+  public CollectionPObject(PCollection<S> collect) {
+    super(collect);
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public Collection<S> process(Iterable<S> input) {
+    Collection<S> target = new ArrayList<S>();
+    Iterator<S> itr = input.iterator();
+    while (itr.hasNext()) {
+      target.add(itr.next());
+    }
+    return target;
+  }
+}
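
A usage sketch (assumes an existing PCollection<String>; getValue() triggers materialization via PObjectImpl, shown later in this commit):

  PObject<Collection<String>> snapshot = new CollectionPObject<String>(lines);
  Collection<String> values = snapshot.getValue(); // runs the pipeline on first access, then caches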

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/materialize/pobject/FirstElementPObject.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/materialize/pobject/FirstElementPObject.java b/crunch-core/src/main/java/org/apache/crunch/materialize/pobject/FirstElementPObject.java
new file mode 100644
index 0000000..aa5fd9e
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/materialize/pobject/FirstElementPObject.java
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.materialize.pobject;
+
+import java.util.Iterator;
+
+import org.apache.crunch.PCollection;
+
+/**
+ * A concrete implementation of {@link PObjectImpl} that uses the first element in the backing
+ * {@link PCollection} as the {@link org.apache.crunch.PObject} value.
+ *
+ * @param <T> The value type of this {@code PObject}.
+ */
+public class FirstElementPObject<T> extends PObjectImpl<T, T> {
+
+  /**
+   * Constructs a new instance of this {@code PObject} implementation.
+   *
+   * @param collect The backing {@code PCollection} for this {@code PObject}.
+   */
+  public FirstElementPObject(PCollection<T> collect) {
+    super(collect);
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public T process(Iterable<T> input) {
+    Iterator<T> itr = input.iterator();
+    if (itr.hasNext()) {
+      return itr.next();
+    }
+    return null;
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/materialize/pobject/MapPObject.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/materialize/pobject/MapPObject.java b/crunch-core/src/main/java/org/apache/crunch/materialize/pobject/MapPObject.java
new file mode 100644
index 0000000..243997f
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/materialize/pobject/MapPObject.java
@@ -0,0 +1,62 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.materialize.pobject;
+
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+
+import org.apache.crunch.PCollection;
+import org.apache.crunch.Pair;
+
+/**
+ * A concrete implementation of {@link PObjectImpl} whose
+ * value is a Java {@link Map}. The underlying {@link PCollection} for this
+ * {@link org.apache.crunch.PObject} must contain {@link Pair}s of values. The
+ * first element of the pair will be used as the map key, while the second element will be used
+ * as the map value.  Note that the contents of the underlying {@code PCollection} may not be fully
+ * reflected in the returned {@code Map}, since a single key may be mapped to several values in
+ * the underlying {@code PCollection}, and only one of those values will appear in the {@code
+ * Map} encapsulated by this {@code PObject}.
+ *
+ * @param <K> The type of keys for the Map.
+ * @param <V> The type of values for the Map.
+ */
+public class MapPObject<K, V> extends PObjectImpl<Pair<K, V>, Map<K, V>> {
+
+  /**
+   * Constructs a new instance of this {@code PObject} implementation.
+   *
+   * @param collect The backing {@code PCollection} for this {@code PObject}.
+   */
+  public MapPObject(PCollection<Pair<K, V>> collect) {
+    super(collect);
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public Map<K, V> process(Iterable<Pair<K, V>> input) {
+    Map<K, V> target = new HashMap<K, V>();
+    Iterator<Pair<K, V>> itr = input.iterator();
+    while (itr.hasNext()) {
+      Pair<K, V> pair = itr.next();
+      target.put(pair.first(), pair.second());
+    }
+    return target;
+  }
+}
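
A usage sketch (assumes an existing PTable<String, Long>; as noted above, duplicate keys collapse to a single map entry):

  PObject<Map<String, Long>> countsObject = new MapPObject<String, Long>(wordCounts);
  Map<String, Long> counts = countsObject.getValue(); // one entry per distinct key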

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/materialize/pobject/PObjectImpl.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/materialize/pobject/PObjectImpl.java b/crunch-core/src/main/java/org/apache/crunch/materialize/pobject/PObjectImpl.java
new file mode 100644
index 0000000..59c2ba2
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/materialize/pobject/PObjectImpl.java
@@ -0,0 +1,85 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.materialize.pobject;
+
+import org.apache.crunch.PCollection;
+import org.apache.crunch.PObject;
+import org.apache.crunch.Pipeline;
+import org.apache.crunch.Target;
+
+/**
+ * An abstract implementation of {@link PObject} that is backed by a {@link PCollection}.
+ * Clients creating a concrete implementation should override the method
+ * {@link PObjectImpl#process(Iterable)}, which transforms the backing PCollection into the
+ * singleton value encapsulated by the PObject. Once this {@code PObject}'s value has been
+ * calculated, the value is cached to prevent subsequent materializations of the backing
+ * {@code PCollection}.
+ *
+ * @param <S> The type contained in the underlying PCollection.
+ * @param <T> The type encapsulated by this PObject.
+ */
+public abstract class PObjectImpl<S, T> implements PObject<T> {
+
+  // The underlying PCollection whose contents will be used to generate the value for this
+  // PObject.
+  private PCollection<S> collection;
+
+  // A variable to hold a cached copy of the value of this {@code PObject},
+  // to prevent unnecessary materializations of the backing {@code PCollection}.
+  private T cachedValue;
+
+  // A flag indicating if a value for this {@code PObject} has been cached.
+  private boolean isCached;
+
+  /**
+   * Constructs a new instance of this {@code PObject} implementation.
+   *
+   * @param collect The backing {@code PCollection} for this {@code PObject}.
+   */
+  public PObjectImpl(PCollection<S> collect) {
+    this.collection = collect;
+    this.cachedValue = null;
+    this.isCached = false;
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public String toString() {
+    return collection.toString();
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public final T getValue() {
+    if (!isCached) {
+      cachedValue = process(collection.materialize());
+      isCached = true;
+    }
+    return cachedValue;
+  }
+
+  /**
+   * Transforms the provided Iterable, obtained from the backing {@link PCollection},
+   * into the value encapsulated by this {@code PObject}.
+   *
+   * @param input An Iterable whose elements correspond to those of the backing {@code
+   * PCollection}.
+   * @return The value of this {@code PObject}.
+   */
+  protected abstract T process(Iterable<S> input);
+}
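
A minimal sketch of a concrete subclass (LongSumPObject is a hypothetical class, not part of this commit); it reduces a backing PCollection<Long> to a single client-side sum:

  public class LongSumPObject extends PObjectImpl<Long, Long> {
    public LongSumPObject(PCollection<Long> collect) {
      super(collect);
    }

    @Override
    protected Long process(Iterable<Long> input) {
      long sum = 0L;
      for (Long value : input) {
        sum += value; // materialized values are summed on the client
      }
      return sum;
    }
  }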

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/package-info.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/package-info.java b/crunch-core/src/main/java/org/apache/crunch/package-info.java
new file mode 100644
index 0000000..38f11bc
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/package-info.java
@@ -0,0 +1,25 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Client-facing API and core abstractions.
+ *
+ * @see <a href="http://crunch.apache.org/intro.html">Introduction to
+ *      Apache Crunch</a>
+ */
+package org.apache.crunch;

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/CollectionDeepCopier.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/CollectionDeepCopier.java b/crunch-core/src/main/java/org/apache/crunch/types/CollectionDeepCopier.java
new file mode 100644
index 0000000..151ab82
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/CollectionDeepCopier.java
@@ -0,0 +1,57 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types;
+
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Performs deep copies (based on underlying PType deep copying) of Collections.
+ * 
+ * @param <T> The type of element contained in the copied Collection
+ */
+public class CollectionDeepCopier<T> implements DeepCopier<Collection<T>> {
+
+  private PType<T> elementType;
+
+  public CollectionDeepCopier(PType<T> elementType) {
+    this.elementType = elementType;
+  }
+
+  @Override
+  public void initialize(Configuration conf) {
+    this.elementType.initialize(conf);
+  }
+
+  @Override
+  public Collection<T> deepCopy(Collection<T> source) {
+    if (source == null) {
+      return null;
+    }
+    List<T> copiedCollection = Lists.newArrayListWithCapacity(source.size());
+    for (T value : source) {
+      copiedCollection.add(elementType.getDetachedValue(value));
+    }
+    return copiedCollection;
+  }
+
+}
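
A usage sketch (Writables.strings() as the element PType is an arbitrary choice; reusedValues is an assumed Collection<String>):

  CollectionDeepCopier<String> copier = new CollectionDeepCopier<String>(Writables.strings());
  copier.initialize(new Configuration());
  Collection<String> detached = copier.deepCopy(reusedValues); // element-wise getDetachedValue()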

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/Converter.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/Converter.java b/crunch-core/src/main/java/org/apache/crunch/types/Converter.java
new file mode 100644
index 0000000..a0dbb16
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/Converter.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types;
+
+import java.io.Serializable;
+
+import org.apache.crunch.DoFn;
+
+/**
+ * Converts the input key/value from a MapReduce task into the input to a
+ * {@link DoFn}, or takes the output of a {@code DoFn} and writes it to the
+ * output key/values.
+ */
+public interface Converter<K, V, S, T> extends Serializable {
+  S convertInput(K key, V value);
+
+  T convertIterableInput(K key, Iterable<V> value);
+
+  K outputKey(S value);
+
+  V outputValue(S value);
+
+  Class<K> getKeyClass();
+
+  Class<V> getValueClass();
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/DeepCopier.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/DeepCopier.java b/crunch-core/src/main/java/org/apache/crunch/types/DeepCopier.java
new file mode 100644
index 0000000..f146e86
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/DeepCopier.java
@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types;
+
+import java.io.Serializable;
+
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * Performs deep copies of values.
+ * 
+ * @param <T> The type of value that will be copied
+ */
+public interface DeepCopier<T> extends Serializable {
+
+  /**
+   * Initialize the deep copier with a job-specific configuration
+   * 
+   * @param conf Job-specific configuration
+   */
+  void initialize(Configuration conf);
+
+  /**
+   * Create a deep copy of a value.
+   * 
+   * @param source The value to be copied
+   * @return The deep copy of the value
+   */
+  T deepCopy(T source);
+
+  static class NoOpDeepCopier<V> implements DeepCopier<V> {
+
+    @Override
+    public V deepCopy(V source) {
+      return source;
+    }
+
+    @Override
+    public void initialize(Configuration conf) {
+      // No initialization needed
+    }
+
+  }
+
+}
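
A usage sketch of the no-op implementation above, which is appropriate for immutable values:

  DeepCopier<String> copier = new DeepCopier.NoOpDeepCopier<String>();
  copier.initialize(new Configuration());     // no-op
  String same = copier.deepCopy("immutable"); // returns the same instance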

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/MapDeepCopier.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/MapDeepCopier.java b/crunch-core/src/main/java/org/apache/crunch/types/MapDeepCopier.java
new file mode 100644
index 0000000..de8903b
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/MapDeepCopier.java
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types;
+
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.hadoop.conf.Configuration;
+
+import com.google.common.collect.Maps;
+
+public class MapDeepCopier<T> implements DeepCopier<Map<String, T>> {
+
+  private final PType<T> ptype;
+
+  public MapDeepCopier(PType<T> ptype) {
+    this.ptype = ptype;
+  }
+
+  @Override
+  public void initialize(Configuration conf) {
+    this.ptype.initialize(conf);
+  }
+
+  @Override
+  public Map<String, T> deepCopy(Map<String, T> source) {
+    if (source == null) {
+      return null;
+    }
+    
+    Map<String, T> deepCopyMap = Maps.newHashMap();
+    for (Entry<String, T> entry : source.entrySet()) {
+      deepCopyMap.put(entry.getKey(), ptype.getDetachedValue(entry.getValue()));
+    }
+    return deepCopyMap;
+
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/PGroupedTableType.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/PGroupedTableType.java b/crunch-core/src/main/java/org/apache/crunch/types/PGroupedTableType.java
new file mode 100644
index 0000000..d276cd6
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/PGroupedTableType.java
@@ -0,0 +1,141 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types;
+
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.crunch.GroupingOptions;
+import org.apache.crunch.MapFn;
+import org.apache.crunch.PGroupedTable;
+import org.apache.crunch.Pair;
+import org.apache.crunch.io.ReadableSourceTarget;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.TaskInputOutputContext;
+
+import com.google.common.collect.Iterables;
+
+/**
+ * The {@code PType} instance for {@link PGroupedTable} instances. Its settings
+ * are derived from the {@code PTableType} that was grouped to create the
+ * {@code PGroupedTable} instance.
+ * 
+ */
+public abstract class PGroupedTableType<K, V> implements PType<Pair<K, Iterable<V>>> {
+
+  protected static class PTypeIterable<V> implements Iterable<V> {
+    private final Iterable<Object> iterable;
+    private final MapFn<Object, V> mapFn;
+
+    public PTypeIterable(MapFn<Object, V> mapFn, Iterable<Object> iterable) {
+      this.mapFn = mapFn;
+      this.iterable = iterable;
+    }
+
+    public Iterator<V> iterator() {
+      return new Iterator<V>() {
+        Iterator<Object> iter = iterable.iterator();
+
+        public boolean hasNext() {
+          return iter.hasNext();
+        }
+
+        public V next() {
+          return mapFn.map(iter.next());
+        }
+
+        public void remove() {
+          iter.remove();
+        }
+      };
+    }
+    
+    @Override
+    public String toString() {
+      return Iterables.toString(this);
+    }
+  }
+
+  public static class PairIterableMapFn<K, V> extends MapFn<Pair<Object, Iterable<Object>>, Pair<K, Iterable<V>>> {
+    private final MapFn<Object, K> keys;
+    private final MapFn<Object, V> values;
+
+    public PairIterableMapFn(MapFn<Object, K> keys, MapFn<Object, V> values) {
+      this.keys = keys;
+      this.values = values;
+    }
+
+    @Override
+    public void configure(Configuration conf) {
+      keys.configure(conf);
+      values.configure(conf);
+    }
+    
+    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
+      keys.setContext(context);
+      values.setContext(context);
+    }
+    
+    @Override
+    public void initialize() {
+      keys.initialize();
+      values.initialize();
+    }
+
+    @Override
+    public Pair<K, Iterable<V>> map(Pair<Object, Iterable<Object>> input) {
+      return Pair.<K, Iterable<V>> of(keys.map(input.first()), new PTypeIterable(values, input.second()));
+    }
+  }
+
+  protected final PTableType<K, V> tableType;
+
+  public PGroupedTableType(PTableType<K, V> tableType) {
+    this.tableType = tableType;
+  }
+
+  public PTableType<K, V> getTableType() {
+    return tableType;
+  }
+
+  @Override
+  public PTypeFamily getFamily() {
+    return tableType.getFamily();
+  }
+
+  @Override
+  public List<PType> getSubTypes() {
+    return tableType.getSubTypes();
+  }
+
+  @Override
+  public Converter getConverter() {
+    return tableType.getConverter();
+  }
+
+  public abstract Converter getGroupingConverter();
+
+  public abstract void configureShuffle(Job job, GroupingOptions options);
+
+  @Override
+  public ReadableSourceTarget<Pair<K, Iterable<V>>> getDefaultFileSource(Path path) {
+    throw new UnsupportedOperationException("Grouped tables cannot be written out directly");
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/PTableType.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/PTableType.java b/crunch-core/src/main/java/org/apache/crunch/types/PTableType.java
new file mode 100644
index 0000000..3d06f8b
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/PTableType.java
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types;
+
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pair;
+
+/**
+ * An extension of {@code PType} specifically for {@link PTable} objects. It
+ * allows separate access to the {@code PType}s of the key and value for the
+ * {@code PTable}.
+ * 
+ */
+public interface PTableType<K, V> extends PType<Pair<K, V>> {
+  /**
+   * Returns the key type for the table.
+   */
+  PType<K> getKeyType();
+
+  /**
+   * Returns the value type for the table.
+   */
+  PType<V> getValueType();
+
+  /**
+   * Returns the grouped table version of this type.
+   */
+  PGroupedTableType<K, V> getGroupedTableType();
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/PType.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/PType.java b/crunch-core/src/main/java/org/apache/crunch/types/PType.java
new file mode 100644
index 0000000..ebddf84
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/PType.java
@@ -0,0 +1,86 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types;
+
+import java.io.Serializable;
+import java.util.List;
+
+import org.apache.crunch.DoFn;
+import org.apache.crunch.MapFn;
+import org.apache.crunch.PCollection;
+import org.apache.crunch.io.ReadableSourceTarget;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+
+/**
+ * A {@code PType} defines a mapping between a data type that is used in a Crunch pipeline and a
+ * serialization and storage format that is used to read/write data from/to HDFS. Every
+ * {@link PCollection} has an associated {@code PType} that tells Crunch how to read/write data from
+ * that {@code PCollection}.
+ * 
+ */
+public interface PType<T> extends Serializable {
+  /**
+   * Returns the Java type represented by this {@code PType}.
+   */
+  Class<T> getTypeClass();
+
+  /**
+   * Returns the {@code PTypeFamily} that this {@code PType} belongs to.
+   */
+  PTypeFamily getFamily();
+
+  MapFn<Object, T> getInputMapFn();
+
+  MapFn<T, Object> getOutputMapFn();
+
+  Converter getConverter();
+
+  /**
+   * Initialize this PType for use within a DoFn. This generally only needs to be called when using
+   * a PType for {@link #getDetachedValue(Object)}.
+   * 
+   * @param conf Configuration object
+   * @see PType#getDetachedValue(Object)
+   */
+  void initialize(Configuration conf);
+
+  /**
+   * Returns a copy of a value (or the value itself) that can safely be retained.
+   * <p>
+   * This is useful when iterable values being processed in a DoFn (via a reducer) need to be held
+   * on to for more than the scope of a single iteration, as a reducer (and therefore also a DoFn
+   * that has an Iterable as input) re-uses deserialized values. More information on object reuse is
+   * available in the {@link DoFn} class documentation.
+   * 
+   * @param value The value to be deep-copied
+   * @return A deep copy of the input value
+   */
+  T getDetachedValue(T value);
+
+  /**
+   * Returns a {@code SourceTarget} that is able to read/write data using the serialization format
+   * specified by this {@code PType}.
+   */
+  ReadableSourceTarget<T> getDefaultFileSource(Path path);
+
+  /**
+   * Returns the sub-types that make up this PType if it is a composite instance, such as a tuple.
+   */
+  List<PType> getSubTypes();
+}
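
A sketch of the getDetachedValue() pattern described above, assuming conf is the job Configuration (e.g. obtained in a DoFn's setup) and reusedValues is the Iterable side of a grouped table:

  PType<Pair<String, Long>> ptype = Writables.pairs(Writables.strings(), Writables.longs());
  ptype.initialize(conf);                          // once, before copying values
  List<Pair<String, Long>> retained = Lists.newArrayList();
  for (Pair<String, Long> value : reusedValues) {
    retained.add(ptype.getDetachedValue(value));   // safe to hold beyond the current iteration
  }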

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/PTypeFamily.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/PTypeFamily.java b/crunch-core/src/main/java/org/apache/crunch/types/PTypeFamily.java
new file mode 100644
index 0000000..9458f14
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/PTypeFamily.java
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types;
+
+import java.nio.ByteBuffer;
+import java.util.Collection;
+import java.util.Map;
+
+import org.apache.crunch.MapFn;
+import org.apache.crunch.Pair;
+import org.apache.crunch.Tuple;
+import org.apache.crunch.Tuple3;
+import org.apache.crunch.Tuple4;
+import org.apache.crunch.TupleN;
+
+/**
+ * An abstract factory for creating {@code PType} instances that have the same
+ * serialization/storage backing format.
+ * 
+ */
+public interface PTypeFamily {
+  PType<Void> nulls();
+
+  PType<String> strings();
+
+  PType<Long> longs();
+
+  PType<Integer> ints();
+
+  PType<Float> floats();
+
+  PType<Double> doubles();
+
+  PType<Boolean> booleans();
+
+  PType<ByteBuffer> bytes();
+
+  <T> PType<T> records(Class<T> clazz);
+
+  <T> PType<Collection<T>> collections(PType<T> ptype);
+
+  <T> PType<Map<String, T>> maps(PType<T> ptype);
+
+  <V1, V2> PType<Pair<V1, V2>> pairs(PType<V1> p1, PType<V2> p2);
+
+  <V1, V2, V3> PType<Tuple3<V1, V2, V3>> triples(PType<V1> p1, PType<V2> p2, PType<V3> p3);
+
+  <V1, V2, V3, V4> PType<Tuple4<V1, V2, V3, V4>> quads(PType<V1> p1, PType<V2> p2, PType<V3> p3, PType<V4> p4);
+
+  PType<TupleN> tuples(PType<?>... ptypes);
+
+  <T extends Tuple> PType<T> tuples(Class<T> clazz, PType<?>... ptypes);
+
+  <S, T> PType<T> derived(Class<T> clazz, MapFn<S, T> inputFn, MapFn<T, S> outputFn, PType<S> base);
+
+  <K, V> PTableType<K, V> tableOf(PType<K> key, PType<V> value);
+
+  /**
+   * Returns the equivalent of the given ptype for this family, if it exists.
+   */
+  <T> PType<T> as(PType<T> ptype);
+}
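
A sketch of constructing types through a family rather than a concrete factory (WritableTypeFamily is used as an arbitrary example):

  PTypeFamily tf = WritableTypeFamily.getInstance();
  PType<String> strings = tf.strings();
  PType<Pair<String, Long>> pairs = tf.pairs(tf.strings(), tf.longs());
  PTableType<String, Long> table = tf.tableOf(tf.strings(), tf.longs());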


[29/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/text/TextFileTarget.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/text/TextFileTarget.java b/crunch-core/src/main/java/org/apache/crunch/io/text/TextFileTarget.java
new file mode 100644
index 0000000..0c3e6a4
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/text/TextFileTarget.java
@@ -0,0 +1,109 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.text;
+
+import org.apache.avro.Schema;
+import org.apache.crunch.SourceTarget;
+import org.apache.crunch.io.FileNamingScheme;
+import org.apache.crunch.io.SequentialFileNamingScheme;
+import org.apache.crunch.io.impl.FileTargetImpl;
+import org.apache.crunch.types.Converter;
+import org.apache.crunch.types.PTableType;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.avro.AvroTextOutputFormat;
+import org.apache.crunch.types.avro.AvroType;
+import org.apache.crunch.types.avro.AvroTypeFamily;
+import org.apache.crunch.types.writable.WritableType;
+import org.apache.crunch.types.writable.WritableTypeFamily;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+
+public class TextFileTarget extends FileTargetImpl {
+  private static Class<? extends FileOutputFormat> getOutputFormat(PType<?> ptype) {
+    if (ptype.getFamily().equals(AvroTypeFamily.getInstance())) {
+      return AvroTextOutputFormat.class;
+    } else {
+      return TextOutputFormat.class;
+    }
+  }
+
+  public <T> TextFileTarget(String path) {
+    this(new Path(path));
+  }
+
+  public <T> TextFileTarget(Path path) {
+    this(path, new SequentialFileNamingScheme());
+  }
+
+  public <T> TextFileTarget(Path path, FileNamingScheme fileNamingScheme) {
+    super(path, null, fileNamingScheme);
+  }
+
+  @Override
+  public Path getPath() {
+    return path;
+  }
+
+  @Override
+  public String toString() {
+    return "Text(" + path + ")";
+  }
+
+  @Override
+  public void configureForMapReduce(Job job, PType<?> ptype, Path outputPath, String name) {
+    Converter converter = ptype.getConverter();
+    Class keyClass = converter.getKeyClass();
+    Class valueClass = converter.getValueClass();
+    configureForMapReduce(job, keyClass, valueClass, getOutputFormat(ptype), outputPath, name);
+  }
+
+  @Override
+  public <T> SourceTarget<T> asSourceTarget(PType<T> ptype) {
+    if (!isTextCompatible(ptype)) {
+      return null;
+    }
+    if (ptype instanceof PTableType) {
+      return new TextFileTableSourceTarget(path, (PTableType) ptype);
+    }
+    return new TextFileSourceTarget<T>(path, ptype);
+  }
+  
+  private <T> boolean isTextCompatible(PType<T> ptype) {
+    if (AvroTypeFamily.getInstance().equals(ptype.getFamily())) {
+      AvroType<T> at = (AvroType<T>) ptype;
+      if (at.getSchema().equals(Schema.create(Schema.Type.STRING))) {
+        return true;
+      }
+    } else if (WritableTypeFamily.getInstance().equals(ptype.getFamily())) {
+      if (ptype instanceof PTableType) {
+        PTableType ptt = (PTableType) ptype;
+        return isText(ptt.getKeyType()) && isText(ptt.getValueType());
+      } else {
+        return isText(ptype);
+      }
+    }
+    return false;
+  }
+  
+  private <T> boolean isText(PType<T> wtype) {
+    return Text.class.equals(((WritableType) wtype).getSerializationClass());
+  }
+}
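
A usage sketch (the driver class and paths are hypothetical):

  Pipeline pipeline = new MRPipeline(MyDriver.class);   // MyDriver is a placeholder
  PCollection<String> lines = pipeline.readTextFile("/data/in");
  pipeline.write(lines, new TextFileTarget("/data/out"));
  pipeline.done();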

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/lib/Aggregate.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/lib/Aggregate.java b/crunch-core/src/main/java/org/apache/crunch/lib/Aggregate.java
new file mode 100644
index 0000000..d4109cc
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/lib/Aggregate.java
@@ -0,0 +1,272 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.PriorityQueue;
+
+import org.apache.crunch.CombineFn;
+import org.apache.crunch.DoFn;
+import org.apache.crunch.Emitter;
+import org.apache.crunch.GroupingOptions;
+import org.apache.crunch.MapFn;
+import org.apache.crunch.PCollection;
+import org.apache.crunch.PObject;
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pair;
+import org.apache.crunch.fn.Aggregators;
+import org.apache.crunch.fn.MapValuesFn;
+import org.apache.crunch.materialize.pobject.FirstElementPObject;
+import org.apache.crunch.types.PTableType;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.PTypeFamily;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Methods for performing various types of aggregations over {@link PCollection} instances.
+ * 
+ */
+public class Aggregate {
+
+  /**
+   * Returns a {@code PTable} that contains the unique elements of this collection mapped to a count
+   * of their occurrences.
+   */
+  public static <S> PTable<S, Long> count(PCollection<S> collect) {
+    PTypeFamily tf = collect.getTypeFamily();
+    return collect.parallelDo("Aggregate.count", new MapFn<S, Pair<S, Long>>() {
+      public Pair<S, Long> map(S input) {
+        return Pair.of(input, 1L);
+      }
+    }, tf.tableOf(collect.getPType(), tf.longs())).groupByKey()
+        .combineValues(Aggregators.SUM_LONGS());
+  }
+
+  /**
+   * Returns the number of elements in the provided PCollection.
+   * 
+   * @param collect The PCollection whose elements should be counted.
+   * @param <S> The type of the PCollection.
+   * @return A {@code PObject} containing the number of elements in the {@code PCollection}.
+   */
+  public static <S> PObject<Long> length(PCollection<S> collect) {
+    PTypeFamily tf = collect.getTypeFamily();
+    PTable<Integer, Long> countTable = collect
+        .parallelDo("Aggregate.count", new MapFn<S, Pair<Integer, Long>>() {
+          public Pair<Integer, Long> map(S input) {
+            return Pair.of(1, 1L);
+          }
+        }, tf.tableOf(tf.ints(), tf.longs()))
+        .groupByKey(GroupingOptions.builder().numReducers(1).build())
+        .combineValues(Aggregators.SUM_LONGS());
+    PCollection<Long> count = countTable.values();
+    return new FirstElementPObject<Long>(count);
+  }
+
+  public static class PairValueComparator<K, V> implements Comparator<Pair<K, V>> {
+    private final boolean ascending;
+
+    public PairValueComparator(boolean ascending) {
+      this.ascending = ascending;
+    }
+
+    @Override
+    public int compare(Pair<K, V> left, Pair<K, V> right) {
+      int cmp = ((Comparable<V>) left.second()).compareTo(right.second());
+      return ascending ? cmp : -cmp;
+    }
+  }
+
+  public static class TopKFn<K, V> extends DoFn<Pair<K, V>, Pair<Integer, Pair<K, V>>> {
+
+    private final int limit;
+    private final boolean maximize;
+    private transient PriorityQueue<Pair<K, V>> values;
+
+    public TopKFn(int limit, boolean maximize) {
+      this.limit = limit;
+      this.maximize = maximize;
+    }
+
+    public void initialize() {
+      this.values = new PriorityQueue<Pair<K, V>>(limit, new PairValueComparator<K, V>(maximize));
+    }
+
+    public void process(Pair<K, V> input, Emitter<Pair<Integer, Pair<K, V>>> emitter) {
+      values.add(input);
+      if (values.size() > limit) {
+        values.poll();
+      }
+    }
+
+    public void cleanup(Emitter<Pair<Integer, Pair<K, V>>> emitter) {
+      for (Pair<K, V> p : values) {
+        emitter.emit(Pair.of(0, p));
+      }
+    }
+  }
+
+  public static class TopKCombineFn<K, V> extends CombineFn<Integer, Pair<K, V>> {
+
+    private final int limit;
+    private final boolean maximize;
+
+    public TopKCombineFn(int limit, boolean maximize) {
+      this.limit = limit;
+      this.maximize = maximize;
+    }
+
+    @Override
+    public void process(Pair<Integer, Iterable<Pair<K, V>>> input,
+        Emitter<Pair<Integer, Pair<K, V>>> emitter) {
+      Comparator<Pair<K, V>> cmp = new PairValueComparator<K, V>(maximize);
+      PriorityQueue<Pair<K, V>> queue = new PriorityQueue<Pair<K, V>>(limit, cmp);
+      for (Pair<K, V> pair : input.second()) {
+        queue.add(pair);
+        if (queue.size() > limit) {
+          queue.poll();
+        }
+      }
+
+      List<Pair<K, V>> values = Lists.newArrayList(queue);
+      Collections.sort(values, cmp);
+      for (int i = values.size() - 1; i >= 0; i--) {
+        emitter.emit(Pair.of(0, values.get(i)));
+      }
+    }
+  }
+
+  public static <K, V> PTable<K, V> top(PTable<K, V> ptable, int limit, boolean maximize) {
+    PTypeFamily ptf = ptable.getTypeFamily();
+    PTableType<K, V> base = ptable.getPTableType();
+    PType<Pair<K, V>> pairType = ptf.pairs(base.getKeyType(), base.getValueType());
+    PTableType<Integer, Pair<K, V>> inter = ptf.tableOf(ptf.ints(), pairType);
+    return ptable.parallelDo("top" + limit + "map", new TopKFn<K, V>(limit, maximize), inter)
+        .groupByKey(1).combineValues(new TopKCombineFn<K, V>(limit, maximize))
+        .parallelDo("top" + limit + "reduce", new DoFn<Pair<Integer, Pair<K, V>>, Pair<K, V>>() {
+          public void process(Pair<Integer, Pair<K, V>> input, Emitter<Pair<K, V>> emitter) {
+            emitter.emit(input.second());
+          }
+        }, base);
+  }
+
+  /**
+   * Returns the largest element (by its natural ordering) in the input collection.
+   */
+  public static <S> PObject<S> max(PCollection<S> collect) {
+    Class<S> clazz = collect.getPType().getTypeClass();
+    if (!clazz.isPrimitive() && !Comparable.class.isAssignableFrom(clazz)) {
+      throw new IllegalArgumentException("Can only get max for Comparable elements, not for: "
+          + collect.getPType().getTypeClass());
+    }
+    PTypeFamily tf = collect.getTypeFamily();
+    PCollection<S> maxCollect = PTables.values(collect
+        .parallelDo("max", new DoFn<S, Pair<Boolean, S>>() {
+          private transient S max = null;
+
+          public void process(S input, Emitter<Pair<Boolean, S>> emitter) {
+            if (max == null || ((Comparable<S>) max).compareTo(input) < 0) {
+              max = input;
+            }
+          }
+
+          public void cleanup(Emitter<Pair<Boolean, S>> emitter) {
+            if (max != null) {
+              emitter.emit(Pair.of(true, max));
+            }
+          }
+        }, tf.tableOf(tf.booleans(), collect.getPType())).groupByKey(1)
+        .combineValues(new CombineFn<Boolean, S>() {
+          public void process(Pair<Boolean, Iterable<S>> input, Emitter<Pair<Boolean, S>> emitter) {
+            S max = null;
+            for (S v : input.second()) {
+              if (max == null || ((Comparable<S>) max).compareTo(v) < 0) {
+                max = v;
+              }
+            }
+            emitter.emit(Pair.of(input.first(), max));
+          }
+        }));
+    return new FirstElementPObject<S>(maxCollect);
+  }
+
+  /**
+   * Returns the smallest element (by its natural ordering) in the input collection.
+   */
+  public static <S> PObject<S> min(PCollection<S> collect) {
+    Class<S> clazz = collect.getPType().getTypeClass();
+    if (!clazz.isPrimitive() && !Comparable.class.isAssignableFrom(clazz)) {
+      throw new IllegalArgumentException("Can only get min for Comparable elements, not for: "
+          + collect.getPType().getTypeClass());
+    }
+    PTypeFamily tf = collect.getTypeFamily();
+    PCollection<S> minCollect = PTables.values(collect
+        .parallelDo("min", new DoFn<S, Pair<Boolean, S>>() {
+          private transient S min = null;
+
+          public void process(S input, Emitter<Pair<Boolean, S>> emitter) {
+            if (min == null || ((Comparable<S>) min).compareTo(input) > 0) {
+              min = input;
+            }
+          }
+
+          public void cleanup(Emitter<Pair<Boolean, S>> emitter) {
+            if (min != null) {
+              emitter.emit(Pair.of(false, min));
+            }
+          }
+        }, tf.tableOf(tf.booleans(), collect.getPType())).groupByKey(1)
+        .combineValues(new CombineFn<Boolean, S>() {
+          public void process(Pair<Boolean, Iterable<S>> input, Emitter<Pair<Boolean, S>> emitter) {
+            S min = null;
+            for (S v : input.second()) {
+              if (min == null || ((Comparable<S>) min).compareTo(v) > 0) {
+                min = v;
+              }
+            }
+            emitter.emit(Pair.of(input.first(), min));
+          }
+        }));
+    return new FirstElementPObject<S>(minCollect);
+  }
+
+  public static <K, V> PTable<K, Collection<V>> collectValues(PTable<K, V> collect) {
+    PTypeFamily tf = collect.getTypeFamily();
+    final PType<V> valueType = collect.getValueType();
+    return collect.groupByKey().parallelDo("collect",
+        new MapValuesFn<K, Iterable<V>, Collection<V>>() {
+
+          @Override
+          public void initialize() {
+            valueType.initialize(getConfiguration());
+          }
+
+          public Collection<V> map(Iterable<V> values) {
+            List<V> collected = Lists.newArrayList();
+            for (V value : values) {
+              collected.add(valueType.getDetachedValue(value));
+            }
+            return collected;
+          }
+        }, tf.tableOf(collect.getKeyType(), tf.collections(collect.getValueType())));
+  }
+}
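
A minimal usage sketch of the Aggregate helpers added above; the wrapper class, method names,
and the assumption of a PCollection<String> of words built elsewhere are illustrative, not part
of the commit:

import org.apache.crunch.PCollection;
import org.apache.crunch.PObject;
import org.apache.crunch.PTable;
import org.apache.crunch.lib.Aggregate;

public class AggregateExample {
  // Count how often each word occurs, then keep the 10 most frequent words.
  public static PTable<String, Long> topWords(PCollection<String> words) {
    PTable<String, Long> counts = Aggregate.count(words);
    return Aggregate.top(counts, 10, true); // true => keep the largest counts
  }

  // Total number of elements, available as a PObject once the pipeline runs.
  public static PObject<Long> size(PCollection<String> words) {
    return Aggregate.length(words);
  }
}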

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/lib/Cartesian.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/lib/Cartesian.java b/crunch-core/src/main/java/org/apache/crunch/lib/Cartesian.java
new file mode 100644
index 0000000..08327dd
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/lib/Cartesian.java
@@ -0,0 +1,216 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib;
+
+import java.util.Random;
+
+import org.apache.crunch.DoFn;
+import org.apache.crunch.Emitter;
+import org.apache.crunch.MapFn;
+import org.apache.crunch.PCollection;
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pair;
+import org.apache.crunch.types.PTableType;
+import org.apache.crunch.types.PTypeFamily;
+
+/**
+ * Utilities for Cartesian products of two {@code PTable} or {@code PCollection}
+ * instances.
+ */
+@SuppressWarnings("serial")
+public class Cartesian {
+
+  /**
+   * Helper for building the artificial cross keys. This technique was taken
+   * from Pig's CROSS.
+   */
+  private static class GFCross<V> extends DoFn<V, Pair<Pair<Integer, Integer>, V>> {
+
+    private final int constantField;
+    private final int parallelism;
+    private final Random r;
+
+    public GFCross(int constantField, int parallelism) {
+      this.constantField = constantField;
+      this.parallelism = parallelism;
+      this.r = new Random();
+    }
+
+    public void process(V input, Emitter<Pair<Pair<Integer, Integer>, V>> emitter) {
+      int c = r.nextInt(parallelism);
+      if (constantField == 0) {
+        for (int i = 0; i < parallelism; i++) {
+          emitter.emit(Pair.of(Pair.of(c, i), input));
+        }
+      } else {
+        for (int i = 0; i < parallelism; i++) {
+          emitter.emit(Pair.of(Pair.of(i, c), input));
+        }
+      }
+    }
+  }
+
+  static final int DEFAULT_PARALLELISM = 6;
+
+  /**
+   * Performs a full cross join on the specified {@link PTable}s (using the same
+   * strategy as Pig's CROSS operator).
+   * 
+   * @see <a href="http://en.wikipedia.org/wiki/Join_(SQL)#Cross_join">Cross
+   *      Join</a>
+   * @param left
+   *          A PTable to perform a cross join on.
+   * @param right
+   *          A PTable to perform a cross join on.
+   * @param <K1>
+   *          Type of left PTable's keys.
+   * @param <K2>
+   *          Type of right PTable's keys.
+   * @param <U>
+   *          Type of the first {@link PTable}'s values
+   * @param <V>
+   *          Type of the second {@link PTable}'s values
+   * @return The joined result as tuples of ((K1,K2), (U,V)).
+   */
+  public static <K1, K2, U, V> PTable<Pair<K1, K2>, Pair<U, V>> cross(PTable<K1, U> left, PTable<K2, V> right) {
+    return cross(left, right, DEFAULT_PARALLELISM);
+  }
+
+  /**
+   * Performs a full cross join on the specified {@link PTable}s (using the same
+   * strategy as Pig's CROSS operator).
+   * 
+   * @see <a href="http://en.wikipedia.org/wiki/Join_(SQL)#Cross_join">Cross
+   *      Join</a>
+   * @param left
+   *          A PTable to perform a cross join on.
+   * @param right
+   *          A PTable to perform a cross join on.
+   * @param parallelism
+   *          The square root of the number of reducers to use. Increasing
+   *          parallelism also increases copied data.
+   * @param <K1>
+   *          Type of left PTable's keys.
+   * @param <K2>
+   *          Type of right PTable's keys.
+   * @param <U>
+   *          Type of the first {@link PTable}'s values
+   * @param <V>
+   *          Type of the second {@link PTable}'s values
+   * @return The joined result as tuples of ((K1,K2), (U,V)).
+   */
+  public static <K1, K2, U, V> PTable<Pair<K1, K2>, Pair<U, V>> cross(PTable<K1, U> left, PTable<K2, V> right,
+      int parallelism) {
+
+    /*
+     * The strategy here is to simply emulate the following PigLatin:
+     *   A = foreach table1 generate flatten(GFCross(0, 2)), flatten(*);
+     *   B = foreach table2 generate flatten(GFCross(1, 2)), flatten(*);
+     *   C = cogroup A by ($0, $1), B by ($0, $1);
+     *   result = foreach C generate flatten(A), flatten(B);
+     */
+
+    PTypeFamily ltf = left.getTypeFamily();
+    PTypeFamily rtf = right.getTypeFamily();
+
+    PTable<Pair<Integer, Integer>, Pair<K1, U>> leftCross = left.parallelDo(new GFCross<Pair<K1, U>>(0, parallelism),
+        ltf.tableOf(ltf.pairs(ltf.ints(), ltf.ints()), ltf.pairs(left.getKeyType(), left.getValueType())));
+    PTable<Pair<Integer, Integer>, Pair<K2, V>> rightCross = right.parallelDo(new GFCross<Pair<K2, V>>(1, parallelism),
+        rtf.tableOf(rtf.pairs(rtf.ints(), rtf.ints()), rtf.pairs(right.getKeyType(), right.getValueType())));
+
+    PTable<Pair<Integer, Integer>, Pair<Pair<K1, U>, Pair<K2, V>>> cg = leftCross.join(rightCross);
+
+    PTypeFamily ctf = cg.getTypeFamily();
+
+    return cg.parallelDo(
+        new MapFn<Pair<Pair<Integer, Integer>, Pair<Pair<K1, U>, Pair<K2, V>>>, Pair<Pair<K1, K2>, Pair<U, V>>>() {
+
+          @Override
+          public Pair<Pair<K1, K2>, Pair<U, V>> map(Pair<Pair<Integer, Integer>, Pair<Pair<K1, U>, Pair<K2, V>>> input) {
+            Pair<Pair<K1, U>, Pair<K2, V>> valuePair = input.second();
+            return Pair.of(Pair.of(valuePair.first().first(), valuePair.second().first()),
+                Pair.of(valuePair.first().second(), valuePair.second().second()));
+          }
+        },
+        ctf.tableOf(ctf.pairs(left.getKeyType(), right.getKeyType()),
+            ctf.pairs(left.getValueType(), right.getValueType())));
+  }
+
+  /**
+   * Performs a full cross join on the specified {@link PCollection}s (using the
+   * same strategy as Pig's CROSS operator).
+   * 
+   * @see <a href="http://en.wikipedia.org/wiki/Join_(SQL)#Cross_join">Cross
+   *      Join</a>
+   * @param left
+   *          A PCollection to perform a cross join on.
+   * @param right
+   *          A PCollection to perform a cross join on.
+   * @param <U>
+   *          Type of the first {@link PCollection}'s values
+   * @param <V>
+   *          Type of the second {@link PCollection}'s values
+   * @return The joined result as tuples of (U,V).
+   */
+  public static <U, V> PCollection<Pair<U, V>> cross(PCollection<U> left, PCollection<V> right) {
+    return cross(left, right, DEFAULT_PARALLELISM);
+  }
+
+  /**
+   * Performs a full cross join on the specified {@link PCollection}s (using the
+   * same strategy as Pig's CROSS operator).
+   * 
+   * @see <a href="http://en.wikipedia.org/wiki/Join_(SQL)#Cross_join">Cross
+   *      Join</a>
+   * @param left
+   *          A PCollection to perform a cross join on.
+   * @param right
+   *          A PCollection to perform a cross join on.
+   * @param <U>
+   *          Type of the first {@link PCollection}'s values
+   * @param <V>
+   *          Type of the second {@link PCollection}'s values
+   * @return The joined result as tuples of (U,V).
+   */
+  public static <U, V> PCollection<Pair<U, V>> cross(PCollection<U> left, PCollection<V> right, int parallelism) {
+
+    PTypeFamily ltf = left.getTypeFamily();
+    PTypeFamily rtf = right.getTypeFamily();
+
+    PTableType<Pair<Integer, Integer>, U> ptt = ltf.tableOf(ltf.pairs(ltf.ints(), ltf.ints()), left.getPType());
+
+    if (ptt == null)
+      throw new Error();
+
+    PTable<Pair<Integer, Integer>, U> leftCross = left.parallelDo(new GFCross<U>(0, parallelism),
+        ltf.tableOf(ltf.pairs(ltf.ints(), ltf.ints()), left.getPType()));
+    PTable<Pair<Integer, Integer>, V> rightCross = right.parallelDo(new GFCross<V>(1, parallelism),
+        rtf.tableOf(rtf.pairs(rtf.ints(), rtf.ints()), right.getPType()));
+
+    PTable<Pair<Integer, Integer>, Pair<U, V>> cg = leftCross.join(rightCross);
+
+    PTypeFamily ctf = cg.getTypeFamily();
+
+    return cg.parallelDo(new MapFn<Pair<Pair<Integer, Integer>, Pair<U, V>>, Pair<U, V>>() {
+      @Override
+      public Pair<U, V> map(Pair<Pair<Integer, Integer>, Pair<U, V>> input) {
+        return input.second();
+      }
+    }, ctf.pairs(left.getPType(), right.getPType()));
+  }
+
+}
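
A short usage sketch for Cartesian (the helper class and input collections below are
illustrative assumptions, not part of the commit):

import org.apache.crunch.PCollection;
import org.apache.crunch.Pair;
import org.apache.crunch.lib.Cartesian;

public class CartesianExample {
  // Pair every user with every item; the output has |users| * |items| elements,
  // so this is only practical when at least one side is reasonably small.
  public static PCollection<Pair<String, String>> allPairs(
      PCollection<String> users, PCollection<String> items) {
    return Cartesian.cross(users, items);
  }
}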

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/lib/Cogroup.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/lib/Cogroup.java b/crunch-core/src/main/java/org/apache/crunch/lib/Cogroup.java
new file mode 100644
index 0000000..07d873c
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/lib/Cogroup.java
@@ -0,0 +1,106 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib;
+
+import java.util.Collection;
+
+import org.apache.crunch.DoFn;
+import org.apache.crunch.Emitter;
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pair;
+import org.apache.crunch.fn.MapValuesFn;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.PTypeFamily;
+
+import com.google.common.collect.Lists;
+
+public class Cogroup {
+
+  /**
+   * Co-groups the two {@link PTable} arguments.
+   * 
+   * @return a {@code PTable} representing the co-grouped tables.
+   */
+  public static <K, U, V> PTable<K, Pair<Collection<U>, Collection<V>>> cogroup(PTable<K, U> left, PTable<K, V> right) {
+    PTypeFamily ptf = left.getTypeFamily();
+    PType<K> keyType = left.getPTableType().getKeyType();
+    PType<U> leftType = left.getPTableType().getValueType();
+    PType<V> rightType = right.getPTableType().getValueType();
+    PType<Pair<U, V>> itype = ptf.pairs(leftType, rightType);
+
+    PTable<K, Pair<U, V>> cgLeft = left.parallelDo("coGroupTag1", new CogroupFn1<K, U, V>(),
+        ptf.tableOf(keyType, itype));
+    PTable<K, Pair<U, V>> cgRight = right.parallelDo("coGroupTag2", new CogroupFn2<K, U, V>(),
+        ptf.tableOf(keyType, itype));
+
+    PTable<K, Pair<U, V>> both = cgLeft.union(cgRight);
+
+    PType<Pair<Collection<U>, Collection<V>>> otype = ptf.pairs(ptf.collections(leftType), ptf.collections(rightType));
+    return both.groupByKey().parallelDo("cogroup", 
+        new PostGroupFn<K, U, V>(leftType, rightType), ptf.tableOf(keyType, otype));
+  }
+
+  private static class CogroupFn1<K, V, U> extends MapValuesFn<K, V, Pair<V, U>> {
+    @Override
+    public Pair<V, U> map(V v) {
+      return Pair.of(v, null);
+    }
+  }
+
+  private static class CogroupFn2<K, V, U> extends MapValuesFn<K, U, Pair<V, U>> {
+    @Override
+    public Pair<V, U> map(U u) {
+      return Pair.of(null, u);
+    }
+  }
+
+  private static class PostGroupFn<K, V, U> extends
+      DoFn<Pair<K, Iterable<Pair<V, U>>>, Pair<K, Pair<Collection<V>, Collection<U>>>> {
+    
+    private PType<V> ptypeV;
+    private PType<U> ptypeU;
+    
+    public PostGroupFn(PType<V> ptypeV, PType<U> ptypeU) {
+      this.ptypeV = ptypeV;
+      this.ptypeU = ptypeU;
+    }
+    
+    @Override
+    public void initialize() {
+      super.initialize();
+      ptypeV.initialize(getConfiguration());
+      ptypeU.initialize(getConfiguration());
+    }
+    
+    @Override
+    public void process(Pair<K, Iterable<Pair<V, U>>> input,
+        Emitter<Pair<K, Pair<Collection<V>, Collection<U>>>> emitter) {
+      Collection<V> cv = Lists.newArrayList();
+      Collection<U> cu = Lists.newArrayList();
+      for (Pair<V, U> pair : input.second()) {
+        if (pair.first() != null) {
+          cv.add(ptypeV.getDetachedValue(pair.first()));
+        } else if (pair.second() != null) {
+          cu.add(ptypeU.getDetachedValue(pair.second()));
+        }
+      }
+      emitter.emit(Pair.of(input.first(), Pair.of(cv, cu)));
+    }
+  }
+
+}
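
A usage sketch for Cogroup, assuming two string-valued tables keyed by a customer id (the
class and variable names are illustrative):

import java.util.Collection;

import org.apache.crunch.PTable;
import org.apache.crunch.Pair;
import org.apache.crunch.lib.Cogroup;

public class CogroupExample {
  // Group the orders and the payments that share a customer id, so a downstream
  // DoFn can see both collections for each key at once.
  public static PTable<String, Pair<Collection<String>, Collection<String>>> byCustomer(
      PTable<String, String> orders, PTable<String, String> payments) {
    return Cogroup.cogroup(orders, payments);
  }
}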

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/lib/Distinct.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/lib/Distinct.java b/crunch-core/src/main/java/org/apache/crunch/lib/Distinct.java
new file mode 100644
index 0000000..994830d
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/lib/Distinct.java
@@ -0,0 +1,126 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib;
+
+import java.util.Set;
+
+import org.apache.crunch.DoFn;
+import org.apache.crunch.Emitter;
+import org.apache.crunch.PCollection;
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pair;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.PTypeFamily;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Sets;
+
+/**
+ * Functions for computing the distinct elements of a {@code PCollection}.
+ */
+public final class Distinct {
+
+  private static final int DEFAULT_FLUSH_EVERY = 50000;
+  
+  /**
+   * Construct a new {@code PCollection} that contains the unique elements of a
+   * given input {@code PCollection}.
+   * 
+   * @param input The input {@code PCollection}
+   * @return A new {@code PCollection} that contains the unique elements of the input
+   */
+  public static <S> PCollection<S> distinct(PCollection<S> input) {
+    return distinct(input, DEFAULT_FLUSH_EVERY);
+  }
+  
+  /**
+   * A {@code PTable<K, V>} analogue of the {@code distinct} function.
+   */
+  public static <K, V> PTable<K, V> distinct(PTable<K, V> input) {
+    return PTables.asPTable(distinct((PCollection<Pair<K, V>>) input));
+  }
+  
+  /**
+   * A {@code distinct} operation that gives the client more control over how frequently
+   * elements are flushed to disk in order to allow control over performance or
+   * memory consumption.
+   * 
+   * @param input The input {@code PCollection}
+   * @param flushEvery Flush the elements to disk whenever we encounter this many unique values
+   * @return A new {@code PCollection} that contains the unique elements of the input
+   */
+  public static <S> PCollection<S> distinct(PCollection<S> input, int flushEvery) {
+    Preconditions.checkArgument(flushEvery > 0);
+    PType<S> pt = input.getPType();
+    PTypeFamily ptf = pt.getFamily();
+    return input
+        .parallelDo("pre-distinct", new PreDistinctFn<S>(flushEvery, pt), ptf.tableOf(pt, ptf.nulls()))
+        .groupByKey()
+        .parallelDo("post-distinct", new PostDistinctFn<S>(), pt);
+  }
+  
+  /**
+   * A {@code PTable<K, V>} analogue of the {@code distinct} function.
+   */
+  public static <K, V> PTable<K, V> distinct(PTable<K, V> input, int flushEvery) {
+    return PTables.asPTable(distinct((PCollection<Pair<K, V>>) input, flushEvery));
+  }
+  
+  private static class PreDistinctFn<S> extends DoFn<S, Pair<S, Void>> {
+    private final Set<S> values = Sets.newHashSet();
+    private final int flushEvery;
+    private final PType<S> ptype;
+    
+    public PreDistinctFn(int flushEvery, PType<S> ptype) {
+      this.flushEvery = flushEvery;
+      this.ptype = ptype;
+    }
+    
+    @Override
+    public void initialize() {
+      super.initialize();
+      ptype.initialize(getConfiguration());
+    }
+    
+    @Override
+    public void process(S input, Emitter<Pair<S, Void>> emitter) {
+      values.add(ptype.getDetachedValue(input));
+      if (values.size() > flushEvery) {
+        cleanup(emitter);
+      }
+    }
+    
+    @Override
+    public void cleanup(Emitter<Pair<S, Void>> emitter) {
+      for (S in : values) {
+        emitter.emit(Pair.<S, Void>of(in, null));
+      }
+      values.clear();
+    }
+  }
+  
+  private static class PostDistinctFn<S> extends DoFn<Pair<S, Iterable<Void>>, S> {
+    @Override
+    public void process(Pair<S, Iterable<Void>> input, Emitter<S> emitter) {
+      emitter.emit(input.first());
+    }
+  }
+  
+  // No instantiation
+  private Distinct() {}
+}
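
A usage sketch for Distinct, showing both the default form and the flushEvery knob (the
wrapper class and the choice of 10,000 are illustrative):

import org.apache.crunch.PCollection;
import org.apache.crunch.lib.Distinct;

public class DistinctExample {
  // Deduplicate a collection of ids using the default flush threshold.
  public static PCollection<String> uniqueIds(PCollection<String> ids) {
    return Distinct.distinct(ids);
  }

  // Trade throughput for a smaller in-memory set by flushing every 10,000 unique values.
  public static PCollection<String> uniqueIdsSmallHeap(PCollection<String> ids) {
    return Distinct.distinct(ids, 10000);
  }
}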

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/lib/Join.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/lib/Join.java b/crunch-core/src/main/java/org/apache/crunch/lib/Join.java
new file mode 100644
index 0000000..c0c4a6b
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/lib/Join.java
@@ -0,0 +1,181 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib;
+
+import org.apache.crunch.GroupingOptions;
+import org.apache.crunch.MapFn;
+import org.apache.crunch.PGroupedTable;
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pair;
+import org.apache.crunch.lib.join.FullOuterJoinFn;
+import org.apache.crunch.lib.join.InnerJoinFn;
+import org.apache.crunch.lib.join.JoinFn;
+import org.apache.crunch.lib.join.JoinUtils;
+import org.apache.crunch.lib.join.LeftOuterJoinFn;
+import org.apache.crunch.lib.join.RightOuterJoinFn;
+import org.apache.crunch.types.PTableType;
+import org.apache.crunch.types.PTypeFamily;
+
+/**
+ * Utilities for joining multiple {@code PTable} instances based on a common
+ * key.
+ */
+public class Join {
+  /**
+   * Performs an inner join on the specified {@link PTable}s.
+   * 
+   * @see <a href="http://en.wikipedia.org/wiki/Join_(SQL)#Inner_join">Inner
+   *      Join</a>
+   * @param left
+   *          A PTable to perform an inner join on.
+   * @param right
+   *          A PTable to perform an inner join on.
+   * @param <K>
+   *          Type of the keys.
+   * @param <U>
+   *          Type of the first {@link PTable}'s values
+   * @param <V>
+   *          Type of the second {@link PTable}'s values
+   * @return The joined result.
+   */
+  public static <K, U, V> PTable<K, Pair<U, V>> join(PTable<K, U> left, PTable<K, V> right) {
+    return innerJoin(left, right);
+  }
+
+  /**
+   * Performs an inner join on the specified {@link PTable}s.
+   * 
+   * @see <a href="http://en.wikipedia.org/wiki/Join_(SQL)#Inner_join">Inner
+   *      Join</a>
+   * @param left
+   *          A PTable to perform an inner join on.
+   * @param right
+   *          A PTable to perform an inner join on.
+   * @param <K>
+   *          Type of the keys.
+   * @param <U>
+   *          Type of the first {@link PTable}'s values
+   * @param <V>
+   *          Type of the second {@link PTable}'s values
+   * @return The joined result.
+   */
+  public static <K, U, V> PTable<K, Pair<U, V>> innerJoin(PTable<K, U> left, PTable<K, V> right) {
+    return join(left, right, new InnerJoinFn<K, U, V>(left.getKeyType(), left.getValueType()));
+  }
+
+  /**
+   * Performs a left outer join on the specified {@link PTable}s.
+   * 
+   * @see <a href="http://en.wikipedia.org/wiki/Join_(SQL)#Left_outer_join">Left
+   *      Join</a>
+   * @param left
+   *          A PTable to perform a left join on. All of this PTable's entries
+   *          will appear in the resulting PTable.
+   * @param right
+   *          A PTable to perform a left join on.
+   * @param <K>
+   *          Type of the keys.
+   * @param <U>
+   *          Type of the first {@link PTable}'s values
+   * @param <V>
+   *          Type of the second {@link PTable}'s values
+   * @return The joined result.
+   */
+  public static <K, U, V> PTable<K, Pair<U, V>> leftJoin(PTable<K, U> left, PTable<K, V> right) {
+    return join(left, right, new LeftOuterJoinFn<K, U, V>(left.getKeyType(), left.getValueType()));
+  }
+
+  /**
+   * Performs a right outer join on the specified {@link PTable}s.
+   * 
+   * @see <a
+   *      href="http://en.wikipedia.org/wiki/Join_(SQL)#Right_outer_join">Right
+   *      Join</a>
+   * @param left
+   *          A PTable to perform a right join on.
+   * @param right
+   *          A PTable to perform a right join on. All of this PTable's entries
+   *          will appear in the resulting PTable.
+   * @param <K>
+   *          Type of the keys.
+   * @param <U>
+   *          Type of the first {@link PTable}'s values
+   * @param <V>
+   *          Type of the second {@link PTable}'s values
+   * @return The joined result.
+   */
+  public static <K, U, V> PTable<K, Pair<U, V>> rightJoin(PTable<K, U> left, PTable<K, V> right) {
+    return join(left, right, new RightOuterJoinFn<K, U, V>(left.getKeyType(), left.getValueType()));
+  }
+
+  /**
+   * Performs a full outer join on the specified {@link PTable}s.
+   * 
+   * @see <a href="http://en.wikipedia.org/wiki/Join_(SQL)#Full_outer_join">Full
+   *      Join</a>
+   * @param left
+   *          A PTable to perform a full join on.
+   * @param right
+   *          A PTable to perform a full join on.
+   * @param <K>
+   *          Type of the keys.
+   * @param <U>
+   *          Type of the first {@link PTable}'s values
+   * @param <V>
+   *          Type of the second {@link PTable}'s values
+   * @return The joined result.
+   */
+  public static <K, U, V> PTable<K, Pair<U, V>> fullJoin(PTable<K, U> left, PTable<K, V> right) {
+    return join(left, right, new FullOuterJoinFn<K, U, V>(left.getKeyType(), left.getValueType()));
+  }
+
+  public static <K, U, V> PTable<K, Pair<U, V>> join(PTable<K, U> left, PTable<K, V> right, JoinFn<K, U, V> joinFn) {
+    PTypeFamily ptf = left.getTypeFamily();
+    PGroupedTable<Pair<K, Integer>, Pair<U, V>> grouped = preJoin(left, right);
+    PTableType<K, Pair<U, V>> ret = ptf
+        .tableOf(left.getKeyType(), ptf.pairs(left.getValueType(), right.getValueType()));
+
+    return grouped.parallelDo(joinFn.getJoinType() + grouped.getName(), joinFn, ret);
+  }
+
+  private static <K, U, V> PGroupedTable<Pair<K, Integer>, Pair<U, V>> preJoin(PTable<K, U> left, PTable<K, V> right) {
+    PTypeFamily ptf = left.getTypeFamily();
+    PTableType<Pair<K, Integer>, Pair<U, V>> ptt = ptf.tableOf(ptf.pairs(left.getKeyType(), ptf.ints()),
+        ptf.pairs(left.getValueType(), right.getValueType()));
+
+    PTable<Pair<K, Integer>, Pair<U, V>> tag1 = left.parallelDo("joinTagLeft",
+        new MapFn<Pair<K, U>, Pair<Pair<K, Integer>, Pair<U, V>>>() {
+          @Override
+          public Pair<Pair<K, Integer>, Pair<U, V>> map(Pair<K, U> input) {
+            return Pair.of(Pair.of(input.first(), 0), Pair.of(input.second(), (V) null));
+          }
+        }, ptt);
+    PTable<Pair<K, Integer>, Pair<U, V>> tag2 = right.parallelDo("joinTagRight",
+        new MapFn<Pair<K, V>, Pair<Pair<K, Integer>, Pair<U, V>>>() {
+          @Override
+          public Pair<Pair<K, Integer>, Pair<U, V>> map(Pair<K, V> input) {
+            return Pair.of(Pair.of(input.first(), 1), Pair.of((U) null, input.second()));
+          }
+        }, ptt);
+
+    GroupingOptions.Builder optionsBuilder = GroupingOptions.builder();
+    optionsBuilder.partitionerClass(JoinUtils.getPartitionerClass(ptf));
+
+    return (tag1.union(tag2)).groupByKey(optionsBuilder.build());
+  }
+}
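
A usage sketch for the Join helpers, assuming two tables keyed by the same string key (the
wrapper class and value types are illustrative):

import org.apache.crunch.PTable;
import org.apache.crunch.Pair;
import org.apache.crunch.lib.Join;

public class JoinExample {
  // Inner join: only keys present in both tables survive.
  public static PTable<String, Pair<String, Long>> inner(
      PTable<String, String> left, PTable<String, Long> right) {
    return Join.join(left, right);
  }

  // Left outer join: every left entry survives; the right value is null
  // when no matching key exists on the right side.
  public static PTable<String, Pair<String, Long>> leftOuter(
      PTable<String, String> left, PTable<String, Long> right) {
    return Join.leftJoin(left, right);
  }
}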

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/lib/PTables.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/lib/PTables.java b/crunch-core/src/main/java/org/apache/crunch/lib/PTables.java
new file mode 100644
index 0000000..e907680
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/lib/PTables.java
@@ -0,0 +1,117 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib;
+
+import java.util.List;
+
+import org.apache.crunch.DoFn;
+import org.apache.crunch.Emitter;
+import org.apache.crunch.PCollection;
+import org.apache.crunch.PGroupedTable;
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pair;
+import org.apache.crunch.fn.IdentityFn;
+import org.apache.crunch.types.PGroupedTableType;
+import org.apache.crunch.types.PTableType;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.PTypeFamily;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Methods for performing common operations on PTables.
+ * 
+ */
+public class PTables {
+
+  /**
+   * Convert the given {@code PCollection<Pair<K, V>>} to a {@code PTable<K, V>}.
+   * @param pcollect The {@code PCollection} to convert
+   * @return A {@code PTable} that contains the same data as the input {@code PCollection}
+   */
+  public static <K, V> PTable<K, V> asPTable(PCollection<Pair<K, V>> pcollect) {
+    PType<Pair<K, V>> pt = pcollect.getPType();
+    PTypeFamily ptf = pt.getFamily();
+    PTableType<K, V> ptt = ptf.tableOf(pt.getSubTypes().get(0), pt.getSubTypes().get(1));
+    DoFn<Pair<K, V>, Pair<K, V>> id = IdentityFn.getInstance();
+    return pcollect.parallelDo("asPTable", id, ptt);
+  }
+  
+  /**
+   * Extract the keys from the given {@code PTable<K, V>} as a {@code PCollection<K>}.
+   * @param ptable The {@code PTable}
+   * @return A {@code PCollection<K>}
+   */
+  public static <K, V> PCollection<K> keys(PTable<K, V> ptable) {
+    return ptable.parallelDo("PTables.keys", new DoFn<Pair<K, V>, K>() {
+      @Override
+      public void process(Pair<K, V> input, Emitter<K> emitter) {
+        emitter.emit(input.first());
+      }
+    }, ptable.getKeyType());
+  }
+
+  /**
+   * Extract the values from the given {@code PTable<K, V>} as a {@code PCollection<V>}.
+   * @param ptable The {@code PTable}
+   * @return A {@code PCollection<V>}
+   */
+  public static <K, V> PCollection<V> values(PTable<K, V> ptable) {
+    return ptable.parallelDo("PTables.values", new DoFn<Pair<K, V>, V>() {
+      @Override
+      public void process(Pair<K, V> input, Emitter<V> emitter) {
+        emitter.emit(input.second());
+      }
+    }, ptable.getValueType());
+  }
+
+  /**
+   * Create a detached value for a table {@link Pair}.
+   * 
+   * @param tableType The table type
+   * @param value The value from which a detached value is to be created
+   * @return The detached value
+   * @see PType#getDetachedValue(Object)
+   */
+  public static <K, V> Pair<K, V> getDetachedValue(PTableType<K, V> tableType, Pair<K, V> value) {
+    return Pair.of(tableType.getKeyType().getDetachedValue(value.first()), tableType.getValueType()
+        .getDetachedValue(value.second()));
+  }
+
+  /**
+   * Create a detached value for a {@link PGroupedTable} value.
+   * 
+   * @param groupedTableType The grouped table type
+   * @param value The value from which a detached value is to be created
+   * @return The detached value
+   * @see PType#getDetachedValue(Object)
+   */
+  public static <K, V> Pair<K, Iterable<V>> getGroupedDetachedValue(
+      PGroupedTableType<K, V> groupedTableType, Pair<K, Iterable<V>> value) {
+
+    PTableType<K, V> tableType = groupedTableType.getTableType();
+    List<V> detachedIterable = Lists.newArrayList();
+    PType<V> valueType = tableType.getValueType();
+    for (V v : value.second()) {
+      detachedIterable.add(valueType.getDetachedValue(v));
+    }
+    return Pair.of(tableType.getKeyType().getDetachedValue(value.first()),
+        (Iterable<V>) detachedIterable);
+  }
+}
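
A usage sketch for the PTables helpers (the wrapper class and element types are illustrative):

import org.apache.crunch.PCollection;
import org.apache.crunch.PTable;
import org.apache.crunch.Pair;
import org.apache.crunch.lib.PTables;

public class PTablesExample {
  // Split a table into its key and value collections.
  public static PCollection<String> justKeys(PTable<String, Long> table) {
    return PTables.keys(table);
  }

  public static PCollection<Long> justValues(PTable<String, Long> table) {
    return PTables.values(table);
  }

  // Rebuild a table from a collection of pairs.
  public static PTable<String, Long> rebuild(PCollection<Pair<String, Long>> pairs) {
    return PTables.asPTable(pairs);
  }
}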

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/lib/Sample.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/lib/Sample.java b/crunch-core/src/main/java/org/apache/crunch/lib/Sample.java
new file mode 100644
index 0000000..5a66101
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/lib/Sample.java
@@ -0,0 +1,217 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib;
+
+
+import org.apache.crunch.MapFn;
+import org.apache.crunch.PCollection;
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pair;
+import org.apache.crunch.lib.SampleUtils.ReservoirSampleFn;
+import org.apache.crunch.lib.SampleUtils.SampleFn;
+import org.apache.crunch.lib.SampleUtils.WRSCombineFn;
+import org.apache.crunch.types.PTableType;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.PTypeFamily;
+
+/**
+ * Methods for performing random sampling in a distributed fashion, either by accepting each
+ * record in a {@code PCollection} with an independent probability in order to sample some
+ * fraction of the overall data set, or by using reservoir sampling in order to pull a uniform
+ * or weighted sample of fixed size from a {@code PCollection} of an unknown size. For more details
+ * on the reservoir sampling algorithms used by this library, see the A-ES algorithm described in
+ * <a href="http://arxiv.org/pdf/1012.0256.pdf">Efraimidis (2012)</a>.
+ */
+public class Sample {
+
+  /**
+   * Output records from the given {@code PCollection} with the given probability.
+   * 
+   * @param input The {@code PCollection} to sample from
+   * @param probability The probability (0.0 &lt; p &lt; 1.0)
+   * @return The output {@code PCollection} created from sampling
+   */
+  public static <S> PCollection<S> sample(PCollection<S> input, double probability) {
+    return sample(input, null, probability);
+  }
+
+  /**
+   * Output records from the given {@code PCollection} using a given seed. Useful for unit
+   * testing.
+   * 
+   * @param input The {@code PCollection} to sample from
+   * @param seed The seed for the random number generator
+   * @param probability The probability (0.0 &lt; p &lt; 1.0)
+   * @return The output {@code PCollection} created from sampling
+   */
+  public static <S> PCollection<S> sample(PCollection<S> input, Long seed, double probability) {
+    String stageName = String.format("sample(%.2f)", probability);
+    return input.parallelDo(stageName, new SampleFn<S>(probability, seed), input.getPType());
+  }
+  
+  /**
+   * A {@code PTable<K, V>} analogue of the {@code sample} function.
+   * 
+   * @param input The {@code PTable} to sample from
+   * @param probability The probability (0.0 &lt; p &lt; 1.0)
+   * @return The output {@code PTable} created from sampling
+   */
+  public static <K, V> PTable<K, V> sample(PTable<K, V> input, double probability) {
+    return PTables.asPTable(sample((PCollection<Pair<K, V>>) input, probability));
+  }
+  
+  /**
+   * A {@code PTable<K, V>} analogue of the {@code sample} function, with the seed argument
+   * exposed for testing purposes.
+   * 
+   * @param input The {@code PTable} to sample from
+   * @param seed The seed for the random number generator
+   * @param probability The probability (0.0 &lt; p &lt; 1.0)
+   * @return The output {@code PTable} created from sampling
+   */
+  public static <K, V> PTable<K, V> sample(PTable<K, V> input, Long seed, double probability) {
+    return PTables.asPTable(sample((PCollection<Pair<K, V>>) input, seed, probability));
+  }
+  
+  /**
+   * Select a fixed number of elements from the given {@code PCollection} with each element
+   * equally likely to be included in the sample.
+   * 
+   * @param input The input data
+   * @param sampleSize The number of elements to select
+   * @return A {@code PCollection} made up of the sampled elements
+   */
+  public static <T> PCollection<T> reservoirSample(
+      PCollection<T> input,
+      int sampleSize) {
+    return reservorSample(input, sampleSize, null);
+  }
+
+  /**
+   * A version of the reservoir sampling algorithm that uses a given seed, primarily for
+   * testing purposes.
+   * 
+   * @param input The input data
+   * @param sampleSize The number of elements to select
+   * @param seed The test seed
+   * @return A {@code PCollection} made up of the sampled elements
+   */
+  public static <T> PCollection<T> reservorSample(
+      PCollection<T> input,
+      int sampleSize,
+      Long seed) {
+    PTypeFamily ptf = input.getTypeFamily();
+    PType<Pair<T, Integer>> ptype = ptf.pairs(input.getPType(), ptf.ints());
+    return weightedReservoirSample(
+        input.parallelDo(new MapFn<T, Pair<T, Integer>>() {
+          public Pair<T, Integer> map(T t) { return Pair.of(t, 1); }
+        }, ptype),
+        sampleSize,
+        seed);
+  }
+  
+  /**
+   * Selects a weighted sample of the elements of the given {@code PCollection}, where the second term in
+   * the input {@code Pair} is a numerical weight.
+   * 
+   * @param input the weighted observations
+   * @param sampleSize The number of elements to select
+   * @return A random sample of the given size that respects the weighting values
+   */
+  public static <T, N extends Number> PCollection<T> weightedReservoirSample(
+      PCollection<Pair<T, N>> input,
+      int sampleSize) {
+    return weightedReservoirSample(input, sampleSize, null);
+  }
+  
+  /**
+   * The weighted reservoir sampling function with the seed term exposed for testing purposes.
+   * 
+   * @param input the weighted observations
+   * @param sampleSize The number of elements to select
+   * @param seed The test seed
+   * @return A random sample of the given size that respects the weighting values
+   */
+  public static <T, N extends Number> PCollection<T> weightedReservoirSample(
+      PCollection<Pair<T, N>> input,
+      int sampleSize,
+      Long seed) {
+    PTypeFamily ptf = input.getTypeFamily();
+    PTable<Integer, Pair<T, N>> groupedIn = input.parallelDo(
+        new MapFn<Pair<T, N>, Pair<Integer, Pair<T, N>>>() {
+          @Override
+          public Pair<Integer, Pair<T, N>> map(Pair<T, N> p) {
+            return Pair.of(0, p);
+          }
+        }, ptf.tableOf(ptf.ints(), input.getPType()));
+    int[] ss = new int[] { sampleSize };
+    return groupedWeightedReservoirSample(groupedIn, ss, seed)
+        .parallelDo(new MapFn<Pair<Integer, T>, T>() {
+          @Override
+          public T map(Pair<Integer, T> p) {
+            return p.second();
+          }
+        }, (PType<T>) input.getPType().getSubTypes().get(0));
+  }
+  
+  /**
+   * The most general-purpose form of the weighted reservoir sampling patterns: it chooses
+   * a random sample of elements for each of N input groups.
+   * 
+   * @param input A {@code PTable} with the key a group ID and the value a weighted observation in that group
+   * @param sampleSizes An array of length N, where each entry is the number of elements to include in the corresponding group
+   * @return A {@code PCollection} of the sampled elements for each of the groups
+   */
+  public static <T, N extends Number> PCollection<Pair<Integer, T>> groupedWeightedReservoirSample(
+      PTable<Integer, Pair<T, N>> input,
+      int[] sampleSizes) {
+    return groupedWeightedReservoirSample(input, sampleSizes, null);
+  }
+  
+  /**
+   * Same as the other groupedWeightedReservoirSample method, but includes a seed for testing
+   * purposes.
+   * 
+   * @param input A {@code PTable} with the key a group ID and the value a weighted observation in that group
+   * @param sampleSizes An array of length N, where each entry is the number of elements to include in the corresponding group
+   * @param seed The test seed
+   * @return A {@code PCollection} of the sampled elements for each of the groups
+   */
+  public static <T, N extends Number> PCollection<Pair<Integer, T>> groupedWeightedReservoirSample(
+      PTable<Integer, Pair<T, N>> input,
+      int[] sampleSizes,
+      Long seed) {
+    PTypeFamily ptf = input.getTypeFamily();
+    PType<T> ttype = (PType<T>) input.getPTableType().getValueType().getSubTypes().get(0);
+    PTableType<Integer, Pair<Double, T>> ptt = ptf.tableOf(ptf.ints(),
+        ptf.pairs(ptf.doubles(), ttype));
+    
+    return input.parallelDo(new ReservoirSampleFn<T, N>(sampleSizes, seed, ttype), ptt)
+        .groupByKey(1)
+        .combineValues(new WRSCombineFn<T>(sampleSizes, ttype))
+        .parallelDo(new MapFn<Pair<Integer, Pair<Double, T>>, Pair<Integer, T>>() {
+          @Override
+          public Pair<Integer, T> map(Pair<Integer, Pair<Double, T>> p) {
+            return Pair.of(p.first(), p.second().second());
+          }
+        }, ptf.pairs(ptf.ints(), ttype));
+  }
+
+}
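
A usage sketch for the two common Sample entry points (the wrapper class, sample fraction, and
sample size below are illustrative choices):

import org.apache.crunch.PCollection;
import org.apache.crunch.lib.Sample;

public class SampleExample {
  // Keep roughly 1% of the records, each accepted independently.
  public static PCollection<String> onePercent(PCollection<String> events) {
    return Sample.sample(events, 0.01);
  }

  // Keep exactly 1000 records, each equally likely, regardless of input size.
  public static PCollection<String> fixedSize(PCollection<String> events) {
    return Sample.reservoirSample(events, 1000);
  }
}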

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/lib/SampleUtils.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/lib/SampleUtils.java b/crunch-core/src/main/java/org/apache/crunch/lib/SampleUtils.java
new file mode 100644
index 0000000..8769eed
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/lib/SampleUtils.java
@@ -0,0 +1,168 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.SortedMap;
+
+import org.apache.crunch.CombineFn;
+import org.apache.crunch.DoFn;
+import org.apache.crunch.Emitter;
+import org.apache.crunch.FilterFn;
+import org.apache.crunch.Pair;
+import org.apache.crunch.types.PType;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+
+class SampleUtils {
+  
+  static class SampleFn<S> extends FilterFn<S> {
+
+    private final Long seed;
+    private final double acceptanceProbability;
+    private transient Random r;
+
+    public SampleFn(double acceptanceProbability, Long seed) {
+      Preconditions.checkArgument(0.0 < acceptanceProbability && acceptanceProbability < 1.0);
+      this.seed = seed == null ? System.currentTimeMillis() : seed;
+      this.acceptanceProbability = acceptanceProbability;
+    }
+
+    @Override
+    public void initialize() {
+      if (r == null) {
+        r = new Random(seed);
+      }
+    }
+
+    @Override
+    public boolean accept(S input) {
+      return r.nextDouble() < acceptanceProbability;
+    }
+  }
+
+
+  static class ReservoirSampleFn<T, N extends Number>
+      extends DoFn<Pair<Integer, Pair<T, N>>, Pair<Integer, Pair<Double, T>>> {
+  
+    private int[] sampleSizes;
+    private Long seed;
+    private PType<T> valueType;
+    private transient List<SortedMap<Double, T>> reservoirs;
+    private transient Random random;
+    
+    public ReservoirSampleFn(int[] sampleSizes, Long seed, PType<T> valueType) {
+      this.sampleSizes = sampleSizes;
+      this.seed = seed;
+      this.valueType = valueType;
+    }
+    
+    @Override
+    public void initialize() {
+      this.reservoirs = Lists.newArrayList();
+      this.valueType.initialize(getConfiguration());
+      for (int i = 0; i < sampleSizes.length; i++) {
+        reservoirs.add(Maps.<Double, T>newTreeMap());
+      }
+      if (random == null) {
+        if (seed == null) {
+          this.random = new Random();
+        } else {
+          this.random = new Random(seed);
+        }
+      }
+    }
+    
+    @Override
+    public void process(Pair<Integer, Pair<T, N>> input,
+        Emitter<Pair<Integer, Pair<Double, T>>> emitter) {
+      int id = input.first();
+      Pair<T, N> p = input.second();
+      double weight = p.second().doubleValue();
+      if (weight > 0.0) {
+        double score = Math.log(random.nextDouble()) / weight;
+        SortedMap<Double, T> reservoir = reservoirs.get(id);
+        if (reservoir.size() < sampleSizes[id]) { 
+          reservoir.put(score, valueType.getDetachedValue(p.first()));        
+        } else if (score > reservoir.firstKey()) {
+          reservoir.remove(reservoir.firstKey());
+          reservoir.put(score, valueType.getDetachedValue(p.first()));
+        }
+      }
+    }
+    
+    @Override
+    public void cleanup(Emitter<Pair<Integer, Pair<Double, T>>> emitter) {
+      for (int id = 0; id < reservoirs.size(); id++) {
+        SortedMap<Double, T> reservoir = reservoirs.get(id);
+        for (Map.Entry<Double, T> e : reservoir.entrySet()) {
+          emitter.emit(Pair.of(id, Pair.of(e.getKey(), e.getValue())));
+        }
+      }
+    }
+  }
+  
+  static class WRSCombineFn<T> extends CombineFn<Integer, Pair<Double, T>> {
+
+    private int[] sampleSizes;
+    private PType<T> valueType;
+    private List<SortedMap<Double, T>> reservoirs;
+    
+    public WRSCombineFn(int[] sampleSizes, PType<T> valueType) {
+      this.sampleSizes = sampleSizes;
+      this.valueType = valueType;
+    }
+
+    @Override
+    public void initialize() {
+      this.reservoirs = Lists.newArrayList();
+      for (int i = 0; i < sampleSizes.length; i++) {
+        reservoirs.add(Maps.<Double, T>newTreeMap());
+      }
+      this.valueType.initialize(getConfiguration());
+    }
+    
+    @Override
+    public void process(Pair<Integer, Iterable<Pair<Double, T>>> input,
+        Emitter<Pair<Integer, Pair<Double, T>>> emitter) {
+      SortedMap<Double, T> reservoir = reservoirs.get(input.first());
+      for (Pair<Double, T> p : input.second()) {
+        if (reservoir.size() < sampleSizes[input.first()]) { 
+          reservoir.put(p.first(), valueType.getDetachedValue(p.second()));        
+        } else if (p.first() > reservoir.firstKey()) {
+          reservoir.remove(reservoir.firstKey());
+          reservoir.put(p.first(), valueType.getDetachedValue(p.second()));  
+        }
+      }
+    }
+    
+    @Override
+    public void cleanup(Emitter<Pair<Integer, Pair<Double, T>>> emitter) {
+      for (int i = 0; i < reservoirs.size(); i++) {
+        SortedMap<Double, T> reservoir = reservoirs.get(i);
+        for (Map.Entry<Double, T> e : reservoir.entrySet()) {
+          emitter.emit(Pair.of(i, Pair.of(e.getKey(), e.getValue())));
+        }
+      }
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/lib/SecondarySort.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/lib/SecondarySort.java b/crunch-core/src/main/java/org/apache/crunch/lib/SecondarySort.java
new file mode 100644
index 0000000..54b4396
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/lib/SecondarySort.java
@@ -0,0 +1,118 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib;
+
+import java.util.Collection;
+
+import org.apache.crunch.DoFn;
+import org.apache.crunch.Emitter;
+import org.apache.crunch.GroupingOptions;
+import org.apache.crunch.MapFn;
+import org.apache.crunch.PCollection;
+import org.apache.crunch.PGroupedTable;
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pair;
+import org.apache.crunch.lib.join.JoinUtils;
+import org.apache.crunch.types.PTableType;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.PTypeFamily;
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * Utilities for performing a secondary sort on a {@code PTable<K, Pair<V1, V2>>} collection.
+ * <p>
+ * Secondary sorts are usually performed during sessionization: given a collection
+ * of events, we want to group them by a key (such as a user ID), then sort the grouped
+ * records by an auxiliary key (such as a timestamp), and then perform some additional
+ * processing on the sorted records.
+ */
+public class SecondarySort {
+  
+  /**
+   * Perform a secondary sort on the given {@code PTable} instance and then apply a
+   * {@code DoFn} to the resulting sorted data to yield an output {@code PCollection<T>}.
+   */
+  public static <K, V1, V2, T> PCollection<T> sortAndApply(PTable<K, Pair<V1, V2>> input,
+      DoFn<Pair<K, Iterable<Pair<V1, V2>>>, T> doFn, PType<T> ptype) {
+    return prepare(input)
+        .parallelDo("SecondarySort.apply", new SSWrapFn<K, V1, V2, T>(doFn), ptype);
+  }
+  
+  /**
+   * Perform a secondary sort on the given {@code PTable} instance and then apply a
+   * {@code DoFn} to the resulting sorted data to yield an output {@code PTable<U, V>}.
+   */
+  public static <K, V1, V2, U, V> PTable<U, V> sortAndApply(PTable<K, Pair<V1, V2>> input,
+      DoFn<Pair<K, Iterable<Pair<V1, V2>>>, Pair<U, V>> doFn, PTableType<U, V> ptype) {
+    return prepare(input)
+        .parallelDo("SecondarySort.apply", new SSWrapFn<K, V1, V2, Pair<U, V>>(doFn), ptype);
+  }
+  
+  private static <K, V1, V2> PGroupedTable<Pair<K, V1>, Pair<V1, V2>> prepare(
+      PTable<K, Pair<V1, V2>> input) {
+    PTypeFamily ptf = input.getTypeFamily();
+    PType<Pair<V1, V2>> valueType = input.getValueType();
+    PTableType<Pair<K, V1>, Pair<V1, V2>> inter = ptf.tableOf(
+        ptf.pairs(input.getKeyType(), valueType.getSubTypes().get(0)),
+        valueType);
+    PTableType<K, Collection<Pair<V1, V2>>> out = ptf.tableOf(input.getKeyType(),
+        ptf.collections(input.getValueType()));
+    return input.parallelDo("SecondarySort.format", new SSFormatFn<K, V1, V2>(), inter)
+        .groupByKey(
+            GroupingOptions.builder()
+            .groupingComparatorClass(JoinUtils.getGroupingComparator(ptf))
+            .partitionerClass(JoinUtils.getPartitionerClass(ptf))
+            .build());
+  }
+  
+  private static class SSFormatFn<K, V1, V2> extends MapFn<Pair<K, Pair<V1, V2>>, Pair<Pair<K, V1>, Pair<V1, V2>>> {
+    @Override
+    public Pair<Pair<K, V1>, Pair<V1, V2>> map(Pair<K, Pair<V1, V2>> input) {
+      return Pair.of(Pair.of(input.first(), input.second().first()), input.second());
+    }
+  }  
+
+  private static class SSWrapFn<K, V1, V2, T> extends DoFn<Pair<Pair<K, V1>, Iterable<Pair<V1, V2>>>, T> {
+    private final DoFn<Pair<K, Iterable<Pair<V1, V2>>>, T> intern;
+    
+    public SSWrapFn(DoFn<Pair<K, Iterable<Pair<V1, V2>>>, T> intern) {
+      this.intern = intern;
+    }
+
+    @Override
+    public void configure(Configuration conf) {
+      intern.configure(conf);
+    }
+
+    @Override
+    public void initialize() {
+      intern.setContext(getContext());
+      intern.initialize();
+    }
+    
+    @Override
+    public void process(Pair<Pair<K, V1>, Iterable<Pair<V1, V2>>> input, Emitter<T> emitter) {
+      intern.process(Pair.of(input.first().first(), input.second()), emitter);
+    }
+    
+    @Override
+    public void cleanup(Emitter<T> emitter) {
+      intern.cleanup(emitter);
+    }
+  }  
+}
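
For reference, a minimal usage sketch of the SecondarySort.sortAndApply API added above; the events.txt input, its tab-separated user/timestamp/event layout, and the session-string logic are assumptions made for illustration:

    import org.apache.crunch.DoFn;
    import org.apache.crunch.Emitter;
    import org.apache.crunch.MapFn;
    import org.apache.crunch.PCollection;
    import org.apache.crunch.PTable;
    import org.apache.crunch.Pair;
    import org.apache.crunch.Pipeline;
    import org.apache.crunch.impl.mr.MRPipeline;
    import org.apache.crunch.lib.SecondarySort;
    import org.apache.crunch.types.avro.Avros;

    public class SessionizeSketch {
      public static void main(String[] args) {
        Pipeline pipeline = new MRPipeline(SessionizeSketch.class);
        // Lines of the form: user<TAB>timestamp<TAB>event
        PTable<String, Pair<Long, String>> events = pipeline.readTextFile("events.txt")
            .parallelDo(new MapFn<String, Pair<String, Pair<Long, String>>>() {
              @Override
              public Pair<String, Pair<Long, String>> map(String line) {
                String[] f = line.split("\t");
                return Pair.of(f[0], Pair.of(Long.valueOf(f[1]), f[2]));
              }
            }, Avros.tableOf(Avros.strings(), Avros.pairs(Avros.longs(), Avros.strings())));

        // Within each user's Iterable, the pairs arrive sorted by the timestamp component.
        PCollection<String> sessions = SecondarySort.sortAndApply(events,
            new DoFn<Pair<String, Iterable<Pair<Long, String>>>, String>() {
              @Override
              public void process(Pair<String, Iterable<Pair<Long, String>>> in, Emitter<String> out) {
                StringBuilder sb = new StringBuilder(in.first());
                for (Pair<Long, String> evt : in.second()) {
                  sb.append(' ').append(evt.second());
                }
                out.emit(sb.toString());
              }
            }, Avros.strings());

        pipeline.writeTextFile(sessions, "sessions-out");
        pipeline.done();
      }
    }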

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/lib/Set.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/lib/Set.java b/crunch-core/src/main/java/org/apache/crunch/lib/Set.java
new file mode 100644
index 0000000..0ba879c
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/lib/Set.java
@@ -0,0 +1,118 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib;
+
+import java.util.Collection;
+
+import org.apache.crunch.DoFn;
+import org.apache.crunch.Emitter;
+import org.apache.crunch.PCollection;
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pair;
+import org.apache.crunch.Tuple3;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.PTypeFamily;
+
+/**
+ * Utilities for performing set operations (difference, intersection, etc.) on
+ * {@code PCollection} instances.
+ */
+public class Set {
+
+  /**
+   * Compute the set difference between two sets of elements.
+   * 
+   * @return a collection containing elements that are in <code>coll1</code> but
+   *         not in <code>coll2</code>
+   */
+  public static <T> PCollection<T> difference(PCollection<T> coll1, PCollection<T> coll2) {
+    return Cogroup.cogroup(toTable(coll1), toTable(coll2)).parallelDo(
+        new DoFn<Pair<T, Pair<Collection<Boolean>, Collection<Boolean>>>, T>() {
+          @Override
+          public void process(Pair<T, Pair<Collection<Boolean>, Collection<Boolean>>> input, Emitter<T> emitter) {
+            Pair<Collection<Boolean>, Collection<Boolean>> groups = input.second();
+            if (!groups.first().isEmpty() && groups.second().isEmpty()) {
+              emitter.emit(input.first());
+            }
+          }
+        }, coll1.getPType());
+  }
+
+  /**
+   * Compute the intersection of two sets of elements.
+   * 
+   * @return a collection containing elements that are common to both sets
+   *         <code>coll1</code> and <code>coll2</code>
+   */
+  public static <T> PCollection<T> intersection(PCollection<T> coll1, PCollection<T> coll2) {
+    return Cogroup.cogroup(toTable(coll1), toTable(coll2)).parallelDo(
+        new DoFn<Pair<T, Pair<Collection<Boolean>, Collection<Boolean>>>, T>() {
+          @Override
+          public void process(Pair<T, Pair<Collection<Boolean>, Collection<Boolean>>> input, Emitter<T> emitter) {
+            Pair<Collection<Boolean>, Collection<Boolean>> groups = input.second();
+            if (!groups.first().isEmpty() && !groups.second().isEmpty()) {
+              emitter.emit(input.first());
+            }
+          }
+        }, coll1.getPType());
+  }
+
+  /**
+   * Find the elements that are common to two sets, like the Unix
+   * <code>comm</code> utility. This method returns a {@link PCollection} of
+   * {@link Tuple3} objects, and the position in the tuple at which an element
+   * appears is determined by the collections that it is a member of, as
+   * follows:
+   * <ol>
+   * <li>elements only in <code>coll1</code>,</li>
+   * <li>elements only in <code>coll2</code>, or</li>
+   * <li>elements in both collections</li>
+   * </ol>
+   * Tuples are otherwise filled with <code>null</code>.
+   * 
+   * @return a collection of {@link Tuple3} objects
+   */
+  public static <T> PCollection<Tuple3<T, T, T>> comm(PCollection<T> coll1, PCollection<T> coll2) {
+    PTypeFamily typeFamily = coll1.getTypeFamily();
+    PType<T> type = coll1.getPType();
+    return Cogroup.cogroup(toTable(coll1), toTable(coll2)).parallelDo(
+        new DoFn<Pair<T, Pair<Collection<Boolean>, Collection<Boolean>>>, Tuple3<T, T, T>>() {
+          @Override
+          public void process(Pair<T, Pair<Collection<Boolean>, Collection<Boolean>>> input,
+              Emitter<Tuple3<T, T, T>> emitter) {
+            Pair<Collection<Boolean>, Collection<Boolean>> groups = input.second();
+            boolean inFirst = !groups.first().isEmpty();
+            boolean inSecond = !groups.second().isEmpty();
+            T t = input.first();
+            emitter.emit(Tuple3.of(inFirst && !inSecond ? t : null, !inFirst && inSecond ? t : null, inFirst
+                && inSecond ? t : null));
+          }
+        }, typeFamily.triples(type, type, type));
+  }
+
+  private static <T> PTable<T, Boolean> toTable(PCollection<T> coll) {
+    PTypeFamily typeFamily = coll.getTypeFamily();
+    return coll.parallelDo(new DoFn<T, Pair<T, Boolean>>() {
+      @Override
+      public void process(T input, Emitter<Pair<T, Boolean>> emitter) {
+        emitter.emit(Pair.of(input, Boolean.TRUE));
+      }
+    }, typeFamily.tableOf(coll.getPType(), typeFamily.booleans()));
+  }
+
+}
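
A quick sketch of the three set operations using the in-memory pipeline; the MemPipeline.typedCollectionOf factory and the sample values are assumptions made for illustration:

    import org.apache.crunch.PCollection;
    import org.apache.crunch.Tuple3;
    import org.apache.crunch.impl.mem.MemPipeline;
    import org.apache.crunch.lib.Set;
    import org.apache.crunch.types.avro.Avros;

    public class SetSketch {
      public static void main(String[] args) {
        PCollection<String> a = MemPipeline.typedCollectionOf(Avros.strings(), "a", "b", "c");
        PCollection<String> b = MemPipeline.typedCollectionOf(Avros.strings(), "b", "c", "d");

        // Elements in a but not in b: {"a"}
        PCollection<String> onlyInA = Set.difference(a, b);
        // Elements in both collections: {"b", "c"}
        PCollection<String> inBoth = Set.intersection(a, b);
        // comm-style three-way split: (only in a, only in b, in both)
        PCollection<Tuple3<String, String, String>> threeWay = Set.comm(a, b);

        System.out.println(onlyInA.materialize());
        System.out.println(inBoth.materialize());
        System.out.println(threeWay.materialize());
      }
    }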

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/lib/Sort.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/lib/Sort.java b/crunch-core/src/main/java/org/apache/crunch/lib/Sort.java
new file mode 100644
index 0000000..23bcaee
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/lib/Sort.java
@@ -0,0 +1,294 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib;
+
+import static org.apache.crunch.lib.sort.Comparators.*;
+import static org.apache.crunch.lib.sort.SortFns.*;
+
+import org.apache.avro.Schema;
+import org.apache.crunch.DoFn;
+import org.apache.crunch.Emitter;
+import org.apache.crunch.GroupingOptions;
+import org.apache.crunch.GroupingOptions.Builder;
+import org.apache.crunch.PCollection;
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pair;
+import org.apache.crunch.SourceTarget;
+import org.apache.crunch.Tuple;
+import org.apache.crunch.Tuple3;
+import org.apache.crunch.Tuple4;
+import org.apache.crunch.TupleN;
+import org.apache.crunch.lib.sort.TotalOrderPartitioner;
+import org.apache.crunch.materialize.MaterializableIterable;
+import org.apache.crunch.types.PTableType;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.PTypeFamily;
+import org.apache.crunch.types.avro.AvroType;
+import org.apache.crunch.types.avro.AvroTypeFamily;
+import org.apache.crunch.types.writable.WritableTypeFamily;
+import org.apache.crunch.util.PartitionUtils;
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * Utilities for sorting {@code PCollection} instances.
+ */
+public class Sort {
+
+  /**
+   * For signaling the order in which a sort should be done.
+   */
+  public enum Order {
+    ASCENDING,
+    DESCENDING,
+    IGNORE
+  }
+
+  /**
+   * To sort by column 2 ascending then column 1 descending, you would use:
+   * <code>
+   * sortPairs(coll, by(2, ASCENDING), by(1, DESCENDING))
+   * </code> Column numbering is 1-based.
+   */
+  public static class ColumnOrder {
+    private int column;
+    private Order order;
+
+    public ColumnOrder(int column, Order order) {
+      this.column = column;
+      this.order = order;
+    }
+
+    public static ColumnOrder by(int column, Order order) {
+      return new ColumnOrder(column, order);
+    }
+
+    public int column() {
+      return column;
+    }
+    
+    public Order order() {
+      return order;
+    }
+    
+    @Override
+    public String toString() {
+      return "ColumnOrder: column:" + column + ", Order: " + order;
+    }
+  }
+
+  /**
+   * Sorts the {@code PCollection} using the natural ordering of its elements in ascending order.
+   * 
+   * @return a {@code PCollection} representing the sorted collection.
+   */
+  public static <T> PCollection<T> sort(PCollection<T> collection) {
+    return sort(collection, Order.ASCENDING);
+  }
+
+  /**
+   * Sorts the {@code PCollection} using the natural order of its elements with the given {@code Order}.
+   * 
+   * @return a {@code PCollection} representing the sorted collection.
+   */
+  public static <T> PCollection<T> sort(PCollection<T> collection, Order order) {
+    return sort(collection, -1, order);
+  }
+  
+  /**
+   * Sorts the {@code PCollection} using the natural ordering of its elements in
+   * the order specified using the given number of reducers.
+   * 
+   * @return a {@code PCollection} representing the sorted collection.
+   */
+  public static <T> PCollection<T> sort(PCollection<T> collection, int numReducers, Order order) {
+    PTypeFamily tf = collection.getTypeFamily();
+    PTableType<T, Void> type = tf.tableOf(collection.getPType(), tf.nulls());
+    Configuration conf = collection.getPipeline().getConfiguration();
+    PTable<T, Void> pt = collection.parallelDo("sort-pre", new DoFn<T, Pair<T, Void>>() {
+      @Override
+      public void process(T input, Emitter<Pair<T, Void>> emitter) {
+        emitter.emit(Pair.of(input, (Void) null));
+      }
+    }, type);
+    GroupingOptions options = buildGroupingOptions(pt, conf, numReducers, order);
+    return pt.groupByKey(options).ungroup().keys();
+  }
+
+  /**
+   * Sorts the {@code PTable} using the natural ordering of its keys in ascending order.
+   * 
+   * @return a {@code PTable} representing the sorted table.
+   */
+  public static <K, V> PTable<K, V> sort(PTable<K, V> table) {
+    return sort(table, Order.ASCENDING);
+  }
+
+  /**
+   * Sorts the {@code PTable} using the natural ordering of its keys with the given {@code Order}.
+   *
+   * @return a {@code PTable} representing the sorted table.
+   */
+  public static <K, V> PTable<K, V> sort(PTable<K, V> table, Order key) {
+    return sort(table, -1, key);
+  }
+  
+  /**
+   * Sorts the {@code PTable} using the natural ordering of its keys in the
+   * order specified with a client-specified number of reducers.
+   * 
+   * @return a {@code PTable} representing the sorted table.
+   */
+  public static <K, V> PTable<K, V> sort(PTable<K, V> table, int numReducers, Order key) {
+    Configuration conf = table.getPipeline().getConfiguration();
+    GroupingOptions options = buildGroupingOptions(table, conf, numReducers, key);
+    return table.groupByKey(options).ungroup();
+  }
+
+  
+  /**
+   * Sorts the {@code PCollection} of {@code Pair}s using the specified column
+   * ordering.
+   * 
+   * @return a {@code PCollection} representing the sorted collection.
+   */
+  public static <U, V> PCollection<Pair<U, V>> sortPairs(PCollection<Pair<U, V>> collection,
+      ColumnOrder... columnOrders) {
+    return sortTuples(collection, columnOrders);
+  }
+
+  /**
+   * Sorts the {@code PCollection} of {@code Tuple3}s using the specified column
+   * ordering.
+   * 
+   * @return a {@code PCollection} representing the sorted collection.
+   */
+  public static <V1, V2, V3> PCollection<Tuple3<V1, V2, V3>> sortTriples(PCollection<Tuple3<V1, V2, V3>> collection,
+      ColumnOrder... columnOrders) {
+    return sortTuples(collection, columnOrders);
+  }
+
+  /**
+   * Sorts the {@code PCollection} of {@code Tuple4}s using the specified column
+   * ordering.
+   * 
+   * @return a {@code PCollection} representing the sorted collection.
+   */
+  public static <V1, V2, V3, V4> PCollection<Tuple4<V1, V2, V3, V4>> sortQuads(
+      PCollection<Tuple4<V1, V2, V3, V4>> collection, ColumnOrder... columnOrders) {
+    return sortTuples(collection, columnOrders);
+  }
+
+  /**
+   * Sorts the {@code PCollection} of tuples using the specified column ordering.
+   *
+   * @return a {@code PCollection} representing the sorted collection.
+   */
+  public static <T extends Tuple> PCollection<T> sortTuples(PCollection<T> collection,
+      ColumnOrder... columnOrders) {
+    return sortTuples(collection, -1, columnOrders);
+  }
+  
+  /**
+   * Sorts the {@code PCollection} of tuples using the specified column
+   * ordering and a client-specified number of reducers.
+   * 
+   * @return a {@code PCollection} representing the sorted collection.
+   */
+  public static <T extends Tuple> PCollection<T> sortTuples(PCollection<T> collection, int numReducers,
+      ColumnOrder... columnOrders) {
+    PType<T> pType = collection.getPType();
+    KeyExtraction<T> ke = new KeyExtraction<T>(pType, columnOrders);
+    PTable<Object, T> pt = collection.by(ke.getByFn(), ke.getKeyType());
+    Configuration conf = collection.getPipeline().getConfiguration();
+    GroupingOptions options = buildGroupingOptions(pt, conf, numReducers, columnOrders);
+    return pt.groupByKey(options).ungroup().values();
+  }
+
+  // TODO: move to type family?
+  private static <K, V> GroupingOptions buildGroupingOptions(PTable<K, V> ptable, Configuration conf,
+      int numReducers, Order order) {
+    PType<K> ptype = ptable.getKeyType();
+    PTypeFamily tf = ptable.getTypeFamily();
+    Builder builder = GroupingOptions.builder();
+    if (order == Order.DESCENDING) {
+      if (tf == WritableTypeFamily.getInstance()) {
+        builder.sortComparatorClass(ReverseWritableComparator.class);
+      } else if (tf == AvroTypeFamily.getInstance()) {
+        AvroType<K> avroType = (AvroType<K>) ptype;
+        Schema schema = avroType.getSchema();
+        builder.conf("crunch.schema", schema.toString());
+        builder.sortComparatorClass(ReverseAvroComparator.class);
+      } else {
+        throw new RuntimeException("Unrecognized type family: " + tf);
+      }
+    } else if (tf == AvroTypeFamily.getInstance()) {
+      builder.conf("crunch.schema", ((AvroType<K>) ptype).getSchema().toString());
+    }
+    configureReducers(builder, ptable, conf, numReducers);
+    return builder.build();
+  }
+
+  private static <K, V> GroupingOptions buildGroupingOptions(PTable<K, V> ptable, Configuration conf,
+      int numReducers, ColumnOrder[] columnOrders) {
+    PTypeFamily tf = ptable.getTypeFamily();
+    PType<K> keyType = ptable.getKeyType();
+    Builder builder = GroupingOptions.builder();
+    if (tf == WritableTypeFamily.getInstance()) {
+      if (columnOrders.length == 1 && columnOrders[0].order == Order.DESCENDING) {
+        builder.sortComparatorClass(ReverseWritableComparator.class);
+      } else {
+        TupleWritableComparator.configureOrdering(conf, columnOrders);
+        builder.sortComparatorClass(TupleWritableComparator.class);
+      }
+    } else if (tf == AvroTypeFamily.getInstance()) {
+      AvroType<K> avroType = (AvroType<K>) keyType;
+      Schema schema = avroType.getSchema();
+      builder.conf("crunch.schema", schema.toString());
+      if (columnOrders.length == 1 && columnOrders[0].order == Order.DESCENDING) {
+        builder.sortComparatorClass(ReverseAvroComparator.class);
+      }
+    } else {
+      throw new RuntimeException("Unrecognized type family: " + tf);
+    }
+    configureReducers(builder, ptable, conf, numReducers);
+    return builder.build();
+  }
+
+  private static <K, V> void configureReducers(GroupingOptions.Builder builder,
+      PTable<K, V> ptable, Configuration conf, int numReducers) {
+    if (numReducers <= 0) {
+      numReducers = PartitionUtils.getRecommendedPartitions(ptable, conf);
+      if (numReducers < 5) {
+        // Not worth the overhead, force it to 1
+        numReducers = 1;
+      }
+    }
+    builder.numReducers(numReducers);
+    if (numReducers > 1) {
+      Iterable<K> iter = Sample.reservoirSample(ptable.keys(), numReducers - 1).materialize();
+      MaterializableIterable<K> mi = (MaterializableIterable<K>) iter;
+      if (mi.isSourceTarget()) {
+        builder.sourceTarget((SourceTarget) mi.getSource());
+      }
+      builder.partitionerClass(TotalOrderPartitioner.class);
+      builder.conf(TotalOrderPartitioner.PARTITIONER_PATH, mi.getPath().toString());
+      //TODO: distcache handling
+    }   
+  }
+
+}
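
A usage sketch of Sort.sort and Sort.sortPairs with ColumnOrder, assuming MemPipeline's typedCollectionOf factory and its handling of these sorts; the sample values are illustrative, and the calls look the same against an MRPipeline-backed collection:

    import static org.apache.crunch.lib.Sort.ColumnOrder.by;
    import static org.apache.crunch.lib.Sort.Order.ASCENDING;
    import static org.apache.crunch.lib.Sort.Order.DESCENDING;

    import org.apache.crunch.PCollection;
    import org.apache.crunch.Pair;
    import org.apache.crunch.impl.mem.MemPipeline;
    import org.apache.crunch.lib.Sort;
    import org.apache.crunch.types.avro.Avros;

    public class SortSketch {
      public static void main(String[] args) {
        PCollection<String> words =
            MemPipeline.typedCollectionOf(Avros.strings(), "pear", "apple", "banana");

        // Natural ordering of the elements, descending.
        PCollection<String> desc = Sort.sort(words, DESCENDING);

        PCollection<Pair<String, Integer>> pairs = MemPipeline.typedCollectionOf(
            Avros.pairs(Avros.strings(), Avros.ints()),
            Pair.of("a", 2), Pair.of("b", 1), Pair.of("a", 1));

        // Column 2 ascending, then column 1 descending (columns are 1-based).
        PCollection<Pair<String, Integer>> ordered =
            Sort.sortPairs(pairs, by(2, ASCENDING), by(1, DESCENDING));

        System.out.println(desc.materialize());
        System.out.println(ordered.materialize());
      }
    }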

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/lib/join/FullOuterJoinFn.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/lib/join/FullOuterJoinFn.java b/crunch-core/src/main/java/org/apache/crunch/lib/join/FullOuterJoinFn.java
new file mode 100644
index 0000000..c0ce727
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/lib/join/FullOuterJoinFn.java
@@ -0,0 +1,102 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib.join;
+
+import java.util.List;
+
+import org.apache.crunch.Emitter;
+import org.apache.crunch.Pair;
+import org.apache.crunch.types.PType;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Used to perform the last step of a full outer join.
+ * 
+ * @param <K> Type of the keys.
+ * @param <U> Type of the first {@link org.apache.crunch.PTable}'s values
+ * @param <V> Type of the second {@link org.apache.crunch.PTable}'s values
+ */
+public class FullOuterJoinFn<K, U, V> extends JoinFn<K, U, V> {
+
+  private transient int lastId;
+  private transient K lastKey;
+  private transient List<U> leftValues;
+
+  public FullOuterJoinFn(PType<K> keyType, PType<U> leftValueType) {
+    super(keyType, leftValueType);
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public void initialize() {
+    super.initialize();
+    lastId = 1;
+    lastKey = null;
+    this.leftValues = Lists.newArrayList();
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public void join(K key, int id, Iterable<Pair<U, V>> pairs, Emitter<Pair<K, Pair<U, V>>> emitter) {
+    if (!key.equals(lastKey)) {
+      // Make sure that left side gets emitted.
+      if (0 == lastId) {
+        for (U u : leftValues) {
+          emitter.emit(Pair.of(lastKey, Pair.of(u, (V) null)));
+        }
+      }
+      lastKey = keyType.getDetachedValue(key);
+      leftValues.clear();
+    }
+    if (id == 0) {
+      for (Pair<U, V> pair : pairs) {
+        if (pair.first() != null)
+          leftValues.add(leftValueType.getDetachedValue(pair.first()));
+      }
+    } else {
+      for (Pair<U, V> pair : pairs) {
+        // Make sure that right side gets emitted.
+        if (leftValues.isEmpty()) {
+          leftValues.add(null);
+        }
+        for (U u : leftValues) {
+          emitter.emit(Pair.of(lastKey, Pair.of(u, pair.second())));
+        }
+      }
+    }
+
+    lastId = id;
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public void cleanup(Emitter<Pair<K, Pair<U, V>>> emitter) {
+    if (0 == lastId) {
+      for (U u : leftValues) {
+        emitter.emit(Pair.of(lastKey, Pair.of(u, (V) null)));
+      }
+    }
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public String getJoinType() {
+    return "fullOuterJoin";
+  }
+}
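
FullOuterJoinFn is normally plugged in through Join.join, the same way the JoinTester integration test later in this patch wires in its JoinFn. A sketch under the assumption that MemPipeline.typedTableOf is available for building small in-memory tables:

    import org.apache.crunch.PTable;
    import org.apache.crunch.Pair;
    import org.apache.crunch.impl.mem.MemPipeline;
    import org.apache.crunch.lib.Join;
    import org.apache.crunch.lib.join.FullOuterJoinFn;
    import org.apache.crunch.types.avro.Avros;

    public class FullOuterJoinSketch {
      public static void main(String[] args) {
        PTable<String, Long> left = MemPipeline.typedTableOf(
            Avros.tableOf(Avros.strings(), Avros.longs()), "wretched", 10L, "againe", 10L);
        PTable<String, Long> right = MemPipeline.typedTableOf(
            Avros.tableOf(Avros.strings(), Avros.longs()), "wretched", 14L, "Montparnasse.", 2L);

        // Keys from either side survive; the side with no match contributes a null value.
        PTable<String, Pair<Long, Long>> joined = Join.join(left, right,
            new FullOuterJoinFn<String, Long, Long>(Avros.strings(), Avros.longs()));

        System.out.println(joined.materialize());
      }
    }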


[40/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/lib/SpecificAvroGroupByIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/lib/SpecificAvroGroupByIT.java b/crunch-core/src/it/java/org/apache/crunch/lib/SpecificAvroGroupByIT.java
new file mode 100644
index 0000000..5292353
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/lib/SpecificAvroGroupByIT.java
@@ -0,0 +1,119 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib;
+
+import static junit.framework.Assert.assertEquals;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.List;
+
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.specific.SpecificDatumWriter;
+import org.apache.crunch.MapFn;
+import org.apache.crunch.PCollection;
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pair;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.io.At;
+import org.apache.crunch.test.Person;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.avro.Avros;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Test {@link org.apache.crunch.types.avro.SafeAvroSerialization} with Specific Avro types
+ */
+public class SpecificAvroGroupByIT implements Serializable {
+
+  private static final long serialVersionUID = 1344118240353796561L;
+
+  private transient File avroFile;
+  @Rule
+  public transient TemporaryPath tmpDir = TemporaryPaths.create();
+
+
+  @Before
+  public void setUp() throws IOException {
+    avroFile = File.createTempFile("avrotest", ".avro");
+  }
+
+  @After
+  public void tearDown() {
+    avroFile.delete();
+  }
+
+  @Test
+  public void testGroupByWithSpecificAvroType() throws Exception {
+    MRPipeline pipeline = new MRPipeline(SpecificAvroGroupByIT.class, tmpDir.getDefaultConfiguration());
+    testSpecificAvro(pipeline);
+  }
+
+  public void testSpecificAvro(MRPipeline pipeline) throws Exception {
+
+    createPersonAvroFile(avroFile);
+
+    PCollection<Person> unsorted = pipeline.read(At.avroFile(avroFile.getAbsolutePath(), Avros.records(Person.class)));
+
+    PTable<String, Person> sorted = unsorted.parallelDo(new MapFn<Person, Pair<String, Person>>() {
+
+      @Override
+      public Pair<String, Person> map(Person input) {
+        String key = input.name.toString();
+        return Pair.of(key, input);
+
+      }
+    }, Avros.tableOf(Avros.strings(), Avros.records(Person.class))).groupByKey().ungroup();
+
+    List<Pair<String, Person>> outputPersonList = Lists.newArrayList(sorted.materialize());
+
+    assertEquals(1, outputPersonList.size());
+    assertEquals(String.class, outputPersonList.get(0).first().getClass());
+    assertEquals(Person.class, outputPersonList.get(0).second().getClass());
+
+    pipeline.done();
+  }
+
+  private void createPersonAvroFile(File avroFile) throws IOException {
+
+    Person person = new Person();
+    person.age = 40;
+    person.name = "Bob";
+    List<CharSequence> siblingNames = Lists.newArrayList();
+    siblingNames.add("Bob" + "1");
+    siblingNames.add("Bob" + "2");
+    person.siblingnames = siblingNames;
+
+    FileOutputStream outputStream = new FileOutputStream(avroFile);
+    SpecificDatumWriter<Person> writer = new SpecificDatumWriter<Person>(Person.class);
+
+    DataFileWriter<Person> dataFileWriter = new DataFileWriter<Person>(writer);
+    dataFileWriter.create(Person.SCHEMA$, outputStream);
+    dataFileWriter.append(person);
+    dataFileWriter.close();
+    outputStream.close();
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/lib/join/FullOuterJoinIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/lib/join/FullOuterJoinIT.java b/crunch-core/src/it/java/org/apache/crunch/lib/join/FullOuterJoinIT.java
new file mode 100644
index 0000000..63d594d
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/lib/join/FullOuterJoinIT.java
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib.join;
+
+import static org.junit.Assert.assertTrue;
+
+import org.apache.crunch.Pair;
+import org.apache.crunch.types.PTypeFamily;
+
+public class FullOuterJoinIT extends JoinTester {
+  @Override
+  public void assertPassed(Iterable<Pair<String, Long>> lines) {
+    boolean passed1 = false;
+    boolean passed2 = false;
+    boolean passed3 = false;
+    for (Pair<String, Long> line : lines) {
+      if ("wretched".equals(line.first()) && 24 == line.second()) {
+        passed1 = true;
+      }
+      if ("againe".equals(line.first()) && 10 == line.second()) {
+        passed2 = true;
+      }
+      if ("Montparnasse.".equals(line.first()) && 2 == line.second()) {
+        passed3 = true;
+      }
+    }
+    assertTrue(passed1);
+    assertTrue(passed2);
+    assertTrue(passed3);
+  }
+
+  @Override
+  protected JoinFn<String, Long, Long> getJoinFn(PTypeFamily typeFamily) {
+    return new FullOuterJoinFn<String, Long, Long>(typeFamily.strings(), typeFamily.longs());
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/lib/join/InnerJoinIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/lib/join/InnerJoinIT.java b/crunch-core/src/it/java/org/apache/crunch/lib/join/InnerJoinIT.java
new file mode 100644
index 0000000..4759050
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/lib/join/InnerJoinIT.java
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib.join;
+
+import static org.junit.Assert.assertTrue;
+
+import org.apache.crunch.Pair;
+import org.apache.crunch.types.PTypeFamily;
+
+public class InnerJoinIT extends JoinTester {
+  @Override
+  public void assertPassed(Iterable<Pair<String, Long>> lines) {
+    boolean passed1 = false;
+    boolean passed2 = true;
+    boolean passed3 = true;
+    for (Pair<String, Long> line : lines) {
+      if ("wretched".equals(line.first()) && 24 == line.second()) {
+        passed1 = true;
+      }
+      if ("againe".equals(line.first())) {
+        passed2 = false;
+      }
+      if ("Montparnasse.".equals(line.first())) {
+        passed3 = false;
+      }
+    }
+    assertTrue(passed1);
+    assertTrue(passed2);
+    assertTrue(passed3);
+  }
+
+  @Override
+  protected JoinFn<String, Long, Long> getJoinFn(PTypeFamily typeFamily) {
+    return new InnerJoinFn<String, Long, Long>(typeFamily.strings(), typeFamily.longs());
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/lib/join/JoinTester.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/lib/join/JoinTester.java b/crunch-core/src/it/java/org/apache/crunch/lib/join/JoinTester.java
new file mode 100644
index 0000000..3e8ffda
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/lib/join/JoinTester.java
@@ -0,0 +1,108 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib.join;
+
+import java.io.IOException;
+import java.io.Serializable;
+
+import org.apache.crunch.DoFn;
+import org.apache.crunch.Emitter;
+import org.apache.crunch.PCollection;
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pair;
+import org.apache.crunch.Pipeline;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.lib.Aggregate;
+import org.apache.crunch.lib.Join;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.PTableType;
+import org.apache.crunch.types.PTypeFamily;
+import org.apache.crunch.types.avro.AvroTypeFamily;
+import org.apache.crunch.types.writable.WritableTypeFamily;
+import org.junit.Rule;
+import org.junit.Test;
+
+public abstract class JoinTester implements Serializable {
+  private static class WordSplit extends DoFn<String, String> {
+    @Override
+    public void process(String input, Emitter<String> emitter) {
+      for (String word : input.split("\\s+")) {
+        emitter.emit(word);
+      }
+    }
+  }
+
+  protected PTable<String, Long> join(PCollection<String> w1, PCollection<String> w2, PTypeFamily ptf) {
+    PTableType<String, Long> ntt = ptf.tableOf(ptf.strings(), ptf.longs());
+    PTable<String, Long> ws1 = Aggregate.count(w1.parallelDo("ws1", new WordSplit(), ptf.strings()));
+    PTable<String, Long> ws2 = Aggregate.count(w2.parallelDo("ws2", new WordSplit(), ptf.strings()));
+
+    PTable<String, Pair<Long, Long>> join = Join.join(ws1, ws2, getJoinFn(ptf));
+
+    PTable<String, Long> sums = join.parallelDo("cnt", new DoFn<Pair<String, Pair<Long, Long>>, Pair<String, Long>>() {
+      @Override
+      public void process(Pair<String, Pair<Long, Long>> input, Emitter<Pair<String, Long>> emitter) {
+        Pair<Long, Long> pair = input.second();
+        long sum = (pair.first() != null ? pair.first() : 0) + (pair.second() != null ? pair.second() : 0);
+        emitter.emit(Pair.of(input.first(), sum));
+      }
+    }, ntt);
+
+    return sums;
+  }
+
+  protected void run(Pipeline pipeline, PTypeFamily typeFamily) throws IOException {
+    String shakesInputPath = tmpDir.copyResourceFileName("shakes.txt");
+    String maughamInputPath = tmpDir.copyResourceFileName("maugham.txt");
+
+    PCollection<String> shakespeare = pipeline.readTextFile(shakesInputPath);
+    PCollection<String> maugham = pipeline.readTextFile(maughamInputPath);
+    PTable<String, Long> joined = join(shakespeare, maugham, typeFamily);
+    Iterable<Pair<String, Long>> lines = joined.materialize();
+
+    assertPassed(lines);
+
+    pipeline.done();
+  }
+  @Rule
+  public transient TemporaryPath tmpDir = TemporaryPaths.create();
+
+  @Test
+  public void testWritableJoin() throws Exception {
+    run(new MRPipeline(InnerJoinIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance());
+  }
+
+  @Test
+  public void testAvroJoin() throws Exception {
+    run(new MRPipeline(InnerJoinIT.class, tmpDir.getDefaultConfiguration()), AvroTypeFamily.getInstance());
+  }
+
+  /**
+   * Used to check that the result of the join makes sense.
+   * 
+   * @param lines
+   *          The result of the join.
+   */
+  public abstract void assertPassed(Iterable<Pair<String, Long>> lines);
+
+  /**
+   * @return The JoinFn to use.
+   */
+  protected abstract JoinFn<String, Long, Long> getJoinFn(PTypeFamily typeFamily);
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/lib/join/LeftOuterJoinIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/lib/join/LeftOuterJoinIT.java b/crunch-core/src/it/java/org/apache/crunch/lib/join/LeftOuterJoinIT.java
new file mode 100644
index 0000000..4ad2a81
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/lib/join/LeftOuterJoinIT.java
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib.join;
+
+import static org.junit.Assert.assertTrue;
+
+import org.apache.crunch.Pair;
+import org.apache.crunch.types.PTypeFamily;
+
+public class LeftOuterJoinIT extends JoinTester {
+  @Override
+  public void assertPassed(Iterable<Pair<String, Long>> lines) {
+    boolean passed1 = false;
+    boolean passed2 = false;
+    boolean passed3 = true;
+    for (Pair<String, Long> line : lines) {
+      if ("wretched".equals(line.first()) && 24 == line.second()) {
+        passed1 = true;
+      }
+      if ("againe".equals(line.first()) && 10 == line.second()) {
+        passed2 = true;
+      }
+      if ("Montparnasse.".equals(line.first())) {
+        passed3 = false;
+      }
+    }
+    assertTrue(passed1);
+    assertTrue(passed2);
+    assertTrue(passed3);
+  }
+
+  @Override
+  protected JoinFn<String, Long, Long> getJoinFn(PTypeFamily typeFamily) {
+    return new LeftOuterJoinFn<String, Long, Long>(typeFamily.strings(), typeFamily.longs());
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/lib/join/MapsideJoinIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/lib/join/MapsideJoinIT.java b/crunch-core/src/it/java/org/apache/crunch/lib/join/MapsideJoinIT.java
new file mode 100644
index 0000000..8bb5586
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/lib/join/MapsideJoinIT.java
@@ -0,0 +1,158 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib.join;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.crunch.MapFn;
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pair;
+import org.apache.crunch.Pipeline;
+import org.apache.crunch.PipelineResult;
+import org.apache.crunch.fn.FilterFns;
+import org.apache.crunch.fn.MapValuesFn;
+import org.apache.crunch.impl.mem.MemPipeline;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.writable.Writables;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+public class MapsideJoinIT {
+  
+  private static String saveTempDir;
+  
+  @BeforeClass
+  public static void setUpClass(){
+    
+    // Ensure a consistent temporary directory for use of the DistributedCache.
+    
+    // The DistributedCache technically isn't supported when running in local mode, and the default
+    // temporary directory "/tmp" is used as its location. This typically only causes an issue when
+    // running integration tests on Mac OS X, as OS X doesn't use "/tmp" as its default temporary
+    // directory. The following call ensures that "/tmp" is used as the temporary directory on all platforms.
+    saveTempDir = System.setProperty("java.io.tmpdir", "/tmp");
+  }
+  
+  @AfterClass
+  public static void tearDownClass(){
+    System.setProperty("java.io.tmpdir", saveTempDir);
+  }
+
+  private static class LineSplitter extends MapFn<String, Pair<Integer, String>> {
+    @Override
+    public Pair<Integer, String> map(String input) {
+      String[] fields = input.split("\\|");
+      return Pair.of(Integer.parseInt(fields[0]), fields[1]);
+    }
+  }
+
+  private static class CapOrdersFn extends MapValuesFn<Integer, String, String> {
+    @Override
+    public String map(String v) {
+      return v.toUpperCase();
+    }
+  }
+  
+  private static class ConcatValuesFn extends MapValuesFn<Integer, Pair<String, String>, String> {
+    @Override
+    public String map(Pair<String, String> v) {
+      return v.toString();
+    }
+  }
+  
+  @Rule
+  public TemporaryPath tmpDir = TemporaryPaths.create();
+
+  @Test
+  public void testMapSideJoin_MemPipeline() {
+    runMapsideJoin(MemPipeline.getInstance(), true);
+  }
+
+  @Test
+  public void testMapsideJoin_RightSideIsEmpty() throws IOException {
+    MRPipeline pipeline = new MRPipeline(MapsideJoinIT.class, tmpDir.getDefaultConfiguration());
+    PTable<Integer, String> customerTable = readTable(pipeline, "customers.txt");
+    PTable<Integer, String> orderTable = readTable(pipeline, "orders.txt");
+
+    PTable<Integer, String> filteredOrderTable = orderTable
+        .parallelDo(FilterFns.<Pair<Integer, String>>REJECT_ALL(), orderTable.getPTableType());
+
+    PTable<Integer, Pair<String, String>> joined = MapsideJoin.join(customerTable, filteredOrderTable);
+
+    List<Pair<Integer, Pair<String, String>>> materializedJoin = Lists.newArrayList(joined.materialize());
+
+    assertTrue(materializedJoin.isEmpty());
+  }
+
+  @Test
+  public void testMapsideJoin() throws IOException {
+    runMapsideJoin(new MRPipeline(MapsideJoinIT.class, tmpDir.getDefaultConfiguration()), false);
+  }
+
+  private void runMapsideJoin(Pipeline pipeline, boolean inMemory) {
+    PTable<Integer, String> customerTable = readTable(pipeline, "customers.txt");
+    PTable<Integer, String> orderTable = readTable(pipeline, "orders.txt");
+    
+    PTable<Integer, String> custOrders = MapsideJoin.join(customerTable, orderTable)
+        .parallelDo("concat", new ConcatValuesFn(), Writables.tableOf(Writables.ints(), Writables.strings()));
+
+    PTable<Integer, String> capOrders = orderTable.parallelDo(new CapOrdersFn(), orderTable.getPTableType());
+
+    PTable<Integer, Pair<String, String>> joined = MapsideJoin.join(custOrders, capOrders);
+
+    List<Pair<Integer, Pair<String, String>>> expectedJoinResult = Lists.newArrayList();
+    expectedJoinResult.add(Pair.of(111, Pair.of("[John Doe,Corn flakes]", "CORN FLAKES")));
+    expectedJoinResult.add(Pair.of(222, Pair.of("[Jane Doe,Toilet paper]", "TOILET PAPER")));
+    expectedJoinResult.add(Pair.of(222, Pair.of("[Jane Doe,Toilet paper]", "TOILET PLUNGER")));
+    expectedJoinResult.add(Pair.of(222, Pair.of("[Jane Doe,Toilet plunger]", "TOILET PAPER")));
+    expectedJoinResult.add(Pair.of(222, Pair.of("[Jane Doe,Toilet plunger]", "TOILET PLUNGER")));
+    expectedJoinResult.add(Pair.of(333, Pair.of("[Someone Else,Toilet brush]", "TOILET BRUSH")));
+    Iterable<Pair<Integer, Pair<String, String>>> iter = joined.materialize();
+    
+    PipelineResult res = pipeline.run();
+    if (!inMemory) {
+      assertEquals(2, res.getStageResults().size());
+    }
+     
+    List<Pair<Integer, Pair<String, String>>> joinedResultList = Lists.newArrayList(iter);
+    Collections.sort(joinedResultList);
+
+    assertEquals(expectedJoinResult, joinedResultList);
+  }
+
+  private PTable<Integer, String> readTable(Pipeline pipeline, String filename) {
+    try {
+      return pipeline.readTextFile(tmpDir.copyResourceFileName(filename)).parallelDo("asTable", new LineSplitter(),
+          Writables.tableOf(Writables.ints(), Writables.strings()));
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/lib/join/MultiAvroSchemaJoinIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/lib/join/MultiAvroSchemaJoinIT.java b/crunch-core/src/it/java/org/apache/crunch/lib/join/MultiAvroSchemaJoinIT.java
new file mode 100644
index 0000000..f1ca770
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/lib/join/MultiAvroSchemaJoinIT.java
@@ -0,0 +1,121 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib.join;
+
+import static org.apache.crunch.types.avro.Avros.records;
+import static org.apache.crunch.types.avro.Avros.strings;
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.util.List;
+
+import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.specific.SpecificDatumWriter;
+import org.apache.avro.specific.SpecificRecord;
+import org.apache.crunch.MapFn;
+import org.apache.crunch.PCollection;
+import org.apache.crunch.Pair;
+import org.apache.crunch.Pipeline;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.io.From;
+import org.apache.crunch.test.Employee;
+import org.apache.crunch.test.Person;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+
+public class MultiAvroSchemaJoinIT {
+
+  private File personFile;
+  private File employeeFile;
+  @Rule
+  public TemporaryPath tmpDir = TemporaryPaths.create();
+
+  @Before
+  public void setUp() throws Exception {
+    this.personFile = File.createTempFile("person", ".avro");
+    this.employeeFile = File.createTempFile("employee", ".avro");
+
+    DatumWriter<Person> pdw = new SpecificDatumWriter<Person>();
+    DataFileWriter<Person> pfw = new DataFileWriter<Person>(pdw);
+    pfw.create(Person.SCHEMA$, personFile);
+    Person p1 = new Person();
+    p1.name = "Josh";
+    p1.age = 19;
+    p1.siblingnames = ImmutableList.<CharSequence> of("Kate", "Mike");
+    pfw.append(p1);
+    Person p2 = new Person();
+    p2.name = "Kate";
+    p2.age = 17;
+    p2.siblingnames = ImmutableList.<CharSequence> of("Josh", "Mike");
+    pfw.append(p2);
+    Person p3 = new Person();
+    p3.name = "Mike";
+    p3.age = 12;
+    p3.siblingnames = ImmutableList.<CharSequence> of("Josh", "Kate");
+    pfw.append(p3);
+    pfw.close();
+
+    DatumWriter<Employee> edw = new SpecificDatumWriter<Employee>();
+    DataFileWriter<Employee> efw = new DataFileWriter<Employee>(edw);
+    efw.create(Employee.SCHEMA$, employeeFile);
+    Employee e1 = new Employee();
+    e1.name = "Kate";
+    e1.salary = 100000;
+    e1.department = "Marketing";
+    efw.append(e1);
+    efw.close();
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    personFile.delete();
+    employeeFile.delete();
+  }
+
+  public static class NameFn<K extends SpecificRecord> extends MapFn<K, String> {
+    @Override
+    public String map(K input) {
+      Schema s = input.getSchema();
+      Schema.Field f = s.getField("name");
+      return input.get(f.pos()).toString();
+    }
+  }
+
+  @Test
+  public void testJoin() throws Exception {
+    Pipeline p = new MRPipeline(MultiAvroSchemaJoinIT.class, tmpDir.getDefaultConfiguration());
+    PCollection<Person> people = p.read(From.avroFile(personFile.getAbsolutePath(), records(Person.class)));
+    PCollection<Employee> employees = p.read(From.avroFile(employeeFile.getAbsolutePath(), records(Employee.class)));
+
+    Iterable<Pair<Person, Employee>> result = people.by(new NameFn<Person>(), strings())
+        .join(employees.by(new NameFn<Employee>(), strings())).values().materialize();
+    List<Pair<Person, Employee>> v = Lists.newArrayList(result);
+    assertEquals(1, v.size());
+    assertEquals("Kate", v.get(0).first().name.toString());
+    assertEquals("Kate", v.get(0).second().name.toString());
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/lib/join/RightOuterJoinIT.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/lib/join/RightOuterJoinIT.java b/crunch-core/src/it/java/org/apache/crunch/lib/join/RightOuterJoinIT.java
new file mode 100644
index 0000000..d889b61
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/lib/join/RightOuterJoinIT.java
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib.join;
+
+import static org.junit.Assert.assertTrue;
+
+import org.apache.crunch.Pair;
+import org.apache.crunch.types.PTypeFamily;
+
+public class RightOuterJoinIT extends JoinTester {
+  @Override
+  public void assertPassed(Iterable<Pair<String, Long>> lines) {
+    boolean passed1 = false;
+    boolean passed2 = true;
+    boolean passed3 = false;
+    for (Pair<String, Long> line : lines) {
+      if ("wretched".equals(line.first()) && 24 == line.second()) {
+        passed1 = true;
+      }
+      if ("againe".equals(line.first())) {
+        passed2 = false;
+      }
+      if ("Montparnasse.".equals(line.first()) && 2 == line.second()) {
+        passed3 = true;
+      }
+    }
+    assertTrue(passed1);
+    assertTrue(passed2);
+    assertTrue(passed3);
+  }
+
+  @Override
+  protected JoinFn<String, Long, Long> getJoinFn(PTypeFamily typeFamily) {
+    return new RightOuterJoinFn<String, Long, Long>(typeFamily.strings(), typeFamily.longs());
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/test/TemporaryPaths.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/test/TemporaryPaths.java b/crunch-core/src/it/java/org/apache/crunch/test/TemporaryPaths.java
new file mode 100644
index 0000000..97cf0de
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/test/TemporaryPaths.java
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.test;
+
+import org.apache.crunch.impl.mr.run.RuntimeParameters;
+import org.apache.hadoop.conf.Configuration;
+
+
+/**
+ * Utilities for working with {@link TemporaryPath}.
+ */
+public final class TemporaryPaths {
+
+  /**
+   * Static factory returning a {@link TemporaryPath} with adjusted
+   * {@link Configuration} properties.
+   */
+  public static TemporaryPath create() {
+    return new TemporaryPath(RuntimeParameters.TMP_DIR, "hadoop.tmp.dir");
+  }
+
+  private TemporaryPaths() {
+    // nothing
+  }
+}
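
Usage mirrors the integration tests elsewhere in this patch; the test class name below is hypothetical:

    import org.apache.crunch.test.TemporaryPath;
    import org.apache.crunch.test.TemporaryPaths;
    import org.apache.hadoop.conf.Configuration;
    import org.junit.Rule;
    import org.junit.Test;

    public class SomeCrunchIT {
      @Rule
      public TemporaryPath tmpDir = TemporaryPaths.create();

      @Test
      public void usesScratchSpace() throws Exception {
        // Configuration with the Crunch and Hadoop temp dirs pointed at the rule's scratch space.
        Configuration conf = tmpDir.getDefaultConfiguration();
        String input = tmpDir.copyResourceFileName("shakes.txt");
        // ... build and run a pipeline against conf and input ...
      }
    }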

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/java/org/apache/crunch/test/Tests.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/java/org/apache/crunch/test/Tests.java b/crunch-core/src/it/java/org/apache/crunch/test/Tests.java
new file mode 100644
index 0000000..e381c1a
--- /dev/null
+++ b/crunch-core/src/it/java/org/apache/crunch/test/Tests.java
@@ -0,0 +1,124 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.test;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.base.Preconditions.checkNotNull;
+
+import java.io.IOException;
+import java.util.Collection;
+
+import org.apache.crunch.Pipeline;
+import org.apache.crunch.impl.mem.MemPipeline;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.hadoop.io.Writable;
+import org.junit.runners.Parameterized.Parameters;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.io.ByteArrayDataOutput;
+import com.google.common.io.ByteStreams;
+import com.google.common.io.Resources;
+
+
+/**
+ * Utilities for integration tests.
+ */
+public final class Tests {
+
+  private Tests() {
+    // nothing
+  }
+
+  /**
+   * Get the path to an integration test resource file, as per naming convention.
+   *
+   * @param testCase The executing test case instance
+   * @param resourceName The file name of the resource
+   * @return The path to the resource (never null)
+   * @throws IllegalArgumentException Thrown if the resource doesn't exist
+   */
+  public static String pathTo(Object testCase, String resourceName) {
+    String qualifiedName = resource(testCase, resourceName);
+    return Resources.getResource(qualifiedName).getFile();
+  }
+
+  /**
+   * Get the classpath-relative name of an integration test resource file, as per the
+   * naming convention. This doesn't check whether the resource exists!
+   *
+   * @param testCase The executing test case instance
+   * @param resourceName The file name of the resource
+   * @return The qualified name of the resource (never null)
+   */
+  public static String resource(Object testCase, String resourceName) {
+    checkNotNull(testCase);
+    checkNotNull(resourceName);
+
+    // Note: We append "Data" because otherwise Eclipse would complain about
+    //       the case's class name clashing with the resource directory's name.
+    return testCase.getClass().getName().replaceAll("\\.", "/") + "Data/" + resourceName;
+  }
+
+  /**
+   * Return our two types of {@link Pipeline}s for a JUnit Parameterized test.
+   *
+   * @param testCase The executing test case's class
+   * @return The collection to return from a {@link Parameters} provider method
+   */
+  public static Collection<Object[]> pipelinesParams(Class<?> testCase) {
+    return ImmutableList.copyOf(new Object[][] {
+        { MemPipeline.getInstance() }, { new MRPipeline(testCase) }
+    });
+  }
+
+  /**
+   * Serialize the given Writable into a byte array.
+   *
+   * @param value The instance to serialize
+   * @return The serialized data
+   */
+  public static byte[] serialize(Writable value) {
+    checkNotNull(value);
+    try {
+      ByteArrayDataOutput out = ByteStreams.newDataOutput();
+      value.write(out);
+      return out.toByteArray();
+    } catch (IOException e) {
+      throw new IllegalStateException("cannot serialize", e);
+    }
+  }
+
+  /**
+   * Serialize the src Writable into a byte array, then deserialize it into dest.
+   * @param src The instance to serialize
+   * @param dest The instance to deserialize into
+   * @return dest, for convenience
+   */
+  public static <T extends Writable> T roundtrip(Writable src, T dest) {
+    checkNotNull(src);
+    checkNotNull(dest);
+    checkArgument(src != dest, "src and dest may not be the same instance");
+
+    try {
+      byte[] data = serialize(src);
+      dest.readFields(ByteStreams.newDataInput(data));
+    } catch (IOException e) {
+      throw new IllegalStateException("cannot deserialize", e);
+    }
+    return dest;
+  }
+}
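
To illustrate how these helpers fit together, here is a hedged sketch of a parameterized
integration test (not part of this diff). The test class, resource file, and assertions
are hypothetical; only Tests.pipelinesParams, Tests.pathTo, and Tests.roundtrip come from
the file above.

    package org.apache.crunch;

    import static org.junit.Assert.assertEquals;

    import java.util.Collection;

    import org.apache.crunch.test.Tests;
    import org.apache.hadoop.io.Text;
    import org.junit.Test;
    import org.junit.runner.RunWith;
    import org.junit.runners.Parameterized;
    import org.junit.runners.Parameterized.Parameters;

    @RunWith(Parameterized.class)
    public class ExampleIT {

      private final Pipeline pipeline;

      // JUnit supplies one constructor argument per element of each Object[] from params().
      public ExampleIT(Pipeline pipeline) {
        this.pipeline = pipeline;
      }

      @Parameters
      public static Collection<Object[]> params() {
        // One run with MemPipeline, one with MRPipeline.
        return Tests.pipelinesParams(ExampleIT.class);
      }

      @Test
      public void readsConventionallyNamedResource() {
        // Resolves org/apache/crunch/ExampleITData/input.txt on the test classpath
        // (hypothetical resource file).
        String input = Tests.pathTo(this, "input.txt");
        pipeline.readTextFile(input);
        // ... continue building and running the pipeline
      }

      @Test
      public void writableSurvivesRoundtrip() {
        Text copy = Tests.roundtrip(new Text("abc"), new Text());
        assertEquals("abc", copy.toString());
      }
    }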

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/resources/customers.txt
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/resources/customers.txt b/crunch-core/src/it/resources/customers.txt
new file mode 100644
index 0000000..98f3f3d
--- /dev/null
+++ b/crunch-core/src/it/resources/customers.txt
@@ -0,0 +1,4 @@
+111|John Doe
+222|Jane Doe
+333|Someone Else
+444|Has No Orders
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/resources/docs.txt
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/resources/docs.txt b/crunch-core/src/it/resources/docs.txt
new file mode 100644
index 0000000..90a3f65
--- /dev/null
+++ b/crunch-core/src/it/resources/docs.txt
@@ -0,0 +1,6 @@
+A	this doc has this text
+A	and this text as well
+A	but also this
+B	this doc has some text
+B	but not as much as the last
+B	doc

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/resources/emptyTextFile.txt
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/resources/emptyTextFile.txt b/crunch-core/src/it/resources/emptyTextFile.txt
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/resources/letters.txt
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/resources/letters.txt b/crunch-core/src/it/resources/letters.txt
new file mode 100644
index 0000000..916bfc9
--- /dev/null
+++ b/crunch-core/src/it/resources/letters.txt
@@ -0,0 +1,2 @@
+a
+bb
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/resources/log4j.properties b/crunch-core/src/it/resources/log4j.properties
new file mode 100644
index 0000000..5d144a0
--- /dev/null
+++ b/crunch-core/src/it/resources/log4j.properties
@@ -0,0 +1,29 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# ***** Set the org.apache.crunch logger level to INFO and its only appender to A.
+log4j.logger.org.apache.crunch=info, A
+
+# Log warnings on Hadoop for the local runner when testing
+log4j.logger.org.apache.hadoop=warn, A
+# Except for Configuration, which is chatty.
+log4j.logger.org.apache.hadoop.conf.Configuration=error, A
+
+# ***** A is set to be a ConsoleAppender.
+log4j.appender.A=org.apache.log4j.ConsoleAppender
+# ***** A uses PatternLayout.
+log4j.appender.A.layout=org.apache.log4j.PatternLayout
+log4j.appender.A.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
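
For reference, a line rendered by the PatternLayout above looks roughly like the sample
below (values illustrative): %-4r is milliseconds since startup, [%t] the thread, %-5p the
level, %c the logger, %x the (usually empty) NDC, and %m the message.

    1234 [main] INFO  org.apache.crunch.impl.mr.MRPipeline  - ...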


[18/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/resources/maugham.txt
----------------------------------------------------------------------
diff --git a/crunch/src/it/resources/maugham.txt b/crunch/src/it/resources/maugham.txt
deleted file mode 100644
index 16c45e8..0000000
--- a/crunch/src/it/resources/maugham.txt
+++ /dev/null
@@ -1,29112 +0,0 @@
-The Project Gutenberg EBook of Of Human Bondage, by W. Somerset Maugham
-
-This eBook is for the use of anyone anywhere at no cost and with
-almost no restrictions whatsoever.  You may copy it, give it away or
-re-use it under the terms of the Project Gutenberg License included
-with this eBook or online at www.gutenberg.net
-
-
-Title: Of Human Bondage
-
-Author: W. Somerset Maugham
-
-Release Date: May 6, 2008 [EBook #351]
-
-Language: English
-
-
-*** START OF THIS PROJECT GUTENBERG EBOOK OF HUMAN BONDAGE ***
-
-
-
-
-
-
-
-
-
-
-
-
-OF HUMAN BONDAGE
-
-
-BY
-
-W. SOMERSET MAUGHAM
-
-
-
-
-I
-
-The day broke gray and dull. The clouds hung heavily, and there was a
-rawness in the air that suggested snow. A woman servant came into a room
-in which a child was sleeping and drew the curtains. She glanced
-mechanically at the house opposite, a stucco house with a portico, and
-went to the child's bed.
-
-"Wake up, Philip," she said.
-
-She pulled down the bed-clothes, took him in her arms, and carried him
-downstairs. He was only half awake.
-
-"Your mother wants you," she said.
-
-She opened the door of a room on the floor below and took the child over
-to a bed in which a woman was lying. It was his mother. She stretched out
-her arms, and the child nestled by her side. He did not ask why he had
-been awakened. The woman kissed his eyes, and with thin, small hands felt
-the warm body through his white flannel nightgown. She pressed him closer
-to herself.
-
-"Are you sleepy, darling?" she said.
-
-Her voice was so weak that it seemed to come already from a great
-distance. The child did not answer, but smiled comfortably. He was very
-happy in the large, warm bed, with those soft arms about him. He tried to
-make himself smaller still as he cuddled up against his mother, and he
-kissed her sleepily. In a moment he closed his eyes and was fast asleep.
-The doctor came forwards and stood by the bed-side.
-
-"Oh, don't take him away yet," she moaned.
-
-The doctor, without answering, looked at her gravely. Knowing she would
-not be allowed to keep the child much longer, the woman kissed him again;
-and she passed her hand down his body till she came to his feet; she held
-the right foot in her hand and felt the five small toes; and then slowly
-passed her hand over the left one. She gave a sob.
-
-"What's the matter?" said the doctor. "You're tired."
-
-She shook her head, unable to speak, and the tears rolled down her cheeks.
-The doctor bent down.
-
-"Let me take him."
-
-She was too weak to resist his wish, and she gave the child up. The doctor
-handed him back to his nurse.
-
-"You'd better put him back in his own bed."
-
-"Very well, sir." The little boy, still sleeping, was taken away. His
-mother sobbed now broken-heartedly.
-
-"What will happen to him, poor child?"
-
-The monthly nurse tried to quiet her, and presently, from exhaustion, the
-crying ceased. The doctor walked to a table on the other side of the room,
-upon which, under a towel, lay the body of a still-born child. He lifted
-the towel and looked. He was hidden from the bed by a screen, but the
-woman guessed what he was doing.
-
-"Was it a girl or a boy?" she whispered to the nurse.
-
-"Another boy."
-
-The woman did not answer. In a moment the child's nurse came back. She
-approached the bed.
-
-"Master Philip never woke up," she said. There was a pause. Then the
-doctor felt his patient's pulse once more.
-
-"I don't think there's anything I can do just now," he said. "I'll call
-again after breakfast."
-
-"I'll show you out, sir," said the child's nurse.
-
-They walked downstairs in silence. In the hall the doctor stopped.
-
-"You've sent for Mrs. Carey's brother-in-law, haven't you?"
-
-"Yes, sir."
-
-"D'you know at what time he'll be here?"
-
-"No, sir, I'm expecting a telegram."
-
-"What about the little boy? I should think he'd be better out of the way."
-
-"Miss Watkin said she'd take him, sir."
-
-"Who's she?"
-
-"She's his godmother, sir. D'you think Mrs. Carey will get over it, sir?"
-
-The doctor shook his head.
-
-
-
-II
-
-It was a week later. Philip was sitting on the floor in the drawing-room
-at Miss Watkin's house in Onslow gardens. He was an only child and used to
-amusing himself. The room was filled with massive furniture, and on each
-of the sofas were three big cushions. There was a cushion too in each
-arm-chair. All these he had taken and, with the help of the gilt rout
-chairs, light and easy to move, had made an elaborate cave in which he
-could hide himself from the Red Indians who were lurking behind the
-curtains. He put his ear to the floor and listened to the herd of
-buffaloes that raced across the prairie. Presently, hearing the door open,
-he held his breath so that he might not be discovered; but a violent hand
-piled away a chair and the cushions fell down.
-
-"You naughty boy, Miss Watkin WILL be cross with you."
-
-"Hulloa, Emma!" he said.
-
-The nurse bent down and kissed him, then began to shake out the cushions,
-and put them back in their places.
-
-"Am I to come home?" he asked.
-
-"Yes, I've come to fetch you."
-
-"You've got a new dress on."
-
-It was in eighteen-eighty-five, and she wore a bustle. Her gown was of
-black velvet, with tight sleeves and sloping shoulders, and the skirt had
-three large flounces. She wore a black bonnet with velvet strings. She
-hesitated. The question she had expected did not come, and so she could
-not give the answer she had prepared.
-
-"Aren't you going to ask how your mamma is?" she said at length.
-
-"Oh, I forgot. How is mamma?"
-
-Now she was ready.
-
-"Your mamma is quite well and happy."
-
-"Oh, I am glad."
-
-"Your mamma's gone away. You won't ever see her any more." Philip did not
-know what she meant.
-
-"Why not?"
-
-"Your mamma's in heaven."
-
-She began to cry, and Philip, though he did not quite understand, cried
-too. Emma was a tall, big-boned woman, with fair hair and large features.
-She came from Devonshire and, notwithstanding her many years of service in
-London, had never lost the breadth of her accent. Her tears increased her
-emotion, and she pressed the little boy to her heart. She felt vaguely the
-pity of that child deprived of the only love in the world that is quite
-unselfish. It seemed dreadful that he must be handed over to strangers.
-But in a little while she pulled herself together.
-
-"Your Uncle William is waiting in to see you," she said. "Go and say
-good-bye to Miss Watkin, and we'll go home."
-
-"I don't want to say good-bye," he answered, instinctively anxious to hide
-his tears.
-
-"Very well, run upstairs and get your hat."
-
-He fetched it, and when he came down Emma was waiting for him in the hall.
-He heard the sound of voices in the study behind the dining-room. He
-paused. He knew that Miss Watkin and her sister were talking to friends,
-and it seemed to him--he was nine years old--that if he went in they would
-be sorry for him.
-
-"I think I'll go and say good-bye to Miss Watkin."
-
-"I think you'd better," said Emma.
-
-"Go in and tell them I'm coming," he said.
-
-He wished to make the most of his opportunity. Emma knocked at the door
-and walked in. He heard her speak.
-
-"Master Philip wants to say good-bye to you, miss."
-
-There was a sudden hush of the conversation, and Philip limped in.
-Henrietta Watkin was a stout woman, with a red face and dyed hair. In
-those days to dye the hair excited comment, and Philip had heard much
-gossip at home when his godmother's changed colour. She lived with an
-elder sister, who had resigned herself contentedly to old age. Two ladies,
-whom Philip did not know, were calling, and they looked at him curiously.
-
-"My poor child," said Miss Watkin, opening her arms.
-
-She began to cry. Philip understood now why she had not been in to
-luncheon and why she wore a black dress. She could not speak.
-
-"I've got to go home," said Philip, at last.
-
-He disengaged himself from Miss Watkin's arms, and she kissed him again.
-Then he went to her sister and bade her good-bye too. One of the strange
-ladies asked if she might kiss him, and he gravely gave her permission.
-Though crying, he keenly enjoyed the sensation he was causing; he would
-have been glad to stay a little longer to be made much of, but felt they
-expected him to go, so he said that Emma was waiting for him. He went out
-of the room. Emma had gone downstairs to speak with a friend in the
-basement, and he waited for her on the landing. He heard Henrietta
-Watkin's voice.
-
-"His mother was my greatest friend. I can't bear to think that she's
-dead."
-
-"You oughtn't to have gone to the funeral, Henrietta," said her sister. "I
-knew it would upset you."
-
-Then one of the strangers spoke.
-
-"Poor little boy, it's dreadful to think of him quite alone in the world.
-I see he limps."
-
-"Yes, he's got a club-foot. It was such a grief to his mother."
-
-Then Emma came back. They called a hansom, and she told the driver where
-to go.
-
-
-
-III
-
-
-When they reached the house Mrs. Carey had died in--it was in a dreary,
-respectable street between Notting Hill Gate and High Street,
-Kensington--Emma led Philip into the drawing-room. His uncle was writing
-letters of thanks for the wreaths which had been sent. One of them, which
-had arrived too late for the funeral, lay in its cardboard box on the
-hall-table.
-
-"Here's Master Philip," said Emma.
-
-Mr. Carey stood up slowly and shook hands with the little boy. Then on
-second thoughts he bent down and kissed his forehead. He was a man of
-somewhat less than average height, inclined to corpulence, with his hair,
-worn long, arranged over the scalp so as to conceal his baldness. He was
-clean-shaven. His features were regular, and it was possible to imagine
-that in his youth he had been good-looking. On his watch-chain he wore a
-gold cross.
-
-"You're going to live with me now, Philip," said Mr. Carey. "Shall you
-like that?"
-
-Two years before Philip had been sent down to stay at the vicarage after
-an attack of chicken-pox; but there remained with him a recollection of an
-attic and a large garden rather than of his uncle and aunt.
-
-"Yes."
-
-"You must look upon me and your Aunt Louisa as your father and mother."
-
-The child's mouth trembled a little, he reddened, but did not answer.
-
-"Your dear mother left you in my charge."
-
-Mr. Carey had no great ease in expressing himself. When the news came that
-his sister-in-law was dying, he set off at once for London, but on the way
-thought of nothing but the disturbance in his life that would be caused if
-her death forced him to undertake the care of her son. He was well over
-fifty, and his wife, to whom he had been married for thirty years, was
-childless; he did not look forward with any pleasure to the presence of a
-small boy who might be noisy and rough. He had never much liked his
-sister-in-law.
-
-"I'm going to take you down to Blackstable tomorrow," he said.
-
-"With Emma?"
-
-The child put his hand in hers, and she pressed it.
-
-"I'm afraid Emma must go away," said Mr. Carey.
-
-"But I want Emma to come with me."
-
-Philip began to cry, and the nurse could not help crying too. Mr. Carey
-looked at them helplessly.
-
-"I think you'd better leave me alone with Master Philip for a moment."
-
-"Very good, sir."
-
-Though Philip clung to her, she released herself gently. Mr. Carey took
-the boy on his knee and put his arm round him.
-
-"You mustn't cry," he said. "You're too old to have a nurse now. We must
-see about sending you to school."
-
-"I want Emma to come with me," the child repeated.
-
-"It costs too much money, Philip. Your father didn't leave very much, and
-I don't know what's become of it. You must look at every penny you spend."
-
-Mr. Carey had called the day before on the family solicitor. Philip's
-father was a surgeon in good practice, and his hospital appointments
-suggested an established position; so that it was a surprise on his sudden
-death from blood-poisoning to find that he had left his widow little more
-than his life insurance and what could be got for the lease of their house
-in Bruton Street. This was six months ago; and Mrs. Carey, already in
-delicate health, finding herself with child, had lost her head and
-accepted for the lease the first offer that was made. She stored her
-furniture, and, at a rent which the parson thought outrageous, took a
-furnished house for a year, so that she might suffer from no inconvenience
-till her child was born. But she had never been used to the management of
-money, and was unable to adapt her expenditure to her altered
-circumstances. The little she had slipped through her fingers in one way
-and another, so that now, when all expenses were paid, not much more than
-two thousand pounds remained to support the boy till he was able to earn
-his own living. It was impossible to explain all this to Philip and he was
-sobbing still.
-
-"You'd better go to Emma," Mr. Carey said, feeling that she could console
-the child better than anyone.
-
-Without a word Philip slipped off his uncle's knee, but Mr. Carey stopped
-him.
-
-"We must go tomorrow, because on Saturday I've got to prepare my sermon,
-and you must tell Emma to get your things ready today. You can bring all
-your toys. And if you want anything to remember your father and mother by
-you can take one thing for each of them. Everything else is going to be
-sold."
-
-The boy slipped out of the room. Mr. Carey was unused to work, and he
-turned to his correspondence with resentment. On one side of the desk was
-a bundle of bills, and these filled him with irritation. One especially
-seemed preposterous. Immediately after Mrs. Carey's death Emma had ordered
-from the florist masses of white flowers for the room in which the dead
-woman lay. It was sheer waste of money. Emma took far too much upon
-herself. Even if there had been no financial necessity, he would have
-dismissed her.
-
-But Philip went to her, and hid his face in her bosom, and wept as though
-his heart would break. And she, feeling that he was almost her own
-son--she had taken him when he was a month old--consoled him with soft
-words. She promised that she would come and see him sometimes, and that
-she would never forget him; and she told him about the country he was
-going to and about her own home in Devonshire--her father kept a turnpike
-on the high-road that led to Exeter, and there were pigs in the sty, and
-there was a cow, and the cow had just had a calf--till Philip forgot his
-tears and grew excited at the thought of his approaching journey.
-Presently she put him down, for there was much to be done, and he helped
-her to lay out his clothes on the bed. She sent him into the nursery to
-gather up his toys, and in a little while he was playing happily.
-
-But at last he grew tired of being alone and went back to the bed-room, in
-which Emma was now putting his things into a big tin box; he remembered
-then that his uncle had said he might take something to remember his
-father and mother by. He told Emma and asked her what he should take.
-
-"You'd better go into the drawing-room and see what you fancy."
-
-"Uncle William's there."
-
-"Never mind that. They're your own things now."
-
-Philip went downstairs slowly and found the door open. Mr. Carey had left
-the room. Philip walked slowly round. They had been in the house so short
-a time that there was little in it that had a particular interest to him.
-It was a stranger's room, and Philip saw nothing that struck his fancy.
-But he knew which were his mother's things and which belonged to the
-landlord, and presently fixed on a little clock that he had once heard his
-mother say she liked. With this he walked again rather disconsolately
-upstairs. Outside the door of his mother's bed-room he stopped and
-listened. Though no one had told him not to go in, he had a feeling that
-it would be wrong to do so; he was a little frightened, and his heart beat
-uncomfortably; but at the same time something impelled him to turn the
-handle. He turned it very gently, as if to prevent anyone within from
-hearing, and then slowly pushed the door open. He stood on the threshold
-for a moment before he had the courage to enter. He was not frightened
-now, but it seemed strange. He closed the door behind him. The blinds were
-drawn, and the room, in the cold light of a January afternoon, was dark.
-On the dressing-table were Mrs. Carey's brushes and the hand mirror. In a
-little tray were hairpins. There was a photograph of himself on the
-chimney-piece and one of his father. He had often been in the room when
-his mother was not in it, but now it seemed different. There was something
-curious in the look of the chairs. The bed was made as though someone were
-going to sleep in it that night, and in a case on the pillow was a
-night-dress.
-
-Philip opened a large cupboard filled with dresses and, stepping in, took
-as many of them as he could in his arms and buried his face in them. They
-smelt of the scent his mother used. Then he pulled open the drawers,
-filled with his mother's things, and looked at them: there were lavender
-bags among the linen, and their scent was fresh and pleasant. The
-strangeness of the room left it, and it seemed to him that his mother had
-just gone out for a walk. She would be in presently and would come
-upstairs to have nursery tea with him. And he seemed to feel her kiss on
-his lips.
-
-It was not true that he would never see her again. It was not true simply
-because it was impossible. He climbed up on the bed and put his head on
-the pillow. He lay there quite still.
-
-
-
-IV
-
-
-Philip parted from Emma with tears, but the journey to Blackstable amused
-him, and, when they arrived, he was resigned and cheerful. Blackstable was
-sixty miles from London. Giving their luggage to a porter, Mr. Carey set
-out to walk with Philip to the vicarage; it took them little more than
-five minutes, and, when they reached it, Philip suddenly remembered the
-gate. It was red and five-barred: it swung both ways on easy hinges; and
-it was possible, though forbidden, to swing backwards and forwards on it.
-They walked through the garden to the front-door. This was only used by
-visitors and on Sundays, and on special occasions, as when the Vicar went
-up to London or came back. The traffic of the house took place through a
-side-door, and there was a back door as well for the gardener and for
-beggars and tramps. It was a fairly large house of yellow brick, with a
-red roof, built about five and twenty years before in an ecclesiastical
-style. The front-door was like a church porch, and the drawing-room
-windows were gothic.
-
-Mrs. Carey, knowing by what train they were coming, waited in the
-drawing-room and listened for the click of the gate. When she heard it she
-went to the door.
-
-"There's Aunt Louisa," said Mr. Carey, when he saw her. "Run and give her
-a kiss."
-
-Philip started to run, awkwardly, trailing his club-foot, and then
-stopped. Mrs. Carey was a little, shrivelled woman of the same age as her
-husband, with a face extraordinarily filled with deep wrinkles, and pale
-blue eyes. Her gray hair was arranged in ringlets according to the fashion
-of her youth. She wore a black dress, and her only ornament was a gold
-chain, from which hung a cross. She had a shy manner and a gentle voice.
-
-"Did you walk, William?" she said, almost reproachfully, as she kissed her
-husband.
-
-"I didn't think of it," he answered, with a glance at his nephew.
-
-"It didn't hurt you to walk, Philip, did it?" she asked the child.
-
-"No. I always walk."
-
-He was a little surprised at their conversation. Aunt Louisa told him to
-come in, and they entered the hall. It was paved with red and yellow
-tiles, on which alternately were a Greek Cross and the Lamb of God. An
-imposing staircase led out of the hall. It was of polished pine, with a
-peculiar smell, and had been put in because fortunately, when the church
-was reseated, enough wood remained over. The balusters were decorated with
-emblems of the Four Evangelists.
-
-"I've had the stove lighted as I thought you'd be cold after your
-journey," said Mrs. Carey.
-
-It was a large black stove that stood in the hall and was only lighted if
-the weather was very bad and the Vicar had a cold. It was not lighted if
-Mrs. Carey had a cold. Coal was expensive. Besides, Mary Ann, the maid,
-didn't like fires all over the place. If they wanted all them fires they
-must keep a second girl. In the winter Mr. and Mrs. Carey lived in the
-dining-room so that one fire should do, and in the summer they could not
-get out of the habit, so the drawing-room was used only by Mr. Carey on
-Sunday afternoons for his nap. But every Saturday he had a fire in the
-study so that he could write his sermon.
-
-Aunt Louisa took Philip upstairs and showed him into a tiny bed-room that
-looked out on the drive. Immediately in front of the window was a large
-tree, which Philip remembered now because the branches were so low that it
-was possible to climb quite high up it.
-
-"A small room for a small boy," said Mrs. Carey. "You won't be frightened
-at sleeping alone?"
-
-"Oh, no."
-
-On his first visit to the vicarage he had come with his nurse, and Mrs.
-Carey had had little to do with him. She looked at him now with some
-uncertainty.
-
-"Can you wash your own hands, or shall I wash them for you?"
-
-"I can wash myself," he answered firmly.
-
-"Well, I shall look at them when you come down to tea," said Mrs. Carey.
-
-She knew nothing about children. After it was settled that Philip should
-come down to Blackstable, Mrs. Carey had thought much how she should treat
-him; she was anxious to do her duty; but now he was there she found
-herself just as shy of him as he was of her. She hoped he would not be
-noisy and rough, because her husband did not like rough and noisy boys.
-Mrs. Carey made an excuse to leave Philip alone, but in a moment came back
-and knocked at the door; she asked him, without coming in, if he could
-pour out the water himself. Then she went downstairs and rang the bell for
-tea.
-
-The dining-room, large and well-proportioned, had windows on two sides of
-it, with heavy curtains of red rep; there was a big table in the middle;
-and at one end an imposing mahogany sideboard with a looking-glass in it.
-In one corner stood a harmonium. On each side of the fireplace were chairs
-covered in stamped leather, each with an antimacassar; one had arms and
-was called the husband, and the other had none and was called the wife.
-Mrs. Carey never sat in the arm-chair: she said she preferred a chair that
-was not too comfortable; there was always a lot to do, and if her chair
-had had arms she might not be so ready to leave it.
-
-Mr. Carey was making up the fire when Philip came in, and he pointed out
-to his nephew that there were two pokers. One was large and bright and
-polished and unused, and was called the Vicar; and the other, which was
-much smaller and had evidently passed through many fires, was called the
-Curate.
-
-"What are we waiting for?" said Mr. Carey.
-
-"I told Mary Ann to make you an egg. I thought you'd be hungry after your
-journey."
-
-Mrs. Carey thought the journey from London to Blackstable very tiring. She
-seldom travelled herself, for the living was only three hundred a year,
-and, when her husband wanted a holiday, since there was not money for two,
-he went by himself. He was very fond of Church Congresses and usually
-managed to go up to London once a year; and once he had been to Paris for
-the exhibition, and two or three times to Switzerland. Mary Ann brought in
-the egg, and they sat down. The chair was much too low for Philip, and for
-a moment neither Mr. Carey nor his wife knew what to do.
-
-"I'll put some books under him," said Mary Ann.
-
-She took from the top of the harmonium the large Bible and the prayer-book
-from which the Vicar was accustomed to read prayers, and put them on
-Philip's chair.
-
-"Oh, William, he can't sit on the Bible," said Mrs. Carey, in a shocked
-tone. "Couldn't you get him some books out of the study?"
-
-Mr. Carey considered the question for an instant.
-
-"I don't think it matters this once if you put the prayer-book on the top,
-Mary Ann," he said. "The book of Common Prayer is the composition of men
-like ourselves. It has no claim to divine authorship."
-
-"I hadn't thought of that, William," said Aunt Louisa.
-
-Philip perched himself on the books, and the Vicar, having said grace, cut
-the top off his egg.
-
-"There," he said, handing it to Philip, "you can eat my top if you like."
-
-Philip would have liked an egg to himself, but he was not offered one, so
-took what he could.
-
-"How have the chickens been laying since I went away?" asked the Vicar.
-
-"Oh, they've been dreadful, only one or two a day."
-
-"How did you like that top, Philip?" asked his uncle.
-
-"Very much, thank you."
-
-"You shall have another one on Sunday afternoon."
-
-Mr. Carey always had a boiled egg at tea on Sunday, so that he might be
-fortified for the evening service.
-
-
-
-V
-
-
-Philip came gradually to know the people he was to live with, and by
-fragments of conversation, some of it not meant for his ears, learned a
-good deal both about himself and about his dead parents. Philip's father
-had been much younger than the Vicar of Blackstable. After a brilliant
-career at St. Luke's Hospital he was put on the staff, and presently began
-to earn money in considerable sums. He spent it freely. When the parson
-set about restoring his church and asked his brother for a subscription,
-he was surprised by receiving a couple of hundred pounds: Mr. Carey,
-thrifty by inclination and economical by necessity, accepted it with
-mingled feelings; he was envious of his brother because he could afford to
-give so much, pleased for the sake of his church, and vaguely irritated by
-a generosity which seemed almost ostentatious. Then Henry Carey married a
-patient, a beautiful girl but penniless, an orphan with no near relations,
-but of good family; and there was an array of fine friends at the wedding.
-The parson, on his visits to her when he came to London, held himself with
-reserve. He felt shy with her and in his heart he resented her great
-beauty: she dressed more magnificently than became the wife of a
-hardworking surgeon; and the charming furniture of her house, the flowers
-among which she lived even in winter, suggested an extravagance which he
-deplored. He heard her talk of entertainments she was going to; and, as he
-told his wife on getting home again, it was impossible to accept
-hospitality without making some return. He had seen grapes in the
-dining-room that must have cost at least eight shillings a pound; and at
-luncheon he had been given asparagus two months before it was ready in the
-vicarage garden. Now all he had anticipated was come to pass: the Vicar
-felt the satisfaction of the prophet who saw fire and brimstone consume
-the city which would not mend its way to his warning. Poor Philip was
-practically penniless, and what was the good of his mother's fine friends
-now? He heard that his father's extravagance was really criminal, and it
-was a mercy that Providence had seen fit to take his dear mother to
-itself: she had no more idea of money than a child.
-
-When Philip had been a week at Blackstable an incident happened which
-seemed to irritate his uncle very much. One morning he found on the
-breakfast table a small packet which had been sent on by post from the
-late Mrs. Carey's house in London. It was addressed to her. When the
-parson opened it he found a dozen photographs of Mrs. Carey. They showed
-the head and shoulders only, and her hair was more plainly done than
-usual, low on the forehead, which gave her an unusual look; the face was
-thin and worn, but no illness could impair the beauty of her features.
-There was in the large dark eyes a sadness which Philip did not remember.
-The first sight of the dead woman gave Mr. Carey a little shock, but this
-was quickly followed by perplexity. The photographs seemed quite recent,
-and he could not imagine who had ordered them.
-
-"D'you know anything about these, Philip?" he asked.
-
-"I remember mamma said she'd been taken," he answered. "Miss Watkin
-scolded her.... She said: I wanted the boy to have something to remember
-me by when he grows up."
-
-Mr. Carey looked at Philip for an instant. The child spoke in a clear
-treble. He recalled the words, but they meant nothing to him.
-
-"You'd better take one of the photographs and keep it in your room," said
-Mr. Carey. "I'll put the others away."
-
-He sent one to Miss Watkin, and she wrote and explained how they came to
-be taken.
-
-One day Mrs. Carey was lying in bed, but she was feeling a little better
-than usual, and the doctor in the morning had seemed hopeful; Emma had
-taken the child out, and the maids were downstairs in the basement:
-suddenly Mrs. Carey felt desperately alone in the world. A great fear
-seized her that she would not recover from the confinement which she was
-expecting in a fortnight. Her son was nine years old. How could he be
-expected to remember her? She could not bear to think that he would grow
-up and forget, forget her utterly; and she had loved him so passionately,
-because he was weakly and deformed, and because he was her child. She had
-no photographs of herself taken since her marriage, and that was ten years
-before. She wanted her son to know what she looked like at the end. He
-could not forget her then, not forget utterly. She knew that if she called
-her maid and told her she wanted to get up, the maid would prevent her,
-and perhaps send for the doctor, and she had not the strength now to
-struggle or argue. She got out of bed and began to dress herself. She had
-been on her back so long that her legs gave way beneath her, and then the
-soles of her feet tingled so that she could hardly bear to put them to the
-ground. But she went on. She was unused to doing her own hair and, when
-she raised her arms and began to brush it, she felt faint. She could never
-do it as her maid did. It was beautiful hair, very fine, and of a deep
-rich gold. Her eyebrows were straight and dark. She put on a black skirt,
-but chose the bodice of the evening dress which she liked best: it was of
-a white damask which was fashionable in those days. She looked at herself
-in the glass. Her face was very pale, but her skin was clear: she had
-never had much colour, and this had always made the redness of her
-beautiful mouth emphatic. She could not restrain a sob. But she could not
-afford to be sorry for herself; she was feeling already desperately tired;
-and she put on the furs which Henry had given her the Christmas
-before--she had been so proud of them and so happy then--and slipped
-downstairs with beating heart. She got safely out of the house and drove
-to a photographer. She paid for a dozen photographs. She was obliged to
-ask for a glass of water in the middle of the sitting; and the assistant,
-seeing she was ill, suggested that she should come another day, but she
-insisted on staying till the end. At last it was finished, and she drove
-back again to the dingy little house in Kensington which she hated with
-all her heart. It was a horrible house to die in.
-
-She found the front door open, and when she drove up the maid and Emma ran
-down the steps to help her. They had been frightened when they found her
-room empty. At first they thought she must have gone to Miss Watkin, and
-the cook was sent round. Miss Watkin came back with her and was waiting
-anxiously in the drawing-room. She came downstairs now full of anxiety and
-reproaches; but the exertion had been more than Mrs. Carey was fit for,
-and when the occasion for firmness no longer existed she gave way. She
-fell heavily into Emma's arms and was carried upstairs. She remained
-unconscious for a time that seemed incredibly long to those that watched
-her, and the doctor, hurriedly sent for, did not come. It was next day,
-when she was a little better, that Miss Watkin got some explanation out of
-her. Philip was playing on the floor of his mother's bed-room, and neither
-of the ladies paid attention to him. He only understood vaguely what they
-were talking about, and he could not have said why those words remained in
-his memory.
-
-"I wanted the boy to have something to remember me by when he grows up."
-
-"I can't make out why she ordered a dozen," said Mr. Carey. "Two would
-have done."
-
-
-
-VI
-
-
-One day was very like another at the vicarage.
-
-Soon after breakfast Mary Ann brought in The Times. Mr. Carey shared it
-with two neighbours. He had it from ten till one, when the gardener took
-it over to Mr. Ellis at the Limes, with whom it remained till seven; then
-it was taken to Miss Brooks at the Manor House, who, since she got it
-late, had the advantage of keeping it. In summer Mrs. Carey, when she was
-making jam, often asked her for a copy to cover the pots with. When the
-Vicar settled down to his paper his wife put on her bonnet and went out to
-do the shopping. Philip accompanied her. Blackstable was a fishing
-village. It consisted of a high street in which were the shops, the bank,
-the doctor's house, and the houses of two or three coalship owners; round
-the little harbor were shabby streets in which lived fishermen and poor
-people; but since they went to chapel they were of no account. When Mrs.
-Carey passed the dissenting ministers in the street she stepped over to
-the other side to avoid meeting them, but if there was not time for this
-fixed her eyes on the pavement. It was a scandal to which the Vicar had
-never resigned himself that there were three chapels in the High Street:
-he could not help feeling that the law should have stepped in to prevent
-their erection. Shopping in Blackstable was not a simple matter; for
-dissent, helped by the fact that the parish church was two miles from the
-town, was very common; and it was necessary to deal only with churchgoers;
-Mrs. Carey knew perfectly that the vicarage custom might make all the
-difference to a tradesman's faith. There were two butchers who went to
-church, and they would not understand that the Vicar could not deal with
-both of them at once; nor were they satisfied with his simple plan of
-going for six months to one and for six months to the other. The butcher
-who was not sending meat to the vicarage constantly threatened not to come
-to church, and the Vicar was sometimes obliged to make a threat: it was
-very wrong of him not to come to church, but if he carried iniquity
-further and actually went to chapel, then of course, excellent as his meat
-was, Mr. Carey would be forced to leave him for ever. Mrs. Carey often
-stopped at the bank to deliver a message to Josiah Graves, the manager,
-who was choir-master, treasurer, and churchwarden. He was a tall, thin man
-with a sallow face and a long nose; his hair was very white, and to Philip
-he seemed extremely old. He kept the parish accounts, arranged the treats
-for the choir and the schools; though there was no organ in the parish
-church, it was generally considered (in Blackstable) that the choir he led
-was the best in Kent; and when there was any ceremony, such as a visit
-from the Bishop for confirmation or from the Rural Dean to preach at the
-Harvest Thanksgiving, he made the necessary preparations. But he had no
-hesitation in doing all manner of things without more than a perfunctory
-consultation with the Vicar, and the Vicar, though always ready to be
-saved trouble, much resented the churchwarden's managing ways. He really
-seemed to look upon himself as the most important person in the parish.
-Mr. Carey constantly told his wife that if Josiah Graves did not take care
-he would give him a good rap over the knuckles one day; but Mrs. Carey
-advised him to bear with Josiah Graves: he meant well, and it was not his
-fault if he was not quite a gentleman. The Vicar, finding his comfort in
-the practice of a Christian virtue, exercised forbearance; but he revenged
-himself by calling the churchwarden Bismarck behind his back.
-
-Once there had been a serious quarrel between the pair, and Mrs. Carey
-still thought of that anxious time with dismay. The Conservative candidate
-had announced his intention of addressing a meeting at Blackstable; and
-Josiah Graves, having arranged that it should take place in the Mission
-Hall, went to Mr. Carey and told him that he hoped he would say a few
-words. It appeared that the candidate had asked Josiah Graves to take the
-chair. This was more than Mr. Carey could put up with. He had firm views
-upon the respect which was due to the cloth, and it was ridiculous for a
-churchwarden to take the chair at a meeting when the Vicar was there. He
-reminded Josiah Graves that parson meant person, that is, the vicar was
-the person of the parish. Josiah Graves answered that he was the first to
-recognise the dignity of the church, but this was a matter of politics,
-and in his turn he reminded the Vicar that their Blessed Saviour had
-enjoined upon them to render unto Caesar the things that were Caesar's. To
-this Mr. Carey replied that the devil could quote scripture to his
-purpose, himself had sole authority over the Mission Hall, and if he were
-not asked to be chairman he would refuse the use of it for a political
-meeting. Josiah Graves told Mr. Carey that he might do as he chose, and
-for his part he thought the Wesleyan Chapel would be an equally suitable
-place. Then Mr. Carey said that if Josiah Graves set foot in what was
-little better than a heathen temple he was not fit to be churchwarden in
-a Christian parish. Josiah Graves thereupon resigned all his offices, and
-that very evening sent to the church for his cassock and surplice. His
-sister, Miss Graves, who kept house for him, gave up her secretaryship of
-the Maternity Club, which provided the pregnant poor with flannel, baby
-linen, coals, and five shillings. Mr. Carey said he was at last master in
-his own house. But soon he found that he was obliged to see to all sorts
-of things that he knew nothing about; and Josiah Graves, after the first
-moment of irritation, discovered that he had lost his chief interest in
-life. Mrs. Carey and Miss Graves were much distressed by the quarrel; they
-met after a discreet exchange of letters, and made up their minds to put
-the matter right: they talked, one to her husband, the other to her
-brother, from morning till night; and since they were persuading these
-gentlemen to do what in their hearts they wanted, after three weeks of
-anxiety a reconciliation was effected. It was to both their interests, but
-they ascribed it to a common love for their Redeemer. The meeting was held
-at the Mission Hall, and the doctor was asked to be chairman. Mr. Carey
-and Josiah Graves both made speeches.
-
-When Mrs. Carey had finished her business with the banker, she generally
-went upstairs to have a little chat with his sister; and while the ladies
-talked of parish matters, the curate or the new bonnet of Mrs. Wilson--Mr.
-Wilson was the richest man in Blackstable, he was thought to have at least
-five hundred a year, and he had married his cook--Philip sat demurely in
-the stiff parlour, used only to receive visitors, and busied himself with
-the restless movements of goldfish in a bowl. The windows were never
-opened except to air the room for a few minutes in the morning, and it had
-a stuffy smell which seemed to Philip to have a mysterious connection with
-banking.
-
-Then Mrs. Carey remembered that she had to go to the grocer, and they
-continued their way. When the shopping was done they often went down a
-side street of little houses, mostly of wood, in which fishermen dwelt
-(and here and there a fisherman sat on his doorstep mending his nets, and
-nets hung to dry upon the doors), till they came to a small beach, shut in
-on each side by warehouses, but with a view of the sea. Mrs. Carey stood
-for a few minutes and looked at it, it was turbid and yellow, [and who
-knows what thoughts passed through her mind?] while Philip searched for
-flat stones to play ducks and drakes. Then they walked slowly back. They
-looked into the post office to get the right time, nodded to Mrs. Wigram
-the doctor's wife, who sat at her window sewing, and so got home.
-
-Dinner was at one o'clock; and on Monday, Tuesday, and Wednesday it
-consisted of beef, roast, hashed, and minced, and on Thursday, Friday, and
-Saturday of mutton. On Sunday they ate one of their own chickens. In the
-afternoon Philip did his lessons, He was taught Latin and mathematics by
-his uncle who knew neither, and French and the piano by his aunt. Of
-French she was ignorant, but she knew the piano well enough to accompany
-the old-fashioned songs she had sung for thirty years. Uncle William used
-to tell Philip that when he was a curate his wife had known twelve songs
-by heart, which she could sing at a moment's notice whenever she was
-asked. She often sang still when there was a tea-party at the vicarage.
-There were few people whom the Careys cared to ask there, and their
-parties consisted always of the curate, Josiah Graves with his sister, Dr.
-Wigram and his wife. After tea Miss Graves played one or two of
-Mendelssohn's Songs without Words, and Mrs. Carey sang When the
-Swallows Homeward Fly, or Trot, Trot, My Pony.
-
-But the Careys did not give tea-parties often; the preparations upset
-them, and when their guests were gone they felt themselves exhausted. They
-preferred to have tea by themselves, and after tea they played backgammon.
-Mrs. Carey arranged that her husband should win, because he did not like
-losing. They had cold supper at eight. It was a scrappy meal because Mary
-Ann resented getting anything ready after tea, and Mrs. Carey helped to
-clear away. Mrs. Carey seldom ate more than bread and butter, with a
-little stewed fruit to follow, but the Vicar had a slice of cold meat.
-Immediately after supper Mrs. Carey rang the bell for prayers, and then
-Philip went to bed. He rebelled against being undressed by Mary Ann and
-after a while succeeded in establishing his right to dress and undress
-himself. At nine o'clock Mary Ann brought in the eggs and the plate. Mrs.
-Carey wrote the date on each egg and put the number down in a book. She
-then took the plate-basket on her arm and went upstairs. Mr. Carey
-continued to read one of his old books, but as the clock struck ten he got
-up, put out the lamps, and followed his wife to bed.
-
-When Philip arrived there was some difficulty in deciding on which evening
-he should have his bath. It was never easy to get plenty of hot water,
-since the kitchen boiler did not work, and it was impossible for two
-persons to have a bath on the same day. The only man who had a bathroom in
-Blackstable was Mr. Wilson, and it was thought ostentatious of him. Mary
-Ann had her bath in the kitchen on Monday night, because she liked to
-begin the week clean. Uncle William could not have his on Saturday,
-because he had a heavy day before him and he was always a little tired
-after a bath, so he had it on Friday. Mrs. Carey had hers on Thursday for
-the same reason. It looked as though Saturday were naturally indicated for
-Philip, but Mary Ann said she couldn't keep the fire up on Saturday night:
-what with all the cooking on Sunday, having to make pastry and she didn't
-know what all, she did not feel up to giving the boy his bath on Saturday
-night; and it was quite clear that he could not bath himself. Mrs. Carey
-was shy about bathing a boy, and of course the Vicar had his sermon. But
-the Vicar insisted that Philip should be clean and sweet for the lord's
-Day. Mary Ann said she would rather go than be put upon--and after
-eighteen years she didn't expect to have more work given her, and they
-might show some consideration--and Philip said he didn't want anyone to
-bath him, but could very well bath himself. This settled it. Mary Ann said
-she was quite sure he wouldn't bath himself properly, and rather than he
-should go dirty--and not because he was going into the presence of the
-Lord, but because she couldn't abide a boy who wasn't properly
-washed--she'd work herself to the bone even if it was Saturday night.
-
-
-
-VII
-
-
-Sunday was a day crowded with incident. Mr. Carey was accustomed to say
-that he was the only man in his parish who worked seven days a week.
-
-The household got up half an hour earlier than usual. No lying abed for a
-poor parson on the day of rest, Mr. Carey remarked as Mary Ann knocked at
-the door punctually at eight. It took Mrs. Carey longer to dress, and she
-got down to breakfast at nine, a little breathless, only just before her
-husband. Mr. Carey's boots stood in front of the fire to warm. Prayers
-were longer than usual, and the breakfast more substantial. After
-breakfast the Vicar cut thin slices of bread for the communion, and Philip
-was privileged to cut off the crust. He was sent to the study to fetch a
-marble paperweight, with which Mr. Carey pressed the bread till it was
-thin and pulpy, and then it was cut into small squares. The amount was
-regulated by the weather. On a very bad day few people came to church, and
-on a very fine one, though many came, few stayed for communion. There were
-most when it was dry enough to make the walk to church pleasant, but not
-so fine that people wanted to hurry away.
-
-Then Mrs. Carey brought the communion plate out of the safe, which stood
-in the pantry, and the Vicar polished it with a chamois leather. At ten
-the fly drove up, and Mr. Carey got into his boots. Mrs. Carey took
-several minutes to put on her bonnet, during which the Vicar, in a
-voluminous cloak, stood in the hall with just such an expression on his
-face as would have become an early Christian about to be led into the
-arena. It was extraordinary that after thirty years of marriage his wife
-could not be ready in time on Sunday morning. At last she came, in black
-satin; the Vicar did not like colours in a clergyman's wife at any time,
-but on Sundays he was determined that she should wear black; now and then,
-in conspiracy with Miss Graves, she ventured a white feather or a pink
-rose in her bonnet, but the Vicar insisted that it should disappear; he
-said he would not go to church with the scarlet woman: Mrs. Carey sighed
-as a woman but obeyed as a wife. They were about to step into the carriage
-when the Vicar remembered that no one had given him his egg. They knew
-that he must have an egg for his voice, there were two women in the house,
-and no one had the least regard for his comfort. Mrs. Carey scolded Mary
-Ann, and Mary Ann answered that she could not think of everything. She
-hurried away to fetch an egg, and Mrs. Carey beat it up in a glass of
-sherry. The Vicar swallowed it at a gulp. The communion plate was stowed
-in the carriage, and they set off.
-
-The fly came from The Red Lion and had a peculiar smell of stale straw.
-They drove with both windows closed so that the Vicar should not catch
-cold. The sexton was waiting at the porch to take the communion plate, and
-while the Vicar went to the vestry Mrs. Carey and Philip settled
-themselves in the vicarage pew. Mrs. Carey placed in front of her the
-sixpenny bit she was accustomed to put in the plate, and gave Philip
-threepence for the same purpose. The church filled up gradually and the
-service began.
-
-Philip grew bored during the sermon, but if he fidgetted Mrs. Carey put a
-gentle hand on his arm and looked at him reproachfully. He regained
-interest when the final hymn was sung and Mr. Graves passed round with the
-plate.
-
-When everyone had gone Mrs. Carey went into Miss Graves' pew to have a few
-words with her while they were waiting for the gentlemen, and Philip went
-to the vestry. His uncle, the curate, and Mr. Graves were still in their
-surplices. Mr. Carey gave him the remains of the consecrated bread and
-told him he might eat it. He had been accustomed to eat it himself, as it
-seemed blasphemous to throw it away, but Philip's keen appetite relieved
-him from the duty. Then they counted the money. It consisted of pennies,
-sixpences and threepenny bits. There were always two single shillings, one
-put in the plate by the Vicar and the other by Mr. Graves; and sometimes
-there was a florin. Mr. Graves told the Vicar who had given this. It was
-always a stranger to Blackstable, and Mr. Carey wondered who he was. But
-Miss Graves had observed the rash act and was able to tell Mrs. Carey that
-the stranger came from London, was married and had children. During the
-drive home Mrs. Carey passed the information on, and the Vicar made up his
-mind to call on him and ask for a subscription to the Additional Curates
-Society. Mr. Carey asked if Philip had behaved properly; and Mrs. Carey
-remarked that Mrs. Wigram had a new mantle, Mr. Cox was not in church, and
-somebody thought that Miss Phillips was engaged. When they reached the
-vicarage they all felt that they deserved a substantial dinner.
-
-When this was over Mrs. Carey went to her room to rest, and Mr. Carey lay
-down on the sofa in the drawing-room for forty winks.
-
-They had tea at five, and the Vicar ate an egg to support himself for
-evensong. Mrs. Carey did not go to this so that Mary Ann might, but she
-read the service through and the hymns. Mr. Carey walked to church in the
-evening, and Philip limped along by his side. The walk through the
-darkness along the country road strangely impressed him, and the church
-with all its lights in the distance, coming gradually nearer, seemed very
-friendly. At first he was shy with his uncle, but little by little grew
-used to him, and he would slip his hand in his uncle's and walk more
-easily for the feeling of protection.
-
-They had supper when they got home. Mr. Carey's slippers were waiting for
-him on a footstool in front of the fire and by their side Philip's, one
-the shoe of a small boy, the other misshapen and odd. He was dreadfully
-tired when he went up to bed, and he did not resist when Mary Ann
-undressed him. She kissed him after she tucked him up, and he began to
-love her.
-
-
-
-VIII
-
-
-Philip had led always the solitary life of an only child, and his
-loneliness at the vicarage was no greater than it had been when his mother
-lived. He made friends with Mary Ann. She was a chubby little person of
-thirty-five, the daughter of a fisherman, and had come to the vicarage at
-eighteen; it was her first place and she had no intention of leaving it;
-but she held a possible marriage as a rod over the timid heads of her
-master and mistress. Her father and mother lived in a little house off
-Harbour Street, and she went to see them on her evenings out. Her stories
-of the sea touched Philip's imagination, and the narrow alleys round the
-harbour grew rich with the romance which his young fancy lent them. One
-evening he asked whether he might go home with her; but his aunt was
-afraid that he might catch something, and his uncle said that evil
-communications corrupted good manners. He disliked the fisher folk, who
-were rough, uncouth, and went to chapel. But Philip was more comfortable
-in the kitchen than in the dining-room, and, whenever he could, he took
-his toys and played there. His aunt was not sorry. She did not like
-disorder, and though she recognised that boys must be expected to be
-untidy she preferred that he should make a mess in the kitchen. If he
-fidgeted his uncle was apt to grow restless and say it was high time he
-went to school. Mrs. Carey thought Philip very young for this, and her
-heart went out to the motherless child; but her attempts to gain his
-affection were awkward, and the boy, feeling shy, received her
-demonstrations with so much sullenness that she was mortified. Sometimes
-she heard his shrill voice raised in laughter in the kitchen, but when she
-went in, he grew suddenly silent, and he flushed darkly when Mary Ann
-explained the joke. Mrs. Carey could not see anything amusing in what she
-heard, and she smiled with constraint.
-
-"He seems happier with Mary Ann than with us, William," she said, when she
-returned to her sewing.
-
-"One can see he's been very badly brought up. He wants licking into
-shape."
-
-On the second Sunday after Philip arrived an unlucky incident occurred.
-Mr. Carey had retired as usual after dinner for a little snooze in the
-drawing-room, but he was in an irritable mood and could not sleep. Josiah
-Graves that morning had objected strongly to some candlesticks with which
-the Vicar had adorned the altar. He had bought them second-hand in
-Tercanbury, and he thought they looked very well. But Josiah Graves said
-they were popish. This was a taunt that always aroused the Vicar. He had
-been at Oxford during the movement which ended in the secession from the
-Established Church of Edward Manning, and he felt a certain sympathy for
-the Church of Rome. He would willingly have made the service more ornate
-than had been usual in the low-church parish of Blackstable, and in his
-secret soul he yearned for processions and lighted candles. He drew the
-line at incense. He hated the word protestant. He called himself a
-Catholic. He was accustomed to say that Papists required an epithet, they
-were Roman Catholic; but the Church of England was Catholic in the best,
-the fullest, and the noblest sense of the term. He was pleased to think
-that his shaven face gave him the look of a priest, and in his youth he
-had possessed an ascetic air which added to the impression. He often
-related that on one of his holidays in Boulogne, one of those holidays
-upon which his wife for economy's sake did not accompany him, when he was
-sitting in a church, the cure had come up to him and invited him to
-preach a sermon. He dismissed his curates when they married, having
-decided views on the celibacy of the unbeneficed clergy. But when at an
-election the Liberals had written on his garden fence in large blue
-letters: This way to Rome, he had been very angry, and threatened to
-prosecute the leaders of the Liberal party in Blackstable. He made up his
-mind now that nothing Josiah Graves said would induce him to remove the
-candlesticks from the altar, and he muttered Bismarck to himself once or
-twice irritably.
-
-Suddenly he heard an unexpected noise. He pulled the handkerchief off his
-face, got up from the sofa on which he was lying, and went into the
-dining-room. Philip was seated on the table with all his bricks around
-him. He had built a monstrous castle, and some defect in the foundation
-had just brought the structure down in noisy ruin.
-
-"What are you doing with those bricks, Philip? You know you're not allowed
-to play games on Sunday."
-
-Philip stared at him for a moment with frightened eyes, and, as his habit
-was, flushed deeply.
-
-"I always used to play at home," he answered.
-
-"I'm sure your dear mamma never allowed you to do such a wicked thing as
-that."
-
-Philip did not know it was wicked; but if it was, he did not wish it to be
-supposed that his mother had consented to it. He hung his head and did not
-answer.
-
-"Don't you know it's very, very wicked to play on Sunday? What d'you
-suppose it's called the day of rest for? You're going to church tonight,
-and how can you face your Maker when you've been breaking one of His laws
-in the afternoon?"
-
-Mr. Carey told him to put the bricks away at once, and stood over him
-while Philip did so.
-
-"You're a very naughty boy," he repeated. "Think of the grief you're
-causing your poor mother in heaven."
-
-Philip felt inclined to cry, but he had an instinctive disinclination to
-letting other people see his tears, and he clenched his teeth to prevent
-the sobs from escaping. Mr. Carey sat down in his arm-chair and began to
-turn over the pages of a book. Philip stood at the window. The vicarage
-was set back from the highroad to Tercanbury, and from the dining-room one
-saw a semicircular strip of lawn and then as far as the horizon green
-fields. Sheep were grazing in them. The sky was forlorn and gray. Philip
-felt infinitely unhappy.
-
-Presently Mary Ann came in to lay the tea, and Aunt Louisa descended the
-stairs.
-
-"Have you had a nice little nap, William?" she asked.
-
-"No," he answered. "Philip made so much noise that I couldn't sleep a
-wink."
-
-This was not quite accurate, for he had been kept awake by his own
-thoughts; and Philip, listening sullenly, reflected that he had only made
-a noise once, and there was no reason why his uncle should not have slept
-before or after. When Mrs. Carey asked for an explanation the Vicar
-narrated the facts.
-
-"He hasn't even said he was sorry," he finished.
-
-"Oh, Philip, I'm sure you're sorry," said Mrs. Carey, anxious that the
-child should not seem wickeder to his uncle than need be.
-
-Philip did not reply. He went on munching his bread and butter. He did not
-know what power it was in him that prevented him from making any
-expression of regret. He felt his ears tingling, he was a little inclined
-to cry, but no word would issue from his lips.
-
-"You needn't make it worse by sulking," said Mr. Carey.
-
-Tea was finished in silence. Mrs. Carey looked at Philip surreptitiously
-now and then, but the Vicar elaborately ignored him. When Philip saw his
-uncle go upstairs to get ready for church he went into the hall and got
-his hat and coat, but when the Vicar came downstairs and saw him, he said:
-
-"I don't wish you to go to church tonight, Philip. I don't think you're in
-a proper frame of mind to enter the House of God."
-
-Philip did not say a word. He felt it was a deep humiliation that was
-placed upon him, and his cheeks reddened. He stood silently watching his
-uncle put on his broad hat and his voluminous cloak. Mrs. Carey as usual
-went to the door to see him off. Then she turned to Philip.
-
-"Never mind, Philip, you won't be a naughty boy next Sunday, will you, and
-then your uncle will take you to church with him in the evening."
-
-She took off his hat and coat, and led him into the dining-room.
-
-"Shall you and I read the service together, Philip, and we'll sing the
-hymns at the harmonium. Would you like that?"
-
-Philip shook his head decidedly. Mrs. Carey was taken aback. If he would
-not read the evening service with her she did not know what to do with
-him.
-
-"Then what would you like to do until your uncle comes back?" she asked
-helplessly.
-
-Philip broke his silence at last.
-
-"I want to be left alone," he said.
-
-"Philip, how can you say anything so unkind? Don't you know that your
-uncle and I only want your good? Don't you love me at all?"
-
-"I hate you. I wish you was dead."
-
-Mrs. Carey gasped. He said the words so savagely that it gave her quite a
-start. She had nothing to say. She sat down in her husband's chair; and as
-she thought of her desire to love the friendless, crippled boy and her
-eager wish that he should love her--she was a barren woman and, even
-though it was clearly God's will that she should be childless, she could
-scarcely bear to look at little children sometimes, her heart ached
-so--the tears rose to her eyes and one by one, slowly, rolled down her
-cheeks. Philip watched her in amazement. She took out her handkerchief,
-and now she cried without restraint. Suddenly Philip realised that she was
-crying because of what he had said, and he was sorry. He went up to her
-silently and kissed her. It was the first kiss he had ever given her
-without being asked. And the poor lady, so small in her black satin,
-shrivelled up and sallow, with her funny corkscrew curls, took the little
-boy on her lap and put her arms around him and wept as though her heart
-would break. But her tears were partly tears of happiness, for she felt
-that the strangeness between them was gone. She loved him now with a new
-love because he had made her suffer.
-
-
-
-IX
-
-
-On the following Sunday, when the Vicar was making his preparations to go
-into the drawing-room for his nap--all the actions of his life were
-conducted with ceremony--and Mrs. Carey was about to go upstairs, Philip
-asked:
-
-"What shall I do if I'm not allowed to play?"
-
-"Can't you sit still for once and be quiet?"
-
-"I can't sit still till tea-time."
-
-Mr. Carey looked out of the window, but it was cold and raw, and he could
-not suggest that Philip should go into the garden.
-
-"I know what you can do. You can learn by heart the collect for the day."
-
-He took the prayer-book which was used for prayers from the harmonium, and
-turned the pages till he came to the place he wanted.
-
-"It's not a long one. If you can say it without a mistake when I come in
-to tea you shall have the top of my egg."
-
-Mrs. Carey drew up Philip's chair to the dining-room table--they had
-bought him a high chair by now--and placed the book in front of him.
-
-"The devil finds work for idle hands to do," said Mr. Carey.
-
-He put some more coals on the fire so that there should be a cheerful
-blaze when he came in to tea, and went into the drawing-room. He loosened
-his collar, arranged the cushions, and settled himself comfortably on the
-sofa. But thinking the drawing-room a little chilly, Mrs. Carey brought
-him a rug from the hall; she put it over his legs and tucked it round his
-feet. She drew the blinds so that the light should not offend his eyes,
-and since he had closed them already went out of the room on tiptoe. The
-Vicar was at peace with himself today, and in ten minutes he was asleep.
-He snored softly.
-
-It was the Sixth Sunday after Epiphany, and the collect began with the
-words: O God, whose blessed Son was manifested that he might destroy the
-works of the devil, and make us the sons of God, and heirs of Eternal
-life. Philip read it through. He could make no sense of it. He began
-saying the words aloud to himself, but many of them were unknown to him,
-and the construction of the sentence was strange. He could not get more
-than two lines in his head. And his attention was constantly wandering:
-there were fruit trees trained on the walls of the vicarage, and a long
-twig beat now and then against the windowpane; sheep grazed stolidly in
-the field beyond the garden. It seemed as though there were knots inside
-his brain. Then panic seized him that he would not know the words by
-tea-time, and he kept on whispering them to himself quickly; he did not
-try to understand, but merely to get them parrot-like into his memory.
-
-Mrs. Carey could not sleep that afternoon, and by four o'clock she was so
-wide awake that she came downstairs. She thought she would hear Philip his
-collect so that he should make no mistakes when he said it to his uncle.
-His uncle then would be pleased; he would see that the boy's heart was in
-the right place. But when Mrs. Carey came to the dining-room and was about
-to go in, she heard a sound that made her stop suddenly. Her heart gave a
-little jump. She turned away and quietly slipped out of the front-door.
-She walked round the house till she came to the dining-room window and
-then cautiously looked in. Philip was still sitting on the chair she had
-put him in, but his head was on the table buried in his arms, and he was
-sobbing desperately. She saw the convulsive movement of his shoulders.
-Mrs. Carey was frightened. A thing that had always struck her about the
-child was that he seemed so collected. She had never seen him cry. And now
-she realised that his calmness was some instinctive shame of showing his
-feelings: he hid himself to weep.
-
-Without thinking that her husband disliked being wakened suddenly, she
-burst into the drawing-room.
-
-"William, William," she said. "The boy's crying as though his heart would
-break."
-
-Mr. Carey sat up and disentangled himself from the rug about his legs.
-
-"What's he got to cry about?"
-
-"I don't know.... Oh, William, we can't let the boy be unhappy. D'you
-think it's our fault? If we'd had children we'd have known what to do."
-
-Mr. Carey looked at her in perplexity. He felt extraordinarily helpless.
-
-"He can't be crying because I gave him the collect to learn. It's not more
-than ten lines."
-
-"Don't you think I might take him some picture books to look at, William?
-There are some of the Holy Land. There couldn't be anything wrong in
-that."
-
-"Very well, I don't mind."
-
-Mrs. Carey went into the study. To collect books was Mr. Carey's only
-passion, and he never went into Tercanbury without spending an hour or two
-in the second-hand shop; he always brought back four or five musty
-volumes. He never read them, for he had long lost the habit of reading,
-but he liked to turn the pages, look at the illustrations if they were
-illustrated, and mend the bindings. He welcomed wet days because on them
-he could stay at home without pangs of conscience and spend the afternoon
-with white of egg and a glue-pot, patching up the Russia leather of some
-battered quarto. He had many volumes of old travels, with steel
-engravings, and Mrs. Carey quickly found two which described Palestine.
-She coughed elaborately at the door so that Philip should have time to
-compose himself, she felt that he would be humiliated if she came upon him
-in the midst of his tears, then she rattled the door handle. When she went
-in Philip was poring over the prayer-book, hiding his eyes with his hands
-so that she might not see he had been crying.
-
-"Do you know the collect yet?" she said.
-
-He did not answer for a moment, and she felt that he did not trust his
-voice. She was oddly embarrassed.
-
-"I can't learn it by heart," he said at last, with a gasp.
-
-"Oh, well, never mind," she said. "You needn't. I've got some picture
-books for you to look at. Come and sit on my lap, and we'll look at them
-together."
-
-Philip slipped off his chair and limped over to her. He looked down so
-that she should not see his eyes. She put her arms round him.
-
-"Look," she said, "that's the place where our blessed Lord was born."
-
-She showed him an Eastern town with flat roofs and cupolas and minarets.
-In the foreground was a group of palm-trees, and under them were resting
-two Arabs and some camels. Philip passed his hand over the picture as if
-he wanted to feel the houses and the loose habiliments of the nomads.
-
-"Read what it says," he asked.
-
-Mrs. Carey in her even voice read the opposite page. It was a romantic
-narrative of some Eastern traveller of the thirties, pompous maybe, but
-fragrant with the emotion with which the East came to the generation that
-followed Byron and Chateaubriand. In a moment or two Philip interrupted
-her.
-
-"I want to see another picture."
-
-When Mary Ann came in and Mrs. Carey rose to help her lay the cloth,
-Philip took the book in his hands and hurried through the illustrations.
-It was with difficulty that his aunt induced him to put the book down for
-tea. He had forgotten his horrible struggle to get the collect by heart;
-he had forgotten his tears. Next day it was raining, and he asked for the
-book again. Mrs. Carey gave it him joyfully. Talking over his future with
-her husband she had found that both desired him to take orders, and this
-eagerness for the book which described places hallowed by the presence of
-Jesus seemed a good sign. It looked as though the boy's mind addressed
-itself naturally to holy things. But in a day or two he asked for more
-books. Mr. Carey took him into his study, showed him the shelf in which he
-kept illustrated works, and chose for him one that dealt with Rome. Philip
-took it greedily. The pictures led him to a new amusement. He began to
-read the page before and the page after each engraving to find out what it
-was about, and soon he lost all interest in his toys.
-
-Then, when no one was near, he took out books for himself; and perhaps
-because the first impression on his mind was made by an Eastern town, he
-found his chief amusement in those which described the Levant. His heart
-beat with excitement at the pictures of mosques and rich palaces; but
-there was one, in a book on Constantinople, which peculiarly stirred his
-imagination. It was called the Hall of the Thousand Columns. It was a
-Byzantine cistern, which the popular fancy had endowed with fantastic
-vastness; and the legend which he read told that a boat was always moored
-at the entrance to tempt the unwary, but no traveller venturing into the
-darkness had ever been seen again. And Philip wondered whether the boat
-went on for ever through one pillared alley after another or came at last
-to some strange mansion.
-
-One day a good fortune befell him, for he hit upon Lane's translation of
-The Thousand Nights and a Night. He was captured first by the
-illustrations, and then he began to read, to start with, the stories that
-dealt with magic, and then the others; and those he liked he read again
-and again. He could think of nothing else. He forgot the life about him.
-He had to be called two or three times before he would come to his dinner.
-Insensibly he formed the most delightful habit in the world, the habit of
-reading: he did not know that thus he was providing himself with a refuge
-from all the distress of life; he did not know either that he was creating
-for himself an unreal world which would make the real world of every day
-a source of bitter disappointment. Presently he began to read other
-things. His brain was precocious. His uncle and aunt, seeing that he
-occupied himself and neither worried nor made a noise, ceased to trouble
-themselves about him. Mr. Carey had so many books that he did not know
-them, and as he read little he forgot the odd lots he had bought at one
-time and another because they were cheap. Haphazard among the sermons and
-homilies, the travels, the lives of the Saints, the Fathers, the histories
-of the church, were old-fashioned novels; and these Philip at last
-discovered. He chose them by their titles, and the first he read was The
-Lancashire Witches, and then he read The Admirable Crichton, and then
-many more. Whenever he started a book with two solitary travellers riding
-along the brink of a desperate ravine he knew he was safe.
-
-The summer was come now, and the gardener, an old sailor, made him a
-hammock and fixed it up for him in the branches of a weeping willow. And
-here for long hours he lay, hidden from anyone who might come to the
-vicarage, reading, reading passionately. Time passed and it was July;
-August came: on Sundays the church was crowded with strangers, and the
-collection at the offertory often amounted to two pounds. Neither the
-Vicar nor Mrs. Carey went out of the garden much during this period; for
-they disliked strange faces, and they looked upon the visitors from London
-with aversion. The house opposite was taken for six weeks by a gentleman
-who had two little boys, and he sent in to ask if Philip would like to go
-and play with them; but Mrs. Carey returned a polite refusal. She was
-afraid that Philip would be corrupted by little boys from London. He was
-going to be a clergyman, and it was necessary that he should be preserved
-from contamination. She liked to see in him an infant Samuel.
-
-
-
-X
-
-
-The Careys made up their minds to send Philip to King's School at
-Tercanbury. The neighbouring clergy sent their sons there. It was united
-by long tradition to the Cathedral: its headmaster was an honorary Canon,
-and a past headmaster was the Archdeacon. Boys were encouraged there to
-aspire to Holy Orders, and the education was such as might prepare an
-honest lad to spend his life in God's service. A preparatory school was
-attached to it, and to this it was arranged that Philip should go. Mr.
-Carey took him into Tercanbury one Thursday afternoon towards the end of
-September. All day Philip had been excited and rather frightened. He knew
-little of school life but what he had read in the stories of The Boy's
-Own Paper. He had also read Eric, or Little by Little.
-
-When they got out of the train at Tercanbury, Philip felt sick with
-apprehension, and during the drive in to the town sat pale and silent. The
-high brick wall in front of the school gave it the look of a prison. There
-was a little door in it, which opened on their ringing; and a clumsy,
-untidy man came out and fetched Philip's tin trunk and his play-box. They
-were shown into the drawing-room; it was filled with massive, ugly
-furniture, and the chairs of the suite were placed round the walls with a
-forbidding rigidity. They waited for the headmaster.
-
-"What's Mr. Watson like?" asked Philip, after a while.
-
-"You'll see for yourself."
-
-There was another pause. Mr. Carey wondered why the headmaster did not
-come. Presently Philip made an effort and spoke again.
-
-"Tell him I've got a club-foot," he said.
-
-Before Mr. Carey could speak the door burst open and Mr. Watson swept into
-the room. To Philip he seemed gigantic. He was a man of over six feet
-high, and broad, with enormous hands and a great red beard; he talked
-loudly in a jovial manner; but his aggressive cheerfulness struck terror
-in Philip's heart. He shook hands with Mr. Carey, and then took Philip's
-small hand in his.
-
-"Well, young fellow, are you glad to come to school?" he shouted.
-
-Philip reddened and found no word to answer.
-
-"How old are you?"
-
-"Nine," said Philip.
-
-"You must say sir," said his uncle.
-
-"I expect you've got a good lot to learn," the headmaster bellowed
-cheerily.
-
-To give the boy confidence he began to tickle him with rough fingers.
-Philip, feeling shy and uncomfortable, squirmed under his touch.
-
-"I've put him in the small dormitory for the present.... You'll like that,
-won't you?" he added to Philip. "Only eight of you in there. You won't
-feel so strange."
-
-Then the door opened, and Mrs. Watson came in. She was a dark woman with
-black hair, neatly parted in the middle. She had curiously thick lips and
-a small round nose. Her eyes were large and black. There was a singular
-coldness in her appearance. She seldom spoke and smiled more seldom still.
-Her husband introduced Mr. Carey to her, and then gave Philip a friendly
-push towards her.
-
-"This is a new boy, Helen, His name's Carey."
-
-Without a word she shook hands with Philip and then sat down, not
-speaking, while the headmaster asked Mr. Carey how much Philip knew and
-what books he had been working with. The Vicar of Blackstable was a little
-embarrassed by Mr. Watson's boisterous heartiness, and in a moment or two
-got up.
-
-"I think I'd better leave Philip with you now."
-
-"That's all right," said Mr. Watson. "He'll be safe with me. He'll get on
-like a house on fire. Won't you, young fellow?"
-
-Without waiting for an answer from Philip the big man burst into a great
-bellow of laughter. Mr. Carey kissed Philip on the forehead and went away.
-
-"Come along, young fellow," shouted Mr. Watson. "I'll show you the
-school-room."
-
-He swept out of the drawing-room with giant strides, and Philip hurriedly
-limped behind him. He was taken into a long, bare room with two tables
-that ran along its whole length; on each side of them were wooden forms.
-
-"Nobody much here yet," said Mr. Watson. "I'll just show you the
-playground, and then I'll leave you to shift for yourself."
-
-Mr. Watson led the way. Philip found himself in a large play-ground with
-high brick walls on three sides of it. On the fourth side was an iron
-railing through which you saw a vast lawn and beyond this some of the
-buildings of King's School. One small boy was wandering disconsolately,
-kicking up the gravel as he walked.
-
-"Hulloa, Venning," shouted Mr. Watson. "When did you turn up?"
-
-The small boy came forward and shook hands.
-
-"Here's a new boy. He's older and bigger than you, so don't you bully
-him."
-
-The headmaster glared amicably at the two children, filling them with fear
-by the roar of his voice, and then with a guffaw left them.
-
-"What's your name?"
-
-"Carey."
-
-"What's your father?"
-
-"He's dead."
-
-"Oh! Does your mother wash?"
-
-"My mother's dead, too."
-
-Philip thought this answer would cause the boy a certain awkwardness, but
-Venning was not to be turned from his facetiousness for so little.
-
-"Well, did she wash?" he went on.
-
-"Yes," said Philip indignantly.
-
-"She was a washerwoman then?"
-
-"No, she wasn't."
-
-"Then she didn't wash."
-
-The little boy crowed with delight at the success of his dialectic. Then
-he caught sight of Philip's feet.
-
-"What's the matter with your foot?"
-
-Philip instinctively tried to withdraw it from sight. He hid it behind the
-one which was whole.
-
-"I've got a club-foot," he answered.
-
-"How did you get it?"
-
-"I've always had it."
-
-"Let's have a look."
-
-"No."
-
-"Don't then."
-
-The little boy accompanied the words with a sharp kick on Philip's shin,
-which Philip did not expect and thus could not guard against. The pain was
-so great that it made him gasp, but greater than the pain was the
-surprise. He did not know why Venning kicked him. He had not the presence
-of mind to give him a black eye. Besides, the boy was smaller than he, and
-he had read in The Boy's Own Paper that it was a mean thing to hit
-anyone smaller than yourself. While Philip was nursing his shin a third
-boy appeared, and his tormentor left him. In a little while he noticed
-that the pair were talking about him, and he felt they were looking at his
-feet. He grew hot and uncomfortable.
-
-But others arrived, a dozen together, and then more, and they began to
-talk about their doings during the holidays, where they had been, and what
-wonderful cricket they had played. A few new boys appeared, and with these
-presently Philip found himself talking. He was shy and nervous. He was
-anxious to make himself pleasant, but he could not think of anything to
-say. He was asked a great many questions and answered them all quite
-willingly. One boy asked him whether he could play cricket.
-
-"No," answered Philip. "I've got a club-foot."
-
-The boy looked down quickly and reddened. Philip saw that he felt he had
-asked an unseemly question. He was too shy to apologise and looked at
-Philip awkwardly.
-
-
-
-XI
-
-
-Next morning when the clanging of a bell awoke Philip he looked round his
-cubicle in astonishment. Then a voice sang out, and he remembered where he
-was.
-
-"Are you awake, Singer?"
-
-The partitions of the cubicle were of polished pitch-pine, and there was
-a green curtain in front. In those days there was little thought of
-ventilation, and the windows were closed except when the dormitory was
-aired in the morning.
-
-Philip got up and knelt down to say his prayers. It was a cold morning,
-and he shivered a little; but he had been taught by his uncle that his
-prayers were more acceptable to God if he said them in his nightshirt than
-if he waited till he was dressed. This did not surprise him, for he was
-beginning to realise that he was the creature of a God who appreciated the
-discomfort of his worshippers. Then he washed. There were two baths for
-the fifty boarders, and each boy had a bath once a week. The rest of his
-washing was done in a small basin on a wash-stand, which with the bed and
-a chair, made up the furniture of each cubicle. The boys chatted gaily
-while they dressed. Philip was all ears. Then another bell sounded, and
-they ran downstairs. They took their seats on the forms on each side of
-the two long tables in the school-room; and Mr. Watson, followed by his
-wife and the servants, came in and sat down. Mr. Watson read prayers in an
-impressive manner, and the supplications thundered out in his loud voice
-as though they were threats personally addressed to each boy. Philip
-listened with anxiety. Then Mr. Watson read a chapter from the Bible, and
-the servants trooped out. In a moment the untidy youth brought in two
-large pots of tea and on a second journey immense dishes of bread and
-butter.
-
-Philip had a squeamish appetite, and the thick slabs of poor butter on the
-bread turned his stomach, but he saw other boys scraping it off and
-followed their example. They all had potted meats and such like, which
-they had brought in their play-boxes; and some had 'extras,' eggs or
-bacon, upon which Mr. Watson made a profit. When he had asked Mr. Carey
-whether Philip was to have these, Mr. Carey replied that he did not think
-boys should be spoilt. Mr. Watson quite agreed with him--he considered
-nothing was better than bread and butter for growing lads--but some
-parents, unduly pampering their offspring, insisted on it.
-
-Philip noticed that 'extras' gave boys a certain consideration and made up
-his mind, when he wrote to Aunt Louisa, to ask for them.
-
-After breakfast the boys wandered out into the play-ground. Here the
-day-boys were gradually assembling. They were sons of the local clergy, of
-the officers at the Depot, and of such manufacturers or men of business as
-the old town possessed. Presently a bell rang, and they all trooped into
-school. This consisted of a large, long room at opposite ends of which two
-under-masters conducted the second and third forms, and of a smaller one,
-leading out of it, used by Mr. Watson, who taught the first form. To
-attach the preparatory to the senior school these three classes were known
-officially, on speech days and in reports, as upper, middle, and lower
-second. Philip was put in the last. The master, a red-faced man with a
-pleasant voice, was called Rice; he had a jolly manner with boys, and the
-time passed quickly. Philip was surprised when it was a quarter to eleven
-and they were let out for ten minutes' rest.
-
-The whole school rushed noisily into the play-ground. The new boys were
-told to go into the middle, while the others stationed themselves along
-opposite walls. They began to play Pig in the Middle. The old boys ran
-from wall to wall while the new boys tried to catch them: when one was
-seized and the mystic words said--one, two, three, and a pig for me--he
-became a prisoner and, turning sides, helped to catch those who were still
-free. Philip saw a boy running past and tried to catch him, but his limp
-gave him no chance; and the runners, taking their opportunity, made
-straight for the ground he covered. Then one of them had the brilliant
-idea of imitating Philip's clumsy run. Other boys saw it and began to
-laugh; then they all copied the first; and they ran round Philip, limping
-grotesquely, screaming in their treble voices with shrill laughter. They
-lost their heads with the delight of their new amusement, and choked with
-helpless merriment. One of them tripped Philip up and he fell, heavily as
-he always fell, and cut his knee. They laughed all the louder when he got
-up. A boy pushed him from behind, and he would have fallen again if
-another had not caught him. The game was forgotten in the entertainment of
-Philip's deformity. One of them invented an odd, rolling limp that struck
-the rest as supremely ridiculous, and several of the boys lay down on the
-ground and rolled about in laughter: Philip was completely scared. He
-could not make out why they were laughing at him. His heart beat so that
-he could hardly breathe, and he was more frightened than he had ever been
-in his life. He stood still stupidly while the boys ran round him,
-mimicking and laughing; they shouted to him to try and catch them; but he
-did not move. He did not want them to see him run any more. He was using
-all his strength to prevent himself from crying.
-
-Suddenly the bell rang, and they all trooped back to school. Philip's knee
-was bleeding, and he was dusty and dishevelled. For some minutes Mr. Rice
-could not control his form. They were excited still by the strange
-novelty, and Philip saw one or two of them furtively looking down at his
-feet. He tucked them under the bench.
-
-In the afternoon they went up to play football, but Mr. Watson stopped
-Philip on the way out after dinner.
-
-"I suppose you can't play football, Carey?" he asked him.
-
-Philip blushed self-consciously.
-
-"No, sir."
-
-"Very well. You'd better go up to the field. You can walk as far as that,
-can't you?"
-
-Philip had no idea where the field was, but he answered all the same.
-
-"Yes, sir."
-
-The boys went in charge of Mr. Rice, who glanced at Philip and seeing he
-had not changed, asked why he was not going to play.
-
-"Mr. Watson said I needn't, sir," said Philip.
-
-"Why?"
-
-There were boys all round him, looking at him curiously, and a feeling of
-shame came over Philip. He looked down without answering. Others gave the
-reply.
-
-"He's got a club-foot, sir."
-
-"Oh, I see."
-
-Mr. Rice was quite young; he had only taken his degree a year before; and
-he was suddenly embarrassed. His instinct was to beg the boy's pardon, but
-he was too shy to do so. He made his voice gruff and loud.
-
-"Now then, you boys, what are you waiting about for? Get on with you."
-
-Some of them had already started and those that were left now set off, in
-groups of two or three.
-
-"You'd better come along with me, Carey," said the master "You don't know
-the way, do you?"
-
-Philip guessed the kindness, and a sob came to his throat.
-
-"I can't go very fast, sir."
-
-"Then I'll go very slow," said the master, with a smile.
-
-Philip's heart went out to the red-faced, commonplace young man who said
-a gentle word to him. He suddenly felt less unhappy.
-
-But at night when they went up to bed and were undressing, the boy who was
-called Singer came out of his cubicle and put his head in Philip's.
-
-"I say, let's look at your foot," he said.
-
-"No," answered Philip.
-
-He jumped into bed quickly.
-
-"Don't say no to me," said Singer. "Come on, Mason."
-
-The boy in the next cubicle was looking round the corner, and at the words
-he slipped in. They made for Philip and tried to tear the bed-clothes off
-him, but he held them tightly.
-
-"Why can't you leave me alone?" he cried.
-
-Singer seized a brush and with the back of it beat Philip's hands clenched
-on the blanket. Philip cried out.
-
-"Why don't you show us your foot quietly?"
-
-"I won't."
-
-In desperation Philip clenched his fist and hit the boy who tormented him,
-but he was at a disadvantage, and the boy seized his arm. He began to turn
-it.
-
-"Oh, don't, don't," said Philip. "You'll break my arm."
-
-"Stop still then and put out your foot."
-
-Philip gave a sob and a gasp. The boy gave the arm another wrench. The
-pain was unendurable.
-
-"All right. I'll do it," said Philip.
-
-He put out his foot. Singer still kept his hand on Philip's wrist. He
-looked curiously at the deformity.
-
-"Isn't it beastly?" said Mason.
-
-Another came in and looked too.
-
-"Ugh," he said, in disgust.
-
-"My word, it is rum," said Singer, making a face. "Is it hard?"
-
-He touched it with the tip of his forefinger, cautiously, as though it


<TRUNCATED>
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/resources/orders.txt
----------------------------------------------------------------------
diff --git a/crunch/src/it/resources/orders.txt b/crunch/src/it/resources/orders.txt
deleted file mode 100644
index 2f1383f..0000000
--- a/crunch/src/it/resources/orders.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-222|Toilet plunger
-333|Toilet brush
-222|Toilet paper
-111|Corn flakes
\ No newline at end of file


[34/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/hadoop/mapreduce/lib/jobcontrol/CrunchControlledJob.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/hadoop/mapreduce/lib/jobcontrol/CrunchControlledJob.java b/crunch-core/src/main/java/org/apache/crunch/hadoop/mapreduce/lib/jobcontrol/CrunchControlledJob.java
new file mode 100644
index 0000000..93926c1
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/hadoop/mapreduce/lib/jobcontrol/CrunchControlledJob.java
@@ -0,0 +1,325 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.hadoop.mapreduce.lib.jobcontrol;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.crunch.impl.mr.run.RuntimeParameters;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobID;
+import org.apache.hadoop.util.StringUtils;
+
+import com.google.common.base.Objects;
+import com.google.common.collect.Lists;
+
+/**
+ * This class encapsulates a MapReduce job and its dependencies. It monitors
+ * the states of the depending jobs and updates the state of this job. A job
+ * starts in the WAITING state. If it does not have any depending jobs, or if
+ * all of its depending jobs are in the SUCCESS state, then the job becomes
+ * READY. If any depending job fails, the job fails too. When in the READY
+ * state, the job can be submitted to Hadoop for execution, moving into the
+ * RUNNING state. From the RUNNING state, the job ends up in either the
+ * SUCCESS or the FAILED state, depending on the outcome of the job execution.
+ */
+public class CrunchControlledJob {
+
+  // A job will be in one of the following states
+  public static enum State {
+    SUCCESS, WAITING, RUNNING, READY, FAILED, DEPENDENT_FAILED
+  };
+
+  public static interface Hook {
+    public void run() throws IOException;
+  }
+
+  private static final Log LOG = LogFactory.getLog(CrunchControlledJob.class);
+
+  private final int jobID;
+  private final Job job; // mapreduce job to be executed.
+  // the jobs the current job depends on
+  private final List<CrunchControlledJob> dependingJobs;
+  private final Hook prepareHook;
+  private final Hook completionHook;
+  private State state;
+  // some info for human consumption, e.g. the reason why the job failed
+  private String message;
+  private String lastKnownProgress;
+
+  /**
+   * Construct a job.
+   *
+   * @param jobID
+   *          an ID used to match with its {@link org.apache.crunch.impl.mr.plan.JobPrototype}.
+   * @param job
+   *          a mapreduce job to be executed.
+   * @param prepareHook
+   *          a piece of code that will run before this job is submitted.
+   * @param completionHook
+   *          a piece of code that will run after this job gets completed.
+   */
+  public CrunchControlledJob(int jobID, Job job, Hook prepareHook, Hook completionHook) {
+    this.jobID = jobID;
+    this.job = job;
+    this.dependingJobs = Lists.newArrayList();
+    this.prepareHook = prepareHook;
+    this.completionHook = completionHook;
+    this.state = State.WAITING;
+    this.message = "just initialized";
+  }
+
+  @Override
+  public String toString() {
+    StringBuffer sb = new StringBuffer();
+    sb.append("job name:\t").append(this.job.getJobName()).append("\n");
+    sb.append("job id:\t").append(this.jobID).append("\n");
+    sb.append("job state:\t").append(this.state).append("\n");
+    sb.append("job mapred id:\t").append(this.job.getJobID()).append("\n");
+    sb.append("job message:\t").append(this.message).append("\n");
+
+    if (this.dependingJobs == null || this.dependingJobs.size() == 0) {
+      sb.append("job has no depending job:\t").append("\n");
+    } else {
+      sb.append("job has ").append(this.dependingJobs.size())
+          .append(" dependeng jobs:\n");
+      for (int i = 0; i < this.dependingJobs.size(); i++) {
+        sb.append("\t depending job ").append(i).append(":\t");
+        sb.append((this.dependingJobs.get(i)).getJobName()).append("\n");
+      }
+    }
+    return sb.toString();
+  }
+
+  /**
+   * @return the job name of this job
+   */
+  public String getJobName() {
+    return job.getJobName();
+  }
+
+  /**
+   * Set the job name for this job.
+   *
+   * @param jobName
+   *          the job name
+   */
+  public void setJobName(String jobName) {
+    job.setJobName(jobName);
+  }
+
+  /**
+   * @return the job ID of this job
+   */
+  public int getJobID() {
+    return this.jobID;
+  }
+
+  /**
+   * @return the mapred ID of this job as assigned by the mapred framework.
+   */
+  public JobID getMapredJobID() {
+    return this.job.getJobID();
+  }
+
+  /**
+   * @return the mapreduce job
+   */
+  public synchronized Job getJob() {
+    return this.job;
+  }
+
+  /**
+   * @return the state of this job
+   */
+  public synchronized State getJobState() {
+    return this.state;
+  }
+
+  /**
+   * Set the state for this job.
+   * 
+   * @param state
+   *          the new state for this job.
+   */
+  protected synchronized void setJobState(State state) {
+    this.state = state;
+  }
+
+  /**
+   * @return the message of this job
+   */
+  public synchronized String getMessage() {
+    return this.message;
+  }
+
+  /**
+   * Set the message for this job.
+   * 
+   * @param message
+   *          the message for this job.
+   */
+  public synchronized void setMessage(String message) {
+    this.message = message;
+  }
+
+  /**
+   * @return the depending jobs of this job
+   */
+  public List<CrunchControlledJob> getDependentJobs() {
+    return this.dependingJobs;
+  }
+
+  /**
+   * Add a job to this job's dependency list. Depending jobs can only be added
+   * while a job is waiting to run, not during or afterwards.
+   * 
+   * @param dependingJob
+   *          Job that this Job depends on.
+   * @return <tt>true</tt> if the Job was added.
+   */
+  public synchronized boolean addDependingJob(CrunchControlledJob dependingJob) {
+    if (this.state == State.WAITING) { // only allowed to add jobs when waiting
+      return this.dependingJobs.add(dependingJob);
+    } else {
+      return false;
+    }
+  }
+
+  /**
+   * @return true if this job is in a complete state
+   */
+  public synchronized boolean isCompleted() {
+    return this.state == State.FAILED || this.state == State.DEPENDENT_FAILED
+        || this.state == State.SUCCESS;
+  }
+
+  /**
+   * @return true if this job is in READY state
+   */
+  public synchronized boolean isReady() {
+    return this.state == State.READY;
+  }
+
+  public void killJob() throws IOException, InterruptedException {
+    job.killJob();
+  }
+
+  /**
+   * Check the state of this running job. The state may remain the same, or it
+   * may become SUCCESS or FAILED.
+   */
+  private void checkRunningState() throws IOException, InterruptedException {
+    try {
+      if (job.isComplete()) {
+        if (job.isSuccessful()) {
+          this.state = State.SUCCESS;
+        } else {
+          this.state = State.FAILED;
+          this.message = "Job failed!";
+        }
+      } else {
+        // still running
+        if (job.getConfiguration().getBoolean(RuntimeParameters.LOG_JOB_PROGRESS, false)) {
+          logJobProgress();
+        }
+      }
+    } catch (IOException ioe) {
+      this.state = State.FAILED;
+      this.message = StringUtils.stringifyException(ioe);
+      try {
+        if (job != null) {
+          job.killJob();
+        }
+      } catch (IOException e) {
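+        // Best-effort kill; the original IOException is already recorded in this.message.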
+      }
+    }
+    if (isCompleted()) {
+      completionHook.run();
+    }
+  }
+
+  /**
+   * Check and update the state of this job. The state changes depending on its
+   * current state and the states of the depending jobs.
+   */
+  synchronized State checkState() throws IOException, InterruptedException {
+    if (this.state == State.RUNNING) {
+      checkRunningState();
+    }
+    if (this.state != State.WAITING) {
+      return this.state;
+    }
+    if (this.dependingJobs == null || this.dependingJobs.size() == 0) {
+      this.state = State.READY;
+      return this.state;
+    }
+    CrunchControlledJob pred = null;
+    int n = this.dependingJobs.size();
+    for (int i = 0; i < n; i++) {
+      pred = this.dependingJobs.get(i);
+      State s = pred.checkState();
+      if (s == State.WAITING || s == State.READY || s == State.RUNNING) {
+        break; // a pred is still not completed, continue in WAITING
+        // state
+      }
+      if (s == State.FAILED || s == State.DEPENDENT_FAILED) {
+        this.state = State.DEPENDENT_FAILED;
+        this.message = "depending job " + i + " with jobID " + pred.getJobID()
+            + " failed. " + pred.getMessage();
+        break;
+      }
+      // pred must be in success state
+      if (i == n - 1) {
+        this.state = State.READY;
+      }
+    }
+
+    return this.state;
+  }
+
+  /**
+   * Submit this job to mapred. The state becomes RUNNING if submission is
+   * successful, FAILED otherwise.
+   */
+  protected synchronized void submit() {
+    try {
+      prepareHook.run();
+      job.submit();
+      this.state = State.RUNNING;
+      LOG.info("Running job \"" + getJobName() + "\"");
+      LOG.info("Job status available at: " + job.getTrackingURL());
+    } catch (Exception ioe) {
+      this.state = State.FAILED;
+      this.message = StringUtils.stringifyException(ioe);
+      LOG.info("Error occurred starting job \"" + getJobName() + "\":");
+      LOG.info(getMessage());
+    }
+  }
+
+  private void logJobProgress() throws IOException, InterruptedException {
+    String progress = String.format("map %.0f%% reduce %.0f%%",
+        100.0 * job.mapProgress(), 100.0 * job.reduceProgress());
+    if (!Objects.equal(lastKnownProgress, progress)) {
+      LOG.info(job.getJobName() + " progress: " + progress);
+      lastKnownProgress = progress;
+    }
+  }
+}
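
For orientation only: a minimal sketch of how the state machine documented above
might be exercised by a caller. It is not part of this patch; the no-op hooks, the
bare Job instances (Job.getInstance() is the Hadoop 2 style; older APIs construct
Job directly), and the class name are all illustrative.

    import java.io.IOException;

    import org.apache.crunch.hadoop.mapreduce.lib.jobcontrol.CrunchControlledJob;
    import org.apache.hadoop.mapreduce.Job;

    public class ControlledJobSketch {
      public static void main(String[] args) throws Exception {
        // Hooks that do nothing before submission or after completion.
        CrunchControlledJob.Hook noOp = new CrunchControlledJob.Hook() {
          @Override
          public void run() throws IOException { /* no-op */ }
        };
        // Two MapReduce jobs; the second depends on the first.
        CrunchControlledJob first = new CrunchControlledJob(1, Job.getInstance(), noOp, noOp);
        CrunchControlledJob second = new CrunchControlledJob(2, Job.getInstance(), noOp, noOp);
        second.addDependingJob(first); // only allowed while 'second' is still WAITING
        // Both jobs start out WAITING; a CrunchJobControl (next file) moves them
        // through READY and RUNNING to SUCCESS, FAILED, or DEPENDENT_FAILED.
        System.out.println(first.getJobState() + " / " + second.getJobState());
      }
    }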

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/hadoop/mapreduce/lib/jobcontrol/CrunchJobControl.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/hadoop/mapreduce/lib/jobcontrol/CrunchJobControl.java b/crunch-core/src/main/java/org/apache/crunch/hadoop/mapreduce/lib/jobcontrol/CrunchJobControl.java
new file mode 100644
index 0000000..727ab6f
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/hadoop/mapreduce/lib/jobcontrol/CrunchJobControl.java
@@ -0,0 +1,211 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.hadoop.mapreduce.lib.jobcontrol;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Hashtable;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.crunch.hadoop.mapreduce.lib.jobcontrol.CrunchControlledJob.State;
+
+/**
+ * This class encapsulates a set of MapReduce jobs and their dependencies.
+ * 
+ * It tracks the states of the jobs by placing them into different tables
+ * according to their states.
+ * 
+ * This class provides APIs for the client app to add a job to the group and to
+ * get the jobs in the group in different states. When a job is added, an ID
+ * unique to the group is assigned to the job.
+ */
+public class CrunchJobControl {
+
+  private Map<Integer, CrunchControlledJob> waitingJobs;
+  private Map<Integer, CrunchControlledJob> readyJobs;
+  private Map<Integer, CrunchControlledJob> runningJobs;
+  private Map<Integer, CrunchControlledJob> successfulJobs;
+  private Map<Integer, CrunchControlledJob> failedJobs;
+
+  private Log log = LogFactory.getLog(CrunchJobControl.class);
+
+  private final String groupName;
+
+  /**
+   * Construct a job control for a group of jobs.
+   * 
+   * @param groupName
+   *          a name identifying this group
+   */
+  public CrunchJobControl(String groupName) {
+    this.waitingJobs = new Hashtable<Integer, CrunchControlledJob>();
+    this.readyJobs = new Hashtable<Integer, CrunchControlledJob>();
+    this.runningJobs = new Hashtable<Integer, CrunchControlledJob>();
+    this.successfulJobs = new Hashtable<Integer, CrunchControlledJob>();
+    this.failedJobs = new Hashtable<Integer, CrunchControlledJob>();
+    this.groupName = groupName;
+  }
+
+  private static List<CrunchControlledJob> toList(Map<Integer, CrunchControlledJob> jobs) {
+    ArrayList<CrunchControlledJob> retv = new ArrayList<CrunchControlledJob>();
+    synchronized (jobs) {
+      for (CrunchControlledJob job : jobs.values()) {
+        retv.add(job);
+      }
+    }
+    return retv;
+  }
+
+  /**
+   * @return the jobs in the waiting state
+   */
+  public List<CrunchControlledJob> getWaitingJobList() {
+    return toList(this.waitingJobs);
+  }
+
+  /**
+   * @return the jobs in the running state
+   */
+  public List<CrunchControlledJob> getRunningJobList() {
+    return toList(this.runningJobs);
+  }
+
+  /**
+   * @return the jobs in the ready state
+   */
+  public List<CrunchControlledJob> getReadyJobsList() {
+    return toList(this.readyJobs);
+  }
+
+  /**
+   * @return the jobs in the success state
+   */
+  public List<CrunchControlledJob> getSuccessfulJobList() {
+    return toList(this.successfulJobs);
+  }
+
+  public List<CrunchControlledJob> getFailedJobList() {
+    return toList(this.failedJobs);
+  }
+
+  private static void addToQueue(CrunchControlledJob aJob,
+      Map<Integer, CrunchControlledJob> queue) {
+    synchronized (queue) {
+      queue.put(aJob.getJobID(), aJob);
+    }
+  }
+
+  private void addToQueue(CrunchControlledJob aJob) {
+    Map<Integer, CrunchControlledJob> queue = getQueue(aJob.getJobState());
+    addToQueue(aJob, queue);
+  }
+
+  private Map<Integer, CrunchControlledJob> getQueue(State state) {
+    Map<Integer, CrunchControlledJob> retv = null;
+    if (state == State.WAITING) {
+      retv = this.waitingJobs;
+    } else if (state == State.READY) {
+      retv = this.readyJobs;
+    } else if (state == State.RUNNING) {
+      retv = this.runningJobs;
+    } else if (state == State.SUCCESS) {
+      retv = this.successfulJobs;
+    } else if (state == State.FAILED || state == State.DEPENDENT_FAILED) {
+      retv = this.failedJobs;
+    }
+    return retv;
+  }
+
+  /**
+   * Add a new job.
+   * 
+   * @param aJob
+   *          the new job
+   */
+  synchronized public void addJob(CrunchControlledJob aJob) {
+    aJob.setJobState(State.WAITING);
+    this.addToQueue(aJob);
+  }
+
+  synchronized private void checkRunningJobs() throws IOException,
+      InterruptedException {
+
+    Map<Integer, CrunchControlledJob> oldJobs = null;
+    oldJobs = this.runningJobs;
+    this.runningJobs = new Hashtable<Integer, CrunchControlledJob>();
+
+    for (CrunchControlledJob nextJob : oldJobs.values()) {
+      nextJob.checkState();
+      this.addToQueue(nextJob);
+    }
+  }
+
+  synchronized private void checkWaitingJobs() throws IOException,
+      InterruptedException {
+    Map<Integer, CrunchControlledJob> oldJobs = null;
+    oldJobs = this.waitingJobs;
+    this.waitingJobs = new Hashtable<Integer, CrunchControlledJob>();
+
+    for (CrunchControlledJob nextJob : oldJobs.values()) {
+      nextJob.checkState();
+      this.addToQueue(nextJob);
+    }
+  }
+
+  synchronized private void startReadyJobs() {
+    Map<Integer, CrunchControlledJob> oldJobs = null;
+    oldJobs = this.readyJobs;
+    this.readyJobs = new Hashtable<Integer, CrunchControlledJob>();
+
+    for (CrunchControlledJob nextJob : oldJobs.values()) {
+      // Submitting Job to Hadoop
+      nextJob.submit();
+      this.addToQueue(nextJob);
+    }
+  }
+
+  synchronized public void killAllRunningJobs() {
+    for (CrunchControlledJob job : runningJobs.values()) {
+      if (!job.isCompleted()) {
+        try {
+          job.killJob();
+        } catch (Exception e) {
+          log.error("Exception killing job: " + job.getJobName(), e);
+        }
+      }
+    }
+  }
+
+  synchronized public boolean allFinished() {
+    return this.waitingJobs.size() == 0 && this.readyJobs.size() == 0
+        && this.runningJobs.size() == 0;
+  }
+
+  /**
+   * Checks the states of the running jobs, updates the states of the waiting
+   * jobs, and submits the jobs in the READY state (i.e. those whose
+   * dependencies have all completed successfully).
+   */
+  public void pollJobStatusAndStartNewOnes() throws IOException, InterruptedException {
+    checkRunningJobs();
+    checkWaitingJobs();
+    startReadyJobs();
+  }
+}
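
Again for orientation: a minimal, hypothetical driver loop around the polling API
above, assuming the caller has already registered its CrunchControlledJobs via
addJob(). The one-second sleep and the class name are illustrative, not taken from
this patch.

    import org.apache.crunch.hadoop.mapreduce.lib.jobcontrol.CrunchJobControl;

    public class JobControlSketch {
      public static void drive(CrunchJobControl control) throws Exception {
        // Keep polling until the waiting, ready, and running tables are all empty.
        while (!control.allFinished()) {
          control.pollJobStatusAndStartNewOnes();
          Thread.sleep(1000); // polling interval is arbitrary here
        }
        if (!control.getFailedJobList().isEmpty()) {
          // Defensive cleanup; by this point nothing should still be running.
          control.killAllRunningJobs();
        }
      }
    }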

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/SingleUseIterable.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/SingleUseIterable.java b/crunch-core/src/main/java/org/apache/crunch/impl/SingleUseIterable.java
new file mode 100644
index 0000000..98f982f
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/SingleUseIterable.java
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl;
+
+import java.util.Iterator;
+
+/**
+ * Wrapper around a Reducer's input Iterable. Ensures that the
+ * {@link #iterator()} method is not called more than once.
+ */
+public class SingleUseIterable<T> implements Iterable<T> {
+
+  private boolean used = false;
+  private Iterable<T> wrappedIterable;
+
+  /**
+   * Instantiate around an Iterable that may only be used once.
+   * 
+   * @param toWrap iterable to wrap
+   */
+  public SingleUseIterable(Iterable<T> toWrap) {
+    this.wrappedIterable = toWrap;
+  }
+
+  @Override
+  public Iterator<T> iterator() {
+    if (used) {
+      throw new IllegalStateException("iterator() can only be called once on this Iterable");
+    }
+    used = true;
+    return wrappedIterable.iterator();
+  }
+
+}
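
A quick illustrative sketch (assuming java.util.Arrays is available): the wrapper hands out the underlying iterator exactly once and fails fast on any later request.

    Iterable<String> once = new SingleUseIterable<String>(Arrays.asList("a", "b", "c"));
    for (String s : once) {
      // first traversal succeeds and consumes the wrapped iterable
    }
    once.iterator(); // throws IllegalStateException: iterator() was already called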

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mem/MemPipeline.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mem/MemPipeline.java b/crunch-core/src/main/java/org/apache/crunch/impl/mem/MemPipeline.java
new file mode 100644
index 0000000..272b2af
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mem/MemPipeline.java
@@ -0,0 +1,275 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mem;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.crunch.CrunchRuntimeException;
+import org.apache.crunch.PCollection;
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pair;
+import org.apache.crunch.Pipeline;
+import org.apache.crunch.PipelineExecution;
+import org.apache.crunch.PipelineResult;
+import org.apache.crunch.Source;
+import org.apache.crunch.TableSource;
+import org.apache.crunch.Target;
+import org.apache.crunch.Target.WriteMode;
+import org.apache.crunch.impl.mem.collect.MemCollection;
+import org.apache.crunch.impl.mem.collect.MemTable;
+import org.apache.crunch.io.At;
+import org.apache.crunch.io.PathTarget;
+import org.apache.crunch.io.ReadableSource;
+import org.apache.crunch.types.PTableType;
+import org.apache.crunch.types.PType;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.Counters;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+
+public class MemPipeline implements Pipeline {
+
+  private static final Log LOG = LogFactory.getLog(MemPipeline.class);
+  private static Counters COUNTERS = new Counters();
+  private static final MemPipeline INSTANCE = new MemPipeline();
+
+  private int outputIndex = 0;
+  
+  public static Counters getCounters() {
+    return COUNTERS;
+  }
+  
+  public static void clearCounters() {
+    COUNTERS = new Counters();
+  }
+
+  public static Pipeline getInstance() {
+    return INSTANCE;
+  }
+
+  public static <T> PCollection<T> collectionOf(T... ts) {
+    return new MemCollection<T>(ImmutableList.copyOf(ts));
+  }
+
+  public static <T> PCollection<T> collectionOf(Iterable<T> collect) {
+    return new MemCollection<T>(collect);
+  }
+
+  public static <T> PCollection<T> typedCollectionOf(PType<T> ptype, T... ts) {
+    return new MemCollection<T>(ImmutableList.copyOf(ts), ptype, null);
+  }
+
+  public static <T> PCollection<T> typedCollectionOf(PType<T> ptype, Iterable<T> collect) {
+    return new MemCollection<T>(collect, ptype, null);
+  }
+
+  public static <S, T> PTable<S, T> tableOf(S s, T t, Object... more) {
+    List<Pair<S, T>> pairs = Lists.newArrayList();
+    pairs.add(Pair.of(s, t));
+    for (int i = 0; i < more.length; i += 2) {
+      pairs.add(Pair.of((S) more[i], (T) more[i + 1]));
+    }
+    return new MemTable<S, T>(pairs);
+  }
+
+  public static <S, T> PTable<S, T> typedTableOf(PTableType<S, T> ptype, S s, T t, Object... more) {
+    List<Pair<S, T>> pairs = Lists.newArrayList();
+    pairs.add(Pair.of(s, t));
+    for (int i = 0; i < more.length; i += 2) {
+      pairs.add(Pair.of((S) more[i], (T) more[i + 1]));
+    }
+    return new MemTable<S, T>(pairs, ptype, null);
+  }
+
+  public static <S, T> PTable<S, T> tableOf(Iterable<Pair<S, T>> pairs) {
+    return new MemTable<S, T>(pairs);
+  }
+
+  public static <S, T> PTable<S, T> typedTableOf(PTableType<S, T> ptype, Iterable<Pair<S, T>> pairs) {
+    return new MemTable<S, T>(pairs, ptype, null);
+  }
+
+  private Configuration conf = new Configuration();
+  private Set<Target> activeTargets = Sets.newHashSet();
+  
+  private MemPipeline() {
+  }
+
+  @Override
+  public void setConfiguration(Configuration conf) {
+    this.conf = conf;
+  }
+
+  @Override
+  public Configuration getConfiguration() {
+    return conf;
+  }
+
+  @Override
+  public <T> PCollection<T> read(Source<T> source) {
+    if (source instanceof ReadableSource) {
+      try {
+        Iterable<T> iterable = ((ReadableSource<T>) source).read(conf);
+        return new MemCollection<T>(iterable, source.getType(), source.toString());
+      } catch (IOException e) {
+        LOG.error("Exception reading source: " + source.toString(), e);
+        throw new IllegalStateException(e);
+      }
+    }
+    LOG.error("Source " + source + " is not readable");
+    throw new IllegalStateException("Source " + source + " is not readable");
+  }
+
+  @Override
+  public <K, V> PTable<K, V> read(TableSource<K, V> source) {
+    if (source instanceof ReadableSource) {
+      try {
+        Iterable<Pair<K, V>> iterable = ((ReadableSource<Pair<K, V>>) source).read(conf);
+        return new MemTable<K, V>(iterable, source.getTableType(), source.toString());
+      } catch (IOException e) {
+        LOG.error("Exception reading source: " + source.toString(), e);
+        throw new IllegalStateException(e);
+      }
+    }
+    LOG.error("Source " + source + " is not readable");
+    throw new IllegalStateException("Source " + source + " is not readable");
+  }
+
+  @Override
+  public void write(PCollection<?> collection, Target target) {
+    write(collection, target, Target.WriteMode.DEFAULT);
+  }
+  
+  @Override
+  public void write(PCollection<?> collection, Target target,
+      Target.WriteMode writeMode) {
+    target.handleExisting(writeMode, getConfiguration());
+    if (writeMode != WriteMode.APPEND && activeTargets.contains(target)) {
+      throw new CrunchRuntimeException("Target " + target + " is already written in the current run." +
+          " Use WriteMode.APPEND in order to write additional data to it.");
+    }
+    activeTargets.add(target);
+    if (target instanceof PathTarget) {
+      Path path = ((PathTarget) target).getPath();
+      try {
+        FileSystem fs = path.getFileSystem(conf);
+        FSDataOutputStream os = fs.create(new Path(path, "out" + outputIndex));
+        outputIndex++;
+        if (collection instanceof PTable) {
+          for (Object o : collection.materialize()) {
+            Pair p = (Pair) o;
+            os.writeBytes(p.first().toString());
+            os.writeBytes("\t");
+            os.writeBytes(p.second().toString());
+            os.writeBytes("\r\n");
+          }
+        } else {
+          for (Object o : collection.materialize()) {
+            os.writeBytes(o.toString() + "\r\n");
+          }
+        }
+        os.close();
+      } catch (IOException e) {
+        LOG.error("Exception writing target: " + target, e);
+      }
+    } else {
+      LOG.error("Target " + target + " is not a PathTarget instance");
+    }
+  }
+
+  @Override
+  public PCollection<String> readTextFile(String pathName) {
+    return read(At.textFile(pathName));
+  }
+
+  @Override
+  public <T> void writeTextFile(PCollection<T> collection, String pathName) {
+    write(collection, At.textFile(pathName));
+  }
+
+  @Override
+  public <T> Iterable<T> materialize(PCollection<T> pcollection) {
+    return pcollection.materialize();
+  }
+
+  @Override
+  public PipelineExecution runAsync() {
+    activeTargets.clear();
+    return new PipelineExecution() {
+      @Override
+      public String getPlanDotFile() {
+        return "";
+      }
+
+      @Override
+      public void waitFor(long timeout, TimeUnit timeUnit) throws InterruptedException {
+        // no-op
+      }
+
+      @Override
+      public void waitUntilDone() throws InterruptedException {
+        // no-op
+      }
+
+      @Override
+      public Status getStatus() {
+        return Status.SUCCEEDED;
+      }
+
+      @Override
+      public PipelineResult getResult() {
+        return new PipelineResult(ImmutableList.of(new PipelineResult.StageResult("MemPipelineStage", COUNTERS)));
+      }
+
+      @Override
+      public void kill() {
+      }
+    };
+  }
+  
+  @Override
+  public PipelineResult run() {
+    activeTargets.clear();
+    return new PipelineResult(ImmutableList.of(new PipelineResult.StageResult("MemPipelineStage", COUNTERS)));
+  }
+
+  @Override
+  public PipelineResult done() {
+    return run();
+  }
+
+  @Override
+  public void enableDebug() {
+    LOG.info("Note: in-memory pipelines do not have debug logging");
+  }
+
+  @Override
+  public String getName() {
+    return "Memory Pipeline";
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mem/collect/MemCollection.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mem/collect/MemCollection.java b/crunch-core/src/main/java/org/apache/crunch/impl/mem/collect/MemCollection.java
new file mode 100644
index 0000000..c97fac6
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mem/collect/MemCollection.java
@@ -0,0 +1,295 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mem.collect;
+
+import java.lang.reflect.Method;
+import java.util.Collection;
+
+import javassist.util.proxy.MethodFilter;
+import javassist.util.proxy.MethodHandler;
+import javassist.util.proxy.ProxyFactory;
+
+import org.apache.crunch.DoFn;
+import org.apache.crunch.FilterFn;
+import org.apache.crunch.MapFn;
+import org.apache.crunch.PCollection;
+import org.apache.crunch.PObject;
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pair;
+import org.apache.crunch.ParallelDoOptions;
+import org.apache.crunch.Pipeline;
+import org.apache.crunch.Target;
+import org.apache.crunch.fn.ExtractKeyFn;
+import org.apache.crunch.impl.mem.MemPipeline;
+import org.apache.crunch.impl.mem.emit.InMemoryEmitter;
+import org.apache.crunch.lib.Aggregate;
+import org.apache.crunch.materialize.pobject.CollectionPObject;
+import org.apache.crunch.types.PTableType;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.PTypeFamily;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.OutputCommitter;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.StatusReporter;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hadoop.mapreduce.TaskInputOutputContext;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+
+public class MemCollection<S> implements PCollection<S> {
+
+  private final Collection<S> collect;
+  private final PType<S> ptype;
+  private String name;
+
+  public MemCollection(Iterable<S> collect) {
+    this(collect, null, null);
+  }
+
+  public MemCollection(Iterable<S> collect, PType<S> ptype) {
+    this(collect, ptype, null);
+  }
+
+  public MemCollection(Iterable<S> collect, PType<S> ptype, String name) {
+    this.collect = ImmutableList.copyOf(collect);
+    this.ptype = ptype;
+    this.name = name;
+  }
+
+  @Override
+  public Pipeline getPipeline() {
+    return MemPipeline.getInstance();
+  }
+
+  @Override
+  public PCollection<S> union(PCollection<S> other) {
+    return union(new PCollection[] { other });
+  }
+  
+  @Override
+  public PCollection<S> union(PCollection<S>... collections) {
+    Collection<S> output = Lists.newArrayList();
+    for (PCollection<S> pcollect : collections) {
+      for (S s : pcollect.materialize()) {
+        output.add(s);
+      }
+    }
+    output.addAll(collect);
+    return new MemCollection<S>(output, collections[0].getPType());
+  }
+
+  @Override
+  public <T> PCollection<T> parallelDo(DoFn<S, T> doFn, PType<T> type) {
+    return parallelDo(null, doFn, type);
+  }
+
+  @Override
+  public <T> PCollection<T> parallelDo(String name, DoFn<S, T> doFn, PType<T> type) {
+    return parallelDo(name, doFn, type, ParallelDoOptions.builder().build());
+  }
+  
+  @Override
+  public <T> PCollection<T> parallelDo(String name, DoFn<S, T> doFn, PType<T> type,
+      ParallelDoOptions options) {
+    InMemoryEmitter<T> emitter = new InMemoryEmitter<T>();
+    doFn.setContext(getInMemoryContext(getPipeline().getConfiguration()));
+    doFn.initialize();
+    for (S s : collect) {
+      doFn.process(s, emitter);
+    }
+    doFn.cleanup(emitter);
+    return new MemCollection<T>(emitter.getOutput(), type, name);
+  }
+
+  @Override
+  public <K, V> PTable<K, V> parallelDo(DoFn<S, Pair<K, V>> doFn, PTableType<K, V> type) {
+    return parallelDo(null, doFn, type);
+  }
+
+  @Override
+  public <K, V> PTable<K, V> parallelDo(String name, DoFn<S, Pair<K, V>> doFn, PTableType<K, V> type) {
+    return parallelDo(name, doFn, type, ParallelDoOptions.builder().build());
+  }
+  
+  @Override
+  public <K, V> PTable<K, V> parallelDo(String name, DoFn<S, Pair<K, V>> doFn, PTableType<K, V> type,
+      ParallelDoOptions options) {
+    InMemoryEmitter<Pair<K, V>> emitter = new InMemoryEmitter<Pair<K, V>>();
+    doFn.setContext(getInMemoryContext(getPipeline().getConfiguration()));
+    doFn.initialize();
+    for (S s : collect) {
+      doFn.process(s, emitter);
+    }
+    doFn.cleanup(emitter);
+    return new MemTable<K, V>(emitter.getOutput(), type, name);
+  }
+
+  @Override
+  public PCollection<S> write(Target target) {
+    getPipeline().write(this, target);
+    return this;
+  }
+
+  @Override
+  public PCollection<S> write(Target target, Target.WriteMode writeMode) {
+    getPipeline().write(this, target, writeMode);
+    return this;
+  }
+
+  @Override
+  public Iterable<S> materialize() {
+    return collect;
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public PObject<Collection<S>> asCollection() {
+    return new CollectionPObject<S>(this);
+  }
+
+  public Collection<S> getCollection() {
+    return collect;
+  }
+
+  @Override
+  public PType<S> getPType() {
+    return ptype;
+  }
+
+  @Override
+  public PTypeFamily getTypeFamily() {
+    if (ptype != null) {
+      return ptype.getFamily();
+    }
+    return null;
+  }
+
+  @Override
+  public long getSize() {
+    return collect.isEmpty() ? 0 : 1; // getSize is only used for pipeline optimization in MR
+  }
+
+  @Override
+  public String getName() {
+    return name;
+  }
+
+  @Override
+  public String toString() {
+    return collect.toString();
+  }
+
+  @Override
+  public PTable<S, Long> count() {
+    return Aggregate.count(this);
+  }
+
+  @Override
+  public PObject<Long> length() {
+    return Aggregate.length(this);
+  }
+
+  @Override
+  public PObject<S> max() {
+    return Aggregate.max(this);
+  }
+
+  @Override
+  public PObject<S> min() {
+    return Aggregate.min(this);
+  }
+
+  @Override
+  public PCollection<S> filter(FilterFn<S> filterFn) {
+    return parallelDo(filterFn, getPType());
+  }
+
+  @Override
+  public PCollection<S> filter(String name, FilterFn<S> filterFn) {
+    return parallelDo(name, filterFn, getPType());
+  }
+
+  @Override
+  public <K> PTable<K, S> by(MapFn<S, K> mapFn, PType<K> keyType) {
+    return parallelDo(new ExtractKeyFn<K, S>(mapFn), getTypeFamily().tableOf(keyType, getPType()));
+  }
+
+  @Override
+  public <K> PTable<K, S> by(String name, MapFn<S, K> mapFn, PType<K> keyType) {
+    return parallelDo(name, new ExtractKeyFn<K, S>(mapFn), getTypeFamily().tableOf(keyType, getPType()));
+  }
+
+  /**
+   * Creates a {@link TaskInputOutputContext} that only provides a
+   * {@linkplain Configuration}. The proxy is built with javassist because the
+   * API differs across Hadoop versions: in Hadoop 1.0.3,
+   * {@linkplain TaskInputOutputContext} is an abstract class, while in version 2
+   * it is an interface.
+   * <p>
+   * Note: the intention here is to provide only the bare essentials required to
+   * make the {@linkplain MemPipeline} work; it lacks even the basic features
+   * that would provide real support for unit testing a pipeline.
+   */
+  private static TaskInputOutputContext<?, ?, ?, ?> getInMemoryContext(final Configuration conf) {
+    ProxyFactory factory = new ProxyFactory();
+    Class<TaskInputOutputContext> superType = TaskInputOutputContext.class;
+    Class[] types = new Class[0];
+    Object[] args = new Object[0];
+    if (superType.isInterface()) {
+      factory.setInterfaces(new Class[] { superType });
+    } else {
+      types = new Class[] { Configuration.class, TaskAttemptID.class, RecordWriter.class, OutputCommitter.class,
+          StatusReporter.class };
+      args = new Object[] { conf, new TaskAttemptID(), null, null, null };
+      factory.setSuperclass(superType);
+    }
+    factory.setFilter(new MethodFilter() {
+      @Override
+      public boolean isHandled(Method m) {
+        String name = m.getName();
+        return "getConfiguration".equals(name) || "getCounter".equals(name) || "progress".equals(name);
+      }
+    });
+    MethodHandler handler = new MethodHandler() {
+      @Override
+      public Object invoke(Object arg0, Method m, Method arg2, Object[] args) throws Throwable {
+        String name = m.getName();
+        if ("getConfiguration".equals(name)) {
+          return conf;
+        } else if ("progress".equals(name)) {
+          // no-op
+          return null;
+        } else { // getCounter
+          if (args.length == 1) {
+            return MemPipeline.getCounters().findCounter((Enum<?>) args[0]);
+          } else {
+            return MemPipeline.getCounters().findCounter((String) args[0], (String) args[1]);
+          }
+        }
+      }
+    };
+    try {
+      Object newInstance = factory.create(types, args, handler);
+      return (TaskInputOutputContext<?, ?, ?, ?>) newInstance;
+    } catch (Exception e) {
+      e.printStackTrace();
+      throw new RuntimeException(e);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mem/collect/MemGroupedTable.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mem/collect/MemGroupedTable.java b/crunch-core/src/main/java/org/apache/crunch/impl/mem/collect/MemGroupedTable.java
new file mode 100644
index 0000000..d105bb4
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mem/collect/MemGroupedTable.java
@@ -0,0 +1,113 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mem.collect;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.crunch.Aggregator;
+import org.apache.crunch.CombineFn;
+import org.apache.crunch.GroupingOptions;
+import org.apache.crunch.PCollection;
+import org.apache.crunch.PGroupedTable;
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pair;
+import org.apache.crunch.Pipeline;
+import org.apache.crunch.Target;
+import org.apache.crunch.fn.Aggregators;
+import org.apache.crunch.types.PTableType;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.PTypeFamily;
+import org.apache.hadoop.io.RawComparator;
+import org.apache.hadoop.util.ReflectionUtils;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+
+class MemGroupedTable<K, V> extends MemCollection<Pair<K, Iterable<V>>> implements PGroupedTable<K, V> {
+
+  private final MemTable<K, V> parent;
+
+  private static <S, T> Iterable<Pair<S, Iterable<T>>> buildMap(MemTable<S, T> parent, GroupingOptions options) {
+    PType<S> keyType = parent.getKeyType();
+    Shuffler<S, T> shuffler = Shuffler.create(keyType, options, parent.getPipeline());
+
+    for (Pair<S, T> pair : parent.materialize()) {
+      shuffler.add(pair);
+    }
+
+    return shuffler;
+  }
+
+  public MemGroupedTable(MemTable<K, V> parent, GroupingOptions options) {
+    super(buildMap(parent, options));
+    this.parent = parent;
+  }
+
+  @Override
+  public PCollection<Pair<K, Iterable<V>>> union(PCollection<Pair<K, Iterable<V>>>... collections) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public PCollection<Pair<K, Iterable<V>>> write(Target target) {
+    getPipeline().write(this.ungroup(), target);
+    return this;
+  }
+
+  @Override
+  public PType<Pair<K, Iterable<V>>> getPType() {
+    PTableType<K, V> parentType = parent.getPTableType();
+    if (parentType != null) {
+      return parentType.getGroupedTableType();
+    }
+    return null;
+  }
+
+  @Override
+  public PTypeFamily getTypeFamily() {
+    return parent.getTypeFamily();
+  }
+
+  @Override
+  public long getSize() {
+    return 1; // getSize is only used for pipeline optimization in MR
+  }
+
+  @Override
+  public String getName() {
+    return "MemGrouped(" + parent.getName() + ")";
+  }
+
+  @Override
+  public PTable<K, V> combineValues(CombineFn<K, V> combineFn) {
+    return parallelDo(combineFn, parent.getPTableType());
+  }
+
+  @Override
+  public PTable<K, V> combineValues(Aggregator<V> agg) {
+    return combineValues(Aggregators.<K, V>toCombineFn(agg));
+  }
+
+  @Override
+  public PTable<K, V> ungroup() {
+    return parent;
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mem/collect/MemTable.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mem/collect/MemTable.java b/crunch-core/src/main/java/org/apache/crunch/impl/mem/collect/MemTable.java
new file mode 100644
index 0000000..f8a5960
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mem/collect/MemTable.java
@@ -0,0 +1,177 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mem.collect;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.crunch.FilterFn;
+import org.apache.crunch.GroupingOptions;
+import org.apache.crunch.PCollection;
+import org.apache.crunch.PGroupedTable;
+import org.apache.crunch.PObject;
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pair;
+import org.apache.crunch.Target;
+import org.apache.crunch.lib.Aggregate;
+import org.apache.crunch.lib.Cogroup;
+import org.apache.crunch.lib.Join;
+import org.apache.crunch.lib.PTables;
+import org.apache.crunch.materialize.MaterializableMap;
+import org.apache.crunch.materialize.pobject.MapPObject;
+import org.apache.crunch.types.PTableType;
+import org.apache.crunch.types.PType;
+
+import com.google.common.collect.Lists;
+
+public class MemTable<K, V> extends MemCollection<Pair<K, V>> implements PTable<K, V> {
+
+  private PTableType<K, V> ptype;
+
+  public MemTable(Iterable<Pair<K, V>> collect) {
+    this(collect, null, null);
+  }
+
+  public MemTable(Iterable<Pair<K, V>> collect, PTableType<K, V> ptype, String name) {
+    super(collect, ptype, name);
+    this.ptype = ptype;
+  }
+
+  @Override
+  public PTable<K, V> union(PTable<K, V> other) {
+    return union(new PTable[] { other });
+  }
+  
+  @Override
+  public PTable<K, V> union(PTable<K, V>... others) {
+    List<Pair<K, V>> values = Lists.newArrayList();
+    values.addAll(getCollection());
+    for (PTable<K, V> ptable : others) {
+      for (Pair<K, V> p : ptable.materialize()) {
+        values.add(p);
+      }
+    }
+    return new MemTable<K, V>(values, others[0].getPTableType(), null);
+  }
+
+  @Override
+  public PGroupedTable<K, V> groupByKey() {
+    return groupByKey(null);
+  }
+
+  @Override
+  public PGroupedTable<K, V> groupByKey(int numPartitions) {
+    return groupByKey(null);
+  }
+
+  @Override
+  public PGroupedTable<K, V> groupByKey(GroupingOptions options) {
+    return new MemGroupedTable<K, V>(this, options);
+  }
+
+  @Override
+  public PTable<K, V> write(Target target) {
+    super.write(target);
+    return this;
+  }
+
+  @Override
+  public PTable<K, V> write(Target target, Target.WriteMode writeMode) {
+    getPipeline().write(this, target, writeMode);
+    return this;
+  }
+  
+  @Override
+  public PTableType<K, V> getPTableType() {
+    return ptype;
+  }
+
+  @Override
+  public PType<K> getKeyType() {
+    if (ptype != null) {
+      return ptype.getKeyType();
+    }
+    return null;
+  }
+
+  @Override
+  public PType<V> getValueType() {
+    if (ptype != null) {
+      return ptype.getValueType();
+    }
+    return null;
+  }
+
+  @Override
+  public PTable<K, V> filter(FilterFn<Pair<K, V>> filterFn) {
+    return parallelDo(filterFn, getPTableType());
+  }
+  
+  @Override
+  public PTable<K, V> filter(String name, FilterFn<Pair<K, V>> filterFn) {
+    return parallelDo(name, filterFn, getPTableType());
+  }
+
+  @Override
+  public PTable<K, V> top(int count) {
+    return Aggregate.top(this, count, true);
+  }
+
+  @Override
+  public PTable<K, V> bottom(int count) {
+    return Aggregate.top(this, count, false);
+  }
+
+  @Override
+  public PTable<K, Collection<V>> collectValues() {
+    return Aggregate.collectValues(this);
+  }
+
+  @Override
+  public <U> PTable<K, Pair<V, U>> join(PTable<K, U> other) {
+    return Join.join(this, other);
+  }
+
+  @Override
+  public <U> PTable<K, Pair<Collection<V>, Collection<U>>> cogroup(PTable<K, U> other) {
+    return Cogroup.cogroup(this, other);
+  }
+
+  @Override
+  public PCollection<K> keys() {
+    return PTables.keys(this);
+  }
+
+  @Override
+  public PCollection<V> values() {
+    return PTables.values(this);
+  }
+
+  @Override
+  public Map<K, V> materializeToMap() {
+    return new MaterializableMap<K, V>(this.materialize());
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public PObject<Map<K, V>> asMap() {
+    return new MapPObject<K, V>(this);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mem/collect/Shuffler.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mem/collect/Shuffler.java b/crunch-core/src/main/java/org/apache/crunch/impl/mem/collect/Shuffler.java
new file mode 100644
index 0000000..2e8f9eb
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mem/collect/Shuffler.java
@@ -0,0 +1,149 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mem.collect;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.TreeMap;
+
+import org.apache.crunch.GroupingOptions;
+import org.apache.crunch.Pair;
+import org.apache.crunch.Pipeline;
+import org.apache.crunch.impl.SingleUseIterable;
+import org.apache.crunch.types.PType;
+import org.apache.hadoop.io.RawComparator;
+import org.apache.hadoop.util.ReflectionUtils;
+
+import com.google.common.base.Function;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Iterators;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+
+/**
+ * In-memory versions of common MapReduce patterns for aggregating key-value data.
+ */
+abstract class Shuffler<K, V> implements Iterable<Pair<K, Iterable<V>>> {
+
+  public abstract void add(Pair<K, V> record);
+  
+  private static <K, V> Map<K, V> getMapForKeyType(PType<?> ptype) {
+    if (ptype != null && Comparable.class.isAssignableFrom(ptype.getTypeClass())) {
+      return new TreeMap<K, V>();
+    } else {
+      return Maps.newHashMap();
+    }
+  }
+  
+  public static <S, T> Shuffler<S, T> create(PType<S> keyType, GroupingOptions options,
+      Pipeline pipeline) {
+    Map<S, Collection<T>> map = getMapForKeyType(keyType);
+    
+    if (options != null) {
+      if (Pair.class.equals(keyType.getTypeClass()) && options.getGroupingComparatorClass() != null) {
+        PType<?> pairKey = keyType.getSubTypes().get(0);
+        return new SecondarySortShuffler(getMapForKeyType(pairKey));
+      } else if (options.getSortComparatorClass() != null) {
+        RawComparator<S> rc = ReflectionUtils.newInstance(options.getSortComparatorClass(),
+            pipeline.getConfiguration());
+        map = new TreeMap<S, Collection<T>>(rc);
+      }
+    }
+    
+    return new MapShuffler<S, T>(map);
+  }
+  
+  private static class HFunction<K, V> implements Function<Map.Entry<K, Collection<V>>, Pair<K, Iterable<V>>> {
+    @Override
+    public Pair<K, Iterable<V>> apply(Map.Entry<K, Collection<V>> input) {
+      return Pair.<K, Iterable<V>>of(input.getKey(), new SingleUseIterable<V>(input.getValue()));
+    }
+  }
+  
+  private static class MapShuffler<K, V> extends Shuffler<K, V> {
+    private final Map<K, Collection<V>> map;
+    
+    public MapShuffler(Map<K, Collection<V>> map) {
+      this.map = map;
+    }
+    
+    @Override
+    public Iterator<Pair<K, Iterable<V>>> iterator() {
+      return Iterators.transform(map.entrySet().iterator(),
+          new HFunction<K, V>());
+    }
+
+    @Override
+    public void add(Pair<K, V> record) {
+      if (!map.containsKey(record.first())) {
+        Collection<V> values = Lists.newArrayList();
+        map.put(record.first(), values);
+      }
+      map.get(record.first()).add(record.second());
+    }
+  }
+
+  private static class SSFunction<K, SK, V> implements
+      Function<Map.Entry<K, List<Pair<SK, V>>>, Pair<Pair<K, SK>, Iterable<V>>> {
+    @Override
+    public Pair<Pair<K, SK>, Iterable<V>> apply(Entry<K, List<Pair<SK, V>>> input) {
+      List<Pair<SK, V>> values = input.getValue();
+      Collections.sort(values, new Comparator<Pair<SK, V>>() {
+        @Override
+        public int compare(Pair<SK, V> o1, Pair<SK, V> o2) {
+          return ((Comparable) o1.first()).compareTo(o2.first());
+        }
+      });
+      Pair<K, SK> key = Pair.of(input.getKey(), values.get(0).first());
+      return Pair.of(key, Iterables.transform(values, new Function<Pair<SK, V>, V>() {
+        @Override
+        public V apply(Pair<SK, V> input) {
+          return input.second();
+        }
+      }));
+    }
+  }
+
+  private static class SecondarySortShuffler<K, SK, V> extends Shuffler<Pair<K, SK>, V> {
+
+    private Map<K, List<Pair<SK, V>>> map;
+    
+    public SecondarySortShuffler(Map<K, List<Pair<SK, V>>> map) {
+      this.map = map;
+    }
+    
+    @Override
+    public Iterator<Pair<Pair<K, SK>, Iterable<V>>> iterator() {
+      return Iterators.transform(map.entrySet().iterator(), new SSFunction<K, SK, V>());
+    }
+
+    @Override
+    public void add(Pair<Pair<K, SK>, V> record) {
+      K primary = record.first().first();
+      if (!map.containsKey(primary)) {
+        map.put(primary, Lists.<Pair<SK, V>>newArrayList());
+      }
+      map.get(primary).add(Pair.of(record.first().second(), record.second()));
+    }
+  }
+}
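
An illustrative sketch of what the shuffler produces when driven through the public API; the table contents are invented. Grouping a MemTable by key routes through Shuffler.create, and because the String key type is Comparable the values land in a TreeMap, so keys come back in sorted order.

    PTable<String, Integer> table = MemPipeline.typedTableOf(
        Writables.tableOf(Writables.strings(), Writables.ints()),
        "a", 1, "b", 2, "a", 3);
    for (Pair<String, Iterable<Integer>> entry : table.groupByKey().materialize()) {
      int sum = 0;
      for (Integer value : entry.second()) { // each grouped Iterable may be consumed only once
        sum += value;
      }
      System.out.println(entry.first() + " -> " + sum); // a -> 4, b -> 2
    }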

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mem/emit/InMemoryEmitter.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mem/emit/InMemoryEmitter.java b/crunch-core/src/main/java/org/apache/crunch/impl/mem/emit/InMemoryEmitter.java
new file mode 100644
index 0000000..6976615
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mem/emit/InMemoryEmitter.java
@@ -0,0 +1,57 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mem.emit;
+
+import java.util.List;
+
+import org.apache.crunch.Emitter;
+
+import com.google.common.collect.Lists;
+
+/**
+ * An {@code Emitter} instance that writes emitted records to a backing
+ * {@code List}.
+ * 
+ * @param <T> the type of the emitted records
+ */
+public class InMemoryEmitter<T> implements Emitter<T> {
+
+  private final List<T> output;
+
+  public InMemoryEmitter() {
+    this(Lists.<T> newArrayList());
+  }
+
+  public InMemoryEmitter(List<T> output) {
+    this.output = output;
+  }
+
+  @Override
+  public void emit(T emitted) {
+    output.add(emitted);
+  }
+
+  @Override
+  public void flush() {
+
+  }
+
+  public List<T> getOutput() {
+    return output;
+  }
+}
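
A small illustrative sketch of collecting a DoFn's output by hand with this class (the values are made up):

    InMemoryEmitter<Integer> emitter = new InMemoryEmitter<Integer>();
    emitter.emit(1);
    emitter.emit(2);
    List<Integer> out = emitter.getOutput(); // [1, 2], in emission order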

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mem/package-info.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mem/package-info.java b/crunch-core/src/main/java/org/apache/crunch/impl/mem/package-info.java
new file mode 100644
index 0000000..a55b673
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mem/package-info.java
@@ -0,0 +1,22 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * In-memory Pipeline implementation for rapid prototyping and testing.
+ */
+package org.apache.crunch.impl.mem;
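
For orientation, a hedged sketch of driving the in-memory pipeline end to end; the input strings and the anonymous MapFn are invented, and only APIs that appear in this patch (typedCollectionOf, parallelDo, materialize) plus the Writables factory methods are used.

    PCollection<String> lines = MemPipeline.typedCollectionOf(
        Writables.strings(), "hello world", "hello crunch");
    PCollection<Integer> lengths = lines.parallelDo(new MapFn<String, Integer>() {
      @Override
      public Integer map(String input) {
        return input.length();
      }
    }, Writables.ints());
    for (Integer length : lengths.materialize()) {
      System.out.println(length); // 11, then 12
    }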

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/MRPipeline.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/MRPipeline.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/MRPipeline.java
new file mode 100644
index 0000000..00cf486
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/MRPipeline.java
@@ -0,0 +1,396 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr;
+
+import java.io.IOException;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.crunch.CrunchRuntimeException;
+import org.apache.crunch.MapFn;
+import org.apache.crunch.PCollection;
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pipeline;
+import org.apache.crunch.PipelineExecution;
+import org.apache.crunch.PipelineResult;
+import org.apache.crunch.Source;
+import org.apache.crunch.SourceTarget;
+import org.apache.crunch.TableSource;
+import org.apache.crunch.Target;
+import org.apache.crunch.Target.WriteMode;
+import org.apache.crunch.fn.IdentityFn;
+import org.apache.crunch.impl.mr.collect.InputCollection;
+import org.apache.crunch.impl.mr.collect.InputTable;
+import org.apache.crunch.impl.mr.collect.PCollectionImpl;
+import org.apache.crunch.impl.mr.collect.PGroupedTableImpl;
+import org.apache.crunch.impl.mr.collect.UnionCollection;
+import org.apache.crunch.impl.mr.collect.UnionTable;
+import org.apache.crunch.impl.mr.exec.MRExecutor;
+import org.apache.crunch.impl.mr.plan.MSCRPlanner;
+import org.apache.crunch.impl.mr.run.RuntimeParameters;
+import org.apache.crunch.io.From;
+import org.apache.crunch.io.ReadableSource;
+import org.apache.crunch.io.ReadableSourceTarget;
+import org.apache.crunch.io.To;
+import org.apache.crunch.materialize.MaterializableIterable;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.writable.Writables;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+
+/**
+ * Pipeline implementation that is executed within Hadoop MapReduce.
+ */
+public class MRPipeline implements Pipeline {
+
+  private static final Log LOG = LogFactory.getLog(MRPipeline.class);
+
+  private static final Random RANDOM = new Random();
+
+  private final Class<?> jarClass;
+  private final String name;
+  private final Map<PCollectionImpl<?>, Set<Target>> outputTargets;
+  private final Map<PCollectionImpl<?>, MaterializableIterable<?>> outputTargetsToMaterialize;
+  private Path tempDirectory;
+  private int tempFileIndex;
+  private int nextAnonymousStageId;
+
+  private Configuration conf;
+
+  /**
+   * Instantiate with a default Configuration and name.
+   * 
+   * @param jarClass Class containing the main driver method for running the pipeline
+   */
+  public MRPipeline(Class<?> jarClass) {
+    this(jarClass, new Configuration());
+  }
+
+  /**
+   * Instantiate with a custom pipeline name. The name will be displayed in the Hadoop JobTracker.
+   * 
+   * @param jarClass Class containing the main driver method for running the pipeline
+   * @param name Display name of the pipeline
+   */
+  public MRPipeline(Class<?> jarClass, String name) {
+    this(jarClass, name, new Configuration());
+  }
+
+  /**
+   * Instantiate with a custom configuration and default naming.
+   * 
+   * @param jarClass Class containing the main driver method for running the pipeline
+   * @param conf Configuration to be used within all MapReduce jobs run in the pipeline
+   */
+  public MRPipeline(Class<?> jarClass, Configuration conf) {
+    this(jarClass, jarClass.getName(), conf);
+  }
+
+  /**
+   * Instantiate with a custom name and configuration. The name will be displayed in the Hadoop
+   * JobTracker.
+   * 
+   * @param jarClass Class containing the main driver method for running the pipeline
+   * @param name Display name of the pipeline
+   * @param conf Configuration to be used within all MapReduce jobs run in the pipeline
+   */
+  public MRPipeline(Class<?> jarClass, String name, Configuration conf) {
+    this.jarClass = jarClass;
+    this.name = name;
+    this.outputTargets = Maps.newHashMap();
+    this.outputTargetsToMaterialize = Maps.newHashMap();
+    this.conf = conf;
+    this.tempDirectory = createTempDirectory(conf);
+    this.tempFileIndex = 0;
+    this.nextAnonymousStageId = 0;
+  }
+
+  @Override
+  public Configuration getConfiguration() {
+    return conf;
+  }
+
+  @Override
+  public void setConfiguration(Configuration conf) {
+    this.conf = conf;
+    this.tempDirectory = createTempDirectory(conf);
+  }
+
+  public MRExecutor plan() {
+    Map<PCollectionImpl<?>, MaterializableIterable> toMaterialize = Maps.newHashMap();
+    for (PCollectionImpl<?> c : outputTargets.keySet()) {
+      if (outputTargetsToMaterialize.containsKey(c)) {
+        toMaterialize.put(c, outputTargetsToMaterialize.get(c));
+        outputTargetsToMaterialize.remove(c);
+      }
+    }
+    MSCRPlanner planner = new MSCRPlanner(this, outputTargets, toMaterialize);
+    try {
+      return planner.plan(jarClass, conf);
+    } catch (IOException e) {
+      throw new CrunchRuntimeException(e);
+    }
+  }
+
+  @Override
+  public PipelineResult run() {
+    try {
+      PipelineExecution pipelineExecution = runAsync();
+      pipelineExecution.waitUntilDone();
+      return pipelineExecution.getResult();
+    } catch (InterruptedException e) {
+      // TODO: How to handle this without changing signature?
+      LOG.error("Exception running pipeline", e);
+      return PipelineResult.EMPTY;
+    }
+  }
+  
+  @Override
+  public PipelineExecution runAsync() {
+    PipelineExecution res = plan().execute();
+    outputTargets.clear();
+    return res;
+  }
+
+  @Override
+  public PipelineResult done() {
+    PipelineResult res = null;
+    if (!outputTargets.isEmpty()) {
+      res = run();
+    }
+    cleanup();
+    return res;
+  }
+
+  public <S> PCollection<S> read(Source<S> source) {
+    return new InputCollection<S>(source, this);
+  }
+
+  public <K, V> PTable<K, V> read(TableSource<K, V> source) {
+    return new InputTable<K, V>(source, this);
+  }
+
+  public PCollection<String> readTextFile(String pathName) {
+    return read(From.textFile(pathName));
+  }
+
+  public void write(PCollection<?> pcollection, Target target) {
+    write(pcollection, target, Target.WriteMode.DEFAULT);
+  }
+  
+  @SuppressWarnings("unchecked")
+  public void write(PCollection<?> pcollection, Target target,
+      Target.WriteMode writeMode) {
+    if (pcollection instanceof PGroupedTableImpl) {
+      pcollection = ((PGroupedTableImpl<?, ?>) pcollection).ungroup();
+    } else if (pcollection instanceof UnionCollection || pcollection instanceof UnionTable) {
+      pcollection = pcollection.parallelDo("UnionCollectionWrapper",
+          (MapFn) IdentityFn.<Object> getInstance(), pcollection.getPType());
+    }
+    target.handleExisting(writeMode, getConfiguration());
+    if (writeMode != WriteMode.APPEND && targetInCurrentRun(target)) {
+      throw new CrunchRuntimeException("Target " + target + " is already written in the current run." +
+          " Use WriteMode.APPEND in order to write additional data to it.");
+    }
+    addOutput((PCollectionImpl<?>) pcollection, target);
+  }
+
+  private boolean targetInCurrentRun(Target target) {
+    for (Set<Target> targets : outputTargets.values()) {
+      if (targets.contains(target)) {
+        return true;
+      }
+    }
+    return false;
+  }
+  
+  private void addOutput(PCollectionImpl<?> impl, Target target) {
+    if (!outputTargets.containsKey(impl)) {
+      outputTargets.put(impl, Sets.<Target> newHashSet());
+    }
+    outputTargets.get(impl).add(target);
+  }
+
+  @Override
+  public <T> Iterable<T> materialize(PCollection<T> pcollection) {
+
+    PCollectionImpl<T> pcollectionImpl = toPcollectionImpl(pcollection);
+    ReadableSource<T> readableSrc = getMaterializeSourceTarget(pcollectionImpl);
+
+    MaterializableIterable<T> c = new MaterializableIterable<T>(this, readableSrc);
+    if (!outputTargetsToMaterialize.containsKey(pcollectionImpl)) {
+      outputTargetsToMaterialize.put(pcollectionImpl, c);
+    }
+    return c;
+  }
+
+  /**
+   * Retrieve a ReadableSourceTarget that provides access to the contents of a {@link PCollection}.
+   * This is primarily intended as a helper method for {@link #materialize(PCollection)}. The
+   * underlying data of the ReadableSourceTarget may not actually be present until the pipeline is
+   * run.
+   * 
+   * @param pcollection The collection for which the ReadableSourceTarget is to be retrieved
+   * @return The ReadableSourceTarget
+   * @throws IllegalArgumentException If no ReadableSourceTarget can be retrieved for the given
+   *           PCollection
+   */
+  public <T> ReadableSource<T> getMaterializeSourceTarget(PCollection<T> pcollection) {
+    PCollectionImpl<T> impl = toPcollectionImpl(pcollection);
+
+    // First, check to see if this is a readable input collection.
+    if (impl instanceof InputCollection) {
+      InputCollection<T> ic = (InputCollection<T>) impl;
+      if (ic.getSource() instanceof ReadableSource) {
+        return (ReadableSource) ic.getSource();
+      } else {
+        throw new IllegalArgumentException(
+            "Cannot materialize non-readable input collection: " + ic);
+      }
+    } else if (impl instanceof InputTable) {
+      InputTable it = (InputTable) impl;
+      if (it.getSource() instanceof ReadableSource) {
+        return (ReadableSource) it.getSource();
+      } else {
+        throw new IllegalArgumentException(
+            "Cannot materialize non-readable input table: " + it);
+      }
+    }
+
+    // Next, check to see if this pcollection has already been materialized.
+    SourceTarget<T> matTarget = impl.getMaterializedAt();
+    if (matTarget != null && matTarget instanceof ReadableSourceTarget) {
+      return (ReadableSourceTarget<T>) matTarget;
+    }
+    
+    // Check to see if we plan on materializing this collection on the
+    // next run.
+    ReadableSourceTarget<T> srcTarget = null;
+    if (outputTargets.containsKey(impl)) {
+      for (Target target : outputTargets.get(impl)) {
+        if (target instanceof ReadableSourceTarget) {
+          return (ReadableSourceTarget<T>) target;
+        }
+      }
+    }
+
+    // If we're not planning on materializing it already, create a temporary
+    // output to hold the materialized records and return that.
+    SourceTarget<T> st = createIntermediateOutput(pcollection.getPType());
+    if (!(st instanceof ReadableSourceTarget)) {
+      throw new IllegalArgumentException("The PType for the given PCollection is not readable"
+          + " and cannot be materialized");
+    } else {
+      srcTarget = (ReadableSourceTarget<T>) st;
+      addOutput(impl, srcTarget);
+      return srcTarget;
+    }
+  }
+
+  /**
+   * Safely cast a PCollection into a PCollectionImpl, including handling the case of
+   * UnionCollections.
+   * 
+   * @param pcollection The PCollection to be cast/transformed
+   * @return The PCollectionImpl representation
+   */
+  private <T> PCollectionImpl<T> toPcollectionImpl(PCollection<T> pcollection) {
+    PCollectionImpl<T> pcollectionImpl = null;
+    if (pcollection instanceof UnionCollection || pcollection instanceof UnionTable) {
+      pcollectionImpl = (PCollectionImpl<T>) pcollection.parallelDo("UnionCollectionWrapper",
+          (MapFn) IdentityFn.<Object> getInstance(), pcollection.getPType());
+    } else {
+      pcollectionImpl = (PCollectionImpl<T>) pcollection;
+    }
+    return pcollectionImpl;
+  }
+
+  public <T> SourceTarget<T> createIntermediateOutput(PType<T> ptype) {
+    return ptype.getDefaultFileSource(createTempPath());
+  }
+
+  public Path createTempPath() {
+    tempFileIndex++;
+    return new Path(tempDirectory, "p" + tempFileIndex);
+  }
+
+  private static Path createTempDirectory(Configuration conf) {
+    Path dir = createTemporaryPath(conf);
+    try {
+      dir.getFileSystem(conf).mkdirs(dir);
+    } catch (IOException e) {
+      throw new RuntimeException("Cannot create job output directory " + dir, e);
+    }
+    return dir;
+  }
+
+  private static Path createTemporaryPath(Configuration conf) {
+    String baseDir = conf.get(RuntimeParameters.TMP_DIR, "/tmp");
+    return new Path(baseDir, "crunch-" + (RANDOM.nextInt() & Integer.MAX_VALUE));
+  }
+
+  @Override
+  public <T> void writeTextFile(PCollection<T> pcollection, String pathName) {
+    pcollection.parallelDo("asText", new StringifyFn<T>(), Writables.strings())
+        .write(To.textFile(pathName));
+  }
+
+  private static class StringifyFn<T> extends MapFn<T, String> {
+    @Override
+    public String map(T input) {
+      return input.toString();
+    }
+  }
+  
+  private void cleanup() {
+    if (!outputTargets.isEmpty()) {
+      LOG.warn("Not running cleanup while output targets remain");
+      return;
+    }
+    try {
+      FileSystem fs = tempDirectory.getFileSystem(conf);
+      if (fs.exists(tempDirectory)) {
+        fs.delete(tempDirectory, true);
+      }
+    } catch (IOException e) {
+      LOG.info("Exception during cleanup", e);
+    }
+  }
+
+  public int getNextAnonymousStageId() {
+    return nextAnonymousStageId++;
+  }
+
+  @Override
+  public void enableDebug() {
+    // Turn on Crunch runtime error catching.
+    getConfiguration().setBoolean(RuntimeParameters.DEBUG, true);
+  }
+
+  @Override
+  public String getName() {
+    return name;
+  }
+}
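
To round out the picture, a hedged sketch of a complete driver built on this class; the paths and the driver class name are placeholders, and only pipeline methods defined above are used.

    import org.apache.crunch.PCollection;
    import org.apache.crunch.Pipeline;
    import org.apache.crunch.impl.mr.MRPipeline;
    import org.apache.hadoop.conf.Configuration;

    public class TextCopyDriver {
      public static void main(String[] args) throws Exception {
        Pipeline pipeline = new MRPipeline(TextCopyDriver.class, new Configuration());
        PCollection<String> lines = pipeline.readTextFile("/tmp/crunch-input");  // placeholder input path
        pipeline.writeTextFile(lines, "/tmp/crunch-output");                     // placeholder output path
        pipeline.done(); // runs any pending jobs, then deletes the pipeline's temp directory
      }
    }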

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/DoCollectionImpl.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/DoCollectionImpl.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/DoCollectionImpl.java
new file mode 100644
index 0000000..7b8f2ea
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/DoCollectionImpl.java
@@ -0,0 +1,74 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.collect;
+
+import java.util.List;
+import java.util.Set;
+
+import org.apache.crunch.DoFn;
+import org.apache.crunch.ParallelDoOptions;
+import org.apache.crunch.SourceTarget;
+import org.apache.crunch.impl.mr.plan.DoNode;
+import org.apache.crunch.types.PType;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSet;
+
+public class DoCollectionImpl<S> extends PCollectionImpl<S> {
+
+  private final PCollectionImpl<Object> parent;
+  private final DoFn<Object, S> fn;
+  private final PType<S> ntype;
+
+  <T> DoCollectionImpl(String name, PCollectionImpl<T> parent, DoFn<T, S> fn, PType<S> ntype) {
+    this(name, parent, fn, ntype, ParallelDoOptions.builder().build());
+  }
+  
+  <T> DoCollectionImpl(String name, PCollectionImpl<T> parent, DoFn<T, S> fn, PType<S> ntype,
+      ParallelDoOptions options) {
+    super(name, options);
+    this.parent = (PCollectionImpl<Object>) parent;
+    this.fn = (DoFn<Object, S>) fn;
+    this.ntype = ntype;
+  }
+
+  @Override
+  protected long getSizeInternal() {
+    return (long) (fn.scaleFactor() * parent.getSize());
+  }
+
+  @Override
+  public PType<S> getPType() {
+    return ntype;
+  }
+
+  @Override
+  protected void acceptInternal(PCollectionImpl.Visitor visitor) {
+    visitor.visitDoFnCollection(this);
+  }
+
+  @Override
+  public List<PCollectionImpl<?>> getParents() {
+    return ImmutableList.<PCollectionImpl<?>> of(parent);
+  }
+
+  @Override
+  public DoNode createDoNode() {
+    return DoNode.createFnNode(getName(), fn, ntype);
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/DoTableImpl.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/DoTableImpl.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/DoTableImpl.java
new file mode 100644
index 0000000..176643b
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/DoTableImpl.java
@@ -0,0 +1,84 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.collect;
+
+import java.util.List;
+
+import org.apache.crunch.CombineFn;
+import org.apache.crunch.DoFn;
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pair;
+import org.apache.crunch.ParallelDoOptions;
+import org.apache.crunch.impl.mr.plan.DoNode;
+import org.apache.crunch.types.PTableType;
+import org.apache.crunch.types.PType;
+
+import com.google.common.collect.ImmutableList;
+
+public class DoTableImpl<K, V> extends PTableBase<K, V> implements PTable<K, V> {
+
+  private final PCollectionImpl<?> parent;
+  private final DoFn<?, Pair<K, V>> fn;
+  private final PTableType<K, V> type;
+
+  <S> DoTableImpl(String name, PCollectionImpl<S> parent, DoFn<S, Pair<K, V>> fn, PTableType<K, V> ntype) {
+    this(name, parent, fn, ntype, ParallelDoOptions.builder().build());
+  }
+  
+  <S> DoTableImpl(String name, PCollectionImpl<S> parent, DoFn<S, Pair<K, V>> fn, PTableType<K, V> ntype,
+      ParallelDoOptions options) {
+    super(name, options);
+    this.parent = parent;
+    this.fn = fn;
+    this.type = ntype;
+  }
+
+  @Override
+  protected long getSizeInternal() {
+    return (long) (fn.scaleFactor() * parent.getSize());
+  }
+
+  @Override
+  public PTableType<K, V> getPTableType() {
+    return type;
+  }
+
+  @Override
+  protected void acceptInternal(PCollectionImpl.Visitor visitor) {
+    visitor.visitDoTable(this);
+  }
+
+  @Override
+  public PType<Pair<K, V>> getPType() {
+    return type;
+  }
+
+  @Override
+  public List<PCollectionImpl<?>> getParents() {
+    return ImmutableList.<PCollectionImpl<?>> of(parent);
+  }
+
+  @Override
+  public DoNode createDoNode() {
+    return DoNode.createFnNode(getName(), fn, type);
+  }
+
+  public boolean hasCombineFn() {
+    return fn instanceof CombineFn;
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/InputCollection.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/InputCollection.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/InputCollection.java
new file mode 100644
index 0000000..ace5cc1
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/InputCollection.java
@@ -0,0 +1,85 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.collect;
+
+import java.util.List;
+
+import org.apache.commons.lang.builder.HashCodeBuilder;
+import org.apache.crunch.Source;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.impl.mr.plan.DoNode;
+import org.apache.crunch.types.PType;
+
+import com.google.common.collect.ImmutableList;
+
+public class InputCollection<S> extends PCollectionImpl<S> {
+
+  private final Source<S> source;
+
+  public InputCollection(Source<S> source, MRPipeline pipeline) {
+    super(source.toString());
+    this.source = source;
+    this.pipeline = pipeline;
+  }
+
+  @Override
+  public PType<S> getPType() {
+    return source.getType();
+  }
+
+  public Source<S> getSource() {
+    return source;
+  }
+
+  @Override
+  protected long getSizeInternal() {
+    long sz = source.getSize(pipeline.getConfiguration());
+    if (sz < 0) {
+      throw new IllegalStateException("Input source " + source + " does not exist!");
+    }
+    return sz;
+  }
+
+  @Override
+  protected void acceptInternal(PCollectionImpl.Visitor visitor) {
+    visitor.visitInputCollection(this);
+  }
+
+  @Override
+  public List<PCollectionImpl<?>> getParents() {
+    return ImmutableList.of();
+  }
+
+  @Override
+  public DoNode createDoNode() {
+    return DoNode.createInputNode(source);
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (obj == null || !(obj instanceof InputCollection)) {
+      return false;
+    }
+    return source.equals(((InputCollection) obj).source);
+  }
+
+  @Override
+  public int hashCode() {
+    return new HashCodeBuilder().append(source).toHashCode();
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/InputTable.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/InputTable.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/InputTable.java
new file mode 100644
index 0000000..71f11c5
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/collect/InputTable.java
@@ -0,0 +1,86 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.collect;
+
+import java.util.List;
+
+import org.apache.crunch.Pair;
+import org.apache.crunch.TableSource;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.impl.mr.plan.DoNode;
+import org.apache.crunch.types.PTableType;
+import org.apache.crunch.types.PType;
+
+import com.google.common.collect.ImmutableList;
+
+public class InputTable<K, V> extends PTableBase<K, V> {
+
+  private final TableSource<K, V> source;
+  private final InputCollection<Pair<K, V>> asCollection;
+
+  public InputTable(TableSource<K, V> source, MRPipeline pipeline) {
+    super(source.toString());
+    this.source = source;
+    this.pipeline = pipeline;
+    this.asCollection = new InputCollection<Pair<K, V>>(source, pipeline);
+  }
+
+  public TableSource<K, V> getSource() {
+    return source;
+  }
+  
+  @Override
+  protected long getSizeInternal() {
+    return asCollection.getSizeInternal();
+  }
+
+  @Override
+  public PTableType<K, V> getPTableType() {
+    return source.getTableType();
+  }
+
+  @Override
+  public PType<Pair<K, V>> getPType() {
+    return source.getType();
+  }
+
+  @Override
+  public List<PCollectionImpl<?>> getParents() {
+    return ImmutableList.of();
+  }
+
+  @Override
+  protected void acceptInternal(PCollectionImpl.Visitor visitor) {
+    visitor.visitInputCollection(asCollection);
+  }
+
+  @Override
+  public DoNode createDoNode() {
+    return DoNode.createInputNode(source);
+  }
+
+  @Override
+  public int hashCode() {
+    return asCollection.hashCode();
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    return asCollection.equals(other);
+  }
+}


[38/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/resources/org/apache/crunch/UnionITData/src1.txt
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/resources/org/apache/crunch/UnionITData/src1.txt b/crunch-core/src/it/resources/org/apache/crunch/UnionITData/src1.txt
new file mode 100644
index 0000000..a92974b
--- /dev/null
+++ b/crunch-core/src/it/resources/org/apache/crunch/UnionITData/src1.txt
@@ -0,0 +1,5 @@
+a1
+b2
+a1
+a1
+b2

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/resources/org/apache/crunch/UnionITData/src2.txt
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/resources/org/apache/crunch/UnionITData/src2.txt b/crunch-core/src/it/resources/org/apache/crunch/UnionITData/src2.txt
new file mode 100644
index 0000000..9363398
--- /dev/null
+++ b/crunch-core/src/it/resources/org/apache/crunch/UnionITData/src2.txt
@@ -0,0 +1,3 @@
+c3
+a1
+c3

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/resources/org/apache/crunch/fn/AggregatorsITData/ints.txt
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/resources/org/apache/crunch/fn/AggregatorsITData/ints.txt b/crunch-core/src/it/resources/org/apache/crunch/fn/AggregatorsITData/ints.txt
new file mode 100644
index 0000000..680cb09
--- /dev/null
+++ b/crunch-core/src/it/resources/org/apache/crunch/fn/AggregatorsITData/ints.txt
@@ -0,0 +1,5 @@
+a	1	2
+a	3	4
+b	2	3
+a	5	6
+b	9	10

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/resources/org/apache/crunch/lib/CogroupITData/src1.txt
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/resources/org/apache/crunch/lib/CogroupITData/src1.txt b/crunch-core/src/it/resources/org/apache/crunch/lib/CogroupITData/src1.txt
new file mode 100644
index 0000000..9f38eb9
--- /dev/null
+++ b/crunch-core/src/it/resources/org/apache/crunch/lib/CogroupITData/src1.txt
@@ -0,0 +1,4 @@
+a,1-1
+b,1-2
+c,1-3
+a,1-4

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/resources/org/apache/crunch/lib/CogroupITData/src2.txt
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/resources/org/apache/crunch/lib/CogroupITData/src2.txt b/crunch-core/src/it/resources/org/apache/crunch/lib/CogroupITData/src2.txt
new file mode 100644
index 0000000..ed9524e
--- /dev/null
+++ b/crunch-core/src/it/resources/org/apache/crunch/lib/CogroupITData/src2.txt
@@ -0,0 +1,4 @@
+b,2-1
+c,2-2
+c,2-3
+d,2-4

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/resources/secondary_sort_input.txt
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/resources/secondary_sort_input.txt b/crunch-core/src/it/resources/secondary_sort_input.txt
new file mode 100644
index 0000000..3c7be93
--- /dev/null
+++ b/crunch-core/src/it/resources/secondary_sort_input.txt
@@ -0,0 +1,7 @@
+one,1,1 
+one,2,-3 
+two,4,5 
+two,2,6 
+two,1,7,9 
+three,0,-1 
+one,-5,10 

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/resources/set1.txt
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/resources/set1.txt b/crunch-core/src/it/resources/set1.txt
new file mode 100644
index 0000000..3b67f57
--- /dev/null
+++ b/crunch-core/src/it/resources/set1.txt
@@ -0,0 +1,4 @@
+b
+c
+a
+e
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/resources/set2.txt
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/resources/set2.txt b/crunch-core/src/it/resources/set2.txt
new file mode 100644
index 0000000..8169ab5
--- /dev/null
+++ b/crunch-core/src/it/resources/set2.txt
@@ -0,0 +1,3 @@
+c
+d
+a
\ No newline at end of file


[15/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/resources/urls.txt
----------------------------------------------------------------------
diff --git a/crunch/src/it/resources/urls.txt b/crunch/src/it/resources/urls.txt
deleted file mode 100644
index 827e711..0000000
--- a/crunch/src/it/resources/urls.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-www.A.com	www.B.com
-www.A.com	www.C.com
-www.A.com	www.D.com
-www.A.com	www.E.com
-www.B.com	www.D.com
-www.B.com	www.E.com
-www.C.com	www.D.com
-www.D.com	www.B.com
-www.E.com	www.A.com
-www.F.com	www.B.com
-www.F.com	www.C.com

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/Aggregator.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/Aggregator.java b/crunch/src/main/java/org/apache/crunch/Aggregator.java
deleted file mode 100644
index 432452b..0000000
--- a/crunch/src/main/java/org/apache/crunch/Aggregator.java
+++ /dev/null
@@ -1,86 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import java.io.Serializable;
-
-import org.apache.hadoop.conf.Configuration;
-
-
-/**
- * Aggregate a sequence of values into a possibly smaller sequence of the same type.
- *
- * <p>In most cases, an Aggregator will turn multiple values into a single value,
- * like creating a sum, finding the minimum or maximum, etc. In some cases
- * (e.g. finding the top K elements), an implementation may return more than
- * one value. The {@link org.apache.crunch.fn.Aggregators} utility class contains
- * factory methods for creating all kinds of pre-defined Aggregators that should
- * cover the most common cases.</p>
- *
- * <p>Aggregator implementations should usually be <em>associative</em> and
- * <em>commutative</em>, which makes their results deterministic. If your aggregation
- * function isn't commutative, you can still use a secondary sort to make its results deterministic.</p>
- *
- * <p>The lifecycle of an {@link Aggregator} always begins with you instantiating
- * it and passing it to Crunch. When running your {@link Pipeline}, Crunch serializes
- * the instance and deserializes it wherever it is needed on the cluster. This is how
- * Crunch uses a deserialized instance:</p>
- *
- * <ol>
- *   <li>call {@link #initialize(Configuration)} once</li>
- *   <li>call {@link #reset()}</li>
- *   <li>call {@link #update(Object)} multiple times until all values of a sequence
- *       have been aggregated</li>
- *   <li>call {@link #results()} to retrieve the aggregated result</li>
- *   <li>go back to step 2 until all sequences have been aggregated</li>
- * </ol>
- *
- * @param <T> The value types to aggregate
- */
-public interface Aggregator<T> extends Serializable {
-
-  /**
-   * Perform any setup of this instance that is required prior to processing
-   * inputs.
-   *
-   * @param conf Hadoop configuration
-   */
-  void initialize(Configuration conf);
-
-  /**
-   * Clears the internal state of this Aggregator and prepares it for the
-   * values associated with the next key.
-   *
-   * Depending on what you aggregate, this typically means setting a variable
-   * to zero or clearing a list. Failing to do this will yield wrong results!
-   */
-  void reset();
-
-  /**
-   * Incorporate the given value into the aggregate state maintained by this
-   * instance.
-   *
-   * @param value The value to add to the aggregated state
-   */
-  void update(T value);
-
-  /**
-   * Returns the current aggregated state of this instance.
-   */
-  Iterable<T> results();
-}
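
For illustration, a minimal Aggregator that follows the lifecycle described in the Javadoc above might look like the following sketch (the class name SumLongsAggregator is made up for this example; it is not part of the Crunch codebase):

import org.apache.crunch.Aggregator;
import org.apache.hadoop.conf.Configuration;

import com.google.common.collect.ImmutableList;

// Sums the long values of each key: initialize(Configuration) is called once,
// then reset()/update(...)/results() are called for every value sequence.
public class SumLongsAggregator implements Aggregator<Long> {

  private long sum;

  @Override
  public void initialize(Configuration conf) {
    // No setup required for a simple sum.
  }

  @Override
  public void reset() {
    sum = 0L; // clear state before the values of the next key arrive
  }

  @Override
  public void update(Long value) {
    sum += value;
  }

  @Override
  public Iterable<Long> results() {
    return ImmutableList.of(sum);
  }
}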

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/CombineFn.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/CombineFn.java b/crunch/src/main/java/org/apache/crunch/CombineFn.java
deleted file mode 100644
index 71e8057..0000000
--- a/crunch/src/main/java/org/apache/crunch/CombineFn.java
+++ /dev/null
@@ -1,1211 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import java.io.Serializable;
-import java.math.BigInteger;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.SortedSet;
-
-import org.apache.crunch.fn.Aggregators;
-import org.apache.crunch.util.Tuples;
-import org.apache.hadoop.conf.Configuration;
-
-import com.google.common.base.Joiner;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
-
-/**
- * A special {@link DoFn} implementation that converts an {@link Iterable} of
- * values into a single value. If a {@code CombineFn} instance is used on a
- * {@link PGroupedTable}, the function will be applied to the output of the map
- * stage before the data is passed to the reducer, which can improve the runtime
- * of certain classes of jobs.
- * <p>
- * Note that the incoming {@code Iterable} can only be used to create an 
- * {@code Iterator} once. Calling the {@link Iterable#iterator()} method a
- * second time will throw an {@link IllegalStateException}.
- */
-public abstract class CombineFn<S, T> extends DoFn<Pair<S, Iterable<T>>, Pair<S, T>> {
-
-  /**
-   * @deprecated Use {@link org.apache.crunch.Aggregator}
-   */
-  public static interface Aggregator<T> extends Serializable {
-    /**
-     * Perform any setup of this instance that is required prior to processing
-     * inputs.
-     */
-    void initialize(Configuration configuration);
-
-    /**
-     * Clears the internal state of this Aggregator and prepares it for the
-     * values associated with the next key.
-     */
-    void reset();
-
-    /**
-     * Incorporate the given value into the aggregate state maintained by this
-     * instance.
-     */
-    void update(T value);
-
-    /**
-     * Returns the current aggregated state of this instance.
-     */
-    Iterable<T> results();
-  }
-
-  /**
-   * Base class for aggregators that do not require any initialization.
-   *
-   * @deprecated Use {@link org.apache.crunch.fn.Aggregators.SimpleAggregator}
-   */
-  public static abstract class SimpleAggregator<T> implements Aggregator<T> {
-    @Override
-    public void initialize(Configuration conf) {
-      // No-op
-    }
-  }
-  
-  /**
-   * Interface for constructing new aggregator instances.
-   *
-   * @deprecated Use {@link PGroupedTable#combineValues(Aggregator)} which doesn't require a factory.
-   */
-  public static interface AggregatorFactory<T> {
-    Aggregator<T> create();
-  }
-
-  /**
-   * A {@code CombineFn} that delegates all of the actual work to an
-   * {@code Aggregator} instance.
-   *
-   * @deprecated Use the {@link Aggregators#toCombineFn(org.apache.crunch.Aggregator)} adapter
-   */
-  public static class AggregatorCombineFn<K, V> extends CombineFn<K, V> {
-
-    private final Aggregator<V> aggregator;
-
-    public AggregatorCombineFn(Aggregator<V> aggregator) {
-      this.aggregator = aggregator;
-    }
-
-    @Override
-    public void initialize() {
-      aggregator.initialize(getConfiguration());
-    }
-    
-    @Override
-    public void process(Pair<K, Iterable<V>> input, Emitter<Pair<K, V>> emitter) {
-      aggregator.reset();
-      for (V v : input.second()) {
-        aggregator.update(v);
-      }
-      for (V v : aggregator.results()) {
-        emitter.emit(Pair.of(input.first(), v));
-      }
-    }
-  }
-
-  private static abstract class TupleAggregator<T> implements Aggregator<T> {
-    private final List<Aggregator<Object>> aggregators;
-
-    public TupleAggregator(Aggregator<?>... aggregators) {
-      this.aggregators = Lists.newArrayList();
-      for (Aggregator<?> a : aggregators) {
-        this.aggregators.add((Aggregator<Object>) a);
-      }
-    }
-
-    @Override
-    public void initialize(Configuration configuration) {
-      for (Aggregator<?> a : aggregators) {
-        a.initialize(configuration);
-      }
-    }
-    
-    @Override
-    public void reset() {
-      for (Aggregator<?> a : aggregators) {
-        a.reset();
-      }
-    }
-
-    protected void updateTuple(Tuple t) {
-      for (int i = 0; i < aggregators.size(); i++) {
-        aggregators.get(i).update(t.get(i));
-      }
-    }
-
-    protected Iterable<Object> results(int index) {
-      return aggregators.get(index).results();
-    }
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#pairAggregator(Aggregator, Aggregator)}
-   */
-  public static class PairAggregator<V1, V2> extends TupleAggregator<Pair<V1, V2>> {
-
-    public PairAggregator(Aggregator<V1> a1, Aggregator<V2> a2) {
-      super(a1, a2);
-    }
-
-    @Override
-    public void update(Pair<V1, V2> value) {
-      updateTuple(value);
-    }
-
-    @Override
-    public Iterable<Pair<V1, V2>> results() {
-      return new Tuples.PairIterable<V1, V2>((Iterable<V1>) results(0), (Iterable<V2>) results(1));
-    }
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#tripAggregator(Aggregator, Aggregator, Aggregator)}
-   */
-  public static class TripAggregator<A, B, C> extends TupleAggregator<Tuple3<A, B, C>> {
-
-    public TripAggregator(Aggregator<A> a1, Aggregator<B> a2, Aggregator<C> a3) {
-      super(a1, a2, a3);
-    }
-
-    @Override
-    public void update(Tuple3<A, B, C> value) {
-      updateTuple(value);
-    }
-
-    @Override
-    public Iterable<Tuple3<A, B, C>> results() {
-      return new Tuples.TripIterable<A, B, C>((Iterable<A>) results(0), (Iterable<B>) results(1),
-          (Iterable<C>) results(2));
-    }
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#quadAggregator(Aggregator, Aggregator, Aggregator, Aggregator)}
-   */
-  public static class QuadAggregator<A, B, C, D> extends TupleAggregator<Tuple4<A, B, C, D>> {
-
-    public QuadAggregator(Aggregator<A> a1, Aggregator<B> a2, Aggregator<C> a3, Aggregator<D> a4) {
-      super(a1, a2, a3, a4);
-    }
-
-    @Override
-    public void update(Tuple4<A, B, C, D> value) {
-      updateTuple(value);
-    }
-
-    @Override
-    public Iterable<Tuple4<A, B, C, D>> results() {
-      return new Tuples.QuadIterable<A, B, C, D>((Iterable<A>) results(0), (Iterable<B>) results(1),
-          (Iterable<C>) results(2), (Iterable<D>) results(3));
-    }
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#tupleAggregator(Aggregator...)}
-   */
-  public static class TupleNAggregator extends TupleAggregator<TupleN> {
-
-    private final int size;
-
-    public TupleNAggregator(Aggregator<?>... aggregators) {
-      super(aggregators);
-      size = aggregators.length;
-    }
-
-    @Override
-    public void update(TupleN value) {
-      updateTuple(value);
-    }
-
-    @Override
-    public Iterable<TupleN> results() {
-      Iterable<?>[] iterables = new Iterable[size];
-      for (int i = 0; i < size; i++) {
-        iterables[i] = results(i);
-      }
-      return new Tuples.TupleNIterable(iterables);
-    }
-
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#toCombineFn(Aggregator)}
-   */
-  public static final <K, V> CombineFn<K, V> aggregator(Aggregator<V> aggregator) {
-    return new AggregatorCombineFn<K, V>(aggregator);
-  }
-
-  /**
-   * @deprecated Use {@link PGroupedTable#combineValues(Aggregator)} which doesn't require a factory.
-   */
-  public static final <K, V> CombineFn<K, V> aggregatorFactory(AggregatorFactory<V> aggregator) {
-    return new AggregatorCombineFn<K, V>(aggregator.create());
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#pairAggregator(Aggregator, Aggregator)}
-   */
-  public static final <K, V1, V2> CombineFn<K, Pair<V1, V2>> pairAggregator(AggregatorFactory<V1> a1,
-      AggregatorFactory<V2> a2) {
-    return aggregator(new PairAggregator<V1, V2>(a1.create(), a2.create()));
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#tripAggregator(Aggregator, Aggregator, Aggregator)}
-   */
-  public static final <K, A, B, C> CombineFn<K, Tuple3<A, B, C>> tripAggregator(AggregatorFactory<A> a1,
-      AggregatorFactory<B> a2, AggregatorFactory<C> a3) {
-    return aggregator(new TripAggregator<A, B, C>(a1.create(), a2.create(), a3.create()));
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#quadAggregator(Aggregator, Aggregator, Aggregator, Aggregator)}
-   */
-  public static final <K, A, B, C, D> CombineFn<K, Tuple4<A, B, C, D>> quadAggregator(AggregatorFactory<A> a1,
-      AggregatorFactory<B> a2, AggregatorFactory<C> a3, AggregatorFactory<D> a4) {
-    return aggregator(new QuadAggregator<A, B, C, D>(a1.create(), a2.create(), a3.create(), a4.create()));
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#tupleAggregator(Aggregator...)}
-   */
-  public static final <K> CombineFn<K, TupleN> tupleAggregator(AggregatorFactory<?>... factories) {
-    Aggregator<?>[] aggs = new Aggregator[factories.length];
-    for (int i = 0; i < aggs.length; i++) {
-      aggs[i] = factories[i].create();
-    }
-    return aggregator(new TupleNAggregator(aggs));
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#SUM_LONGS()}
-   */
-  public static final <K> CombineFn<K, Long> SUM_LONGS() {
-    return aggregatorFactory(SUM_LONGS);
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#SUM_INTS()}
-   */
-  public static final <K> CombineFn<K, Integer> SUM_INTS() {
-    return aggregatorFactory(SUM_INTS);
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#SUM_FLOATS()}
-   */
-  public static final <K> CombineFn<K, Float> SUM_FLOATS() {
-    return aggregatorFactory(SUM_FLOATS);
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#SUM_DOUBLES()}
-   */
-  public static final <K> CombineFn<K, Double> SUM_DOUBLES() {
-    return aggregatorFactory(SUM_DOUBLES);
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#SUM_BIGINTS()}
-   */
-  public static final <K> CombineFn<K, BigInteger> SUM_BIGINTS() {
-    return aggregatorFactory(SUM_BIGINTS);
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#MAX_LONGS()}
-   */
-  public static final <K> CombineFn<K, Long> MAX_LONGS() {
-    return aggregatorFactory(MAX_LONGS);
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#MAX_LONGS(int)}
-   */
-  public static final <K> CombineFn<K, Long> MAX_LONGS(int n) {
-    return aggregator(new MaxNAggregator<Long>(n));
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#MAX_INTS()}
-   */
-  public static final <K> CombineFn<K, Integer> MAX_INTS() {
-    return aggregatorFactory(MAX_INTS);
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#MAX_INTS(int)}
-   */
-  public static final <K> CombineFn<K, Integer> MAX_INTS(int n) {
-    return aggregator(new MaxNAggregator<Integer>(n));
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#MAX_FLOATS()}
-   */
-  public static final <K> CombineFn<K, Float> MAX_FLOATS() {
-    return aggregatorFactory(MAX_FLOATS);
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#MAX_FLOATS(int)}
-   */
-  public static final <K> CombineFn<K, Float> MAX_FLOATS(int n) {
-    return aggregator(new MaxNAggregator<Float>(n));
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#MAX_DOUBLES()}
-   */
-  public static final <K> CombineFn<K, Double> MAX_DOUBLES() {
-    return aggregatorFactory(MAX_DOUBLES);
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#MAX_DOUBLES(int)}
-   */
-  public static final <K> CombineFn<K, Double> MAX_DOUBLES(int n) {
-    return aggregator(new MaxNAggregator<Double>(n));
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#MAX_BIGINTS()}
-   */
-  public static final <K> CombineFn<K, BigInteger> MAX_BIGINTS() {
-    return aggregatorFactory(MAX_BIGINTS);
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#MAX_BIGINTS(int)}
-   */
-  public static final <K> CombineFn<K, BigInteger> MAX_BIGINTS(int n) {
-    return aggregator(new MaxNAggregator<BigInteger>(n));
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#MIN_LONGS()}
-   */
-  public static final <K> CombineFn<K, Long> MIN_LONGS() {
-    return aggregatorFactory(MIN_LONGS);
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#MIN_LONGS(int)}
-   */
-  public static final <K> CombineFn<K, Long> MIN_LONGS(int n) {
-    return aggregator(new MinNAggregator<Long>(n));
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#MIN_INTS()}
-   */
-  public static final <K> CombineFn<K, Integer> MIN_INTS() {
-    return aggregatorFactory(MIN_INTS);
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#MIN_INTS(int)}
-   */
-  public static final <K> CombineFn<K, Integer> MIN_INTS(int n) {
-    return aggregator(new MinNAggregator<Integer>(n));
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#MIN_FLOATS()}
-   */
-  public static final <K> CombineFn<K, Float> MIN_FLOATS() {
-    return aggregatorFactory(MIN_FLOATS);
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#MIN_FLOATS(int)}
-   */
-  public static final <K> CombineFn<K, Float> MIN_FLOATS(int n) {
-    return aggregator(new MinNAggregator<Float>(n));
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#MIN_DOUBLES()}
-   */
-  public static final <K> CombineFn<K, Double> MIN_DOUBLES() {
-    return aggregatorFactory(MIN_DOUBLES);
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#MIN_DOUBLES(int)}
-   */
-  public static final <K> CombineFn<K, Double> MIN_DOUBLES(int n) {
-    return aggregator(new MinNAggregator<Double>(n));
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#MIN_BIGINTS()}
-   */
-  public static final <K> CombineFn<K, BigInteger> MIN_BIGINTS() {
-    return aggregatorFactory(MIN_BIGINTS);
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#MIN_BIGINTS(int)}
-   */
-  public static final <K> CombineFn<K, BigInteger> MIN_BIGINTS(int n) {
-    return aggregator(new MinNAggregator<BigInteger>(n));
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#FIRST_N(int)}
-   */
-  public static final <K, V> CombineFn<K, V> FIRST_N(int n) {
-    return aggregator(new FirstNAggregator<V>(n));
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#LAST_N(int)}
-   */
-  public static final <K, V> CombineFn<K, V> LAST_N(int n) {
-    return aggregator(new LastNAggregator<V>(n));
-  }
-
-  /**
-   * Used to concatenate strings, with a separator between each string. There
-   * is no limit on the length of the concatenated string.
-   * 
-   * @param separator
-   *            the separator that will be inserted between the strings
-   * @param skipNull
-   *            whether null values should be skipped; if set to false and a
-   *            null value is encountered, a NullPointerException is thrown
-   * @return a CombineFn that concatenates strings
-   *
-   * @deprecated Use {@link Aggregators#STRING_CONCAT(String, boolean)}
-   */
-  public static final <K> CombineFn<K, String> STRING_CONCAT(final String separator, final boolean skipNull) {
-      return aggregator(new StringConcatAggregator(separator, skipNull));
-  }
-
-  /**
-   * Used to concatenate strings, with a separator between each string. You
-   * can limit the length of the output string and of the input strings by
-   * passing values > 0. If a value is <= 0, there is no limit.
-   * 
-   * Any string that is too long (or that would make the output too long)
-   * will be silently discarded.
-   * 
-   * @param separator
-   *            the separator that will be inserted between the strings
-   * @param skipNull
-   *            whether null values should be skipped; if set to false and a
-   *            null value is encountered, a NullPointerException is thrown
-   * @param maxOutputLength
-   *            the maximum length of the output string. If it is set <= 0,
-   *            there is no limit. The output string will have fewer than
-   *            maxOutputLength characters.
-   * @param maxInputLength
-   *            the maximum length of the input strings. If it is set <= 0,
-   *            there is no limit. Only input strings with fewer than
-   *            maxInputLength characters are concatenated.
-   * @return a CombineFn that concatenates strings
-   *
-   * @deprecated Use {@link Aggregators#STRING_CONCAT(String, boolean, long, long)}
-   */
-  public static final <K> CombineFn<K, String> STRING_CONCAT(final String separator, final boolean skipNull, final long maxOutputLength, final long maxInputLength) {
-      return aggregator(new StringConcatAggregator(separator, skipNull, maxOutputLength, maxInputLength));
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#SUM_LONGS()}
-   */
-  public static class SumLongs extends SimpleAggregator<Long> {
-    private long sum = 0;
-
-    @Override
-    public void reset() {
-      sum = 0;
-    }
-
-    @Override
-    public void update(Long next) {
-      sum += next;
-    }
-
-    @Override
-    public Iterable<Long> results() {
-      return ImmutableList.of(sum);
-    }
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#SUM_LONGS()}
-   */
-  public static AggregatorFactory<Long> SUM_LONGS = new AggregatorFactory<Long>() {
-    public Aggregator<Long> create() {
-      return new SumLongs();
-    }
-  };
-
-  /**
-   * @deprecated Use {@link Aggregators#SUM_INTS()}
-   */
-  public static class SumInts extends SimpleAggregator<Integer> {
-    private int sum = 0;
-
-    @Override
-    public void reset() {
-      sum = 0;
-    }
-
-    @Override
-    public void update(Integer next) {
-      sum += next;
-    }
-
-    @Override
-    public Iterable<Integer> results() {
-      return ImmutableList.of(sum);
-    }
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#SUM_INTS()}
-   */
-  public static AggregatorFactory<Integer> SUM_INTS = new AggregatorFactory<Integer>() {
-    public Aggregator<Integer> create() {
-      return new SumInts();
-    }
-  };
-
-  /**
-   * @deprecated Use {@link Aggregators#SUM_FLOATS()}
-   */
-  public static class SumFloats extends SimpleAggregator<Float> {
-    private float sum = 0;
-
-    @Override
-    public void reset() {
-      sum = 0f;
-    }
-
-    @Override
-    public void update(Float next) {
-      sum += next;
-    }
-
-    @Override
-    public Iterable<Float> results() {
-      return ImmutableList.of(sum);
-    }
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#SUM_FLOATS()}
-   */
-  public static AggregatorFactory<Float> SUM_FLOATS = new AggregatorFactory<Float>() {
-    public Aggregator<Float> create() {
-      return new SumFloats();
-    }
-  };
-
-  /**
-   * @deprecated Use {@link Aggregators#SUM_DOUBLES()}
-   */
-  public static class SumDoubles extends SimpleAggregator<Double> {
-    private double sum = 0;
-
-    @Override
-    public void reset() {
-      sum = 0f;
-    }
-
-    @Override
-    public void update(Double next) {
-      sum += next;
-    }
-
-    @Override
-    public Iterable<Double> results() {
-      return ImmutableList.of(sum);
-    }
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#SUM_DOUBLES()}
-   */
-  public static AggregatorFactory<Double> SUM_DOUBLES = new AggregatorFactory<Double>() {
-    public Aggregator<Double> create() {
-      return new SumDoubles();
-    }
-  };
-
-  /**
-   * @deprecated Use {@link Aggregators#SUM_BIGINTS()}
-   */
-  public static class SumBigInts extends SimpleAggregator<BigInteger> {
-    private BigInteger sum = BigInteger.ZERO;
-
-    @Override
-    public void reset() {
-      sum = BigInteger.ZERO;
-    }
-
-    @Override
-    public void update(BigInteger next) {
-      sum = sum.add(next);
-    }
-
-    @Override
-    public Iterable<BigInteger> results() {
-      return ImmutableList.of(sum);
-    }
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#SUM_BIGINTS()}
-   */
-  public static AggregatorFactory<BigInteger> SUM_BIGINTS = new AggregatorFactory<BigInteger>() {
-    public Aggregator<BigInteger> create() {
-      return new SumBigInts();
-    }
-  };
-
-  /**
-   * @deprecated Use {@link Aggregators#MAX_LONGS()}
-   */
-  public static class MaxLongs extends SimpleAggregator<Long> {
-    private Long max = null;
-
-    @Override
-    public void reset() {
-      max = null;
-    }
-
-    @Override
-    public void update(Long next) {
-      if (max == null || max < next) {
-        max = next;
-      }
-    }
-
-    @Override
-    public Iterable<Long> results() {
-      return ImmutableList.of(max);
-    }
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#MAX_LONGS()}
-   */
-  public static AggregatorFactory<Long> MAX_LONGS = new AggregatorFactory<Long>() {
-    public Aggregator<Long> create() {
-      return new MaxLongs();
-    }
-  };
-
-  /**
-   * @deprecated Use {@link Aggregators#MAX_INTS()}
-   */
-  public static class MaxInts extends SimpleAggregator<Integer> {
-    private Integer max = null;
-
-    @Override
-    public void reset() {
-      max = null;
-    }
-
-    @Override
-    public void update(Integer next) {
-      if (max == null || max < next) {
-        max = next;
-      }
-    }
-
-    @Override
-    public Iterable<Integer> results() {
-      return ImmutableList.of(max);
-    }
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#MAX_INTS()}
-   */
-  public static AggregatorFactory<Integer> MAX_INTS = new AggregatorFactory<Integer>() {
-    public Aggregator<Integer> create() {
-      return new MaxInts();
-    }
-  };
-
-  /**
-   * @deprecated Use {@link Aggregators#MAX_FLOATS()}
-   */
-  public static class MaxFloats extends SimpleAggregator<Float> {
-    private Float max = null;
-
-    @Override
-    public void reset() {
-      max = null;
-    }
-
-    @Override
-    public void update(Float next) {
-      if (max == null || max < next) {
-        max = next;
-      }
-    }
-
-    @Override
-    public Iterable<Float> results() {
-      return ImmutableList.of(max);
-    }
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#MAX_FLOATS()}
-   */
-  public static AggregatorFactory<Float> MAX_FLOATS = new AggregatorFactory<Float>() {
-    public Aggregator<Float> create() {
-      return new MaxFloats();
-    }
-  };
-
-  /**
-   * @deprecated Use {@link Aggregators#MAX_DOUBLES()}
-   */
-  public static class MaxDoubles extends SimpleAggregator<Double> {
-    private Double max = null;
-
-    @Override
-    public void reset() {
-      max = null;
-    }
-
-    @Override
-    public void update(Double next) {
-      if (max == null || max < next) {
-        max = next;
-      }
-    }
-
-    @Override
-    public Iterable<Double> results() {
-      return ImmutableList.of(max);
-    }
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#MAX_DOUBLES()}
-   */
-  public static AggregatorFactory<Double> MAX_DOUBLES = new AggregatorFactory<Double>() {
-    public Aggregator<Double> create() {
-      return new MaxDoubles();
-    }
-  };
-
-  /**
-   * @deprecated Use {@link Aggregators#MAX_BIGINTS()}
-   */
-  public static class MaxBigInts extends SimpleAggregator<BigInteger> {
-    private BigInteger max = null;
-
-    @Override
-    public void reset() {
-      max = null;
-    }
-
-    @Override
-    public void update(BigInteger next) {
-      if (max == null || max.compareTo(next) < 0) {
-        max = next;
-      }
-    }
-
-    @Override
-    public Iterable<BigInteger> results() {
-      return ImmutableList.of(max);
-    }
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#MAX_BIGINTS()}
-   */
-  public static AggregatorFactory<BigInteger> MAX_BIGINTS = new AggregatorFactory<BigInteger>() {
-    public Aggregator<BigInteger> create() {
-      return new MaxBigInts();
-    }
-  };
-
-  /**
-   * @deprecated Use {@link Aggregators#MIN_LONGS()}
-   */
-  public static class MinLongs extends SimpleAggregator<Long> {
-    private Long min = null;
-
-    @Override
-    public void reset() {
-      min = null;
-    }
-
-    @Override
-    public void update(Long next) {
-      if (min == null || min > next) {
-        min = next;
-      }
-    }
-
-    @Override
-    public Iterable<Long> results() {
-      return ImmutableList.of(min);
-    }
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#MIN_LONGS()}
-   */
-  public static AggregatorFactory<Long> MIN_LONGS = new AggregatorFactory<Long>() {
-    public Aggregator<Long> create() {
-      return new MinLongs();
-    }
-  };
-
-  /**
-   * @deprecated Use {@link Aggregators#MIN_INTS()}
-   */
-  public static class MinInts extends SimpleAggregator<Integer> {
-    private Integer min = null;
-
-    @Override
-    public void reset() {
-      min = null;
-    }
-
-    @Override
-    public void update(Integer next) {
-      if (min == null || min > next) {
-        min = next;
-      }
-    }
-
-    @Override
-    public Iterable<Integer> results() {
-      return ImmutableList.of(min);
-    }
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#MIN_INTS()}
-   */
-  public static AggregatorFactory<Integer> MIN_INTS = new AggregatorFactory<Integer>() {
-    public Aggregator<Integer> create() {
-      return new MinInts();
-    }
-  };
-
-  /**
-   * @deprecated Use {@link Aggregators#MIN_FLOATS()}
-   */
-  public static class MinFloats extends SimpleAggregator<Float> {
-    private Float min = null;
-
-    @Override
-    public void reset() {
-      min = null;
-    }
-
-    @Override
-    public void update(Float next) {
-      if (min == null || min > next) {
-        min = next;
-      }
-    }
-
-    @Override
-    public Iterable<Float> results() {
-      return ImmutableList.of(min);
-    }
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#MIN_FLOATS()}
-   */
-  public static AggregatorFactory<Float> MIN_FLOATS = new AggregatorFactory<Float>() {
-    public Aggregator<Float> create() {
-      return new MinFloats();
-    }
-  };
-
-  /**
-   * @deprecated Use {@link Aggregators#MIN_DOUBLES()}
-   */
-  public static class MinDoubles extends SimpleAggregator<Double> {
-    private Double min = null;
-
-    @Override
-    public void reset() {
-      min = null;
-    }
-
-    @Override
-    public void update(Double next) {
-      if (min == null || min > next) {
-        min = next;
-      }
-    }
-
-    @Override
-    public Iterable<Double> results() {
-      return ImmutableList.of(min);
-    }
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#MIN_DOUBLES()}
-   */
-  public static AggregatorFactory<Double> MIN_DOUBLES = new AggregatorFactory<Double>() {
-    public Aggregator<Double> create() {
-      return new MinDoubles();
-    }
-  };
-
-  /**
-   * @deprecated Use {@link Aggregators#MIN_BIGINTS()}
-   */
-  public static class MinBigInts extends SimpleAggregator<BigInteger> {
-    private BigInteger min = null;
-
-    @Override
-    public void reset() {
-      min = null;
-    }
-
-    @Override
-    public void update(BigInteger next) {
-      if (min == null || min.compareTo(next) > 0) {
-        min = next;
-      }
-    }
-
-    @Override
-    public Iterable<BigInteger> results() {
-      return ImmutableList.of(min);
-    }
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#MIN_BIGINTS()}
-   */
-  public static AggregatorFactory<BigInteger> MIN_BIGINTS = new AggregatorFactory<BigInteger>() {
-    public Aggregator<BigInteger> create() {
-      return new MinBigInts();
-    }
-  };
-
-  /**
-   * @deprecated Use {@link Aggregators#MAX_N(int, Class)}
-   */
-  public static class MaxNAggregator<V extends Comparable<V>> extends SimpleAggregator<V> {
-    private final int arity;
-    private transient SortedSet<V> elements;
-
-    public MaxNAggregator(int arity) {
-      this.arity = arity;
-    }
-
-    @Override
-    public void reset() {
-      if (elements == null) {
-        elements = Sets.newTreeSet();
-      } else {
-        elements.clear();
-      }
-    }
-
-    @Override
-    public void update(V value) {
-      if (elements.size() < arity) {
-        elements.add(value);
-      } else if (value.compareTo(elements.first()) > 0) {
-        elements.remove(elements.first());
-        elements.add(value);
-      }
-    }
-
-    @Override
-    public Iterable<V> results() {
-      return ImmutableList.copyOf(elements);
-    }
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#MIN_N(int, Class)}
-   */
-  public static class MinNAggregator<V extends Comparable<V>> extends SimpleAggregator<V> {
-    private final int arity;
-    private transient SortedSet<V> elements;
-
-    public MinNAggregator(int arity) {
-      this.arity = arity;
-    }
-
-    @Override
-    public void reset() {
-      if (elements == null) {
-        elements = Sets.newTreeSet();
-      } else {
-        elements.clear();
-      }
-    }
-
-    @Override
-    public void update(V value) {
-      if (elements.size() < arity) {
-        elements.add(value);
-      } else if (value.compareTo(elements.last()) < 0) {
-        elements.remove(elements.last());
-        elements.add(value);
-      }
-    }
-
-    @Override
-    public Iterable<V> results() {
-      return ImmutableList.copyOf(elements);
-    }
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#FIRST_N(int)}
-   */
-  public static class FirstNAggregator<V> extends SimpleAggregator<V> {
-    private final int arity;
-    private final List<V> elements;
-
-    public FirstNAggregator(int arity) {
-      this.arity = arity;
-      this.elements = Lists.newArrayList();
-    }
-
-    @Override
-    public void reset() {
-      elements.clear();
-    }
-
-    @Override
-    public void update(V value) {
-      if (elements.size() < arity) {
-        elements.add(value);
-      }
-    }
-
-    @Override
-    public Iterable<V> results() {
-      return ImmutableList.copyOf(elements);
-    }
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#LAST_N(int)}
-   */
-  public static class LastNAggregator<V> extends SimpleAggregator<V> {
-    private final int arity;
-    private final LinkedList<V> elements;
-
-    public LastNAggregator(int arity) {
-      this.arity = arity;
-      this.elements = Lists.newLinkedList();
-    }
-
-    @Override
-    public void reset() {
-      elements.clear();
-    }
-
-    @Override
-    public void update(V value) {
-      elements.add(value);
-      if (elements.size() == arity + 1) {
-        elements.removeFirst();
-      }
-    }
-
-    @Override
-    public Iterable<V> results() {
-      return ImmutableList.copyOf(elements);
-    }
-  }
-
-  /**
-   * @deprecated Use {@link Aggregators#STRING_CONCAT(String, boolean, long, long)}
-   */
-  public static class StringConcatAggregator extends SimpleAggregator<String> {
-    private final String separator;
-    private final boolean skipNulls;
-    private final long maxOutputLength;
-    private final long maxInputLength;
-    private long currentLength;
-    private final LinkedList<String> list = new LinkedList<String>();
-
-    private transient Joiner joiner;
-    
-    public StringConcatAggregator(final String separator, final boolean skipNulls) {
-      this.separator = separator;
-      this.skipNulls = skipNulls;
-      this.maxInputLength = 0;
-      this.maxOutputLength = 0;
-    }
-
-    public StringConcatAggregator(final String separator, final boolean skipNull, final long maxOutputLength, final long maxInputLength) {
-      this.separator = separator;
-      this.skipNulls = skipNull;
-      this.maxOutputLength = maxOutputLength;
-      this.maxInputLength = maxInputLength;
-      this.currentLength = -separator.length();
-    }
-
-    @Override
-    public void reset() {
-      if (joiner == null) {
-        joiner = skipNulls ? Joiner.on(separator).skipNulls() : Joiner.on(separator);
-      }
-      currentLength = -separator.length();
-      list.clear();
-    }
-
-    @Override
-    public void update(final String next) {
-      long length = (next == null) ? 0 : next.length() + separator.length();
-      if (maxOutputLength > 0 && currentLength + length > maxOutputLength || maxInputLength > 0 && next.length() > maxInputLength) {
-        return;
-      }
-      if (maxOutputLength > 0) {
-        currentLength += length;
-      }
-      list.add(next);
-    }
-
-    @Override
-    public Iterable<String> results() {
-      return ImmutableList.of(joiner.join(list));
-    }
-  }
-}
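
The deprecation notes in this file all point toward org.apache.crunch.fn.Aggregators and PGroupedTable#combineValues(Aggregator). A brief sketch of the replacement usage, assuming a PTable produced elsewhere in the pipeline (the wrapper class and method names here are illustrative only):

import org.apache.crunch.PTable;
import org.apache.crunch.fn.Aggregators;

public class SumPerKey {

  // Replaces the deprecated CombineFn.SUM_LONGS() factory: group the table by
  // key and combine the values with an Aggregator from the Aggregators class.
  public static PTable<String, Long> sumPerKey(PTable<String, Long> values) {
    return values.groupByKey().combineValues(Aggregators.SUM_LONGS());
  }
}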

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/CrunchRuntimeException.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/CrunchRuntimeException.java b/crunch/src/main/java/org/apache/crunch/CrunchRuntimeException.java
deleted file mode 100644
index 044f600..0000000
--- a/crunch/src/main/java/org/apache/crunch/CrunchRuntimeException.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-/**
- * A {@code RuntimeException} implementation that carries additional state which
- * the Crunch execution engine uses to track whether the exception has been
- * written to the debug logs. Clients may use instances of this class in their
- * own {@code DoFn} implementations.
- */
-public class CrunchRuntimeException extends RuntimeException {
-
-  private boolean logged = false;
-
-  public CrunchRuntimeException(String msg) {
-    super(msg);
-  }
-
-  public CrunchRuntimeException(Exception e) {
-    super(e);
-  }
-
-  public CrunchRuntimeException(String msg, Exception e) {
-    super(msg, e);
-  }
-
-  /**
-   * Returns true if this exception was written to the debug logs.
-   */
-  public boolean wasLogged() {
-    return logged;
-  }
-
-  /**
-   * Indicate that this exception has been written to the debug logs.
-   */
-  public void markLogged() {
-    this.logged = true;
-  }
-}
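
A short sketch of how a client DoFn might use this class to rethrow a checked exception (the ParseFn class and its parsing logic are hypothetical, written only to show the constructor usage):

import java.io.IOException;

import org.apache.crunch.CrunchRuntimeException;
import org.apache.crunch.DoFn;
import org.apache.crunch.Emitter;

public class ParseFn extends DoFn<String, Integer> {

  @Override
  public void process(String input, Emitter<Integer> emitter) {
    try {
      emitter.emit(parse(input));
    } catch (IOException e) {
      // Wrap the checked exception; the Crunch runtime uses wasLogged() and
      // markLogged() to track whether it has been written to the debug logs.
      throw new CrunchRuntimeException("Could not parse record: " + input, e);
    }
  }

  private int parse(String s) throws IOException {
    try {
      return Integer.parseInt(s.trim());
    } catch (NumberFormatException e) {
      throw new IOException("Not a number: " + s, e);
    }
  }
}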

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/DoFn.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/DoFn.java b/crunch/src/main/java/org/apache/crunch/DoFn.java
deleted file mode 100644
index 2c6389a..0000000
--- a/crunch/src/main/java/org/apache/crunch/DoFn.java
+++ /dev/null
@@ -1,162 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import java.io.Serializable;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.Counter;
-import org.apache.hadoop.mapreduce.TaskAttemptID;
-import org.apache.hadoop.mapreduce.TaskInputOutputContext;
-
-/**
- * Base class for all data processing functions in Crunch.
- * 
- * <p>
- * Note that all {@code DoFn} instances implement {@link Serializable}, and thus
- * all of their non-transient member variables must implement
- * {@code Serializable} as well. If your DoFn depends on non-serializable
- * classes for data processing, the corresponding member variables may be
- * declared {@code transient} and initialized in the DoFn's {@code initialize} method.
- * 
- */
-public abstract class DoFn<S, T> implements Serializable {
-  private transient TaskInputOutputContext<?, ?, ?, ?> context;
-
-  /**
-   * Configure this DoFn. Subclasses may override this method to modify the
-   * configuration of the Job that this DoFn instance belongs to.
-   * 
-   * <p>
-   * Called during the job planning phase by the Crunch client.
-   * </p>
-   * 
-   * @param conf
-   *          The Configuration instance for the Job.
-   */
-  public void configure(Configuration conf) {
-  }
-
-  /**
-   * Initialize this DoFn. This initialization will happen before the actual
-   * {@link #process(Object, Emitter)} is triggered. Subclasses may override
-   * this method to do appropriate initialization.
-   * 
-   * <p>
-   * Called during the setup of the job instance this {@code DoFn} is associated
-   * with.
-   * </p>
-   * 
-   */
-  public void initialize() {
-  }
-
-  /**
-   * Processes the records from a {@link PCollection}.
-   * 
-   * <br/>
-   * <br/>
-   * <b>Note:</b> Crunch can reuse a single input record object whose content
-   * changes on each {@link #process(Object, Emitter)} method call. This
-   * behavior is imposed by Hadoop's <a href=
-   * "http://hadoop.apache.org/common/docs/current/api/org/apache/hadoop/mapred/Reducer.html"
-   * >Reducer</a> implementation: <i>The framework will reuse the key and value
-   * objects that are passed into the reduce, therefore the application should
-   * clone the objects they want to keep a copy of.</i>
-   * 
-   * @param input
-   *          The input record.
-   * @param emitter
-   *          The emitter to send the output to
-   */
-  public abstract void process(S input, Emitter<T> emitter);
-
-  /**
-   * Called during the cleanup of the MapReduce job this {@code DoFn} is
-   * associated with. Subclasses may override this method to do appropriate
-   * cleanup.
-   * 
-   * @param emitter
-   *          The emitter that was used for output
-   */
-  public void cleanup(Emitter<T> emitter) {
-  }
-
-  /**
-   * Called during setup to pass the {@link TaskInputOutputContext} to this
-   * {@code DoFn} instance.
-   */
-  public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
-    this.context = context;
-  }
-
-  /**
-   * Returns an estimate of how applying this function to a {@link PCollection}
-   * will cause it to change in size. The optimizer uses these estimates to
-   * decide where to break up dependent MR jobs into separate Map and Reduce
-   * phases in order to minimize I/O.
-   * 
-   * <p>
-   * Subclasses of {@code DoFn} that will substantially alter the size of the
-   * resulting {@code PCollection} should override this method.
-   */
-  public float scaleFactor() {
-    return 1.2f;
-  }
-
-  protected TaskInputOutputContext<?, ?, ?, ?> getContext() {
-    return context;
-  }
-
-  protected Configuration getConfiguration() {
-    return context.getConfiguration();
-  }
-
-  protected Counter getCounter(Enum<?> counterName) {
-    return context.getCounter(counterName);
-  }
-
-  protected Counter getCounter(String groupName, String counterName) {
-    return context.getCounter(groupName, counterName);
-  }
-
-  protected void increment(Enum<?> counterName) {
-    increment(counterName, 1);
-  }
-
-  protected void increment(Enum<?> counterName, long value) {
-    getCounter(counterName).increment(value);
-  }
-
-  protected void progress() {
-    context.progress();
-  }
-
-  protected TaskAttemptID getTaskAttemptID() {
-    return context.getTaskAttemptID();
-  }
-
-  protected void setStatus(String status) {
-    context.setStatus(status);
-  }
-
-  protected String getStatus() {
-    return context.getStatus();
-  }
-
-}
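
For reference, the DoFn lifecycle documented above (configure, initialize, process, cleanup, plus the protected counter and progress helpers) reads more concretely as a small subclass. A minimal sketch against the API in this file; the ToUpperFn name and the counter group/name are illustrative, and the counter helper assumes the framework has already called setContext:

import org.apache.crunch.DoFn;
import org.apache.crunch.Emitter;

// Emits an upper-cased copy of each input line and bumps a counter for every record seen.
public class ToUpperFn extends DoFn<String, String> {
  @Override
  public void process(String input, Emitter<String> emitter) {
    getCounter("ToUpperFn", "recordsSeen").increment(1);  // protected helper from DoFn
    emitter.emit(input.toUpperCase());
  }

  @Override
  public float scaleFactor() {
    return 1.0f;  // output is roughly the same size as the input
  }
}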

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/Emitter.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/Emitter.java b/crunch/src/main/java/org/apache/crunch/Emitter.java
deleted file mode 100644
index d104a09..0000000
--- a/crunch/src/main/java/org/apache/crunch/Emitter.java
+++ /dev/null
@@ -1,37 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-/**
- * Interface for writing outputs from a {@link DoFn}.
- * 
- */
-public interface Emitter<T> {
-  /**
-   * Write the emitted value to the next stage of the pipeline.
-   * 
-   * @param emitted
-   *          The value to write
-   */
-  void emit(T emitted);
-
-  /**
-   * Flushes any values cached by this emitter. Called during the cleanup stage.
-   */
-  void flush();
-}
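
Because Emitter is the only hook a DoFn has for producing output, a tiny in-memory implementation makes DoFns testable outside a pipeline. A sketch only, not part of the Crunch API; the ListEmitter name is illustrative:

import java.util.ArrayList;
import java.util.List;

import org.apache.crunch.Emitter;

// Collects emitted values into a list so a DoFn can be exercised in a plain JUnit test.
public class ListEmitter<T> implements Emitter<T> {
  private final List<T> values = new ArrayList<T>();

  @Override
  public void emit(T emitted) {
    values.add(emitted);
  }

  @Override
  public void flush() {
    // Nothing is buffered, so there is nothing to flush.
  }

  public List<T> getValues() {
    return values;
  }
}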

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/FilterFn.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/FilterFn.java b/crunch/src/main/java/org/apache/crunch/FilterFn.java
deleted file mode 100644
index 440f122..0000000
--- a/crunch/src/main/java/org/apache/crunch/FilterFn.java
+++ /dev/null
@@ -1,244 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import java.util.List;
-
-import org.apache.crunch.fn.FilterFns;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.TaskInputOutputContext;
-
-import com.google.common.collect.ImmutableList;
-
-/**
- * A {@link DoFn} for the common case of filtering the members of a
- * {@link PCollection} based on a boolean condition.
- */
-public abstract class FilterFn<T> extends DoFn<T, T> {
-
-  /**
-   * If true, emit the given record.
-   */
-  public abstract boolean accept(T input);
-
-  @Override
-  public void process(T input, Emitter<T> emitter) {
-    if (accept(input)) {
-      emitter.emit(input);
-    }
-  }
-  
-  @Override
-  public final void cleanup(Emitter<T> emitter) {
-    cleanup();
-  }
-  
-  /**
-   * Called during the cleanup of the MapReduce job this {@code FilterFn} is
-   * associated with. Subclasses may override this method to do appropriate
-   * cleanup.
-   */
-  public void cleanup() {
-  }
-  
-  @Override
-  public float scaleFactor() {
-    return 0.5f;
-  }
-
-  /**
-   * @deprecated Use {@link FilterFns#and(FilterFn...)}
-   */
-  public static <S> FilterFn<S> and(FilterFn<S>... fns) {
-    return new AndFn<S>(fns);
-  }
-
-  /**
-   * @deprecated Use {@link FilterFns#and(FilterFn...)}
-   */
-  public static class AndFn<S> extends FilterFn<S> {
-
-    private final List<FilterFn<S>> fns;
-
-    public AndFn(FilterFn<S>... fns) {
-      this.fns = ImmutableList.<FilterFn<S>> copyOf(fns);
-    }
-    
-    @Override
-    public void configure(Configuration conf) {
-      for (FilterFn<S> fn : fns) {
-        fn.configure(conf);
-      }
-    }
-
-    @Override
-    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
-      for (FilterFn<S> fn : fns) {
-        fn.setContext(context);
-      }
-    }
-    
-    @Override
-    public void initialize() {
-      for (FilterFn<S> fn : fns) {
-        fn.initialize();
-      }
-    }
-
-    @Override
-    public void cleanup() {
-      for (FilterFn<S> fn : fns) {
-        fn.cleanup();
-      }
-    }
-
-    @Override
-    public boolean accept(S input) {
-      for (FilterFn<S> fn : fns) {
-        if (!fn.accept(input)) {
-          return false;
-        }
-      }
-      return true;
-    }
-    
-    @Override
-    public float scaleFactor() {
-      float scaleFactor = 1.0f;
-      for (FilterFn<S> fn : fns) {
-        scaleFactor *= fn.scaleFactor();
-      }
-      return scaleFactor;
-    }
-  }
-
-  /**
-   * @deprecated Use {@link FilterFns#or(FilterFn...)}
-   */
-  public static <S> FilterFn<S> or(FilterFn<S>... fns) {
-    return new OrFn<S>(fns);
-  }
-
-  /**
-   * @deprecated Use {@link FilterFns#or(FilterFn...)}
-   */
-  public static class OrFn<S> extends FilterFn<S> {
-
-    private final List<FilterFn<S>> fns;
-
-    public OrFn(FilterFn<S>... fns) {
-      this.fns = ImmutableList.<FilterFn<S>> copyOf(fns);
-    }
-    
-    @Override
-    public void configure(Configuration conf) {
-      for (FilterFn<S> fn : fns) {
-        fn.configure(conf);
-      }
-    }
-
-    @Override
-    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
-      for (FilterFn<S> fn : fns) {
-        fn.setContext(context);
-      }
-    }
-    
-    @Override
-    public void initialize() {
-      for (FilterFn<S> fn : fns) {
-        fn.initialize();
-      }
-    }
-    
-    @Override
-    public void cleanup() {
-      for (FilterFn<S> fn : fns) {
-        fn.cleanup();
-      }
-    }
-
-    @Override
-    public boolean accept(S input) {
-      for (FilterFn<S> fn : fns) {
-        if (fn.accept(input)) {
-          return true;
-        }
-      }
-      return false;
-    }
-    
-    @Override
-    public float scaleFactor() {
-      float scaleFactor = 0.0f;
-      for (FilterFn<S> fn : fns) {
-        scaleFactor += fn.scaleFactor();
-      }
-      return Math.min(1.0f, scaleFactor);
-    }
-  }
-
-  /**
-   * @deprecated Use {@link FilterFns#not(FilterFn)}
-   */
-  public static <S> FilterFn<S> not(FilterFn<S> fn) {
-    return new NotFn<S>(fn);
-  }
-
-  /**
-   * @deprecated Use {@link FilterFns#not(FilterFn)}
-   */
-  public static class NotFn<S> extends FilterFn<S> {
-
-    private final FilterFn<S> base;
-
-    public NotFn(FilterFn<S> base) {
-      this.base = base;
-    }
-    
-    @Override
-    public void configure(Configuration conf) {
-      base.configure(conf);
-    }
-
-    @Override
-    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
-      base.setContext(context);
-    }
-    
-    @Override
-    public void initialize() {
-      base.initialize();
-    }
-    
-    @Override
-    public void cleanup() {
-      base.cleanup();
-    }
-    
-    @Override
-    public boolean accept(S input) {
-      return !base.accept(input);
-    }
-
-    @Override
-    public float scaleFactor() {
-      return 1.0f - base.scaleFactor();
-    }
-  }
-}
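
The accept/scaleFactor contract above, sketched as a concrete filter (the NonBlankFn name is illustrative); note that the and/or/not helpers in this file are deprecated in favor of org.apache.crunch.fn.FilterFns:

import org.apache.crunch.FilterFn;

// Keeps only lines that contain something other than whitespace.
public class NonBlankFn extends FilterFn<String> {
  @Override
  public boolean accept(String input) {
    return input != null && !input.trim().isEmpty();
  }
}

Applied as lines.filter(new NonBlankFn()) on a PCollection<String>.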

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/GroupingOptions.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/GroupingOptions.java b/crunch/src/main/java/org/apache/crunch/GroupingOptions.java
deleted file mode 100644
index 4aa1343..0000000
--- a/crunch/src/main/java/org/apache/crunch/GroupingOptions.java
+++ /dev/null
@@ -1,167 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import java.util.Map;
-import java.util.Set;
-
-import org.apache.hadoop.io.RawComparator;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Partitioner;
-
-import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
-
-/**
- * Options that can be passed to a {@code groupByKey} operation in order to
- * exercise finer control over how the partitioning, grouping, and sorting of
- * keys is performed.
- * 
- */
-public class GroupingOptions {
-
-  private final Class<? extends Partitioner> partitionerClass;
-  private final Class<? extends RawComparator> groupingComparatorClass;
-  private final Class<? extends RawComparator> sortComparatorClass;
-  private final int numReducers;
-  private final Map<String, String> extraConf;
-  private final Set<SourceTarget<?>> sourceTargets;
-  
-  private GroupingOptions(Class<? extends Partitioner> partitionerClass,
-      Class<? extends RawComparator> groupingComparatorClass, Class<? extends RawComparator> sortComparatorClass,
-      int numReducers, Map<String, String> extraConf, Set<SourceTarget<?>> sourceTargets) {
-    this.partitionerClass = partitionerClass;
-    this.groupingComparatorClass = groupingComparatorClass;
-    this.sortComparatorClass = sortComparatorClass;
-    this.numReducers = numReducers;
-    this.extraConf = extraConf;
-    this.sourceTargets = sourceTargets;
-  }
-
-  public int getNumReducers() {
-    return numReducers;
-  }
-
-  public Class<? extends RawComparator> getSortComparatorClass() {
-    return sortComparatorClass;
-  }
-
-  public Class<? extends RawComparator> getGroupingComparatorClass() {
-    return groupingComparatorClass;
-  }
-  
-  public Class<? extends Partitioner> getPartitionerClass() {
-    return partitionerClass;
-  }
-  
-  public Set<SourceTarget<?>> getSourceTargets() {
-    return sourceTargets;
-  }
-  
-  public void configure(Job job) {
-    if (partitionerClass != null) {
-      job.setPartitionerClass(partitionerClass);
-    }
-    if (groupingComparatorClass != null) {
-      job.setGroupingComparatorClass(groupingComparatorClass);
-    }
-    if (sortComparatorClass != null) {
-      job.setSortComparatorClass(sortComparatorClass);
-    }
-    if (numReducers > 0) {
-      job.setNumReduceTasks(numReducers);
-    }
-    for (Map.Entry<String, String> e : extraConf.entrySet()) {
-      job.getConfiguration().set(e.getKey(), e.getValue());
-    }
-  }
-
-  public boolean isCompatibleWith(GroupingOptions other) {
-    if (partitionerClass != other.partitionerClass) {
-      return false;
-    }
-    if (groupingComparatorClass != other.groupingComparatorClass) {
-      return false;
-    }
-    if (sortComparatorClass != other.sortComparatorClass) {
-      return false;
-    }
-    if (!extraConf.equals(other.extraConf)) {
-      return false;
-    }
-    return true;
-  }
-
-  public static Builder builder() {
-    return new Builder();
-  }
-
-  /**
-   * Builder class for creating {@code GroupingOptions} instances.
-   * 
-   */
-  public static class Builder {
-    private Class<? extends Partitioner> partitionerClass;
-    private Class<? extends RawComparator> groupingComparatorClass;
-    private Class<? extends RawComparator> sortComparatorClass;
-    private int numReducers;
-    private Map<String, String> extraConf = Maps.newHashMap();
-    private Set<SourceTarget<?>> sourceTargets = Sets.newHashSet();
-    
-    public Builder() {
-    }
-
-    public Builder partitionerClass(Class<? extends Partitioner> partitionerClass) {
-      this.partitionerClass = partitionerClass;
-      return this;
-    }
-
-    public Builder groupingComparatorClass(Class<? extends RawComparator> groupingComparatorClass) {
-      this.groupingComparatorClass = groupingComparatorClass;
-      return this;
-    }
-
-    public Builder sortComparatorClass(Class<? extends RawComparator> sortComparatorClass) {
-      this.sortComparatorClass = sortComparatorClass;
-      return this;
-    }
-
-    public Builder numReducers(int numReducers) {
-      if (numReducers <= 0) {
-        throw new IllegalArgumentException("Invalid number of reducers: " + numReducers);
-      }
-      this.numReducers = numReducers;
-      return this;
-    }
-
-    public Builder conf(String confKey, String confValue) {
-      this.extraConf.put(confKey, confValue);
-      return this;
-    }
-    
-    public Builder sourceTarget(SourceTarget<?> st) {
-      this.sourceTargets.add(st);
-      return this;
-    }
-    
-    public GroupingOptions build() {
-      return new GroupingOptions(partitionerClass, groupingComparatorClass, sortComparatorClass,
-          numReducers, extraConf, sourceTargets);
-    }
-  }
-}
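
A minimal sketch of the Builder above; the reducer count, the HashPartitioner choice, and the extra configuration key are illustrative values, not recommendations:

import org.apache.crunch.GroupingOptions;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;

public class GroupingOptionsExample {
  // Ten reducers, hash partitioning, and one extra per-job configuration entry.
  public static GroupingOptions tenReducers() {
    return GroupingOptions.builder()
        .partitionerClass(HashPartitioner.class)
        .numReducers(10)
        .conf("mapred.reduce.slowstart.completed.maps", "0.9")
        .build();
  }
}

The result is handed to a grouping, e.g. table.groupByKey(GroupingOptionsExample.tenReducers()).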

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/MapFn.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/MapFn.java b/crunch/src/main/java/org/apache/crunch/MapFn.java
deleted file mode 100644
index dbf172e..0000000
--- a/crunch/src/main/java/org/apache/crunch/MapFn.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-/**
- * A {@link DoFn} for the common case of emitting exactly one value for each
- * input record.
- * 
- */
-public abstract class MapFn<S, T> extends DoFn<S, T> {
-
-  /**
-   * Maps the given input into an instance of the output type.
-   */
-  public abstract T map(S input);
-
-  @Override
-  public void process(S input, Emitter<T> emitter) {
-    emitter.emit(map(input));
-  }
-
-  @Override
-  public float scaleFactor() {
-    return 1.0f;
-  }
-}
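
A minimal sketch of the one-output-per-input contract above; the LengthFn name is illustrative:

import org.apache.crunch.MapFn;

// Maps each line to its length; exactly one output per input, so scaleFactor() stays at 1.0.
public class LengthFn extends MapFn<String, Integer> {
  @Override
  public Integer map(String input) {
    return input.length();
  }
}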

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/PCollection.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/PCollection.java b/crunch/src/main/java/org/apache/crunch/PCollection.java
deleted file mode 100644
index 6f5abf6..0000000
--- a/crunch/src/main/java/org/apache/crunch/PCollection.java
+++ /dev/null
@@ -1,245 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import java.util.Collection;
-
-import org.apache.crunch.types.PTableType;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.PTypeFamily;
-
-/**
- * A representation of an immutable, distributed collection of elements that is
- * the fundamental target of computations in Crunch.
- *
- */
-public interface PCollection<S> {
-  /**
-   * Returns the {@code Pipeline} associated with this PCollection.
-   */
-  Pipeline getPipeline();
-
-  /**
-   * Returns a {@code PCollection} instance that acts as the union of this
-   * {@code PCollection} and the given {@code PCollection}.
-   */
-  PCollection<S> union(PCollection<S> other);
-  
-  /**
-   * Returns a {@code PCollection} instance that acts as the union of this
-   * {@code PCollection} and the input {@code PCollection}s.
-   */
-  PCollection<S> union(PCollection<S>... collections);
-
-  /**
-   * Applies the given doFn to the elements of this {@code PCollection} and
-   * returns a new {@code PCollection} that is the output of this processing.
-   *
-   * @param doFn
-   *          The {@code DoFn} to apply
-   * @param type
-   *          The {@link PType} of the resulting {@code PCollection}
-   * @return a new {@code PCollection}
-   */
-  <T> PCollection<T> parallelDo(DoFn<S, T> doFn, PType<T> type);
-
-  /**
-   * Applies the given doFn to the elements of this {@code PCollection} and
-   * returns a new {@code PCollection} that is the output of this processing.
-   *
-   * @param name
-   *          An identifier for this processing step, useful for debugging
-   * @param doFn
-   *          The {@code DoFn} to apply
-   * @param type
-   *          The {@link PType} of the resulting {@code PCollection}
-   * @return a new {@code PCollection}
-   */
-  <T> PCollection<T> parallelDo(String name, DoFn<S, T> doFn, PType<T> type);
-  
-  /**
-   * Applies the given doFn to the elements of this {@code PCollection} and
-   * returns a new {@code PCollection} that is the output of this processing.
-   *
-   * @param name
-   *          An identifier for this processing step, useful for debugging
-   * @param doFn
-   *          The {@code DoFn} to apply
-   * @param type
-   *          The {@link PType} of the resulting {@code PCollection}
-   * @param options
-   *          Optional information that is needed for certain pipeline operations
-   * @return a new {@code PCollection}
-   */
-  <T> PCollection<T> parallelDo(String name, DoFn<S, T> doFn, PType<T> type,
-      ParallelDoOptions options);
-
-  /**
-   * Similar to the other {@code parallelDo} instance, but returns a
-   * {@code PTable} instance instead of a {@code PCollection}.
-   *
-   * @param doFn
-   *          The {@code DoFn} to apply
-   * @param type
-   *          The {@link PTableType} of the resulting {@code PTable}
-   * @return a new {@code PTable}
-   */
-  <K, V> PTable<K, V> parallelDo(DoFn<S, Pair<K, V>> doFn, PTableType<K, V> type);
-
-  /**
-   * Similar to the other {@code parallelDo} instance, but returns a
-   * {@code PTable} instance instead of a {@code PCollection}.
-   *
-   * @param name
-   *          An identifier for this processing step
-   * @param doFn
-   *          The {@code DoFn} to apply
-   * @param type
-   *          The {@link PTableType} of the resulting {@code PTable}
-   * @return a new {@code PTable}
-   */
-  <K, V> PTable<K, V> parallelDo(String name, DoFn<S, Pair<K, V>> doFn, PTableType<K, V> type);
-  
-  /**
-   * Similar to the other {@code parallelDo} instance, but returns a
-   * {@code PTable} instance instead of a {@code PCollection}.
-   *
-   * @param name
-   *          An identifier for this processing step
-   * @param doFn
-   *          The {@code DoFn} to apply
-   * @param type
-   *          The {@link PTableType} of the resulting {@code PTable}
-   * @param options
-   *          Optional information that is needed for certain pipeline operations
-   * @return a new {@code PTable}
-   */
-  <K, V> PTable<K, V> parallelDo(String name, DoFn<S, Pair<K, V>> doFn, PTableType<K, V> type,
-      ParallelDoOptions options);
-
-  /**
-   * Write the contents of this {@code PCollection} to the given {@code Target},
-   * using the storage format specified by the target.
-   *
-   * @param target
-   *          The target to write to
-   */
-  PCollection<S> write(Target target);
-
-  /**
-   * Write the contents of this {@code PCollection} to the given {@code Target},
-   * using the given {@code Target.WriteMode} to handle existing
-   * targets.
-   * 
-   * @param target
-   *          The target
-   * @param writeMode
-   *          The rule for handling existing outputs at the target location
-   */
-  PCollection<S> write(Target target, Target.WriteMode writeMode);
-  
-  /**
-   * Returns a reference to the data set represented by this PCollection that
-   * may be used by the client to read the data locally.
-   */
-  Iterable<S> materialize();
-
-  /**
-   * @return A {@code PObject} encapsulating an in-memory {@link Collection} containing the values
-   * of this {@code PCollection}.
-   */
-  PObject<Collection<S>> asCollection();
-
-  /**
-   * Returns the {@code PType} of this {@code PCollection}.
-   */
-  PType<S> getPType();
-
-  /**
-   * Returns the {@code PTypeFamily} of this {@code PCollection}.
-   */
-  PTypeFamily getTypeFamily();
-
-  /**
-   * Returns the size of the data represented by this {@code PCollection} in
-   * bytes.
-   */
-  long getSize();
-
-  /**
-   * Returns the number of elements represented by this {@code PCollection}.
-   *
-   * @return A {@code PObject} containing the number of elements in this {@code PCollection}.
-   */
-  PObject<Long> length();
-
-  /**
-   * Returns a shorthand name for this PCollection.
-   */
-  String getName();
-
-  /**
-   * Apply the given filter function to this instance and return the resulting
-   * {@code PCollection}.
-   */
-  PCollection<S> filter(FilterFn<S> filterFn);
-
-  /**
-   * Apply the given filter function to this instance and return the resulting
-   * {@code PCollection}.
-   *
-   * @param name
-   *          An identifier for this processing step
-   * @param filterFn
-   *          The {@code FilterFn} to apply
-   */
-  PCollection<S> filter(String name, FilterFn<S> filterFn);
-
-  /**
-   * Apply the given map function to each element of this instance in order to
-   * create a {@code PTable}.
-   */
-  <K> PTable<K, S> by(MapFn<S, K> extractKeyFn, PType<K> keyType);
-
-  /**
-   * Apply the given map function to each element of this instance in order to
-   * create a {@code PTable}.
-   *
-   * @param name
-   *          An identifier for this processing step
-   * @param extractKeyFn
-   *          The {@code MapFn} to apply
-   */
-  <K> PTable<K, S> by(String name, MapFn<S, K> extractKeyFn, PType<K> keyType);
-
-  /**
-   * Returns a {@code PTable} instance that contains the counts of each unique
-   * element of this PCollection.
-   */
-  PTable<S, Long> count();
-
-  /**
-   * Returns a {@code PObject} of the maximum element of this instance.
-   */
-  PObject<S> max();
-
-  /**
-   * Returns a {@code PObject} of the minimum element of this instance.
-   */
-  PObject<S> min();
-}
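
A minimal sketch of the parallelDo/count flow described above, reusing the illustrative ToUpperFn from the DoFn section together with the writable PTypes; the class and step names are illustrative:

import org.apache.crunch.PCollection;
import org.apache.crunch.PTable;
import org.apache.crunch.types.writable.Writables;

public class PCollectionExample {
  // Upper-cases every line and then counts how often each distinct result appears.
  public static PTable<String, Long> upperCaseCounts(PCollection<String> lines) {
    PCollection<String> upper = lines.parallelDo("to-upper", new ToUpperFn(), Writables.strings());
    return upper.count();
  }
}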

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/PGroupedTable.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/PGroupedTable.java b/crunch/src/main/java/org/apache/crunch/PGroupedTable.java
deleted file mode 100644
index d77ffdb..0000000
--- a/crunch/src/main/java/org/apache/crunch/PGroupedTable.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import org.apache.crunch.Aggregator;
-
-/**
- * The Crunch representation of a grouped {@link PTable}.
- * 
- */
-public interface PGroupedTable<K, V> extends PCollection<Pair<K, Iterable<V>>> {
-
-  /**
-   * Combines the values of this grouping using the given {@code CombineFn}.
-   * 
-   * @param combineFn
-   *          The combiner function
-   * @return A {@code PTable} where each key has a single value
-   */
-  PTable<K, V> combineValues(CombineFn<K, V> combineFn);
-
-  /**
-   * Combine the values in each group using the given {@link Aggregator}.
-   *
-   * @param aggregator The function to use
-   * @return A {@link PTable} where each group key maps to an aggregated
-   *         value. Group keys may be repeated if an aggregator returns
-   *         more than one value.
-   */
-  PTable<K, V> combineValues(Aggregator<V> aggregator);
-
-  /**
-   * Convert this grouping back into a multimap.
-   * 
-   * @return an ungrouped version of the data in this {@code PGroupedTable}.
-   */
-  PTable<K, V> ungroup();
-}
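
A minimal sketch of combineValues with an Aggregator, assuming the Aggregators factory class in org.apache.crunch.fn; the method name is illustrative:

import org.apache.crunch.PGroupedTable;
import org.apache.crunch.PTable;
import org.apache.crunch.fn.Aggregators;

public class GroupedTableExample {
  // Collapses each group to the sum of its values, yielding one value per key.
  public static PTable<String, Long> sumPerKey(PGroupedTable<String, Long> grouped) {
    return grouped.combineValues(Aggregators.SUM_LONGS());
  }
}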

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/PObject.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/PObject.java b/crunch/src/main/java/org/apache/crunch/PObject.java
deleted file mode 100644
index 897a01f..0000000
--- a/crunch/src/main/java/org/apache/crunch/PObject.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-/**
- * A {@code PObject} represents a singleton object value that results from a distributed
- * computation. Computation producing the value is deferred until
- * {@link org.apache.crunch.PObject#getValue()} is called.
- *
- * @param <T> The type of value encapsulated by this {@code PObject}.
- */
-public interface PObject<T> {
-  /**
-   * Gets the value associated with this {@code PObject}.  Calling this method will trigger
-   * whatever computation is necessary to obtain the value and block until that computation
-   * succeeds.
-   *
-   * @return The value associated with this {@code PObject}.
-   */
-  T getValue();
-}
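
A minimal sketch of the deferred-value contract above, using PCollection#length(); calling getValue() blocks until the pipeline work behind it has run:

import org.apache.crunch.PCollection;
import org.apache.crunch.PObject;

public class PObjectExample {
  // Triggers whatever jobs are needed to compute the element count, then returns it.
  public static long countElements(PCollection<String> lines) {
    PObject<Long> length = lines.length();
    return length.getValue();
  }
}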

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/PTable.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/PTable.java b/crunch/src/main/java/org/apache/crunch/PTable.java
deleted file mode 100644
index 8df9853..0000000
--- a/crunch/src/main/java/org/apache/crunch/PTable.java
+++ /dev/null
@@ -1,181 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import java.util.Collection;
-import java.util.Map;
-
-import org.apache.crunch.types.PTableType;
-import org.apache.crunch.types.PType;
-
-/**
- * A sub-interface of {@code PCollection} that represents an immutable,
- * distributed multi-map of keys and values.
- *
- */
-public interface PTable<K, V> extends PCollection<Pair<K, V>> {
-
-  /**
-   * Returns a {@code PTable} instance that acts as the union of this
-   * {@code PTable} and the other {@code PTable}s.
-   */
-  PTable<K, V> union(PTable<K, V> other);
-  
-  /**
-   * Returns a {@code PTable} instance that acts as the union of this
-   * {@code PTable} and the input {@code PTable}s.
-   */
-  PTable<K, V> union(PTable<K, V>... others);
-
-  /**
-   * Performs a grouping operation on the keys of this table.
-   *
-   * @return a {@code PGroupedTable} instance that represents the grouping
-   */
-  PGroupedTable<K, V> groupByKey();
-
-  /**
-   * Performs a grouping operation on the keys of this table, using the given
-   * number of partitions.
-   *
-   * @param numPartitions
-   *          The number of partitions for the data.
-   * @return a {@code PGroupedTable} instance that represents this grouping
-   */
-  PGroupedTable<K, V> groupByKey(int numPartitions);
-
-  /**
-   * Performs a grouping operation on the keys of this table, using the
-   * additional {@code GroupingOptions} to control how the grouping is executed.
-   *
-   * @param options
-   *          The grouping options to use
-   * @return a {@code PGroupedTable} instance that represents the grouping
-   */
-  PGroupedTable<K, V> groupByKey(GroupingOptions options);
-
-  /**
-   * Writes this {@code PTable} to the given {@code Target}.
-   */
-  PTable<K, V> write(Target target);
-
-  /**
-   * Writes this {@code PTable} to the given {@code Target}, using the
-   * given {@code Target.WriteMode} to handle existing targets.
-   */
-  PTable<K, V> write(Target target, Target.WriteMode writeMode);
-
-  /**
-   * Returns the {@code PTableType} of this {@code PTable}.
-   */
-  PTableType<K, V> getPTableType();
-
-  /**
-   * Returns the {@code PType} of the key.
-   */
-  PType<K> getKeyType();
-
-  /**
-   * Returns the {@code PType} of the value.
-   */
-  PType<V> getValueType();
-
-  /**
-   * Aggregate all of the values with the same key into a single key-value pair
-   * in the returned PTable.
-   */
-  PTable<K, Collection<V>> collectValues();
-
-  /**
-   * Apply the given filter function to this instance and return the resulting
-   * {@code PTable}.
-   */
-  PTable<K, V> filter(FilterFn<Pair<K, V>> filterFn);
-  
-  /**
-   * Apply the given filter function to this instance and return the resulting
-   * {@code PTable}.
-   *
-   * @param name
-   *          An identifier for this processing step
-   * @param filterFn
-   *          The {@code FilterFn} to apply
-   */
-  PTable<K, V> filter(String name, FilterFn<Pair<K, V>> filterFn);
-  
-  /**
-   * Returns a PTable made up of the pairs in this PTable with the largest value
-   * field.
-   *
-   * @param count
-   *          The number of pairs to return
-   */
-  PTable<K, V> top(int count);
-
-  /**
-   * Returns a PTable made up of the pairs in this PTable with the smallest
-   * value field.
-   *
-   * @param count
-   *          The number of pairs to return
-   */
-  PTable<K, V> bottom(int count);
-
-  /**
-   * Perform an inner join on this table and the one passed in as an argument on
-   * their common keys.
-   */
-  <U> PTable<K, Pair<V, U>> join(PTable<K, U> other);
-
-  /**
-   * Co-group operation with the given table on common keys.
-   */
-  <U> PTable<K, Pair<Collection<V>, Collection<U>>> cogroup(PTable<K, U> other);
-
-  /**
-   * Returns a {@link PCollection} made up of the keys in this PTable.
-   */
-  PCollection<K> keys();
-
-  /**
-   * Returns a {@link PCollection} made up of the values in this PTable.
-   */
-  PCollection<V> values();
-
-  /**
-   * Returns a {@code Map<K, V>} made up of the keys and values in this PTable.
-   * <p>
-   * <b>Note:</b> The contents of the returned map may not be exactly the same
-   * as this PTable, as a PTable is a multi-map (i.e. can contain multiple
-   * values for a single key).
-   */
-  Map<K, V> materializeToMap();
-
-  /**
-   * Returns a {@link PObject} encapsulating a {@link Map} made up of the keys and values in this
-   * {@code PTable}.
-   * <p><b>Note:</b>The contents of the returned map may not be exactly the same as this PTable,
-   * as a PTable is a multi-map (i.e. can contain multiple values for a single key).
-   * </p>
-   *
-   * @return The {@code PObject} encapsulating a {@code Map} made up of the keys and values in
-   * this {@code PTable}.
-   */
-  PObject<Map<K, V>> asMap();
-
-}
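
Two of the table operations above, sketched with illustrative method names:

import java.util.Collection;

import org.apache.crunch.PTable;
import org.apache.crunch.Pair;

public class PTableExample {
  // Inner join on the common key type, as described by join() above.
  public static <K, V1, V2> PTable<K, Pair<V1, V2>> innerJoin(PTable<K, V1> left, PTable<K, V2> right) {
    return left.join(right);
  }

  // The multimap view of a table: every value observed for each key, via collectValues().
  public static <K, V> PTable<K, Collection<V>> valuesPerKey(PTable<K, V> table) {
    return table.collectValues();
  }
}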

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/Pair.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/Pair.java b/crunch/src/main/java/org/apache/crunch/Pair.java
deleted file mode 100644
index fd058b6..0000000
--- a/crunch/src/main/java/org/apache/crunch/Pair.java
+++ /dev/null
@@ -1,105 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import org.apache.commons.lang.builder.HashCodeBuilder;
-
-/**
- * A convenience class for two-element {@link Tuple}s.
- */
-public class Pair<K, V> implements Tuple, Comparable<Pair<K, V>> {
-
-  private final K first;
-  private final V second;
-
-  public static <T, U> Pair<T, U> of(T first, U second) {
-    return new Pair<T, U>(first, second);
-  }
-
-  public Pair(K first, V second) {
-    this.first = first;
-    this.second = second;
-  }
-
-  public K first() {
-    return first;
-  }
-
-  public V second() {
-    return second;
-  }
-
-  public Object get(int index) {
-    switch (index) {
-    case 0:
-      return first;
-    case 1:
-      return second;
-    default:
-      throw new ArrayIndexOutOfBoundsException();
-    }
-  }
-
-  public int size() {
-    return 2;
-  }
-
-  @Override
-  public int hashCode() {
-    HashCodeBuilder hcb = new HashCodeBuilder();
-    return hcb.append(first).append(second).toHashCode();
-  }
-
-  @Override
-  public boolean equals(Object obj) {
-    if (this == obj)
-      return true;
-    if (obj == null)
-      return false;
-    if (getClass() != obj.getClass())
-      return false;
-    Pair<?, ?> other = (Pair<?, ?>) obj;
-    return (first == other.first || (first != null && first.equals(other.first)))
-        && (second == other.second || (second != null && second.equals(other.second)));
-  }
-
-  @Override
-  public String toString() {
-    StringBuilder sb = new StringBuilder("[");
-    sb.append(first).append(",").append(second).append("]");
-    return sb.toString();
-  }
-
-  private int cmp(Object lhs, Object rhs) {
-    if (lhs == rhs) {
-      return 0;
-    } else if (lhs != null && Comparable.class.isAssignableFrom(lhs.getClass())) {
-      return ((Comparable) lhs).compareTo(rhs);
-    }
-    return (lhs == null ? 0 : lhs.hashCode()) - (rhs == null ? 0 : rhs.hashCode());
-  }
-
-  @Override
-  public int compareTo(Pair<K, V> o) {
-    int diff = cmp(first, o.first);
-    if (diff == 0) {
-      diff = cmp(second, o.second);
-    }
-    return diff;
-  }
-}
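
A minimal usage sketch of the Pair class above:

import org.apache.crunch.Pair;

public class PairExample {
  public static void main(String[] args) {
    Pair<String, Integer> p = Pair.of("answer", 42);
    // Positional access mirrors first()/second(); index 2 would throw ArrayIndexOutOfBoundsException.
    System.out.println(p.first() + " = " + p.get(1) + ", size = " + p.size());
  }
}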

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/ParallelDoOptions.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/ParallelDoOptions.java b/crunch/src/main/java/org/apache/crunch/ParallelDoOptions.java
deleted file mode 100644
index 2407b3a..0000000
--- a/crunch/src/main/java/org/apache/crunch/ParallelDoOptions.java
+++ /dev/null
@@ -1,62 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import java.util.Collections;
-import java.util.Set;
-
-import com.google.common.collect.Sets;
-
-/**
- * Container class that includes optional information about a {@code parallelDo} operation
- * applied to a {@code PCollection}. Primarily used within the Crunch framework
- * itself for certain types of advanced processing operations, such as in-memory joins
- * that require reading a file from the filesystem into a {@code DoFn}.
- */
-public class ParallelDoOptions {
-  private final Set<SourceTarget<?>> sourceTargets;
-  
-  private ParallelDoOptions(Set<SourceTarget<?>> sourceTargets) {
-    this.sourceTargets = sourceTargets;
-  }
-  
-  public Set<SourceTarget<?>> getSourceTargets() {
-    return sourceTargets;
-  }
-  
-  public static Builder builder() {
-    return new Builder();
-  }
-  
-  public static class Builder {
-    private Set<SourceTarget<?>> sourceTargets;
-    
-    public Builder() {
-      this.sourceTargets = Sets.newHashSet();
-    }
-    
-    public Builder sourceTargets(SourceTarget<?>... sourceTargets) {
-      Collections.addAll(this.sourceTargets, sourceTargets);
-      return this;
-    }
-    
-    public ParallelDoOptions build() {
-      return new ParallelDoOptions(sourceTargets);
-    }
-  }
-}
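
A minimal sketch of the Builder above; how the SourceTarget is obtained (for example from the file-based source/target factories in org.apache.crunch.io) is left out, and the method name is illustrative:

import org.apache.crunch.ParallelDoOptions;
import org.apache.crunch.SourceTarget;

public class ParallelDoOptionsExample {
  // Declares that the DoFn reads the given side input, so the planner can schedule around it.
  public static ParallelDoOptions readingSideInput(SourceTarget<String> sideInput) {
    return ParallelDoOptions.builder()
        .sourceTargets(sideInput)
        .build();
  }
}

The result is passed as the final argument to the four-argument parallelDo overloads on PCollection.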


[22/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/types/writable/WritablesTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/types/writable/WritablesTest.java b/crunch-core/src/test/java/org/apache/crunch/types/writable/WritablesTest.java
new file mode 100644
index 0000000..5396fba
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/types/writable/WritablesTest.java
@@ -0,0 +1,256 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.writable;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.assertSame;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Collection;
+
+import org.apache.crunch.Pair;
+import org.apache.crunch.Tuple3;
+import org.apache.crunch.Tuple4;
+import org.apache.crunch.TupleN;
+import org.apache.crunch.types.PTableType;
+import org.apache.crunch.types.PType;
+import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+public class WritablesTest {
+
+  @Test
+  public void testNulls() throws Exception {
+    Void n = null;
+    NullWritable nw = NullWritable.get();
+    testInputOutputFn(Writables.nulls(), n, nw);
+  }
+
+  @Test
+  public void testStrings() throws Exception {
+    String s = "abc";
+    Text text = new Text(s);
+    testInputOutputFn(Writables.strings(), s, text);
+  }
+
+  @Test
+  public void testInts() throws Exception {
+    int j = 55;
+    IntWritable w = new IntWritable(j);
+    testInputOutputFn(Writables.ints(), j, w);
+  }
+
+  @Test
+  public void testLongs() throws Exception {
+    long j = 55;
+    LongWritable w = new LongWritable(j);
+    testInputOutputFn(Writables.longs(), j, w);
+  }
+
+  @Test
+  public void testFloats() throws Exception {
+    float j = 55.5f;
+    FloatWritable w = new FloatWritable(j);
+    testInputOutputFn(Writables.floats(), j, w);
+  }
+
+  @Test
+  public void testDoubles() throws Exception {
+    double j = 55.5d;
+    DoubleWritable w = new DoubleWritable(j);
+    testInputOutputFn(Writables.doubles(), j, w);
+  }
+
+  @Test
+  public void testBoolean() throws Exception {
+    boolean j = false;
+    BooleanWritable w = new BooleanWritable(j);
+    testInputOutputFn(Writables.booleans(), j, w);
+  }
+
+  @Test
+  public void testBytes() throws Exception {
+    byte[] bytes = new byte[] { 17, 26, -98 };
+    BytesWritable bw = new BytesWritable(bytes);
+    ByteBuffer bb = ByteBuffer.wrap(bytes);
+    testInputOutputFn(Writables.bytes(), bb, bw);
+  }
+
+  @Test
+  public void testCollections() throws Exception {
+    String s = "abc";
+    Collection<String> j = Lists.newArrayList();
+    j.add(s);
+    GenericArrayWritable<Text> w = new GenericArrayWritable<Text>(Text.class);
+    w.set(new Text[] { new Text(s) });
+    testInputOutputFn(Writables.collections(Writables.strings()), j, w);
+  }
+
+  @Test
+  public void testPairs() throws Exception {
+    Pair<String, String> j = Pair.of("a", "b");
+    TupleWritable w = new TupleWritable(new Text[] { new Text("a"), new Text("b"), });
+    w.setWritten(0);
+    w.setWritten(1);
+    testInputOutputFn(Writables.pairs(Writables.strings(), Writables.strings()), j, w);
+  }
+
+  @Test
+  public void testNestedTables() throws Exception {
+    PTableType<Long, Long> pll = Writables.tableOf(Writables.longs(), Writables.longs());
+    PTableType<Pair<Long, Long>, String> nest = Writables.tableOf(pll, Writables.strings());
+    assertNotNull(nest);
+  }
+
+  @Test
+  public void testPairEquals() throws Exception {
+    PType<Pair<Long, ByteBuffer>> t1 = Writables.pairs(Writables.longs(), Writables.bytes());
+    PType<Pair<Long, ByteBuffer>> t2 = Writables.pairs(Writables.longs(), Writables.bytes());
+    assertEquals(t1, t2);
+    assertEquals(t1.hashCode(), t2.hashCode());
+  }
+
+  @Test
+  @SuppressWarnings("rawtypes")
+  public void testTriples() throws Exception {
+    Tuple3 j = Tuple3.of("a", "b", "c");
+    TupleWritable w = new TupleWritable(new Text[] { new Text("a"), new Text("b"), new Text("c"), });
+    w.setWritten(0);
+    w.setWritten(1);
+    w.setWritten(2);
+    WritableType<?, ?> wt = Writables.triples(Writables.strings(), Writables.strings(), Writables.strings());
+    testInputOutputFn(wt, j, w);
+  }
+
+  @Test
+  @SuppressWarnings("rawtypes")
+  public void testQuads() throws Exception {
+    Tuple4 j = Tuple4.of("a", "b", "c", "d");
+    TupleWritable w = new TupleWritable(new Text[] { new Text("a"), new Text("b"), new Text("c"), new Text("d"), });
+    w.setWritten(0);
+    w.setWritten(1);
+    w.setWritten(2);
+    w.setWritten(3);
+    WritableType<?, ?> wt = Writables.quads(Writables.strings(), Writables.strings(), Writables.strings(),
+        Writables.strings());
+    testInputOutputFn(wt, j, w);
+  }
+
+  @Test
+  public void testTupleN() throws Exception {
+    TupleN j = new TupleN("a", "b", "c", "d", "e");
+    TupleWritable w = new TupleWritable(new Text[] { new Text("a"), new Text("b"), new Text("c"), new Text("d"),
+        new Text("e"), });
+    w.setWritten(0);
+    w.setWritten(1);
+    w.setWritten(2);
+    w.setWritten(3);
+    w.setWritten(4);
+    WritableType<?, ?> wt = Writables.tuples(Writables.strings(), Writables.strings(), Writables.strings(),
+        Writables.strings(), Writables.strings());
+    testInputOutputFn(wt, j, w);
+  }
+
+  protected static class TestWritable implements Writable {
+    String left;
+    int right;
+
+    @Override
+    public void write(DataOutput out) throws IOException {
+      out.writeUTF(left);
+      out.writeInt(right);
+    }
+
+    @Override
+    public void readFields(DataInput in) throws IOException {
+      left = in.readUTF();
+      right = in.readInt();
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+      if (this == obj)
+        return true;
+      if (obj == null)
+        return false;
+      if (getClass() != obj.getClass())
+        return false;
+      TestWritable other = (TestWritable) obj;
+      if (left == null) {
+        if (other.left != null)
+          return false;
+      } else if (!left.equals(other.left))
+        return false;
+      if (right != other.right)
+        return false;
+      return true;
+    }
+
+  }
+
+  @Test
+  public void testRecords() throws Exception {
+    TestWritable j = new TestWritable();
+    j.left = "a";
+    j.right = 1;
+    TestWritable w = new TestWritable();
+    w.left = "a";
+    w.right = 1;
+    WritableType<?, ?> wt = Writables.records(TestWritable.class);
+    testInputOutputFn(wt, j, w);
+  }
+
+  @Test
+  public void testTableOf() throws Exception {
+    Pair<String, String> j = Pair.of("a", "b");
+    Pair<Text, Text> w = Pair.of(new Text("a"), new Text("b"));
+    WritableTableType<String, String> wtt = Writables.tableOf(Writables.strings(), Writables.strings());
+    testInputOutputFn(wtt, j, w);
+  }
+
+  @Test
+  public void testRegister() throws Exception {
+    WritableType<TestWritable, TestWritable> wt = Writables.writables(TestWritable.class);
+    Writables.register(TestWritable.class, wt);
+    assertSame(Writables.records(TestWritable.class), wt);
+  }
+
+  @SuppressWarnings({ "unchecked", "rawtypes" })
+  protected static void testInputOutputFn(PType ptype, Object java, Object writable) {
+    ptype.getInputMapFn().initialize();
+    ptype.getOutputMapFn().initialize();
+    assertEquals(java, ptype.getInputMapFn().map(writable));
+    assertEquals(writable, ptype.getOutputMapFn().map(java));
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/util/DistCacheTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/util/DistCacheTest.java b/crunch-core/src/test/java/org/apache/crunch/util/DistCacheTest.java
new file mode 100644
index 0000000..6784f14
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/util/DistCacheTest.java
@@ -0,0 +1,156 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.util;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+public class DistCacheTest {
+
+  // A temporary folder used to hold files created for the test.
+  @Rule
+  public TemporaryFolder testFolder = new TemporaryFolder();
+
+  // A configuration and lists of paths to use in tests.
+  private Configuration testConf;
+  private String[] testFilePaths;
+  private String[] testFileQualifiedPaths;
+
+  /**
+   * Setup resources for tests. These include:
+   * <ol>
+   * <li>A Hadoop configuration.
+   * <li>A directory of temporary files that includes 3 .jar files and 1 other
+   * file.
+   * <li>Arrays containing the canonical paths and qualified paths to the test
+   * files.
+   * </ol>
+   */
+  @Before
+  public void setup() throws IOException {
+    // Create a configuration for tests.
+    testConf = new Configuration();
+
+    // Create the test files and add their paths to the list of test file paths.
+    testFilePaths = new String[3];
+    testFilePaths[0] = testFolder.newFile("jar1.jar").getCanonicalPath();
+    testFilePaths[1] = testFolder.newFile("jar2.jar").getCanonicalPath();
+    testFilePaths[2] = testFolder.newFile("jar3.jar").getCanonicalPath();
+    testFolder.newFile("notJar.other");
+
+    // Populate a list of qualified paths from the test file paths.
+    testFileQualifiedPaths = new String[3];
+    for (int i = 0; i < testFilePaths.length; i++) {
+      testFileQualifiedPaths[i] = "file:" + testFilePaths[i];
+    }
+  }
+
+  /**
+   * Tests adding jars one-by-one to a job's configuration.
+   * 
+   * @throws IOException
+   *           If there is a problem adding the jars.
+   */
+  @Test
+  public void testAddJar() throws IOException {
+    // Add each valid jar path to the distributed cache configuration, and
+    // verify each was added correctly in turn.
+    for (int i = 0; i < testFilePaths.length; i++) {
+      DistCache.addJarToDistributedCache(testConf, testFilePaths[i]);
+      assertEquals("tmpjars configuration var does not contain expected value.",
+          StringUtils.join(testFileQualifiedPaths, ",", 0, i + 1), testConf.get("tmpjars"));
+    }
+  }
+
+  /**
+   * Tests that attempting to add the path to a jar that does not exist to the
+   * configuration throws an exception.
+   * 
+   * @throws IOException
+   *           If the added jar path does not exist. This exception is expected.
+   */
+  @Test(expected = IOException.class)
+  public void testAddJarThatDoesntExist() throws IOException {
+    DistCache.addJarToDistributedCache(testConf, "/garbage/doesntexist.jar");
+  }
+
+  /**
+   * Tests that adding a directory of jars to the configuration works as
+   * expected. .jar files under the added directory should be added to the
+   * configuration, and all other files should be skipped.
+   * 
+   * @throws IOException
+   *           If there is a problem adding the jar directory to the
+   *           configuration.
+   */
+  @Test
+  public void testAddJarDirectory() throws IOException {
+    DistCache.addJarDirToDistributedCache(testConf, testFolder.getRoot().getCanonicalPath());
+    // Put the added jar paths in a set to detect duplicates.
+    String[] splitJarPaths = StringUtils.split(testConf.get("tmpjars"), ",");
+    Set<String> addedJarPaths = new HashSet<String>();
+    for (String path : splitJarPaths) {
+      addedJarPaths.add(path);
+    }
+    assertEquals("Incorrect number of jar paths added.", testFilePaths.length, addedJarPaths.size());
+
+    // Ensure all expected paths were added.
+    for (int i = 0; i < testFileQualifiedPaths.length; i++) {
+      assertTrue("Expected jar path missing from jar paths added to tmpjars: " + testFileQualifiedPaths[i],
+          addedJarPaths.contains(testFileQualifiedPaths[i]));
+    }
+  }
+
+  /**
+   * Tests that adding a jar directory that does not exist to the configuration
+   * throws an exception.
+   * 
+   * @throws IOException
+   *           If the added jar directory does not exist. This exception is
+   *           expected.
+   */
+  @Test(expected = IOException.class)
+  public void testAddJarDirectoryThatDoesntExist() throws IOException {
+    DistCache.addJarDirToDistributedCache(testConf, "/garbage/doesntexist");
+  }
+
+  /**
+   * Tests that passing a path that is not a directory as the jar directory
+   * to the configuration throws an exception.
+   * 
+   * @throws IOException
+   *           If the added jar directory is not a directory. This exception is
+   *           expected.
+   */
+  @Test(expected = IOException.class)
+  public void testAddJarDirectoryNotDirectory() throws IOException {
+    DistCache.addJarDirToDistributedCache(testConf, testFilePaths[0]);
+  }
+}
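
For context, a minimal usage sketch of the DistCache helpers exercised by the
test above. The org.apache.crunch.util package for DistCache and the
/opt/myapp paths are assumptions for illustration; the (Configuration, String)
signatures and the comma-separated "tmpjars" property come from the test
itself.

    import org.apache.crunch.util.DistCache;        // assumed package
    import org.apache.hadoop.conf.Configuration;

    public class DistCacheUsageSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Ship one jar with the job; the path must exist and is appended to
        // the comma-separated "tmpjars" property with a "file:" qualifier.
        DistCache.addJarToDistributedCache(conf, "/opt/myapp/lib/extra.jar");

        // Ship every *.jar found in a directory; non-jar files are skipped.
        DistCache.addJarDirToDistributedCache(conf, "/opt/myapp/lib");

        System.out.println(conf.get("tmpjars"));
      }
    }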

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-dist/pom.xml
----------------------------------------------------------------------
diff --git a/crunch-dist/pom.xml b/crunch-dist/pom.xml
index 749a767..cdd4256 100644
--- a/crunch-dist/pom.xml
+++ b/crunch-dist/pom.xml
@@ -35,7 +35,7 @@ under the License.
   <dependencies>
     <dependency>
       <groupId>org.apache.crunch</groupId>
-      <artifactId>crunch</artifactId>
+      <artifactId>crunch-core</artifactId>
     </dependency>
     <dependency>
       <groupId>org.apache.crunch</groupId>

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-examples/pom.xml
----------------------------------------------------------------------
diff --git a/crunch-examples/pom.xml b/crunch-examples/pom.xml
index fd790c3..fcbe30c 100644
--- a/crunch-examples/pom.xml
+++ b/crunch-examples/pom.xml
@@ -36,7 +36,7 @@ under the License.
 
     <dependency>
       <groupId>org.apache.crunch</groupId>
-      <artifactId>crunch</artifactId>
+      <artifactId>crunch-core</artifactId>
     </dependency>
 
     <dependency>

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-hbase/pom.xml
----------------------------------------------------------------------
diff --git a/crunch-hbase/pom.xml b/crunch-hbase/pom.xml
index 656c6cc..df21ef8 100644
--- a/crunch-hbase/pom.xml
+++ b/crunch-hbase/pom.xml
@@ -31,7 +31,7 @@ under the License.
   <dependencies>
     <dependency>
       <groupId>org.apache.crunch</groupId>
-      <artifactId>crunch</artifactId>
+      <artifactId>crunch-core</artifactId>
     </dependency>
 
     <dependency>

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-scrunch/pom.xml
----------------------------------------------------------------------
diff --git a/crunch-scrunch/pom.xml b/crunch-scrunch/pom.xml
index 7db5ac7..b97766a 100644
--- a/crunch-scrunch/pom.xml
+++ b/crunch-scrunch/pom.xml
@@ -43,7 +43,7 @@ under the License.
     </dependency>
     <dependency>
       <groupId>org.apache.crunch</groupId>
-      <artifactId>crunch</artifactId>
+      <artifactId>crunch-core</artifactId>
     </dependency>
     <dependency>
       <groupId>org.apache.hadoop</groupId>

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/pom.xml
----------------------------------------------------------------------
diff --git a/crunch/pom.xml b/crunch/pom.xml
deleted file mode 100644
index 2a38913..0000000
--- a/crunch/pom.xml
+++ /dev/null
@@ -1,182 +0,0 @@
-<!--
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements.  See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership.  The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License.  You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing,
-software distributed under the License is distributed on an
-"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-KIND, either express or implied.  See the License for the
-specific language governing permissions and limitations
-under the License.
--->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
-
-  <modelVersion>4.0.0</modelVersion>
-  <parent>
-    <groupId>org.apache.crunch</groupId>
-    <artifactId>crunch-parent</artifactId>
-    <version>0.6.0-SNAPSHOT</version>
-  </parent>
-
-  <artifactId>crunch</artifactId>
-  <name>Apache Crunch Core</name>
-
-  <dependencies>
-    <dependency>
-      <groupId>com.google.guava</groupId>
-      <artifactId>guava</artifactId>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.avro</groupId>
-      <artifactId>avro</artifactId>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.avro</groupId>
-      <artifactId>avro-mapred</artifactId>
-    </dependency>
-
-    <dependency>
-      <groupId>org.javassist</groupId>
-      <artifactId>javassist</artifactId>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-client</artifactId>
-      <scope>provided</scope>
-    </dependency>
-
-    <!-- Override the slf4j dependency from Avro, which is incompatible with
-         Hadoop's. -->
-    <dependency>
-      <groupId>org.slf4j</groupId>
-      <artifactId>slf4j-api</artifactId>
-      <scope>provided</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>commons-codec</groupId>
-      <artifactId>commons-codec</artifactId>
-      <scope>provided</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>org.codehaus.jackson</groupId>
-      <artifactId>jackson-core-asl</artifactId>
-      <scope>provided</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>org.codehaus.jackson</groupId>
-      <artifactId>jackson-mapper-asl</artifactId>
-      <scope>provided</scope>
-    </dependency>
-    
-    <!-- Both Protobufs and Thrift are supported as
-         derived serialization types, and you can use
-         (almost) any version of them you like; Crunch
-         only relies on the stable public APIs, not the
-         structure of the files themselves.
-
-         Both dependencies are scoped as provided, in
-         order to not expand the size of the assembly jars
-         unnecessarily.
-    -->
-
-    <dependency>
-      <groupId>com.google.protobuf</groupId>
-      <artifactId>protobuf-java</artifactId>
-      <scope>provided</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.thrift</groupId>
-      <artifactId>libthrift</artifactId>
-      <scope>provided</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>commons-logging</groupId>
-      <artifactId>commons-logging</artifactId>
-      <scope>provided</scope>
-    </dependency>
-   
-    <dependency>
-      <groupId>org.slf4j</groupId>
-      <artifactId>slf4j-log4j12</artifactId>
-      <scope>provided</scope>
-    </dependency>
-
-    <!-- Used by LocalJobRunner in integration tests -->
-    <dependency>
-      <groupId>commons-httpclient</groupId>
-      <artifactId>commons-httpclient</artifactId>
-      <scope>test</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.crunch</groupId>
-      <artifactId>crunch-test</artifactId>
-      <scope>test</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>junit</groupId>
-      <artifactId>junit</artifactId>
-      <scope>test</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>org.mockito</groupId>
-      <artifactId>mockito-all</artifactId>
-      <scope>test</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>org.hamcrest</groupId>
-      <artifactId>hamcrest-all</artifactId>
-      <scope>test</scope>
-    </dependency>
-
-  </dependencies>
-
-  <build>
-    <plugins>
-      <plugin>
-        <groupId>org.codehaus.mojo</groupId>
-        <artifactId>build-helper-maven-plugin</artifactId>
-      </plugin>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-failsafe-plugin</artifactId>
-      </plugin>
-      <plugin>
-        <groupId>org.apache.avro</groupId>
-        <artifactId>avro-maven-plugin</artifactId>
-        <executions>
-          <execution>
-            <id>schemas</id>
-            <phase>generate-sources</phase>
-            <goals>
-              <goal>schema</goal>
-            </goals>
-            <configuration>
-              <testSourceDirectory>${project.basedir}/src/test/avro/</testSourceDirectory>
-              <testOutputDirectory>target/generated-test-sources/</testOutputDirectory>
-            </configuration>
-          </execution>
-        </executions>
-      </plugin>
-    </plugins>
-  </build>
-
-</project>

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/CancelJobsIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/CancelJobsIT.java b/crunch/src/it/java/org/apache/crunch/CancelJobsIT.java
deleted file mode 100644
index ff01a2f..0000000
--- a/crunch/src/it/java/org/apache/crunch/CancelJobsIT.java
+++ /dev/null
@@ -1,84 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import java.io.IOException;
-
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.io.To;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.junit.Rule;
-import org.junit.Test;
-
-/**
- *
- */
-public class CancelJobsIT {
-
-  @Rule
-  public TemporaryPath tmpDir = TemporaryPaths.create();
-
-  @Test
-  public void testRun() throws Exception {
-    PipelineExecution pe = run();
-    pe.waitUntilDone();
-    PipelineResult pr = pe.getResult();
-    assertEquals(PipelineExecution.Status.SUCCEEDED, pe.getStatus());
-    assertEquals(2, pr.getStageResults().size());
-  }
-  
-  @Test
-  public void testKill() throws Exception {
-    PipelineExecution pe = run();
-    pe.kill();
-    pe.waitUntilDone();
-    assertEquals(PipelineExecution.Status.KILLED, pe.getStatus());
-  }
-
-  @Test
-  public void testKillMultipleTimes() throws Exception {
-    PipelineExecution pe = run();
-    for (int i = 0; i < 10; i++) {
-      pe.kill();
-    }
-    pe.waitUntilDone();
-    assertEquals(PipelineExecution.Status.KILLED, pe.getStatus());
-  }
-
-  @Test
-  public void testKillAfterDone() throws Exception {
-    PipelineExecution pe = run();
-    pe.waitUntilDone();
-    assertEquals(PipelineExecution.Status.SUCCEEDED, pe.getStatus());
-    pe.kill(); // expect no-op
-    assertEquals(PipelineExecution.Status.SUCCEEDED, pe.getStatus());
-  }
-  
-  public PipelineExecution run() throws IOException {
-    String shakes = tmpDir.copyResourceFileName("shakes.txt");
-    String out = tmpDir.getFileName("cancel");
-    Pipeline p = new MRPipeline(CancelJobsIT.class, tmpDir.getDefaultConfiguration());
-    PCollection<String> words = p.readTextFile(shakes);
-    p.write(words.count().top(20), To.textFile(out));
-    return p.runAsync(); // may need a hack to slow down job start-up if this test becomes flaky.
-  }
-}
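
The removed test above exercises Crunch's asynchronous execution API; a short
sketch of the same pattern follows, assuming PipelineExecution lives in
org.apache.crunch (it needs no import in the test) and using placeholder
input/output paths.

    import org.apache.crunch.PCollection;
    import org.apache.crunch.Pipeline;
    import org.apache.crunch.PipelineExecution;     // assumed package
    import org.apache.crunch.impl.mr.MRPipeline;
    import org.apache.crunch.io.To;

    public class AsyncPipelineSketch {
      public static void main(String[] args) throws Exception {
        Pipeline p = new MRPipeline(AsyncPipelineSketch.class);
        PCollection<String> lines = p.readTextFile("/tmp/input.txt");      // placeholder
        p.write(lines.count().top(20), To.textFile("/tmp/top-words"));     // placeholder

        PipelineExecution exec = p.runAsync();
        if (args.length > 0 && "cancel".equals(args[0])) {
          exec.kill();   // idempotent, and a no-op once the pipeline has finished
        }
        exec.waitUntilDone();
        System.out.println("Final status: " + exec.getStatus());
      }
    }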

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/CleanTextIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/CleanTextIT.java b/crunch/src/it/java/org/apache/crunch/CleanTextIT.java
deleted file mode 100644
index 2f4004e..0000000
--- a/crunch/src/it/java/org/apache/crunch/CleanTextIT.java
+++ /dev/null
@@ -1,82 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.File;
-import java.nio.charset.Charset;
-import java.util.List;
-
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.io.To;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.avro.Avros;
-import org.junit.Rule;
-import org.junit.Test;
-
-import com.google.common.io.Files;
-
-/**
- *
- */
-public class CleanTextIT {
-
-  private static final int LINES_IN_SHAKES = 3667;
-  
-  @Rule
-  public TemporaryPath tmpDir = TemporaryPaths.create();
-  
-  static DoFn<String, String> CLEANER = new DoFn<String, String>() {
-    @Override
-    public void process(String input, Emitter<String> emitter) {
-      emitter.emit(input.toLowerCase());
-    }
-  };
-  
-  static DoFn<String, String> SPLIT = new DoFn<String, String>() {
-    @Override
-    public void process(String input, Emitter<String> emitter) {
-      for (String word : input.split("\\S+")) {
-        if (!word.isEmpty()) {
-          emitter.emit(word);
-        }
-      }
-    }
-  };
-  
-  @Test
-  public void testMapSideOutputs() throws Exception {
-    Pipeline pipeline = new MRPipeline(CleanTextIT.class, tmpDir.getDefaultConfiguration());
-    String shakesInputPath = tmpDir.copyResourceFileName("shakes.txt");
-    PCollection<String> shakespeare = pipeline.readTextFile(shakesInputPath);
-    
-    PCollection<String> cleanShakes = shakespeare.parallelDo(CLEANER, Avros.strings());
-    File cso = tmpDir.getFile("cleanShakes");
-    cleanShakes.write(To.textFile(cso.getAbsolutePath()));
-    
-    File wc = tmpDir.getFile("wordCounts");
-    cleanShakes.parallelDo(SPLIT, Avros.strings()).count().write(To.textFile(wc.getAbsolutePath()));
-    pipeline.done();
-    
-    File cleanFile = new File(cso, "part-m-00000");
-    List<String> lines = Files.readLines(cleanFile, Charset.defaultCharset());
-    assertEquals(LINES_IN_SHAKES, lines.size());
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/CollectionPObjectIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/CollectionPObjectIT.java b/crunch/src/it/java/org/apache/crunch/CollectionPObjectIT.java
deleted file mode 100644
index 7e0c75c..0000000
--- a/crunch/src/it/java/org/apache/crunch/CollectionPObjectIT.java
+++ /dev/null
@@ -1,98 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.IOException;
-import java.lang.String;
-import java.util.Collection;
-
-import org.apache.crunch.PCollection;
-import org.apache.crunch.PObject;
-import org.apache.crunch.Pipeline;
-import org.apache.crunch.impl.mem.MemPipeline;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.materialize.pobject.CollectionPObject;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.junit.Rule;
-import org.junit.Test;
-
-@SuppressWarnings("serial")
-public class CollectionPObjectIT {
-
-  private static final int LINES_IN_SHAKES = 3667;
-
-  private static final String FIRST_SHAKESPEARE_LINE =
-      "***The Project Gutenberg's Etext of Shakespeare's First Folio***";
-
-  private static final String LAST_SHAKESPEARE_LINE =
-      "FINIS. THE TRAGEDIE OF MACBETH.";
-
-  @Rule
-  public TemporaryPath tmpDir = TemporaryPaths.create();
-
-  @Test
-  public void testPObjectMRPipeline() throws IOException {
-    runPObject(new MRPipeline(CollectionPObjectIT.class, tmpDir.getDefaultConfiguration()));
-  }
-
-  @Test
-  public void testAsCollectionMRPipeline() throws IOException {
-    runAsCollection(new MRPipeline(CollectionPObjectIT.class, tmpDir.getDefaultConfiguration()));
-  }
-
-  @Test
-  public void testPObjectMemPipeline() throws IOException {
-    runPObject(MemPipeline.getInstance());
-  }
-
-  @Test
-  public void testAsCollectionMemPipeline() throws IOException {
-    runAsCollection(MemPipeline.getInstance());
-  }
-
-  private PCollection<String> getPCollection(Pipeline pipeline) throws IOException {
-    String shakesInputPath = tmpDir.copyResourceFileName("shakes.txt");
-    PCollection<String> shakespeare = pipeline.readTextFile(shakesInputPath);
-    return shakespeare;
-  }
-
-  private void verifyLines(String[] lines) {
-    assertEquals("Not enough lines in Shakespeare.", LINES_IN_SHAKES, lines.length);
-    assertEquals("First line in Shakespeare is wrong.", FIRST_SHAKESPEARE_LINE, lines[0]);
-    assertEquals("Last line in Shakespeare is wrong.", LAST_SHAKESPEARE_LINE,
-        lines[lines.length - 1]);
-  }
-
-  public void runPObject(Pipeline pipeline) throws IOException {
-    PCollection<String> shakespeare = getPCollection(pipeline);
-    PObject<Collection<String>> linesP = new CollectionPObject<String>(shakespeare);
-    String[] lines = new String[LINES_IN_SHAKES];
-    lines = linesP.getValue().toArray(lines);
-    verifyLines(lines);
-  }
-
-  public void runAsCollection(Pipeline pipeline) throws IOException {
-    PCollection<String> shakespeare = getPCollection(pipeline);
-    String[] lines = new String[LINES_IN_SHAKES];
-    lines = shakespeare.asCollection().getValue().toArray(lines);
-    verifyLines(lines);
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/CollectionsIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/CollectionsIT.java b/crunch/src/it/java/org/apache/crunch/CollectionsIT.java
deleted file mode 100644
index 17d0cae..0000000
--- a/crunch/src/it/java/org/apache/crunch/CollectionsIT.java
+++ /dev/null
@@ -1,117 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import static org.junit.Assert.assertTrue;
-
-import java.io.IOException;
-import java.util.Collection;
-
-import org.apache.crunch.fn.Aggregators.SimpleAggregator;
-import org.apache.crunch.impl.mem.MemPipeline;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.PTypeFamily;
-import org.apache.crunch.types.avro.AvroTypeFamily;
-import org.apache.crunch.types.writable.WritableTypeFamily;
-import org.junit.Rule;
-import org.junit.Test;
-
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Lists;
-
-@SuppressWarnings("serial")
-public class CollectionsIT {
-
-  private static class AggregateStringListFn extends SimpleAggregator<Collection<String>> {
-    private final Collection<String> rtn = Lists.newArrayList();
-
-    @Override
-    public void reset() {
-      rtn.clear();
-    }
-
-    @Override
-    public void update(Collection<String> values) {
-      rtn.addAll(values);
-    }
-
-    @Override
-    public Iterable<Collection<String>> results() {
-      return ImmutableList.of(rtn);
-    }
-  }
-
-  private static PTable<String, Collection<String>> listOfCharcters(PCollection<String> lines, PTypeFamily typeFamily) {
-
-    return lines.parallelDo(new DoFn<String, Pair<String, Collection<String>>>() {
-      @Override
-      public void process(String line, Emitter<Pair<String, Collection<String>>> emitter) {
-        for (String word : line.split("\\s+")) {
-          Collection<String> characters = Lists.newArrayList();
-          for (char c : word.toCharArray()) {
-            characters.add(String.valueOf(c));
-          }
-          emitter.emit(Pair.of(word, characters));
-        }
-      }
-    }, typeFamily.tableOf(typeFamily.strings(), typeFamily.collections(typeFamily.strings())))
-        .groupByKey().combineValues(new AggregateStringListFn());
-  }
-
-  @Rule
-  public TemporaryPath tmpDir = TemporaryPaths.create();
-
-  @Test
-  public void testWritables() throws IOException {
-    run(new MRPipeline(CollectionsIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance());
-  }
-
-  @Test
-  public void testAvro() throws IOException {
-    run(new MRPipeline(CollectionsIT.class, tmpDir.getDefaultConfiguration()), AvroTypeFamily.getInstance());
-  }
-
-  @Test
-  public void testInMemoryWritables() throws IOException {
-    run(MemPipeline.getInstance(), WritableTypeFamily.getInstance());
-  }
-
-  @Test
-  public void testInMemoryAvro() throws IOException {
-    run(MemPipeline.getInstance(), AvroTypeFamily.getInstance());
-  }
-
-  public void run(Pipeline pipeline, PTypeFamily typeFamily) throws IOException {
-    String shakesInputPath = tmpDir.copyResourceFileName("shakes.txt");
-
-    PCollection<String> shakespeare = pipeline.readTextFile(shakesInputPath);
-    Iterable<Pair<String, Collection<String>>> lines = listOfCharcters(shakespeare, typeFamily).materialize();
-
-    boolean passed = false;
-    for (Pair<String, Collection<String>> line : lines) {
-      if (line.first().startsWith("yellow")) {
-        passed = true;
-        break;
-      }
-    }
-    pipeline.done();
-    assertTrue(passed);
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/CollectionsLengthIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/CollectionsLengthIT.java b/crunch/src/it/java/org/apache/crunch/CollectionsLengthIT.java
deleted file mode 100644
index 3a38b92..0000000
--- a/crunch/src/it/java/org/apache/crunch/CollectionsLengthIT.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.IOException;
-import java.lang.Long;
-
-import org.apache.crunch.impl.mem.MemPipeline;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.PTypeFamily;
-import org.apache.crunch.types.avro.AvroTypeFamily;
-import org.apache.crunch.types.writable.WritableTypeFamily;
-import org.junit.Rule;
-import org.junit.Test;
-
-@SuppressWarnings("serial")
-public class CollectionsLengthIT {
-
-  public static final Long LINES_IN_SHAKESPEARE = 3667L;
-
-  @Rule
-  public TemporaryPath tmpDir = TemporaryPaths.create();
-
-  @Test
-  public void testWritables() throws IOException {
-    run(new MRPipeline(CollectionsIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance());
-  }
-
-  @Test
-  public void testAvro() throws IOException {
-    run(new MRPipeline(CollectionsIT.class, tmpDir.getDefaultConfiguration()), AvroTypeFamily.getInstance());
-  }
-
-  @Test
-  public void testInMemoryWritables() throws IOException {
-    run(MemPipeline.getInstance(), WritableTypeFamily.getInstance());
-  }
-
-  @Test
-  public void testInMemoryAvro() throws IOException {
-    run(MemPipeline.getInstance(), AvroTypeFamily.getInstance());
-  }
-
-  public void run(Pipeline pipeline, PTypeFamily typeFamily) throws IOException {
-    String shakesInputPath = tmpDir.copyResourceFileName("shakes.txt");
-
-    PCollection<String> shakespeare = pipeline.readTextFile(shakesInputPath);
-    Long length = shakespeare.length().getValue();
-    assertEquals("Incorrect length for shakespear PCollection.", LINES_IN_SHAKESPEARE, length);
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/DeepCopyCustomTuplesIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/DeepCopyCustomTuplesIT.java b/crunch/src/it/java/org/apache/crunch/DeepCopyCustomTuplesIT.java
deleted file mode 100644
index f1323ca..0000000
--- a/crunch/src/it/java/org/apache/crunch/DeepCopyCustomTuplesIT.java
+++ /dev/null
@@ -1,79 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import static org.apache.crunch.types.avro.Avros.*;
-import static org.junit.Assert.assertEquals;
-
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.PType;
-import org.junit.Rule;
-import org.junit.Test;
-
-import com.google.common.collect.Iterables;
-
-/**
- *
- */
-public class DeepCopyCustomTuplesIT {
-  @Rule
-  public TemporaryPath tmpDir = TemporaryPaths.create();
-  
-  public static class PID extends Pair<Integer, String> {
-    public PID(Integer first, String second) {
-      super(first, second);
-    }
-  }
-  
-  private static PType<PID> pids = tuples(PID.class, ints(), strings());
-  
-  @Test
-  public void testDeepCopyCustomTuple() throws Exception {
-    Pipeline p = new MRPipeline(DeepCopyCustomTuplesIT.class, tmpDir.getDefaultConfiguration());
-    String shakesInputPath = tmpDir.copyResourceFileName("shakes.txt");
-    PCollection<String> shakes = p.readTextFile(shakesInputPath);
-    Iterable<String> out = shakes
-        .parallelDo(new PreProcFn(), tableOf(ints(), pairs(ints(), pids)))
-        .groupByKey()
-        .parallelDo(new PostProcFn(), strings())
-        .materialize();
-    assertEquals(65, Iterables.size(out));
-    p.done();
-  }
-  
-  private static class PreProcFn extends MapFn<String, Pair<Integer, Pair<Integer, PID>>> {
-    private int counter = 0;
-    @Override
-    public Pair<Integer, Pair<Integer, PID>> map(String input) {
-      return Pair.of(counter++, Pair.of(counter++, new PID(input.length(), input)));
-    }
-  };
-  
-  private static class PostProcFn extends DoFn<Pair<Integer, Iterable<Pair<Integer, PID>>>, String> {
-    @Override
-    public void process(Pair<Integer, Iterable<Pair<Integer, PID>>> input, Emitter<String> emitter) {
-      for (Pair<Integer, PID> p : input.second()) {
-        if (p.second().first() > 0 && p.second().first() < 10) {
-          emitter.emit(p.second().second());
-        }
-      }
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/EnumPairIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/EnumPairIT.java b/crunch/src/it/java/org/apache/crunch/EnumPairIT.java
deleted file mode 100644
index 1d0974e..0000000
--- a/crunch/src/it/java/org/apache/crunch/EnumPairIT.java
+++ /dev/null
@@ -1,59 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.IOException;
-import java.io.Serializable;
-
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.PTypes;
-import org.apache.crunch.types.writable.Writables;
-import org.junit.Rule;
-import org.junit.Test;
-
-public class EnumPairIT implements Serializable {
-  @Rule
-  public transient TemporaryPath tmpDir = TemporaryPaths.create();
-
-  static enum etypes {
-    type1,
-  }
-
-  @Test
-  public void testEnumPTypes() throws IOException {
-    String inputFile1 = tmpDir.copyResourceFileName("set1.txt");
-    Pipeline pipeline = new MRPipeline(EnumPairIT.class);
-    PCollection<String> set1 = pipeline.readTextFile(inputFile1);
-    PTable<String, etypes> data = set1.parallelDo(new DoFn<String, Pair<String, etypes>>() {
-      @Override
-      public void process(String input, Emitter<Pair<String, etypes>> emitter) {
-        emitter.emit(new Pair<String, etypes>(input, etypes.type1));
-      }
-    }, Writables.tableOf(Writables.strings(), PTypes.enums(etypes.class, set1.getTypeFamily())));
-
-    Iterable<Pair<String, etypes>> materialized = data.materialize();
-    pipeline.run();
-    for (Pair<String, etypes> pair : materialized) {
-      assertEquals(etypes.type1, pair.second());
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/FirstElementPObjectIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/FirstElementPObjectIT.java b/crunch/src/it/java/org/apache/crunch/FirstElementPObjectIT.java
deleted file mode 100644
index d985e10..0000000
--- a/crunch/src/it/java/org/apache/crunch/FirstElementPObjectIT.java
+++ /dev/null
@@ -1,61 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.IOException;
-import java.lang.String;
-
-import org.apache.crunch.PCollection;
-import org.apache.crunch.PObject;
-import org.apache.crunch.impl.mem.MemPipeline;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.materialize.pobject.FirstElementPObject;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.junit.Rule;
-import org.junit.Test;
-
-@SuppressWarnings("serial")
-public class FirstElementPObjectIT {
-
-  private static final String FIRST_SHAKESPEARE_LINE =
-      "***The Project Gutenberg's Etext of Shakespeare's First Folio***";
-
-  @Rule
-  public TemporaryPath tmpDir = TemporaryPaths.create();
-
-  @Test
-  public void testMRPipeline() throws IOException {
-    run(new MRPipeline(FirstElementPObjectIT.class, tmpDir.getDefaultConfiguration()));
-  }
-
-  @Test
-  public void testInMemoryPipeline() throws IOException {
-    run(MemPipeline.getInstance());
-  }
-
-  public void run(Pipeline pipeline) throws IOException {
-    String shakesInputPath = tmpDir.copyResourceFileName("shakes.txt");
-    PCollection<String> shakespeare = pipeline.readTextFile(shakesInputPath);
-    PObject<String> firstLine = new FirstElementPObject<String>(shakespeare);
-    String first = firstLine.getValue();
-    assertEquals("First line in Shakespeare is wrong.", FIRST_SHAKESPEARE_LINE, first);
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/IterableReuseProtectionIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/IterableReuseProtectionIT.java b/crunch/src/it/java/org/apache/crunch/IterableReuseProtectionIT.java
deleted file mode 100644
index da487eb..0000000
--- a/crunch/src/it/java/org/apache/crunch/IterableReuseProtectionIT.java
+++ /dev/null
@@ -1,89 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.IOException;
-import java.util.Collections;
-import java.util.List;
-
-import org.apache.crunch.fn.IdentityFn;
-import org.apache.crunch.impl.mem.MemPipeline;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.writable.Writables;
-import org.junit.Rule;
-import org.junit.Test;
-
-import com.google.common.collect.Lists;
-
-/**
- * Verify that calling the iterator method on a Reducer-based Iterable 
- * is forcefully disallowed.
- */
-public class IterableReuseProtectionIT {
-
-  @Rule
-  public TemporaryPath tmpDir = TemporaryPaths.create();
-  
-  
-  public void checkIteratorReuse(Pipeline pipeline) throws IOException {
-    Iterable<String> values = pipeline.readTextFile(tmpDir.copyResourceFileName("set1.txt"))
-        .by(IdentityFn.<String>getInstance(), Writables.strings())
-        .groupByKey()
-        .combineValues(new TestIterableReuseFn())
-        .values().materialize();
-    
-    List<String> valueList = Lists.newArrayList(values);
-    Collections.sort(valueList);
-    assertEquals(Lists.newArrayList("a", "b", "c", "e"), valueList);
-  }
-  
-  @Test
-  public void testIteratorReuse_MRPipeline() throws IOException {
-    checkIteratorReuse(new MRPipeline(IterableReuseProtectionIT.class, tmpDir.getDefaultConfiguration()));
-  }
-  
-  @Test
-  public void testIteratorReuse_InMemoryPipeline() throws IOException {
-    checkIteratorReuse(MemPipeline.getInstance());
-  }
-  
-  static class TestIterableReuseFn extends CombineFn<String, String> {
-
-    @Override
-    public void process(Pair<String, Iterable<String>> input, Emitter<Pair<String, String>> emitter) {
-      StringBuilder combinedBuilder = new StringBuilder();
-      for (String v : input.second()) {
-        combinedBuilder.append(v);
-      }
-      
-      try {
-        input.second().iterator();
-        throw new RuntimeException("Second call to iterator should throw an exception");
-      } catch (IllegalStateException e) {
-        // Expected situation
-      }
-      emitter.emit(Pair.of(input.first(), combinedBuilder.toString()));
-    }
-    
-  }
-  
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/MRPipelineIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/MRPipelineIT.java b/crunch/src/it/java/org/apache/crunch/MRPipelineIT.java
deleted file mode 100644
index 7670e88..0000000
--- a/crunch/src/it/java/org/apache/crunch/MRPipelineIT.java
+++ /dev/null
@@ -1,78 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import static org.junit.Assert.assertTrue;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.Serializable;
-
-import org.apache.crunch.fn.FilterFns;
-import org.apache.crunch.fn.IdentityFn;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.io.To;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.writable.Writables;
-import org.junit.Rule;
-import org.junit.Test;
-
-public class MRPipelineIT implements Serializable {
-  @Rule
-  public transient TemporaryPath tmpDir = TemporaryPaths.create();
-
-  @Test
-  public void materializedColShouldBeWritten() throws Exception {
-    File textFile = tmpDir.copyResourceFile("shakes.txt");
-    Pipeline pipeline = new MRPipeline(MRPipelineIT.class, tmpDir.getDefaultConfiguration());
-    PCollection<String> genericCollection = pipeline.readTextFile(textFile.getAbsolutePath());
-    pipeline.run();
-    PCollection<String> filter = genericCollection.filter("Filtering data", FilterFns.<String>ACCEPT_ALL());
-    filter.materialize();
-    pipeline.run();
-    File file = tmpDir.getFile("output.txt");
-    Target outFile = To.textFile(file.getAbsolutePath());
-    PCollection<String> write = filter.write(outFile);
-    write.materialize();
-    pipeline.run();
-  }
-  
-  
-  
-  @Test
-  public void testPGroupedTableToMultipleOutputs() throws IOException{
-    Pipeline pipeline = new MRPipeline(MRPipelineIT.class, tmpDir.getDefaultConfiguration());
-    PGroupedTable<String, String> groupedLineTable = pipeline.readTextFile(tmpDir.copyResourceFileName("set1.txt")).by(IdentityFn.<String>getInstance(), Writables.strings()).groupByKey();
-    
-    PTable<String, String> ungroupedTableA = groupedLineTable.ungroup();
-    PTable<String, String> ungroupedTableB = groupedLineTable.ungroup();
-    
-    File outputDirA = tmpDir.getFile("output_a");
-    File outputDirB = tmpDir.getFile("output_b");
-    
-    pipeline.writeTextFile(ungroupedTableA, outputDirA.getAbsolutePath());
-    pipeline.writeTextFile(ungroupedTableB, outputDirB.getAbsolutePath());
-    pipeline.done();
-
-    // Verify that output from a single PGroupedTable can be sent to multiple collections
-    assertTrue(new File(outputDirA, "part-r-00000").exists());
-    assertTrue(new File(outputDirB, "part-r-00000").exists());
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/MapPObjectIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/MapPObjectIT.java b/crunch/src/it/java/org/apache/crunch/MapPObjectIT.java
deleted file mode 100644
index c48284f..0000000
--- a/crunch/src/it/java/org/apache/crunch/MapPObjectIT.java
+++ /dev/null
@@ -1,101 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import static junit.framework.Assert.assertEquals;
-
-import java.io.IOException;
-import java.util.Map;
-
-import org.apache.crunch.impl.mem.MemPipeline;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.materialize.pobject.MapPObject;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.PTypeFamily;
-import org.junit.Rule;
-import org.junit.Test;
-
-import com.google.common.collect.ImmutableList;
-
-public class MapPObjectIT {
-
-  static final ImmutableList<Pair<Integer, String>> kvPairs = ImmutableList.of(Pair.of(0, "a"), Pair.of(1, "b"),
-      Pair.of(2, "c"), Pair.of(3, "e"));
-
-  public void assertMatches(Map<Integer, String> m) {
-    for (Integer k : m.keySet()) {
-      assertEquals(kvPairs.get(k).second(), m.get(k));
-    }
-  }
-
-  private static class Set1Mapper extends MapFn<String, Pair<Integer, String>> {
-    @Override
-    public Pair<Integer, String> map(String input) {
-
-      int k = -1;
-      if (input.equals("a"))
-        k = 0;
-      else if (input.equals("b"))
-        k = 1;
-      else if (input.equals("c"))
-        k = 2;
-      else if (input.equals("e"))
-        k = 3;
-      return Pair.of(k, input);
-    }
-  }
-  @Rule
-  public TemporaryPath tmpDir = TemporaryPaths.create();
-
-  @Test
-  public void testMemMapPObject() {
-    PTable<Integer, String> table = MemPipeline.tableOf(kvPairs);
-    PObject<Map<Integer, String>> map = new MapPObject<Integer, String>(table);
-    assertMatches(map.getValue());
-  }
-
-  @Test
-  public void testMemAsMap() {
-    PTable<Integer, String> table = MemPipeline.tableOf(kvPairs);
-    assertMatches(table.asMap().getValue());
-  }
-
-  private PTable<Integer, String> getMRPTable() throws IOException {
-    Pipeline p = new MRPipeline(MaterializeToMapIT.class, tmpDir.getDefaultConfiguration());
-    String inputFile = tmpDir.copyResourceFileName("set1.txt");
-    PCollection<String> c = p.readTextFile(inputFile);
-    PTypeFamily tf = c.getTypeFamily();
-    PTable<Integer, String> table = c.parallelDo(new Set1Mapper(), tf.tableOf(tf.ints(),
-        tf.strings()));
-    return table;
-  }
-
-  @Test
-  public void testMRMapPObject() throws IOException {
-    PTable<Integer, String> table = getMRPTable();
-    PObject<Map<Integer, String>> map = new MapPObject<Integer, String>(table);
-    assertMatches(map.getValue());
-  }
-
-  @Test
-  public void testMRAsMap() throws IOException {
-    PTable<Integer, String> table = getMRPTable();
-    assertMatches(table.asMap().getValue());
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/MapsIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/MapsIT.java b/crunch/src/it/java/org/apache/crunch/MapsIT.java
deleted file mode 100644
index 5b3187b..0000000
--- a/crunch/src/it/java/org/apache/crunch/MapsIT.java
+++ /dev/null
@@ -1,101 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import static org.hamcrest.Matchers.is;
-import static org.junit.Assert.assertThat;
-
-import java.util.Map;
-
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.PTypeFamily;
-import org.apache.crunch.types.avro.AvroTypeFamily;
-import org.apache.crunch.types.writable.WritableTypeFamily;
-import org.junit.Rule;
-import org.junit.Test;
-
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Maps;
-
-public class MapsIT {
-  @Rule
-  public TemporaryPath tmpDir = TemporaryPaths.create();
-
-  @Test
-  public void testWritables() throws Exception {
-    run(WritableTypeFamily.getInstance(), tmpDir);
-  }
-
-  @Test
-  public void testAvros() throws Exception {
-    run(AvroTypeFamily.getInstance(), tmpDir);
-  }
-
-  public static void run(PTypeFamily typeFamily, TemporaryPath tmpDir) throws Exception {
-    Pipeline pipeline = new MRPipeline(MapsIT.class, tmpDir.getDefaultConfiguration());
-    String shakesInputPath = tmpDir.copyResourceFileName("shakes.txt");
-    PCollection<String> shakespeare = pipeline.readTextFile(shakesInputPath);
-    Iterable<Pair<String, Map<String, Long>>> output = shakespeare
-        .parallelDo(new DoFn<String, Pair<String, Map<String, Long>>>() {
-          @Override
-          public void process(String input, Emitter<Pair<String, Map<String, Long>>> emitter) {
-            String last = null;
-            for (String word : input.toLowerCase().split("\\W+")) {
-              if (!word.isEmpty()) {
-                String firstChar = word.substring(0, 1);
-                if (last != null) {
-                  Map<String, Long> cc = ImmutableMap.of(firstChar, 1L);
-                  emitter.emit(Pair.of(last, cc));
-                }
-                last = firstChar;
-              }
-            }
-          }
-        }, typeFamily.tableOf(typeFamily.strings(), typeFamily.maps(typeFamily.longs()))).groupByKey()
-        .combineValues(new CombineFn<String, Map<String, Long>>() {
-          @Override
-          public void process(Pair<String, Iterable<Map<String, Long>>> input,
-              Emitter<Pair<String, Map<String, Long>>> emitter) {
-            Map<String, Long> agg = Maps.newHashMap();
-            for (Map<String, Long> in : input.second()) {
-              for (Map.Entry<String, Long> e : in.entrySet()) {
-                if (!agg.containsKey(e.getKey())) {
-                  agg.put(e.getKey(), e.getValue());
-                } else {
-                  agg.put(e.getKey(), e.getValue() + agg.get(e.getKey()));
-                }
-              }
-            }
-            emitter.emit(Pair.of(input.first(), agg));
-          }
-        }).materialize();
-
-    boolean passed = false;
-    for (Pair<String, Map<String, Long>> v : output) {
-      if (v.first().equals("k") && v.second().get("n") == 8L) {
-        passed = true;
-        break;
-      }
-    }
-    pipeline.done();
-
-    assertThat(passed, is(true));
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/MaterializeIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/MaterializeIT.java b/crunch/src/it/java/org/apache/crunch/MaterializeIT.java
deleted file mode 100644
index d064993..0000000
--- a/crunch/src/it/java/org/apache/crunch/MaterializeIT.java
+++ /dev/null
@@ -1,139 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import static junit.framework.Assert.assertEquals;
-import static junit.framework.Assert.assertTrue;
-
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.crunch.fn.FilterFns;
-import org.apache.crunch.impl.mem.MemPipeline;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.test.Person;
-import org.apache.crunch.test.StringWrapper;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.PTypeFamily;
-import org.apache.crunch.types.avro.AvroTypeFamily;
-import org.apache.crunch.types.avro.Avros;
-import org.apache.crunch.types.writable.WritableTypeFamily;
-import org.junit.Assume;
-import org.junit.Rule;
-import org.junit.Test;
-
-import com.google.common.collect.Lists;
-
-public class MaterializeIT {
-
-  @Rule
-  public TemporaryPath tmpDir = TemporaryPaths.create();
-
-  @Test
-  public void testMaterializeInput_Writables() throws IOException {
-    runMaterializeInput(new MRPipeline(MaterializeIT.class, tmpDir.getDefaultConfiguration()),
-        WritableTypeFamily.getInstance());
-  }
-
-  @Test
-  public void testMaterializeInput_Avro() throws IOException {
-    runMaterializeInput(new MRPipeline(MaterializeIT.class, tmpDir.getDefaultConfiguration()),
-        AvroTypeFamily.getInstance());
-  }
-
-  @Test
-  public void testMaterializeInput_InMemoryWritables() throws IOException {
-    runMaterializeInput(MemPipeline.getInstance(), WritableTypeFamily.getInstance());
-  }
-
-  @Test
-  public void testMaterializeInput_InMemoryAvro() throws IOException {
-    runMaterializeInput(MemPipeline.getInstance(), AvroTypeFamily.getInstance());
-  }
-
-  @Test
-  public void testMaterializeEmptyIntermediate_Writables() throws IOException {
-    runMaterializeEmptyIntermediate(
-        new MRPipeline(MaterializeIT.class, tmpDir.getDefaultConfiguration()),
-        WritableTypeFamily.getInstance());
-  }
-
-  @Test
-  public void testMaterializeEmptyIntermediate_Avro() throws IOException {
-    runMaterializeEmptyIntermediate(
-        new MRPipeline(MaterializeIT.class, tmpDir.getDefaultConfiguration()),
-        AvroTypeFamily.getInstance());
-  }
-
-  @Test
-  public void testMaterializeEmptyIntermediate_InMemoryWritables() throws IOException {
-    runMaterializeEmptyIntermediate(MemPipeline.getInstance(), WritableTypeFamily.getInstance());
-  }
-
-  @Test
-  public void testMaterializeEmptyIntermediate_InMemoryAvro() throws IOException {
-    runMaterializeEmptyIntermediate(MemPipeline.getInstance(), AvroTypeFamily.getInstance());
-  }
-
-  public void runMaterializeInput(Pipeline pipeline, PTypeFamily typeFamily) throws IOException {
-    List<String> expectedContent = Lists.newArrayList("b", "c", "a", "e");
-    String inputPath = tmpDir.copyResourceFileName("set1.txt");
-
-    PCollection<String> lines = pipeline.readTextFile(inputPath);
-    assertEquals(expectedContent, Lists.newArrayList(lines.materialize()));
-    pipeline.done();
-  }
-
-  public void runMaterializeEmptyIntermediate(Pipeline pipeline, PTypeFamily typeFamily)
-      throws IOException {
-    String inputPath = tmpDir.copyResourceFileName("set1.txt");
-    PCollection<String> empty = pipeline.readTextFile(inputPath).filter(FilterFns.<String>REJECT_ALL());
-
-    assertTrue(Lists.newArrayList(empty.materialize()).isEmpty());
-    pipeline.done();
-  }
-
-  static class StringToStringWrapperPersonPairMapFn extends MapFn<String, Pair<StringWrapper, Person>> {
-
-    @Override
-    public Pair<StringWrapper, Person> map(String input) {
-      Person person = new Person();
-      person.name = input;
-      person.age = 42;
-      person.siblingnames = Lists.<CharSequence> newArrayList();
-      return Pair.of(new StringWrapper(input), person);
-    }
-
-  }
-
-  @Test
-  public void testMaterializeAvroPersonAndReflectsPair_GroupedTable() throws IOException {
-    Assume.assumeTrue(Avros.CAN_COMBINE_SPECIFIC_AND_REFLECT_SCHEMAS);
-    Pipeline pipeline = new MRPipeline(MaterializeIT.class);
-    List<Pair<StringWrapper, Person>> pairList = Lists.newArrayList(pipeline
-        .readTextFile(tmpDir.copyResourceFileName("set1.txt"))
-        .parallelDo(new StringToStringWrapperPersonPairMapFn(),
-            Avros.pairs(Avros.reflects(StringWrapper.class), Avros.records(Person.class)))
-        .materialize());
-    
-    // We just need to make sure this doesn't crash
-    assertEquals(4, pairList.size());
-
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/MaterializeToMapIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/MaterializeToMapIT.java b/crunch/src/it/java/org/apache/crunch/MaterializeToMapIT.java
deleted file mode 100644
index 7fef30e..0000000
--- a/crunch/src/it/java/org/apache/crunch/MaterializeToMapIT.java
+++ /dev/null
@@ -1,81 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import static junit.framework.Assert.assertEquals;
-
-import java.io.IOException;
-import java.util.Map;
-
-import org.apache.crunch.impl.mem.MemPipeline;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.PTypeFamily;
-import org.junit.Rule;
-import org.junit.Test;
-
-import com.google.common.collect.ImmutableList;
-
-public class MaterializeToMapIT {
-
-  static final ImmutableList<Pair<Integer, String>> kvPairs = ImmutableList.of(Pair.of(0, "a"), Pair.of(1, "b"),
-      Pair.of(2, "c"), Pair.of(3, "e"));
-
-  public void assertMatches(Map<Integer, String> m) {
-    for (Integer k : m.keySet()) {
-      assertEquals(kvPairs.get(k).second(), m.get(k));
-    }
-  }
-
-  @Test
-  public void testMemMaterializeToMap() {
-    assertMatches(MemPipeline.tableOf(kvPairs).materializeToMap());
-  }
-
-  private static class Set1Mapper extends MapFn<String, Pair<Integer, String>> {
-    @Override
-    public Pair<Integer, String> map(String input) {
-
-      int k = -1;
-      if (input.equals("a"))
-        k = 0;
-      else if (input.equals("b"))
-        k = 1;
-      else if (input.equals("c"))
-        k = 2;
-      else if (input.equals("e"))
-        k = 3;
-      return Pair.of(k, input);
-    }
-  }
-  @Rule
-  public TemporaryPath tmpDir = TemporaryPaths.create();
-
-  @Test
-  public void testMRMaterializeToMap() throws IOException {
-    Pipeline p = new MRPipeline(MaterializeToMapIT.class, tmpDir.getDefaultConfiguration());
-    String inputFile = tmpDir.copyResourceFileName("set1.txt");
-    PCollection<String> c = p.readTextFile(inputFile);
-    PTypeFamily tf = c.getTypeFamily();
-    PTable<Integer, String> t = c.parallelDo(new Set1Mapper(), tf.tableOf(tf.ints(), tf.strings()));
-    Map<Integer, String> m = t.materializeToMap();
-    assertMatches(m);
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/MultipleOutputIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/MultipleOutputIT.java b/crunch/src/it/java/org/apache/crunch/MultipleOutputIT.java
deleted file mode 100644
index 1a85b6a..0000000
--- a/crunch/src/it/java/org/apache/crunch/MultipleOutputIT.java
+++ /dev/null
@@ -1,175 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.File;
-import java.io.IOException;
-import java.nio.charset.Charset;
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.io.At;
-import org.apache.crunch.test.StringWrapper;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.apache.crunch.types.PTypeFamily;
-import org.apache.crunch.types.avro.AvroTypeFamily;
-import org.apache.crunch.types.avro.Avros;
-import org.apache.crunch.types.writable.WritableTypeFamily;
-import org.apache.crunch.types.writable.Writables;
-import org.junit.Rule;
-import org.junit.Test;
-
-import com.google.common.collect.Lists;
-import com.google.common.io.Files;
-
-public class MultipleOutputIT {
-  @Rule
-  public TemporaryPath tmpDir = TemporaryPaths.create();
-
-  public static PCollection<String> evenCountLetters(PCollection<String> words, PTypeFamily typeFamily) {
-    return words.parallelDo("even", new FilterFn<String>() {
-
-      @Override
-      public boolean accept(String input) {
-        return input.length() % 2 == 0;
-      }
-    }, typeFamily.strings());
-  }
-
-  public static PCollection<String> oddCountLetters(PCollection<String> words, PTypeFamily typeFamily) {
-    return words.parallelDo("odd", new FilterFn<String>() {
-
-      @Override
-      public boolean accept(String input) {
-        return input.length() % 2 != 0;
-      }
-    }, typeFamily.strings());
-
-  }
-
-  public static PTable<String, Long> substr(PTable<String, Long> ptable) {
-    return ptable.parallelDo(new DoFn<Pair<String, Long>, Pair<String, Long>>() {
-      public void process(Pair<String, Long> input, Emitter<Pair<String, Long>> emitter) {
-        if (input.first().length() > 0) {
-          emitter.emit(Pair.of(input.first().substring(0, 1), input.second()));
-        }
-      }
-    }, ptable.getPTableType());
-  }
-
-  @Test
-  public void testWritables() throws IOException {
-    run(new MRPipeline(MultipleOutputIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance());
-  }
-
-  @Test
-  public void testAvro() throws IOException {
-    run(new MRPipeline(MultipleOutputIT.class, tmpDir.getDefaultConfiguration()), AvroTypeFamily.getInstance());
-  }
-
-  @Test
-  public void testParallelDosFused() throws IOException {
-
-    PipelineResult result = run(new MRPipeline(MultipleOutputIT.class, tmpDir.getDefaultConfiguration()),
-        WritableTypeFamily.getInstance());
-
-    // Ensure our multiple outputs were fused into a single job.
-    assertEquals("parallel Dos not fused into a single job", 1, result.getStageResults().size());
-  }
-
-  public PipelineResult run(Pipeline pipeline, PTypeFamily typeFamily) throws IOException {
-    String inputPath = tmpDir.copyResourceFileName("letters.txt");
-    String outputPathEven = tmpDir.getFileName("even");
-    String outputPathOdd = tmpDir.getFileName("odd");
-
-    PCollection<String> words = pipeline.read(At.textFile(inputPath, typeFamily.strings()));
-
-    PCollection<String> evenCountWords = evenCountLetters(words, typeFamily);
-    PCollection<String> oddCountWords = oddCountLetters(words, typeFamily);
-    pipeline.writeTextFile(evenCountWords, outputPathEven);
-    pipeline.writeTextFile(oddCountWords, outputPathOdd);
-
-    PipelineResult result = pipeline.done();
-
-    checkFileContents(outputPathEven, Arrays.asList("bb"));
-    checkFileContents(outputPathOdd, Arrays.asList("a"));
-
-    return result;
-  }
-
-  /**
-   * Mutates the state of an input and then emits the mutated object.
-   */
-  static class AppendFn extends DoFn<StringWrapper, StringWrapper> {
-
-    private String value;
-
-    public AppendFn(String value) {
-      this.value = value;
-    }
-
-    @Override
-    public void process(StringWrapper input, Emitter<StringWrapper> emitter) {
-      input.setValue(input.getValue() + value);
-      emitter.emit(input);
-    }
-
-  }
-
-  /**
-   * Fusing multiple pipelines has a risk of running into object reuse bugs.
-   * This test verifies that mutating the state of an object that is passed
-   * through multiple streams of a pipeline doesn't allow one stream to affect
-   * another.
-   */
-  @Test
-  public void testFusedMappersObjectReuseBug() throws IOException {
-    Pipeline pipeline = new MRPipeline(MultipleOutputIT.class, tmpDir.getDefaultConfiguration());
-    PCollection<StringWrapper> stringWrappers = pipeline.readTextFile(tmpDir.copyResourceFileName("set2.txt"))
-        .parallelDo(new StringWrapper.StringToStringWrapperMapFn(), Avros.reflects(StringWrapper.class));
-
-    PCollection<String> stringsA = stringWrappers.parallelDo(new AppendFn("A"), stringWrappers.getPType())
-        .parallelDo(new StringWrapper.StringWrapperToStringMapFn(), Writables.strings());
-    PCollection<String> stringsB = stringWrappers.parallelDo(new AppendFn("B"), stringWrappers.getPType())
-        .parallelDo(new StringWrapper.StringWrapperToStringMapFn(), Writables.strings());
-
-    String outputA = tmpDir.getFileName("stringsA");
-    String outputB = tmpDir.getFileName("stringsB");
-
-    pipeline.writeTextFile(stringsA, outputA);
-    pipeline.writeTextFile(stringsB, outputB);
-    PipelineResult pipelineResult = pipeline.done();
-
-    // Make sure fusing did actually occur
-    assertEquals(1, pipelineResult.getStageResults().size());
-
-    checkFileContents(outputA, Lists.newArrayList("cA", "dA", "aA"));
-    checkFileContents(outputB, Lists.newArrayList("cB", "dB", "aB"));
-
-  }
-
-  private void checkFileContents(String filePath, List<String> expected) throws IOException {
-    File outputFile = new File(filePath, "part-m-00000");
-    List<String> lines = Files.readLines(outputFile, Charset.defaultCharset());
-    assertEquals(expected, lines);
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/PCollectionGetSizeIT.java
----------------------------------------------------------------------
diff --git a/crunch/src/it/java/org/apache/crunch/PCollectionGetSizeIT.java b/crunch/src/it/java/org/apache/crunch/PCollectionGetSizeIT.java
deleted file mode 100644
index 44eb897..0000000
--- a/crunch/src/it/java/org/apache/crunch/PCollectionGetSizeIT.java
+++ /dev/null
@@ -1,151 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import static com.google.common.collect.Lists.newArrayList;
-import static org.apache.crunch.io.At.sequenceFile;
-import static org.apache.crunch.io.At.textFile;
-import static org.apache.crunch.types.writable.Writables.strings;
-import static org.hamcrest.Matchers.is;
-import static org.junit.Assert.assertThat;
-
-import java.io.IOException;
-
-import org.apache.crunch.fn.FilterFns;
-import org.apache.crunch.impl.mem.MemPipeline;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.test.TemporaryPath;
-import org.apache.crunch.test.TemporaryPaths;
-import org.junit.Before;
-import org.junit.Ignore;
-import org.junit.Rule;
-import org.junit.Test;
-
-public class PCollectionGetSizeIT {
-  @Rule
-  public TemporaryPath tmpDir = TemporaryPaths.create();
-
-  private String emptyInputPath;
-  private String nonEmptyInputPath;
-  private String outputPath;
-
-  @Before
-  public void setUp() throws IOException {
-    emptyInputPath = tmpDir.copyResourceFileName("emptyTextFile.txt");
-    nonEmptyInputPath = tmpDir.copyResourceFileName("set1.txt");
-    outputPath = tmpDir.getFileName("output");
-  }
-
-  @Test
-  public void testGetSizeOfEmptyInput_MRPipeline() throws IOException {
-    testCollectionGetSizeOfEmptyInput(new MRPipeline(this.getClass(), tmpDir.getDefaultConfiguration()));
-  }
-
-  @Test
-  public void testGetSizeOfEmptyInput_MemPipeline() throws IOException {
-    testCollectionGetSizeOfEmptyInput(MemPipeline.getInstance());
-  }
-
-  private void testCollectionGetSizeOfEmptyInput(Pipeline pipeline) throws IOException {
-
-    assertThat(pipeline.read(textFile(emptyInputPath)).getSize(), is(0L));
-  }
-
-  @Test
-  public void testMaterializeEmptyInput_MRPipeline() throws IOException {
-    testMaterializeEmptyInput(new MRPipeline(this.getClass(), tmpDir.getDefaultConfiguration()));
-  }
-
-  @Test
-  public void testMaterializeEmptyImput_MemPipeline() throws IOException {
-    testMaterializeEmptyInput(MemPipeline.getInstance());
-  }
-
-  private void testMaterializeEmptyInput(Pipeline pipeline) throws IOException {
-    assertThat(newArrayList(pipeline.readTextFile(emptyInputPath).materialize().iterator()).size(), is(0));
-  }
-
-  @Test
-  public void testGetSizeOfEmptyIntermediatePCollection_MRPipeline() throws IOException {
-
-    PCollection<String> emptyIntermediate = createPesistentEmptyIntermediate(
-        new MRPipeline(this.getClass(), tmpDir.getDefaultConfiguration()));
-
-    assertThat(emptyIntermediate.getSize(), is(0L));
-  }
-
-  @Test
-  @Ignore("GetSize of a DoCollection is only an estimate based on scale factor, so we can't count on it being reported as 0")
-  public void testGetSizeOfEmptyIntermediatePCollection_NoSave_MRPipeline() throws IOException {
-
-    PCollection<String> data = new MRPipeline(this.getClass(), tmpDir.getDefaultConfiguration())
-      .readTextFile(nonEmptyInputPath);
-
-    PCollection<String> emptyPCollection = data.filter(FilterFns.<String>REJECT_ALL());
-
-    assertThat(emptyPCollection.getSize(), is(0L));
-  }
-
-  @Test
-  public void testGetSizeOfEmptyIntermediatePCollection_MemPipeline() {
-
-    PCollection<String> emptyIntermediate = createPesistentEmptyIntermediate(MemPipeline.getInstance());
-
-    assertThat(emptyIntermediate.getSize(), is(0L));
-  }
-
-  @Test
-  public void testMaterializeOfEmptyIntermediatePCollection_MRPipeline() throws IOException {
-
-    PCollection<String> emptyIntermediate = createPesistentEmptyIntermediate(
-        new MRPipeline(this.getClass(), tmpDir.getDefaultConfiguration()));
-
-    assertThat(newArrayList(emptyIntermediate.materialize()).size(), is(0));
-  }
-
-  @Test
-  public void testMaterializeOfEmptyIntermediatePCollection_MemPipeline() {
-
-    PCollection<String> emptyIntermediate = createPesistentEmptyIntermediate(MemPipeline.getInstance());
-
-    assertThat(newArrayList(emptyIntermediate.materialize()).size(), is(0));
-  }
-
-  private PCollection<String> createPesistentEmptyIntermediate(Pipeline pipeline) {
-
-    PCollection<String> data = pipeline.readTextFile(nonEmptyInputPath);
-
-    PCollection<String> emptyPCollection = data.filter(FilterFns.<String>REJECT_ALL());
-
-    emptyPCollection.write(sequenceFile(outputPath, strings()));
-
-    pipeline.run();
-
-    return pipeline.read(sequenceFile(outputPath, strings()));
-  }
-
-  @Test(expected = IllegalStateException.class)
-  public void testExpectExceptionForGettingSizeOfNonExistingFile_MRPipeline() throws IOException {
-    new MRPipeline(this.getClass(), tmpDir.getDefaultConfiguration()).readTextFile("non_existing.file").getSize();
-  }
-
-  @Test(expected = IllegalStateException.class)
-  public void testExpectExceptionForGettingSizeOfNonExistingFile_MemPipeline() {
-    MemPipeline.getInstance().readTextFile("non_existing.file").getSize();
-  }
-}


[36/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/it/resources/urls.txt
----------------------------------------------------------------------
diff --git a/crunch-core/src/it/resources/urls.txt b/crunch-core/src/it/resources/urls.txt
new file mode 100644
index 0000000..827e711
--- /dev/null
+++ b/crunch-core/src/it/resources/urls.txt
@@ -0,0 +1,11 @@
+www.A.com	www.B.com
+www.A.com	www.C.com
+www.A.com	www.D.com
+www.A.com	www.E.com
+www.B.com	www.D.com
+www.B.com	www.E.com
+www.C.com	www.D.com
+www.D.com	www.B.com
+www.E.com	www.A.com
+www.F.com	www.B.com
+www.F.com	www.C.com

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/Aggregator.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/Aggregator.java b/crunch-core/src/main/java/org/apache/crunch/Aggregator.java
new file mode 100644
index 0000000..432452b
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/Aggregator.java
@@ -0,0 +1,86 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import java.io.Serializable;
+
+import org.apache.hadoop.conf.Configuration;
+
+
+/**
+ * Aggregate a sequence of values into a possibly smaller sequence of the same type.
+ *
+ * <p>In most cases, an Aggregator will turn multiple values into a single value,
+ * like creating a sum, finding the minimum or maximum, etc. In some cases
+ * (e.g. finding the top K elements), an implementation may return more than
+ * one value. The {@link org.apache.crunch.fn.Aggregators} utility class contains
+ * factory methods for creating all kinds of pre-defined Aggregators that should
+ * cover the most common cases.</p>
+ *
+ * <p>Aggregator implementations should usually be <em>associative</em> and
+ * <em>commutative</em>, which makes their results deterministic. If your aggregation
+ * function isn't commutative, you can still use a secondary sort to get deterministic results.</p>
+ *
+ * <p>The lifecycle of an {@link Aggregator} always begins with you instantiating
+ * it and passing it to Crunch. When running your {@link Pipeline}, Crunch serializes
+ * the instance and deserializes it wherever it is needed on the cluster. This is how
+ * Crunch uses a deserialized instance:</p>
+ *
+ * <ol>
+ *   <li>call {@link #initialize(Configuration)} once</li>
+ *   <li>call {@link #reset()}</li>
+ *   <li>call {@link #update(Object)} multiple times until all values of a sequence
+ *       have been aggregated</li>
+ *   <li>call {@link #results()} to retrieve the aggregated result</li>
+ *   <li>go back to step 2 until all sequences have been aggregated</li>
+ * </ol>
+ *
+ * @param <T> The value types to aggregate
+ */
+public interface Aggregator<T> extends Serializable {
+
+  /**
+   * Perform any setup of this instance that is required prior to processing
+   * inputs.
+   *
+   * @param conf Hadoop configuration
+   */
+  void initialize(Configuration conf);
+
+  /**
+   * Clears the internal state of this Aggregator and prepares it for the
+   * values associated with the next key.
+   *
+   * Depending on what you aggregate, this typically means setting a variable
+   * to zero or clearing a list. Failing to do this will yield wrong results!
+   */
+  void reset();
+
+  /**
+   * Incorporate the given value into the aggregate state maintained by this
+   * instance.
+   *
+   * @param value The value to add to the aggregated state
+   */
+  void update(T value);
+
+  /**
+   * Returns the current aggregated state of this instance.
+   */
+  Iterable<T> results();
+}

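For reference, a minimal word-count sketch of how the Aggregator lifecycle documented above is typically driven via PGroupedTable#combineValues. It only uses classes added or referenced by this patch; the input and output paths are illustrative:

    import org.apache.crunch.DoFn;
    import org.apache.crunch.Emitter;
    import org.apache.crunch.PCollection;
    import org.apache.crunch.PTable;
    import org.apache.crunch.Pair;
    import org.apache.crunch.Pipeline;
    import org.apache.crunch.fn.Aggregators;
    import org.apache.crunch.impl.mr.MRPipeline;
    import org.apache.crunch.types.writable.Writables;

    public class WordCountSketch {
      public static void main(String[] args) throws Exception {
        Pipeline pipeline = new MRPipeline(WordCountSketch.class);
        // Illustrative input path; substitute a real file.
        PCollection<String> lines = pipeline.readTextFile("/tmp/words.txt");

        // Emit a (word, 1L) pair for every whitespace-separated token.
        PTable<String, Long> ones = lines.parallelDo(new DoFn<String, Pair<String, Long>>() {
          @Override
          public void process(String line, Emitter<Pair<String, Long>> emitter) {
            for (String word : line.split("\\s+")) {
              emitter.emit(Pair.of(word, 1L));
            }
          }
        }, Writables.tableOf(Writables.strings(), Writables.longs()));

        // For each key, the aggregator is reset(), update()d once per value,
        // and its results() are emitted -- the lifecycle described on Aggregator.
        PTable<String, Long> counts = ones.groupByKey().combineValues(Aggregators.SUM_LONGS());

        pipeline.writeTextFile(counts, "/tmp/wordcounts");
        pipeline.done();
      }
    }
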
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/CombineFn.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/CombineFn.java b/crunch-core/src/main/java/org/apache/crunch/CombineFn.java
new file mode 100644
index 0000000..71e8057
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/CombineFn.java
@@ -0,0 +1,1211 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import java.io.Serializable;
+import java.math.BigInteger;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.SortedSet;
+
+import org.apache.crunch.fn.Aggregators;
+import org.apache.crunch.util.Tuples;
+import org.apache.hadoop.conf.Configuration;
+
+import com.google.common.base.Joiner;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+
+/**
+ * A special {@link DoFn} implementation that converts an {@link Iterable} of
+ * values into a single value. If a {@code CombineFn} instance is used on a
+ * {@link PGroupedTable}, the function will be applied to the output of the map
+ * stage before the data is passed to the reducer, which can improve the runtime
+ * of certain classes of jobs.
+ * <p>
+ * Note that the incoming {@code Iterable} can only be used to create an 
+ * {@code Iterator} once. Calling the {@link Iterable#iterator()} method a second
+ * time will throw an {@link IllegalStateException}.
+ */
+public abstract class CombineFn<S, T> extends DoFn<Pair<S, Iterable<T>>, Pair<S, T>> {
+
+  /**
+   * @deprecated Use {@link org.apache.crunch.Aggregator}
+   */
+  public static interface Aggregator<T> extends Serializable {
+    /**
+     * Perform any setup of this instance that is required prior to processing
+     * inputs.
+     */
+    void initialize(Configuration configuration);
+
+    /**
+     * Clears the internal state of this Aggregator and prepares it for the
+     * values associated with the next key.
+     */
+    void reset();
+
+    /**
+     * Incorporate the given value into the aggregate state maintained by this
+     * instance.
+     */
+    void update(T value);
+
+    /**
+     * Returns the current aggregated state of this instance.
+     */
+    Iterable<T> results();
+  }
+
+  /**
+   * Base class for aggregators that do not require any initialization.
+   *
+   * @deprecated Use {@link org.apache.crunch.fn.Aggregators.SimpleAggregator}
+   */
+  public static abstract class SimpleAggregator<T> implements Aggregator<T> {
+    @Override
+    public void initialize(Configuration conf) {
+      // No-op
+    }
+  }
+  
+  /**
+   * Interface for constructing new aggregator instances.
+   *
+   * @deprecated Use {@link PGroupedTable#combineValues(Aggregator)} which doesn't require a factory.
+   */
+  public static interface AggregatorFactory<T> {
+    Aggregator<T> create();
+  }
+
+  /**
+   * A {@code CombineFn} that delegates all of the actual work to an
+   * {@code Aggregator} instance.
+   *
+   * @deprecated Use the {@link Aggregators#toCombineFn(org.apache.crunch.Aggregator)} adapter
+   */
+  public static class AggregatorCombineFn<K, V> extends CombineFn<K, V> {
+
+    private final Aggregator<V> aggregator;
+
+    public AggregatorCombineFn(Aggregator<V> aggregator) {
+      this.aggregator = aggregator;
+    }
+
+    @Override
+    public void initialize() {
+      aggregator.initialize(getConfiguration());
+    }
+    
+    @Override
+    public void process(Pair<K, Iterable<V>> input, Emitter<Pair<K, V>> emitter) {
+      aggregator.reset();
+      for (V v : input.second()) {
+        aggregator.update(v);
+      }
+      for (V v : aggregator.results()) {
+        emitter.emit(Pair.of(input.first(), v));
+      }
+    }
+  }
+
+  private static abstract class TupleAggregator<T> implements Aggregator<T> {
+    private final List<Aggregator<Object>> aggregators;
+
+    public TupleAggregator(Aggregator<?>... aggregators) {
+      this.aggregators = Lists.newArrayList();
+      for (Aggregator<?> a : aggregators) {
+        this.aggregators.add((Aggregator<Object>) a);
+      }
+    }
+
+    @Override
+    public void initialize(Configuration configuration) {
+      for (Aggregator<?> a : aggregators) {
+        a.initialize(configuration);
+      }
+    }
+    
+    @Override
+    public void reset() {
+      for (Aggregator<?> a : aggregators) {
+        a.reset();
+      }
+    }
+
+    protected void updateTuple(Tuple t) {
+      for (int i = 0; i < aggregators.size(); i++) {
+        aggregators.get(i).update(t.get(i));
+      }
+    }
+
+    protected Iterable<Object> results(int index) {
+      return aggregators.get(index).results();
+    }
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#pairAggregator(Aggregator, Aggregator)}
+   */
+  public static class PairAggregator<V1, V2> extends TupleAggregator<Pair<V1, V2>> {
+
+    public PairAggregator(Aggregator<V1> a1, Aggregator<V2> a2) {
+      super(a1, a2);
+    }
+
+    @Override
+    public void update(Pair<V1, V2> value) {
+      updateTuple(value);
+    }
+
+    @Override
+    public Iterable<Pair<V1, V2>> results() {
+      return new Tuples.PairIterable<V1, V2>((Iterable<V1>) results(0), (Iterable<V2>) results(1));
+    }
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#tripAggregator(Aggregator, Aggregator, Aggregator)}
+   */
+  public static class TripAggregator<A, B, C> extends TupleAggregator<Tuple3<A, B, C>> {
+
+    public TripAggregator(Aggregator<A> a1, Aggregator<B> a2, Aggregator<C> a3) {
+      super(a1, a2, a3);
+    }
+
+    @Override
+    public void update(Tuple3<A, B, C> value) {
+      updateTuple(value);
+    }
+
+    @Override
+    public Iterable<Tuple3<A, B, C>> results() {
+      return new Tuples.TripIterable<A, B, C>((Iterable<A>) results(0), (Iterable<B>) results(1),
+          (Iterable<C>) results(2));
+    }
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#quadAggregator(Aggregator, Aggregator, Aggregator, Aggregator)}
+   */
+  public static class QuadAggregator<A, B, C, D> extends TupleAggregator<Tuple4<A, B, C, D>> {
+
+    public QuadAggregator(Aggregator<A> a1, Aggregator<B> a2, Aggregator<C> a3, Aggregator<D> a4) {
+      super(a1, a2, a3, a4);
+    }
+
+    @Override
+    public void update(Tuple4<A, B, C, D> value) {
+      updateTuple(value);
+    }
+
+    @Override
+    public Iterable<Tuple4<A, B, C, D>> results() {
+      return new Tuples.QuadIterable<A, B, C, D>((Iterable<A>) results(0), (Iterable<B>) results(1),
+          (Iterable<C>) results(2), (Iterable<D>) results(3));
+    }
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#tupleAggregator(Aggregator...)}
+   */
+  public static class TupleNAggregator extends TupleAggregator<TupleN> {
+
+    private final int size;
+
+    public TupleNAggregator(Aggregator<?>... aggregators) {
+      super(aggregators);
+      size = aggregators.length;
+    }
+
+    @Override
+    public void update(TupleN value) {
+      updateTuple(value);
+    }
+
+    @Override
+    public Iterable<TupleN> results() {
+      Iterable<?>[] iterables = new Iterable[size];
+      for (int i = 0; i < size; i++) {
+        iterables[i] = results(i);
+      }
+      return new Tuples.TupleNIterable(iterables);
+    }
+
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#toCombineFn(Aggregator)}
+   */
+  public static final <K, V> CombineFn<K, V> aggregator(Aggregator<V> aggregator) {
+    return new AggregatorCombineFn<K, V>(aggregator);
+  }
+
+  /**
+   * @deprecated Use {@link PGroupedTable#combineValues(Aggregator)} which doesn't require a factory.
+   */
+  public static final <K, V> CombineFn<K, V> aggregatorFactory(AggregatorFactory<V> aggregator) {
+    return new AggregatorCombineFn<K, V>(aggregator.create());
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#pairAggregator(Aggregator, Aggregator)}
+   */
+  public static final <K, V1, V2> CombineFn<K, Pair<V1, V2>> pairAggregator(AggregatorFactory<V1> a1,
+      AggregatorFactory<V2> a2) {
+    return aggregator(new PairAggregator<V1, V2>(a1.create(), a2.create()));
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#tripAggregator(Aggregator, Aggregator, Aggregator)}
+   */
+  public static final <K, A, B, C> CombineFn<K, Tuple3<A, B, C>> tripAggregator(AggregatorFactory<A> a1,
+      AggregatorFactory<B> a2, AggregatorFactory<C> a3) {
+    return aggregator(new TripAggregator<A, B, C>(a1.create(), a2.create(), a3.create()));
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#quadAggregator(Aggregator, Aggregator, Aggregator, Aggregator)}
+   */
+  public static final <K, A, B, C, D> CombineFn<K, Tuple4<A, B, C, D>> quadAggregator(AggregatorFactory<A> a1,
+      AggregatorFactory<B> a2, AggregatorFactory<C> a3, AggregatorFactory<D> a4) {
+    return aggregator(new QuadAggregator<A, B, C, D>(a1.create(), a2.create(), a3.create(), a4.create()));
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#tupleAggregator(Aggregator...)}
+   */
+  public static final <K> CombineFn<K, TupleN> tupleAggregator(AggregatorFactory<?>... factories) {
+    Aggregator<?>[] aggs = new Aggregator[factories.length];
+    for (int i = 0; i < aggs.length; i++) {
+      aggs[i] = factories[i].create();
+    }
+    return aggregator(new TupleNAggregator(aggs));
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#SUM_LONGS()}
+   */
+  public static final <K> CombineFn<K, Long> SUM_LONGS() {
+    return aggregatorFactory(SUM_LONGS);
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#SUM_INTS()}
+   */
+  public static final <K> CombineFn<K, Integer> SUM_INTS() {
+    return aggregatorFactory(SUM_INTS);
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#SUM_FLOATS()}
+   */
+  public static final <K> CombineFn<K, Float> SUM_FLOATS() {
+    return aggregatorFactory(SUM_FLOATS);
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#SUM_DOUBLES()}
+   */
+  public static final <K> CombineFn<K, Double> SUM_DOUBLES() {
+    return aggregatorFactory(SUM_DOUBLES);
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#SUM_BIGINTS()}
+   */
+  public static final <K> CombineFn<K, BigInteger> SUM_BIGINTS() {
+    return aggregatorFactory(SUM_BIGINTS);
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#MAX_LONGS()}
+   */
+  public static final <K> CombineFn<K, Long> MAX_LONGS() {
+    return aggregatorFactory(MAX_LONGS);
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#MAX_LONGS(int)}
+   */
+  public static final <K> CombineFn<K, Long> MAX_LONGS(int n) {
+    return aggregator(new MaxNAggregator<Long>(n));
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#MAX_INTS()}
+   */
+  public static final <K> CombineFn<K, Integer> MAX_INTS() {
+    return aggregatorFactory(MAX_INTS);
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#MAX_INTS(int)}
+   */
+  public static final <K> CombineFn<K, Integer> MAX_INTS(int n) {
+    return aggregator(new MaxNAggregator<Integer>(n));
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#MAX_FLOATS()}
+   */
+  public static final <K> CombineFn<K, Float> MAX_FLOATS() {
+    return aggregatorFactory(MAX_FLOATS);
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#MAX_FLOATS(int)}
+   */
+  public static final <K> CombineFn<K, Float> MAX_FLOATS(int n) {
+    return aggregator(new MaxNAggregator<Float>(n));
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#MAX_DOUBLES()}
+   */
+  public static final <K> CombineFn<K, Double> MAX_DOUBLES() {
+    return aggregatorFactory(MAX_DOUBLES);
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#MAX_DOUBLES(int)}
+   */
+  public static final <K> CombineFn<K, Double> MAX_DOUBLES(int n) {
+    return aggregator(new MaxNAggregator<Double>(n));
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#MAX_BIGINTS()}
+   */
+  public static final <K> CombineFn<K, BigInteger> MAX_BIGINTS() {
+    return aggregatorFactory(MAX_BIGINTS);
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#MAX_BIGINTS(int)}
+   */
+  public static final <K> CombineFn<K, BigInteger> MAX_BIGINTS(int n) {
+    return aggregator(new MaxNAggregator<BigInteger>(n));
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#MIN_LONGS()}
+   */
+  public static final <K> CombineFn<K, Long> MIN_LONGS() {
+    return aggregatorFactory(MIN_LONGS);
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#MIN_LONGS(int)}
+   */
+  public static final <K> CombineFn<K, Long> MIN_LONGS(int n) {
+    return aggregator(new MinNAggregator<Long>(n));
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#MIN_INTS()}
+   */
+  public static final <K> CombineFn<K, Integer> MIN_INTS() {
+    return aggregatorFactory(MIN_INTS);
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#MIN_INTS(int)}
+   */
+  public static final <K> CombineFn<K, Integer> MIN_INTS(int n) {
+    return aggregator(new MinNAggregator<Integer>(n));
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#MIN_FLOATS()}
+   */
+  public static final <K> CombineFn<K, Float> MIN_FLOATS() {
+    return aggregatorFactory(MIN_FLOATS);
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#MIN_FLOATS(int)}
+   */
+  public static final <K> CombineFn<K, Float> MIN_FLOATS(int n) {
+    return aggregator(new MinNAggregator<Float>(n));
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#MIN_DOUBLES()}
+   */
+  public static final <K> CombineFn<K, Double> MIN_DOUBLES() {
+    return aggregatorFactory(MIN_DOUBLES);
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#MIN_DOUBLES(int)}
+   */
+  public static final <K> CombineFn<K, Double> MIN_DOUBLES(int n) {
+    return aggregator(new MinNAggregator<Double>(n));
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#MIN_BIGINTS()}
+   */
+  public static final <K> CombineFn<K, BigInteger> MIN_BIGINTS() {
+    return aggregatorFactory(MIN_BIGINTS);
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#MIN_BIGINTS(int)}
+   */
+  public static final <K> CombineFn<K, BigInteger> MIN_BIGINTS(int n) {
+    return aggregator(new MinNAggregator<BigInteger>(n));
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#FIRST_N(int)}
+   */
+  public static final <K, V> CombineFn<K, V> FIRST_N(int n) {
+    return aggregator(new FirstNAggregator<V>(n));
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#LAST_N(int)}
+   */
+  public static final <K, V> CombineFn<K, V> LAST_N(int n) {
+    return aggregator(new LastNAggregator<V>(n));
+  }
+
+  /**
+   * Used to concatenate strings, with a separator between each strings. There
+   * is no limits of length for the concatenated string.
+   * 
+   * @param separator
+   *            the separator which will be appended between each string
+   * @param skipNull
+   *            define if we should skip null values. Throw
+   *            NullPointerException if set to false and there is a null
+   *            value.
+   * @return
+   *
+   * @deprecated Use {@link Aggregators#STRING_CONCAT(String, boolean)}
+   */
+  public static final <K> CombineFn<K, String> STRING_CONCAT(final String separator, final boolean skipNull) {
+      return aggregator(new StringConcatAggregator(separator, skipNull));
+  }
+
+  /**
+   * Used to concatenate strings, with a separator between each string. You
+   * can specify the maximum length of the output string and of the input
+   * strings; a value <= 0 means no limit.
+   * 
+   * Any string that is too long (or that would make the output too long)
+   * will be silently discarded.
+   * 
+   * @param separator
+   *            the separator that will be inserted between each string
+   * @param skipNull
+   *            whether null values should be skipped. If set to false and a
+   *            null value is encountered, a NullPointerException is thrown.
+   * @param maxOutputLength
+   *            the maximum length of the output string. If set <= 0, there
+   *            is no limit. Strings that would push the output past
+   *            maxOutputLength characters are discarded.
+   * @param maxInputLength
+   *            the maximum length of the input strings. If set <= 0, there
+   *            is no limit. Input strings longer than maxInputLength
+   *            characters are discarded.
+   * @return a {@code CombineFn} that concatenates the string values of each key
+   *
+   * @deprecated Use {@link Aggregators#STRING_CONCAT(String, boolean, long, long)}
+   */
+  public static final <K> CombineFn<K, String> STRING_CONCAT(final String separator, final boolean skipNull, final long maxOutputLength, final long maxInputLength) {
+      return aggregator(new StringConcatAggregator(separator, skipNull, maxOutputLength, maxInputLength));
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#SUM_LONGS()}
+   */
+  public static class SumLongs extends SimpleAggregator<Long> {
+    private long sum = 0;
+
+    @Override
+    public void reset() {
+      sum = 0;
+    }
+
+    @Override
+    public void update(Long next) {
+      sum += next;
+    }
+
+    @Override
+    public Iterable<Long> results() {
+      return ImmutableList.of(sum);
+    }
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#SUM_LONGS()}
+   */
+  public static AggregatorFactory<Long> SUM_LONGS = new AggregatorFactory<Long>() {
+    public Aggregator<Long> create() {
+      return new SumLongs();
+    }
+  };
+
+  /**
+   * @deprecated Use {@link Aggregators#SUM_INTS()}
+   */
+  public static class SumInts extends SimpleAggregator<Integer> {
+    private int sum = 0;
+
+    @Override
+    public void reset() {
+      sum = 0;
+    }
+
+    @Override
+    public void update(Integer next) {
+      sum += next;
+    }
+
+    @Override
+    public Iterable<Integer> results() {
+      return ImmutableList.of(sum);
+    }
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#SUM_INTS()}
+   */
+  public static AggregatorFactory<Integer> SUM_INTS = new AggregatorFactory<Integer>() {
+    public Aggregator<Integer> create() {
+      return new SumInts();
+    }
+  };
+
+  /**
+   * @deprecated Use {@link Aggregators#SUM_FLOATS()}
+   */
+  public static class SumFloats extends SimpleAggregator<Float> {
+    private float sum = 0;
+
+    @Override
+    public void reset() {
+      sum = 0f;
+    }
+
+    @Override
+    public void update(Float next) {
+      sum += next;
+    }
+
+    @Override
+    public Iterable<Float> results() {
+      return ImmutableList.of(sum);
+    }
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#SUM_FLOATS()}
+   */
+  public static AggregatorFactory<Float> SUM_FLOATS = new AggregatorFactory<Float>() {
+    public Aggregator<Float> create() {
+      return new SumFloats();
+    }
+  };
+
+  /**
+   * @deprecated Use {@link Aggregators#SUM_DOUBLES()}
+   */
+  public static class SumDoubles extends SimpleAggregator<Double> {
+    private double sum = 0;
+
+    @Override
+    public void reset() {
+      sum = 0f;
+    }
+
+    @Override
+    public void update(Double next) {
+      sum += next;
+    }
+
+    @Override
+    public Iterable<Double> results() {
+      return ImmutableList.of(sum);
+    }
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#SUM_DOUBLES()}
+   */
+  public static AggregatorFactory<Double> SUM_DOUBLES = new AggregatorFactory<Double>() {
+    public Aggregator<Double> create() {
+      return new SumDoubles();
+    }
+  };
+
+  /**
+   * @deprecated Use {@link Aggregators#SUM_BIGINTS()}
+   */
+  public static class SumBigInts extends SimpleAggregator<BigInteger> {
+    private BigInteger sum = BigInteger.ZERO;
+
+    @Override
+    public void reset() {
+      sum = BigInteger.ZERO;
+    }
+
+    @Override
+    public void update(BigInteger next) {
+      sum = sum.add(next);
+    }
+
+    @Override
+    public Iterable<BigInteger> results() {
+      return ImmutableList.of(sum);
+    }
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#SUM_BIGINTS()}
+   */
+  public static AggregatorFactory<BigInteger> SUM_BIGINTS = new AggregatorFactory<BigInteger>() {
+    public Aggregator<BigInteger> create() {
+      return new SumBigInts();
+    }
+  };
+
+  /**
+   * @deprecated Use {@link Aggregators#MAX_LONGS()}
+   */
+  public static class MaxLongs extends SimpleAggregator<Long> {
+    private Long max = null;
+
+    @Override
+    public void reset() {
+      max = null;
+    }
+
+    @Override
+    public void update(Long next) {
+      if (max == null || max < next) {
+        max = next;
+      }
+    }
+
+    @Override
+    public Iterable<Long> results() {
+      return ImmutableList.of(max);
+    }
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#MAX_LONGS()}
+   */
+  public static AggregatorFactory<Long> MAX_LONGS = new AggregatorFactory<Long>() {
+    public Aggregator<Long> create() {
+      return new MaxLongs();
+    }
+  };
+
+  /**
+   * @deprecated Use {@link Aggregators#MAX_INTS()}
+   */
+  public static class MaxInts extends SimpleAggregator<Integer> {
+    private Integer max = null;
+
+    @Override
+    public void reset() {
+      max = null;
+    }
+
+    @Override
+    public void update(Integer next) {
+      if (max == null || max < next) {
+        max = next;
+      }
+    }
+
+    @Override
+    public Iterable<Integer> results() {
+      return ImmutableList.of(max);
+    }
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#MAX_INTS()}
+   */
+  public static AggregatorFactory<Integer> MAX_INTS = new AggregatorFactory<Integer>() {
+    public Aggregator<Integer> create() {
+      return new MaxInts();
+    }
+  };
+
+  /**
+   * @deprecated Use {@link Aggregators#MAX_FLOATS()}
+   */
+  public static class MaxFloats extends SimpleAggregator<Float> {
+    private Float max = null;
+
+    @Override
+    public void reset() {
+      max = null;
+    }
+
+    @Override
+    public void update(Float next) {
+      if (max == null || max < next) {
+        max = next;
+      }
+    }
+
+    @Override
+    public Iterable<Float> results() {
+      return ImmutableList.of(max);
+    }
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#MAX_FLOATS()}
+   */
+  public static AggregatorFactory<Float> MAX_FLOATS = new AggregatorFactory<Float>() {
+    public Aggregator<Float> create() {
+      return new MaxFloats();
+    }
+  };
+
+  /**
+   * @deprecated Use {@link Aggregators#MAX_DOUBLES()}
+   */
+  public static class MaxDoubles extends SimpleAggregator<Double> {
+    private Double max = null;
+
+    @Override
+    public void reset() {
+      max = null;
+    }
+
+    @Override
+    public void update(Double next) {
+      if (max == null || max < next) {
+        max = next;
+      }
+    }
+
+    @Override
+    public Iterable<Double> results() {
+      return ImmutableList.of(max);
+    }
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#MAX_DOUBLES()}
+   */
+  public static AggregatorFactory<Double> MAX_DOUBLES = new AggregatorFactory<Double>() {
+    public Aggregator<Double> create() {
+      return new MaxDoubles();
+    }
+  };
+
+  /**
+   * @deprecated Use {@link Aggregators#MAX_BIGINTS()}
+   */
+  public static class MaxBigInts extends SimpleAggregator<BigInteger> {
+    private BigInteger max = null;
+
+    @Override
+    public void reset() {
+      max = null;
+    }
+
+    @Override
+    public void update(BigInteger next) {
+      if (max == null || max.compareTo(next) < 0) {
+        max = next;
+      }
+    }
+
+    @Override
+    public Iterable<BigInteger> results() {
+      return ImmutableList.of(max);
+    }
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#MAX_BIGINTS()}
+   */
+  public static AggregatorFactory<BigInteger> MAX_BIGINTS = new AggregatorFactory<BigInteger>() {
+    public Aggregator<BigInteger> create() {
+      return new MaxBigInts();
+    }
+  };
+
+  /**
+   * @deprecated Use {@link Aggregators#MIN_LONGS()}
+   */
+  public static class MinLongs extends SimpleAggregator<Long> {
+    private Long min = null;
+
+    @Override
+    public void reset() {
+      min = null;
+    }
+
+    @Override
+    public void update(Long next) {
+      if (min == null || min > next) {
+        min = next;
+      }
+    }
+
+    @Override
+    public Iterable<Long> results() {
+      return ImmutableList.of(min);
+    }
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#MIN_LONGS()}
+   */
+  public static AggregatorFactory<Long> MIN_LONGS = new AggregatorFactory<Long>() {
+    public Aggregator<Long> create() {
+      return new MinLongs();
+    }
+  };
+
+  /**
+   * @deprecated Use {@link Aggregators#MIN_INTS()}
+   */
+  public static class MinInts extends SimpleAggregator<Integer> {
+    private Integer min = null;
+
+    @Override
+    public void reset() {
+      min = null;
+    }
+
+    @Override
+    public void update(Integer next) {
+      if (min == null || min > next) {
+        min = next;
+      }
+    }
+
+    @Override
+    public Iterable<Integer> results() {
+      return ImmutableList.of(min);
+    }
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#MIN_INTS()}
+   */
+  public static AggregatorFactory<Integer> MIN_INTS = new AggregatorFactory<Integer>() {
+    public Aggregator<Integer> create() {
+      return new MinInts();
+    }
+  };
+
+  /**
+   * @deprecated Use {@link Aggregators#MIN_FLOATS()}
+   */
+  public static class MinFloats extends SimpleAggregator<Float> {
+    private Float min = null;
+
+    @Override
+    public void reset() {
+      min = null;
+    }
+
+    @Override
+    public void update(Float next) {
+      if (min == null || min > next) {
+        min = next;
+      }
+    }
+
+    @Override
+    public Iterable<Float> results() {
+      return ImmutableList.of(min);
+    }
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#MIN_FLOATS()}
+   */
+  public static AggregatorFactory<Float> MIN_FLOATS = new AggregatorFactory<Float>() {
+    public Aggregator<Float> create() {
+      return new MinFloats();
+    }
+  };
+
+  /**
+   * @deprecated Use {@link Aggregators#MIN_DOUBLES()}
+   */
+  public static class MinDoubles extends SimpleAggregator<Double> {
+    private Double min = null;
+
+    @Override
+    public void reset() {
+      min = null;
+    }
+
+    @Override
+    public void update(Double next) {
+      if (min == null || min > next) {
+        min = next;
+      }
+    }
+
+    @Override
+    public Iterable<Double> results() {
+      return ImmutableList.of(min);
+    }
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#MIN_DOUBLES()}
+   */
+  public static AggregatorFactory<Double> MIN_DOUBLES = new AggregatorFactory<Double>() {
+    public Aggregator<Double> create() {
+      return new MinDoubles();
+    }
+  };
+
+  /**
+   * @deprecated Use {@link Aggregators#MIN_BIGINTS()}
+   */
+  public static class MinBigInts extends SimpleAggregator<BigInteger> {
+    private BigInteger min = null;
+
+    @Override
+    public void reset() {
+      min = null;
+    }
+
+    @Override
+    public void update(BigInteger next) {
+      if (min == null || min.compareTo(next) > 0) {
+        min = next;
+      }
+    }
+
+    @Override
+    public Iterable<BigInteger> results() {
+      return ImmutableList.of(min);
+    }
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#MIN_BIGINTS()}
+   */
+  public static AggregatorFactory<BigInteger> MIN_BIGINTS = new AggregatorFactory<BigInteger>() {
+    public Aggregator<BigInteger> create() {
+      return new MinBigInts();
+    }
+  };
+
+  /**
+   * @deprecated Use {@link Aggregators#MAX_N(int, Class)}
+   */
+  public static class MaxNAggregator<V extends Comparable<V>> extends SimpleAggregator<V> {
+    private final int arity;
+    private transient SortedSet<V> elements;
+
+    public MaxNAggregator(int arity) {
+      this.arity = arity;
+    }
+
+    @Override
+    public void reset() {
+      if (elements == null) {
+        elements = Sets.newTreeSet();
+      } else {
+        elements.clear();
+      }
+    }
+
+    @Override
+    public void update(V value) {
+      if (elements.size() < arity) {
+        elements.add(value);
+      } else if (value.compareTo(elements.first()) > 0) {
+        elements.remove(elements.first());
+        elements.add(value);
+      }
+    }
+
+    @Override
+    public Iterable<V> results() {
+      return ImmutableList.copyOf(elements);
+    }
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#MIN_N(int, Class)}
+   */
+  public static class MinNAggregator<V extends Comparable<V>> extends SimpleAggregator<V> {
+    private final int arity;
+    private transient SortedSet<V> elements;
+
+    public MinNAggregator(int arity) {
+      this.arity = arity;
+    }
+
+    @Override
+    public void reset() {
+      if (elements == null) {
+        elements = Sets.newTreeSet();
+      } else {
+        elements.clear();
+      }
+    }
+
+    @Override
+    public void update(V value) {
+      if (elements.size() < arity) {
+        elements.add(value);
+      } else if (value.compareTo(elements.last()) < 0) {
+        elements.remove(elements.last());
+        elements.add(value);
+      }
+    }
+
+    @Override
+    public Iterable<V> results() {
+      return ImmutableList.copyOf(elements);
+    }
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#FIRST_N(int)}
+   */
+  public static class FirstNAggregator<V> extends SimpleAggregator<V> {
+    private final int arity;
+    private final List<V> elements;
+
+    public FirstNAggregator(int arity) {
+      this.arity = arity;
+      this.elements = Lists.newArrayList();
+    }
+
+    @Override
+    public void reset() {
+      elements.clear();
+    }
+
+    @Override
+    public void update(V value) {
+      if (elements.size() < arity) {
+        elements.add(value);
+      }
+    }
+
+    @Override
+    public Iterable<V> results() {
+      return ImmutableList.copyOf(elements);
+    }
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#LAST_N(int)}
+   */
+  public static class LastNAggregator<V> extends SimpleAggregator<V> {
+    private final int arity;
+    private final LinkedList<V> elements;
+
+    public LastNAggregator(int arity) {
+      this.arity = arity;
+      this.elements = Lists.newLinkedList();
+    }
+
+    @Override
+    public void reset() {
+      elements.clear();
+    }
+
+    @Override
+    public void update(V value) {
+      elements.add(value);
+      if (elements.size() == arity + 1) {
+        elements.removeFirst();
+      }
+    }
+
+    @Override
+    public Iterable<V> results() {
+      return ImmutableList.copyOf(elements);
+    }
+  }
+
+  /**
+   * @deprecated Use {@link Aggregators#STRING_CONCAT(String, boolean, long, long)}
+   */
+  public static class StringConcatAggregator extends SimpleAggregator<String> {
+    private final String separator;
+    private final boolean skipNulls;
+    private final long maxOutputLength;
+    private final long maxInputLength;
+    private long currentLength;
+    private final LinkedList<String> list = new LinkedList<String>();
+
+    private transient Joiner joiner;
+    
+    public StringConcatAggregator(final String separator, final boolean skipNulls) {
+      this.separator = separator;
+      this.skipNulls = skipNulls;
+      this.maxInputLength = 0;
+      this.maxOutputLength = 0;
+    }
+
+    public StringConcatAggregator(final String separator, final boolean skipNull, final long maxOutputLength, final long maxInputLength) {
+      this.separator = separator;
+      this.skipNulls = skipNull;
+      this.maxOutputLength = maxOutputLength;
+      this.maxInputLength = maxInputLength;
+      this.currentLength = -separator.length();
+    }
+
+    @Override
+    public void reset() {
+      if (joiner == null) {
+        joiner = skipNulls ? Joiner.on(separator).skipNulls() : Joiner.on(separator);
+      }
+      currentLength = -separator.length();
+      list.clear();
+    }
+
+    @Override
+    public void update(final String next) {
+      long length = (next == null) ? 0 : next.length() + separator.length();
+      if (maxOutputLength > 0 && currentLength + length > maxOutputLength || maxInputLength > 0 && next.length() > maxInputLength) {
+        return;
+      }
+      if (maxOutputLength > 0) {
+        currentLength += length;
+      }
+      list.add(next);
+    }
+
+    @Override
+    public Iterable<String> results() {
+      return ImmutableList.of(joiner.join(list));
+    }
+  }
+}

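Since nearly all of the static helpers on CombineFn above are deprecated in favor of org.apache.crunch.fn.Aggregators, a small before/after sketch of the migration (the PGroupedTable parameter is illustrative):

    import org.apache.crunch.CombineFn;
    import org.apache.crunch.PGroupedTable;
    import org.apache.crunch.PTable;
    import org.apache.crunch.fn.Aggregators;

    public class CombineMigrationSketch {
      // Deprecated style: build a CombineFn from the static factories on CombineFn.
      public static PTable<String, Long> oldStyle(PGroupedTable<String, Long> grouped) {
        return grouped.combineValues(CombineFn.<String>SUM_LONGS());
      }

      // Replacement style: pass an Aggregator directly; no AggregatorFactory needed.
      public static PTable<String, Long> newStyle(PGroupedTable<String, Long> grouped) {
        return grouped.combineValues(Aggregators.SUM_LONGS());
      }
    }
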
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/CrunchRuntimeException.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/CrunchRuntimeException.java b/crunch-core/src/main/java/org/apache/crunch/CrunchRuntimeException.java
new file mode 100644
index 0000000..044f600
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/CrunchRuntimeException.java
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+/**
+ * A {@code RuntimeException} implementation that carries some additional state
+ * that the Crunch execution engine uses to track whether it has been logged.
+ * Clients may use instances of this class in their own {@code DoFn} implementations.
+ */
+public class CrunchRuntimeException extends RuntimeException {
+
+  private boolean logged = false;
+
+  public CrunchRuntimeException(String msg) {
+    super(msg);
+  }
+
+  public CrunchRuntimeException(Exception e) {
+    super(e);
+  }
+
+  public CrunchRuntimeException(String msg, Exception e) {
+    super(msg, e);
+  }
+
+  /**
+   * Returns true if this exception was written to the debug logs.
+   */
+  public boolean wasLogged() {
+    return logged;
+  }
+
+  /**
+   * Indicate that this exception has been written to the debug logs.
+   */
+  public void markLogged() {
+    this.logged = true;
+  }
+}
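
A minimal sketch of how client code might use this class (illustrative only, not part
of this patch; the ParseFn class and its parsing logic are hypothetical):

import java.io.IOException;

import org.apache.crunch.CrunchRuntimeException;
import org.apache.crunch.DoFn;
import org.apache.crunch.Emitter;

public class ParseFn extends DoFn<String, Integer> {
  @Override
  public void process(String input, Emitter<Integer> emitter) {
    try {
      emitter.emit(parse(input));
    } catch (IOException e) {
      // Wrap the checked exception so it can propagate through the pipeline runner.
      throw new CrunchRuntimeException("Could not parse record: " + input, e);
    }
  }

  private int parse(String input) throws IOException {
    if (input == null || input.trim().isEmpty()) {
      throw new IOException("Empty record");
    }
    return Integer.parseInt(input.trim());
  }
}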

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/DoFn.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/DoFn.java b/crunch-core/src/main/java/org/apache/crunch/DoFn.java
new file mode 100644
index 0000000..2c6389a
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/DoFn.java
@@ -0,0 +1,162 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import java.io.Serializable;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.Counter;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hadoop.mapreduce.TaskInputOutputContext;
+
+/**
+ * Base class for all data processing functions in Crunch.
+ * 
+ * <p>
+ * Note that all {@code DoFn} instances implement {@link Serializable}, and thus
+ * all of their non-transient member variables must implement
+ * {@code Serializable} as well. If your DoFn depends on non-serializable
+ * classes for data processing, they may be declared as {@code transient} and
+ * initialized in the DoFn's {@code initialize} method.
+ * 
+ */
+public abstract class DoFn<S, T> implements Serializable {
+  private transient TaskInputOutputContext<?, ?, ?, ?> context;
+
+  /**
+   * Configure this DoFn. Subclasses may override this method to modify the
+   * configuration of the Job that this DoFn instance belongs to.
+   * 
+   * <p>
+   * Called by the Crunch client during the job planning phase.
+   * </p>
+   * 
+   * @param conf
+   *          The Configuration instance for the Job.
+   */
+  public void configure(Configuration conf) {
+  }
+
+  /**
+   * Initialize this DoFn. This initialization will happen before the actual
+   * {@link #process(Object, Emitter)} is triggered. Subclasses may override
+   * this method to do appropriate initialization.
+   * 
+   * <p>
+   * Called during the setup of the job instance this {@code DoFn} is associated
+   * with.
+   * </p>
+   * 
+   */
+  public void initialize() {
+  }
+
+  /**
+   * Processes the records from a {@link PCollection}.
+   * 
+   * <br/>
+   * <br/>
+   * <b>Note:</b> Crunch can reuse a single input record object whose content
+   * changes on each {@link #process(Object, Emitter)} method call. This
+   * functionality is imposed by Hadoop's <a href=
+   * "http://hadoop.apache.org/common/docs/current/api/org/apache/hadoop/mapred/Reducer.html"
+   * >Reducer</a> implementation: <i>The framework will reuse the key and value
+   * objects that are passed into the reduce, therefore the application should
+   * clone the objects they want to keep a copy of.</i>
+   * 
+   * @param input
+   *          The input record.
+   * @param emitter
+   *          The emitter to send the output to
+   */
+  public abstract void process(S input, Emitter<T> emitter);
+
+  /**
+   * Called during the cleanup of the MapReduce job this {@code DoFn} is
+   * associated with. Subclasses may override this method to do appropriate
+   * cleanup.
+   * 
+   * @param emitter
+   *          The emitter that was used for output
+   */
+  public void cleanup(Emitter<T> emitter) {
+  }
+
+  /**
+   * Called during setup to pass the {@link TaskInputOutputContext} to this
+   * {@code DoFn} instance.
+   */
+  public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
+    this.context = context;
+  }
+
+  /**
+   * Returns an estimate of how applying this function to a {@link PCollection}
+   * will cause it to change in size. The optimizer uses these estimates to
+   * decide where to break up dependent MR jobs into separate Map and Reduce
+   * phases in order to minimize I/O.
+   * 
+   * <p>
+   * Subclasses of {@code DoFn} that will substantially alter the size of the
+   * resulting {@code PCollection} should override this method.
+   */
+  public float scaleFactor() {
+    return 1.2f;
+  }
+
+  protected TaskInputOutputContext<?, ?, ?, ?> getContext() {
+    return context;
+  }
+
+  protected Configuration getConfiguration() {
+    return context.getConfiguration();
+  }
+
+  protected Counter getCounter(Enum<?> counterName) {
+    return context.getCounter(counterName);
+  }
+
+  protected Counter getCounter(String groupName, String counterName) {
+    return context.getCounter(groupName, counterName);
+  }
+
+  protected void increment(Enum<?> counterName) {
+    increment(counterName, 1);
+  }
+
+  protected void increment(Enum<?> counterName, long value) {
+    getCounter(counterName).increment(value);
+  }
+
+  protected void progress() {
+    context.progress();
+  }
+
+  protected TaskAttemptID getTaskAttemptID() {
+    return context.getTaskAttemptID();
+  }
+
+  protected void setStatus(String status) {
+    context.setStatus(status);
+  }
+
+  protected String getStatus() {
+    return context.getStatus();
+  }
+
+}
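
A minimal sketch of a concrete DoFn that follows the contract described above
(illustrative only, not part of this patch):

import java.util.regex.Pattern;

import org.apache.crunch.DoFn;
import org.apache.crunch.Emitter;

public class TokenizeFn extends DoFn<String, String> {
  // Rebuilt per task in initialize() rather than serialized with the DoFn.
  private transient Pattern splitter;

  @Override
  public void initialize() {
    splitter = Pattern.compile("\\s+");
  }

  @Override
  public void process(String line, Emitter<String> emitter) {
    for (String word : splitter.split(line)) {
      if (!word.isEmpty()) {
        emitter.emit(word);
      }
    }
  }

  @Override
  public float scaleFactor() {
    // Tokenizing usually expands the data, so hint that to the planner.
    return 3.0f;
  }
}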

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/Emitter.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/Emitter.java b/crunch-core/src/main/java/org/apache/crunch/Emitter.java
new file mode 100644
index 0000000..d104a09
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/Emitter.java
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+/**
+ * Interface for writing outputs from a {@link DoFn}.
+ * 
+ */
+public interface Emitter<T> {
+  /**
+   * Write the emitted value to the next stage of the pipeline.
+   * 
+   * @param emitted
+   *          The value to write
+   */
+  void emit(T emitted);
+
+  /**
+   * Flushes any values cached by this emitter. Called during the cleanup stage.
+   */
+  void flush();
+}
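
A minimal sketch of an Emitter implementation that collects values into a list, which
can be handy for exercising a DoFn's process() method directly in a unit test
(illustrative only, not part of this patch):

import java.util.ArrayList;
import java.util.List;

import org.apache.crunch.Emitter;

public class ListEmitter<T> implements Emitter<T> {
  private final List<T> values = new ArrayList<T>();

  @Override
  public void emit(T emitted) {
    values.add(emitted);
  }

  @Override
  public void flush() {
    // Nothing is buffered, so there is nothing to flush.
  }

  public List<T> getValues() {
    return values;
  }
}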

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/FilterFn.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/FilterFn.java b/crunch-core/src/main/java/org/apache/crunch/FilterFn.java
new file mode 100644
index 0000000..440f122
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/FilterFn.java
@@ -0,0 +1,244 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import java.util.List;
+
+import org.apache.crunch.fn.FilterFns;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.TaskInputOutputContext;
+
+import com.google.common.collect.ImmutableList;
+
+/**
+ * A {@link DoFn} for the common case of filtering the members of a
+ * {@link PCollection} based on a boolean condition.
+ */
+public abstract class FilterFn<T> extends DoFn<T, T> {
+
+  /**
+   * If true, emit the given record.
+   */
+  public abstract boolean accept(T input);
+
+  @Override
+  public void process(T input, Emitter<T> emitter) {
+    if (accept(input)) {
+      emitter.emit(input);
+    }
+  }
+  
+  @Override
+  public final void cleanup(Emitter<T> emitter) {
+    cleanup();
+  }
+  
+  /**
+   * Called during the cleanup of the MapReduce job this {@code FilterFn} is
+   * associated with. Subclasses may override this method to do appropriate
+   * cleanup.
+   */
+  public void cleanup() {
+  }
+  
+  @Override
+  public float scaleFactor() {
+    return 0.5f;
+  }
+
+  /**
+   * @deprecated Use {@link FilterFns#and(FilterFn...)}
+   */
+  public static <S> FilterFn<S> and(FilterFn<S>... fns) {
+    return new AndFn<S>(fns);
+  }
+
+  /**
+   * @deprecated Use {@link FilterFns#and(FilterFn...)}
+   */
+  public static class AndFn<S> extends FilterFn<S> {
+
+    private final List<FilterFn<S>> fns;
+
+    public AndFn(FilterFn<S>... fns) {
+      this.fns = ImmutableList.<FilterFn<S>> copyOf(fns);
+    }
+    
+    @Override
+    public void configure(Configuration conf) {
+      for (FilterFn<S> fn : fns) {
+        fn.configure(conf);
+      }
+    }
+
+    @Override
+    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
+      for (FilterFn<S> fn : fns) {
+        fn.setContext(context);
+      }
+    }
+    
+    @Override
+    public void initialize() {
+      for (FilterFn<S> fn : fns) {
+        fn.initialize();
+      }
+    }
+
+    @Override
+    public void cleanup() {
+      for (FilterFn<S> fn : fns) {
+        fn.cleanup();
+      }
+    }
+
+    @Override
+    public boolean accept(S input) {
+      for (FilterFn<S> fn : fns) {
+        if (!fn.accept(input)) {
+          return false;
+        }
+      }
+      return true;
+    }
+    
+    @Override
+    public float scaleFactor() {
+      float scaleFactor = 1.0f;
+      for (FilterFn<S> fn : fns) {
+        scaleFactor *= fn.scaleFactor();
+      }
+      return scaleFactor;
+    }
+  }
+
+  /**
+   * @deprecated Use {@link FilterFns#or(FilterFn...)}
+   */
+  public static <S> FilterFn<S> or(FilterFn<S>... fns) {
+    return new OrFn<S>(fns);
+  }
+
+  /**
+   * @deprecated Use {@link FilterFns#or(FilterFn...)}
+   */
+  public static class OrFn<S> extends FilterFn<S> {
+
+    private final List<FilterFn<S>> fns;
+
+    public OrFn(FilterFn<S>... fns) {
+      this.fns = ImmutableList.<FilterFn<S>> copyOf(fns);
+    }
+    
+    @Override
+    public void configure(Configuration conf) {
+      for (FilterFn<S> fn : fns) {
+        fn.configure(conf);
+      }
+    }
+
+    @Override
+    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
+      for (FilterFn<S> fn : fns) {
+        fn.setContext(context);
+      }
+    }
+    
+    @Override
+    public void initialize() {
+      for (FilterFn<S> fn : fns) {
+        fn.initialize();
+      }
+    }
+    
+    @Override
+    public void cleanup() {
+      for (FilterFn<S> fn : fns) {
+        fn.cleanup();
+      }
+    }
+
+    @Override
+    public boolean accept(S input) {
+      for (FilterFn<S> fn : fns) {
+        if (fn.accept(input)) {
+          return true;
+        }
+      }
+      return false;
+    }
+    
+    @Override
+    public float scaleFactor() {
+      float scaleFactor = 0.0f;
+      for (FilterFn<S> fn : fns) {
+        scaleFactor += fn.scaleFactor();
+      }
+      return Math.min(1.0f, scaleFactor);
+    }
+  }
+
+  /**
+   * @deprecated Use {@link FilterFns#not(FilterFn)}
+   */
+  public static <S> FilterFn<S> not(FilterFn<S> fn) {
+    return new NotFn<S>(fn);
+  }
+
+  /**
+   * @deprecated Use {@link FilterFns#not(FilterFn)}
+   */
+  public static class NotFn<S> extends FilterFn<S> {
+
+    private final FilterFn<S> base;
+
+    public NotFn(FilterFn<S> base) {
+      this.base = base;
+    }
+    
+    @Override
+    public void configure(Configuration conf) {
+      base.configure(conf);
+    }
+
+    @Override
+    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
+      base.setContext(context);
+    }
+    
+    @Override
+    public void initialize() {
+      base.initialize();
+    }
+    
+    @Override
+    public void cleanup() {
+      base.cleanup();
+    }
+    
+    @Override
+    public boolean accept(S input) {
+      return !base.accept(input);
+    }
+
+    @Override
+    public float scaleFactor() {
+      return 1.0f - base.scaleFactor();
+    }
+  }
+}
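
A minimal sketch of a concrete FilterFn (illustrative only, not part of this patch):

import org.apache.crunch.FilterFn;

public class NonEmptyFilter extends FilterFn<String> {
  @Override
  public boolean accept(String input) {
    return input != null && !input.isEmpty();
  }
}

It would be applied as lines.filter(new NonEmptyFilter()); multiple conditions can be
composed with the FilterFns.and/or/not helpers that the deprecation notes above refer to.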

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/GroupingOptions.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/GroupingOptions.java b/crunch-core/src/main/java/org/apache/crunch/GroupingOptions.java
new file mode 100644
index 0000000..4aa1343
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/GroupingOptions.java
@@ -0,0 +1,167 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.hadoop.io.RawComparator;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Partitioner;
+
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+
+/**
+ * Options that can be passed to a {@code groupByKey} operation in order to
+ * exercise finer control over how the partitioning, grouping, and sorting of
+ * keys is performed.
+ * 
+ */
+public class GroupingOptions {
+
+  private final Class<? extends Partitioner> partitionerClass;
+  private final Class<? extends RawComparator> groupingComparatorClass;
+  private final Class<? extends RawComparator> sortComparatorClass;
+  private final int numReducers;
+  private final Map<String, String> extraConf;
+  private final Set<SourceTarget<?>> sourceTargets;
+  
+  private GroupingOptions(Class<? extends Partitioner> partitionerClass,
+      Class<? extends RawComparator> groupingComparatorClass, Class<? extends RawComparator> sortComparatorClass,
+      int numReducers, Map<String, String> extraConf, Set<SourceTarget<?>> sourceTargets) {
+    this.partitionerClass = partitionerClass;
+    this.groupingComparatorClass = groupingComparatorClass;
+    this.sortComparatorClass = sortComparatorClass;
+    this.numReducers = numReducers;
+    this.extraConf = extraConf;
+    this.sourceTargets = sourceTargets;
+  }
+
+  public int getNumReducers() {
+    return numReducers;
+  }
+
+  public Class<? extends RawComparator> getSortComparatorClass() {
+    return sortComparatorClass;
+  }
+
+  public Class<? extends RawComparator> getGroupingComparatorClass() {
+    return groupingComparatorClass;
+  }
+  
+  public Class<? extends Partitioner> getPartitionerClass() {
+    return partitionerClass;
+  }
+  
+  public Set<SourceTarget<?>> getSourceTargets() {
+    return sourceTargets;
+  }
+  
+  public void configure(Job job) {
+    if (partitionerClass != null) {
+      job.setPartitionerClass(partitionerClass);
+    }
+    if (groupingComparatorClass != null) {
+      job.setGroupingComparatorClass(groupingComparatorClass);
+    }
+    if (sortComparatorClass != null) {
+      job.setSortComparatorClass(sortComparatorClass);
+    }
+    if (numReducers > 0) {
+      job.setNumReduceTasks(numReducers);
+    }
+    for (Map.Entry<String, String> e : extraConf.entrySet()) {
+      job.getConfiguration().set(e.getKey(), e.getValue());
+    }
+  }
+
+  public boolean isCompatibleWith(GroupingOptions other) {
+    if (partitionerClass != other.partitionerClass) {
+      return false;
+    }
+    if (groupingComparatorClass != other.groupingComparatorClass) {
+      return false;
+    }
+    if (sortComparatorClass != other.sortComparatorClass) {
+      return false;
+    }
+    if (!extraConf.equals(other.extraConf)) {
+      return false;
+    }
+    return true;
+  }
+
+  public static Builder builder() {
+    return new Builder();
+  }
+
+  /**
+   * Builder class for creating {@code GroupingOptions} instances.
+   * 
+   */
+  public static class Builder {
+    private Class<? extends Partitioner> partitionerClass;
+    private Class<? extends RawComparator> groupingComparatorClass;
+    private Class<? extends RawComparator> sortComparatorClass;
+    private int numReducers;
+    private Map<String, String> extraConf = Maps.newHashMap();
+    private Set<SourceTarget<?>> sourceTargets = Sets.newHashSet();
+    
+    public Builder() {
+    }
+
+    public Builder partitionerClass(Class<? extends Partitioner> partitionerClass) {
+      this.partitionerClass = partitionerClass;
+      return this;
+    }
+
+    public Builder groupingComparatorClass(Class<? extends RawComparator> groupingComparatorClass) {
+      this.groupingComparatorClass = groupingComparatorClass;
+      return this;
+    }
+
+    public Builder sortComparatorClass(Class<? extends RawComparator> sortComparatorClass) {
+      this.sortComparatorClass = sortComparatorClass;
+      return this;
+    }
+
+    public Builder numReducers(int numReducers) {
+      if (numReducers <= 0) {
+        throw new IllegalArgumentException("Invalid number of reducers: " + numReducers);
+      }
+      this.numReducers = numReducers;
+      return this;
+    }
+
+    public Builder conf(String confKey, String confValue) {
+      this.extraConf.put(confKey, confValue);
+      return this;
+    }
+    
+    public Builder sourceTarget(SourceTarget<?> st) {
+      this.sourceTargets.add(st);
+      return this;
+    }
+    
+    public GroupingOptions build() {
+      return new GroupingOptions(partitionerClass, groupingComparatorClass, sortComparatorClass,
+          numReducers, extraConf, sourceTargets);
+    }
+  }
+}
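
A rough sketch of building GroupingOptions and passing them to a groupByKey call
(illustrative only, not part of this patch; HashPartitioner and the slowstart key are
just examples of a Hadoop partitioner and configuration setting):

import org.apache.crunch.GroupingOptions;
import org.apache.crunch.PGroupedTable;
import org.apache.crunch.PTable;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;

public class GroupingExample {
  // Group with an explicit partitioner, a fixed reducer count, and an extra
  // configuration setting that applies only to this shuffle.
  public static <K, V> PGroupedTable<K, V> group(PTable<K, V> table) {
    GroupingOptions options = GroupingOptions.builder()
        .partitionerClass(HashPartitioner.class)
        .numReducers(10)
        .conf("mapred.reduce.slowstart.completed.maps", "0.9")
        .build();
    return table.groupByKey(options);
  }
}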

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/MapFn.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/MapFn.java b/crunch-core/src/main/java/org/apache/crunch/MapFn.java
new file mode 100644
index 0000000..dbf172e
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/MapFn.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+/**
+ * A {@link DoFn} for the common case of emitting exactly one value for each
+ * input record.
+ * 
+ */
+public abstract class MapFn<S, T> extends DoFn<S, T> {
+
+  /**
+   * Maps the given input into an instance of the output type.
+   */
+  public abstract T map(S input);
+
+  @Override
+  public void process(S input, Emitter<T> emitter) {
+    emitter.emit(map(input));
+  }
+
+  @Override
+  public float scaleFactor() {
+    return 1.0f;
+  }
+}
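
A minimal sketch of a concrete MapFn (illustrative only, not part of this patch):

import org.apache.crunch.MapFn;

// Emits exactly one lower-cased output string per input string.
public class LowerCaseFn extends MapFn<String, String> {
  @Override
  public String map(String input) {
    return input.toLowerCase();
  }
}

It would be applied as strings.parallelDo(new LowerCaseFn(), Writables.strings()),
assuming the writable type family from crunch-core.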

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/PCollection.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/PCollection.java b/crunch-core/src/main/java/org/apache/crunch/PCollection.java
new file mode 100644
index 0000000..6f5abf6
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/PCollection.java
@@ -0,0 +1,245 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import java.util.Collection;
+
+import org.apache.crunch.types.PTableType;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.PTypeFamily;
+
+/**
+ * A representation of an immutable, distributed collection of elements that is
+ * the fundamental target of computations in Crunch.
+ *
+ */
+public interface PCollection<S> {
+  /**
+   * Returns the {@code Pipeline} associated with this PCollection.
+   */
+  Pipeline getPipeline();
+
+  /**
+   * Returns a {@code PCollection} instance that acts as the union of this
+   * {@code PCollection} and the given {@code PCollection}.
+   */
+  PCollection<S> union(PCollection<S> other);
+  
+  /**
+   * Returns a {@code PCollection} instance that acts as the union of this
+   * {@code PCollection} and the input {@code PCollection}s.
+   */
+  PCollection<S> union(PCollection<S>... collections);
+
+  /**
+   * Applies the given doFn to the elements of this {@code PCollection} and
+   * returns a new {@code PCollection} that is the output of this processing.
+   *
+   * @param doFn
+   *          The {@code DoFn} to apply
+   * @param type
+   *          The {@link PType} of the resulting {@code PCollection}
+   * @return a new {@code PCollection}
+   */
+  <T> PCollection<T> parallelDo(DoFn<S, T> doFn, PType<T> type);
+
+  /**
+   * Applies the given doFn to the elements of this {@code PCollection} and
+   * returns a new {@code PCollection} that is the output of this processing.
+   *
+   * @param name
+   *          An identifier for this processing step, useful for debugging
+   * @param doFn
+   *          The {@code DoFn} to apply
+   * @param type
+   *          The {@link PType} of the resulting {@code PCollection}
+   * @return a new {@code PCollection}
+   */
+  <T> PCollection<T> parallelDo(String name, DoFn<S, T> doFn, PType<T> type);
+  
+  /**
+   * Applies the given doFn to the elements of this {@code PCollection} and
+   * returns a new {@code PCollection} that is the output of this processing.
+   *
+   * @param name
+   *          An identifier for this processing step, useful for debugging
+   * @param doFn
+   *          The {@code DoFn} to apply
+   * @param type
+   *          The {@link PType} of the resulting {@code PCollection}
+   * @param options
+   *          Optional information that is needed for certain pipeline operations
+   * @return a new {@code PCollection}
+   */
+  <T> PCollection<T> parallelDo(String name, DoFn<S, T> doFn, PType<T> type,
+      ParallelDoOptions options);
+
+  /**
+   * Similar to the other {@code parallelDo} instance, but returns a
+   * {@code PTable} instance instead of a {@code PCollection}.
+   *
+   * @param doFn
+   *          The {@code DoFn} to apply
+   * @param type
+   *          The {@link PTableType} of the resulting {@code PTable}
+   * @return a new {@code PTable}
+   */
+  <K, V> PTable<K, V> parallelDo(DoFn<S, Pair<K, V>> doFn, PTableType<K, V> type);
+
+  /**
+   * Similar to the other {@code parallelDo} instance, but returns a
+   * {@code PTable} instance instead of a {@code PCollection}.
+   *
+   * @param name
+   *          An identifier for this processing step
+   * @param doFn
+   *          The {@code DoFn} to apply
+   * @param type
+   *          The {@link PTableType} of the resulting {@code PTable}
+   * @return a new {@code PTable}
+   */
+  <K, V> PTable<K, V> parallelDo(String name, DoFn<S, Pair<K, V>> doFn, PTableType<K, V> type);
+  
+  /**
+   * Similar to the other {@code parallelDo} instance, but returns a
+   * {@code PTable} instance instead of a {@code PCollection}.
+   *
+   * @param name
+   *          An identifier for this processing step
+   * @param doFn
+   *          The {@code DoFn} to apply
+   * @param type
+   *          The {@link PTableType} of the resulting {@code PTable}
+   * @param options
+   *          Optional information that is needed for certain pipeline operations
+   * @return a new {@code PTable}
+   */
+  <K, V> PTable<K, V> parallelDo(String name, DoFn<S, Pair<K, V>> doFn, PTableType<K, V> type,
+      ParallelDoOptions options);
+
+  /**
+   * Write the contents of this {@code PCollection} to the given {@code Target},
+   * using the storage format specified by the target.
+   *
+   * @param target
+   *          The target to write to
+   */
+  PCollection<S> write(Target target);
+
+  /**
+   * Write the contents of this {@code PCollection} to the given {@code Target},
+   * using the given {@code Target.WriteMode} to handle existing
+   * targets.
+   * 
+   * @param target
+   *          The target
+   * @param writeMode
+   *          The rule for handling existing outputs at the target location
+   */
+  PCollection<S> write(Target target, Target.WriteMode writeMode);
+  
+  /**
+   * Returns a reference to the data set represented by this PCollection that
+   * may be used by the client to read the data locally.
+   */
+  Iterable<S> materialize();
+
+  /**
+   * @return A {@code PObject} encapsulating an in-memory {@link Collection} containing the values
+   * of this {@code PCollection}.
+   */
+  PObject<Collection<S>> asCollection();
+
+  /**
+   * Returns the {@code PType} of this {@code PCollection}.
+   */
+  PType<S> getPType();
+
+  /**
+   * Returns the {@code PTypeFamily} of this {@code PCollection}.
+   */
+  PTypeFamily getTypeFamily();
+
+  /**
+   * Returns the size of the data represented by this {@code PCollection} in
+   * bytes.
+   */
+  long getSize();
+
+  /**
+   * Returns the number of elements represented by this {@code PCollection}.
+   *
+   * @return A {@code PObject} containing the number of elements in this {@code PCollection}.
+   */
+  PObject<Long> length();
+
+  /**
+   * Returns a shorthand name for this PCollection.
+   */
+  String getName();
+
+  /**
+   * Apply the given filter function to this instance and return the resulting
+   * {@code PCollection}.
+   */
+  PCollection<S> filter(FilterFn<S> filterFn);
+
+  /**
+   * Apply the given filter function to this instance and return the resulting
+   * {@code PCollection}.
+   *
+   * @param name
+   *          An identifier for this processing step
+   * @param filterFn
+   *          The {@code FilterFn} to apply
+   */
+  PCollection<S> filter(String name, FilterFn<S> filterFn);
+
+  /**
+   * Apply the given map function to each element of this instance in order to
+   * create a {@code PTable}.
+   */
+  <K> PTable<K, S> by(MapFn<S, K> extractKeyFn, PType<K> keyType);
+
+  /**
+   * Apply the given map function to each element of this instance in order to
+   * create a {@code PTable}.
+   *
+   * @param name
+   *          An identifier for this processing step
+   * @param extractKeyFn
+   *          The {@code MapFn} to apply
+   */
+  <K> PTable<K, S> by(String name, MapFn<S, K> extractKeyFn, PType<K> keyType);
+
+  /**
+   * Returns a {@code PTable} instance that contains the counts of each unique
+   * element of this PCollection.
+   */
+  PTable<S, Long> count();
+
+  /**
+   * Returns a {@code PObject} of the maximum element of this instance.
+   */
+  PObject<S> max();
+
+  /**
+   * Returns a {@code PObject} of the minimum element of this instance.
+   */
+  PObject<S> min();
+}
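
A rough end-to-end sketch of the interface above (illustrative only, not part of this
patch; MRPipeline, readTextFile, writeTextFile and Writables come from other parts of
crunch-core):

import org.apache.crunch.DoFn;
import org.apache.crunch.Emitter;
import org.apache.crunch.PCollection;
import org.apache.crunch.PTable;
import org.apache.crunch.Pipeline;
import org.apache.crunch.impl.mr.MRPipeline;
import org.apache.crunch.types.writable.Writables;

public class WordCountSketch {
  public static void run(String inputPath, String outputPath) {
    Pipeline pipeline = new MRPipeline(WordCountSketch.class);
    PCollection<String> lines = pipeline.readTextFile(inputPath);
    PCollection<String> words = lines.parallelDo("tokenize",
        new DoFn<String, String>() {
          @Override
          public void process(String line, Emitter<String> emitter) {
            for (String word : line.split("\\s+")) {
              emitter.emit(word);
            }
          }
        }, Writables.strings());
    // count() yields a PTable of word -> number of occurrences.
    PTable<String, Long> counts = words.count();
    pipeline.writeTextFile(counts, outputPath);
    pipeline.done();
  }
}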

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/PGroupedTable.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/PGroupedTable.java b/crunch-core/src/main/java/org/apache/crunch/PGroupedTable.java
new file mode 100644
index 0000000..d77ffdb
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/PGroupedTable.java
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import org.apache.crunch.Aggregator;
+
+/**
+ * The Crunch representation of a grouped {@link PTable}.
+ * 
+ */
+public interface PGroupedTable<K, V> extends PCollection<Pair<K, Iterable<V>>> {
+
+  /**
+   * Combines the values of this grouping using the given {@code CombineFn}.
+   * 
+   * @param combineFn
+   *          The combiner function
+   * @return A {@code PTable} where each key has a single value
+   */
+  PTable<K, V> combineValues(CombineFn<K, V> combineFn);
+
+  /**
+   * Combine the values in each group using the given {@link Aggregator}.
+   *
+   * @param aggregator The function to use
+   * @return A {@link PTable} where each group key maps to an aggregated
+   *         value. Group keys may be repeated if an aggregator returns
+   *         more than one value.
+   */
+  PTable<K, V> combineValues(Aggregator<V> aggregator);
+
+  /**
+   * Convert this grouping back into a multimap.
+   * 
+   * @return an ungrouped version of the data in this {@code PGroupedTable}.
+   */
+  PTable<K, V> ungroup();
+}
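
A minimal sketch of combining grouped values with an Aggregator (illustrative only,
not part of this patch; Aggregators.SUM_LONGS() is assumed to be the standard sum
aggregator from org.apache.crunch.fn.Aggregators):

import org.apache.crunch.PTable;
import org.apache.crunch.fn.Aggregators;

public class SumByKey {
  // Collapse each group of counts down to a single total per key.
  public static PTable<String, Long> sum(PTable<String, Long> counts) {
    return counts
        .groupByKey()
        .combineValues(Aggregators.SUM_LONGS());
  }
}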

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/PObject.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/PObject.java b/crunch-core/src/main/java/org/apache/crunch/PObject.java
new file mode 100644
index 0000000..897a01f
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/PObject.java
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+/**
+ * A {@code PObject} represents a singleton object value that results from a distributed
+ * computation. The computation producing the value is deferred until
+ * {@link org.apache.crunch.PObject#getValue()} is called.
+ *
+ * @param <T> The type of value encapsulated by this {@code PObject}.
+ */
+public interface PObject<T> {
+  /**
+   * Gets the value associated with this {@code PObject}.  Calling this method will trigger
+   * whatever computation is necessary to obtain the value and block until that computation
+   * succeeds.
+   *
+   * @return The value associated with this {@code PObject}.
+   */
+  T getValue();
+}
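
A minimal sketch of reading PObject values on the client (illustrative only, not part
of this patch); both getValue() calls block until the underlying pipeline work completes:

import java.util.Collection;

import org.apache.crunch.PCollection;
import org.apache.crunch.PObject;

public class PObjectExample {
  public static void inspect(PCollection<String> words) {
    PObject<Long> size = words.length();
    PObject<Collection<String>> contents = words.asCollection();
    System.out.println("count = " + size.getValue());
    System.out.println("values = " + contents.getValue());
  }
}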

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/PTable.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/PTable.java b/crunch-core/src/main/java/org/apache/crunch/PTable.java
new file mode 100644
index 0000000..8df9853
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/PTable.java
@@ -0,0 +1,181 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import java.util.Collection;
+import java.util.Map;
+
+import org.apache.crunch.types.PTableType;
+import org.apache.crunch.types.PType;
+
+/**
+ * A sub-interface of {@code PCollection} that represents an immutable,
+ * distributed multi-map of keys and values.
+ *
+ */
+public interface PTable<K, V> extends PCollection<Pair<K, V>> {
+
+  /**
+   * Returns a {@code PTable} instance that acts as the union of this
+   * {@code PTable} and the other {@code PTable}.
+   */
+  PTable<K, V> union(PTable<K, V> other);
+  
+  /**
+   * Returns a {@code PTable} instance that acts as the union of this
+   * {@code PTable} and the input {@code PTable}s.
+   */
+  PTable<K, V> union(PTable<K, V>... others);
+
+  /**
+   * Performs a grouping operation on the keys of this table.
+   *
+   * @return a {@code PGroupedTable} instance that represents the grouping
+   */
+  PGroupedTable<K, V> groupByKey();
+
+  /**
+   * Performs a grouping operation on the keys of this table, using the given
+   * number of partitions.
+   *
+   * @param numPartitions
+   *          The number of partitions for the data.
+   * @return a {@code PGroupedTable} instance that represents this grouping
+   */
+  PGroupedTable<K, V> groupByKey(int numPartitions);
+
+  /**
+   * Performs a grouping operation on the keys of this table, using the
+   * additional {@code GroupingOptions} to control how the grouping is executed.
+   *
+   * @param options
+   *          The grouping options to use
+   * @return a {@code PGroupedTable} instance that represents the grouping
+   */
+  PGroupedTable<K, V> groupByKey(GroupingOptions options);
+
+  /**
+   * Writes this {@code PTable} to the given {@code Target}.
+   */
+  PTable<K, V> write(Target target);
+
+  /**
+   * Writes this {@code PTable} to the given {@code Target}, using the
+   * given {@code Target.WriteMode} to handle existing targets.
+   */
+  PTable<K, V> write(Target target, Target.WriteMode writeMode);
+
+  /**
+   * Returns the {@code PTableType} of this {@code PTable}.
+   */
+  PTableType<K, V> getPTableType();
+
+  /**
+   * Returns the {@code PType} of the key.
+   */
+  PType<K> getKeyType();
+
+  /**
+   * Returns the {@code PType} of the value.
+   */
+  PType<V> getValueType();
+
+  /**
+   * Aggregate all of the values with the same key into a single key-value pair
+   * in the returned PTable.
+   */
+  PTable<K, Collection<V>> collectValues();
+
+  /**
+   * Apply the given filter function to this instance and return the resulting
+   * {@code PTable}.
+   */
+  PTable<K, V> filter(FilterFn<Pair<K, V>> filterFn);
+  
+  /**
+   * Apply the given filter function to this instance and return the resulting
+   * {@code PTable}.
+   *
+   * @param name
+   *          An identifier for this processing step
+   * @param filterFn
+   *          The {@code FilterFn} to apply
+   */
+  PTable<K, V> filter(String name, FilterFn<Pair<K, V>> filterFn);
+  
+  /**
+   * Returns a PTable made up of the pairs in this PTable with the largest value
+   * field.
+   *
+   * @param count
+   *          The number of pairs to return
+   */
+  PTable<K, V> top(int count);
+
+  /**
+   * Returns a PTable made up of the pairs in this PTable with the smallest
+   * value field.
+   *
+   * @param count
+   *          The number of pairs to return
+   */
+  PTable<K, V> bottom(int count);
+
+  /**
+   * Perform an inner join on this table and the one passed in as an argument on
+   * their common keys.
+   */
+  <U> PTable<K, Pair<V, U>> join(PTable<K, U> other);
+
+  /**
+   * Co-group operation with the given table on common keys.
+   */
+  <U> PTable<K, Pair<Collection<V>, Collection<U>>> cogroup(PTable<K, U> other);
+
+  /**
+   * Returns a {@link PCollection} made up of the keys in this PTable.
+   */
+  PCollection<K> keys();
+
+  /**
+   * Returns a {@link PCollection} made up of the values in this PTable.
+   */
+  PCollection<V> values();
+
+  /**
+   * Returns a {@code Map<K, V>} made up of the keys and values in this PTable.
+   * <p>
+   * <b>Note:</b> The contents of the returned map may not be exactly the same
+   * as this PTable, as a PTable is a multi-map (i.e. can contain multiple
+   * values for a single key).
+   */
+  Map<K, V> materializeToMap();
+
+  /**
+   * Returns a {@link PObject} encapsulating a {@link Map} made up of the keys and values in this
+   * {@code PTable}.
+   * <p><b>Note:</b>The contents of the returned map may not be exactly the same as this PTable,
+   * as a PTable is a multi-map (i.e. can contain multiple values for a single key).
+   * </p>
+   *
+   * @return The {@code PObject} encapsulating a {@code Map} made up of the keys and values in
+   * this {@code PTable}.
+   */
+  PObject<Map<K, V>> asMap();
+
+}
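
A minimal sketch of the join and top operations above (illustrative only, not part of
this patch):

import org.apache.crunch.PTable;
import org.apache.crunch.Pair;

public class TableOps {
  // Inner join two tables on their common keys, then keep the 10 entries whose
  // joined value pairs are largest under Pair's natural ordering.
  public static PTable<String, Pair<Long, String>> joinAndTop(
      PTable<String, Long> counts, PTable<String, String> labels) {
    return counts.join(labels).top(10);
  }
}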

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/Pair.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/Pair.java b/crunch-core/src/main/java/org/apache/crunch/Pair.java
new file mode 100644
index 0000000..fd058b6
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/Pair.java
@@ -0,0 +1,105 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import org.apache.commons.lang.builder.HashCodeBuilder;
+
+/**
+ * A convenience class for two-element {@link Tuple}s.
+ */
+public class Pair<K, V> implements Tuple, Comparable<Pair<K, V>> {
+
+  private final K first;
+  private final V second;
+
+  public static <T, U> Pair<T, U> of(T first, U second) {
+    return new Pair<T, U>(first, second);
+  }
+
+  public Pair(K first, V second) {
+    this.first = first;
+    this.second = second;
+  }
+
+  public K first() {
+    return first;
+  }
+
+  public V second() {
+    return second;
+  }
+
+  public Object get(int index) {
+    switch (index) {
+    case 0:
+      return first;
+    case 1:
+      return second;
+    default:
+      throw new ArrayIndexOutOfBoundsException();
+    }
+  }
+
+  public int size() {
+    return 2;
+  }
+
+  @Override
+  public int hashCode() {
+    HashCodeBuilder hcb = new HashCodeBuilder();
+    return hcb.append(first).append(second).toHashCode();
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj)
+      return true;
+    if (obj == null)
+      return false;
+    if (getClass() != obj.getClass())
+      return false;
+    Pair<?, ?> other = (Pair<?, ?>) obj;
+    return (first == other.first || (first != null && first.equals(other.first)))
+        && (second == other.second || (second != null && second.equals(other.second)));
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder("[");
+    sb.append(first).append(",").append(second).append("]");
+    return sb.toString();
+  }
+
+  private int cmp(Object lhs, Object rhs) {
+    if (lhs == rhs) {
+      return 0;
+    } else if (lhs != null && Comparable.class.isAssignableFrom(lhs.getClass())) {
+      return ((Comparable) lhs).compareTo(rhs);
+    }
+    return (lhs == null ? 0 : lhs.hashCode()) - (rhs == null ? 0 : rhs.hashCode());
+  }
+
+  @Override
+  public int compareTo(Pair<K, V> o) {
+    int diff = cmp(first, o.first);
+    if (diff == 0) {
+      diff = cmp(second, o.second);
+    }
+    return diff;
+  }
+}
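
A minimal sketch of emitting Pairs to build a PTable from raw lines (illustrative only,
not part of this patch; Writables.tableOf is assumed from the writable type family):

import org.apache.crunch.MapFn;
import org.apache.crunch.PCollection;
import org.apache.crunch.PTable;
import org.apache.crunch.Pair;
import org.apache.crunch.types.writable.Writables;

public class PairExample {
  // Turn tab-separated "key<TAB>value" lines into a PTable by mapping each line
  // to a Pair.
  public static PTable<String, String> toTable(PCollection<String> lines) {
    return lines.parallelDo(new MapFn<String, Pair<String, String>>() {
      @Override
      public Pair<String, String> map(String line) {
        String[] parts = line.split("\t", 2);
        return Pair.of(parts[0], parts.length > 1 ? parts[1] : "");
      }
    }, Writables.tableOf(Writables.strings(), Writables.strings()));
  }
}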

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/ParallelDoOptions.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/ParallelDoOptions.java b/crunch-core/src/main/java/org/apache/crunch/ParallelDoOptions.java
new file mode 100644
index 0000000..2407b3a
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/ParallelDoOptions.java
@@ -0,0 +1,62 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import java.util.Collections;
+import java.util.Set;
+
+import com.google.common.collect.Sets;
+
+/**
+ * Container class that includes optional information about a {@code parallelDo} operation
+ * applied to a {@code PCollection}. Primarily used within the Crunch framework
+ * itself for certain types of advanced processing operations, such as in-memory joins
+ * that require reading a file from the filesystem into a {@code DoFn}.
+ */
+public class ParallelDoOptions {
+  private final Set<SourceTarget<?>> sourceTargets;
+  
+  private ParallelDoOptions(Set<SourceTarget<?>> sourceTargets) {
+    this.sourceTargets = sourceTargets;
+  }
+  
+  public Set<SourceTarget<?>> getSourceTargets() {
+    return sourceTargets;
+  }
+  
+  public static Builder builder() {
+    return new Builder();
+  }
+  
+  public static class Builder {
+    private Set<SourceTarget<?>> sourceTargets;
+    
+    public Builder() {
+      this.sourceTargets = Sets.newHashSet();
+    }
+    
+    public Builder sourceTargets(SourceTarget<?>... sourceTargets) {
+      Collections.addAll(this.sourceTargets, sourceTargets);
+      return this;
+    }
+    
+    public ParallelDoOptions build() {
+      return new ParallelDoOptions(sourceTargets);
+    }
+  }
+}
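
A minimal sketch of declaring a side-input dependency through ParallelDoOptions
(illustrative only, not part of this patch):

import org.apache.crunch.DoFn;
import org.apache.crunch.PCollection;
import org.apache.crunch.ParallelDoOptions;
import org.apache.crunch.SourceTarget;
import org.apache.crunch.types.PType;

public class SideInputExample {
  // Declare that the DoFn depends on another output, so the planner runs the job
  // that produces that output before this stage executes.
  public static <S, T> PCollection<T> apply(PCollection<S> in, DoFn<S, T> fn,
      PType<T> type, SourceTarget<?> sideInput) {
    ParallelDoOptions opts = ParallelDoOptions.builder()
        .sourceTargets(sideInput)
        .build();
    return in.parallelDo("with-side-input", fn, type, opts);
  }
}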


[12/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/collect/PCollectionImpl.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/collect/PCollectionImpl.java b/crunch/src/main/java/org/apache/crunch/impl/mr/collect/PCollectionImpl.java
deleted file mode 100644
index 6ea9c4c..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/collect/PCollectionImpl.java
+++ /dev/null
@@ -1,295 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.collect;
-
-import java.util.Collection;
-import java.util.Collections;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.crunch.DoFn;
-import org.apache.crunch.FilterFn;
-import org.apache.crunch.MapFn;
-import org.apache.crunch.PCollection;
-import org.apache.crunch.PObject;
-import org.apache.crunch.PTable;
-import org.apache.crunch.Pair;
-import org.apache.crunch.ParallelDoOptions;
-import org.apache.crunch.Pipeline;
-import org.apache.crunch.SourceTarget;
-import org.apache.crunch.Target;
-import org.apache.crunch.fn.ExtractKeyFn;
-import org.apache.crunch.fn.IdentityFn;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.impl.mr.plan.DoNode;
-import org.apache.crunch.lib.Aggregate;
-import org.apache.crunch.materialize.pobject.CollectionPObject;
-import org.apache.crunch.types.PTableType;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.PTypeFamily;
-
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
-
-public abstract class PCollectionImpl<S> implements PCollection<S> {
-
-  private static final Log LOG = LogFactory.getLog(PCollectionImpl.class);
-
-  private final String name;
-  protected MRPipeline pipeline;
-  protected SourceTarget<S> materializedAt;
-  private final ParallelDoOptions options;
-  
-  public PCollectionImpl(String name) {
-    this(name, ParallelDoOptions.builder().build());
-  }
-  
-  public PCollectionImpl(String name, ParallelDoOptions options) {
-    this.name = name;
-    this.options = options;
-  }
-
-  @Override
-  public String getName() {
-    return name;
-  }
-
-  @Override
-  public String toString() {
-    return getName();
-  }
-
-  @Override
-  public PCollection<S> union(PCollection<S> other) {
-    return union(new PCollection[] { other });
-  }
-  
-  @Override
-  public PCollection<S> union(PCollection<S>... collections) {
-    List<PCollectionImpl<S>> internal = Lists.newArrayList();
-    internal.add(this);
-    for (PCollection<S> collection : collections) {
-      internal.add((PCollectionImpl<S>) collection.parallelDo(IdentityFn.<S>getInstance(), collection.getPType()));
-    }
-    return new UnionCollection<S>(internal);
-  }
-
-  @Override
-  public <T> PCollection<T> parallelDo(DoFn<S, T> fn, PType<T> type) {
-    MRPipeline pipeline = (MRPipeline) getPipeline();
-    return parallelDo("S" + pipeline.getNextAnonymousStageId(), fn, type);
-  }
-
-  @Override
-  public <T> PCollection<T> parallelDo(String name, DoFn<S, T> fn, PType<T> type) {
-    return new DoCollectionImpl<T>(name, getChainingCollection(), fn, type);
-  }
-  
-  @Override
-  public <T> PCollection<T> parallelDo(String name, DoFn<S, T> fn, PType<T> type,
-      ParallelDoOptions options) {
-    return new DoCollectionImpl<T>(name, getChainingCollection(), fn, type, options);
-  }
-  
-  @Override
-  public <K, V> PTable<K, V> parallelDo(DoFn<S, Pair<K, V>> fn, PTableType<K, V> type) {
-    MRPipeline pipeline = (MRPipeline) getPipeline();
-    return parallelDo("S" + pipeline.getNextAnonymousStageId(), fn, type);
-  }
-
-  @Override
-  public <K, V> PTable<K, V> parallelDo(String name, DoFn<S, Pair<K, V>> fn, PTableType<K, V> type) {
-    return new DoTableImpl<K, V>(name, getChainingCollection(), fn, type);
-  }
-
-  @Override
-  public <K, V> PTable<K, V> parallelDo(String name, DoFn<S, Pair<K, V>> fn, PTableType<K, V> type,
-      ParallelDoOptions options) {
-    return new DoTableImpl<K, V>(name, getChainingCollection(), fn, type, options);
-  }
-
-  public PCollection<S> write(Target target) {
-    if (materializedAt != null) {
-      getPipeline().write(new InputCollection<S>(materializedAt, (MRPipeline) getPipeline()), target);
-    } else {
-      getPipeline().write(this, target);
-    }
-    return this;
-  }
-
-  @Override
-  public PCollection<S> write(Target target, Target.WriteMode writeMode) {
-    if (materializedAt != null) {
-      getPipeline().write(new InputCollection<S>(materializedAt, (MRPipeline) getPipeline()), target,
-          writeMode);
-    } else {
-      getPipeline().write(this, target, writeMode);
-    }
-    return this;
-  }
-  
-  @Override
-  public Iterable<S> materialize() {
-    if (getSize() == 0) {
-      LOG.warn("Materializing an empty PCollection: " + this.getName());
-      return Collections.emptyList();
-    }
-    return getPipeline().materialize(this);
-  }
-
-  /** {@inheritDoc} */
-  @Override
-  public PObject<Collection<S>> asCollection() {
-    return new CollectionPObject<S>(this);
-  }
-
-  public SourceTarget<S> getMaterializedAt() {
-    return materializedAt;
-  }
-
-  public void materializeAt(SourceTarget<S> sourceTarget) {
-    this.materializedAt = sourceTarget;
-  }
-
-  @Override
-  public PCollection<S> filter(FilterFn<S> filterFn) {
-    return parallelDo(filterFn, getPType());
-  }
-
-  @Override
-  public PCollection<S> filter(String name, FilterFn<S> filterFn) {
-    return parallelDo(name, filterFn, getPType());
-  }
-
-  @Override
-  public <K> PTable<K, S> by(MapFn<S, K> mapFn, PType<K> keyType) {
-    return parallelDo(new ExtractKeyFn<K, S>(mapFn), getTypeFamily().tableOf(keyType, getPType()));
-  }
-
-  @Override
-  public <K> PTable<K, S> by(String name, MapFn<S, K> mapFn, PType<K> keyType) {
-    return parallelDo(name, new ExtractKeyFn<K, S>(mapFn), getTypeFamily().tableOf(keyType, getPType()));
-  }
-
-  @Override
-  public PTable<S, Long> count() {
-    return Aggregate.count(this);
-  }
-
-  @Override
-  public PObject<Long> length() {
-    return Aggregate.length(this);
-  }
-
-  @Override
-  public PObject<S> max() {
-    return Aggregate.max(this);
-  }
-
-  @Override
-  public PObject<S> min() {
-    return Aggregate.min(this);
-  }
-
-  @Override
-  public PTypeFamily getTypeFamily() {
-    return getPType().getFamily();
-  }
-
-  public abstract DoNode createDoNode();
-
-  public abstract List<PCollectionImpl<?>> getParents();
-
-  public PCollectionImpl<?> getOnlyParent() {
-    List<PCollectionImpl<?>> parents = getParents();
-    if (parents.size() != 1) {
-      throw new IllegalArgumentException("Expected exactly one parent PCollection");
-    }
-    return parents.get(0);
-  }
-
-  @Override
-  public Pipeline getPipeline() {
-    if (pipeline == null) {
-      pipeline = (MRPipeline) getParents().get(0).getPipeline();
-    }
-    return pipeline;
-  }
-  
-  public Set<SourceTarget<?>> getTargetDependencies() {
-    Set<SourceTarget<?>> targetDeps = options.getSourceTargets();
-    for (PCollectionImpl<?> parent : getParents()) {
-      targetDeps = Sets.union(targetDeps, parent.getTargetDependencies());
-    }
-    return targetDeps;
-  }
-  
-  public int getDepth() {
-    int parentMax = 0;
-    for (PCollectionImpl parent : getParents()) {
-      parentMax = Math.max(parent.getDepth(), parentMax);
-    }
-    return 1 + parentMax;
-  }
-
-  public interface Visitor {
-    void visitInputCollection(InputCollection<?> collection);
-
-    void visitUnionCollection(UnionCollection<?> collection);
-
-    void visitDoFnCollection(DoCollectionImpl<?> collection);
-
-    void visitDoTable(DoTableImpl<?, ?> collection);
-
-    void visitGroupedTable(PGroupedTableImpl<?, ?> collection);
-  }
-
-  public void accept(Visitor visitor) {
-    if (materializedAt != null) {
-      visitor.visitInputCollection(new InputCollection<S>(materializedAt, (MRPipeline) getPipeline()));
-    } else {
-      acceptInternal(visitor);
-    }
-  }
-
-  protected abstract void acceptInternal(Visitor visitor);
-
-  @Override
-  public long getSize() {
-    if (materializedAt != null) {
-      long sz = materializedAt.getSize(getPipeline().getConfiguration());
-      if (sz > 0) {
-        return sz;
-      }
-    }
-    return getSizeInternal();
-  }
-
-  protected abstract long getSizeInternal();
-  
-  /**
-   * Retrieve the PCollectionImpl to be used for chaining within PCollectionImpls further down the pipeline.
-   * @return The PCollectionImpl instance to be chained
-   */
-  protected PCollectionImpl<S> getChainingCollection(){
-    return this;
-  }
-  
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/collect/PGroupedTableImpl.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/collect/PGroupedTableImpl.java b/crunch/src/main/java/org/apache/crunch/impl/mr/collect/PGroupedTableImpl.java
deleted file mode 100644
index ccac5d5..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/collect/PGroupedTableImpl.java
+++ /dev/null
@@ -1,144 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.collect;
-
-import java.util.List;
-import java.util.Set;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.crunch.Aggregator;
-import org.apache.crunch.CombineFn;
-import org.apache.crunch.DoFn;
-import org.apache.crunch.Emitter;
-import org.apache.crunch.GroupingOptions;
-import org.apache.crunch.PGroupedTable;
-import org.apache.crunch.PTable;
-import org.apache.crunch.Pair;
-import org.apache.crunch.SourceTarget;
-import org.apache.crunch.fn.Aggregators;
-import org.apache.crunch.impl.mr.plan.DoNode;
-import org.apache.crunch.types.PGroupedTableType;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.util.PartitionUtils;
-import org.apache.hadoop.mapreduce.Job;
-
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableSet;
-import com.google.common.collect.Sets;
-
-public class PGroupedTableImpl<K, V> extends PCollectionImpl<Pair<K, Iterable<V>>> implements PGroupedTable<K, V> {
-
-  private static final Log LOG = LogFactory.getLog(PGroupedTableImpl.class);
-
-  private final PTableBase<K, V> parent;
-  private final GroupingOptions groupingOptions;
-  private final PGroupedTableType<K, V> ptype;
-  
-  PGroupedTableImpl(PTableBase<K, V> parent) {
-    this(parent, null);
-  }
-
-  PGroupedTableImpl(PTableBase<K, V> parent, GroupingOptions groupingOptions) {
-    super("GBK");
-    this.parent = parent;
-    this.groupingOptions = groupingOptions;
-    this.ptype = parent.getPTableType().getGroupedTableType();
-  }
-
-  public void configureShuffle(Job job) {
-    ptype.configureShuffle(job, groupingOptions);
-    if (groupingOptions == null || groupingOptions.getNumReducers() <= 0) {
-      int numReduceTasks = PartitionUtils.getRecommendedPartitions(this, getPipeline().getConfiguration());
-      if (numReduceTasks > 0) {
-        job.setNumReduceTasks(numReduceTasks);
-        LOG.info(String.format("Setting num reduce tasks to %d", numReduceTasks));
-      } else {
-        LOG.warn("Attempted to set a negative number of reduce tasks");
-      }
-    }
-  }
-
-  @Override
-  protected long getSizeInternal() {
-    return parent.getSizeInternal();
-  }
-
-  @Override
-  public PType<Pair<K, Iterable<V>>> getPType() {
-    return ptype;
-  }
-
-  @Override
-  public PTable<K, V> combineValues(CombineFn<K, V> combineFn) {
-    return new DoTableImpl<K, V>("combine", getChainingCollection(), combineFn, parent.getPTableType());
-  }
-
-  @Override
-  public PTable<K, V> combineValues(Aggregator<V> agg) {
-    return combineValues(Aggregators.<K, V>toCombineFn(agg));
-  }
-
-  private static class Ungroup<K, V> extends DoFn<Pair<K, Iterable<V>>, Pair<K, V>> {
-    @Override
-    public void process(Pair<K, Iterable<V>> input, Emitter<Pair<K, V>> emitter) {
-      for (V v : input.second()) {
-        emitter.emit(Pair.of(input.first(), v));
-      }
-    }
-  }
-
-  public PTable<K, V> ungroup() {
-    return parallelDo("ungroup", new Ungroup<K, V>(), parent.getPTableType());
-  }
-
-  @Override
-  protected void acceptInternal(PCollectionImpl.Visitor visitor) {
-    visitor.visitGroupedTable(this);
-  }
-
-  @Override
-  public Set<SourceTarget<?>> getTargetDependencies() {
-    Set<SourceTarget<?>> td = Sets.newHashSet(super.getTargetDependencies());
-    if (groupingOptions != null) {
-      td.addAll(groupingOptions.getSourceTargets());
-    }
-    return ImmutableSet.copyOf(td);
-  }
-  
-  @Override
-  public List<PCollectionImpl<?>> getParents() {
-    return ImmutableList.<PCollectionImpl<?>> of(parent);
-  }
-
-  @Override
-  public DoNode createDoNode() {
-    return DoNode.createFnNode(getName(), ptype.getInputMapFn(), ptype);
-  }
-
-  public DoNode getGroupingNode() {
-    return DoNode.createGroupingNode("", ptype);
-  }
-  
-  @Override
-  protected PCollectionImpl<Pair<K, Iterable<V>>> getChainingCollection() {
-    // Use a copy for chaining to allow sending the output of a single grouped table to multiple outputs
-    // TODO This should be implemented in a cleaner way in the planner
-    return new PGroupedTableImpl<K, V>(parent, groupingOptions);
-  }
-}
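
A minimal sketch of how the grouping and combining methods above are reached from the public API, again assuming MemPipeline and the Writables type family (illustrative names, untested against this revision):

    import org.apache.crunch.PTable;
    import org.apache.crunch.fn.Aggregators;
    import org.apache.crunch.impl.mem.MemPipeline;
    import org.apache.crunch.types.writable.Writables;

    public class GroupByKeySketch {
      public static void main(String[] args) {
        // An in-memory table of (word, 1) pairs.
        PTable<String, Long> ones = MemPipeline.typedTableOf(
            Writables.tableOf(Writables.strings(), Writables.longs()),
            "a", 1L, "b", 1L, "a", 1L);

        // groupByKey() yields a PGroupedTable; combineValues(Aggregator) wraps the
        // aggregator in a CombineFn so it can run as both combiner and reducer.
        PTable<String, Long> counts = ones.groupByKey().combineValues(Aggregators.SUM_LONGS());

        System.out.println(counts.materializeToMap());
      }
    }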

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/collect/PTableBase.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/collect/PTableBase.java b/crunch/src/main/java/org/apache/crunch/impl/mr/collect/PTableBase.java
deleted file mode 100644
index 3c2393d..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/collect/PTableBase.java
+++ /dev/null
@@ -1,169 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.collect;
-
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.crunch.FilterFn;
-import org.apache.crunch.GroupingOptions;
-import org.apache.crunch.PCollection;
-import org.apache.crunch.PObject;
-import org.apache.crunch.PTable;
-import org.apache.crunch.Pair;
-import org.apache.crunch.ParallelDoOptions;
-import org.apache.crunch.TableSource;
-import org.apache.crunch.Target;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.lib.Aggregate;
-import org.apache.crunch.lib.Cogroup;
-import org.apache.crunch.lib.Join;
-import org.apache.crunch.lib.PTables;
-import org.apache.crunch.materialize.MaterializableMap;
-import org.apache.crunch.materialize.pobject.MapPObject;
-import org.apache.crunch.types.PType;
-
-import com.google.common.collect.Lists;
-
-abstract class PTableBase<K, V> extends PCollectionImpl<Pair<K, V>> implements PTable<K, V> {
-
-  public PTableBase(String name) {
-    super(name);
-  }
-
-  public PTableBase(String name, ParallelDoOptions options) {
-    super(name, options);
-  }
-  
-  public PType<K> getKeyType() {
-    return getPTableType().getKeyType();
-  }
-
-  public PType<V> getValueType() {
-    return getPTableType().getValueType();
-  }
-
-  public PGroupedTableImpl<K, V> groupByKey() {
-    return new PGroupedTableImpl<K, V>(this);
-  }
-
-  public PGroupedTableImpl<K, V> groupByKey(int numReduceTasks) {
-    return new PGroupedTableImpl<K, V>(this, GroupingOptions.builder().numReducers(numReduceTasks).build());
-  }
-
-  public PGroupedTableImpl<K, V> groupByKey(GroupingOptions groupingOptions) {
-    return new PGroupedTableImpl<K, V>(this, groupingOptions);
-  }
-
-  @Override
-  public PTable<K, V> union(PTable<K, V> other) {
-    return union(new PTable[] { other });
-  }
-  
-  @Override
-  public PTable<K, V> union(PTable<K, V>... others) {
-    List<PTableBase<K, V>> internal = Lists.newArrayList();
-    internal.add(this);
-    for (PTable<K, V> table : others) {
-      internal.add((PTableBase<K, V>) table);
-    }
-    return new UnionTable<K, V>(internal);
-  }
-
-  @Override
-  public PTable<K, V> write(Target target) {
-    if (getMaterializedAt() != null) {
-      getPipeline().write(new InputTable<K, V>(
-          (TableSource<K, V>) getMaterializedAt(), (MRPipeline) getPipeline()), target);
-    } else {
-      getPipeline().write(this, target);
-    }
-    return this;
-  }
-
-  @Override
-  public PTable<K, V> write(Target target, Target.WriteMode writeMode) {
-    if (getMaterializedAt() != null) {
-      getPipeline().write(new InputTable<K, V>(
-          (TableSource<K, V>) getMaterializedAt(), (MRPipeline) getPipeline()), target, writeMode);
-    } else {
-      getPipeline().write(this, target, writeMode);
-    }
-    return this;
-  }
-  
-  @Override
-  public PTable<K, V> filter(FilterFn<Pair<K, V>> filterFn) {
-    return parallelDo(filterFn, getPTableType());
-  }
-  
-  @Override
-  public PTable<K, V> filter(String name, FilterFn<Pair<K, V>> filterFn) {
-    return parallelDo(name, filterFn, getPTableType());
-  }
-  
-  @Override
-  public PTable<K, V> top(int count) {
-    return Aggregate.top(this, count, true);
-  }
-
-  @Override
-  public PTable<K, V> bottom(int count) {
-    return Aggregate.top(this, count, false);
-  }
-
-  @Override
-  public PTable<K, Collection<V>> collectValues() {
-    return Aggregate.collectValues(this);
-  }
-
-  @Override
-  public <U> PTable<K, Pair<V, U>> join(PTable<K, U> other) {
-    return Join.join(this, other);
-  }
-
-  @Override
-  public <U> PTable<K, Pair<Collection<V>, Collection<U>>> cogroup(PTable<K, U> other) {
-    return Cogroup.cogroup(this, other);
-  }
-
-  @Override
-  public PCollection<K> keys() {
-    return PTables.keys(this);
-  }
-
-  @Override
-  public PCollection<V> values() {
-    return PTables.values(this);
-  }
-
-  /**
-   * Returns a {@code Map<K, V>} made up of the keys and values in this PTable.
-   */
-  @Override
-  public Map<K, V> materializeToMap() {
-    return new MaterializableMap<K, V>(this.materialize());
-  }
-
-  /** {@inheritDoc} */
-  @Override
-  public PObject<Map<K, V>> asMap() {
-    return new MapPObject<K, V>(this);
-  }
-}
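
The table-level operations above mostly delegate to the library classes (Aggregate, Join, Cogroup, PTables); a small usage sketch, under the same MemPipeline/Writables assumptions as the earlier snippets:

    import java.util.Collection;

    import org.apache.crunch.PCollection;
    import org.apache.crunch.PTable;
    import org.apache.crunch.Pair;
    import org.apache.crunch.impl.mem.MemPipeline;
    import org.apache.crunch.types.writable.Writables;

    public class PTableApiSketch {
      public static void main(String[] args) {
        PTable<String, Long> scores = MemPipeline.typedTableOf(
            Writables.tableOf(Writables.strings(), Writables.longs()),
            "alice", 3L, "bob", 5L, "alice", 7L);
        PTable<String, String> teams = MemPipeline.typedTableOf(
            Writables.tableOf(Writables.strings(), Writables.strings()),
            "alice", "red", "bob", "blue");

        // join() delegates to Join.join: an inner join on the shared key.
        PTable<String, Pair<Long, String>> joined = scores.join(teams);

        // top(n) keeps the n entries with the largest values.
        PTable<String, Long> best = scores.top(2);

        // collectValues() gathers all values for each key into one collection.
        PTable<String, Collection<Long>> grouped = scores.collectValues();

        // keys()/values() project the table back to plain collections.
        PCollection<String> keys = scores.keys();

        // materializeToMap() runs the pipeline and exposes the result as a Map.
        System.out.println(scores.materializeToMap());
        System.out.println(joined + " " + best + " " + grouped + " " + keys);
      }
    }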

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/collect/UnionCollection.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/collect/UnionCollection.java b/crunch/src/main/java/org/apache/crunch/impl/mr/collect/UnionCollection.java
deleted file mode 100644
index 7b3dd7b..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/collect/UnionCollection.java
+++ /dev/null
@@ -1,80 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.collect;
-
-import java.util.List;
-
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.impl.mr.plan.DoNode;
-import org.apache.crunch.types.PType;
-
-import com.google.common.collect.ImmutableList;
-
-public class UnionCollection<S> extends PCollectionImpl<S> {
-
-  private List<PCollectionImpl<S>> parents;
-  private long size = 0;
-
-  private static <S> String flatName(List<PCollectionImpl<S>> collections) {
-    StringBuilder sb = new StringBuilder("union(");
-    for (int i = 0; i < collections.size(); i++) {
-      if (i != 0) {
-        sb.append(',');
-      }
-      sb.append(collections.get(i).getName());
-    }
-    return sb.append(')').toString();
-  }
-
-  UnionCollection(List<PCollectionImpl<S>> collections) {
-    super(flatName(collections));
-    this.parents = ImmutableList.copyOf(collections);
-    this.pipeline = (MRPipeline) parents.get(0).getPipeline();
-    for (PCollectionImpl<S> parent : parents) {
-      if (this.pipeline != parent.getPipeline()) {
-        throw new IllegalStateException("Cannot union PCollections from different Pipeline instances");
-      }
-      size += parent.getSize();
-    }
-  }
-
-  @Override
-  protected long getSizeInternal() {
-    return size;
-  }
-
-  @Override
-  protected void acceptInternal(PCollectionImpl.Visitor visitor) {
-    visitor.visitUnionCollection(this);
-  }
-
-  @Override
-  public PType<S> getPType() {
-    return parents.get(0).getPType();
-  }
-
-  @Override
-  public List<PCollectionImpl<?>> getParents() {
-    return ImmutableList.<PCollectionImpl<?>> copyOf(parents);
-  }
-
-  @Override
-  public DoNode createDoNode() {
-    throw new UnsupportedOperationException("Unioned collection does not support DoNodes");
-  }
-}
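
A short sketch of the corresponding public union() call; note the constructor above rejects collections that belong to different Pipeline instances (MemPipeline used purely for illustration, untested against this revision):

    import org.apache.crunch.PCollection;
    import org.apache.crunch.impl.mem.MemPipeline;
    import org.apache.crunch.types.writable.Writables;

    public class UnionSketch {
      public static void main(String[] args) {
        PCollection<String> a = MemPipeline.typedCollectionOf(Writables.strings(), "x", "y");
        PCollection<String> b = MemPipeline.typedCollectionOf(Writables.strings(), "z");

        // union() concatenates collections from the same Pipeline; unioning
        // collections from different Pipelines throws IllegalStateException.
        PCollection<String> all = a.union(b);
        System.out.println(all);
      }
    }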

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/collect/UnionTable.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/collect/UnionTable.java b/crunch/src/main/java/org/apache/crunch/impl/mr/collect/UnionTable.java
deleted file mode 100644
index a369432..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/collect/UnionTable.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.collect;
-
-import java.util.List;
-
-import org.apache.crunch.Pair;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.impl.mr.plan.DoNode;
-import org.apache.crunch.types.PTableType;
-import org.apache.crunch.types.PType;
-
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Lists;
-
-public class UnionTable<K, V> extends PTableBase<K, V> {
-
-  private PTableType<K, V> ptype;
-  private List<PCollectionImpl<Pair<K, V>>> parents;
-  private long size;
-
-  private static <K, V> String flatName(List<PTableBase<K, V>> tables) {
-    StringBuilder sb = new StringBuilder("union(");
-    for (int i = 0; i < tables.size(); i++) {
-      if (i != 0) {
-        sb.append(',');
-      }
-      sb.append(tables.get(i).getName());
-    }
-    return sb.append(')').toString();
-  }
-
-  public UnionTable(List<PTableBase<K, V>> tables) {
-    super(flatName(tables));
-    this.ptype = tables.get(0).getPTableType();
-    this.pipeline = (MRPipeline) tables.get(0).getPipeline();
-    this.parents = Lists.newArrayList();
-    for (PTableBase<K, V> parent : tables) {
-      if (pipeline != parent.getPipeline()) {
-        throw new IllegalStateException("Cannot union PTables from different Pipeline instances");
-      }
-      this.parents.add(parent);
-      size += parent.getSize();
-    }
-  }
-
-  @Override
-  protected long getSizeInternal() {
-    return size;
-  }
-
-  @Override
-  public PTableType<K, V> getPTableType() {
-    return ptype;
-  }
-
-  @Override
-  public PType<Pair<K, V>> getPType() {
-    return ptype;
-  }
-
-  @Override
-  public List<PCollectionImpl<?>> getParents() {
-    return ImmutableList.<PCollectionImpl<?>> copyOf(parents);
-  }
-
-  @Override
-  protected void acceptInternal(PCollectionImpl.Visitor visitor) {
-    visitor.visitUnionCollection(new UnionCollection<Pair<K, V>>(parents));
-  }
-
-  @Override
-  public DoNode createDoNode() {
-    throw new UnsupportedOperationException("Unioned table does not support do nodes");
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/emit/IntermediateEmitter.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/emit/IntermediateEmitter.java b/crunch/src/main/java/org/apache/crunch/impl/mr/emit/IntermediateEmitter.java
deleted file mode 100644
index b6df98b..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/emit/IntermediateEmitter.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.emit;
-
-import java.util.List;
-
-import org.apache.crunch.DoFn;
-import org.apache.crunch.Emitter;
-import org.apache.crunch.impl.mr.run.RTNode;
-import org.apache.crunch.types.PType;
-import org.apache.hadoop.conf.Configuration;
-
-import com.google.common.collect.ImmutableList;
-
-/**
- * An {@link Emitter} implementation that links the output of one {@link DoFn} to the input of
- * another {@code DoFn}.
- * 
- */
-public class IntermediateEmitter implements Emitter<Object> {
-
-  private final List<RTNode> children;
-  private final Configuration conf;
-  private final PType<Object> outputPType;
-  private final boolean needDetachedValues;
-
-  public IntermediateEmitter(PType<Object> outputPType, List<RTNode> children, Configuration conf) {
-    this.outputPType = outputPType;
-    this.children = ImmutableList.copyOf(children);
-    this.conf = conf;
-
-    outputPType.initialize(conf);
-    needDetachedValues = this.children.size() > 1;
-  }
-
-  public void emit(Object emitted) {
-    for (RTNode child : children) {
-      Object value = emitted;
-      if (needDetachedValues) {
-        value = this.outputPType.getDetachedValue(emitted);
-      }
-      child.process(value);
-    }
-  }
-
-  public void flush() {
-    // No-op
-  }
-}
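
The detaching step above matters when one emitted record feeds several downstream RTNodes, because Writable-based deserialization reuses objects. A small sketch of the same PType initialize/getDetachedValue calls the emitter makes (illustrative, untested against this revision):

    import java.util.Collection;

    import org.apache.crunch.types.PType;
    import org.apache.crunch.types.writable.Writables;
    import org.apache.hadoop.conf.Configuration;

    import com.google.common.collect.ImmutableList;

    public class DetachedValueSketch {
      public static void main(String[] args) {
        // Initialize the PType first, exactly as IntermediateEmitter does.
        PType<Collection<String>> type = Writables.collections(Writables.strings());
        type.initialize(new Configuration());

        Collection<String> original = ImmutableList.of("a", "b");

        // getDetachedValue returns a copy that is safe to hold onto even if the
        // original object is later reused by the serialization machinery.
        Collection<String> detached = type.getDetachedValue(original);
        System.out.println(original.equals(detached));
      }
    }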

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/emit/MultipleOutputEmitter.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/emit/MultipleOutputEmitter.java b/crunch/src/main/java/org/apache/crunch/impl/mr/emit/MultipleOutputEmitter.java
deleted file mode 100644
index 2e58fed..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/emit/MultipleOutputEmitter.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.emit;
-
-import java.io.IOException;
-
-import org.apache.crunch.CrunchRuntimeException;
-import org.apache.crunch.Emitter;
-import org.apache.crunch.io.CrunchOutputs;
-import org.apache.crunch.types.Converter;
-
-public class MultipleOutputEmitter<T, K, V> implements Emitter<T> {
-
-  private final Converter converter;
-  private final CrunchOutputs<K, V> outputs;
-  private final String outputName;
-
-  public MultipleOutputEmitter(Converter converter, CrunchOutputs<K, V> outputs,
-      String outputName) {
-    this.converter = converter;
-    this.outputs = outputs;
-    this.outputName = outputName;
-  }
-
-  @Override
-  public void emit(T emitted) {
-    try {
-      this.outputs.write(outputName,
-          (K) converter.outputKey(emitted),
-          (V) converter.outputValue(emitted));
-    } catch (Exception e) {
-      throw new CrunchRuntimeException(e);
-    }
-  }
-
-  @Override
-  public void flush() {
-    // No-op
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/emit/OutputEmitter.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/emit/OutputEmitter.java b/crunch/src/main/java/org/apache/crunch/impl/mr/emit/OutputEmitter.java
deleted file mode 100644
index bc3ae0d..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/emit/OutputEmitter.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.emit;
-
-import java.io.IOException;
-
-import org.apache.crunch.CrunchRuntimeException;
-import org.apache.crunch.Emitter;
-import org.apache.crunch.types.Converter;
-import org.apache.hadoop.mapreduce.TaskInputOutputContext;
-
-public class OutputEmitter<T, K, V> implements Emitter<T> {
-
-  private final Converter<K, V, Object, Object> converter;
-  private final TaskInputOutputContext<?, ?, K, V> context;
-
-  public OutputEmitter(Converter<K, V, Object, Object> converter, TaskInputOutputContext<?, ?, K, V> context) {
-    this.converter = converter;
-    this.context = context;
-  }
-
-  public void emit(T emitted) {
-    try {
-      K key = converter.outputKey(emitted);
-      V value = converter.outputValue(emitted);
-      this.context.write(key, value);
-    } catch (IOException e) {
-      throw new CrunchRuntimeException(e);
-    } catch (InterruptedException e) {
-      throw new CrunchRuntimeException(e);
-    }
-  }
-
-  public void flush() {
-    // No-op
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/exec/CappedExponentialCounter.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/exec/CappedExponentialCounter.java b/crunch/src/main/java/org/apache/crunch/impl/mr/exec/CappedExponentialCounter.java
deleted file mode 100644
index d90f2e8..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/exec/CappedExponentialCounter.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.exec;
-
-/**
- * Generates an exponentially increasing series of numbers, capped at a fixed limit.
- *
- * It is used for creating retry intervals. It is NOT thread-safe.
- */
-public class CappedExponentialCounter {
-
-  private long current;
-  private final long limit;
-
-  public CappedExponentialCounter(long start, long limit) {
-    this.current = start;
-    this.limit = limit;
-  }
-
-  public long get() {
-    long result = current;
-    current = Math.min(current * 2, limit);
-    return result;
-  }
-}
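
A tiny self-contained sketch of the sequence the counter produces with the 500 ms / 10 s bounds that MRExecutor uses for cluster jobs:

    import org.apache.crunch.impl.mr.exec.CappedExponentialCounter;

    public class BackoffSketch {
      public static void main(String[] args) {
        // Start at 500 ms, double on every call, never exceed 10 seconds.
        CappedExponentialCounter interval = new CappedExponentialCounter(500, 10000);
        for (int i = 0; i < 8; i++) {
          System.out.print(interval.get() + " ");
        }
        // Prints: 500 1000 2000 4000 8000 10000 10000 10000
      }
    }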

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/exec/CrunchJobHooks.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/exec/CrunchJobHooks.java b/crunch/src/main/java/org/apache/crunch/impl/mr/exec/CrunchJobHooks.java
deleted file mode 100644
index 74bc9ac..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/exec/CrunchJobHooks.java
+++ /dev/null
@@ -1,153 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.exec;
-
-import java.io.IOException;
-import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.apache.crunch.hadoop.mapreduce.lib.jobcontrol.CrunchControlledJob;
-import org.apache.crunch.impl.mr.plan.PlanningParameters;
-import org.apache.crunch.impl.mr.run.RuntimeParameters;
-import org.apache.crunch.io.FileNamingScheme;
-import org.apache.crunch.io.PathTarget;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-
-public final class CrunchJobHooks {
-
-  private CrunchJobHooks() {}
-
-  /** Creates missing input directories before the job is submitted. */
-  public static final class PrepareHook implements CrunchControlledJob.Hook {
-    private final Job job;
-
-    public PrepareHook(Job job) {
-      this.job = job;
-    }
-
-    @Override
-    public void run() throws IOException {
-      Configuration conf = job.getConfiguration();
-      if (conf.getBoolean(RuntimeParameters.CREATE_DIR, false)) {
-        Path[] inputPaths = FileInputFormat.getInputPaths(job);
-        for (Path inputPath : inputPaths) {
-          FileSystem fs = inputPath.getFileSystem(conf);
-          if (!fs.exists(inputPath)) {
-            try {
-              fs.mkdirs(inputPath);
-            } catch (IOException e) {
-              // Ignore: creating the missing input directory is best-effort.
-            }
-          }
-        }
-      }
-    }
-  }
-
-  /** Moves output files produced by the MapReduce job to the specified output directories. */
-  public static final class CompletionHook implements CrunchControlledJob.Hook {
-    private final Job job;
-    private final Path workingPath;
-    private final Map<Integer, PathTarget> multiPaths;
-    private final boolean mapOnlyJob;
-
-    public CompletionHook(Job job, Path workingPath, Map<Integer, PathTarget> multiPaths, boolean mapOnlyJob) {
-      this.job = job;
-      this.workingPath = workingPath;
-      this.multiPaths = multiPaths;
-      this.mapOnlyJob = mapOnlyJob;
-    }
-
-    @Override
-    public void run() throws IOException {
-      handleMultiPaths();
-    }
-
-    private synchronized void handleMultiPaths() throws IOException {
-      if (!multiPaths.isEmpty()) {
-        // Need to handle moving the data from the output directory of the
-        // job to the output locations specified in the paths.
-        FileSystem srcFs = workingPath.getFileSystem(job.getConfiguration());
-        for (Map.Entry<Integer, PathTarget> entry : multiPaths.entrySet()) {
-          final int i = entry.getKey();
-          final Path dst = entry.getValue().getPath();
-          FileNamingScheme fileNamingScheme = entry.getValue().getFileNamingScheme();
-
-          Path src = new Path(workingPath, PlanningParameters.MULTI_OUTPUT_PREFIX + i + "-*");
-          Path[] srcs = FileUtil.stat2Paths(srcFs.globStatus(src), src);
-          Configuration conf = job.getConfiguration();
-          FileSystem dstFs = dst.getFileSystem(conf);
-          if (!dstFs.exists(dst)) {
-            dstFs.mkdirs(dst);
-          }
-          boolean sameFs = isCompatible(srcFs, dst);
-          for (Path s : srcs) {
-            Path d = getDestFile(conf, s, dst, fileNamingScheme);
-            if (sameFs) {
-              srcFs.rename(s, d);
-            } else {
-              FileUtil.copy(srcFs, s, dstFs, d, true, true, job.getConfiguration());
-            }
-          }
-        }
-      }
-    }
-
-    private boolean isCompatible(FileSystem fs, Path path) {
-      try {
-        fs.makeQualified(path);
-        return true;
-      } catch (IllegalArgumentException e) {
-        return false;
-      }
-    }
-    private Path getDestFile(Configuration conf, Path src, Path dir, FileNamingScheme fileNamingScheme)
-        throws IOException {
-      String outputFilename = null;
-      if (mapOnlyJob) {
-        outputFilename = fileNamingScheme.getMapOutputName(conf, dir);
-      } else {
-        outputFilename = fileNamingScheme.getReduceOutputName(conf, dir, extractPartitionNumber(src.getName()));
-      }
-      if (src.getName().endsWith(org.apache.avro.mapred.AvroOutputFormat.EXT)) {
-        outputFilename += org.apache.avro.mapred.AvroOutputFormat.EXT;
-      }
-      return new Path(dir, outputFilename);
-    }
-  }
-
-  /**
-   * Extract the partition number from a raw reducer output filename.
-   *
-   * @param reduceOutputFileName The raw reducer output file name
-   * @return The partition number encoded in the filename
-   */
-  static int extractPartitionNumber(String reduceOutputFileName) {
-    Matcher matcher = Pattern.compile(".*-r-(\\d{5})").matcher(reduceOutputFileName);
-    if (matcher.find()) {
-      return Integer.parseInt(matcher.group(1), 10);
-    } else {
-      throw new IllegalArgumentException("Reducer output name '" + reduceOutputFileName + "' cannot be parsed");
-    }
-  }
-}
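
For illustration, the partition-number extraction above applied to a typical reducer output name (self-contained, same pattern as extractPartitionNumber):

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    public class PartitionNumberSketch {
      public static void main(String[] args) {
        // The five-digit suffix of a raw reducer output name encodes the partition.
        Matcher m = Pattern.compile(".*-r-(\\d{5})").matcher("part-r-00007.avro");
        if (m.find()) {
          System.out.println(Integer.parseInt(m.group(1), 10)); // prints 7
        }
      }
    }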

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/exec/MRExecutor.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/exec/MRExecutor.java b/crunch/src/main/java/org/apache/crunch/impl/mr/exec/MRExecutor.java
deleted file mode 100644
index 4c7b7ea..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/exec/MRExecutor.java
+++ /dev/null
@@ -1,198 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.exec;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.concurrent.CountDownLatch;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicReference;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.crunch.PipelineExecution;
-import org.apache.crunch.PipelineResult;
-import org.apache.crunch.SourceTarget;
-import org.apache.crunch.Target;
-import org.apache.crunch.hadoop.mapreduce.lib.jobcontrol.CrunchControlledJob;
-import org.apache.crunch.hadoop.mapreduce.lib.jobcontrol.CrunchJobControl;
-import org.apache.crunch.impl.mr.collect.PCollectionImpl;
-import org.apache.crunch.materialize.MaterializableIterable;
-import org.apache.hadoop.conf.Configuration;
-
-import com.google.common.collect.Lists;
-
-/**
- * Provides APIs for job control at runtime to clients.
- *
- * This class has a thread that submits jobs when they become ready, monitors
- * the states of the running jobs, and updates the states of jobs based on the
- * state changes of the jobs they depend on.
- *
- * It is thread-safe.
- */
-public class MRExecutor implements PipelineExecution {
-
-  private static final Log LOG = LogFactory.getLog(MRExecutor.class);
-
-  private final CrunchJobControl control;
-  private final Map<PCollectionImpl<?>, Set<Target>> outputTargets;
-  private final Map<PCollectionImpl<?>, MaterializableIterable> toMaterialize;
-  private final CountDownLatch doneSignal = new CountDownLatch(1);
-  private final CountDownLatch killSignal = new CountDownLatch(1);
-  private final CappedExponentialCounter pollInterval;
-  private AtomicReference<Status> status = new AtomicReference<Status>(Status.READY);
-  private PipelineResult result;
-  private Thread monitorThread;
-
-  private String planDotFile;
-  
-  public MRExecutor(Class<?> jarClass, Map<PCollectionImpl<?>, Set<Target>> outputTargets,
-      Map<PCollectionImpl<?>, MaterializableIterable> toMaterialize) {
-    this.control = new CrunchJobControl(jarClass.toString());
-    this.outputTargets = outputTargets;
-    this.toMaterialize = toMaterialize;
-    this.monitorThread = new Thread(new Runnable() {
-      @Override
-      public void run() {
-        monitorLoop();
-      }
-    });
-    this.pollInterval = isLocalMode()
-      ? new CappedExponentialCounter(50, 1000)
-      : new CappedExponentialCounter(500, 10000);
-  }
-
-  public void addJob(CrunchControlledJob job) {
-    this.control.addJob(job);
-  }
-
-  public void setPlanDotFile(String planDotFile) {
-    this.planDotFile = planDotFile;
-  }
-  
-  public PipelineExecution execute() {
-    monitorThread.start();
-    return this;
-  }
-
-  /** Monitors running status; runs on the monitor thread started by {@code execute()}. */
-  private void monitorLoop() {
-    try {
-      while (killSignal.getCount() > 0 && !control.allFinished()) {
-        control.pollJobStatusAndStartNewOnes();
-        killSignal.await(pollInterval.get(), TimeUnit.MILLISECONDS);
-      }
-      control.killAllRunningJobs();
-
-      List<CrunchControlledJob> failures = control.getFailedJobList();
-      if (!failures.isEmpty()) {
-        System.err.println(failures.size() + " job failure(s) occurred:");
-        for (CrunchControlledJob job : failures) {
-          System.err.println(job.getJobName() + "(" + job.getJobID() + "): " + job.getMessage());
-        }
-      }
-      List<PipelineResult.StageResult> stages = Lists.newArrayList();
-      for (CrunchControlledJob job : control.getSuccessfulJobList()) {
-        stages.add(new PipelineResult.StageResult(job.getJobName(), job.getJob().getCounters()));
-      }
-
-      for (PCollectionImpl<?> c : outputTargets.keySet()) {
-        if (toMaterialize.containsKey(c)) {
-          MaterializableIterable iter = toMaterialize.get(c);
-          if (iter.isSourceTarget()) {
-            iter.materialize();
-            c.materializeAt((SourceTarget) iter.getSource());
-          }
-        } else {
-          boolean materialized = false;
-          for (Target t : outputTargets.get(c)) {
-            if (!materialized) {
-              if (t instanceof SourceTarget) {
-                c.materializeAt((SourceTarget) t);
-                materialized = true;
-              } else {
-                SourceTarget st = t.asSourceTarget(c.getPType());
-                if (st != null) {
-                  c.materializeAt(st);
-                  materialized = true;
-                }
-              }
-            }
-          }
-        }
-      }
-
-      synchronized (this) {
-        result = new PipelineResult(stages);
-        if (killSignal.getCount() == 0) {
-          status.set(Status.KILLED);
-        } else {
-          status.set(result.succeeded() ? Status.SUCCEEDED : Status.FAILED);
-        }
-      }
-    } catch (InterruptedException e) {
-      throw new AssertionError(e); // Nobody should interrupt us.
-    } catch (IOException e) {
-      LOG.error("Pipeline failed due to exception", e);
-      status.set(Status.FAILED);
-    } finally {
-      doneSignal.countDown();
-    }
-  }
-
-  @Override
-  public String getPlanDotFile() {
-    return planDotFile;
-  }
-
-  @Override
-  public void waitFor(long timeout, TimeUnit timeUnit) throws InterruptedException {
-    doneSignal.await(timeout, timeUnit);
-  }
-
-  @Override
-  public void waitUntilDone() throws InterruptedException {
-    doneSignal.await();
-  }
-
-  @Override
-  public synchronized Status getStatus() {
-    return status.get();
-  }
-
-  @Override
-  public synchronized PipelineResult getResult() {
-    return result;
-  }
-
-  @Override
-  public void kill() throws InterruptedException {
-    killSignal.countDown();
-  }
-
-  private static boolean isLocalMode() {
-    Configuration conf = new Configuration();
-    // Try to handle MapReduce version 0.20 or 0.22
-    String jobTrackerAddress = conf.get("mapreduce.jobtracker.address",
-        conf.get("mapred.job.tracker", "local"));
-    return "local".equals(jobTrackerAddress);
-  }
-}
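
A standalone sketch of the same local-mode probe, which drives the choice between the 50 ms-1 s and 500 ms-10 s polling counters above:

    import org.apache.hadoop.conf.Configuration;

    public class LocalModeCheckSketch {
      public static void main(String[] args) {
        // Newer Hadoop exposes the job tracker address under
        // "mapreduce.jobtracker.address", older releases under "mapred.job.tracker";
        // both default to "local" when unset.
        Configuration conf = new Configuration();
        String jobTracker = conf.get("mapreduce.jobtracker.address",
            conf.get("mapred.job.tracker", "local"));
        System.out.println("local mode: " + "local".equals(jobTracker));
      }
    }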

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/package-info.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/package-info.java b/crunch/src/main/java/org/apache/crunch/impl/mr/package-info.java
deleted file mode 100644
index 7e403c3..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/package-info.java
+++ /dev/null
@@ -1,22 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * A Pipeline implementation that runs on Hadoop MapReduce.
- */
-package org.apache.crunch.impl.mr;

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/plan/DoNode.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/plan/DoNode.java b/crunch/src/main/java/org/apache/crunch/impl/mr/plan/DoNode.java
deleted file mode 100644
index 865369c..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/plan/DoNode.java
+++ /dev/null
@@ -1,163 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.plan;
-
-import java.util.List;
-
-import org.apache.commons.lang.builder.HashCodeBuilder;
-import org.apache.crunch.DoFn;
-import org.apache.crunch.Source;
-import org.apache.crunch.impl.mr.run.NodeContext;
-import org.apache.crunch.impl.mr.run.RTNode;
-import org.apache.crunch.types.Converter;
-import org.apache.crunch.types.PGroupedTableType;
-import org.apache.crunch.types.PType;
-import org.apache.hadoop.conf.Configuration;
-
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Lists;
-
-public class DoNode {
-
-  private static final List<DoNode> NO_CHILDREN = ImmutableList.of();
-
-  private final DoFn fn;
-  private final String name;
-  private final PType<?> ptype;
-  private final List<DoNode> children;
-  private final Converter outputConverter;
-  private final Source<?> source;
-  private String outputName;
-
-  private DoNode(DoFn fn, String name, PType<?> ptype, List<DoNode> children, Converter outputConverter,
-      Source<?> source) {
-    this.fn = fn;
-    this.name = name;
-    this.ptype = ptype;
-    this.children = children;
-    this.outputConverter = outputConverter;
-    this.source = source;
-  }
-
-  private static List<DoNode> allowsChildren() {
-    return Lists.newArrayList();
-  }
-
-  public static <K, V> DoNode createGroupingNode(String name, PGroupedTableType<K, V> ptype) {
-    DoFn<?, ?> fn = ptype.getOutputMapFn();
-    return new DoNode(fn, name, ptype, NO_CHILDREN, ptype.getGroupingConverter(), null);
-  }
-
-  public static <S> DoNode createOutputNode(String name, PType<S> ptype) {
-    Converter outputConverter = ptype.getConverter();
-    DoFn<?, ?> fn = ptype.getOutputMapFn();
-    return new DoNode(fn, name, ptype, NO_CHILDREN, outputConverter, null);
-  }
-
-  public static DoNode createFnNode(String name, DoFn<?, ?> function, PType<?> ptype) {
-    return new DoNode(function, name, ptype, allowsChildren(), null, null);
-  }
-
-  public static <S> DoNode createInputNode(Source<S> source) {
-    PType<?> ptype = source.getType();
-    DoFn<?, ?> fn = ptype.getInputMapFn();
-    return new DoNode(fn, source.toString(), ptype, allowsChildren(), null, source);
-  }
-
-  public boolean isInputNode() {
-    return source != null;
-  }
-
-  public boolean isOutputNode() {
-    return outputConverter != null;
-  }
-
-  public String getName() {
-    return name;
-  }
-
-  public List<DoNode> getChildren() {
-    return children;
-  }
-
-  public Source<?> getSource() {
-    return source;
-  }
-
-  public PType<?> getPType() {
-    return ptype;
-  }
-
-  public DoNode addChild(DoNode node) {
-    // TODO: This is sort of terrible; refactor the code so that this makes more sense.
-    boolean exists = false;
-    for (DoNode child : children) {
-      if (node == child) {
-        exists = true;
-        break;
-      }
-    }
-    if (!exists) {
-      children.add(node);
-    }
-    return this;
-  }
-
-  public void setOutputName(String outputName) {
-    if (outputConverter == null) {
-      throw new IllegalStateException("Cannot set output name w/o output converter: " + outputName);
-    }
-    this.outputName = outputName;
-  }
-
-  public RTNode toRTNode(boolean inputNode, Configuration conf, NodeContext nodeContext) {
-    List<RTNode> childRTNodes = Lists.newArrayList();
-    fn.configure(conf);
-    for (DoNode child : children) {
-      childRTNodes.add(child.toRTNode(false, conf, nodeContext));
-    }
-
-    Converter inputConverter = null;
-    if (inputNode) {
-      if (nodeContext == NodeContext.MAP) {
-        inputConverter = ptype.getConverter();
-      } else {
-        inputConverter = ((PGroupedTableType<?, ?>) ptype).getGroupingConverter();
-      }
-    }
-    return new RTNode(fn, (PType<Object>) getPType(), name, childRTNodes, inputConverter, outputConverter, outputName);
-  }
-
-  @Override
-  public boolean equals(Object other) {
-    if (other == null || !(other instanceof DoNode)) {
-      return false;
-    }
-    if (this == other) {
-      return true;
-    }
-    DoNode o = (DoNode) other;
-    return (name.equals(o.name) && fn.equals(o.fn) && source == o.source && outputConverter == o.outputConverter);
-  }
-
-  @Override
-  public int hashCode() {
-    HashCodeBuilder hcb = new HashCodeBuilder();
-    return hcb.append(name).append(fn).append(source).append(outputConverter).toHashCode();
-  }
-}
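
DoNode is an internal planner class, but its factory methods are straightforward; a small sketch of building a two-node chain (IdentityFn is used only as a placeholder DoFn, untested against this revision):

    import org.apache.crunch.fn.IdentityFn;
    import org.apache.crunch.impl.mr.plan.DoNode;
    import org.apache.crunch.types.writable.Writables;

    public class DoNodeSketch {
      public static void main(String[] args) {
        // A function node feeding an output node.
        DoNode parse = DoNode.createFnNode("parse", IdentityFn.getInstance(), Writables.strings());
        DoNode output = DoNode.createOutputNode("text-output", Writables.strings());
        parse.addChild(output);
        System.out.println(parse.getName() + " has " + parse.getChildren().size() + " child");
      }
    }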

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/plan/DotfileWriter.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/plan/DotfileWriter.java b/crunch/src/main/java/org/apache/crunch/impl/mr/plan/DotfileWriter.java
deleted file mode 100644
index 46d8c53..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/plan/DotfileWriter.java
+++ /dev/null
@@ -1,238 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.plan;
-
-import java.util.List;
-import java.util.Set;
-
-import org.apache.crunch.Pair;
-import org.apache.crunch.Target;
-import org.apache.crunch.impl.mr.collect.InputCollection;
-import org.apache.crunch.impl.mr.collect.PCollectionImpl;
-import org.apache.crunch.impl.mr.collect.PGroupedTableImpl;
-
-import com.google.common.base.Joiner;
-import com.google.common.collect.HashMultimap;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
-
-/**
- * Writes <a href="http://www.graphviz.org">Graphviz</a> dot files to illustrate
- * the topology of Crunch pipelines.
- */
-public class DotfileWriter {
-  
-  /** The types of tasks within a MapReduce job. */
-  enum MRTaskType { MAP, REDUCE };
-
-  private Set<JobPrototype> jobPrototypes = Sets.newHashSet();
-  private HashMultimap<Pair<JobPrototype, MRTaskType>, String> jobNodeDeclarations = HashMultimap.create();
-  private Set<String> globalNodeDeclarations = Sets.newHashSet();
-  private Set<String> nodePathChains = Sets.newHashSet();
-
-  /**
-   * Format the declaration of a node based on a PCollection.
-   * 
-   * @param pcollectionImpl PCollection for which a node will be declared
-   * @param jobPrototype The job containing the PCollection
-   * @return The node declaration
-   */
-  String formatPCollectionNodeDeclaration(PCollectionImpl<?> pcollectionImpl, JobPrototype jobPrototype) {
-    String shape = "box";
-    if (pcollectionImpl instanceof InputCollection) {
-      shape = "folder";
-    }
-    return String.format("%s [label=\"%s\" shape=%s];", formatPCollection(pcollectionImpl, jobPrototype), pcollectionImpl.getName(),
-        shape);
-  }
-
-  /**
-   * Format a Target as a node declaration.
-   * 
-   * @param target A Target used within a MapReduce pipeline
-   * @return The global node declaration for the Target
-   */
-  String formatTargetNodeDeclaration(Target target) {
-    return String.format("\"%s\" [label=\"%s\" shape=folder];", target.toString(), target.toString());
-  }
-
-  /**
-   * Format a PCollectionImpl into a format to be used for dot files.
-   * 
-   * @param pcollectionImpl The PCollectionImpl to be formatted
-   * @param jobPrototype The job containing the PCollection
-   * @return The dot file formatted representation of the PCollectionImpl
-   */
-  String formatPCollection(PCollectionImpl<?> pcollectionImpl, JobPrototype jobPrototype) {
-    if (pcollectionImpl instanceof InputCollection) {
-      InputCollection<?> inputCollection = (InputCollection<?>) pcollectionImpl;
-      return String.format("\"%s\"", inputCollection.getSource());
-    }
-    return String.format("\"%s@%d@%d\"", pcollectionImpl.getName(), pcollectionImpl.hashCode(), jobPrototype.hashCode());
-  }
-
-  /**
-   * Format a collection of node strings into dot file syntax.
-   * 
-   * @param nodeCollection Collection of chained node strings
-   * @return The dot-formatted chain of nodes
-   */
-  String formatNodeCollection(List<String> nodeCollection) {
-    return String.format("%s;", Joiner.on(" -> ").join(nodeCollection));
-  }
-
-  /**
-   * Format a NodePath in dot file syntax.
-   * 
-   * @param nodePath The node path to be formatted
-   * @param jobPrototype The job containing the NodePath
-   * @return The dot file representation of the node path
-   */
-  List<String> formatNodePath(NodePath nodePath, JobPrototype jobPrototype) {
-    List<String> formattedNodePaths = Lists.newArrayList();
-    
-    List<PCollectionImpl<?>> pcollections = Lists.newArrayList(nodePath);
-    for (int collectionIndex = 1; collectionIndex < pcollections.size(); collectionIndex++){
-      String fromNode = formatPCollection(pcollections.get(collectionIndex - 1), jobPrototype);
-      String toNode = formatPCollection(pcollections.get(collectionIndex), jobPrototype);
-      formattedNodePaths.add(formatNodeCollection(Lists.newArrayList(fromNode, toNode)));
-    }
-    return formattedNodePaths;
-  }
-
-  /**
-   * Add a NodePath to be formatted as a list of node declarations within a
-   * single job.
-   * 
-   * @param jobPrototype The job containing the node path
-   * @param nodePath The node path to be formatted
-   */
-  void addNodePathDeclarations(JobPrototype jobPrototype, NodePath nodePath) {
-    boolean groupingEncountered = false;
-    for (PCollectionImpl<?> pcollectionImpl : nodePath) {
-      if (pcollectionImpl instanceof InputCollection) {
-        globalNodeDeclarations.add(formatPCollectionNodeDeclaration(pcollectionImpl, jobPrototype));
-      } else {
-        if (!groupingEncountered){
-          groupingEncountered = (pcollectionImpl instanceof PGroupedTableImpl);
-        }
-
-        MRTaskType taskType = groupingEncountered ? MRTaskType.REDUCE : MRTaskType.MAP;
-        jobNodeDeclarations.put(Pair.of(jobPrototype, taskType), formatPCollectionNodeDeclaration(pcollectionImpl, jobPrototype));
-      }
-    }
-  }
-
-  /**
-   * Add the chaining of a NodePath to the graph.
-   * 
-   * @param nodePath The path to be formatted as a node chain in the dot file
-   * @param jobPrototype The job containing the NodePath
-   */
-  void addNodePathChain(NodePath nodePath, JobPrototype jobPrototype) {
-    for (String nodePathChain : formatNodePath(nodePath, jobPrototype)){
-      this.nodePathChains.add(nodePathChain);
-    }
-  }
-
-  /**
-   * Get the graph attributes for a task-specific subgraph.
-   * 
-   * @param taskType The type of task in the subgraph
-   * @return Graph attributes
-   */
-  String getTaskGraphAttributes(MRTaskType taskType) {
-    if (taskType == MRTaskType.MAP) {
-      return "label = Map; color = blue;";
-    } else {
-      return "label = Reduce; color = red;";
-    }
-  }
-
-  /**
-   * Add the contents of a {@link JobPrototype} to the graph describing a
-   * pipeline.
-   * 
-   * @param jobPrototype A JobPrototype representing a portion of a MapReduce
-   *          pipeline
-   */
-  public void addJobPrototype(JobPrototype jobPrototype) {
-    jobPrototypes.add(jobPrototype);
-    if (!jobPrototype.isMapOnly()) {
-      for (NodePath nodePath : jobPrototype.getMapNodePaths()) {
-        addNodePathDeclarations(jobPrototype, nodePath);
-        addNodePathChain(nodePath, jobPrototype);
-      }
-    }
-
-    HashMultimap<Target, NodePath> targetsToNodePaths = jobPrototype.getTargetsToNodePaths();
-    for (Target target : targetsToNodePaths.keySet()) {
-      globalNodeDeclarations.add(formatTargetNodeDeclaration(target));
-      for (NodePath nodePath : targetsToNodePaths.get(target)) {
-        addNodePathDeclarations(jobPrototype, nodePath);
-        addNodePathChain(nodePath, jobPrototype);
-        nodePathChains.add(formatNodeCollection(Lists.newArrayList(formatPCollection(nodePath.descendingIterator()
-            .next(), jobPrototype), String.format("\"%s\"", target.toString()))));
-      }
-    }
-  }
-
-  /**
-   * Build up the full dot file containing the description of a MapReduce
-   * pipeline.
-   * 
-   * @return Graphviz dot file contents
-   */
-  public String buildDotfile() {
-    StringBuilder stringBuilder = new StringBuilder();
-    stringBuilder.append("digraph G {\n");
-    int clusterIndex = 0;
-
-    for (String globalDeclaration : globalNodeDeclarations) {
-      stringBuilder.append(String.format("  %s\n", globalDeclaration));
-    }
-
-    for (JobPrototype jobPrototype : jobPrototypes){
-      StringBuilder jobProtoStringBuilder = new StringBuilder();
-      jobProtoStringBuilder.append(String.format("  subgraph cluster%d {\n", clusterIndex++));
-      for (MRTaskType taskType : MRTaskType.values()){
-        Pair<JobPrototype,MRTaskType> jobTaskKey = Pair.of(jobPrototype, taskType);
-        if (jobNodeDeclarations.containsKey(jobTaskKey)){
-          jobProtoStringBuilder.append(String.format("    subgraph cluster%d {\n", clusterIndex++));
-          jobProtoStringBuilder.append(String.format("      %s\n", getTaskGraphAttributes(taskType)));
-          for (String declarationEntry : jobNodeDeclarations.get(jobTaskKey)){
-            jobProtoStringBuilder.append(String.format("      %s\n", declarationEntry));
-          }
-          jobProtoStringBuilder.append("    }\n");
-        }
-      }
-      jobProtoStringBuilder.append("  }\n");
-      stringBuilder.append(jobProtoStringBuilder.toString());
-    }
-    
-    for (String nodePathChain : nodePathChains) {
-      stringBuilder.append(String.format("  %s\n", nodePathChain));
-    }
-
-    stringBuilder.append("}\n");
-    return stringBuilder.toString();
-  }
-
-
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/plan/Edge.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/plan/Edge.java b/crunch/src/main/java/org/apache/crunch/impl/mr/plan/Edge.java
deleted file mode 100644
index 1e59df0..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/plan/Edge.java
+++ /dev/null
@@ -1,125 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.plan;
-
-import java.util.Collection;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.commons.lang.builder.HashCodeBuilder;
-import org.apache.commons.lang.builder.ReflectionToStringBuilder;
-import org.apache.commons.lang.builder.ToStringStyle;
-import org.apache.crunch.impl.mr.collect.PCollectionImpl;
-import org.apache.crunch.impl.mr.collect.PGroupedTableImpl;
-
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
-
-/**
- *
- */
-class Edge {
-  private final Vertex head;
-  private final Vertex tail;
-  private final Set<NodePath> paths;
-  
-  public Edge(Vertex head, Vertex tail) {
-    this.head = head;
-    this.tail = tail;
-    this.paths = Sets.newHashSet();
-  }
-  
-  public Vertex getHead() {
-    return head;
-  }
-  
-  public Vertex getTail() {
-    return tail;
-  }
-
-  public void addNodePath(NodePath path) {
-    this.paths.add(path);
-  }
-  
-  public void addAllNodePaths(Collection<NodePath> paths) {
-    this.paths.addAll(paths);
-  }
-  
-  public Set<NodePath> getNodePaths() {
-    return paths;
-  }
-  
-  public PCollectionImpl getSplit() {
-    List<Iterator<PCollectionImpl<?>>> iters = Lists.newArrayList();
-    for (NodePath nodePath : paths) {
-      Iterator<PCollectionImpl<?>> iter = nodePath.iterator();
-      iter.next(); // prime this past the initial PGroupedTableImpl
-      iters.add(iter);
-    }
-
-    // Find the lowest point w/the lowest cost to be the split point for
-    // all of the dependent paths.
-    boolean end = false;
-    int splitIndex = -1;
-    while (!end) {
-      splitIndex++;
-      PCollectionImpl<?> current = null;
-      for (Iterator<PCollectionImpl<?>> iter : iters) {
-        if (iter.hasNext()) {
-          PCollectionImpl<?> next = iter.next();
-          if (next instanceof PGroupedTableImpl) {
-            end = true;
-            break;
-          } else if (current == null) {
-            current = next;
-          } else if (current != next) {
-            end = true;
-            break;
-          }
-        } else {
-          end = true;
-          break;
-        }
-      }
-    }
-    // TODO: Add costing calcs here.
-    
-    return Iterables.getFirst(paths, null).get(splitIndex);
-  }
-  
-  @Override
-  public boolean equals(Object other) {
-    if (other == null || !(other instanceof Edge)) {
-      return false;
-    }
-    Edge e = (Edge) other;
-    return head.equals(e.head) && tail.equals(e.tail) && paths.equals(e.paths);
-  }
-  
-  @Override
-  public int hashCode() {
-    return new HashCodeBuilder().append(head).append(tail).toHashCode();
-  }
-  
-  @Override
-  public String toString() {
-    return ReflectionToStringBuilder.toString(this, ToStringStyle.SHORT_PREFIX_STYLE);
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/plan/Graph.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/plan/Graph.java b/crunch/src/main/java/org/apache/crunch/impl/mr/plan/Graph.java
deleted file mode 100644
index ce0a847..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/plan/Graph.java
+++ /dev/null
@@ -1,133 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.plan;
-
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import org.apache.crunch.Pair;
-import org.apache.crunch.impl.mr.collect.PCollectionImpl;
-
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
-
-/**
- *
- */
-class Graph implements Iterable<Vertex> {
-
-  private final Map<PCollectionImpl, Vertex> vertices;
-  private final Map<Pair<Vertex, Vertex>, Edge> edges;  
-  private final Map<Vertex, List<Vertex>> dependencies;
-  
-  public Graph() {
-    this.vertices = Maps.newHashMap();
-    this.edges = Maps.newHashMap();
-    this.dependencies = Maps.newHashMap();
-  }
-  
-  public Vertex getVertexAt(PCollectionImpl impl) {
-    return vertices.get(impl);
-  }
-  
-  public Vertex addVertex(PCollectionImpl impl, boolean output) {
-    if (vertices.containsKey(impl)) {
-      Vertex v = vertices.get(impl);
-      if (output) {
-        v.setOutput();
-      }
-      return v;
-    }
-    Vertex v = new Vertex(impl);
-    vertices.put(impl, v);
-    if (output) {
-      v.setOutput();
-    }
-    return v;
-  }
-  
-  public Edge getEdge(Vertex head, Vertex tail) {
-    Pair<Vertex, Vertex> p = Pair.of(head, tail);
-    if (edges.containsKey(p)) {
-      return edges.get(p);
-    }
-    
-    Edge e = new Edge(head, tail);
-    edges.put(p, e);
-    tail.addIncoming(e);
-    head.addOutgoing(e);
-    return e;
-  }
-  
-  @Override
-  public Iterator<Vertex> iterator() {
-    return Sets.newHashSet(vertices.values()).iterator();
-  }
-
-  public Set<Edge> getAllEdges() {
-    return Sets.newHashSet(edges.values());
-  }
-  
-  public void markDependency(Vertex child, Vertex parent) {
-    List<Vertex> parents = dependencies.get(child);
-    if (parents == null) {
-      parents = Lists.newArrayList();
-      dependencies.put(child, parents);
-    }
-    parents.add(parent);
-  }
-  
-  public List<Vertex> getParents(Vertex child) {
-    if (dependencies.containsKey(child)) {
-      return dependencies.get(child);
-    }
-    return ImmutableList.of();
-  }
-  
-  public List<List<Vertex>> connectedComponents() {
-    List<List<Vertex>> components = Lists.newArrayList();
-    Set<Vertex> unassigned = Sets.newHashSet(vertices.values());
-    while (!unassigned.isEmpty()) {
-      Vertex base = unassigned.iterator().next();
-      List<Vertex> component = Lists.newArrayList();
-      component.add(base);
-      unassigned.remove(base);
-      Set<Vertex> working = Sets.newHashSet(base.getAllNeighbors());
-      while (!working.isEmpty()) {
-        Vertex n = working.iterator().next();
-        working.remove(n);
-        if (unassigned.contains(n)) {
-          component.add(n);
-          unassigned.remove(n);
-          for (Vertex n2 : n.getAllNeighbors()) {
-            if (unassigned.contains(n2)) {
-              working.add(n2);
-            }
-          }
-        }
-      }
-      components.add(component);
-    }
-    
-    return components;
-  }  
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/plan/GraphBuilder.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/plan/GraphBuilder.java b/crunch/src/main/java/org/apache/crunch/impl/mr/plan/GraphBuilder.java
deleted file mode 100644
index 925c39a..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/plan/GraphBuilder.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.plan;
-
-import org.apache.crunch.impl.mr.collect.DoCollectionImpl;
-import org.apache.crunch.impl.mr.collect.DoTableImpl;
-import org.apache.crunch.impl.mr.collect.InputCollection;
-import org.apache.crunch.impl.mr.collect.PCollectionImpl;
-import org.apache.crunch.impl.mr.collect.PGroupedTableImpl;
-import org.apache.crunch.impl.mr.collect.UnionCollection;
-
-/**
- *
- */
-class GraphBuilder implements PCollectionImpl.Visitor {
-
-  private Graph graph = new Graph();
-  private Vertex workingVertex;
-  private NodePath workingPath;
-  
-  public Graph getGraph() {
-    return graph;
-  }
-  
-  public void visitOutput(PCollectionImpl<?> output) {
-    workingVertex = graph.addVertex(output, true);
-    workingPath = new NodePath();
-    output.accept(this);
-  }
-  
-  @Override
-  public void visitInputCollection(InputCollection<?> collection) {
-    Vertex v = graph.addVertex(collection, false);
-    graph.getEdge(v, workingVertex).addNodePath(workingPath.close(collection));
-  }
-
-  @Override
-  public void visitUnionCollection(UnionCollection<?> collection) {
-    Vertex baseVertex = workingVertex;
-    NodePath basePath = workingPath;
-    for (PCollectionImpl<?> parent : collection.getParents()) {
-      workingPath = new NodePath(basePath);
-      workingVertex = baseVertex;
-      processParent(parent);
-    }
-  }
-
-  @Override
-  public void visitDoFnCollection(DoCollectionImpl<?> collection) {
-    workingPath.push(collection);
-    processParent(collection.getOnlyParent());
-  }
-
-  @Override
-  public void visitDoTable(DoTableImpl<?, ?> collection) {
-    workingPath.push(collection);
-    processParent(collection.getOnlyParent());
-  }
-
-  @Override
-  public void visitGroupedTable(PGroupedTableImpl<?, ?> collection) {
-    Vertex v = graph.addVertex(collection, false);
-    graph.getEdge(v, workingVertex).addNodePath(workingPath.close(collection));
-    workingVertex = v;
-    workingPath = new NodePath(collection);
-    processParent(collection.getOnlyParent());
-  }
-  
-  private void processParent(PCollectionImpl<?> parent) {
-    Vertex v = graph.getVertexAt(parent);
-    if (v == null) {
-      parent.accept(this);
-    } else {
-      graph.getEdge(v, workingVertex).addNodePath(workingPath.close(parent));
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/plan/JobNameBuilder.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/plan/JobNameBuilder.java b/crunch/src/main/java/org/apache/crunch/impl/mr/plan/JobNameBuilder.java
deleted file mode 100644
index 9ad7300..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/plan/JobNameBuilder.java
+++ /dev/null
@@ -1,79 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.plan;
-
-import java.util.List;
-
-import com.google.common.base.Joiner;
-import com.google.common.collect.Lists;
-
-/**
- * Visitor that traverses the {@code DoNode} instances in a job and builds a
- * String that identifies the stages of the pipeline that belong to this job.
- */
-class JobNameBuilder {
-
-  private static final Joiner JOINER = Joiner.on("+");
-  private static final Joiner CHILD_JOINER = Joiner.on("/");
-
-  private String pipelineName;
-  List<String> rootStack = Lists.newArrayList();
-
-  public JobNameBuilder(final String pipelineName) {
-    this.pipelineName = pipelineName;
-  }
-
-  public void visit(DoNode node) {
-    visit(node, rootStack);
-  }
-
-  public void visit(List<DoNode> nodes) {
-    visit(nodes, rootStack);
-  }
-
-  private void visit(List<DoNode> nodes, List<String> stack) {
-    if (nodes.size() == 1) {
-      visit(nodes.get(0), stack);
-    } else {
-      List<String> childStack = Lists.newArrayList();
-      for (int i = 0; i < nodes.size(); i++) {
-        DoNode node = nodes.get(i);
-        List<String> subStack = Lists.newArrayList();
-        visit(node, subStack);
-        if (!subStack.isEmpty()) {
-          childStack.add("[" + JOINER.join(subStack) + "]");
-        }
-      }
-      if (!childStack.isEmpty()) {
-        stack.add("[" + CHILD_JOINER.join(childStack) + "]");
-      }
-    }
-  }
-
-  private void visit(DoNode node, List<String> stack) {
-    String name = node.getName();
-    if (!name.isEmpty()) {
-      stack.add(node.getName());
-    }
-    visit(node.getChildren(), stack);
-  }
-
-  public String build() {
-    return String.format("%s: %s", pipelineName, JOINER.join(rootStack));
-  }
-}


[35/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/Pipeline.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/Pipeline.java b/crunch-core/src/main/java/org/apache/crunch/Pipeline.java
new file mode 100644
index 0000000..84c720c
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/Pipeline.java
@@ -0,0 +1,138 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * Manages the state of a pipeline execution.
+ * 
+ */
+public interface Pipeline {
+
+  /**
+   * Set the {@code Configuration} to use with this pipeline.
+   */
+  void setConfiguration(Configuration conf);
+
+  /**
+   * Returns the name of this pipeline.
+   * 
+   * @return Name of the pipeline
+   */
+  String getName();
+
+  /**
+   * Returns the {@code Configuration} instance associated with this pipeline.
+   */
+  Configuration getConfiguration();
+
+  /**
+   * Converts the given {@code Source} into a {@code PCollection} that is
+   * available to jobs run using this {@code Pipeline} instance.
+   * 
+   * @param source
+   *          The source of data
+   * @return A PCollection that references the given source
+   */
+  <T> PCollection<T> read(Source<T> source);
+
+  /**
+   * A version of the read method for {@code TableSource} instances that map to
+   * {@code PTable}s.
+   * 
+   * @param tableSource
+   *          The source of the data
+   * @return A PTable that references the given source
+   */
+  <K, V> PTable<K, V> read(TableSource<K, V> tableSource);
+
+  /**
+   * Write the given collection to the given target on the next pipeline run. The
+   * system will check to see if the target's location already exists using the
+   * {@code WriteMode.DEFAULT} rule for the given {@code Target}.
+   * 
+   * @param collection
+   *          The collection
+   * @param target
+   *          The output target
+   */
+  void write(PCollection<?> collection, Target target);
+
+  /**
+   * Write the contents of the {@code PCollection} to the given {@code Target},
+   * using the storage format specified by the target and the given
+   * {@code WriteMode} for cases where the referenced {@code Target}
+   * already exists.
+   *
+   * @param collection
+   *          The collection
+   * @param target
+   *          The target to write to
+   * @param writeMode
+   *          The strategy to use for handling existing outputs
+   */
+  void write(PCollection<?> collection, Target target,
+      Target.WriteMode writeMode);
+
+  /**
+   * Create the given PCollection and read the data it contains into the
+   * returned Collection instance for client use.
+   * 
+   * @param pcollection
+   *          The PCollection to materialize
+   * @return the data from the PCollection as a read-only Collection
+   */
+  <T> Iterable<T> materialize(PCollection<T> pcollection);
+
+  /**
+   * Constructs and executes a series of MapReduce jobs in order to write data
+   * to the output targets.
+   */
+  PipelineResult run();
+
+  /**
+   * Constructs and starts a series of MapReduce jobs in order to write data to
+   * the output targets, but returns a {@code PipelineExecution} handle to allow
+   * clients to monitor and control job execution.
+   * @return A handle for monitoring and controlling the asynchronous execution
+   */
+  PipelineExecution runAsync();
+  
+  /**
+   * Run any remaining jobs required to generate outputs and then clean up any
+   * intermediate data files that were created in this run or previous calls to
+   * {@code run}.
+   */
+  PipelineResult done();
+
+  /**
+   * A convenience method for reading a text file.
+   */
+  PCollection<String> readTextFile(String pathName);
+
+  /**
+   * A convenience method for writing a text file.
+   */
+  <T> void writeTextFile(PCollection<T> collection, String pathName);
+
+  /**
+   * Turn on debug logging for jobs that are run from this pipeline.
+   */
+  void enableDebug();
+}
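
As a rough sketch of how this interface is typically driven, the following word-count style example assumes the MRPipeline implementation and the PCollection#count() helper that live elsewhere in crunch-core and are not part of this excerpt:

import org.apache.crunch.PCollection;
import org.apache.crunch.PTable;
import org.apache.crunch.Pipeline;
import org.apache.crunch.PipelineResult;
import org.apache.crunch.impl.mr.MRPipeline;

public class WordCountSketch {
  public static void main(String[] args) throws Exception {
    // Bind the pipeline to this class so Hadoop can locate the job jar.
    Pipeline pipeline = new MRPipeline(WordCountSketch.class);

    // readTextFile is the convenience form of read(Source<String>).
    PCollection<String> lines = pipeline.readTextFile(args[0]);

    // count() groups identical lines and sums their occurrences.
    PTable<String, Long> counts = lines.count();

    // Writes are deferred; nothing executes until run()/done() is called.
    pipeline.writeTextFile(counts, args[1]);

    // done() runs outstanding jobs and cleans up intermediate data.
    PipelineResult result = pipeline.done();
    System.exit(result.succeeded() ? 0 : 1);
  }
}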

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/PipelineExecution.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/PipelineExecution.java b/crunch-core/src/main/java/org/apache/crunch/PipelineExecution.java
new file mode 100644
index 0000000..fc6bb91
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/PipelineExecution.java
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import java.util.concurrent.TimeUnit;
+
+/**
+ * A handle to allow clients to control a Crunch pipeline as it runs.
+ *
+ * This interface is thread-safe.
+ */
+public interface PipelineExecution {
+
+  enum Status { READY, RUNNING, SUCCEEDED, FAILED, KILLED }
+
+  /**
+   * Returns the contents of the Graphviz .dot file that describes the Crunch
+   * execution plan for this pipeline.
+   */
+  String getPlanDotFile();
+
+  /** Blocks until the pipeline completes or the specified waiting time elapses. */
+  void waitFor(long timeout, TimeUnit timeUnit) throws InterruptedException;
+
+  /** Blocks until the pipeline completes, i.e. reaches {@code SUCCEEDED}, {@code FAILED} or {@code KILLED}. */
+  void waitUntilDone() throws InterruptedException;
+
+  Status getStatus();
+
+  /** Retrieves the result of the pipeline if it has completed; otherwise returns {@code null}. */
+  PipelineResult getResult();
+
+  /**
+   * Kills the pipeline if it is running, no-op otherwise.
+   *
+   * This method only delivers a kill signal to the pipeline, and does not guarantee the pipeline exits on return.
+   * To wait until the pipeline has completely exited, use {@link #waitUntilDone()} after this call.
+   */
+  void kill() throws InterruptedException;
+}
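
A small, hedged sketch of driving this handle via Pipeline#runAsync(); the timeout handling and logging are illustrative only:

import java.util.concurrent.TimeUnit;

import org.apache.crunch.Pipeline;
import org.apache.crunch.PipelineExecution;
import org.apache.crunch.PipelineResult;

public class AsyncRunSketch {
  /** Runs the pipeline asynchronously and kills it if it exceeds the timeout. */
  static PipelineResult runWithTimeout(Pipeline pipeline, long timeout, TimeUnit unit)
      throws InterruptedException {
    PipelineExecution execution = pipeline.runAsync();
    // The Graphviz plan can be dumped for debugging before the jobs finish.
    System.out.println(execution.getPlanDotFile());

    execution.waitFor(timeout, unit);
    if (execution.getStatus() == PipelineExecution.Status.RUNNING) {
      execution.kill();          // only delivers the kill signal ...
      execution.waitUntilDone(); // ... so wait for the pipeline to actually exit
    }
    return execution.getResult(); // null unless the pipeline completed
  }
}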

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/PipelineResult.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/PipelineResult.java b/crunch-core/src/main/java/org/apache/crunch/PipelineResult.java
new file mode 100644
index 0000000..90b1067
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/PipelineResult.java
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import java.util.List;
+
+import org.apache.hadoop.mapreduce.Counter;
+import org.apache.hadoop.mapreduce.Counters;
+
+import com.google.common.collect.ImmutableList;
+
+/**
+ * Container for the results of a call to {@code run} or {@code done} on the
+ * Pipeline interface that includes details and statistics about the component
+ * stages of the data pipeline.
+ */
+public class PipelineResult {
+
+  public static class StageResult {
+
+    private final String stageName;
+    private final Counters counters;
+
+    public StageResult(String stageName, Counters counters) {
+      this.stageName = stageName;
+      this.counters = counters;
+    }
+
+    public String getStageName() {
+      return stageName;
+    }
+
+    public Counters getCounters() {
+      return counters;
+    }
+
+    public Counter findCounter(Enum<?> key) {
+      return counters.findCounter(key);
+    }
+
+    public long getCounterValue(Enum<?> key) {
+      return findCounter(key).getValue();
+    }
+  }
+
+  public static final PipelineResult EMPTY = new PipelineResult(ImmutableList.<StageResult> of());
+
+  private final List<StageResult> stageResults;
+
+  public PipelineResult(List<StageResult> stageResults) {
+    this.stageResults = ImmutableList.copyOf(stageResults);
+  }
+
+  public boolean succeeded() {
+    return !stageResults.isEmpty();
+  }
+
+  public List<StageResult> getStageResults() {
+    return stageResults;
+  }
+}
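
For completeness, a sketch of inspecting stage counters after a run; MyCounters is a made-up application counter enum, not part of this patch:

import org.apache.crunch.PipelineResult;
import org.apache.crunch.PipelineResult.StageResult;

public class ResultReportSketch {
  /** A hypothetical counter enum that the pipeline's DoFns would increment. */
  enum MyCounters { RECORDS_PROCESSED }

  static void report(PipelineResult result) {
    if (!result.succeeded()) {
      System.err.println("Pipeline failed or produced no stages");
      return;
    }
    for (StageResult stage : result.getStageResults()) {
      // findCounter/getCounterValue delegate to the stage's Hadoop Counters.
      long processed = stage.getCounterValue(MyCounters.RECORDS_PROCESSED);
      System.out.println(stage.getStageName() + ": records=" + processed);
    }
  }
}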

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/Source.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/Source.java b/crunch-core/src/main/java/org/apache/crunch/Source.java
new file mode 100644
index 0000000..f54d135
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/Source.java
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import java.io.IOException;
+
+import org.apache.crunch.types.PType;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.Job;
+
+/**
+ * A {@code Source} represents an input data set that is read by one or more
+ * MapReduce jobs.
+ * 
+ */
+public interface Source<T> {
+  /**
+   * Returns the {@code PType} for this source.
+   */
+  PType<T> getType();
+
+  /**
+   * Configure the given job to use this source as an input.
+   * 
+   * @param job
+   *          The job to configure
+   * @param inputId
+   *          For a multi-input job, an identifier for this input to the job
+   * @throws IOException If there is an error configuring the source as an input to the job
+   */
+  void configureSource(Job job, int inputId) throws IOException;
+
+  /**
+   * Returns the number of bytes in this {@code Source}.
+   */
+  long getSize(Configuration configuration);
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/SourceTarget.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/SourceTarget.java b/crunch-core/src/main/java/org/apache/crunch/SourceTarget.java
new file mode 100644
index 0000000..09c03c6
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/SourceTarget.java
@@ -0,0 +1,26 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+/**
+ * An interface for classes that implement both the {@code Source} and the
+ * {@code Target} interfaces.
+ *
+ */
+public interface SourceTarget<T> extends Source<T>, Target {
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/TableSource.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/TableSource.java b/crunch-core/src/main/java/org/apache/crunch/TableSource.java
new file mode 100644
index 0000000..ff27346
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/TableSource.java
@@ -0,0 +1,28 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import org.apache.crunch.types.PTableType;
+
+/**
+ * The interface for {@code Source} implementations that return a {@link PTable}.
+ * 
+ */
+public interface TableSource<K, V> extends Source<Pair<K, V>> {
+  PTableType<K, V> getTableType();
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/TableSourceTarget.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/TableSourceTarget.java b/crunch-core/src/main/java/org/apache/crunch/TableSourceTarget.java
new file mode 100644
index 0000000..9b1ed34
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/TableSourceTarget.java
@@ -0,0 +1,25 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+/**
+ * An interface for classes that implement both the {@code TableSource} and the
+ * {@code Target} interfaces.
+ */
+public interface TableSourceTarget<K, V> extends TableSource<K, V>, SourceTarget<Pair<K, V>> {
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/Target.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/Target.java b/crunch-core/src/main/java/org/apache/crunch/Target.java
new file mode 100644
index 0000000..0a0c23d
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/Target.java
@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import org.apache.crunch.io.OutputHandler;
+import org.apache.crunch.types.PType;
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * A {@code Target} represents the output destination of a Crunch {@code PCollection}
+ * in the context of a Crunch job.
+ */
+public interface Target {
+
+  /**
+   * An enum to represent different options the client may specify
+   * for handling the case where the output path, table, etc. referenced
+   * by a {@code Target} already exists.
+   */
+  enum WriteMode {
+    /**
+     * Check to see if the output target already exists before running
+     * the pipeline, and if it does, print an error and throw an exception.
+     */
+    DEFAULT,
+    
+    /**
+     * Check to see if the output target already exists, and if it does,
+     * delete it and overwrite it with the new output (if any).
+     */
+    OVERWRITE,
+
+    /**
+     * If the output target does not exist, create it. If it does exist,
+     * add the output of this pipeline to the target. This was the
+     * behavior in Crunch up to version 0.4.0.
+     */
+    APPEND
+  }
+
+  /**
+   * Apply the given {@code WriteMode} to this {@code Target} instance.
+   * 
+   * @param writeMode The strategy for handling existing outputs
+   * @param conf The ever-useful {@code Configuration} instance
+   */
+  void handleExisting(WriteMode writeMode, Configuration conf);
+  
+  /**
+   * Checks to see if this {@code Target} instance is compatible with the
+   * given {@code PType}.
+   * 
+   * @param handler The {@link OutputHandler} that is managing the output for the job
+   * @param ptype The {@code PType} to check
+   * @return True if this Target can write data in the form of the given {@code PType},
+   * false otherwise
+   */
+  boolean accept(OutputHandler handler, PType<?> ptype);
+
+  /**
+   * Attempt to create the {@code SourceTarget} type that corresponds to this {@code Target}
+   * for the given {@code PType}, if possible. If it is not possible, return {@code null}.
+   * 
+   * @param ptype The {@code PType} to use in constructing the {@code SourceTarget}
+   * @return A new {@code SourceTarget} or null if such a {@code SourceTarget} does not exist
+   */
+  <T> SourceTarget<T> asSourceTarget(PType<T> ptype);
+}
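
A brief sketch of how a WriteMode is usually supplied when writing a collection; To.textFile is assumed from the crunch-core I/O helpers that are outside this excerpt:

import org.apache.crunch.PCollection;
import org.apache.crunch.Pipeline;
import org.apache.crunch.Target;
import org.apache.crunch.io.To;

public class WriteModeSketch {
  /** Re-runs replace any previous output at the path instead of failing. */
  static void writeReplacing(Pipeline pipeline, PCollection<String> data, String path) {
    Target target = To.textFile(path);
    pipeline.write(data, target, Target.WriteMode.OVERWRITE);
  }
}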

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/Tuple.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/Tuple.java b/crunch-core/src/main/java/org/apache/crunch/Tuple.java
new file mode 100644
index 0000000..4e602ff
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/Tuple.java
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+/**
+ * A fixed-size collection of Objects, used in Crunch for representing joins
+ * between {@code PCollection}s.
+ * 
+ */
+public interface Tuple {
+
+  /**
+   * Returns the Object at the given index.
+   */
+  Object get(int index);
+
+  /**
+   * Returns the number of elements in this Tuple.
+   */
+  int size();
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/Tuple3.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/Tuple3.java b/crunch-core/src/main/java/org/apache/crunch/Tuple3.java
new file mode 100644
index 0000000..4372811
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/Tuple3.java
@@ -0,0 +1,96 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import org.apache.commons.lang.builder.HashCodeBuilder;
+
+/**
+ * A convenience class for three-element {@link Tuple}s.
+ */
+public class Tuple3<V1, V2, V3> implements Tuple {
+
+  private final V1 first;
+  private final V2 second;
+  private final V3 third;
+
+  public static <A, B, C> Tuple3<A, B, C> of(A a, B b, C c) {
+    return new Tuple3<A, B, C>(a, b, c);
+  }
+
+  public Tuple3(V1 first, V2 second, V3 third) {
+    this.first = first;
+    this.second = second;
+    this.third = third;
+  }
+
+  public V1 first() {
+    return first;
+  }
+
+  public V2 second() {
+    return second;
+  }
+
+  public V3 third() {
+    return third;
+  }
+
+  public Object get(int index) {
+    switch (index) {
+    case 0:
+      return first;
+    case 1:
+      return second;
+    case 2:
+      return third;
+    default:
+      throw new ArrayIndexOutOfBoundsException();
+    }
+  }
+
+  public int size() {
+    return 3;
+  }
+
+  @Override
+  public int hashCode() {
+    HashCodeBuilder hcb = new HashCodeBuilder();
+    return hcb.append(first).append(second).append(third).toHashCode();
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj)
+      return true;
+    if (obj == null)
+      return false;
+    if (getClass() != obj.getClass())
+      return false;
+    Tuple3<?, ?, ?> other = (Tuple3<?, ?, ?>) obj;
+    return (first == other.first || (first != null && first.equals(other.first)))
+        && (second == other.second || (second != null && second.equals(other.second)))
+        && (third == other.third || (third != null && third.equals(other.third)));
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder("Tuple3[");
+    sb.append(first).append(",").append(second).append(",").append(third);
+    return sb.append("]").toString();
+  }
+}
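
A tiny usage sketch for Tuple3, using only the methods defined above:

import org.apache.crunch.Tuple3;

public class Tuple3Sketch {
  public static void main(String[] args) {
    Tuple3<String, Integer, Double> t = Tuple3.of("alice", 42, 3.5);
    // The typed accessors avoid the casts required by the positional get(int).
    String name = t.first();
    int count = t.second();
    Object third = t.get(2); // returns 3.5 as an Object
    System.out.println(name + " " + count + " " + third + " size=" + t.size());
  }
}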

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/Tuple4.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/Tuple4.java b/crunch-core/src/main/java/org/apache/crunch/Tuple4.java
new file mode 100644
index 0000000..f161371
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/Tuple4.java
@@ -0,0 +1,105 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import org.apache.commons.lang.builder.HashCodeBuilder;
+
+/**
+ * A convenience class for four-element {@link Tuple}s.
+ */
+public class Tuple4<V1, V2, V3, V4> implements Tuple {
+
+  private final V1 first;
+  private final V2 second;
+  private final V3 third;
+  private final V4 fourth;
+
+  public static <A, B, C, D> Tuple4<A, B, C, D> of(A a, B b, C c, D d) {
+    return new Tuple4<A, B, C, D>(a, b, c, d);
+  }
+
+  public Tuple4(V1 first, V2 second, V3 third, V4 fourth) {
+    this.first = first;
+    this.second = second;
+    this.third = third;
+    this.fourth = fourth;
+  }
+
+  public V1 first() {
+    return first;
+  }
+
+  public V2 second() {
+    return second;
+  }
+
+  public V3 third() {
+    return third;
+  }
+
+  public V4 fourth() {
+    return fourth;
+  }
+
+  public Object get(int index) {
+    switch (index) {
+    case 0:
+      return first;
+    case 1:
+      return second;
+    case 2:
+      return third;
+    case 3:
+      return fourth;
+    default:
+      throw new ArrayIndexOutOfBoundsException();
+    }
+  }
+
+  public int size() {
+    return 4;
+  }
+
+  @Override
+  public int hashCode() {
+    HashCodeBuilder hcb = new HashCodeBuilder();
+    return hcb.append(first).append(second).append(third).append(fourth).toHashCode();
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj)
+      return true;
+    if (obj == null)
+      return false;
+    if (getClass() != obj.getClass())
+      return false;
+    Tuple4<?, ?, ?, ?> other = (Tuple4<?, ?, ?, ?>) obj;
+    return (first == other.first || (first != null && first.equals(other.first)))
+        && (second == other.second || (second != null && second.equals(other.second)))
+        && (third == other.third || (third != null && third.equals(other.third)))
+        && (fourth == other.fourth || (fourth != null && fourth.equals(other.fourth)));
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder("Tuple4[");
+    sb.append(first).append(",").append(second).append(",").append(third);
+    return sb.append(",").append(fourth).append("]").toString();
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/TupleN.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/TupleN.java b/crunch-core/src/main/java/org/apache/crunch/TupleN.java
new file mode 100644
index 0000000..e5eceb5
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/TupleN.java
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import java.util.Arrays;
+
+import org.apache.commons.lang.builder.HashCodeBuilder;
+
+/**
+ * A {@link Tuple} instance for an arbitrary number of values.
+ */
+public class TupleN implements Tuple {
+
+  private final Object values[];
+
+  public static TupleN of(Object... values) {
+    return new TupleN(values);
+  }
+
+  public TupleN(Object... values) {
+    this.values = new Object[values.length];
+    System.arraycopy(values, 0, this.values, 0, values.length);
+  }
+
+  public Object get(int index) {
+    return values[index];
+  }
+
+  public int size() {
+    return values.length;
+  }
+
+  @Override
+  public int hashCode() {
+    HashCodeBuilder hcb = new HashCodeBuilder();
+    for (Object v : values) {
+      hcb.append(v);
+    }
+    return hcb.toHashCode();
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj)
+      return true;
+    if (obj == null)
+      return false;
+    if (getClass() != obj.getClass())
+      return false;
+    TupleN other = (TupleN) obj;
+    return Arrays.equals(this.values, other.values);
+  }
+
+  @Override
+  public String toString() {
+    return Arrays.toString(values);
+  }
+}
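
And a matching sketch for the variable-length TupleN; the field values are invented for illustration:

import org.apache.crunch.TupleN;

public class TupleNSketch {
  public static void main(String[] args) {
    // A hypothetical five-field record: id, name, age, city, active flag.
    TupleN record = TupleN.of(1L, "bob", 29, "Berlin", true);
    for (int i = 0; i < record.size(); i++) {
      System.out.println(i + " -> " + record.get(i));
    }
    // equals() compares field by field, so identical contents are equal.
    System.out.println(record.equals(TupleN.of(1L, "bob", 29, "Berlin", true)));
  }
}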

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/fn/Aggregators.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/fn/Aggregators.java b/crunch-core/src/main/java/org/apache/crunch/fn/Aggregators.java
new file mode 100644
index 0000000..0ac79e2
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/fn/Aggregators.java
@@ -0,0 +1,1111 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.fn;
+
+import java.math.BigInteger;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
+import java.util.SortedSet;
+
+import org.apache.crunch.Aggregator;
+import org.apache.crunch.CombineFn;
+import org.apache.crunch.Emitter;
+import org.apache.crunch.PGroupedTable;
+import org.apache.crunch.Pair;
+import org.apache.crunch.Tuple;
+import org.apache.crunch.Tuple3;
+import org.apache.crunch.Tuple4;
+import org.apache.crunch.TupleN;
+import org.apache.crunch.util.Tuples;
+import org.apache.hadoop.conf.Configuration;
+
+import com.google.common.base.Joiner;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+
+
+/**
+ * A collection of pre-defined {@link org.apache.crunch.Aggregator}s.
+ *
+ * <p>The factory methods of this class return {@link org.apache.crunch.Aggregator}
+ * instances that you can use to combine the values of a {@link PGroupedTable}.
+ * In most cases, they turn a multimap (multiple entries per key) into a map (one
+ * entry per key).</p>
+ *
+ * <p><strong>Note</strong>: When using composed aggregators, like those built by the
+ * {@link #pairAggregator(Aggregator, Aggregator) pairAggregator()}
+ * factory method, you typically don't want to reuse the same child aggregator instance
+ * more than once, even if all child aggregators have the same type. In most cases, this
+ * is what you want:</p>
+ *
+ * <pre>
+ *   PTable&lt;K, Long&gt; result = groupedTable.combineValues(
+ *      pairAggregator(SUM_LONGS(), SUM_LONGS())
+ *   );
+ * </pre>
+ */
+public final class Aggregators {
+
+  private Aggregators() {
+    // utility class, not for instantiation
+  }
+
+  /**
+   * Sum up all {@code long} values.
+   * @return The newly constructed instance
+   */
+  public static Aggregator<Long> SUM_LONGS() {
+    return new SumLongs();
+  }
+
+  /**
+   * Sum up all {@code int} values.
+   * @return The newly constructed instance
+   */
+  public static Aggregator<Integer> SUM_INTS() {
+    return new SumInts();
+  }
+
+  /**
+   * Sum up all {@code float} values.
+   * @return The newly constructed instance
+   */
+  public static Aggregator<Float> SUM_FLOATS() {
+    return new SumFloats();
+  }
+
+  /**
+   * Sum up all {@code double} values.
+   * @return The newly constructed instance
+   */
+  public static Aggregator<Double> SUM_DOUBLES() {
+    return new SumDoubles();
+  }
+
+  /**
+   * Sum up all {@link BigInteger} values.
+   * @return The newly constructed instance
+   */
+  public static Aggregator<BigInteger> SUM_BIGINTS() {
+    return new SumBigInts();
+  }
+
+  /**
+   * Return the maximum of all given {@code long} values.
+   * @return The newly constructed instance
+   */
+  public static Aggregator<Long> MAX_LONGS() {
+    return new MaxLongs();
+  }
+
+  /**
+   * Return the {@code n} largest {@code long} values (or fewer if there are fewer
+   * values than {@code n}).
+   * @param n The number of values to return
+   * @return The newly constructed instance
+   */
+  public static Aggregator<Long> MAX_LONGS(int n) {
+    return new MaxNAggregator<Long>(n);
+  }
+
+  /**
+   * Return the maximum of all given {@code int} values.
+   * @return The newly constructed instance
+   */
+  public static Aggregator<Integer> MAX_INTS() {
+    return new MaxInts();
+  }
+
+  /**
+   * Return the {@code n} largest {@code int} values (or fewer if there are fewer
+   * values than {@code n}).
+   * @param n The number of values to return
+   * @return The newly constructed instance
+   */
+  public static Aggregator<Integer> MAX_INTS(int n) {
+    return new MaxNAggregator<Integer>(n);
+  }
+
+  /**
+   * Return the maximum of all given {@code float} values.
+   * @return The newly constructed instance
+   */
+  public static Aggregator<Float> MAX_FLOATS() {
+    return new MaxFloats();
+  }
+
+  /**
+   * Return the {@code n} largest {@code float} values (or fewer if there are fewer
+   * values than {@code n}).
+   * @param n The number of values to return
+   * @return The newly constructed instance
+   */
+  public static Aggregator<Float> MAX_FLOATS(int n) {
+    return new MaxNAggregator<Float>(n);
+  }
+
+  /**
+   * Return the maximum of all given {@code double} values.
+   * @return The newly constructed instance
+   */
+  public static Aggregator<Double> MAX_DOUBLES() {
+    return new MaxDoubles();
+  }
+
+  /**
+   * Return the {@code n} largest {@code double} values (or fewer if there are fewer
+   * values than {@code n}).
+   * @param n The number of values to return
+   * @return The newly constructed instance
+   */
+  public static Aggregator<Double> MAX_DOUBLES(int n) {
+    return new MaxNAggregator<Double>(n);
+  }
+
+  /**
+   * Return the maximum of all given {@link BigInteger} values.
+   * @return The newly constructed instance
+   */
+  public static Aggregator<BigInteger> MAX_BIGINTS() {
+    return new MaxBigInts();
+  }
+
+  /**
+   * Return the {@code n} largest {@link BigInteger} values (or fewer if there are fewer
+   * values than {@code n}).
+   * @param n The number of values to return
+   * @return The newly constructed instance
+   */
+  public static Aggregator<BigInteger> MAX_BIGINTS(int n) {
+    return new MaxNAggregator<BigInteger>(n);
+  }
+
+  /**
+   * Return the {@code n} largest values (or fewer if there are fewer
+   * values than {@code n}).
+   * @param n The number of values to return
+   * @param cls The type of the values to aggregate (must implement {@link Comparable}!)
+   * @return The newly constructed instance
+   */
+  public static <V extends Comparable<V>> Aggregator<V> MAX_N(int n, Class<V> cls) {
+    return new MaxNAggregator<V>(n);
+  }
+
+  /**
+   * Return the minimum of all given {@code long} values.
+   * @return The newly constructed instance
+   */
+  public static Aggregator<Long> MIN_LONGS() {
+    return new MinLongs();
+  }
+
+  /**
+   * Return the {@code n} smallest {@code long} values (or fewer if there are fewer
+   * values than {@code n}).
+   * @param n The number of values to return
+   * @return The newly constructed instance
+   */
+  public static Aggregator<Long> MIN_LONGS(int n) {
+    return new MinNAggregator<Long>(n);
+  }
+
+  /**
+   * Return the minimum of all given {@code int} values.
+   * @return The newly constructed instance
+   */
+  public static Aggregator<Integer> MIN_INTS() {
+    return new MinInts();
+  }
+
+  /**
+   * Return the {@code n} smallest {@code int} values (or fewer if there are fewer
+   * values than {@code n}).
+   * @param n The number of values to return
+   * @return The newly constructed instance
+   */
+  public static Aggregator<Integer> MIN_INTS(int n) {
+    return new MinNAggregator<Integer>(n);
+  }
+
+  /**
+   * Return the minimum of all given {@code float} values.
+   * @return The newly constructed instance
+   */
+  public static Aggregator<Float> MIN_FLOATS() {
+    return new MinFloats();
+  }
+
+  /**
+   * Return the {@code n} smallest {@code float} values (or fewer if there are fewer
+   * values than {@code n}).
+   * @param n The number of values to return
+   * @return The newly constructed instance
+   */
+  public static Aggregator<Float> MIN_FLOATS(int n) {
+    return new MinNAggregator<Float>(n);
+  }
+
+  /**
+   * Return the minimum of all given {@code double} values.
+   * @return The newly constructed instance
+   */
+  public static Aggregator<Double> MIN_DOUBLES() {
+    return new MinDoubles();
+  }
+
+  /**
+   * Return the {@code n} smallest {@code double} values (or fewer if there are fewer
+   * values than {@code n}).
+   * @param n The number of values to return
+   * @return The newly constructed instance
+   */
+  public static Aggregator<Double> MIN_DOUBLES(int n) {
+    return new MinNAggregator<Double>(n);
+  }
+
+  /**
+   * Return the minimum of all given {@link BigInteger} values.
+   * @return The newly constructed instance
+   */
+  public static Aggregator<BigInteger> MIN_BIGINTS() {
+    return new MinBigInts();
+  }
+
+  /**
+   * Return the {@code n} smallest {@link BigInteger} values (or fewer if there are fewer
+   * values than {@code n}).
+   * @param n The number of values to return
+   * @return The newly constructed instance
+   */
+  public static Aggregator<BigInteger> MIN_BIGINTS(int n) {
+    return new MinNAggregator<BigInteger>(n);
+  }
+
+  /**
+   * Return the {@code n} smallest values (or fewer if there are fewer
+   * values than {@code n}).
+   * @param n The number of values to return
+   * @param cls The type of the values to aggregate (must implement {@link Comparable}!)
+   * @return The newly constructed instance
+   */
+  public static <V extends Comparable<V>> Aggregator<V> MIN_N(int n, Class<V> cls) {
+    return new MinNAggregator<V>(n);
+  }
+
+  /**
+   * Return the first {@code n} values (or fewer if there are fewer values than {@code n}).
+   *
+   * @param n The number of values to return
+   * @return The newly constructed instance
+   */
+  public static <V> Aggregator<V> FIRST_N(int n) {
+    return new FirstNAggregator<V>(n);
+  }
+
+  /**
+   * Return the last {@code n} values (or fewer if there are fewer values than {@code n}).
+   *
+   * @param n The number of values to return
+   * @return The newly constructed instance
+   */
+  public static <V> Aggregator<V> LAST_N(int n) {
+    return new LastNAggregator<V>(n);
+  }
+  
+  /**
+   * Concatenate strings, with a separator between strings. There
+   * is no limit on the length of the concatenated string.
+   *
+   * <p><em>Note: String concatenation is not commutative, so the result
+   * depends on the order in which the values are aggregated and is
+   * therefore not deterministic!</em></p>
+   *
+   * @param separator
+   *            the separator which will be appended between each string
+   * @param skipNull
+   *            whether to skip null values. If set to false and a null
+   *            value is encountered, a NullPointerException is thrown.
+   * @return The newly constructed instance
+   */
+  public static Aggregator<String> STRING_CONCAT(String separator, boolean skipNull) {
+    return new StringConcatAggregator(separator, skipNull);
+  }
+
+  /**
+   * Concatenate strings, with a separator between strings. You can specify
+   * the maximum length of the output string and of the input strings, if
+   * they are &gt; 0. If a value is &lt;= 0, there is no limit.
+   *
+   * <p>Any too large string (or any string which would made the output too
+   * large) will be silently discarded.</p>
+   *
+   * <p><em>Note: String concatenation is not commutative, so the result
+   * depends on the order in which the values are aggregated and is
+   * therefore not deterministic!</em></p>
+   *
+   * @param separator
+   *            the separator which will be appended between each string
+   * @param skipNull
+   *            whether to skip null values. If set to false and a null
+   *            value is encountered, a NullPointerException is thrown.
+   * @param maxOutputLength
+   *            the maximum length of the output string. If set &lt;= 0,
+   *            there is no limit; otherwise the output string will have
+   *            fewer than maxOutputLength characters.
+   * @param maxInputLength
+   *            the maximum length of the input strings. If set &lt;= 0,
+   *            there is no limit; otherwise only input strings with fewer
+   *            than maxInputLength characters are concatenated.
+   * @return The newly constructed instance
+   */
+  public static Aggregator<String> STRING_CONCAT(String separator, boolean skipNull,
+      long maxOutputLength, long maxInputLength) {
+    return new StringConcatAggregator(separator, skipNull, maxOutputLength, maxInputLength);
+  }
+
+  /**
+   * Collect the unique elements of the input, as defined by the {@code equals} method for
+   * the input objects. No guarantees are made about the order in which the final elements
+   * will be returned.
+   * 
+   * @return The newly constructed instance
+   */
+  public static <V> Aggregator<V> UNIQUE_ELEMENTS() {
+    return new SetAggregator<V>();
+  }
+  
+  /**
+   * Collect a sample of unique elements from the input, where 'unique' is defined by
+   * the {@code equals} method for the input objects. No guarantees are made about which
+   * elements will be returned, simply that there will not be any more than the given sample
+   * size for any key.
+   * 
+   * @param maximumSampleSize The maximum number of unique elements to return per key
+   * @return The newly constructed instance
+   */
+  public static <V> Aggregator<V> SAMPLE_UNIQUE_ELEMENTS(int maximumSampleSize) {
+    return new SetAggregator<V>(maximumSampleSize);
+  }
+  
+  /**
+   * Apply separate aggregators to each component of a {@link Pair}.
+   */
+  public static <V1, V2> Aggregator<Pair<V1, V2>> pairAggregator(
+      Aggregator<V1> a1, Aggregator<V2> a2) {
+    return new PairAggregator<V1, V2>(a1, a2);
+  }
+
+  /**
+   * Apply separate aggregators to each component of a {@link Tuple3}.
+   */
+  public static <V1, V2, V3> Aggregator<Tuple3<V1, V2, V3>> tripAggregator(
+      Aggregator<V1> a1, Aggregator<V2> a2, Aggregator<V3> a3) {
+    return new TripAggregator<V1, V2, V3>(a1, a2, a3);
+  }
+
+  /**
+   * Apply separate aggregators to each component of a {@link Tuple4}.
+   */
+  public static <V1, V2, V3, V4> Aggregator<Tuple4<V1, V2, V3, V4>> quadAggregator(
+      Aggregator<V1> a1, Aggregator<V2> a2, Aggregator<V3> a3, Aggregator<V4> a4) {
+    return new QuadAggregator<V1, V2, V3, V4>(a1, a2, a3, a4);
+  }
+
+  /**
+   * Apply separate aggregators to each component of a {@link TupleN}.
+   */
+  public static Aggregator<TupleN> tupleAggregator(Aggregator<?>... aggregators) {
+    return new TupleNAggregator(aggregators);
+  }
+
+  /**
+   * Wrap a {@link CombineFn} adapter around the given aggregator.
+   *
+   * @param aggregator The instance to wrap
+   * @return A {@link CombineFn} delegating to {@code aggregator}
+   */
+  public static final <K, V> CombineFn<K, V> toCombineFn(Aggregator<V> aggregator) {
+    return new AggregatorCombineFn<K, V>(aggregator);
+  }
+
+  /**
+   * Base class for aggregators that do not require any initialization.
+   */
+  public static abstract class SimpleAggregator<T> implements Aggregator<T> {
+    @Override
+    public void initialize(Configuration conf) {
+      // No-op
+    }
+  }
+
+  /**
+   * A {@code CombineFn} that delegates all of the actual work to an
+   * {@code Aggregator} instance.
+   */
+  private static class AggregatorCombineFn<K, V> extends CombineFn<K, V> {
+    // TODO: Has to be fully qualified until CombineFn.Aggregator can be removed.
+    private final org.apache.crunch.Aggregator<V> aggregator;
+
+    public AggregatorCombineFn(org.apache.crunch.Aggregator<V> aggregator) {
+      this.aggregator = aggregator;
+    }
+
+    @Override
+    public void initialize() {
+      aggregator.initialize(getConfiguration());
+    }
+
+    @Override
+    public void process(Pair<K, Iterable<V>> input, Emitter<Pair<K, V>> emitter) {
+      aggregator.reset();
+      for (V v : input.second()) {
+        aggregator.update(v);
+      }
+      for (V v : aggregator.results()) {
+        emitter.emit(Pair.of(input.first(), v));
+      }
+    }
+  }
+
+  private static class SumLongs extends SimpleAggregator<Long> {
+    private long sum = 0;
+
+    @Override
+    public void reset() {
+      sum = 0;
+    }
+
+    @Override
+    public void update(Long next) {
+      sum += next;
+    }
+
+    @Override
+    public Iterable<Long> results() {
+      return ImmutableList.of(sum);
+    }
+  }
+
+  private static class SumInts extends SimpleAggregator<Integer> {
+    private int sum = 0;
+
+    @Override
+    public void reset() {
+      sum = 0;
+    }
+
+    @Override
+    public void update(Integer next) {
+      sum += next;
+    }
+
+    @Override
+    public Iterable<Integer> results() {
+      return ImmutableList.of(sum);
+    }
+  }
+
+  private static class SumFloats extends SimpleAggregator<Float> {
+    private float sum = 0;
+
+    @Override
+    public void reset() {
+      sum = 0f;
+    }
+
+    @Override
+    public void update(Float next) {
+      sum += next;
+    }
+
+    @Override
+    public Iterable<Float> results() {
+      return ImmutableList.of(sum);
+    }
+  }
+
+  private static class SumDoubles extends SimpleAggregator<Double> {
+    private double sum = 0;
+
+    @Override
+    public void reset() {
+      sum = 0.0;
+    }
+
+    @Override
+    public void update(Double next) {
+      sum += next;
+    }
+
+    @Override
+    public Iterable<Double> results() {
+      return ImmutableList.of(sum);
+    }
+  }
+
+  private static class SumBigInts extends SimpleAggregator<BigInteger> {
+    private BigInteger sum = BigInteger.ZERO;
+
+    @Override
+    public void reset() {
+      sum = BigInteger.ZERO;
+    }
+
+    @Override
+    public void update(BigInteger next) {
+      sum = sum.add(next);
+    }
+
+    @Override
+    public Iterable<BigInteger> results() {
+      return ImmutableList.of(sum);
+    }
+  }
+
+  private static class MaxLongs extends SimpleAggregator<Long> {
+    private Long max = null;
+
+    @Override
+    public void reset() {
+      max = null;
+    }
+
+    @Override
+    public void update(Long next) {
+      if (max == null || max < next) {
+        max = next;
+      }
+    }
+
+    @Override
+    public Iterable<Long> results() {
+      return ImmutableList.of(max);
+    }
+  }
+
+  private static class MaxInts extends SimpleAggregator<Integer> {
+    private Integer max = null;
+
+    @Override
+    public void reset() {
+      max = null;
+    }
+
+    @Override
+    public void update(Integer next) {
+      if (max == null || max < next) {
+        max = next;
+      }
+    }
+
+    @Override
+    public Iterable<Integer> results() {
+      return ImmutableList.of(max);
+    }
+  }
+
+  private static class MaxFloats extends SimpleAggregator<Float> {
+    private Float max = null;
+
+    @Override
+    public void reset() {
+      max = null;
+    }
+
+    @Override
+    public void update(Float next) {
+      if (max == null || max < next) {
+        max = next;
+      }
+    }
+
+    @Override
+    public Iterable<Float> results() {
+      return ImmutableList.of(max);
+    }
+  }
+
+  private static class MaxDoubles extends SimpleAggregator<Double> {
+    private Double max = null;
+
+    @Override
+    public void reset() {
+      max = null;
+    }
+
+    @Override
+    public void update(Double next) {
+      if (max == null || max < next) {
+        max = next;
+      }
+    }
+
+    @Override
+    public Iterable<Double> results() {
+      return ImmutableList.of(max);
+    }
+  }
+
+  private static class MaxBigInts extends SimpleAggregator<BigInteger> {
+    private BigInteger max = null;
+
+    @Override
+    public void reset() {
+      max = null;
+    }
+
+    @Override
+    public void update(BigInteger next) {
+      if (max == null || max.compareTo(next) < 0) {
+        max = next;
+      }
+    }
+
+    @Override
+    public Iterable<BigInteger> results() {
+      return ImmutableList.of(max);
+    }
+  }
+
+  private static class MinLongs extends SimpleAggregator<Long> {
+    private Long min = null;
+
+    @Override
+    public void reset() {
+      min = null;
+    }
+
+    @Override
+    public void update(Long next) {
+      if (min == null || min > next) {
+        min = next;
+      }
+    }
+
+    @Override
+    public Iterable<Long> results() {
+      return ImmutableList.of(min);
+    }
+  }
+
+  private static class MinInts extends SimpleAggregator<Integer> {
+    private Integer min = null;
+
+    @Override
+    public void reset() {
+      min = null;
+    }
+
+    @Override
+    public void update(Integer next) {
+      if (min == null || min > next) {
+        min = next;
+      }
+    }
+
+    @Override
+    public Iterable<Integer> results() {
+      return ImmutableList.of(min);
+    }
+  }
+
+  private static class MinFloats extends SimpleAggregator<Float> {
+    private Float min = null;
+
+    @Override
+    public void reset() {
+      min = null;
+    }
+
+    @Override
+    public void update(Float next) {
+      if (min == null || min > next) {
+        min = next;
+      }
+    }
+
+    @Override
+    public Iterable<Float> results() {
+      return ImmutableList.of(min);
+    }
+  }
+
+  private static class MinDoubles extends SimpleAggregator<Double> {
+    private Double min = null;
+
+    @Override
+    public void reset() {
+      min = null;
+    }
+
+    @Override
+    public void update(Double next) {
+      if (min == null || min > next) {
+        min = next;
+      }
+    }
+
+    @Override
+    public Iterable<Double> results() {
+      return ImmutableList.of(min);
+    }
+  }
+
+  private static class MinBigInts extends SimpleAggregator<BigInteger> {
+    private BigInteger min = null;
+
+    @Override
+    public void reset() {
+      min = null;
+    }
+
+    @Override
+    public void update(BigInteger next) {
+      if (min == null || min.compareTo(next) > 0) {
+        min = next;
+      }
+    }
+
+    @Override
+    public Iterable<BigInteger> results() {
+      return ImmutableList.of(min);
+    }
+  }
+
+  private static class MaxNAggregator<V extends Comparable<V>> extends SimpleAggregator<V> {
+    private final int arity;
+    private transient SortedSet<V> elements;
+
+    public MaxNAggregator(int arity) {
+      this.arity = arity;
+    }
+
+    @Override
+    public void reset() {
+      if (elements == null) {
+        elements = Sets.newTreeSet();
+      } else {
+        elements.clear();
+      }
+    }
+
+    @Override
+    public void update(V value) {
+      if (elements.size() < arity) {
+        elements.add(value);
+      } else if (value.compareTo(elements.first()) > 0) {
+        elements.remove(elements.first());
+        elements.add(value);
+      }
+    }
+
+    @Override
+    public Iterable<V> results() {
+      return ImmutableList.copyOf(elements);
+    }
+  }
+
+  private static class MinNAggregator<V extends Comparable<V>> extends SimpleAggregator<V> {
+    private final int arity;
+    private transient SortedSet<V> elements;
+
+    public MinNAggregator(int arity) {
+      this.arity = arity;
+    }
+
+    @Override
+    public void reset() {
+      if (elements == null) {
+        elements = Sets.newTreeSet();
+      } else {
+        elements.clear();
+      }
+    }
+
+    @Override
+    public void update(V value) {
+      if (elements.size() < arity) {
+        elements.add(value);
+      } else if (value.compareTo(elements.last()) < 0) {
+        elements.remove(elements.last());
+        elements.add(value);
+      }
+    }
+
+    @Override
+    public Iterable<V> results() {
+      return ImmutableList.copyOf(elements);
+    }
+  }
+
+  private static class FirstNAggregator<V> extends SimpleAggregator<V> {
+    private final int arity;
+    private final List<V> elements;
+
+    public FirstNAggregator(int arity) {
+      this.arity = arity;
+      this.elements = Lists.newArrayList();
+    }
+
+    @Override
+    public void reset() {
+      elements.clear();
+    }
+
+    @Override
+    public void update(V value) {
+      if (elements.size() < arity) {
+        elements.add(value);
+      }
+    }
+
+    @Override
+    public Iterable<V> results() {
+      return ImmutableList.copyOf(elements);
+    }
+  }
+
+  private static class LastNAggregator<V> extends SimpleAggregator<V> {
+    private final int arity;
+    private final LinkedList<V> elements;
+
+    public LastNAggregator(int arity) {
+      this.arity = arity;
+      this.elements = Lists.newLinkedList();
+    }
+
+    @Override
+    public void reset() {
+      elements.clear();
+    }
+
+    @Override
+    public void update(V value) {
+      elements.add(value);
+      if (elements.size() == arity + 1) {
+        elements.removeFirst();
+      }
+    }
+
+    @Override
+    public Iterable<V> results() {
+      return ImmutableList.copyOf(elements);
+    }
+  }
+
+  private static class StringConcatAggregator extends SimpleAggregator<String> {
+    private final String separator;
+    private final boolean skipNulls;
+    private final long maxOutputLength;
+    private final long maxInputLength;
+    private long currentLength;
+    private final LinkedList<String> list = new LinkedList<String>();
+
+    private transient Joiner joiner;
+
+    public StringConcatAggregator(final String separator, final boolean skipNulls) {
+      this.separator = separator;
+      this.skipNulls = skipNulls;
+      this.maxInputLength = 0;
+      this.maxOutputLength = 0;
+    }
+
+    public StringConcatAggregator(final String separator, final boolean skipNull, final long maxOutputLength, final long maxInputLength) {
+      this.separator = separator;
+      this.skipNulls = skipNull;
+      this.maxOutputLength = maxOutputLength;
+      this.maxInputLength = maxInputLength;
+      this.currentLength = -separator.length();
+    }
+
+    @Override
+    public void reset() {
+      if (joiner == null) {
+        joiner = skipNulls ? Joiner.on(separator).skipNulls() : Joiner.on(separator);
+      }
+      currentLength = -separator.length();
+      list.clear();
+    }
+
+    @Override
+    public void update(final String next) {
+      long length = (next == null) ? 0 : next.length() + separator.length();
+      if ((maxOutputLength > 0 && currentLength + length > maxOutputLength) || (maxInputLength > 0 && next.length() > maxInputLength)) {
+        return;
+      }
+      if (maxOutputLength > 0) {
+        currentLength += length;
+      }
+      list.add(next);
+    }
+
+    @Override
+    public Iterable<String> results() {
+      return ImmutableList.of(joiner.join(list));
+    }
+  }
+
+
+  private static abstract class TupleAggregator<T> implements Aggregator<T> {
+    private final List<Aggregator<Object>> aggregators;
+
+    @SuppressWarnings("unchecked")
+    public TupleAggregator(Aggregator<?>... aggregators) {
+      this.aggregators = Lists.newArrayList();
+      for (Aggregator<?> a : aggregators) {
+        this.aggregators.add((Aggregator<Object>) a);
+      }
+    }
+
+    @Override
+    public void initialize(Configuration configuration) {
+      for (Aggregator<?> a : aggregators) {
+        a.initialize(configuration);
+      }
+    }
+
+    @Override
+    public void reset() {
+      for (Aggregator<?> a : aggregators) {
+        a.reset();
+      }
+    }
+
+    protected void updateTuple(Tuple t) {
+      for (int i = 0; i < aggregators.size(); i++) {
+        aggregators.get(i).update(t.get(i));
+      }
+    }
+
+    protected Iterable<Object> results(int index) {
+      return aggregators.get(index).results();
+    }
+  }
+
+  private static class PairAggregator<V1, V2> extends TupleAggregator<Pair<V1, V2>> {
+
+    public PairAggregator(Aggregator<V1> a1, Aggregator<V2> a2) {
+      super(a1, a2);
+    }
+
+    @Override
+    public void update(Pair<V1, V2> value) {
+      updateTuple(value);
+    }
+
+    @SuppressWarnings("unchecked")
+    @Override
+    public Iterable<Pair<V1, V2>> results() {
+      return new Tuples.PairIterable<V1, V2>((Iterable<V1>) results(0), (Iterable<V2>) results(1));
+    }
+  }
+
+  private static class TripAggregator<A, B, C> extends TupleAggregator<Tuple3<A, B, C>> {
+
+    public TripAggregator(Aggregator<A> a1, Aggregator<B> a2, Aggregator<C> a3) {
+      super(a1, a2, a3);
+    }
+
+    @Override
+    public void update(Tuple3<A, B, C> value) {
+      updateTuple(value);
+    }
+
+    @SuppressWarnings("unchecked")
+    @Override
+    public Iterable<Tuple3<A, B, C>> results() {
+      return new Tuples.TripIterable<A, B, C>((Iterable<A>) results(0), (Iterable<B>) results(1),
+          (Iterable<C>) results(2));
+    }
+  }
+
+  private static class QuadAggregator<A, B, C, D> extends TupleAggregator<Tuple4<A, B, C, D>> {
+
+    public QuadAggregator(Aggregator<A> a1, Aggregator<B> a2, Aggregator<C> a3, Aggregator<D> a4) {
+      super(a1, a2, a3, a4);
+    }
+
+    @Override
+    public void update(Tuple4<A, B, C, D> value) {
+      updateTuple(value);
+    }
+
+    @SuppressWarnings("unchecked")
+    @Override
+    public Iterable<Tuple4<A, B, C, D>> results() {
+      return new Tuples.QuadIterable<A, B, C, D>((Iterable<A>) results(0), (Iterable<B>) results(1),
+          (Iterable<C>) results(2), (Iterable<D>) results(3));
+    }
+  }
+
+  private static class TupleNAggregator extends TupleAggregator<TupleN> {
+    private final int size;
+
+    public TupleNAggregator(Aggregator<?>... aggregators) {
+      super(aggregators);
+      size = aggregators.length;
+    }
+
+    @Override
+    public void update(TupleN value) {
+      updateTuple(value);
+    }
+
+    @Override
+    public Iterable<TupleN> results() {
+      Iterable<?>[] iterables = new Iterable[size];
+      for (int i = 0; i < size; i++) {
+        iterables[i] = results(i);
+      }
+      return new Tuples.TupleNIterable(iterables);
+    }
+  }
+
+  private static class SetAggregator<V> extends SimpleAggregator<V> {
+    private final Set<V> elements;
+    private final int sizeLimit;
+    
+    public SetAggregator() {
+      this(-1);
+    }
+    
+    public SetAggregator(int sizeLimit) {
+      this.elements = Sets.newHashSet();
+      this.sizeLimit = sizeLimit;
+    }
+    
+    @Override
+    public void reset() {
+      elements.clear();
+    }
+
+    @Override
+    public void update(V value) {
+      if (sizeLimit == -1 || elements.size() < sizeLimit) {
+        elements.add(value);
+      }
+    }
+
+    @Override
+    public Iterable<V> results() {
+      return ImmutableList.copyOf(elements);
+    }
+  }
+  
+}
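For reference, a minimal usage sketch of the new Aggregators factories (not part of the patch). It assumes the class sits in org.apache.crunch.fn alongside the other fn classes in this commit and that PGroupedTable#combineValues(CombineFn) is available; STRING_CONCAT and toCombineFn are the factory methods shown above.

import org.apache.crunch.PGroupedTable;
import org.apache.crunch.PTable;
import org.apache.crunch.fn.Aggregators;

public class AggregatorsSketch {

  // Concatenates all values for each key into one comma-separated string by
  // wrapping the STRING_CONCAT aggregator in a CombineFn via toCombineFn().
  public static PTable<String, String> concatByKey(PTable<String, String> table) {
    PGroupedTable<String, String> grouped = table.groupByKey();
    return grouped.combineValues(
        Aggregators.<String, String>toCombineFn(Aggregators.STRING_CONCAT(",", true)));
  }
}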

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/fn/CompositeMapFn.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/fn/CompositeMapFn.java b/crunch-core/src/main/java/org/apache/crunch/fn/CompositeMapFn.java
new file mode 100644
index 0000000..2a8e7d9
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/fn/CompositeMapFn.java
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.fn;
+
+import org.apache.crunch.Emitter;
+import org.apache.crunch.MapFn;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.TaskInputOutputContext;
+
+public class CompositeMapFn<R, S, T> extends MapFn<R, T> {
+
+  private final MapFn<R, S> first;
+  private final MapFn<S, T> second;
+
+  public CompositeMapFn(MapFn<R, S> first, MapFn<S, T> second) {
+    this.first = first;
+    this.second = second;
+  }
+
+  @Override
+  public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
+    first.setContext(context);
+    second.setContext(context);
+  }
+  
+  @Override
+  public void initialize() {
+    first.initialize();
+    second.initialize();
+  }
+
+  public MapFn<R, S> getFirst() {
+    return first;
+  }
+
+  public MapFn<S, T> getSecond() {
+    return second;
+  }
+
+  @Override
+  public T map(R input) {
+    return second.map(first.map(input));
+  }
+
+  @Override
+  public void cleanup(Emitter<T> emitter) {
+    first.cleanup(null);
+    second.cleanup(null);
+  }
+
+  @Override
+  public void configure(Configuration conf) {
+    first.configure(conf);
+    second.configure(conf);
+  }
+}
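A brief sketch of CompositeMapFn in isolation (not part of the patch; both anonymous MapFns are invented for the example):

import org.apache.crunch.MapFn;
import org.apache.crunch.fn.CompositeMapFn;

public class CompositeMapFnSketch {
  public static void main(String[] args) {
    // First function: parse a trimmed string into an Integer.
    MapFn<String, Integer> parse = new MapFn<String, Integer>() {
      @Override
      public Integer map(String input) {
        return Integer.valueOf(input.trim());
      }
    };
    // Second function: double the parsed value.
    MapFn<Integer, Integer> twice = new MapFn<Integer, Integer>() {
      @Override
      public Integer map(Integer input) {
        return input * 2;
      }
    };
    MapFn<String, Integer> composed =
        new CompositeMapFn<String, Integer, Integer>(parse, twice);
    System.out.println(composed.map(" 21 "));  // prints 42
  }
}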

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/fn/ExtractKeyFn.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/fn/ExtractKeyFn.java b/crunch-core/src/main/java/org/apache/crunch/fn/ExtractKeyFn.java
new file mode 100644
index 0000000..b8cc9df
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/fn/ExtractKeyFn.java
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.fn;
+
+import org.apache.crunch.MapFn;
+import org.apache.crunch.Pair;
+import org.apache.hadoop.mapreduce.TaskInputOutputContext;
+
+/**
+ * Wrapper function that applies a {@code MapFn} to each value to produce a key, emitting
+ * the key-value pairs needed to convert a {@code PCollection<V>} into a {@code PTable<K, V>}.
+ */
+public class ExtractKeyFn<K, V> extends MapFn<V, Pair<K, V>> {
+
+  private final MapFn<V, K> mapFn;
+
+  public ExtractKeyFn(MapFn<V, K> mapFn) {
+    this.mapFn = mapFn;
+  }
+
+  @Override
+  public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
+    mapFn.setContext(context);
+  }
+  
+  @Override
+  public void initialize() {
+    mapFn.initialize();
+  }
+
+  @Override
+  public Pair<K, V> map(V input) {
+    return Pair.of(mapFn.map(input), input);
+  }
+}
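To make the intent concrete, a small sketch of ExtractKeyFn used directly (not part of the patch; the length-based keying function is invented for the example):

import org.apache.crunch.MapFn;
import org.apache.crunch.Pair;
import org.apache.crunch.fn.ExtractKeyFn;

public class ExtractKeyFnSketch {
  public static void main(String[] args) {
    // Key each string by its length.
    ExtractKeyFn<Integer, String> byLength = new ExtractKeyFn<Integer, String>(
        new MapFn<String, Integer>() {
          @Override
          public Integer map(String input) {
            return input.length();
          }
        });
    Pair<Integer, String> keyed = byLength.map("crunch");
    System.out.println(keyed);  // a pair of (6, "crunch")
  }
}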

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/fn/FilterFns.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/fn/FilterFns.java b/crunch-core/src/main/java/org/apache/crunch/fn/FilterFns.java
new file mode 100644
index 0000000..8dc4268
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/fn/FilterFns.java
@@ -0,0 +1,112 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.fn;
+
+import org.apache.crunch.FilterFn;
+import org.apache.crunch.FilterFn.AndFn;
+import org.apache.crunch.FilterFn.NotFn;
+import org.apache.crunch.FilterFn.OrFn;
+
+
+/**
+ * A collection of pre-defined {@link FilterFn} implementations.
+ */
+public final class FilterFns {
+  // Note: We delegate to the deprecated implementation classes in FilterFn. Once those
+  //       classes are removed, their implementations can simply move here.
+
+  private FilterFns() {
+    // utility class, not for instantiation
+  }
+
+  /**
+   * Accept an entry if all of the given filters accept it, using short-circuit evaluation.
+   * @param fn1 The first function to delegate to
+   * @param fn2 The second function to delegate to
+   * @return The composed filter function
+   */
+  public static <S> FilterFn<S> and(FilterFn<S> fn1, FilterFn<S> fn2) {
+    return new AndFn<S>(fn1, fn2);
+  }
+
+  /**
+   * Accept an entry if all of the given filters accept it, using short-circuit evaluation.
+   * @param fns The functions to delegate to (in the given order)
+   * @return The composed filter function
+   */
+  public static <S> FilterFn<S> and(FilterFn<S>... fns) {
+    return new AndFn<S>(fns);
+  }
+
+  /**
+   * Accept an entry if at least one of the given filters accepts it, using short-circuit evaluation.
+   * @param fn1 The first function to delegate to
+   * @param fn2 The second function to delegate to
+   * @return The composed filter function
+   */
+  public static <S> FilterFn<S> or(FilterFn<S> fn1, FilterFn<S> fn2) {
+    return new OrFn<S>(fn1, fn2);
+  }
+
+  /**
+   * Accept an entry if at least one of the given filters accepts it, using short-circuit evaluation.
+   * @param fns The functions to delegate to (in the given order)
+   * @return The composed filter function
+   */
+  public static <S> FilterFn<S> or(FilterFn<S>... fns) {
+    return new OrFn<S>(fns);
+  }
+
+  /**
+   * Accept an entry if the given filter <em>does not</em> accept it.
+   * @param fn The function to delegate to
+   * @return The composed filter function
+   */
+  public static <S> FilterFn<S> not(FilterFn<S> fn) {
+    return new NotFn<S>(fn);
+  }
+
+  /**
+   * Accept everything.
+   * @return A filter function that accepts everything.
+   */
+  public static <S> FilterFn<S> ACCEPT_ALL() {
+    return new AcceptAllFn<S>();
+  }
+
+  /**
+   * Reject everything.
+   * @return A filter function that rejects everything.
+   */
+  public static <S> FilterFn<S> REJECT_ALL() {
+    return not(new AcceptAllFn<S>());
+  }
+
+  private static class AcceptAllFn<S> extends FilterFn<S> {
+    @Override
+    public boolean accept(S input) {
+      return true;
+    }
+
+    @Override
+    public float scaleFactor() {
+      return 1.0f;
+    }
+  }
+
+}
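A quick sketch of composing filters with the new FilterFns helpers (not part of the patch; both filters are invented for the example):

import org.apache.crunch.FilterFn;
import org.apache.crunch.fn.FilterFns;

public class FilterFnsSketch {

  static FilterFn<String> nonEmpty = new FilterFn<String>() {
    @Override
    public boolean accept(String input) {
      return input != null && !input.isEmpty();
    }
  };

  static FilterFn<String> shortEnough = new FilterFn<String>() {
    @Override
    public boolean accept(String input) {
      return input.length() <= 140;
    }
  };

  public static void main(String[] args) {
    FilterFn<String> both = FilterFns.and(nonEmpty, shortEnough);
    System.out.println(both.accept("hello"));            // true
    System.out.println(FilterFns.not(both).accept(""));  // true, since the empty string fails nonEmpty
  }
}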

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/fn/IdentityFn.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/fn/IdentityFn.java b/crunch-core/src/main/java/org/apache/crunch/fn/IdentityFn.java
new file mode 100644
index 0000000..0eadb06
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/fn/IdentityFn.java
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.fn;
+
+import org.apache.crunch.MapFn;
+
+public class IdentityFn<T> extends MapFn<T, T> {
+
+  private static final IdentityFn<Object> INSTANCE = new IdentityFn<Object>();
+
+  @SuppressWarnings("unchecked")
+  public static <T> IdentityFn<T> getInstance() {
+    return (IdentityFn<T>) INSTANCE;
+  }
+
+  // Non-instantiable
+  private IdentityFn() {
+  }
+
+  @Override
+  public T map(T input) {
+    return input;
+  }
+}
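Usage is trivial, but it is worth noting how the shared singleton is typed (sketch, not part of the patch):

import org.apache.crunch.fn.IdentityFn;

public class IdentityFnSketch {
  public static void main(String[] args) {
    // One untyped singleton is shared; getInstance() casts it to the requested type.
    IdentityFn<String> fn = IdentityFn.getInstance();
    System.out.println(fn.map("unchanged"));  // prints "unchanged"
  }
}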

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/fn/MapKeysFn.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/fn/MapKeysFn.java b/crunch-core/src/main/java/org/apache/crunch/fn/MapKeysFn.java
new file mode 100644
index 0000000..cbaf24d
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/fn/MapKeysFn.java
@@ -0,0 +1,32 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.fn;
+
+import org.apache.crunch.DoFn;
+import org.apache.crunch.Emitter;
+import org.apache.crunch.Pair;
+
+public abstract class MapKeysFn<K1, K2, V> extends DoFn<Pair<K1, V>, Pair<K2, V>> {
+
+  @Override
+  public void process(Pair<K1, V> input, Emitter<Pair<K2, V>> emitter) {
+    emitter.emit(Pair.of(map(input.first()), input.second()));
+  }
+
+  public abstract K2 map(K1 k1);
+}
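A sketch of a concrete MapKeysFn subclass (not part of the patch; the lower-casing example is invented):

import org.apache.crunch.fn.MapKeysFn;

public class MapKeysFnSketch {

  // Lower-cases each key while leaving the value untouched; the inherited
  // process() method re-emits Pair.of(map(key), value).
  static class LowerCaseKeys<V> extends MapKeysFn<String, String, V> {
    @Override
    public String map(String k) {
      return k.toLowerCase();
    }
  }

  public static void main(String[] args) {
    System.out.println(new LowerCaseKeys<Long>().map("KEY"));  // prints "key"
  }
}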

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/fn/MapValuesFn.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/fn/MapValuesFn.java b/crunch-core/src/main/java/org/apache/crunch/fn/MapValuesFn.java
new file mode 100644
index 0000000..b90f5ff
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/fn/MapValuesFn.java
@@ -0,0 +1,32 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.fn;
+
+import org.apache.crunch.DoFn;
+import org.apache.crunch.Emitter;
+import org.apache.crunch.Pair;
+
+public abstract class MapValuesFn<K, V1, V2> extends DoFn<Pair<K, V1>, Pair<K, V2>> {
+
+  @Override
+  public void process(Pair<K, V1> input, Emitter<Pair<K, V2>> emitter) {
+    emitter.emit(Pair.of(input.first(), map(input.second())));
+  }
+
+  public abstract V2 map(V1 v);
+}
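The mirror-image sketch for MapValuesFn (not part of the patch; the string-length example is invented):

import org.apache.crunch.fn.MapValuesFn;

public class MapValuesFnSketch {

  // Keeps the key and replaces each String value with its length; the inherited
  // process() method re-emits Pair.of(key, map(value)).
  static class ValueLengths<K> extends MapValuesFn<K, String, Integer> {
    @Override
    public Integer map(String v) {
      return v.length();
    }
  }

  public static void main(String[] args) {
    System.out.println(new ValueLengths<String>().map("hello"));  // prints 5
  }
}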

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/fn/PairMapFn.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/fn/PairMapFn.java b/crunch-core/src/main/java/org/apache/crunch/fn/PairMapFn.java
new file mode 100644
index 0000000..9ee4336
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/fn/PairMapFn.java
@@ -0,0 +1,65 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.fn;
+
+import org.apache.crunch.Emitter;
+import org.apache.crunch.MapFn;
+import org.apache.crunch.Pair;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.TaskInputOutputContext;
+
+public class PairMapFn<K, V, S, T> extends MapFn<Pair<K, V>, Pair<S, T>> {
+
+  private MapFn<K, S> keys;
+  private MapFn<V, T> values;
+
+  public PairMapFn(MapFn<K, S> keys, MapFn<V, T> values) {
+    this.keys = keys;
+    this.values = values;
+  }
+
+  @Override
+  public void configure(Configuration conf) {
+    keys.configure(conf);
+    values.configure(conf);
+  }
+
+  @Override
+  public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
+    keys.setContext(context);
+    values.setContext(context);
+  }
+
+  @Override
+  public void initialize() {
+    keys.initialize();
+    values.initialize();
+  }
+  
+  @Override
+  public Pair<S, T> map(Pair<K, V> input) {
+    return Pair.of(keys.map(input.first()), values.map(input.second()));
+  }
+
+  @Override
+  public void cleanup(Emitter<Pair<S, T>> emitter) {
+    keys.cleanup(null);
+    values.cleanup(null);
+  }
+
+}
\ No newline at end of file
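A sketch of PairMapFn applying independent functions to the key and value sides (not part of the patch; both MapFns are invented for the example):

import org.apache.crunch.MapFn;
import org.apache.crunch.Pair;
import org.apache.crunch.fn.PairMapFn;

public class PairMapFnSketch {
  public static void main(String[] args) {
    MapFn<String, String> upper = new MapFn<String, String>() {
      @Override
      public String map(String input) {
        return input.toUpperCase();
      }
    };
    MapFn<Integer, Long> widen = new MapFn<Integer, Long>() {
      @Override
      public Long map(Integer input) {
        return input.longValue();
      }
    };
    PairMapFn<String, Integer, String, Long> fn =
        new PairMapFn<String, Integer, String, Long>(upper, widen);
    System.out.println(fn.map(Pair.of("key", 7)));  // upper-cased key, widened value
  }
}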

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/fn/package-info.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/fn/package-info.java b/crunch-core/src/main/java/org/apache/crunch/fn/package-info.java
new file mode 100644
index 0000000..acefdff
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/fn/package-info.java
@@ -0,0 +1,22 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Commonly used functions for manipulating collections.
+ */
+package org.apache.crunch.fn;

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/hadoop/mapreduce/TaskAttemptContextFactory.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/hadoop/mapreduce/TaskAttemptContextFactory.java b/crunch-core/src/main/java/org/apache/crunch/hadoop/mapreduce/TaskAttemptContextFactory.java
new file mode 100644
index 0000000..887c051
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/hadoop/mapreduce/TaskAttemptContextFactory.java
@@ -0,0 +1,70 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.hadoop.mapreduce;
+
+import java.lang.reflect.Constructor;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+
+/**
+ * A factory class that allows us to hide the fact that {@code TaskAttemptContext} is a class in
+ * Hadoop 1.x.x and an interface in Hadoop 2.x.x.
+ */
+@SuppressWarnings("unchecked")
+public class TaskAttemptContextFactory {
+
+  private static final Log LOG = LogFactory.getLog(TaskAttemptContextFactory.class);
+
+  private static final TaskAttemptContextFactory INSTANCE = new TaskAttemptContextFactory();
+
+  public static TaskAttemptContext create(Configuration conf, TaskAttemptID taskAttemptId) {
+    return INSTANCE.createInternal(conf, taskAttemptId);
+  }
+
+  private Constructor<TaskAttemptContext> taskAttemptConstructor;
+
+  private TaskAttemptContextFactory() {
+    Class<TaskAttemptContext> implClass = TaskAttemptContext.class;
+    if (implClass.isInterface()) {
+      try {
+        implClass = (Class<TaskAttemptContext>) Class.forName(
+            "org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl");
+      } catch (ClassNotFoundException e) {
+        LOG.fatal("Could not find TaskAttemptContextImpl class, exiting", e);
+      }
+    }
+    try {
+      this.taskAttemptConstructor = implClass.getConstructor(Configuration.class, TaskAttemptID.class);
+    } catch (Exception e) {
+      LOG.fatal("Could not access TaskAttemptContext constructor, exiting", e);
+    }
+  }
+
+  private TaskAttemptContext createInternal(Configuration conf, TaskAttemptID taskAttemptId) {
+    try {
+      return (TaskAttemptContext) taskAttemptConstructor.newInstance(conf, taskAttemptId);
+    } catch (Exception e) {
+      LOG.error("Could not construct a TaskAttemptContext instance", e);
+      return null;
+    }
+  }
+}
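A sketch of the factory's intended call site (not part of the patch); it should behave the same on Hadoop 1, where TaskAttemptContext is a class, and on Hadoop 2, where the factory reflectively loads TaskAttemptContextImpl:

import org.apache.crunch.hadoop.mapreduce.TaskAttemptContextFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;

public class TaskAttemptContextSketch {
  public static void main(String[] args) {
    TaskAttemptContext context =
        TaskAttemptContextFactory.create(new Configuration(), new TaskAttemptID());
    System.out.println(context.getTaskAttemptID());
  }
}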


[09/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/impl/SourceTargetImpl.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/impl/SourceTargetImpl.java b/crunch/src/main/java/org/apache/crunch/io/impl/SourceTargetImpl.java
deleted file mode 100644
index 4d2b88a..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/impl/SourceTargetImpl.java
+++ /dev/null
@@ -1,89 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.impl;
-
-import java.io.IOException;
-
-import org.apache.commons.lang.builder.HashCodeBuilder;
-import org.apache.crunch.Source;
-import org.apache.crunch.SourceTarget;
-import org.apache.crunch.Target;
-import org.apache.crunch.io.OutputHandler;
-import org.apache.crunch.types.PType;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.Job;
-
-class SourceTargetImpl<T> implements SourceTarget<T> {
-
-  protected final Source<T> source;
-  protected final Target target;
-
-  public SourceTargetImpl(Source<T> source, Target target) {
-    this.source = source;
-    this.target = target;
-  }
-
-  @Override
-  public PType<T> getType() {
-    return source.getType();
-  }
-
-  @Override
-  public void configureSource(Job job, int inputId) throws IOException {
-    source.configureSource(job, inputId);
-  }
-
-  @Override
-  public long getSize(Configuration configuration) {
-    return source.getSize(configuration);
-  }
-
-  @Override
-  public boolean accept(OutputHandler handler, PType<?> ptype) {
-    return target.accept(handler, ptype);
-  }
-
-  @Override
-  public <S> SourceTarget<S> asSourceTarget(PType<S> ptype) {
-    return target.asSourceTarget(ptype);
-  }
-
-  @Override
-  public boolean equals(Object other) {
-    if (other == null || !(other.getClass().equals(getClass()))) {
-      return false;
-    }
-    SourceTargetImpl sti = (SourceTargetImpl) other;
-    return source.equals(sti.source) && target.equals(sti.target);
-  }
-
-  @Override
-  public int hashCode() {
-    return new HashCodeBuilder().append(source).append(target).toHashCode();
-  }
-
-  @Override
-  public String toString() {
-    return source.toString();
-  }
-
-  @Override
-  public void handleExisting(WriteMode strategy, Configuration conf) {
-    target.handleExisting(strategy, conf);  
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/impl/TableSourcePathTargetImpl.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/impl/TableSourcePathTargetImpl.java b/crunch/src/main/java/org/apache/crunch/io/impl/TableSourcePathTargetImpl.java
deleted file mode 100644
index a8ff639..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/impl/TableSourcePathTargetImpl.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.impl;
-
-import org.apache.crunch.Pair;
-import org.apache.crunch.TableSource;
-import org.apache.crunch.io.FileNamingScheme;
-import org.apache.crunch.io.PathTarget;
-import org.apache.crunch.io.SequentialFileNamingScheme;
-import org.apache.crunch.types.PTableType;
-
-public class TableSourcePathTargetImpl<K, V> extends SourcePathTargetImpl<Pair<K, V>> implements TableSource<K, V> {
-
-  public TableSourcePathTargetImpl(TableSource<K, V> source, PathTarget target) {
-    this(source, target, new SequentialFileNamingScheme());
-  }
-
-  public TableSourcePathTargetImpl(TableSource<K, V> source, PathTarget target, FileNamingScheme fileNamingScheme) {
-    super(source, target, fileNamingScheme);
-  }
-
-  @Override
-  public PTableType<K, V> getTableType() {
-    return ((TableSource<K, V>) source).getTableType();
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/impl/TableSourceTargetImpl.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/impl/TableSourceTargetImpl.java b/crunch/src/main/java/org/apache/crunch/io/impl/TableSourceTargetImpl.java
deleted file mode 100644
index 965b0f9..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/impl/TableSourceTargetImpl.java
+++ /dev/null
@@ -1,35 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.impl;
-
-import org.apache.crunch.Pair;
-import org.apache.crunch.TableSource;
-import org.apache.crunch.Target;
-import org.apache.crunch.types.PTableType;
-
-public class TableSourceTargetImpl<K, V> extends SourceTargetImpl<Pair<K, V>> implements TableSource<K, V> {
-
-  public TableSourceTargetImpl(TableSource<K, V> source, Target target) {
-    super(source, target);
-  }
-
-  @Override
-  public PTableType<K, V> getTableType() {
-    return ((TableSource<K, V>) source).getTableType();
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/package-info.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/package-info.java b/crunch/src/main/java/org/apache/crunch/io/package-info.java
deleted file mode 100644
index 022bc99..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/package-info.java
+++ /dev/null
@@ -1,22 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Data input and output for Pipelines.
- */
-package org.apache.crunch.io;

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/seq/SeqFileHelper.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/seq/SeqFileHelper.java b/crunch/src/main/java/org/apache/crunch/io/seq/SeqFileHelper.java
deleted file mode 100644
index ba07506..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/seq/SeqFileHelper.java
+++ /dev/null
@@ -1,35 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.seq;
-
-import org.apache.crunch.MapFn;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.writable.WritableType;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.util.ReflectionUtils;
-
-class SeqFileHelper {
-  static <T> Writable newInstance(PType<T> ptype, Configuration conf) {
-    return (Writable) ReflectionUtils.newInstance(((WritableType) ptype).getSerializationClass(), conf);
-  }
-
-  static <T> MapFn<Object, T> getInputMapFn(PType<T> ptype) {
-    return ptype.getInputMapFn();
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/seq/SeqFileReaderFactory.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/seq/SeqFileReaderFactory.java b/crunch/src/main/java/org/apache/crunch/io/seq/SeqFileReaderFactory.java
deleted file mode 100644
index 3f45644..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/seq/SeqFileReaderFactory.java
+++ /dev/null
@@ -1,112 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.seq;
-
-import java.io.IOException;
-import java.util.Iterator;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.crunch.MapFn;
-import org.apache.crunch.fn.IdentityFn;
-import org.apache.crunch.io.FileReaderFactory;
-import org.apache.crunch.io.impl.AutoClosingIterator;
-import org.apache.crunch.types.Converter;
-import org.apache.crunch.types.PTableType;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.writable.Writables;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.util.ReflectionUtils;
-
-import com.google.common.collect.Iterators;
-import com.google.common.collect.UnmodifiableIterator;
-
-public class SeqFileReaderFactory<T> implements FileReaderFactory<T> {
-
-  private static final Log LOG = LogFactory.getLog(SeqFileReaderFactory.class);
-
-  private final Converter converter;
-  private final MapFn<Object, T> mapFn;
-  private final Writable key;
-  private final Writable value;
-
-  public SeqFileReaderFactory(PType<T> ptype) {
-    this.converter = ptype.getConverter();
-    this.mapFn = ptype.getInputMapFn();
-    if (ptype instanceof PTableType) {
-      PTableType ptt = (PTableType) ptype;
-      this.key = SeqFileHelper.newInstance(ptt.getKeyType(), null);
-      this.value = SeqFileHelper.newInstance(ptt.getValueType(), null);
-    } else {
-      this.key = NullWritable.get();
-      this.value = SeqFileHelper.newInstance(ptype, null);
-    }
-  }
-
-  public SeqFileReaderFactory(Class clazz) {
-    PType<T> ptype = Writables.writables(clazz);
-    this.converter = ptype.getConverter();
-    this.mapFn = ptype.getInputMapFn();
-    this.key = NullWritable.get();
-    this.value = (Writable) ReflectionUtils.newInstance(clazz, null);
-  }
-  
-  @Override
-  public Iterator<T> read(FileSystem fs, final Path path) {
-    mapFn.initialize();
-    try {
-      final SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, fs.getConf());
-      return new AutoClosingIterator<T>(reader, new UnmodifiableIterator<T>() {
-        boolean nextChecked = false;
-        boolean hasNext = false;
-
-        @Override
-        public boolean hasNext() {
-          if (nextChecked) {
-            return hasNext;
-          }
-          try {
-            hasNext = reader.next(key, value);
-            nextChecked = true;
-            return hasNext;
-          } catch (IOException e) {
-            LOG.info("Error reading from path: " + path, e);
-            return false;
-          }
-        }
-
-        @Override
-        public T next() {
-          if (!nextChecked && !hasNext()) {
-            return null;
-          }
-          nextChecked = false;
-          return mapFn.map(converter.convertInput(key, value));
-        }
-      });
-    } catch (IOException e) {
-      LOG.info("Could not read seqfile at path: " + path, e);
-      return Iterators.emptyIterator();
-    }
-  }
-
-}
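
For reference, a minimal sketch of exercising this factory's read() contract directly, outside of a pipeline. The wrapper class name and the path are placeholders, and the Crunch classes shown above are assumed to be on the classpath:

import java.util.Iterator;

import org.apache.crunch.io.seq.SeqFileReaderFactory;
import org.apache.crunch.types.writable.Writables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SeqFileReadSketch {
  public static void main(String[] args) throws Exception {
    Path path = new Path("/tmp/strings.seq"); // placeholder path
    FileSystem fs = path.getFileSystem(new Configuration());
    // Text values in the SequenceFile are mapped back to Strings by the Writables PType.
    SeqFileReaderFactory<String> factory =
        new SeqFileReaderFactory<String>(Writables.strings());
    Iterator<String> records = factory.read(fs, path);
    while (records.hasNext()) {
      System.out.println(records.next());
    }
  }
}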

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/seq/SeqFileSource.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/seq/SeqFileSource.java b/crunch/src/main/java/org/apache/crunch/io/seq/SeqFileSource.java
deleted file mode 100644
index 8fac4ae..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/seq/SeqFileSource.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.seq;
-
-import java.io.IOException;
-
-import org.apache.crunch.io.CompositePathIterable;
-import org.apache.crunch.io.ReadableSource;
-import org.apache.crunch.io.impl.FileSourceImpl;
-import org.apache.crunch.types.PType;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
-
-public class SeqFileSource<T> extends FileSourceImpl<T> implements ReadableSource<T> {
-
-  public SeqFileSource(Path path, PType<T> ptype) {
-    super(path, ptype, SequenceFileInputFormat.class);
-  }
-
-  @Override
-  public Iterable<T> read(Configuration conf) throws IOException {
-    FileSystem fs = path.getFileSystem(conf);
-    return CompositePathIterable.create(fs, path, new SeqFileReaderFactory<T>(ptype));
-  }
-
-  @Override
-  public String toString() {
-    return "SeqFile(" + path.toString() + ")";
-  }
-}
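
A hypothetical pipeline sketch that reads through this source; the class name and the input/output paths are made up:

import org.apache.crunch.PCollection;
import org.apache.crunch.Pipeline;
import org.apache.crunch.impl.mr.MRPipeline;
import org.apache.crunch.io.seq.SeqFileSource;
import org.apache.crunch.types.writable.Writables;
import org.apache.hadoop.fs.Path;

public class SeqFileSourceSketch {
  public static void main(String[] args) throws Exception {
    Pipeline pipeline = new MRPipeline(SeqFileSourceSketch.class);
    // Read String values out of a SequenceFile of (NullWritable, Text) pairs.
    PCollection<String> values = pipeline.read(
        new SeqFileSource<String>(new Path("/tmp/input.seq"), Writables.strings()));
    pipeline.writeTextFile(values, "/tmp/output-text");
    pipeline.done();
  }
}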

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/seq/SeqFileSourceTarget.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/seq/SeqFileSourceTarget.java b/crunch/src/main/java/org/apache/crunch/io/seq/SeqFileSourceTarget.java
deleted file mode 100644
index adc739f..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/seq/SeqFileSourceTarget.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.seq;
-
-import org.apache.crunch.io.FileNamingScheme;
-import org.apache.crunch.io.SequentialFileNamingScheme;
-import org.apache.crunch.io.impl.ReadableSourcePathTargetImpl;
-import org.apache.crunch.types.PType;
-import org.apache.hadoop.fs.Path;
-
-public class SeqFileSourceTarget<T> extends ReadableSourcePathTargetImpl<T> {
-
-  public SeqFileSourceTarget(String path, PType<T> ptype) {
-    this(new Path(path), ptype);
-  }
-
-  public SeqFileSourceTarget(Path path, PType<T> ptype) {
-    this(path, ptype, new SequentialFileNamingScheme());
-  }
-
-  public SeqFileSourceTarget(Path path, PType<T> ptype, FileNamingScheme fileNamingScheme) {
-    super(new SeqFileSource<T>(path, ptype), new SeqFileTarget(path), fileNamingScheme);
-  }
-
-  @Override
-  public String toString() {
-    return target.toString();
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/seq/SeqFileTableSource.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/seq/SeqFileTableSource.java b/crunch/src/main/java/org/apache/crunch/io/seq/SeqFileTableSource.java
deleted file mode 100644
index 7a63272..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/seq/SeqFileTableSource.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.seq;
-
-import java.io.IOException;
-
-import org.apache.crunch.Pair;
-import org.apache.crunch.io.CompositePathIterable;
-import org.apache.crunch.io.ReadableSource;
-import org.apache.crunch.io.impl.FileTableSourceImpl;
-import org.apache.crunch.types.PTableType;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
-
-/**
- * A {@code TableSource} that uses {@code SequenceFileInputFormat} to read the input
- * file.
- */
-public class SeqFileTableSource<K, V> extends FileTableSourceImpl<K, V> implements ReadableSource<Pair<K, V>> {
-
-  public SeqFileTableSource(String path, PTableType<K, V> ptype) {
-    this(new Path(path), ptype);
-  }
-
-  public SeqFileTableSource(Path path, PTableType<K, V> ptype) {
-    super(path, ptype, SequenceFileInputFormat.class);
-  }
-
-  @Override
-  public Iterable<Pair<K, V>> read(Configuration conf) throws IOException {
-    FileSystem fs = path.getFileSystem(conf);
-    return CompositePathIterable.create(fs, path,
-        new SeqFileReaderFactory<Pair<K, V>>(getTableType()));
-  }
-
-  @Override
-  public String toString() {
-    return "SeqFile(" + path.toString() + ")";
-  }
-}
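
A hypothetical sketch of reading a PTable through this source; the path and the String/Integer table type are illustrative only:

import org.apache.crunch.PTable;
import org.apache.crunch.Pipeline;
import org.apache.crunch.impl.mr.MRPipeline;
import org.apache.crunch.io.seq.SeqFileTableSource;
import org.apache.crunch.types.writable.Writables;

public class SeqFileTableSourceSketch {
  public static void main(String[] args) throws Exception {
    Pipeline pipeline = new MRPipeline(SeqFileTableSourceSketch.class);
    // Keys and values are deserialized according to the table PType.
    PTable<String, Integer> events = pipeline.read(
        new SeqFileTableSource<String, Integer>("/tmp/events.seq",
            Writables.tableOf(Writables.strings(), Writables.ints())));
    pipeline.writeTextFile(events, "/tmp/events-text");
    pipeline.done();
  }
}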

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/seq/SeqFileTableSourceTarget.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/seq/SeqFileTableSourceTarget.java b/crunch/src/main/java/org/apache/crunch/io/seq/SeqFileTableSourceTarget.java
deleted file mode 100644
index ebdf319..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/seq/SeqFileTableSourceTarget.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.seq;
-
-import org.apache.crunch.Pair;
-import org.apache.crunch.TableSourceTarget;
-import org.apache.crunch.io.FileNamingScheme;
-import org.apache.crunch.io.SequentialFileNamingScheme;
-import org.apache.crunch.io.impl.ReadableSourcePathTargetImpl;
-import org.apache.crunch.types.PTableType;
-import org.apache.hadoop.fs.Path;
-
-public class SeqFileTableSourceTarget<K, V> extends ReadableSourcePathTargetImpl<Pair<K, V>> implements
-    TableSourceTarget<K, V> {
-  private final PTableType<K, V> tableType;
-
-  public SeqFileTableSourceTarget(String path, PTableType<K, V> tableType) {
-    this(new Path(path), tableType);
-  }
-
-  public SeqFileTableSourceTarget(Path path, PTableType<K, V> tableType) {
-    this(path, tableType, new SequentialFileNamingScheme());
-  }
-
-  public SeqFileTableSourceTarget(Path path, PTableType<K, V> tableType, FileNamingScheme fileNamingScheme) {
-    super(new SeqFileTableSource<K, V>(path, tableType), new SeqFileTarget(path), fileNamingScheme);
-    this.tableType = tableType;
-  }
-
-  @Override
-  public PTableType<K, V> getTableType() {
-    return tableType;
-  }
-
-  @Override
-  public String toString() {
-    return target.toString();
-  }
-}
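
Because this class is both a TableSource and a Target, a single instance can be handed to either side of a pipeline. A hypothetical read-side sketch (path, types, and class name are made up):

import org.apache.crunch.PTable;
import org.apache.crunch.Pipeline;
import org.apache.crunch.impl.mr.MRPipeline;
import org.apache.crunch.io.seq.SeqFileTableSourceTarget;
import org.apache.crunch.types.PTableType;
import org.apache.crunch.types.writable.Writables;

public class SeqFileTableSourceTargetSketch {
  public static void main(String[] args) throws Exception {
    Pipeline pipeline = new MRPipeline(SeqFileTableSourceTargetSketch.class);
    PTableType<String, Long> type =
        Writables.tableOf(Writables.strings(), Writables.longs());
    // The same object could be passed to Pipeline.write(...) as a Target in another stage.
    SeqFileTableSourceTarget<String, Long> seqTable =
        new SeqFileTableSourceTarget<String, Long>("/tmp/counts.seq", type);
    PTable<String, Long> counts = pipeline.read(seqTable);
    pipeline.writeTextFile(counts, "/tmp/counts-text");
    pipeline.done();
  }
}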

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/seq/SeqFileTarget.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/seq/SeqFileTarget.java b/crunch/src/main/java/org/apache/crunch/io/seq/SeqFileTarget.java
deleted file mode 100644
index 60e4739..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/seq/SeqFileTarget.java
+++ /dev/null
@@ -1,55 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.seq;
-
-import org.apache.crunch.SourceTarget;
-import org.apache.crunch.io.FileNamingScheme;
-import org.apache.crunch.io.SequentialFileNamingScheme;
-import org.apache.crunch.io.impl.FileTargetImpl;
-import org.apache.crunch.types.PTableType;
-import org.apache.crunch.types.PType;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
-
-public class SeqFileTarget extends FileTargetImpl {
-  public SeqFileTarget(String path) {
-    this(new Path(path));
-  }
-
-  public SeqFileTarget(Path path) {
-    this(path, new SequentialFileNamingScheme());
-  }
-
-  public SeqFileTarget(Path path, FileNamingScheme fileNamingScheme) {
-    super(path, SequenceFileOutputFormat.class, fileNamingScheme);
-  }
-
-  @Override
-  public String toString() {
-    return "SeqFile(" + path.toString() + ")";
-  }
-
-  @Override
-  public <T> SourceTarget<T> asSourceTarget(PType<T> ptype) {
-    if (ptype instanceof PTableType) {
-      return new SeqFileTableSourceTarget(path, (PTableType) ptype);
-    } else {
-      return new SeqFileSourceTarget(path, ptype);
-    }
-  }
-}
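
A hypothetical sketch of writing a PCollection to this target; the paths are placeholders, and the collection is assumed to map onto a SequenceFile of (NullWritable, value) pairs:

import org.apache.crunch.PCollection;
import org.apache.crunch.Pipeline;
import org.apache.crunch.impl.mr.MRPipeline;
import org.apache.crunch.io.seq.SeqFileTarget;

public class SeqFileTargetSketch {
  public static void main(String[] args) throws Exception {
    Pipeline pipeline = new MRPipeline(SeqFileTargetSketch.class);
    PCollection<String> lines = pipeline.readTextFile("/tmp/input-text");
    // Persist the collection as a SequenceFile under the given directory.
    pipeline.write(lines, new SeqFileTarget("/tmp/output.seq"));
    pipeline.done();
  }
}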

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/text/BZip2TextInputFormat.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/text/BZip2TextInputFormat.java b/crunch/src/main/java/org/apache/crunch/io/text/BZip2TextInputFormat.java
deleted file mode 100644
index 67a8870..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/text/BZip2TextInputFormat.java
+++ /dev/null
@@ -1,235 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.text;
-
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.FileSplit;
-
-class BZip2TextInputFormat extends FileInputFormat<LongWritable, Text> {
-  /**
-   * Treats keys as offsets into the file and values as lines. Since the input file is
-   * compressed, the offset for a particular line is not well-defined. This
-   * implementation returns the starting position of a compressed block as the
-   * key for every line in that block.
-   */
-
-  private static class BZip2LineRecordReader extends RecordReader<LongWritable, Text> {
-
-    private long start;
-
-    private long end;
-
-    private long pos;
-
-    private CBZip2InputStream in;
-
-    private ByteArrayOutputStream buffer = new ByteArrayOutputStream(256);
-
-    // flag to indicate if previous character read was Carriage Return ('\r')
-    // and the next character was not Line Feed ('\n')
-    private boolean CRFollowedByNonLF = false;
-
-    // in the case where a Carriage Return ('\r') was not followed by a
-    // Line Feed ('\n'), this variable will hold that non Line Feed character
-    // that was read from the underlying stream.
-    private byte nonLFChar;
-
-    /**
-     * Provide a bridge to get the bytes from the ByteArrayOutputStream without
-     * creating a new byte array.
-     */
-    private static class TextStuffer extends OutputStream {
-      public Text target;
-
-      @Override
-      public void write(int b) {
-        throw new UnsupportedOperationException("write(byte) not supported");
-      }
-
-      @Override
-      public void write(byte[] data, int offset, int len) throws IOException {
-        target.clear();
-        target.set(data, offset, len);
-      }
-    }
-
-    private TextStuffer bridge = new TextStuffer();
-
-    private LongWritable key = new LongWritable();
-    private Text value = new Text();
-
-    public BZip2LineRecordReader(Configuration job, FileSplit split) throws IOException {
-      start = split.getStart();
-      end = start + split.getLength();
-      final Path file = split.getPath();
-
-      // open the file and seek to the start of the split
-      FileSystem fs = file.getFileSystem(job);
-      FSDataInputStream fileIn = fs.open(split.getPath());
-      fileIn.seek(start);
-
-      in = new CBZip2InputStream(fileIn, 9, end);
-      if (start != 0) {
-        // skip first line and re-establish "start".
-        // LineRecordReader.readLine(this.in, null);
-        readLine(this.in, null);
-        start = in.getPos();
-      }
-      pos = in.getPos();
-    }
-
-    /*
-     * LineRecordReader.readLine() is deprecated in Hadoop 0.17, so it is
-     * reimplemented here locally.
-     */
-    private long readLine(InputStream in, OutputStream out) throws IOException {
-      long bytes = 0;
-      while (true) {
-        int b = -1;
-        if (CRFollowedByNonLF) {
-          // In the previous call, a Carriage Return ('\r') was followed
-          // by a character other than a Line Feed ('\n'). In that call we
-          // read that character from the stream but did not return it, so
-          // let's use the already-read character now instead of reading a
-          // new byte.
-          b = nonLFChar;
-          CRFollowedByNonLF = false;
-        } else {
-          b = in.read();
-        }
-        if (b == -1) {
-          break;
-        }
-        bytes += 1;
-
-        byte c = (byte) b;
-        if (c == '\n') {
-          break;
-        }
-
-        if (c == '\r') {
-          byte nextC = (byte) in.read();
-          if (nextC != '\n') {
-            CRFollowedByNonLF = true;
-            nonLFChar = nextC;
-          } else {
-            bytes += 1;
-          }
-          break;
-        }
-
-        if (out != null) {
-          out.write(c);
-        }
-      }
-      return bytes;
-    }
-
-    /** Read a line. */
-    public boolean next(LongWritable key, Text value) throws IOException {
-      if (pos > end)
-        return false;
-
-      key.set(pos); // key is position
-      buffer.reset();
-      // long bytesRead = LineRecordReader.readLine(in, buffer);
-      long bytesRead = readLine(in, buffer);
-      if (bytesRead == 0) {
-        return false;
-      }
-      pos = in.getPos();
-      // if we have read ahead because we encountered a carriage return
-      // char followed by a non line feed char, decrement the pos
-      if (CRFollowedByNonLF) {
-        pos--;
-      }
-
-      bridge.target = value;
-      buffer.writeTo(bridge);
-      return true;
-    }
-
-    /**
-     * Get the progress within the split
-     */
-    @Override
-    public float getProgress() {
-      if (start == end) {
-        return 0.0f;
-      } else {
-        return Math.min(1.0f, (pos - start) / (float) (end - start));
-      }
-    }
-
-    @Override
-    public void close() throws IOException {
-      in.close();
-    }
-
-    @Override
-    public LongWritable getCurrentKey() throws IOException, InterruptedException {
-      return key;
-    }
-
-    @Override
-    public Text getCurrentValue() throws IOException, InterruptedException {
-      return value;
-    }
-
-    @Override
-    public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
-      // no op
-    }
-
-    @Override
-    public boolean nextKeyValue() throws IOException, InterruptedException {
-      return next(key, value);
-    }
-
-  }
-
-  @Override
-  protected boolean isSplitable(JobContext context, Path file) {
-    return true;
-  }
-
-  @Override
-  public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
-    try {
-      return new BZip2LineRecordReader(context.getConfiguration(), (FileSplit) split);
-    } catch (IOException e) {
-      throw new RuntimeException(e);
-    }
-  }
-
-}
\ No newline at end of file
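
The carriage-return handling in readLine() above is easier to follow in isolation. Below is a self-contained sketch of the same rule, with the read-ahead byte carried across calls the way nonLFChar is; the class name and sample input are made up:

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;

public class CrLfSplitSketch {

  // Mirrors BZip2LineRecordReader.readLine(): '\n', "\r\n" and a bare '\r' all end a line;
  // when '\r' is followed by something other than '\n', that byte is carried over to the
  // next call instead of being lost.
  private static int carriedByte = -1;

  static String readLine(InputStream in) throws IOException {
    StringBuilder sb = new StringBuilder();
    int bytes = 0;
    while (true) {
      int b = (carriedByte != -1) ? carriedByte : in.read();
      carriedByte = -1;
      if (b == -1) {
        return bytes == 0 ? null : sb.toString();
      }
      bytes++;
      if (b == '\n') {
        return sb.toString();
      }
      if (b == '\r') {
        int next = in.read();
        if (next != '\n') {
          carriedByte = next; // keep the read-ahead byte for the next line
        }
        return sb.toString();
      }
      sb.append((char) b);
    }
  }

  public static void main(String[] args) throws IOException {
    InputStream in = new ByteArrayInputStream("a\rb\r\nc\nd".getBytes("US-ASCII"));
    for (String line; (line = readLine(in)) != null; ) {
      System.out.println(line); // prints a, b, c, d on separate lines
    }
  }
}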

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/text/CBZip2InputStream.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/text/CBZip2InputStream.java b/crunch/src/main/java/org/apache/crunch/io/text/CBZip2InputStream.java
deleted file mode 100644
index 92bb787..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/text/CBZip2InputStream.java
+++ /dev/null
@@ -1,980 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.text;
-
-import java.io.IOException;
-import java.io.InputStream;
-
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.io.compress.bzip2.BZip2Constants;
-import org.apache.hadoop.mapreduce.InputSplit;
-
-/**
- * An input stream that decompresses from the BZip2 format (without the file
- * header chars) to be read as any other stream.
- * 
- * @author <a href="mailto:keiron@aftexsw.com">Keiron Liddle</a>
- */
-class CBZip2InputStream extends InputStream implements BZip2Constants {
-  private static void cadvise(String reason) throws IOException {
-    throw new IOException(reason);
-  }
-
-  private static void compressedStreamEOF() throws IOException {
-    cadvise("compressedStream EOF");
-  }
-
-  private void makeMaps() {
-    int i;
-    nInUse = 0;
-    for (i = 0; i < 256; i++) {
-      if (inUse[i]) {
-        seqToUnseq[nInUse] = (char) i;
-        unseqToSeq[i] = (char) nInUse;
-        nInUse++;
-      }
-    }
-  }
-
-  /*
-   * index of the last char in the block, so the block size == last + 1.
-   */
-  private int last;
-
-  /*
-   * index in zptr[] of original string after sorting.
-   */
-  private int origPtr;
-
-  /*
-   * always: in the range 0 .. 9. The current block size is 100000 * this
-   * number.
-   */
-  private int blockSize100k;
-
-  private boolean blockRandomised;
-
-  // a buffer to keep the read byte
-  private int bsBuff;
-
-  // since bzip is bit-aligned at block boundaries there can be a case wherein
-  // only a few bits out of a read byte are consumed and the remaining bits
-  // need to be consumed while processing the next block.
-  // indicate how many bits in bsBuff have not been processed yet
-  private int bsLive;
-  private CRC mCrc = new CRC();
-
-  private boolean[] inUse = new boolean[256];
-  private int nInUse;
-
-  private char[] seqToUnseq = new char[256];
-  private char[] unseqToSeq = new char[256];
-
-  private char[] selector = new char[MAX_SELECTORS];
-  private char[] selectorMtf = new char[MAX_SELECTORS];
-
-  private int[] tt;
-  private char[] ll8;
-
-  /*
-   * freq table collected to save a pass over the data during decompression.
-   */
-  private int[] unzftab = new int[256];
-
-  private int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE];
-  private int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE];
-  private int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE];
-  private int[] minLens = new int[N_GROUPS];
-
-  private FSDataInputStream innerBsStream;
-  long readLimit = Long.MAX_VALUE;
-
-  public long getReadLimit() {
-    return readLimit;
-  }
-
-  public void setReadLimit(long readLimit) {
-    this.readLimit = readLimit;
-  }
-
-  long readCount;
-
-  public long getReadCount() {
-    return readCount;
-  }
-
-  private boolean streamEnd = false;
-
-  private int currentChar = -1;
-
-  private static final int START_BLOCK_STATE = 1;
-  private static final int RAND_PART_A_STATE = 2;
-  private static final int RAND_PART_B_STATE = 3;
-  private static final int RAND_PART_C_STATE = 4;
-  private static final int NO_RAND_PART_A_STATE = 5;
-  private static final int NO_RAND_PART_B_STATE = 6;
-  private static final int NO_RAND_PART_C_STATE = 7;
-
-  private int currentState = START_BLOCK_STATE;
-
-  private int storedBlockCRC, storedCombinedCRC;
-  private int computedBlockCRC, computedCombinedCRC;
-  private boolean checkComputedCombinedCRC = true;
-
-  int i2, count, chPrev, ch2;
-  int i, tPos;
-  int rNToGo = 0;
-  int rTPos = 0;
-  int j2;
-  char z;
-
-  // see comment in getPos()
-  private long retPos = -1;
-  // the position offset which corresponds to the end of the InputSplit that
-  // will be processed by this instance
-  private long endOffsetOfSplit;
-
-  private boolean signalToStopReading;
-
-  public CBZip2InputStream(FSDataInputStream zStream, int blockSize, long end) throws IOException {
-    endOffsetOfSplit = end;
-    // initialize retPos to the beginning of the current InputSplit
-    // see comments in getPos() to understand how this is used.
-    retPos = zStream.getPos();
-    ll8 = null;
-    tt = null;
-    checkComputedCombinedCRC = blockSize == -1;
-    bsSetStream(zStream);
-    initialize(blockSize);
-    initBlock(blockSize != -1);
-    setupBlock();
-  }
-
-  @Override
-  public int read() throws IOException {
-    if (streamEnd) {
-      return -1;
-    } else {
-
-      // if we just started reading a bzip block which starts at a position
-      // >= end of current split, then we should set up retpos such that
-      // after a record is read, future getPos() calls will get a value
-      // > end of current split - this way we will read only one record out
-      // of this bzip block - the rest of the records from this bzip block
-      // should be read by the next map task while processing the next split
-      if (signalToStopReading) {
-        retPos = endOffsetOfSplit + 1;
-      }
-
-      int retChar = currentChar;
-      switch (currentState) {
-      case START_BLOCK_STATE:
-        break;
-      case RAND_PART_A_STATE:
-        break;
-      case RAND_PART_B_STATE:
-        setupRandPartB();
-        break;
-      case RAND_PART_C_STATE:
-        setupRandPartC();
-        break;
-      case NO_RAND_PART_A_STATE:
-        break;
-      case NO_RAND_PART_B_STATE:
-        setupNoRandPartB();
-        break;
-      case NO_RAND_PART_C_STATE:
-        setupNoRandPartC();
-        break;
-      default:
-        break;
-      }
-      return retChar;
-    }
-  }
-
-  /**
-   * getPos is used by the caller to know when the processing of the current
-   * {@link InputSplit} is complete. In this method, as we read each bzip block,
-   * we keep returning the beginning of the {@link InputSplit} as the return
-   * value until we hit a block which starts at a position >= end of current
-   * split. At that point we should set up retPos such that after a record is
-   * read, future getPos() calls will get a value > end of current split - this
-   * way we will read only one record out of that bzip block - the rest of the
-   * records from that bzip block should be read by the next map task while
-   * processing the next split
-   * 
-   * @return
-   * @throws IOException
-   */
-  public long getPos() throws IOException {
-    return retPos;
-  }
-
-  private void initialize(int blockSize) throws IOException {
-    if (blockSize == -1) {
-      char magic1, magic2;
-      char magic3, magic4;
-      magic1 = bsGetUChar();
-      magic2 = bsGetUChar();
-      magic3 = bsGetUChar();
-      magic4 = bsGetUChar();
-      if (magic1 != 'B' || magic2 != 'Z' || magic3 != 'h' || magic4 < '1' || magic4 > '9') {
-        bsFinishedWithStream();
-        streamEnd = true;
-        return;
-      }
-      blockSize = magic4 - '0';
-    }
-
-    setDecompressStructureSizes(blockSize);
-    computedCombinedCRC = 0;
-  }
-
-  private final static long mask = 0xffffffffffffL;
-  private final static long eob = 0x314159265359L & mask;
-  private final static long eos = 0x177245385090L & mask;
-
-  private void initBlock(boolean searchForMagic) throws IOException {
-    if (readCount >= readLimit) {
-      bsFinishedWithStream();
-      streamEnd = true;
-      return;
-    }
-
-    // position before beginning of bzip block header
-    long pos = innerBsStream.getPos();
-    if (!searchForMagic) {
-      char magic1, magic2, magic3, magic4;
-      char magic5, magic6;
-      magic1 = bsGetUChar();
-      magic2 = bsGetUChar();
-      magic3 = bsGetUChar();
-      magic4 = bsGetUChar();
-      magic5 = bsGetUChar();
-      magic6 = bsGetUChar();
-      if (magic1 == 0x17 && magic2 == 0x72 && magic3 == 0x45 && magic4 == 0x38 && magic5 == 0x50 && magic6 == 0x90) {
-        complete();
-        return;
-      }
-
-      if (magic1 != 0x31 || magic2 != 0x41 || magic3 != 0x59 || magic4 != 0x26 || magic5 != 0x53 || magic6 != 0x59) {
-        badBlockHeader();
-        streamEnd = true;
-        return;
-      }
-    } else {
-      long magic = 0;
-      for (int i = 0; i < 6; i++) {
-        magic <<= 8;
-        magic |= bsGetUChar();
-      }
-      while (magic != eos && magic != eob) {
-        magic <<= 1;
-        magic &= mask;
-        magic |= bsR(1);
-        // if we just found the block header, the beginning of the bzip
-        // header would be 6 bytes before the current stream position
-        // when we eventually break from this while(), if it is because
-        // we found a block header then pos will have the correct start
-        // of header position
-        pos = innerBsStream.getPos() - 6;
-      }
-      if (magic == eos) {
-        complete();
-        return;
-      }
-
-    }
-    // if the previous block finished a few bits into the previous byte,
-    // then we will first be reading the remaining bits from the previous
-    // byte - so logically pos needs to be one behind
-    if (bsLive > 0) {
-      pos--;
-    }
-
-    if (pos >= endOffsetOfSplit) {
-      // we have reached a block which begins exactly at the next InputSplit
-      // or >1 byte into the next InputSplit - let's record this fact
-      signalToStopReading = true;
-    }
-    storedBlockCRC = bsGetInt32();
-
-    if (bsR(1) == 1) {
-      blockRandomised = true;
-    } else {
-      blockRandomised = false;
-    }
-
-    // currBlockNo++;
-    getAndMoveToFrontDecode();
-
-    mCrc.initialiseCRC();
-    currentState = START_BLOCK_STATE;
-  }
-
-  private void endBlock() throws IOException {
-    computedBlockCRC = mCrc.getFinalCRC();
-    /* A bad CRC is considered a fatal error. */
-    if (storedBlockCRC != computedBlockCRC) {
-      crcError();
-    }
-
-    computedCombinedCRC = (computedCombinedCRC << 1) | (computedCombinedCRC >>> 31);
-    computedCombinedCRC ^= computedBlockCRC;
-  }
-
-  private void complete() throws IOException {
-    storedCombinedCRC = bsGetInt32();
-    if (checkComputedCombinedCRC && storedCombinedCRC != computedCombinedCRC) {
-      crcError();
-    }
-    if (innerBsStream.getPos() < endOffsetOfSplit) {
-      throw new IOException("Encountered additional bytes in the filesplit past the crc block. "
-          + "Loading of concatenated bz2 files is not supported");
-    }
-    bsFinishedWithStream();
-    streamEnd = true;
-  }
-
-  private static void blockOverrun() throws IOException {
-    cadvise("block overrun");
-  }
-
-  private static void badBlockHeader() throws IOException {
-    cadvise("bad block header");
-  }
-
-  private static void crcError() throws IOException {
-    cadvise("CRC error");
-  }
-
-  private void bsFinishedWithStream() {
-    if (this.innerBsStream != null) {
-      if (this.innerBsStream != System.in) {
-        this.innerBsStream = null;
-      }
-    }
-  }
-
-  private void bsSetStream(FSDataInputStream f) {
-    innerBsStream = f;
-    bsLive = 0;
-    bsBuff = 0;
-  }
-
-  final private int readBs() throws IOException {
-    readCount++;
-    return innerBsStream.read();
-  }
-
-  private int bsR(int n) throws IOException {
-    int v;
-    while (bsLive < n) {
-      int zzi;
-      zzi = readBs();
-      if (zzi == -1) {
-        compressedStreamEOF();
-      }
-      bsBuff = (bsBuff << 8) | (zzi & 0xff);
-      bsLive += 8;
-    }
-
-    v = (bsBuff >> (bsLive - n)) & ((1 << n) - 1);
-    bsLive -= n;
-    return v;
-  }
-
-  private char bsGetUChar() throws IOException {
-    return (char) bsR(8);
-  }
-
-  private int bsGetint() throws IOException {
-    int u = 0;
-    u = (u << 8) | bsR(8);
-    u = (u << 8) | bsR(8);
-    u = (u << 8) | bsR(8);
-    u = (u << 8) | bsR(8);
-    return u;
-  }
-
-  private int bsGetIntVS(int numBits) throws IOException {
-    return bsR(numBits);
-  }
-
-  private int bsGetInt32() throws IOException {
-    return bsGetint();
-  }
-
-  private void hbCreateDecodeTables(int[] limit, int[] base, int[] perm, char[] length, int minLen, int maxLen,
-      int alphaSize) {
-    int pp, i, j, vec;
-
-    pp = 0;
-    for (i = minLen; i <= maxLen; i++) {
-      for (j = 0; j < alphaSize; j++) {
-        if (length[j] == i) {
-          perm[pp] = j;
-          pp++;
-        }
-      }
-    }
-
-    for (i = 0; i < MAX_CODE_LEN; i++) {
-      base[i] = 0;
-    }
-    for (i = 0; i < alphaSize; i++) {
-      base[length[i] + 1]++;
-    }
-
-    for (i = 1; i < MAX_CODE_LEN; i++) {
-      base[i] += base[i - 1];
-    }
-
-    for (i = 0; i < MAX_CODE_LEN; i++) {
-      limit[i] = 0;
-    }
-    vec = 0;
-
-    for (i = minLen; i <= maxLen; i++) {
-      vec += (base[i + 1] - base[i]);
-      limit[i] = vec - 1;
-      vec <<= 1;
-    }
-    for (i = minLen + 1; i <= maxLen; i++) {
-      base[i] = ((limit[i - 1] + 1) << 1) - base[i];
-    }
-  }
-
-  private void recvDecodingTables() throws IOException {
-    char len[][] = new char[N_GROUPS][MAX_ALPHA_SIZE];
-    int i, j, t, nGroups, nSelectors, alphaSize;
-    int minLen, maxLen;
-    boolean[] inUse16 = new boolean[16];
-
-    /* Receive the mapping table */
-    for (i = 0; i < 16; i++) {
-      if (bsR(1) == 1) {
-        inUse16[i] = true;
-      } else {
-        inUse16[i] = false;
-      }
-    }
-
-    for (i = 0; i < 256; i++) {
-      inUse[i] = false;
-    }
-
-    for (i = 0; i < 16; i++) {
-      if (inUse16[i]) {
-        for (j = 0; j < 16; j++) {
-          if (bsR(1) == 1) {
-            inUse[i * 16 + j] = true;
-          }
-        }
-      }
-    }
-
-    makeMaps();
-    alphaSize = nInUse + 2;
-
-    /* Now the selectors */
-    nGroups = bsR(3);
-    nSelectors = bsR(15);
-    for (i = 0; i < nSelectors; i++) {
-      j = 0;
-      while (bsR(1) == 1) {
-        j++;
-      }
-      selectorMtf[i] = (char) j;
-    }
-
-    /* Undo the MTF values for the selectors. */
-    {
-      char[] pos = new char[N_GROUPS];
-      char tmp, v;
-      for (v = 0; v < nGroups; v++) {
-        pos[v] = v;
-      }
-
-      for (i = 0; i < nSelectors; i++) {
-        v = selectorMtf[i];
-        tmp = pos[v];
-        while (v > 0) {
-          pos[v] = pos[v - 1];
-          v--;
-        }
-        pos[0] = tmp;
-        selector[i] = tmp;
-      }
-    }
-
-    /* Now the coding tables */
-    for (t = 0; t < nGroups; t++) {
-      int curr = bsR(5);
-      for (i = 0; i < alphaSize; i++) {
-        while (bsR(1) == 1) {
-          if (bsR(1) == 0) {
-            curr++;
-          } else {
-            curr--;
-          }
-        }
-        len[t][i] = (char) curr;
-      }
-    }
-
-    /* Create the Huffman decoding tables */
-    for (t = 0; t < nGroups; t++) {
-      minLen = 32;
-      maxLen = 0;
-      for (i = 0; i < alphaSize; i++) {
-        if (len[t][i] > maxLen) {
-          maxLen = len[t][i];
-        }
-        if (len[t][i] < minLen) {
-          minLen = len[t][i];
-        }
-      }
-      hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen, maxLen, alphaSize);
-      minLens[t] = minLen;
-    }
-  }
-
-  private void getAndMoveToFrontDecode() throws IOException {
-    char[] yy = new char[256];
-    int i, j, nextSym, limitLast;
-    int EOB, groupNo, groupPos;
-
-    limitLast = baseBlockSize * blockSize100k;
-    origPtr = bsGetIntVS(24);
-
-    recvDecodingTables();
-    EOB = nInUse + 1;
-    groupNo = -1;
-    groupPos = 0;
-
-    /*
-     * Setting up the unzftab entries here is not strictly necessary, but it
-     * does save having to do it later in a separate pass, and so saves a
-     * block's worth of cache misses.
-     */
-    for (i = 0; i <= 255; i++) {
-      unzftab[i] = 0;
-    }
-
-    for (i = 0; i <= 255; i++) {
-      yy[i] = (char) i;
-    }
-
-    last = -1;
-
-    {
-      int zt, zn, zvec, zj;
-      if (groupPos == 0) {
-        groupNo++;
-        groupPos = G_SIZE;
-      }
-      groupPos--;
-      zt = selector[groupNo];
-      zn = minLens[zt];
-      zvec = bsR(zn);
-      while (zvec > limit[zt][zn]) {
-        zn++;
-        {
-          {
-            while (bsLive < 1) {
-              int zzi = 0;
-              try {
-                zzi = readBs();
-              } catch (IOException e) {
-                compressedStreamEOF();
-              }
-              if (zzi == -1) {
-                compressedStreamEOF();
-              }
-              bsBuff = (bsBuff << 8) | (zzi & 0xff);
-              bsLive += 8;
-            }
-          }
-          zj = (bsBuff >> (bsLive - 1)) & 1;
-          bsLive--;
-        }
-        zvec = (zvec << 1) | zj;
-      }
-      nextSym = perm[zt][zvec - base[zt][zn]];
-    }
-
-    while (true) {
-
-      if (nextSym == EOB) {
-        break;
-      }
-
-      if (nextSym == RUNA || nextSym == RUNB) {
-        char ch;
-        int s = -1;
-        int N = 1;
-        do {
-          if (nextSym == RUNA) {
-            s = s + (0 + 1) * N;
-          } else if (nextSym == RUNB) {
-            s = s + (1 + 1) * N;
-          }
-          N = N * 2;
-          {
-            int zt, zn, zvec, zj;
-            if (groupPos == 0) {
-              groupNo++;
-              groupPos = G_SIZE;
-            }
-            groupPos--;
-            zt = selector[groupNo];
-            zn = minLens[zt];
-            zvec = bsR(zn);
-            while (zvec > limit[zt][zn]) {
-              zn++;
-              {
-                {
-                  while (bsLive < 1) {
-                    int zzi = 0;
-                    try {
-                      zzi = readBs();
-                    } catch (IOException e) {
-                      compressedStreamEOF();
-                    }
-                    if (zzi == -1) {
-                      compressedStreamEOF();
-                    }
-                    bsBuff = (bsBuff << 8) | (zzi & 0xff);
-                    bsLive += 8;
-                  }
-                }
-                zj = (bsBuff >> (bsLive - 1)) & 1;
-                bsLive--;
-              }
-              zvec = (zvec << 1) | zj;
-            }
-            nextSym = perm[zt][zvec - base[zt][zn]];
-          }
-        } while (nextSym == RUNA || nextSym == RUNB);
-
-        s++;
-        ch = seqToUnseq[yy[0]];
-        unzftab[ch] += s;
-
-        while (s > 0) {
-          last++;
-          ll8[last] = ch;
-          s--;
-        }
-
-        if (last >= limitLast) {
-          blockOverrun();
-        }
-        continue;
-      } else {
-        char tmp;
-        last++;
-        if (last >= limitLast) {
-          blockOverrun();
-        }
-
-        tmp = yy[nextSym - 1];
-        unzftab[seqToUnseq[tmp]]++;
-        ll8[last] = seqToUnseq[tmp];
-
-        /*
-         * This loop is hammered during decompression, hence the unrolling.
-         * 
-         * for (j = nextSym-1; j > 0; j--) yy[j] = yy[j-1];
-         */
-
-        j = nextSym - 1;
-        for (; j > 3; j -= 4) {
-          yy[j] = yy[j - 1];
-          yy[j - 1] = yy[j - 2];
-          yy[j - 2] = yy[j - 3];
-          yy[j - 3] = yy[j - 4];
-        }
-        for (; j > 0; j--) {
-          yy[j] = yy[j - 1];
-        }
-
-        yy[0] = tmp;
-        {
-          int zt, zn, zvec, zj;
-          if (groupPos == 0) {
-            groupNo++;
-            groupPos = G_SIZE;
-          }
-          groupPos--;
-          zt = selector[groupNo];
-          zn = minLens[zt];
-          zvec = bsR(zn);
-          while (zvec > limit[zt][zn]) {
-            zn++;
-            {
-              {
-                while (bsLive < 1) {
-                  int zzi;
-                  char thech = 0;
-                  try {
-                    thech = (char) readBs();
-                  } catch (IOException e) {
-                    compressedStreamEOF();
-                  }
-                  zzi = thech;
-                  bsBuff = (bsBuff << 8) | (zzi & 0xff);
-                  bsLive += 8;
-                }
-              }
-              zj = (bsBuff >> (bsLive - 1)) & 1;
-              bsLive--;
-            }
-            zvec = (zvec << 1) | zj;
-          }
-          nextSym = perm[zt][zvec - base[zt][zn]];
-        }
-        continue;
-      }
-    }
-  }
-
-  private void setupBlock() throws IOException {
-    int[] cftab = new int[257];
-    char ch;
-
-    cftab[0] = 0;
-    for (i = 1; i <= 256; i++) {
-      cftab[i] = unzftab[i - 1];
-    }
-    for (i = 1; i <= 256; i++) {
-      cftab[i] += cftab[i - 1];
-    }
-
-    for (i = 0; i <= last; i++) {
-      ch = ll8[i];
-      tt[cftab[ch]] = i;
-      cftab[ch]++;
-    }
-    cftab = null;
-
-    tPos = tt[origPtr];
-
-    count = 0;
-    i2 = 0;
-    ch2 = 256; /* not a char and not EOF */
-
-    if (blockRandomised) {
-      rNToGo = 0;
-      rTPos = 0;
-      setupRandPartA();
-    } else {
-      setupNoRandPartA();
-    }
-  }
-
-  private void setupRandPartA() throws IOException {
-    if (i2 <= last) {
-      chPrev = ch2;
-      ch2 = ll8[tPos];
-      tPos = tt[tPos];
-      if (rNToGo == 0) {
-        rNToGo = rNums[rTPos];
-        rTPos++;
-        if (rTPos == 512) {
-          rTPos = 0;
-        }
-      }
-      rNToGo--;
-      ch2 ^= ((rNToGo == 1) ? 1 : 0);
-      i2++;
-
-      currentChar = ch2;
-      currentState = RAND_PART_B_STATE;
-      mCrc.updateCRC(ch2);
-    } else {
-      endBlock();
-      initBlock(false);
-      setupBlock();
-    }
-  }
-
-  private void setupNoRandPartA() throws IOException {
-    if (i2 <= last) {
-      chPrev = ch2;
-      ch2 = ll8[tPos];
-      tPos = tt[tPos];
-      i2++;
-
-      currentChar = ch2;
-      currentState = NO_RAND_PART_B_STATE;
-      mCrc.updateCRC(ch2);
-    } else {
-      endBlock();
-      initBlock(false);
-      setupBlock();
-    }
-  }
-
-  private void setupRandPartB() throws IOException {
-    if (ch2 != chPrev) {
-      currentState = RAND_PART_A_STATE;
-      count = 1;
-      setupRandPartA();
-    } else {
-      count++;
-      if (count >= 4) {
-        z = ll8[tPos];
-        tPos = tt[tPos];
-        if (rNToGo == 0) {
-          rNToGo = rNums[rTPos];
-          rTPos++;
-          if (rTPos == 512) {
-            rTPos = 0;
-          }
-        }
-        rNToGo--;
-        z ^= ((rNToGo == 1) ? 1 : 0);
-        j2 = 0;
-        currentState = RAND_PART_C_STATE;
-        setupRandPartC();
-      } else {
-        currentState = RAND_PART_A_STATE;
-        setupRandPartA();
-      }
-    }
-  }
-
-  private void setupRandPartC() throws IOException {
-    if (j2 < (int) z) {
-      currentChar = ch2;
-      mCrc.updateCRC(ch2);
-      j2++;
-    } else {
-      currentState = RAND_PART_A_STATE;
-      i2++;
-      count = 0;
-      setupRandPartA();
-    }
-  }
-
-  private void setupNoRandPartB() throws IOException {
-    if (ch2 != chPrev) {
-      currentState = NO_RAND_PART_A_STATE;
-      count = 1;
-      setupNoRandPartA();
-    } else {
-      count++;
-      if (count >= 4) {
-        z = ll8[tPos];
-        tPos = tt[tPos];
-        currentState = NO_RAND_PART_C_STATE;
-        j2 = 0;
-        setupNoRandPartC();
-      } else {
-        currentState = NO_RAND_PART_A_STATE;
-        setupNoRandPartA();
-      }
-    }
-  }
-
-  private void setupNoRandPartC() throws IOException {
-    if (j2 < (int) z) {
-      currentChar = ch2;
-      mCrc.updateCRC(ch2);
-      j2++;
-    } else {
-      currentState = NO_RAND_PART_A_STATE;
-      i2++;
-      count = 0;
-      setupNoRandPartA();
-    }
-  }
-
-  private void setDecompressStructureSizes(int newSize100k) {
-    if (!(0 <= newSize100k && newSize100k <= 9 && 0 <= blockSize100k && blockSize100k <= 9)) {
-      // throw new IOException("Invalid block size");
-    }
-
-    blockSize100k = newSize100k;
-
-    if (newSize100k == 0) {
-      return;
-    }
-
-    int n = baseBlockSize * newSize100k;
-    ll8 = new char[n];
-    tt = new int[n];
-  }
-
-  private static class CRC {
-    public static int crc32Table[] = { 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc, 0x17c56b6b,
-        0x1a864db2, 0x1e475005, 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, 0x350c9b64, 0x31cd86d3, 0x3c8ea00a,
-        0x384fbdbd, 0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9, 0x5f15adac, 0x5bd4b01b, 0x569796c2, 0x52568b75,
-        0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011, 0x791d4014, 0x7ddc5da3, 0x709f7b7a, 0x745e66cd, 0x9823b6e0,
-        0x9ce2ab57, 0x91a18d8e, 0x95609039, 0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5, 0xbe2b5b58, 0xbaea46ef,
-        0xb7a96036, 0xb3687d81, 0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d, 0xd4326d90, 0xd0f37027, 0xddb056fe,
-        0xd9714b49, 0xc7361b4c, 0xc3f706fb, 0xceb42022, 0xca753d95, 0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1,
-        0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d, 0x34867077, 0x30476dc0, 0x3d044b19, 0x39c556ae, 0x278206ab,
-        0x23431b1c, 0x2e003dc5, 0x2ac12072, 0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16, 0x018aeb13, 0x054bf6a4,
-        0x0808d07d, 0x0cc9cdca, 0x7897ab07, 0x7c56b6b0, 0x71159069, 0x75d48dde, 0x6b93dddb, 0x6f52c06c, 0x6211e6b5,
-        0x66d0fb02, 0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1, 0x53dc6066, 0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba,
-        0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e, 0xbfa1b04b, 0xbb60adfc, 0xb6238b25, 0xb2e29692, 0x8aad2b2f,
-        0x8e6c3698, 0x832f1041, 0x87ee0df6, 0x99a95df3, 0x9d684044, 0x902b669d, 0x94ea7b2a, 0xe0b41de7, 0xe4750050,
-        0xe9362689, 0xedf73b3e, 0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2, 0xc6bcf05f, 0xc27dede8, 0xcf3ecb31,
-        0xcbffd686, 0xd5b88683, 0xd1799b34, 0xdc3abded, 0xd8fba05a, 0x690ce0ee, 0x6dcdfd59, 0x608edb80, 0x644fc637,
-        0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb, 0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f, 0x5c007b8a,
-        0x58c1663d, 0x558240e4, 0x51435d53, 0x251d3b9e, 0x21dc2629, 0x2c9f00f0, 0x285e1d47, 0x36194d42, 0x32d850f5,
-        0x3f9b762c, 0x3b5a6b9b, 0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff, 0x1011a0fa, 0x14d0bd4d, 0x19939b94,
-        0x1d528623, 0xf12f560e, 0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7, 0xe22b20d2, 0xe6ea3d65, 0xeba91bbc, 0xef68060b,
-        0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f, 0xc423cd6a, 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3, 0xbd3e8d7e,
-        0xb9ff90c9, 0xb4bcb610, 0xb07daba7, 0xae3afba2, 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b, 0x9b3660c6, 0x9ff77d71,
-        0x92b45ba8, 0x9675461f, 0x8832161a, 0x8cf30bad, 0x81b02d74, 0x857130c3, 0x5d8a9099, 0x594b8d2e, 0x5408abf7,
-        0x50c9b640, 0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c, 0x7b827d21, 0x7f436096, 0x7200464f, 0x76c15bf8,
-        0x68860bfd, 0x6c47164a, 0x61043093, 0x65c52d24, 0x119b4be9, 0x155a565e, 0x18197087, 0x1cd86d30, 0x029f3d35,
-        0x065e2082, 0x0b1d065b, 0x0fdc1bec, 0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088, 0x2497d08d, 0x2056cd3a,
-        0x2d15ebe3, 0x29d4f654, 0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0, 0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb,
-        0xdbee767c, 0xe3a1cbc1, 0xe760d676, 0xea23f0af, 0xeee2ed18, 0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4,
-        0x89b8fd09, 0x8d79e0be, 0x803ac667, 0x84fbdbd0, 0x9abc8bd5, 0x9e7d9662, 0x933eb0bb, 0x97ffad0c, 0xafb010b1,
-        0xab710d06, 0xa6322bdf, 0xa2f33668, 0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4 };
-
-    public CRC() {
-      initialiseCRC();
-    }
-
-    void initialiseCRC() {
-      globalCrc = 0xffffffff;
-    }
-
-    int getFinalCRC() {
-      return ~globalCrc;
-    }
-
-    void updateCRC(int inCh) {
-      int temp = (globalCrc >> 24) ^ inCh;
-      if (temp < 0) {
-        temp = 256 + temp;
-      }
-      globalCrc = (globalCrc << 8) ^ CRC.crc32Table[temp];
-    }
-
-    int globalCrc;
-  }
-}
\ No newline at end of file
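
The hard-coded crc32Table above appears to be the table-driven form of a non-reflected CRC-32 with generator polynomial 0x04C11DB7, the variant bzip2 uses, and updateCRC() applies the usual one-byte table step. A short sketch that derives the table and prints the first entries for comparison (the class name is made up):

public class Bzip2CrcTableSketch {
  public static void main(String[] args) {
    int[] table = new int[256];
    // Non-reflected CRC-32 with generator polynomial 0x04C11DB7.
    for (int i = 0; i < 256; i++) {
      int c = i << 24;
      for (int bit = 0; bit < 8; bit++) {
        c = ((c & 0x80000000) != 0) ? (c << 1) ^ 0x04c11db7 : c << 1;
      }
      table[i] = c;
    }
    // The first entries should match the hard-coded table:
    // 0x00000000, 0x04c11db7, 0x09823b6e, ...
    System.out.printf("0x%08x 0x%08x 0x%08x%n", table[0], table[1], table[2]);
  }
}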

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/text/LineParser.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/text/LineParser.java b/crunch/src/main/java/org/apache/crunch/io/text/LineParser.java
deleted file mode 100644
index 9438014..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/text/LineParser.java
+++ /dev/null
@@ -1,125 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.text;
-
-import java.util.Iterator;
-import java.util.List;
-import java.util.StringTokenizer;
-
-import org.apache.crunch.MapFn;
-import org.apache.crunch.Pair;
-import org.apache.crunch.fn.CompositeMapFn;
-import org.apache.crunch.fn.IdentityFn;
-import org.apache.crunch.types.PTableType;
-import org.apache.crunch.types.PType;
-
-import com.google.common.base.Splitter;
-import com.google.common.collect.ImmutableList;
-
-/**
- * An abstraction for parsing the lines of a text file using a {@code PType<T>} to
- * convert the lines of text into a given data type. 
- *
- * @param <T> The type returned by the text parsing
- */
-abstract class LineParser<T> {
-
-  public static <S> LineParser<S> forType(PType<S> ptype) {
-    return new SimpleLineParser<S>(ptype);
-  }
-  
-  public static <K, V> LineParser<Pair<K, V>> forTableType(PTableType<K, V> ptt, String sep) {
-    return new KeyValueLineParser<K, V>(ptt, sep); 
-  }
-  
-  private MapFn<String, T> mapFn;
-  
-  public void initialize() {
-    mapFn = getMapFn();
-    mapFn.initialize();
-  }
-    
-  public T parse(String line) {
-    return mapFn.map(line);
-  }
-  
-  protected abstract MapFn<String, T> getMapFn();
-  
-  private static <T> MapFn<String, T> getMapFnForPType(PType<T> ptype) {
-    MapFn ret = null;
-    if (String.class.equals(ptype.getTypeClass())) {
-      ret = (MapFn) IdentityFn.getInstance();
-    } else {
-      // Check for a composite MapFn for the PType.
-      // Note that this won't work for Avro-- need to solve that.
-      ret = ptype.getInputMapFn();
-      if (ret instanceof CompositeMapFn) {
-        ret = ((CompositeMapFn) ret).getSecond();
-      }
-    }
-    return ret;
-  }
-  
-  private static class SimpleLineParser<S> extends LineParser<S> {
-
-    private final PType<S> ptype;
-    
-    public SimpleLineParser(PType<S> ptype) {
-      this.ptype = ptype;
-    }
-
-    @Override
-    protected MapFn<String, S> getMapFn() {
-      return getMapFnForPType(ptype);
-    }
-  }
-  
-  private static class KeyValueLineParser<K, V> extends LineParser<Pair<K, V>> {
-
-    private final PTableType<K, V> ptt;
-    private final String sep;
-    
-    public KeyValueLineParser(PTableType<K, V> ptt, String sep) {
-      this.ptt = ptt;
-      this.sep = sep;
-    }
-
-    @Override
-    protected MapFn<String, Pair<K, V>> getMapFn() {
-      final MapFn<String, K> keyMapFn = getMapFnForPType(ptt.getKeyType());
-      final MapFn<String, V> valueMapFn = getMapFnForPType(ptt.getValueType());
-      
-      return new MapFn<String, Pair<K, V>>() {
-        @Override
-        public void initialize() {
-          keyMapFn.initialize();
-          valueMapFn.initialize();
-        }
-        
-        @Override
-        public Pair<K, V> map(String input) {
-          List<String> kv = ImmutableList.copyOf(Splitter.on(sep).limit(2).split(input));
-          if (kv.size() != 2) {
-            throw new RuntimeException("Invalid input string: " + input);
-          }
-          return Pair.of(keyMapFn.map(kv.get(0)), valueMapFn.map(kv.get(1)));
-        }
-      };
-    }
-  }
-}
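
The key/value split above needs Guava's Splitter with a limit of two: the split then happens only on the first occurrence of the separator, yielding exactly a key piece and a value piece (which may itself contain the separator). A standalone sketch of that behavior; the sample record and class name are made up:

import java.util.List;

import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableList;

public class KeyValueSplitSketch {
  public static void main(String[] args) {
    String line = "user42\t2013-04-23T22:41:03\tsome payload"; // placeholder record
    // limit(2) splits only on the first tab, so the value keeps any later tabs.
    List<String> kv = ImmutableList.copyOf(Splitter.on("\t").limit(2).split(line));
    System.out.println(kv.get(0)); // user42
    System.out.println(kv.get(1)); // the rest of the line, tabs included
  }
}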

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/text/NLineFileSource.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/text/NLineFileSource.java b/crunch/src/main/java/org/apache/crunch/io/text/NLineFileSource.java
deleted file mode 100644
index 40e2dbd..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/text/NLineFileSource.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.text;
-
-import java.io.IOException;
-
-import org.apache.crunch.io.CompositePathIterable;
-import org.apache.crunch.io.FormatBundle;
-import org.apache.crunch.io.ReadableSource;
-import org.apache.crunch.io.impl.FileSourceImpl;
-import org.apache.crunch.types.PType;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;
-
-/**
- * A {@code Source} instance that uses the {@code NLineInputFormat}, which gives each map
- * task a fraction of the lines in a text file as input. It is most useful when running
- * simulations on Hadoop, where each line represents the configuration for a single
- * simulation run.
- */
-public class NLineFileSource<T> extends FileSourceImpl<T> implements ReadableSource<T> {
-
-  private static FormatBundle getBundle(int linesPerTask) {
-    FormatBundle bundle = FormatBundle.forInput(NLineInputFormat.class);
-    bundle.set(NLineInputFormat.LINES_PER_MAP, String.valueOf(linesPerTask));
-    return bundle;
-  }
-  
-  /**
-   * Create a new {@code NLineFileSource} instance.
-   * 
-   * @param path The path to the input data, as a String
-   * @param ptype The PType to use for processing the data
-   * @param linesPerTask The number of lines from the input each map task will process
-   */
-  public NLineFileSource(String path, PType<T> ptype, int linesPerTask) {
-    this(new Path(path), ptype, linesPerTask);
-  }
-  
-  /**
-   * Create a new {@code NLineFileSource} instance.
-   *  
-   * @param path The {@code Path} to the input data
-   * @param ptype The PType to use for processing the data
-   * @param linesPerTask The number of lines from the input each map task will process
-   */
-  public NLineFileSource(Path path, PType<T> ptype, int linesPerTask) {
-    super(path, ptype, getBundle(linesPerTask));
-  }
-
-  @Override
-  public String toString() {
-    return "NLine(" + path + ")";
-  }
-  
-  @Override
-  public Iterable<T> read(Configuration conf) throws IOException {
-    return CompositePathIterable.create(path.getFileSystem(conf), path,
-        new TextFileReaderFactory<T>(LineParser.forType(ptype)));
-  }
-}
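
For context, a minimal sketch of how a source like this is typically wired into a pipeline so that each map task receives a fixed number of control lines; the driver class name, input path, and lines-per-task value are assumptions for illustration:

    import org.apache.crunch.PCollection;
    import org.apache.crunch.Pipeline;
    import org.apache.crunch.impl.mr.MRPipeline;
    import org.apache.crunch.io.text.NLineFileSource;
    import org.apache.crunch.types.writable.Writables;

    public class SimulationDriver {
      public static void main(String[] args) throws Exception {
        Pipeline pipeline = new MRPipeline(SimulationDriver.class);
        // Each map task is handed 10 lines of the control file as its input split.
        PCollection<String> configs = pipeline.read(
            new NLineFileSource<String>("/data/simulation-configs.txt", Writables.strings(), 10));
        // ... run one simulation per configuration line ...
        pipeline.done();
      }
    }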

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/text/TextFileReaderFactory.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/text/TextFileReaderFactory.java b/crunch/src/main/java/org/apache/crunch/io/text/TextFileReaderFactory.java
deleted file mode 100644
index e1fea6e..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/text/TextFileReaderFactory.java
+++ /dev/null
@@ -1,83 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.text;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.Iterator;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.crunch.io.FileReaderFactory;
-import org.apache.crunch.io.impl.AutoClosingIterator;
-import org.apache.crunch.types.PType;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-
-import com.google.common.collect.Iterators;
-import com.google.common.collect.UnmodifiableIterator;
-
-public class TextFileReaderFactory<T> implements FileReaderFactory<T> {
-
-  private static final Log LOG = LogFactory.getLog(TextFileReaderFactory.class);
-
-  private final LineParser<T> parser;
-
-  public TextFileReaderFactory(PType<T> ptype) {
-    this(LineParser.forType(ptype));
-  }
-  
-  public TextFileReaderFactory(LineParser<T> parser) {
-    this.parser = parser;
-  }
-
-  @Override
-  public Iterator<T> read(FileSystem fs, Path path) {
-    parser.initialize();
-
-    FSDataInputStream is;
-    try {
-      is = fs.open(path);
-    } catch (IOException e) {
-      LOG.info("Could not read path: " + path, e);
-      return Iterators.emptyIterator();
-    }
-
-    final BufferedReader reader = new BufferedReader(new InputStreamReader(is));
-    return new AutoClosingIterator<T>(reader, new UnmodifiableIterator<T>() {
-      private String nextLine;
-
-      @Override
-      public boolean hasNext() {
-        try {
-          return (nextLine = reader.readLine()) != null;
-        } catch (IOException e) {
-          LOG.info("Exception reading text file stream", e);
-          return false;
-        }
-      }
-
-      @Override
-      public T next() {
-        return parser.parse(nextLine);
-      }
-    });
-  }
-}
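
For context, the factory can also be used outside of a MapReduce job to iterate a file directly, as in the sketch below; it assumes the PType's parse function (here, Writables.strings()) does not require a live task context, and the local path is an assumption:

    import java.util.Iterator;

    import org.apache.crunch.io.text.TextFileReaderFactory;
    import org.apache.crunch.types.writable.Writables;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class LocalTextRead {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.getLocal(conf);
        TextFileReaderFactory<String> factory = new TextFileReaderFactory<String>(Writables.strings());
        // Lines are parsed lazily; the AutoClosingIterator closes the stream once it is exhausted.
        Iterator<String> lines = factory.read(fs, new Path("/tmp/input.txt"));
        while (lines.hasNext()) {
          System.out.println(lines.next());
        }
      }
    }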

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/text/TextFileSource.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/text/TextFileSource.java b/crunch/src/main/java/org/apache/crunch/io/text/TextFileSource.java
deleted file mode 100644
index 026fca9..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/text/TextFileSource.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.text;
-
-import java.io.IOException;
-
-import org.apache.crunch.io.CompositePathIterable;
-import org.apache.crunch.io.ReadableSource;
-import org.apache.crunch.io.impl.FileSourceImpl;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.avro.AvroTypeFamily;
-import org.apache.crunch.types.avro.AvroUtf8InputFormat;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
-
-public class TextFileSource<T> extends FileSourceImpl<T> implements ReadableSource<T> {
-
-  private static boolean isBZip2(Path path) {
-    String strPath = path.toString();
-    return strPath.endsWith(".bz") || strPath.endsWith(".bz2");
-  }
-
-  private static <S> Class<? extends FileInputFormat<?, ?>> getInputFormat(Path path, PType<S> ptype) {
-    if (ptype.getFamily().equals(AvroTypeFamily.getInstance())) {
-      return AvroUtf8InputFormat.class;
-    } else if (isBZip2(path)) {
-      return BZip2TextInputFormat.class;
-    } else {
-      return TextInputFormat.class;
-    }
-  }
-
-  public TextFileSource(Path path, PType<T> ptype) {
-    super(path, ptype, getInputFormat(path, ptype));
-  }
-
-  @Override
-  public long getSize(Configuration conf) {
-    long sz = super.getSize(conf);
-    if (isBZip2(path)) {
-      sz *= 10; // Arbitrary compression factor
-    }
-    return sz;
-  }
-
-  @Override
-  public String toString() {
-    return "Text(" + path + ")";
-  }
-
-  @Override
-  public Iterable<T> read(Configuration conf) throws IOException {
-    return CompositePathIterable.create(path.getFileSystem(conf), path,
-        new TextFileReaderFactory<T>(LineParser.forType(ptype)));
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/text/TextFileSourceTarget.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/text/TextFileSourceTarget.java b/crunch/src/main/java/org/apache/crunch/io/text/TextFileSourceTarget.java
deleted file mode 100644
index 1d1211e..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/text/TextFileSourceTarget.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.text;
-
-import org.apache.crunch.io.FileNamingScheme;
-import org.apache.crunch.io.SequentialFileNamingScheme;
-import org.apache.crunch.io.impl.ReadableSourcePathTargetImpl;
-import org.apache.crunch.types.PType;
-import org.apache.hadoop.fs.Path;
-
-public class TextFileSourceTarget<T> extends ReadableSourcePathTargetImpl<T> {
-
-  public TextFileSourceTarget(String path, PType<T> ptype) {
-    this(new Path(path), ptype);
-  }
-
-  public TextFileSourceTarget(Path path, PType<T> ptype) {
-    this(path, ptype, new SequentialFileNamingScheme());
-  }
-
-  public TextFileSourceTarget(Path path, PType<T> ptype, FileNamingScheme fileNamingScheme) {
-    super(new TextFileSource<T>(path, ptype), new TextFileTarget(path), fileNamingScheme);
-  }
-
-  @Override
-  public String toString() {
-    return target.toString();
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/text/TextFileTableSource.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/text/TextFileTableSource.java b/crunch/src/main/java/org/apache/crunch/io/text/TextFileTableSource.java
deleted file mode 100644
index 94fc5fd..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/text/TextFileTableSource.java
+++ /dev/null
@@ -1,81 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.text;
-
-import java.io.IOException;
-
-import org.apache.crunch.Pair;
-import org.apache.crunch.io.CompositePathIterable;
-import org.apache.crunch.io.FormatBundle;
-import org.apache.crunch.io.ReadableSource;
-import org.apache.crunch.io.impl.FileTableSourceImpl;
-import org.apache.crunch.types.PTableType;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
-
-/** 
- * A {@code Source} that uses the {@code KeyValueTextInputFormat} to process
- * input text. If a separator for the keys and values in the text file is not specified,
- * a tab character is used. 
- */
-public class TextFileTableSource<K, V> extends FileTableSourceImpl<K, V>
-    implements ReadableSource<Pair<K, V>> {
-
-  // CRUNCH-125: Maintain compatibility with both versions of the KeyValueTextInputFormat's
-  // configuration field for specifying the separator character.
-  private static final String OLD_KV_SEP = "key.value.separator.in.input.line";
-  private static final String NEW_KV_SEP = "mapreduce.input.keyvaluelinerecordreader.key.value.separator";
-  
-  private static FormatBundle getBundle(String sep) {
-    FormatBundle bundle = FormatBundle.forInput(KeyValueTextInputFormat.class);
-    bundle.set(OLD_KV_SEP, sep);
-    bundle.set(NEW_KV_SEP, sep);
-    return bundle;
-  }
-  
-  private final String separator;
-  
-  public TextFileTableSource(String path, PTableType<K, V> tableType) {
-    this(new Path(path), tableType);
-  }
-  
-  public TextFileTableSource(Path path, PTableType<K, V> tableType) {
-    this(path, tableType, "\t");
-  }
-  
-  public TextFileTableSource(String path, PTableType<K, V> tableType, String separator) {
-    this(new Path(path), tableType, separator);
-  }
-  
-  public TextFileTableSource(Path path, PTableType<K, V> tableType, String separator) {
-    super(path, tableType, getBundle(separator));
-    this.separator = separator;
-  }
-
-  @Override
-  public String toString() {
-    return "KeyValueText(" + path + ")";
-  }
-
-  @Override
-  public Iterable<Pair<K, V>> read(Configuration conf) throws IOException {
-    return CompositePathIterable.create(path.getFileSystem(conf), path,
-        new TextFileReaderFactory<Pair<K, V>>(LineParser.forTableType(getTableType(), separator)));
-  }
-}
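
For context, a minimal sketch of reading a comma-separated key/value file as a PTable with this source; the driver class name and input path are assumptions, and the third constructor argument overrides the default tab separator:

    import org.apache.crunch.PTable;
    import org.apache.crunch.Pipeline;
    import org.apache.crunch.impl.mr.MRPipeline;
    import org.apache.crunch.io.text.TextFileTableSource;
    import org.apache.crunch.types.writable.Writables;

    public class KeyValueRead {
      public static void main(String[] args) throws Exception {
        Pipeline pipeline = new MRPipeline(KeyValueRead.class);
        // A line such as "user123,purchase" becomes the pair ("user123", "purchase").
        PTable<String, String> events = pipeline.read(
            new TextFileTableSource<String, String>("/data/events.csv",
                Writables.tableOf(Writables.strings(), Writables.strings()), ","));
        // ... group, aggregate, or join the table ...
        pipeline.done();
      }
    }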

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/text/TextFileTableSourceTarget.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/text/TextFileTableSourceTarget.java b/crunch/src/main/java/org/apache/crunch/io/text/TextFileTableSourceTarget.java
deleted file mode 100644
index dec97e5..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/text/TextFileTableSourceTarget.java
+++ /dev/null
@@ -1,63 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.text;
-
-import org.apache.crunch.Pair;
-import org.apache.crunch.TableSourceTarget;
-import org.apache.crunch.io.FileNamingScheme;
-import org.apache.crunch.io.SequentialFileNamingScheme;
-import org.apache.crunch.io.impl.ReadableSourcePathTargetImpl;
-import org.apache.crunch.types.PTableType;
-import org.apache.hadoop.fs.Path;
-
-/**
- * A {@code TableSource} and {@code SourceTarget} implementation that uses the
- * {@code KeyValueTextInputFormat} and {@code TextOutputFormat} to support reading
- * and writing text files as {@code PTable} instances using a tab separator for
- * the keys and the values.
- */
-public class TextFileTableSourceTarget<K, V> extends ReadableSourcePathTargetImpl<Pair<K, V>> implements
-    TableSourceTarget<K, V> {
-
-  private final PTableType<K, V> tableType;
-  
-  public TextFileTableSourceTarget(String path, PTableType<K, V> tableType) {
-    this(new Path(path), tableType);
-  }
-
-  public TextFileTableSourceTarget(Path path, PTableType<K, V> tableType) {
-    this(path, tableType, new SequentialFileNamingScheme());
-  }
-
-  public TextFileTableSourceTarget(Path path, PTableType<K, V> tableType,
-      FileNamingScheme fileNamingScheme) {
-    super(new TextFileTableSource<K, V>(path, tableType), new TextFileTarget(path),
-        fileNamingScheme);
-    this.tableType = tableType;
-  }
-
-  @Override
-  public PTableType<K, V> getTableType() {
-    return tableType;
-  }
-
-  @Override
-  public String toString() {
-    return target.toString();
-  }
-}
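
For context, a minimal sketch of using the class as both a source and a target, reading a tab-separated table and writing it back out as text; the paths and driver class name are assumptions:

    import org.apache.crunch.PTable;
    import org.apache.crunch.Pipeline;
    import org.apache.crunch.impl.mr.MRPipeline;
    import org.apache.crunch.io.text.TextFileTableSourceTarget;
    import org.apache.crunch.types.writable.Writables;

    public class TabTableCopy {
      public static void main(String[] args) throws Exception {
        Pipeline pipeline = new MRPipeline(TabTableCopy.class);
        PTable<String, String> counts = pipeline.read(
            new TextFileTableSourceTarget<String, String>("/data/word-counts",
                Writables.tableOf(Writables.strings(), Writables.strings())));
        // Write the same table back out as tab-separated text under a new directory.
        pipeline.write(counts, new TextFileTableSourceTarget<String, String>("/data/word-counts-copy",
            Writables.tableOf(Writables.strings(), Writables.strings())));
        pipeline.done();
      }
    }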


[23/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/lib/join/BrokenLeftAndOuterJoinTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/lib/join/BrokenLeftAndOuterJoinTest.java b/crunch-core/src/test/java/org/apache/crunch/lib/join/BrokenLeftAndOuterJoinTest.java
new file mode 100644
index 0000000..7e2e444
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/lib/join/BrokenLeftAndOuterJoinTest.java
@@ -0,0 +1,90 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib.join;
+
+import static org.apache.crunch.test.StringWrapper.wrap;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyNoMoreInteractions;
+
+import java.util.List;
+
+import org.apache.crunch.Emitter;
+import org.apache.crunch.Pair;
+import org.apache.crunch.test.CrunchTestSupport;
+import org.apache.crunch.test.StringWrapper;
+import org.apache.crunch.types.avro.Avros;
+import org.apache.hadoop.conf.Configuration;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+public class BrokenLeftAndOuterJoinTest {
+
+  List<Pair<StringWrapper, String>> createValuePairList(StringWrapper leftValue, String rightValue) {
+    Pair<StringWrapper, String> valuePair = Pair.of(leftValue, rightValue);
+    List<Pair<StringWrapper, String>> valuePairList = Lists.newArrayList();
+    valuePairList.add(valuePair);
+    return valuePairList;
+  }
+  
+  @Test
+  public void testOuterJoin() {
+    JoinFn<StringWrapper, StringWrapper, String> joinFn = new LeftOuterJoinFn<StringWrapper, StringWrapper, String>(
+        Avros.reflects(StringWrapper.class),
+        Avros.reflects(StringWrapper.class));
+    joinFn.setContext(CrunchTestSupport.getTestContext(new Configuration()));
+    joinFn.initialize();
+    Emitter<Pair<StringWrapper, Pair<StringWrapper, String>>> emitter = mock(Emitter.class);
+    
+    StringWrapper key = new StringWrapper();
+    StringWrapper leftValue = new StringWrapper();
+    key.setValue("left-only");
+    leftValue.setValue("left-only-left");
+    joinFn.join(key, 0, createValuePairList(leftValue, null), emitter);
+
+    key.setValue("right-only");
+    joinFn.join(key, 1, createValuePairList(null, "right-only-right"), emitter);
+
+    verify(emitter).emit(Pair.of(wrap("left-only"), Pair.of(wrap("left-only-left"), (String) null)));
+    verifyNoMoreInteractions(emitter);
+  }
+  
+  @Test
+  public void testFullJoin() {
+    JoinFn<StringWrapper, StringWrapper, String> joinFn = new FullOuterJoinFn<StringWrapper, StringWrapper, String>(
+        Avros.reflects(StringWrapper.class),
+        Avros.reflects(StringWrapper.class));
+    joinFn.setContext(CrunchTestSupport.getTestContext(new Configuration()));
+    joinFn.initialize();
+    Emitter<Pair<StringWrapper, Pair<StringWrapper, String>>> emitter = mock(Emitter.class);
+    
+    StringWrapper key = new StringWrapper();
+    StringWrapper leftValue = new StringWrapper();
+    key.setValue("left-only");
+    leftValue.setValue("left-only-left");
+    joinFn.join(key, 0, createValuePairList(leftValue, null), emitter);
+
+    key.setValue("right-only");
+    joinFn.join(key, 1, createValuePairList(null, "right-only-right"), emitter);
+
+    verify(emitter).emit(Pair.of(wrap("left-only"), Pair.of(wrap("left-only-left"), (String) null)));
+    verify(emitter).emit(Pair.of(wrap("right-only"), Pair.of((StringWrapper)null, "right-only-right")));
+    verifyNoMoreInteractions(emitter);
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/lib/join/FullOuterJoinFnTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/lib/join/FullOuterJoinFnTest.java b/crunch-core/src/test/java/org/apache/crunch/lib/join/FullOuterJoinFnTest.java
new file mode 100644
index 0000000..5cf4f51
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/lib/join/FullOuterJoinFnTest.java
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib.join;
+
+import static org.apache.crunch.test.StringWrapper.wrap;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyNoMoreInteractions;
+
+import org.apache.crunch.Emitter;
+import org.apache.crunch.Pair;
+import org.apache.crunch.test.StringWrapper;
+import org.apache.crunch.types.avro.Avros;
+
+public class FullOuterJoinFnTest extends JoinFnTestBase {
+
+  @Override
+  protected void checkOutput(Emitter<Pair<StringWrapper, Pair<StringWrapper, String>>> emitter) {
+    verify(emitter)
+        .emit(Pair.of(wrap("left-only"), Pair.of(wrap("left-only-left"), (String) null)));
+    verify(emitter).emit(Pair.of(wrap("both"), Pair.of(wrap("both-left"), "both-right")));
+    verify(emitter).emit(
+        Pair.of(wrap("right-only"), Pair.of((StringWrapper) null, "right-only-right")));
+    verifyNoMoreInteractions(emitter);
+  }
+
+  @Override
+  protected JoinFn<StringWrapper, StringWrapper, String> getJoinFn() {
+    return new FullOuterJoinFn<StringWrapper, StringWrapper, String>(
+        Avros.reflects(StringWrapper.class),
+        Avros.reflects(StringWrapper.class));
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/lib/join/InnerJoinFnTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/lib/join/InnerJoinFnTest.java b/crunch-core/src/test/java/org/apache/crunch/lib/join/InnerJoinFnTest.java
new file mode 100644
index 0000000..d2347de
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/lib/join/InnerJoinFnTest.java
@@ -0,0 +1,42 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib.join;
+
+import static org.apache.crunch.test.StringWrapper.wrap;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyNoMoreInteractions;
+
+import org.apache.crunch.Emitter;
+import org.apache.crunch.Pair;
+import org.apache.crunch.test.StringWrapper;
+import org.apache.crunch.types.avro.Avros;
+
+public class InnerJoinFnTest extends JoinFnTestBase {
+
+  protected void checkOutput(Emitter<Pair<StringWrapper, Pair<StringWrapper, String>>> joinEmitter) {
+    verify(joinEmitter).emit(Pair.of(wrap("both"), Pair.of(wrap("both-left"), "both-right")));
+    verifyNoMoreInteractions(joinEmitter);
+  }
+
+  @Override
+  protected JoinFn<StringWrapper, StringWrapper, String> getJoinFn() {
+    return new InnerJoinFn<StringWrapper, StringWrapper, String>(
+        Avros.reflects(StringWrapper.class),
+        Avros.reflects(StringWrapper.class));
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/lib/join/JoinFnTestBase.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/lib/join/JoinFnTestBase.java b/crunch-core/src/test/java/org/apache/crunch/lib/join/JoinFnTestBase.java
new file mode 100644
index 0000000..9e4337f
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/lib/join/JoinFnTestBase.java
@@ -0,0 +1,82 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib.join;
+
+import static org.mockito.Mockito.mock;
+
+import java.util.List;
+
+import org.apache.crunch.Emitter;
+import org.apache.crunch.Pair;
+import org.apache.crunch.test.CrunchTestSupport;
+import org.apache.crunch.test.StringWrapper;
+import org.apache.hadoop.conf.Configuration;
+import org.junit.Before;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+public abstract class JoinFnTestBase {
+
+  private JoinFn<StringWrapper, StringWrapper, String> joinFn;
+
+  private Emitter<Pair<StringWrapper, Pair<StringWrapper, String>>> emitter;
+
+  // Avoid warnings on generic Emitter mock
+  @SuppressWarnings("unchecked")
+  @Before
+  public void setUp() {
+    joinFn = getJoinFn();
+    joinFn.setContext(CrunchTestSupport.getTestContext(new Configuration()));
+    joinFn.initialize();
+    emitter = mock(Emitter.class);
+  }
+
+  @Test
+  public void testJoin() {
+
+    StringWrapper key = new StringWrapper();
+    StringWrapper leftValue = new StringWrapper();
+    key.setValue("left-only");
+    leftValue.setValue("left-only-left");
+    joinFn.join(key, 0, createValuePairList(leftValue, null), emitter);
+
+    key.setValue("both");
+    leftValue.setValue("both-left");
+    joinFn.join(key, 0, createValuePairList(leftValue, null), emitter);
+    joinFn.join(key, 1, createValuePairList(null, "both-right"), emitter);
+
+    key.setValue("right-only");
+    joinFn.join(key, 1, createValuePairList(null, "right-only-right"), emitter);
+
+    checkOutput(emitter);
+
+  }
+
+  protected abstract void checkOutput(Emitter<Pair<StringWrapper, Pair<StringWrapper, String>>> emitter);
+
+  protected abstract JoinFn<StringWrapper, StringWrapper, String> getJoinFn();
+
+  protected List<Pair<StringWrapper, String>> createValuePairList(StringWrapper leftValue, String rightValue) {
+    Pair<StringWrapper, String> valuePair = Pair.of(leftValue, rightValue);
+    List<Pair<StringWrapper, String>> valuePairList = Lists.newArrayList();
+    valuePairList.add(valuePair);
+    return valuePairList;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/lib/join/LeftOuterJoinTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/lib/join/LeftOuterJoinTest.java b/crunch-core/src/test/java/org/apache/crunch/lib/join/LeftOuterJoinTest.java
new file mode 100644
index 0000000..a90457e
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/lib/join/LeftOuterJoinTest.java
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib.join;
+
+import static org.apache.crunch.test.StringWrapper.wrap;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyNoMoreInteractions;
+
+import org.apache.crunch.Emitter;
+import org.apache.crunch.Pair;
+import org.apache.crunch.test.StringWrapper;
+import org.apache.crunch.types.avro.Avros;
+
+public class LeftOuterJoinTest extends JoinFnTestBase {
+
+  @Override
+  protected void checkOutput(Emitter<Pair<StringWrapper, Pair<StringWrapper, String>>> emitter) {
+    verify(emitter)
+        .emit(Pair.of(wrap("left-only"), Pair.of(wrap("left-only-left"), (String) null)));
+    verify(emitter).emit(Pair.of(wrap("both"), Pair.of(wrap("both-left"), "both-right")));
+    verifyNoMoreInteractions(emitter);
+  }
+
+  @Override
+  protected JoinFn<StringWrapper, StringWrapper, String> getJoinFn() {
+    return new LeftOuterJoinFn<StringWrapper, StringWrapper, String>(
+        Avros.reflects(StringWrapper.class),
+        Avros.reflects(StringWrapper.class));
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/lib/join/RightOuterJoinFnTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/lib/join/RightOuterJoinFnTest.java b/crunch-core/src/test/java/org/apache/crunch/lib/join/RightOuterJoinFnTest.java
new file mode 100644
index 0000000..7e41284
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/lib/join/RightOuterJoinFnTest.java
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib.join;
+
+import static org.apache.crunch.test.StringWrapper.wrap;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyNoMoreInteractions;
+
+import org.apache.crunch.Emitter;
+import org.apache.crunch.Pair;
+import org.apache.crunch.test.StringWrapper;
+import org.apache.crunch.types.avro.Avros;
+
+public class RightOuterJoinFnTest extends JoinFnTestBase {
+
+  @Override
+  protected void checkOutput(Emitter<Pair<StringWrapper, Pair<StringWrapper, String>>> emitter) {
+    verify(emitter).emit(Pair.of(wrap("both"), Pair.of(wrap("both-left"), "both-right")));
+    verify(emitter).emit(
+        Pair.of(wrap("right-only"), Pair.of((StringWrapper) null, "right-only-right")));
+    verifyNoMoreInteractions(emitter);
+  }
+
+  @Override
+  protected JoinFn<StringWrapper, StringWrapper, String> getJoinFn() {
+    return new RightOuterJoinFn<StringWrapper, StringWrapper, String>(
+        Avros.reflects(StringWrapper.class),
+        Avros.reflects(StringWrapper.class));
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/test/CountersTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/test/CountersTest.java b/crunch-core/src/test/java/org/apache/crunch/test/CountersTest.java
new file mode 100644
index 0000000..66f854e
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/test/CountersTest.java
@@ -0,0 +1,70 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.test;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.crunch.DoFn;
+import org.apache.crunch.Emitter;
+import org.apache.hadoop.conf.Configuration;
+import org.junit.Test;
+
+/**
+ * A test to verify that counters can be used inside a unit test. :)
+ */
+public class CountersTest {
+
+  public enum CT {
+    ONE,
+    TWO,
+    THREE
+  };
+
+  public static class CTFn extends DoFn<String, String> {
+    CTFn() {
+      setContext(CrunchTestSupport.getTestContext(new Configuration()));
+    }
+
+    @Override
+    public void process(String input, Emitter<String> emitter) {
+      getCounter(CT.ONE).increment(1);
+      getCounter(CT.TWO).increment(4);
+      getCounter(CT.THREE).increment(7);
+    }
+  }
+
+  @Test
+  public void test() {
+    CTFn fn = new CTFn();
+    fn.process("foo", null);
+    fn.process("bar", null);
+    assertEquals(2L, TestCounters.getCounter(CT.ONE).getValue());
+    assertEquals(8L, TestCounters.getCounter(CT.TWO).getValue());
+    assertEquals(14L, TestCounters.getCounter(CT.THREE).getValue());
+  }
+
+  @Test
+  public void secondTest() {
+    CTFn fn = new CTFn();
+    fn.process("foo", null);
+    fn.process("bar", null);
+    assertEquals(2L, TestCounters.getCounter(CT.ONE).getValue());
+    assertEquals(8L, TestCounters.getCounter(CT.TWO).getValue());
+    assertEquals(14L, TestCounters.getCounter(CT.THREE).getValue());
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/test/StringWrapper.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/test/StringWrapper.java b/crunch-core/src/test/java/org/apache/crunch/test/StringWrapper.java
new file mode 100644
index 0000000..34302b5
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/test/StringWrapper.java
@@ -0,0 +1,102 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.test;
+
+import org.apache.crunch.MapFn;
+
+/**
+ * Simple String wrapper for testing with Avro reflection.
+ */
+public class StringWrapper implements Comparable<StringWrapper> {
+
+  public static class StringToStringWrapperMapFn extends MapFn<String, StringWrapper> {
+
+    @Override
+    public StringWrapper map(String input) {
+      return wrap(input);
+    }
+
+  }
+
+  public static class StringWrapperToStringMapFn extends MapFn<StringWrapper, String> {
+
+    @Override
+    public String map(StringWrapper input) {
+      return input.getValue();
+    }
+
+  }
+
+  private String value;
+
+  public StringWrapper() {
+    this("");
+  }
+
+  public StringWrapper(String value) {
+    this.value = value;
+  }
+
+  @Override
+  public int compareTo(StringWrapper o) {
+    return this.value.compareTo(o.value);
+  }
+
+  public String getValue() {
+    return value;
+  }
+
+  public void setValue(String value) {
+    this.value = value;
+  }
+
+  @Override
+  public int hashCode() {
+    final int prime = 31;
+    int result = 1;
+    result = prime * result + ((value == null) ? 0 : value.hashCode());
+    return result;
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj)
+      return true;
+    if (obj == null)
+      return false;
+    if (getClass() != obj.getClass())
+      return false;
+    StringWrapper other = (StringWrapper) obj;
+    if (value == null) {
+      if (other.value != null)
+        return false;
+    } else if (!value.equals(other.value))
+      return false;
+    return true;
+  }
+
+  @Override
+  public String toString() {
+    return "StringWrapper [value=" + value + "]";
+  }
+
+  public static StringWrapper wrap(String value) {
+    return new StringWrapper(value);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/types/CollectionDeepCopierTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/types/CollectionDeepCopierTest.java b/crunch-core/src/test/java/org/apache/crunch/types/CollectionDeepCopierTest.java
new file mode 100644
index 0000000..bd7fcd7
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/types/CollectionDeepCopierTest.java
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.assertNull;
+
+import java.util.Collection;
+
+import org.apache.crunch.test.Person;
+import org.apache.crunch.types.avro.Avros;
+import org.apache.hadoop.conf.Configuration;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+public class CollectionDeepCopierTest {
+
+  @Test
+  public void testDeepCopy() {
+    Person person = new Person();
+    person.age = 42;
+    person.name = "John Smith";
+    person.siblingnames = Lists.<CharSequence> newArrayList();
+
+    Collection<Person> personCollection = Lists.newArrayList(person);
+    CollectionDeepCopier<Person> collectionDeepCopier = new CollectionDeepCopier<Person>(
+        Avros.records(Person.class));
+    collectionDeepCopier.initialize(new Configuration());
+
+    Collection<Person> deepCopyCollection = collectionDeepCopier.deepCopy(personCollection);
+
+    assertEquals(personCollection, deepCopyCollection);
+    assertNotSame(personCollection.iterator().next(), deepCopyCollection.iterator().next());
+  }
+
+  @Test
+  public void testNullDeepCopy() {
+    CollectionDeepCopier<Person> collectionDeepCopier = new CollectionDeepCopier<Person>(
+        Avros.records(Person.class));
+    collectionDeepCopier.initialize(new Configuration());
+    Collection<Person> nullCollection = null;
+    assertNull(collectionDeepCopier.deepCopy(nullCollection));
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/types/MapDeepCopierTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/types/MapDeepCopierTest.java b/crunch-core/src/test/java/org/apache/crunch/types/MapDeepCopierTest.java
new file mode 100644
index 0000000..c13e4a2
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/types/MapDeepCopierTest.java
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.assertNull;
+
+import java.util.Map;
+
+import org.apache.crunch.test.StringWrapper;
+import org.apache.crunch.types.avro.Avros;
+import org.apache.hadoop.conf.Configuration;
+import org.junit.Test;
+
+import com.google.common.collect.Maps;
+
+public class MapDeepCopierTest {
+
+  @Test
+  public void testDeepCopy() {
+    StringWrapper stringWrapper = new StringWrapper("value");
+    String key = "key";
+    Map<String, StringWrapper> map = Maps.newHashMap();
+    map.put(key, stringWrapper);
+
+    MapDeepCopier<StringWrapper> deepCopier = new MapDeepCopier<StringWrapper>(
+        Avros.reflects(StringWrapper.class));
+    deepCopier.initialize(new Configuration());
+    Map<String, StringWrapper> deepCopy = deepCopier.deepCopy(map);
+
+    assertEquals(map, deepCopy);
+    assertNotSame(map.get(key), deepCopy.get(key));
+  }
+  
+  @Test
+  public void testDeepCopy_Null() {
+    Map<String, StringWrapper> map = null;
+
+    MapDeepCopier<StringWrapper> deepCopier = new MapDeepCopier<StringWrapper>(
+        Avros.reflects(StringWrapper.class));
+    deepCopier.initialize(new Configuration());
+    Map<String, StringWrapper> deepCopy = deepCopier.deepCopy(map);
+
+    assertNull(deepCopy);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/types/PTypeUtilsTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/types/PTypeUtilsTest.java b/crunch-core/src/test/java/org/apache/crunch/types/PTypeUtilsTest.java
new file mode 100644
index 0000000..e6fd90c
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/types/PTypeUtilsTest.java
@@ -0,0 +1,89 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+import java.util.Collection;
+
+import org.apache.avro.Schema;
+import org.apache.avro.util.Utf8;
+import org.apache.crunch.Tuple3;
+import org.apache.crunch.TupleN;
+import org.apache.crunch.types.avro.AvroType;
+import org.apache.crunch.types.avro.AvroTypeFamily;
+import org.apache.crunch.types.avro.Avros;
+import org.apache.crunch.types.writable.WritableTypeFamily;
+import org.apache.crunch.types.writable.Writables;
+import org.apache.hadoop.io.Text;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class PTypeUtilsTest {
+  @Test
+  public void testPrimitives() {
+    assertEquals(Avros.strings(), AvroTypeFamily.getInstance().as(Writables.strings()));
+    Assert.assertEquals(Writables.doubles(), WritableTypeFamily.getInstance().as(Avros.doubles()));
+  }
+
+  @Test
+  public void testTuple3() {
+    PType<Tuple3<String, Float, Integer>> t = Writables.triples(Writables.strings(), Writables.floats(),
+        Writables.ints());
+    PType<Tuple3<String, Float, Integer>> at = AvroTypeFamily.getInstance().as(t);
+    assertEquals(Avros.strings(), at.getSubTypes().get(0));
+    assertEquals(Avros.floats(), at.getSubTypes().get(1));
+    assertEquals(Avros.ints(), at.getSubTypes().get(2));
+  }
+
+  @Test
+  public void testTupleN() {
+    PType<TupleN> t = Avros.tuples(Avros.strings(), Avros.floats(), Avros.ints());
+    PType<TupleN> wt = WritableTypeFamily.getInstance().as(t);
+    assertEquals(Writables.strings(), wt.getSubTypes().get(0));
+    assertEquals(Writables.floats(), wt.getSubTypes().get(1));
+    assertEquals(Writables.ints(), wt.getSubTypes().get(2));
+  }
+
+  @Test
+  public void testWritableCollections() {
+    PType<Collection<String>> t = Avros.collections(Avros.strings());
+    t = WritableTypeFamily.getInstance().as(t);
+    assertEquals(Writables.strings(), t.getSubTypes().get(0));
+  }
+
+  @Test
+  public void testAvroCollections() {
+    PType<Collection<Double>> t = Writables.collections(Writables.doubles());
+    t = AvroTypeFamily.getInstance().as(t);
+    assertEquals(Avros.doubles(), t.getSubTypes().get(0));
+  }
+
+  @Test
+  public void testAvroRegistered() {
+    AvroType<Utf8> at = new AvroType<Utf8>(Utf8.class, Schema.create(Schema.Type.STRING), new DeepCopier.NoOpDeepCopier<Utf8>());
+    Avros.register(Utf8.class, at);
+    assertEquals(at, Avros.records(Utf8.class));
+  }
+
+  @Test
+  public void testWritableBuiltin() {
+    assertNotNull(Writables.records(Text.class));
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/types/PTypesTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/types/PTypesTest.java b/crunch-core/src/test/java/org/apache/crunch/types/PTypesTest.java
new file mode 100644
index 0000000..d7c8811
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/types/PTypesTest.java
@@ -0,0 +1,34 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.UUID;
+
+import org.apache.crunch.types.avro.AvroTypeFamily;
+import org.junit.Test;
+
+public class PTypesTest {
+  @Test
+  public void testUUID() throws Exception {
+    UUID uuid = UUID.randomUUID();
+    PType<UUID> ptype = PTypes.uuid(AvroTypeFamily.getInstance());
+    assertEquals(uuid, ptype.getInputMapFn().map(ptype.getOutputMapFn().map(uuid)));
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/types/TupleDeepCopierTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/types/TupleDeepCopierTest.java b/crunch-core/src/test/java/org/apache/crunch/types/TupleDeepCopierTest.java
new file mode 100644
index 0000000..e46a680
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/types/TupleDeepCopierTest.java
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.assertNull;
+
+import org.apache.crunch.Pair;
+import org.apache.crunch.test.Person;
+import org.apache.crunch.types.avro.Avros;
+import org.apache.hadoop.conf.Configuration;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+public class TupleDeepCopierTest {
+
+  @Test
+  public void testDeepCopy_Pair() {
+    Person person = new Person();
+    person.name = "John Doe";
+    person.age = 42;
+    person.siblingnames = Lists.<CharSequence> newArrayList();
+
+    Pair<Integer, Person> inputPair = Pair.of(1, person);
+    DeepCopier<Pair> deepCopier = new TupleDeepCopier<Pair>(Pair.class, Avros.ints(),
+        Avros.records(Person.class));
+
+    deepCopier.initialize(new Configuration());
+    Pair<Integer, Person> deepCopyPair = deepCopier.deepCopy(inputPair);
+
+    assertEquals(inputPair, deepCopyPair);
+    assertNotSame(inputPair.second(), deepCopyPair.second());
+  }
+  
+  @Test
+  public void testDeepCopy_PairContainingNull() {
+
+    Pair<Integer, Person> inputPair = Pair.of(1, null);
+    DeepCopier<Pair> deepCopier = new TupleDeepCopier<Pair>(Pair.class, Avros.ints(),
+        Avros.records(Person.class));
+
+    deepCopier.initialize(new Configuration());
+    Pair<Integer, Person> deepCopyPair = deepCopier.deepCopy(inputPair);
+
+    assertEquals(inputPair, deepCopyPair);
+  }
+  
+  @Test
+  public void testDeepCopy_NullPair() {
+    Pair<Integer, Person> inputPair = null;
+    DeepCopier<Pair> deepCopier = new TupleDeepCopier<Pair>(Pair.class, Avros.ints(),
+        Avros.records(Person.class));
+
+    deepCopier.initialize(new Configuration());
+    Pair<Integer, Person> deepCopyPair = deepCopier.deepCopy(inputPair);
+
+    assertNull(deepCopyPair);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/types/TupleFactoryTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/types/TupleFactoryTest.java b/crunch-core/src/test/java/org/apache/crunch/types/TupleFactoryTest.java
new file mode 100644
index 0000000..0726be2
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/types/TupleFactoryTest.java
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.crunch.Pair;
+import org.apache.crunch.Tuple;
+import org.apache.crunch.Tuple3;
+import org.apache.crunch.Tuple4;
+import org.apache.crunch.TupleN;
+import org.junit.Test;
+
+public class TupleFactoryTest {
+
+  @Test
+  public void testGetTupleFactory_Pair() {
+    assertEquals(TupleFactory.PAIR, TupleFactory.getTupleFactory(Pair.class));
+  }
+
+  @Test
+  public void testGetTupleFactory_Tuple3() {
+    assertEquals(TupleFactory.TUPLE3, TupleFactory.getTupleFactory(Tuple3.class));
+  }
+
+  @Test
+  public void testGetTupleFactory_Tuple4() {
+    assertEquals(TupleFactory.TUPLE4, TupleFactory.getTupleFactory(Tuple4.class));
+  }
+
+  @Test
+  public void testGetTupleFactory_TupleN() {
+    assertEquals(TupleFactory.TUPLEN, TupleFactory.getTupleFactory(TupleN.class));
+  }
+
+  @Test
+  public void testGetTupleFactory_CustomTupleClass() {
+    TupleFactory<CustomTupleImplementation> customTupleFactory = TupleFactory.create(CustomTupleImplementation.class);
+    assertEquals(customTupleFactory, TupleFactory.getTupleFactory(CustomTupleImplementation.class));
+  }
+
+  private static class CustomTupleImplementation implements Tuple {
+
+    @Override
+    public Object get(int index) {
+      return null;
+    }
+
+    @Override
+    public int size() {
+      return 0;
+    }
+
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/types/avro/AvroDeepCopierTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/types/avro/AvroDeepCopierTest.java b/crunch-core/src/test/java/org/apache/crunch/types/avro/AvroDeepCopierTest.java
new file mode 100644
index 0000000..37c13c0
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/types/avro/AvroDeepCopierTest.java
@@ -0,0 +1,107 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.avro;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.assertNull;
+
+import java.util.List;
+
+import org.apache.avro.generic.GenericData.Record;
+import org.apache.crunch.test.Person;
+import org.apache.crunch.types.avro.AvroDeepCopier.AvroSpecificDeepCopier;
+import org.apache.hadoop.conf.Configuration;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+public class AvroDeepCopierTest {
+  
+  @Test
+  public void testDeepCopySpecific() {
+    Person person = new Person();
+    person.name = "John Doe";
+    person.age = 42;
+    person.siblingnames = Lists.<CharSequence> newArrayList();
+
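+    // The deep copy should be equal to, but not the same instance as, the original record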
+    Person deepCopyPerson = new AvroSpecificDeepCopier<Person>(Person.class, Person.SCHEMA$)
+        .deepCopy(person);
+
+    assertEquals(person, deepCopyPerson);
+    assertNotSame(person, deepCopyPerson);
+  }
+
+  @Test
+  public void testDeepCopyGeneric() {
+    Record record = new Record(Person.SCHEMA$);
+    record.put("name", "John Doe");
+    record.put("age", 42);
+    record.put("siblingnames", Lists.newArrayList());
+
+    Record deepCopyRecord = new AvroDeepCopier.AvroGenericDeepCopier(Person.SCHEMA$)
+        .deepCopy(record);
+
+    assertEquals(record, deepCopyRecord);
+    assertNotSame(record, deepCopyRecord);
+  }
+
+  static class ReflectedPerson {
+    String name;
+    int age;
+    List<String> siblingnames;
+
+    @Override
+    public boolean equals(Object other) {
+      if (other == null || !(other instanceof ReflectedPerson)) {
+        return false;
+      }
+      ReflectedPerson that = (ReflectedPerson) other;
+      return name.equals(that.name) && age == that.age && siblingnames.equals(that.siblingnames);
+    }
+  }
+
+  @Test
+  public void testDeepCopyReflect() {
+    ReflectedPerson person = new ReflectedPerson();
+    person.name = "John Doe";
+    person.age = 42;
+    person.siblingnames = Lists.newArrayList();
+
+    AvroDeepCopier<ReflectedPerson> avroDeepCopier = new AvroDeepCopier.AvroReflectDeepCopier<ReflectedPerson>(
+        ReflectedPerson.class, Avros.reflects(ReflectedPerson.class).getSchema());
+    avroDeepCopier.initialize(new Configuration());
+
+    ReflectedPerson deepCopyPerson = avroDeepCopier.deepCopy(person);
+
+    assertEquals(person, deepCopyPerson);
+    assertNotSame(person, deepCopyPerson);
+
+  }
+  
+  @Test
+  public void testDeepCopy_Null() {
+    Person person = null;
+
+    Person deepCopyPerson = new AvroSpecificDeepCopier<Person>(Person.class, Person.SCHEMA$)
+        .deepCopy(person);
+
+    assertNull(deepCopyPerson);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/types/avro/AvroGroupedTableTypeTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/types/avro/AvroGroupedTableTypeTest.java b/crunch-core/src/test/java/org/apache/crunch/types/avro/AvroGroupedTableTypeTest.java
new file mode 100644
index 0000000..db9ebdc
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/types/avro/AvroGroupedTableTypeTest.java
@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.avro;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.assertSame;
+
+import java.util.List;
+
+import org.apache.crunch.Pair;
+import org.apache.crunch.test.Person;
+import org.apache.crunch.types.PGroupedTableType;
+import org.apache.hadoop.conf.Configuration;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+public class AvroGroupedTableTypeTest {
+
+  @Test
+  public void testGetDetachedValue() {
+    Integer integerValue = 42;
+    Person person = new Person();
+    person.name = "John Doe";
+    person.age = 42;
+    person.siblingnames = Lists.<CharSequence> newArrayList();
+
+    Iterable<Person> inputPersonIterable = Lists.newArrayList(person);
+    Pair<Integer, Iterable<Person>> pair = Pair.of(integerValue, inputPersonIterable);
+
+    PGroupedTableType<Integer, Person> groupedTableType = Avros.tableOf(Avros.ints(),
+        Avros.specifics(Person.class)).getGroupedTableType();
+    groupedTableType.initialize(new Configuration());
+
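+    // Detaching should reuse the immutable Integer key but deep-copy the Avro values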
+    Pair<Integer, Iterable<Person>> detachedPair = groupedTableType.getDetachedValue(pair);
+
+    assertSame(integerValue, detachedPair.first());
+    List<Person> personList = Lists.newArrayList(detachedPair.second());
+    assertEquals(inputPersonIterable, personList);
+    assertNotSame(person, personList.get(0));
+
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/types/avro/AvroTableTypeTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/types/avro/AvroTableTypeTest.java b/crunch-core/src/test/java/org/apache/crunch/types/avro/AvroTableTypeTest.java
new file mode 100644
index 0000000..35d4e5b
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/types/avro/AvroTableTypeTest.java
@@ -0,0 +1,72 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.avro;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.assertSame;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.crunch.Pair;
+import org.apache.crunch.test.Person;
+import org.apache.crunch.test.StringWrapper;
+import org.apache.hadoop.conf.Configuration;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+public class AvroTableTypeTest {
+
+  @Test
+  public void testGetDetachedValue() {
+    Integer integerValue = 42;
+    Person person = new Person();
+    person.name = "John Doe";
+    person.age = 42;
+    person.siblingnames = Lists.<CharSequence> newArrayList();
+
+    Pair<Integer, Person> pair = Pair.of(integerValue, person);
+
+    AvroTableType<Integer, Person> tableType = Avros.tableOf(Avros.ints(),
+        Avros.specifics(Person.class));
+    tableType.initialize(new Configuration());
+
+    Pair<Integer, Person> detachedPair = tableType.getDetachedValue(pair);
+
+    assertSame(integerValue, detachedPair.first());
+    assertEquals(person, detachedPair.second());
+    assertNotSame(person, detachedPair.second());
+  }
+
+  @Test
+  public void testIsReflect_ContainsReflectKey() {
+    assertTrue(Avros.tableOf(Avros.reflects(StringWrapper.class), Avros.ints()).hasReflect());
+  }
+
+  @Test
+  public void testIsReflect_ContainsReflectValue() {
+    assertTrue(Avros.tableOf(Avros.ints(), Avros.reflects(StringWrapper.class)).hasReflect());
+  }
+
+  @Test
+  public void testReflect_NoReflectKeyOrValue() {
+    assertFalse(Avros.tableOf(Avros.ints(), Avros.ints()).hasReflect());
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/types/avro/AvroTypeTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/types/avro/AvroTypeTest.java b/crunch-core/src/test/java/org/apache/crunch/types/avro/AvroTypeTest.java
new file mode 100644
index 0000000..a874c63
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/types/avro/AvroTypeTest.java
@@ -0,0 +1,279 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.avro;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.assertSame;
+import static org.junit.Assert.assertTrue;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericData.Record;
+import org.apache.crunch.Pair;
+import org.apache.crunch.TupleN;
+import org.apache.crunch.test.Person;
+import org.apache.crunch.test.StringWrapper;
+import org.apache.hadoop.conf.Configuration;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+
+public class AvroTypeTest {
+
+  @Test
+  public void testIsSpecific_SpecificData() {
+    assertTrue(Avros.records(Person.class).hasSpecific());
+  }
+
+  @Test
+  public void testIsGeneric_SpecificData() {
+    assertFalse(Avros.records(Person.class).isGeneric());
+  }
+
+  @Test
+  public void testIsSpecific_GenericData() {
+    assertFalse(Avros.generics(Person.SCHEMA$).hasSpecific());
+  }
+
+  @Test
+  public void testIsGeneric_GenericData() {
+    assertTrue(Avros.generics(Person.SCHEMA$).isGeneric());
+  }
+
+  @Test
+  public void testIsSpecific_NonAvroClass() {
+    assertFalse(Avros.ints().hasSpecific());
+  }
+
+  @Test
+  public void testIsGeneric_NonAvroClass() {
+    assertFalse(Avros.ints().isGeneric());
+  }
+
+  @Test
+  public void testIsSpecific_SpecificAvroTable() {
+    assertTrue(Avros.tableOf(Avros.strings(), Avros.records(Person.class)).hasSpecific());
+  }
+
+  @Test
+  public void testIsGeneric_SpecificAvroTable() {
+    assertFalse(Avros.tableOf(Avros.strings(), Avros.records(Person.class)).isGeneric());
+  }
+
+  @Test
+  public void testIsSpecific_GenericAvroTable() {
+    assertFalse(Avros.tableOf(Avros.strings(), Avros.generics(Person.SCHEMA$)).hasSpecific());
+  }
+
+  @Test
+  public void testIsGeneric_GenericAvroTable() {
+    assertFalse(Avros.tableOf(Avros.strings(), Avros.generics(Person.SCHEMA$)).isGeneric());
+  }
+
+  @Test
+  public void testIsReflect_GenericType() {
+    assertFalse(Avros.generics(Person.SCHEMA$).hasReflect());
+  }
+
+  @Test
+  public void testIsReflect_SpecificType() {
+    assertFalse(Avros.records(Person.class).hasReflect());
+  }
+
+  @Test
+  public void testIsReflect_ReflectSimpleType() {
+    assertTrue(Avros.reflects(StringWrapper.class).hasReflect());
+  }
+
+  @Test
+  public void testIsReflect_NonReflectSubType() {
+    assertFalse(Avros.pairs(Avros.ints(), Avros.ints()).hasReflect());
+  }
+
+  @Test
+  public void testIsReflect_ReflectSubType() {
+    assertTrue(Avros.pairs(Avros.ints(), Avros.reflects(StringWrapper.class)).hasReflect());
+  }
+
+  @Test
+  public void testIsReflect_TableOfNonReflectTypes() {
+    assertFalse(Avros.tableOf(Avros.ints(), Avros.strings()).hasReflect());
+  }
+
+  @Test
+  public void testIsReflect_TableWithReflectKey() {
+    assertTrue(Avros.tableOf(Avros.reflects(StringWrapper.class), Avros.ints()).hasReflect());
+  }
+
+  @Test
+  public void testIsReflect_TableWithReflectValue() {
+    assertTrue(Avros.tableOf(Avros.ints(), Avros.reflects(StringWrapper.class)).hasReflect());
+  }
+
+  @Test
+  public void testReflect_CollectionContainingReflectValue() {
+    assertTrue(Avros.collections(Avros.reflects(StringWrapper.class)).hasReflect());
+  }
+
+  @Test
+  public void testReflect_CollectionNotContainingReflectValue() {
+    assertFalse(Avros.collections(Avros.generics(Person.SCHEMA$)).hasReflect());
+  }
+
+  @Test
+  public void testGetDetachedValue_AlreadyMappedAvroType() {
+    Integer value = 42;
+    AvroType<Integer> intType = Avros.ints();
+    intType.initialize(new Configuration());
+    Integer detachedValue = intType.getDetachedValue(value);
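+    // Integers are immutable, so detaching is expected to return the same instance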
+    assertSame(value, detachedValue);
+  }
+
+  @Test
+  public void testGetDetachedValue_GenericAvroType() {
+    AvroType<Record> genericType = Avros.generics(Person.SCHEMA$);
+    genericType.initialize(new Configuration());
+    GenericData.Record record = new GenericData.Record(Person.SCHEMA$);
+    record.put("name", "name value");
+    record.put("age", 42);
+    record.put("siblingnames", Lists.newArrayList());
+
+    Record detachedRecord = genericType.getDetachedValue(record);
+    assertEquals(record, detachedRecord);
+    assertNotSame(record, detachedRecord);
+  }
+
+  private Person createPerson() {
+    Person person = new Person();
+    person.name = "name value";
+    person.age = 42;
+    person.siblingnames = Lists.<CharSequence> newArrayList();
+    return person;
+  }
+
+  @Test
+  public void testGetDetachedValue_SpecificAvroType() {
+    AvroType<Person> specificType = Avros.specifics(Person.class);
+    specificType.initialize(new Configuration());
+    Person person = createPerson();
+    Person detachedPerson = specificType.getDetachedValue(person);
+    assertEquals(person, detachedPerson);
+    assertNotSame(person, detachedPerson);
+  }
+
+  @Test(expected = IllegalStateException.class)
+  public void testGetDetachedValue_NotInitialized() {
+    AvroType<Person> specificType = Avros.specifics(Person.class);
+    Person person = createPerson();
+    specificType.getDetachedValue(person);
+  }
+
+  static class ReflectedPerson {
+    String name;
+    int age;
+    List<String> siblingnames;
+
+    @Override
+    public boolean equals(Object other) {
+      if (other == null || !(other instanceof ReflectedPerson)) {
+        return false;
+      }
+      ReflectedPerson that = (ReflectedPerson) other;
+      return name.equals(that.name) && age == that.age && siblingnames.equals(that.siblingnames);
+    }
+  }
+
+  @Test
+  public void testGetDetachedValue_ReflectAvroType() {
+    AvroType<ReflectedPerson> reflectType = Avros.reflects(ReflectedPerson.class);
+    reflectType.initialize(new Configuration());
+    ReflectedPerson rp = new ReflectedPerson();
+    rp.name = "josh";
+    rp.age = 32;
+    rp.siblingnames = Lists.newArrayList();
+    ReflectedPerson detached = reflectType.getDetachedValue(rp);
+    assertEquals(rp, detached);
+    assertNotSame(rp, detached);
+  }
+
+  @Test
+  public void testGetDetachedValue_Pair() {
+    Person person = createPerson();
+    AvroType<Pair<Integer, Person>> pairType = Avros.pairs(Avros.ints(),
+        Avros.records(Person.class));
+    pairType.initialize(new Configuration());
+
+    Pair<Integer, Person> inputPair = Pair.of(1, person);
+    Pair<Integer, Person> detachedPair = pairType.getDetachedValue(inputPair);
+
+    assertEquals(inputPair, detachedPair);
+    assertNotSame(inputPair.second(), detachedPair.second());
+  }
+
+  @Test
+  public void testGetDetachedValue_Collection() {
+    Person person = createPerson();
+    List<Person> personList = Lists.newArrayList(person);
+
+    AvroType<Collection<Person>> collectionType = Avros.collections(Avros.records(Person.class));
+    collectionType.initialize(new Configuration());
+
+    Collection<Person> detachedCollection = collectionType.getDetachedValue(personList);
+
+    assertEquals(personList, detachedCollection);
+    Person detachedPerson = detachedCollection.iterator().next();
+
+    assertNotSame(person, detachedPerson);
+  }
+
+  @Test
+  public void testGetDetachedValue_Map() {
+    String key = "key";
+    Person value = createPerson();
+
+    Map<String, Person> stringPersonMap = Maps.newHashMap();
+    stringPersonMap.put(key, value);
+
+    AvroType<Map<String, Person>> mapType = Avros.maps(Avros.records(Person.class));
+    mapType.initialize(new Configuration());
+
+    Map<String, Person> detachedMap = mapType.getDetachedValue(stringPersonMap);
+
+    assertEquals(stringPersonMap, detachedMap);
+    assertNotSame(value, detachedMap.get(key));
+  }
+
+  @Test
+  public void testGetDetachedValue_TupleN() {
+    Person person = createPerson();
+    AvroType<TupleN> ptype = Avros.tuples(Avros.records(Person.class));
+    ptype.initialize(new Configuration());
+    TupleN tuple = new TupleN(person);
+    TupleN detachedTuple = ptype.getDetachedValue(tuple);
+
+    assertEquals(tuple, detachedTuple);
+    assertNotSame(person, detachedTuple.get(0));
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/types/avro/AvrosTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/types/avro/AvrosTest.java b/crunch-core/src/test/java/org/apache/crunch/types/avro/AvrosTest.java
new file mode 100644
index 0000000..5622a56
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/types/avro/AvrosTest.java
@@ -0,0 +1,325 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.avro;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.assertTrue;
+
+import java.nio.ByteBuffer;
+import java.util.Collection;
+import java.util.Collections;
+
+import org.apache.avro.Schema;
+import org.apache.avro.Schema.Type;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericData.Record;
+import org.apache.avro.reflect.ReflectData;
+import org.apache.avro.util.Utf8;
+import org.apache.crunch.Pair;
+import org.apache.crunch.Tuple3;
+import org.apache.crunch.Tuple4;
+import org.apache.crunch.TupleN;
+import org.apache.crunch.test.CrunchTestSupport;
+import org.apache.crunch.test.Person;
+import org.apache.crunch.test.StringWrapper;
+import org.apache.crunch.types.DeepCopier;
+import org.apache.crunch.types.PTableType;
+import org.apache.crunch.types.PType;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.TaskInputOutputContext;
+import org.junit.Test;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+
+/**
+ * TODO test Avros.register and Avros.containers
+ */
+public class AvrosTest {
+
+  @Test
+  public void testNulls() throws Exception {
+    Void n = null;
+    testInputOutputFn(Avros.nulls(), n, n);
+  }
+
+  @Test
+  public void testStrings() throws Exception {
+    String s = "abc";
+    Utf8 w = new Utf8(s);
+    testInputOutputFn(Avros.strings(), s, w);
+  }
+
+  @Test
+  public void testInts() throws Exception {
+    int j = 55;
+    testInputOutputFn(Avros.ints(), j, j);
+  }
+
+  @Test
+  public void testLongs() throws Exception {
+    long j = Long.MAX_VALUE;
+    testInputOutputFn(Avros.longs(), j, j);
+  }
+
+  @Test
+  public void testFloats() throws Exception {
+    float j = Float.MIN_VALUE;
+    testInputOutputFn(Avros.floats(), j, j);
+  }
+
+  @Test
+  public void testDoubles() throws Exception {
+    double j = Double.MIN_VALUE;
+    testInputOutputFn(Avros.doubles(), j, j);
+  }
+
+  @Test
+  public void testBooleans() throws Exception {
+    boolean j = true;
+    testInputOutputFn(Avros.booleans(), j, j);
+  }
+
+  @Test
+  public void testBytes() throws Exception {
+    byte[] bytes = new byte[] { 17, 26, -98 };
+    ByteBuffer bb = ByteBuffer.wrap(bytes);
+    testInputOutputFn(Avros.bytes(), bb, bb);
+  }
+
+  @Test
+  public void testCollections() throws Exception {
+    Collection<String> j = Lists.newArrayList();
+    j.add("a");
+    j.add("b");
+    Schema collectionSchema = Schema.createArray(Schema.createUnion(ImmutableList.of(Avros.strings().getSchema(),
+        Schema.create(Type.NULL))));
+    GenericData.Array<Utf8> w = new GenericData.Array<Utf8>(2, collectionSchema);
+    w.add(new Utf8("a"));
+    w.add(new Utf8("b"));
+    testInputOutputFn(Avros.collections(Avros.strings()), j, w);
+  }
+
+  @Test
+  public void testNestedTables() throws Exception {
+    PTableType<Long, Long> pll = Avros.tableOf(Avros.longs(), Avros.longs());
+    String schema = Avros.tableOf(pll, Avros.strings()).getSchema().toString();
+    assertNotNull(schema);
+  }
+
+  @Test
+  public void testPairs() throws Exception {
+    AvroType<Pair<String, String>> at = Avros.pairs(Avros.strings(), Avros.strings());
+    Pair<String, String> j = Pair.of("a", "b");
+    GenericData.Record w = new GenericData.Record(at.getSchema());
+    w.put(0, new Utf8("a"));
+    w.put(1, new Utf8("b"));
+    testInputOutputFn(at, j, w);
+  }
+
+  @Test
+  public void testPairEquals() throws Exception {
+    AvroType<Pair<Long, ByteBuffer>> at1 = Avros.pairs(Avros.longs(), Avros.bytes());
+    AvroType<Pair<Long, ByteBuffer>> at2 = Avros.pairs(Avros.longs(), Avros.bytes());
+    assertEquals(at1, at2);
+    assertEquals(at1.hashCode(), at2.hashCode());
+  }
+
+  @Test
+  @SuppressWarnings("rawtypes")
+  public void testTriples() throws Exception {
+    AvroType at = Avros.triples(Avros.strings(), Avros.strings(), Avros.strings());
+    Tuple3 j = Tuple3.of("a", "b", "c");
+    GenericData.Record w = new GenericData.Record(at.getSchema());
+    w.put(0, new Utf8("a"));
+    w.put(1, new Utf8("b"));
+    w.put(2, new Utf8("c"));
+    testInputOutputFn(at, j, w);
+  }
+
+  @Test
+  @SuppressWarnings("rawtypes")
+  public void testQuads() throws Exception {
+    AvroType at = Avros.quads(Avros.strings(), Avros.strings(), Avros.strings(), Avros.strings());
+    Tuple4 j = Tuple4.of("a", "b", "c", "d");
+    GenericData.Record w = new GenericData.Record(at.getSchema());
+    w.put(0, new Utf8("a"));
+    w.put(1, new Utf8("b"));
+    w.put(2, new Utf8("c"));
+    w.put(3, new Utf8("d"));
+    testInputOutputFn(at, j, w);
+  }
+
+  @Test
+  @SuppressWarnings("rawtypes")
+  public void testTupleN() throws Exception {
+    AvroType at = Avros.tuples(Avros.strings(), Avros.strings(), Avros.strings(), Avros.strings(), Avros.strings());
+    TupleN j = new TupleN("a", "b", "c", "d", "e");
+    GenericData.Record w = new GenericData.Record(at.getSchema());
+    w.put(0, new Utf8("a"));
+    w.put(1, new Utf8("b"));
+    w.put(2, new Utf8("c"));
+    w.put(3, new Utf8("d"));
+    w.put(4, new Utf8("e"));
+    testInputOutputFn(at, j, w);
+
+  }
+
+  @Test
+  @SuppressWarnings("rawtypes")
+  public void testWritables() throws Exception {
+    AvroType at = Avros.writables(LongWritable.class);
+    
+    TaskInputOutputContext<?, ?, ?, ?> testContext = CrunchTestSupport.getTestContext(new Configuration());
+    at.getInputMapFn().setContext(testContext);
+    at.getInputMapFn().initialize();
+    at.getOutputMapFn().setContext(testContext);
+    at.getOutputMapFn().initialize();
+    
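+    // Round-trip a LongWritable through the output and input map functions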
+    LongWritable lw = new LongWritable(1729L);
+    assertEquals(lw, at.getInputMapFn().map(at.getOutputMapFn().map(lw)));
+  }
+
+  @Test
+  @SuppressWarnings("rawtypes")
+  public void testTableOf() throws Exception {
+    AvroType at = Avros.tableOf(Avros.strings(), Avros.strings());
+    Pair<String, String> j = Pair.of("a", "b");
+    org.apache.avro.mapred.Pair w = new org.apache.avro.mapred.Pair(at.getSchema());
+    w.put(0, new Utf8("a"));
+    w.put(1, new Utf8("b"));
+    // TODO update this after resolving the o.a.a.m.Pair.equals issue
+    initialize(at);
+    assertEquals(j, at.getInputMapFn().map(w));
+    org.apache.avro.mapred.Pair converted = (org.apache.avro.mapred.Pair) at.getOutputMapFn().map(j);
+    assertEquals(w.key(), converted.key());
+    assertEquals(w.value(), converted.value());
+  }
+
+  private static void initialize(PType ptype) {
+    ptype.getInputMapFn().initialize();
+    ptype.getOutputMapFn().initialize();
+  }
+
+  @SuppressWarnings({ "unchecked", "rawtypes" })
+  protected static void testInputOutputFn(PType ptype, Object java, Object avro) {
+    initialize(ptype);
+    assertEquals(java, ptype.getInputMapFn().map(avro));
+    assertEquals(avro, ptype.getOutputMapFn().map(java));
+  }
+
+  @Test
+  public void testIsPrimitive_PrimitiveMappedType() {
+    assertTrue(Avros.isPrimitive(Avros.ints()));
+  }
+
+  @Test
+  public void testIsPrimitive_TruePrimitiveValue() {
+    AvroType truePrimitiveAvroType = new AvroType(int.class, Schema.create(Type.INT), new DeepCopier.NoOpDeepCopier());
+    assertTrue(Avros.isPrimitive(truePrimitiveAvroType));
+  }
+
+  @Test
+  public void testIsPrimitive_False() {
+    assertFalse(Avros.isPrimitive(Avros.reflects(Person.class)));
+  }
+
+  @Test
+  public void testPairs_Generic() {
+    Schema schema = ReflectData.get().getSchema(IntWritable.class);
+
+    GenericData.Record recordA = new GenericData.Record(schema);
+    GenericData.Record recordB = new GenericData.Record(schema);
+
+    AvroType<Pair<Record, Record>> pairType = Avros.pairs(Avros.generics(schema), Avros.generics(schema));
+    Pair<Record, Record> pair = Pair.of(recordA, recordB);
+    pairType.getOutputMapFn().initialize();
+    pairType.getInputMapFn().initialize();
+    Object mapped = pairType.getOutputMapFn().map(pair);
+    Pair<Record, Record> doubleMappedPair = pairType.getInputMapFn().map(mapped);
+
+    assertEquals(pair, doubleMappedPair);
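+    // Exercise hashCode() on the Avro-mapped value; it should not throw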
+    mapped.hashCode();
+  }
+
+  @Test
+  public void testPairs_Reflect() {
+    IntWritable intWritableA = new IntWritable(1);
+    IntWritable intWritableB = new IntWritable(2);
+
+    AvroType<Pair<IntWritable, IntWritable>> pairType = Avros.pairs(Avros.reflects(IntWritable.class),
+        Avros.reflects(IntWritable.class));
+    Pair<IntWritable, IntWritable> pair = Pair.of(intWritableA, intWritableB);
+    pairType.getOutputMapFn().initialize();
+    pairType.getInputMapFn().initialize();
+    Object mapped = pairType.getOutputMapFn().map(pair);
+
+    Pair<IntWritable, IntWritable> doubleMappedPair = pairType.getInputMapFn().map(mapped);
+
+    assertEquals(pair, doubleMappedPair);
+  }
+
+  @Test
+  public void testPairs_Specific() {
+    Person personA = new Person();
+    Person personB = new Person();
+
+    personA.age = 1;
+    personA.name = "A";
+    personA.siblingnames = Collections.<CharSequence> emptyList();
+
+    personB.age = 2;
+    personB.name = "B";
+    personB.siblingnames = Collections.<CharSequence> emptyList();
+
+    AvroType<Pair<Person, Person>> pairType = Avros.pairs(Avros.records(Person.class), Avros.records(Person.class));
+
+    Pair<Person, Person> pair = Pair.of(personA, personB);
+    pairType.getOutputMapFn().initialize();
+    pairType.getInputMapFn().initialize();
+
+    Object mapped = pairType.getOutputMapFn().map(pair);
+    Pair<Person, Person> doubleMappedPair = pairType.getInputMapFn().map(mapped);
+
+    assertEquals(pair, doubleMappedPair);
+
+  }
+
+  @Test
+  public void testPairOutputMapFn_VerifyNoObjectReuse() {
+    StringWrapper stringWrapper = new StringWrapper("Test");
+
+    Pair<Integer, StringWrapper> pair = Pair.of(1, stringWrapper);
+
+    AvroType<Pair<Integer, StringWrapper>> pairType = Avros.pairs(Avros.ints(), Avros.reflects(StringWrapper.class));
+
+    pairType.getOutputMapFn().initialize();
+
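+    // Mapping the same pair twice should yield equal but distinct objects (no instance reuse)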
+    Object outputMappedValueA = pairType.getOutputMapFn().map(pair);
+    Object outputMappedValueB = pairType.getOutputMapFn().map(pair);
+
+    assertEquals(outputMappedValueA, outputMappedValueB);
+    assertNotSame(outputMappedValueA, outputMappedValueB);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/types/writable/GenericArrayWritableTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/types/writable/GenericArrayWritableTest.java b/crunch-core/src/test/java/org/apache/crunch/types/writable/GenericArrayWritableTest.java
new file mode 100644
index 0000000..c807a90
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/types/writable/GenericArrayWritableTest.java
@@ -0,0 +1,70 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.writable;
+
+import static org.hamcrest.Matchers.hasItems;
+import static org.hamcrest.Matchers.is;
+import static org.hamcrest.Matchers.not;
+import static org.hamcrest.Matchers.sameInstance;
+import static org.junit.Assert.assertThat;
+
+import java.util.Arrays;
+
+import org.apache.crunch.test.Tests;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.junit.Test;
+
+
+public class GenericArrayWritableTest {
+
+  @Test
+  public void testEmpty() {
+    GenericArrayWritable<Text> src = new GenericArrayWritable<Text>(Text.class);
+    src.set(new Text[0]);
+
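+    // Round-trip the empty array into a fresh instance via the Tests.roundtrip helper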
+    GenericArrayWritable<Text> dest = Tests.roundtrip(src, new GenericArrayWritable<Text>());
+
+    assertThat(dest.get().length, is(0));
+  }
+
+  @Test
+  public void testNonEmpty() {
+    GenericArrayWritable<Text> src = new GenericArrayWritable<Text>(Text.class);
+    src.set(new Text[] { new Text("foo"), new Text("bar") });
+
+    GenericArrayWritable<Text> dest = Tests.roundtrip(src, new GenericArrayWritable<Text>());
+
+    assertThat(src.get(), not(sameInstance(dest.get())));
+    assertThat(dest.get().length, is(2));
+    assertThat(Arrays.asList(dest.get()), hasItems((Writable) new Text("foo"), new Text("bar")));
+  }
+
+  @Test
+  public void testNulls() {
+    GenericArrayWritable<Text> src = new GenericArrayWritable<Text>(Text.class);
+    src.set(new Text[] { new Text("a"), null, new Text("b") });
+
+    GenericArrayWritable<Text> dest = Tests.roundtrip(src, new GenericArrayWritable<Text>());
+
+    assertThat(src.get(), not(sameInstance(dest.get())));
+    assertThat(dest.get().length, is(3));
+    assertThat(Arrays.asList(dest.get()), hasItems((Writable) new Text("a"), new Text("b"), null));
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/types/writable/WritableDeepCopierTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/types/writable/WritableDeepCopierTest.java b/crunch-core/src/test/java/org/apache/crunch/types/writable/WritableDeepCopierTest.java
new file mode 100644
index 0000000..c49491b
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/types/writable/WritableDeepCopierTest.java
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.writable;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.assertNull;
+
+import org.apache.hadoop.io.Text;
+import org.junit.Before;
+import org.junit.Test;
+
+public class WritableDeepCopierTest {
+
+  private WritableDeepCopier<Text> deepCopier;
+
+  @Before
+  public void setUp() {
+    deepCopier = new WritableDeepCopier<Text>(Text.class);
+  }
+
+  @Test
+  public void testDeepCopy() {
+    Text text = new Text("value");
+    Text deepCopy = deepCopier.deepCopy(text);
+
+    assertEquals(text, deepCopy);
+    assertNotSame(text, deepCopy);
+  }
+  
+  @Test
+  public void testDeepCopy_Null() {
+    Text text = null;
+    Text deepCopy = deepCopier.deepCopy(text);
+    
+    assertNull(deepCopy);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/types/writable/WritableGroupedTableTypeTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/types/writable/WritableGroupedTableTypeTest.java b/crunch-core/src/test/java/org/apache/crunch/types/writable/WritableGroupedTableTypeTest.java
new file mode 100644
index 0000000..f6c201b
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/types/writable/WritableGroupedTableTypeTest.java
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.writable;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.assertSame;
+
+import java.util.List;
+
+import org.apache.crunch.Pair;
+import org.apache.crunch.types.PGroupedTableType;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Text;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+public class WritableGroupedTableTypeTest {
+
+  @Test
+  public void testGetDetachedValue() {
+    Integer integerValue = 42;
+    Text textValue = new Text("forty-two");
+    Iterable<Text> inputTextIterable = Lists.newArrayList(textValue);
+    Pair<Integer, Iterable<Text>> pair = Pair.of(integerValue, inputTextIterable);
+
+    PGroupedTableType<Integer, Text> groupedTableType = Writables.tableOf(Writables.ints(),
+        Writables.writables(Text.class)).getGroupedTableType();
+    groupedTableType.initialize(new Configuration());
+
+    Pair<Integer, Iterable<Text>> detachedPair = groupedTableType.getDetachedValue(pair);
+
+    assertSame(integerValue, detachedPair.first());
+    List<Text> textList = Lists.newArrayList(detachedPair.second());
+    assertEquals(inputTextIterable, textList);
+    assertNotSame(textValue, textList.get(0));
+
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/types/writable/WritableTableTypeTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/types/writable/WritableTableTypeTest.java b/crunch-core/src/test/java/org/apache/crunch/types/writable/WritableTableTypeTest.java
new file mode 100644
index 0000000..697a28c
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/types/writable/WritableTableTypeTest.java
@@ -0,0 +1,47 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.writable;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.assertSame;
+
+import org.apache.crunch.Pair;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Text;
+import org.junit.Test;
+
+public class WritableTableTypeTest {
+
+  @Test
+  public void testGetDetachedValue() {
+    Integer integerValue = 42;
+    Text textValue = new Text("forty-two");
+    Pair<Integer, Text> pair = Pair.of(integerValue, textValue);
+
+    WritableTableType<Integer, Text> tableType = Writables.tableOf(Writables.ints(),
+        Writables.writables(Text.class));
+    tableType.initialize(new Configuration());
+    Pair<Integer, Text> detachedPair = tableType.getDetachedValue(pair);
+
+    assertSame(integerValue, detachedPair.first());
+    assertEquals(textValue, detachedPair.second());
+    assertNotSame(textValue, detachedPair.second());
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/types/writable/WritableTypeTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/types/writable/WritableTypeTest.java b/crunch-core/src/test/java/org/apache/crunch/types/writable/WritableTypeTest.java
new file mode 100644
index 0000000..65e946b
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/types/writable/WritableTypeTest.java
@@ -0,0 +1,97 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.writable;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotSame;
+
+import java.util.Collection;
+import java.util.Map;
+
+import org.apache.crunch.Pair;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.MapWritable;
+import org.apache.hadoop.io.Text;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+
+public class WritableTypeTest {
+
+  @Test(expected = IllegalStateException.class)
+  public void testGetDetachedValue_NotInitialized() {
+    WritableType<Text, Text> textWritableType = Writables.writables(Text.class);
+    Text value = new Text("test");
+
+    // Calling getDetachedValue without first calling initialize should throw an
+    // exception
+    textWritableType.getDetachedValue(value);
+  }
+
+  @Test
+  public void testGetDetachedValue_CustomWritable() {
+    WritableType<Text, Text> textWritableType = Writables.writables(Text.class);
+    textWritableType.initialize(new Configuration());
+    Text value = new Text("test");
+
+    Text detachedValue = textWritableType.getDetachedValue(value);
+    assertEquals(value, detachedValue);
+    assertNotSame(value, detachedValue);
+  }
+
+  @Test
+  public void testGetDetachedValue_Collection() {
+    Collection<Text> textCollection = Lists.newArrayList(new Text("value"));
+    WritableType<Collection<Text>, GenericArrayWritable<Text>> ptype = Writables
+        .collections(Writables.writables(Text.class));
+    ptype.initialize(new Configuration());
+
+    Collection<Text> detachedCollection = ptype.getDetachedValue(textCollection);
+    assertEquals(textCollection, detachedCollection);
+    assertNotSame(textCollection.iterator().next(), detachedCollection.iterator().next());
+  }
+
+  @Test
+  public void testGetDetachedValue_Tuple() {
+    Pair<Text, Text> textPair = Pair.of(new Text("one"), new Text("two"));
+    WritableType<Pair<Text, Text>, TupleWritable> ptype = Writables.pairs(
+        Writables.writables(Text.class), Writables.writables(Text.class));
+    ptype.initialize(new Configuration());
+
+    Pair<Text, Text> detachedPair = ptype.getDetachedValue(textPair);
+    assertEquals(textPair, detachedPair);
+    assertNotSame(textPair.first(), detachedPair.first());
+    assertNotSame(textPair.second(), detachedPair.second());
+  }
+
+  @Test
+  public void testGetDetachedValue_Map() {
+    Map<String, Text> stringTextMap = Maps.newHashMap();
+    stringTextMap.put("key", new Text("value"));
+
+    WritableType<Map<String, Text>, MapWritable> ptype = Writables.maps(Writables
+        .writables(Text.class));
+    ptype.initialize(new Configuration());
+    Map<String, Text> detachedMap = ptype.getDetachedValue(stringTextMap);
+
+    assertEquals(stringTextMap, detachedMap);
+    assertNotSame(stringTextMap.get("key"), detachedMap.get("key"));
+  }
+
+}


[24/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/fn/AggregatorsTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/fn/AggregatorsTest.java b/crunch-core/src/test/java/org/apache/crunch/fn/AggregatorsTest.java
new file mode 100644
index 0000000..6ee1972
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/fn/AggregatorsTest.java
@@ -0,0 +1,239 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.fn;
+
+import static org.apache.crunch.fn.Aggregators.MAX_BIGINTS;
+import static org.apache.crunch.fn.Aggregators.MAX_DOUBLES;
+import static org.apache.crunch.fn.Aggregators.MAX_FLOATS;
+import static org.apache.crunch.fn.Aggregators.MAX_INTS;
+import static org.apache.crunch.fn.Aggregators.MAX_LONGS;
+import static org.apache.crunch.fn.Aggregators.MAX_N;
+import static org.apache.crunch.fn.Aggregators.MIN_BIGINTS;
+import static org.apache.crunch.fn.Aggregators.MIN_DOUBLES;
+import static org.apache.crunch.fn.Aggregators.MIN_FLOATS;
+import static org.apache.crunch.fn.Aggregators.MIN_INTS;
+import static org.apache.crunch.fn.Aggregators.MIN_LONGS;
+import static org.apache.crunch.fn.Aggregators.MIN_N;
+import static org.apache.crunch.fn.Aggregators.STRING_CONCAT;
+import static org.apache.crunch.fn.Aggregators.SUM_BIGINTS;
+import static org.apache.crunch.fn.Aggregators.SUM_DOUBLES;
+import static org.apache.crunch.fn.Aggregators.SUM_FLOATS;
+import static org.apache.crunch.fn.Aggregators.SUM_INTS;
+import static org.apache.crunch.fn.Aggregators.SUM_LONGS;
+import static org.hamcrest.Matchers.closeTo;
+import static org.hamcrest.Matchers.is;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertThat;
+
+import java.math.BigInteger;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.crunch.Aggregator;
+import org.apache.crunch.CombineFn;
+import org.apache.crunch.Pair;
+import org.apache.crunch.Tuple3;
+import org.apache.crunch.Tuple4;
+import org.apache.crunch.TupleN;
+import org.apache.crunch.impl.mem.emit.InMemoryEmitter;
+import org.junit.Test;
+
+import com.google.common.base.Function;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Iterables;
+
+
+public class AggregatorsTest {
+
+  @Test
+  public void testSums2() {
+    assertThat(sapply(SUM_INTS(), 1, 2, 3, -4), is(2));
+    assertThat(sapply(SUM_LONGS(), 1L, 2L, 3L, -4L, 5000000000L), is(5000000002L));
+    assertThat(sapply(SUM_FLOATS(), 1f, 2f, 3f, -4f), is(2f));
+    assertThat(sapply(SUM_DOUBLES(), 0.1, 0.2, 0.3), is(closeTo(0.6, 0.00001)));
+    assertThat(sapply(SUM_BIGINTS(), bigInt("7"), bigInt("3")), is(bigInt("10")));
+  }
+
+  @Test
+  public void testSums() {
+    assertThat(sapply(SUM_LONGS(), 29L, 17L, 1729L), is(1775L));
+    assertThat(sapply(SUM_LONGS(), 29L, 7L, 1729L), is(1765L));
+    assertThat(sapply(SUM_INTS(), 29, 17, 1729), is(1775));
+    assertThat(sapply(SUM_FLOATS(), 29f, 17f, 1729f), is(1775.0f));
+    assertThat(sapply(SUM_DOUBLES(), 29.0, 17.0, 1729.0), is(1775.0));
+    assertThat(sapply(SUM_BIGINTS(), bigInt("29"), bigInt("17"), bigInt("1729")), is(bigInt("1775")));
+  }
+
+  @Test
+  public void testMax() {
+    assertThat(sapply(MAX_LONGS(), 29L, 17L, 1729L), is(1729L));
+    assertThat(sapply(MAX_INTS(), 29, 17, 1729), is(1729));
+    assertThat(sapply(MAX_FLOATS(), 29f, 17f, 1729f), is(1729.0f));
+    assertThat(sapply(MAX_DOUBLES(), 29.0, 17.0, 1729.0), is(1729.0));
+    assertThat(sapply(MAX_FLOATS(), 29f, 1745f, 17f, 1729f), is(1745.0f));
+    assertThat(sapply(MAX_BIGINTS(), bigInt("29"), bigInt("17"), bigInt("1729")), is(bigInt("1729")));
+  }
+
+  @Test
+  public void testMin() {
+    assertThat(sapply(MIN_LONGS(), 29L, 17L, 1729L), is(17L));
+    assertThat(sapply(MIN_INTS(), 29, 17, 1729), is(17));
+    assertThat(sapply(MIN_FLOATS(), 29f, 17f, 1729f), is(17.0f));
+    assertThat(sapply(MIN_DOUBLES(), 29.0, 17.0, 1729.0), is(17.0));
+    assertThat(sapply(MIN_INTS(), 29, 170, 1729), is(29));
+    assertThat(sapply(MIN_BIGINTS(), bigInt("29"), bigInt("17"), bigInt("1729")), is(bigInt("17")));
+  }
+
+  @Test
+  public void testMaxN() {
+    assertThat(apply(MAX_INTS(2), 17, 34, 98, 29, 1009), is(ImmutableList.of(98, 1009)));
+    assertThat(apply(MAX_N(1, String.class), "b", "a"), is(ImmutableList.of("b")));
+    assertThat(apply(MAX_N(3, String.class), "b", "a", "d", "c"), is(ImmutableList.of("b", "c", "d")));
+  }
+
+  @Test
+  public void testMinN() {
+    assertThat(apply(MIN_INTS(2), 17, 34, 98, 29, 1009), is(ImmutableList.of(17, 29)));
+    assertThat(apply(MIN_N(1, String.class), "b", "a"), is(ImmutableList.of("a")));
+    assertThat(apply(MIN_N(3, String.class), "b", "a", "d", "c"), is(ImmutableList.of("a", "b", "c")));
+  }
+
+  @Test
+  public void testFirstN() {
+    assertThat(apply(Aggregators.<Integer>FIRST_N(2), 17, 34, 98, 29, 1009), is(ImmutableList.of(17, 34)));
+  }
+
+  @Test
+  public void testLastN() {
+    assertThat(apply(Aggregators.<Integer>LAST_N(2), 17, 34, 98, 29, 1009), is(ImmutableList.of(29, 1009)));
+  }
+  
+  @Test
+  public void testUniqueElements() {
+    assertThat(ImmutableSet.copyOf(apply(Aggregators.<Integer>UNIQUE_ELEMENTS(), 17, 29, 29, 16, 17)),
+        is(ImmutableSet.of(17, 29, 16)));
+    
+    Iterable<Integer> samp = apply(Aggregators.<Integer>SAMPLE_UNIQUE_ELEMENTS(2), 17, 29, 16, 17, 29, 16);
+    assertThat(Iterables.size(samp), is(2));
+    assertThat(ImmutableSet.copyOf(samp).size(), is(2)); // check that the two elements are unique
+  }
+  
+  @Test
+  public void testPairs() {
+    List<Pair<Long, Double>> input = ImmutableList.of(Pair.of(1720L, 17.29), Pair.of(9L, -3.14));
+    Aggregator<Pair<Long, Double>> a = Aggregators.pairAggregator(SUM_LONGS(), MIN_DOUBLES());
+
+    assertThat(sapply(a, input), is(Pair.of(1729L, -3.14)));
+  }
+
+  @Test
+  public void testPairsTwoLongs() {
+    List<Pair<Long, Long>> input = ImmutableList.of(Pair.of(1720L, 1L), Pair.of(9L, 19L));
+    Aggregator<Pair<Long, Long>> a = Aggregators.pairAggregator(SUM_LONGS(), SUM_LONGS());
+
+    assertThat(sapply(a, input), is(Pair.of(1729L, 20L)));
+  }
+
+  @Test
+  public void testTrips() {
+    List<Tuple3<Float, Double, Double>> input = ImmutableList.of(Tuple3.of(17.29f, 12.2, 0.1),
+        Tuple3.of(3.0f, 1.2, 3.14), Tuple3.of(-1.0f, 14.5, -0.98));
+    Aggregator<Tuple3<Float, Double, Double>> a = Aggregators.tripAggregator(
+        MAX_FLOATS(), MAX_DOUBLES(), MIN_DOUBLES());
+
+    assertThat(sapply(a, input), is(Tuple3.of(17.29f, 14.5, -0.98)));
+  }
+
+  @Test
+  public void testQuads() {
+    List<Tuple4<Float, Double, Double, Integer>> input = ImmutableList.of(Tuple4.of(17.29f, 12.2, 0.1, 1),
+        Tuple4.of(3.0f, 1.2, 3.14, 2), Tuple4.of(-1.0f, 14.5, -0.98, 3));
+    Aggregator<Tuple4<Float, Double, Double, Integer>> a = Aggregators.quadAggregator(
+        MAX_FLOATS(), MAX_DOUBLES(), MIN_DOUBLES(), SUM_INTS());
+
+    assertThat(sapply(a, input), is(Tuple4.of(17.29f, 14.5, -0.98, 6)));
+  }
+
+  @Test
+  public void testTupleN() {
+    List<TupleN> input = ImmutableList.of(new TupleN(1, 3.0, 1, 2.0, 4L), new TupleN(4, 17.0, 1, 9.7, 12L));
+    Aggregator<TupleN> a = Aggregators.tupleAggregator(
+        MIN_INTS(), SUM_DOUBLES(), MAX_INTS(), MIN_DOUBLES(), MAX_LONGS());
+
+    assertThat(sapply(a, input), is(new TupleN(1, 20.0, 1, 2.0, 12L)));
+  }
+
+  @Test
+  public void testConcatenation() {
+    assertThat(sapply(STRING_CONCAT("", true), "foo", "foobar", "bar"), is("foofoobarbar"));
+    assertThat(sapply(STRING_CONCAT("/", false), "foo", "foobar", "bar"), is("foo/foobar/bar"));
+    assertThat(sapply(STRING_CONCAT(" ", true), " ", ""), is("  "));
+    assertThat(sapply(STRING_CONCAT(" ", true), Arrays.asList(null, "")), is(""));
+    assertThat(sapply(STRING_CONCAT(" ", true, 20, 3), "foo", "foobar", "bar"), is("foo bar"));
+    assertThat(sapply(STRING_CONCAT(" ", true, 10, 6), "foo", "foobar", "bar"), is("foo foobar"));
+    assertThat(sapply(STRING_CONCAT(" ", true, 9, 6), "foo", "foobar", "bar"), is("foo bar"));
+  }
+
+  @Test(expected = NullPointerException.class)
+  public void testConcatenationNullException() {
+    sapply(STRING_CONCAT(" ", false), Arrays.asList(null, "" ));
+  }
+
+
+  private static <T> T sapply(Aggregator<T> a, T... values) {
+    return sapply(a, ImmutableList.copyOf(values));
+  }
+
+  private static <T> T sapply(Aggregator<T> a, Iterable<T> values) {
+    return Iterables.getOnlyElement(apply(a, values));
+  }
+
+  private static <T> ImmutableList<T> apply(Aggregator<T> a, T... values) {
+    return apply(a, ImmutableList.copyOf(values));
+  }
+
+  private static <T> ImmutableList<T> apply(Aggregator<T> a, Iterable<T> values) {
+    CombineFn<String, T> fn = Aggregators.toCombineFn(a);
+
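+    // First pass of the values through the CombineFn derived from the aggregator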
+    InMemoryEmitter<Pair<String, T>> e1 = new InMemoryEmitter<Pair<String,T>>();
+    fn.process(Pair.of("", values), e1);
+
+    // and a second time to make sure Aggregator.reset() works
+    InMemoryEmitter<Pair<String, T>> e2 = new InMemoryEmitter<Pair<String,T>>();
+    fn.process(Pair.of("", values), e2);
+
+    assertEquals(getValues(e1), getValues(e2));
+
+    return getValues(e1);
+  }
+
+  private static <K, V> ImmutableList<V> getValues(InMemoryEmitter<Pair<K, V>> emitter) {
+    return ImmutableList.copyOf(
+        Iterables.transform(emitter.getOutput(), new Function<Pair<K, V>, V>() {
+      @Override
+      public V apply(Pair<K, V> input) {
+        return input.second();
+      }
+    }));
+  }
+
+  private static BigInteger bigInt(String value) {
+    return new BigInteger(value);
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/fn/ExtractKeyFnTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/fn/ExtractKeyFnTest.java b/crunch-core/src/test/java/org/apache/crunch/fn/ExtractKeyFnTest.java
new file mode 100644
index 0000000..b5b2a1b
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/fn/ExtractKeyFnTest.java
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.fn;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.crunch.MapFn;
+import org.apache.crunch.Pair;
+import org.junit.Test;
+
+@SuppressWarnings("serial")
+public class ExtractKeyFnTest {
+
+  protected static final MapFn<String, Integer> mapFn = new MapFn<String, Integer>() {
+    @Override
+    public Integer map(String input) {
+      return input.hashCode();
+    }
+  };
+
+  protected static final ExtractKeyFn<Integer, String> one = new ExtractKeyFn<Integer, String>(mapFn);
+
+  @Test
+  public void test() {
+    StoreLastEmitter<Pair<Integer, String>> emitter = StoreLastEmitter.create();
+    one.process("boza", emitter);
+    assertEquals(Pair.of("boza".hashCode(), "boza"), emitter.getLast());
+  }
+}
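
ExtractKeyFn is the internal function behind keying a PCollection; a hedged sketch of the user-facing form, assuming a PCollection<String> named "lines" created elsewhere, would key each element by its hash code much like the test above:

    import org.apache.crunch.MapFn;
    import org.apache.crunch.PCollection;
    import org.apache.crunch.PTable;
    import org.apache.crunch.types.writable.Writables;

    public class ExtractKeySketch {
      // Hedged sketch: "lines" is an assumed PCollection<String> built elsewhere.
      public static PTable<Integer, String> keyByHashCode(PCollection<String> lines) {
        return lines.by(new MapFn<String, Integer>() {
          @Override
          public Integer map(String input) {
            return input.hashCode(); // same key-extraction logic as the test above
          }
        }, Writables.ints());
      }
    }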

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/fn/FilterFnTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/fn/FilterFnTest.java b/crunch-core/src/test/java/org/apache/crunch/fn/FilterFnTest.java
new file mode 100644
index 0000000..a649f99
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/fn/FilterFnTest.java
@@ -0,0 +1,85 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.fn;
+
+import static org.hamcrest.Matchers.is;
+import static org.junit.Assert.assertThat;
+
+import org.apache.crunch.FilterFn;
+import org.junit.Test;
+
+import com.google.common.base.Predicates;
+
+
+public class FilterFnTest {
+
+  private static final FilterFn<String> TRUE = FilterFns.<String>ACCEPT_ALL();
+  private static final FilterFn<String> FALSE = FilterFns.<String>REJECT_ALL();
+
+  @Test
+  public void testAcceptAll() {
+    assertThat(TRUE.accept(""), is(true));
+    assertThat(TRUE.accept("foo"), is(true));
+  }
+
+  @Test
+  public void testRejectAll() {
+    assertThat(FALSE.accept(""), is(false));
+    assertThat(FALSE.accept("foo"), is(false));
+
+    Predicates.or(Predicates.alwaysFalse(), Predicates.alwaysTrue());
+  }
+
+  @Test
+  public void testAnd() {
+    assertThat(FilterFns.and(TRUE, TRUE).accept("foo"), is(true));
+    assertThat(FilterFns.and(TRUE, FALSE).accept("foo"), is(false));
+  }
+
+  @Test
+  @SuppressWarnings("unchecked")
+  public void testGeneric() {
+    assertThat(FilterFns.and(TRUE).accept("foo"), is(true));
+    assertThat(FilterFns.and(FALSE).accept("foo"), is(false));
+    assertThat(FilterFns.and(FALSE, FALSE, FALSE).accept("foo"), is(false));
+    assertThat(FilterFns.and(TRUE, TRUE, FALSE).accept("foo"), is(false));
+    assertThat(FilterFns.and(FALSE, FALSE, FALSE, FALSE).accept("foo"), is(false));
+  }
+
+  @Test
+  public void testOr() {
+    assertThat(FilterFns.or(FALSE, TRUE).accept("foo"), is(true));
+    assertThat(FilterFns.or(TRUE, FALSE).accept("foo"), is(true));
+  }
+
+  @Test
+  @SuppressWarnings("unchecked")
+  public void testOrGeneric() {
+    assertThat(FilterFns.or(TRUE).accept("foo"), is(true));
+    assertThat(FilterFns.or(FALSE).accept("foo"), is(false));
+    assertThat(FilterFns.or(TRUE, FALSE, TRUE).accept("foo"), is(true));
+    assertThat(FilterFns.or(FALSE, FALSE, TRUE).accept("foo"), is(true));
+    assertThat(FilterFns.or(FALSE, FALSE, FALSE).accept("foo"), is(false));
+  }
+
+  @Test
+  public void testNot() {
+    assertThat(FilterFns.not(TRUE).accept("foo"), is(false));
+    assertThat(FilterFns.not(FALSE).accept("foo"), is(true));
+  }
+}
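
The composed filters exercised here are meant to be handed to PCollection#filter. A minimal sketch, assuming a PCollection<String> named "words" and two caller-supplied filters (none of which are part of this commit):

    import org.apache.crunch.FilterFn;
    import org.apache.crunch.PCollection;
    import org.apache.crunch.fn.FilterFns;

    public class FilterFnUsageSketch {
      // Hedged sketch: keep only elements accepted by both filters;
      // FilterFns.or and FilterFns.not compose in the same way.
      public static PCollection<String> applyBoth(PCollection<String> words,
                                                  FilterFn<String> a,
                                                  FilterFn<String> b) {
        return words.filter(FilterFns.and(a, b));
      }
    }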

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/fn/MapKeysTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/fn/MapKeysTest.java b/crunch-core/src/test/java/org/apache/crunch/fn/MapKeysTest.java
new file mode 100644
index 0000000..6b73700
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/fn/MapKeysTest.java
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.fn;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.crunch.Pair;
+import org.junit.Test;
+
+@SuppressWarnings("serial")
+public class MapKeysTest {
+
+  protected static final MapKeysFn<String, Integer, Integer> one = new MapKeysFn<String, Integer, Integer>() {
+    @Override
+    public Integer map(String input) {
+      return 1;
+    }
+  };
+
+  protected static final MapKeysFn<String, Integer, Integer> two = new MapKeysFn<String, Integer, Integer>() {
+    @Override
+    public Integer map(String input) {
+      return 2;
+    }
+  };
+
+  @Test
+  public void test() {
+    StoreLastEmitter<Pair<Integer, Integer>> emitter = StoreLastEmitter.create();
+    one.process(Pair.of("k", Integer.MAX_VALUE), emitter);
+    assertEquals(Pair.of(1, Integer.MAX_VALUE), emitter.getLast());
+    two.process(Pair.of("k", Integer.MAX_VALUE), emitter);
+    assertEquals(Pair.of(2, Integer.MAX_VALUE), emitter.getLast());
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/fn/MapValuesTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/fn/MapValuesTest.java b/crunch-core/src/test/java/org/apache/crunch/fn/MapValuesTest.java
new file mode 100644
index 0000000..097b008
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/fn/MapValuesTest.java
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.fn;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.crunch.Pair;
+import org.junit.Test;
+
+@SuppressWarnings("serial")
+public class MapValuesTest {
+
+  static final MapValuesFn<String, String, Integer> one = new MapValuesFn<String, String, Integer>() {
+    @Override
+    public Integer map(String input) {
+      return 1;
+    }
+  };
+
+  static final MapValuesFn<String, String, Integer> two = new MapValuesFn<String, String, Integer>() {
+    @Override
+    public Integer map(String input) {
+      return 2;
+    }
+  };
+
+  @Test
+  public void test() {
+    StoreLastEmitter<Pair<String, Integer>> emitter = StoreLastEmitter.create();
+    one.process(Pair.of("k", "v"), emitter);
+    assertEquals(Pair.of("k", 1), emitter.getLast());
+    two.process(Pair.of("k", "v"), emitter);
+    assertEquals(Pair.of("k", 2), emitter.getLast());
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/fn/PairMapTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/fn/PairMapTest.java b/crunch-core/src/test/java/org/apache/crunch/fn/PairMapTest.java
new file mode 100644
index 0000000..bef6c85
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/fn/PairMapTest.java
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.fn;
+
+import static org.junit.Assert.assertTrue;
+
+import org.apache.crunch.MapFn;
+import org.apache.crunch.Pair;
+import org.junit.Test;
+
+@SuppressWarnings("serial")
+public class PairMapTest {
+
+  static final MapFn<String, Integer> one = new MapFn<String, Integer>() {
+    @Override
+    public Integer map(String input) {
+      return 1;
+    }
+  };
+
+  static final MapFn<String, Integer> two = new MapFn<String, Integer>() {
+    @Override
+    public Integer map(String input) {
+      return 2;
+    }
+  };
+
+  @Test
+  public void testPairMap() {
+    StoreLastEmitter<Pair<Integer, Integer>> emitter = StoreLastEmitter.create();
+    PairMapFn<String, String, Integer, Integer> fn = new PairMapFn<String, String, Integer, Integer>(one, two);
+    fn.process(Pair.of("a", "b"), emitter);
+    Pair<Integer, Integer> pair = emitter.getLast();
+    assertTrue(pair.first() == 1);
+    assertTrue(pair.second() == 2);
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/fn/StoreLastEmitter.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/fn/StoreLastEmitter.java b/crunch-core/src/test/java/org/apache/crunch/fn/StoreLastEmitter.java
new file mode 100644
index 0000000..cdd8754
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/fn/StoreLastEmitter.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.fn;
+
+import org.apache.crunch.Emitter;
+
+class StoreLastEmitter<T> implements Emitter<T> {
+  private T last;
+
+  @Override
+  public void emit(T emitted) {
+    last = emitted;
+  }
+
+  public T getLast() {
+    return last;
+  }
+
+  @Override
+  public void flush() {
+  }
+
+  public static <T> StoreLastEmitter<T> create() {
+    return new StoreLastEmitter<T>();
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/impl/SingleUseIterableTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/impl/SingleUseIterableTest.java b/crunch-core/src/test/java/org/apache/crunch/impl/SingleUseIterableTest.java
new file mode 100644
index 0000000..811a0a3
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/impl/SingleUseIterableTest.java
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.List;
+
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+public class SingleUseIterableTest {
+
+  @Test
+  public void testIterator() {
+    List<Integer> values = Lists.newArrayList(1,2,3);
+    
+    SingleUseIterable<Integer> iterable = new SingleUseIterable<Integer>(values);
+
+    List<Integer> retrievedValues = Lists.newArrayList(iterable);
+    
+    assertEquals(values, retrievedValues);
+  }
+  
+  @Test(expected=IllegalStateException.class)
+  public void testIterator_MultipleCalls() {
+    List<Integer> values = Lists.newArrayList(1,2,3);
+    
+    SingleUseIterable<Integer> iterable = new SingleUseIterable<Integer>(values);
+
+    List<Integer> retrievedValues = Lists.newArrayList(iterable);
+
+    // Iterating a second time is expected to fail fast with IllegalStateException
+    for (Integer n : iterable) {
+      // never reached: the exception is thrown when the second iterator is requested
+    }
+  }
+
+}
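
The contract pinned down by these tests (one full pass allowed, any further iterator request fails fast) can be captured by a one-shot wrapper along the following lines; this is an illustrative sketch, not the committed SingleUseIterable source:

    import java.util.Iterator;

    // Illustrative one-shot wrapper mirroring the behaviour tested above.
    class OneShotIterable<T> implements Iterable<T> {
      private final Iterable<T> delegate;
      private boolean used = false;

      OneShotIterable(Iterable<T> delegate) {
        this.delegate = delegate;
      }

      @Override
      public Iterator<T> iterator() {
        if (used) {
          throw new IllegalStateException("This Iterable may only be iterated once");
        }
        used = true;
        return delegate.iterator();
      }
    }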

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/impl/mr/MRPipelineTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/impl/mr/MRPipelineTest.java b/crunch-core/src/test/java/org/apache/crunch/impl/mr/MRPipelineTest.java
new file mode 100644
index 0000000..9ed7a46
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/impl/mr/MRPipelineTest.java
@@ -0,0 +1,86 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr;
+
+import static org.junit.Assert.assertEquals;
+import static org.mockito.Mockito.doReturn;
+import static org.mockito.Mockito.spy;
+import static org.mockito.Mockito.when;
+
+import java.io.IOException;
+
+import org.apache.crunch.SourceTarget;
+import org.apache.crunch.impl.mr.collect.PCollectionImpl;
+import org.apache.crunch.impl.mr.run.RuntimeParameters;
+import org.apache.crunch.io.ReadableSourceTarget;
+import org.apache.crunch.types.avro.Avros;
+import org.apache.hadoop.conf.Configuration;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+import org.junit.runner.RunWith;
+import org.mockito.Mock;
+import org.mockito.runners.MockitoJUnitRunner;
+
+
+@RunWith(MockitoJUnitRunner.class)
+public class MRPipelineTest {
+  @Rule
+  public TemporaryFolder tempDir = new TemporaryFolder();
+  @Mock
+  private PCollectionImpl<String> pcollection;
+  @Mock
+  private ReadableSourceTarget<String> readableSourceTarget;
+  @Mock
+  private SourceTarget<String> nonReadableSourceTarget;
+  private MRPipeline pipeline;
+
+  @Before
+  public void setUp() throws IOException {
+    Configuration conf = new Configuration();
+    conf.set(RuntimeParameters.TMP_DIR, tempDir.getRoot().getAbsolutePath());
+    pipeline = spy(new MRPipeline(MRPipelineTest.class, conf));
+  }
+
+  @Test
+  public void testGetMaterializeSourceTarget_AlreadyMaterialized() {
+    when(pcollection.getMaterializedAt()).thenReturn(readableSourceTarget);
+
+    assertEquals(readableSourceTarget, pipeline.getMaterializeSourceTarget(pcollection));
+  }
+
+  @Test
+  public void testGetMaterializeSourceTarget_NotMaterialized_HasOutput() {
+    when(pcollection.getPType()).thenReturn(Avros.strings());
+    doReturn(readableSourceTarget).when(pipeline).createIntermediateOutput(Avros.strings());
+    when(pcollection.getMaterializedAt()).thenReturn(null);
+
+    assertEquals(readableSourceTarget, pipeline.getMaterializeSourceTarget(pcollection));
+  }
+
+  @Test(expected = IllegalArgumentException.class)
+  public void testGetMaterializeSourceTarget_NotMaterialized_NotReadableSourceTarget() {
+    when(pcollection.getPType()).thenReturn(Avros.strings());
+    doReturn(nonReadableSourceTarget).when(pipeline).createIntermediateOutput(Avros.strings());
+    when(pcollection.getMaterializedAt()).thenReturn(null);
+
+    pipeline.getMaterializeSourceTarget(pcollection);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/impl/mr/collect/DoCollectionImplTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/impl/mr/collect/DoCollectionImplTest.java b/crunch-core/src/test/java/org/apache/crunch/impl/mr/collect/DoCollectionImplTest.java
new file mode 100644
index 0000000..fd582bc
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/impl/mr/collect/DoCollectionImplTest.java
@@ -0,0 +1,112 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.collect;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.List;
+
+import org.apache.crunch.DoFn;
+import org.apache.crunch.Emitter;
+import org.apache.crunch.impl.mr.plan.DoNode;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.writable.Writables;
+import org.junit.Test;
+
+public class DoCollectionImplTest {
+
+  @Test
+  public void testGetSizeInternal_NoScaleFactor() {
+    runScaleTest(100L, 1.0f, 100L);
+  }
+
+  @Test
+  public void testGetSizeInternal_ScaleFactorBelowOne() {
+    runScaleTest(100L, 0.5f, 50L);
+  }
+
+  @Test
+  public void testGetSizeInternal_ScaleFactorAboveOne() {
+    runScaleTest(100L, 1.5f, 150L);
+  }
+
+  private void runScaleTest(long inputSize, float scaleFactor, long expectedScaledSize) {
+    PCollectionImpl<String> parentCollection = new SizedPCollectionImpl("Sized collection", inputSize);
+
+    DoCollectionImpl<String> doCollectionImpl = new DoCollectionImpl<String>("Scaled collection", parentCollection,
+        new ScaledFunction(scaleFactor), Writables.strings());
+
+    assertEquals(expectedScaledSize, doCollectionImpl.getSizeInternal());
+  }
+
+  static class ScaledFunction extends DoFn<String, String> {
+
+    private float scaleFactor;
+
+    public ScaledFunction(float scaleFactor) {
+      this.scaleFactor = scaleFactor;
+    }
+
+    @Override
+    public void process(String input, Emitter<String> emitter) {
+      emitter.emit(input);
+    }
+
+    @Override
+    public float scaleFactor() {
+      return scaleFactor;
+    }
+
+  }
+
+  static class SizedPCollectionImpl extends PCollectionImpl<String> {
+
+    private long internalSize;
+
+    public SizedPCollectionImpl(String name, long internalSize) {
+      super(name);
+      this.internalSize = internalSize;
+    }
+
+    @Override
+    public PType getPType() {
+      return null;
+    }
+
+    @Override
+    public DoNode createDoNode() {
+      return null;
+    }
+
+    @Override
+    public List getParents() {
+      return null;
+    }
+
+    @Override
+    protected void acceptInternal(Visitor visitor) {
+    }
+
+    @Override
+    protected long getSizeInternal() {
+      return internalSize;
+    }
+
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/impl/mr/collect/DoTableImplTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/impl/mr/collect/DoTableImplTest.java b/crunch-core/src/test/java/org/apache/crunch/impl/mr/collect/DoTableImplTest.java
new file mode 100644
index 0000000..89b9944
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/impl/mr/collect/DoTableImplTest.java
@@ -0,0 +1,86 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.collect;
+
+import static org.apache.crunch.types.writable.Writables.strings;
+import static org.apache.crunch.types.writable.Writables.tableOf;
+import static org.junit.Assert.assertEquals;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyNoMoreInteractions;
+import static org.mockito.Mockito.when;
+
+import org.apache.crunch.DoFn;
+import org.apache.crunch.Emitter;
+import org.apache.crunch.Pair;
+import org.junit.Test;
+
+public class DoTableImplTest {
+
+  @Test
+  public void testGetSizeInternal_NoScaleFactor() {
+    runScaleTest(100L, 1.0f, 100L);
+  }
+
+  @Test
+  public void testGetSizeInternal_ScaleFactorBelowOne() {
+    runScaleTest(100L, 0.5f, 50L);
+  }
+
+  @Test
+  public void testGetSizeInternal_ScaleFactorAboveOne() {
+    runScaleTest(100L, 1.5f, 150L);
+  }
+
+  private void runScaleTest(long inputSize, float scaleFactor, long expectedScaledSize) {
+
+    @SuppressWarnings("unchecked")
+    PCollectionImpl<String> parentCollection = (PCollectionImpl<String>) mock(PCollectionImpl.class);
+
+    when(parentCollection.getSize()).thenReturn(inputSize);
+
+    DoTableImpl<String, String> doTableImpl = new DoTableImpl<String, String>("Scaled table collection",
+        parentCollection, new TableScaledFunction(scaleFactor), tableOf(strings(), strings()));
+
+    assertEquals(expectedScaledSize, doTableImpl.getSizeInternal());
+
+    verify(parentCollection).getSize();
+
+    verifyNoMoreInteractions(parentCollection);
+  }
+
+  static class TableScaledFunction extends DoFn<String, Pair<String, String>> {
+
+    private float scaleFactor;
+
+    public TableScaledFunction(float scaleFactor) {
+      this.scaleFactor = scaleFactor;
+    }
+
+    @Override
+    public float scaleFactor() {
+      return scaleFactor;
+    }
+
+    @Override
+    public void process(String input, Emitter<Pair<String, String>> emitter) {
+      emitter.emit(Pair.of(input, input));
+
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/impl/mr/emit/IntermediateEmitterTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/impl/mr/emit/IntermediateEmitterTest.java b/crunch-core/src/test/java/org/apache/crunch/impl/mr/emit/IntermediateEmitterTest.java
new file mode 100644
index 0000000..dd72364
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/impl/mr/emit/IntermediateEmitterTest.java
@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.emit;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.assertSame;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.spy;
+import static org.mockito.Mockito.verify;
+
+import org.apache.crunch.impl.mr.run.RTNode;
+import org.apache.crunch.test.StringWrapper;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.avro.Avros;
+import org.apache.hadoop.conf.Configuration;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.ArgumentCaptor;
+
+import com.google.common.collect.Lists;
+
+public class IntermediateEmitterTest {
+
+  private StringWrapper stringWrapper;
+  private PType ptype;
+
+  @Before
+  public void setUp() {
+    stringWrapper = new StringWrapper("test");
+    ptype = spy(Avros.reflects(StringWrapper.class));
+  }
+
+  @Test
+  public void testEmit_SingleChild() {
+    RTNode singleChild = mock(RTNode.class);
+    IntermediateEmitter emitter = new IntermediateEmitter(ptype, Lists.newArrayList(singleChild),
+        new Configuration());
+    emitter.emit(stringWrapper);
+
+    ArgumentCaptor<StringWrapper> argumentCaptor = ArgumentCaptor.forClass(StringWrapper.class);
+    verify(singleChild).process(argumentCaptor.capture());
+    assertSame(stringWrapper, argumentCaptor.getValue());
+  }
+
+  @Test
+  public void testEmit_MultipleChildren() {
+    RTNode childA = mock(RTNode.class);
+    RTNode childB = mock(RTNode.class);
+    IntermediateEmitter emitter = new IntermediateEmitter(ptype, Lists.newArrayList(childA, childB),
+        new Configuration());
+    emitter.emit(stringWrapper);
+
+    ArgumentCaptor<StringWrapper> argumentCaptorA = ArgumentCaptor.forClass(StringWrapper.class);
+    ArgumentCaptor<StringWrapper> argumentCaptorB = ArgumentCaptor.forClass(StringWrapper.class);
+
+    verify(childA).process(argumentCaptorA.capture());
+    verify(childB).process(argumentCaptorB.capture());
+
+    assertEquals(stringWrapper, argumentCaptorA.getValue());
+    assertEquals(stringWrapper, argumentCaptorB.getValue());
+
+    // Multiple children should each receive a deep copy rather than the original instance
+    assertNotSame(stringWrapper, argumentCaptorA.getValue());
+    assertNotSame(stringWrapper, argumentCaptorB.getValue());
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/impl/mr/exec/CappedExponentialCounterTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/impl/mr/exec/CappedExponentialCounterTest.java b/crunch-core/src/test/java/org/apache/crunch/impl/mr/exec/CappedExponentialCounterTest.java
new file mode 100644
index 0000000..958df12
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/impl/mr/exec/CappedExponentialCounterTest.java
@@ -0,0 +1,42 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.exec;
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.Test;
+
+public class CappedExponentialCounterTest {
+
+  @Test
+  public void testGet() {
+    CappedExponentialCounter c = new CappedExponentialCounter(1L, Long.MAX_VALUE);
+    assertEquals(1L, c.get());
+    assertEquals(2L, c.get());
+    assertEquals(4L, c.get());
+    assertEquals(8L, c.get());
+  }
+
+  @Test
+  public void testCap() {
+    CappedExponentialCounter c = new CappedExponentialCounter(1L, 2);
+    assertEquals(1L, c.get());
+    assertEquals(2L, c.get());
+    assertEquals(2L, c.get());
+  }
+}
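
The behaviour these tests describe (each get() returns the current value and doubles it, never exceeding the cap) amounts to the following; this is an illustrative sketch, not the committed source, and it ignores thread-safety concerns:

    // Hedged sketch of a capped doubling counter consistent with the tests above.
    class CappedDoublingCounter {
      private long current;
      private final long cap;

      CappedDoublingCounter(long start, long cap) {
        this.current = start;
        this.cap = cap;
      }

      long get() {
        long result = current;
        current = Math.min(current * 2, cap);
        return result;
      }
    }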

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/impl/mr/exec/CrunchJobHooksTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/impl/mr/exec/CrunchJobHooksTest.java b/crunch-core/src/test/java/org/apache/crunch/impl/mr/exec/CrunchJobHooksTest.java
new file mode 100644
index 0000000..f03c3e2
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/impl/mr/exec/CrunchJobHooksTest.java
@@ -0,0 +1,42 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.exec;
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.Test;
+
+public class CrunchJobHooksTest {
+
+  @Test
+  public void testExtractPartitionNumber() {
+    assertEquals(0, CrunchJobHooks.extractPartitionNumber("out1-r-00000"));
+    assertEquals(10, CrunchJobHooks.extractPartitionNumber("out2-r-00010"));
+    assertEquals(99999, CrunchJobHooks.extractPartitionNumber("out3-r-99999"));
+  }
+
+  @Test
+  public void testExtractPartitionNumber_WithSuffix() {
+    assertEquals(10, CrunchJobHooks.extractPartitionNumber("out2-r-00010.avro"));
+  }
+
+  @Test(expected = IllegalArgumentException.class)
+  public void testExtractPartitionNumber_MapOutputFile() {
+    CrunchJobHooks.extractPartitionNumber("out1-m-00000");
+  }
+}
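
The extraction rule asserted above (accept reduce outputs such as "out2-r-00010", with or without a suffix like ".avro", and reject map outputs) can be sketched with a simple regular expression; this is illustrative only, not the committed CrunchJobHooks code:

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    final class PartitionNumbers {
      private static final Pattern REDUCE_OUTPUT = Pattern.compile("-r-(\\d+)");

      // Returns the numeric partition suffix of a reduce output file name.
      static int extractPartitionNumber(String fileName) {
        Matcher m = REDUCE_OUTPUT.matcher(fileName);
        if (!m.find()) {
          throw new IllegalArgumentException("Not a reduce output: " + fileName);
        }
        return Integer.parseInt(m.group(1));
      }
    }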

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/DotfileWriterTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/DotfileWriterTest.java b/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/DotfileWriterTest.java
new file mode 100644
index 0000000..562238d
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/DotfileWriterTest.java
@@ -0,0 +1,132 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.plan;
+
+import static org.junit.Assert.assertEquals;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import java.util.List;
+
+import org.apache.crunch.Source;
+import org.apache.crunch.Target;
+import org.apache.crunch.impl.mr.collect.InputCollection;
+import org.apache.crunch.impl.mr.collect.PCollectionImpl;
+import org.apache.crunch.impl.mr.plan.DotfileWriter.MRTaskType;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mockito;
+
+import com.google.common.collect.Lists;
+
+public class DotfileWriterTest {
+
+  private DotfileWriter dotfileWriter;
+
+  @Before
+  public void setUp() {
+    dotfileWriter = new DotfileWriter();
+  }
+
+  @Test
+  public void testFormatPCollectionNodeDeclaration() {
+    PCollectionImpl<?> pcollectionImpl = mock(PCollectionImpl.class);
+    JobPrototype jobPrototype = mock(JobPrototype.class);
+    when(pcollectionImpl.getName()).thenReturn("collection");
+
+    assertEquals("\"collection@" + pcollectionImpl.hashCode() + "@" + jobPrototype.hashCode()
+        + "\" [label=\"collection\" shape=box];",
+        dotfileWriter.formatPCollectionNodeDeclaration(pcollectionImpl, jobPrototype));
+  }
+
+  @Test
+  public void testFormatPCollectionNodeDeclaration_InputPCollection() {
+    InputCollection<?> inputCollection = mock(InputCollection.class, Mockito.RETURNS_DEEP_STUBS);
+    JobPrototype jobPrototype = mock(JobPrototype.class);
+    when(inputCollection.getName()).thenReturn("input");
+    when(inputCollection.getSource().toString()).thenReturn("source");
+
+    assertEquals("\"source\" [label=\"input\" shape=folder];",
+        dotfileWriter.formatPCollectionNodeDeclaration(inputCollection, jobPrototype));
+  }
+
+  @Test
+  public void testFormatTargetNodeDeclaration() {
+    Target target = mock(Target.class);
+    when(target.toString()).thenReturn("target/path");
+
+    assertEquals("\"target/path\" [label=\"target/path\" shape=folder];",
+        dotfileWriter.formatTargetNodeDeclaration(target));
+  }
+
+  @Test
+  public void testFormatPCollection() {
+    PCollectionImpl<?> pcollectionImpl = mock(PCollectionImpl.class);
+    JobPrototype jobPrototype = mock(JobPrototype.class);
+    when(pcollectionImpl.getName()).thenReturn("collection");
+
+    assertEquals("\"collection@" + pcollectionImpl.hashCode() + "@" + jobPrototype.hashCode() + "\"",
+        dotfileWriter.formatPCollection(pcollectionImpl, jobPrototype));
+  }
+
+  @Test
+  public void testFormatPCollection_InputCollection() {
+    InputCollection<Object> inputCollection = mock(InputCollection.class);
+    Source<Object> source = mock(Source.class);
+    JobPrototype jobPrototype = mock(JobPrototype.class);
+    when(source.toString()).thenReturn("mocksource");
+    when(inputCollection.getSource()).thenReturn(source);
+
+    assertEquals("\"mocksource\"", dotfileWriter.formatPCollection(inputCollection, jobPrototype));
+  }
+
+  @Test
+  public void testFormatNodeCollection() {
+    List<String> nodeCollection = Lists.newArrayList("one", "two", "three");
+    assertEquals("one -> two -> three;", dotfileWriter.formatNodeCollection(nodeCollection));
+  }
+
+  @Test
+  public void testFormatNodePath() {
+    PCollectionImpl<?> tail = mock(PCollectionImpl.class);
+    PCollectionImpl<?> head = mock(PCollectionImpl.class);
+    JobPrototype jobPrototype = mock(JobPrototype.class);
+
+    when(tail.getName()).thenReturn("tail");
+    when(head.getName()).thenReturn("head");
+
+    NodePath nodePath = new NodePath(tail);
+    nodePath.close(head);
+
+    assertEquals(
+        Lists.newArrayList("\"head@" + head.hashCode() + "@" + jobPrototype.hashCode() + "\" -> \"tail@"
+            + tail.hashCode() + "@" + jobPrototype.hashCode() + "\";"),
+        dotfileWriter.formatNodePath(nodePath, jobPrototype));
+  }
+
+  @Test
+  public void testGetTaskGraphAttributes_Map() {
+    assertEquals("label = Map; color = blue;", dotfileWriter.getTaskGraphAttributes(MRTaskType.MAP));
+  }
+
+  @Test
+  public void testGetTaskGraphAttributes_Reduce() {
+    assertEquals("label = Reduce; color = red;", dotfileWriter.getTaskGraphAttributes(MRTaskType.REDUCE));
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/JobNameBuilderTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/JobNameBuilderTest.java b/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/JobNameBuilderTest.java
new file mode 100644
index 0000000..7963c83
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/JobNameBuilderTest.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.plan;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.crunch.types.writable.Writables;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+public class JobNameBuilderTest {
+
+  @Test
+  public void testBuild() {
+    final String pipelineName = "PipelineName";
+    final String nodeName = "outputNode";
+    DoNode doNode = DoNode.createOutputNode(nodeName, Writables.strings());
+    JobNameBuilder jobNameBuilder = new JobNameBuilder(pipelineName);
+    jobNameBuilder.visit(Lists.newArrayList(doNode));
+    String jobName = jobNameBuilder.build();
+
+    assertEquals(String.format("%s: %s", pipelineName, nodeName), jobName);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/io/SequentialFileNamingSchemeTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/io/SequentialFileNamingSchemeTest.java b/crunch-core/src/test/java/org/apache/crunch/io/SequentialFileNamingSchemeTest.java
new file mode 100644
index 0000000..467da15
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/io/SequentialFileNamingSchemeTest.java
@@ -0,0 +1,84 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+public class SequentialFileNamingSchemeTest {
+
+  // The partition id used for testing. This partition id should be ignored by
+  // the SequentialFileNamingScheme.
+  private static final int PARTITION_ID = 42;
+
+  private SequentialFileNamingScheme namingScheme;
+  private Configuration configuration;
+
+  @Rule
+  public TemporaryFolder tmpOutputDir = new TemporaryFolder();
+
+  @Before
+  public void setUp() throws IOException {
+    configuration = new Configuration();
+    namingScheme = new SequentialFileNamingScheme();
+  }
+
+  @Test
+  public void testGetMapOutputName_EmptyDirectory() throws IOException {
+    assertEquals("part-m-00000",
+        namingScheme.getMapOutputName(configuration, new Path(tmpOutputDir.getRoot().getAbsolutePath())));
+  }
+
+  @Test
+  public void testGetMapOutputName_NonEmptyDirectory() throws IOException {
+    File outputDirectory = tmpOutputDir.getRoot();
+
+    new File(outputDirectory, "existing-1").createNewFile();
+    new File(outputDirectory, "existing-2").createNewFile();
+
+    assertEquals("part-m-00002",
+        namingScheme.getMapOutputName(configuration, new Path(outputDirectory.getAbsolutePath())));
+  }
+
+  @Test
+  public void testGetReduceOutputName_EmptyDirectory() throws IOException {
+    assertEquals("part-r-00000", namingScheme.getReduceOutputName(configuration, new Path(tmpOutputDir.getRoot()
+        .getAbsolutePath()), PARTITION_ID));
+  }
+
+  @Test
+  public void testGetReduceOutputName_NonEmptyDirectory() throws IOException {
+    File outputDirectory = tmpOutputDir.getRoot();
+
+    new File(outputDirectory, "existing-1").createNewFile();
+    new File(outputDirectory, "existing-2").createNewFile();
+
+    assertEquals("part-r-00002",
+        namingScheme.getReduceOutputName(configuration, new Path(outputDirectory.getAbsolutePath()), PARTITION_ID));
+  }
+
+}
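
The naming rule these tests describe is that the sequence number is simply the count of files already present in the output directory, and the requested partition id is ignored. A minimal sketch of that rule, using java.io.File for brevity rather than the Hadoop FileSystem API the real scheme works against:

    import java.io.File;

    // Hedged sketch of sequential output naming consistent with the tests above.
    final class SequentialNamingSketch {
      static String mapOutputName(File outputDir) {
        return String.format("part-m-%05d", countFiles(outputDir));
      }

      static String reduceOutputName(File outputDir, int ignoredPartitionId) {
        return String.format("part-r-%05d", countFiles(outputDir));
      }

      private static int countFiles(File dir) {
        String[] children = dir.list();
        return children == null ? 0 : children.length;
      }
    }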

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/io/SourceTargetHelperTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/io/SourceTargetHelperTest.java b/crunch-core/src/test/java/org/apache/crunch/io/SourceTargetHelperTest.java
new file mode 100644
index 0000000..5b0ea55
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/io/SourceTargetHelperTest.java
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.Path;
+import org.junit.Test;
+
+public class SourceTargetHelperTest {
+
+  @Test
+  public void testGetNonexistentPathSize() throws Exception {
+    File tmp = File.createTempFile("pathsize", "");
+    Path tmpPath = new Path(tmp.getAbsolutePath());
+    tmp.delete();
+    FileSystem fs = FileSystem.getLocal(new Configuration());
+    assertEquals(-1L, SourceTargetHelper.getPathSize(fs, tmpPath));
+  }
+
+  @Test
+  public void testGetNonExistentPathSize_NonExistentPath() throws IOException {
+    FileSystem mockFs = new MockFileSystem();
+    assertEquals(-1L, SourceTargetHelper.getPathSize(mockFs, new Path("does/not/exist")));
+  }
+
+  /**
+   * Mock FileSystem that returns null for {@link FileSystem#listStatus(Path)}.
+   */
+  static class MockFileSystem extends LocalFileSystem {
+
+    @Override
+    public FileStatus[] listStatus(Path f) throws IOException {
+      return null;
+    }
+  }
+}
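
The behaviour these tests rely on is that a missing path, or a null listing, yields -1, while an existing path sums the lengths of its listed files. A hedged sketch of that contract (illustrative only, not the committed SourceTargetHelper code):

    import java.io.IOException;

    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    final class PathSizeSketch {
      // Returns -1 for a missing path or null listing, otherwise the total file size.
      static long getPathSize(FileSystem fs, Path path) throws IOException {
        if (!fs.exists(path)) {
          return -1L;
        }
        FileStatus[] statuses = fs.listStatus(path);
        if (statuses == null) {
          return -1L;
        }
        long total = 0L;
        for (FileStatus status : statuses) {
          total += status.getLen();
        }
        return total;
      }
    }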

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/io/avro/AvroFileReaderFactoryTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/io/avro/AvroFileReaderFactoryTest.java b/crunch-core/src/test/java/org/apache/crunch/io/avro/AvroFileReaderFactoryTest.java
new file mode 100644
index 0000000..62085f8
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/io/avro/AvroFileReaderFactoryTest.java
@@ -0,0 +1,184 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.avro;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericData.Record;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.io.DatumReader;
+import org.apache.avro.reflect.ReflectData;
+import org.apache.avro.reflect.ReflectDatumReader;
+import org.apache.avro.specific.SpecificDatumReader;
+import org.apache.crunch.Pair;
+import org.apache.crunch.test.Person;
+import org.apache.crunch.test.StringWrapper;
+import org.apache.crunch.types.avro.AvroType;
+import org.apache.crunch.types.avro.Avros;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.junit.After;
+import org.junit.Assume;
+import org.junit.Before;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+public class AvroFileReaderFactoryTest {
+
+  private File avroFile;
+
+  @Before
+  public void setUp() throws IOException {
+    avroFile = File.createTempFile("test", ".av");
+  }
+
+  @After
+  public void tearDown() {
+    avroFile.delete();
+  }
+
+  private void populateGenericFile(List<GenericRecord> genericRecords, Schema outputSchema) throws IOException {
+    FileOutputStream outputStream = new FileOutputStream(this.avroFile);
+    GenericDatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<GenericRecord>(outputSchema);
+
+    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(genericDatumWriter);
+    dataFileWriter.create(outputSchema, outputStream);
+
+    for (GenericRecord record : genericRecords) {
+      dataFileWriter.append(record);
+    }
+
+    dataFileWriter.close();
+    outputStream.close();
+
+  }
+
+  private <T> AvroFileReaderFactory<T> createFileReaderFactory(AvroType<T> avroType) {
+    return new AvroFileReaderFactory<T>(avroType);
+  }
+
+  @Test
+  public void testRead_GenericReader() throws IOException {
+    GenericRecord savedRecord = new GenericData.Record(Person.SCHEMA$);
+    savedRecord.put("name", "John Doe");
+    savedRecord.put("age", 42);
+    savedRecord.put("siblingnames", Lists.newArrayList("Jimmy", "Jane"));
+    populateGenericFile(Lists.newArrayList(savedRecord), Person.SCHEMA$);
+
+    AvroFileReaderFactory<GenericData.Record> genericReader = createFileReaderFactory(Avros.generics(Person.SCHEMA$));
+    Iterator<GenericData.Record> recordIterator = genericReader.read(FileSystem.getLocal(new Configuration()),
+        new Path(this.avroFile.getAbsolutePath()));
+
+    GenericRecord genericRecord = recordIterator.next();
+    assertEquals(savedRecord, genericRecord);
+    assertFalse(recordIterator.hasNext());
+  }
+
+  @Test
+  public void testRead_SpecificReader() throws IOException {
+    GenericRecord savedRecord = new GenericData.Record(Person.SCHEMA$);
+    savedRecord.put("name", "John Doe");
+    savedRecord.put("age", 42);
+    savedRecord.put("siblingnames", Lists.newArrayList("Jimmy", "Jane"));
+    populateGenericFile(Lists.newArrayList(savedRecord), Person.SCHEMA$);
+
+    AvroFileReaderFactory<Person> genericReader = createFileReaderFactory(Avros.records(Person.class));
+    Iterator<Person> recordIterator = genericReader.read(FileSystem.getLocal(new Configuration()), new Path(
+        this.avroFile.getAbsolutePath()));
+
+    Person expectedPerson = new Person();
+    expectedPerson.age = 42;
+    expectedPerson.name = "John Doe";
+    List<CharSequence> siblingNames = Lists.newArrayList();
+    siblingNames.add("Jimmy");
+    siblingNames.add("Jane");
+    expectedPerson.siblingnames = siblingNames;
+
+    Person person = recordIterator.next();
+
+    assertEquals(expectedPerson, person);
+    assertFalse(recordIterator.hasNext());
+  }
+
+  @Test
+  public void testRead_ReflectReader() throws IOException {
+    Schema reflectSchema = ReflectData.get().getSchema(StringWrapper.class);
+    GenericRecord savedRecord = new GenericData.Record(reflectSchema);
+    savedRecord.put("value", "stringvalue");
+    populateGenericFile(Lists.newArrayList(savedRecord), reflectSchema);
+
+    AvroFileReaderFactory<StringWrapper> genericReader = createFileReaderFactory(Avros.reflects(StringWrapper.class));
+    Iterator<StringWrapper> recordIterator = genericReader.read(FileSystem.getLocal(new Configuration()), new Path(
+        this.avroFile.getAbsolutePath()));
+
+    StringWrapper stringWrapper = recordIterator.next();
+
+    assertEquals("stringvalue", stringWrapper.getValue());
+    assertFalse(recordIterator.hasNext());
+  }
+
+  @Test
+  public void testCreateDatumReader_Generic() {
+    DatumReader<Record> datumReader = AvroFileReaderFactory.createDatumReader(Avros.generics(Person.SCHEMA$));
+    assertEquals(GenericDatumReader.class, datumReader.getClass());
+  }
+
+  @Test
+  public void testCreateDatumReader_Reflect() {
+    DatumReader<StringWrapper> datumReader = AvroFileReaderFactory.createDatumReader(Avros
+        .reflects(StringWrapper.class));
+    assertEquals(ReflectDatumReader.class, datumReader.getClass());
+  }
+
+  @Test
+  public void testCreateDatumReader_Specific() {
+    DatumReader<Person> datumReader = AvroFileReaderFactory.createDatumReader(Avros.records(Person.class));
+    assertEquals(SpecificDatumReader.class, datumReader.getClass());
+  }
+
+  @Test
+  public void testCreateDatumReader_ReflectAndSpecific() {
+    Assume.assumeTrue(Avros.CAN_COMBINE_SPECIFIC_AND_REFLECT_SCHEMAS);
+
+    DatumReader<Pair<Person, StringWrapper>> datumReader = AvroFileReaderFactory.createDatumReader(Avros.pairs(
+        Avros.records(Person.class), Avros.reflects(StringWrapper.class)));
+    assertEquals(ReflectDatumReader.class, datumReader.getClass());
+  }
+
+  @Test(expected = IllegalStateException.class)
+  public void testCreateDatumReader_ReflectAndSpecific_NotSupported() {
+    Assume.assumeTrue(!Avros.CAN_COMBINE_SPECIFIC_AND_REFLECT_SCHEMAS);
+    AvroFileReaderFactory.createDatumReader(Avros.pairs(Avros.records(Person.class),
+        Avros.reflects(StringWrapper.class)));
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/io/avro/AvroFileSourceTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/io/avro/AvroFileSourceTest.java b/crunch-core/src/test/java/org/apache/crunch/io/avro/AvroFileSourceTest.java
new file mode 100644
index 0000000..ceef2b2
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/io/avro/AvroFileSourceTest.java
@@ -0,0 +1,91 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.avro;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.avro.generic.GenericData.Record;
+import org.apache.avro.mapred.AvroJob;
+import org.apache.crunch.test.Person;
+import org.apache.crunch.test.StringWrapper;
+import org.apache.crunch.types.avro.AvroType;
+import org.apache.crunch.types.avro.Avros;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.Job;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class AvroFileSourceTest {
+
+  private Job job;
+  File tempFile;
+
+  @Before
+  public void setUp() throws IOException {
+    job = new Job();
+    tempFile = File.createTempFile("test", ".avr");
+  }
+
+  @After
+  public void tearDown() {
+    tempFile.delete();
+  }
+
+  @Test
+  public void testConfigureJob_SpecificData() throws IOException {
+    AvroType<Person> avroSpecificType = Avros.records(Person.class);
+    AvroFileSource<Person> personFileSource = new AvroFileSource<Person>(new Path(tempFile.getAbsolutePath()),
+        avroSpecificType);
+
+    personFileSource.configureSource(job, -1);
+
+    assertFalse(job.getConfiguration().getBoolean(AvroJob.INPUT_IS_REFLECT, true));
+    assertEquals(Person.SCHEMA$.toString(), job.getConfiguration().get(AvroJob.INPUT_SCHEMA));
+  }
+
+  @Test
+  public void testConfigureJob_GenericData() throws IOException {
+    AvroType<Record> avroGenericType = Avros.generics(Person.SCHEMA$);
+    AvroFileSource<Record> personFileSource = new AvroFileSource<Record>(new Path(tempFile.getAbsolutePath()),
+        avroGenericType);
+
+    personFileSource.configureSource(job, -1);
+
+    assertFalse(job.getConfiguration().getBoolean(AvroJob.INPUT_IS_REFLECT, true));
+
+  }
+
+  @Test
+  public void testConfigureJob_ReflectData() throws IOException {
+    AvroType<StringWrapper> avroReflectType = Avros.reflects(StringWrapper.class);
+    AvroFileSource<StringWrapper> personFileSource = new AvroFileSource<StringWrapper>(new Path(
+        tempFile.getAbsolutePath()), avroReflectType);
+
+    personFileSource.configureSource(job, -1);
+
+    assertTrue(job.getConfiguration().getBoolean(AvroJob.INPUT_IS_REFLECT, false));
+
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/lib/AvroIndexedRecordPartitionerTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/lib/AvroIndexedRecordPartitionerTest.java b/crunch-core/src/test/java/org/apache/crunch/lib/AvroIndexedRecordPartitionerTest.java
new file mode 100644
index 0000000..0dfed32
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/lib/AvroIndexedRecordPartitionerTest.java
@@ -0,0 +1,98 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.IndexedRecord;
+import org.apache.avro.mapred.AvroKey;
+import org.apache.avro.mapred.AvroValue;
+import org.apache.crunch.lib.join.JoinUtils.AvroIndexedRecordPartitioner;
+import org.junit.Before;
+import org.junit.Test;
+
+public class AvroIndexedRecordPartitionerTest {
+
+  private AvroIndexedRecordPartitioner avroPartitioner;
+
+  @Before
+  public void setUp() {
+    avroPartitioner = new AvroIndexedRecordPartitioner();
+  }
+
+  @Test
+  public void testGetPartition() {
+    IndexedRecord indexedRecord = new MockIndexedRecord(3);
+    AvroKey<IndexedRecord> avroKey = new AvroKey<IndexedRecord>(indexedRecord);
+
+    assertEquals(3, avroPartitioner.getPartition(avroKey, new AvroValue<Object>(), 5));
+    assertEquals(1, avroPartitioner.getPartition(avroKey, new AvroValue<Object>(), 2));
+  }
+
+  @Test
+  public void testGetPartition_NegativeHashValue() {
+    IndexedRecord indexedRecord = new MockIndexedRecord(-3);
+    AvroKey<IndexedRecord> avroKey = new AvroKey<IndexedRecord>(indexedRecord);
+
+    assertEquals(3, avroPartitioner.getPartition(avroKey, new AvroValue<Object>(), 5));
+    assertEquals(1, avroPartitioner.getPartition(avroKey, new AvroValue<Object>(), 2));
+  }
+
+  @Test
+  public void testGetPartition_IntegerMinValue() {
+    IndexedRecord indexedRecord = new MockIndexedRecord(Integer.MIN_VALUE);
+    AvroKey<IndexedRecord> avroKey = new AvroKey<IndexedRecord>(indexedRecord);
+
+    assertEquals(0, avroPartitioner.getPartition(avroKey, new AvroValue<Object>(), Integer.MAX_VALUE));
+  }
+
+  /**
+   * Mock implementation of IndexedRecord to give us control over the hashCode.
+   */
+  static class MockIndexedRecord implements IndexedRecord {
+
+    private Integer value;
+
+    public MockIndexedRecord(Integer value) {
+      this.value = value;
+    }
+
+    @Override
+    public int hashCode() {
+      return value.hashCode();
+    }
+
+    @Override
+    public Schema getSchema() {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public Object get(int arg0) {
+      return this.value;
+    }
+
+    @Override
+    public void put(int arg0, Object arg1) {
+      throw new UnsupportedOperationException();
+    }
+
+  }
+
+}
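
Both this test and TupleWritablePartitionerTest later in this patch pin the same contract on the JoinUtils partitioners: the partition index must be non-negative, a hash of -3 must land in the same bucket as +3, and Integer.MIN_VALUE with Integer.MAX_VALUE partitions must map to 0. The exact implementation lives in JoinUtils, but one mapping that satisfies every assertion above is sketched here; the class and method names are illustrative only.

    // Sketch only: a partition function consistent with the assertions in these tests.
    // Math.abs(Integer.MIN_VALUE) is still negative, so the mask with Integer.MAX_VALUE
    // is what keeps that corner case non-negative.
    public class HashPartitionSketch {
      static int partition(int hashCode, int numPartitions) {
        return (Math.abs(hashCode) & Integer.MAX_VALUE) % numPartitions;
      }

      public static void main(String[] args) {
        System.out.println(partition(3, 5));                                 // 3
        System.out.println(partition(-3, 5));                                // 3
        System.out.println(partition(-3, 2));                                // 1
        System.out.println(partition(Integer.MIN_VALUE, Integer.MAX_VALUE)); // 0
      }
    }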

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/lib/CartesianTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/lib/CartesianTest.java b/crunch-core/src/test/java/org/apache/crunch/lib/CartesianTest.java
new file mode 100644
index 0000000..b19097c
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/lib/CartesianTest.java
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.crunch.PCollection;
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pair;
+import org.apache.crunch.impl.mem.MemPipeline;
+import org.apache.crunch.types.writable.Writables;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+public class CartesianTest {
+
+  @Test
+  public void testCartesianCollection_SingleValues() {
+
+    PCollection<String> letters = MemPipeline.typedCollectionOf(Writables.strings(), "a", "b");
+    PCollection<Integer> ints = MemPipeline.typedCollectionOf(Writables.ints(), 1, 2);
+
+    PCollection<Pair<String, Integer>> cartesianProduct = Cartesian.cross(letters, ints);
+
+    @SuppressWarnings("unchecked")
+    List<Pair<String, Integer>> expectedResults = Lists.newArrayList(Pair.of("a", 1), Pair.of("a", 2), Pair.of("b", 1),
+        Pair.of("b", 2));
+    List<Pair<String, Integer>> actualResults = Lists.newArrayList(cartesianProduct.materialize());
+    Collections.sort(actualResults);
+
+    assertEquals(expectedResults, actualResults);
+  }
+
+  @Test
+  public void testCartesianCollection_Tables() {
+
+    PTable<String, Integer> leftTable = MemPipeline.typedTableOf(
+        Writables.tableOf(Writables.strings(), Writables.ints()), "a", 1, "b", 2);
+    PTable<String, Float> rightTable = MemPipeline.typedTableOf(
+        Writables.tableOf(Writables.strings(), Writables.floats()), "A", 1.0f, "B", 2.0f);
+
+    PTable<Pair<String, String>, Pair<Integer, Float>> cartesianProduct = Cartesian.cross(leftTable, rightTable);
+
+    List<Pair<Pair<String, String>, Pair<Integer, Float>>> expectedResults = Lists.newArrayList();
+    expectedResults.add(Pair.of(Pair.of("a", "A"), Pair.of(1, 1.0f)));
+    expectedResults.add(Pair.of(Pair.of("a", "B"), Pair.of(1, 2.0f)));
+    expectedResults.add(Pair.of(Pair.of("b", "A"), Pair.of(2, 1.0f)));
+    expectedResults.add(Pair.of(Pair.of("b", "B"), Pair.of(2, 2.0f)));
+
+    List<Pair<Pair<String, String>, Pair<Integer, Float>>> actualResults = Lists.newArrayList(cartesianProduct
+        .materialize());
+    Collections.sort(actualResults);
+
+    assertEquals(expectedResults, actualResults);
+
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/lib/DistinctTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/lib/DistinctTest.java b/crunch-core/src/test/java/org/apache/crunch/lib/DistinctTest.java
new file mode 100644
index 0000000..8c0b3bf
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/lib/DistinctTest.java
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.crunch.PCollection;
+import org.apache.crunch.impl.mem.MemPipeline;
+import org.apache.crunch.types.avro.Avros;
+import org.junit.Test;
+
+import com.google.common.collect.ImmutableSet;
+
+public class DistinctTest {
+  private static final List<Integer> DATA = Arrays.asList(
+      17, 29, 17, 29, 17, 29, 36, 45, 17, 45, 36, 29
+  );
+
+  @Test
+  public void testDistinct() {
+    PCollection<Integer> input = MemPipeline.typedCollectionOf(Avros.ints(), DATA);
+    Iterable<Integer> unique = Distinct.distinct(input).materialize();
+
+    assertEquals(ImmutableSet.copyOf(DATA), ImmutableSet.copyOf(unique));
+  }
+
+  @Test
+  public void testDistinctFlush() {
+    PCollection<Integer> input = MemPipeline.typedCollectionOf(Avros.ints(), DATA);
+    Iterable<Integer> unique = Distinct.distinct(input, 2).materialize();
+
+    assertEquals(ImmutableSet.copyOf(DATA), ImmutableSet.copyOf(unique));
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/lib/SampleTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/lib/SampleTest.java b/crunch-core/src/test/java/org/apache/crunch/lib/SampleTest.java
new file mode 100644
index 0000000..bd6fd81
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/lib/SampleTest.java
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.crunch.PCollection;
+import org.apache.crunch.Pair;
+import org.apache.crunch.impl.mem.MemPipeline;
+import org.apache.crunch.types.writable.Writables;
+import org.junit.Test;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Maps;
+
+public class SampleTest {
+  private PCollection<Pair<String, Double>> values = MemPipeline.typedCollectionOf(
+      Writables.pairs(Writables.strings(), Writables.doubles()),
+      ImmutableList.of(
+        Pair.of("foo", 200.0),
+        Pair.of("bar", 400.0),
+        Pair.of("baz", 100.0),
+        Pair.of("biz", 100.0)));
+  
+  @Test
+  public void testWRS() throws Exception {
+    Map<String, Integer> histogram = Maps.newHashMap();
+    
+    for (int i = 0; i < 100; i++) {
+      PCollection<String> sample = Sample.weightedReservoirSample(values, 1, 1729L + i);
+      for (String s : sample.materialize()) {
+        if (!histogram.containsKey(s)) {
+          histogram.put(s, 1);
+        } else {
+          histogram.put(s, 1 + histogram.get(s));
+        }
+      }
+    }
+    
+    Map<String, Integer> expected = ImmutableMap.of(
+        "foo", 24, "bar", 51, "baz", 13, "biz", 12);
+    assertEquals(expected, histogram);
+  }
+
+  @Test
+  public void testSample() {
+    PCollection<Integer> pcollect = MemPipeline.collectionOf(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+    Iterable<Integer> sample = Sample.sample(pcollect, 123998L, 0.2).materialize();
+    List<Integer> sampleValues = ImmutableList.copyOf(sample);
+    assertEquals(ImmutableList.of(6, 7), sampleValues);
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/lib/SecondarySortTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/lib/SecondarySortTest.java b/crunch-core/src/test/java/org/apache/crunch/lib/SecondarySortTest.java
new file mode 100644
index 0000000..933b986
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/lib/SecondarySortTest.java
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib;
+
+import static org.apache.crunch.types.avro.Avros.*;
+import static org.junit.Assert.assertEquals;
+
+import org.apache.crunch.DoFn;
+import org.apache.crunch.Emitter;
+import org.apache.crunch.PCollection;
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pair;
+import org.apache.crunch.impl.mem.MemPipeline;
+import org.junit.Test;
+
+import com.google.common.collect.ImmutableList;
+
+
+public class SecondarySortTest {
+  @Test
+  public void testInMemory() throws Exception {
+    PTable<Long, Pair<Long, String>> input = MemPipeline.typedTableOf(tableOf(longs(), pairs(longs(), strings())),
+        1729L, Pair.of(17L, "a"), 100L, Pair.of(29L, "b"), 1729L, Pair.of(29L, "c"));
+    PCollection<String> letters = SecondarySort.sortAndApply(input, new StringifyFn(), strings());
+    assertEquals(ImmutableList.of("b", "ac"), letters.materialize());
+  }
+  
+  private static class StringifyFn extends DoFn<Pair<Long, Iterable<Pair<Long, String>>>, String> {
+    @Override
+    public void process(Pair<Long, Iterable<Pair<Long, String>>> input, Emitter<String> emitter) {
+      StringBuilder sb = new StringBuilder();
+      for (Pair<Long, String> p : input.second()) {
+        sb.append(p.second());
+      }
+      emitter.emit(sb.toString());
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/lib/TupleWritablePartitionerTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/lib/TupleWritablePartitionerTest.java b/crunch-core/src/test/java/org/apache/crunch/lib/TupleWritablePartitionerTest.java
new file mode 100644
index 0000000..35ccc11
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/lib/TupleWritablePartitionerTest.java
@@ -0,0 +1,68 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.lib;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.crunch.lib.join.JoinUtils.TupleWritablePartitioner;
+import org.apache.crunch.types.writable.TupleWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Writable;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TupleWritablePartitionerTest {
+
+  private TupleWritablePartitioner tupleWritablePartitioner;
+
+  @Before
+  public void setUp() {
+    tupleWritablePartitioner = new TupleWritablePartitioner();
+  }
+
+  @Test
+  public void testGetPartition() {
+    IntWritable intWritable = new IntWritable(3);
+    TupleWritable key = new TupleWritable(new Writable[] { intWritable });
+    assertEquals(3, tupleWritablePartitioner.getPartition(key, NullWritable.get(), 5));
+    assertEquals(1, tupleWritablePartitioner.getPartition(key, NullWritable.get(), 2));
+  }
+
+  @Test
+  public void testGetPartition_NegativeHashValue() {
+    IntWritable intWritable = new IntWritable(-3);
+    // Sanity check, if this doesn't work then the premise of this test is wrong
+    assertEquals(-3, intWritable.hashCode());
+
+    TupleWritable key = new TupleWritable(new Writable[] { intWritable });
+    assertEquals(3, tupleWritablePartitioner.getPartition(key, NullWritable.get(), 5));
+    assertEquals(1, tupleWritablePartitioner.getPartition(key, NullWritable.get(), 2));
+  }
+
+  @Test
+  public void testGetPartition_IntegerMinValue() {
+    IntWritable intWritable = new IntWritable(Integer.MIN_VALUE);
+    // Sanity check, if this doesn't work then the premise of this test is wrong
+    assertEquals(Integer.MIN_VALUE, intWritable.hashCode());
+
+    TupleWritable key = new TupleWritable(new Writable[] { intWritable });
+    assertEquals(0, tupleWritablePartitioner.getPartition(key, NullWritable.get(), Integer.MAX_VALUE));
+  }
+
+}


[06/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/PTypeUtils.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/PTypeUtils.java b/crunch/src/main/java/org/apache/crunch/types/PTypeUtils.java
deleted file mode 100644
index e61b98b..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/PTypeUtils.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types;
-
-import java.util.Collection;
-import java.util.List;
-
-import org.apache.crunch.Pair;
-import org.apache.crunch.Tuple;
-import org.apache.crunch.Tuple3;
-import org.apache.crunch.Tuple4;
-import org.apache.crunch.TupleN;
-
-/**
- * Utilities for converting between {@code PType}s from different
- * {@code PTypeFamily} implementations.
- * 
- */
-public class PTypeUtils {
-
-  public static <T> PType<T> convert(PType<T> ptype, PTypeFamily tf) {
-    if (ptype instanceof PTableType) {
-      PTableType ptt = (PTableType) ptype;
-      return tf.tableOf(tf.as(ptt.getKeyType()), tf.as(ptt.getValueType()));
-    }
-    Class<T> typeClass = ptype.getTypeClass();
-    if (Tuple.class.isAssignableFrom(typeClass)) {
-      List<PType> subTypes = ptype.getSubTypes();
-      if (Pair.class.equals(typeClass)) {
-        return tf.pairs(tf.as(subTypes.get(0)), tf.as(subTypes.get(1)));
-      } else if (Tuple3.class.equals(typeClass)) {
-        return tf.triples(tf.as(subTypes.get(0)), tf.as(subTypes.get(1)), tf.as(subTypes.get(2)));
-      } else if (Tuple4.class.equals(typeClass)) {
-        return tf.quads(tf.as(subTypes.get(0)), tf.as(subTypes.get(1)), tf.as(subTypes.get(2)), tf.as(subTypes.get(3)));
-      } else if (TupleN.class.equals(typeClass)) {
-        PType[] newPTypes = subTypes.toArray(new PType[0]);
-        for (int i = 0; i < newPTypes.length; i++) {
-          newPTypes[i] = tf.as(subTypes.get(i));
-        }
-        return (PType<T>) tf.tuples(newPTypes);
-      }
-    }
-    if (Collection.class.isAssignableFrom(typeClass)) {
-      return tf.collections(tf.as(ptype.getSubTypes().get(0)));
-    }
-    return tf.records(typeClass);
-  }
-
-  private PTypeUtils() {
-  }
-}
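
The convert method above rebuilds a PType structurally (tables, pairs, triples, quads, TupleN, collections, records) using another family's factory methods. A minimal usage sketch, assuming the usual WritableTypeFamily/AvroTypeFamily singleton accessors:

    import org.apache.crunch.Pair;
    import org.apache.crunch.types.PType;
    import org.apache.crunch.types.PTypeUtils;
    import org.apache.crunch.types.avro.AvroTypeFamily;
    import org.apache.crunch.types.writable.Writables;

    public class PTypeConvertSketch {
      public static void main(String[] args) {
        // A Writable-backed pair type...
        PType<Pair<String, Integer>> writablePair =
            Writables.pairs(Writables.strings(), Writables.ints());
        // ...rebuilt with the Avro family via tf.pairs(tf.as(...), tf.as(...)) as in the code above.
        PType<Pair<String, Integer>> avroPair =
            PTypeUtils.convert(writablePair, AvroTypeFamily.getInstance());
        System.out.println(avroPair.getFamily());
      }
    }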

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/PTypes.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/PTypes.java b/crunch/src/main/java/org/apache/crunch/types/PTypes.java
deleted file mode 100644
index 546719c..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/PTypes.java
+++ /dev/null
@@ -1,252 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types;
-
-import java.math.BigInteger;
-import java.nio.ByteBuffer;
-import java.util.UUID;
-
-import org.apache.crunch.CrunchRuntimeException;
-import org.apache.crunch.MapFn;
-import org.apache.hadoop.util.ReflectionUtils;
-import org.apache.thrift.TBase;
-import org.apache.thrift.TDeserializer;
-import org.apache.thrift.TException;
-import org.apache.thrift.TSerializer;
-import org.apache.thrift.protocol.TBinaryProtocol;
-import org.codehaus.jackson.map.ObjectMapper;
-
-import com.google.protobuf.InvalidProtocolBufferException;
-import com.google.protobuf.Message;
-
-/**
- * Utility functions for creating common types of derived PTypes, e.g., for JSON
- * data, protocol buffers, and Thrift records.
- * 
- */
-public class PTypes {
-
-  public static PType<BigInteger> bigInt(PTypeFamily typeFamily) {
-    return typeFamily.derived(BigInteger.class, BYTE_TO_BIGINT, BIGINT_TO_BYTE, typeFamily.bytes());
-  }
-
-  public static PType<UUID> uuid(PTypeFamily ptf) {
-    return ptf.derived(UUID.class, BYTE_TO_UUID, UUID_TO_BYTE, ptf.bytes());
-  }
-  
-  public static <T> PType<T> jsonString(Class<T> clazz, PTypeFamily typeFamily) {
-    return typeFamily
-        .derived(clazz, new JacksonInputMapFn<T>(clazz), new JacksonOutputMapFn<T>(), typeFamily.strings());
-  }
-
-  public static <T extends Message> PType<T> protos(Class<T> clazz, PTypeFamily typeFamily) {
-    return typeFamily.derived(clazz, new ProtoInputMapFn<T>(clazz), new ProtoOutputMapFn<T>(), typeFamily.bytes());
-  }
-
-  public static <T extends TBase> PType<T> thrifts(Class<T> clazz, PTypeFamily typeFamily) {
-    return typeFamily.derived(clazz, new ThriftInputMapFn<T>(clazz), new ThriftOutputMapFn<T>(), typeFamily.bytes());
-  }
-
-  public static final <T extends Enum> PType<T> enums(final Class<T> type, PTypeFamily typeFamily) {
-    return typeFamily.derived(type, new EnumInputMapper<T>(type), new EnumOutputMapper<T>(), typeFamily.strings());
-  }
-
-  public static MapFn<ByteBuffer, BigInteger> BYTE_TO_BIGINT = new MapFn<ByteBuffer, BigInteger>() {
-    public BigInteger map(ByteBuffer input) {
-      return input == null ? null : new BigInteger(input.array());
-    }
-  };
-
-  public static MapFn<BigInteger, ByteBuffer> BIGINT_TO_BYTE = new MapFn<BigInteger, ByteBuffer>() {
-    public ByteBuffer map(BigInteger input) {
-      return input == null ? null : ByteBuffer.wrap(input.toByteArray());
-    }
-  };
-
-  private static class JacksonInputMapFn<T> extends MapFn<String, T> {
-
-    private final Class<T> clazz;
-    private transient ObjectMapper mapper;
-
-    public JacksonInputMapFn(Class<T> clazz) {
-      this.clazz = clazz;
-    }
-
-    @Override
-    public void initialize() {
-      this.mapper = new ObjectMapper();
-    }
-
-    @Override
-    public T map(String input) {
-      try {
-        return mapper.readValue(input, clazz);
-      } catch (Exception e) {
-        throw new CrunchRuntimeException(e);
-      }
-    }
-  }
-
-  private static class JacksonOutputMapFn<T> extends MapFn<T, String> {
-
-    private transient ObjectMapper mapper;
-
-    @Override
-    public void initialize() {
-      this.mapper = new ObjectMapper();
-    }
-
-    @Override
-    public String map(T input) {
-      try {
-        return mapper.writeValueAsString(input);
-      } catch (Exception e) {
-        throw new CrunchRuntimeException(e);
-      }
-    }
-  }
-
-  private static class ProtoInputMapFn<T extends Message> extends MapFn<ByteBuffer, T> {
-
-    private final Class<T> clazz;
-    private transient T instance;
-
-    public ProtoInputMapFn(Class<T> clazz) {
-      this.clazz = clazz;
-    }
-
-    @Override
-    public void initialize() {
-      this.instance = Protos.getDefaultInstance(clazz);
-    }
-
-    @Override
-    public T map(ByteBuffer bb) {
-      try {
-        return (T) instance.newBuilderForType().mergeFrom(bb.array(), bb.position(), bb.limit()).build();
-      } catch (InvalidProtocolBufferException e) {
-        throw new CrunchRuntimeException(e);
-      }
-    }
-  }
-
-  private static class ProtoOutputMapFn<T extends Message> extends MapFn<T, ByteBuffer> {
-
-    public ProtoOutputMapFn() {
-    }
-
-    @Override
-    public ByteBuffer map(T proto) {
-      return ByteBuffer.wrap(proto.toByteArray());
-    }
-  }
-
-  private static class ThriftInputMapFn<T extends TBase> extends MapFn<ByteBuffer, T> {
-
-    private final Class<T> clazz;
-    private transient T instance;
-    private transient TDeserializer deserializer;
-    private transient byte[] bytes;
-
-    public ThriftInputMapFn(Class<T> clazz) {
-      this.clazz = clazz;
-    }
-
-    @Override
-    public void initialize() {
-      this.instance = ReflectionUtils.newInstance(clazz, null);
-      this.deserializer = new TDeserializer(new TBinaryProtocol.Factory());
-      this.bytes = new byte[0];
-    }
-
-    @Override
-    public T map(ByteBuffer bb) {
-      T next = (T) instance.deepCopy();
-      int len = bb.limit() - bb.position();
-      if (len != bytes.length) {
-        bytes = new byte[len];
-      }
-      System.arraycopy(bb.array(), bb.position(), bytes, 0, len);
-      try {
-        deserializer.deserialize(next, bytes);
-      } catch (TException e) {
-        throw new CrunchRuntimeException(e);
-      }
-      return next;
-    }
-  }
-
-  private static class ThriftOutputMapFn<T extends TBase> extends MapFn<T, ByteBuffer> {
-
-    private transient TSerializer serializer;
-
-    public ThriftOutputMapFn() {
-    }
-
-    @Override
-    public void initialize() {
-      this.serializer = new TSerializer(new TBinaryProtocol.Factory());
-    }
-
-    @Override
-    public ByteBuffer map(T t) {
-      try {
-        return ByteBuffer.wrap(serializer.serialize(t));
-      } catch (TException e) {
-        throw new CrunchRuntimeException(e);
-      }
-    }
-  }
-
-  private static class EnumInputMapper<T extends Enum> extends MapFn<String, T> {
-    private final Class<T> type;
-
-    public EnumInputMapper(Class<T> type) {
-      this.type = type;
-    }
-
-    @Override
-    public T map(String input) {
-      return (T) Enum.valueOf(type, input);
-    }
-  };
-
-  private static class EnumOutputMapper<T extends Enum> extends MapFn<T, String> {
-
-    @Override
-    public String map(T input) {
-      return input.name();
-    }
-  };
-  
-  private static MapFn<ByteBuffer, UUID> BYTE_TO_UUID = new MapFn<ByteBuffer, UUID>() {
-    @Override
-    public UUID map(ByteBuffer input) {
-      return new UUID(input.getLong(), input.getLong());
-    }
-  };
-  
-  private static MapFn<UUID, ByteBuffer> UUID_TO_BYTE = new MapFn<UUID, ByteBuffer>() {
-    @Override
-    public ByteBuffer map(UUID input) {
-      ByteBuffer bb = ByteBuffer.wrap(new byte[16]);
-      bb.asLongBuffer().put(input.getMostSignificantBits()).put(input.getLeastSignificantBits());
-      return bb;
-    }
-  };
-}
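
Every derived type above follows the same recipe: pick a base type from the family (bytes or strings) and wrap it with an input/output MapFn pair. A short usage sketch; the Event bean is a hypothetical stand-in for any Jackson-serializable class:

    import java.util.UUID;
    import org.apache.crunch.types.PType;
    import org.apache.crunch.types.PTypes;
    import org.apache.crunch.types.writable.WritableTypeFamily;

    public class PTypesSketch {
      // Hypothetical POJO, used only to illustrate jsonString().
      public static class Event {
        public String name;
        public long timestamp;
      }

      public static void main(String[] args) {
        PType<UUID> uuids = PTypes.uuid(WritableTypeFamily.getInstance());
        PType<Event> events = PTypes.jsonString(Event.class, WritableTypeFamily.getInstance());
        // Either PType can then be passed to parallelDo/write like any built-in type.
        System.out.println(uuids.getTypeClass() + " / " + events.getTypeClass());
      }
    }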

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/Protos.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/Protos.java b/crunch/src/main/java/org/apache/crunch/types/Protos.java
deleted file mode 100644
index 4cd5068..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/Protos.java
+++ /dev/null
@@ -1,173 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types;
-
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.crunch.CrunchRuntimeException;
-import org.apache.crunch.DoFn;
-import org.apache.crunch.Emitter;
-import org.apache.crunch.MapFn;
-import org.apache.hadoop.util.ReflectionUtils;
-
-import com.google.common.base.Splitter;
-import com.google.protobuf.Descriptors.FieldDescriptor;
-import com.google.protobuf.Message;
-import com.google.protobuf.Message.Builder;
-
-/**
- * Utility functions for working with protocol buffers in Crunch.
- */
-public class Protos {
-
-  /**
-   * Utility function for creating a default PB Message from a Class object that
-   * works with both protoc 2.3.0 and 2.4.x.
-   * @param clazz The class of the protocol buffer to create
-   * @return An instance of a protocol buffer
-   */
-  public static <M extends Message> M getDefaultInstance(Class<M> clazz) {
-    if (clazz.getConstructors().length > 0) {
-      // Protobuf 2.3.0
-      return ReflectionUtils.newInstance(clazz, null);
-    } else {
-      // Protobuf 2.4.x
-      try {
-        Message.Builder mb = (Message.Builder) clazz.getDeclaredMethod("newBuilder").invoke(null);
-        return (M) mb.getDefaultInstanceForType();
-      } catch (Exception e) {
-        throw new CrunchRuntimeException(e);
-      }  
-    }
-  }
-  
-  public static <M extends Message, K> MapFn<M, K> extractKey(String fieldName) {
-    return new ExtractKeyFn<M, K>(fieldName);
-  }
-
-  public static <M extends Message> DoFn<String, M> lineParser(String sep, Class<M> msgClass) {
-    return new TextToProtoFn<M>(sep, msgClass);
-  }
-
-  private static class ExtractKeyFn<M extends Message, K> extends MapFn<M, K> {
-
-    private final String fieldName;
-
-    private transient FieldDescriptor fd;
-
-    public ExtractKeyFn(String fieldName) {
-      this.fieldName = fieldName;
-    }
-
-    @Override
-    public K map(M input) {
-      if (input == null) {
-        throw new IllegalArgumentException("Null inputs not supported by Protos.ExtractKeyFn");
-      } else if (fd == null) {
-        fd = input.getDescriptorForType().findFieldByName(fieldName);
-        if (fd == null) {
-          throw new IllegalStateException("Could not find field: " + fieldName + " in message: " + input);
-        }
-      }
-      return (K) input.getField(fd);
-    }
-
-  }
-
-  private static class TextToProtoFn<M extends Message> extends DoFn<String, M> {
-
-    private final String sep;
-    private final Class<M> msgClass;
-
-    private transient M msgInstance;
-    private transient List<FieldDescriptor> fields;
-    private transient Splitter splitter;
-
-    enum ParseErrors {
-      TOTAL,
-      NUMBER_FORMAT
-    };
-
-    public TextToProtoFn(String sep, Class<M> msgClass) {
-      this.sep = sep;
-      this.msgClass = msgClass;
-    }
-
-    @Override
-    public void initialize() {
-      this.msgInstance = getDefaultInstance(msgClass);
-      this.fields = msgInstance.getDescriptorForType().getFields();
-      this.splitter = Splitter.on(sep);
-    }
-
-    @Override
-    public void process(String input, Emitter<M> emitter) {
-      if (input != null && !input.isEmpty()) {
-        Builder b = msgInstance.newBuilderForType();
-        Iterator<String> iter = splitter.split(input).iterator();
-        boolean parseError = false;
-        for (FieldDescriptor fd : fields) {
-          if (iter.hasNext()) {
-            String value = iter.next();
-            if (value != null && !value.isEmpty()) {
-              Object parsedValue = null;
-              try {
-                switch (fd.getJavaType()) {
-                case STRING:
-                  parsedValue = value;
-                  break;
-                case INT:
-                  parsedValue = Integer.valueOf(value);
-                  break;
-                case LONG:
-                  parsedValue = Long.valueOf(value);
-                  break;
-                case FLOAT:
-                  parsedValue = Float.valueOf(value);
-                  break;
-                case DOUBLE:
-                  parsedValue = Double.valueOf(value);
-                  break;
-                case BOOLEAN:
-                  parsedValue = Boolean.valueOf(value);
-                  break;
-                case ENUM:
-                  parsedValue = fd.getEnumType().findValueByName(value);
-                  break;
-                }
-                b.setField(fd, parsedValue);
-              } catch (NumberFormatException nfe) {
-                increment(ParseErrors.NUMBER_FORMAT);
-                parseError = true;
-                break;
-              }
-            }
-          }
-        }
-
-        if (parseError) {
-          increment(ParseErrors.TOTAL);
-        } else {
-          emitter.emit((M) b.build());
-        }
-      }
-    }
-  }
-
-}
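
lineParser splits each input line on the given separator and fills the message's fields positionally, counting parse failures in the TOTAL and NUMBER_FORMAT counters instead of failing the job, while extractKey pulls one named field out of each message. A fragment showing how these would typically be wired together; it is not compilable as-is, since LogRecord stands in for a protoc-generated Message class and the path and field name are illustrative:

    // Fragment only; LogRecord, "/logs/input" and "user_id" are placeholders.
    PCollection<String> lines = pipeline.readTextFile("/logs/input");
    PCollection<LogRecord> records = lines.parallelDo(
        Protos.lineParser("\t", LogRecord.class),
        PTypes.protos(LogRecord.class, WritableTypeFamily.getInstance()));
    // Key the parsed messages by a single named field:
    PTable<String, LogRecord> byUser = records.by(
        Protos.<LogRecord, String>extractKey("user_id"),
        Writables.strings());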

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/TupleDeepCopier.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/TupleDeepCopier.java b/crunch/src/main/java/org/apache/crunch/types/TupleDeepCopier.java
deleted file mode 100644
index a2ffae3..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/TupleDeepCopier.java
+++ /dev/null
@@ -1,65 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types;
-
-import java.util.List;
-
-import org.apache.crunch.Tuple;
-import org.apache.hadoop.conf.Configuration;
-
-import com.google.common.collect.Lists;
-
-/**
- * Performs deep copies (based on underlying PType deep copying) of Tuple-based objects.
- * 
- * @param <T> The type of Tuple implementation being copied
- */
-public class TupleDeepCopier<T extends Tuple> implements DeepCopier<T> {
-
-  private final TupleFactory<T> tupleFactory;
-  private final List<PType> elementTypes;
-
-  public TupleDeepCopier(Class<T> tupleClass, PType... elementTypes) {
-    tupleFactory = TupleFactory.getTupleFactory(tupleClass);
-    this.elementTypes = Lists.newArrayList(elementTypes);
-  }
-
-  @Override
-  public void initialize(Configuration conf) {
-    for (PType elementType : elementTypes) {
-      elementType.initialize(conf);
-    }
-  }
-
-  @Override
-  public T deepCopy(T source) {
-    
-    if (source == null) {
-      return null;
-    }
-    
-    Object[] deepCopyValues = new Object[source.size()];
-
-    for (int valueIndex = 0; valueIndex < elementTypes.size(); valueIndex++) {
-      PType elementType = elementTypes.get(valueIndex);
-      deepCopyValues[valueIndex] = elementType.getDetachedValue(source.get(valueIndex));
-    }
-
-    return tupleFactory.makeTuple(deepCopyValues);
-  }
-}
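
The copier is constructed with the tuple class plus one PType per position, and each position is detached through its element PType before the tuple is rebuilt. A minimal sketch of using it directly:

    import org.apache.crunch.Pair;
    import org.apache.crunch.types.TupleDeepCopier;
    import org.apache.crunch.types.writable.Writables;
    import org.apache.hadoop.conf.Configuration;

    public class TupleDeepCopySketch {
      public static void main(String[] args) {
        TupleDeepCopier<Pair> copier =
            new TupleDeepCopier<Pair>(Pair.class, Writables.strings(), Writables.ints());
        copier.initialize(new Configuration());

        Pair<String, Integer> original = Pair.of("a", 1);
        Pair copy = copier.deepCopy(original);  // each value detached via its element PType
        System.out.println(copy + " " + (copy != original));
      }
    }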

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/TupleFactory.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/TupleFactory.java b/crunch/src/main/java/org/apache/crunch/types/TupleFactory.java
deleted file mode 100644
index 73b47de..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/TupleFactory.java
+++ /dev/null
@@ -1,134 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types;
-
-import java.io.Serializable;
-import java.lang.reflect.Constructor;
-import java.util.Map;
-
-import org.apache.crunch.CrunchRuntimeException;
-import org.apache.crunch.Pair;
-import org.apache.crunch.Tuple;
-import org.apache.crunch.Tuple3;
-import org.apache.crunch.Tuple4;
-import org.apache.crunch.TupleN;
-
-import com.google.common.collect.Maps;
-
-public abstract class TupleFactory<T extends Tuple> implements Serializable {
-
-  public void initialize() {
-  }
-
-  public abstract T makeTuple(Object... values);
-
-  
-  private static final Map<Class, TupleFactory> customTupleFactories = Maps.newHashMap();
-  
-  /**
-   * Get the {@link TupleFactory} for a given Tuple implementation.
-   * 
-   * @param tupleClass
-   *          The class for which the factory is to be retrieved
-   * @return The appropriate TupleFactory
-   */
-  public static <T extends Tuple> TupleFactory<T> getTupleFactory(Class<T> tupleClass) {
-    if (tupleClass == Pair.class) {
-      return (TupleFactory<T>) PAIR;
-    } else if (tupleClass == Tuple3.class) {
-      return (TupleFactory<T>) TUPLE3;
-    } else if (tupleClass == Tuple4.class) {
-      return (TupleFactory<T>) TUPLE4;
-    } else if (tupleClass == TupleN.class) {
-      return (TupleFactory<T>) TUPLEN;
-    } else if (customTupleFactories.containsKey(tupleClass)) {
-      return (TupleFactory<T>) customTupleFactories.get(tupleClass);
-    } else {
-      throw new IllegalArgumentException("Can't create TupleFactory for " + tupleClass);
-    }
-  }
-
-  public static final TupleFactory<Pair> PAIR = new TupleFactory<Pair>() {
-    @Override
-    public Pair makeTuple(Object... values) {
-      return Pair.of(values[0], values[1]);
-    }
-  };
-
-  public static final TupleFactory<Tuple3> TUPLE3 = new TupleFactory<Tuple3>() {
-    @Override
-    public Tuple3 makeTuple(Object... values) {
-      return Tuple3.of(values[0], values[1], values[2]);
-    }
-  };
-
-  public static final TupleFactory<Tuple4> TUPLE4 = new TupleFactory<Tuple4>() {
-    @Override
-    public Tuple4 makeTuple(Object... values) {
-      return Tuple4.of(values[0], values[1], values[2], values[3]);
-    }
-  };
-
-  public static final TupleFactory<TupleN> TUPLEN = new TupleFactory<TupleN>() {
-    @Override
-    public TupleN makeTuple(Object... values) {
-      return new TupleN(values);
-    }
-  };
-
-  public static <T extends Tuple> TupleFactory<T> create(Class<T> clazz, Class... typeArgs) {
-    if (customTupleFactories.containsKey(clazz)) {
-      return (TupleFactory<T>) customTupleFactories.get(clazz);
-    }
-    TupleFactory<T> custom = new CustomTupleFactory<T>(clazz, typeArgs);
-    customTupleFactories.put(clazz, custom);
-    return custom;
-  }
-
-  private static class CustomTupleFactory<T extends Tuple> extends TupleFactory<T> {
-
-    private final Class<T> clazz;
-    private final Class[] typeArgs;
-
-    private transient Constructor<T> constructor;
-
-    public CustomTupleFactory(Class<T> clazz, Class[] typeArgs) {
-      this.clazz = clazz;
-      this.typeArgs = typeArgs;
-    }
-
-    @Override
-    public void initialize() {
-      try {
-        constructor = clazz.getConstructor(typeArgs);
-      } catch (Exception e) {
-        throw new CrunchRuntimeException(e);
-      }
-    }
-
-    @Override
-    public T makeTuple(Object... values) {
-      try {
-        return constructor.newInstance(values);
-      } catch (Exception e) {
-        throw new CrunchRuntimeException(e);
-      }
-    }
-  }
-
-}
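
The factories are mostly consumed internally by the tuple PTypes, but the lookup-and-make pattern is easy to see in isolation:

    import org.apache.crunch.Pair;
    import org.apache.crunch.Tuple3;
    import org.apache.crunch.types.TupleFactory;

    public class TupleFactorySketch {
      public static void main(String[] args) {
        Pair pair = TupleFactory.getTupleFactory(Pair.class).makeTuple("a", 1);
        Tuple3 triple = TupleFactory.TUPLE3.makeTuple("a", 1, 2.0);
        System.out.println(pair + " " + triple);
      }
    }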

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/avro/AvroCapabilities.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/avro/AvroCapabilities.java b/crunch/src/main/java/org/apache/crunch/types/avro/AvroCapabilities.java
deleted file mode 100644
index cc1636c..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/avro/AvroCapabilities.java
+++ /dev/null
@@ -1,106 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.avro;
-
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-
-import org.apache.avro.Schema;
-import org.apache.avro.io.BinaryDecoder;
-import org.apache.avro.io.BinaryEncoder;
-import org.apache.avro.io.DecoderFactory;
-import org.apache.avro.io.EncoderFactory;
-import org.apache.avro.reflect.ReflectDatumReader;
-import org.apache.avro.reflect.ReflectDatumWriter;
-
-import com.google.common.collect.Lists;
-
-/**
- * Determines the capabilities of the Avro version that is currently being used.
- */
-class AvroCapabilities {
-
-  public static class Record extends org.apache.avro.specific.SpecificRecordBase implements
-      org.apache.avro.specific.SpecificRecord {
-    public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser()
-        .parse("{\"type\":\"record\",\"name\":\"Record\",\"namespace\":\"org.apache.crunch.types.avro\",\"fields\":[{\"name\":\"subrecords\",\"type\":{\"type\":\"array\",\"items\":\"string\"}}]}");
-    @Deprecated
-    public java.util.List<java.lang.CharSequence> subrecords;
-
-    public java.lang.Object get(int field$) {
-      switch (field$) {
-      case 0:
-        return subrecords;
-      default:
-        throw new org.apache.avro.AvroRuntimeException("Bad index");
-      }
-    }
-
-    // Used by DatumReader. Applications should not call.
-    @SuppressWarnings(value = "unchecked")
-    public void put(int field$, java.lang.Object value$) {
-      switch (field$) {
-      case 0:
-        subrecords = (java.util.List<java.lang.CharSequence>) value$;
-        break;
-      default:
-        throw new org.apache.avro.AvroRuntimeException("Bad index");
-      }
-    }
-
-    @Override
-    public Schema getSchema() {
-      return SCHEMA$;
-    }
-  }
-
-  /**
-   * Determine if the current Avro version can use the ReflectDatumReader to
-   * read SpecificData that includes an array. The inability to do this was a
-   * bug that was fixed in Avro 1.7.0.
-   * 
-   * @return true if SpecificData can be properly read using a
-   *         ReflectDatumReader
-   */
-  static boolean canDecodeSpecificSchemaWithReflectDatumReader() {
-    ReflectDatumReader<Record> datumReader = new ReflectDatumReader(Record.SCHEMA$);
-    ReflectDatumWriter<Record> datumWriter = new ReflectDatumWriter(Record.SCHEMA$);
-
-    Record record = new Record();
-    record.subrecords = Lists.<CharSequence> newArrayList("a", "b");
-
-    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
-    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(byteArrayOutputStream, null);
-
-    try {
-      datumWriter.write(record, encoder);
-      encoder.flush();
-      BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(
-          byteArrayOutputStream.toByteArray(), null);
-      datumReader.read(record, decoder);
-    } catch (IOException ioe) {
-      throw new RuntimeException("Error performing specific schema test", ioe);
-    } catch (ClassCastException cce) {
-      // This indicates that we're using a pre-1.7.0 version of Avro, as the
-      // ReflectDatumReader in those versions could not correctly handle an
-      // array in a SpecificData value
-      return false;
-    }
-    return true;
-  }
-}
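
This probe (serialize a specific record, then try to read it back with a ReflectDatumReader) is the kind of runtime check that a flag such as Avros.CAN_COMBINE_SPECIFIC_AND_REFLECT_SCHEMAS, used by AvroFileReaderFactoryTest earlier in this message, can be derived from. Application code can guard mixed reflect/specific types the same way the tests do; a fragment, using the Person and StringWrapper test classes from this patch:

    // Fragment only: guard mixed specific/reflect PTypes on the capability flag.
    if (Avros.CAN_COMBINE_SPECIFIC_AND_REFLECT_SCHEMAS) {
      PType<Pair<Person, StringWrapper>> mixed =
          Avros.pairs(Avros.records(Person.class), Avros.reflects(StringWrapper.class));
      // safe to use 'mixed' in a pipeline on this Avro version
    } else {
      // fall back to an all-specific or all-reflect representation
    }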

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/avro/AvroDeepCopier.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/avro/AvroDeepCopier.java b/crunch/src/main/java/org/apache/crunch/types/avro/AvroDeepCopier.java
deleted file mode 100644
index 0fe9288..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/avro/AvroDeepCopier.java
+++ /dev/null
@@ -1,209 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.avro;
-
-import java.io.ByteArrayOutputStream;
-import java.io.Serializable;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericData;
-import org.apache.avro.generic.GenericData.Record;
-import org.apache.avro.generic.GenericDatumReader;
-import org.apache.avro.generic.GenericDatumWriter;
-import org.apache.avro.io.BinaryDecoder;
-import org.apache.avro.io.BinaryEncoder;
-import org.apache.avro.io.DatumReader;
-import org.apache.avro.io.DatumWriter;
-import org.apache.avro.io.DecoderFactory;
-import org.apache.avro.io.EncoderFactory;
-import org.apache.avro.specific.SpecificDatumReader;
-import org.apache.avro.specific.SpecificDatumWriter;
-import org.apache.crunch.CrunchRuntimeException;
-import org.apache.crunch.types.DeepCopier;
-import org.apache.hadoop.conf.Configuration;
-
-/**
- * Performs deep copies of Avro-serializable objects.
- * <p>
- * <b>Warning:</b> Methods in this class are not thread-safe. This shouldn't be a problem when
- * running in a map-reduce context where each mapper/reducer is running in its own JVM, but it may
- * well be a problem in any other kind of multi-threaded context.
- */
-abstract class AvroDeepCopier<T> implements DeepCopier<T>, Serializable {
-
-  private String jsonSchema;
-  private transient Configuration conf;
-  private transient Schema schema;
-  private BinaryEncoder binaryEncoder;
-  private BinaryDecoder binaryDecoder;
-
-  private transient DatumWriter<T> datumWriter;
-  private transient DatumReader<T> datumReader;
-
-  public AvroDeepCopier(Schema schema) {
-    this.jsonSchema = schema.toString();
-  }
-
-  protected Schema getSchema() {
-    if (schema == null) {
-      schema = new Schema.Parser().parse(jsonSchema);
-    }
-    return schema;
-  }
-
-  @Override
-  public void initialize(Configuration conf) {
-    this.conf = conf;
-  }
-
-  protected abstract T createCopyTarget();
-
-  protected abstract DatumWriter<T> createDatumWriter(Configuration conf);
-
-  protected abstract DatumReader<T> createDatumReader(Configuration conf);
-
-  /**
-   * Deep copier for Avro specific data objects.
-   */
-  public static class AvroSpecificDeepCopier<T> extends AvroDeepCopier<T> {
-
-    private Class<T> valueClass;
-
-    public AvroSpecificDeepCopier(Class<T> valueClass, Schema schema) {
-      super(schema);
-      this.valueClass = valueClass;
-    }
-
-    @Override
-    protected T createCopyTarget() {
-      return createNewInstance(valueClass);
-    }
-
-    @Override
-    protected DatumWriter<T> createDatumWriter(Configuration conf) {
-      return new SpecificDatumWriter<T>(getSchema());
-    }
-
-    @Override
-    protected DatumReader<T> createDatumReader(Configuration conf) {
-      return new SpecificDatumReader<T>(getSchema());
-    }
-
-  }
-
-  /**
-   * Deep copier for Avro generic data objects.
-   */
-  public static class AvroGenericDeepCopier extends AvroDeepCopier<Record> {
-
-    private transient Schema schema;
-
-    public AvroGenericDeepCopier(Schema schema) {
-      super(schema);
-    }
-
-    @Override
-    protected Record createCopyTarget() {
-      return new GenericData.Record(getSchema());
-    }
-
-    @Override
-    protected DatumReader<Record> createDatumReader(Configuration conf) {
-      return new GenericDatumReader<Record>(getSchema());
-    }
-
-    @Override
-    protected DatumWriter<Record> createDatumWriter(Configuration conf) {
-      return new GenericDatumWriter<Record>(getSchema());
-    }
-  }
-
-  /**
-   * Deep copier for Avro reflect data objects.
-   */
-  public static class AvroReflectDeepCopier<T> extends AvroDeepCopier<T> {
-
-    private Class<T> valueClass;
-
-    public AvroReflectDeepCopier(Class<T> valueClass, Schema schema) {
-      super(schema);
-      this.valueClass = valueClass;
-    }
-
-    @Override
-    protected T createCopyTarget() {
-      return createNewInstance(valueClass);
-    }
-
-    @Override
-    protected DatumReader<T> createDatumReader(Configuration conf) {
-      return Avros.getReflectDataFactory(conf).getReader(getSchema());
-    }
-
-    @Override
-    protected DatumWriter<T> createDatumWriter(Configuration conf) {
-      return Avros.getReflectDataFactory(conf).getWriter(getSchema());
-    }
-  }
-
-  /**
-   * Create a deep copy of an Avro value.
-   * 
-   * @param source The value to be copied
-   * @return The deep copy of the value
-   */
-  @Override
-  public T deepCopy(T source) {
-    
-    if (source == null) {
-      return null;
-    }
-    
-    if (datumReader == null) {
-      datumReader = createDatumReader(conf);
-    }
-    if (datumWriter == null) {
-      datumWriter = createDatumWriter(conf);
-    }
-    ByteArrayOutputStream byteOutStream = new ByteArrayOutputStream();
-    binaryEncoder = EncoderFactory.get().binaryEncoder(byteOutStream, binaryEncoder);
-    T target = createCopyTarget();
-    try {
-      datumWriter.write(source, binaryEncoder);
-      binaryEncoder.flush();
-      binaryDecoder = DecoderFactory.get()
-          .binaryDecoder(byteOutStream.toByteArray(), binaryDecoder);
-      datumReader.read(target, binaryDecoder);
-    } catch (Exception e) {
-      throw new CrunchRuntimeException("Error while deep copying avro value " + source, e);
-    }
-
-    return target;
-  }
-
-  protected T createNewInstance(Class<T> targetClass) {
-    try {
-      return targetClass.newInstance();
-    } catch (InstantiationException e) {
-      throw new CrunchRuntimeException(e);
-    } catch (IllegalAccessException e) {
-      throw new CrunchRuntimeException(e);
-    }
-  }
-
-}
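
The deepCopy implementation above is a binary encode/decode round trip through Avro. A minimal standalone sketch of the same round trip using the generic Avro API (the record schema and class name here are illustrative assumptions, not part of Crunch):

    import java.io.ByteArrayOutputStream;

    import org.apache.avro.Schema;
    import org.apache.avro.generic.GenericData;
    import org.apache.avro.generic.GenericDatumReader;
    import org.apache.avro.generic.GenericDatumWriter;
    import org.apache.avro.generic.GenericRecord;
    import org.apache.avro.io.BinaryDecoder;
    import org.apache.avro.io.BinaryEncoder;
    import org.apache.avro.io.DecoderFactory;
    import org.apache.avro.io.EncoderFactory;

    public class GenericDeepCopyExample {
      public static void main(String[] args) throws Exception {
        Schema schema = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"Example\",\"fields\":"
            + "[{\"name\":\"value\",\"type\":\"string\"}]}");
        GenericRecord source = new GenericData.Record(schema);
        source.put("value", "hello");

        // Serialize the source record into an in-memory buffer...
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
        new GenericDatumWriter<GenericRecord>(schema).write(source, encoder);
        encoder.flush();

        // ...and deserialize it into a fresh instance, yielding a deep copy.
        BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(out.toByteArray(), null);
        GenericRecord copy = new GenericDatumReader<GenericRecord>(schema)
            .read(new GenericData.Record(schema), decoder);

        // The copy carries the same field values but is a distinct object.
        System.out.println(copy + " (same instance: " + (copy == source) + ")");
      }
    }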

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/avro/AvroGroupedTableType.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/avro/AvroGroupedTableType.java b/crunch/src/main/java/org/apache/crunch/types/avro/AvroGroupedTableType.java
deleted file mode 100644
index 598868f..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/avro/AvroGroupedTableType.java
+++ /dev/null
@@ -1,114 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.avro;
-
-import java.util.Collection;
-
-import org.apache.avro.mapred.AvroJob;
-import org.apache.avro.mapred.AvroKey;
-import org.apache.avro.mapred.AvroKeyComparator;
-import org.apache.avro.mapred.AvroValue;
-import org.apache.crunch.GroupingOptions;
-import org.apache.crunch.MapFn;
-import org.apache.crunch.Pair;
-import org.apache.crunch.fn.PairMapFn;
-import org.apache.crunch.lib.PTables;
-import org.apache.crunch.types.Converter;
-import org.apache.crunch.types.PGroupedTableType;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.Job;
-
-/**
- * The implementation of the {@link PGroupedTableType} interface for Avro-based
- * serialization.
- */
-class AvroGroupedTableType<K, V> extends PGroupedTableType<K, V> {
-
-  private static final AvroPairConverter CONVERTER = new AvroPairConverter();
-  private final MapFn inputFn;
-  private final MapFn outputFn;
-
-  public AvroGroupedTableType(AvroTableType<K, V> tableType) {
-    super(tableType);
-    AvroType keyType = (AvroType) tableType.getKeyType();
-    AvroType valueType = (AvroType) tableType.getValueType();
-    this.inputFn = new PairIterableMapFn(keyType.getInputMapFn(), valueType.getInputMapFn());
-    this.outputFn = new PairMapFn(keyType.getOutputMapFn(), valueType.getOutputMapFn());
-  }
-
-  @Override
-  public Class<Pair<K, Iterable<V>>> getTypeClass() {
-    return (Class<Pair<K, Iterable<V>>>) Pair.of(null, null).getClass();
-  }
-
-  @Override
-  public Converter getGroupingConverter() {
-    return CONVERTER;
-  }
-
-  @Override
-  public MapFn getInputMapFn() {
-    return inputFn;
-  }
-
-  @Override
-  public MapFn getOutputMapFn() {
-    return outputFn;
-  }
-
-  @Override
-  public void initialize(Configuration conf) {
-    getTableType().initialize(conf);
-  }
-
-  @Override
-  public Pair<K, Iterable<V>> getDetachedValue(Pair<K, Iterable<V>> value) {
-    return PTables.getGroupedDetachedValue(this, value);
-  }
-
-  @Override
-  public void configureShuffle(Job job, GroupingOptions options) {
-    AvroTableType<K, V> att = (AvroTableType<K, V>) tableType;
-    String schemaJson = att.getSchema().toString();
-    Configuration conf = job.getConfiguration();
-
-    if (att.hasReflect()) {
-      if (att.hasSpecific()) {
-        Avros.checkCombiningSpecificAndReflectionSchemas();
-      }
-      conf.setBoolean(AvroJob.MAP_OUTPUT_IS_REFLECT, true);
-    }
-    conf.set(AvroJob.MAP_OUTPUT_SCHEMA, schemaJson);
-    job.setSortComparatorClass(AvroKeyComparator.class);
-    job.setMapOutputKeyClass(AvroKey.class);
-    job.setMapOutputValueClass(AvroValue.class);
-    if (options != null) {
-      options.configure(job);
-    }
-
-    Avros.configureReflectDataFactory(conf);
-
-    Collection<String> serializations = job.getConfiguration().getStringCollection(
-        "io.serializations");
-    if (!serializations.contains(SafeAvroSerialization.class.getName())) {
-      serializations.add(SafeAvroSerialization.class.getName());
-      job.getConfiguration().setStrings("io.serializations", serializations.toArray(new String[0]));
-    }
-  }
-
-}
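
The io.serializations handling at the end of configureShuffle is a general append-if-absent pattern. A small hedged sketch of that same pattern in isolation (the helper class and method names are illustrative):

    import java.util.Collection;

    import org.apache.hadoop.conf.Configuration;

    public class SerializationRegistration {

      // Appends a serialization implementation to io.serializations if it is not
      // already present, preserving whatever is configured there.
      public static void register(Configuration conf, Class<?> serializationClass) {
        Collection<String> serializations = conf.getStringCollection("io.serializations");
        if (!serializations.contains(serializationClass.getName())) {
          serializations.add(serializationClass.getName());
          conf.setStrings("io.serializations", serializations.toArray(new String[0]));
        }
      }
    }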

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/avro/AvroInputFormat.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/avro/AvroInputFormat.java b/crunch/src/main/java/org/apache/crunch/types/avro/AvroInputFormat.java
deleted file mode 100644
index b8bbebd..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/avro/AvroInputFormat.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.avro;
-
-import java.io.IOException;
-
-import org.apache.avro.Schema;
-import org.apache.avro.mapred.AvroJob;
-import org.apache.avro.mapred.AvroWrapper;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-
-/** An {@link org.apache.hadoop.mapreduce.InputFormat} for Avro data files. */
-public class AvroInputFormat<T> extends FileInputFormat<AvroWrapper<T>, NullWritable> {
-  @Override
-  public RecordReader<AvroWrapper<T>, NullWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
-      throws IOException, InterruptedException {
-    context.setStatus(split.toString());
-    String jsonSchema = context.getConfiguration().get(AvroJob.INPUT_SCHEMA);
-    Schema schema = new Schema.Parser().parse(jsonSchema);
-    return new AvroRecordReader<T>(schema);
-  }
-}
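
The input format above expects the reader schema under AvroJob.INPUT_SCHEMA, so a job using it directly would set that key before submission. A hedged sketch of such a setup (class and method names are illustrative):

    import java.io.IOException;

    import org.apache.avro.Schema;
    import org.apache.avro.mapred.AvroJob;
    import org.apache.crunch.types.avro.AvroInputFormat;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

    public class AvroInputJobSetup {

      // Builds a Job that reads Avro container files with the input format above.
      public static Job configure(Configuration conf, Path input, Schema readerSchema)
          throws IOException {
        Job job = new Job(conf);
        job.getConfiguration().set(AvroJob.INPUT_SCHEMA, readerSchema.toString());
        job.setInputFormatClass(AvroInputFormat.class);
        FileInputFormat.addInputPath(job, input);
        return job;
      }
    }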

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/avro/AvroKeyConverter.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/avro/AvroKeyConverter.java b/crunch/src/main/java/org/apache/crunch/types/avro/AvroKeyConverter.java
deleted file mode 100644
index 68b717d..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/avro/AvroKeyConverter.java
+++ /dev/null
@@ -1,65 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.avro;
-
-import org.apache.avro.mapred.AvroWrapper;
-import org.apache.crunch.types.Converter;
-import org.apache.hadoop.io.NullWritable;
-
-class AvroKeyConverter<K> implements Converter<AvroWrapper<K>, NullWritable, K, Iterable<K>> {
-
-  private transient AvroWrapper<K> wrapper = null;
-
-  @Override
-  public K convertInput(AvroWrapper<K> key, NullWritable value) {
-    return key.datum();
-  }
-
-  @Override
-  public AvroWrapper<K> outputKey(K value) {
-    getWrapper().datum(value);
-    return wrapper;
-  }
-
-  @Override
-  public NullWritable outputValue(K value) {
-    return NullWritable.get();
-  }
-
-  @Override
-  public Class<AvroWrapper<K>> getKeyClass() {
-    return (Class<AvroWrapper<K>>) getWrapper().getClass();
-  }
-
-  @Override
-  public Class<NullWritable> getValueClass() {
-    return NullWritable.class;
-  }
-
-  private AvroWrapper<K> getWrapper() {
-    if (wrapper == null) {
-      wrapper = new AvroWrapper<K>();
-    }
-    return wrapper;
-  }
-
-  @Override
-  public Iterable<K> convertIterableInput(AvroWrapper<K> key, Iterable<NullWritable> value) {
-    throw new UnsupportedOperationException("Should not be possible");
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/avro/AvroOutputFormat.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/avro/AvroOutputFormat.java b/crunch/src/main/java/org/apache/crunch/types/avro/AvroOutputFormat.java
deleted file mode 100644
index 98d3f50..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/avro/AvroOutputFormat.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.avro;
-
-import java.io.IOException;
-
-import org.apache.avro.Schema;
-import org.apache.avro.file.CodecFactory;
-import org.apache.avro.file.DataFileWriter;
-import org.apache.avro.mapred.AvroJob;
-import org.apache.avro.mapred.AvroWrapper;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-
-/** An {@link org.apache.hadoop.mapreduce.OutputFormat} for Avro data files. */
-public class AvroOutputFormat<T> extends FileOutputFormat<AvroWrapper<T>, NullWritable> {
-
-  @Override
-  public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(TaskAttemptContext context) throws IOException,
-      InterruptedException {
-
-    Configuration conf = context.getConfiguration();
-    Schema schema = null;
-    String outputName = conf.get("crunch.namedoutput");
-    if (outputName != null && !outputName.isEmpty()) {
-      schema = (new Schema.Parser()).parse(conf.get("avro.output.schema." + outputName));
-    } else {
-      schema = AvroJob.getOutputSchema(context.getConfiguration());
-    }
-
-    ReflectDataFactory factory = Avros.getReflectDataFactory(conf);
-    final DataFileWriter<T> WRITER = new DataFileWriter<T>(factory.<T> getWriter(schema));
-
-    JobConf jc = new JobConf(conf);
-    /* copied from org.apache.avro.mapred.AvroOutputFormat */
-    
-    if (org.apache.hadoop.mapred.FileOutputFormat.getCompressOutput(jc)) {
-      int level = conf.getInt(org.apache.avro.mapred.AvroOutputFormat.DEFLATE_LEVEL_KEY,
-          org.apache.avro.mapred.AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
-      String codecName = conf.get(AvroJob.OUTPUT_CODEC, 
-          org.apache.avro.file.DataFileConstants.DEFLATE_CODEC);
-      CodecFactory codec = codecName.equals(org.apache.avro.file.DataFileConstants.DEFLATE_CODEC)
-          ? CodecFactory.deflateCodec(level)
-          : CodecFactory.fromString(codecName);
-      WRITER.setCodec(codec);
-    }
-
-    WRITER.setSyncInterval(jc.getInt(org.apache.avro.mapred.AvroOutputFormat.SYNC_INTERVAL_KEY, 
-        org.apache.avro.file.DataFileConstants.DEFAULT_SYNC_INTERVAL));
-
-    Path path = getDefaultWorkFile(context, org.apache.avro.mapred.AvroOutputFormat.EXT);
-    WRITER.create(schema, path.getFileSystem(context.getConfiguration()).create(path));
-    
-    return new RecordWriter<AvroWrapper<T>, NullWritable>() {
-      @Override
-      public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
-        WRITER.append(wrapper.datum());
-      }
-
-      @Override
-      public void close(TaskAttemptContext context) throws IOException, InterruptedException {
-        WRITER.close();
-      }
-    };
-  }
-
-}
\ No newline at end of file
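
Since the codec handling above mirrors org.apache.avro.mapred.AvroOutputFormat, output compression can be requested through the same old-API settings. A hedged sketch (helper class name is illustrative; exact behavior depends on the Hadoop and Avro versions in use):

    import org.apache.avro.file.DataFileConstants;
    import org.apache.avro.mapred.AvroJob;
    import org.apache.hadoop.mapred.FileOutputFormat;
    import org.apache.hadoop.mapred.JobConf;

    public class AvroCompressionSettings {

      // Request deflate compression at the given level for Avro output files.
      public static void enableDeflate(JobConf conf, int level) {
        FileOutputFormat.setCompressOutput(conf, true);
        conf.set(AvroJob.OUTPUT_CODEC, DataFileConstants.DEFLATE_CODEC);
        conf.setInt(org.apache.avro.mapred.AvroOutputFormat.DEFLATE_LEVEL_KEY, level);
      }

      // Request snappy compression (assumes the snappy codec is available at runtime).
      public static void enableSnappy(JobConf conf) {
        FileOutputFormat.setCompressOutput(conf, true);
        conf.set(AvroJob.OUTPUT_CODEC, DataFileConstants.SNAPPY_CODEC);
      }
    }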

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/avro/AvroPairConverter.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/avro/AvroPairConverter.java b/crunch/src/main/java/org/apache/crunch/types/avro/AvroPairConverter.java
deleted file mode 100644
index d1d2627..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/avro/AvroPairConverter.java
+++ /dev/null
@@ -1,108 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.avro;
-
-import java.util.Iterator;
-
-import org.apache.avro.mapred.AvroKey;
-import org.apache.avro.mapred.AvroValue;
-import org.apache.crunch.Pair;
-import org.apache.crunch.types.Converter;
-
-class AvroPairConverter<K, V> implements Converter<AvroKey<K>, AvroValue<V>, Pair<K, V>, Pair<K, Iterable<V>>> {
-
-  private transient AvroKey<K> keyWrapper = null;
-  private transient AvroValue<V> valueWrapper = null;
-
-  @Override
-  public Pair<K, V> convertInput(AvroKey<K> key, AvroValue<V> value) {
-    return Pair.of(key.datum(), value.datum());
-  }
-
-  public Pair<K, Iterable<V>> convertIterableInput(AvroKey<K> key, Iterable<AvroValue<V>> iter) {
-    Iterable<V> it = new AvroWrappedIterable<V>(iter);
-    return Pair.of(key.datum(), it);
-  }
-
-  @Override
-  public AvroKey<K> outputKey(Pair<K, V> value) {
-    getKeyWrapper().datum(value.first());
-    return keyWrapper;
-  }
-
-  @Override
-  public AvroValue<V> outputValue(Pair<K, V> value) {
-    getValueWrapper().datum(value.second());
-    return valueWrapper;
-  }
-
-  @Override
-  public Class<AvroKey<K>> getKeyClass() {
-    return (Class<AvroKey<K>>) getKeyWrapper().getClass();
-  }
-
-  @Override
-  public Class<AvroValue<V>> getValueClass() {
-    return (Class<AvroValue<V>>) getValueWrapper().getClass();
-  }
-
-  private AvroKey<K> getKeyWrapper() {
-    if (keyWrapper == null) {
-      keyWrapper = new AvroKey<K>();
-    }
-    return keyWrapper;
-  }
-
-  private AvroValue<V> getValueWrapper() {
-    if (valueWrapper == null) {
-      valueWrapper = new AvroValue<V>();
-    }
-    return valueWrapper;
-  }
-
-  private static class AvroWrappedIterable<V> implements Iterable<V> {
-
-    private final Iterable<AvroValue<V>> iters;
-
-    public AvroWrappedIterable(Iterable<AvroValue<V>> iters) {
-      this.iters = iters;
-    }
-
-    @Override
-    public Iterator<V> iterator() {
-      return new Iterator<V>() {
-        private final Iterator<AvroValue<V>> it = iters.iterator();
-
-        @Override
-        public boolean hasNext() {
-          return it.hasNext();
-        }
-
-        @Override
-        public V next() {
-          return it.next().datum();
-        }
-
-        @Override
-        public void remove() {
-          it.remove();
-        }
-      };
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/avro/AvroRecordReader.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/avro/AvroRecordReader.java b/crunch/src/main/java/org/apache/crunch/types/avro/AvroRecordReader.java
deleted file mode 100644
index 9c7578c..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/avro/AvroRecordReader.java
+++ /dev/null
@@ -1,114 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.avro;
-
-import java.io.IOException;
-
-import org.apache.avro.Schema;
-import org.apache.avro.file.DataFileReader;
-import org.apache.avro.file.FileReader;
-import org.apache.avro.file.SeekableInput;
-import org.apache.avro.io.DatumReader;
-import org.apache.avro.mapred.AvroJob;
-import org.apache.avro.mapred.AvroWrapper;
-import org.apache.avro.mapred.FsInput;
-import org.apache.avro.specific.SpecificDatumReader;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.input.FileSplit;
-
-/** A {@link RecordReader} for Avro data files. */
-class AvroRecordReader<T> extends RecordReader<AvroWrapper<T>, NullWritable> {
-
-  private FileReader<T> reader;
-  private long start;
-  private long end;
-  private AvroWrapper<T> key;
-  private NullWritable value;
-  private Schema schema;
-
-  public AvroRecordReader(Schema schema) {
-    this.schema = schema;
-  }
-
-  @Override
-  public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException, InterruptedException {
-    FileSplit split = (FileSplit) genericSplit;
-    Configuration conf = context.getConfiguration();
-    SeekableInput in = new FsInput(split.getPath(), conf);
-    DatumReader<T> datumReader = null;
-    if (context.getConfiguration().getBoolean(AvroJob.INPUT_IS_REFLECT, true)) {
-      ReflectDataFactory factory = Avros.getReflectDataFactory(conf);
-      datumReader = factory.getReader(schema);
-    } else {
-      datumReader = new SpecificDatumReader<T>(schema);
-    }
-    this.reader = DataFileReader.openReader(in, datumReader);
-    reader.sync(split.getStart()); // sync to start
-    this.start = reader.tell();
-    this.end = split.getStart() + split.getLength();
-  }
-
-  @Override
-  public boolean nextKeyValue() throws IOException, InterruptedException {
-    if (!reader.hasNext() || reader.pastSync(end)) {
-      key = null;
-      value = null;
-      return false;
-    }
-    if (key == null) {
-      key = new AvroWrapper<T>();
-    }
-    if (value == null) {
-      value = NullWritable.get();
-    }
-    key.datum(reader.next(key.datum()));
-    return true;
-  }
-
-  @Override
-  public AvroWrapper<T> getCurrentKey() throws IOException, InterruptedException {
-    return key;
-  }
-
-  @Override
-  public NullWritable getCurrentValue() throws IOException, InterruptedException {
-    return value;
-  }
-
-  @Override
-  public float getProgress() throws IOException {
-    if (end == start) {
-      return 0.0f;
-    } else {
-      return Math.min(1.0f, (getPos() - start) / (float) (end - start));
-    }
-  }
-
-  public long getPos() throws IOException {
-    return reader.tell();
-  }
-
-  @Override
-  public void close() throws IOException {
-    reader.close();
-  }
-}
\ No newline at end of file
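
The initialize/nextKeyValue logic above follows the standard idiom for reading one split of an Avro container file: sync to the first block at or after the split start, then stop at the first sync point past the split end. A hedged standalone sketch of that idiom (class and method names are illustrative):

    import org.apache.avro.file.DataFileReader;
    import org.apache.avro.generic.GenericDatumReader;
    import org.apache.avro.generic.GenericRecord;
    import org.apache.avro.mapred.FsInput;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;

    public class AvroSplitReadExample {

      // Counts the records belonging to the byte range [splitStart, splitStart + splitLength).
      public static long countRecordsInSplit(Configuration conf, Path file, long splitStart,
          long splitLength) throws Exception {
        DataFileReader<GenericRecord> reader = new DataFileReader<GenericRecord>(
            new FsInput(file, conf), new GenericDatumReader<GenericRecord>());
        long count = 0;
        try {
          reader.sync(splitStart);            // jump to the first block in the split
          long end = splitStart + splitLength;
          while (reader.hasNext() && !reader.pastSync(end)) {
            reader.next();
            count++;
          }
        } finally {
          reader.close();
        }
        return count;
      }
    }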

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/avro/AvroTableType.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/avro/AvroTableType.java b/crunch/src/main/java/org/apache/crunch/types/avro/AvroTableType.java
deleted file mode 100644
index 86613df..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/avro/AvroTableType.java
+++ /dev/null
@@ -1,151 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.avro;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.IndexedRecord;
-import org.apache.crunch.MapFn;
-import org.apache.crunch.Pair;
-import org.apache.crunch.lib.PTables;
-import org.apache.crunch.types.PGroupedTableType;
-import org.apache.crunch.types.PTableType;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.TupleDeepCopier;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.TaskInputOutputContext;
-
-/**
- * The implementation of the PTableType interface for Avro-based serialization.
- * 
- */
-class AvroTableType<K, V> extends AvroType<Pair<K, V>> implements PTableType<K, V> {
-
-  private static class PairToAvroPair extends MapFn<Pair, org.apache.avro.mapred.Pair> {
-    private final MapFn keyMapFn;
-    private final MapFn valueMapFn;
-    private final String firstJson;
-    private final String secondJson;
-
-    private String pairSchemaJson;
-    private transient Schema pairSchema;
-
-    public PairToAvroPair(AvroType keyType, AvroType valueType) {
-      this.keyMapFn = keyType.getOutputMapFn();
-      this.firstJson = keyType.getSchema().toString();
-      this.valueMapFn = valueType.getOutputMapFn();
-      this.secondJson = valueType.getSchema().toString();
-    }
-
-    @Override
-    public void configure(Configuration conf) {
-      keyMapFn.configure(conf);
-      valueMapFn.configure(conf);
-    }
-
-    @Override
-    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
-      keyMapFn.setContext(context);
-      valueMapFn.setContext(context);
-    }
-    
-    @Override
-    public void initialize() {
-      keyMapFn.initialize();
-      valueMapFn.initialize();
-      pairSchemaJson = org.apache.avro.mapred.Pair.getPairSchema(
-          new Schema.Parser().parse(firstJson), new Schema.Parser().parse(secondJson)).toString();
-    }
-
-    @Override
-    public org.apache.avro.mapred.Pair map(Pair input) {
-      if (pairSchema == null) {
-        pairSchema = new Schema.Parser().parse(pairSchemaJson);
-      }
-      org.apache.avro.mapred.Pair avroPair = new org.apache.avro.mapred.Pair(pairSchema);
-      avroPair.key(keyMapFn.map(input.first()));
-      avroPair.value(valueMapFn.map(input.second()));
-      return avroPair;
-    }
-  }
-
-  private static class IndexedRecordToPair extends MapFn<IndexedRecord, Pair> {
-
-    private final MapFn firstMapFn;
-    private final MapFn secondMapFn;
-
-    public IndexedRecordToPair(MapFn firstMapFn, MapFn secondMapFn) {
-      this.firstMapFn = firstMapFn;
-      this.secondMapFn = secondMapFn;
-    }
-
-    @Override
-    public void configure(Configuration conf) {
-      firstMapFn.configure(conf);
-      secondMapFn.configure(conf);
-    }
-
-    @Override
-    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
-      firstMapFn.setContext(context);
-      secondMapFn.setContext(context);
-    }
-    
-    @Override
-    public void initialize() {
-      firstMapFn.initialize();
-      secondMapFn.initialize();
-    }
-
-    @Override
-    public Pair map(IndexedRecord input) {
-      return Pair.of(firstMapFn.map(input.get(0)), secondMapFn.map(input.get(1)));
-    }
-  }
-
-  private final AvroType<K> keyType;
-  private final AvroType<V> valueType;
-
-  public AvroTableType(AvroType<K> keyType, AvroType<V> valueType, Class<Pair<K, V>> pairClass) {
-    super(pairClass, org.apache.avro.mapred.Pair.getPairSchema(keyType.getSchema(),
-        valueType.getSchema()), new IndexedRecordToPair(keyType.getInputMapFn(),
-        valueType.getInputMapFn()), new PairToAvroPair(keyType, valueType), new TupleDeepCopier(
-        Pair.class, keyType, valueType), keyType, valueType);
-    this.keyType = keyType;
-    this.valueType = valueType;
-  }
-
-  @Override
-  public PType<K> getKeyType() {
-    return keyType;
-  }
-
-  @Override
-  public PType<V> getValueType() {
-    return valueType;
-  }
-
-  @Override
-  public PGroupedTableType<K, V> getGroupedTableType() {
-    return new AvroGroupedTableType<K, V>(this);
-  }
-
-  @Override
-  public Pair<K, V> getDetachedValue(Pair<K, V> value) {
-    return PTables.getDetachedValue(this, value);
-  }
-}
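
The PairToAvroPair mapping above relies on org.apache.avro.mapred.Pair and its generated pair schema. A small hedged sketch of how that pair schema is built and populated, using illustrative primitive schemas:

    import org.apache.avro.Schema;

    public class AvroPairSchemaExample {
      public static void main(String[] args) {
        // Build the (key, value) record schema that the table type serializes through.
        Schema pairSchema = org.apache.avro.mapred.Pair.getPairSchema(
            Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.LONG));
        org.apache.avro.mapred.Pair avroPair = new org.apache.avro.mapred.Pair(pairSchema);
        avroPair.key("word");
        avroPair.value(42L);
        System.out.println(pairSchema.toString(true));
      }
    }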

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/avro/AvroTextOutputFormat.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/avro/AvroTextOutputFormat.java b/crunch/src/main/java/org/apache/crunch/types/avro/AvroTextOutputFormat.java
deleted file mode 100644
index 4930235..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/avro/AvroTextOutputFormat.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.avro;
-
-import java.io.IOException;
-
-import org.apache.avro.mapred.AvroWrapper;
-import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
-
-public class AvroTextOutputFormat<K, V> extends TextOutputFormat<K, V> {
-  class DatumRecordTextWriter extends RecordWriter<K, V> {
-    private RecordWriter lineRecordWriter;
-
-    public DatumRecordTextWriter(RecordWriter recordWriter) {
-      this.lineRecordWriter = recordWriter;
-    }
-
-    @Override
-    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
-      lineRecordWriter.close(context);
-    }
-
-    @Override
-    public void write(K arg0, V arg1) throws IOException, InterruptedException {
-      lineRecordWriter.write(getData(arg0), getData(arg1));
-    }
-
-    private Object getData(Object o) {
-      Object data = o;
-      if (o instanceof AvroWrapper) {
-        data = ((AvroWrapper) o).datum();
-      }
-      return data;
-    }
-  }
-
-  @Override
-  public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
-    RecordWriter<K, V> recordWriter = super.getRecordWriter(context);
-    return new DatumRecordTextWriter(recordWriter);
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/avro/AvroType.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/avro/AvroType.java b/crunch/src/main/java/org/apache/crunch/types/avro/AvroType.java
deleted file mode 100644
index a92b0d0..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/avro/AvroType.java
+++ /dev/null
@@ -1,199 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.avro;
-
-import java.util.List;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericData;
-import org.apache.avro.specific.SpecificRecord;
-import org.apache.commons.lang.builder.HashCodeBuilder;
-import org.apache.crunch.MapFn;
-import org.apache.crunch.fn.IdentityFn;
-import org.apache.crunch.io.ReadableSourceTarget;
-import org.apache.crunch.io.avro.AvroFileSourceTarget;
-import org.apache.crunch.types.Converter;
-import org.apache.crunch.types.DeepCopier;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.PTypeFamily;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-
-import com.google.common.base.Preconditions;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Lists;
-
-/**
- * The implementation of the PType interface for Avro-based serialization.
- * 
- */
-public class AvroType<T> implements PType<T> {
-
-  private static final Converter AVRO_CONVERTER = new AvroKeyConverter();
-
-  private final Class<T> typeClass;
-  private final String schemaString;
-  private transient Schema schema;
-  private final MapFn baseInputMapFn;
-  private final MapFn baseOutputMapFn;
-  private final List<PType> subTypes;
-  private DeepCopier<T> deepCopier;
-  private boolean initialized = false;
-
-  public AvroType(Class<T> typeClass, Schema schema, DeepCopier<T> deepCopier, PType... ptypes) {
-    this(typeClass, schema, IdentityFn.getInstance(), IdentityFn.getInstance(), deepCopier, ptypes);
-  }
-
-  public AvroType(Class<T> typeClass, Schema schema, MapFn inputMapFn, MapFn outputMapFn,
-      DeepCopier<T> deepCopier, PType... ptypes) {
-    this.typeClass = typeClass;
-    this.schema = Preconditions.checkNotNull(schema);
-    this.schemaString = schema.toString();
-    this.baseInputMapFn = inputMapFn;
-    this.baseOutputMapFn = outputMapFn;
-    this.deepCopier = deepCopier;
-    this.subTypes = ImmutableList.<PType> builder().add(ptypes).build();
-  }
-
-  @Override
-  public Class<T> getTypeClass() {
-    return typeClass;
-  }
-
-  @Override
-  public PTypeFamily getFamily() {
-    return AvroTypeFamily.getInstance();
-  }
-
-  @Override
-  public List<PType> getSubTypes() {
-    return Lists.<PType> newArrayList(subTypes);
-  }
-
-  public Schema getSchema() {
-    if (schema == null) {
-      schema = new Schema.Parser().parse(schemaString);
-    }
-    return schema;
-  }
-
-  /**
-   * Determine if the wrapped type is a specific-data Avro type or wraps one.
-   * 
-   * @return true if the wrapped type is a specific data type or wraps one
-   */
-  public boolean hasSpecific() {
-    if (Avros.isPrimitive(this)) {
-      return false;
-    }
-
-    if (!this.subTypes.isEmpty()) {
-      for (PType<?> subType : this.subTypes) {
-        AvroType<?> atype = (AvroType<?>) subType;
-        if (atype.hasSpecific()) {
-          return true;
-        }
-      }
-      return false;
-    }
-
-    return SpecificRecord.class.isAssignableFrom(typeClass);
-  }
-
-  /**
-   * Determine if the wrapped type is a generic-data Avro type.
-   * 
-   * @return true if the wrapped type is a generic type
-   */
-  public boolean isGeneric() {
-    return GenericData.Record.class.equals(typeClass);
-  }
-
-  /**
-   * Determine if the wrapped type is a reflection-based Avro type or wraps one.
-   * 
-   * @return true if the wrapped type is a reflection-based type or wraps one.
-   */
-  public boolean hasReflect() {
-    if (Avros.isPrimitive(this)) {
-      return false;
-    }
-
-    if (!this.subTypes.isEmpty()) {
-      for (PType<?> subType : this.subTypes) {
-        if (((AvroType<?>) subType).hasReflect()) {
-          return true;
-        }
-      }
-      return false;
-    }
-
-    return !(typeClass.equals(GenericData.Record.class) || SpecificRecord.class
-        .isAssignableFrom(typeClass));
-  }
-
-  public MapFn<Object, T> getInputMapFn() {
-    return baseInputMapFn;
-  }
-
-  public MapFn<T, Object> getOutputMapFn() {
-    return baseOutputMapFn;
-  }
-
-  @Override
-  public Converter getConverter() {
-    return AVRO_CONVERTER;
-  }
-
-  @Override
-  public ReadableSourceTarget<T> getDefaultFileSource(Path path) {
-    return new AvroFileSourceTarget<T>(path, this);
-  }
-
-  @Override
-  public void initialize(Configuration conf) {
-    deepCopier.initialize(conf);
-    initialized = true;
-  }
-
-  @Override
-  public T getDetachedValue(T value) {
-    if (!initialized) {
-      throw new IllegalStateException("Cannot call getDetachedValue on an uninitialized PType");
-    }
-    return deepCopier.deepCopy(value);
-  }
-
-  @Override
-  public boolean equals(Object other) {
-    if (other == null || !(other instanceof AvroType)) {
-      return false;
-    }
-    AvroType at = (AvroType) other;
-    return (typeClass.equals(at.typeClass) && subTypes.equals(at.subTypes));
-
-  }
-
-  @Override
-  public int hashCode() {
-    HashCodeBuilder hcb = new HashCodeBuilder();
-    hcb.append(typeClass).append(subTypes);
-    return hcb.toHashCode();
-  }
-
-}
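
A hedged usage sketch of the initialize/getDetachedValue contract above, using a generic-record PType; the helper class name is illustrative and the schema is supplied by the caller:

    import org.apache.avro.Schema;
    import org.apache.avro.generic.GenericData;
    import org.apache.crunch.types.PType;
    import org.apache.crunch.types.avro.Avros;
    import org.apache.hadoop.conf.Configuration;

    public class DetachedValueExample {

      // Copies a possibly-reused generic record so it can be safely retained, e.g.
      // when collecting values out of an Iterable inside a reduce-side function.
      public static GenericData.Record detach(Schema schema, GenericData.Record reused) {
        PType<GenericData.Record> type = Avros.generics(schema);
        type.initialize(new Configuration());  // required before getDetachedValue
        return type.getDetachedValue(reused);
      }
    }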

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/avro/AvroTypeFamily.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/avro/AvroTypeFamily.java b/crunch/src/main/java/org/apache/crunch/types/avro/AvroTypeFamily.java
deleted file mode 100644
index e09e173..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/avro/AvroTypeFamily.java
+++ /dev/null
@@ -1,164 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.avro;
-
-import java.nio.ByteBuffer;
-import java.util.Collection;
-import java.util.Map;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericData;
-import org.apache.crunch.MapFn;
-import org.apache.crunch.Pair;
-import org.apache.crunch.Tuple;
-import org.apache.crunch.Tuple3;
-import org.apache.crunch.Tuple4;
-import org.apache.crunch.TupleN;
-import org.apache.crunch.types.PGroupedTableType;
-import org.apache.crunch.types.PTableType;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.PTypeFamily;
-import org.apache.crunch.types.PTypeUtils;
-
-public class AvroTypeFamily implements PTypeFamily {
-
-  private static final AvroTypeFamily INSTANCE = new AvroTypeFamily();
-
-  public static AvroTypeFamily getInstance() {
-    return INSTANCE;
-  }
-
-  // There can only be one instance.
-  private AvroTypeFamily() {
-  }
-
-  @Override
-  public PType<Void> nulls() {
-    return Avros.nulls();
-  }
-
-  @Override
-  public PType<String> strings() {
-    return Avros.strings();
-  }
-
-  @Override
-  public PType<Long> longs() {
-    return Avros.longs();
-  }
-
-  @Override
-  public PType<Integer> ints() {
-    return Avros.ints();
-  }
-
-  @Override
-  public PType<Float> floats() {
-    return Avros.floats();
-  }
-
-  @Override
-  public PType<Double> doubles() {
-    return Avros.doubles();
-  }
-
-  @Override
-  public PType<Boolean> booleans() {
-    return Avros.booleans();
-  }
-
-  @Override
-  public PType<ByteBuffer> bytes() {
-    return Avros.bytes();
-  }
-
-  @Override
-  public <T> PType<T> records(Class<T> clazz) {
-    return Avros.records(clazz);
-  }
-
-  public PType<GenericData.Record> generics(Schema schema) {
-    return Avros.generics(schema);
-  }
-
-  public <T> PType<T> containers(Class<T> clazz) {
-    return Avros.containers(clazz);
-  }
-
-  @Override
-  public <T> PType<Collection<T>> collections(PType<T> ptype) {
-    return Avros.collections(ptype);
-  }
-
-  @Override
-  public <T> PType<Map<String, T>> maps(PType<T> ptype) {
-    return Avros.maps(ptype);
-  }
-
-  @Override
-  public <V1, V2> PType<Pair<V1, V2>> pairs(PType<V1> p1, PType<V2> p2) {
-    return Avros.pairs(p1, p2);
-  }
-
-  @Override
-  public <V1, V2, V3> PType<Tuple3<V1, V2, V3>> triples(PType<V1> p1, PType<V2> p2, PType<V3> p3) {
-    return Avros.triples(p1, p2, p3);
-  }
-
-  @Override
-  public <V1, V2, V3, V4> PType<Tuple4<V1, V2, V3, V4>> quads(PType<V1> p1, PType<V2> p2, PType<V3> p3, PType<V4> p4) {
-    return Avros.quads(p1, p2, p3, p4);
-  }
-
-  @Override
-  public PType<TupleN> tuples(PType<?>... ptypes) {
-    return Avros.tuples(ptypes);
-  }
-
-  @Override
-  public <K, V> PTableType<K, V> tableOf(PType<K> key, PType<V> value) {
-    return Avros.tableOf(key, value);
-  }
-
-  @Override
-  public <T> PType<T> as(PType<T> ptype) {
-    if (ptype instanceof AvroType || ptype instanceof AvroGroupedTableType) {
-      return ptype;
-    }
-    if (ptype instanceof PGroupedTableType) {
-      PTableType ptt = ((PGroupedTableType) ptype).getTableType();
-      return new AvroGroupedTableType((AvroTableType) as(ptt));
-    }
-    Class<T> typeClass = ptype.getTypeClass();
-    PType<T> prim = Avros.getPrimitiveType(typeClass);
-    if (prim != null) {
-      return prim;
-    }
-    return PTypeUtils.convert(ptype, this);
-  }
-
-  @Override
-  public <T extends Tuple> PType<T> tuples(Class<T> clazz, PType<?>... ptypes) {
-    return Avros.tuples(clazz, ptypes);
-  }
-
-  @Override
-  public <S, T> PType<T> derived(Class<T> clazz, MapFn<S, T> inputFn, MapFn<T, S> outputFn, PType<S> base) {
-    return Avros.derived(clazz, inputFn, outputFn, base);
-  }
-}
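
A short hedged sketch of how PTypes are typically obtained through the type family rather than through Avros directly; the word-count shapes below are illustrative:

    import org.apache.crunch.Pair;
    import org.apache.crunch.types.PTableType;
    import org.apache.crunch.types.PType;
    import org.apache.crunch.types.PTypeFamily;
    import org.apache.crunch.types.avro.AvroTypeFamily;

    public class AvroTypeFamilyExample {

      // A key/value type for, e.g., a word-count table, built through the family.
      public static PTableType<String, Long> wordCountType() {
        PTypeFamily tf = AvroTypeFamily.getInstance();
        return tf.tableOf(tf.strings(), tf.longs());
      }

      // The equivalent pair PType, for a PCollection of (word, count) pairs.
      public static PType<Pair<String, Long>> wordCountPairType() {
        PTypeFamily tf = AvroTypeFamily.getInstance();
        return tf.pairs(tf.strings(), tf.longs());
      }
    }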

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/avro/AvroUtf8InputFormat.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/avro/AvroUtf8InputFormat.java b/crunch/src/main/java/org/apache/crunch/types/avro/AvroUtf8InputFormat.java
deleted file mode 100644
index 9460fa5..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/avro/AvroUtf8InputFormat.java
+++ /dev/null
@@ -1,98 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types.avro;
-
-import java.io.IOException;
-
-import org.apache.avro.mapred.AvroWrapper;
-import org.apache.avro.util.Utf8;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.compress.CompressionCodecFactory;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;
-
-/**
- * An {@link org.apache.hadoop.mapreduce.InputFormat} for text files. Each line is
- * a {@link Utf8} key; values are null.
- */
-public class AvroUtf8InputFormat extends FileInputFormat<AvroWrapper<Utf8>, NullWritable> {
-
-  static class Utf8LineRecordReader extends RecordReader<AvroWrapper<Utf8>, NullWritable> {
-
-    private LineRecordReader lineRecordReader;
-
-    private AvroWrapper<Utf8> currentKey = new AvroWrapper<Utf8>();
-
-    public Utf8LineRecordReader() throws IOException {
-      this.lineRecordReader = new LineRecordReader();
-    }
-
-    public void close() throws IOException {
-      lineRecordReader.close();
-    }
-
-    public float getProgress() throws IOException {
-      return lineRecordReader.getProgress();
-    }
-
-    @Override
-    public AvroWrapper<Utf8> getCurrentKey() throws IOException, InterruptedException {
-      Text txt = lineRecordReader.getCurrentValue();
-      currentKey.datum(new Utf8(txt.toString()));
-      return currentKey;
-    }
-
-    @Override
-    public NullWritable getCurrentValue() throws IOException, InterruptedException {
-      return NullWritable.get();
-    }
-
-    @Override
-    public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
-      lineRecordReader.initialize(split, context);
-    }
-
-    @Override
-    public boolean nextKeyValue() throws IOException, InterruptedException {
-      return lineRecordReader.nextKeyValue();
-    }
-  }
-
-  private CompressionCodecFactory compressionCodecs = null;
-
-  public void configure(Configuration conf) {
-    compressionCodecs = new CompressionCodecFactory(conf);
-  }
-
-  protected boolean isSplitable(FileSystem fs, Path file) {
-    return compressionCodecs.getCodec(file) == null;
-  }
-
-  @Override
-  public RecordReader<AvroWrapper<Utf8>, NullWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
-      throws IOException, InterruptedException {
-    return new Utf8LineRecordReader();
-  }
-}


[31/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/CrunchOutputs.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/CrunchOutputs.java b/crunch-core/src/main/java/org/apache/crunch/io/CrunchOutputs.java
new file mode 100644
index 0000000..ccf4fb5
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/CrunchOutputs.java
@@ -0,0 +1,184 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io;
+
+import org.apache.crunch.CrunchRuntimeException;
+import org.apache.crunch.hadoop.mapreduce.TaskAttemptContextFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.OutputFormat;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskInputOutputContext;
+import org.apache.hadoop.util.ReflectionUtils;
+
+import com.google.common.base.Joiner;
+import com.google.common.base.Splitter;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * An analogue of {@link CrunchInputs} for handling multiple {@code OutputFormat} instances
+ * writing to multiple files within a single MapReduce job.
+ */
+public class CrunchOutputs<K, V> {
+  public static final String CRUNCH_OUTPUTS = "crunch.outputs.dir";
+  
+  private static final char RECORD_SEP = ',';
+  private static final char FIELD_SEP = ';';
+  private static final Joiner JOINER = Joiner.on(FIELD_SEP);
+  private static final Splitter SPLITTER = Splitter.on(FIELD_SEP);
+
+  public static void addNamedOutput(Job job, String name,
+      Class<? extends OutputFormat> outputFormatClass,
+      Class keyClass, Class valueClass) {
+    addNamedOutput(job, name, FormatBundle.forOutput(outputFormatClass), keyClass, valueClass);
+  }
+  
+  public static void addNamedOutput(Job job, String name,
+      FormatBundle<? extends OutputFormat> outputBundle,
+      Class keyClass, Class valueClass) {
+    Configuration conf = job.getConfiguration();
+    String inputs = JOINER.join(name, outputBundle.serialize(), keyClass.getName(), valueClass.getName());
+    String existing = conf.get(CRUNCH_OUTPUTS);
+    conf.set(CRUNCH_OUTPUTS, existing == null ? inputs : existing + RECORD_SEP + inputs);
+  }
+  
+  private static class OutputConfig<K, V> {
+    public FormatBundle<OutputFormat<K, V>> bundle;
+    public Class<K> keyClass;
+    public Class<V> valueClass;
+    
+    public OutputConfig(FormatBundle<OutputFormat<K, V>> bundle,
+        Class<K> keyClass, Class<V> valueClass) {
+      this.bundle = bundle;
+      this.keyClass = keyClass;
+      this.valueClass = valueClass;
+    }
+  }
+  
+  private static Map<String, OutputConfig> getNamedOutputs(
+      TaskInputOutputContext<?, ?, ?, ?> context) {
+    Map<String, OutputConfig> out = Maps.newHashMap();
+    Configuration conf = context.getConfiguration();
+    for (String input : Splitter.on(RECORD_SEP).split(conf.get(CRUNCH_OUTPUTS))) {
+      List<String> fields = Lists.newArrayList(SPLITTER.split(input));
+      String name = fields.get(0);
+      FormatBundle<OutputFormat> bundle = FormatBundle.fromSerialized(fields.get(1),
+          OutputFormat.class);
+      try {
+        Class<?> keyClass = Class.forName(fields.get(2));
+        Class<?> valueClass = Class.forName(fields.get(3));
+        out.put(name, new OutputConfig(bundle, keyClass, valueClass));
+      } catch (ClassNotFoundException e) {
+        throw new CrunchRuntimeException(e);
+      }
+    }
+    return out;
+  }
+  
+  private static final String BASE_OUTPUT_NAME = "mapreduce.output.basename";
+  private static final String COUNTERS_GROUP = CrunchOutputs.class.getName();
+
+  private TaskInputOutputContext<?, ?, K, V> baseContext;
+  private Map<String, OutputConfig> namedOutputs;
+  private Map<String, RecordWriter<K, V>> recordWriters;
+  private Map<String, TaskAttemptContext> taskContextCache;
+  
+  /**
+   * Creates and initializes multiple-outputs support; it should be
+   * instantiated in the Mapper/Reducer setup method.
+   *
+   * @param context the TaskInputOutputContext object
+   */
+  public CrunchOutputs(TaskInputOutputContext<?, ?, K, V> context) {
+    this.baseContext = context;
+    namedOutputs = getNamedOutputs(context);
+    recordWriters = Maps.newHashMap();
+    taskContextCache = Maps.newHashMap();
+  }
+  
+  @SuppressWarnings("unchecked")
+  public void write(String namedOutput, K key, V value)
+      throws IOException, InterruptedException {
+    if (!namedOutputs.containsKey(namedOutput)) {
+      throw new IllegalArgumentException("Undefined named output '" +
+        namedOutput + "'");
+    }
+    TaskAttemptContext taskContext = getContext(namedOutput);
+    baseContext.getCounter(COUNTERS_GROUP, namedOutput).increment(1);
+    getRecordWriter(taskContext, namedOutput).write(key, value);
+  }
+  
+  public void close() throws IOException, InterruptedException {
+    for (RecordWriter<?, ?> writer : recordWriters.values()) {
+      writer.close(baseContext);
+    }
+  }
+  
+  private TaskAttemptContext getContext(String nameOutput) throws IOException {
+    TaskAttemptContext taskContext = taskContextCache.get(nameOutput);
+    if (taskContext != null) {
+      return taskContext;
+    }
+
+    // The following trick leverages the instantiation of a record writer via
+    // the job, thus supporting arbitrary output formats.
+    OutputConfig outConfig = namedOutputs.get(nameOutput);
+    Configuration conf = new Configuration(baseContext.getConfiguration());
+    Job job = new Job(conf);
+    job.getConfiguration().set("crunch.namedoutput", nameOutput);
+    job.setOutputFormatClass(outConfig.bundle.getFormatClass());
+    job.setOutputKeyClass(outConfig.keyClass);
+    job.setOutputValueClass(outConfig.valueClass);
+    outConfig.bundle.configure(job.getConfiguration());
+    taskContext = TaskAttemptContextFactory.create(
+      job.getConfiguration(), baseContext.getTaskAttemptID());
+
+    taskContextCache.put(nameOutput, taskContext);
+    return taskContext;
+  }
+  
+  private synchronized RecordWriter<K, V> getRecordWriter(
+      TaskAttemptContext taskContext, String namedOutput) 
+      throws IOException, InterruptedException {
+    // look for record-writer in the cache
+    RecordWriter<K, V> writer = recordWriters.get(namedOutput);
+    
+    // If not in cache, create a new one
+    if (writer == null) {
+      // get the record writer from context output format
+      taskContext.getConfiguration().set(BASE_OUTPUT_NAME, namedOutput);
+      try {
+        OutputFormat format = ReflectionUtils.newInstance(
+            taskContext.getOutputFormatClass(),
+            taskContext.getConfiguration());
+        writer = format.getRecordWriter(taskContext);
+      } catch (ClassNotFoundException e) {
+        throw new IOException(e);
+      }
+      recordWriters.put(namedOutput, writer);
+    }
+    
+    return writer;
+  }
+}
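
A hedged sketch of how CrunchOutputs is meant to be used, based on the javadoc and API above: the driver registers a named output, and the task wraps its context, writes, and closes. The output name, format, and key/value classes below are illustrative choices, not prescribed by the class:

    import java.io.IOException;

    import org.apache.crunch.io.CrunchOutputs;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

    public class NamedOutputExample {

      // Driver side: declare the named output before submitting the job.
      public static void declareSideOutput(Job job) {
        CrunchOutputs.addNamedOutput(job, "side", TextOutputFormat.class,
            Text.class, NullWritable.class);
      }

      // Task side: wrap the context in setup(), write during map(), close in cleanup().
      public static class SideWritingMapper extends Mapper<Text, Text, Text, NullWritable> {

        private CrunchOutputs<Text, NullWritable> outputs;

        @Override
        protected void setup(Context context) {
          outputs = new CrunchOutputs<Text, NullWritable>(context);
        }

        @Override
        protected void map(Text key, Text value, Context context)
            throws IOException, InterruptedException {
          outputs.write("side", key, NullWritable.get());
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
          outputs.close();
        }
      }
    }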

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/FileNamingScheme.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/FileNamingScheme.java b/crunch-core/src/main/java/org/apache/crunch/io/FileNamingScheme.java
new file mode 100644
index 0000000..cf93651
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/FileNamingScheme.java
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+
+/**
+ * Encapsulates rules for naming output files. It is the responsibility of
+ * implementors to avoid file name collisions.
+ */
+public interface FileNamingScheme {
+
+  /**
+   * Get the output file name for a map task. Note that the implementation is
+   * responsible for avoiding naming collisions.
+   * 
+   * @param configuration The configuration of the job for which the map output
+   *          is being written
+   * @param outputDirectory The directory where the output will be written
+   * @return The filename for the output of the map task
+   * @throws IOException if an exception occurs while accessing the output file
+   *           system
+   */
+  String getMapOutputName(Configuration configuration, Path outputDirectory) throws IOException;
+
+  /**
+   * Get the output file name for a reduce task. Note that the implementation is
+   * responsible for avoiding naming collisions.
+   * 
+   * @param configuration The configuration of the job for which output is being
+   *          written
+   * @param outputDirectory The directory where the file will be written
+   * @param partitionId The partition of the reduce task being output
+   * @return The filename for the output of the reduce task
+   * @throws IOException if an exception occurs while accessing the output file
+   *           system
+   */
+  String getReduceOutputName(Configuration configuration, Path outputDirectory, int partitionId) throws IOException;
+
+}
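
To make the FileNamingScheme contract above concrete, here is an illustrative, non-canonical implementation sketch (not part of this patch) that avoids collisions by embedding a random UUID in each file name.

  import java.util.UUID;

  import org.apache.crunch.io.FileNamingScheme;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.Path;

  // Illustrative only: a random UUID guarantees uniqueness regardless of what
  // already exists in the output directory.
  public class UuidFileNamingScheme implements FileNamingScheme {

    @Override
    public String getMapOutputName(Configuration configuration, Path outputDirectory) {
      return "part-m-" + UUID.randomUUID();
    }

    @Override
    public String getReduceOutputName(Configuration configuration, Path outputDirectory, int partitionId) {
      return String.format("part-r-%05d-%s", partitionId, UUID.randomUUID());
    }
  }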

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/FileReaderFactory.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/FileReaderFactory.java b/crunch-core/src/main/java/org/apache/crunch/io/FileReaderFactory.java
new file mode 100644
index 0000000..5cccb7b
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/FileReaderFactory.java
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io;
+
+import java.util.Iterator;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+public interface FileReaderFactory<T> {
+  Iterator<T> read(FileSystem fs, Path path);
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/FormatBundle.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/FormatBundle.java b/crunch-core/src/main/java/org/apache/crunch/io/FormatBundle.java
new file mode 100644
index 0000000..d969009
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/FormatBundle.java
@@ -0,0 +1,121 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.io.Serializable;
+import java.util.Map;
+
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.lang.builder.HashCodeBuilder;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.OutputFormat;
+
+import com.google.common.collect.Maps;
+
+/**
+ * A combination of an {@link InputFormat} or {@link OutputFormat} and any extra 
+ * configuration information that format class needs to run.
+ * 
+ * <p>The {@code FormatBundle} allows each format to act as if it were the only
+ * format that exists in a particular MapReduce job, even when there are
+ * multiple types of inputs and outputs within a single job.
+ */
+public class FormatBundle<K> implements Serializable {
+
+  private Class<K> formatClass;
+  private Map<String, String> extraConf;
+
+  public static <T> FormatBundle<T> fromSerialized(String serialized, Class<T> clazz) {
+    ByteArrayInputStream bais = new ByteArrayInputStream(Base64.decodeBase64(serialized));
+    try {
+      ObjectInputStream ois = new ObjectInputStream(bais);
+      FormatBundle<T> bundle = (FormatBundle<T>) ois.readObject();
+      ois.close();
+      return bundle;
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    } catch (ClassNotFoundException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  public static <T extends InputFormat<?, ?>> FormatBundle<T> forInput(Class<T> inputFormatClass) {
+    return new FormatBundle<T>(inputFormatClass);
+  }
+  
+  public static <T extends OutputFormat<?, ?>> FormatBundle<T> forOutput(Class<T> outputFormatClass) {
+    return new FormatBundle<T>(outputFormatClass);
+  }
+  
+  private FormatBundle(Class<K> formatClass) {
+    this.formatClass = formatClass;
+    this.extraConf = Maps.newHashMap();
+  }
+
+  public FormatBundle<K> set(String key, String value) {
+    this.extraConf.put(key, value);
+    return this;
+  }
+
+  public Class<K> getFormatClass() {
+    return formatClass;
+  }
+
+  public Configuration configure(Configuration conf) {
+    for (Map.Entry<String, String> e : extraConf.entrySet()) {
+      conf.set(e.getKey(), e.getValue());
+    }
+    return conf;
+  }
+
+  public String serialize() {
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    try {
+      ObjectOutputStream oos = new ObjectOutputStream(baos);
+      oos.writeObject(this);
+      oos.close();
+      return Base64.encodeBase64String(baos.toByteArray());
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  public String getName() {
+    return formatClass.getSimpleName();
+  }
+
+  @Override
+  public int hashCode() {
+    return new HashCodeBuilder().append(formatClass).append(extraConf).toHashCode();
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    if (other == null || !(other instanceof FormatBundle)) {
+      return false;
+    }
+    FormatBundle<K> oib = (FormatBundle<K>) other;
+    return formatClass.equals(oib.formatClass) && extraConf.equals(oib.extraConf);
+  }
+}
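
A brief usage sketch (not part of this patch) of the FormatBundle class above: it wraps TextInputFormat together with one extra configuration entry and round-trips the bundle through serialize/fromSerialized before applying it to a Configuration. The property name "my.custom.key" is hypothetical.

  import org.apache.crunch.io.FormatBundle;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

  public class FormatBundleExample {
    public static void main(String[] args) {
      // Wrap an InputFormat together with the extra configuration it needs.
      FormatBundle<TextInputFormat> bundle = FormatBundle.forInput(TextInputFormat.class)
          .set("my.custom.key", "42");   // hypothetical property

      // The bundle can be shipped through the job configuration as a Base64 string...
      String serialized = bundle.serialize();
      FormatBundle<TextInputFormat> copy =
          FormatBundle.fromSerialized(serialized, TextInputFormat.class);

      // ...and later applied to a Configuration before the format is instantiated.
      Configuration conf = new Configuration();
      copy.configure(conf);
      System.out.println(copy.getName() + " -> " + conf.get("my.custom.key"));
    }
  }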

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/From.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/From.java b/crunch-core/src/main/java/org/apache/crunch/io/From.java
new file mode 100644
index 0000000..e4cfb6a
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/From.java
@@ -0,0 +1,324 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io;
+
+import org.apache.avro.specific.SpecificRecord;
+import org.apache.crunch.Source;
+import org.apache.crunch.TableSource;
+import org.apache.crunch.io.avro.AvroFileSource;
+import org.apache.crunch.io.impl.FileTableSourceImpl;
+import org.apache.crunch.io.seq.SeqFileSource;
+import org.apache.crunch.io.seq.SeqFileTableSource;
+import org.apache.crunch.io.text.TextFileSource;
+import org.apache.crunch.types.PTableType;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.PTypeFamily;
+import org.apache.crunch.types.avro.AvroType;
+import org.apache.crunch.types.avro.Avros;
+import org.apache.crunch.types.writable.Writables;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+
+/**
+ * <p>Static factory methods for creating common {@link Source} types.</p>
+ * 
+ * <p>The {@code From} class is intended to provide a literate API for creating
+ * Crunch pipelines from common input file types.
+ * 
+ * <code>
+ *   Pipeline pipeline = new MRPipeline(this.getClass());
+ *   
+ *   // Reference the lines of a text file by wrapping the TextInputFormat class.
+ *   PCollection<String> lines = pipeline.read(From.textFile("/path/to/myfiles"));
+ *   
+ *   // Reference entries from a sequence file where the key is a LongWritable and the
+ *   // value is a custom Writable class.
+ *   PTable<LongWritable, MyWritable> table = pipeline.read(From.sequenceFile(
+ *       "/path/to/seqfiles", LongWritable.class, MyWritable.class));
+ *   
+ *   // Reference the records from an Avro file, where MyAvroObject implements Avro's
+ *   // SpecificRecord interface.
+ *   PCollection<MyAvroObject> myObjects = pipeline.read(From.avroFile("/path/to/avrofiles",
+ *       MyAvroObject.class));
+ *       
+ *   // References the key-value pairs from a custom extension of FileInputFormat:
+ *   PTable<KeyWritable, ValueWritable> custom = pipeline.read(From.formattedFile(
+ *       "/custom", MyFileInputFormat.class, KeyWritable.class, ValueWritable.class));
+ * </code>
+ * </p>
+ */
+public class From {
+
+  /**
+   * Creates a {@code TableSource<K, V>} for reading data from files that have custom
+   * {@code FileInputFormat<K, V>} implementations not covered by the provided {@code TableSource}
+   * and {@code Source} factory methods.
+   * 
+   * @param pathName The name of the path to the data on the filesystem
+   * @param formatClass The {@code FileInputFormat} implementation
+   * @param keyClass The {@code Writable} to use for the key
+   * @param valueClass The {@code Writable} to use for the value
+   * @return A new {@code TableSource<K, V>} instance
+   */
+  public static <K extends Writable, V extends Writable> TableSource<K, V> formattedFile(
+      String pathName, Class<? extends FileInputFormat<K, V>> formatClass,
+      Class<K> keyClass, Class<V> valueClass) {
+    return formattedFile(new Path(pathName), formatClass, keyClass, valueClass);
+  }
+
+  /**
+   * Creates a {@code TableSource<K, V>} for reading data from files that have custom
+   * {@code FileInputFormat<K, V>} implementations not covered by the provided {@code TableSource}
+   * and {@code Source} factory methods.
+   * 
+   * @param path The {@code Path} to the data
+   * @param formatClass The {@code FileInputFormat} implementation
+   * @param keyClass The {@code Writable} to use for the key
+   * @param valueClass The {@code Writable} to use for the value
+   * @return A new {@code TableSource<K, V>} instance
+   */
+  public static <K extends Writable, V extends Writable> TableSource<K, V> formattedFile(
+      Path path, Class<? extends FileInputFormat<K, V>> formatClass,
+      Class<K> keyClass, Class<V> valueClass) {
+    return formattedFile(path, formatClass, Writables.writables(keyClass),
+        Writables.writables(valueClass));
+  }
+
+  /**
+   * Creates a {@code TableSource<K, V>} for reading data from files that have custom
+   * {@code FileInputFormat} implementations not covered by the provided {@code TableSource}
+   * and {@code Source} factory methods.
+   * 
+   * @param pathName The name of the path to the data on the filesystem
+   * @param formatClass The {@code FileInputFormat} implementation
+   * @param keyType The {@code PType} to use for the key
+   * @param valueType The {@code PType} to use for the value
+   * @return A new {@code TableSource<K, V>} instance
+   */
+  public static <K, V> TableSource<K, V> formattedFile(String pathName,
+      Class<? extends FileInputFormat<?, ?>> formatClass,
+      PType<K> keyType, PType<V> valueType) {
+    return formattedFile(new Path(pathName), formatClass, keyType, valueType);
+  }
+
+  /**
+   * Creates a {@code TableSource<K, V>} for reading data from files that have custom
+   * {@code FileInputFormat} implementations not covered by the provided {@code TableSource}
+   * and {@code Source} factory methods.
+   * 
+   * @param path The {@code Path} to the data
+   * @param formatClass The {@code FileInputFormat} implementation
+   * @param keyType The {@code PType} to use for the key
+   * @param valueType The {@code PType} to use for the value
+   * @return A new {@code TableSource<K, V>} instance
+   */
+  public static <K, V> TableSource<K, V> formattedFile(Path path,
+      Class<? extends FileInputFormat<?, ?>> formatClass,
+      PType<K> keyType, PType<V> valueType) {
+    PTableType<K, V> tableType = keyType.getFamily().tableOf(keyType, valueType);
+    return new FileTableSourceImpl<K, V>(path, tableType, formatClass);
+  }
+
+  /**
+   * Creates a {@code Source<T>} instance from the Avro file(s) at the given path name.
+   * 
+   * @param pathName The name of the path to the data on the filesystem
+   * @param avroClass The subclass of {@code SpecificRecord} to use for the Avro file
+   * @return A new {@code Source<T>} instance
+   */
+  public static <T extends SpecificRecord> Source<T> avroFile(String pathName, Class<T> avroClass) {
+    return avroFile(new Path(pathName), avroClass);  
+  }
+
+  /**
+   * Creates a {@code Source<T>} instance from the Avro file(s) at the given {@code Path}.
+   * 
+   * @param path The {@code Path} to the data
+   * @param avroClass The subclass of {@code SpecificRecord} to use for the Avro file
+   * @return A new {@code Source<T>} instance
+   */
+  public static <T extends SpecificRecord> Source<T> avroFile(Path path, Class<T> avroClass) {
+    return avroFile(path, Avros.specifics(avroClass));  
+  }
+  
+  /**
+   * Creates a {@code Source<T>} instance from the Avro file(s) at the given path name.
+   * 
+   * @param pathName The name of the path to the data on the filesystem
+   * @param avroType The {@code AvroType} for the Avro records
+   * @return A new {@code Source<T>} instance
+   */
+  public static <T> Source<T> avroFile(String pathName, AvroType<T> avroType) {
+    return avroFile(new Path(pathName), avroType);
+  }
+
+  /**
+   * Creates a {@code Source<T>} instance from the Avro file(s) at the given {@code Path}.
+   * 
+   * @param path The {@code Path} to the data
+   * @param avroType The {@code AvroType} for the Avro records
+   * @return A new {@code Source<T>} instance
+   */
+  public static <T> Source<T> avroFile(Path path, AvroType<T> avroType) {
+    return new AvroFileSource<T>(path, avroType);
+  }
+
+  /**
+   * Creates a {@code Source<T>} instance that reads the value field of each key-value
+   * pair in the SequenceFile(s) at the given path name.
+   * 
+   * @param pathName The name of the path to the data on the filesystem
+   * @param valueClass The {@code Writable} type for the value of the SequenceFile entry
+   * @return A new {@code Source<T>} instance
+   */
+  public static <T extends Writable> Source<T> sequenceFile(String pathName, Class<T> valueClass) {
+    return sequenceFile(new Path(pathName), valueClass);
+  }
+  
+  /**
+   * Creates a {@code Source<T>} instance that reads the value field of each key-value
+   * pair in the SequenceFile(s) at the given {@code Path}.
+   * 
+   * @param path The {@code Path} to the data
+   * @param valueClass The {@code Writable} type for the value of the SequenceFile entry
+   * @return A new {@code Source<T>} instance
+   */
+  public static <T extends Writable> Source<T> sequenceFile(Path path, Class<T> valueClass) {
+    return sequenceFile(path, Writables.writables(valueClass));
+  }
+  
+  /**
+   * Creates a {@code Source<T>} instance that reads the value field of each key-value
+   * pair in the SequenceFile(s) at the given path name.
+   * 
+   * @param pathName The name of the path to the data on the filesystem
+   * @param ptype The {@code PType} for the value of the SequenceFile entry
+   * @return A new {@code Source<T>} instance
+   */
+  public static <T> Source<T> sequenceFile(String pathName, PType<T> ptype) {
+    return sequenceFile(new Path(pathName), ptype);
+  }
+
+  /**
+   * Creates a {@code Source<T>} instance that reads the value field of each key-value
+   * pair in the SequenceFile(s) at the given {@code Path}.
+   * 
+   * @param path The {@code Path} to the data
+   * @param ptype The {@code PType} for the value of the SequenceFile entry
+   * @return A new {@code Source<T>} instance
+   */
+  public static <T> Source<T> sequenceFile(Path path, PType<T> ptype) {
+    return new SeqFileSource<T>(path, ptype);
+  }
+
+  /**
+   * Creates a {@code TableSource<K, V>} instance for the SequenceFile(s) at the given path name.
+   * 
+   * @param pathName The name of the path to the data on the filesystem
+   * @param keyClass The {@code Writable} subclass for the key of the SequenceFile entry
+   * @param valueClass The {@code Writable} subclass for the value of the SequenceFile entry
+   * @return A new {@code TableSource<K, V>} instance
+   */
+  public static <K extends Writable, V extends Writable> TableSource<K, V> sequenceFile(
+      String pathName, Class<K> keyClass, Class<V> valueClass) {
+    return sequenceFile(new Path(pathName), keyClass, valueClass);
+  }
+
+  /**
+   * Creates a {@code TableSource<K, V>} instance for the SequenceFile(s) at the given {@code Path}.
+   * 
+   * @param path The {@code Path} to the data
+   * @param keyClass The {@code Writable} subclass for the key of the SequenceFile entry
+   * @param valueClass The {@code Writable} subclass for the value of the SequenceFile entry
+   * @return A new {@code TableSource<K, V>} instance
+   */
+  public static <K extends Writable, V extends Writable> TableSource<K, V> sequenceFile(
+      Path path, Class<K> keyClass, Class<V> valueClass) {
+    return sequenceFile(path, Writables.writables(keyClass), Writables.writables(valueClass));
+  }
+  
+  /**
+   * Creates a {@code TableSource<K, V>} instance for the SequenceFile(s) at the given path name.
+   * 
+   * @param pathName The name of the path to the data on the filesystem
+   * @param keyType The {@code PType} for the key of the SequenceFile entry
+   * @param valueType The {@code PType} for the value of the SequenceFile entry
+   * @return A new {@code TableSource<K, V>} instance
+   */
+  public static <K, V> TableSource<K, V> sequenceFile(String pathName, PType<K> keyType, PType<V> valueType) {
+    return sequenceFile(new Path(pathName), keyType, valueType);
+  }
+
+  /**
+   * Creates a {@code TableSource<K, V>} instance for the SequenceFile(s) at the given {@code Path}.
+   * 
+   * @param path The {@code Path} to the data
+   * @param keyType The {@code PType} for the key of the SequenceFile entry
+   * @param valueType The {@code PType} for the value of the SequenceFile entry
+   * @return A new {@code TableSource<K, V>} instance
+   */
+  public static <K, V> TableSource<K, V> sequenceFile(Path path, PType<K> keyType, PType<V> valueType) {
+    PTypeFamily ptf = keyType.getFamily();
+    return new SeqFileTableSource<K, V>(path, ptf.tableOf(keyType, valueType));
+  }
+
+  /**
+   * Creates a {@code Source<String>} instance for the text file(s) at the given path name.
+   * 
+   * @param pathName The name of the path to the data on the filesystem
+   * @return A new {@code Source<String>} instance
+   */
+  public static Source<String> textFile(String pathName) {
+    return textFile(new Path(pathName));
+  }
+
+  /**
+   * Creates a {@code Source<String>} instance for the text file(s) at the given {@code Path}.
+   * 
+   * @param path The {@code Path} to the data
+   * @return A new {@code Source<String>} instance
+   */
+  public static Source<String> textFile(Path path) {
+    return textFile(path, Writables.strings());
+  }
+
+  /**
+   * Creates a {@code Source<T>} instance for the text file(s) at the given path name using
+   * the provided {@code PType<T>} to convert the input text.
+   * 
+   * @param pathName The name of the path to the data on the filesystem
+   * @param ptype The {@code PType<T>} to use to process the input text
+   * @return A new {@code Source<T>} instance
+   */
+  public static <T> Source<T> textFile(String pathName, PType<T> ptype) {
+    return textFile(new Path(pathName), ptype);
+  }
+
+  /**
+   * Creates a {@code Source<T>} instance for the text file(s) at the given {@code Path} using
+   * the provided {@code PType<T>} to convert the input text.
+   * 
+   * @param path The {@code Path} to the data
+   * @param ptype The {@code PType<T>} to use to process the input text
+   * @return A new {@code Source<T>} instance
+   */
+  public static <T> Source<T> textFile(Path path, PType<T> ptype) {
+    return new TextFileSource<T>(path, ptype);
+  }
+}
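
In addition to the class-based variants shown in the javadoc above, the PType-based factory methods can be used when key and value types are expressed as Crunch PTypes. A short sketch (not part of this patch; the paths are hypothetical):

  import org.apache.crunch.Source;
  import org.apache.crunch.TableSource;
  import org.apache.crunch.io.From;
  import org.apache.crunch.types.writable.Writables;

  public class FromExample {
    public static void main(String[] args) {
      // Key/value types given as PTypes rather than Writable classes.
      TableSource<String, Long> counts =
          From.sequenceFile("/data/counts", Writables.strings(), Writables.longs());

      // A text source whose lines are converted by the supplied PType.
      Source<String> lines = From.textFile("/data/lines", Writables.strings());

      System.out.println(counts + " / " + lines);
    }
  }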

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/MapReduceTarget.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/MapReduceTarget.java b/crunch-core/src/main/java/org/apache/crunch/io/MapReduceTarget.java
new file mode 100644
index 0000000..b484103
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/MapReduceTarget.java
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io;
+
+import org.apache.crunch.Target;
+import org.apache.crunch.types.PType;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.Job;
+
+public interface MapReduceTarget extends Target {
+  void configureForMapReduce(Job job, PType<?> ptype, Path outputPath, String name);
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/OutputHandler.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/OutputHandler.java b/crunch-core/src/main/java/org/apache/crunch/io/OutputHandler.java
new file mode 100644
index 0000000..01d7f99
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/OutputHandler.java
@@ -0,0 +1,25 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io;
+
+import org.apache.crunch.Target;
+import org.apache.crunch.types.PType;
+
+public interface OutputHandler {
+  boolean configure(Target target, PType<?> ptype);
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/PathTarget.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/PathTarget.java b/crunch-core/src/main/java/org/apache/crunch/io/PathTarget.java
new file mode 100644
index 0000000..7a35209
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/PathTarget.java
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io;
+
+import org.apache.hadoop.fs.Path;
+
+/**
+ * A target whose output goes to a given path on a file system.
+ */
+public interface PathTarget extends MapReduceTarget {
+
+  Path getPath();
+
+  /**
+   * Get the naming scheme to be used for outputs being written to an output
+   * path.
+   * 
+   * @return the naming scheme to be used
+   */
+  FileNamingScheme getFileNamingScheme();
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/PathTargetImpl.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/PathTargetImpl.java b/crunch-core/src/main/java/org/apache/crunch/io/PathTargetImpl.java
new file mode 100644
index 0000000..0be3f9a
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/PathTargetImpl.java
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io;
+
+import org.apache.crunch.types.PType;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.OutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+
+public abstract class PathTargetImpl implements PathTarget {
+
+  private final Path path;
+  private final Class<OutputFormat> outputFormatClass;
+  private final Class keyClass;
+  private final Class valueClass;
+
+  public PathTargetImpl(String path, Class<OutputFormat> outputFormatClass, Class keyClass, Class valueClass) {
+    this(new Path(path), outputFormatClass, keyClass, valueClass);
+  }
+
+  public PathTargetImpl(Path path, Class<OutputFormat> outputFormatClass, Class keyClass, Class valueClass) {
+    this.path = path;
+    this.outputFormatClass = outputFormatClass;
+    this.keyClass = keyClass;
+    this.valueClass = valueClass;
+  }
+
+  @Override
+  public void configureForMapReduce(Job job, PType<?> ptype, Path outputPath, String name) {
+    try {
+      FileOutputFormat.setOutputPath(job, path);
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+    if (name == null) {
+      job.setOutputFormatClass(outputFormatClass);
+      job.setOutputKeyClass(keyClass);
+      job.setOutputValueClass(valueClass);
+    } else {
+      CrunchOutputs.addNamedOutput(job, name, outputFormatClass, keyClass, valueClass);
+    }
+  }
+
+  @Override
+  public Path getPath() {
+    return path;
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/ReadableSource.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/ReadableSource.java b/crunch-core/src/main/java/org/apache/crunch/io/ReadableSource.java
new file mode 100644
index 0000000..0407167
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/ReadableSource.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io;
+
+import java.io.IOException;
+
+import org.apache.crunch.Source;
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * An extension of the {@code Source} interface that indicates that a
+ * {@code Source} instance may be read as a series of records by the client
+ * code. This is used to determine whether a {@code PCollection} instance can be
+ * materialized.
+ */
+public interface ReadableSource<T> extends Source<T> {
+
+  /**
+   * Returns an {@code Iterable} that contains the contents of this source.
+   * 
+   * @param conf The current {@code Configuration} instance
+   * @return the contents of this {@code Source} as an {@code Iterable} instance
+   * @throws IOException
+   */
+  Iterable<T> read(Configuration conf) throws IOException;
+}
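
As a sketch of how a ReadableSource can be consumed directly by client code without running a MapReduce job (not part of this patch): AvroFileSource, defined later in this patch, is one implementation of the interface; MyRecord stands in for a hypothetical Avro SpecificRecord class and the path is hypothetical.

  import org.apache.crunch.io.ReadableSource;
  import org.apache.crunch.io.avro.AvroFileSource;
  import org.apache.crunch.types.avro.Avros;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.Path;

  public class ReadableSourceExample {
    public static void main(String[] args) throws Exception {
      // MyRecord is a hypothetical generated Avro SpecificRecord class.
      ReadableSource<MyRecord> source =
          new AvroFileSource<MyRecord>(new Path("/data/records"), Avros.specifics(MyRecord.class));

      // Iterate over the source contents in the local client.
      for (MyRecord record : source.read(new Configuration())) {
        System.out.println(record);
      }
    }
  }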

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/ReadableSourceTarget.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/ReadableSourceTarget.java b/crunch-core/src/main/java/org/apache/crunch/io/ReadableSourceTarget.java
new file mode 100644
index 0000000..95c90aa
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/ReadableSourceTarget.java
@@ -0,0 +1,30 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io;
+
+import org.apache.crunch.SourceTarget;
+
+/**
+ * An interface that indicates that a {@code SourceTarget} instance can be read
+ * into the local client.
+ * 
+ * @param <T>
+ *          The type of data read.
+ */
+public interface ReadableSourceTarget<T> extends ReadableSource<T>, SourceTarget<T> {
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/SequentialFileNamingScheme.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/SequentialFileNamingScheme.java b/crunch-core/src/main/java/org/apache/crunch/io/SequentialFileNamingScheme.java
new file mode 100644
index 0000000..bdda8e6
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/SequentialFileNamingScheme.java
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+/**
+ * Default {@link FileNamingScheme} that uses an incrementing sequence number in
+ * order to generate unique file names.
+ */
+public class SequentialFileNamingScheme implements FileNamingScheme {
+
+  @Override
+  public String getMapOutputName(Configuration configuration, Path outputDirectory) throws IOException {
+    return getSequentialFileName(configuration, outputDirectory, "m");
+  }
+
+  @Override
+  public String getReduceOutputName(Configuration configuration, Path outputDirectory, int partitionId)
+      throws IOException {
+    return getSequentialFileName(configuration, outputDirectory, "r");
+  }
+
+  private String getSequentialFileName(Configuration configuration, Path outputDirectory, String jobTypeName)
+      throws IOException {
+    FileSystem fileSystem = outputDirectory.getFileSystem(configuration);
+    int fileSequenceNumber = fileSystem.listStatus(outputDirectory).length;
+
+    return String.format("part-%s-%05d", jobTypeName, fileSequenceNumber);
+  }
+
+}
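
A short sketch (not part of this patch) of the names the scheme above produces, assuming the output directory already exists (listStatus is called on it) and currently holds no files; "/tmp/crunch-out" is a hypothetical path.

  import org.apache.crunch.io.FileNamingScheme;
  import org.apache.crunch.io.SequentialFileNamingScheme;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.Path;

  public class NamingSchemeExample {
    public static void main(String[] args) throws Exception {
      Configuration conf = new Configuration();
      Path outputDir = new Path("/tmp/crunch-out");   // hypothetical; must already exist
      FileNamingScheme scheme = new SequentialFileNamingScheme();

      // For an empty directory both calls yield sequence number 0.
      System.out.println(scheme.getMapOutputName(conf, outputDir));        // part-m-00000
      // Note: the partition id is not embedded in the name; only the file count is used.
      System.out.println(scheme.getReduceOutputName(conf, outputDir, 7));  // part-r-00000
    }
  }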

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/SourceTargetHelper.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/SourceTargetHelper.java b/crunch-core/src/main/java/org/apache/crunch/io/SourceTargetHelper.java
new file mode 100644
index 0000000..f4400de
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/SourceTargetHelper.java
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+/**
+ * Functions for configuring the inputs/outputs of MapReduce jobs.
+ * 
+ */
+public class SourceTargetHelper {
+
+  public static long getPathSize(Configuration conf, Path path) throws IOException {
+    return getPathSize(path.getFileSystem(conf), path);
+  }
+
+  public static long getPathSize(FileSystem fs, Path path) throws IOException {
+    FileStatus[] stati = fs.globStatus(path);
+    if (stati == null || stati.length == 0) {
+      return -1L;
+    }
+    long size = 0;
+    for (FileStatus status : stati) {
+      size += fs.getContentSummary(status.getPath()).getLength();
+    }
+    return size;
+  }
+}
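
A small usage sketch (not part of this patch) of getPathSize above; the glob path is hypothetical, and a negative return value means that no files matched.

  import org.apache.crunch.io.SourceTargetHelper;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.Path;

  public class PathSizeExample {
    public static void main(String[] args) throws Exception {
      Configuration conf = new Configuration();
      // Globs are supported because the implementation uses FileSystem.globStatus.
      long bytes = SourceTargetHelper.getPathSize(conf, new Path("/data/logs/part-*"));
      if (bytes < 0) {
        System.out.println("No files matched the pattern");
      } else {
        System.out.println("Total input size: " + bytes + " bytes");
      }
    }
  }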

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/To.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/To.java b/crunch-core/src/main/java/org/apache/crunch/io/To.java
new file mode 100644
index 0000000..d62d294
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/To.java
@@ -0,0 +1,153 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io;
+
+import org.apache.crunch.Target;
+import org.apache.crunch.io.avro.AvroFileTarget;
+import org.apache.crunch.io.impl.FileTargetImpl;
+import org.apache.crunch.io.seq.SeqFileTarget;
+import org.apache.crunch.io.text.TextFileTarget;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+
+/**
+ * <p>Static factory methods for creating common {@link Target} types.</p>
+ * 
+ * <p>The {@code To} class is intended to be used as part of a literate API
+ * for writing the output of Crunch pipelines to common file types. We can use
+ * the {@code Target} objects created by the factory methods in the {@code To}
+ * class with either the {@code write} method on the {@code Pipeline} class or
+ * the convenience {@code write} method on {@code PCollection} and {@code PTable}
+ * instances.
+ * 
+ * <code>
+ *   Pipeline pipeline = new MRPipeline(this.getClass());
+ *   ...
+ *   // Write a PCollection<String> to a text file:
+ *   PCollection<String> words = ...;
+ *   pipeline.write(words, To.textFile("/put/my/words/here"));
+ *   
+ *   // Write a PTable<Text, Text> to a sequence file:
+ *   PTable<Text, Text> textToText = ...;
+ *   textToText.write(To.sequenceFile("/words/to/words"));
+ *   
+ *   // Write a PCollection<MyAvroObject> to an Avro data file:
+ *   PCollection<MyAvroObject> objects = ...;
+ *   objects.write(To.avroFile("/my/avro/files"));
+ *   
+ *   // Write a PTable to a custom FileOutputFormat:
+ *   PTable<KeyWritable, ValueWritable> custom = ...;
+ *   pipeline.write(custom, To.formattedFile("/custom", MyFileFormat.class));
+ * </code>
+ * </p>
+ */
+public class To {
+
+  /**
+   * Creates a {@code Target} at the given path name that writes data to
+   * a custom {@code FileOutputFormat}.
+   * 
+   * @param pathName The name of the path to write the data to on the filesystem
+   * @param formatClass The {@code FileOutputFormat<K, V>} to write the data to
+   * @return A new {@code Target} instance
+   */
+  public static <K extends Writable, V extends Writable> Target formattedFile(
+      String pathName, Class<? extends FileOutputFormat<K, V>> formatClass) {
+    return formattedFile(new Path(pathName), formatClass);
+  }
+
+  /**
+   * Creates a {@code Target} at the given {@code Path} that writes data to
+   * a custom {@code FileOutputFormat}.
+   * 
+   * @param path The {@code Path} to write the data to
+   * @param formatClass The {@code FileOutputFormat} to write the data to
+   * @return A new {@code Target} instance
+   */
+  public static <K extends Writable, V extends Writable> Target formattedFile(
+      Path path, Class<? extends FileOutputFormat<K, V>> formatClass) {
+    return new FileTargetImpl(path, formatClass, new SequentialFileNamingScheme());
+  }
+
+  /**
+   * Creates a {@code Target} at the given path name that writes data to
+   * Avro files. The {@code PType} for the written data must be for Avro records.
+   * 
+   * @param pathName The name of the path to write the data to on the filesystem
+   * @return A new {@code Target} instance
+   */
+  public static Target avroFile(String pathName) {
+    return avroFile(new Path(pathName));
+  }
+
+  /**
+   * Creates a {@code Target} at the given {@code Path} that writes data to
+   * Avro files. The {@code PType} for the written data must be for Avro records.
+   * 
+   * @param path The {@code Path} to write the data to
+   * @return A new {@code Target} instance
+   */
+  public static Target avroFile(Path path) {
+    return new AvroFileTarget(path);
+  }
+
+  /**
+   * Creates a {@code Target} at the given path name that writes data to
+   * SequenceFiles.
+   * 
+   * @param pathName The name of the path to write the data to on the filesystem
+   * @return A new {@code Target} instance
+   */
+  public static Target sequenceFile(String pathName) {
+    return sequenceFile(new Path(pathName));
+  }
+
+  /**
+   * Creates a {@code Target} at the given {@code Path} that writes data to
+   * SequenceFiles.
+   * 
+   * @param path The {@code Path} to write the data to
+   * @return A new {@code Target} instance
+   */
+  public static Target sequenceFile(Path path) {
+    return new SeqFileTarget(path);
+  }
+
+  /**
+   * Creates a {@code Target} at the given path name that writes data to
+   * text files.
+   * 
+   * @param pathName The name of the path to write the data to on the filesystem
+   * @return A new {@code Target} instance
+   */
+  public static Target textFile(String pathName) {
+    return textFile(new Path(pathName));
+  }
+
+  /**
+   * Creates a {@code Target} at the given {@code Path} that writes data to
+   * text files.
+   * 
+   * @param path The {@code Path} to write the data to
+   * @return A new {@code Target} instance
+   */
+  public static Target textFile(Path path) {
+    return new TextFileTarget(path);
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/avro/AvroFileReaderFactory.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/avro/AvroFileReaderFactory.java b/crunch-core/src/main/java/org/apache/crunch/io/avro/AvroFileReaderFactory.java
new file mode 100644
index 0000000..c8fe23a
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/avro/AvroFileReaderFactory.java
@@ -0,0 +1,96 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.avro;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileReader;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.io.DatumReader;
+import org.apache.avro.mapred.FsInput;
+import org.apache.avro.reflect.ReflectDatumReader;
+import org.apache.avro.specific.SpecificDatumReader;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.crunch.MapFn;
+import org.apache.crunch.fn.IdentityFn;
+import org.apache.crunch.io.FileReaderFactory;
+import org.apache.crunch.io.impl.AutoClosingIterator;
+import org.apache.crunch.types.avro.AvroType;
+import org.apache.crunch.types.avro.Avros;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import com.google.common.collect.Iterators;
+import com.google.common.collect.UnmodifiableIterator;
+
+public class AvroFileReaderFactory<T> implements FileReaderFactory<T> {
+
+  private static final Log LOG = LogFactory.getLog(AvroFileReaderFactory.class);
+
+  private final DatumReader<T> recordReader;
+  private final MapFn<T, T> mapFn;
+
+  public AvroFileReaderFactory(AvroType<T> atype) {
+    this.recordReader = createDatumReader(atype);
+    this.mapFn = (MapFn<T, T>) atype.getInputMapFn();
+  }
+
+  public AvroFileReaderFactory(Schema schema) {
+    this.recordReader = new GenericDatumReader<T>(schema);
+    this.mapFn = IdentityFn.<T>getInstance();
+  }
+  
+  static <T> DatumReader<T> createDatumReader(AvroType<T> avroType) {
+    if (avroType.hasReflect()) {
+      if (avroType.hasSpecific()) {
+        Avros.checkCombiningSpecificAndReflectionSchemas();
+      }
+      return new ReflectDatumReader<T>(avroType.getSchema());
+    } else if (avroType.hasSpecific()) {
+      return new SpecificDatumReader<T>(avroType.getSchema());
+    } else {
+      return new GenericDatumReader<T>(avroType.getSchema());
+    }
+  }
+
+  @Override
+  public Iterator<T> read(FileSystem fs, final Path path) {
+    this.mapFn.initialize();
+    try {
+      FsInput fsi = new FsInput(path, fs.getConf());
+      final DataFileReader<T> reader = new DataFileReader<T>(fsi, recordReader);
+      return new AutoClosingIterator<T>(reader, new UnmodifiableIterator<T>() {
+        @Override
+        public boolean hasNext() {
+          return reader.hasNext();
+        }
+
+        @Override
+        public T next() {
+          return mapFn.map(reader.next());
+        }
+      });
+    } catch (IOException e) {
+      LOG.info("Could not read avro file at path: " + path, e);
+      return Iterators.emptyIterator();
+    }
+  }
+}
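
A sketch (not part of this patch) of reading a single Avro file through the factory above; MyEvent stands in for a hypothetical generated Avro SpecificRecord class and the path is hypothetical. Note that read() deliberately returns an empty iterator, after logging, if the file cannot be opened.

  import java.util.Iterator;

  import org.apache.crunch.io.avro.AvroFileReaderFactory;
  import org.apache.crunch.types.avro.Avros;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;

  public class AvroReadExample {
    public static void main(String[] args) throws Exception {
      Configuration conf = new Configuration();
      Path path = new Path("/data/events.avro");   // hypothetical
      FileSystem fs = path.getFileSystem(conf);

      // The factory picks a specific/reflect/generic DatumReader based on the AvroType.
      AvroFileReaderFactory<MyEvent> factory =
          new AvroFileReaderFactory<MyEvent>(Avros.specifics(MyEvent.class));

      Iterator<MyEvent> events = factory.read(fs, path);
      while (events.hasNext()) {
        System.out.println(events.next());
      }
    }
  }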

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/avro/AvroFileSource.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/avro/AvroFileSource.java b/crunch-core/src/main/java/org/apache/crunch/io/avro/AvroFileSource.java
new file mode 100644
index 0000000..15792bf
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/avro/AvroFileSource.java
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.avro;
+
+import java.io.IOException;
+
+import org.apache.avro.mapred.AvroJob;
+import org.apache.crunch.io.CompositePathIterable;
+import org.apache.crunch.io.FormatBundle;
+import org.apache.crunch.io.ReadableSource;
+import org.apache.crunch.io.impl.FileSourceImpl;
+import org.apache.crunch.types.avro.AvroInputFormat;
+import org.apache.crunch.types.avro.AvroType;
+import org.apache.crunch.types.avro.Avros;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+public class AvroFileSource<T> extends FileSourceImpl<T> implements ReadableSource<T> {
+
+  private static <S> FormatBundle getBundle(AvroType<S> ptype) {
+    FormatBundle bundle = FormatBundle.forInput(AvroInputFormat.class)
+        .set(AvroJob.INPUT_IS_REFLECT, String.valueOf(ptype.hasReflect()))
+        .set(AvroJob.INPUT_SCHEMA, ptype.getSchema().toString())
+        .set(Avros.REFLECT_DATA_FACTORY_CLASS, Avros.REFLECT_DATA_FACTORY.getClass().getName());
+    return bundle;
+  }
+  
+  public AvroFileSource(Path path, AvroType<T> ptype) {
+    super(path, ptype, getBundle(ptype));
+  }
+
+  @Override
+  public String toString() {
+    return "Avro(" + path.toString() + ")";
+  }
+
+  @Override
+  public Iterable<T> read(Configuration conf) throws IOException {
+    FileSystem fs = path.getFileSystem(conf);
+    return CompositePathIterable.create(fs, path, new AvroFileReaderFactory<T>((AvroType<T>) ptype));
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/avro/AvroFileSourceTarget.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/avro/AvroFileSourceTarget.java b/crunch-core/src/main/java/org/apache/crunch/io/avro/AvroFileSourceTarget.java
new file mode 100644
index 0000000..76103e5
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/avro/AvroFileSourceTarget.java
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.avro;
+
+import org.apache.crunch.io.FileNamingScheme;
+import org.apache.crunch.io.SequentialFileNamingScheme;
+import org.apache.crunch.io.impl.ReadableSourcePathTargetImpl;
+import org.apache.crunch.types.avro.AvroType;
+import org.apache.hadoop.fs.Path;
+
+public class AvroFileSourceTarget<T> extends ReadableSourcePathTargetImpl<T> {
+  public AvroFileSourceTarget(Path path, AvroType<T> atype) {
+    this(path, atype, new SequentialFileNamingScheme());
+  }
+
+  public AvroFileSourceTarget(Path path, AvroType<T> atype, FileNamingScheme fileNamingScheme) {
+    super(new AvroFileSource<T>(path, atype), new AvroFileTarget(path), fileNamingScheme);
+  }
+
+  @Override
+  public String toString() {
+    return target.toString();
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/avro/AvroFileTarget.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/avro/AvroFileTarget.java b/crunch-core/src/main/java/org/apache/crunch/io/avro/AvroFileTarget.java
new file mode 100644
index 0000000..3a9e42c
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/avro/AvroFileTarget.java
@@ -0,0 +1,91 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.avro;
+
+import org.apache.avro.mapred.AvroWrapper;
+import org.apache.crunch.SourceTarget;
+import org.apache.crunch.io.FileNamingScheme;
+import org.apache.crunch.io.OutputHandler;
+import org.apache.crunch.io.SequentialFileNamingScheme;
+import org.apache.crunch.io.impl.FileTargetImpl;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.avro.AvroOutputFormat;
+import org.apache.crunch.types.avro.AvroType;
+import org.apache.crunch.types.avro.Avros;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Job;
+
+public class AvroFileTarget extends FileTargetImpl {
+
+  public AvroFileTarget(String path) {
+    this(new Path(path));
+  }
+
+  public AvroFileTarget(Path path) {
+    this(path, new SequentialFileNamingScheme());
+  }
+
+  public AvroFileTarget(Path path, FileNamingScheme fileNamingScheme) {
+    super(path, AvroOutputFormat.class, fileNamingScheme);
+  }
+
+  @Override
+  public String toString() {
+    return "Avro(" + path.toString() + ")";
+  }
+
+  @Override
+  public boolean accept(OutputHandler handler, PType<?> ptype) {
+    if (!(ptype instanceof AvroType)) {
+      return false;
+    }
+    handler.configure(this, ptype);
+    return true;
+  }
+
+  @Override
+  public void configureForMapReduce(Job job, PType<?> ptype, Path outputPath, String name) {
+    AvroType<?> atype = (AvroType<?>) ptype;
+    Configuration conf = job.getConfiguration();
+    String schemaParam = null;
+    if (name == null) {
+      schemaParam = "avro.output.schema";
+    } else {
+      schemaParam = "avro.output.schema." + name;
+    }
+    String outputSchema = conf.get(schemaParam);
+    if (outputSchema == null) {
+      conf.set(schemaParam, atype.getSchema().toString());
+    } else if (!outputSchema.equals(atype.getSchema().toString())) {
+      throw new IllegalStateException("Avro targets must use the same output schema");
+    }
+    Avros.configureReflectDataFactory(conf);
+    configureForMapReduce(job, AvroWrapper.class, NullWritable.class, AvroOutputFormat.class,
+        outputPath, name);
+  }
+
+  @Override
+  public <T> SourceTarget<T> asSourceTarget(PType<T> ptype) {
+    if (ptype instanceof AvroType) {
+      return new AvroFileSourceTarget<T>(path, (AvroType<T>) ptype);
+    }
+    return null;
+  }
+}
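
For readers following the rename, here is a minimal usage sketch (not part of this commit) showing how the AvroFileSource/AvroFileTarget pair above is typically wired into a pipeline. It assumes the usual Crunch entry points outside this diff (MRPipeline, Pipeline#read, Pipeline#write, Avros#strings) keep their familiar signatures:

import org.apache.crunch.PCollection;
import org.apache.crunch.Pipeline;
import org.apache.crunch.impl.mr.MRPipeline;
import org.apache.crunch.io.avro.AvroFileSource;
import org.apache.crunch.io.avro.AvroFileTarget;
import org.apache.crunch.types.avro.Avros;
import org.apache.hadoop.fs.Path;

public class AvroIoSketch {
  public static void main(String[] args) throws Exception {
    Pipeline pipeline = new MRPipeline(AvroIoSketch.class);
    // Read Avro-encoded strings from the input path.
    PCollection<String> values =
        pipeline.read(new AvroFileSource<String>(new Path(args[0]), Avros.strings()));
    // AvroFileTarget.accept() only allows AvroType PTypes, so the same
    // collection can be written straight back out as Avro.
    pipeline.write(values, new AvroFileTarget(new Path(args[1])));
    pipeline.done();
  }
}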

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/impl/AutoClosingIterator.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/impl/AutoClosingIterator.java b/crunch-core/src/main/java/org/apache/crunch/io/impl/AutoClosingIterator.java
new file mode 100644
index 0000000..3bd802e
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/impl/AutoClosingIterator.java
@@ -0,0 +1,62 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.impl;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.Iterator;
+
+import com.google.common.collect.UnmodifiableIterator;
+import com.google.common.io.Closeables;
+
+/**
+ * Closes the wrapped {@code Closeable} when {@link #hasNext()} returns false. As long as a client iterates
+ * through to completion (i.e. does not abort early due to an exception, a short circuit, etc.), the underlying
+ * resource is closed automatically.
+ */
+public class AutoClosingIterator<T> extends UnmodifiableIterator<T> implements Closeable {
+  private final Iterator<T> iter;
+  private Closeable closeable;
+
+  public AutoClosingIterator(Closeable closeable, Iterator<T> iter) {
+    this.closeable = closeable;
+    this.iter = iter;
+  }
+
+  @Override
+  public boolean hasNext() {
+    if (!iter.hasNext()) {
+      Closeables.closeQuietly(this);
+      return false;
+    } else {
+      return true;
+    }
+  }
+
+  @Override
+  public T next() {
+    return iter.next();
+  }
+
+  @Override
+  public void close() throws IOException {
+    if (closeable != null) {
+      closeable.close();
+      closeable = null;
+    }
+  }
+}
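
A quick, illustrative sketch (not part of this commit) of how AutoClosingIterator might be used outside of Crunch internals; the Scanner-based tokenizing and the anonymous Closeable wrapper are just example choices:

import java.io.Closeable;
import java.io.File;
import java.util.Iterator;
import java.util.Scanner;

import org.apache.crunch.io.impl.AutoClosingIterator;

public class AutoClosingIteratorSketch {
  public static void main(String[] args) throws Exception {
    final Scanner scanner = new Scanner(new File(args[0]));
    // Wrap the Scanner's close() in a Closeable so the iterator can shut it
    // down once hasNext() returns false.
    Closeable closer = new Closeable() {
      @Override
      public void close() {
        scanner.close();
      }
    };
    Iterator<String> tokens = new AutoClosingIterator<String>(closer, scanner);
    while (tokens.hasNext()) {
      System.out.println(tokens.next());
    }
    // No explicit close() is needed when the loop runs to completion; callers
    // that may abort early should close() the AutoClosingIterator themselves.
  }
}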

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/impl/FileSourceImpl.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/impl/FileSourceImpl.java b/crunch-core/src/main/java/org/apache/crunch/io/impl/FileSourceImpl.java
new file mode 100644
index 0000000..688c801
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/impl/FileSourceImpl.java
@@ -0,0 +1,104 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.impl;
+
+import java.io.IOException;
+
+import org.apache.commons.lang.builder.HashCodeBuilder;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.crunch.Source;
+import org.apache.crunch.io.CrunchInputs;
+import org.apache.crunch.io.FormatBundle;
+import org.apache.crunch.io.SourceTargetHelper;
+import org.apache.crunch.types.PType;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+
+public class FileSourceImpl<T> implements Source<T> {
+
+  private static final Log LOG = LogFactory.getLog(FileSourceImpl.class);
+
+  protected final Path path;
+  protected final PType<T> ptype;
+  protected final FormatBundle<? extends InputFormat> inputBundle;
+
+  public FileSourceImpl(Path path, PType<T> ptype, Class<? extends InputFormat> inputFormatClass) {
+    this.path = path;
+    this.ptype = ptype;
+    this.inputBundle = FormatBundle.forInput(inputFormatClass);
+  }
+
+  public FileSourceImpl(Path path, PType<T> ptype, FormatBundle<? extends InputFormat> inputBundle) {
+    this.path = path;
+    this.ptype = ptype;
+    this.inputBundle = inputBundle;
+  }
+
+  public Path getPath() {
+    return path;
+  }
+  
+  @Override
+  public void configureSource(Job job, int inputId) throws IOException {
+    if (inputId == -1) {
+      FileInputFormat.addInputPath(job, path);
+      job.setInputFormatClass(inputBundle.getFormatClass());
+      inputBundle.configure(job.getConfiguration());
+    } else {
+      CrunchInputs.addInputPath(job, path, inputBundle, inputId);
+    }
+  }
+
+  @Override
+  public PType<T> getType() {
+    return ptype;
+  }
+
+  @Override
+  public long getSize(Configuration configuration) {
+    try {
+      return SourceTargetHelper.getPathSize(configuration, path);
+    } catch (IOException e) {
+      LOG.warn(String.format("Exception thrown looking up size of: %s", path), e);
+      throw new IllegalStateException("Failed to get the file size of:" + path, e);
+    }
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    if (other == null || !getClass().equals(other.getClass())) {
+      return false;
+    }
+    FileSourceImpl o = (FileSourceImpl) other;
+    return ptype.equals(o.ptype) && path.equals(o.path) && inputBundle.equals(o.inputBundle);
+  }
+
+  @Override
+  public int hashCode() {
+    return new HashCodeBuilder().append(ptype).append(path).append(inputBundle).toHashCode();
+  }
+
+  @Override
+  public String toString() {
+    return new StringBuilder().append(inputBundle.getName()).append("(").append(path).append(")").toString();
+  }
+}
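
As an aside, concrete sources such as AvroFileSource are thin subclasses of FileSourceImpl. A hypothetical, stripped-down example of that pattern (illustration only, not code from this commit; the class name and use of TextInputFormat are invented for the sketch):

import org.apache.crunch.io.impl.FileSourceImpl;
import org.apache.crunch.types.PType;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

// Hypothetical subclass for illustration: a line-oriented source that reuses
// everything from FileSourceImpl and only pins down the InputFormat.
public class SimpleTextSource extends FileSourceImpl<String> {
  public SimpleTextSource(Path path, PType<String> ptype) {
    super(path, ptype, TextInputFormat.class);
  }
  // The inherited configureSource(Job, int) either configures the Job
  // directly (inputId == -1, the single-input case) or registers the path and
  // format bundle with CrunchInputs for multi-input jobs.
}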

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/impl/FileTableSourceImpl.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/impl/FileTableSourceImpl.java b/crunch-core/src/main/java/org/apache/crunch/io/impl/FileTableSourceImpl.java
new file mode 100644
index 0000000..295edb5
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/impl/FileTableSourceImpl.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.impl;
+
+import org.apache.crunch.Pair;
+import org.apache.crunch.TableSource;
+import org.apache.crunch.io.FormatBundle;
+import org.apache.crunch.types.PTableType;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+
+public class FileTableSourceImpl<K, V> extends FileSourceImpl<Pair<K, V>> implements TableSource<K, V> {
+
+  public FileTableSourceImpl(Path path, PTableType<K, V> tableType, Class<? extends FileInputFormat> formatClass) {
+    super(path, tableType, formatClass);
+  }
+
+  public FileTableSourceImpl(Path path, PTableType<K, V> tableType, FormatBundle bundle) {
+    super(path, tableType, bundle);
+  }
+  
+  @Override
+  public PTableType<K, V> getTableType() {
+    return (PTableType<K, V>) getType();
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/impl/FileTargetImpl.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/impl/FileTargetImpl.java b/crunch-core/src/main/java/org/apache/crunch/io/impl/FileTargetImpl.java
new file mode 100644
index 0000000..c1c29e4
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/impl/FileTargetImpl.java
@@ -0,0 +1,162 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.impl;
+
+import java.io.IOException;
+
+import org.apache.commons.lang.builder.HashCodeBuilder;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.crunch.CrunchRuntimeException;
+import org.apache.crunch.SourceTarget;
+import org.apache.crunch.io.CrunchOutputs;
+import org.apache.crunch.io.FileNamingScheme;
+import org.apache.crunch.io.OutputHandler;
+import org.apache.crunch.io.PathTarget;
+import org.apache.crunch.types.Converter;
+import org.apache.crunch.types.PType;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+
+public class FileTargetImpl implements PathTarget {
+
+  private static final Log LOG = LogFactory.getLog(FileTargetImpl.class);
+  
+  protected final Path path;
+  private final Class<? extends FileOutputFormat> outputFormatClass;
+  private final FileNamingScheme fileNamingScheme;
+
+  public FileTargetImpl(Path path, Class<? extends FileOutputFormat> outputFormatClass,
+      FileNamingScheme fileNamingScheme) {
+    this.path = path;
+    this.outputFormatClass = outputFormatClass;
+    this.fileNamingScheme = fileNamingScheme;
+  }
+
+  @Override
+  public void configureForMapReduce(Job job, PType<?> ptype, Path outputPath, String name) {
+    Converter converter = ptype.getConverter();
+    Class keyClass = converter.getKeyClass();
+    Class valueClass = converter.getValueClass();
+    configureForMapReduce(job, keyClass, valueClass, outputFormatClass, outputPath, name);
+  }
+
+  protected void configureForMapReduce(Job job, Class keyClass, Class valueClass,
+      Class outputFormatClass, Path outputPath, String name) {
+    try {
+      FileOutputFormat.setOutputPath(job, outputPath);
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+    if (name == null) {
+      job.setOutputFormatClass(outputFormatClass);
+      job.setOutputKeyClass(keyClass);
+      job.setOutputValueClass(valueClass);
+    } else {
+      CrunchOutputs.addNamedOutput(job, name, outputFormatClass, keyClass, valueClass);
+    }
+  }
+
+  @Override
+  public boolean accept(OutputHandler handler, PType<?> ptype) {
+    handler.configure(this, ptype);
+    return true;
+  }
+
+  @Override
+  public Path getPath() {
+    return path;
+  }
+
+  @Override
+  public FileNamingScheme getFileNamingScheme() {
+    return fileNamingScheme;
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    if (other == null || !getClass().equals(other.getClass())) {
+      return false;
+    }
+    FileTargetImpl o = (FileTargetImpl) other;
+    return path.equals(o.path);
+  }
+
+  @Override
+  public int hashCode() {
+    return new HashCodeBuilder().append(path).toHashCode();
+  }
+
+  @Override
+  public String toString() {
+    return new StringBuilder().append(outputFormatClass.getSimpleName()).append("(").append(path).append(")")
+        .toString();
+  }
+
+  @Override
+  public <T> SourceTarget<T> asSourceTarget(PType<T> ptype) {
+    // By default, assume that we cannot do this.
+    return null;
+  }
+
+  @Override
+  public void handleExisting(WriteMode strategy, Configuration conf) {
+    FileSystem fs = null;
+    try {
+      fs = FileSystem.get(conf);
+    } catch (IOException e) {
+      LOG.error("Could not retrieve FileSystem object to check for existing path", e);
+      throw new CrunchRuntimeException(e);
+    }
+    
+    boolean exists = false;
+    try {
+      exists = fs.exists(path);
+    } catch (IOException e) {
+      LOG.error("Exception checking existence of path: " + path, e);
+      throw new CrunchRuntimeException(e);
+    }
+    
+    if (exists) {
+      switch (strategy) {
+      case DEFAULT:
+        LOG.error("Path " + path + " already exists!");
+        throw new CrunchRuntimeException("Path already exists: " + path);
+      case OVERWRITE:
+        LOG.info("Removing data at existing path: " + path);
+        try {
+          fs.delete(path, true);
+        } catch (IOException e) {
+          LOG.error("Exception thrown removing data at path: " + path, e);
+        }
+        break;
+      case APPEND:
+        LOG.info("Adding output files to existing path: " + path);
+        break;
+      default:
+        throw new CrunchRuntimeException("Unknown WriteMode: " + strategy);
+      }
+    } else {
+      LOG.info("Will write output files to new path: " + path);
+    }
+  }
+}
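
For reference, a small sketch (not part of this commit) of what the WriteMode handling above means for client code. It assumes WriteMode is the nested Target.WriteMode enum and that handleExisting is declared on the Target interface, as the @Override here suggests:

import org.apache.crunch.Target;
import org.apache.crunch.io.avro.AvroFileTarget;
import org.apache.hadoop.conf.Configuration;

public class WriteModeSketch {
  public static void main(String[] args) {
    Target target = new AvroFileTarget(args[0]);
    // DEFAULT fails fast if the output path already exists, OVERWRITE deletes
    // the existing data first, and APPEND adds new part files alongside it.
    target.handleExisting(Target.WriteMode.OVERWRITE, new Configuration());
  }
}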

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/impl/ReadableSourcePathTargetImpl.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/impl/ReadableSourcePathTargetImpl.java b/crunch-core/src/main/java/org/apache/crunch/io/impl/ReadableSourcePathTargetImpl.java
new file mode 100644
index 0000000..6506816
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/impl/ReadableSourcePathTargetImpl.java
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.impl;
+
+import java.io.IOException;
+
+import org.apache.crunch.io.FileNamingScheme;
+import org.apache.crunch.io.PathTarget;
+import org.apache.crunch.io.ReadableSource;
+import org.apache.crunch.io.ReadableSourceTarget;
+import org.apache.hadoop.conf.Configuration;
+
+public class ReadableSourcePathTargetImpl<T> extends SourcePathTargetImpl<T> implements ReadableSourceTarget<T> {
+
+  public ReadableSourcePathTargetImpl(ReadableSource<T> source, PathTarget target, FileNamingScheme fileNamingScheme) {
+    super(source, target, fileNamingScheme);
+  }
+
+  @Override
+  public Iterable<T> read(Configuration conf) throws IOException {
+    return ((ReadableSource<T>) source).read(conf);
+  }
+
+}
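
A brief illustrative sketch (again, not part of this commit) of what the ReadableSourceTarget contract buys client code: output written by a pipeline can be read back in-process, without another MapReduce job. AvroFileSourceTarget above is one such implementation; the path argument and string type here are arbitrary choices for the example:

import org.apache.crunch.io.avro.AvroFileSourceTarget;
import org.apache.crunch.types.avro.Avros;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class ReadBackSketch {
  public static void main(String[] args) throws Exception {
    AvroFileSourceTarget<String> sourceTarget =
        new AvroFileSourceTarget<String>(new Path(args[0]), Avros.strings());
    // read(conf) is inherited from ReadableSourcePathTargetImpl and delegates
    // to the wrapped ReadableSource.
    for (String value : sourceTarget.read(new Configuration())) {
      System.out.println(value);
    }
  }
}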

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/impl/ReadableSourceTargetImpl.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/impl/ReadableSourceTargetImpl.java b/crunch-core/src/main/java/org/apache/crunch/io/impl/ReadableSourceTargetImpl.java
new file mode 100644
index 0000000..f435b3b
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/impl/ReadableSourceTargetImpl.java
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.impl;
+
+import java.io.IOException;
+
+import org.apache.crunch.Target;
+import org.apache.crunch.io.ReadableSource;
+import org.apache.crunch.io.ReadableSourceTarget;
+import org.apache.hadoop.conf.Configuration;
+
+public class ReadableSourceTargetImpl<T> extends SourceTargetImpl<T> implements ReadableSourceTarget<T> {
+
+  public ReadableSourceTargetImpl(ReadableSource<T> source, Target target) {
+    super(source, target);
+  }
+
+  @Override
+  public Iterable<T> read(Configuration conf) throws IOException {
+    return ((ReadableSource<T>) source).read(conf);
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/impl/SourcePathTargetImpl.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/impl/SourcePathTargetImpl.java b/crunch-core/src/main/java/org/apache/crunch/io/impl/SourcePathTargetImpl.java
new file mode 100644
index 0000000..c0d7ce0
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/impl/SourcePathTargetImpl.java
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.impl;
+
+import org.apache.crunch.Source;
+import org.apache.crunch.io.FileNamingScheme;
+import org.apache.crunch.io.PathTarget;
+import org.apache.crunch.types.PType;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.Job;
+
+public class SourcePathTargetImpl<T> extends SourceTargetImpl<T> implements PathTarget {
+
+  private final FileNamingScheme fileNamingScheme;
+
+  public SourcePathTargetImpl(Source<T> source, PathTarget target, FileNamingScheme fileNamingScheme) {
+    super(source, target);
+    this.fileNamingScheme = fileNamingScheme;
+  }
+
+  @Override
+  public void configureForMapReduce(Job job, PType<?> ptype, Path outputPath, String name) {
+    ((PathTarget) target).configureForMapReduce(job, ptype, outputPath, name);
+  }
+
+  @Override
+  public Path getPath() {
+    return ((PathTarget) target).getPath();
+  }
+
+  @Override
+  public FileNamingScheme getFileNamingScheme() {
+    return fileNamingScheme;
+  }
+}


[16/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/resources/shakes.txt
----------------------------------------------------------------------
diff --git a/crunch/src/it/resources/shakes.txt b/crunch/src/it/resources/shakes.txt
deleted file mode 100644
index 63acf18..0000000
--- a/crunch/src/it/resources/shakes.txt
+++ /dev/null
@@ -1,3667 +0,0 @@
-***The Project Gutenberg's Etext of Shakespeare's First Folio***
-********************The Tragedie of Macbeth*********************
-
-This is our 3rd edition of most of these plays.  See the index.
-
-
-Copyright laws are changing all over the world, be sure to check
-the copyright laws for your country before posting these files!!
-
-Please take a look at the important information in this header.
-We encourage you to keep this file on your own disk, keeping an
-electronic path open for the next readers.  Do not remove this.
-
-
-**Welcome To The World of Free Plain Vanilla Electronic Texts**
-
-**Etexts Readable By Both Humans and By Computers, Since 1971**
-
-*These Etexts Prepared By Hundreds of Volunteers and Donations*
-
-Information on contacting Project Gutenberg to get Etexts, and
-further information is included below.  We need your donations.
-
-
-The Tragedie of Macbeth
-
-by William Shakespeare
-
-July, 2000  [Etext #2264]
-
-
-***The Project Gutenberg's Etext of Shakespeare's First Folio***
-********************The Tragedie of Macbeth*********************
-
-*****This file should be named 0ws3410.txt or 0ws3410.zip******
-
-Corrected EDITIONS of our etexts get a new NUMBER, 0ws3411.txt
-VERSIONS based on separate sources get new LETTER, 0ws3410a.txt
-
-
-Project Gutenberg Etexts are usually created from multiple editions,
-all of which are in the Public Domain in the United States, unless a
-copyright notice is included.  Therefore, we usually do NOT keep any
-of these books in compliance with any particular paper edition.
-
-
-We are now trying to release all our books one month in advance
-of the official release dates, leaving time for better editing.
-
-Please note:  neither this list nor its contents are final till
-midnight of the last day of the month of any such announcement.
-The official release date of all Project Gutenberg Etexts is at
-Midnight, Central Time, of the last day of the stated month.  A
-preliminary version may often be posted for suggestion, comment
-and editing by those who wish to do so.  To be sure you have an
-up to date first edition [xxxxx10x.xxx] please check file sizes
-in the first week of the next month.  Since our ftp program has
-a bug in it that scrambles the date [tried to fix and failed] a
-look at the file size will have to do, but we will try to see a
-new copy has at least one byte more or less.
-
-
-Information about Project Gutenberg (one page)
-
-We produce about two million dollars for each hour we work.  The
-time it takes us, a rather conservative estimate, is fifty hours
-to get any etext selected, entered, proofread, edited, copyright
-searched and analyzed, the copyright letters written, etc.  This
-projected audience is one hundred million readers.  If our value
-per text is nominally estimated at one dollar then we produce $2
-million dollars per hour this year as we release thirty-six text
-files per month, or 432 more Etexts in 1999 for a total of 2000+
-If these reach just 10% of the computerized population, then the
-total should reach over 200 billion Etexts given away this year.
-
-The Goal of Project Gutenberg is to Give Away One Trillion Etext
-Files by December 31, 2001.  [10,000 x 100,000,000 = 1 Trillion]
-This is ten thousand titles each to one hundred million readers,
-which is only ~5% of the present number of computer users.
-
-At our revised rates of production, we will reach only one-third
-of that goal by the end of 2001, or about 3,333 Etexts unless we
-manage to get some real funding; currently our funding is mostly
-from Michael Hart's salary at Carnegie-Mellon University, and an
-assortment of sporadic gifts; this salary is only good for a few
-more years, so we are looking for something to replace it, as we
-don't want Project Gutenberg to be so dependent on one person.
-
-We need your donations more than ever!
-
-
-All donations should be made to "Project Gutenberg/CMU": and are
-tax deductible to the extent allowable by law.  (CMU = Carnegie-
-Mellon University).
-
-For these and other matters, please mail to:
-
-Project Gutenberg
-P. O. Box  2782
-Champaign, IL 61825
-
-When all other email fails. . .try our Executive Director:
-Michael S. Hart <ha...@pobox.com>
-hart@pobox.com forwards to hart@prairienet.org and archive.org
-if your mail bounces from archive.org, I will still see it, if
-it bounces from prairienet.org, better resend later on. . . .
-
-We would prefer to send you this information by email.
-
-******
-
-To access Project Gutenberg etexts, use any Web browser
-to view http://promo.net/pg.  This site lists Etexts by
-author and by title, and includes information about how
-to get involved with Project Gutenberg.  You could also
-download our past Newsletters, or subscribe here.  This
-is one of our major sites, please email hart@pobox.com,
-for a more complete list of our various sites.
-
-To go directly to the etext collections, use FTP or any
-Web browser to visit a Project Gutenberg mirror (mirror
-sites are available on 7 continents; mirrors are listed
-at http://promo.net/pg).
-
-Mac users, do NOT point and click, typing works better.
-
-Example FTP session:
-
-ftp sunsite.unc.edu
-login: anonymous
-password: your@login
-cd pub/docs/books/gutenberg
-cd etext90 through etext99
-dir [to see files]
-get or mget [to get files. . .set bin for zip files]
-GET GUTINDEX.??  [to get a year's listing of books, e.g., GUTINDEX.99]
-GET GUTINDEX.ALL [to get a listing of ALL books]
-
-***
-
-**Information prepared by the Project Gutenberg legal advisor**
-
-(Three Pages)
-
-
-***START**THE SMALL PRINT!**FOR PUBLIC DOMAIN ETEXTS**START***
-Why is this "Small Print!" statement here?  You know: lawyers.
-They tell us you might sue us if there is something wrong with
-your copy of this etext, even if you got it for free from
-someone other than us, and even if what's wrong is not our
-fault.  So, among other things, this "Small Print!" statement
-disclaims most of our liability to you.  It also tells you how
-you can distribute copies of this etext if you want to.
-
-*BEFORE!* YOU USE OR READ THIS ETEXT
-By using or reading any part of this PROJECT GUTENBERG-tm
-etext, you indicate that you understand, agree to and accept
-this "Small Print!" statement.  If you do not, you can receive
-a refund of the money (if any) you paid for this etext by
-sending a request within 30 days of receiving it to the person
-you got it from.  If you received this etext on a physical
-medium (such as a disk), you must return it with your request.
-
-ABOUT PROJECT GUTENBERG-TM ETEXTS
-This PROJECT GUTENBERG-tm etext, like most PROJECT GUTENBERG-
-tm etexts, is a "public domain" work distributed by Professor
-Michael S. Hart through the Project Gutenberg Association at
-Carnegie-Mellon University (the "Project").  Among other
-things, this means that no one owns a United States copyright
-on or for this work, so the Project (and you!) can copy and
-distribute it in the United States without permission and
-without paying copyright royalties.  Special rules, set forth
-below, apply if you wish to copy and distribute this etext
-under the Project's "PROJECT GUTENBERG" trademark.
-
-To create these etexts, the Project expends considerable
-efforts to identify, transcribe and proofread public domain
-works.  Despite these efforts, the Project's etexts and any
-medium they may be on may contain "Defects".  Among other
-things, Defects may take the form of incomplete, inaccurate or
-corrupt data, transcription errors, a copyright or other
-intellectual property infringement, a defective or damaged
-disk or other etext medium, a computer virus, or computer
-codes that damage or cannot be read by your equipment.
-
-LIMITED WARRANTY; DISCLAIMER OF DAMAGES
-But for the "Right of Replacement or Refund" described below,
-[1] the Project (and any other party you may receive this
-etext from as a PROJECT GUTENBERG-tm etext) disclaims all
-liability to you for damages, costs and expenses, including
-legal fees, and [2] YOU HAVE NO REMEDIES FOR NEGLIGENCE OR
-UNDER STRICT LIABILITY, OR FOR BREACH OF WARRANTY OR CONTRACT,
-INCLUDING BUT NOT LIMITED TO INDIRECT, CONSEQUENTIAL, PUNITIVE
-OR INCIDENTAL DAMAGES, EVEN IF YOU GIVE NOTICE OF THE
-POSSIBILITY OF SUCH DAMAGES.
-
-If you discover a Defect in this etext within 90 days of
-receiving it, you can receive a refund of the money (if any)
-you paid for it by sending an explanatory note within that
-time to the person you received it from.  If you received it
-on a physical medium, you must return it with your note, and
-such person may choose to alternatively give you a replacement
-copy.  If you received it electronically, such person may
-choose to alternatively give you a second opportunity to
-receive it electronically.
-
-THIS ETEXT IS OTHERWISE PROVIDED TO YOU "AS-IS".  NO OTHER
-WARRANTIES OF ANY KIND, EXPRESS OR IMPLIED, ARE MADE TO YOU AS
-TO THE ETEXT OR ANY MEDIUM IT MAY BE ON, INCLUDING BUT NOT
-LIMITED TO WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
-PARTICULAR PURPOSE.
-
-Some states do not allow disclaimers of implied warranties or
-the exclusion or limitation of consequential damages, so the
-above disclaimers and exclusions may not apply to you, and you
-may have other legal rights.
-
-INDEMNITY
-You will indemnify and hold the Project, its directors,
-officers, members and agents harmless from all liability, cost
-and expense, including legal fees, that arise directly or
-indirectly from any of the following that you do or cause:
-[1] distribution of this etext, [2] alteration, modification,
-or addition to the etext, or [3] any Defect.
-
-DISTRIBUTION UNDER "PROJECT GUTENBERG-tm"
-You may distribute copies of this etext electronically, or by
-disk, book or any other medium if you either delete this
-"Small Print!" and all other references to Project Gutenberg,
-or:
-
-[1]  Only give exact copies of it.  Among other things, this
-     requires that you do not remove, alter or modify the
-     etext or this "small print!" statement.  You may however,
-     if you wish, distribute this etext in machine readable
-     binary, compressed, mark-up, or proprietary form,
-     including any form resulting from conversion by word pro-
-     cessing or hypertext software, but only so long as
-     *EITHER*:
-
-     [*]  The etext, when displayed, is clearly readable, and
-          does *not* contain characters other than those
-          intended by the author of the work, although tilde
-          (~), asterisk (*) and underline (_) characters may
-          be used to convey punctuation intended by the
-          author, and additional characters may be used to
-          indicate hypertext links; OR
-
-     [*]  The etext may be readily converted by the reader at
-          no expense into plain ASCII, EBCDIC or equivalent
-          form by the program that displays the etext (as is
-          the case, for instance, with most word processors);
-          OR
-
-     [*]  You provide, or agree to also provide on request at
-          no additional cost, fee or expense, a copy of the
-          etext in its original plain ASCII form (or in EBCDIC
-          or other equivalent proprietary form).
-
-[2]  Honor the etext refund and replacement provisions of this
-     "Small Print!" statement.
-
-[3]  Pay a trademark license fee to the Project of 20% of the
-     net profits you derive calculated using the method you
-     already use to calculate your applicable taxes.  If you
-     don't derive profits, no royalty is due.  Royalties are
-     payable to "Project Gutenberg Association/Carnegie-Mellon
-     University" within the 60 days following each
-     date you prepare (or were legally required to prepare)
-     your annual (or equivalent periodic) tax return.
-
-WHAT IF YOU *WANT* TO SEND MONEY EVEN IF YOU DON'T HAVE TO?
-The Project gratefully accepts contributions in money, time,
-scanning machines, OCR software, public domain etexts, royalty
-free copyright licenses, and every other sort of contribution
-you can think of.  Money should be paid to "Project Gutenberg
-Association / Carnegie-Mellon University".
-
-*END*THE SMALL PRINT! FOR PUBLIC DOMAIN ETEXTS*Ver.04.29.93*END*
-
-
-
-
-
-Project Gutenberg's Etext of Shakespeare's The Tragedie of Macbeth
-
-
-
-
-
-Executive Director's Notes:
-
-In addition to the notes below, and so you will *NOT* think all
-the spelling errors introduced by the printers of the time have
-been corrected, here are the first few lines of Hamlet, as they
-are presented herein:
-
-  Barnardo. Who's there?
-  Fran. Nay answer me: Stand & vnfold
-your selfe
-
-   Bar. Long liue the King
-
-***
-
-As I understand it, the printers often ran out of certain words
-or letters they had often packed into a "cliche". . .this is the
-original meaning of the term cliche. . .and thus, being unwilling
-to unpack the cliches, and thus you will see some substitutions
-that look very odd. . .such as the exchanges of u for v, v for u,
-above. . .and you may wonder why they did it this way, presuming
-Shakespeare did not actually write the play in this manner. . . .
-
-The answer is that they MAY have packed "liue" into a cliche at a
-time when they were out of "v"'s. . .possibly having used "vv" in
-place of some "w"'s, etc.  This was a common practice of the day,
-as print was still quite expensive, and they didn't want to spend
-more on a wider selection of characters than they had to.
-
-You will find a lot of these kinds of "errors" in this text, as I
-have mentioned in other times and places, many "scholars" have an
-extreme attachment to these errors, and many have accorded them a
-very high place in the "canon" of Shakespeare.  My father read an
-assortment of these made available to him by Cambridge University
-in England for several months in a glass room constructed for the
-purpose.  To the best of my knowledge he read ALL those available
-. . .in great detail. . .and determined from the various changes,
-that Shakespeare most likely did not write in nearly as many of a
-variety of errors we credit him for, even though he was in/famous
-for signing his name with several different spellings.
-
-So, please take this into account when reading the comments below
-made by our volunteer who prepared this file:  you may see errors
-that are "not" errors. . . .
-
-So. . .with this caveat. . .we have NOT changed the canon errors,
-here is the Project Gutenberg Etext of Shakespeare's The Tragedie 
-of Macbeth.
-
-Michael S. Hart
-Project Gutenberg
-Executive Director
-
-
-***
-
-
-Scanner's Notes: What this is and isn't.  This was taken from
-a copy of Shakespeare's first folio and it is as close as I can
-come in ASCII to the printed text.
-
-The elongated S's have been changed to small s's and the
-conjoined ae have been changed to ae.  I have left the spelling,
-punctuation, capitalization as close as possible to the
-printed text.  I have corrected some spelling mistakes (I have put
-together a spelling dictionary devised from the spellings of the
-Geneva Bible and Shakespeare's First Folio and have unified
-spellings according to this template), typo's and expanded
-abbreviations as I have come across them.  Everything within
-brackets [] is what I have added.  So if you don't like that
-you can delete everything within the brackets if you want a
-purer Shakespeare.
-
-Another thing that you should be aware of is that there are textual
-differences between various copies of the first folio.  So there may
-be differences (other than what I have mentioned above) between
-this and other first folio editions.  This is due to the printer's
-habit of setting the type and running off a number of copies and
-then proofing the printed copy and correcting the type and then
-continuing the printing run.  The proof run wasn't thrown away but
-incorporated into the printed copies.  This is just the way it is.
-The text I have used was a composite of more than 30 different
-First Folio editions' best pages.
-
-If you find any scanning errors, out and out typos, punctuation
-errors, or if you disagree with my spelling choices please feel
-free to email me those errors.  I wish to make this the best
-etext possible.  My email address for right now are haradda@aol.com
-and davidr@inconnect.com.  I hope that you enjoy this.
-
-David Reed
-
-The Tragedie of Macbeth
-
-Actus Primus. Scoena Prima.
-
-Thunder and Lightning. Enter three Witches.
-
-  1. When shall we three meet againe?
-In Thunder, Lightning, or in Raine?
-  2. When the Hurley-burley's done,
-When the Battaile's lost, and wonne
-
-   3. That will be ere the set of Sunne
-
-   1. Where the place?
-  2. Vpon the Heath
-
-   3. There to meet with Macbeth
-
-   1. I come, Gray-Malkin
-
-   All. Padock calls anon: faire is foule, and foule is faire,
-Houer through the fogge and filthie ayre.
-
-Exeunt.
-
-
-Scena Secunda.
-
-Alarum within. Enter King Malcome, Donalbaine, Lenox, with
-attendants,
-meeting a bleeding Captaine.
-
-  King. What bloody man is that? he can report,
-As seemeth by his plight, of the Reuolt
-The newest state
-
-   Mal. This is the Serieant,
-Who like a good and hardie Souldier fought
-'Gainst my Captiuitie: Haile braue friend;
-Say to the King, the knowledge of the Broyle,
-As thou didst leaue it
-
-   Cap. Doubtfull it stood,
-As two spent Swimmers, that doe cling together,
-And choake their Art: The mercilesse Macdonwald
-(Worthie to be a Rebell, for to that
-The multiplying Villanies of Nature
-Doe swarme vpon him) from the Westerne Isles
-Of Kernes and Gallowgrosses is supply'd,
-And Fortune on his damned Quarry smiling,
-Shew'd like a Rebells Whore: but all's too weake:
-For braue Macbeth (well hee deserues that Name)
-Disdayning Fortune, with his brandisht Steele,
-Which smoak'd with bloody execution
-(Like Valours Minion) caru'd out his passage,
-Till hee fac'd the Slaue:
-Which neu'r shooke hands, nor bad farwell to him,
-Till he vnseam'd him from the Naue toth' Chops,
-And fix'd his Head vpon our Battlements
-
-   King. O valiant Cousin, worthy Gentleman
-
-   Cap. As whence the Sunne 'gins his reflection,
-Shipwracking Stormes, and direfull Thunders:
-So from that Spring, whence comfort seem'd to come,
-Discomfort swells: Marke King of Scotland, marke,
-No sooner Iustice had, with Valour arm'd,
-Compell'd these skipping Kernes to trust their heeles,
-But the Norweyan Lord, surueying vantage,
-With furbusht Armes, and new supplyes of men,
-Began a fresh assault
-
-   King. Dismay'd not this our Captaines, Macbeth and
-Banquoh?
-  Cap. Yes, as Sparrowes, Eagles;
-Or the Hare, the Lyon:
-If I say sooth, I must report they were
-As Cannons ouer-charg'd with double Cracks,
-So they doubly redoubled stroakes vpon the Foe:
-Except they meant to bathe in reeking Wounds,
-Or memorize another Golgotha,
-I cannot tell: but I am faint,
-My Gashes cry for helpe
-
-   King. So well thy words become thee, as thy wounds,
-They smack of Honor both: Goe get him Surgeons.
-Enter Rosse and Angus.
-
-Who comes here?
-  Mal. The worthy Thane of Rosse
-
-   Lenox. What a haste lookes through his eyes?
-So should he looke, that seemes to speake things strange
-
-   Rosse. God saue the King
-
-   King. Whence cam'st thou, worthy Thane?
-  Rosse. From Fiffe, great King,
-Where the Norweyan Banners flowt the Skie,
-And fanne our people cold.
-Norway himselfe, with terrible numbers,
-Assisted by that most disloyall Traytor,
-The Thane of Cawdor, began a dismall Conflict,
-Till that Bellona's Bridegroome, lapt in proofe,
-Confronted him with selfe-comparisons,
-Point against Point, rebellious Arme 'gainst Arme,
-Curbing his lauish spirit: and to conclude,
-The Victorie fell on vs
-
-   King. Great happinesse
-
-   Rosse. That now Sweno, the Norwayes King,
-Craues composition:
-Nor would we deigne him buriall of his men,
-Till he disbursed, at Saint Colmes ynch,
-Ten thousand Dollars, to our generall vse
-
-   King. No more that Thane of Cawdor shall deceiue
-Our Bosome interest: Goe pronounce his present death,
-And with his former Title greet Macbeth
-
-   Rosse. Ile see it done
-
-   King. What he hath lost, Noble Macbeth hath wonne.
-
-Exeunt.
-
-
-Scena Tertia.
-
-Thunder. Enter the three Witches.
-
-  1. Where hast thou beene, Sister?
-  2. Killing Swine
-
-   3. Sister, where thou?
-  1. A Saylors Wife had Chestnuts in her Lappe,
-And mouncht, & mouncht, and mouncht:
-Giue me, quoth I.
-Aroynt thee, Witch, the rumpe-fed Ronyon cryes.
-Her Husband's to Aleppo gone, Master o'th' Tiger:
-But in a Syue Ile thither sayle,
-And like a Rat without a tayle,
-Ile doe, Ile doe, and Ile doe
-
-   2. Ile giue thee a Winde
-
-   1. Th'art kinde
-
-   3. And I another
-
-   1. I my selfe haue all the other,
-And the very Ports they blow,
-All the Quarters that they know,
-I'th' Ship-mans Card.
-Ile dreyne him drie as Hay:
-Sleepe shall neyther Night nor Day
-Hang vpon his Pent-house Lid:
-He shall liue a man forbid:
-Wearie Seu'nights, nine times nine,
-Shall he dwindle, peake, and pine:
-Though his Barke cannot be lost,
-Yet it shall be Tempest-tost.
-Looke what I haue
-
-   2. Shew me, shew me
-
-   1. Here I haue a Pilots Thumbe,
-Wrackt, as homeward he did come.
-
-Drum within.
-
-  3. A Drumme, a Drumme:
-Macbeth doth come
-
-   All. The weyward Sisters, hand in hand,
-Posters of the Sea and Land,
-Thus doe goe, about, about,
-Thrice to thine, and thrice to mine,
-And thrice againe, to make vp nine.
-Peace, the Charme's wound vp.
-Enter Macbeth and Banquo.
-
-  Macb. So foule and faire a day I haue not seene
-
-   Banquo. How farre is't call'd to Soris? What are these,
-So wither'd, and so wilde in their attyre,
-That looke not like th' Inhabitants o'th' Earth,
-And yet are on't? Liue you, or are you aught
-That man may question? you seeme to vnderstand me,
-By each at once her choppie finger laying
-Vpon her skinnie Lips: you should be Women,
-And yet your Beards forbid me to interprete
-That you are so
-
-   Mac. Speake if you can: what are you?
-  1. All haile Macbeth, haile to thee Thane of Glamis
-
-   2. All haile Macbeth, haile to thee Thane of Cawdor
-
-   3. All haile Macbeth, that shalt be King hereafter
-
-   Banq. Good Sir, why doe you start, and seeme to feare
-Things that doe sound so faire? i'th' name of truth
-Are ye fantasticall, or that indeed
-Which outwardly ye shew? My Noble Partner
-You greet with present Grace, and great prediction
-Of Noble hauing, and of Royall hope,
-That he seemes wrapt withall: to me you speake not.
-If you can looke into the Seedes of Time,
-And say, which Graine will grow, and which will not,
-Speake then to me, who neyther begge, nor feare
-Your fauors, nor your hate
-
-   1. Hayle
-
-   2. Hayle
-
-   3. Hayle
-
-   1. Lesser than Macbeth, and greater
-
-   2. Not so happy, yet much happyer
-
-   3. Thou shalt get Kings, though thou be none:
-So all haile Macbeth, and Banquo
-
-   1. Banquo, and Macbeth, all haile
-
-   Macb. Stay you imperfect Speakers, tell me more:
-By Sinells death, I know I am Thane of Glamis,
-But how, of Cawdor? the Thane of Cawdor liues
-A prosperous Gentleman: And to be King,
-Stands not within the prospect of beleefe,
-No more then to be Cawdor. Say from whence
-You owe this strange Intelligence, or why
-Vpon this blasted Heath you stop our way
-With such Prophetique greeting?
-Speake, I charge you.
-
-Witches vanish.
-
-  Banq. The Earth hath bubbles, as the Water ha's,
-And these are of them: whither are they vanish'd?
-  Macb. Into the Ayre: and what seem'd corporall,
-Melted, as breath into the Winde.
-Would they had stay'd
-
-   Banq. Were such things here, as we doe speake about?
-Or haue we eaten on the insane Root,
-That takes the Reason Prisoner?
-  Macb. Your Children shall be Kings
-
-   Banq. You shall be King
-
-   Macb. And Thane of Cawdor too: went it not so?
-  Banq. Toth' selfe-same tune and words: who's here?
-Enter Rosse and Angus.
-
-  Rosse. The King hath happily receiu'd, Macbeth,
-The newes of thy successe: and when he reades
-Thy personall Venture in the Rebels sight,
-His Wonders and his Prayses doe contend,
-Which should be thine, or his: silenc'd with that,
-In viewing o're the rest o'th' selfe-same day,
-He findes thee in the stout Norweyan Rankes,
-Nothing afeard of what thy selfe didst make
-Strange Images of death, as thick as Tale
-Can post with post, and euery one did beare
-Thy prayses in his Kingdomes great defence,
-And powr'd them downe before him
-
-   Ang. Wee are sent,
-To giue thee from our Royall Master thanks,
-Onely to harrold thee into his sight,
-Not pay thee
-
-   Rosse. And for an earnest of a greater Honor,
-He bad me, from him, call thee Thane of Cawdor:
-In which addition, haile most worthy Thane,
-For it is thine
-
-   Banq. What, can the Deuill speake true?
-  Macb. The Thane of Cawdor liues:
-Why doe you dresse me in borrowed Robes?
-  Ang. Who was the Thane, liues yet,
-But vnder heauie Iudgement beares that Life,
-Which he deserues to loose.
-Whether he was combin'd with those of Norway,
-Or did lyne the Rebell with hidden helpe,
-And vantage; or that with both he labour'd
-In his Countreyes wracke, I know not:
-But Treasons Capitall, confess'd, and prou'd,
-Haue ouerthrowne him
-
-   Macb. Glamys, and Thane of Cawdor:
-The greatest is behinde. Thankes for your paines.
-Doe you not hope your Children shall be Kings,
-When those that gaue the Thane of Cawdor to me,
-Promis'd no lesse to them
-
-   Banq. That trusted home,
-Might yet enkindle you vnto the Crowne,
-Besides the Thane of Cawdor. But 'tis strange:
-And oftentimes, to winne vs to our harme,
-The Instruments of Darknesse tell vs Truths,
-Winne vs with honest Trifles, to betray's
-In deepest consequence.
-Cousins, a word, I pray you
-
-   Macb. Two Truths are told,
-As happy Prologues to the swelling Act
-Of the Imperiall Theame. I thanke you Gentlemen:
-This supernaturall solliciting
-Cannot be ill; cannot be good.
-If ill? why hath it giuen me earnest of successe,
-Commencing in a Truth? I am Thane of Cawdor.
-If good? why doe I yeeld to that suggestion,
-Whose horrid Image doth vnfixe my Heire,
-And make my seated Heart knock at my Ribbes,
-Against the vse of Nature? Present Feares
-Are lesse then horrible Imaginings:
-My Thought, whose Murther yet is but fantasticall,
-Shakes so my single state of Man,
-That Function is smother'd in surmise,
-And nothing is, but what is not
-
-   Banq. Looke how our Partner's rapt
-
-   Macb. If Chance will haue me King,
-Why Chance may Crowne me,
-Without my stirre
-
-   Banq. New Honors come vpon him
-Like our strange Garments, cleaue not to their mould,
-But with the aid of vse
-
-   Macb. Come what come may,
-Time, and the Houre, runs through the roughest Day
-
-   Banq. Worthy Macbeth, wee stay vpon your leysure
-
-   Macb. Giue me your fauour:
-My dull Braine was wrought with things forgotten.
-Kinde Gentlemen, your paines are registred,
-Where euery day I turne the Leafe,
-To reade them.
-Let vs toward the King: thinke vpon
-What hath chanc'd: and at more time,
-The Interim hauing weigh'd it, let vs speake
-Our free Hearts each to other
-
-   Banq. Very gladly
-
-   Macb. Till then enough:
-Come friends.
-
-Exeunt.
-
-
-Scena Quarta.
-
-Flourish. Enter King, Lenox, Malcolme, Donalbaine, and
-Attendants.
-
-  King. Is execution done on Cawdor?
-Or not those in Commission yet return'd?
-  Mal. My Liege, they are not yet come back.
-But I haue spoke with one that saw him die:
-Who did report, that very frankly hee
-Confess'd his Treasons, implor'd your Highnesse Pardon,
-And set forth a deepe Repentance:
-Nothing in his Life became him,
-Like the leauing it. Hee dy'de,
-As one that had beene studied in his death,
-To throw away the dearest thing he ow'd,
-As 'twere a carelesse Trifle
-
-   King. There's no Art,
-To finde the Mindes construction in the Face.
-He was a Gentleman, on whom I built
-An absolute Trust.
-Enter Macbeth, Banquo, Rosse, and Angus.
-
-O worthyest Cousin,
-The sinne of my Ingratitude euen now
-Was heauie on me. Thou art so farre before,
-That swiftest Wing of Recompence is slow,
-To ouertake thee. Would thou hadst lesse deseru'd,
-That the proportion both of thanks, and payment,
-Might haue beene mine: onely I haue left to say,
-More is thy due, then more then all can pay
-
-   Macb. The seruice, and the loyaltie I owe,
-In doing it, payes it selfe.
-Your Highnesse part, is to receiue our Duties:
-And our Duties are to your Throne, and State,
-Children, and Seruants; which doe but what they should,
-By doing euery thing safe toward your Loue
-And Honor
-
-   King. Welcome hither:
-I haue begun to plant thee, and will labour
-To make thee full of growing. Noble Banquo,
-That hast no lesse deseru'd, nor must be knowne
-No lesse to haue done so: Let me enfold thee,
-And hold thee to my Heart
-
-   Banq. There if I grow,
-The Haruest is your owne
-
-   King. My plenteous Ioyes,
-Wanton in fulnesse, seeke to hide themselues
-In drops of sorrow. Sonnes, Kinsmen, Thanes,
-And you whose places are the nearest, know,
-We will establish our Estate vpon
-Our eldest, Malcolme, whom we name hereafter,
-The Prince of Cumberland: which Honor must
-Not vnaccompanied, inuest him onely,
-But signes of Noblenesse, like Starres, shall shine
-On all deseruers. From hence to Envernes,
-And binde vs further to you
-
-   Macb. The Rest is Labor, which is not vs'd for you:
-Ile be my selfe the Herbenger, and make ioyfull
-The hearing of my Wife, with your approach:
-So humbly take my leaue
-
-   King. My worthy Cawdor
-
-   Macb. The Prince of Cumberland: that is a step,
-On which I must fall downe, or else o're-leape,
-For in my way it lyes. Starres hide your fires,
-Let not Light see my black and deepe desires:
-The Eye winke at the Hand: yet let that bee,
-Which the Eye feares, when it is done to see.
-Enter.
-
-  King. True worthy Banquo: he is full so valiant,
-And in his commendations, I am fed:
-It is a Banquet to me. Let's after him,
-Whose care is gone before, to bid vs welcome:
-It is a peerelesse Kinsman.
-
-Flourish. Exeunt.
-
-
-Scena Quinta.
-
-Enter Macbeths Wife alone with a Letter.
-
-  Lady. They met me in the day of successe: and I haue
-learn'd by the perfect'st report, they haue more in them, then
-mortall knowledge. When I burnt in desire to question them
-further, they made themselues Ayre, into which they vanish'd.
-Whiles I stood rapt in the wonder of it, came Missiues from
-the King, who all-hail'd me Thane of Cawdor, by which Title
-before, these weyward Sisters saluted me, and referr'd me to
-the comming on of time, with haile King that shalt be. This
-haue I thought good to deliuer thee (my dearest Partner of
-Greatnesse) that thou might'st not loose the dues of reioycing
-by being ignorant of what Greatnesse is promis'd thee. Lay
-it to thy heart and farewell.
-Glamys thou art, and Cawdor, and shalt be
-What thou art promis'd: yet doe I feare thy Nature,
-It is too full o'th' Milke of humane kindnesse,
-To catch the neerest way. Thou would'st be great,
-Art not without Ambition, but without
-The illnesse should attend it. What thou would'st highly,
-That would'st thou holily: would'st not play false,
-And yet would'st wrongly winne.
-Thould'st haue, great Glamys, that which cryes,
-Thus thou must doe, if thou haue it;
-And that which rather thou do'st feare to doe,
-Then wishest should be vndone. High thee hither,
-That I may powre my Spirits in thine Eare,
-And chastise with the valour of my Tongue
-All that impeides thee from the Golden Round,
-Which Fate and Metaphysicall ayde doth seeme
-To haue thee crown'd withall.
-Enter Messenger.
-
-What is your tidings?
-  Mess. The King comes here to Night
-
-   Lady. Thou'rt mad to say it.
-Is not thy Master with him? who, wer't so,
-Would haue inform'd for preparation
-
-   Mess. So please you, it is true: our Thane is comming:
-One of my fellowes had the speed of him;
-Who almost dead for breath, had scarcely more
-Then would make vp his Message
-
-   Lady. Giue him tending,
-He brings great newes,
-
-Exit Messenger.
-
-The Rauen himselfe is hoarse,
-That croakes the fatall entrance of Duncan
-Vnder my Battlements. Come you Spirits,
-That tend on mortall thoughts, vnsex me here,
-And fill me from the Crowne to the Toe, top-full
-Of direst Crueltie: make thick my blood,
-Stop vp th' accesse, and passage to Remorse,
-That no compunctious visitings of Nature
-Shake my fell purpose, nor keepe peace betweene
-Th' effect, and hit. Come to my Womans Brests,
-And take my Milke for Gall, you murth'ring Ministers,
-Where-euer, in your sightlesse substances,
-You wait on Natures Mischiefe. Come thick Night,
-And pall thee in the dunnest smoake of Hell,
-
-That my keene Knife see not the Wound it makes,
-Nor Heauen peepe through the Blanket of the darke,
-To cry, hold, hold.
-Enter Macbeth.
-
-Great Glamys, worthy Cawdor,
-Greater then both, by the all-haile hereafter,
-Thy Letters haue transported me beyond
-This ignorant present, and I feele now
-The future in the instant
-
-   Macb. My dearest Loue,
-Duncan comes here to Night
-
-   Lady. And when goes hence?
-  Macb. To morrow, as he purposes
-
-   Lady. O neuer,
-Shall Sunne that Morrow see.
-Your Face, my Thane, is as a Booke, where men
-May reade strange matters, to beguile the time.
-Looke like the time, beare welcome in your Eye,
-Your Hand, your Tongue: looke like th' innocent flower,
-But be the Serpent vnder't. He that's comming,
-Must be prouided for: and you shall put
-This Nights great Businesse into my dispatch,
-Which shall to all our Nights, and Dayes to come,
-Giue solely soueraigne sway, and Masterdome
-
-   Macb. We will speake further,
-  Lady. Onely looke vp cleare:
-To alter fauor, euer is to feare:
-Leaue all the rest to me.
-
-Exeunt.
-
-
-Scena Sexta.
-
-Hoboyes, and Torches. Enter King, Malcolme, Donalbaine,
-Banquo, Lenox,
-Macduff, Rosse, Angus, and Attendants.
-
-  King. This Castle hath a pleasant seat,
-The ayre nimbly and sweetly recommends it selfe
-Vnto our gentle sences
-
-   Banq. This Guest of Summer,
-The Temple-haunting Barlet does approue,
-By his loued Mansonry, that the Heauens breath
-Smells wooingly here: no Iutty frieze,
-Buttrice, nor Coigne of Vantage, but this Bird
-Hath made his pendant Bed, and procreant Cradle,
-Where they must breed, and haunt: I haue obseru'd
-The ayre is delicate.
-Enter Lady.
-
-  King. See, see our honor'd Hostesse:
-The Loue that followes vs, sometime is our trouble,
-Which still we thanke as Loue. Herein I teach you,
-How you shall bid God-eyld vs for your paines,
-And thanke vs for your trouble
-
-   Lady. All our seruice,
-In euery point twice done, and then done double,
-Were poore, and single Businesse, to contend
-Against those Honors deepe, and broad,
-Wherewith your Maiestie loades our House:
-For those of old, and the late Dignities,
-Heap'd vp to them, we rest your Ermites
-
-   King. Where's the Thane of Cawdor?
-We courst him at the heeles, and had a purpose
-To be his Purueyor: But he rides well,
-And his great Loue (sharpe as his Spurre) hath holp him
-To his home before vs: Faire and Noble Hostesse
-We are your guest to night
-
-   La. Your Seruants euer,
-Haue theirs, themselues, and what is theirs in compt,
-To make their Audit at your Highnesse pleasure,
-Still to returne your owne
-
-   King. Giue me your hand:
-Conduct me to mine Host we loue him highly,
-And shall continue, our Graces towards him.
-By your leaue Hostesse.
-
-Exeunt.
-
-Scena Septima.
-
-Hoboyes. Torches. Enter a Sewer, and diuers Seruants with Dishes
-and
-Seruice ouer the Stage. Then enter Macbeth
-
-   Macb. If it were done, when 'tis done, then 'twer well,
-It were done quickly: If th' Assassination
-Could trammell vp the Consequence, and catch
-With his surcease, Successe: that but this blow
-Might be the be all, and the end all. Heere,
-But heere, vpon this Banke and Schoole of time,
-Wee'ld iumpe the life to come. But in these Cases,
-We still haue iudgement heere, that we but teach
-Bloody Instructions, which being taught, returne
-To plague th' Inuenter, this euen-handed Iustice
-Commends th' Ingredience of our poyson'd Challice
-To our owne lips. Hee's heere in double trust;
-First, as I am his Kinsman, and his Subiect,
-Strong both against the Deed: Then, as his Host,
-Who should against his Murtherer shut the doore,
-Not beare the knife my selfe. Besides, this Duncane
-Hath borne his Faculties so meeke; hath bin
-So cleere in his great Office, that his Vertues
-Will pleade like Angels, Trumpet-tongu'd against
-The deepe damnation of his taking off:
-And Pitty, like a naked New-borne-Babe,
-Striding the blast, or Heauens Cherubin, hors'd
-Vpon the sightlesse Curriors of the Ayre,
-Shall blow the horrid deed in euery eye,
-That teares shall drowne the winde. I haue no Spurre
-To pricke the sides of my intent, but onely
-Vaulting Ambition, which ore-leapes it selfe,
-And falles on th' other.
-Enter Lady.
-
-How now? What Newes?
-  La. He has almost supt: why haue you left the chamber?
-  Mac. Hath he ask'd for me?
-  La. Know you not, he ha's?
-  Mac. We will proceed no further in this Businesse:
-He hath Honour'd me of late, and I haue bought
-Golden Opinions from all sorts of people,
-Which would be worne now in their newest glosse,
-Not cast aside so soone
-
-   La. Was the hope drunke,
-Wherein you drest your selfe? Hath it slept since?
-And wakes it now to looke so greene, and pale,
-At what it did so freely? From this time,
-Such I account thy loue. Art thou affear'd
-To be the same in thine owne Act, and Valour,
-As thou art in desire? Would'st thou haue that
-Which thou esteem'st the Ornament of Life,
-And liue a Coward in thine owne Esteeme?
-Letting I dare not, wait vpon I would,
-Like the poore Cat i'th' Addage
-
-   Macb. Prythee peace:
-I dare do all that may become a man,
-Who dares do more, is none
-
-   La. What Beast was't then
-That made you breake this enterprize to me?
-When you durst do it, then you were a man:
-And to be more then what you were, you would
-Be so much more the man. Nor time, nor place
-Did then adhere, and yet you would make both:
-They haue made themselues, and that their fitnesse now
-Do's vnmake you. I haue giuen Sucke, and know
-How tender 'tis to loue the Babe that milkes me,
-I would, while it was smyling in my Face,
-Haue pluckt my Nipple from his Bonelesse Gummes,
-And dasht the Braines out, had I so sworne
-As you haue done to this
-
-   Macb. If we should faile?
-  Lady. We faile?
-But screw your courage to the sticking place,
-And wee'le not fayle: when Duncan is asleepe,
-(Whereto the rather shall his dayes hard Iourney
-Soundly inuite him) his two Chamberlaines
-Will I with Wine, and Wassell, so conuince,
-That Memorie, the Warder of the Braine,
-Shall be a Fume, and the Receit of Reason
-A Lymbeck onely: when in Swinish sleepe,
-Their drenched Natures lyes as in a Death,
-What cannot you and I performe vpon
-Th' vnguarded Duncan? What not put vpon
-His spungie Officers? who shall beare the guilt
-Of our great quell
-
-   Macb. Bring forth Men-Children onely:
-For thy vndaunted Mettle should compose
-Nothing but Males. Will it not be receiu'd,
-When we haue mark'd with blood those sleepie two
-Of his owne Chamber, and vs'd their very Daggers,
-That they haue don't?
-  Lady. Who dares receiue it other,
-As we shall make our Griefes and Clamor rore,
-Vpon his Death?
-  Macb. I am settled, and bend vp
-Each corporall Agent to this terrible Feat.
-Away, and mock the time with fairest show,
-False Face must hide what the false Heart doth know.
-
-Exeunt.
-
-
-Actus Secundus. Scena Prima.
-
-Enter Banquo, and Fleance, with a Torch before him.
-
-  Banq. How goes the Night, Boy?
-  Fleance. The Moone is downe: I haue not heard the
-Clock
-
-   Banq. And she goes downe at Twelue
-
-   Fleance. I take't, 'tis later, Sir
-
-   Banq. Hold, take my Sword:
-There's Husbandry in Heauen,
-Their Candles are all out: take thee that too.
-A heauie Summons lyes like Lead vpon me,
-And yet I would not sleepe:
-Mercifull Powers, restraine in me the cursed thoughts
-That Nature giues way to in repose.
-Enter Macbeth, and a Seruant with a Torch.
-
-Giue me my Sword: who's there?
-  Macb. A Friend
-
-   Banq. What Sir, not yet at rest? the King's a bed.
-He hath beene in vnusuall Pleasure,
-And sent forth great Largesse to your Offices.
-This Diamond he greetes your Wife withall,
-By the name of most kind Hostesse,
-And shut vp in measurelesse content
-
-   Mac. Being vnprepar'd,
-Our will became the seruant to defect,
-Which else should free haue wrought
-
-   Banq. All's well.
-I dreamt last Night of the three weyward Sisters:
-To you they haue shew'd some truth
-
-   Macb. I thinke not of them:
-Yet when we can entreat an houre to serue,
-We would spend it in some words vpon that Businesse,
-If you would graunt the time
-
-   Banq. At your kind'st leysure
-
-   Macb. If you shall cleaue to my consent,
-When 'tis, it shall make Honor for you
-
-   Banq. So I lose none,
-In seeking to augment it, but still keepe
-My Bosome franchis'd, and Allegeance cleare,
-I shall be counsail'd
-
-   Macb. Good repose the while
-
-   Banq. Thankes Sir: the like to you.
-
-Exit Banquo.
-
-  Macb. Goe bid thy Mistresse, when my drinke is ready,
-She strike vpon the Bell. Get thee to bed.
-Enter.
-
-Is this a Dagger, which I see before me,
-The Handle toward my Hand? Come, let me clutch thee:
-I haue thee not, and yet I see thee still.
-Art thou not fatall Vision, sensible
-To feeling, as to sight? or art thou but
-A Dagger of the Minde, a false Creation,
-Proceeding from the heat-oppressed Braine?
-I see thee yet, in forme as palpable,
-As this which now I draw.
-Thou marshall'st me the way that I was going,
-And such an Instrument I was to vse.
-Mine Eyes are made the fooles o'th' other Sences,
-Or else worth all the rest: I see thee still;
-And on thy Blade, and Dudgeon, Gouts of Blood,
-Which was not so before. There's no such thing:
-It is the bloody Businesse, which informes
-Thus to mine Eyes. Now o're the one halfe World
-Nature seemes dead, and wicked Dreames abuse
-The Curtain'd sleepe: Witchcraft celebrates
-Pale Heccats Offrings: and wither'd Murther,
-Alarum'd by his Centinell, the Wolfe,
-Whose howle's his Watch, thus with his stealthy pace,
-With Tarquins rauishing sides, towards his designe
-Moues like a Ghost. Thou sowre and firme-set Earth
-Heare not my steps, which they may walke, for feare
-Thy very stones prate of my where-about,
-And take the present horror from the time,
-Which now sutes with it. Whiles I threat, he liues:
-Words to the heat of deedes too cold breath giues.
-
-A Bell rings.
-
-I goe, and it is done: the Bell inuites me.
-Heare it not, Duncan, for it is a Knell,
-That summons thee to Heauen, or to Hell.
-Enter.
-
-
-Scena Secunda.
-
-Enter Lady.
-
-  La. That which hath made the[m] drunk, hath made me bold:
-What hath quench'd them, hath giuen me fire.
-Hearke, peace: it was the Owle that shriek'd,
-The fatall Bell-man, which giues the stern'st good-night.
-He is about it, the Doores are open:
-And the surfeted Groomes doe mock their charge
-With Snores. I haue drugg'd their Possets,
-That Death and Nature doe contend about them,
-Whether they liue, or dye.
-Enter Macbeth.
-
-  Macb. Who's there? what hoa?
-  Lady. Alack, I am afraid they haue awak'd,
-And 'tis not done: th' attempt, and not the deed,
-Confounds vs: hearke: I lay'd their Daggers ready,
-He could not misse 'em. Had he not resembled
-My Father as he slept, I had don't.
-My Husband?
-  Macb. I haue done the deed:
-Didst thou not heare a noyse?
-  Lady. I heard the Owle schreame, and the Crickets cry.
-Did not you speake?
-  Macb. When?
-  Lady. Now
-
-   Macb. As I descended?
-  Lady. I
-
-   Macb. Hearke, who lyes i'th' second Chamber?
-  Lady. Donalbaine
-
-   Mac. This is a sorry sight
-
-   Lady. A foolish thought, to say a sorry sight
-
-   Macb. There's one did laugh in's sleepe,
-And one cry'd Murther, that they did wake each other:
-I stood, and heard them: But they did say their Prayers,
-And addrest them againe to sleepe
-
-   Lady. There are two lodg'd together
-
-   Macb. One cry'd God blesse vs, and Amen the other,
-As they had seene me with these Hangmans hands:
-Listning their feare, I could not say Amen,
-When they did say God blesse vs
-
-   Lady. Consider it not so deepely
-
-   Mac. But wherefore could not I pronounce Amen?
-I had most need of Blessing, and Amen stuck in my throat
-
-   Lady. These deeds must not be thought
-After these wayes: so, it will make vs mad
-
-   Macb. Me thought I heard a voyce cry, Sleep no more:
-Macbeth does murther Sleepe, the innocent Sleepe,
-Sleepe that knits vp the rauel'd Sleeue of Care,
-The death of each dayes Life, sore Labors Bath,
-Balme of hurt Mindes, great Natures second Course,
-Chiefe nourisher in Life's Feast
-
-   Lady. What doe you meane?
-  Macb. Still it cry'd, Sleepe no more to all the House:
-Glamis hath murther'd Sleepe, and therefore Cawdor
-Shall sleepe no more: Macbeth shall sleepe no more
-
-   Lady. Who was it, that thus cry'd? why worthy Thane,
-You doe vnbend your Noble strength, to thinke
-So braine-sickly of things: Goe get some Water,
-And wash this filthie Witnesse from your Hand.
-Why did you bring these Daggers from the place?
-They must lye there: goe carry them, and smeare
-The sleepie Groomes with blood
-
-   Macb. Ile goe no more:
-I am afraid, to thinke what I haue done:
-Looke on't againe, I dare not
-
-   Lady. Infirme of purpose:
-Giue me the Daggers: the sleeping, and the dead,
-Are but as Pictures: 'tis the Eye of Childhood,
-That feares a painted Deuill. If he doe bleed,
-Ile guild the Faces of the Groomes withall,
-For it must seeme their Guilt.
-Enter.
-
-Knocke within.
-
-  Macb. Whence is that knocking?
-How is't with me, when euery noyse appalls me?
-What Hands are here? hah: they pluck out mine Eyes.
-Will all great Neptunes Ocean wash this blood
-Cleane from my Hand? no: this my Hand will rather
-The multitudinous Seas incarnardine,
-Making the Greene one, Red.
-Enter Lady.
-
-  Lady. My Hands are of your colour: but I shame
-To weare a Heart so white.
-
-Knocke.
-
-I heare a knocking at the South entry:
-Retyre we to our Chamber:
-A little Water cleares vs of this deed.
-How easie is it then? your Constancie
-Hath left you vnattended.
-
-Knocke.
-
-Hearke, more knocking.
-Get on your Night-Gowne, least occasion call vs,
-And shew vs to be Watchers: be not lost
-So poorely in your thoughts
-
-   Macb. To know my deed,
-
-Knocke.
-
-'Twere best not know my selfe.
-Wake Duncan with thy knocking:
-I would thou could'st.
-
-Exeunt.
-
-
-Scena Tertia.
-
-Enter a Porter. Knocking within.
-
-  Porter. Here's a knocking indeede: if a man were
-Porter of Hell Gate, hee should haue old turning the
-Key.
-
-Knock.
-
-Knock, Knock, Knock. Who's there
-i'th' name of Belzebub? Here's a Farmer, that hang'd
-himselfe on th' expectation of Plentie: Come in time, haue
-Napkins enow about you, here you'le sweat for't.
-
-Knock.
-
-Knock, knock. Who's there in th' other Deuils Name?
-Faith here's an Equiuocator, that could sweare in both
-the Scales against eyther Scale, who committed Treason
-enough for Gods sake, yet could not equiuocate to Heauen:
-oh come in, Equiuocator.
-
-Knock.
-
-Knock, Knock, Knock. Who's there? 'Faith here's an English
-Taylor come hither, for stealing out of a French Hose:
-Come in Taylor, here you may rost your Goose.
-Knock.
-
-Knock, Knock. Neuer at quiet: What are you? but this
-place is too cold for Hell. Ile Deuill-Porter it no further:
-I had thought to haue let in some of all Professions, that
-goe the Primrose way to th' euerlasting Bonfire.
-
-Knock.
-
-Anon, anon, I pray you remember the Porter.
-Enter Macduff, and Lenox.
-
-  Macd. Was it so late, friend, ere you went to Bed,
-That you doe lye so late?
-  Port. Faith Sir, we were carowsing till the second Cock:
-And Drinke, Sir, is a great prouoker of three things
-
-   Macd. What three things does Drinke especially
-prouoke?
-  Port. Marry, Sir, Nose-painting, Sleepe, and Vrine.
-Lecherie, Sir, it prouokes, and vnprouokes: it prouokes
-the desire, but it takes away the performance. Therefore
-much Drinke may be said to be an Equiuocator with Lecherie:
-it makes him, and it marres him; it sets him on,
-and it takes him off; it perswades him, and dis-heartens
-him; makes him stand too, and not stand too: in conclusion,
-equiuocates him in a sleepe, and giuing him the Lye,
-leaues him
-
-   Macd. I beleeue, Drinke gaue thee the Lye last Night
-
-   Port. That it did, Sir, i'the very Throat on me: but I
-requited him for his Lye, and (I thinke) being too strong
-for him, though he tooke vp my Legges sometime, yet I
-made a Shift to cast him.
-Enter Macbeth.
-
-  Macd. Is thy Master stirring?
-Our knocking ha's awak'd him: here he comes
-
-   Lenox. Good morrow, Noble Sir
-
-   Macb. Good morrow both
-
-   Macd. Is the King stirring, worthy Thane?
-  Macb. Not yet
-
-   Macd. He did command me to call timely on him,
-I haue almost slipt the houre
-
-   Macb. Ile bring you to him
-
-   Macd. I know this is a ioyfull trouble to you:
-But yet 'tis one
-
-   Macb. The labour we delight in, Physicks paine:
-This is the Doore
-
-   Macd. Ile make so bold to call, for 'tis my limitted
-seruice.
-
-Exit Macduffe.
-
-  Lenox. Goes the King hence to day?
-  Macb. He does: he did appoint so
-
-   Lenox. The Night ha's been vnruly:
-Where we lay, our Chimneys were blowne downe,
-And (as they say) lamentings heard i'th' Ayre;
-Strange Schreemes of Death,
-And Prophecying, with Accents terrible,
-Of dyre Combustion, and confus'd Euents,
-New hatch'd toth' wofull time.
-The obscure Bird clamor'd the liue-long Night.
-Some say, the Earth was Feuorous,
-And did shake
-
-   Macb. 'Twas a rough Night
-
-   Lenox. My young remembrance cannot paralell
-A fellow to it.
-Enter Macduff.
-
-  Macd. O horror, horror, horror,
-Tongue nor Heart cannot conceiue, nor name thee
-
-   Macb. and Lenox. What's the matter?
-  Macd. Confusion now hath made his Master-peece:
-Most sacrilegious Murther hath broke ope
-The Lords anoynted Temple, and stole thence
-The Life o'th' Building
-
-   Macb. What is't you say, the Life?
-  Lenox. Meane you his Maiestie?
-  Macd. Approch the Chamber, and destroy your sight
-With a new Gorgon. Doe not bid me speake:
-See, and then speake your selues: awake, awake,
-
-Exeunt. Macbeth and Lenox.
-
-Ring the Alarum Bell: Murther, and Treason,
-Banquo, and Donalbaine: Malcolme awake,
-Shake off this Downey sleepe, Deaths counterfeit,
-And looke on Death it selfe: vp, vp, and see
-The great Doomes Image: Malcolme, Banquo,
-As from your Graues rise vp, and walke like Sprights,
-To countenance this horror. Ring the Bell.
-
-Bell rings. Enter Lady.
-
-  Lady. What's the Businesse?
-That such a hideous Trumpet calls to parley
-The sleepers of the House? speake, speake
-
-   Macd. O gentle Lady,
-'Tis not for you to heare what I can speake:
-The repetition in a Womans eare,
-Would murther as it fell.
-Enter Banquo.
-
-O Banquo, Banquo, Our Royall Master's murther'd
-
-   Lady. Woe, alas:
-What, in our House?
-  Ban. Too cruell, any where.
-Deare Duff, I prythee contradict thy selfe,
-And say, it is not so.
-Enter Macbeth, Lenox, and Rosse.
-
-  Macb. Had I but dy'd an houre before this chance,
-I had liu'd a blessed time: for from this instant,
-There's nothing serious in Mortalitie:
-All is but Toyes: Renowne and Grace is dead,
-The Wine of Life is drawne, and the meere Lees
-Is left this Vault, to brag of.
-Enter Malcolme and Donalbaine.
-
-  Donal. What is amisse?
-  Macb. You are, and doe not know't:
-The Spring, the Head, the Fountaine of your Blood
-Is stopt, the very Source of it is stopt
-
-   Macd. Your Royall Father's murther'd
-
-   Mal. Oh, by whom?
-  Lenox. Those of his Chamber, as it seem'd, had don't:
-Their Hands and Faces were all badg'd with blood,
-So were their Daggers, which vnwip'd, we found
-Vpon their Pillowes: they star'd, and were distracted,
-No mans Life was to be trusted with them
-
-   Macb. O, yet I doe repent me of my furie,
-That I did kill them
-
-   Macd. Wherefore did you so?
-  Macb. Who can be wise, amaz'd, temp'rate, & furious,
-Loyall, and Neutrall, in a moment? No man:
-Th' expedition of my violent Loue
-Out-run the pawser, Reason. Here lay Duncan,
-His Siluer skinne, lac'd with His Golden Blood,
-And his gash'd Stabs, look'd like a Breach in Nature,
-For Ruines wastfull entrance: there the Murtherers,
-Steep'd in the Colours of their Trade; their Daggers
-Vnmannerly breech'd with gore: who could refraine,
-That had a heart to loue; and in that heart,
-Courage, to make's loue knowne?
-  Lady. Helpe me hence, hoa
-
-   Macd. Looke to the Lady
-
-   Mal. Why doe we hold our tongues,
-That most may clayme this argument for ours?
-  Donal. What should be spoken here,
-Where our Fate hid in an augure hole,
-May rush, and seize vs? Let's away,
-Our Teares are not yet brew'd
-
-   Mal. Nor our strong Sorrow
-Vpon the foot of Motion
-
-   Banq. Looke to the Lady:
-And when we haue our naked Frailties hid,
-That suffer in exposure; let vs meet,
-And question this most bloody piece of worke,
-To know it further. Feares and scruples shake vs:
-In the great Hand of God I stand, and thence,
-Against the vndivulg'd pretence, I fight
-Of Treasonous Mallice
-
-   Macd. And so doe I
-
-   All. So all
-
-   Macb. Let's briefely put on manly readinesse,
-And meet i'th' Hall together
-
-   All. Well contented.
-
-Exeunt.
-
-  Malc. What will you doe?
-Let's not consort with them:
-To shew an vnfelt Sorrow, is an Office
-Which the false man do's easie.
-Ile to England
-
-   Don. To Ireland, I:
-Our seperated fortune shall keepe vs both the safer:
-Where we are, there's Daggers in mens smiles;
-The neere in blood, the neerer bloody
-
-   Malc. This murtherous Shaft that's shot,
-Hath not yet lighted: and our safest way,
-Is to auoid the ayme. Therefore to Horse,
-And let vs not be daintie of leaue-taking,
-But shift away: there's warrant in that Theft,
-Which steales it selfe, when there's no mercie left.
-
-Exeunt.
-
-
-
-Scena Quarta.
-
-Enter Rosse, with an Old man.
-
-  Old man. Threescore and ten I can remember well,
-Within the Volume of which Time, I haue seene
-Houres dreadfull, and things strange: but this sore Night
-Hath trifled former knowings
-
-   Rosse. Ha, good Father,
-Thou seest the Heauens, as troubled with mans Act,
-Threatens his bloody Stage: byth' Clock 'tis Day,
-And yet darke Night strangles the trauailing Lampe:
-Is't Nights predominance, or the Dayes shame,
-That Darknesse does the face of Earth intombe,
-When liuing Light should kisse it?
-  Old man. 'Tis vnnaturall,
-Euen like the deed that's done: On Tuesday last,
-A Faulcon towring in her pride of place,
-Was by a Mowsing Owle hawkt at, and kill'd
-
-   Rosse. And Duncans Horses,
-(A thing most strange, and certaine)
-Beauteous, and swift, the Minions of their Race,
-Turn'd wilde in nature, broke their stalls, flong out,
-Contending 'gainst Obedience, as they would
-Make Warre with Mankinde
-
-   Old man. 'Tis said, they eate each other
-
-   Rosse. They did so:
-To th' amazement of mine eyes that look'd vpon't.
-Enter Macduffe.
-
-Heere comes the good Macduffe.
-How goes the world Sir, now?
-  Macd. Why see you not?
-  Ross. Is't known who did this more then bloody deed?
-  Macd. Those that Macbeth hath slaine
-
-   Ross. Alas the day,
-What good could they pretend?
-  Macd. They were subborned,
-Malcolme, and Donalbaine the Kings two Sonnes
-Are stolne away and fled, which puts vpon them
-Suspition of the deed
-
-   Rosse. 'Gainst Nature still,
-Thriftlesse Ambition, that will rauen vp
-Thine owne liues meanes: Then 'tis most like,
-The Soueraignty will fall vpon Macbeth
-
-   Macd. He is already nam'd, and gone to Scone
-To be inuested
-
-   Rosse. Where is Duncans body?
-  Macd. Carried to Colmekill,
-The Sacred Store-house of his Predecessors,
-And Guardian of their Bones
-
-   Rosse. Will you to Scone?
-  Macd. No Cosin, Ile to Fife
-
-   Rosse. Well, I will thither
-
-   Macd. Well may you see things wel done there: Adieu
-Least our old Robes sit easier then our new
-
-   Rosse. Farewell, Father
-
-   Old M. Gods benyson go with you, and with those
-That would make good of bad, and Friends of Foes.
-
-Exeunt. omnes
-
-Actus Tertius. Scena Prima.
-
-Enter Banquo.
-
-  Banq. Thou hast it now, King, Cawdor, Glamis, all,
-As the weyard Women promis'd, and I feare
-Thou playd'st most fowly for't: yet it was saide
-It should not stand in thy Posterity,
-But that my selfe should be the Roote, and Father
-Of many Kings. If there come truth from them,
-As vpon thee Macbeth, their Speeches shine,
-Why by the verities on thee made good,
-May they not be my Oracles as well,
-And set me vp in hope. But hush, no more.
-
-Senit sounded. Enter Macbeth as King, Lady Lenox, Rosse, Lords,
-and
-Attendants.
-
-  Macb. Heere's our chiefe Guest
-
-   La. If he had beene forgotten,
-It had bene as a gap in our great Feast,
-And all-thing vnbecomming
-
-   Macb. To night we hold a solemne Supper sir,
-And Ile request your presence
-
-   Banq. Let your Highnesse
-Command vpon me, to the which my duties
-Are with a most indissoluble tye
-For euer knit
-
-   Macb. Ride you this afternoone?
-  Ban. I, my good Lord
-
-   Macb. We should haue else desir'd your good aduice
-(Which still hath been both graue, and prosperous)
-In this dayes Councell: but wee'le take to morrow.
-Is't farre you ride?
-  Ban. As farre, my Lord, as will fill vp the time
-'Twixt this, and Supper. Goe not my Horse the better,
-I must become a borrower of the Night,
-For a darke houre, or twaine
-
-   Macb. Faile not our Feast
-
-   Ban. My Lord, I will not
-
-   Macb. We heare our bloody Cozens are bestow'd
-In England, and in Ireland, not confessing
-Their cruell Parricide, filling their hearers
-With strange inuention. But of that to morrow,
-When therewithall, we shall haue cause of State,
-Crauing vs ioyntly. Hye you to Horse:
-Adieu, till you returne at Night.
-Goes Fleance with you?
-  Ban. I, my good Lord: our time does call vpon's
-
-   Macb. I wish your Horses swift, and sure of foot:
-And so I doe commend you to their backs.
-Farwell.
-
-Exit Banquo.
-
-Let euery man be master of his time,
-Till seuen at Night, to make societie
-The sweeter welcome:
-We will keepe our selfe till Supper time alone:
-While then, God be with you.
-
-Exeunt. Lords.
-
-Sirrha, a word with you: Attend those men
-Our pleasure?
-  Seruant. They are, my Lord, without the Pallace
-Gate
-
-   Macb. Bring them before vs.
-
-Exit Seruant.
-
-To be thus, is nothing, but to be safely thus
-Our feares in Banquo sticke deepe,
-And in his Royaltie of Nature reignes that
-Which would be fear'd. 'Tis much he dares,
-And to that dauntlesse temper of his Minde,
-He hath a Wisdome, that doth guide his Valour,
-To act in safetie. There is none but he,
-Whose being I doe feare: and vnder him,
-My Genius is rebuk'd, as it is said
-Mark Anthonies was by Caesar. He chid the Sisters,
-When first they put the Name of King vpon me,
-And bad them speake to him. Then Prophet-like,
-They hayl'd him Father to a Line of Kings.
-Vpon my Head they plac'd a fruitlesse Crowne,
-And put a barren Scepter in my Gripe,
-Thence to be wrencht with an vnlineall Hand,
-No Sonne of mine succeeding: if't be so,
-For Banquo's Issue haue I fil'd my Minde,
-For them, the gracious Duncan haue I murther'd,
-Put Rancours in the Vessell of my Peace
-Onely for them, and mine eternall Iewell
-Giuen to the common Enemie of Man,
-To make them Kings, the Seedes of Banquo Kings.
-Rather then so, come Fate into the Lyst,
-And champion me to th' vtterance.
-Who's there?
-Enter Seruant, and two Murtherers.
-
-Now goe to the Doore, and stay there till we call.
-
-Exit Seruant.
-
-Was it not yesterday we spoke together?
-  Murth. It was, so please your Highnesse
-
-   Macb. Well then,
-Now haue you consider'd of my speeches:
-Know, that it was he, in the times past,
-Which held you so vnder fortune,
-Which you thought had been our innocent selfe.
-This I made good to you, in our last conference,
-Past in probation with you:
-How you were borne in hand, how crost:
-The Instruments: who wrought with them:
-And all things else, that might
-To halfe a Soule, and to a Notion craz'd,
-Say, Thus did Banquo
-
-   1.Murth. You made it knowne to vs
-
-   Macb. I did so:
-And went further, which is now
-Our point of second meeting.
-Doe you finde your patience so predominant,
-In your nature, that you can let this goe?
-Are you so Gospell'd, to pray for this good man,
-And for his Issue, whose heauie hand
-Hath bow'd you to the Graue, and begger'd
-Yours for euer?
-  1.Murth. We are men, my Liege
-
-   Macb. I, in the Catalogue ye goe for men,
-As Hounds, and Greyhounds, Mungrels, Spaniels, Curres,
-Showghes, Water-Rugs, and Demy-Wolues are clipt
-All by the Name of Dogges: the valued file
-Distinguishes the swift, the slow, the subtle,
-The House-keeper, the Hunter, euery one
-According to the gift, which bounteous Nature
-Hath in him clos'd: whereby he does receiue
-Particular addition, from the Bill,
-That writes them all alike: and so of men.
-Now, if you haue a station in the file,
-Not i'th' worst ranke of Manhood, say't,
-And I will put that Businesse in your Bosomes,
-Whose execution takes your Enemie off,
-Grapples you to the heart; and loue of vs,
-Who weare our Health but sickly in his Life,
-Which in his Death were perfect
-
-   2.Murth. I am one, my Liege,
-Whom the vile Blowes and Buffets of the World
-Hath so incens'd, that I am recklesse what I doe,
-To spight the World
-
-   1.Murth. And I another,
-So wearie with Disasters, tugg'd with Fortune,
-That I would set my Life on any Chance,
-To mend it, or be rid on't
-
-   Macb. Both of you know Banquo was your Enemie
-
-   Murth. True, my Lord
-
-   Macb. So is he mine: and in such bloody distance,
-That euery minute of his being, thrusts
-Against my neer'st of Life: and though I could
-With bare-fac'd power sweepe him from my sight,
-And bid my will auouch it; yet I must not,
-For certaine friends that are both his, and mine,
-Whose loues I may not drop, but wayle his fall,
-Who I my selfe struck downe: and thence it is,
-That I to your assistance doe make loue,
-Masking the Businesse from the common Eye,
-For sundry weightie Reasons
-
-   2.Murth. We shall, my Lord,
-Performe what you command vs
-
-   1.Murth. Though our Liues-
-  Macb. Your Spirits shine through you.
-Within this houre, at most,
-I will aduise you where to plant your selues,
-Acquaint you with the perfect Spy o'th' time,
-The moment on't, for't must be done to Night,
-And something from the Pallace: alwayes thought,
-That I require a clearenesse; and with him,
-To leaue no Rubs nor Botches in the Worke:
-  Fleans , his Sonne, that keepes him companie,
-Whose absence is no lesse materiall to me,
-Then is his Fathers, must embrace the fate
-Of that darke houre: resolue your selues apart,
-Ile come to you anon
-
-   Murth. We are resolu'd, my Lord
-
-   Macb. Ile call vpon you straight: abide within,
-It is concluded: Banquo, thy Soules flight,
-If it finde Heauen, must finde it out to Night.
-
-Exeunt.
-
-
-Scena Secunda.
-
-Enter Macbeths Lady, and a Seruant.
-
-  Lady. Is Banquo gone from Court?
-  Seruant. I, Madame, but returnes againe to Night
-
-   Lady. Say to the King, I would attend his leysure,
-For a few words
-
-   Seruant. Madame, I will.
-Enter.
-
-  Lady. Nought's had, all's spent.
-Where our desire is got without content:
-'Tis safer, to be that which we destroy,
-Then by destruction dwell in doubtfull ioy.
-Enter Macbeth.
-
-How now, my Lord, why doe you keepe alone?
-Of sorryest Fancies your Companions making,
-Vsing those Thoughts, which should indeed haue dy'd
-With them they thinke on: things without all remedie
-Should be without regard: what's done, is done
-
-   Macb. We haue scorch'd the Snake, not kill'd it:
-Shee'le close, and be her selfe, whilest our poore Mallice
-Remaines in danger of her former Tooth.
-But let the frame of things dis-ioynt,
-Both the Worlds suffer,
-Ere we will eate our Meale in feare, and sleepe
-In the affliction of these terrible Dreames,
-That shake vs Nightly: Better be with the dead,
-Whom we, to gayne our peace, haue sent to peace,
-Then on the torture of the Minde to lye
-In restlesse extasie.
-Duncane is in his Graue:
-After Lifes fitfull Feuer, he sleepes well,
-Treason ha's done his worst: nor Steele, nor Poyson,
-Mallice domestique, forraine Leuie, nothing,
-Can touch him further
-
-   Lady. Come on:
-Gentle my Lord, sleeke o're your rugged Lookes,
-Be bright and Iouiall among your Guests to Night
-
-   Macb. So shall I Loue, and so I pray be you:
-Let your remembrance apply to Banquo,
-Present him Eminence, both with Eye and Tongue:
-Vnsafe the while, that wee must laue
-Our Honors in these flattering streames,
-And make our Faces Vizards to our Hearts,
-Disguising what they are
-
-   Lady. You must leaue this
-
-   Macb. O, full of Scorpions is my Minde, deare Wife:
-Thou know'st, that Banquo and his Fleans liues
-
-   Lady. But in them, Natures Coppie's not eterne
-
-   Macb. There's comfort yet, they are assaileable,
-Then be thou iocund: ere the Bat hath flowne
-His Cloyster'd flight, ere to black Heccats summons
-The shard-borne Beetle, with his drowsie hums,
-Hath rung Nights yawning Peale,
-There shall be done a deed of dreadfull note
-
-   Lady. What's to be done?
-  Macb. Be innocent of the knowledge, dearest Chuck,
-Till thou applaud the deed: Come, seeling Night,
-Skarfe vp the tender Eye of pittifull Day,
-And with thy bloodie and inuisible Hand
-Cancell and teare to pieces that great Bond,
-Which keepes me pale. Light thickens,
-And the Crow makes Wing toth' Rookie Wood:
-Good things of Day begin to droope, and drowse,
-Whiles Nights black Agents to their Prey's doe rowse.
-Thou maruell'st at my words: but hold thee still,
-Things bad begun, make strong themselues by ill:
-So prythee goe with me.
-
-Exeunt.
-
-
-Scena Tertia.
-
-Enter three Murtherers.
-
-  1. But who did bid thee ioyne with vs?
-  3. Macbeth
-
-   2. He needes not our mistrust, since he deliuers
-Our Offices, and what we haue to doe,
-To the direction iust
-
-   1. Then stand with vs:
-The West yet glimmers with some streakes of Day.
-Now spurres the lated Traueller apace,
-To gayne the timely Inne, and neere approches
-The subiect of our Watch
-
-   3. Hearke, I heare Horses
-
-   Banquo within. Giue vs a Light there, hoa
-
-   2. Then 'tis hee:
-The rest, that are within the note of expectation,
-Alreadie are i'th' Court
-
-   1. His Horses goe about
-
-   3. Almost a mile: but he does vsually,
-So all men doe, from hence toth' Pallace Gate
-Make it their Walke.
-Enter Banquo and Fleans, with a Torch.
-
-  2. A Light, a Light
-
-   3. 'Tis hee
-
-   1. Stand too't
-
-   Ban. It will be Rayne to Night
-
-   1. Let it come downe
-
-   Ban. O, Trecherie!
-Flye good Fleans, flye, flye, flye,
-Thou may'st reuenge. O Slaue!
-  3. Who did strike out the Light?
-  1. Was't not the way?
-  3. There's but one downe: the Sonne is fled
-
-   2. We haue lost
-Best halfe of our Affaire
-
-   1. Well, let's away, and say how much is done.
-
-Exeunt.
-
-
-Scaena Quarta.
-
-Banquet prepar'd. Enter Macbeth, Lady, Rosse, Lenox, Lords, and
-Attendants.
-
-  Macb. You know your owne degrees, sit downe:
-At first and last, the hearty welcome
-
-   Lords. Thankes to your Maiesty
-
-   Macb. Our selfe will mingle with Society,
-And play the humble Host:
-Our Hostesse keepes her State, but in best time
-We will require her welcome
-
-   La. Pronounce it for me Sir, to all our Friends,
-For my heart speakes, they are welcome.
-Enter first Murtherer.
-
-  Macb. See they encounter thee with their harts thanks
-Both sides are euen: heere Ile sit i'th' mid'st,
-Be large in mirth, anon wee'l drinke a Measure
-The Table round. There's blood vpon thy face
-
-   Mur. 'Tis Banquo's then
-
-   Macb. 'Tis better thee without, then he within.
-Is he dispatch'd?
-  Mur. My Lord his throat is cut, that I did for him
-
-   Mac. Thou art the best o'th' Cut-throats,
-Yet hee's good that did the like for Fleans:
-If thou did'st it, thou art the Non-pareill
-
-   Mur. Most Royall Sir
-Fleans is scap'd
-
-   Macb. Then comes my Fit againe:
-I had else beene perfect;
-Whole as the Marble, founded as the Rocke,
-As broad, and generall, as the casing Ayre:
-But now I am cabin'd, crib'd, confin'd, bound in
-To sawcy doubts, and feares. But Banquo's safe?
-  Mur. I, my good Lord: safe in a ditch he bides,
-With twenty trenched gashes on his head;
-The least a Death to Nature
-
-   Macb. Thankes for that:
-There the growne Serpent lyes, the worme that's fled
-Hath Nature that in time will Venom breed,
-No teeth for th' present. Get thee gone, to morrow
-Wee'l heare our selues againe.
-
-Exit Murderer.
-
-  Lady. My Royall Lord,
-You do not giue the Cheere, the Feast is sold
-That is not often vouch'd, while 'tis a making:
-'Tis giuen, with welcome: to feede were best at home:
-From thence, the sawce to meate is Ceremony,
-Meeting were bare without it.
-Enter the Ghost of Banquo, and sits in Macbeths place.
-
-  Macb. Sweet Remembrancer:
-Now good digestion waite on Appetite,
-And health on both
-
-   Lenox. May't please your Highnesse sit
-
-   Macb. Here had we now our Countries Honor, roof'd,
-Were the grac'd person of our Banquo present:
-Who, may I rather challenge for vnkindnesse,
-Then pitty for Mischance
-
-   Rosse. His absence (Sir)
-Layes blame vpon his promise. Pleas't your Highnesse
-To grace vs with your Royall Company?
-  Macb. The Table's full
-
-   Lenox. Heere is a place reseru'd Sir
-
-   Macb. Where?
-  Lenox. Heere my good Lord.
-What is't that moues your Highnesse?
-  Macb. Which of you haue done this?
-  Lords. What, my good Lord?
-  Macb. Thou canst not say I did it: neuer shake
-Thy goary lockes at me
-
-   Rosse. Gentlemen rise, his Highnesse is not well
-
-   Lady. Sit worthy Friends: my Lord is often thus,
-And hath beene from his youth. Pray you keepe Seat,
-The fit is momentary, vpon a thought
-He will againe be well. If much you note him
-You shall offend him, and extend his Passion,
-Feed, and regard him not. Are you a man?
-  Macb. I, and a bold one, that dare looke on that
-Which might appall the Diuell
-
-   La. O proper stuffe:
-This is the very painting of your feare:
-This is the Ayre-drawne-Dagger which you said
-Led you to Duncan. O, these flawes and starts
-(Impostors to true feare) would well become
-A womans story, at a Winters fire
-Authoriz'd by her Grandam: shame it selfe,
-Why do you make such faces? When all's done
-You looke but on a stoole
-
-   Macb. Prythee see there:
-Behold, looke, loe, how say you:
-Why what care I, if thou canst nod, speake too.
-If Charnell houses, and our Graues must send
-Those that we bury, backe; our Monuments
-Shall be the Mawes of Kytes
-
-   La. What? quite vnmann'd in folly
-
-   Macb. If I stand heere, I saw him
-
-   La. Fie for shame
-
-   Macb. Blood hath bene shed ere now, i'th' olden time
-Ere humane Statute purg'd the gentle Weale:
-I, and since too, Murthers haue bene perform'd
-Too terrible for the eare. The times has bene,
-That when the Braines were out, the man would dye,
-And there an end: But now they rise againe
-With twenty mortall murthers on their crownes,
-And push vs from our stooles. This is more strange
-Then such a murther is
-
-   La. My worthy Lord
-Your Noble Friends do lacke you
-
-   Macb. I do forget:
-Do not muse at me my most worthy Friends,
-I haue a strange infirmity, which is nothing
-To those that know me. Come, loue and health to all,
-Then Ile sit downe: Giue me some Wine, fill full:
-Enter Ghost.
-
-I drinke to th' generall ioy o'th' whole Table,
-And to our deere Friend Banquo, whom we misse:
-Would he were heere: to all, and him we thirst,
-And all to all
-
-   Lords. Our duties, and the pledge
-
-   Mac. Auant, & quit my sight, let the earth hide thee:
-Thy bones are marrowlesse, thy blood is cold:
-Thou hast no speculation in those eyes
-Which thou dost glare with
-
-   La. Thinke of this good Peeres
-But as a thing of Custome: 'Tis no other,
-Onely it spoyles the pleasure of the time
-
-   Macb. What man dare, I dare:
-Approach thou like the rugged Russian Beare,
-The arm'd Rhinoceros, or th' Hircan Tiger,
-Take any shape but that, and my firme Nerues
-Shall neuer tremble. Or be aliue againe,
-And dare me to the Desart with thy Sword:
-If trembling I inhabit then, protest mee
-The Baby of a Girle. Hence horrible shadow,
-Vnreall mock'ry hence. Why so, being gone
-I am a man againe: pray you sit still
-
-   La. You haue displac'd the mirth,
-Broke the good meeting, with most admir'd disorder
-
-   Macb. Can such things be,
-And ouercome vs like a Summers Clowd,
-Without our speciall wonder? You make me strange
-Euen to the disposition that I owe,
-When now I thinke you can behold such sights,
-And keepe the naturall Rubie of your Cheekes,
-When mine is blanch'd with feare
-
-   Rosse. What sights, my Lord?
-  La. I pray you speake not: he growes worse & worse
-Question enrages him: at once, goodnight.
-Stand not vpon the order of your going,
-But go at once
-
-   Len. Good night, and better health
-Attend his Maiesty
-
-   La. A kinde goodnight to all.
-
-Exit Lords.
-
-  Macb. It will haue blood they say:
-Blood will haue Blood:
-Stones haue beene knowne to moue, & Trees to speake:
-Augures, and vnderstood Relations, haue
-By Maggot Pyes, & Choughes, & Rookes brought forth
-The secret'st man of Blood. What is the night?
-  La. Almost at oddes with morning, which is which
-
-   Macb. How say'st thou that Macduff denies his person
-At our great bidding
-
-   La. Did you send to him Sir?
-  Macb. I heare it by the way: But I will send:
-There's not a one of them but in his house
-I keepe a Seruant Feed. I will to morrow
-(And betimes I will) to the weyard Sisters.
-More shall they speake: for now I am bent to know
-By the worst meanes, the worst, for mine owne good,
-All causes shall giue way. I am in blood
-Stept in so farre, that should I wade no more,
-Returning were as tedious as go ore:
-Strange things I haue in head, that will to hand,
-Which must be acted, ere they may be scand
-
-   La. You lacke the season of all Natures, sleepe
-
-   Macb. Come, wee'l to sleepe: My strange & self-abuse
-Is the initiate feare, that wants hard vse:
-We are yet but yong indeed.
-
-Exeunt.
-
-
-Scena Quinta.
-
-Thunder. Enter the three Witches, meeting Hecat.
-
-  1. Why how now Hecat, you looke angerly?
-  Hec. Haue I not reason (Beldams) as you are?
-Sawcy, and ouer-bold, how did you dare
-To Trade, and Trafficke with Macbeth,
-In Riddles, and Affaires of death;
-And I the Mistris of your Charmes,
-The close contriuer of all harmes,
-Was neuer call'd to beare my part,
-Or shew the glory of our Art?
-And which is worse, all you haue done
-Hath bene but for a wayward Sonne,
-Spightfull, and wrathfull, who (as others do)
-Loues for his owne ends, not for you.
-But make amends now: Get you gon,
-And at the pit of Acheron
-Meete me i'th' Morning: thither he
-Will come, to know his Destinie.
-Your Vessels, and your Spels prouide,
-Your Charmes, and euery thing beside;
-I am for th' Ayre: This night Ile spend
-Vnto a dismall, and a Fatall end.
-Great businesse must be wrought ere Noone.
-Vpon the Corner of the Moone
-There hangs a vap'rous drop, profound,
-Ile catch it ere it come to ground;
-And that distill'd by Magicke slights,
-Shall raise such Artificiall Sprights,
-As by the strength of their illusion,
-Shall draw him on to his Confusion.
-He shall spurne Fate, scorne Death, and beare
-His hopes 'boue Wisedome, Grace, and Feare:
-And you all know, Security
-Is Mortals cheefest Enemie.
-
-Musicke, and a Song.
-
-Hearke, I am call'd: my little Spirit see
-Sits in Foggy cloud, and stayes for me.
-
-Sing within. Come away, come away, &c.
-
-  1 Come, let's make hast, shee'l soone be
-Backe againe.
-
-Exeunt.
-
-
-Scaena Sexta.
-
-Enter Lenox, and another Lord.
-
-  Lenox. My former Speeches,
-Haue but hit your Thoughts
-Which can interpret farther: Onely I say
-Things haue bin strangely borne. The gracious Duncan
-Was pittied of Macbeth: marry he was dead:
-And the right valiant Banquo walk'd too late,
-Whom you may say (if't please you) Fleans kill'd,
-For Fleans fled: Men must not walke too late.
-Who cannot want the thought, how monstrous
-It was for Malcolme, and for Donalbane
-To kill their gracious Father? Damned Fact,
-How it did greeue Macbeth? Did he not straight
-In pious rage, the two delinquents teare,
-That were the Slaues of drinke, and thralles of sleepe?
-Was not that Nobly done? I, and wisely too:
-For 'twould haue anger'd any heart aliue
-To heare the men deny't. So that I say,
-He ha's borne all things well, and I do thinke,
-That had he Duncans Sonnes vnder his Key,
-(As, and't please Heauen he shall not) they should finde
-What 'twere to kill a Father: So should Fleans.
-But peace; for from broad words, and cause he fayl'd
-His presence at the Tyrants Feast, I heare
-Macduffe liues in disgrace. Sir, can you tell
-Where he bestowes himselfe?
-  Lord. The Sonnes of Duncane
-(From whom this Tyrant holds the due of Birth)
-Liues in the English Court, and is receyu'd
-Of the most Pious Edward, with such grace,
-That the maleuolence of Fortune, nothing
-Takes from his high respect. Thither Macduffe
-Is gone, to pray the Holy King, vpon his ayd
-To wake Northumberland, and warlike Seyward,
-That by the helpe of these (with him aboue)
-To ratifie the Worke) we may againe
-Giue to our Tables meate, sleepe to our Nights:
-Free from our Feasts, and Banquets bloody kniues;
-Do faithfull Homage, and receiue free Honors,
-All which we pine for now. And this report
-Hath so exasperate their King, that hee
-Prepares for some attempt of Warre
-
-   Len. Sent he to Macduffe?
-  Lord. He did: and with an absolute Sir, not I
-The clowdy Messenger turnes me his backe,
-And hums; as who should say, you'l rue the time
-That clogges me with this Answer
-
-   Lenox. And that well might
-Aduise him to a Caution, t' hold what distance
-His wisedome can prouide. Some holy Angell
-Flye to the Court of England, and vnfold
-His Message ere he come, that a swift blessing
-May soone returne to this our suffering Country,
-Vnder a hand accurs'd
-
-   Lord. Ile send my Prayers with him.
-
-Exeunt.
-
-Actus Quartus. Scena Prima.
-
-Thunder. Enter the three Witches.
-
-  1 Thrice the brinded Cat hath mew'd
-
-   2 Thrice, and once the Hedge-Pigge whin'd
-
-   3 Harpier cries, 'tis time, 'tis time
-
-   1 Round about the Caldron go:
-In the poysond Entrailes throw
-Toad, that vnder cold stone,
-Dayes and Nights, ha's thirty one:
-Sweltred Venom sleeping got,
-Boyle thou first i'th' charmed pot
-
-   All. Double, double, toile and trouble;
-Fire burne, and Cauldron bubble
-
-   2 Fillet of a Fenny Snake,
-In the Cauldron boyle and bake:
-Eye of Newt, and Toe of Frogge,
-Wooll of Bat, and Tongue of Dogge:
-Adders Forke, and Blinde-wormes Sting,
-Lizards legge, and Howlets wing:
-For a Charme of powrefull trouble,
-Like a Hell-broth, boyle and bubble
-
-   All. Double, double, toyle and trouble,
-Fire burne, and Cauldron bubble
-
-   3 Scale of Dragon, Tooth of Wolfe,
-Witches Mummey, Maw, and Gulfe
-Of the rauin'd salt Sea sharke:
-Roote of Hemlocke, digg'd i'th' darke:
-Liuer of Blaspheming Iew,
-Gall of Goate, and Slippes of Yew,
-Sliuer'd in the Moones Ecclipse:
-Nose of Turke, and Tartars lips:
-Finger of Birth-strangled Babe,
-Ditch-deliuer'd by a Drab,
-Make the Grewell thicke, and slab.
-Adde thereto a Tigers Chawdron,
-For th' Ingredience of our Cawdron
-
-   All. Double, double, toyle and trouble,
-Fire burne, and Cauldron bubble
-
-   2 Coole it with a Baboones blood,
-Then the Charme is firme and good.
-Enter Hecat, and the other three Witches.
-
-  Hec. O well done: I commend your paines,
-And euery one shall share i'th' gaines:
-And now about the Cauldron sing
-Like Elues and Fairies in a Ring,
-Inchanting all that you put in.
-
-Musicke and a Song. Blacke Spirits, &c.
-
-  2 By the pricking of my Thumbes,
-Something wicked this way comes:
-Open Lockes, who euer knockes.
-Enter Macbeth.
-
-  Macb. How now you secret, black, & midnight Hags?
-What is't you do?
-  All. A deed without a name
-
-   Macb. I coniure you, by that which you Professe,
-(How ere you come to know it) answer me:
-Though you vntye the Windes, and let them fight
-Against the Churches: Though the yesty Waues
-Confound and swallow Nauigation vp:
-Though bladed Corne be lodg'd, & Trees blown downe,
-Though Castles topple on their Warders heads:
-Though Pallaces, and Pyramids do slope
-Their heads to their Foundations: Though the treasure
-Of Natures Germaine, tumble altogether,
-Euen till destruction sicken: Answer me
-To what I aske you
-
-   1 Speake
-
-   2 Demand
-
-   3 Wee'l answer
-
-   1 Say, if th'hadst rather heare it from our mouthes,
-Or from our Masters
-
-   Macb. Call 'em: let me see 'em
-
-   1 Powre in Sowes blood, that hath eaten
-Her nine Farrow: Greaze that's sweaten
-From the Murderers Gibbet, throw
-Into the Flame
-
-   All. Come high or low:
-Thy Selfe and Office deaftly show.
-Thunder. 1. Apparation, an Armed Head.
-
-  Macb. Tell me, thou vnknowne power
-
-   1 He knowes thy thought:
-Heare his speech, but say thou nought
-
-   1 Appar. Macbeth, Macbeth, Macbeth:
-Beware Macduffe,
-Beware the Thane of Fife: dismisse me. Enough.
-
-He Descends.
-
-  Macb. What ere thou art, for thy good caution, thanks
-Thou hast harp'd my feare aright. But one word more
-
-   1 He will not be commanded: heere's another
-More potent then the first.
-
-Thunder. 2 Apparition, a Bloody Childe.
-
-  2 Appar. Macbeth, Macbeth, Macbeth
-
-   Macb. Had I three eares, Il'd heare thee
-
-   Appar. Be bloody, bold, & resolute:
-Laugh to scorne
-The powre of man: For none of woman borne
-Shall harme Macbeth.
-
-Descends.
-
-  Mac. Then liue Macduffe: what need I feare of thee?
-But yet Ile make assurance: double sure,
-And take a Bond of Fate: thou shalt not liue,
-That I may tell pale-hearted Feare, it lies;
-And sleepe in spight of Thunder.
-
-Thunder 3 Apparation, a Childe Crowned, with a Tree in his hand.
-
-What is this, that rises like the issue of a King,
-And weares vpon his Baby-brow, the round
-And top of Soueraignty?
-  All. Listen, but speake not too't
-
-   3 Appar. Be Lyon metled, proud, and take no care:
-Who chafes, who frets, or where Conspirers are:
-Macbeth shall neuer vanquish'd be, vntill
-Great Byrnam Wood, to high Dunsmane Hill
-Shall come against him.
-
-Descend.
-
-  Macb. That will neuer bee:
-Who can impresse the Forrest, bid the Tree
-Vnfixe his earth-bound Root? Sweet boadments, good:
-Rebellious dead, rise neuer till the Wood
-Of Byrnan rise, and our high plac'd Macbeth
-Shall liue the Lease of Nature, pay his breath
-To time, and mortall Custome. Yet my Hart
-Throbs to know one thing: Tell me, if your Art
-Can tell so much: Shall Banquo's issue euer
-Reigne in this Kingdome?
-  All. Seeke to know no more
-
-   Macb. I will be satisfied. Deny me this,
-And an eternall Curse fall on you: Let me know.
-Why sinkes that Caldron? & what noise is this?
-
-Hoboyes
-
-  1 Shew
-
-   2 Shew
-
-   3 Shew
-
-   All. Shew his Eyes, and greeue his Hart,
-Come like shadowes, so depart.
-
-A shew of eight Kings, and Banquo last, with a glasse in his hand.
-
-  Macb. Thou art too like the Spirit of Banquo: Down:
-Thy Crowne do's seare mine Eye-bals. And thy haire
-Thou other Gold-bound-brow, is like the first:
-A third, is like the former. Filthy Hagges,
-Why do you shew me this? - A fourth? Start eyes!
-What will the Line stretch out to'th' cracke of Doome?
-Another yet? A seauenth? Ile see no more:
-And yet the eighth appeares, who beares a glasse,
-Which shewes me many more: and some I see,
-That two-fold Balles, and trebble Scepters carry.
-Horrible sight: Now I see 'tis true,
-For the Blood-bolter'd Banquo smiles vpon me,
-And points at them for his. What? is this so?
-  1 I Sir, all this is so. But why
-Stands Macbeth thus amazedly?
-Come Sisters, cheere we vp his sprights,
-And shew the best of our delights.
-Ile Charme the Ayre to giue a sound,
-While you performe your Antique round:
-That this great King may kindly say,
-Our duties, did his welcome pay.
-
-Musicke. The Witches Dance, and vanish.
-
-  Macb. Where are they? Gone?
-Let this pernitious houre,
-Stand aye accursed in the Kalender.
-Come in, without there.
-Enter Lenox.
-
-  Lenox. What's your Graces will
-
-   Macb. Saw you the Weyard Sisters?
-  Lenox. No my Lord
-
-   Macb. Came they not by you?
-  Lenox. No indeed my Lord
-
-   Macb. Infected be the Ayre whereon they ride,
-And damn'd all those that trust them. I did heare
-The gallopping of Horse. Who was't came by?
-  Len. 'Tis two or three my Lord, that bring you word:
-Macduff is fled to England
-
-   Macb. Fled to England?
-  Len. I, my good Lord
-
-   Macb. Time, thou anticipat'st my dread exploits:
-The flighty purpose neuer is o're-tooke
-Vnlesse the deed go with it. From this moment,
-The very firstlings of my heart shall be
-The firstlings of my hand. And euen now
-To Crown my thoughts with Acts: be it thoght & done:
-The Castle of Macduff, I will surprize.
-Seize vpon Fife; giue to th' edge o'th' Sword
-His Wife, his Babes, and all vnfortunate Soules
-That trace him in his Line. No boasting like a Foole,
-This deed Ile do, before this purpose coole,
-But no more sights. Where are these Gentlemen?
-Come bring me where they are.
-
-Exeunt.
-
-Scena Secunda.
-
-Enter Macduffes Wife, her Son, and Rosse.
-
-  Wife. What had he done, to make him fly the Land?
-  Rosse. You must haue patience Madam
-
-   Wife. He had none:
-His flight was madnesse: when our Actions do not,
-Our feares do make vs Traitors
-
-   Rosse. You know not
-Whether it was his wisedome, or his feare
-
-   Wife. Wisedom? to leaue his wife, to leaue his Babes,
-His Mansion, and his Titles, in a place
-From whence himselfe do's flye? He loues vs not,
-He wants the naturall touch. For the poore Wren
-(The most diminitiue of Birds) will fight,
-Her yong ones in her Nest, against the

<TRUNCATED>
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/resources/sort_by_value.txt
----------------------------------------------------------------------
diff --git a/crunch/src/it/resources/sort_by_value.txt b/crunch/src/it/resources/sort_by_value.txt
deleted file mode 100644
index 73f7d11..0000000
--- a/crunch/src/it/resources/sort_by_value.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-A	2
-B	1
-C	3
-D	2
-E	1


[14/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/Pipeline.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/Pipeline.java b/crunch/src/main/java/org/apache/crunch/Pipeline.java
deleted file mode 100644
index 84c720c..0000000
--- a/crunch/src/main/java/org/apache/crunch/Pipeline.java
+++ /dev/null
@@ -1,138 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import org.apache.hadoop.conf.Configuration;
-
-/**
- * Manages the state of a pipeline execution.
- * 
- */
-public interface Pipeline {
-
-  /**
-   * Set the {@code Configuration} to use with this pipeline.
-   */
-  void setConfiguration(Configuration conf);
-
-  /**
-   * Returns the name of this pipeline.
-   * 
-   * @return Name of the pipeline
-   */
-  String getName();
-
-  /**
-   * Returns the {@code Configuration} instance associated with this pipeline.
-   */
-  Configuration getConfiguration();
-
-  /**
-   * Converts the given {@code Source} into a {@code PCollection} that is
-   * available to jobs run using this {@code Pipeline} instance.
-   * 
-   * @param source
-   *          The source of data
-   * @return A PCollection that references the given source
-   */
-  <T> PCollection<T> read(Source<T> source);
-
-  /**
-   * A version of the read method for {@code TableSource} instances that map to
-   * {@code PTable}s.
-   * 
-   * @param tableSource
-   *          The source of the data
-   * @return A PTable that references the given source
-   */
-  <K, V> PTable<K, V> read(TableSource<K, V> tableSource);
-
-  /**
-   * Write the given collection to the given target on the next pipeline run. The
-   * system will check to see if the target's location already exists using the
-   * {@code WriteMode.DEFAULT} rule for the given {@code Target}.
-   * 
-   * @param collection
-   *          The collection
-   * @param target
-   *          The output target
-   */
-  void write(PCollection<?> collection, Target target);
-
-  /**
-   * Write the contents of the {@code PCollection} to the given {@code Target},
-   * using the storage format specified by the target and the given
-   * {@code WriteMode} for cases where the referenced {@code Target}
-   * already exists.
-   *
-   * @param collection
-   *          The collection
-   * @param target
-   *          The target to write to
-   * @param writeMode
-   *          The strategy to use for handling existing outputs
-   */
-  void write(PCollection<?> collection, Target target,
-      Target.WriteMode writeMode);
-
-  /**
-   * Create the given PCollection and read the data it contains into the
-   * returned Iterable instance for client use.
-   *
-   * @param pcollection
-   *          The PCollection to materialize
-   * @return the data from the PCollection as a read-only Iterable
-   */
-  <T> Iterable<T> materialize(PCollection<T> pcollection);
-
-  /**
-   * Constructs and executes a series of MapReduce jobs in order to write data
-   * to the output targets.
-   */
-  PipelineResult run();
-
-  /**
-   * Constructs and starts a series of MapReduce jobs in order to write data to
-   * the output targets, but returns a {@code PipelineExecution} to allow clients to control
-   * job execution.
-   *
-   * @return a {@code PipelineExecution} handle for monitoring and controlling the running jobs
-   */
-  PipelineExecution runAsync();
-  
-  /**
-   * Run any remaining jobs required to generate outputs and then clean up any
-   * intermediate data files that were created in this run or previous calls to
-   * {@code run}.
-   */
-  PipelineResult done();
-
-  /**
-   * A convenience method for reading a text file.
-   */
-  PCollection<String> readTextFile(String pathName);
-
-  /**
-   * A convenience method for writing a text file.
-   */
-  <T> void writeTextFile(PCollection<T> collection, String pathName);
-
-  /**
-   * Turn on debug logging for jobs that are run from this pipeline.
-   */
-  void enableDebug();
-}
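
For orientation, here is a minimal sketch of how the interface above is typically used end to end: read a text file, write it back out, and block until the jobs finish. It assumes the MRPipeline implementation class from elsewhere in crunch-core; the input and output paths are placeholders.

import org.apache.crunch.PCollection;
import org.apache.crunch.Pipeline;
import org.apache.crunch.PipelineResult;
import org.apache.crunch.impl.mr.MRPipeline;

public class PipelineSketch {
  public static void main(String[] args) {
    // MRPipeline is the MapReduce-backed implementation of Pipeline (assumed
    // to live elsewhere in crunch-core; not part of the interface above).
    Pipeline pipeline = new MRPipeline(PipelineSketch.class);

    // Convenience read of a text file into a PCollection<String>.
    PCollection<String> lines = pipeline.readTextFile(args[0]);

    // Convenience write back out as text; the jobs themselves run lazily.
    pipeline.writeTextFile(lines, args[1]);

    // done() runs any remaining jobs and cleans up intermediate files.
    PipelineResult result = pipeline.done();
    System.exit(result.succeeded() ? 0 : 1);
  }
}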

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/PipelineExecution.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/PipelineExecution.java b/crunch/src/main/java/org/apache/crunch/PipelineExecution.java
deleted file mode 100644
index fc6bb91..0000000
--- a/crunch/src/main/java/org/apache/crunch/PipelineExecution.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import java.util.concurrent.TimeUnit;
-
-/**
- * A handle to allow clients to control a Crunch pipeline as it runs.
- *
- * This interface is thread-safe.
- */
-public interface PipelineExecution {
-
-  enum Status { READY, RUNNING, SUCCEEDED, FAILED, KILLED }
-
-  /** Returns the contents of the .dot file that a client can use to graph the Crunch
-   * execution plan for this pipeline.
-   */
-  String getPlanDotFile();
-
-  /** Blocks until the pipeline completes or the specified waiting time elapses. */
-  void waitFor(long timeout, TimeUnit timeUnit) throws InterruptedException;
-
-  /** Blocks until the pipeline completes, i.e. reaches {@code SUCCEEDED}, {@code FAILED} or {@code KILLED}. */
-  void waitUntilDone() throws InterruptedException;
-
-  Status getStatus();
-
-  /** Retrieves the result of the pipeline if it has completed, otherwise {@code null}. */
-  PipelineResult getResult();
-
-  /**
-   * Kills the pipeline if it is running, no-op otherwise.
-   *
-   * This method only delivers a kill signal to the pipeline, and does not guarantee that the pipeline has exited on return.
-   * To wait until it has completely exited, use {@link #waitUntilDone()} after this call.
-   */
-  void kill() throws InterruptedException;
-}
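
A small sketch of driving the asynchronous API above: start the jobs with runAsync(), wait with a timeout, and kill the pipeline if it is still running. The Pipeline argument is assumed to have been constructed and populated elsewhere.

import java.util.concurrent.TimeUnit;

import org.apache.crunch.Pipeline;
import org.apache.crunch.PipelineExecution;
import org.apache.crunch.PipelineResult;

public class AsyncRunSketch {
  // Assumes 'pipeline' already had its reads and writes declared elsewhere.
  static PipelineResult runWithTimeout(Pipeline pipeline) throws InterruptedException {
    PipelineExecution execution = pipeline.runAsync();

    // Give the jobs up to ten minutes before giving up on them.
    execution.waitFor(10, TimeUnit.MINUTES);

    if (execution.getStatus() == PipelineExecution.Status.RUNNING) {
      execution.kill();           // only delivers the kill signal...
      execution.waitUntilDone();  // ...so wait for the pipeline to exit.
    }
    return execution.getResult(); // null unless the pipeline completed
  }
}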

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/PipelineResult.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/PipelineResult.java b/crunch/src/main/java/org/apache/crunch/PipelineResult.java
deleted file mode 100644
index 90b1067..0000000
--- a/crunch/src/main/java/org/apache/crunch/PipelineResult.java
+++ /dev/null
@@ -1,76 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import java.util.List;
-
-import org.apache.hadoop.mapreduce.Counter;
-import org.apache.hadoop.mapreduce.Counters;
-
-import com.google.common.collect.ImmutableList;
-
-/**
- * Container for the results of a call to {@code run} or {@code done} on the
- * Pipeline interface that includes details and statistics about the component
- * stages of the data pipeline.
- */
-public class PipelineResult {
-
-  public static class StageResult {
-
-    private final String stageName;
-    private final Counters counters;
-
-    public StageResult(String stageName, Counters counters) {
-      this.stageName = stageName;
-      this.counters = counters;
-    }
-
-    public String getStageName() {
-      return stageName;
-    }
-
-    public Counters getCounters() {
-      return counters;
-    }
-
-    public Counter findCounter(Enum<?> key) {
-      return counters.findCounter(key);
-    }
-
-    public long getCounterValue(Enum<?> key) {
-      return findCounter(key).getValue();
-    }
-  }
-
-  public static final PipelineResult EMPTY = new PipelineResult(ImmutableList.<StageResult> of());
-
-  private final List<StageResult> stageResults;
-
-  public PipelineResult(List<StageResult> stageResults) {
-    this.stageResults = ImmutableList.copyOf(stageResults);
-  }
-
-  public boolean succeeded() {
-    return !stageResults.isEmpty();
-  }
-
-  public List<StageResult> getStageResults() {
-    return stageResults;
-  }
-}
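
The accessors above lend themselves to simple post-run reporting. In the sketch below, the counter enum is a hypothetical application-defined enum, not something this class provides.

import org.apache.crunch.PipelineResult;
import org.apache.crunch.PipelineResult.StageResult;

public class ResultReportSketch {
  // A hypothetical application counter enum; any Enum<?> works with findCounter().
  enum MyCounters { RECORDS_SEEN }

  static void report(PipelineResult result) {
    System.out.println("pipeline succeeded: " + result.succeeded());
    for (StageResult stage : result.getStageResults()) {
      // Each stage carries the Counters of the MapReduce job that ran it.
      long seen = stage.getCounterValue(MyCounters.RECORDS_SEEN);
      System.out.println(stage.getStageName() + ": " + seen + " records seen");
    }
  }
}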

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/Source.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/Source.java b/crunch/src/main/java/org/apache/crunch/Source.java
deleted file mode 100644
index f54d135..0000000
--- a/crunch/src/main/java/org/apache/crunch/Source.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import java.io.IOException;
-
-import org.apache.crunch.types.PType;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.Job;
-
-/**
- * A {@code Source} represents a data set that serves as input to one or more
- * MapReduce jobs.
- * 
- */
-public interface Source<T> {
-  /**
-   * Returns the {@code PType} for this source.
-   */
-  PType<T> getType();
-
-  /**
-   * Configure the given job to use this source as an input.
-   * 
-   * @param job
-   *          The job to configure
-   * @param inputId
-   *          For a multi-input job, an identifier for this input to the job
-   * @throws IOException If there is an error while configuring the job's input
-   */
-  void configureSource(Job job, int inputId) throws IOException;
-
-  /**
-   * Returns the number of bytes in this {@code Source}.
-   */
-  long getSize(Configuration configuration);
-}
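
Client code normally does not call configureSource itself; it hands the Source to Pipeline.read and lets the planner configure each job. A brief sketch, assuming the From factory class from Crunch's io package as a convenient way to build a Source<String>; the path is a placeholder.

import org.apache.crunch.PCollection;
import org.apache.crunch.Pipeline;
import org.apache.crunch.Source;
import org.apache.crunch.io.From;

public class SourceSketch {
  static PCollection<String> readLogs(Pipeline pipeline) {
    // From.textFile builds a Source<String> over a text file path (assumed helper).
    Source<String> logs = From.textFile("/data/logs/part-00000");
    // The pipeline calls configureSource on each job that reads this input.
    return pipeline.read(logs);
  }
}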

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/SourceTarget.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/SourceTarget.java b/crunch/src/main/java/org/apache/crunch/SourceTarget.java
deleted file mode 100644
index 09c03c6..0000000
--- a/crunch/src/main/java/org/apache/crunch/SourceTarget.java
+++ /dev/null
@@ -1,26 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-/**
- * An interface for classes that implement both the {@code Source} and the
- * {@code Target} interfaces.
- *
- */
-public interface SourceTarget<T> extends Source<T>, Target {
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/TableSource.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/TableSource.java b/crunch/src/main/java/org/apache/crunch/TableSource.java
deleted file mode 100644
index ff27346..0000000
--- a/crunch/src/main/java/org/apache/crunch/TableSource.java
+++ /dev/null
@@ -1,28 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import org.apache.crunch.types.PTableType;
-
-/**
- * The interface for {@code Source} implementations that return a {@link PTable}.
- * 
- */
-public interface TableSource<K, V> extends Source<Pair<K, V>> {
-  PTableType<K, V> getTableType();
-}
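
A companion sketch for the key/value case: a TableSource read through Pipeline.read yields a PTable. It assumes the From.sequenceFile helper and the Writables type factory from elsewhere in crunch-core; the path is a placeholder.

import org.apache.crunch.PTable;
import org.apache.crunch.Pipeline;
import org.apache.crunch.TableSource;
import org.apache.crunch.io.From;
import org.apache.crunch.types.writable.Writables;

public class TableSourceSketch {
  static PTable<String, Long> readCounts(Pipeline pipeline) {
    // Separate key and value PTypes yield a TableSource (assumed helpers).
    TableSource<String, Long> counts =
        From.sequenceFile("/data/counts", Writables.strings(), Writables.longs());
    // Reading a TableSource produces a PTable rather than a plain PCollection.
    return pipeline.read(counts);
  }
}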

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/TableSourceTarget.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/TableSourceTarget.java b/crunch/src/main/java/org/apache/crunch/TableSourceTarget.java
deleted file mode 100644
index 9b1ed34..0000000
--- a/crunch/src/main/java/org/apache/crunch/TableSourceTarget.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-/**
- * An interface for classes that implement both the {@code TableSource} and the
- * {@code Target} interfaces.
- */
-public interface TableSourceTarget<K, V> extends TableSource<K, V>, SourceTarget<Pair<K, V>> {
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/Target.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/Target.java b/crunch/src/main/java/org/apache/crunch/Target.java
deleted file mode 100644
index 0a0c23d..0000000
--- a/crunch/src/main/java/org/apache/crunch/Target.java
+++ /dev/null
@@ -1,83 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import org.apache.crunch.io.OutputHandler;
-import org.apache.crunch.types.PType;
-import org.apache.hadoop.conf.Configuration;
-
-/**
- * A {@code Target} represents the output destination of a Crunch {@code PCollection}
- * in the context of a Crunch job.
- */
-public interface Target {
-
-  /**
-   * An enum to represent different options the client may specify
-   * for handling the case where the output path, table, etc. referenced
-   * by a {@code Target} already exists.
-   */
-  enum WriteMode {
-    /**
-     * Check to see if the output target already exists before running
-     * the pipeline, and if it does, print an error and throw an exception.
-     */
-    DEFAULT,
-    
-    /**
-     * Check to see if the output target already exists, and if it does,
-     * delete it and overwrite it with the new output (if any).
-     */
-    OVERWRITE,
-
-    /**
-     * If the output target does not exist, create it. If it does exist,
-     * add the output of this pipeline to the target. This was the
-     * behavior in Crunch up to version 0.4.0.
-     */
-    APPEND
-  }
-
-  /**
-   * Apply the given {@code WriteMode} to this {@code Target} instance.
-   * 
-   * @param writeMode The strategy for handling existing outputs
-   * @param conf The ever-useful {@code Configuration} instance
-   */
-  void handleExisting(WriteMode writeMode, Configuration conf);
-  
-  /**
-   * Checks to see if this {@code Target} instance is compatible with the
-   * given {@code PType}.
-   * 
-   * @param handler The {@link OutputHandler} that is managing the output for the job
-   * @param ptype The {@code PType} to check
-   * @return True if this Target can write data in the form of the given {@code PType},
-   * false otherwise
-   */
-  boolean accept(OutputHandler handler, PType<?> ptype);
-
-  /**
-   * Attempt to create the {@code SourceTarget} type that corresponds to this {@code Target}
-   * for the given {@code PType}, if possible. If it is not possible, return {@code null}.
-   * 
-   * @param ptype The {@code PType} to use in constructing the {@code SourceTarget}
-   * @return A new {@code SourceTarget} or null if such a {@code SourceTarget} does not exist
-   */
-  <T> SourceTarget<T> asSourceTarget(PType<T> ptype);
-}
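
To show how the WriteMode options above are used in practice, the sketch below overwrites an existing output directory instead of failing the DEFAULT existence check. It assumes the To factory class from Crunch's io package; the path is a placeholder.

import org.apache.crunch.PCollection;
import org.apache.crunch.Pipeline;
import org.apache.crunch.Target;
import org.apache.crunch.io.To;

public class OverwriteSketch {
  static void writeReport(Pipeline pipeline, PCollection<String> report) {
    // To.textFile builds a text-file Target for the given path (assumed helper).
    Target out = To.textFile("/reports/daily");
    // OVERWRITE deletes any existing output before the pipeline writes new data.
    pipeline.write(report, out, Target.WriteMode.OVERWRITE);
  }
}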

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/Tuple.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/Tuple.java b/crunch/src/main/java/org/apache/crunch/Tuple.java
deleted file mode 100644
index 4e602ff..0000000
--- a/crunch/src/main/java/org/apache/crunch/Tuple.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-/**
- * A fixed-size collection of Objects, used in Crunch for representing joins
- * between {@code PCollection}s.
- * 
- */
-public interface Tuple {
-
-  /**
-   * Returns the Object at the given index.
-   */
-  Object get(int index);
-
-  /**
-   * Returns the number of elements in this Tuple.
-   */
-  int size();
-}
\ No newline at end of file
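
Because every concrete tuple class implements these two methods, code can walk a tuple's elements without knowing its arity. An illustrative sketch:

import org.apache.crunch.Tuple;
import org.apache.crunch.Tuple3;

public class TupleWalkSketch {
  // Works for Pair, Tuple3, Tuple4 and TupleN alike, since all implement Tuple.
  static String describe(Tuple t) {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < t.size(); i++) {
      sb.append(i == 0 ? "" : " | ").append(t.get(i));
    }
    return sb.toString();
  }

  public static void main(String[] args) {
    System.out.println(describe(Tuple3.of("a", 1, 2.5)));  // prints: a | 1 | 2.5
  }
}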

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/Tuple3.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/Tuple3.java b/crunch/src/main/java/org/apache/crunch/Tuple3.java
deleted file mode 100644
index 4372811..0000000
--- a/crunch/src/main/java/org/apache/crunch/Tuple3.java
+++ /dev/null
@@ -1,96 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import org.apache.commons.lang.builder.HashCodeBuilder;
-
-/**
- * A convenience class for three-element {@link Tuple}s.
- */
-public class Tuple3<V1, V2, V3> implements Tuple {
-
-  private final V1 first;
-  private final V2 second;
-  private final V3 third;
-
-  public static <A, B, C> Tuple3<A, B, C> of(A a, B b, C c) {
-    return new Tuple3<A, B, C>(a, b, c);
-  }
-
-  public Tuple3(V1 first, V2 second, V3 third) {
-    this.first = first;
-    this.second = second;
-    this.third = third;
-  }
-
-  public V1 first() {
-    return first;
-  }
-
-  public V2 second() {
-    return second;
-  }
-
-  public V3 third() {
-    return third;
-  }
-
-  public Object get(int index) {
-    switch (index) {
-    case 0:
-      return first;
-    case 1:
-      return second;
-    case 2:
-      return third;
-    default:
-      throw new ArrayIndexOutOfBoundsException();
-    }
-  }
-
-  public int size() {
-    return 3;
-  }
-
-  @Override
-  public int hashCode() {
-    HashCodeBuilder hcb = new HashCodeBuilder();
-    return hcb.append(first).append(second).append(third).toHashCode();
-  }
-
-  @Override
-  public boolean equals(Object obj) {
-    if (this == obj)
-      return true;
-    if (obj == null)
-      return false;
-    if (getClass() != obj.getClass())
-      return false;
-    Tuple3<?, ?, ?> other = (Tuple3<?, ?, ?>) obj;
-    return (first == other.first || (first != null && first.equals(other.first)))
-        && (second == other.second || (second != null && second.equals(other.second)))
-        && (third == other.third || (third != null && third.equals(other.third)));
-  }
-
-  @Override
-  public String toString() {
-    StringBuilder sb = new StringBuilder("Tuple3[");
-    sb.append(first).append(",").append(second).append(",").append(third);
-    return sb.append("]").toString();
-  }
-}
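
A short usage sketch for the class above, covering the static factory, the positional accessors, and the value-based equality defined by equals and hashCode:

import org.apache.crunch.Tuple3;

public class Tuple3Sketch {
  public static void main(String[] args) {
    Tuple3<String, Integer, Double> t = Tuple3.of("megabytes", 512, 0.5);

    // Named accessors and index-based access refer to the same elements.
    assert t.first().equals(t.get(0));
    assert t.second() == 512;
    assert t.third() == 0.5;

    // equals() compares element by element, so an identical tuple is equal.
    assert t.equals(Tuple3.of("megabytes", 512, 0.5));

    System.out.println(t);  // prints: Tuple3[megabytes,512,0.5]
  }
}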

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/Tuple4.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/Tuple4.java b/crunch/src/main/java/org/apache/crunch/Tuple4.java
deleted file mode 100644
index f161371..0000000
--- a/crunch/src/main/java/org/apache/crunch/Tuple4.java
+++ /dev/null
@@ -1,105 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import org.apache.commons.lang.builder.HashCodeBuilder;
-
-/**
- * A convenience class for four-element {@link Tuple}s.
- */
-public class Tuple4<V1, V2, V3, V4> implements Tuple {
-
-  private final V1 first;
-  private final V2 second;
-  private final V3 third;
-  private final V4 fourth;
-
-  public static <A, B, C, D> Tuple4<A, B, C, D> of(A a, B b, C c, D d) {
-    return new Tuple4<A, B, C, D>(a, b, c, d);
-  }
-
-  public Tuple4(V1 first, V2 second, V3 third, V4 fourth) {
-    this.first = first;
-    this.second = second;
-    this.third = third;
-    this.fourth = fourth;
-  }
-
-  public V1 first() {
-    return first;
-  }
-
-  public V2 second() {
-    return second;
-  }
-
-  public V3 third() {
-    return third;
-  }
-
-  public V4 fourth() {
-    return fourth;
-  }
-
-  public Object get(int index) {
-    switch (index) {
-    case 0:
-      return first;
-    case 1:
-      return second;
-    case 2:
-      return third;
-    case 3:
-      return fourth;
-    default:
-      throw new ArrayIndexOutOfBoundsException();
-    }
-  }
-
-  public int size() {
-    return 4;
-  }
-
-  @Override
-  public int hashCode() {
-    HashCodeBuilder hcb = new HashCodeBuilder();
-    return hcb.append(first).append(second).append(third).append(fourth).toHashCode();
-  }
-
-  @Override
-  public boolean equals(Object obj) {
-    if (this == obj)
-      return true;
-    if (obj == null)
-      return false;
-    if (getClass() != obj.getClass())
-      return false;
-    Tuple4<?, ?, ?, ?> other = (Tuple4<?, ?, ?, ?>) obj;
-    return (first == other.first || (first != null && first.equals(other.first)))
-        && (second == other.second || (second != null && second.equals(other.second)))
-        && (third == other.third || (third != null && third.equals(other.third)))
-        && (fourth == other.fourth || (fourth != null && fourth.equals(other.fourth)));
-  }
-
-  @Override
-  public String toString() {
-    StringBuilder sb = new StringBuilder("Tuple4[");
-    sb.append(first).append(",").append(second).append(",").append(third);
-    return sb.append(",").append(fourth).append("]").toString();
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/TupleN.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/TupleN.java b/crunch/src/main/java/org/apache/crunch/TupleN.java
deleted file mode 100644
index e5eceb5..0000000
--- a/crunch/src/main/java/org/apache/crunch/TupleN.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch;
-
-import java.util.Arrays;
-
-import org.apache.commons.lang.builder.HashCodeBuilder;
-
-/**
- * A {@link Tuple} instance for an arbitrary number of values.
- */
-public class TupleN implements Tuple {
-
-  private final Object values[];
-
-  public static TupleN of(Object... values) {
-    return new TupleN(values);
-  }
-
-  public TupleN(Object... values) {
-    this.values = new Object[values.length];
-    System.arraycopy(values, 0, this.values, 0, values.length);
-  }
-
-  public Object get(int index) {
-    return values[index];
-  }
-
-  public int size() {
-    return values.length;
-  }
-
-  @Override
-  public int hashCode() {
-    HashCodeBuilder hcb = new HashCodeBuilder();
-    for (Object v : values) {
-      hcb.append(v);
-    }
-    return hcb.toHashCode();
-  }
-
-  @Override
-  public boolean equals(Object obj) {
-    if (this == obj)
-      return true;
-    if (obj == null)
-      return false;
-    if (getClass() != obj.getClass())
-      return false;
-    TupleN other = (TupleN) obj;
-    return Arrays.equals(this.values, other.values);
-  }
-
-  @Override
-  public String toString() {
-    return Arrays.toString(values);
-  }
-}
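
TupleN covers arities beyond four. A brief sketch of the class above; note that the constructor copies the incoming array, so mutating the caller's array afterwards does not affect the tuple:

import org.apache.crunch.TupleN;

public class TupleNSketch {
  public static void main(String[] args) {
    Object[] fields = new Object[] { "row-17", 42L, true, 3.14, "extra" };
    TupleN t = new TupleN(fields);

    fields[0] = "mutated";            // does not affect t: the values were copied
    System.out.println(t.get(0));     // prints: row-17
    System.out.println(t.size());     // prints: 5

    // equals() is based on Arrays.equals over the element values.
    System.out.println(t.equals(TupleN.of("row-17", 42L, true, 3.14, "extra")));  // true
  }
}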

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/fn/Aggregators.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/fn/Aggregators.java b/crunch/src/main/java/org/apache/crunch/fn/Aggregators.java
deleted file mode 100644
index 0ac79e2..0000000
--- a/crunch/src/main/java/org/apache/crunch/fn/Aggregators.java
+++ /dev/null
@@ -1,1111 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.fn;
-
-import java.math.BigInteger;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Set;
-import java.util.SortedSet;
-
-import org.apache.crunch.Aggregator;
-import org.apache.crunch.CombineFn;
-import org.apache.crunch.Emitter;
-import org.apache.crunch.PGroupedTable;
-import org.apache.crunch.Pair;
-import org.apache.crunch.Tuple;
-import org.apache.crunch.Tuple3;
-import org.apache.crunch.Tuple4;
-import org.apache.crunch.TupleN;
-import org.apache.crunch.util.Tuples;
-import org.apache.hadoop.conf.Configuration;
-
-import com.google.common.base.Joiner;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
-
-
-/**
- * A collection of pre-defined {@link org.apache.crunch.Aggregator}s.
- *
- * <p>The factory methods of this class return {@link org.apache.crunch.Aggregator}
- * instances that you can use to combine the values of a {@link PGroupedTable}.
- * In most cases, they turn a multimap (multiple entries per key) into a map (one
- * entry per key).</p>
- *
- * <p><strong>Note</strong>: When using composed aggregators, like those built by the
- * {@link #pairAggregator(Aggregator, Aggregator) pairAggregator()}
- * factory method, you typically don't want to reuse the same child aggregator instance
- * more than once, even if all child aggregators have the same type. Instead, create a
- * fresh instance for each position, as in this example:</p>
- *
- * <pre>
- *   PTable&lt;K, Pair&lt;Long, Long&gt;&gt; result = groupedTable.combineValues(
- *      pairAggregator(SUM_LONGS(), SUM_LONGS())
- *   );
- * </pre>
- */
-public final class Aggregators {
-
-  private Aggregators() {
-    // utility class, not for instantiation
-  }
-
-  /**
-   * Sum up all {@code long} values.
-   * @return The newly constructed instance
-   */
-  public static Aggregator<Long> SUM_LONGS() {
-    return new SumLongs();
-  }
-
-  /**
-   * Sum up all {@code int} values.
-   * @return The newly constructed instance
-   */
-  public static Aggregator<Integer> SUM_INTS() {
-    return new SumInts();
-  }
-
-  /**
-   * Sum up all {@code float} values.
-   * @return The newly constructed instance
-   */
-  public static Aggregator<Float> SUM_FLOATS() {
-    return new SumFloats();
-  }
-
-  /**
-   * Sum up all {@code double} values.
-   * @return The newly constructed instance
-   */
-  public static Aggregator<Double> SUM_DOUBLES() {
-    return new SumDoubles();
-  }
-
-  /**
-   * Sum up all {@link BigInteger} values.
-   * @return The newly constructed instance
-   */
-  public static Aggregator<BigInteger> SUM_BIGINTS() {
-    return new SumBigInts();
-  }
-
-  /**
-   * Return the maximum of all given {@code long} values.
-   * @return The newly constructed instance
-   */
-  public static Aggregator<Long> MAX_LONGS() {
-    return new MaxLongs();
-  }
-
-  /**
-   * Return the {@code n} largest {@code long} values (or fewer if there are fewer
-   * values than {@code n}).
-   * @param n The number of values to return
-   * @return The newly constructed instance
-   */
-  public static Aggregator<Long> MAX_LONGS(int n) {
-    return new MaxNAggregator<Long>(n);
-  }
-
-  /**
-   * Return the maximum of all given {@code int} values.
-   * @return The newly constructed instance
-   */
-  public static Aggregator<Integer> MAX_INTS() {
-    return new MaxInts();
-  }
-
-  /**
-   * Return the {@code n} largest {@code int} values (or fewer if there are fewer
-   * values than {@code n}).
-   * @param n The number of values to return
-   * @return The newly constructed instance
-   */
-  public static Aggregator<Integer> MAX_INTS(int n) {
-    return new MaxNAggregator<Integer>(n);
-  }
-
-  /**
-   * Return the maximum of all given {@code float} values.
-   * @return The newly constructed instance
-   */
-  public static Aggregator<Float> MAX_FLOATS() {
-    return new MaxFloats();
-  }
-
-  /**
-   * Return the {@code n} largest {@code float} values (or fewer if there are fewer
-   * values than {@code n}).
-   * @param n The number of values to return
-   * @return The newly constructed instance
-   */
-  public static Aggregator<Float> MAX_FLOATS(int n) {
-    return new MaxNAggregator<Float>(n);
-  }
-
-  /**
-   * Return the maximum of all given {@code double} values.
-   * @return The newly constructed instance
-   */
-  public static Aggregator<Double> MAX_DOUBLES() {
-    return new MaxDoubles();
-  }
-
-  /**
-   * Return the {@code n} largest {@code double} values (or fewer if there are fewer
-   * values than {@code n}).
-   * @param n The number of values to return
-   * @return The newly constructed instance
-   */
-  public static Aggregator<Double> MAX_DOUBLES(int n) {
-    return new MaxNAggregator<Double>(n);
-  }
-
-  /**
-   * Return the maximum of all given {@link BigInteger} values.
-   * @return The newly constructed instance
-   */
-  public static Aggregator<BigInteger> MAX_BIGINTS() {
-    return new MaxBigInts();
-  }
-
-  /**
-   * Return the {@code n} largest {@link BigInteger} values (or fewer if there are fewer
-   * values than {@code n}).
-   * @param n The number of values to return
-   * @return The newly constructed instance
-   */
-  public static Aggregator<BigInteger> MAX_BIGINTS(int n) {
-    return new MaxNAggregator<BigInteger>(n);
-  }
-
-  /**
-   * Return the {@code n} largest values (or fewer if there are fewer
-   * values than {@code n}).
-   * @param n The number of values to return
-   * @param cls The type of the values to aggregate (must implement {@link Comparable}!)
-   * @return The newly constructed instance
-   */
-  public static <V extends Comparable<V>> Aggregator<V> MAX_N(int n, Class<V> cls) {
-    return new MaxNAggregator<V>(n);
-  }
-
-  /**
-   * Return the minimum of all given {@code long} values.
-   * @return The newly constructed instance
-   */
-  public static Aggregator<Long> MIN_LONGS() {
-    return new MinLongs();
-  }
-
-  /**
-   * Return the {@code n} smallest {@code long} values (or fewer if there are fewer
-   * values than {@code n}).
-   * @param n The number of values to return
-   * @return The newly constructed instance
-   */
-  public static Aggregator<Long> MIN_LONGS(int n) {
-    return new MinNAggregator<Long>(n);
-  }
-
-  /**
-   * Return the minimum of all given {@code int} values.
-   * @return The newly constructed instance
-   */
-  public static Aggregator<Integer> MIN_INTS() {
-    return new MinInts();
-  }
-
-  /**
-   * Return the {@code n} smallest {@code int} values (or fewer if there are fewer
-   * values than {@code n}).
-   * @param n The number of values to return
-   * @return The newly constructed instance
-   */
-  public static Aggregator<Integer> MIN_INTS(int n) {
-    return new MinNAggregator<Integer>(n);
-  }
-
-  /**
-   * Return the minimum of all given {@code float} values.
-   * @return The newly constructed instance
-   */
-  public static Aggregator<Float> MIN_FLOATS() {
-    return new MinFloats();
-  }
-
-  /**
-   * Return the {@code n} smallest {@code float} values (or fewer if there are fewer
-   * values than {@code n}).
-   * @param n The number of values to return
-   * @return The newly constructed instance
-   */
-  public static Aggregator<Float> MIN_FLOATS(int n) {
-    return new MinNAggregator<Float>(n);
-  }
-
-  /**
-   * Return the minimum of all given {@code double} values.
-   * @return The newly constructed instance
-   */
-  public static Aggregator<Double> MIN_DOUBLES() {
-    return new MinDoubles();
-  }
-
-  /**
-   * Return the {@code n} smallest {@code double} values (or fewer if there are fewer
-   * values than {@code n}).
-   * @param n The number of values to return
-   * @return The newly constructed instance
-   */
-  public static Aggregator<Double> MIN_DOUBLES(int n) {
-    return new MinNAggregator<Double>(n);
-  }
-
-  /**
-   * Return the minimum of all given {@link BigInteger} values.
-   * @return The newly constructed instance
-   */
-  public static Aggregator<BigInteger> MIN_BIGINTS() {
-    return new MinBigInts();
-  }
-
-  /**
-   * Return the {@code n} smallest {@link BigInteger} values (or fewer if there are fewer
-   * values than {@code n}).
-   * @param n The number of values to return
-   * @return The newly constructed instance
-   */
-  public static Aggregator<BigInteger> MIN_BIGINTS(int n) {
-    return new MinNAggregator<BigInteger>(n);
-  }
-
-  /**
-   * Return the {@code n} smallest values (or fewer if there are fewer
-   * values than {@code n}).
-   * @param n The number of values to return
-   * @param cls The type of the values to aggregate (must implement {@link Comparable}!)
-   * @return The newly constructed instance
-   */
-  public static <V extends Comparable<V>> Aggregator<V> MIN_N(int n, Class<V> cls) {
-    return new MinNAggregator<V>(n);
-  }
-
-  /**
-   * Return the first {@code n} values (or fewer if there are fewer values than {@code n}).
-   *
-   * @param n The number of values to return
-   * @return The newly constructed instance
-   */
-  public static <V> Aggregator<V> FIRST_N(int n) {
-    return new FirstNAggregator<V>(n);
-  }
-
-  /**
-   * Return the last {@code n} values (or fewer if there are fewer values than {@code n}).
-   *
-   * @param n The number of values to return
-   * @return The newly constructed instance
-   */
-  public static <V> Aggregator<V> LAST_N(int n) {
-    return new LastNAggregator<V>(n);
-  }
-  
-  /**
-   * Concatenate strings, with a separator between strings. There
-   * is no limit on the length of the concatenated string.
-   *
-   * <p><em>Note: String concatenation is not commutative, which means the
-   * result of the aggregation is not deterministic!</em></p>
-   *
-   * @param separator
-   *            the separator to be inserted between each pair of strings
-   * @param skipNull
-   *            whether null values should be skipped. If this is set to false
-   *            and a null value is encountered, a NullPointerException is
-   *            thrown.
-   * @return The newly constructed instance
-   */
-  public static Aggregator<String> STRING_CONCAT(String separator, boolean skipNull) {
-    return new StringConcatAggregator(separator, skipNull);
-  }
-
-  /**
-   * Concatenate strings, with a separator between strings. You can specify
-   * the maximum length of the output string and of the input strings, if
-   * they are &gt; 0. If a value is &lt;= 0, there is no limit.
-   *
-   * <p>Any string that is too long (or any string that would make the output
-   * too long) will be silently discarded.</p>
-   *
-   * <p><em>Note: String concatenation is not commutative, which means the
-   * result of the aggregation is not deterministic!</em></p>
-   *
-   * @param separator
-   *            the separator to be inserted between each pair of strings
-   * @param skipNull
-   *            whether null values should be skipped. If this is set to false
-   *            and a null value is encountered, a NullPointerException is
-   *            thrown.
-   * @param maxOutputLength
-   *            the maximum length of the output string. If it's set &lt;= 0,
-   *            there is no limit. The number of characters of the output
-   *            string will be &lt; maxOutputLength.
-   * @param maxInputLength
-   *            the maximum length of the input strings. If it's set &lt;= 0,
-   *            there is no limit. An input string must be shorter than
-   *            maxInputLength characters to be concatenated.
-   * @return The newly constructed instance
-   */
-  public static Aggregator<String> STRING_CONCAT(String separator, boolean skipNull,
-      long maxOutputLength, long maxInputLength) {
-    return new StringConcatAggregator(separator, skipNull, maxOutputLength, maxInputLength);
-  }
-
-  /**
-   * Collect the unique elements of the input, as defined by the {@code equals} method for
-   * the input objects. No guarantees are made about the order in which the final elements
-   * will be returned.
-   * 
-   * @return The newly constructed instance
-   */
-  public static <V> Aggregator<V> UNIQUE_ELEMENTS() {
-    return new SetAggregator<V>();
-  }
-  
-  /**
-   * Collect a sample of unique elements from the input, where 'unique' is defined by
-   * the {@code equals} method for the input objects. No guarantees are made about which
-   * elements will be returned, simply that there will not be any more than the given sample
-   * size for any key.
-   * 
-   * @param maximumSampleSize The maximum number of unique elements to return per key
-   * @return The newly constructed instance
-   */
-  public static <V> Aggregator<V> SAMPLE_UNIQUE_ELEMENTS(int maximumSampleSize) {
-    return new SetAggregator<V>(maximumSampleSize);
-  }
-  
-  /**
-   * Apply separate aggregators to each component of a {@link Pair}.
-   */
-  public static <V1, V2> Aggregator<Pair<V1, V2>> pairAggregator(
-      Aggregator<V1> a1, Aggregator<V2> a2) {
-    return new PairAggregator<V1, V2>(a1, a2);
-  }
-
-  /**
-   * Apply separate aggregators to each component of a {@link Tuple3}.
-   */
-  public static <V1, V2, V3> Aggregator<Tuple3<V1, V2, V3>> tripAggregator(
-      Aggregator<V1> a1, Aggregator<V2> a2, Aggregator<V3> a3) {
-    return new TripAggregator<V1, V2, V3>(a1, a2, a3);
-  }
-
-  /**
-   * Apply separate aggregators to each component of a {@link Tuple4}.
-   */
-  public static <V1, V2, V3, V4> Aggregator<Tuple4<V1, V2, V3, V4>> quadAggregator(
-      Aggregator<V1> a1, Aggregator<V2> a2, Aggregator<V3> a3, Aggregator<V4> a4) {
-    return new QuadAggregator<V1, V2, V3, V4>(a1, a2, a3, a4);
-  }
-
-  /**
-   * Apply separate aggregators to each component of a {@link Tuple}.
-   */
-  public static Aggregator<TupleN> tupleAggregator(Aggregator<?>... aggregators) {
-    return new TupleNAggregator(aggregators);
-  }
-
-  /**
-   * Wrap a {@link CombineFn} adapter around the given aggregator.
-   *
-   * @param aggregator The instance to wrap
-   * @return A {@link CombineFn} delegating to {@code aggregator}
-   */
-  public static final <K, V> CombineFn<K, V> toCombineFn(Aggregator<V> aggregator) {
-    return new AggregatorCombineFn<K, V>(aggregator);
-  }
-
-  /**
-   * Base class for aggregators that do not require any initialization.
-   */
-  public static abstract class SimpleAggregator<T> implements Aggregator<T> {
-    @Override
-    public void initialize(Configuration conf) {
-      // No-op
-    }
-  }
-
-  /**
-   * A {@code CombineFn} that delegates all of the actual work to an
-   * {@code Aggregator} instance.
-   */
-  private static class AggregatorCombineFn<K, V> extends CombineFn<K, V> {
-    // TODO: Has to be fully qualified until CombineFn.Aggregator can be removed.
-    private final org.apache.crunch.Aggregator<V> aggregator;
-
-    public AggregatorCombineFn(org.apache.crunch.Aggregator<V> aggregator) {
-      this.aggregator = aggregator;
-    }
-
-    @Override
-    public void initialize() {
-      aggregator.initialize(getConfiguration());
-    }
-
-    @Override
-    public void process(Pair<K, Iterable<V>> input, Emitter<Pair<K, V>> emitter) {
-      aggregator.reset();
-      for (V v : input.second()) {
-        aggregator.update(v);
-      }
-      for (V v : aggregator.results()) {
-        emitter.emit(Pair.of(input.first(), v));
-      }
-    }
-  }
-
-  private static class SumLongs extends SimpleAggregator<Long> {
-    private long sum = 0;
-
-    @Override
-    public void reset() {
-      sum = 0;
-    }
-
-    @Override
-    public void update(Long next) {
-      sum += next;
-    }
-
-    @Override
-    public Iterable<Long> results() {
-      return ImmutableList.of(sum);
-    }
-  }
-
-  private static class SumInts extends SimpleAggregator<Integer> {
-    private int sum = 0;
-
-    @Override
-    public void reset() {
-      sum = 0;
-    }
-
-    @Override
-    public void update(Integer next) {
-      sum += next;
-    }
-
-    @Override
-    public Iterable<Integer> results() {
-      return ImmutableList.of(sum);
-    }
-  }
-
-  private static class SumFloats extends SimpleAggregator<Float> {
-    private float sum = 0;
-
-    @Override
-    public void reset() {
-      sum = 0f;
-    }
-
-    @Override
-    public void update(Float next) {
-      sum += next;
-    }
-
-    @Override
-    public Iterable<Float> results() {
-      return ImmutableList.of(sum);
-    }
-  }
-
-  private static class SumDoubles extends SimpleAggregator<Double> {
-    private double sum = 0;
-
-    @Override
-    public void reset() {
-      sum = 0.0;
-    }
-
-    @Override
-    public void update(Double next) {
-      sum += next;
-    }
-
-    @Override
-    public Iterable<Double> results() {
-      return ImmutableList.of(sum);
-    }
-  }
-
-  private static class SumBigInts extends SimpleAggregator<BigInteger> {
-    private BigInteger sum = BigInteger.ZERO;
-
-    @Override
-    public void reset() {
-      sum = BigInteger.ZERO;
-    }
-
-    @Override
-    public void update(BigInteger next) {
-      sum = sum.add(next);
-    }
-
-    @Override
-    public Iterable<BigInteger> results() {
-      return ImmutableList.of(sum);
-    }
-  }
-
-  private static class MaxLongs extends SimpleAggregator<Long> {
-    private Long max = null;
-
-    @Override
-    public void reset() {
-      max = null;
-    }
-
-    @Override
-    public void update(Long next) {
-      if (max == null || max < next) {
-        max = next;
-      }
-    }
-
-    @Override
-    public Iterable<Long> results() {
-      return ImmutableList.of(max);
-    }
-  }
-
-  private static class MaxInts extends SimpleAggregator<Integer> {
-    private Integer max = null;
-
-    @Override
-    public void reset() {
-      max = null;
-    }
-
-    @Override
-    public void update(Integer next) {
-      if (max == null || max < next) {
-        max = next;
-      }
-    }
-
-    @Override
-    public Iterable<Integer> results() {
-      return ImmutableList.of(max);
-    }
-  }
-
-  private static class MaxFloats extends SimpleAggregator<Float> {
-    private Float max = null;
-
-    @Override
-    public void reset() {
-      max = null;
-    }
-
-    @Override
-    public void update(Float next) {
-      if (max == null || max < next) {
-        max = next;
-      }
-    }
-
-    @Override
-    public Iterable<Float> results() {
-      return ImmutableList.of(max);
-    }
-  }
-
-  private static class MaxDoubles extends SimpleAggregator<Double> {
-    private Double max = null;
-
-    @Override
-    public void reset() {
-      max = null;
-    }
-
-    @Override
-    public void update(Double next) {
-      if (max == null || max < next) {
-        max = next;
-      }
-    }
-
-    @Override
-    public Iterable<Double> results() {
-      return ImmutableList.of(max);
-    }
-  }
-
-  private static class MaxBigInts extends SimpleAggregator<BigInteger> {
-    private BigInteger max = null;
-
-    @Override
-    public void reset() {
-      max = null;
-    }
-
-    @Override
-    public void update(BigInteger next) {
-      if (max == null || max.compareTo(next) < 0) {
-        max = next;
-      }
-    }
-
-    @Override
-    public Iterable<BigInteger> results() {
-      return ImmutableList.of(max);
-    }
-  }
-
-  private static class MinLongs extends SimpleAggregator<Long> {
-    private Long min = null;
-
-    @Override
-    public void reset() {
-      min = null;
-    }
-
-    @Override
-    public void update(Long next) {
-      if (min == null || min > next) {
-        min = next;
-      }
-    }
-
-    @Override
-    public Iterable<Long> results() {
-      return ImmutableList.of(min);
-    }
-  }
-
-  private static class MinInts extends SimpleAggregator<Integer> {
-    private Integer min = null;
-
-    @Override
-    public void reset() {
-      min = null;
-    }
-
-    @Override
-    public void update(Integer next) {
-      if (min == null || min > next) {
-        min = next;
-      }
-    }
-
-    @Override
-    public Iterable<Integer> results() {
-      return ImmutableList.of(min);
-    }
-  }
-
-  private static class MinFloats extends SimpleAggregator<Float> {
-    private Float min = null;
-
-    @Override
-    public void reset() {
-      min = null;
-    }
-
-    @Override
-    public void update(Float next) {
-      if (min == null || min > next) {
-        min = next;
-      }
-    }
-
-    @Override
-    public Iterable<Float> results() {
-      return ImmutableList.of(min);
-    }
-  }
-
-  private static class MinDoubles extends SimpleAggregator<Double> {
-    private Double min = null;
-
-    @Override
-    public void reset() {
-      min = null;
-    }
-
-    @Override
-    public void update(Double next) {
-      if (min == null || min > next) {
-        min = next;
-      }
-    }
-
-    @Override
-    public Iterable<Double> results() {
-      return ImmutableList.of(min);
-    }
-  }
-
-  private static class MinBigInts extends SimpleAggregator<BigInteger> {
-    private BigInteger min = null;
-
-    @Override
-    public void reset() {
-      min = null;
-    }
-
-    @Override
-    public void update(BigInteger next) {
-      if (min == null || min.compareTo(next) > 0) {
-        min = next;
-      }
-    }
-
-    @Override
-    public Iterable<BigInteger> results() {
-      return ImmutableList.of(min);
-    }
-  }
-
-  private static class MaxNAggregator<V extends Comparable<V>> extends SimpleAggregator<V> {
-    private final int arity;
-    private transient SortedSet<V> elements;
-
-    public MaxNAggregator(int arity) {
-      this.arity = arity;
-    }
-
-    @Override
-    public void reset() {
-      if (elements == null) {
-        elements = Sets.newTreeSet();
-      } else {
-        elements.clear();
-      }
-    }
-
-    @Override
-    public void update(V value) {
-      if (elements.size() < arity) {
-        elements.add(value);
-      } else if (value.compareTo(elements.first()) > 0) {
-        elements.remove(elements.first());
-        elements.add(value);
-      }
-    }
-
-    @Override
-    public Iterable<V> results() {
-      return ImmutableList.copyOf(elements);
-    }
-  }
-
-  private static class MinNAggregator<V extends Comparable<V>> extends SimpleAggregator<V> {
-    private final int arity;
-    private transient SortedSet<V> elements;
-
-    public MinNAggregator(int arity) {
-      this.arity = arity;
-    }
-
-    @Override
-    public void reset() {
-      if (elements == null) {
-        elements = Sets.newTreeSet();
-      } else {
-        elements.clear();
-      }
-    }
-
-    @Override
-    public void update(V value) {
-      if (elements.size() < arity) {
-        elements.add(value);
-      } else if (value.compareTo(elements.last()) < 0) {
-        elements.remove(elements.last());
-        elements.add(value);
-      }
-    }
-
-    @Override
-    public Iterable<V> results() {
-      return ImmutableList.copyOf(elements);
-    }
-  }
-
-  private static class FirstNAggregator<V> extends SimpleAggregator<V> {
-    private final int arity;
-    private final List<V> elements;
-
-    public FirstNAggregator(int arity) {
-      this.arity = arity;
-      this.elements = Lists.newArrayList();
-    }
-
-    @Override
-    public void reset() {
-      elements.clear();
-    }
-
-    @Override
-    public void update(V value) {
-      if (elements.size() < arity) {
-        elements.add(value);
-      }
-    }
-
-    @Override
-    public Iterable<V> results() {
-      return ImmutableList.copyOf(elements);
-    }
-  }
-
-  private static class LastNAggregator<V> extends SimpleAggregator<V> {
-    private final int arity;
-    private final LinkedList<V> elements;
-
-    public LastNAggregator(int arity) {
-      this.arity = arity;
-      this.elements = Lists.newLinkedList();
-    }
-
-    @Override
-    public void reset() {
-      elements.clear();
-    }
-
-    @Override
-    public void update(V value) {
-      elements.add(value);
-      if (elements.size() == arity + 1) {
-        elements.removeFirst();
-      }
-    }
-
-    @Override
-    public Iterable<V> results() {
-      return ImmutableList.copyOf(elements);
-    }
-  }
-
-  private static class StringConcatAggregator extends SimpleAggregator<String> {
-    private final String separator;
-    private final boolean skipNulls;
-    private final long maxOutputLength;
-    private final long maxInputLength;
-    private long currentLength;
-    private final LinkedList<String> list = new LinkedList<String>();
-
-    private transient Joiner joiner;
-
-    public StringConcatAggregator(final String separator, final boolean skipNulls) {
-      this.separator = separator;
-      this.skipNulls = skipNulls;
-      this.maxInputLength = 0;
-      this.maxOutputLength = 0;
-    }
-
-    public StringConcatAggregator(final String separator, final boolean skipNull, final long maxOutputLength, final long maxInputLength) {
-      this.separator = separator;
-      this.skipNulls = skipNull;
-      this.maxOutputLength = maxOutputLength;
-      this.maxInputLength = maxInputLength;
-      this.currentLength = -separator.length();
-    }
-
-    @Override
-    public void reset() {
-      if (joiner == null) {
-        joiner = skipNulls ? Joiner.on(separator).skipNulls() : Joiner.on(separator);
-      }
-      currentLength = -separator.length();
-      list.clear();
-    }
-
-    @Override
-    public void update(final String next) {
-      long length = (next == null) ? 0 : next.length() + separator.length();
-      if (maxOutputLength > 0 && currentLength + length > maxOutputLength
-          || (maxInputLength > 0 && next != null && next.length() > maxInputLength)) {
-        return;
-      }
-      if (maxOutputLength > 0) {
-        currentLength += length;
-      }
-      list.add(next);
-    }
-
-    @Override
-    public Iterable<String> results() {
-      return ImmutableList.of(joiner.join(list));
-    }
-  }
-
-
-  private static abstract class TupleAggregator<T> implements Aggregator<T> {
-    private final List<Aggregator<Object>> aggregators;
-
-    @SuppressWarnings("unchecked")
-    public TupleAggregator(Aggregator<?>... aggregators) {
-      this.aggregators = Lists.newArrayList();
-      for (Aggregator<?> a : aggregators) {
-        this.aggregators.add((Aggregator<Object>) a);
-      }
-    }
-
-    @Override
-    public void initialize(Configuration configuration) {
-      for (Aggregator<?> a : aggregators) {
-        a.initialize(configuration);
-      }
-    }
-
-    @Override
-    public void reset() {
-      for (Aggregator<?> a : aggregators) {
-        a.reset();
-      }
-    }
-
-    protected void updateTuple(Tuple t) {
-      for (int i = 0; i < aggregators.size(); i++) {
-        aggregators.get(i).update(t.get(i));
-      }
-    }
-
-    protected Iterable<Object> results(int index) {
-      return aggregators.get(index).results();
-    }
-  }
-
-  private static class PairAggregator<V1, V2> extends TupleAggregator<Pair<V1, V2>> {
-
-    public PairAggregator(Aggregator<V1> a1, Aggregator<V2> a2) {
-      super(a1, a2);
-    }
-
-    @Override
-    public void update(Pair<V1, V2> value) {
-      updateTuple(value);
-    }
-
-    @SuppressWarnings("unchecked")
-    @Override
-    public Iterable<Pair<V1, V2>> results() {
-      return new Tuples.PairIterable<V1, V2>((Iterable<V1>) results(0), (Iterable<V2>) results(1));
-    }
-  }
-
-  private static class TripAggregator<A, B, C> extends TupleAggregator<Tuple3<A, B, C>> {
-
-    public TripAggregator(Aggregator<A> a1, Aggregator<B> a2, Aggregator<C> a3) {
-      super(a1, a2, a3);
-    }
-
-    @Override
-    public void update(Tuple3<A, B, C> value) {
-      updateTuple(value);
-    }
-
-    @SuppressWarnings("unchecked")
-    @Override
-    public Iterable<Tuple3<A, B, C>> results() {
-      return new Tuples.TripIterable<A, B, C>((Iterable<A>) results(0), (Iterable<B>) results(1),
-          (Iterable<C>) results(2));
-    }
-  }
-
-  private static class QuadAggregator<A, B, C, D> extends TupleAggregator<Tuple4<A, B, C, D>> {
-
-    public QuadAggregator(Aggregator<A> a1, Aggregator<B> a2, Aggregator<C> a3, Aggregator<D> a4) {
-      super(a1, a2, a3, a4);
-    }
-
-    @Override
-    public void update(Tuple4<A, B, C, D> value) {
-      updateTuple(value);
-    }
-
-    @SuppressWarnings("unchecked")
-    @Override
-    public Iterable<Tuple4<A, B, C, D>> results() {
-      return new Tuples.QuadIterable<A, B, C, D>((Iterable<A>) results(0), (Iterable<B>) results(1),
-          (Iterable<C>) results(2), (Iterable<D>) results(3));
-    }
-  }
-
-  private static class TupleNAggregator extends TupleAggregator<TupleN> {
-    private final int size;
-
-    public TupleNAggregator(Aggregator<?>... aggregators) {
-      super(aggregators);
-      size = aggregators.length;
-    }
-
-    @Override
-    public void update(TupleN value) {
-      updateTuple(value);
-    }
-
-    @Override
-    public Iterable<TupleN> results() {
-      Iterable<?>[] iterables = new Iterable[size];
-      for (int i = 0; i < size; i++) {
-        iterables[i] = results(i);
-      }
-      return new Tuples.TupleNIterable(iterables);
-    }
-  }
-
-  private static class SetAggregator<V> extends SimpleAggregator<V> {
-    private final Set<V> elements;
-    private final int sizeLimit;
-    
-    public SetAggregator() {
-      this(-1);
-    }
-    
-    public SetAggregator(int sizeLimit) {
-      this.elements = Sets.newHashSet();
-      this.sizeLimit = sizeLimit;
-    }
-    
-    @Override
-    public void reset() {
-      elements.clear();
-    }
-
-    @Override
-    public void update(V value) {
-      if (sizeLimit == -1 || elements.size() < sizeLimit) {
-        elements.add(value);
-      }
-    }
-
-    @Override
-    public Iterable<V> results() {
-      return ImmutableList.copyOf(elements);
-    }
-  }
-  
-}
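
The private classes above back the static factory methods on Aggregators. As a minimal sketch of how they are typically exercised through the public API (Aggregators.STRING_CONCAT and PGroupedTable#combineValues are assumed from the released Crunch API and are not part of this diff):

    import org.apache.crunch.PTable;
    import org.apache.crunch.fn.Aggregators;

    public class AggregatorSketch {
      // Concatenate all values for each key into a single comma-separated string,
      // skipping nulls; the heavy lifting is done by the aggregator classes above.
      public static PTable<String, String> concatValues(PTable<String, String> table) {
        return table
            .groupByKey()
            .combineValues(Aggregators.STRING_CONCAT(",", true));
      }
    }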

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/fn/CompositeMapFn.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/fn/CompositeMapFn.java b/crunch/src/main/java/org/apache/crunch/fn/CompositeMapFn.java
deleted file mode 100644
index 2a8e7d9..0000000
--- a/crunch/src/main/java/org/apache/crunch/fn/CompositeMapFn.java
+++ /dev/null
@@ -1,71 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.fn;
-
-import org.apache.crunch.Emitter;
-import org.apache.crunch.MapFn;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.TaskInputOutputContext;
-
-public class CompositeMapFn<R, S, T> extends MapFn<R, T> {
-
-  private final MapFn<R, S> first;
-  private final MapFn<S, T> second;
-
-  public CompositeMapFn(MapFn<R, S> first, MapFn<S, T> second) {
-    this.first = first;
-    this.second = second;
-  }
-
-  @Override
-  public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
-    first.setContext(context);
-    second.setContext(context);
-  }
-  
-  @Override
-  public void initialize() {
-    first.initialize();
-    second.initialize();
-  }
-
-  public MapFn<R, S> getFirst() {
-    return first;
-  }
-
-  public MapFn<S, T> getSecond() {
-    return second;
-  }
-
-  @Override
-  public T map(R input) {
-    return second.map(first.map(input));
-  }
-
-  @Override
-  public void cleanup(Emitter<T> emitter) {
-    first.cleanup(null);
-    second.cleanup(null);
-  }
-
-  @Override
-  public void configure(Configuration conf) {
-    first.configure(conf);
-    second.configure(conf);
-  }
-}
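
A minimal sketch of the composition this class provides, i.e. second.map(first.map(input)); the anonymous parse and doubling functions below are illustrative, not part of the diff:

    import org.apache.crunch.MapFn;
    import org.apache.crunch.fn.CompositeMapFn;

    public class CompositeMapFnSketch {
      public static void main(String[] args) {
        MapFn<String, Integer> parse = new MapFn<String, Integer>() {
          @Override
          public Integer map(String input) {
            return Integer.valueOf(input.trim());
          }
        };
        MapFn<Integer, Integer> doubleIt = new MapFn<Integer, Integer>() {
          @Override
          public Integer map(Integer input) {
            return input * 2;
          }
        };
        CompositeMapFn<String, Integer, Integer> composed =
            new CompositeMapFn<String, Integer, Integer>(parse, doubleIt);
        composed.initialize(); // initializes both delegates
        System.out.println(composed.map(" 21 ")); // 42
      }
    }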

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/fn/ExtractKeyFn.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/fn/ExtractKeyFn.java b/crunch/src/main/java/org/apache/crunch/fn/ExtractKeyFn.java
deleted file mode 100644
index b8cc9df..0000000
--- a/crunch/src/main/java/org/apache/crunch/fn/ExtractKeyFn.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.fn;
-
-import org.apache.crunch.MapFn;
-import org.apache.crunch.Pair;
-import org.apache.hadoop.mapreduce.TaskInputOutputContext;
-
-/**
- * Wrapper function that applies a {@code MapFn} to each value to extract a key,
- * producing the key-value pairs needed to convert a {@code PCollection<V>} into a
- * {@code PTable<K, V>}.
- */
-public class ExtractKeyFn<K, V> extends MapFn<V, Pair<K, V>> {
-
-  private final MapFn<V, K> mapFn;
-
-  public ExtractKeyFn(MapFn<V, K> mapFn) {
-    this.mapFn = mapFn;
-  }
-
-  @Override
-  public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
-    mapFn.setContext(context);
-  }
-  
-  @Override
-  public void initialize() {
-    mapFn.initialize();
-  }
-
-  @Override
-  public Pair<K, V> map(V input) {
-    return Pair.of(mapFn.map(input), input);
-  }
-}
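
A minimal sketch of keying a value by a derived field; in a pipeline this wrapper is normally applied for you (for example via PCollection#by), so the direct construction here is only illustrative:

    import org.apache.crunch.MapFn;
    import org.apache.crunch.Pair;
    import org.apache.crunch.fn.ExtractKeyFn;

    public class ExtractKeyFnSketch {
      public static void main(String[] args) {
        ExtractKeyFn<Integer, String> byLength = new ExtractKeyFn<Integer, String>(
            new MapFn<String, Integer>() {
              @Override
              public Integer map(String input) {
                return input.length();
              }
            });
        byLength.initialize();
        Pair<Integer, String> keyed = byLength.map("hello");
        System.out.println(keyed); // key 5 paired with "hello"
      }
    }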

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/fn/FilterFns.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/fn/FilterFns.java b/crunch/src/main/java/org/apache/crunch/fn/FilterFns.java
deleted file mode 100644
index 8dc4268..0000000
--- a/crunch/src/main/java/org/apache/crunch/fn/FilterFns.java
+++ /dev/null
@@ -1,112 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.fn;
-
-import org.apache.crunch.FilterFn;
-import org.apache.crunch.FilterFn.AndFn;
-import org.apache.crunch.FilterFn.NotFn;
-import org.apache.crunch.FilterFn.OrFn;
-
-
-/**
- * A collection of pre-defined {@link FilterFn} implementations.
- */
-public final class FilterFns {
-  // Note: We delegate to the deprecated implementation classes in FilterFn. When those
-  //       classes are removed, the implementations will simply move here.
-
-  private FilterFns() {
-    // utility class, not for instantiation
-  }
-
-  /**
-   * Accept an entry if all of the given filters accept it, using short-circuit evaluation.
-   * @param fn1 The first function to delegate to
-   * @param fn2 The second function to delegate to
-   * @return The composed filter function
-   */
-  public static <S> FilterFn<S> and(FilterFn<S> fn1, FilterFn<S> fn2) {
-    return new AndFn<S>(fn1, fn2);
-  }
-
-  /**
-   * Accept an entry if all of the given filters accept it, using short-circuit evaluation.
-   * @param fns The functions to delegate to (in the given order)
-   * @return The composed filter function
-   */
-  public static <S> FilterFn<S> and(FilterFn<S>... fns) {
-    return new AndFn<S>(fns);
-  }
-
-  /**
-   * Accept an entry if at least one of the given filters accepts it, using short-circuit evaluation.
-   * @param fn1 The first function to delegate to
-   * @param fn2 The second function to delegate to
-   * @return The composed filter function
-   */
-  public static <S> FilterFn<S> or(FilterFn<S> fn1, FilterFn<S> fn2) {
-    return new OrFn<S>(fn1, fn2);
-  }
-
-  /**
-   * Accept an entry if at least one of the given filters accepts it, using short-circuit evaluation.
-   * @param fns The functions to delegate to (in the given order)
-   * @return The composed filter function
-   */
-  public static <S> FilterFn<S> or(FilterFn<S>... fns) {
-    return new OrFn<S>(fns);
-  }
-
-  /**
-   * Accept an entry if the given filter <em>does not</em> accept it.
-   * @param fn The function to delegate to
-   * @return The composed filter function
-   */
-  public static <S> FilterFn<S> not(FilterFn<S> fn) {
-    return new NotFn<S>(fn);
-  }
-
-  /**
-   * Accept everything.
-   * @return A filter function that accepts everything.
-   */
-  public static <S> FilterFn<S> ACCEPT_ALL() {
-    return new AcceptAllFn<S>();
-  }
-
-  /**
-   * Reject everything.
-   * @return A filter function that rejects everything.
-   */
-  public static <S> FilterFn<S> REJECT_ALL() {
-    return not(new AcceptAllFn<S>());
-  }
-
-  private static class AcceptAllFn<S> extends FilterFn<S> {
-    @Override
-    public boolean accept(S input) {
-      return true;
-    }
-
-    @Override
-    public float scaleFactor() {
-      return 1.0f;
-    }
-  }
-
-}
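
A minimal sketch of composing filters with the helpers above; the two concrete predicates are illustrative:

    import org.apache.crunch.FilterFn;
    import org.apache.crunch.fn.FilterFns;

    public class FilterFnsSketch {
      public static void main(String[] args) {
        FilterFn<String> nonEmpty = new FilterFn<String>() {
          @Override
          public boolean accept(String input) {
            return input != null && !input.isEmpty();
          }
        };
        FilterFn<String> notComment = new FilterFn<String>() {
          @Override
          public boolean accept(String input) {
            return !input.startsWith("#");
          }
        };
        // Keep non-empty, non-comment lines; evaluation short-circuits on the first rejection.
        FilterFn<String> keep = FilterFns.and(nonEmpty, notComment);
        System.out.println(keep.accept("data"));            // true
        System.out.println(keep.accept("# comment"));       // false
        System.out.println(FilterFns.not(keep).accept("")); // true
      }
    }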

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/fn/IdentityFn.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/fn/IdentityFn.java b/crunch/src/main/java/org/apache/crunch/fn/IdentityFn.java
deleted file mode 100644
index 0eadb06..0000000
--- a/crunch/src/main/java/org/apache/crunch/fn/IdentityFn.java
+++ /dev/null
@@ -1,39 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.fn;
-
-import org.apache.crunch.MapFn;
-
-public class IdentityFn<T> extends MapFn<T, T> {
-
-  private static final IdentityFn<Object> INSTANCE = new IdentityFn<Object>();
-
-  @SuppressWarnings("unchecked")
-  public static <T> IdentityFn<T> getInstance() {
-    return (IdentityFn<T>) INSTANCE;
-  }
-
-  // Non-instantiable
-  private IdentityFn() {
-  }
-
-  @Override
-  public T map(T input) {
-    return input;
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/fn/MapKeysFn.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/fn/MapKeysFn.java b/crunch/src/main/java/org/apache/crunch/fn/MapKeysFn.java
deleted file mode 100644
index cbaf24d..0000000
--- a/crunch/src/main/java/org/apache/crunch/fn/MapKeysFn.java
+++ /dev/null
@@ -1,32 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.fn;
-
-import org.apache.crunch.DoFn;
-import org.apache.crunch.Emitter;
-import org.apache.crunch.Pair;
-
-public abstract class MapKeysFn<K1, K2, V> extends DoFn<Pair<K1, V>, Pair<K2, V>> {
-
-  @Override
-  public void process(Pair<K1, V> input, Emitter<Pair<K2, V>> emitter) {
-    emitter.emit(Pair.of(map(input.first()), input.second()));
-  }
-
-  public abstract K2 map(K1 k1);
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/fn/MapValuesFn.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/fn/MapValuesFn.java b/crunch/src/main/java/org/apache/crunch/fn/MapValuesFn.java
deleted file mode 100644
index b90f5ff..0000000
--- a/crunch/src/main/java/org/apache/crunch/fn/MapValuesFn.java
+++ /dev/null
@@ -1,32 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.fn;
-
-import org.apache.crunch.DoFn;
-import org.apache.crunch.Emitter;
-import org.apache.crunch.Pair;
-
-public abstract class MapValuesFn<K, V1, V2> extends DoFn<Pair<K, V1>, Pair<K, V2>> {
-
-  @Override
-  public void process(Pair<K, V1> input, Emitter<Pair<K, V2>> emitter) {
-    emitter.emit(Pair.of(input.first(), map(input.second())));
-  }
-
-  public abstract V2 map(V1 v);
-}
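
A minimal sketch of transforming only the value side of a key-value pair (MapKeysFn above is the mirror image for keys); the printing Emitter is illustrative:

    import org.apache.crunch.Emitter;
    import org.apache.crunch.Pair;
    import org.apache.crunch.fn.MapValuesFn;

    public class UpperCaseValuesFn extends MapValuesFn<Long, String, String> {
      @Override
      public String map(String v) {
        return v == null ? null : v.toUpperCase();
      }

      public static void main(String[] args) {
        new UpperCaseValuesFn().process(Pair.of(1L, "hello"), new Emitter<Pair<Long, String>>() {
          @Override
          public void emit(Pair<Long, String> emitted) {
            System.out.println(emitted); // key unchanged, value upper-cased
          }

          @Override
          public void flush() {
          }
        });
      }
    }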

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/fn/PairMapFn.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/fn/PairMapFn.java b/crunch/src/main/java/org/apache/crunch/fn/PairMapFn.java
deleted file mode 100644
index 9ee4336..0000000
--- a/crunch/src/main/java/org/apache/crunch/fn/PairMapFn.java
+++ /dev/null
@@ -1,65 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.fn;
-
-import org.apache.crunch.Emitter;
-import org.apache.crunch.MapFn;
-import org.apache.crunch.Pair;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.TaskInputOutputContext;
-
-public class PairMapFn<K, V, S, T> extends MapFn<Pair<K, V>, Pair<S, T>> {
-
-  private MapFn<K, S> keys;
-  private MapFn<V, T> values;
-
-  public PairMapFn(MapFn<K, S> keys, MapFn<V, T> values) {
-    this.keys = keys;
-    this.values = values;
-  }
-
-  @Override
-  public void configure(Configuration conf) {
-    keys.configure(conf);
-    values.configure(conf);
-  }
-
-  @Override
-  public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
-    keys.setContext(context);
-    values.setContext(context);
-  }
-
-  @Override
-  public void initialize() {
-    keys.initialize();
-    values.initialize();
-  }
-  
-  @Override
-  public Pair<S, T> map(Pair<K, V> input) {
-    return Pair.of(keys.map(input.first()), values.map(input.second()));
-  }
-
-  @Override
-  public void cleanup(Emitter<Pair<S, T>> emitter) {
-    keys.cleanup(null);
-    values.cleanup(null);
-  }
-
-}
\ No newline at end of file
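
A minimal sketch of applying independent MapFns to the key and value sides of a Pair; the two anonymous functions are illustrative:

    import org.apache.crunch.MapFn;
    import org.apache.crunch.Pair;
    import org.apache.crunch.fn.PairMapFn;

    public class PairMapFnSketch {
      public static void main(String[] args) {
        MapFn<String, Integer> length = new MapFn<String, Integer>() {
          @Override
          public Integer map(String s) {
            return s.length();
          }
        };
        MapFn<String, String> upper = new MapFn<String, String>() {
          @Override
          public String map(String s) {
            return s.toUpperCase();
          }
        };
        PairMapFn<String, String, Integer, String> fn =
            new PairMapFn<String, String, Integer, String>(length, upper);
        fn.initialize();
        System.out.println(fn.map(Pair.of("crunch", "rocks"))); // (6, ROCKS)
      }
    }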

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/fn/package-info.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/fn/package-info.java b/crunch/src/main/java/org/apache/crunch/fn/package-info.java
deleted file mode 100644
index acefdff..0000000
--- a/crunch/src/main/java/org/apache/crunch/fn/package-info.java
+++ /dev/null
@@ -1,22 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Commonly used functions for manipulating collections.
- */
-package org.apache.crunch.fn;

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/hadoop/mapreduce/TaskAttemptContextFactory.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/hadoop/mapreduce/TaskAttemptContextFactory.java b/crunch/src/main/java/org/apache/crunch/hadoop/mapreduce/TaskAttemptContextFactory.java
deleted file mode 100644
index 887c051..0000000
--- a/crunch/src/main/java/org/apache/crunch/hadoop/mapreduce/TaskAttemptContextFactory.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.hadoop.mapreduce;
-
-import java.lang.reflect.Constructor;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.TaskAttemptID;
-
-/**
- * A factory class that allows us to hide the fact that {@code TaskAttemptContext} is a class in
- * Hadoop 1.x.x and an interface in Hadoop 2.x.x.
- */
-@SuppressWarnings("unchecked")
-public class TaskAttemptContextFactory {
-
-  private static final Log LOG = LogFactory.getLog(TaskAttemptContextFactory.class);
-
-  private static final TaskAttemptContextFactory INSTANCE = new TaskAttemptContextFactory();
-
-  public static TaskAttemptContext create(Configuration conf, TaskAttemptID taskAttemptId) {
-    return INSTANCE.createInternal(conf, taskAttemptId);
-  }
-
-  private Constructor<TaskAttemptContext> taskAttemptConstructor;
-
-  private TaskAttemptContextFactory() {
-    Class<TaskAttemptContext> implClass = TaskAttemptContext.class;
-    if (implClass.isInterface()) {
-      try {
-        implClass = (Class<TaskAttemptContext>) Class.forName(
-            "org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl");
-      } catch (ClassNotFoundException e) {
-        LOG.fatal("Could not find TaskAttemptContextImpl class, exiting", e);
-      }
-    }
-    try {
-      this.taskAttemptConstructor = implClass.getConstructor(Configuration.class, TaskAttemptID.class);
-    } catch (Exception e) {
-      LOG.fatal("Could not access TaskAttemptContext constructor, exiting", e);
-    }
-  }
-
-  private TaskAttemptContext createInternal(Configuration conf, TaskAttemptID taskAttemptId) {
-    try {
-      return (TaskAttemptContext) taskAttemptConstructor.newInstance(conf, taskAttemptId);
-    } catch (Exception e) {
-      LOG.error("Could not construct a TaskAttemptContext instance", e);
-      return null;
-    }
-  }
-}
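
A minimal sketch of the factory in use; callers never reference TaskAttemptContextImpl directly, so the same code compiles and runs against both Hadoop 1.x and 2.x:

    import org.apache.crunch.hadoop.mapreduce.TaskAttemptContextFactory;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.TaskAttemptContext;
    import org.apache.hadoop.mapreduce.TaskAttemptID;

    public class TaskAttemptContextSketch {
      public static void main(String[] args) {
        TaskAttemptContext context =
            TaskAttemptContextFactory.create(new Configuration(), new TaskAttemptID());
        System.out.println(context.getTaskAttemptID());
      }
    }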


[30/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/impl/SourceTargetImpl.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/impl/SourceTargetImpl.java b/crunch-core/src/main/java/org/apache/crunch/io/impl/SourceTargetImpl.java
new file mode 100644
index 0000000..4d2b88a
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/impl/SourceTargetImpl.java
@@ -0,0 +1,89 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.impl;
+
+import java.io.IOException;
+
+import org.apache.commons.lang.builder.HashCodeBuilder;
+import org.apache.crunch.Source;
+import org.apache.crunch.SourceTarget;
+import org.apache.crunch.Target;
+import org.apache.crunch.io.OutputHandler;
+import org.apache.crunch.types.PType;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.Job;
+
+class SourceTargetImpl<T> implements SourceTarget<T> {
+
+  protected final Source<T> source;
+  protected final Target target;
+
+  public SourceTargetImpl(Source<T> source, Target target) {
+    this.source = source;
+    this.target = target;
+  }
+
+  @Override
+  public PType<T> getType() {
+    return source.getType();
+  }
+
+  @Override
+  public void configureSource(Job job, int inputId) throws IOException {
+    source.configureSource(job, inputId);
+  }
+
+  @Override
+  public long getSize(Configuration configuration) {
+    return source.getSize(configuration);
+  }
+
+  @Override
+  public boolean accept(OutputHandler handler, PType<?> ptype) {
+    return target.accept(handler, ptype);
+  }
+
+  @Override
+  public <S> SourceTarget<S> asSourceTarget(PType<S> ptype) {
+    return target.asSourceTarget(ptype);
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    if (other == null || !(other.getClass().equals(getClass()))) {
+      return false;
+    }
+    SourceTargetImpl sti = (SourceTargetImpl) other;
+    return source.equals(sti.source) && target.equals(sti.target);
+  }
+
+  @Override
+  public int hashCode() {
+    return new HashCodeBuilder().append(source).append(target).toHashCode();
+  }
+
+  @Override
+  public String toString() {
+    return source.toString();
+  }
+
+  @Override
+  public void handleExisting(WriteMode strategy, Configuration conf) {
+    target.handleExisting(strategy, conf);  
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/impl/TableSourcePathTargetImpl.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/impl/TableSourcePathTargetImpl.java b/crunch-core/src/main/java/org/apache/crunch/io/impl/TableSourcePathTargetImpl.java
new file mode 100644
index 0000000..a8ff639
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/impl/TableSourcePathTargetImpl.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.impl;
+
+import org.apache.crunch.Pair;
+import org.apache.crunch.TableSource;
+import org.apache.crunch.io.FileNamingScheme;
+import org.apache.crunch.io.PathTarget;
+import org.apache.crunch.io.SequentialFileNamingScheme;
+import org.apache.crunch.types.PTableType;
+
+public class TableSourcePathTargetImpl<K, V> extends SourcePathTargetImpl<Pair<K, V>> implements TableSource<K, V> {
+
+  public TableSourcePathTargetImpl(TableSource<K, V> source, PathTarget target) {
+    this(source, target, new SequentialFileNamingScheme());
+  }
+
+  public TableSourcePathTargetImpl(TableSource<K, V> source, PathTarget target, FileNamingScheme fileNamingScheme) {
+    super(source, target, fileNamingScheme);
+  }
+
+  @Override
+  public PTableType<K, V> getTableType() {
+    return ((TableSource<K, V>) source).getTableType();
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/impl/TableSourceTargetImpl.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/impl/TableSourceTargetImpl.java b/crunch-core/src/main/java/org/apache/crunch/io/impl/TableSourceTargetImpl.java
new file mode 100644
index 0000000..965b0f9
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/impl/TableSourceTargetImpl.java
@@ -0,0 +1,35 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.impl;
+
+import org.apache.crunch.Pair;
+import org.apache.crunch.TableSource;
+import org.apache.crunch.Target;
+import org.apache.crunch.types.PTableType;
+
+public class TableSourceTargetImpl<K, V> extends SourceTargetImpl<Pair<K, V>> implements TableSource<K, V> {
+
+  public TableSourceTargetImpl(TableSource<K, V> source, Target target) {
+    super(source, target);
+  }
+
+  @Override
+  public PTableType<K, V> getTableType() {
+    return ((TableSource<K, V>) source).getTableType();
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/package-info.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/package-info.java b/crunch-core/src/main/java/org/apache/crunch/io/package-info.java
new file mode 100644
index 0000000..022bc99
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/package-info.java
@@ -0,0 +1,22 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Data input and output for Pipelines.
+ */
+package org.apache.crunch.io;

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/seq/SeqFileHelper.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/seq/SeqFileHelper.java b/crunch-core/src/main/java/org/apache/crunch/io/seq/SeqFileHelper.java
new file mode 100644
index 0000000..ba07506
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/seq/SeqFileHelper.java
@@ -0,0 +1,35 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.seq;
+
+import org.apache.crunch.MapFn;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.writable.WritableType;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.util.ReflectionUtils;
+
+class SeqFileHelper {
+  static <T> Writable newInstance(PType<T> ptype, Configuration conf) {
+    return (Writable) ReflectionUtils.newInstance(((WritableType) ptype).getSerializationClass(), conf);
+  }
+
+  static <T> MapFn<Object, T> getInputMapFn(PType<T> ptype) {
+    return ptype.getInputMapFn();
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/seq/SeqFileReaderFactory.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/seq/SeqFileReaderFactory.java b/crunch-core/src/main/java/org/apache/crunch/io/seq/SeqFileReaderFactory.java
new file mode 100644
index 0000000..3f45644
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/seq/SeqFileReaderFactory.java
@@ -0,0 +1,112 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.seq;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.crunch.MapFn;
+import org.apache.crunch.fn.IdentityFn;
+import org.apache.crunch.io.FileReaderFactory;
+import org.apache.crunch.io.impl.AutoClosingIterator;
+import org.apache.crunch.types.Converter;
+import org.apache.crunch.types.PTableType;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.writable.Writables;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.util.ReflectionUtils;
+
+import com.google.common.collect.Iterators;
+import com.google.common.collect.UnmodifiableIterator;
+
+public class SeqFileReaderFactory<T> implements FileReaderFactory<T> {
+
+  private static final Log LOG = LogFactory.getLog(SeqFileReaderFactory.class);
+
+  private final Converter converter;
+  private final MapFn<Object, T> mapFn;
+  private final Writable key;
+  private final Writable value;
+
+  public SeqFileReaderFactory(PType<T> ptype) {
+    this.converter = ptype.getConverter();
+    this.mapFn = ptype.getInputMapFn();
+    if (ptype instanceof PTableType) {
+      PTableType ptt = (PTableType) ptype;
+      this.key = SeqFileHelper.newInstance(ptt.getKeyType(), null);
+      this.value = SeqFileHelper.newInstance(ptt.getValueType(), null);
+    } else {
+      this.key = NullWritable.get();
+      this.value = SeqFileHelper.newInstance(ptype, null);
+    }
+  }
+
+  public SeqFileReaderFactory(Class clazz) {
+    PType<T> ptype = Writables.writables(clazz);
+    this.converter = ptype.getConverter();
+    this.mapFn = ptype.getInputMapFn();
+    this.key = NullWritable.get();
+    this.value = (Writable) ReflectionUtils.newInstance(clazz, null);
+  }
+  
+  @Override
+  public Iterator<T> read(FileSystem fs, final Path path) {
+    mapFn.initialize();
+    try {
+      final SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, fs.getConf());
+      return new AutoClosingIterator<T>(reader, new UnmodifiableIterator<T>() {
+        boolean nextChecked = false;
+        boolean hasNext = false;
+
+        @Override
+        public boolean hasNext() {
+          if (nextChecked) {
+            return hasNext;
+          }
+          try {
+            hasNext = reader.next(key, value);
+            nextChecked = true;
+            return hasNext;
+          } catch (IOException e) {
+            LOG.info("Error reading from path: " + path, e);
+            return false;
+          }
+        }
+
+        @Override
+        public T next() {
+          if (!nextChecked && !hasNext()) {
+            return null;
+          }
+          nextChecked = false;
+          return mapFn.map(converter.convertInput(key, value));
+        }
+      });
+    } catch (IOException e) {
+      LOG.info("Could not read seqfile at path: " + path, e);
+      return Iterators.emptyIterator();
+    }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/seq/SeqFileSource.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/seq/SeqFileSource.java b/crunch-core/src/main/java/org/apache/crunch/io/seq/SeqFileSource.java
new file mode 100644
index 0000000..8fac4ae
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/seq/SeqFileSource.java
@@ -0,0 +1,47 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.seq;
+
+import java.io.IOException;
+
+import org.apache.crunch.io.CompositePathIterable;
+import org.apache.crunch.io.ReadableSource;
+import org.apache.crunch.io.impl.FileSourceImpl;
+import org.apache.crunch.types.PType;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
+
+public class SeqFileSource<T> extends FileSourceImpl<T> implements ReadableSource<T> {
+
+  public SeqFileSource(Path path, PType<T> ptype) {
+    super(path, ptype, SequenceFileInputFormat.class);
+  }
+
+  @Override
+  public Iterable<T> read(Configuration conf) throws IOException {
+    FileSystem fs = path.getFileSystem(conf);
+    return CompositePathIterable.create(fs, path, new SeqFileReaderFactory<T>(ptype));
+  }
+
+  @Override
+  public String toString() {
+    return "SeqFile(" + path.toString() + ")";
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/seq/SeqFileSourceTarget.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/seq/SeqFileSourceTarget.java b/crunch-core/src/main/java/org/apache/crunch/io/seq/SeqFileSourceTarget.java
new file mode 100644
index 0000000..adc739f
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/seq/SeqFileSourceTarget.java
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.seq;
+
+import org.apache.crunch.io.FileNamingScheme;
+import org.apache.crunch.io.SequentialFileNamingScheme;
+import org.apache.crunch.io.impl.ReadableSourcePathTargetImpl;
+import org.apache.crunch.types.PType;
+import org.apache.hadoop.fs.Path;
+
+public class SeqFileSourceTarget<T> extends ReadableSourcePathTargetImpl<T> {
+
+  public SeqFileSourceTarget(String path, PType<T> ptype) {
+    this(new Path(path), ptype);
+  }
+
+  public SeqFileSourceTarget(Path path, PType<T> ptype) {
+    this(path, ptype, new SequentialFileNamingScheme());
+  }
+
+  public SeqFileSourceTarget(Path path, PType<T> ptype, FileNamingScheme fileNamingScheme) {
+    super(new SeqFileSource<T>(path, ptype), new SeqFileTarget(path), fileNamingScheme);
+  }
+
+  @Override
+  public String toString() {
+    return target.toString();
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/seq/SeqFileTableSource.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/seq/SeqFileTableSource.java b/crunch-core/src/main/java/org/apache/crunch/io/seq/SeqFileTableSource.java
new file mode 100644
index 0000000..7a63272
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/seq/SeqFileTableSource.java
@@ -0,0 +1,57 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.seq;
+
+import java.io.IOException;
+
+import org.apache.crunch.Pair;
+import org.apache.crunch.io.CompositePathIterable;
+import org.apache.crunch.io.ReadableSource;
+import org.apache.crunch.io.impl.FileTableSourceImpl;
+import org.apache.crunch.types.PTableType;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
+
+/**
+ * A {@code TableSource} that uses {@code SequenceFileInputFormat} to read the input
+ * file.
+ */
+public class SeqFileTableSource<K, V> extends FileTableSourceImpl<K, V> implements ReadableSource<Pair<K, V>> {
+
+  public SeqFileTableSource(String path, PTableType<K, V> ptype) {
+    this(new Path(path), ptype);
+  }
+
+  public SeqFileTableSource(Path path, PTableType<K, V> ptype) {
+    super(path, ptype, SequenceFileInputFormat.class);
+  }
+
+  @Override
+  public Iterable<Pair<K, V>> read(Configuration conf) throws IOException {
+    FileSystem fs = path.getFileSystem(conf);
+    return CompositePathIterable.create(fs, path,
+        new SeqFileReaderFactory<Pair<K, V>>(getTableType()));
+  }
+
+  @Override
+  public String toString() {
+    return "SeqFile(" + path.toString() + ")";
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/seq/SeqFileTableSourceTarget.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/seq/SeqFileTableSourceTarget.java b/crunch-core/src/main/java/org/apache/crunch/io/seq/SeqFileTableSourceTarget.java
new file mode 100644
index 0000000..ebdf319
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/seq/SeqFileTableSourceTarget.java
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.seq;
+
+import org.apache.crunch.Pair;
+import org.apache.crunch.TableSourceTarget;
+import org.apache.crunch.io.FileNamingScheme;
+import org.apache.crunch.io.SequentialFileNamingScheme;
+import org.apache.crunch.io.impl.ReadableSourcePathTargetImpl;
+import org.apache.crunch.types.PTableType;
+import org.apache.hadoop.fs.Path;
+
+public class SeqFileTableSourceTarget<K, V> extends ReadableSourcePathTargetImpl<Pair<K, V>> implements
+    TableSourceTarget<K, V> {
+  private final PTableType<K, V> tableType;
+
+  public SeqFileTableSourceTarget(String path, PTableType<K, V> tableType) {
+    this(new Path(path), tableType);
+  }
+
+  public SeqFileTableSourceTarget(Path path, PTableType<K, V> tableType) {
+    this(path, tableType, new SequentialFileNamingScheme());
+  }
+
+  public SeqFileTableSourceTarget(Path path, PTableType<K, V> tableType, FileNamingScheme fileNamingScheme) {
+    super(new SeqFileTableSource<K, V>(path, tableType), new SeqFileTarget(path), fileNamingScheme);
+    this.tableType = tableType;
+  }
+
+  @Override
+  public PTableType<K, V> getTableType() {
+    return tableType;
+  }
+
+  @Override
+  public String toString() {
+    return target.toString();
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/seq/SeqFileTarget.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/seq/SeqFileTarget.java b/crunch-core/src/main/java/org/apache/crunch/io/seq/SeqFileTarget.java
new file mode 100644
index 0000000..60e4739
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/seq/SeqFileTarget.java
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.seq;
+
+import org.apache.crunch.SourceTarget;
+import org.apache.crunch.io.FileNamingScheme;
+import org.apache.crunch.io.SequentialFileNamingScheme;
+import org.apache.crunch.io.impl.FileTargetImpl;
+import org.apache.crunch.types.PTableType;
+import org.apache.crunch.types.PType;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
+
+public class SeqFileTarget extends FileTargetImpl {
+  public SeqFileTarget(String path) {
+    this(new Path(path));
+  }
+
+  public SeqFileTarget(Path path) {
+    this(path, new SequentialFileNamingScheme());
+  }
+
+  public SeqFileTarget(Path path, FileNamingScheme fileNamingScheme) {
+    super(path, SequenceFileOutputFormat.class, fileNamingScheme);
+  }
+
+  @Override
+  public String toString() {
+    return "SeqFile(" + path.toString() + ")";
+  }
+
+  @Override
+  public <T> SourceTarget<T> asSourceTarget(PType<T> ptype) {
+    if (ptype instanceof PTableType) {
+      return new SeqFileTableSourceTarget(path, (PTableType) ptype);
+    } else {
+      return new SeqFileSourceTarget(path, ptype);
+    }
+  }
+}
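
A minimal sketch of wiring the sequence-file source and target above into a pipeline; the MRPipeline setup and the paths are illustrative and not part of this diff:

    import org.apache.crunch.PCollection;
    import org.apache.crunch.Pipeline;
    import org.apache.crunch.impl.mr.MRPipeline;
    import org.apache.crunch.io.seq.SeqFileSource;
    import org.apache.crunch.io.seq.SeqFileTarget;
    import org.apache.crunch.types.writable.Writables;
    import org.apache.hadoop.fs.Path;

    public class SeqFileRoundTrip {
      public static void main(String[] args) {
        Pipeline pipeline = new MRPipeline(SeqFileRoundTrip.class);
        PCollection<String> lines = pipeline.read(
            new SeqFileSource<String>(new Path("/tmp/in"), Writables.strings()));
        pipeline.write(lines, new SeqFileTarget("/tmp/out"));
        pipeline.done();
      }
    }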

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/text/BZip2TextInputFormat.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/text/BZip2TextInputFormat.java b/crunch-core/src/main/java/org/apache/crunch/io/text/BZip2TextInputFormat.java
new file mode 100644
index 0000000..67a8870
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/text/BZip2TextInputFormat.java
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.text;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+
+class BZip2TextInputFormat extends FileInputFormat<LongWritable, Text> {
+  /**
+   * Treats keys as offset in file and value as line. Since the input file is
+   * compressed, the offset for a particular line is not well-defined. This
+   * implementation returns the starting position of a compressed block as the
+   * key for every line in that block.
+   */
+
+  private static class BZip2LineRecordReader extends RecordReader<LongWritable, Text> {
+
+    private long start;
+
+    private long end;
+
+    private long pos;
+
+    private CBZip2InputStream in;
+
+    private ByteArrayOutputStream buffer = new ByteArrayOutputStream(256);
+
+    // flag to indicate if previous character read was Carriage Return ('\r')
+    // and the next character was not Line Feed ('\n')
+    private boolean CRFollowedByNonLF = false;
+
+    // in the case where a Carriage Return ('\r') was not followed by a
+    // Line Feed ('\n'), this variable will hold that non Line Feed character
+    // that was read from the underlying stream.
+    private byte nonLFChar;
+
+    /**
+     * Provide a bridge to get the bytes from the ByteArrayOutputStream without
+     * creating a new byte array.
+     */
+    private static class TextStuffer extends OutputStream {
+      public Text target;
+
+      @Override
+      public void write(int b) {
+        throw new UnsupportedOperationException("write(byte) not supported");
+      }
+
+      @Override
+      public void write(byte[] data, int offset, int len) throws IOException {
+        target.clear();
+        target.set(data, offset, len);
+      }
+    }
+
+    private TextStuffer bridge = new TextStuffer();
+
+    private LongWritable key = new LongWritable();
+    private Text value = new Text();
+
+    public BZip2LineRecordReader(Configuration job, FileSplit split) throws IOException {
+      start = split.getStart();
+      end = start + split.getLength();
+      final Path file = split.getPath();
+
+      // open the file and seek to the start of the split
+      FileSystem fs = file.getFileSystem(job);
+      FSDataInputStream fileIn = fs.open(split.getPath());
+      fileIn.seek(start);
+
+      in = new CBZip2InputStream(fileIn, 9, end);
+      if (start != 0) {
+        // skip first line and re-establish "start".
+        // LineRecordReader.readLine(this.in, null);
+        readLine(this.in, null);
+        start = in.getPos();
+      }
+      pos = in.getPos();
+    }
+
+    /*
+     * LineRecordReader.readLine() is deprecated in Hadoop 0.17, so it is added
+     * here locally.
+     */
+    private long readLine(InputStream in, OutputStream out) throws IOException {
+      long bytes = 0;
+      while (true) {
+        int b = -1;
+        if (CRFollowedByNonLF) {
+          // In the previous call, a Carriage Return ('\r') was followed
+          // by a non Line Feed ('\n') character - in that call we would
+          // have not returned the non Line Feed character but would have
+          // read it from the stream - lets use that already read character
+          // now
+          b = nonLFChar;
+          CRFollowedByNonLF = false;
+        } else {
+          b = in.read();
+        }
+        if (b == -1) {
+          break;
+        }
+        bytes += 1;
+
+        byte c = (byte) b;
+        if (c == '\n') {
+          break;
+        }
+
+        if (c == '\r') {
+          byte nextC = (byte) in.read();
+          if (nextC != '\n') {
+            CRFollowedByNonLF = true;
+            nonLFChar = nextC;
+          } else {
+            bytes += 1;
+          }
+          break;
+        }
+
+        if (out != null) {
+          out.write(c);
+        }
+      }
+      return bytes;
+    }
+
+    /** Read a line. */
+    public boolean next(LongWritable key, Text value) throws IOException {
+      if (pos > end)
+        return false;
+
+      key.set(pos); // key is position
+      buffer.reset();
+      // long bytesRead = LineRecordReader.readLine(in, buffer);
+      long bytesRead = readLine(in, buffer);
+      if (bytesRead == 0) {
+        return false;
+      }
+      pos = in.getPos();
+      // if we have read ahead because we encountered a carriage return
+      // char followed by a non line feed char, decrement the pos
+      if (CRFollowedByNonLF) {
+        pos--;
+      }
+
+      bridge.target = value;
+      buffer.writeTo(bridge);
+      return true;
+    }
+
+    /**
+     * Get the progress within the split
+     */
+    @Override
+    public float getProgress() {
+      if (start == end) {
+        return 0.0f;
+      } else {
+        return Math.min(1.0f, (pos - start) / (float) (end - start));
+      }
+    }
+
+    @Override
+    public void close() throws IOException {
+      in.close();
+    }
+
+    @Override
+    public LongWritable getCurrentKey() throws IOException, InterruptedException {
+      return key;
+    }
+
+    @Override
+    public Text getCurrentValue() throws IOException, InterruptedException {
+      return value;
+    }
+
+    @Override
+    public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
+      // no op
+    }
+
+    @Override
+    public boolean nextKeyValue() throws IOException, InterruptedException {
+      return next(key, value);
+    }
+
+  }
+
+  @Override
+  protected boolean isSplitable(JobContext context, Path file) {
+    return true;
+  }
+
+  @Override
+  public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
+    try {
+      return new BZip2LineRecordReader(context.getConfiguration(), (FileSplit) split);
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+}
\ No newline at end of file
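
For readers following the carriage-return handling in BZip2LineRecordReader.readLine() above, here is a minimal standalone sketch of the same logic. It is not part of the patch; the CrLfReadLineSketch class name and the sample input are made up for illustration.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;

public class CrLfReadLineSketch {

  private boolean crFollowedByNonLf = false;
  private int pendingChar = -1;

  // Mirrors the record reader's logic: '\n' terminates a line; '\r' also
  // terminates a line, and the byte read after it is kept for the next call
  // unless that byte is '\n'.
  long readLine(InputStream in, ByteArrayOutputStream out) throws IOException {
    long bytes = 0;
    while (true) {
      int b;
      if (crFollowedByNonLf) {
        b = pendingChar;            // reuse the byte read past the '\r'
        crFollowedByNonLf = false;
      } else {
        b = in.read();
      }
      if (b == -1) {
        break;
      }
      bytes++;
      byte c = (byte) b;
      if (c == '\n') {
        break;
      }
      if (c == '\r') {
        byte next = (byte) in.read();
        if (next != '\n') {
          crFollowedByNonLf = true; // keep the byte for the next call
          pendingChar = next;
        } else {
          bytes++;                  // consume the '\n' of the "\r\n" pair
        }
        break;
      }
      out.write(c);
    }
    return bytes;
  }

  public static void main(String[] args) throws IOException {
    CrLfReadLineSketch sketch = new CrLfReadLineSketch();
    InputStream in = new ByteArrayInputStream("one\r\ntwo\rthree\n".getBytes("UTF-8"));
    ByteArrayOutputStream line = new ByteArrayOutputStream();
    while (sketch.readLine(in, line) > 0) {
      System.out.println(line.toString("UTF-8")); // prints one, two, three
      line.reset();
    }
  }
}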

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/text/CBZip2InputStream.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/text/CBZip2InputStream.java b/crunch-core/src/main/java/org/apache/crunch/io/text/CBZip2InputStream.java
new file mode 100644
index 0000000..92bb787
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/text/CBZip2InputStream.java
@@ -0,0 +1,980 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.text;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.io.compress.bzip2.BZip2Constants;
+import org.apache.hadoop.mapreduce.InputSplit;
+
+/**
+ * An input stream that decompresses from the BZip2 format (without the file
+ * header chars) to be read as any other stream.
+ * 
+ * @author <a href="mailto:keiron@aftexsw.com">Keiron Liddle</a>
+ */
+class CBZip2InputStream extends InputStream implements BZip2Constants {
+  private static void cadvise(String reason) throws IOException {
+    throw new IOException(reason);
+  }
+
+  private static void compressedStreamEOF() throws IOException {
+    cadvise("compressedStream EOF");
+  }
+
+  private void makeMaps() {
+    int i;
+    nInUse = 0;
+    for (i = 0; i < 256; i++) {
+      if (inUse[i]) {
+        seqToUnseq[nInUse] = (char) i;
+        unseqToSeq[i] = (char) nInUse;
+        nInUse++;
+      }
+    }
+  }
+
+  /*
+   * index of the last char in the block, so the block size == last + 1.
+   */
+  private int last;
+
+  /*
+   * index in zptr[] of original string after sorting.
+   */
+  private int origPtr;
+
+  /*
+   * always: in the range 0 .. 9. The current block size is 100000 * this
+   * number.
+   */
+  private int blockSize100k;
+
+  private boolean blockRandomised;
+
+  // a buffer to keep the read byte
+  private int bsBuff;
+
+  // Since bzip2 blocks are bit-aligned, only a few bits of a read byte may be
+  // consumed at a block boundary, and the remaining bits must be consumed
+  // while processing the next block. bsLive indicates how many bits in bsBuff
+  // have not been processed yet.
+  private int bsLive;
+  private CRC mCrc = new CRC();
+
+  private boolean[] inUse = new boolean[256];
+  private int nInUse;
+
+  private char[] seqToUnseq = new char[256];
+  private char[] unseqToSeq = new char[256];
+
+  private char[] selector = new char[MAX_SELECTORS];
+  private char[] selectorMtf = new char[MAX_SELECTORS];
+
+  private int[] tt;
+  private char[] ll8;
+
+  /*
+   * freq table collected to save a pass over the data during decompression.
+   */
+  private int[] unzftab = new int[256];
+
+  private int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE];
+  private int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE];
+  private int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE];
+  private int[] minLens = new int[N_GROUPS];
+
+  private FSDataInputStream innerBsStream;
+  long readLimit = Long.MAX_VALUE;
+
+  public long getReadLimit() {
+    return readLimit;
+  }
+
+  public void setReadLimit(long readLimit) {
+    this.readLimit = readLimit;
+  }
+
+  long readCount;
+
+  public long getReadCount() {
+    return readCount;
+  }
+
+  private boolean streamEnd = false;
+
+  private int currentChar = -1;
+
+  private static final int START_BLOCK_STATE = 1;
+  private static final int RAND_PART_A_STATE = 2;
+  private static final int RAND_PART_B_STATE = 3;
+  private static final int RAND_PART_C_STATE = 4;
+  private static final int NO_RAND_PART_A_STATE = 5;
+  private static final int NO_RAND_PART_B_STATE = 6;
+  private static final int NO_RAND_PART_C_STATE = 7;
+
+  private int currentState = START_BLOCK_STATE;
+
+  private int storedBlockCRC, storedCombinedCRC;
+  private int computedBlockCRC, computedCombinedCRC;
+  private boolean checkComputedCombinedCRC = true;
+
+  int i2, count, chPrev, ch2;
+  int i, tPos;
+  int rNToGo = 0;
+  int rTPos = 0;
+  int j2;
+  char z;
+
+  // see comment in getPos()
+  private long retPos = -1;
+  // the position offset which corresponds to the end of the InputSplit that
+  // will be processed by this instance
+  private long endOffsetOfSplit;
+
+  private boolean signalToStopReading;
+
+  public CBZip2InputStream(FSDataInputStream zStream, int blockSize, long end) throws IOException {
+    endOffsetOfSplit = end;
+    // initialize retPos to the beginning of the current InputSplit
+    // see comments in getPos() to understand how this is used.
+    retPos = zStream.getPos();
+    ll8 = null;
+    tt = null;
+    checkComputedCombinedCRC = blockSize == -1;
+    bsSetStream(zStream);
+    initialize(blockSize);
+    initBlock(blockSize != -1);
+    setupBlock();
+  }
+
+  @Override
+  public int read() throws IOException {
+    if (streamEnd) {
+      return -1;
+    } else {
+
+      // If we have just started reading a bzip block that begins at a position
+      // >= the end of the current split, set up retPos so that after one more
+      // record is read, future getPos() calls return a value > the end of the
+      // current split. That way only one record is read out of this bzip
+      // block; the rest of its records are read by the next map task while
+      // processing the next split.
+      if (signalToStopReading) {
+        retPos = endOffsetOfSplit + 1;
+      }
+
+      int retChar = currentChar;
+      switch (currentState) {
+      case START_BLOCK_STATE:
+        break;
+      case RAND_PART_A_STATE:
+        break;
+      case RAND_PART_B_STATE:
+        setupRandPartB();
+        break;
+      case RAND_PART_C_STATE:
+        setupRandPartC();
+        break;
+      case NO_RAND_PART_A_STATE:
+        break;
+      case NO_RAND_PART_B_STATE:
+        setupNoRandPartB();
+        break;
+      case NO_RAND_PART_C_STATE:
+        setupNoRandPartC();
+        break;
+      default:
+        break;
+      }
+      return retChar;
+    }
+  }
+
+  /**
+   * getPos() is used by the caller to determine when the processing of the
+   * current {@link InputSplit} is complete. As each bzip block is read, this
+   * method keeps returning the beginning of the {@link InputSplit} until a
+   * block is reached that starts at a position >= the end of the current
+   * split. At that point retPos is set up so that, once one more record has
+   * been read, subsequent getPos() calls return a value > the end of the
+   * current split. That way only one record is read out of that bzip block;
+   * the rest of its records are read by the next map task while processing
+   * the next split.
+   *
+   * @return the position used by the caller to check whether the split is done
+   * @throws IOException if an I/O error occurs while determining the position
+   */
+  public long getPos() throws IOException {
+    return retPos;
+  }
+
+  private void initialize(int blockSize) throws IOException {
+    if (blockSize == -1) {
+      char magic1, magic2;
+      char magic3, magic4;
+      magic1 = bsGetUChar();
+      magic2 = bsGetUChar();
+      magic3 = bsGetUChar();
+      magic4 = bsGetUChar();
+      if (magic1 != 'B' || magic2 != 'Z' || magic3 != 'h' || magic4 < '1' || magic4 > '9') {
+        bsFinishedWithStream();
+        streamEnd = true;
+        return;
+      }
+      blockSize = magic4 - '0';
+    }
+
+    setDecompressStructureSizes(blockSize);
+    computedCombinedCRC = 0;
+  }
+
+  private final static long mask = 0xffffffffffffL;
+  private final static long eob = 0x314159265359L & mask;
+  private final static long eos = 0x177245385090L & mask;
+
+  private void initBlock(boolean searchForMagic) throws IOException {
+    if (readCount >= readLimit) {
+      bsFinishedWithStream();
+      streamEnd = true;
+      return;
+    }
+
+    // position before beginning of bzip block header
+    long pos = innerBsStream.getPos();
+    if (!searchForMagic) {
+      char magic1, magic2, magic3, magic4;
+      char magic5, magic6;
+      magic1 = bsGetUChar();
+      magic2 = bsGetUChar();
+      magic3 = bsGetUChar();
+      magic4 = bsGetUChar();
+      magic5 = bsGetUChar();
+      magic6 = bsGetUChar();
+      if (magic1 == 0x17 && magic2 == 0x72 && magic3 == 0x45 && magic4 == 0x38 && magic5 == 0x50 && magic6 == 0x90) {
+        complete();
+        return;
+      }
+
+      if (magic1 != 0x31 || magic2 != 0x41 || magic3 != 0x59 || magic4 != 0x26 || magic5 != 0x53 || magic6 != 0x59) {
+        badBlockHeader();
+        streamEnd = true;
+        return;
+      }
+    } else {
+      long magic = 0;
+      for (int i = 0; i < 6; i++) {
+        magic <<= 8;
+        magic |= bsGetUChar();
+      }
+      while (magic != eos && magic != eob) {
+        magic <<= 1;
+        magic &= mask;
+        magic |= bsR(1);
+        // If we have just found the block header, the beginning of the bzip
+        // header is 6 bytes before the current stream position. When we
+        // eventually break out of this while loop because a block header was
+        // found, pos will hold the correct start-of-header position.
+        pos = innerBsStream.getPos() - 6;
+      }
+      if (magic == eos) {
+        complete();
+        return;
+      }
+
+    }
+    // if the previous block finished a few bits into the previous byte,
+    // then we will first be reading the remaining bits from the previous
+    // byte - so logically pos needs to be one behind
+    if (bsLive > 0) {
+      pos--;
+    }
+
+    if (pos >= endOffsetOfSplit) {
+      // we have reached a block that begins at or beyond the end of the
+      // current InputSplit - record this fact
+      signalToStopReading = true;
+    }
+    storedBlockCRC = bsGetInt32();
+
+    if (bsR(1) == 1) {
+      blockRandomised = true;
+    } else {
+      blockRandomised = false;
+    }
+
+    // currBlockNo++;
+    getAndMoveToFrontDecode();
+
+    mCrc.initialiseCRC();
+    currentState = START_BLOCK_STATE;
+  }
+
+  private void endBlock() throws IOException {
+    computedBlockCRC = mCrc.getFinalCRC();
+    /* A bad CRC is considered a fatal error. */
+    if (storedBlockCRC != computedBlockCRC) {
+      crcError();
+    }
+
+    computedCombinedCRC = (computedCombinedCRC << 1) | (computedCombinedCRC >>> 31);
+    computedCombinedCRC ^= computedBlockCRC;
+  }
+
+  private void complete() throws IOException {
+    storedCombinedCRC = bsGetInt32();
+    if (checkComputedCombinedCRC && storedCombinedCRC != computedCombinedCRC) {
+      crcError();
+    }
+    if (innerBsStream.getPos() < endOffsetOfSplit) {
+      throw new IOException("Encountered additional bytes in the filesplit past the crc block. "
+          + "Loading of concatenated bz2 files is not supported");
+    }
+    bsFinishedWithStream();
+    streamEnd = true;
+  }
+
+  private static void blockOverrun() throws IOException {
+    cadvise("block overrun");
+  }
+
+  private static void badBlockHeader() throws IOException {
+    cadvise("bad block header");
+  }
+
+  private static void crcError() throws IOException {
+    cadvise("CRC error");
+  }
+
+  private void bsFinishedWithStream() {
+    if (this.innerBsStream != null) {
+      if (this.innerBsStream != System.in) {
+        this.innerBsStream = null;
+      }
+    }
+  }
+
+  private void bsSetStream(FSDataInputStream f) {
+    innerBsStream = f;
+    bsLive = 0;
+    bsBuff = 0;
+  }
+
+  final private int readBs() throws IOException {
+    readCount++;
+    return innerBsStream.read();
+  }
+
+  private int bsR(int n) throws IOException {
+    int v;
+    while (bsLive < n) {
+      int zzi;
+      zzi = readBs();
+      if (zzi == -1) {
+        compressedStreamEOF();
+      }
+      bsBuff = (bsBuff << 8) | (zzi & 0xff);
+      bsLive += 8;
+    }
+
+    v = (bsBuff >> (bsLive - n)) & ((1 << n) - 1);
+    bsLive -= n;
+    return v;
+  }
+
+  private char bsGetUChar() throws IOException {
+    return (char) bsR(8);
+  }
+
+  private int bsGetint() throws IOException {
+    int u = 0;
+    u = (u << 8) | bsR(8);
+    u = (u << 8) | bsR(8);
+    u = (u << 8) | bsR(8);
+    u = (u << 8) | bsR(8);
+    return u;
+  }
+
+  private int bsGetIntVS(int numBits) throws IOException {
+    return bsR(numBits);
+  }
+
+  private int bsGetInt32() throws IOException {
+    return bsGetint();
+  }
+
+  private void hbCreateDecodeTables(int[] limit, int[] base, int[] perm, char[] length, int minLen, int maxLen,
+      int alphaSize) {
+    int pp, i, j, vec;
+
+    pp = 0;
+    for (i = minLen; i <= maxLen; i++) {
+      for (j = 0; j < alphaSize; j++) {
+        if (length[j] == i) {
+          perm[pp] = j;
+          pp++;
+        }
+      }
+    }
+
+    for (i = 0; i < MAX_CODE_LEN; i++) {
+      base[i] = 0;
+    }
+    for (i = 0; i < alphaSize; i++) {
+      base[length[i] + 1]++;
+    }
+
+    for (i = 1; i < MAX_CODE_LEN; i++) {
+      base[i] += base[i - 1];
+    }
+
+    for (i = 0; i < MAX_CODE_LEN; i++) {
+      limit[i] = 0;
+    }
+    vec = 0;
+
+    for (i = minLen; i <= maxLen; i++) {
+      vec += (base[i + 1] - base[i]);
+      limit[i] = vec - 1;
+      vec <<= 1;
+    }
+    for (i = minLen + 1; i <= maxLen; i++) {
+      base[i] = ((limit[i - 1] + 1) << 1) - base[i];
+    }
+  }
+
+  private void recvDecodingTables() throws IOException {
+    char len[][] = new char[N_GROUPS][MAX_ALPHA_SIZE];
+    int i, j, t, nGroups, nSelectors, alphaSize;
+    int minLen, maxLen;
+    boolean[] inUse16 = new boolean[16];
+
+    /* Receive the mapping table */
+    for (i = 0; i < 16; i++) {
+      if (bsR(1) == 1) {
+        inUse16[i] = true;
+      } else {
+        inUse16[i] = false;
+      }
+    }
+
+    for (i = 0; i < 256; i++) {
+      inUse[i] = false;
+    }
+
+    for (i = 0; i < 16; i++) {
+      if (inUse16[i]) {
+        for (j = 0; j < 16; j++) {
+          if (bsR(1) == 1) {
+            inUse[i * 16 + j] = true;
+          }
+        }
+      }
+    }
+
+    makeMaps();
+    alphaSize = nInUse + 2;
+
+    /* Now the selectors */
+    nGroups = bsR(3);
+    nSelectors = bsR(15);
+    for (i = 0; i < nSelectors; i++) {
+      j = 0;
+      while (bsR(1) == 1) {
+        j++;
+      }
+      selectorMtf[i] = (char) j;
+    }
+
+    /* Undo the MTF values for the selectors. */
+    {
+      char[] pos = new char[N_GROUPS];
+      char tmp, v;
+      for (v = 0; v < nGroups; v++) {
+        pos[v] = v;
+      }
+
+      for (i = 0; i < nSelectors; i++) {
+        v = selectorMtf[i];
+        tmp = pos[v];
+        while (v > 0) {
+          pos[v] = pos[v - 1];
+          v--;
+        }
+        pos[0] = tmp;
+        selector[i] = tmp;
+      }
+    }
+
+    /* Now the coding tables */
+    for (t = 0; t < nGroups; t++) {
+      int curr = bsR(5);
+      for (i = 0; i < alphaSize; i++) {
+        while (bsR(1) == 1) {
+          if (bsR(1) == 0) {
+            curr++;
+          } else {
+            curr--;
+          }
+        }
+        len[t][i] = (char) curr;
+      }
+    }
+
+    /* Create the Huffman decoding tables */
+    for (t = 0; t < nGroups; t++) {
+      minLen = 32;
+      maxLen = 0;
+      for (i = 0; i < alphaSize; i++) {
+        if (len[t][i] > maxLen) {
+          maxLen = len[t][i];
+        }
+        if (len[t][i] < minLen) {
+          minLen = len[t][i];
+        }
+      }
+      hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen, maxLen, alphaSize);
+      minLens[t] = minLen;
+    }
+  }
+
+  private void getAndMoveToFrontDecode() throws IOException {
+    char[] yy = new char[256];
+    int i, j, nextSym, limitLast;
+    int EOB, groupNo, groupPos;
+
+    limitLast = baseBlockSize * blockSize100k;
+    origPtr = bsGetIntVS(24);
+
+    recvDecodingTables();
+    EOB = nInUse + 1;
+    groupNo = -1;
+    groupPos = 0;
+
+    /*
+     * Setting up the unzftab entries here is not strictly necessary, but it
+     * does save having to do it later in a separate pass, and so saves a
+     * block's worth of cache misses.
+     */
+    for (i = 0; i <= 255; i++) {
+      unzftab[i] = 0;
+    }
+
+    for (i = 0; i <= 255; i++) {
+      yy[i] = (char) i;
+    }
+
+    last = -1;
+
+    {
+      int zt, zn, zvec, zj;
+      if (groupPos == 0) {
+        groupNo++;
+        groupPos = G_SIZE;
+      }
+      groupPos--;
+      zt = selector[groupNo];
+      zn = minLens[zt];
+      zvec = bsR(zn);
+      while (zvec > limit[zt][zn]) {
+        zn++;
+        {
+          {
+            while (bsLive < 1) {
+              int zzi = 0;
+              try {
+                zzi = readBs();
+              } catch (IOException e) {
+                compressedStreamEOF();
+              }
+              if (zzi == -1) {
+                compressedStreamEOF();
+              }
+              bsBuff = (bsBuff << 8) | (zzi & 0xff);
+              bsLive += 8;
+            }
+          }
+          zj = (bsBuff >> (bsLive - 1)) & 1;
+          bsLive--;
+        }
+        zvec = (zvec << 1) | zj;
+      }
+      nextSym = perm[zt][zvec - base[zt][zn]];
+    }
+
+    while (true) {
+
+      if (nextSym == EOB) {
+        break;
+      }
+
+      if (nextSym == RUNA || nextSym == RUNB) {
+        char ch;
+        int s = -1;
+        int N = 1;
+        do {
+          if (nextSym == RUNA) {
+            s = s + (0 + 1) * N;
+          } else if (nextSym == RUNB) {
+            s = s + (1 + 1) * N;
+          }
+          N = N * 2;
+          {
+            int zt, zn, zvec, zj;
+            if (groupPos == 0) {
+              groupNo++;
+              groupPos = G_SIZE;
+            }
+            groupPos--;
+            zt = selector[groupNo];
+            zn = minLens[zt];
+            zvec = bsR(zn);
+            while (zvec > limit[zt][zn]) {
+              zn++;
+              {
+                {
+                  while (bsLive < 1) {
+                    int zzi = 0;
+                    try {
+                      zzi = readBs();
+                    } catch (IOException e) {
+                      compressedStreamEOF();
+                    }
+                    if (zzi == -1) {
+                      compressedStreamEOF();
+                    }
+                    bsBuff = (bsBuff << 8) | (zzi & 0xff);
+                    bsLive += 8;
+                  }
+                }
+                zj = (bsBuff >> (bsLive - 1)) & 1;
+                bsLive--;
+              }
+              zvec = (zvec << 1) | zj;
+            }
+            nextSym = perm[zt][zvec - base[zt][zn]];
+          }
+        } while (nextSym == RUNA || nextSym == RUNB);
+
+        s++;
+        ch = seqToUnseq[yy[0]];
+        unzftab[ch] += s;
+
+        while (s > 0) {
+          last++;
+          ll8[last] = ch;
+          s--;
+        }
+
+        if (last >= limitLast) {
+          blockOverrun();
+        }
+        continue;
+      } else {
+        char tmp;
+        last++;
+        if (last >= limitLast) {
+          blockOverrun();
+        }
+
+        tmp = yy[nextSym - 1];
+        unzftab[seqToUnseq[tmp]]++;
+        ll8[last] = seqToUnseq[tmp];
+
+        /*
+         * This loop is hammered during decompression, hence the unrolling.
+         * 
+         * for (j = nextSym-1; j > 0; j--) yy[j] = yy[j-1];
+         */
+
+        j = nextSym - 1;
+        for (; j > 3; j -= 4) {
+          yy[j] = yy[j - 1];
+          yy[j - 1] = yy[j - 2];
+          yy[j - 2] = yy[j - 3];
+          yy[j - 3] = yy[j - 4];
+        }
+        for (; j > 0; j--) {
+          yy[j] = yy[j - 1];
+        }
+
+        yy[0] = tmp;
+        {
+          int zt, zn, zvec, zj;
+          if (groupPos == 0) {
+            groupNo++;
+            groupPos = G_SIZE;
+          }
+          groupPos--;
+          zt = selector[groupNo];
+          zn = minLens[zt];
+          zvec = bsR(zn);
+          while (zvec > limit[zt][zn]) {
+            zn++;
+            {
+              {
+                while (bsLive < 1) {
+                  int zzi;
+                  char thech = 0;
+                  try {
+                    thech = (char) readBs();
+                  } catch (IOException e) {
+                    compressedStreamEOF();
+                  }
+                  zzi = thech;
+                  bsBuff = (bsBuff << 8) | (zzi & 0xff);
+                  bsLive += 8;
+                }
+              }
+              zj = (bsBuff >> (bsLive - 1)) & 1;
+              bsLive--;
+            }
+            zvec = (zvec << 1) | zj;
+          }
+          nextSym = perm[zt][zvec - base[zt][zn]];
+        }
+        continue;
+      }
+    }
+  }
+
+  private void setupBlock() throws IOException {
+    int[] cftab = new int[257];
+    char ch;
+
+    cftab[0] = 0;
+    for (i = 1; i <= 256; i++) {
+      cftab[i] = unzftab[i - 1];
+    }
+    for (i = 1; i <= 256; i++) {
+      cftab[i] += cftab[i - 1];
+    }
+
+    for (i = 0; i <= last; i++) {
+      ch = ll8[i];
+      tt[cftab[ch]] = i;
+      cftab[ch]++;
+    }
+    cftab = null;
+
+    tPos = tt[origPtr];
+
+    count = 0;
+    i2 = 0;
+    ch2 = 256; /* not a char and not EOF */
+
+    if (blockRandomised) {
+      rNToGo = 0;
+      rTPos = 0;
+      setupRandPartA();
+    } else {
+      setupNoRandPartA();
+    }
+  }
+
+  private void setupRandPartA() throws IOException {
+    if (i2 <= last) {
+      chPrev = ch2;
+      ch2 = ll8[tPos];
+      tPos = tt[tPos];
+      if (rNToGo == 0) {
+        rNToGo = rNums[rTPos];
+        rTPos++;
+        if (rTPos == 512) {
+          rTPos = 0;
+        }
+      }
+      rNToGo--;
+      ch2 ^= ((rNToGo == 1) ? 1 : 0);
+      i2++;
+
+      currentChar = ch2;
+      currentState = RAND_PART_B_STATE;
+      mCrc.updateCRC(ch2);
+    } else {
+      endBlock();
+      initBlock(false);
+      setupBlock();
+    }
+  }
+
+  private void setupNoRandPartA() throws IOException {
+    if (i2 <= last) {
+      chPrev = ch2;
+      ch2 = ll8[tPos];
+      tPos = tt[tPos];
+      i2++;
+
+      currentChar = ch2;
+      currentState = NO_RAND_PART_B_STATE;
+      mCrc.updateCRC(ch2);
+    } else {
+      endBlock();
+      initBlock(false);
+      setupBlock();
+    }
+  }
+
+  private void setupRandPartB() throws IOException {
+    if (ch2 != chPrev) {
+      currentState = RAND_PART_A_STATE;
+      count = 1;
+      setupRandPartA();
+    } else {
+      count++;
+      if (count >= 4) {
+        z = ll8[tPos];
+        tPos = tt[tPos];
+        if (rNToGo == 0) {
+          rNToGo = rNums[rTPos];
+          rTPos++;
+          if (rTPos == 512) {
+            rTPos = 0;
+          }
+        }
+        rNToGo--;
+        z ^= ((rNToGo == 1) ? 1 : 0);
+        j2 = 0;
+        currentState = RAND_PART_C_STATE;
+        setupRandPartC();
+      } else {
+        currentState = RAND_PART_A_STATE;
+        setupRandPartA();
+      }
+    }
+  }
+
+  private void setupRandPartC() throws IOException {
+    if (j2 < (int) z) {
+      currentChar = ch2;
+      mCrc.updateCRC(ch2);
+      j2++;
+    } else {
+      currentState = RAND_PART_A_STATE;
+      i2++;
+      count = 0;
+      setupRandPartA();
+    }
+  }
+
+  private void setupNoRandPartB() throws IOException {
+    if (ch2 != chPrev) {
+      currentState = NO_RAND_PART_A_STATE;
+      count = 1;
+      setupNoRandPartA();
+    } else {
+      count++;
+      if (count >= 4) {
+        z = ll8[tPos];
+        tPos = tt[tPos];
+        currentState = NO_RAND_PART_C_STATE;
+        j2 = 0;
+        setupNoRandPartC();
+      } else {
+        currentState = NO_RAND_PART_A_STATE;
+        setupNoRandPartA();
+      }
+    }
+  }
+
+  private void setupNoRandPartC() throws IOException {
+    if (j2 < (int) z) {
+      currentChar = ch2;
+      mCrc.updateCRC(ch2);
+      j2++;
+    } else {
+      currentState = NO_RAND_PART_A_STATE;
+      i2++;
+      count = 0;
+      setupNoRandPartA();
+    }
+  }
+
+  private void setDecompressStructureSizes(int newSize100k) throws IOException {
+    if (!(0 <= newSize100k && newSize100k <= 9 && 0 <= blockSize100k && blockSize100k <= 9)) {
+      cadvise("Invalid block size");
+    }
+
+    blockSize100k = newSize100k;
+
+    if (newSize100k == 0) {
+      return;
+    }
+
+    int n = baseBlockSize * newSize100k;
+    ll8 = new char[n];
+    tt = new int[n];
+  }
+
+  private static class CRC {
+    public static int crc32Table[] = { 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc, 0x17c56b6b,
+        0x1a864db2, 0x1e475005, 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, 0x350c9b64, 0x31cd86d3, 0x3c8ea00a,
+        0x384fbdbd, 0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9, 0x5f15adac, 0x5bd4b01b, 0x569796c2, 0x52568b75,
+        0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011, 0x791d4014, 0x7ddc5da3, 0x709f7b7a, 0x745e66cd, 0x9823b6e0,
+        0x9ce2ab57, 0x91a18d8e, 0x95609039, 0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5, 0xbe2b5b58, 0xbaea46ef,
+        0xb7a96036, 0xb3687d81, 0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d, 0xd4326d90, 0xd0f37027, 0xddb056fe,
+        0xd9714b49, 0xc7361b4c, 0xc3f706fb, 0xceb42022, 0xca753d95, 0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1,
+        0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d, 0x34867077, 0x30476dc0, 0x3d044b19, 0x39c556ae, 0x278206ab,
+        0x23431b1c, 0x2e003dc5, 0x2ac12072, 0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16, 0x018aeb13, 0x054bf6a4,
+        0x0808d07d, 0x0cc9cdca, 0x7897ab07, 0x7c56b6b0, 0x71159069, 0x75d48dde, 0x6b93dddb, 0x6f52c06c, 0x6211e6b5,
+        0x66d0fb02, 0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1, 0x53dc6066, 0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba,
+        0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e, 0xbfa1b04b, 0xbb60adfc, 0xb6238b25, 0xb2e29692, 0x8aad2b2f,
+        0x8e6c3698, 0x832f1041, 0x87ee0df6, 0x99a95df3, 0x9d684044, 0x902b669d, 0x94ea7b2a, 0xe0b41de7, 0xe4750050,
+        0xe9362689, 0xedf73b3e, 0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2, 0xc6bcf05f, 0xc27dede8, 0xcf3ecb31,
+        0xcbffd686, 0xd5b88683, 0xd1799b34, 0xdc3abded, 0xd8fba05a, 0x690ce0ee, 0x6dcdfd59, 0x608edb80, 0x644fc637,
+        0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb, 0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f, 0x5c007b8a,
+        0x58c1663d, 0x558240e4, 0x51435d53, 0x251d3b9e, 0x21dc2629, 0x2c9f00f0, 0x285e1d47, 0x36194d42, 0x32d850f5,
+        0x3f9b762c, 0x3b5a6b9b, 0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff, 0x1011a0fa, 0x14d0bd4d, 0x19939b94,
+        0x1d528623, 0xf12f560e, 0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7, 0xe22b20d2, 0xe6ea3d65, 0xeba91bbc, 0xef68060b,
+        0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f, 0xc423cd6a, 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3, 0xbd3e8d7e,
+        0xb9ff90c9, 0xb4bcb610, 0xb07daba7, 0xae3afba2, 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b, 0x9b3660c6, 0x9ff77d71,
+        0x92b45ba8, 0x9675461f, 0x8832161a, 0x8cf30bad, 0x81b02d74, 0x857130c3, 0x5d8a9099, 0x594b8d2e, 0x5408abf7,
+        0x50c9b640, 0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c, 0x7b827d21, 0x7f436096, 0x7200464f, 0x76c15bf8,
+        0x68860bfd, 0x6c47164a, 0x61043093, 0x65c52d24, 0x119b4be9, 0x155a565e, 0x18197087, 0x1cd86d30, 0x029f3d35,
+        0x065e2082, 0x0b1d065b, 0x0fdc1bec, 0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088, 0x2497d08d, 0x2056cd3a,
+        0x2d15ebe3, 0x29d4f654, 0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0, 0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb,
+        0xdbee767c, 0xe3a1cbc1, 0xe760d676, 0xea23f0af, 0xeee2ed18, 0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4,
+        0x89b8fd09, 0x8d79e0be, 0x803ac667, 0x84fbdbd0, 0x9abc8bd5, 0x9e7d9662, 0x933eb0bb, 0x97ffad0c, 0xafb010b1,
+        0xab710d06, 0xa6322bdf, 0xa2f33668, 0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4 };
+
+    public CRC() {
+      initialiseCRC();
+    }
+
+    void initialiseCRC() {
+      globalCrc = 0xffffffff;
+    }
+
+    int getFinalCRC() {
+      return ~globalCrc;
+    }
+
+    void updateCRC(int inCh) {
+      int temp = (globalCrc >> 24) ^ inCh;
+      if (temp < 0) {
+        temp = 256 + temp;
+      }
+      globalCrc = (globalCrc << 8) ^ CRC.crc32Table[temp];
+    }
+
+    int globalCrc;
+  }
+}
\ No newline at end of file
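
The bsBuff/bsLive pair above implements a simple MSB-first bit reader, which is what lets CBZip2InputStream resume mid-byte at bit-aligned block boundaries (see the bsLive adjustment in initBlock()). Below is a self-contained sketch of the same scheme; the BitReaderSketch class name and sample bytes are assumptions for illustration, not part of the patch.

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;

public class BitReaderSketch {
  private final InputStream in;
  private int bsBuff; // buffered bits, right-aligned in an int
  private int bsLive; // number of buffered bits not yet consumed

  public BitReaderSketch(InputStream in) {
    this.in = in;
  }

  // Read the next n bits (n <= 24) as an unsigned value, MSB first.
  public int readBits(int n) throws IOException {
    while (bsLive < n) {
      int b = in.read();
      if (b == -1) {
        throw new IOException("unexpected end of stream");
      }
      bsBuff = (bsBuff << 8) | (b & 0xff); // append the byte to the buffer
      bsLive += 8;
    }
    int v = (bsBuff >> (bsLive - n)) & ((1 << n) - 1);
    bsLive -= n;
    return v;
  }

  public static void main(String[] args) throws IOException {
    // 0xB5 = 1011 0101: reading 3 bits yields 0b101 = 5, the next 5 bits yield 0b10101 = 21.
    BitReaderSketch r = new BitReaderSketch(new ByteArrayInputStream(new byte[] { (byte) 0xB5 }));
    System.out.println(r.readBits(3)); // 5
    System.out.println(r.readBits(5)); // 21
  }
}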

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/text/LineParser.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/text/LineParser.java b/crunch-core/src/main/java/org/apache/crunch/io/text/LineParser.java
new file mode 100644
index 0000000..9438014
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/text/LineParser.java
@@ -0,0 +1,125 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.text;
+
+import java.util.Iterator;
+import java.util.List;
+import java.util.StringTokenizer;
+
+import org.apache.crunch.MapFn;
+import org.apache.crunch.Pair;
+import org.apache.crunch.fn.CompositeMapFn;
+import org.apache.crunch.fn.IdentityFn;
+import org.apache.crunch.types.PTableType;
+import org.apache.crunch.types.PType;
+
+import com.google.common.base.Splitter;
+import com.google.common.collect.ImmutableList;
+
+/**
+ * An abstraction for parsing the lines of a text file using a {@code PType<T>} to
+ * convert the lines of text into a given data type. 
+ *
+ * @param <T> The type returned by the text parsing
+ */
+abstract class LineParser<T> {
+
+  public static <S> LineParser<S> forType(PType<S> ptype) {
+    return new SimpleLineParser<S>(ptype);
+  }
+  
+  public static <K, V> LineParser<Pair<K, V>> forTableType(PTableType<K, V> ptt, String sep) {
+    return new KeyValueLineParser<K, V>(ptt, sep); 
+  }
+  
+  private MapFn<String, T> mapFn;
+  
+  public void initialize() {
+    mapFn = getMapFn();
+    mapFn.initialize();
+  }
+    
+  public T parse(String line) {
+    return mapFn.map(line);
+  }
+  
+  protected abstract MapFn<String, T> getMapFn();
+  
+  private static <T> MapFn<String, T> getMapFnForPType(PType<T> ptype) {
+    MapFn ret = null;
+    if (String.class.equals(ptype.getTypeClass())) {
+      ret = (MapFn) IdentityFn.getInstance();
+    } else {
+      // Check for a composite MapFn for the PType.
+      // Note that this won't work for Avro-- need to solve that.
+      ret = ptype.getInputMapFn();
+      if (ret instanceof CompositeMapFn) {
+        ret = ((CompositeMapFn) ret).getSecond();
+      }
+    }
+    return ret;
+  }
+  
+  private static class SimpleLineParser<S> extends LineParser<S> {
+
+    private final PType<S> ptype;
+    
+    public SimpleLineParser(PType<S> ptype) {
+      this.ptype = ptype;
+    }
+
+    @Override
+    protected MapFn<String, S> getMapFn() {
+      return getMapFnForPType(ptype);
+    }
+  }
+  
+  private static class KeyValueLineParser<K, V> extends LineParser<Pair<K, V>> {
+
+    private final PTableType<K, V> ptt;
+    private final String sep;
+    
+    public KeyValueLineParser(PTableType<K, V> ptt, String sep) {
+      this.ptt = ptt;
+      this.sep = sep;
+    }
+
+    @Override
+    protected MapFn<String, Pair<K, V>> getMapFn() {
+      final MapFn<String, K> keyMapFn = getMapFnForPType(ptt.getKeyType());
+      final MapFn<String, V> valueMapFn = getMapFnForPType(ptt.getValueType());
+      
+      return new MapFn<String, Pair<K, V>>() {
+        @Override
+        public void initialize() {
+          keyMapFn.initialize();
+          valueMapFn.initialize();
+        }
+        
+        @Override
+        public Pair<K, V> map(String input) {
+          List<String> kv = ImmutableList.copyOf(Splitter.on(sep).limit(2).split(input));
+          if (kv.size() != 2) {
+            throw new RuntimeException("Invalid input string: " + input);
+          }
+          return Pair.of(keyMapFn.map(kv.get(0)), valueMapFn.map(kv.get(1)));
+        }
+      };
+    }
+  }
+}
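
A hedged usage sketch of LineParser. Since the class is package-private, the snippet assumes it is compiled in the same org.apache.crunch.io.text package; the class name and sample input line are made up for illustration.

package org.apache.crunch.io.text;

import org.apache.crunch.Pair;
import org.apache.crunch.types.writable.Writables;

public class LineParserSketch {
  public static void main(String[] args) {
    // Parse tab-separated lines into (String, String) pairs.
    LineParser<Pair<String, String>> parser = LineParser.forTableType(
        Writables.tableOf(Writables.strings(), Writables.strings()), "\t");
    parser.initialize();
    Pair<String, String> kv = parser.parse("apple\tred");
    System.out.println(kv.first() + " -> " + kv.second()); // apple -> red
  }
}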

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/text/NLineFileSource.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/text/NLineFileSource.java b/crunch-core/src/main/java/org/apache/crunch/io/text/NLineFileSource.java
new file mode 100644
index 0000000..40e2dbd
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/text/NLineFileSource.java
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.text;
+
+import java.io.IOException;
+
+import org.apache.crunch.io.CompositePathIterable;
+import org.apache.crunch.io.FormatBundle;
+import org.apache.crunch.io.ReadableSource;
+import org.apache.crunch.io.impl.FileSourceImpl;
+import org.apache.crunch.types.PType;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;
+
+/**
+ * A {@code Source} instance that uses the {@code NLineInputFormat}, which gives each map
+ * task a fraction of the lines in a text file as input. Most useful when running simulations
+ * on Hadoop, where each line represents configuration information about each simulation
+ * run.
+ */
+public class NLineFileSource<T> extends FileSourceImpl<T> implements ReadableSource<T> {
+
+  private static FormatBundle getBundle(int linesPerTask) {
+    FormatBundle bundle = FormatBundle.forInput(NLineInputFormat.class);
+    bundle.set(NLineInputFormat.LINES_PER_MAP, String.valueOf(linesPerTask));
+    return bundle;
+  }
+  
+  /**
+   * Create a new {@code NLineFileSource} instance.
+   * 
+   * @param path The path to the input data, as a String
+   * @param ptype The PType to use for processing the data
+   * @param linesPerTask The number of lines from the input each map task will process
+   */
+  public NLineFileSource(String path, PType<T> ptype, int linesPerTask) {
+    this(new Path(path), ptype, linesPerTask);
+  }
+  
+  /**
+   * Create a new {@code NLineFileSource} instance.
+   *  
+   * @param path The {@code Path} to the input data
+   * @param ptype The PType to use for processing the data
+   * @param linesPerTask The number of lines from the input each map task will process
+   */
+  public NLineFileSource(Path path, PType<T> ptype, int linesPerTask) {
+    super(path, ptype, getBundle(linesPerTask));
+  }
+
+  @Override
+  public String toString() {
+    return "NLine(" + path + ")";
+  }
+  
+  @Override
+  public Iterable<T> read(Configuration conf) throws IOException {
+    return CompositePathIterable.create(path.getFileSystem(conf), path,
+        new TextFileReaderFactory<T>(LineParser.forType(ptype)));
+  }
+}
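
A hedged sketch of how NLineFileSource might be used from a pipeline; the input path, output path, and driver class name are assumptions for illustration.

import org.apache.crunch.PCollection;
import org.apache.crunch.Pipeline;
import org.apache.crunch.impl.mr.MRPipeline;
import org.apache.crunch.io.text.NLineFileSource;
import org.apache.crunch.types.writable.Writables;

public class SimulationDriver {
  public static void main(String[] args) throws Exception {
    Pipeline pipeline = new MRPipeline(SimulationDriver.class);
    // Each map task receives 10 configuration lines from the input file.
    PCollection<String> configs = pipeline.read(
        new NLineFileSource<String>("/data/simulations.txt", Writables.strings(), 10));
    pipeline.writeTextFile(configs, "/out/echoed-configs");
    pipeline.done();
  }
}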

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/text/TextFileReaderFactory.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/text/TextFileReaderFactory.java b/crunch-core/src/main/java/org/apache/crunch/io/text/TextFileReaderFactory.java
new file mode 100644
index 0000000..e1fea6e
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/text/TextFileReaderFactory.java
@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.text;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.Iterator;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.crunch.io.FileReaderFactory;
+import org.apache.crunch.io.impl.AutoClosingIterator;
+import org.apache.crunch.types.PType;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import com.google.common.collect.Iterators;
+import com.google.common.collect.UnmodifiableIterator;
+
+public class TextFileReaderFactory<T> implements FileReaderFactory<T> {
+
+  private static final Log LOG = LogFactory.getLog(TextFileReaderFactory.class);
+
+  private final LineParser<T> parser;
+
+  public TextFileReaderFactory(PType<T> ptype) {
+    this(LineParser.forType(ptype));
+  }
+  
+  public TextFileReaderFactory(LineParser<T> parser) {
+    this.parser = parser;
+  }
+
+  @Override
+  public Iterator<T> read(FileSystem fs, Path path) {
+    parser.initialize();
+
+    FSDataInputStream is;
+    try {
+      is = fs.open(path);
+    } catch (IOException e) {
+      LOG.info("Could not read path: " + path, e);
+      return Iterators.emptyIterator();
+    }
+
+    final BufferedReader reader = new BufferedReader(new InputStreamReader(is));
+    return new AutoClosingIterator<T>(reader, new UnmodifiableIterator<T>() {
+      private String nextLine;
+
+      @Override
+      public boolean hasNext() {
+        try {
+          return (nextLine = reader.readLine()) != null;
+        } catch (IOException e) {
+          LOG.info("Exception reading text file stream", e);
+          return false;
+        }
+      }
+
+      @Override
+      public T next() {
+        return parser.parse(nextLine);
+      }
+    });
+  }
+}
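
A hedged sketch of reading a text file directly through TextFileReaderFactory, outside of a MapReduce job; the path and class name are assumptions for illustration.

import java.util.Iterator;

import org.apache.crunch.io.text.TextFileReaderFactory;
import org.apache.crunch.types.writable.Writables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class LocalTextRead {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path path = new Path("/data/words.txt");
    FileSystem fs = path.getFileSystem(conf);
    // Each line of the file is turned into a String by the line parser.
    Iterator<String> lines =
        new TextFileReaderFactory<String>(Writables.strings()).read(fs, path);
    while (lines.hasNext()) {
      System.out.println(lines.next());
    }
  }
}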

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/text/TextFileSource.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/text/TextFileSource.java b/crunch-core/src/main/java/org/apache/crunch/io/text/TextFileSource.java
new file mode 100644
index 0000000..026fca9
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/text/TextFileSource.java
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.text;
+
+import java.io.IOException;
+
+import org.apache.crunch.io.CompositePathIterable;
+import org.apache.crunch.io.ReadableSource;
+import org.apache.crunch.io.impl.FileSourceImpl;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.avro.AvroTypeFamily;
+import org.apache.crunch.types.avro.AvroUtf8InputFormat;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
+
+public class TextFileSource<T> extends FileSourceImpl<T> implements ReadableSource<T> {
+
+  private static boolean isBZip2(Path path) {
+    String strPath = path.toString();
+    return strPath.endsWith(".bz") || strPath.endsWith(".bz2");
+  }
+
+  private static <S> Class<? extends FileInputFormat<?, ?>> getInputFormat(Path path, PType<S> ptype) {
+    if (ptype.getFamily().equals(AvroTypeFamily.getInstance())) {
+      return AvroUtf8InputFormat.class;
+    } else if (isBZip2(path)) {
+      return BZip2TextInputFormat.class;
+    } else {
+      return TextInputFormat.class;
+    }
+  }
+
+  public TextFileSource(Path path, PType<T> ptype) {
+    super(path, ptype, getInputFormat(path, ptype));
+  }
+
+  @Override
+  public long getSize(Configuration conf) {
+    long sz = super.getSize(conf);
+    if (isBZip2(path)) {
+      sz *= 10; // Arbitrary compression factor
+    }
+    return sz;
+  }
+
+  @Override
+  public String toString() {
+    return "Text(" + path + ")";
+  }
+
+  @Override
+  public Iterable<T> read(Configuration conf) throws IOException {
+    return CompositePathIterable.create(path.getFileSystem(conf), path,
+        new TextFileReaderFactory<T>(LineParser.forType(ptype)));
+  }
+}
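
A hedged sketch of reading a bzip2-compressed text file: because the path ends in .bz2, TextFileSource selects BZip2TextInputFormat above. The paths and driver class name are assumptions for illustration.

import org.apache.crunch.PCollection;
import org.apache.crunch.Pipeline;
import org.apache.crunch.impl.mr.MRPipeline;
import org.apache.crunch.io.text.TextFileSource;
import org.apache.crunch.types.writable.Writables;
import org.apache.hadoop.fs.Path;

public class Bzip2ReadDriver {
  public static void main(String[] args) throws Exception {
    Pipeline pipeline = new MRPipeline(Bzip2ReadDriver.class);
    // The .bz2 suffix routes this read through the splittable bzip2 input format.
    PCollection<String> lines = pipeline.read(
        new TextFileSource<String>(new Path("/logs/access_log.bz2"), Writables.strings()));
    pipeline.writeTextFile(lines, "/out/decompressed-lines");
    pipeline.done();
  }
}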

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/text/TextFileSourceTarget.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/text/TextFileSourceTarget.java b/crunch-core/src/main/java/org/apache/crunch/io/text/TextFileSourceTarget.java
new file mode 100644
index 0000000..1d1211e
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/text/TextFileSourceTarget.java
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.text;
+
+import org.apache.crunch.io.FileNamingScheme;
+import org.apache.crunch.io.SequentialFileNamingScheme;
+import org.apache.crunch.io.impl.ReadableSourcePathTargetImpl;
+import org.apache.crunch.types.PType;
+import org.apache.hadoop.fs.Path;
+
+public class TextFileSourceTarget<T> extends ReadableSourcePathTargetImpl<T> {
+
+  public TextFileSourceTarget(String path, PType<T> ptype) {
+    this(new Path(path), ptype);
+  }
+
+  public TextFileSourceTarget(Path path, PType<T> ptype) {
+    this(path, ptype, new SequentialFileNamingScheme());
+  }
+
+  public TextFileSourceTarget(Path path, PType<T> ptype, FileNamingScheme fileNamingScheme) {
+    super(new TextFileSource<T>(path, ptype), new TextFileTarget(path), fileNamingScheme);
+  }
+
+  @Override
+  public String toString() {
+    return target.toString();
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/text/TextFileTableSource.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/text/TextFileTableSource.java b/crunch-core/src/main/java/org/apache/crunch/io/text/TextFileTableSource.java
new file mode 100644
index 0000000..94fc5fd
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/text/TextFileTableSource.java
@@ -0,0 +1,81 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.text;
+
+import java.io.IOException;
+
+import org.apache.crunch.Pair;
+import org.apache.crunch.io.CompositePathIterable;
+import org.apache.crunch.io.FormatBundle;
+import org.apache.crunch.io.ReadableSource;
+import org.apache.crunch.io.impl.FileTableSourceImpl;
+import org.apache.crunch.types.PTableType;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
+
+/** 
+ * A {@code Source} that uses the {@code KeyValueTextInputFormat} to process
+ * input text. If a separator for the keys and values in the text file is not specified,
+ * a tab character is used. 
+ */
+public class TextFileTableSource<K, V> extends FileTableSourceImpl<K, V>
+    implements ReadableSource<Pair<K, V>> {
+
+  // CRUNCH-125: Maintain compatibility with both versions of the KeyValueTextInputFormat's
+  // configuration field for specifying the separator character.
+  private static final String OLD_KV_SEP = "key.value.separator.in.input.line";
+  private static final String NEW_KV_SEP = "mapreduce.input.keyvaluelinerecordreader.key.value.separator";
+  
+  private static FormatBundle getBundle(String sep) {
+    FormatBundle bundle = FormatBundle.forInput(KeyValueTextInputFormat.class);
+    bundle.set(OLD_KV_SEP, sep);
+    bundle.set(NEW_KV_SEP, sep);
+    return bundle;
+  }
+  
+  private final String separator;
+  
+  public TextFileTableSource(String path, PTableType<K, V> tableType) {
+    this(new Path(path), tableType);
+  }
+  
+  public TextFileTableSource(Path path, PTableType<K, V> tableType) {
+    this(path, tableType, "\t");
+  }
+  
+  public TextFileTableSource(String path, PTableType<K, V> tableType, String separator) {
+    this(new Path(path), tableType, separator);
+  }
+  
+  public TextFileTableSource(Path path, PTableType<K, V> tableType, String separator) {
+    super(path, tableType, getBundle(separator));
+    this.separator = separator;
+  }
+
+  @Override
+  public String toString() {
+    return "KeyValueText(" + path + ")";
+  }
+
+  @Override
+  public Iterable<Pair<K, V>> read(Configuration conf) throws IOException {
+    return CompositePathIterable.create(path.getFileSystem(conf), path,
+        new TextFileReaderFactory<Pair<K, V>>(LineParser.forTableType(getTableType(), separator)));
+  }
+}
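
A hedged sketch of reading a delimited key-value file as a PTable through TextFileTableSource; the comma separator, paths, and driver class name are assumptions for illustration.

import org.apache.crunch.PTable;
import org.apache.crunch.Pipeline;
import org.apache.crunch.impl.mr.MRPipeline;
import org.apache.crunch.io.text.TextFileTableSource;
import org.apache.crunch.types.writable.Writables;

public class KeyValueReadDriver {
  public static void main(String[] args) throws Exception {
    Pipeline pipeline = new MRPipeline(KeyValueReadDriver.class);
    // Each line of the input is split on the first comma into a (key, value) pair.
    PTable<String, String> capitals = pipeline.read(
        new TextFileTableSource<String, String>("/data/capitals.csv",
            Writables.tableOf(Writables.strings(), Writables.strings()), ","));
    pipeline.writeTextFile(capitals, "/out/capitals");
    pipeline.done();
  }
}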

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/text/TextFileTableSourceTarget.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/text/TextFileTableSourceTarget.java b/crunch-core/src/main/java/org/apache/crunch/io/text/TextFileTableSourceTarget.java
new file mode 100644
index 0000000..dec97e5
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/text/TextFileTableSourceTarget.java
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io.text;
+
+import org.apache.crunch.Pair;
+import org.apache.crunch.TableSourceTarget;
+import org.apache.crunch.io.FileNamingScheme;
+import org.apache.crunch.io.SequentialFileNamingScheme;
+import org.apache.crunch.io.impl.ReadableSourcePathTargetImpl;
+import org.apache.crunch.types.PTableType;
+import org.apache.hadoop.fs.Path;
+
+/**
+ * A {@code TableSource} and {@code SourceTarget} implementation that uses the
+ * {@code KeyValueTextInputFormat} and {@code TextOutputFormat} to support reading
+ * and writing text files as {@code PTable} instances using a tab separator for
+ * the keys and the values.
+ */
+public class TextFileTableSourceTarget<K, V> extends ReadableSourcePathTargetImpl<Pair<K, V>> implements
+    TableSourceTarget<K, V> {
+
+  private final PTableType<K, V> tableType;
+  
+  public TextFileTableSourceTarget(String path, PTableType<K, V> tableType) {
+    this(new Path(path), tableType);
+  }
+
+  public TextFileTableSourceTarget(Path path, PTableType<K, V> tableType) {
+    this(path, tableType, new SequentialFileNamingScheme());
+  }
+
+  public TextFileTableSourceTarget(Path path, PTableType<K, V> tableType,
+      FileNamingScheme fileNamingScheme) {
+    super(new TextFileTableSource<K, V>(path, tableType), new TextFileTarget(path),
+        fileNamingScheme);
+    this.tableType = tableType;
+  }
+
+  @Override
+  public PTableType<K, V> getTableType() {
+    return tableType;
+  }
+
+  @Override
+  public String toString() {
+    return target.toString();
+  }
+}
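
A hedged sketch of using TextFileTableSourceTarget as both a source and a target for tab-separated (key, value) text; the paths and driver class name are assumptions for illustration.

import org.apache.crunch.PTable;
import org.apache.crunch.Pipeline;
import org.apache.crunch.impl.mr.MRPipeline;
import org.apache.crunch.io.text.TextFileTableSourceTarget;
import org.apache.crunch.types.writable.Writables;

public class TabTableCopy {
  public static void main(String[] args) throws Exception {
    Pipeline pipeline = new MRPipeline(TabTableCopy.class);
    // Read "key<TAB>value" lines as a PTable.
    PTable<String, String> pairs = pipeline.read(
        new TextFileTableSourceTarget<String, String>("/data/pairs.txt",
            Writables.tableOf(Writables.strings(), Writables.strings())));
    // Write the table back out as tab-separated text elsewhere.
    pipeline.write(pairs, new TextFileTableSourceTarget<String, String>(
        "/out/pairs-copy", Writables.tableOf(Writables.strings(), Writables.strings())));
    pipeline.done();
  }
}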


[08/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/io/text/TextFileTarget.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/text/TextFileTarget.java b/crunch/src/main/java/org/apache/crunch/io/text/TextFileTarget.java
deleted file mode 100644
index 0c3e6a4..0000000
--- a/crunch/src/main/java/org/apache/crunch/io/text/TextFileTarget.java
+++ /dev/null
@@ -1,109 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.io.text;
-
-import org.apache.avro.Schema;
-import org.apache.crunch.SourceTarget;
-import org.apache.crunch.io.FileNamingScheme;
-import org.apache.crunch.io.SequentialFileNamingScheme;
-import org.apache.crunch.io.impl.FileTargetImpl;
-import org.apache.crunch.types.Converter;
-import org.apache.crunch.types.PTableType;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.avro.AvroTextOutputFormat;
-import org.apache.crunch.types.avro.AvroType;
-import org.apache.crunch.types.avro.AvroTypeFamily;
-import org.apache.crunch.types.writable.WritableType;
-import org.apache.crunch.types.writable.WritableTypeFamily;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
-
-public class TextFileTarget extends FileTargetImpl {
-  private static Class<? extends FileOutputFormat> getOutputFormat(PType<?> ptype) {
-    if (ptype.getFamily().equals(AvroTypeFamily.getInstance())) {
-      return AvroTextOutputFormat.class;
-    } else {
-      return TextOutputFormat.class;
-    }
-  }
-
-  public <T> TextFileTarget(String path) {
-    this(new Path(path));
-  }
-
-  public <T> TextFileTarget(Path path) {
-    this(path, new SequentialFileNamingScheme());
-  }
-
-  public <T> TextFileTarget(Path path, FileNamingScheme fileNamingScheme) {
-    super(path, null, fileNamingScheme);
-  }
-
-  @Override
-  public Path getPath() {
-    return path;
-  }
-
-  @Override
-  public String toString() {
-    return "Text(" + path + ")";
-  }
-
-  @Override
-  public void configureForMapReduce(Job job, PType<?> ptype, Path outputPath, String name) {
-    Converter converter = ptype.getConverter();
-    Class keyClass = converter.getKeyClass();
-    Class valueClass = converter.getValueClass();
-    configureForMapReduce(job, keyClass, valueClass, getOutputFormat(ptype), outputPath, name);
-  }
-
-  @Override
-  public <T> SourceTarget<T> asSourceTarget(PType<T> ptype) {
-    if (!isTextCompatible(ptype)) {
-      return null;
-    }
-    if (ptype instanceof PTableType) {
-      return new TextFileTableSourceTarget(path, (PTableType) ptype);
-    }
-    return new TextFileSourceTarget<T>(path, ptype);
-  }
-  
-  private <T> boolean isTextCompatible(PType<T> ptype) {
-    if (AvroTypeFamily.getInstance().equals(ptype.getFamily())) {
-      AvroType<T> at = (AvroType<T>) ptype;
-      if (at.getSchema().equals(Schema.create(Schema.Type.STRING))) {
-        return true;
-      }
-    } else if (WritableTypeFamily.getInstance().equals(ptype.getFamily())) {
-      if (ptype instanceof PTableType) {
-        PTableType ptt = (PTableType) ptype;
-        return isText(ptt.getKeyType()) && isText(ptt.getValueType());
-      } else {
-        return isText(ptype);
-      }
-    }
-    return false;
-  }
-  
-  private <T> boolean isText(PType<T> wtype) {
-    return Text.class.equals(((WritableType) wtype).getSerializationClass());
-  }
-}
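
For context, a minimal usage sketch of this target, assuming the standard Crunch Pipeline/MRPipeline API (the paths and class name here are hypothetical, not part of the code above):

    import org.apache.crunch.PCollection;
    import org.apache.crunch.Pipeline;
    import org.apache.crunch.impl.mr.MRPipeline;
    import org.apache.crunch.io.text.TextFileTarget;

    public class TextTargetExample {
      public static void main(String[] args) {
        Pipeline pipeline = new MRPipeline(TextTargetExample.class);
        // Read plain-text lines and write them back out via the target above.
        PCollection<String> lines = pipeline.readTextFile("/tmp/input");
        lines.write(new TextFileTarget("/tmp/output"));
        pipeline.done();
      }
    }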

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/lib/Aggregate.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/lib/Aggregate.java b/crunch/src/main/java/org/apache/crunch/lib/Aggregate.java
deleted file mode 100644
index d4109cc..0000000
--- a/crunch/src/main/java/org/apache/crunch/lib/Aggregate.java
+++ /dev/null
@@ -1,272 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib;
-
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.List;
-import java.util.PriorityQueue;
-
-import org.apache.crunch.CombineFn;
-import org.apache.crunch.DoFn;
-import org.apache.crunch.Emitter;
-import org.apache.crunch.GroupingOptions;
-import org.apache.crunch.MapFn;
-import org.apache.crunch.PCollection;
-import org.apache.crunch.PObject;
-import org.apache.crunch.PTable;
-import org.apache.crunch.Pair;
-import org.apache.crunch.fn.Aggregators;
-import org.apache.crunch.fn.MapValuesFn;
-import org.apache.crunch.materialize.pobject.FirstElementPObject;
-import org.apache.crunch.types.PTableType;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.PTypeFamily;
-
-import com.google.common.collect.Lists;
-
-/**
- * Methods for performing various types of aggregations over {@link PCollection} instances.
- * 
- */
-public class Aggregate {
-
-  /**
-   * Returns a {@code PTable} that contains the unique elements of this collection mapped to a count
-   * of their occurrences.
-   */
-  public static <S> PTable<S, Long> count(PCollection<S> collect) {
-    PTypeFamily tf = collect.getTypeFamily();
-    return collect.parallelDo("Aggregate.count", new MapFn<S, Pair<S, Long>>() {
-      public Pair<S, Long> map(S input) {
-        return Pair.of(input, 1L);
-      }
-    }, tf.tableOf(collect.getPType(), tf.longs())).groupByKey()
-        .combineValues(Aggregators.SUM_LONGS());
-  }
-
-  /**
-   * Returns the number of elements in the provided PCollection.
-   * 
-   * @param collect The PCollection whose elements should be counted.
-   * @param <S> The type of the PCollection.
-   * @return A {@code PObject} containing the number of elements in the {@code PCollection}.
-   */
-  public static <S> PObject<Long> length(PCollection<S> collect) {
-    PTypeFamily tf = collect.getTypeFamily();
-    PTable<Integer, Long> countTable = collect
-        .parallelDo("Aggregate.count", new MapFn<S, Pair<Integer, Long>>() {
-          public Pair<Integer, Long> map(S input) {
-            return Pair.of(1, 1L);
-          }
-        }, tf.tableOf(tf.ints(), tf.longs()))
-        .groupByKey(GroupingOptions.builder().numReducers(1).build())
-        .combineValues(Aggregators.SUM_LONGS());
-    PCollection<Long> count = countTable.values();
-    return new FirstElementPObject<Long>(count);
-  }
-
-  public static class PairValueComparator<K, V> implements Comparator<Pair<K, V>> {
-    private final boolean ascending;
-
-    public PairValueComparator(boolean ascending) {
-      this.ascending = ascending;
-    }
-
-    @Override
-    public int compare(Pair<K, V> left, Pair<K, V> right) {
-      int cmp = ((Comparable<V>) left.second()).compareTo(right.second());
-      return ascending ? cmp : -cmp;
-    }
-  }
-
-  public static class TopKFn<K, V> extends DoFn<Pair<K, V>, Pair<Integer, Pair<K, V>>> {
-
-    private final int limit;
-    private final boolean maximize;
-    private transient PriorityQueue<Pair<K, V>> values;
-
-    public TopKFn(int limit, boolean ascending) {
-      this.limit = limit;
-      this.maximize = ascending;
-    }
-
-    public void initialize() {
-      this.values = new PriorityQueue<Pair<K, V>>(limit, new PairValueComparator<K, V>(maximize));
-    }
-
-    public void process(Pair<K, V> input, Emitter<Pair<Integer, Pair<K, V>>> emitter) {
-      values.add(input);
-      if (values.size() > limit) {
-        values.poll();
-      }
-    }
-
-    public void cleanup(Emitter<Pair<Integer, Pair<K, V>>> emitter) {
-      for (Pair<K, V> p : values) {
-        emitter.emit(Pair.of(0, p));
-      }
-    }
-  }
-
-  public static class TopKCombineFn<K, V> extends CombineFn<Integer, Pair<K, V>> {
-
-    private final int limit;
-    private final boolean maximize;
-
-    public TopKCombineFn(int limit, boolean maximize) {
-      this.limit = limit;
-      this.maximize = maximize;
-    }
-
-    @Override
-    public void process(Pair<Integer, Iterable<Pair<K, V>>> input,
-        Emitter<Pair<Integer, Pair<K, V>>> emitter) {
-      Comparator<Pair<K, V>> cmp = new PairValueComparator<K, V>(maximize);
-      PriorityQueue<Pair<K, V>> queue = new PriorityQueue<Pair<K, V>>(limit, cmp);
-      for (Pair<K, V> pair : input.second()) {
-        queue.add(pair);
-        if (queue.size() > limit) {
-          queue.poll();
-        }
-      }
-
-      List<Pair<K, V>> values = Lists.newArrayList(queue);
-      Collections.sort(values, cmp);
-      for (int i = values.size() - 1; i >= 0; i--) {
-        emitter.emit(Pair.of(0, values.get(i)));
-      }
-    }
-  }
-
-  public static <K, V> PTable<K, V> top(PTable<K, V> ptable, int limit, boolean maximize) {
-    PTypeFamily ptf = ptable.getTypeFamily();
-    PTableType<K, V> base = ptable.getPTableType();
-    PType<Pair<K, V>> pairType = ptf.pairs(base.getKeyType(), base.getValueType());
-    PTableType<Integer, Pair<K, V>> inter = ptf.tableOf(ptf.ints(), pairType);
-    return ptable.parallelDo("top" + limit + "map", new TopKFn<K, V>(limit, maximize), inter)
-        .groupByKey(1).combineValues(new TopKCombineFn<K, V>(limit, maximize))
-        .parallelDo("top" + limit + "reduce", new DoFn<Pair<Integer, Pair<K, V>>, Pair<K, V>>() {
-          public void process(Pair<Integer, Pair<K, V>> input, Emitter<Pair<K, V>> emitter) {
-            emitter.emit(input.second());
-          }
-        }, base);
-  }
-
-  /**
-   * Returns the largest element of the input collection, according to its natural ordering.
-   */
-  public static <S> PObject<S> max(PCollection<S> collect) {
-    Class<S> clazz = collect.getPType().getTypeClass();
-    if (!clazz.isPrimitive() && !Comparable.class.isAssignableFrom(clazz)) {
-      throw new IllegalArgumentException("Can only get max for Comparable elements, not for: "
-          + collect.getPType().getTypeClass());
-    }
-    PTypeFamily tf = collect.getTypeFamily();
-    PCollection<S> maxCollect = PTables.values(collect
-        .parallelDo("max", new DoFn<S, Pair<Boolean, S>>() {
-          private transient S max = null;
-
-          public void process(S input, Emitter<Pair<Boolean, S>> emitter) {
-            if (max == null || ((Comparable<S>) max).compareTo(input) < 0) {
-              max = input;
-            }
-          }
-
-          public void cleanup(Emitter<Pair<Boolean, S>> emitter) {
-            if (max != null) {
-              emitter.emit(Pair.of(true, max));
-            }
-          }
-        }, tf.tableOf(tf.booleans(), collect.getPType())).groupByKey(1)
-        .combineValues(new CombineFn<Boolean, S>() {
-          public void process(Pair<Boolean, Iterable<S>> input, Emitter<Pair<Boolean, S>> emitter) {
-            S max = null;
-            for (S v : input.second()) {
-              if (max == null || ((Comparable<S>) max).compareTo(v) < 0) {
-                max = v;
-              }
-            }
-            emitter.emit(Pair.of(input.first(), max));
-          }
-        }));
-    return new FirstElementPObject<S>(maxCollect);
-  }
-
-  /**
-   * Returns the smallest element of the input collection, according to its natural ordering.
-   */
-  public static <S> PObject<S> min(PCollection<S> collect) {
-    Class<S> clazz = collect.getPType().getTypeClass();
-    if (!clazz.isPrimitive() && !Comparable.class.isAssignableFrom(clazz)) {
-      throw new IllegalArgumentException("Can only get min for Comparable elements, not for: "
-          + collect.getPType().getTypeClass());
-    }
-    PTypeFamily tf = collect.getTypeFamily();
-    PCollection<S> minCollect = PTables.values(collect
-        .parallelDo("min", new DoFn<S, Pair<Boolean, S>>() {
-          private transient S min = null;
-
-          public void process(S input, Emitter<Pair<Boolean, S>> emitter) {
-            if (min == null || ((Comparable<S>) min).compareTo(input) > 0) {
-              min = input;
-            }
-          }
-
-          public void cleanup(Emitter<Pair<Boolean, S>> emitter) {
-            if (min != null) {
-              emitter.emit(Pair.of(false, min));
-            }
-          }
-        }, tf.tableOf(tf.booleans(), collect.getPType())).groupByKey(1)
-        .combineValues(new CombineFn<Boolean, S>() {
-          public void process(Pair<Boolean, Iterable<S>> input, Emitter<Pair<Boolean, S>> emitter) {
-            S min = null;
-            for (S v : input.second()) {
-              if (min == null || ((Comparable<S>) min).compareTo(v) > 0) {
-                min = v;
-              }
-            }
-            emitter.emit(Pair.of(input.first(), min));
-          }
-        }));
-    return new FirstElementPObject<S>(minCollect);
-  }
-
-  public static <K, V> PTable<K, Collection<V>> collectValues(PTable<K, V> collect) {
-    PTypeFamily tf = collect.getTypeFamily();
-    final PType<V> valueType = collect.getValueType();
-    return collect.groupByKey().parallelDo("collect",
-        new MapValuesFn<K, Iterable<V>, Collection<V>>() {
-
-          @Override
-          public void initialize() {
-            valueType.initialize(getConfiguration());
-          }
-
-          public Collection<V> map(Iterable<V> values) {
-            List<V> collected = Lists.newArrayList();
-            for (V value : values) {
-              collected.add(valueType.getDetachedValue(value));
-            }
-            return collected;
-          }
-        }, tf.tableOf(collect.getKeyType(), tf.collections(collect.getValueType())));
-  }
-}
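
A short sketch of how the count/top methods above compose, assuming a word PCollection obtained elsewhere (names are illustrative):

    import org.apache.crunch.PCollection;
    import org.apache.crunch.PTable;
    import org.apache.crunch.lib.Aggregate;

    public class AggregateExample {
      // Count occurrences of each word, then keep the 10 largest counts.
      public static PTable<String, Long> topWords(PCollection<String> words) {
        PTable<String, Long> counts = Aggregate.count(words);
        return Aggregate.top(counts, 10, true);  // maximize = true keeps the largest values
      }
    }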

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/lib/Cartesian.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/lib/Cartesian.java b/crunch/src/main/java/org/apache/crunch/lib/Cartesian.java
deleted file mode 100644
index 08327dd..0000000
--- a/crunch/src/main/java/org/apache/crunch/lib/Cartesian.java
+++ /dev/null
@@ -1,216 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib;
-
-import java.util.Random;
-
-import org.apache.crunch.DoFn;
-import org.apache.crunch.Emitter;
-import org.apache.crunch.MapFn;
-import org.apache.crunch.PCollection;
-import org.apache.crunch.PTable;
-import org.apache.crunch.Pair;
-import org.apache.crunch.types.PTableType;
-import org.apache.crunch.types.PTypeFamily;
-
-/**
- * Utilities for Cartesian products of two {@code PTable} or {@code PCollection}
- * instances.
- */
-@SuppressWarnings("serial")
-public class Cartesian {
-
-  /**
-   * Helper for building the artificial cross keys. This technique was taken
-   * from Pig's CROSS.
-   */
-  private static class GFCross<V> extends DoFn<V, Pair<Pair<Integer, Integer>, V>> {
-
-    private final int constantField;
-    private final int parallelism;
-    private final Random r;
-
-    public GFCross(int constantField, int parallelism) {
-      this.constantField = constantField;
-      this.parallelism = parallelism;
-      this.r = new Random();
-    }
-
-    public void process(V input, Emitter<Pair<Pair<Integer, Integer>, V>> emitter) {
-      int c = r.nextInt(parallelism);
-      if (constantField == 0) {
-        for (int i = 0; i < parallelism; i++) {
-          emitter.emit(Pair.of(Pair.of(c, i), input));
-        }
-      } else {
-        for (int i = 0; i < parallelism; i++) {
-          emitter.emit(Pair.of(Pair.of(i, c), input));
-        }
-      }
-    }
-  }
-
-  static final int DEFAULT_PARALLELISM = 6;
-
-  /**
-   * Performs a full cross join on the specified {@link PTable}s (using the same
-   * strategy as Pig's CROSS operator).
-   * 
-   * @see <a href="http://en.wikipedia.org/wiki/Join_(SQL)#Cross_join">Cross
-   *      Join</a>
-   * @param left
-   *          A PTable to perform a cross join on.
-   * @param right
-   *          A PTable to perform a cross join on.
-   * @param <K1>
-   *          Type of left PTable's keys.
-   * @param <K2>
-   *          Type of right PTable's keys.
-   * @param <U>
-   *          Type of the first {@link PTable}'s values
-   * @param <V>
-   *          Type of the second {@link PTable}'s values
-   * @return The joined result as tuples of ((K1,K2), (U,V)).
-   */
-  public static <K1, K2, U, V> PTable<Pair<K1, K2>, Pair<U, V>> cross(PTable<K1, U> left, PTable<K2, V> right) {
-    return cross(left, right, DEFAULT_PARALLELISM);
-  }
-
-  /**
-   * Performs a full cross join on the specified {@link PTable}s (using the same
-   * strategy as Pig's CROSS operator).
-   * 
-   * @see <a href="http://en.wikipedia.org/wiki/Join_(SQL)#Cross_join">Cross
-   *      Join</a>
-   * @param left
-   *          A PTable to perform a cross join on.
-   * @param right
-   *          A PTable to perform a cross join on.
-   * @param parallelism
-   *          The square root of the number of reducers to use. Increasing
-   *          parallelism also increases copied data.
-   * @param <K1>
-   *          Type of left PTable's keys.
-   * @param <K2>
-   *          Type of right PTable's keys.
-   * @param <U>
-   *          Type of the first {@link PTable}'s values
-   * @param <V>
-   *          Type of the second {@link PTable}'s values
-   * @return The joined result as tuples of ((K1,K2), (U,V)).
-   */
-  public static <K1, K2, U, V> PTable<Pair<K1, K2>, Pair<U, V>> cross(PTable<K1, U> left, PTable<K2, V> right,
-      int parallelism) {
-
-    /*
-     * The strategy here is to simply emulate the following PigLatin: A =
-     * foreach table1 generate flatten(GFCross(0, 2)), flatten(*); B = foreach
-     * table2 generate flatten(GFCross(1, 2)), flatten(*); C = cogroup A by ($0,
-     * $1), B by ($0, $1); result = foreach C generate flatten(A), flatten(B);
-     */
-
-    PTypeFamily ltf = left.getTypeFamily();
-    PTypeFamily rtf = right.getTypeFamily();
-
-    PTable<Pair<Integer, Integer>, Pair<K1, U>> leftCross = left.parallelDo(new GFCross<Pair<K1, U>>(0, parallelism),
-        ltf.tableOf(ltf.pairs(ltf.ints(), ltf.ints()), ltf.pairs(left.getKeyType(), left.getValueType())));
-    PTable<Pair<Integer, Integer>, Pair<K2, V>> rightCross = right.parallelDo(new GFCross<Pair<K2, V>>(1, parallelism),
-        rtf.tableOf(rtf.pairs(rtf.ints(), rtf.ints()), rtf.pairs(right.getKeyType(), right.getValueType())));
-
-    PTable<Pair<Integer, Integer>, Pair<Pair<K1, U>, Pair<K2, V>>> cg = leftCross.join(rightCross);
-
-    PTypeFamily ctf = cg.getTypeFamily();
-
-    return cg.parallelDo(
-        new MapFn<Pair<Pair<Integer, Integer>, Pair<Pair<K1, U>, Pair<K2, V>>>, Pair<Pair<K1, K2>, Pair<U, V>>>() {
-
-          @Override
-          public Pair<Pair<K1, K2>, Pair<U, V>> map(Pair<Pair<Integer, Integer>, Pair<Pair<K1, U>, Pair<K2, V>>> input) {
-            Pair<Pair<K1, U>, Pair<K2, V>> valuePair = input.second();
-            return Pair.of(Pair.of(valuePair.first().first(), valuePair.second().first()),
-                Pair.of(valuePair.first().second(), valuePair.second().second()));
-          }
-        },
-        ctf.tableOf(ctf.pairs(left.getKeyType(), right.getKeyType()),
-            ctf.pairs(left.getValueType(), right.getValueType())));
-  }
-
-  /**
-   * Performs a full cross join on the specified {@link PCollection}s (using the
-   * same strategy as Pig's CROSS operator).
-   * 
-   * @see <a href="http://en.wikipedia.org/wiki/Join_(SQL)#Cross_join">Cross
-   *      Join</a>
-   * @param left
-   *          A PCollection to perform a cross join on.
-   * @param right
-   *          A PCollection to perform a cross join on.
-   * @param <U>
-   *          Type of the first {@link PCollection}'s values
-   * @param <V>
-   *          Type of the second {@link PCollection}'s values
-   * @return The joined result as tuples of (U,V).
-   */
-  public static <U, V> PCollection<Pair<U, V>> cross(PCollection<U> left, PCollection<V> right) {
-    return cross(left, right, DEFAULT_PARALLELISM);
-  }
-
-  /**
-   * Performs a full cross join on the specified {@link PCollection}s (using the
-   * same strategy as Pig's CROSS operator).
-   * 
-   * @see <a href="http://en.wikipedia.org/wiki/Join_(SQL)#Cross_join">Cross
-   *      Join</a>
-   * @param left
-   *          A PCollection to perform a cross join on.
-   * @param right
-   *          A PCollection to perform a cross join on.
-   * @param <U>
-   *          Type of the first {@link PCollection}'s values
-   * @param <V>
-   *          Type of the second {@link PCollection}'s values
-   * @return The joined result as tuples of (U,V).
-   */
-  public static <U, V> PCollection<Pair<U, V>> cross(PCollection<U> left, PCollection<V> right, int parallelism) {
-
-    PTypeFamily ltf = left.getTypeFamily();
-    PTypeFamily rtf = right.getTypeFamily();
-
-    PTableType<Pair<Integer, Integer>, U> ptt = ltf.tableOf(ltf.pairs(ltf.ints(), ltf.ints()), left.getPType());
-
-    if (ptt == null)
-      throw new Error();
-
-    PTable<Pair<Integer, Integer>, U> leftCross = left.parallelDo(new GFCross<U>(0, parallelism),
-        ltf.tableOf(ltf.pairs(ltf.ints(), ltf.ints()), left.getPType()));
-    PTable<Pair<Integer, Integer>, V> rightCross = right.parallelDo(new GFCross<V>(1, parallelism),
-        rtf.tableOf(rtf.pairs(rtf.ints(), rtf.ints()), right.getPType()));
-
-    PTable<Pair<Integer, Integer>, Pair<U, V>> cg = leftCross.join(rightCross);
-
-    PTypeFamily ctf = cg.getTypeFamily();
-
-    return cg.parallelDo(new MapFn<Pair<Pair<Integer, Integer>, Pair<U, V>>, Pair<U, V>>() {
-      @Override
-      public Pair<U, V> map(Pair<Pair<Integer, Integer>, Pair<U, V>> input) {
-        return input.second();
-      }
-    }, ctf.pairs(left.getPType(), right.getPType()));
-  }
-
-}
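
A sketch of the API above with illustrative names; note that the output size is the product of the two input sizes, so this is only practical for small inputs or with higher parallelism:

    import org.apache.crunch.PCollection;
    import org.apache.crunch.Pair;
    import org.apache.crunch.lib.Cartesian;

    public class CartesianExample {
      // Pair every user with every item.
      public static PCollection<Pair<String, String>> allPairs(
          PCollection<String> users, PCollection<String> items) {
        return Cartesian.cross(users, items);
      }
    }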

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/lib/Cogroup.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/lib/Cogroup.java b/crunch/src/main/java/org/apache/crunch/lib/Cogroup.java
deleted file mode 100644
index 07d873c..0000000
--- a/crunch/src/main/java/org/apache/crunch/lib/Cogroup.java
+++ /dev/null
@@ -1,106 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib;
-
-import java.util.Collection;
-
-import org.apache.crunch.DoFn;
-import org.apache.crunch.Emitter;
-import org.apache.crunch.PTable;
-import org.apache.crunch.Pair;
-import org.apache.crunch.fn.MapValuesFn;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.PTypeFamily;
-
-import com.google.common.collect.Lists;
-
-public class Cogroup {
-
-  /**
-   * Co-groups the two {@link PTable} arguments.
-   * 
-   * @return a {@code PTable} representing the co-grouped tables.
-   */
-  public static <K, U, V> PTable<K, Pair<Collection<U>, Collection<V>>> cogroup(PTable<K, U> left, PTable<K, V> right) {
-    PTypeFamily ptf = left.getTypeFamily();
-    PType<K> keyType = left.getPTableType().getKeyType();
-    PType<U> leftType = left.getPTableType().getValueType();
-    PType<V> rightType = right.getPTableType().getValueType();
-    PType<Pair<U, V>> itype = ptf.pairs(leftType, rightType);
-
-    PTable<K, Pair<U, V>> cgLeft = left.parallelDo("coGroupTag1", new CogroupFn1<K, U, V>(),
-        ptf.tableOf(keyType, itype));
-    PTable<K, Pair<U, V>> cgRight = right.parallelDo("coGroupTag2", new CogroupFn2<K, U, V>(),
-        ptf.tableOf(keyType, itype));
-
-    PTable<K, Pair<U, V>> both = cgLeft.union(cgRight);
-
-    PType<Pair<Collection<U>, Collection<V>>> otype = ptf.pairs(ptf.collections(leftType), ptf.collections(rightType));
-    return both.groupByKey().parallelDo("cogroup", 
-        new PostGroupFn<K, U, V>(leftType, rightType), ptf.tableOf(keyType, otype));
-  }
-
-  private static class CogroupFn1<K, V, U> extends MapValuesFn<K, V, Pair<V, U>> {
-    @Override
-    public Pair<V, U> map(V v) {
-      return Pair.of(v, null);
-    }
-  }
-
-  private static class CogroupFn2<K, V, U> extends MapValuesFn<K, U, Pair<V, U>> {
-    @Override
-    public Pair<V, U> map(U u) {
-      return Pair.of(null, u);
-    }
-  }
-
-  private static class PostGroupFn<K, V, U> extends
-      DoFn<Pair<K, Iterable<Pair<V, U>>>, Pair<K, Pair<Collection<V>, Collection<U>>>> {
-    
-    private PType<V> ptypeV;
-    private PType<U> ptypeU;
-    
-    public PostGroupFn(PType<V> ptypeV, PType<U> ptypeU) {
-      this.ptypeV = ptypeV;
-      this.ptypeU = ptypeU;
-    }
-    
-    @Override
-    public void initialize() {
-      super.initialize();
-      ptypeV.initialize(getConfiguration());
-      ptypeU.initialize(getConfiguration());
-    }
-    
-    @Override
-    public void process(Pair<K, Iterable<Pair<V, U>>> input,
-        Emitter<Pair<K, Pair<Collection<V>, Collection<U>>>> emitter) {
-      Collection<V> cv = Lists.newArrayList();
-      Collection<U> cu = Lists.newArrayList();
-      for (Pair<V, U> pair : input.second()) {
-        if (pair.first() != null) {
-          cv.add(ptypeV.getDetachedValue(pair.first()));
-        } else if (pair.second() != null) {
-          cu.add(ptypeU.getDetachedValue(pair.second()));
-        }
-      }
-      emitter.emit(Pair.of(input.first(), Pair.of(cv, cu)));
-    }
-  }
-
-}
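
A sketch of the cogroup signature in use, with illustrative table names and types:

    import java.util.Collection;

    import org.apache.crunch.PTable;
    import org.apache.crunch.Pair;
    import org.apache.crunch.lib.Cogroup;

    public class CogroupExample {
      // For each user id, collect all click values and all purchase amounts in one pass.
      public static PTable<String, Pair<Collection<String>, Collection<Long>>> byUser(
          PTable<String, String> clicks, PTable<String, Long> purchases) {
        return Cogroup.cogroup(clicks, purchases);
      }
    }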

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/lib/Distinct.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/lib/Distinct.java b/crunch/src/main/java/org/apache/crunch/lib/Distinct.java
deleted file mode 100644
index 994830d..0000000
--- a/crunch/src/main/java/org/apache/crunch/lib/Distinct.java
+++ /dev/null
@@ -1,126 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib;
-
-import java.util.Set;
-
-import org.apache.crunch.DoFn;
-import org.apache.crunch.Emitter;
-import org.apache.crunch.PCollection;
-import org.apache.crunch.PTable;
-import org.apache.crunch.Pair;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.PTypeFamily;
-
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Sets;
-
-/**
- * Functions for computing the distinct elements of a {@code PCollection}.
- */
-public final class Distinct {
-
-  private static final int DEFAULT_FLUSH_EVERY = 50000;
-  
-  /**
-   * Construct a new {@code PCollection} that contains the unique elements of a
-   * given input {@code PCollection}.
-   * 
-   * @param input The input {@code PCollection}
-   * @return A new {@code PCollection} that contains the unique elements of the input
-   */
-  public static <S> PCollection<S> distinct(PCollection<S> input) {
-    return distinct(input, DEFAULT_FLUSH_EVERY);
-  }
-  
-  /**
-   * A {@code PTable<K, V>} analogue of the {@code distinct} function.
-   */
-  public static <K, V> PTable<K, V> distinct(PTable<K, V> input) {
-    return PTables.asPTable(distinct((PCollection<Pair<K, V>>) input));
-  }
-  
-  /**
-   * A {@code distinct} operation that gives the client control over how frequently elements
-   * are flushed to disk, allowing performance to be traded off against memory consumption.
-   * 
-   * @param input The input {@code PCollection}
-   * @param flushEvery Flush the elements to disk whenever we encounter this many unique values
-   * @return A new {@code PCollection} that contains the unique elements of the input
-   */
-  public static <S> PCollection<S> distinct(PCollection<S> input, int flushEvery) {
-    Preconditions.checkArgument(flushEvery > 0);
-    PType<S> pt = input.getPType();
-    PTypeFamily ptf = pt.getFamily();
-    return input
-        .parallelDo("pre-distinct", new PreDistinctFn<S>(flushEvery, pt), ptf.tableOf(pt, ptf.nulls()))
-        .groupByKey()
-        .parallelDo("post-distinct", new PostDistinctFn<S>(), pt);
-  }
-  
-  /**
-   * A {@code PTable<K, V>} analogue of the {@code distinct} function.
-   */
-  public static <K, V> PTable<K, V> distinct(PTable<K, V> input, int flushEvery) {
-    return PTables.asPTable(distinct((PCollection<Pair<K, V>>) input, flushEvery));
-  }
-  
-  private static class PreDistinctFn<S> extends DoFn<S, Pair<S, Void>> {
-    private final Set<S> values = Sets.newHashSet();
-    private final int flushEvery;
-    private final PType<S> ptype;
-    
-    public PreDistinctFn(int flushEvery, PType<S> ptype) {
-      this.flushEvery = flushEvery;
-      this.ptype = ptype;
-    }
-    
-    @Override
-    public void initialize() {
-      super.initialize();
-      ptype.initialize(getConfiguration());
-    }
-    
-    @Override
-    public void process(S input, Emitter<Pair<S, Void>> emitter) {
-      values.add(ptype.getDetachedValue(input));
-      if (values.size() > flushEvery) {
-        cleanup(emitter);
-      }
-    }
-    
-    @Override
-    public void cleanup(Emitter<Pair<S, Void>> emitter) {
-      for (S in : values) {
-        emitter.emit(Pair.<S, Void>of(in, null));
-      }
-      values.clear();
-    }
-  }
-  
-  private static class PostDistinctFn<S> extends DoFn<Pair<S, Iterable<Void>>, S> {
-    @Override
-    public void process(Pair<S, Iterable<Void>> input, Emitter<S> emitter) {
-      emitter.emit(input.first());
-    }
-  }
-  
-  // No instantiation
-  private Distinct() {}
-}
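
A sketch of the two entry points above (the 10,000 threshold is an arbitrary illustration of the flushEvery knob):

    import org.apache.crunch.PCollection;
    import org.apache.crunch.lib.Distinct;

    public class DistinctExample {
      public static PCollection<String> uniqueIds(PCollection<String> ids) {
        // Flush the in-memory set more often than the 50,000 default, e.g. when values are large.
        return Distinct.distinct(ids, 10000);
      }
    }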

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/lib/Join.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/lib/Join.java b/crunch/src/main/java/org/apache/crunch/lib/Join.java
deleted file mode 100644
index c0c4a6b..0000000
--- a/crunch/src/main/java/org/apache/crunch/lib/Join.java
+++ /dev/null
@@ -1,181 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib;
-
-import org.apache.crunch.GroupingOptions;
-import org.apache.crunch.MapFn;
-import org.apache.crunch.PGroupedTable;
-import org.apache.crunch.PTable;
-import org.apache.crunch.Pair;
-import org.apache.crunch.lib.join.FullOuterJoinFn;
-import org.apache.crunch.lib.join.InnerJoinFn;
-import org.apache.crunch.lib.join.JoinFn;
-import org.apache.crunch.lib.join.JoinUtils;
-import org.apache.crunch.lib.join.LeftOuterJoinFn;
-import org.apache.crunch.lib.join.RightOuterJoinFn;
-import org.apache.crunch.types.PTableType;
-import org.apache.crunch.types.PTypeFamily;
-
-/**
- * Utilities for joining multiple {@code PTable} instances based on a common
- * key.
- */
-public class Join {
-  /**
-   * Performs an inner join on the specified {@link PTable}s.
-   * 
-   * @see <a href="http://en.wikipedia.org/wiki/Join_(SQL)#Inner_join">Inner
-   *      Join</a>
-   * @param left
-   *          A PTable to perform an inner join on.
-   * @param right
-   *          A PTable to perform an inner join on.
-   * @param <K>
-   *          Type of the keys.
-   * @param <U>
-   *          Type of the first {@link PTable}'s values
-   * @param <V>
-   *          Type of the second {@link PTable}'s values
-   * @return The joined result.
-   */
-  public static <K, U, V> PTable<K, Pair<U, V>> join(PTable<K, U> left, PTable<K, V> right) {
-    return innerJoin(left, right);
-  }
-
-  /**
-   * Performs an inner join on the specified {@link PTable}s.
-   * 
-   * @see <a href="http://en.wikipedia.org/wiki/Join_(SQL)#Inner_join">Inner
-   *      Join</a>
-   * @param left
-   *          A PTable to perform an inner join on.
-   * @param right
-   *          A PTable to perform an inner join on.
-   * @param <K>
-   *          Type of the keys.
-   * @param <U>
-   *          Type of the first {@link PTable}'s values
-   * @param <V>
-   *          Type of the second {@link PTable}'s values
-   * @return The joined result.
-   */
-  public static <K, U, V> PTable<K, Pair<U, V>> innerJoin(PTable<K, U> left, PTable<K, V> right) {
-    return join(left, right, new InnerJoinFn<K, U, V>(left.getKeyType(), left.getValueType()));
-  }
-
-  /**
-   * Performs a left outer join on the specified {@link PTable}s.
-   * 
-   * @see <a href="http://en.wikipedia.org/wiki/Join_(SQL)#Left_outer_join">Left
-   *      Join</a>
-   * @param left
-   *          A PTable to perform a left join on. All of this PTable's entries
-   *          will appear in the resulting PTable.
-   * @param right
-   *          A PTable to perform a left join on.
-   * @param <K>
-   *          Type of the keys.
-   * @param <U>
-   *          Type of the first {@link PTable}'s values
-   * @param <V>
-   *          Type of the second {@link PTable}'s values
-   * @return The joined result.
-   */
-  public static <K, U, V> PTable<K, Pair<U, V>> leftJoin(PTable<K, U> left, PTable<K, V> right) {
-    return join(left, right, new LeftOuterJoinFn<K, U, V>(left.getKeyType(), left.getValueType()));
-  }
-
-  /**
-   * Performs a right outer join on the specified {@link PTable}s.
-   * 
-   * @see <a
-   *      href="http://en.wikipedia.org/wiki/Join_(SQL)#Right_outer_join">Right
-   *      Join</a>
-   * @param left
-   *          A PTable to perform a right join on.
-   * @param right
-   *          A PTable to perform a right join on. All of this PTable's entries
-   *          will appear in the resulting PTable.
-   * @param <K>
-   *          Type of the keys.
-   * @param <U>
-   *          Type of the first {@link PTable}'s values
-   * @param <V>
-   *          Type of the second {@link PTable}'s values
-   * @return The joined result.
-   */
-  public static <K, U, V> PTable<K, Pair<U, V>> rightJoin(PTable<K, U> left, PTable<K, V> right) {
-    return join(left, right, new RightOuterJoinFn<K, U, V>(left.getKeyType(), left.getValueType()));
-  }
-
-  /**
-   * Performs a full outer join on the specified {@link PTable}s.
-   * 
-   * @see <a href="http://en.wikipedia.org/wiki/Join_(SQL)#Full_outer_join">Full
-   *      Join</a>
-   * @param left
-   *          A PTable to perform a full join on.
-   * @param right
-   *          A PTable to perform a full join on.
-   * @param <K>
-   *          Type of the keys.
-   * @param <U>
-   *          Type of the first {@link PTable}'s values
-   * @param <V>
-   *          Type of the second {@link PTable}'s values
-   * @return The joined result.
-   */
-  public static <K, U, V> PTable<K, Pair<U, V>> fullJoin(PTable<K, U> left, PTable<K, V> right) {
-    return join(left, right, new FullOuterJoinFn<K, U, V>(left.getKeyType(), left.getValueType()));
-  }
-
-  public static <K, U, V> PTable<K, Pair<U, V>> join(PTable<K, U> left, PTable<K, V> right, JoinFn<K, U, V> joinFn) {
-    PTypeFamily ptf = left.getTypeFamily();
-    PGroupedTable<Pair<K, Integer>, Pair<U, V>> grouped = preJoin(left, right);
-    PTableType<K, Pair<U, V>> ret = ptf
-        .tableOf(left.getKeyType(), ptf.pairs(left.getValueType(), right.getValueType()));
-
-    return grouped.parallelDo(joinFn.getJoinType() + grouped.getName(), joinFn, ret);
-  }
-
-  private static <K, U, V> PGroupedTable<Pair<K, Integer>, Pair<U, V>> preJoin(PTable<K, U> left, PTable<K, V> right) {
-    PTypeFamily ptf = left.getTypeFamily();
-    PTableType<Pair<K, Integer>, Pair<U, V>> ptt = ptf.tableOf(ptf.pairs(left.getKeyType(), ptf.ints()),
-        ptf.pairs(left.getValueType(), right.getValueType()));
-
-    PTable<Pair<K, Integer>, Pair<U, V>> tag1 = left.parallelDo("joinTagLeft",
-        new MapFn<Pair<K, U>, Pair<Pair<K, Integer>, Pair<U, V>>>() {
-          @Override
-          public Pair<Pair<K, Integer>, Pair<U, V>> map(Pair<K, U> input) {
-            return Pair.of(Pair.of(input.first(), 0), Pair.of(input.second(), (V) null));
-          }
-        }, ptt);
-    PTable<Pair<K, Integer>, Pair<U, V>> tag2 = right.parallelDo("joinTagRight",
-        new MapFn<Pair<K, V>, Pair<Pair<K, Integer>, Pair<U, V>>>() {
-          @Override
-          public Pair<Pair<K, Integer>, Pair<U, V>> map(Pair<K, V> input) {
-            return Pair.of(Pair.of(input.first(), 1), Pair.of((U) null, input.second()));
-          }
-        }, ptt);
-
-    GroupingOptions.Builder optionsBuilder = GroupingOptions.builder();
-    optionsBuilder.partitionerClass(JoinUtils.getPartitionerClass(ptf));
-
-    return (tag1.union(tag2)).groupByKey(optionsBuilder.build());
-  }
-}
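
A sketch of the inner and left outer variants above, with illustrative key and value types:

    import org.apache.crunch.PTable;
    import org.apache.crunch.Pair;
    import org.apache.crunch.lib.Join;

    public class JoinExample {
      // Inner join: only keys present in both tables appear in the result.
      public static PTable<String, Pair<String, Long>> joined(
          PTable<String, String> names, PTable<String, Long> scores) {
        return Join.innerJoin(names, scores);
      }

      // Left outer join: every entry of names appears; the score is null when missing.
      public static PTable<String, Pair<String, Long>> withOptionalScores(
          PTable<String, String> names, PTable<String, Long> scores) {
        return Join.leftJoin(names, scores);
      }
    }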

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/lib/PTables.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/lib/PTables.java b/crunch/src/main/java/org/apache/crunch/lib/PTables.java
deleted file mode 100644
index e907680..0000000
--- a/crunch/src/main/java/org/apache/crunch/lib/PTables.java
+++ /dev/null
@@ -1,117 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib;
-
-import java.util.List;
-
-import org.apache.crunch.DoFn;
-import org.apache.crunch.Emitter;
-import org.apache.crunch.PCollection;
-import org.apache.crunch.PGroupedTable;
-import org.apache.crunch.PTable;
-import org.apache.crunch.Pair;
-import org.apache.crunch.fn.IdentityFn;
-import org.apache.crunch.types.PGroupedTableType;
-import org.apache.crunch.types.PTableType;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.PTypeFamily;
-
-import com.google.common.collect.Lists;
-
-/**
- * Methods for performing common operations on PTables.
- * 
- */
-public class PTables {
-
-  /**
-   * Convert the given {@code PCollection<Pair<K, V>>} to a {@code PTable<K, V>}.
-   * @param pcollect The {@code PCollection} to convert
-   * @return A {@code PTable} that contains the same data as the input {@code PCollection}
-   */
-  public static <K, V> PTable<K, V> asPTable(PCollection<Pair<K, V>> pcollect) {
-    PType<Pair<K, V>> pt = pcollect.getPType();
-    PTypeFamily ptf = pt.getFamily();
-    PTableType<K, V> ptt = ptf.tableOf(pt.getSubTypes().get(0), pt.getSubTypes().get(1));
-    DoFn<Pair<K, V>, Pair<K, V>> id = IdentityFn.getInstance();
-    return pcollect.parallelDo("asPTable", id, ptt);
-  }
-  
-  /**
-   * Extract the keys from the given {@code PTable<K, V>} as a {@code PCollection<K>}.
-   * @param ptable The {@code PTable}
-   * @return A {@code PCollection<K>}
-   */
-  public static <K, V> PCollection<K> keys(PTable<K, V> ptable) {
-    return ptable.parallelDo("PTables.keys", new DoFn<Pair<K, V>, K>() {
-      @Override
-      public void process(Pair<K, V> input, Emitter<K> emitter) {
-        emitter.emit(input.first());
-      }
-    }, ptable.getKeyType());
-  }
-
-  /**
-   * Extract the values from the given {@code PTable<K, V>} as a {@code PCollection<V>}.
-   * @param ptable The {@code PTable}
-   * @return A {@code PCollection<V>}
-   */
-  public static <K, V> PCollection<V> values(PTable<K, V> ptable) {
-    return ptable.parallelDo("PTables.values", new DoFn<Pair<K, V>, V>() {
-      @Override
-      public void process(Pair<K, V> input, Emitter<V> emitter) {
-        emitter.emit(input.second());
-      }
-    }, ptable.getValueType());
-  }
-
-  /**
-   * Create a detached value for a table {@link Pair}.
-   * 
-   * @param tableType The table type
-   * @param value The value from which a detached value is to be created
-   * @return The detached value
-   * @see PType#getDetachedValue(Object)
-   */
-  public static <K, V> Pair<K, V> getDetachedValue(PTableType<K, V> tableType, Pair<K, V> value) {
-    return Pair.of(tableType.getKeyType().getDetachedValue(value.first()), tableType.getValueType()
-        .getDetachedValue(value.second()));
-  }
-
-  /**
-   * Create a detached value for a {@link PGroupedTable} value.
-   * 
-   * @param groupedTableType The grouped table type
-   * @param value The value from which a detached value is to be created
-   * @return The detached value
-   * @see PType#getDetachedValue(Object)
-   */
-  public static <K, V> Pair<K, Iterable<V>> getGroupedDetachedValue(
-      PGroupedTableType<K, V> groupedTableType, Pair<K, Iterable<V>> value) {
-
-    PTableType<K, V> tableType = groupedTableType.getTableType();
-    List<V> detachedIterable = Lists.newArrayList();
-    PType<V> valueType = tableType.getValueType();
-    for (V v : value.second()) {
-      detachedIterable.add(valueType.getDetachedValue(v));
-    }
-    return Pair.of(tableType.getKeyType().getDetachedValue(value.first()),
-        (Iterable<V>) detachedIterable);
-  }
-}
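
A sketch of the keys/values/asPTable helpers above, with illustrative types:

    import org.apache.crunch.PCollection;
    import org.apache.crunch.PTable;
    import org.apache.crunch.Pair;
    import org.apache.crunch.lib.PTables;

    public class PTablesExample {
      // Drop the values and keep only the keys.
      public static PCollection<String> keysOnly(PTable<String, Long> counts) {
        return PTables.keys(counts);
      }

      // Reinterpret a collection of pairs as a table.
      public static PTable<String, Long> fromPairs(PCollection<Pair<String, Long>> pairs) {
        return PTables.asPTable(pairs);
      }
    }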

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/lib/Sample.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/lib/Sample.java b/crunch/src/main/java/org/apache/crunch/lib/Sample.java
deleted file mode 100644
index 5a66101..0000000
--- a/crunch/src/main/java/org/apache/crunch/lib/Sample.java
+++ /dev/null
@@ -1,217 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib;
-
-
-import org.apache.crunch.MapFn;
-import org.apache.crunch.PCollection;
-import org.apache.crunch.PTable;
-import org.apache.crunch.Pair;
-import org.apache.crunch.lib.SampleUtils.ReservoirSampleFn;
-import org.apache.crunch.lib.SampleUtils.SampleFn;
-import org.apache.crunch.lib.SampleUtils.WRSCombineFn;
-import org.apache.crunch.types.PTableType;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.PTypeFamily;
-
-/**
- * Methods for performing random sampling in a distributed fashion, either by accepting each
- * record in a {@code PCollection} with an independent probability in order to sample some
- * fraction of the overall data set, or by using reservoir sampling in order to pull a uniform
- * or weighted sample of fixed size from a {@code PCollection} of an unknown size. For more details
- * on the reservoir sampling algorithms used by this library, see the A-ES algorithm described in
- * <a href="http://arxiv.org/pdf/1012.0256.pdf">Efraimidis (2012)</a>.
- */
-public class Sample {
-
-  /**
-   * Output records from the given {@code PCollection} with the given probability.
-   * 
-   * @param input The {@code PCollection} to sample from
-   * @param probability The probability (0.0 &lt; p &lt; 1.0)
-   * @return The output {@code PCollection} created from sampling
-   */
-  public static <S> PCollection<S> sample(PCollection<S> input, double probability) {
-    return sample(input, null, probability);
-  }
-
-  /**
-   * Output records from the given {@code PCollection} with the given probability, using the
-   * given seed for the random number generator. Useful for unit testing.
-   * 
-   * @param input The {@code PCollection} to sample from
-   * @param seed The seed for the random number generator
-   * @param probability The probability (0.0 &lt; p &lt; 1.0)
-   * @return The output {@code PCollection} created from sampling
-   */
-  public static <S> PCollection<S> sample(PCollection<S> input, Long seed, double probability) {
-    String stageName = String.format("sample(%.2f)", probability);
-    return input.parallelDo(stageName, new SampleFn<S>(probability, seed), input.getPType());
-  }
-  
-  /**
-   * A {@code PTable<K, V>} analogue of the {@code sample} function.
-   * 
-   * @param input The {@code PTable} to sample from
-   * @param probability The probability (0.0 &lt; p &lt; 1.0)
-   * @return The output {@code PTable} created from sampling
-   */
-  public static <K, V> PTable<K, V> sample(PTable<K, V> input, double probability) {
-    return PTables.asPTable(sample((PCollection<Pair<K, V>>) input, probability));
-  }
-  
-  /**
-   * A {@code PTable<K, V>} analogue of the {@code sample} function, with the seed argument
-   * exposed for testing purposes.
-   * 
-   * @param input The {@code PTable} to sample from
-   * @param seed The seed for the random number generator
-   * @param probability The probability (0.0 &lt; p &lt; 1.0)
-   * @return The output {@code PTable} created from sampling
-   */
-  public static <K, V> PTable<K, V> sample(PTable<K, V> input, Long seed, double probability) {
-    return PTables.asPTable(sample((PCollection<Pair<K, V>>) input, seed, probability));
-  }
-  
-  /**
-   * Select a fixed number of elements from the given {@code PCollection} with each element
-   * equally likely to be included in the sample.
-   * 
-   * @param input The input data
-   * @param sampleSize The number of elements to select
-   * @return A {@code PCollection} made up of the sampled elements
-   */
-  public static <T> PCollection<T> reservoirSample(
-      PCollection<T> input,
-      int sampleSize) {
-    return reservorSample(input, sampleSize, null);
-  }
-
-  /**
-   * A version of the reservoir sampling algorithm that uses a given seed, primarily for
-   * testing purposes.
-   * 
-   * @param input The input data
-   * @param sampleSize The number of elements to select
-   * @param seed The test seed
-   * @return A {@code PCollection} made up of the sampled elements
-   */
-  public static <T> PCollection<T> reservorSample(
-      PCollection<T> input,
-      int sampleSize,
-      Long seed) {
-    PTypeFamily ptf = input.getTypeFamily();
-    PType<Pair<T, Integer>> ptype = ptf.pairs(input.getPType(), ptf.ints());
-    return weightedReservoirSample(
-        input.parallelDo(new MapFn<T, Pair<T, Integer>>() {
-          public Pair<T, Integer> map(T t) { return Pair.of(t, 1); }
-        }, ptype),
-        sampleSize,
-        seed);
-  }
-  
-  /**
-   * Selects a weighted sample of the elements of the given {@code PCollection}, where the second term in
-   * the input {@code Pair} is a numerical weight.
-   * 
-   * @param input the weighted observations
-   * @param sampleSize The number of elements to select
-   * @return A random sample of the given size that respects the weighting values
-   */
-  public static <T, N extends Number> PCollection<T> weightedReservoirSample(
-      PCollection<Pair<T, N>> input,
-      int sampleSize) {
-    return weightedReservoirSample(input, sampleSize, null);
-  }
-  
-  /**
-   * The weighted reservoir sampling function with the seed term exposed for testing purposes.
-   * 
-   * @param input the weighted observations
-   * @param sampleSize The number of elements to select
-   * @param seed The test seed
-   * @return A random sample of the given size that respects the weighting values
-   */
-  public static <T, N extends Number> PCollection<T> weightedReservoirSample(
-      PCollection<Pair<T, N>> input,
-      int sampleSize,
-      Long seed) {
-    PTypeFamily ptf = input.getTypeFamily();
-    PTable<Integer, Pair<T, N>> groupedIn = input.parallelDo(
-        new MapFn<Pair<T, N>, Pair<Integer, Pair<T, N>>>() {
-          @Override
-          public Pair<Integer, Pair<T, N>> map(Pair<T, N> p) {
-            return Pair.of(0, p);
-          }
-        }, ptf.tableOf(ptf.ints(), input.getPType()));
-    int[] ss = new int[] { sampleSize };
-    return groupedWeightedReservoirSample(groupedIn, ss, seed)
-        .parallelDo(new MapFn<Pair<Integer, T>, T>() {
-          @Override
-          public T map(Pair<Integer, T> p) {
-            return p.second();
-          }
-        }, (PType<T>) input.getPType().getSubTypes().get(0));
-  }
-  
-  /**
-   * The most general of the weighted reservoir sampling patterns, allowing a random sample
-   * of elements to be chosen for each of N input groups.
-   * 
-   * @param input A {@code PTable} with the key a group ID and the value a weighted observation in that group
-   * @param sampleSizes An array of length N, where each entry is the number of elements to include in that group
-   * @return A {@code PCollection} of the sampled elements for each of the groups
-   */
-  
-  public static <T, N extends Number> PCollection<Pair<Integer, T>> groupedWeightedReservoirSample(
-      PTable<Integer, Pair<T, N>> input,
-      int[] sampleSizes) {
-    return groupedWeightedReservoirSample(input, sampleSizes, null);
-  }
-  
-  /**
-   * Same as the other groupedWeightedReservoirSample method, but includes a seed for testing
-   * purposes.
-   * 
-   * @param input A {@code PTable} with the key a group ID and the value a weighted observation in that group
-   * @param sampleSizes An array of length N, where each entry is the number of elements to include in that group
-   * @param seed The test seed
-   * @return A {@code PCollection} of the sampled elements for each of the groups
-   */
-  public static <T, N extends Number> PCollection<Pair<Integer, T>> groupedWeightedReservoirSample(
-      PTable<Integer, Pair<T, N>> input,
-      int[] sampleSizes,
-      Long seed) {
-    PTypeFamily ptf = input.getTypeFamily();
-    PType<T> ttype = (PType<T>) input.getPTableType().getValueType().getSubTypes().get(0);
-    PTableType<Integer, Pair<Double, T>> ptt = ptf.tableOf(ptf.ints(),
-        ptf.pairs(ptf.doubles(), ttype));
-    
-    return input.parallelDo(new ReservoirSampleFn<T, N>(sampleSizes, seed, ttype), ptt)
-        .groupByKey(1)
-        .combineValues(new WRSCombineFn<T>(sampleSizes, ttype))
-        .parallelDo(new MapFn<Pair<Integer, Pair<Double, T>>, Pair<Integer, T>>() {
-          @Override
-          public Pair<Integer, T> map(Pair<Integer, Pair<Double, T>> p) {
-            return Pair.of(p.first(), p.second().second());
-          }
-        }, ptf.pairs(ptf.ints(), ttype));
-  }
-
-}
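
A sketch of the two sampling styles described above (the probability and sample size are illustrative):

    import org.apache.crunch.PCollection;
    import org.apache.crunch.lib.Sample;

    public class SampleExample {
      // Independent sampling: keep each record with probability 0.01.
      public static PCollection<String> onePercent(PCollection<String> events) {
        return Sample.sample(events, 0.01);
      }

      // Reservoir sampling: a uniform sample of exactly 1,000 records from an input of unknown size.
      public static PCollection<String> thousandRecords(PCollection<String> events) {
        return Sample.reservoirSample(events, 1000);
      }
    }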

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/lib/SampleUtils.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/lib/SampleUtils.java b/crunch/src/main/java/org/apache/crunch/lib/SampleUtils.java
deleted file mode 100644
index 8769eed..0000000
--- a/crunch/src/main/java/org/apache/crunch/lib/SampleUtils.java
+++ /dev/null
@@ -1,168 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib;
-
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-import java.util.SortedMap;
-
-import org.apache.crunch.CombineFn;
-import org.apache.crunch.DoFn;
-import org.apache.crunch.Emitter;
-import org.apache.crunch.FilterFn;
-import org.apache.crunch.Pair;
-import org.apache.crunch.types.PType;
-
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-
-class SampleUtils {
-  
-  static class SampleFn<S> extends FilterFn<S> {
-
-    private final Long seed;
-    private final double acceptanceProbability;
-    private transient Random r;
-
-    public SampleFn(double acceptanceProbability, Long seed) {
-      Preconditions.checkArgument(0.0 < acceptanceProbability && acceptanceProbability < 1.0);
-      this.seed = seed == null ? System.currentTimeMillis() : seed;
-      this.acceptanceProbability = acceptanceProbability;
-    }
-
-    @Override
-    public void initialize() {
-      if (r == null) {
-        r = new Random(seed);
-      }
-    }
-
-    @Override
-    public boolean accept(S input) {
-      return r.nextDouble() < acceptanceProbability;
-    }
-  }
-
-
-  static class ReservoirSampleFn<T, N extends Number>
-      extends DoFn<Pair<Integer, Pair<T, N>>, Pair<Integer, Pair<Double, T>>> {
-  
-    private int[] sampleSizes;
-    private Long seed;
-    private PType<T> valueType;
-    private transient List<SortedMap<Double, T>> reservoirs;
-    private transient Random random;
-    
-    public ReservoirSampleFn(int[] sampleSizes, Long seed, PType<T> valueType) {
-      this.sampleSizes = sampleSizes;
-      this.seed = seed;
-      this.valueType = valueType;
-    }
-    
-    @Override
-    public void initialize() {
-      this.reservoirs = Lists.newArrayList();
-      this.valueType.initialize(getConfiguration());
-      for (int i = 0; i < sampleSizes.length; i++) {
-        reservoirs.add(Maps.<Double, T>newTreeMap());
-      }
-      if (random == null) {
-        if (seed == null) {
-          this.random = new Random();
-        } else {
-          this.random = new Random(seed);
-        }
-      }
-    }
-    
-    @Override
-    public void process(Pair<Integer, Pair<T, N>> input,
-        Emitter<Pair<Integer, Pair<Double, T>>> emitter) {
-      int id = input.first();
-      Pair<T, N> p = input.second();
-      double weight = p.second().doubleValue();
-      if (weight > 0.0) {
-        double score = Math.log(random.nextDouble()) / weight;
-        SortedMap<Double, T> reservoir = reservoirs.get(id);
-        if (reservoir.size() < sampleSizes[id]) { 
-          reservoir.put(score, valueType.getDetachedValue(p.first()));        
-        } else if (score > reservoir.firstKey()) {
-          reservoir.remove(reservoir.firstKey());
-          reservoir.put(score, valueType.getDetachedValue(p.first()));
-        }
-      }
-    }
-    
-    @Override
-    public void cleanup(Emitter<Pair<Integer, Pair<Double, T>>> emitter) {
-      for (int id = 0; id < reservoirs.size(); id++) {
-        SortedMap<Double, T> reservoir = reservoirs.get(id);
-        for (Map.Entry<Double, T> e : reservoir.entrySet()) {
-          emitter.emit(Pair.of(id, Pair.of(e.getKey(), e.getValue())));
-        }
-      }
-    }
-  }
-  
-  static class WRSCombineFn<T> extends CombineFn<Integer, Pair<Double, T>> {
-
-    private int[] sampleSizes;
-    private PType<T> valueType;
-    private List<SortedMap<Double, T>> reservoirs;
-    
-    public WRSCombineFn(int[] sampleSizes, PType<T> valueType) {
-      this.sampleSizes = sampleSizes;
-      this.valueType = valueType;
-    }
-
-    @Override
-    public void initialize() {
-      this.reservoirs = Lists.newArrayList();
-      for (int i = 0; i < sampleSizes.length; i++) {
-        reservoirs.add(Maps.<Double, T>newTreeMap());
-      }
-      this.valueType.initialize(getConfiguration());
-    }
-    
-    @Override
-    public void process(Pair<Integer, Iterable<Pair<Double, T>>> input,
-        Emitter<Pair<Integer, Pair<Double, T>>> emitter) {
-      SortedMap<Double, T> reservoir = reservoirs.get(input.first());
-      for (Pair<Double, T> p : input.second()) {
-        if (reservoir.size() < sampleSizes[input.first()]) { 
-          reservoir.put(p.first(), valueType.getDetachedValue(p.second()));        
-        } else if (p.first() > reservoir.firstKey()) {
-          reservoir.remove(reservoir.firstKey());
-          reservoir.put(p.first(), valueType.getDetachedValue(p.second()));  
-        }
-      }
-    }
-    
-    @Override
-    public void cleanup(Emitter<Pair<Integer, Pair<Double, T>>> emitter) {
-      for (int i = 0; i < reservoirs.size(); i++) {
-        SortedMap<Double, T> reservoir = reservoirs.get(i);
-        for (Map.Entry<Double, T> e : reservoir.entrySet()) {
-          emitter.emit(Pair.of(i, Pair.of(e.getKey(), e.getValue())));
-        }
-      }
-    }
-  }
-}
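
For context on the sampling code above: ReservoirSampleFn keys each record by score = log(u) / weight (with u uniform in (0,1)) and keeps the k largest scores, which is the streaming form of weighted reservoir sampling. A self-contained sketch of the same scoring rule outside of Crunch follows; the class name, inputs, and sample size are illustrative assumptions, not part of this patch.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
import java.util.TreeMap;

public class WeightedReservoirSketch {
  /** Keep the k items with the largest log(u)/weight scores, mirroring ReservoirSampleFn. */
  static <T> List<T> sample(List<T> items, List<Double> weights, int k, Random random) {
    TreeMap<Double, T> reservoir = new TreeMap<Double, T>();
    for (int i = 0; i < items.size(); i++) {
      double weight = weights.get(i);
      if (weight <= 0.0) {
        continue; // items with non-positive weight are never sampled
      }
      double score = Math.log(random.nextDouble()) / weight;
      if (reservoir.size() < k) {
        reservoir.put(score, items.get(i));
      } else if (score > reservoir.firstKey()) {
        reservoir.remove(reservoir.firstKey()); // evict the current smallest score
        reservoir.put(score, items.get(i));
      }
    }
    return new ArrayList<T>(reservoir.values());
  }

  public static void main(String[] args) {
    List<String> items = Arrays.asList("a", "b", "c", "d");
    List<Double> weights = Arrays.asList(1.0, 5.0, 1.0, 0.5);
    // Heavier items are more likely to survive in the 2-element sample.
    System.out.println(sample(items, weights, 2, new Random(42)));
  }
}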

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/lib/SecondarySort.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/lib/SecondarySort.java b/crunch/src/main/java/org/apache/crunch/lib/SecondarySort.java
deleted file mode 100644
index 54b4396..0000000
--- a/crunch/src/main/java/org/apache/crunch/lib/SecondarySort.java
+++ /dev/null
@@ -1,118 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib;
-
-import java.util.Collection;
-
-import org.apache.crunch.DoFn;
-import org.apache.crunch.Emitter;
-import org.apache.crunch.GroupingOptions;
-import org.apache.crunch.MapFn;
-import org.apache.crunch.PCollection;
-import org.apache.crunch.PGroupedTable;
-import org.apache.crunch.PTable;
-import org.apache.crunch.Pair;
-import org.apache.crunch.lib.join.JoinUtils;
-import org.apache.crunch.types.PTableType;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.PTypeFamily;
-import org.apache.hadoop.conf.Configuration;
-
-/**
- * Utilities for performing a secondary sort on a {@code PTable<K, Pair<V1, V2>>} collection.
- * <p>
- * Secondary sorts are usually performed during sessionization: given a collection
- * of events, we want to group them by a key (such as a user ID), then sort the grouped
- * records by an auxiliary key (such as a timestamp), and then perform some additional
- * processing on the sorted records.
- */
-public class SecondarySort {
-  
-  /**
-   * Perform a secondary sort on the given {@code PTable} instance and then apply a
-   * {@code DoFn} to the resulting sorted data to yield an output {@code PCollection<T>}.
-   */
-  public static <K, V1, V2, T> PCollection<T> sortAndApply(PTable<K, Pair<V1, V2>> input,
-      DoFn<Pair<K, Iterable<Pair<V1, V2>>>, T> doFn, PType<T> ptype) {
-    return prepare(input)
-        .parallelDo("SecondarySort.apply", new SSWrapFn<K, V1, V2, T>(doFn), ptype);
-  }
-  
-  /**
-   * Perform a secondary sort on the given {@code PTable} instance and then apply a
-   * {@code DoFn} to the resulting sorted data to yield an output {@code PTable<U, V>}.
-   */
-  public static <K, V1, V2, U, V> PTable<U, V> sortAndApply(PTable<K, Pair<V1, V2>> input,
-      DoFn<Pair<K, Iterable<Pair<V1, V2>>>, Pair<U, V>> doFn, PTableType<U, V> ptype) {
-    return prepare(input)
-        .parallelDo("SecondarySort.apply", new SSWrapFn<K, V1, V2, Pair<U, V>>(doFn), ptype);
-  }
-  
-  private static <K, V1, V2> PGroupedTable<Pair<K, V1>, Pair<V1, V2>> prepare(
-      PTable<K, Pair<V1, V2>> input) {
-    PTypeFamily ptf = input.getTypeFamily();
-    PType<Pair<V1, V2>> valueType = input.getValueType();
-    PTableType<Pair<K, V1>, Pair<V1, V2>> inter = ptf.tableOf(
-        ptf.pairs(input.getKeyType(), valueType.getSubTypes().get(0)),
-        valueType);
-    PTableType<K, Collection<Pair<V1, V2>>> out = ptf.tableOf(input.getKeyType(),
-        ptf.collections(input.getValueType()));
-    return input.parallelDo("SecondarySort.format", new SSFormatFn<K, V1, V2>(), inter)
-        .groupByKey(
-            GroupingOptions.builder()
-            .groupingComparatorClass(JoinUtils.getGroupingComparator(ptf))
-            .partitionerClass(JoinUtils.getPartitionerClass(ptf))
-            .build());
-  }
-  
-  private static class SSFormatFn<K, V1, V2> extends MapFn<Pair<K, Pair<V1, V2>>, Pair<Pair<K, V1>, Pair<V1, V2>>> {
-    @Override
-    public Pair<Pair<K, V1>, Pair<V1, V2>> map(Pair<K, Pair<V1, V2>> input) {
-      return Pair.of(Pair.of(input.first(), input.second().first()), input.second());
-    }
-  }  
-
-  private static class SSWrapFn<K, V1, V2, T> extends DoFn<Pair<Pair<K, V1>, Iterable<Pair<V1, V2>>>, T> {
-    private final DoFn<Pair<K, Iterable<Pair<V1, V2>>>, T> intern;
-    
-    public SSWrapFn(DoFn<Pair<K, Iterable<Pair<V1, V2>>>, T> intern) {
-      this.intern = intern;
-    }
-
-    @Override
-    public void configure(Configuration conf) {
-      intern.configure(conf);
-    }
-
-    @Override
-    public void initialize() {
-      intern.setContext(getContext());
-      intern.initialize();
-    }
-    
-    @Override
-    public void process(Pair<Pair<K, V1>, Iterable<Pair<V1, V2>>> input, Emitter<T> emitter) {
-      intern.process(Pair.of(input.first().first(), input.second()), emitter);
-    }
-    
-    @Override
-    public void cleanup(Emitter<T> emitter) {
-      intern.cleanup(emitter);
-    }
-  }  
-}
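
A minimal usage sketch of the SecondarySort utility moved in this commit, following the sessionization example in its Javadoc. The events table, its field meanings, and the Writables string type are illustrative assumptions, not part of the patch.

import org.apache.crunch.DoFn;
import org.apache.crunch.Emitter;
import org.apache.crunch.PCollection;
import org.apache.crunch.PTable;
import org.apache.crunch.Pair;
import org.apache.crunch.lib.SecondarySort;
import org.apache.crunch.types.writable.Writables;

public class SessionizeExample {
  /** Concatenate each user's page views in timestamp order (assumed input: userId -> (timestamp, url)). */
  public static PCollection<String> sessionize(PTable<String, Pair<Long, String>> events) {
    return SecondarySort.sortAndApply(
        events,
        new DoFn<Pair<String, Iterable<Pair<Long, String>>>, String>() {
          @Override
          public void process(Pair<String, Iterable<Pair<Long, String>>> input,
              Emitter<String> emitter) {
            // Values arrive sorted by the first element of the pair (the timestamp).
            StringBuilder sb = new StringBuilder(input.first());
            for (Pair<Long, String> event : input.second()) {
              sb.append(' ').append(event.second());
            }
            emitter.emit(sb.toString());
          }
        },
        Writables.strings());
  }
}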

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/lib/Set.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/lib/Set.java b/crunch/src/main/java/org/apache/crunch/lib/Set.java
deleted file mode 100644
index 0ba879c..0000000
--- a/crunch/src/main/java/org/apache/crunch/lib/Set.java
+++ /dev/null
@@ -1,118 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib;
-
-import java.util.Collection;
-
-import org.apache.crunch.DoFn;
-import org.apache.crunch.Emitter;
-import org.apache.crunch.PCollection;
-import org.apache.crunch.PTable;
-import org.apache.crunch.Pair;
-import org.apache.crunch.Tuple3;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.PTypeFamily;
-
-/**
- * Utilities for performing set operations (difference, intersection, etc.) on
- * {@code PCollection} instances.
- */
-public class Set {
-
-  /**
-   * Compute the set difference between two sets of elements.
-   * 
-   * @return a collection containing elements that are in <code>coll1</code> but
-   *         not in <code>coll2</code>
-   */
-  public static <T> PCollection<T> difference(PCollection<T> coll1, PCollection<T> coll2) {
-    return Cogroup.cogroup(toTable(coll1), toTable(coll2)).parallelDo(
-        new DoFn<Pair<T, Pair<Collection<Boolean>, Collection<Boolean>>>, T>() {
-          @Override
-          public void process(Pair<T, Pair<Collection<Boolean>, Collection<Boolean>>> input, Emitter<T> emitter) {
-            Pair<Collection<Boolean>, Collection<Boolean>> groups = input.second();
-            if (!groups.first().isEmpty() && groups.second().isEmpty()) {
-              emitter.emit(input.first());
-            }
-          }
-        }, coll1.getPType());
-  }
-
-  /**
-   * Compute the intersection of two sets of elements.
-   * 
-   * @return a collection containing elements that are common to both sets
-   *         <code>coll1</code> and <code>coll2</code>
-   */
-  public static <T> PCollection<T> intersection(PCollection<T> coll1, PCollection<T> coll2) {
-    return Cogroup.cogroup(toTable(coll1), toTable(coll2)).parallelDo(
-        new DoFn<Pair<T, Pair<Collection<Boolean>, Collection<Boolean>>>, T>() {
-          @Override
-          public void process(Pair<T, Pair<Collection<Boolean>, Collection<Boolean>>> input, Emitter<T> emitter) {
-            Pair<Collection<Boolean>, Collection<Boolean>> groups = input.second();
-            if (!groups.first().isEmpty() && !groups.second().isEmpty()) {
-              emitter.emit(input.first());
-            }
-          }
-        }, coll1.getPType());
-  }
-
-  /**
-   * Compare the elements of two sets, like the Unix
-   * <code>comm</code> utility. This method returns a {@link PCollection} of
-   * {@link Tuple3} objects, and the position in the tuple at which an element
-   * appears is determined by the collections that it is a member of, as
-   * follows:
-   * <ol>
-   * <li>elements only in <code>coll1</code>,</li>
-   * <li>elements only in <code>coll2</code>, or</li>
-   * <li>elements in both collections</li>
-   * </ol>
-   * Tuples are otherwise filled with <code>null</code>.
-   * 
-   * @return a collection of {@link Tuple3} objects
-   */
-  public static <T> PCollection<Tuple3<T, T, T>> comm(PCollection<T> coll1, PCollection<T> coll2) {
-    PTypeFamily typeFamily = coll1.getTypeFamily();
-    PType<T> type = coll1.getPType();
-    return Cogroup.cogroup(toTable(coll1), toTable(coll2)).parallelDo(
-        new DoFn<Pair<T, Pair<Collection<Boolean>, Collection<Boolean>>>, Tuple3<T, T, T>>() {
-          @Override
-          public void process(Pair<T, Pair<Collection<Boolean>, Collection<Boolean>>> input,
-              Emitter<Tuple3<T, T, T>> emitter) {
-            Pair<Collection<Boolean>, Collection<Boolean>> groups = input.second();
-            boolean inFirst = !groups.first().isEmpty();
-            boolean inSecond = !groups.second().isEmpty();
-            T t = input.first();
-            emitter.emit(Tuple3.of(inFirst && !inSecond ? t : null, !inFirst && inSecond ? t : null, inFirst
-                && inSecond ? t : null));
-          }
-        }, typeFamily.triples(type, type, type));
-  }
-
-  private static <T> PTable<T, Boolean> toTable(PCollection<T> coll) {
-    PTypeFamily typeFamily = coll.getTypeFamily();
-    return coll.parallelDo(new DoFn<T, Pair<T, Boolean>>() {
-      @Override
-      public void process(T input, Emitter<Pair<T, Boolean>> emitter) {
-        emitter.emit(Pair.of(input, Boolean.TRUE));
-      }
-    }, typeFamily.tableOf(coll.getPType(), typeFamily.booleans()));
-  }
-
-}
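
A short usage sketch of the Set utility moved here, covering the three operations its Javadoc describes. The two PCollection<String> inputs are illustrative assumptions.

import org.apache.crunch.PCollection;
import org.apache.crunch.Tuple3;
import org.apache.crunch.lib.Set;

public class SetExample {
  /** Assumed inputs: two collections of strings to compare. */
  public static void compare(PCollection<String> left, PCollection<String> right) {
    PCollection<String> onlyLeft = Set.difference(left, right);  // in left, not in right
    PCollection<String> inBoth = Set.intersection(left, right);  // in both collections
    // comm() emits one Tuple3 per distinct element: (only-left, only-right, both),
    // with the unused positions set to null, like the three columns of Unix comm.
    PCollection<Tuple3<String, String, String>> columns = Set.comm(left, right);
  }
}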

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/lib/Sort.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/lib/Sort.java b/crunch/src/main/java/org/apache/crunch/lib/Sort.java
deleted file mode 100644
index 23bcaee..0000000
--- a/crunch/src/main/java/org/apache/crunch/lib/Sort.java
+++ /dev/null
@@ -1,294 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib;
-
-import static org.apache.crunch.lib.sort.Comparators.*;
-import static org.apache.crunch.lib.sort.SortFns.*;
-
-import org.apache.avro.Schema;
-import org.apache.crunch.DoFn;
-import org.apache.crunch.Emitter;
-import org.apache.crunch.GroupingOptions;
-import org.apache.crunch.GroupingOptions.Builder;
-import org.apache.crunch.PCollection;
-import org.apache.crunch.PTable;
-import org.apache.crunch.Pair;
-import org.apache.crunch.SourceTarget;
-import org.apache.crunch.Tuple;
-import org.apache.crunch.Tuple3;
-import org.apache.crunch.Tuple4;
-import org.apache.crunch.TupleN;
-import org.apache.crunch.lib.sort.TotalOrderPartitioner;
-import org.apache.crunch.materialize.MaterializableIterable;
-import org.apache.crunch.types.PTableType;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.PTypeFamily;
-import org.apache.crunch.types.avro.AvroType;
-import org.apache.crunch.types.avro.AvroTypeFamily;
-import org.apache.crunch.types.writable.WritableTypeFamily;
-import org.apache.crunch.util.PartitionUtils;
-import org.apache.hadoop.conf.Configuration;
-
-/**
- * Utilities for sorting {@code PCollection} instances.
- */
-public class Sort {
-
-  /**
-   * For signaling the order in which a sort should be done.
-   */
-  public enum Order {
-    ASCENDING,
-    DESCENDING,
-    IGNORE
-  }
-
-  /**
-   * To sort by column 2 ascending then column 1 descending, you would use:
-   * <code>
-   * sortPairs(coll, by(2, ASCENDING), by(1, DESCENDING))
-   * </code> Column numbering is 1-based.
-   */
-  public static class ColumnOrder {
-    private int column;
-    private Order order;
-
-    public ColumnOrder(int column, Order order) {
-      this.column = column;
-      this.order = order;
-    }
-
-    public static ColumnOrder by(int column, Order order) {
-      return new ColumnOrder(column, order);
-    }
-
-    public int column() {
-      return column;
-    }
-    
-    public Order order() {
-      return order;
-    }
-    
-    @Override
-    public String toString() {
-      return "ColumnOrder: column:" + column + ", Order: " + order;
-    }
-  }
-
-  /**
-   * Sorts the {@code PCollection} using the natural ordering of its elements in ascending order.
-   * 
-   * @return a {@code PCollection} representing the sorted collection.
-   */
-  public static <T> PCollection<T> sort(PCollection<T> collection) {
-    return sort(collection, Order.ASCENDING);
-  }
-
-  /**
-   * Sorts the {@code PCollection} using the natural order of its elements with the given {@code Order}.
-   * 
-   * @return a {@code PCollection} representing the sorted collection.
-   */
-  public static <T> PCollection<T> sort(PCollection<T> collection, Order order) {
-    return sort(collection, -1, order);
-  }
-  
-  /**
-   * Sorts the {@code PCollection} using the natural ordering of its elements in
-   * the order specified using the given number of reducers.
-   * 
-   * @return a {@code PCollection} representing the sorted collection.
-   */
-  public static <T> PCollection<T> sort(PCollection<T> collection, int numReducers, Order order) {
-    PTypeFamily tf = collection.getTypeFamily();
-    PTableType<T, Void> type = tf.tableOf(collection.getPType(), tf.nulls());
-    Configuration conf = collection.getPipeline().getConfiguration();
-    PTable<T, Void> pt = collection.parallelDo("sort-pre", new DoFn<T, Pair<T, Void>>() {
-      @Override
-      public void process(T input, Emitter<Pair<T, Void>> emitter) {
-        emitter.emit(Pair.of(input, (Void) null));
-      }
-    }, type);
-    GroupingOptions options = buildGroupingOptions(pt, conf, numReducers, order);
-    return pt.groupByKey(options).ungroup().keys();
-  }
-
-  /**
-   * Sorts the {@code PTable} using the natural ordering of its keys in ascending order.
-   * 
-   * @return a {@code PTable} representing the sorted table.
-   */
-  public static <K, V> PTable<K, V> sort(PTable<K, V> table) {
-    return sort(table, Order.ASCENDING);
-  }
-
-  /**
-   * Sorts the {@code PTable} using the natural ordering of its keys with the given {@code Order}.
-   *
-   * @return a {@code PTable} representing the sorted table.
-   */
-  public static <K, V> PTable<K, V> sort(PTable<K, V> table, Order key) {
-    return sort(table, -1, key);
-  }
-  
-  /**
-   * Sorts the {@code PTable} using the natural ordering of its keys in the
-   * order specified with a client-specified number of reducers.
-   * 
-   * @return a {@code PTable} representing the sorted collection.
-   */
-  public static <K, V> PTable<K, V> sort(PTable<K, V> table, int numReducers, Order key) {
-    Configuration conf = table.getPipeline().getConfiguration();
-    GroupingOptions options = buildGroupingOptions(table, conf, numReducers, key);
-    return table.groupByKey(options).ungroup();
-  }
-
-  
-  /**
-   * Sorts the {@code PCollection} of {@code Pair}s using the specified column
-   * ordering.
-   * 
-   * @return a {@code PCollection} representing the sorted collection.
-   */
-  public static <U, V> PCollection<Pair<U, V>> sortPairs(PCollection<Pair<U, V>> collection,
-      ColumnOrder... columnOrders) {
-    return sortTuples(collection, columnOrders);
-  }
-
-  /**
-   * Sorts the {@code PCollection} of {@code Tuple3}s using the specified column
-   * ordering.
-   * 
-   * @return a {@code PCollection} representing the sorted collection.
-   */
-  public static <V1, V2, V3> PCollection<Tuple3<V1, V2, V3>> sortTriples(PCollection<Tuple3<V1, V2, V3>> collection,
-      ColumnOrder... columnOrders) {
-    return sortTuples(collection, columnOrders);
-  }
-
-  /**
-   * Sorts the {@code PCollection} of {@code Tuple4}s using the specified column
-   * ordering.
-   * 
-   * @return a {@code PCollection} representing the sorted collection.
-   */
-  public static <V1, V2, V3, V4> PCollection<Tuple4<V1, V2, V3, V4>> sortQuads(
-      PCollection<Tuple4<V1, V2, V3, V4>> collection, ColumnOrder... columnOrders) {
-    return sortTuples(collection, columnOrders);
-  }
-
-  /**
-   * Sorts the {@code PCollection} of tuples using the specified column ordering.
-   *
-   * @return a {@code PCollection} representing the sorted collection.
-   */
-  public static <T extends Tuple> PCollection<T> sortTuples(PCollection<T> collection,
-      ColumnOrder... columnOrders) {
-    return sortTuples(collection, -1, columnOrders);
-  }
-  
-  /**
-   * Sorts the {@code PCollection} of {@link TupleN}s using the specified column
-   * ordering and a client-specified number of reducers.
-   * 
-   * @return a {@code PCollection} representing the sorted collection.
-   */
-  public static <T extends Tuple> PCollection<T> sortTuples(PCollection<T> collection, int numReducers,
-      ColumnOrder... columnOrders) {
-    PType<T> pType = collection.getPType();
-    KeyExtraction<T> ke = new KeyExtraction<T>(pType, columnOrders);
-    PTable<Object, T> pt = collection.by(ke.getByFn(), ke.getKeyType());
-    Configuration conf = collection.getPipeline().getConfiguration();
-    GroupingOptions options = buildGroupingOptions(pt, conf, numReducers, columnOrders);
-    return pt.groupByKey(options).ungroup().values();
-  }
-
-  // TODO: move to type family?
-  private static <K, V> GroupingOptions buildGroupingOptions(PTable<K, V> ptable, Configuration conf,
-      int numReducers, Order order) {
-    PType<K> ptype = ptable.getKeyType();
-    PTypeFamily tf = ptable.getTypeFamily();
-    Builder builder = GroupingOptions.builder();
-    if (order == Order.DESCENDING) {
-      if (tf == WritableTypeFamily.getInstance()) {
-        builder.sortComparatorClass(ReverseWritableComparator.class);
-      } else if (tf == AvroTypeFamily.getInstance()) {
-        AvroType<K> avroType = (AvroType<K>) ptype;
-        Schema schema = avroType.getSchema();
-        builder.conf("crunch.schema", schema.toString());
-        builder.sortComparatorClass(ReverseAvroComparator.class);
-      } else {
-        throw new RuntimeException("Unrecognized type family: " + tf);
-      }
-    } else if (tf == AvroTypeFamily.getInstance()) {
-      builder.conf("crunch.schema", ((AvroType<K>) ptype).getSchema().toString());
-    }
-    configureReducers(builder, ptable, conf, numReducers);
-    return builder.build();
-  }
-
-  private static <K, V> GroupingOptions buildGroupingOptions(PTable<K, V> ptable, Configuration conf,
-      int numReducers, ColumnOrder[] columnOrders) {
-    PTypeFamily tf = ptable.getTypeFamily();
-    PType<K> keyType = ptable.getKeyType();
-    Builder builder = GroupingOptions.builder();
-    if (tf == WritableTypeFamily.getInstance()) {
-      if (columnOrders.length == 1 && columnOrders[0].order == Order.DESCENDING) {
-        builder.sortComparatorClass(ReverseWritableComparator.class);
-      } else {
-        TupleWritableComparator.configureOrdering(conf, columnOrders);
-        builder.sortComparatorClass(TupleWritableComparator.class);
-      }
-    } else if (tf == AvroTypeFamily.getInstance()) {
-      AvroType<K> avroType = (AvroType<K>) keyType;
-      Schema schema = avroType.getSchema();
-      builder.conf("crunch.schema", schema.toString());
-      if (columnOrders.length == 1 && columnOrders[0].order == Order.DESCENDING) {
-        builder.sortComparatorClass(ReverseAvroComparator.class);
-      }
-    } else {
-      throw new RuntimeException("Unrecognized type family: " + tf);
-    }
-    configureReducers(builder, ptable, conf, numReducers);
-    return builder.build();
-  }
-
-  private static <K, V> void configureReducers(GroupingOptions.Builder builder,
-      PTable<K, V> ptable, Configuration conf, int numReducers) {
-    if (numReducers <= 0) {
-      numReducers = PartitionUtils.getRecommendedPartitions(ptable, conf);
-      if (numReducers < 5) {
-        // Not worth the overhead, force it to 1
-        numReducers = 1;
-      }
-    }
-    builder.numReducers(numReducers);
-    if (numReducers > 1) {
-      Iterable<K> iter = Sample.reservoirSample(ptable.keys(), numReducers - 1).materialize();
-      MaterializableIterable<K> mi = (MaterializableIterable<K>) iter;
-      if (mi.isSourceTarget()) {
-        builder.sourceTarget((SourceTarget) mi.getSource());
-      }
-      builder.partitionerClass(TotalOrderPartitioner.class);
-      builder.conf(TotalOrderPartitioner.PARTITIONER_PATH, mi.getPath().toString());
-      //TODO: distcache handling
-    }   
-  }
-
-}
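
A brief usage sketch of the Sort utility moved here, combining the natural-order sort with the ColumnOrder example given in its Javadoc. The two input collections are illustrative assumptions.

import static org.apache.crunch.lib.Sort.ColumnOrder.by;
import static org.apache.crunch.lib.Sort.Order.ASCENDING;
import static org.apache.crunch.lib.Sort.Order.DESCENDING;

import org.apache.crunch.PCollection;
import org.apache.crunch.Pair;
import org.apache.crunch.lib.Sort;

public class SortExample {
  /** Assumed inputs: a collection of lines and a collection of (name, score) pairs. */
  public static void sortExamples(PCollection<String> lines,
      PCollection<Pair<String, Long>> scored) {
    // Total-order sort on the natural ordering, descending, letting Crunch pick the reducer count.
    PCollection<String> sortedLines = Sort.sort(lines, DESCENDING);
    // Sort pairs by column 2 ascending, breaking ties on column 1 descending (columns are 1-based).
    PCollection<Pair<String, Long>> byScore =
        Sort.sortPairs(scored, by(2, ASCENDING), by(1, DESCENDING));
  }
}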

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/lib/join/FullOuterJoinFn.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/lib/join/FullOuterJoinFn.java b/crunch/src/main/java/org/apache/crunch/lib/join/FullOuterJoinFn.java
deleted file mode 100644
index c0ce727..0000000
--- a/crunch/src/main/java/org/apache/crunch/lib/join/FullOuterJoinFn.java
+++ /dev/null
@@ -1,102 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib.join;
-
-import java.util.List;
-
-import org.apache.crunch.Emitter;
-import org.apache.crunch.Pair;
-import org.apache.crunch.types.PType;
-
-import com.google.common.collect.Lists;
-
-/**
- * Used to perform the last step of a full outer join.
- * 
- * @param <K> Type of the keys.
- * @param <U> Type of the first {@link org.apache.crunch.PTable}'s values
- * @param <V> Type of the second {@link org.apache.crunch.PTable}'s values
- */
-public class FullOuterJoinFn<K, U, V> extends JoinFn<K, U, V> {
-
-  private transient int lastId;
-  private transient K lastKey;
-  private transient List<U> leftValues;
-
-  public FullOuterJoinFn(PType<K> keyType, PType<U> leftValueType) {
-    super(keyType, leftValueType);
-  }
-
-  /** {@inheritDoc} */
-  @Override
-  public void initialize() {
-    super.initialize();
-    lastId = 1;
-    lastKey = null;
-    this.leftValues = Lists.newArrayList();
-  }
-
-  /** {@inheritDoc} */
-  @Override
-  public void join(K key, int id, Iterable<Pair<U, V>> pairs, Emitter<Pair<K, Pair<U, V>>> emitter) {
-    if (!key.equals(lastKey)) {
-      // Make sure that left side gets emitted.
-      if (0 == lastId) {
-        for (U u : leftValues) {
-          emitter.emit(Pair.of(lastKey, Pair.of(u, (V) null)));
-        }
-      }
-      lastKey = keyType.getDetachedValue(key);
-      leftValues.clear();
-    }
-    if (id == 0) {
-      for (Pair<U, V> pair : pairs) {
-        if (pair.first() != null)
-          leftValues.add(leftValueType.getDetachedValue(pair.first()));
-      }
-    } else {
-      for (Pair<U, V> pair : pairs) {
-        // Make sure that right side gets emitted.
-        if (leftValues.isEmpty()) {
-          leftValues.add(null);
-        }
-        for (U u : leftValues) {
-          emitter.emit(Pair.of(lastKey, Pair.of(u, pair.second())));
-        }
-      }
-    }
-
-    lastId = id;
-  }
-
-  /** {@inheritDoc} */
-  @Override
-  public void cleanup(Emitter<Pair<K, Pair<U, V>>> emitter) {
-    if (0 == lastId) {
-      for (U u : leftValues) {
-        emitter.emit(Pair.of(lastKey, Pair.of(u, (V) null)));
-      }
-    }
-  }
-
-  /** {@inheritDoc} */
-  @Override
-  public String getJoinType() {
-    return "fullOuterJoin";
-  }
-}
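
For reference, a sketch of how this JoinFn is normally exercised, assuming the companion org.apache.crunch.lib.Join.fullJoin helper that drives it; the two input tables and their meanings are illustrative assumptions.

import org.apache.crunch.PTable;
import org.apache.crunch.Pair;
import org.apache.crunch.lib.Join;

public class FullJoinExample {
  /**
   * Full outer join of two tables keyed by user id. Keys present on only one side come
   * through with a null on the other side, which is what FullOuterJoinFn's cleanup()
   * and "make sure the right side gets emitted" branches take care of.
   */
  public static PTable<String, Pair<Long, String>> joinProfiles(
      PTable<String, Long> lastLogin, PTable<String, String> email) {
    return Join.fullJoin(lastLogin, email);
  }
}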


[32/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobPrototype.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobPrototype.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobPrototype.java
new file mode 100644
index 0000000..f22b5a1
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobPrototype.java
@@ -0,0 +1,245 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.plan;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.crunch.Pipeline;
+import org.apache.crunch.Target;
+import org.apache.crunch.hadoop.mapreduce.lib.jobcontrol.CrunchControlledJob;
+import org.apache.crunch.impl.mr.collect.DoTableImpl;
+import org.apache.crunch.impl.mr.collect.PCollectionImpl;
+import org.apache.crunch.impl.mr.collect.PGroupedTableImpl;
+import org.apache.crunch.impl.mr.exec.CrunchJobHooks;
+import org.apache.crunch.impl.mr.run.CrunchCombiner;
+import org.apache.crunch.impl.mr.run.CrunchInputFormat;
+import org.apache.crunch.impl.mr.run.CrunchMapper;
+import org.apache.crunch.impl.mr.run.CrunchReducer;
+import org.apache.crunch.impl.mr.run.NodeContext;
+import org.apache.crunch.impl.mr.run.RTNode;
+import org.apache.crunch.util.DistCache;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.Job;
+
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+
+class JobPrototype {
+
+  public static JobPrototype createMapReduceJob(int jobID, PGroupedTableImpl<?, ?> group,
+      Set<NodePath> inputs, Path workingPath) {
+    return new JobPrototype(jobID, inputs, group, workingPath);
+  }
+
+  public static JobPrototype createMapOnlyJob(int jobID, HashMultimap<Target, NodePath> mapNodePaths, Path workingPath) {
+    return new JobPrototype(jobID, mapNodePaths, workingPath);
+  }
+
+  private final int jobID; // TODO: maybe stageID sounds better
+  private final Set<NodePath> mapNodePaths;
+  private final PGroupedTableImpl<?, ?> group;
+  private final Set<JobPrototype> dependencies = Sets.newHashSet();
+  private final Map<PCollectionImpl<?>, DoNode> nodes = Maps.newHashMap();
+  private final Path workingPath;
+
+  private HashMultimap<Target, NodePath> targetsToNodePaths;
+  private DoTableImpl<?, ?> combineFnTable;
+
+  private CrunchControlledJob job;
+
+  private JobPrototype(int jobID, Set<NodePath> inputs, PGroupedTableImpl<?, ?> group, Path workingPath) {
+    this.jobID = jobID;
+    this.mapNodePaths = ImmutableSet.copyOf(inputs);
+    this.group = group;
+    this.workingPath = workingPath;
+    this.targetsToNodePaths = null;
+  }
+
+  private JobPrototype(int jobID, HashMultimap<Target, NodePath> outputPaths, Path workingPath) {
+    this.jobID = jobID;
+    this.group = null;
+    this.mapNodePaths = null;
+    this.workingPath = workingPath;
+    this.targetsToNodePaths = outputPaths;
+  }
+
+  public int getJobID() {
+    return jobID;
+  }
+
+  public boolean isMapOnly() {
+    return this.group == null;
+  }
+
+  Set<NodePath> getMapNodePaths() {
+    return mapNodePaths;
+  }
+
+  PGroupedTableImpl<?, ?> getGroupingTable() {
+    return group;
+  }
+
+  HashMultimap<Target, NodePath> getTargetsToNodePaths() {
+    return targetsToNodePaths;
+  }
+
+  public void addReducePaths(HashMultimap<Target, NodePath> outputPaths) {
+    if (group == null) {
+      throw new IllegalStateException("Cannot add a reduce phase to a map-only job");
+    }
+    this.targetsToNodePaths = outputPaths;
+  }
+
+  public void addDependency(JobPrototype dependency) {
+    this.dependencies.add(dependency);
+  }
+
+  public CrunchControlledJob getCrunchJob(Class<?> jarClass, Configuration conf, Pipeline pipeline) throws IOException {
+    if (job == null) {
+      job = build(jarClass, conf, pipeline);
+      for (JobPrototype proto : dependencies) {
+        job.addDependingJob(proto.getCrunchJob(jarClass, conf, pipeline));
+      }
+    }
+    return job;
+  }
+
+  private CrunchControlledJob build(Class<?> jarClass, Configuration conf, Pipeline pipeline) throws IOException {
+    Job job = new Job(conf);
+    conf = job.getConfiguration();
+    conf.set(PlanningParameters.CRUNCH_WORKING_DIRECTORY, workingPath.toString());
+    job.setJarByClass(jarClass);
+
+    Set<DoNode> outputNodes = Sets.newHashSet();
+    Set<Target> targets = targetsToNodePaths.keySet();
+    Path outputPath = new Path(workingPath, "output");
+    MSCROutputHandler outputHandler = new MSCROutputHandler(job, outputPath, group == null);
+    for (Target target : targets) {
+      DoNode node = null;
+      for (NodePath nodePath : targetsToNodePaths.get(target)) {
+        if (node == null) {
+          PCollectionImpl<?> collect = nodePath.tail();
+          node = DoNode.createOutputNode(target.toString(), collect.getPType());
+          outputHandler.configureNode(node, target);
+        }
+        outputNodes.add(walkPath(nodePath.descendingIterator(), node));
+      }
+    }
+
+    job.setMapperClass(CrunchMapper.class);
+    List<DoNode> inputNodes;
+    DoNode reduceNode = null;
+    if (group != null) {
+      job.setReducerClass(CrunchReducer.class);
+      List<DoNode> reduceNodes = Lists.newArrayList(outputNodes);
+      serialize(reduceNodes, conf, workingPath, NodeContext.REDUCE);
+      reduceNode = reduceNodes.get(0);
+
+      if (combineFnTable != null) {
+        job.setCombinerClass(CrunchCombiner.class);
+        DoNode combinerInputNode = group.createDoNode();
+        DoNode combineNode = combineFnTable.createDoNode();
+        combineNode.addChild(group.getGroupingNode());
+        combinerInputNode.addChild(combineNode);
+        serialize(ImmutableList.of(combinerInputNode), conf, workingPath, NodeContext.COMBINE);
+      }
+
+      group.configureShuffle(job);
+
+      DoNode mapOutputNode = group.getGroupingNode();
+      Set<DoNode> mapNodes = Sets.newHashSet();
+      for (NodePath nodePath : mapNodePaths) {
+        // Advance these one step, since we've already configured
+        // the grouping node, and the PGroupedTableImpl is the tail
+        // of the NodePath.
+        Iterator<PCollectionImpl<?>> iter = nodePath.descendingIterator();
+        iter.next();
+        mapNodes.add(walkPath(iter, mapOutputNode));
+      }
+      inputNodes = Lists.newArrayList(mapNodes);
+    } else { // No grouping
+      job.setNumReduceTasks(0);
+      inputNodes = Lists.newArrayList(outputNodes);
+    }
+    serialize(inputNodes, conf, workingPath, NodeContext.MAP);
+
+    if (inputNodes.size() == 1) {
+      DoNode inputNode = inputNodes.get(0);
+      inputNode.getSource().configureSource(job, -1);
+    } else {
+      for (int i = 0; i < inputNodes.size(); i++) {
+        DoNode inputNode = inputNodes.get(i);
+        inputNode.getSource().configureSource(job, i);
+      }
+      job.setInputFormatClass(CrunchInputFormat.class);
+    }
+    job.setJobName(createJobName(pipeline.getName(), inputNodes, reduceNode));
+
+    return new CrunchControlledJob(
+        jobID,
+        job,
+        new CrunchJobHooks.PrepareHook(job),
+        new CrunchJobHooks.CompletionHook(job, outputPath, outputHandler.getMultiPaths(), group == null));
+  }
+
+  private void serialize(List<DoNode> nodes, Configuration conf, Path workingPath, NodeContext context)
+      throws IOException {
+    List<RTNode> rtNodes = Lists.newArrayList();
+    for (DoNode node : nodes) {
+      rtNodes.add(node.toRTNode(true, conf, context));
+    }
+    Path path = new Path(workingPath, context.toString());
+    DistCache.write(conf, path, rtNodes);
+  }
+
+  private String createJobName(String pipelineName, List<DoNode> mapNodes, DoNode reduceNode) {
+    JobNameBuilder builder = new JobNameBuilder(pipelineName);
+    builder.visit(mapNodes);
+    if (reduceNode != null) {
+      builder.visit(reduceNode);
+    }
+    return builder.build();
+  }
+
+  private DoNode walkPath(Iterator<PCollectionImpl<?>> iter, DoNode working) {
+    while (iter.hasNext()) {
+      PCollectionImpl<?> collect = iter.next();
+      if (combineFnTable != null && !(collect instanceof PGroupedTableImpl)) {
+        combineFnTable = null;
+      } else if (collect instanceof DoTableImpl && ((DoTableImpl<?, ?>) collect).hasCombineFn()) {
+        combineFnTable = (DoTableImpl<?, ?>) collect;
+      }
+      if (!nodes.containsKey(collect)) {
+        nodes.put(collect, collect.createDoNode());
+      }
+      DoNode parent = nodes.get(collect);
+      parent.addChild(working);
+      working = parent;
+    }
+    return working;
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/MSCROutputHandler.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/MSCROutputHandler.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/MSCROutputHandler.java
new file mode 100644
index 0000000..36c565e
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/MSCROutputHandler.java
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.plan;
+
+import java.util.Map;
+
+import org.apache.crunch.Target;
+import org.apache.crunch.io.MapReduceTarget;
+import org.apache.crunch.io.OutputHandler;
+import org.apache.crunch.io.PathTarget;
+import org.apache.crunch.types.PType;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.Job;
+
+import com.google.common.collect.Maps;
+
+public class MSCROutputHandler implements OutputHandler {
+
+  private final Job job;
+  private final Path path;
+  private final boolean mapOnlyJob;
+
+  private DoNode workingNode;
+  private Map<Integer, PathTarget> multiPaths;
+  private int jobCount;
+
+  public MSCROutputHandler(Job job, Path outputPath, boolean mapOnlyJob) {
+    this.job = job;
+    this.path = outputPath;
+    this.mapOnlyJob = mapOnlyJob;
+    this.multiPaths = Maps.newHashMap();
+  }
+
+  public void configureNode(DoNode node, Target target) {
+    workingNode = node;
+    target.accept(this, node.getPType());
+  }
+
+  public boolean configure(Target target, PType<?> ptype) {
+    if (target instanceof MapReduceTarget) {
+      if (target instanceof PathTarget) {
+        multiPaths.put(jobCount, (PathTarget) target);
+      }
+
+      String name = PlanningParameters.MULTI_OUTPUT_PREFIX + jobCount;
+      jobCount++;
+      workingNode.setOutputName(name);
+      ((MapReduceTarget) target).configureForMapReduce(job, ptype, path, name);
+      return true;
+    }
+
+    return false;
+  }
+
+  public boolean isMapOnlyJob() {
+    return mapOnlyJob;
+  }
+
+  public Map<Integer, PathTarget> getMultiPaths() {
+    return multiPaths;
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/MSCRPlanner.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/MSCRPlanner.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/MSCRPlanner.java
new file mode 100644
index 0000000..3e1de38
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/MSCRPlanner.java
@@ -0,0 +1,378 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.plan;
+
+import java.io.IOException;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.TreeMap;
+
+import org.apache.crunch.SourceTarget;
+import org.apache.crunch.Target;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.impl.mr.collect.InputCollection;
+import org.apache.crunch.impl.mr.collect.PCollectionImpl;
+import org.apache.crunch.impl.mr.collect.PGroupedTableImpl;
+import org.apache.crunch.impl.mr.exec.MRExecutor;
+import org.apache.crunch.materialize.MaterializableIterable;
+import org.apache.hadoop.conf.Configuration;
+
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Multimap;
+import com.google.common.collect.Sets;
+
+public class MSCRPlanner {
+
+  private final MRPipeline pipeline;
+  private final Map<PCollectionImpl<?>, Set<Target>> outputs;
+  private final Map<PCollectionImpl<?>, MaterializableIterable> toMaterialize;
+  private int lastJobID = 0;
+
+  public MSCRPlanner(MRPipeline pipeline, Map<PCollectionImpl<?>, Set<Target>> outputs,
+      Map<PCollectionImpl<?>, MaterializableIterable> toMaterialize) {
+    this.pipeline = pipeline;
+    this.outputs = new TreeMap<PCollectionImpl<?>, Set<Target>>(DEPTH_COMPARATOR);
+    this.outputs.putAll(outputs);
+    this.toMaterialize = toMaterialize;
+  }
+
+  // Used to ensure that we always build pipelines starting from the deepest
+  // outputs, which helps us handle intermediate outputs correctly.
+  private static final Comparator<PCollectionImpl<?>> DEPTH_COMPARATOR = new Comparator<PCollectionImpl<?>>() {
+    @Override
+    public int compare(PCollectionImpl<?> left, PCollectionImpl<?> right) {
+      int cmp = right.getDepth() - left.getDepth();
+      if (cmp == 0) {
+        // Ensure we don't throw away two output collections at the same depth.
+        // Using the collection name would be nicer here, but names aren't
+        // necessarily unique.
+        cmp = new Integer(right.hashCode()).compareTo(left.hashCode());
+      }
+      return cmp;
+    }
+  };  
+
+  public MRExecutor plan(Class<?> jarClass, Configuration conf) throws IOException {
+    Map<PCollectionImpl<?>, Set<SourceTarget<?>>> targetDeps = Maps.newTreeMap(DEPTH_COMPARATOR);
+    for (PCollectionImpl<?> pcollect : outputs.keySet()) {
+      targetDeps.put(pcollect, pcollect.getTargetDependencies());
+    }
+    
+    Multimap<Vertex, JobPrototype> assignments = HashMultimap.create();
+    Multimap<PCollectionImpl<?>, Vertex> protoDependency = HashMultimap.create();
+    while (!targetDeps.isEmpty()) {
+      Set<Target> allTargets = Sets.newHashSet();
+      for (PCollectionImpl<?> pcollect : targetDeps.keySet()) {
+        allTargets.addAll(outputs.get(pcollect));
+      }
+      GraphBuilder graphBuilder = new GraphBuilder();
+      
+      // Walk the current plan tree and build a graph in which the vertices are
+      // sources, targets, and GBK operations.
+      Set<PCollectionImpl<?>> currentStage = Sets.newHashSet();
+      Set<PCollectionImpl<?>> laterStage = Sets.newHashSet();
+      for (PCollectionImpl<?> output : targetDeps.keySet()) {
+        if (Sets.intersection(allTargets, targetDeps.get(output)).isEmpty()) {
+          graphBuilder.visitOutput(output);
+          currentStage.add(output);
+        } else {
+          laterStage.add(output);
+        }
+      }
+      
+      Graph baseGraph = graphBuilder.getGraph();
+      
+      // Create a new graph that splits up dependent GBK nodes.
+      Graph graph = prepareFinalGraph(baseGraph);
+      
+      // Break the graph up into connected components.
+      List<List<Vertex>> components = graph.connectedComponents();
+      
+      // For each component, we will create one or more job prototypes,
+      // depending on its profile.
+      // For dependency handling, we only need to care about which
+      // job prototype a particular GBK is assigned to.
+      for (List<Vertex> component : components) {
+        assignments.putAll(constructJobPrototypes(component));
+      }
+
+      // Add in the job dependency information here.
+      for (Map.Entry<Vertex, JobPrototype> e : assignments.entries()) {
+        JobPrototype current = e.getValue();
+        List<Vertex> parents = graph.getParents(e.getKey());
+        for (Vertex parent : parents) {
+          for (JobPrototype parentJobProto : assignments.get(parent)) {
+            current.addDependency(parentJobProto);
+          }
+        }
+      }
+      
+      // Add cross-stage dependencies.
+      for (PCollectionImpl<?> output : currentStage) {
+        Set<Target> targets = outputs.get(output);
+        Vertex vertex = graph.getVertexAt(output);
+        for (PCollectionImpl<?> later : laterStage) {
+          if (!Sets.intersection(targets, targetDeps.get(later)).isEmpty()) {
+            protoDependency.put(later, vertex);
+          }
+        }
+        targetDeps.remove(output);
+      }
+    }
+    
+    // Cross-job dependencies.
+    for (Entry<PCollectionImpl<?>, Vertex> pd : protoDependency.entries()) {
+      Vertex d = new Vertex(pd.getKey());
+      Vertex dj = pd.getValue();
+      for (JobPrototype parent : assignments.get(dj)) {
+        for (JobPrototype child : assignments.get(d)) {
+          child.addDependency(parent);
+        }
+      }
+    }
+    
+    // Finally, construct the jobs from the prototypes and return.
+    DotfileWriter dotfileWriter = new DotfileWriter();
+    MRExecutor exec = new MRExecutor(jarClass, outputs, toMaterialize);
+    for (JobPrototype proto : Sets.newHashSet(assignments.values())) {
+      dotfileWriter.addJobPrototype(proto);
+      exec.addJob(proto.getCrunchJob(jarClass, conf, pipeline));
+    }
+
+    String planDotFile = dotfileWriter.buildDotfile();
+    exec.setPlanDotFile(planDotFile);
+    conf.set(PlanningParameters.PIPELINE_PLAN_DOTFILE, planDotFile);
+
+    return exec;
+  }
+  
+  private Graph prepareFinalGraph(Graph baseGraph) {
+    Graph graph = new Graph();
+    
+    for (Vertex baseVertex : baseGraph) {
+      // Add all of the vertices in the base graph, but no edges (yet).
+      graph.addVertex(baseVertex.getPCollection(), baseVertex.isOutput());
+    }
+    
+    for (Edge e : baseGraph.getAllEdges()) {
+      // Add back all of the edges, except those that connect one GBK to another
+      // and those where an output feeds into a GBK.
+      if (!(e.getHead().isGBK() && e.getTail().isGBK()) &&
+          !(e.getHead().isOutput() && e.getTail().isGBK())) {
+        Vertex head = graph.getVertexAt(e.getHead().getPCollection());
+        Vertex tail = graph.getVertexAt(e.getTail().getPCollection());
+        graph.getEdge(head, tail).addAllNodePaths(e.getNodePaths());
+      }
+    }
+    
+    for (Vertex baseVertex : baseGraph) {
+      if (baseVertex.isGBK()) {
+        Vertex vertex = graph.getVertexAt(baseVertex.getPCollection());
+        for (Edge e : baseVertex.getIncomingEdges()) {
+          if (e.getHead().isOutput()) {
+            // Execute an edge split.
+            Vertex splitTail = e.getHead();
+            PCollectionImpl<?> split = splitTail.getPCollection();
+            InputCollection<?> inputNode = handleSplitTarget(split);
+            Vertex splitHead = graph.addVertex(inputNode, false);
+            
+            // Divide up the node paths in the edge between the two GBK nodes so
+            // that each node is either owned by GBK1 -> newTail or newHead -> GBK2.
+            for (NodePath path : e.getNodePaths()) {
+              NodePath headPath = path.splitAt(split, splitHead.getPCollection());
+              graph.getEdge(vertex, splitTail).addNodePath(headPath);
+              graph.getEdge(splitHead, vertex).addNodePath(path);
+            }
+            
+            // Note the dependency between the vertices in the graph.
+            graph.markDependency(splitHead, splitTail);
+          } else if (!e.getHead().isGBK()) {
+            Vertex newHead = graph.getVertexAt(e.getHead().getPCollection());
+            graph.getEdge(newHead, vertex).addAllNodePaths(e.getNodePaths());
+          }
+        }
+        for (Edge e : baseVertex.getOutgoingEdges()) {
+          if (!e.getTail().isGBK()) {
+            Vertex newTail = graph.getVertexAt(e.getTail().getPCollection());
+            graph.getEdge(vertex, newTail).addAllNodePaths(e.getNodePaths());
+          } else {
+            // Execute an Edge split
+            Vertex newGraphTail = graph.getVertexAt(e.getTail().getPCollection());
+            PCollectionImpl split = e.getSplit();
+            InputCollection<?> inputNode = handleSplitTarget(split);
+            Vertex splitTail = graph.addVertex(split, true);
+            Vertex splitHead = graph.addVertex(inputNode, false);
+            
+            // Divide up the node paths in the edge between the two GBK nodes so
+            // that each node is either owned by GBK1 -> newTail or newHead -> GBK2.
+            for (NodePath path : e.getNodePaths()) {
+              NodePath headPath = path.splitAt(split, splitHead.getPCollection());
+              graph.getEdge(vertex, splitTail).addNodePath(headPath);
+              graph.getEdge(splitHead, newGraphTail).addNodePath(path);
+            }
+            
+            // Note the dependency between the vertices in the graph.
+            graph.markDependency(splitHead, splitTail);
+          }
+        }
+      }
+    }
+    
+    return graph;
+  }
+  
+  private Multimap<Vertex, JobPrototype> constructJobPrototypes(List<Vertex> component) {
+    Multimap<Vertex, JobPrototype> assignment = HashMultimap.create();
+    List<Vertex> gbks = Lists.newArrayList();
+    for (Vertex v : component) {
+      if (v.isGBK()) {
+        gbks.add(v);
+      }
+    }
+
+    if (gbks.isEmpty()) {
+      HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
+      for (Vertex v : component) {
+        if (v.isInput()) {
+          for (Edge e : v.getOutgoingEdges()) {
+            for (NodePath nodePath : e.getNodePaths()) {
+              PCollectionImpl target = nodePath.tail();
+              for (Target t : outputs.get(target)) {
+                outputPaths.put(t, nodePath);
+              }
+            }
+          }
+        }
+      }
+      if (outputPaths.isEmpty()) {
+        throw new IllegalStateException("No outputs?");
+      }
+      JobPrototype prototype = JobPrototype.createMapOnlyJob(
+          ++lastJobID, outputPaths, pipeline.createTempPath());
+      for (Vertex v : component) {
+        assignment.put(v, prototype);
+      }
+    } else {
+      Set<Edge> usedEdges = Sets.newHashSet();
+      for (Vertex g : gbks) {
+        Set<NodePath> inputs = Sets.newHashSet();
+        for (Edge e : g.getIncomingEdges()) {
+          inputs.addAll(e.getNodePaths());
+          usedEdges.add(e);
+        }
+        JobPrototype prototype = JobPrototype.createMapReduceJob(
+            ++lastJobID, (PGroupedTableImpl) g.getPCollection(), inputs, pipeline.createTempPath());
+        assignment.put(g, prototype);
+        for (Edge e : g.getIncomingEdges()) {
+          assignment.put(e.getHead(), prototype);
+          usedEdges.add(e);
+        }
+        HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
+        for (Edge e : g.getOutgoingEdges()) {
+          Vertex output = e.getTail();
+          for (Target t : outputs.get(output.getPCollection())) {
+            outputPaths.putAll(t, e.getNodePaths());
+          }
+          assignment.put(output, prototype);
+          usedEdges.add(e);
+        }
+        prototype.addReducePaths(outputPaths);
+      }
+      
+      // Check for any un-assigned vertices, which should be map-side outputs
+      // that we will need to run in a map-only job.
+      HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
+      Set<Vertex> orphans = Sets.newHashSet();
+      for (Vertex v : component) {
+
+        // Check if this vertex has multiple inputs but only a subset of
+        // them have already been assigned
+        boolean vertexHasUnassignedIncomingEdges = false;
+        if (v.isOutput()) {
+          for (Edge e : v.getIncomingEdges()) {
+            if (!usedEdges.contains(e)) {
+              vertexHasUnassignedIncomingEdges = true;
+            }
+          }
+        }
+
+        if (v.isOutput() && (vertexHasUnassignedIncomingEdges || !assignment.containsKey(v))) {
+          orphans.add(v);
+          for (Edge e : v.getIncomingEdges()) {
+            if (vertexHasUnassignedIncomingEdges && usedEdges.contains(e)) {
+              // We've already dealt with this incoming edge
+              continue;
+            }
+            orphans.add(e.getHead());
+            for (NodePath nodePath : e.getNodePaths()) {
+              PCollectionImpl target = nodePath.tail();
+              for (Target t : outputs.get(target)) {
+                outputPaths.put(t, nodePath);
+              }
+            }
+          }
+        }
+
+      }
+      if (!outputPaths.isEmpty()) {
+        JobPrototype prototype = JobPrototype.createMapOnlyJob(
+            ++lastJobID, outputPaths, pipeline.createTempPath());
+        for (Vertex orphan : orphans) {
+          assignment.put(orphan, prototype);
+        }
+      }
+    }
+    
+    return assignment;
+  }
+  
+  private InputCollection<?> handleSplitTarget(PCollectionImpl<?> splitTarget) {
+    if (!outputs.containsKey(splitTarget)) {
+      outputs.put(splitTarget, Sets.<Target> newHashSet());
+    }
+
+    SourceTarget srcTarget = null;
+    Target targetToReplace = null;
+    for (Target t : outputs.get(splitTarget)) {
+      if (t instanceof SourceTarget) {
+        srcTarget = (SourceTarget<?>) t;
+        break;
+      } else {
+        srcTarget = t.asSourceTarget(splitTarget.getPType());
+        if (srcTarget != null) {
+          targetToReplace = t;
+          break;
+        }
+      }
+    }
+    if (targetToReplace != null) {
+      outputs.get(splitTarget).remove(targetToReplace);
+    } else if (srcTarget == null) {
+      srcTarget = pipeline.createIntermediateOutput(splitTarget.getPType());
+    }
+    outputs.get(splitTarget).add(srcTarget);
+    splitTarget.materializeAt(srcTarget);
+
+    return (InputCollection<?>) pipeline.read(srcTarget);
+  }  
+}
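
The DEPTH_COMPARATOR above orders planned outputs from deepest to shallowest so that planning starts at the leaves of the pipeline; the hashCode() tie-break only keeps two equally deep outputs from colliding as TreeMap keys. A standalone illustration of that ordering idea over a toy node type (hypothetical code, not part of Crunch):

import java.util.Comparator;
import java.util.TreeMap;

public class DepthOrderSketch {
  static final class Node {
    final String name;
    final int depth;
    Node(String name, int depth) { this.name = name; this.depth = depth; }
  }

  public static void main(String[] args) {
    // Deeper nodes sort first; ties are broken arbitrarily but consistently so that
    // two distinct nodes at the same depth both survive as TreeMap keys.
    Comparator<Node> byDepthDescending = new Comparator<Node>() {
      @Override
      public int compare(Node left, Node right) {
        int cmp = right.depth - left.depth;
        return cmp != 0 ? cmp : Integer.valueOf(right.hashCode()).compareTo(left.hashCode());
      }
    };
    TreeMap<Node, String> outputs = new TreeMap<Node, String>(byDepthDescending);
    outputs.put(new Node("leaf", 3), "planned first");
    outputs.put(new Node("intermediate", 1), "planned later");
    System.out.println(outputs.firstKey().name); // prints "leaf"
  }
}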

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/NodePath.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/NodePath.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/NodePath.java
new file mode 100644
index 0000000..a090d93
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/NodePath.java
@@ -0,0 +1,124 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.plan;
+
+import java.util.Iterator;
+import java.util.LinkedList;
+
+import org.apache.crunch.impl.mr.collect.PCollectionImpl;
+
+import com.google.common.collect.Lists;
+
+class NodePath implements Iterable<PCollectionImpl<?>> {
+  private LinkedList<PCollectionImpl<?>> path;
+
+  public NodePath() {
+    this.path = Lists.newLinkedList();
+  }
+
+  public NodePath(PCollectionImpl<?> tail) {
+    this.path = Lists.newLinkedList();
+    this.path.add(tail);
+  }
+
+  public NodePath(NodePath other) {
+    this.path = Lists.newLinkedList(other.path);
+  }
+
+  public void push(PCollectionImpl<?> stage) {
+    this.path.push(stage);
+  }
+
+  public NodePath close(PCollectionImpl<?> head) {
+    this.path.push(head);
+    return this;
+  }
+
+  public Iterator<PCollectionImpl<?>> iterator() {
+    return path.iterator();
+  }
+
+  public Iterator<PCollectionImpl<?>> descendingIterator() {
+    return path.descendingIterator();
+  }
+
+  public PCollectionImpl<?> get(int index) {
+    return path.get(index);
+  }
+
+  public PCollectionImpl<?> head() {
+    return path.peekFirst();
+  }
+
+  public PCollectionImpl<?> tail() {
+    return path.peekLast();
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    if (other == null || !(other instanceof NodePath)) {
+      return false;
+    }
+    NodePath nodePath = (NodePath) other;
+    return path.equals(nodePath.path);
+  }
+
+  @Override
+  public int hashCode() {
+    return 17 + 37 * path.hashCode();
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+    for (PCollectionImpl<?> collect : path) {
+      sb.append(collect.getName() + "|");
+    }
+    sb.deleteCharAt(sb.length() - 1);
+    return sb.toString();
+  }
+
+  public NodePath splitAt(int splitIndex, PCollectionImpl<?> newHead) {
+    NodePath top = new NodePath();
+    for (int i = 0; i <= splitIndex; i++) {
+      top.path.add(path.get(i));
+    }
+    LinkedList<PCollectionImpl<?>> nextPath = Lists.newLinkedList();
+    nextPath.add(newHead);
+    nextPath.addAll(path.subList(splitIndex + 1, path.size()));
+    path = nextPath;
+    return top;
+  }
+  
+  public NodePath splitAt(PCollectionImpl<?> split, PCollectionImpl<?> newHead) {
+    NodePath top = new NodePath();
+    int splitIndex = 0;
+    for (PCollectionImpl<?> p : path) {
+      top.path.add(p);
+      if (p == split) {
+        break;
+      }
+      splitIndex++;
+    }
+    LinkedList<PCollectionImpl<?>> nextPath = Lists.newLinkedList();
+    nextPath.add(newHead);
+    nextPath.addAll(path.subList(splitIndex + 1, path.size()));
+    path = nextPath;
+    return top;
+  }
+}
\ No newline at end of file
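
A standalone sketch (not part of this patch) of the splitAt contract above: everything up to and including the split element stays in the returned top path, and the remainder is re-headed with a new element standing in for the materialized split target. Plain strings are used here in place of PCollectionImpl instances.

import java.util.Arrays;
import java.util.LinkedList;

public class NodePathSplitSketch {
  public static void main(String[] args) {
    // Stand-ins for the PCollectionImpl stages of a NodePath.
    LinkedList<String> path = new LinkedList<String>(
        Arrays.asList("input", "parse", "filter", "aggregate", "output"));

    int splitIndex = 2; // split after "filter"

    // Top half: indices 0..splitIndex, as in NodePath.splitAt(int, ...).
    LinkedList<String> top = new LinkedList<String>(path.subList(0, splitIndex + 1));

    // Bottom half: a new head (the materialized split target) plus the remainder.
    LinkedList<String> bottom = new LinkedList<String>();
    bottom.add("materialized-filter");
    bottom.addAll(path.subList(splitIndex + 1, path.size()));

    System.out.println(top);    // [input, parse, filter]
    System.out.println(bottom); // [materialized-filter, aggregate, output]
  }
}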

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/PlanningParameters.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/PlanningParameters.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/PlanningParameters.java
new file mode 100644
index 0000000..b90a911
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/PlanningParameters.java
@@ -0,0 +1,38 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.plan;
+
+/**
+ * Collection of Configuration keys and various constants used when planning MapReduce jobs for a
+ * pipeline.
+ */
+public class PlanningParameters {
+
+  public static final String MULTI_OUTPUT_PREFIX = "out";
+
+  public static final String CRUNCH_WORKING_DIRECTORY = "crunch.work.dir";
+
+  /**
+   * Configuration key under which a <a href="http://www.graphviz.org">DOT</a> file containing the
+   * pipeline job graph is stored by the planner.
+   */
+  public static final String PIPELINE_PLAN_DOTFILE = "crunch.planner.dotfile";
+
+  private PlanningParameters() {
+  }
+}
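
A minimal sketch of reading these keys back from a job Configuration. Whether any value is present depends on how the planner was run, so the lookups below assume a Configuration that the pipeline has already populated.

import org.apache.crunch.impl.mr.plan.PlanningParameters;
import org.apache.hadoop.conf.Configuration;

public class PlanningParametersSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration(); // normally the pipeline's Configuration
    String workDir = conf.get(PlanningParameters.CRUNCH_WORKING_DIRECTORY);
    String dotPlan = conf.get(PlanningParameters.PIPELINE_PLAN_DOTFILE);
    if (dotPlan != null) {
      // DOT source of the planned job graph, suitable for graphviz.
      System.out.println(dotPlan);
    }
    System.out.println("work dir: " + workDir);
  }
}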

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/Vertex.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/Vertex.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/Vertex.java
new file mode 100644
index 0000000..f4aa668
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/Vertex.java
@@ -0,0 +1,126 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.plan;
+
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.lang.builder.ReflectionToStringBuilder;
+import org.apache.commons.lang.builder.ToStringStyle;
+import org.apache.crunch.Source;
+import org.apache.crunch.impl.mr.collect.InputCollection;
+import org.apache.crunch.impl.mr.collect.PCollectionImpl;
+import org.apache.crunch.impl.mr.collect.PGroupedTableImpl;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+
+/**
+ * A vertex in the planner's graph of the pipeline, wrapping a single {@code PCollectionImpl}.
+ */
+class Vertex {
+  private final PCollectionImpl impl;
+  
+  private boolean output;
+  private Set<Edge> incoming;
+  private Set<Edge> outgoing;
+  
+  public Vertex(PCollectionImpl impl) {
+    this.impl = impl;
+    this.incoming = Sets.newHashSet();
+    this.outgoing = Sets.newHashSet();
+  }
+  
+  public PCollectionImpl getPCollection() {
+    return impl;
+  }
+  
+  public boolean isInput() {
+    return impl instanceof InputCollection;
+  }
+  
+  public boolean isGBK() {
+    return impl instanceof PGroupedTableImpl;
+  }
+  
+  public void setOutput() {
+    this.output = true;
+  }
+  
+  public boolean isOutput() {
+    return output;
+  }
+  
+  public Source getSource() {
+    if (isInput()) {
+      return ((InputCollection) impl).getSource();
+    }
+    return null;
+  }
+  
+  public void addIncoming(Edge edge) {
+    this.incoming.add(edge);
+  }
+  
+  public void addOutgoing(Edge edge) {
+    this.outgoing.add(edge);
+  }
+  
+  public List<Vertex> getAllNeighbors() {
+    List<Vertex> n = Lists.newArrayList();
+    for (Edge e : incoming) {
+      n.add(e.getHead());
+    }
+    for (Edge e : outgoing) {
+      n.add(e.getTail());
+    }
+    return n;
+  }
+  
+  public Set<Edge> getAllEdges() {
+    return Sets.union(incoming, outgoing);
+  }
+  
+  public Set<Edge> getIncomingEdges() {
+    return incoming;
+  }
+  
+  public Set<Edge> getOutgoingEdges() {
+    return outgoing;
+  }
+  
+  @Override
+  public boolean equals(Object obj) {
+    if (obj == null || !(obj instanceof Vertex)) {
+      return false;
+    }
+    Vertex other = (Vertex) obj;
+    return impl.equals(other.impl);
+  }
+  
+  @Override
+  public int hashCode() {
+    return 17 + 37 * impl.hashCode();
+  }
+
+  @Override
+  public String toString() {
+    return new ReflectionToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE).setExcludeFieldNames(
+        new String[] { "outgoing", "incoming" }).toString();
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/CrunchCombiner.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/CrunchCombiner.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/CrunchCombiner.java
new file mode 100644
index 0000000..47a3ded
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/CrunchCombiner.java
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.run;
+
+public class CrunchCombiner extends CrunchReducer {
+
+  @Override
+  protected NodeContext getNodeContext() {
+    return NodeContext.COMBINE;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/CrunchInputFormat.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/CrunchInputFormat.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/CrunchInputFormat.java
new file mode 100644
index 0000000..eb5dd8a
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/CrunchInputFormat.java
@@ -0,0 +1,78 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.run;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.crunch.io.CrunchInputs;
+import org.apache.crunch.io.FormatBundle;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.util.ReflectionUtils;
+
+import com.google.common.collect.Lists;
+
+public class CrunchInputFormat<K, V> extends InputFormat<K, V> {
+
+  @Override
+  public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException {
+    List<InputSplit> splits = Lists.newArrayList();
+    Configuration base = job.getConfiguration();
+    Map<FormatBundle, Map<Integer, List<Path>>> formatNodeMap = CrunchInputs.getFormatNodeMap(job);
+
+    // First, build a map of InputFormats to Paths
+    for (Map.Entry<FormatBundle, Map<Integer, List<Path>>> entry : formatNodeMap.entrySet()) {
+      FormatBundle inputBundle = entry.getKey();
+      Configuration conf = new Configuration(base);
+      inputBundle.configure(conf);
+      Job jobCopy = new Job(conf);
+      InputFormat<?, ?> format = (InputFormat<?, ?>) ReflectionUtils.newInstance(inputBundle.getFormatClass(),
+          jobCopy.getConfiguration());
+      for (Map.Entry<Integer, List<Path>> nodeEntry : entry.getValue().entrySet()) {
+        Integer nodeIndex = nodeEntry.getKey();
+        List<Path> paths = nodeEntry.getValue();
+        FileInputFormat.setInputPaths(jobCopy, paths.toArray(new Path[paths.size()]));
+
+        // Get splits for each input path and tag with InputFormat
+        // and Mapper types by wrapping in a TaggedInputSplit.
+        List<InputSplit> pathSplits = format.getSplits(jobCopy);
+        for (InputSplit pathSplit : pathSplits) {
+          splits.add(new CrunchInputSplit(pathSplit, inputBundle.getFormatClass(),
+              nodeIndex, jobCopy.getConfiguration()));
+        }
+      }
+    }
+    return splits;
+  }
+
+  @Override
+  public RecordReader<K, V> createRecordReader(InputSplit inputSplit, TaskAttemptContext context) throws IOException,
+      InterruptedException {
+    return new CrunchRecordReader<K, V>(inputSplit, context);
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/CrunchInputSplit.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/CrunchInputSplit.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/CrunchInputSplit.java
new file mode 100644
index 0000000..b41062b
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/CrunchInputSplit.java
@@ -0,0 +1,116 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.run;
+
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.io.DataOutput;
+import java.io.DataOutputStream;
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.serializer.Deserializer;
+import org.apache.hadoop.io.serializer.SerializationFactory;
+import org.apache.hadoop.io.serializer.Serializer;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.util.ReflectionUtils;
+
+class CrunchInputSplit extends InputSplit implements Writable {
+
+  private InputSplit inputSplit;
+  private Class<? extends InputFormat<?, ?>> inputFormatClass;
+  private int nodeIndex;
+  private Configuration conf;
+
+  public CrunchInputSplit() {
+    // default constructor
+  }
+
+  public CrunchInputSplit(
+      InputSplit inputSplit,
+      Class<? extends InputFormat<?, ?>> inputFormatClass,
+      int nodeIndex,
+      Configuration conf) {
+    this.inputSplit = inputSplit;
+    this.inputFormatClass = inputFormatClass;
+    this.nodeIndex = nodeIndex;
+    this.conf = conf;
+  }
+
+  public Configuration getConf() {
+    return conf;
+  }
+  
+  public int getNodeIndex() {
+    return nodeIndex;
+  }
+
+  public InputSplit getInputSplit() {
+    return inputSplit;
+  }
+
+  public Class<? extends InputFormat<?, ?>> getInputFormatClass() {
+    return inputFormatClass;
+  }
+
+  @Override
+  public long getLength() throws IOException, InterruptedException {
+    return inputSplit.getLength();
+  }
+
+  @Override
+  public String[] getLocations() throws IOException, InterruptedException {
+    return inputSplit.getLocations();
+  }
+
+  public void readFields(DataInput in) throws IOException {
+    nodeIndex = in.readInt();
+    conf = new Configuration();
+    conf.readFields(in);
+    inputFormatClass = (Class<? extends InputFormat<?, ?>>) readClass(in);
+    Class<? extends InputSplit> inputSplitClass = (Class<? extends InputSplit>) readClass(in);
+    inputSplit = (InputSplit) ReflectionUtils.newInstance(inputSplitClass, conf);
+    SerializationFactory factory = new SerializationFactory(conf);
+    Deserializer deserializer = factory.getDeserializer(inputSplitClass);
+    deserializer.open((DataInputStream) in);
+    inputSplit = (InputSplit) deserializer.deserialize(inputSplit);
+  }
+
+  private Class<?> readClass(DataInput in) throws IOException {
+    String className = Text.readString(in);
+    try {
+      return conf.getClassByName(className);
+    } catch (ClassNotFoundException e) {
+      throw new RuntimeException("readObject can't find class", e);
+    }
+  }
+
+  public void write(DataOutput out) throws IOException {
+    out.writeInt(nodeIndex);
+    conf.write(out);
+    Text.writeString(out, inputFormatClass.getName());
+    Text.writeString(out, inputSplit.getClass().getName());
+    SerializationFactory factory = new SerializationFactory(conf);
+    Serializer serializer = factory.getSerializer(inputSplit.getClass());
+    serializer.open((DataOutputStream) out);
+    serializer.serialize(inputSplit);
+  }
+}
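
A hedged round-trip sketch of the Writable contract above. CrunchInputSplit is package-private, so a check like this would have to live in org.apache.crunch.impl.mr.run; the TextInputFormat, node index, and path below are placeholders, not values used by the planner.

package org.apache.crunch.impl.mr.run;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class CrunchInputSplitRoundTripSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSplit fileSplit = new FileSplit(new Path("/data/in/part-00000"), 0L, 128L, new String[0]);
    CrunchInputSplit split = new CrunchInputSplit(fileSplit, TextInputFormat.class, 3, conf);

    // Serialize using the write() implementation above ...
    DataOutputBuffer out = new DataOutputBuffer();
    split.write(out);

    // ... and read it back into a fresh instance via readFields().
    DataInputBuffer in = new DataInputBuffer();
    in.reset(out.getData(), out.getLength());
    CrunchInputSplit copy = new CrunchInputSplit();
    copy.readFields(in);

    System.out.println(copy.getNodeIndex());                  // 3
    System.out.println(copy.getInputFormatClass().getName()); // ...TextInputFormat
    System.out.println(copy.getInputSplit().getLength());     // 128
  }
}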

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/CrunchMapper.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/CrunchMapper.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/CrunchMapper.java
new file mode 100644
index 0000000..70f0b01
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/CrunchMapper.java
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.run;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.crunch.CrunchRuntimeException;
+import org.apache.hadoop.mapreduce.Mapper;
+
+public class CrunchMapper extends Mapper<Object, Object, Object, Object> {
+
+  private static final Log LOG = LogFactory.getLog(CrunchMapper.class);
+
+  private RTNode node;
+  private CrunchTaskContext ctxt;
+  private boolean debug;
+
+  @Override
+  protected void setup(Mapper<Object, Object, Object, Object>.Context context) {
+    List<RTNode> nodes;
+    this.ctxt = new CrunchTaskContext(context, NodeContext.MAP);
+    try {
+      nodes = ctxt.getNodes();
+    } catch (IOException e) {
+      LOG.info("Crunch deserialization error", e);
+      throw new CrunchRuntimeException(e);
+    }
+    if (nodes.size() == 1) {
+      this.node = nodes.get(0);
+    } else {
+      CrunchInputSplit split = (CrunchInputSplit) context.getInputSplit();
+      this.node = nodes.get(split.getNodeIndex());
+    }
+    this.debug = ctxt.isDebugRun();
+  }
+
+  @Override
+  protected void map(Object k, Object v, Mapper<Object, Object, Object, Object>.Context context) {
+    if (debug) {
+      try {
+        node.process(k, v);
+      } catch (Exception e) {
+        LOG.error("Mapper exception", e);
+      }
+    } else {
+      node.process(k, v);
+    }
+  }
+
+  @Override
+  protected void cleanup(Mapper<Object, Object, Object, Object>.Context context) {
+    node.cleanup();
+    ctxt.cleanup();
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/CrunchRecordReader.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/CrunchRecordReader.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/CrunchRecordReader.java
new file mode 100644
index 0000000..fc8fb32
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/CrunchRecordReader.java
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.run;
+
+import java.io.IOException;
+
+import org.apache.crunch.hadoop.mapreduce.TaskAttemptContextFactory;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.util.ReflectionUtils;
+
+class CrunchRecordReader<K, V> extends RecordReader<K, V> {
+
+  private final RecordReader<K, V> delegate;
+
+  public CrunchRecordReader(InputSplit inputSplit, final TaskAttemptContext context) throws IOException,
+      InterruptedException {
+    CrunchInputSplit crunchSplit = (CrunchInputSplit) inputSplit;
+    InputFormat<K, V> inputFormat = (InputFormat<K, V>) ReflectionUtils.newInstance(crunchSplit.getInputFormatClass(),
+        crunchSplit.getConf());
+    this.delegate = inputFormat.createRecordReader(crunchSplit.getInputSplit(),
+        TaskAttemptContextFactory.create(crunchSplit.getConf(), context.getTaskAttemptID()));
+  }
+
+  @Override
+  public void close() throws IOException {
+    delegate.close();
+  }
+
+  @Override
+  public K getCurrentKey() throws IOException, InterruptedException {
+    return delegate.getCurrentKey();
+  }
+
+  @Override
+  public V getCurrentValue() throws IOException, InterruptedException {
+    return delegate.getCurrentValue();
+  }
+
+  @Override
+  public float getProgress() throws IOException, InterruptedException {
+    return delegate.getProgress();
+  }
+
+  @Override
+  public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException {
+    CrunchInputSplit crunchSplit = (CrunchInputSplit) inputSplit;
+    InputSplit delegateSplit = crunchSplit.getInputSplit();
+    delegate.initialize(delegateSplit,
+        TaskAttemptContextFactory.create(crunchSplit.getConf(), context.getTaskAttemptID()));
+  }
+
+  @Override
+  public boolean nextKeyValue() throws IOException, InterruptedException {
+    return delegate.nextKeyValue();
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/CrunchReducer.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/CrunchReducer.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/CrunchReducer.java
new file mode 100644
index 0000000..e5ddbd2
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/CrunchReducer.java
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.run;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.crunch.CrunchRuntimeException;
+import org.apache.crunch.impl.SingleUseIterable;
+import org.apache.hadoop.mapreduce.Reducer;
+
+public class CrunchReducer extends Reducer<Object, Object, Object, Object> {
+
+  private static final Log LOG = LogFactory.getLog(CrunchReducer.class);
+
+  private RTNode node;
+  private CrunchTaskContext ctxt;
+  private boolean debug;
+
+  protected NodeContext getNodeContext() {
+    return NodeContext.REDUCE;
+  }
+
+  @Override
+  protected void setup(Reducer<Object, Object, Object, Object>.Context context) {
+    this.ctxt = new CrunchTaskContext(context, getNodeContext());
+    try {
+      List<RTNode> nodes = ctxt.getNodes();
+      this.node = nodes.get(0);
+    } catch (IOException e) {
+      LOG.info("Crunch deserialization error", e);
+      throw new CrunchRuntimeException(e);
+    }
+    this.debug = ctxt.isDebugRun();
+  }
+
+  @Override
+  protected void reduce(Object key, Iterable<Object> values, Reducer<Object, Object, Object, Object>.Context context) {
+    values = new SingleUseIterable<Object>(values);
+    if (debug) {
+      try {
+        node.processIterable(key, values);
+      } catch (Exception e) {
+        LOG.error("Reducer exception", e);
+      }
+    } else {
+      node.processIterable(key, values);
+    }
+  }
+
+  @Override
+  protected void cleanup(Reducer<Object, Object, Object, Object>.Context context) {
+    node.cleanup();
+    ctxt.cleanup();
+  }
+}
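
A minimal wiring sketch showing where CrunchMapper, CrunchCombiner, CrunchReducer, and CrunchInputFormat plug into a plain MapReduce Job. In the real pipeline this wiring is performed by the planner rather than by hand, so this is illustration only.

import org.apache.crunch.impl.mr.run.CrunchCombiner;
import org.apache.crunch.impl.mr.run.CrunchInputFormat;
import org.apache.crunch.impl.mr.run.CrunchMapper;
import org.apache.crunch.impl.mr.run.CrunchReducer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class CrunchJobWiringSketch {
  public static void main(String[] args) throws Exception {
    Job job = new Job(new Configuration());
    job.setInputFormatClass(CrunchInputFormat.class); // multiplexes all pipeline inputs
    job.setMapperClass(CrunchMapper.class);           // runs the MAP RTNode tree
    job.setCombinerClass(CrunchCombiner.class);       // only when a combine phase is planned
    job.setReducerClass(CrunchReducer.class);         // runs the REDUCE RTNode tree
  }
}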

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/CrunchTaskContext.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/CrunchTaskContext.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/CrunchTaskContext.java
new file mode 100644
index 0000000..c4f2873
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/CrunchTaskContext.java
@@ -0,0 +1,86 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.run;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.crunch.CrunchRuntimeException;
+import org.apache.crunch.impl.mr.plan.PlanningParameters;
+import org.apache.crunch.io.CrunchOutputs;
+import org.apache.crunch.util.DistCache;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.TaskInputOutputContext;
+
+class CrunchTaskContext {
+
+  private final TaskInputOutputContext<Object, Object, Object, Object> taskContext;
+  private final NodeContext nodeContext;
+  private CrunchOutputs<Object, Object> multipleOutputs;
+
+  public CrunchTaskContext(TaskInputOutputContext<Object, Object, Object, Object> taskContext, NodeContext nodeContext) {
+    this.taskContext = taskContext;
+    this.nodeContext = nodeContext;
+  }
+
+  public TaskInputOutputContext<Object, Object, Object, Object> getContext() {
+    return taskContext;
+  }
+
+  public NodeContext getNodeContext() {
+    return nodeContext;
+  }
+
+  public List<RTNode> getNodes() throws IOException {
+    Configuration conf = taskContext.getConfiguration();
+    Path path = new Path(new Path(conf.get(PlanningParameters.CRUNCH_WORKING_DIRECTORY)), nodeContext.toString());
+    @SuppressWarnings("unchecked")
+    List<RTNode> nodes = (List<RTNode>) DistCache.read(conf, path);
+    if (nodes != null) {
+      for (RTNode node : nodes) {
+        node.initialize(this);
+      }
+    }
+    return nodes;
+  }
+
+  public boolean isDebugRun() {
+    Configuration conf = taskContext.getConfiguration();
+    return conf.getBoolean(RuntimeParameters.DEBUG, false);
+  }
+
+  public void cleanup() {
+    if (multipleOutputs != null) {
+      try {
+        multipleOutputs.close();
+      } catch (IOException e) {
+        throw new CrunchRuntimeException(e);
+      } catch (InterruptedException e) {
+        throw new CrunchRuntimeException(e);
+      }
+    }
+  }
+
+  public CrunchOutputs<Object, Object> getMultipleOutputs() {
+    if (multipleOutputs == null) {
+      multipleOutputs = new CrunchOutputs<Object, Object>(taskContext);
+    }
+    return multipleOutputs;
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/NodeContext.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/NodeContext.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/NodeContext.java
new file mode 100644
index 0000000..ffc9e7c
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/NodeContext.java
@@ -0,0 +1,35 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.run;
+
+import org.apache.crunch.impl.mr.plan.DoNode;
+
+/**
+ * Enum that is associated with a serialized {@link DoNode} instance, so we know
+ * how to use it within the context of a particular MR job.
+ * 
+ */
+public enum NodeContext {
+  MAP,
+  REDUCE,
+  COMBINE;
+
+  public String getConfigurationKey() {
+    return "crunch.donode." + toString().toLowerCase();
+  }
+}
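
A tiny sketch of the configuration keys this enum produces, derived directly from getConfigurationKey() above.

import org.apache.crunch.impl.mr.run.NodeContext;

public class NodeContextKeysSketch {
  public static void main(String[] args) {
    for (NodeContext ctx : NodeContext.values()) {
      // Prints crunch.donode.map, crunch.donode.reduce, crunch.donode.combine
      System.out.println(ctx.getConfigurationKey());
    }
  }
}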

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/RTNode.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/RTNode.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/RTNode.java
new file mode 100644
index 0000000..ce7b795
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/RTNode.java
@@ -0,0 +1,124 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.run;
+
+import java.io.Serializable;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.crunch.CrunchRuntimeException;
+import org.apache.crunch.DoFn;
+import org.apache.crunch.Emitter;
+import org.apache.crunch.impl.mr.emit.IntermediateEmitter;
+import org.apache.crunch.impl.mr.emit.MultipleOutputEmitter;
+import org.apache.crunch.impl.mr.emit.OutputEmitter;
+import org.apache.crunch.types.Converter;
+import org.apache.crunch.types.PType;
+
+public class RTNode implements Serializable {
+
+  private static final Log LOG = LogFactory.getLog(RTNode.class);
+
+  private final String nodeName;
+  private DoFn<Object, Object> fn;
+  private PType<Object> outputPType;
+  private final List<RTNode> children;
+  private final Converter inputConverter;
+  private final Converter outputConverter;
+  private final String outputName;
+
+  private transient Emitter<Object> emitter;
+
+  public RTNode(DoFn<Object, Object> fn, PType<Object> outputPType, String name, List<RTNode> children,
+      Converter inputConverter,
+      Converter outputConverter, String outputName) {
+    this.fn = fn;
+    this.outputPType = outputPType;
+    this.nodeName = name;
+    this.children = children;
+    this.inputConverter = inputConverter;
+    this.outputConverter = outputConverter;
+    this.outputName = outputName;
+  }
+
+  public void initialize(CrunchTaskContext ctxt) {
+    if (emitter != null) {
+      // Already initialized
+      return;
+    }
+
+    fn.setContext(ctxt.getContext());
+    fn.initialize();
+    for (RTNode child : children) {
+      child.initialize(ctxt);
+    }
+
+    if (outputConverter != null) {
+      if (outputName != null) {
+        this.emitter = new MultipleOutputEmitter(outputConverter, ctxt.getMultipleOutputs(),
+            outputName);
+      } else {
+        this.emitter = new OutputEmitter(outputConverter, ctxt.getContext());
+      }
+    } else if (!children.isEmpty()) {
+      this.emitter = new IntermediateEmitter(outputPType, children,
+          ctxt.getContext().getConfiguration());
+    } else {
+      throw new CrunchRuntimeException("Invalid RTNode config: no emitter for: " + nodeName);
+    }
+  }
+
+  public boolean isLeafNode() {
+    return outputConverter != null && children.isEmpty();
+  }
+
+  public void process(Object input) {
+    try {
+      fn.process(input, emitter);
+    } catch (CrunchRuntimeException e) {
+      if (!e.wasLogged()) {
+        LOG.info(String.format("Crunch exception in '%s' for input: %s", nodeName, input.toString()), e);
+        e.markLogged();
+      }
+      throw e;
+    }
+  }
+
+  public void process(Object key, Object value) {
+    process(inputConverter.convertInput(key, value));
+  }
+
+  public void processIterable(Object key, Iterable values) {
+    process(inputConverter.convertIterableInput(key, values));
+  }
+
+  public void cleanup() {
+    fn.cleanup(emitter);
+    emitter.flush();
+    for (RTNode child : children) {
+      child.cleanup();
+    }
+  }
+
+  @Override
+  public String toString() {
+    return "RTNode [nodeName=" + nodeName + ", fn=" + fn + ", children=" + children + ", inputConverter="
+        + inputConverter + ", outputConverter=" + outputConverter + ", outputName=" + outputName + "]";
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/RuntimeParameters.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/RuntimeParameters.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/RuntimeParameters.java
new file mode 100644
index 0000000..604c49c
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/run/RuntimeParameters.java
@@ -0,0 +1,38 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.impl.mr.run;
+
+/**
+ * Parameters used during the runtime execution.
+ */
+public class RuntimeParameters {
+
+  public static final String AGGREGATOR_BUCKETS = "crunch.aggregator.buckets";
+
+  public static final String DEBUG = "crunch.debug";
+
+  public static final String TMP_DIR = "crunch.tmp.dir";
+
+  public static final String LOG_JOB_PROGRESS = "crunch.log.job.progress";
+
+  public static final String CREATE_DIR = "mapreduce.jobcontrol.createdir.ifnotexist";
+
+  // Not instantiated
+  private RuntimeParameters() {
+  }
+}
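
A short sketch of setting the runtime flags. The DEBUG key is the one consulted by CrunchTaskContext.isDebugRun(); the temporary-directory value below is only a placeholder path.

import org.apache.crunch.impl.mr.run.RuntimeParameters;
import org.apache.hadoop.conf.Configuration;

public class RuntimeParametersSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Wrap each record in a try/catch inside CrunchMapper/CrunchReducer.
    conf.setBoolean(RuntimeParameters.DEBUG, true);
    // Assumed writable location for Crunch's intermediate output.
    conf.set(RuntimeParameters.TMP_DIR, "/tmp/crunch");
  }
}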

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/At.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/At.java b/crunch-core/src/main/java/org/apache/crunch/io/At.java
new file mode 100644
index 0000000..a6f0782
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/At.java
@@ -0,0 +1,257 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io;
+
+import org.apache.avro.specific.SpecificRecord;
+import org.apache.crunch.SourceTarget;
+import org.apache.crunch.TableSourceTarget;
+import org.apache.crunch.io.avro.AvroFileSourceTarget;
+import org.apache.crunch.io.seq.SeqFileSourceTarget;
+import org.apache.crunch.io.seq.SeqFileTableSourceTarget;
+import org.apache.crunch.io.text.TextFileSourceTarget;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.PTypeFamily;
+import org.apache.crunch.types.avro.AvroType;
+import org.apache.crunch.types.avro.Avros;
+import org.apache.crunch.types.writable.Writables;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * <p>Static factory methods for creating common {@link SourceTarget} types, which may be treated as both a {@code Source}
+ * and a {@code Target}.</p>
+ * 
+ * <p>The {@code At} factory methods are analogous to the {@link From} and {@link To} factory methods, but are used
+ * for storing intermediate outputs that need to be passed from one run of a MapReduce pipeline to another. The
+ * {@code SourceTarget} object acts as both a {@code Source} and a {@code Target}, which enables it to provide this
+ * functionality.
+ * 
+ * <code>
+ *   Pipeline pipeline = new MRPipeline(this.getClass());
+ *   // Create our intermediate storage location
+ *   SourceTarget<String> intermediate = At.textFile("/temptext");
+ *   ...
+ *   // Write out the output of the first phase of a pipeline.
+ *   pipeline.write(phase1, intermediate);
+ *   
+ *   // Explicitly call run to kick off the pipeline.
+ *   pipeline.run();
+ *   
+ *   // And then kick off a second phase by consuming the output
+ *   // from the first phase.
+ *   PCollection<String> phase2Input = pipeline.read(intermediate);
+ *   ...
+ * </code>
+ * </p>
+ * 
+ * <p>The {@code SourceTarget} abstraction is useful when we care about reading the intermediate
+ * outputs of a pipeline as well as the final results.</p>
+ */
+public class At {
+
+  /**
+   * Creates a {@code SourceTarget<T>} instance from the Avro file(s) at the given path name.
+   * 
+   * @param pathName The name of the path to the data on the filesystem
+   * @param avroClass The subclass of {@code SpecificRecord} to use for the Avro file
+   * @return A new {@code SourceTarget<T>} instance
+   */
+  public static <T extends SpecificRecord> SourceTarget<T> avroFile(String pathName, Class<T> avroClass) {
+    return avroFile(new Path(pathName), avroClass);  
+  }
+
+  /**
+   * Creates a {@code SourceTarget<T>} instance from the Avro file(s) at the given {@code Path}.
+   * 
+   * @param path The {@code Path} to the data
+   * @param avroClass The subclass of {@code SpecificRecord} to use for the Avro file
+   * @return A new {@code SourceTarget<T>} instance
+   */
+  public static <T extends SpecificRecord> SourceTarget<T> avroFile(Path path, Class<T> avroClass) {
+    return avroFile(path, Avros.specifics(avroClass));  
+  }
+  
+  /**
+   * Creates a {@code SourceTarget<T>} instance from the Avro file(s) at the given path name.
+   * 
+   * @param pathName The name of the path to the data on the filesystem
+   * @param avroType The {@code AvroType} for the Avro records
+   * @return A new {@code SourceTarget<T>} instance
+   */
+  public static <T> SourceTarget<T> avroFile(String pathName, AvroType<T> avroType) {
+    return avroFile(new Path(pathName), avroType);
+  }
+
+  /**
+   * Creates a {@code SourceTarget<T>} instance from the Avro file(s) at the given {@code Path}.
+   * 
+   * @param path The {@code Path} to the data
+   * @param avroType The {@code AvroType} for the Avro records
+   * @return A new {@code SourceTarget<T>} instance
+   */
+  public static <T> SourceTarget<T> avroFile(Path path, AvroType<T> avroType) {
+    return new AvroFileSourceTarget<T>(path, avroType);
+  }
+
+  /**
+   * Creates a {@code SourceTarget<T>} instance from the SequenceFile(s) at the given path name
+   * using the value field of each key-value pair in the SequenceFile(s).
+   * 
+   * @param pathName The name of the path to the data on the filesystem
+   * @param valueClass The {@code Writable} type for the value of the SequenceFile entry
+   * @return A new {@code SourceTarget<T>} instance
+   */
+  public static <T extends Writable> SourceTarget<T> sequenceFile(String pathName, Class<T> valueClass) {
+    return sequenceFile(new Path(pathName), valueClass);
+  }
+
+  /**
+   * Creates a {@code SourceTarget<T>} instance from the SequenceFile(s) at the given {@code Path}
+   * using the value field of each key-value pair in the SequenceFile(s).
+   * 
+   * @param path The {@code Path} to the data
+   * @param valueClass The {@code Writable} type for the value of the SequenceFile entry
+   * @return A new {@code SourceTarget<T>} instance
+   */
+  public static <T extends Writable> SourceTarget<T> sequenceFile(Path path, Class<T> valueClass) {
+    return sequenceFile(path, Writables.writables(valueClass));
+  }
+  
+  /**
+   * Creates a {@code SourceTarget<T>} instance from the SequenceFile(s) at the given path name
+   * using the value field of each key-value pair in the SequenceFile(s).
+   * 
+   * @param pathName The name of the path to the data on the filesystem
+   * @param ptype The {@code PType} for the value of the SequenceFile entry
+   * @return A new {@code SourceTarget<T>} instance
+   */
+  public static <T> SourceTarget<T> sequenceFile(String pathName, PType<T> ptype) {
+    return sequenceFile(new Path(pathName), ptype);
+  }
+
+  /**
+   * Creates a {@code SourceTarget<T>} instance from the SequenceFile(s) at the given {@code Path}
+   * using the value field of each key-value pair in the SequenceFile(s).
+   * 
+   * @param path The {@code Path} to the data
+   * @param ptype The {@code PType} for the value of the SequenceFile entry
+   * @return A new {@code SourceTarget<T>} instance
+   */
+  public static <T> SourceTarget<T> sequenceFile(Path path, PType<T> ptype) {
+    return new SeqFileSourceTarget<T>(path, ptype);
+  }
+
+  /**
+   * Creates a {@code TableSourceTarget<K, V>} instance from the SequenceFile(s) at the given path name
+   * using the key-value pairs in the SequenceFile(s).
+   * 
+   * @param pathName The name of the path to the data on the filesystem
+   * @param keyClass The {@code Writable} type for the key of the SequenceFile entry
+   * @param valueClass The {@code Writable} type for the value of the SequenceFile entry
+   * @return A new {@code TableSourceTarget<K, V>} instance
+   */
+  public static <K extends Writable, V extends Writable> TableSourceTarget<K, V> sequenceFile(
+      String pathName, Class<K> keyClass, Class<V> valueClass) {
+    return sequenceFile(new Path(pathName), keyClass, valueClass);
+  }
+
+  /**
+   * Creates a {@code TableSourceTarget<K, V>} instance from the SequenceFile(s) at the given {@code Path}
+   * using the key-value pairs in the SequenceFile(s).
+   * 
+   * @param path The {@code Path} to the data
+   * @param keyClass The {@code Writable} type for the key of the SequenceFile entry
+   * @param valueClass The {@code Writable} type for the value of the SequenceFile entry
+   * @return A new {@code TableSourceTarget<K, V>} instance
+   */
+  public static <K extends Writable, V extends Writable> TableSourceTarget<K, V> sequenceFile(
+      Path path, Class<K> keyClass, Class<V> valueClass) {
+    return sequenceFile(path, Writables.writables(keyClass), Writables.writables(valueClass));
+  }
+  
+  /**
+   * Creates a {@code TableSourceTarget<K, V>} instance from the SequenceFile(s) at the given path name
+   * using the key-value pairs in the SequenceFile(s).
+   * 
+   * @param pathName The name of the path to the data on the filesystem
+   * @param keyType The {@code PType} for the key of the SequenceFile entry
+   * @param valueType The {@code PType} for the value of the SequenceFile entry
+   * @return A new {@code TableSourceTarget<K, V>} instance
+   */
+  public static <K, V> TableSourceTarget<K, V> sequenceFile(String pathName, PType<K> keyType, PType<V> valueType) {
+    return sequenceFile(new Path(pathName), keyType, valueType);
+  }
+
+  /**
+   * Creates a {@code TableSourceTarget<K, V>} instance from the SequenceFile(s) at the given {@code Path}
+   * using the key-value pairs in the SequenceFile(s).
+   * 
+   * @param path The {@code Path} to the data
+   * @param keyType The {@code PType} for the key of the SequenceFile entry
+   * @param valueType The {@code PType} for the value of the SequenceFile entry
+   * @return A new {@code TableSourceTarget<K, V>} instance
+   */
+  public static <K, V> TableSourceTarget<K, V> sequenceFile(Path path, PType<K> keyType, PType<V> valueType) {
+    PTypeFamily ptf = keyType.getFamily();
+    return new SeqFileTableSourceTarget<K, V>(path, ptf.tableOf(keyType, valueType));
+  }
+
+  /**
+   * Creates a {@code SourceTarget<String>} instance for the text file(s) at the given path name.
+   * 
+   * @param pathName The name of the path to the data on the filesystem
+   * @return A new {@code SourceTarget<String>} instance
+   */
+  public static SourceTarget<String> textFile(String pathName) {
+    return textFile(new Path(pathName));
+  }
+
+  /**
+   * Creates a {@code SourceTarget<String>} instance for the text file(s) at the given {@code Path}.
+   * 
+   * @param path The {@code Path} to the data
+   * @return A new {@code SourceTarget<String>} instance
+   */
+  public static SourceTarget<String> textFile(Path path) {
+    return textFile(path, Writables.strings());
+  }
+
+  /**
+   * Creates a {@code SourceTarget<T>} instance for the text file(s) at the given path name using
+   * the provided {@code PType<T>} to convert the input text.
+   * 
+   * @param pathName The name of the path to the data on the filesystem
+   * @param ptype The {@code PType<T>} to use to process the input text
+   * @return A new {@code SourceTarget<T>} instance
+   */
+  public static <T> SourceTarget<T> textFile(String pathName, PType<T> ptype) {
+    return textFile(new Path(pathName), ptype);
+  }
+
+  /**
+   * Creates a {@code SourceTarget<T>} instance for the text file(s) at the given {@code Path} using
+   * the provided {@code PType<T>} to convert the input text.
+   * 
+   * @param path The {@code Path} to the data
+   * @param ptype The {@code PType<T>} to use to process the input text
+   * @return A new {@code SourceTarget<T>} instance
+   */
+  public static <T> SourceTarget<T> textFile(Path path, PType<T> ptype) {
+    return new TextFileSourceTarget<T>(path, ptype);
+  }
+}
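
A couple more factory calls as a sketch, complementing the class-level example above; the paths are placeholders.

import org.apache.crunch.SourceTarget;
import org.apache.crunch.TableSourceTarget;
import org.apache.crunch.io.At;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

public class AtFactorySketch {
  public static void main(String[] args) {
    // Text file of lines, read and written as Strings.
    SourceTarget<String> lines = At.textFile("/data/lines");
    // SequenceFile of (Text, LongWritable) key-value pairs.
    TableSourceTarget<Text, LongWritable> counts =
        At.sequenceFile("/data/counts", Text.class, LongWritable.class);
    System.out.println(lines + " / " + counts);
  }
}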

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/CompositePathIterable.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/CompositePathIterable.java b/crunch-core/src/main/java/org/apache/crunch/io/CompositePathIterable.java
new file mode 100644
index 0000000..a4723e9
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/CompositePathIterable.java
@@ -0,0 +1,102 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Iterator;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+
+import com.google.common.collect.UnmodifiableIterator;
+
+public class CompositePathIterable<T> implements Iterable<T> {
+
+  private final FileStatus[] stati;
+  private final FileSystem fs;
+  private final FileReaderFactory<T> readerFactory;
+
+  private static final PathFilter FILTER = new PathFilter() {
+    @Override
+    public boolean accept(Path path) {
+      return !path.getName().startsWith("_");
+    }
+  };
+
+  public static <S> Iterable<S> create(FileSystem fs, Path path, FileReaderFactory<S> readerFactory) throws IOException {
+
+    if (!fs.exists(path)) {
+      throw new IOException("No files found to materialize at: " + path);
+    }
+
+    FileStatus[] stati = null;
+    try {
+      stati = fs.listStatus(path, FILTER);
+    } catch (FileNotFoundException e) {
+      stati = null;
+    }
+    if (stati == null) {
+      throw new IOException("No files found to materialize at: " + path);
+    }
+
+    if (stati.length == 0) {
+      return Collections.emptyList();
+    } else {
+      return new CompositePathIterable<S>(stati, fs, readerFactory);
+    }
+
+  }
+
+  private CompositePathIterable(FileStatus[] stati, FileSystem fs, FileReaderFactory<T> readerFactory) {
+    this.stati = stati;
+    this.fs = fs;
+    this.readerFactory = readerFactory;
+  }
+
+  @Override
+  public Iterator<T> iterator() {
+
+    return new UnmodifiableIterator<T>() {
+      private int index = 0;
+      private Iterator<T> iter = readerFactory.read(fs, stati[index++].getPath());
+
+      @Override
+      public boolean hasNext() {
+        if (!iter.hasNext()) {
+          while (index < stati.length) {
+            iter = readerFactory.read(fs, stati[index++].getPath());
+            if (iter.hasNext()) {
+              return true;
+            }
+          }
+          return false;
+        }
+        return true;
+      }
+
+      @Override
+      public T next() {
+        return iter.next();
+      }
+    };
+  }
+}
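
A hedged usage sketch for the class above. It assumes FileReaderFactory is the single-method interface its use here implies, i.e. Iterator<T> read(FileSystem, Path); that interface is not part of this diff, and the output path is a placeholder.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Iterator;
import java.util.List;

import com.google.common.collect.Lists;
import org.apache.crunch.io.CompositePathIterable;
import org.apache.crunch.io.FileReaderFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CompositePathIterableSketch {
  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    // Assumption: FileReaderFactory<T> declares Iterator<T> read(FileSystem, Path),
    // which is the only method CompositePathIterable calls on it.
    FileReaderFactory<String> lineReader = new FileReaderFactory<String>() {
      @Override
      public Iterator<String> read(FileSystem fs, Path path) {
        try {
          BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(path)));
          List<String> lines = Lists.newArrayList();
          String line;
          while ((line = reader.readLine()) != null) {
            lines.add(line);
          }
          reader.close();
          return lines.iterator();
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
      }
    };
    // Iterates over every non-hidden file under the directory in turn.
    for (String line : CompositePathIterable.create(fs, new Path("/data/out"), lineReader)) {
      System.out.println(line);
    }
  }
}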

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/io/CrunchInputs.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/CrunchInputs.java b/crunch-core/src/main/java/org/apache/crunch/io/CrunchInputs.java
new file mode 100644
index 0000000..d154db2
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/io/CrunchInputs.java
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.io;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+
+import com.google.common.base.Joiner;
+import com.google.common.base.Splitter;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+
+/**
+ * Helper functions for configuring multiple {@code InputFormat} instances within a single
+ * Crunch MapReduce job.
+ */
+public class CrunchInputs {
+  public static final String CRUNCH_INPUTS = "crunch.inputs.dir";
+
+  private static final char RECORD_SEP = ',';
+  private static final char FIELD_SEP = ';';
+  private static final Joiner JOINER = Joiner.on(FIELD_SEP);
+  private static final Splitter SPLITTER = Splitter.on(FIELD_SEP);
+
+  public static void addInputPath(Job job, Path path, FormatBundle inputBundle, int nodeIndex) {
+    Configuration conf = job.getConfiguration();
+    String inputs = JOINER.join(inputBundle.serialize(), String.valueOf(nodeIndex), path.toString());
+    String existing = conf.get(CRUNCH_INPUTS);
+    conf.set(CRUNCH_INPUTS, existing == null ? inputs : existing + RECORD_SEP + inputs);
+  }
+
+  public static Map<FormatBundle, Map<Integer, List<Path>>> getFormatNodeMap(JobContext job) {
+    Map<FormatBundle, Map<Integer, List<Path>>> formatNodeMap = Maps.newHashMap();
+    Configuration conf = job.getConfiguration();
+    for (String input : Splitter.on(RECORD_SEP).split(conf.get(CRUNCH_INPUTS))) {
+      List<String> fields = Lists.newArrayList(SPLITTER.split(input));
+      FormatBundle<InputFormat> inputBundle = FormatBundle.fromSerialized(fields.get(0), InputFormat.class);
+      if (!formatNodeMap.containsKey(inputBundle)) {
+        formatNodeMap.put(inputBundle, Maps.<Integer, List<Path>> newHashMap());
+      }
+      Integer nodeIndex = Integer.valueOf(fields.get(1));
+      if (!formatNodeMap.get(inputBundle).containsKey(nodeIndex)) {
+        formatNodeMap.get(inputBundle).put(nodeIndex, Lists.<Path> newLinkedList());
+      }
+      formatNodeMap.get(inputBundle).get(nodeIndex).add(new Path(fields.get(2)));
+    }
+    return formatNodeMap;
+  }
+
+}
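
In short: each addInputPath() call appends one "bundle;nodeIndex;path" record to the
crunch.inputs.dir key, and getFormatNodeMap() rebuilds the per-format, per-node path lists
on the task side. A minimal sketch follows; FormatBundle.forInput(Class) is assumed to
exist (it is not shown in this diff).

    import java.util.List;
    import java.util.Map;

    import org.apache.crunch.io.CrunchInputs;
    import org.apache.crunch.io.FormatBundle;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

    public class MultiInputSketch {
      public static void main(String[] args) throws Exception {
        Job job = new Job(new Configuration());
        FormatBundle bundle = FormatBundle.forInput(TextInputFormat.class); // assumed factory method
        CrunchInputs.addInputPath(job, new Path("/data/events"), bundle, 0);
        CrunchInputs.addInputPath(job, new Path("/data/users"), bundle, 1);
        // On the task side, the same configuration entry is decoded back into a nested map.
        Map<FormatBundle, Map<Integer, List<Path>>> inputs = CrunchInputs.getFormatNodeMap(job);
        System.out.println(inputs);
      }
    }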


[27/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/PTypeUtils.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/PTypeUtils.java b/crunch-core/src/main/java/org/apache/crunch/types/PTypeUtils.java
new file mode 100644
index 0000000..e61b98b
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/PTypeUtils.java
@@ -0,0 +1,66 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types;
+
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.crunch.Pair;
+import org.apache.crunch.Tuple;
+import org.apache.crunch.Tuple3;
+import org.apache.crunch.Tuple4;
+import org.apache.crunch.TupleN;
+
+/**
+ * Utilities for converting between {@code PType}s from different
+ * {@code PTypeFamily} implementations.
+ * 
+ */
+public class PTypeUtils {
+
+  public static <T> PType<T> convert(PType<T> ptype, PTypeFamily tf) {
+    if (ptype instanceof PTableType) {
+      PTableType ptt = (PTableType) ptype;
+      return tf.tableOf(tf.as(ptt.getKeyType()), tf.as(ptt.getValueType()));
+    }
+    Class<T> typeClass = ptype.getTypeClass();
+    if (Tuple.class.isAssignableFrom(typeClass)) {
+      List<PType> subTypes = ptype.getSubTypes();
+      if (Pair.class.equals(typeClass)) {
+        return tf.pairs(tf.as(subTypes.get(0)), tf.as(subTypes.get(1)));
+      } else if (Tuple3.class.equals(typeClass)) {
+        return tf.triples(tf.as(subTypes.get(0)), tf.as(subTypes.get(1)), tf.as(subTypes.get(2)));
+      } else if (Tuple4.class.equals(typeClass)) {
+        return tf.quads(tf.as(subTypes.get(0)), tf.as(subTypes.get(1)), tf.as(subTypes.get(2)), tf.as(subTypes.get(3)));
+      } else if (TupleN.class.equals(typeClass)) {
+        PType[] newPTypes = subTypes.toArray(new PType[0]);
+        for (int i = 0; i < newPTypes.length; i++) {
+          newPTypes[i] = tf.as(subTypes.get(i));
+        }
+        return (PType<T>) tf.tuples(newPTypes);
+      }
+    }
+    if (Collection.class.isAssignableFrom(typeClass)) {
+      return tf.collections(tf.as(ptype.getSubTypes().get(0)));
+    }
+    return tf.records(typeClass);
+  }
+
+  private PTypeUtils() {
+  }
+}
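
A small sketch of the conversion above: a pair type built in the Writable family is
rebuilt as its Avro-family equivalent.

    import org.apache.crunch.Pair;
    import org.apache.crunch.types.PType;
    import org.apache.crunch.types.PTypeUtils;
    import org.apache.crunch.types.avro.AvroTypeFamily;
    import org.apache.crunch.types.writable.Writables;

    public class ConvertTypeSketch {
      public static void main(String[] args) {
        PType<Pair<String, Long>> writablePairs =
            Writables.pairs(Writables.strings(), Writables.longs());
        // Same logical type, now backed by Avro serialization instead of Writables.
        PType<Pair<String, Long>> avroPairs =
            PTypeUtils.convert(writablePairs, AvroTypeFamily.getInstance());
        System.out.println(avroPairs.getTypeClass());
      }
    }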

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/PTypes.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/PTypes.java b/crunch-core/src/main/java/org/apache/crunch/types/PTypes.java
new file mode 100644
index 0000000..546719c
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/PTypes.java
@@ -0,0 +1,252 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types;
+
+import java.math.BigInteger;
+import java.nio.ByteBuffer;
+import java.util.UUID;
+
+import org.apache.crunch.CrunchRuntimeException;
+import org.apache.crunch.MapFn;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.thrift.TBase;
+import org.apache.thrift.TDeserializer;
+import org.apache.thrift.TException;
+import org.apache.thrift.TSerializer;
+import org.apache.thrift.protocol.TBinaryProtocol;
+import org.codehaus.jackson.map.ObjectMapper;
+
+import com.google.protobuf.InvalidProtocolBufferException;
+import com.google.protobuf.Message;
+
+/**
+ * Utility functions for creating common types of derived PTypes, e.g., for JSON
+ * data, protocol buffers, and Thrift records.
+ * 
+ */
+public class PTypes {
+
+  public static PType<BigInteger> bigInt(PTypeFamily typeFamily) {
+    return typeFamily.derived(BigInteger.class, BYTE_TO_BIGINT, BIGINT_TO_BYTE, typeFamily.bytes());
+  }
+
+  public static PType<UUID> uuid(PTypeFamily ptf) {
+    return ptf.derived(UUID.class, BYTE_TO_UUID, UUID_TO_BYTE, ptf.bytes());
+  }
+  
+  public static <T> PType<T> jsonString(Class<T> clazz, PTypeFamily typeFamily) {
+    return typeFamily
+        .derived(clazz, new JacksonInputMapFn<T>(clazz), new JacksonOutputMapFn<T>(), typeFamily.strings());
+  }
+
+  public static <T extends Message> PType<T> protos(Class<T> clazz, PTypeFamily typeFamily) {
+    return typeFamily.derived(clazz, new ProtoInputMapFn<T>(clazz), new ProtoOutputMapFn<T>(), typeFamily.bytes());
+  }
+
+  public static <T extends TBase> PType<T> thrifts(Class<T> clazz, PTypeFamily typeFamily) {
+    return typeFamily.derived(clazz, new ThriftInputMapFn<T>(clazz), new ThriftOutputMapFn<T>(), typeFamily.bytes());
+  }
+
+  public static final <T extends Enum> PType<T> enums(final Class<T> type, PTypeFamily typeFamily) {
+    return typeFamily.derived(type, new EnumInputMapper<T>(type), new EnumOutputMapper<T>(), typeFamily.strings());
+  }
+
+  public static MapFn<ByteBuffer, BigInteger> BYTE_TO_BIGINT = new MapFn<ByteBuffer, BigInteger>() {
+    public BigInteger map(ByteBuffer input) {
+      return input == null ? null : new BigInteger(input.array());
+    }
+  };
+
+  public static MapFn<BigInteger, ByteBuffer> BIGINT_TO_BYTE = new MapFn<BigInteger, ByteBuffer>() {
+    public ByteBuffer map(BigInteger input) {
+      return input == null ? null : ByteBuffer.wrap(input.toByteArray());
+    }
+  };
+
+  private static class JacksonInputMapFn<T> extends MapFn<String, T> {
+
+    private final Class<T> clazz;
+    private transient ObjectMapper mapper;
+
+    public JacksonInputMapFn(Class<T> clazz) {
+      this.clazz = clazz;
+    }
+
+    @Override
+    public void initialize() {
+      this.mapper = new ObjectMapper();
+    }
+
+    @Override
+    public T map(String input) {
+      try {
+        return mapper.readValue(input, clazz);
+      } catch (Exception e) {
+        throw new CrunchRuntimeException(e);
+      }
+    }
+  }
+
+  private static class JacksonOutputMapFn<T> extends MapFn<T, String> {
+
+    private transient ObjectMapper mapper;
+
+    @Override
+    public void initialize() {
+      this.mapper = new ObjectMapper();
+    }
+
+    @Override
+    public String map(T input) {
+      try {
+        return mapper.writeValueAsString(input);
+      } catch (Exception e) {
+        throw new CrunchRuntimeException(e);
+      }
+    }
+  }
+
+  private static class ProtoInputMapFn<T extends Message> extends MapFn<ByteBuffer, T> {
+
+    private final Class<T> clazz;
+    private transient T instance;
+
+    public ProtoInputMapFn(Class<T> clazz) {
+      this.clazz = clazz;
+    }
+
+    @Override
+    public void initialize() {
+      this.instance = Protos.getDefaultInstance(clazz);
+    }
+
+    @Override
+    public T map(ByteBuffer bb) {
+      try {
+        return (T) instance.newBuilderForType().mergeFrom(bb.array(), bb.position(), bb.limit()).build();
+      } catch (InvalidProtocolBufferException e) {
+        throw new CrunchRuntimeException(e);
+      }
+    }
+  }
+
+  private static class ProtoOutputMapFn<T extends Message> extends MapFn<T, ByteBuffer> {
+
+    public ProtoOutputMapFn() {
+    }
+
+    @Override
+    public ByteBuffer map(T proto) {
+      return ByteBuffer.wrap(proto.toByteArray());
+    }
+  }
+
+  private static class ThriftInputMapFn<T extends TBase> extends MapFn<ByteBuffer, T> {
+
+    private final Class<T> clazz;
+    private transient T instance;
+    private transient TDeserializer deserializer;
+    private transient byte[] bytes;
+
+    public ThriftInputMapFn(Class<T> clazz) {
+      this.clazz = clazz;
+    }
+
+    @Override
+    public void initialize() {
+      this.instance = ReflectionUtils.newInstance(clazz, null);
+      this.deserializer = new TDeserializer(new TBinaryProtocol.Factory());
+      this.bytes = new byte[0];
+    }
+
+    @Override
+    public T map(ByteBuffer bb) {
+      T next = (T) instance.deepCopy();
+      int len = bb.limit() - bb.position();
+      if (len != bytes.length) {
+        bytes = new byte[len];
+      }
+      System.arraycopy(bb.array(), bb.position(), bytes, 0, len);
+      try {
+        deserializer.deserialize(next, bytes);
+      } catch (TException e) {
+        throw new CrunchRuntimeException(e);
+      }
+      return next;
+    }
+  }
+
+  private static class ThriftOutputMapFn<T extends TBase> extends MapFn<T, ByteBuffer> {
+
+    private transient TSerializer serializer;
+
+    public ThriftOutputMapFn() {
+    }
+
+    @Override
+    public void initialize() {
+      this.serializer = new TSerializer(new TBinaryProtocol.Factory());
+    }
+
+    @Override
+    public ByteBuffer map(T t) {
+      try {
+        return ByteBuffer.wrap(serializer.serialize(t));
+      } catch (TException e) {
+        throw new CrunchRuntimeException(e);
+      }
+    }
+  }
+
+  private static class EnumInputMapper<T extends Enum> extends MapFn<String, T> {
+    private final Class<T> type;
+
+    public EnumInputMapper(Class<T> type) {
+      this.type = type;
+    }
+
+    @Override
+    public T map(String input) {
+      return (T) Enum.valueOf(type, input);
+    }
+  };
+
+  private static class EnumOutputMapper<T extends Enum> extends MapFn<T, String> {
+
+    @Override
+    public String map(T input) {
+      return input.name();
+    }
+  };
+  
+  private static MapFn<ByteBuffer, UUID> BYTE_TO_UUID = new MapFn<ByteBuffer, UUID>() {
+    @Override
+    public UUID map(ByteBuffer input) {
+      return new UUID(input.getLong(), input.getLong());
+    }
+  };
+  
+  private static MapFn<UUID, ByteBuffer> UUID_TO_BYTE = new MapFn<UUID, ByteBuffer>() {
+    @Override
+    public ByteBuffer map(UUID input) {
+      ByteBuffer bb = ByteBuffer.wrap(new byte[16]);
+      bb.asLongBuffer().put(input.getMostSignificantBits()).put(input.getLeastSignificantBits());
+      return bb;
+    }
+  };
+}
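
A usage sketch for the helpers above. The UUID and BigInteger helpers need nothing beyond
a type family; Visit is a hypothetical Jackson-serializable bean used only to illustrate
jsonString().

    import java.math.BigInteger;
    import java.util.UUID;

    import org.apache.crunch.types.PType;
    import org.apache.crunch.types.PTypes;
    import org.apache.crunch.types.avro.AvroTypeFamily;
    import org.apache.crunch.types.writable.WritableTypeFamily;

    public class DerivedTypesSketch {
      // Hypothetical bean; any class Jackson can map to and from JSON works here.
      public static class Visit {
        public String url;
        public long count;
      }

      public static void main(String[] args) {
        PType<UUID> uuids = PTypes.uuid(WritableTypeFamily.getInstance());
        PType<BigInteger> bigInts = PTypes.bigInt(AvroTypeFamily.getInstance());
        PType<Visit> visits = PTypes.jsonString(Visit.class, WritableTypeFamily.getInstance());
        System.out.println(uuids.getTypeClass() + " " + bigInts.getTypeClass() + " " + visits.getTypeClass());
      }
    }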

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/Protos.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/Protos.java b/crunch-core/src/main/java/org/apache/crunch/types/Protos.java
new file mode 100644
index 0000000..4cd5068
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/Protos.java
@@ -0,0 +1,173 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types;
+
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.crunch.CrunchRuntimeException;
+import org.apache.crunch.DoFn;
+import org.apache.crunch.Emitter;
+import org.apache.crunch.MapFn;
+import org.apache.hadoop.util.ReflectionUtils;
+
+import com.google.common.base.Splitter;
+import com.google.protobuf.Descriptors.FieldDescriptor;
+import com.google.protobuf.Message;
+import com.google.protobuf.Message.Builder;
+
+/**
+ * Utility functions for working with protocol buffers in Crunch.
+ */
+public class Protos {
+
+  /**
+   * Utility function for creating a default PB Message from a Class object that
+   * works with both protoc 2.3.0 and 2.4.x.
+   * @param clazz The class of the protocol buffer to create
+   * @return An instance of a protocol buffer
+   */
+  public static <M extends Message> M getDefaultInstance(Class<M> clazz) {
+    if (clazz.getConstructors().length > 0) {
+      // Protobuf 2.3.0
+      return ReflectionUtils.newInstance(clazz, null);
+    } else {
+      // Protobuf 2.4.x
+      try {
+        Message.Builder mb = (Message.Builder) clazz.getDeclaredMethod("newBuilder").invoke(null);
+        return (M) mb.getDefaultInstanceForType();
+      } catch (Exception e) {
+        throw new CrunchRuntimeException(e);
+      }  
+    }
+  }
+  
+  public static <M extends Message, K> MapFn<M, K> extractKey(String fieldName) {
+    return new ExtractKeyFn<M, K>(fieldName);
+  }
+
+  public static <M extends Message> DoFn<String, M> lineParser(String sep, Class<M> msgClass) {
+    return new TextToProtoFn<M>(sep, msgClass);
+  }
+
+  private static class ExtractKeyFn<M extends Message, K> extends MapFn<M, K> {
+
+    private final String fieldName;
+
+    private transient FieldDescriptor fd;
+
+    public ExtractKeyFn(String fieldName) {
+      this.fieldName = fieldName;
+    }
+
+    @Override
+    public K map(M input) {
+      if (input == null) {
+        throw new IllegalArgumentException("Null inputs not supported by Protos.ExtractKeyFn");
+      } else if (fd == null) {
+        fd = input.getDescriptorForType().findFieldByName(fieldName);
+        if (fd == null) {
+          throw new IllegalStateException("Could not find field: " + fieldName + " in message: " + input);
+        }
+      }
+      return (K) input.getField(fd);
+    }
+
+  }
+
+  private static class TextToProtoFn<M extends Message> extends DoFn<String, M> {
+
+    private final String sep;
+    private final Class<M> msgClass;
+
+    private transient M msgInstance;
+    private transient List<FieldDescriptor> fields;
+    private transient Splitter splitter;
+
+    enum ParseErrors {
+      TOTAL,
+      NUMBER_FORMAT
+    };
+
+    public TextToProtoFn(String sep, Class<M> msgClass) {
+      this.sep = sep;
+      this.msgClass = msgClass;
+    }
+
+    @Override
+    public void initialize() {
+      this.msgInstance = getDefaultInstance(msgClass);
+      this.fields = msgInstance.getDescriptorForType().getFields();
+      this.splitter = Splitter.on(sep);
+    }
+
+    @Override
+    public void process(String input, Emitter<M> emitter) {
+      if (input != null && !input.isEmpty()) {
+        Builder b = msgInstance.newBuilderForType();
+        Iterator<String> iter = splitter.split(input).iterator();
+        boolean parseError = false;
+        for (FieldDescriptor fd : fields) {
+          if (iter.hasNext()) {
+            String value = iter.next();
+            if (value != null && !value.isEmpty()) {
+              Object parsedValue = null;
+              try {
+                switch (fd.getJavaType()) {
+                case STRING:
+                  parsedValue = value;
+                  break;
+                case INT:
+                  parsedValue = Integer.valueOf(value);
+                  break;
+                case LONG:
+                  parsedValue = Long.valueOf(value);
+                  break;
+                case FLOAT:
+                  parsedValue = Float.valueOf(value);
+                  break;
+                case DOUBLE:
+                  parsedValue = Double.valueOf(value);
+                  break;
+                case BOOLEAN:
+                  parsedValue = Boolean.valueOf(value);
+                  break;
+                case ENUM:
+                  parsedValue = fd.getEnumType().findValueByName(value);
+                  break;
+                }
+                b.setField(fd, parsedValue);
+              } catch (NumberFormatException nfe) {
+                increment(ParseErrors.NUMBER_FORMAT);
+                parseError = true;
+                break;
+              }
+            }
+          }
+        }
+
+        if (parseError) {
+          increment(ParseErrors.TOTAL);
+        } else {
+          emitter.emit((M) b.build());
+        }
+      }
+    }
+  }
+
+}
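
A sketch of the two public helpers. Person stands in for any protoc-generated message with
a string field named "name"; it is hypothetical and not part of this patch.

    import org.apache.crunch.DoFn;
    import org.apache.crunch.MapFn;
    import org.apache.crunch.types.Protos;
    // import my.protos.Person;  // hypothetical generated message class

    public class ProtoParsingSketch {
      public static void configure() {
        // Parses tab-separated text lines positionally into Person's fields.
        DoFn<String, Person> parseFn = Protos.lineParser("\t", Person.class);
        // Extracts the "name" field, e.g. for keying a PTable of Person records.
        MapFn<Person, String> keyFn = Protos.extractKey("name");
      }
    }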

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/TupleDeepCopier.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/TupleDeepCopier.java b/crunch-core/src/main/java/org/apache/crunch/types/TupleDeepCopier.java
new file mode 100644
index 0000000..a2ffae3
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/TupleDeepCopier.java
@@ -0,0 +1,65 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types;
+
+import java.util.List;
+
+import org.apache.crunch.Tuple;
+import org.apache.hadoop.conf.Configuration;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Performs deep copies (based on underlying PType deep copying) of Tuple-based objects.
+ * 
+ * @param <T> The type of Tuple implementation being copied
+ */
+public class TupleDeepCopier<T extends Tuple> implements DeepCopier<T> {
+
+  private final TupleFactory<T> tupleFactory;
+  private final List<PType> elementTypes;
+
+  public TupleDeepCopier(Class<T> tupleClass, PType... elementTypes) {
+    tupleFactory = TupleFactory.getTupleFactory(tupleClass);
+    this.elementTypes = Lists.newArrayList(elementTypes);
+  }
+
+  @Override
+  public void initialize(Configuration conf) {
+    for (PType elementType : elementTypes) {
+      elementType.initialize(conf);
+    }
+  }
+
+  @Override
+  public T deepCopy(T source) {
+    
+    if (source == null) {
+      return null;
+    }
+    
+    Object[] deepCopyValues = new Object[source.size()];
+
+    for (int valueIndex = 0; valueIndex < elementTypes.size(); valueIndex++) {
+      PType elementType = elementTypes.get(valueIndex);
+      deepCopyValues[valueIndex] = elementType.getDetachedValue(source.get(valueIndex));
+    }
+
+    return tupleFactory.makeTuple(deepCopyValues);
+  }
+}
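
A minimal sketch of the copier above, using a Pair with Writable-backed element types.

    import org.apache.crunch.Pair;
    import org.apache.crunch.types.TupleDeepCopier;
    import org.apache.crunch.types.writable.Writables;
    import org.apache.hadoop.conf.Configuration;

    public class PairCopySketch {
      public static void main(String[] args) {
        TupleDeepCopier<Pair> copier = new TupleDeepCopier<Pair>(
            Pair.class, Writables.strings(), Writables.longs());
        copier.initialize(new Configuration());
        Pair original = Pair.of("key", 42L);
        Pair copy = copier.deepCopy(original); // equal contents, detached instance
        System.out.println(copy);
      }
    }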

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/TupleFactory.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/TupleFactory.java b/crunch-core/src/main/java/org/apache/crunch/types/TupleFactory.java
new file mode 100644
index 0000000..73b47de
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/TupleFactory.java
@@ -0,0 +1,134 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types;
+
+import java.io.Serializable;
+import java.lang.reflect.Constructor;
+import java.util.Map;
+
+import org.apache.crunch.CrunchRuntimeException;
+import org.apache.crunch.Pair;
+import org.apache.crunch.Tuple;
+import org.apache.crunch.Tuple3;
+import org.apache.crunch.Tuple4;
+import org.apache.crunch.TupleN;
+
+import com.google.common.collect.Maps;
+
+public abstract class TupleFactory<T extends Tuple> implements Serializable {
+
+  public void initialize() {
+  }
+
+  public abstract T makeTuple(Object... values);
+
+  
+  private static final Map<Class, TupleFactory> customTupleFactories = Maps.newHashMap();
+  
+  /**
+   * Get the {@link TupleFactory} for a given Tuple implementation.
+   * 
+   * @param tupleClass
+   *          The class for which the factory is to be retrieved
+   * @return The appropriate TupleFactory
+   */
+  public static <T extends Tuple> TupleFactory<T> getTupleFactory(Class<T> tupleClass) {
+    if (tupleClass == Pair.class) {
+      return (TupleFactory<T>) PAIR;
+    } else if (tupleClass == Tuple3.class) {
+      return (TupleFactory<T>) TUPLE3;
+    } else if (tupleClass == Tuple4.class) {
+      return (TupleFactory<T>) TUPLE4;
+    } else if (tupleClass == TupleN.class) {
+      return (TupleFactory<T>) TUPLEN;
+    } else if (customTupleFactories.containsKey(tupleClass)) {
+      return (TupleFactory<T>) customTupleFactories.get(tupleClass);
+    } else {
+      throw new IllegalArgumentException("Can't create TupleFactory for " + tupleClass);
+    }
+  }
+
+  public static final TupleFactory<Pair> PAIR = new TupleFactory<Pair>() {
+    @Override
+    public Pair makeTuple(Object... values) {
+      return Pair.of(values[0], values[1]);
+    }
+  };
+
+  public static final TupleFactory<Tuple3> TUPLE3 = new TupleFactory<Tuple3>() {
+    @Override
+    public Tuple3 makeTuple(Object... values) {
+      return Tuple3.of(values[0], values[1], values[2]);
+    }
+  };
+
+  public static final TupleFactory<Tuple4> TUPLE4 = new TupleFactory<Tuple4>() {
+    @Override
+    public Tuple4 makeTuple(Object... values) {
+      return Tuple4.of(values[0], values[1], values[2], values[3]);
+    }
+  };
+
+  public static final TupleFactory<TupleN> TUPLEN = new TupleFactory<TupleN>() {
+    @Override
+    public TupleN makeTuple(Object... values) {
+      return new TupleN(values);
+    }
+  };
+
+  public static <T extends Tuple> TupleFactory<T> create(Class<T> clazz, Class... typeArgs) {
+    if (customTupleFactories.containsKey(clazz)) {
+      return (TupleFactory<T>) customTupleFactories.get(clazz);
+    }
+    TupleFactory<T> custom = new CustomTupleFactory<T>(clazz, typeArgs);
+    customTupleFactories.put(clazz, custom);
+    return custom;
+  }
+
+  private static class CustomTupleFactory<T extends Tuple> extends TupleFactory<T> {
+
+    private final Class<T> clazz;
+    private final Class[] typeArgs;
+
+    private transient Constructor<T> constructor;
+
+    public CustomTupleFactory(Class<T> clazz, Class[] typeArgs) {
+      this.clazz = clazz;
+      this.typeArgs = typeArgs;
+    }
+
+    @Override
+    public void initialize() {
+      try {
+        constructor = clazz.getConstructor(typeArgs);
+      } catch (Exception e) {
+        throw new CrunchRuntimeException(e);
+      }
+    }
+
+    @Override
+    public T makeTuple(Object... values) {
+      try {
+        return constructor.newInstance(values);
+      } catch (Exception e) {
+        throw new CrunchRuntimeException(e);
+      }
+    }
+  }
+
+}
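
For reference, a short sketch of the built-in factories in use:

    import org.apache.crunch.Pair;
    import org.apache.crunch.Tuple3;
    import org.apache.crunch.TupleN;
    import org.apache.crunch.types.TupleFactory;

    public class TupleFactorySketch {
      public static void main(String[] args) {
        Pair pair = TupleFactory.PAIR.makeTuple("a", 1L);
        Tuple3 triple = TupleFactory.getTupleFactory(Tuple3.class).makeTuple("a", 1L, true);
        TupleN wide = TupleFactory.TUPLEN.makeTuple("a", "b", "c", "d", "e");
        System.out.println(pair + " " + triple + " " + wide);
      }
    }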

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroCapabilities.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroCapabilities.java b/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroCapabilities.java
new file mode 100644
index 0000000..cc1636c
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroCapabilities.java
@@ -0,0 +1,106 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.avro;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+
+import org.apache.avro.Schema;
+import org.apache.avro.io.BinaryDecoder;
+import org.apache.avro.io.BinaryEncoder;
+import org.apache.avro.io.DecoderFactory;
+import org.apache.avro.io.EncoderFactory;
+import org.apache.avro.reflect.ReflectDatumReader;
+import org.apache.avro.reflect.ReflectDatumWriter;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Determines the capabilities of the Avro version that is currently being used.
+ */
+class AvroCapabilities {
+
+  public static class Record extends org.apache.avro.specific.SpecificRecordBase implements
+      org.apache.avro.specific.SpecificRecord {
+    public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser()
+        .parse("{\"type\":\"record\",\"name\":\"Record\",\"namespace\":\"org.apache.crunch.types.avro\",\"fields\":[{\"name\":\"subrecords\",\"type\":{\"type\":\"array\",\"items\":\"string\"}}]}");
+    @Deprecated
+    public java.util.List<java.lang.CharSequence> subrecords;
+
+    public java.lang.Object get(int field$) {
+      switch (field$) {
+      case 0:
+        return subrecords;
+      default:
+        throw new org.apache.avro.AvroRuntimeException("Bad index");
+      }
+    }
+
+    // Used by DatumReader. Applications should not call.
+    @SuppressWarnings(value = "unchecked")
+    public void put(int field$, java.lang.Object value$) {
+      switch (field$) {
+      case 0:
+        subrecords = (java.util.List<java.lang.CharSequence>) value$;
+        break;
+      default:
+        throw new org.apache.avro.AvroRuntimeException("Bad index");
+      }
+    }
+
+    @Override
+    public Schema getSchema() {
+      return SCHEMA$;
+    }
+  }
+
+  /**
+   * Determine if the current Avro version can use the ReflectDatumReader to
+   * read SpecificData that includes an array. The inability to do this was a
+   * bug that was fixed in Avro 1.7.0.
+   * 
+   * @return true if SpecificData can be properly read using a
+   *         ReflectDatumReader
+   */
+  static boolean canDecodeSpecificSchemaWithReflectDatumReader() {
+    ReflectDatumReader<Record> datumReader = new ReflectDatumReader(Record.SCHEMA$);
+    ReflectDatumWriter<Record> datumWriter = new ReflectDatumWriter(Record.SCHEMA$);
+
+    Record record = new Record();
+    record.subrecords = Lists.<CharSequence> newArrayList("a", "b");
+
+    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
+    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(byteArrayOutputStream, null);
+
+    try {
+      datumWriter.write(record, encoder);
+      encoder.flush();
+      BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(
+          byteArrayOutputStream.toByteArray(), null);
+      datumReader.read(record, decoder);
+    } catch (IOException ioe) {
+      throw new RuntimeException("Error performing specific schema test", ioe);
+    } catch (ClassCastException cce) {
+      // This indicates that we're using a pre-1.7.0 version of Avro, as the
+      // ReflectDatumReader in those versions could not correctly handle an
+      // array in a SpecificData value
+      return false;
+    }
+    return true;
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroDeepCopier.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroDeepCopier.java b/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroDeepCopier.java
new file mode 100644
index 0000000..0fe9288
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroDeepCopier.java
@@ -0,0 +1,209 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.avro;
+
+import java.io.ByteArrayOutputStream;
+import java.io.Serializable;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericData.Record;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.io.BinaryDecoder;
+import org.apache.avro.io.BinaryEncoder;
+import org.apache.avro.io.DatumReader;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.io.DecoderFactory;
+import org.apache.avro.io.EncoderFactory;
+import org.apache.avro.specific.SpecificDatumReader;
+import org.apache.avro.specific.SpecificDatumWriter;
+import org.apache.crunch.CrunchRuntimeException;
+import org.apache.crunch.types.DeepCopier;
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * Performs deep copies of Avro-serializable objects.
+ * <p>
+ * <b>Warning:</b> Methods in this class are not thread-safe. This shouldn't be a problem when
+ * running in a map-reduce context where each mapper/reducer is running in its own JVM, but it may
+ * well be a problem in any other kind of multi-threaded context.
+ */
+abstract class AvroDeepCopier<T> implements DeepCopier<T>, Serializable {
+
+  private String jsonSchema;
+  private transient Configuration conf;
+  private transient Schema schema;
+  private BinaryEncoder binaryEncoder;
+  private BinaryDecoder binaryDecoder;
+
+  private transient DatumWriter<T> datumWriter;
+  private transient DatumReader<T> datumReader;
+
+  public AvroDeepCopier(Schema schema) {
+    this.jsonSchema = schema.toString();
+  }
+
+  protected Schema getSchema() {
+    if (schema == null) {
+      schema = new Schema.Parser().parse(jsonSchema);
+    }
+    return schema;
+  }
+
+  @Override
+  public void initialize(Configuration conf) {
+    this.conf = conf;
+  }
+
+  protected abstract T createCopyTarget();
+
+  protected abstract DatumWriter<T> createDatumWriter(Configuration conf);
+
+  protected abstract DatumReader<T> createDatumReader(Configuration conf);
+
+  /**
+   * Deep copier for Avro specific data objects.
+   */
+  public static class AvroSpecificDeepCopier<T> extends AvroDeepCopier<T> {
+
+    private Class<T> valueClass;
+
+    public AvroSpecificDeepCopier(Class<T> valueClass, Schema schema) {
+      super(schema);
+      this.valueClass = valueClass;
+    }
+
+    @Override
+    protected T createCopyTarget() {
+      return createNewInstance(valueClass);
+    }
+
+    @Override
+    protected DatumWriter<T> createDatumWriter(Configuration conf) {
+      return new SpecificDatumWriter<T>(getSchema());
+    }
+
+    @Override
+    protected DatumReader<T> createDatumReader(Configuration conf) {
+      return new SpecificDatumReader<T>(getSchema());
+    }
+
+  }
+
+  /**
+   * Deep copier for Avro generic data objects.
+   */
+  public static class AvroGenericDeepCopier extends AvroDeepCopier<Record> {
+
+    private transient Schema schema;
+
+    public AvroGenericDeepCopier(Schema schema) {
+      super(schema);
+    }
+
+    @Override
+    protected Record createCopyTarget() {
+      return new GenericData.Record(getSchema());
+    }
+
+    @Override
+    protected DatumReader<Record> createDatumReader(Configuration conf) {
+      return new GenericDatumReader<Record>(getSchema());
+    }
+
+    @Override
+    protected DatumWriter<Record> createDatumWriter(Configuration conf) {
+      return new GenericDatumWriter<Record>(getSchema());
+    }
+  }
+
+  /**
+   * Deep copier for Avro reflect data objects.
+   */
+  public static class AvroReflectDeepCopier<T> extends AvroDeepCopier<T> {
+
+    private Class<T> valueClass;
+
+    public AvroReflectDeepCopier(Class<T> valueClass, Schema schema) {
+      super(schema);
+      this.valueClass = valueClass;
+    }
+
+    @Override
+    protected T createCopyTarget() {
+      return createNewInstance(valueClass);
+    }
+
+    @Override
+    protected DatumReader<T> createDatumReader(Configuration conf) {
+      return Avros.getReflectDataFactory(conf).getReader(getSchema());
+    }
+
+    @Override
+    protected DatumWriter<T> createDatumWriter(Configuration conf) {
+      return Avros.getReflectDataFactory(conf).getWriter(getSchema());
+    }
+  }
+
+  /**
+   * Create a deep copy of an Avro value.
+   * 
+   * @param source The value to be copied
+   * @return The deep copy of the value
+   */
+  @Override
+  public T deepCopy(T source) {
+    
+    if (source == null) {
+      return null;
+    }
+    
+    if (datumReader == null) {
+      datumReader = createDatumReader(conf);
+    }
+    if (datumWriter == null) {
+      datumWriter = createDatumWriter(conf);
+    }
+    ByteArrayOutputStream byteOutStream = new ByteArrayOutputStream();
+    binaryEncoder = EncoderFactory.get().binaryEncoder(byteOutStream, binaryEncoder);
+    T target = createCopyTarget();
+    try {
+      datumWriter.write(source, binaryEncoder);
+      binaryEncoder.flush();
+      binaryDecoder = DecoderFactory.get()
+          .binaryDecoder(byteOutStream.toByteArray(), binaryDecoder);
+      datumReader.read(target, binaryDecoder);
+    } catch (Exception e) {
+      throw new CrunchRuntimeException("Error while deep copying avro value " + source, e);
+    }
+
+    return target;
+  }
+
+  protected T createNewInstance(Class<T> targetClass) {
+    try {
+      return targetClass.newInstance();
+    } catch (InstantiationException e) {
+      throw new CrunchRuntimeException(e);
+    } catch (IllegalAccessException e) {
+      throw new CrunchRuntimeException(e);
+    }
+  }
+
+}
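
A sketch of the generic-record variant above. AvroDeepCopier is package-private, so code
like this would have to live in org.apache.crunch.types.avro.

    package org.apache.crunch.types.avro;

    import org.apache.avro.Schema;
    import org.apache.avro.generic.GenericData;
    import org.apache.hadoop.conf.Configuration;

    public class GenericCopySketch {
      public static void main(String[] args) {
        Schema schema = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"Rec\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"}]}");
        AvroDeepCopier.AvroGenericDeepCopier copier =
            new AvroDeepCopier.AvroGenericDeepCopier(schema);
        copier.initialize(new Configuration());
        GenericData.Record rec = new GenericData.Record(schema);
        rec.put("x", 42);
        GenericData.Record copy = copier.deepCopy(rec); // distinct instance, same contents
        System.out.println(copy);
      }
    }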

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroGroupedTableType.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroGroupedTableType.java b/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroGroupedTableType.java
new file mode 100644
index 0000000..598868f
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroGroupedTableType.java
@@ -0,0 +1,114 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.avro;
+
+import java.util.Collection;
+
+import org.apache.avro.mapred.AvroJob;
+import org.apache.avro.mapred.AvroKey;
+import org.apache.avro.mapred.AvroKeyComparator;
+import org.apache.avro.mapred.AvroValue;
+import org.apache.crunch.GroupingOptions;
+import org.apache.crunch.MapFn;
+import org.apache.crunch.Pair;
+import org.apache.crunch.fn.PairMapFn;
+import org.apache.crunch.lib.PTables;
+import org.apache.crunch.types.Converter;
+import org.apache.crunch.types.PGroupedTableType;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.Job;
+
+/**
+ * The Avro-serialization implementation of {@code PGroupedTableType}, produced by
+ * grouping an {@code AvroTableType}.
+ */
+class AvroGroupedTableType<K, V> extends PGroupedTableType<K, V> {
+
+  private static final AvroPairConverter CONVERTER = new AvroPairConverter();
+  private final MapFn inputFn;
+  private final MapFn outputFn;
+
+  public AvroGroupedTableType(AvroTableType<K, V> tableType) {
+    super(tableType);
+    AvroType keyType = (AvroType) tableType.getKeyType();
+    AvroType valueType = (AvroType) tableType.getValueType();
+    this.inputFn = new PairIterableMapFn(keyType.getInputMapFn(), valueType.getInputMapFn());
+    this.outputFn = new PairMapFn(keyType.getOutputMapFn(), valueType.getOutputMapFn());
+  }
+
+  @Override
+  public Class<Pair<K, Iterable<V>>> getTypeClass() {
+    return (Class<Pair<K, Iterable<V>>>) Pair.of(null, null).getClass();
+  }
+
+  @Override
+  public Converter getGroupingConverter() {
+    return CONVERTER;
+  }
+
+  @Override
+  public MapFn getInputMapFn() {
+    return inputFn;
+  }
+
+  @Override
+  public MapFn getOutputMapFn() {
+    return outputFn;
+  }
+
+  @Override
+  public void initialize(Configuration conf) {
+    getTableType().initialize(conf);
+  }
+
+  @Override
+  public Pair<K, Iterable<V>> getDetachedValue(Pair<K, Iterable<V>> value) {
+    return PTables.getGroupedDetachedValue(this, value);
+  }
+
+  @Override
+  public void configureShuffle(Job job, GroupingOptions options) {
+    AvroTableType<K, V> att = (AvroTableType<K, V>) tableType;
+    String schemaJson = att.getSchema().toString();
+    Configuration conf = job.getConfiguration();
+
+    if (att.hasReflect()) {
+      if (att.hasSpecific()) {
+        Avros.checkCombiningSpecificAndReflectionSchemas();
+      }
+      conf.setBoolean(AvroJob.MAP_OUTPUT_IS_REFLECT, true);
+    }
+    conf.set(AvroJob.MAP_OUTPUT_SCHEMA, schemaJson);
+    job.setSortComparatorClass(AvroKeyComparator.class);
+    job.setMapOutputKeyClass(AvroKey.class);
+    job.setMapOutputValueClass(AvroValue.class);
+    if (options != null) {
+      options.configure(job);
+    }
+
+    Avros.configureReflectDataFactory(conf);
+
+    Collection<String> serializations = job.getConfiguration().getStringCollection(
+        "io.serializations");
+    if (!serializations.contains(SafeAvroSerialization.class.getName())) {
+      serializations.add(SafeAvroSerialization.class.getName());
+      job.getConfiguration().setStrings("io.serializations", serializations.toArray(new String[0]));
+    }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroInputFormat.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroInputFormat.java b/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroInputFormat.java
new file mode 100644
index 0000000..b8bbebd
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroInputFormat.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.avro;
+
+import java.io.IOException;
+
+import org.apache.avro.Schema;
+import org.apache.avro.mapred.AvroJob;
+import org.apache.avro.mapred.AvroWrapper;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+
+/** An {@link org.apache.hadoop.mapreduce.InputFormat} for Avro data files. */
+public class AvroInputFormat<T> extends FileInputFormat<AvroWrapper<T>, NullWritable> {
+  @Override
+  public RecordReader<AvroWrapper<T>, NullWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
+      throws IOException, InterruptedException {
+    context.setStatus(split.toString());
+    String jsonSchema = context.getConfiguration().get(AvroJob.INPUT_SCHEMA);
+    Schema schema = new Schema.Parser().parse(jsonSchema);
+    return new AvroRecordReader<T>(schema);
+  }
+}
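
A sketch of wiring this format into a plain MapReduce job; the schema key mirrors the
AvroJob.INPUT_SCHEMA lookup in createRecordReader() above.

    import org.apache.avro.Schema;
    import org.apache.avro.mapred.AvroJob;
    import org.apache.crunch.types.avro.AvroInputFormat;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

    public class AvroInputSketch {
      public static void main(String[] args) throws Exception {
        Schema schema = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"Rec\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"}]}");
        Job job = new Job(new Configuration());
        job.getConfiguration().set(AvroJob.INPUT_SCHEMA, schema.toString());
        job.setInputFormatClass(AvroInputFormat.class);
        FileInputFormat.addInputPath(job, new Path("/data/records.avro"));
      }
    }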

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroKeyConverter.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroKeyConverter.java b/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroKeyConverter.java
new file mode 100644
index 0000000..68b717d
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroKeyConverter.java
@@ -0,0 +1,65 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.avro;
+
+import org.apache.avro.mapred.AvroWrapper;
+import org.apache.crunch.types.Converter;
+import org.apache.hadoop.io.NullWritable;
+
+class AvroKeyConverter<K> implements Converter<AvroWrapper<K>, NullWritable, K, Iterable<K>> {
+
+  private transient AvroWrapper<K> wrapper = null;
+
+  @Override
+  public K convertInput(AvroWrapper<K> key, NullWritable value) {
+    return key.datum();
+  }
+
+  @Override
+  public AvroWrapper<K> outputKey(K value) {
+    getWrapper().datum(value);
+    return wrapper;
+  }
+
+  @Override
+  public NullWritable outputValue(K value) {
+    return NullWritable.get();
+  }
+
+  @Override
+  public Class<AvroWrapper<K>> getKeyClass() {
+    return (Class<AvroWrapper<K>>) getWrapper().getClass();
+  }
+
+  @Override
+  public Class<NullWritable> getValueClass() {
+    return NullWritable.class;
+  }
+
+  private AvroWrapper<K> getWrapper() {
+    if (wrapper == null) {
+      wrapper = new AvroWrapper<K>();
+    }
+    return wrapper;
+  }
+
+  @Override
+  public Iterable<K> convertIterableInput(AvroWrapper<K> key, Iterable<NullWritable> value) {
+    throw new UnsupportedOperationException("Should not be possible");
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroOutputFormat.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroOutputFormat.java b/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroOutputFormat.java
new file mode 100644
index 0000000..98d3f50
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroOutputFormat.java
@@ -0,0 +1,87 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.avro;
+
+import java.io.IOException;
+
+import org.apache.avro.Schema;
+import org.apache.avro.file.CodecFactory;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.mapred.AvroJob;
+import org.apache.avro.mapred.AvroWrapper;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+
+/** An {@link org.apache.hadoop.mapreduce.OutputFormat} for Avro data files. */
+public class AvroOutputFormat<T> extends FileOutputFormat<AvroWrapper<T>, NullWritable> {
+
+  @Override
+  public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(TaskAttemptContext context) throws IOException,
+      InterruptedException {
+
+    Configuration conf = context.getConfiguration();
+    Schema schema = null;
+    String outputName = conf.get("crunch.namedoutput");
+    if (outputName != null && !outputName.isEmpty()) {
+      schema = (new Schema.Parser()).parse(conf.get("avro.output.schema." + outputName));
+    } else {
+      schema = AvroJob.getOutputSchema(context.getConfiguration());
+    }
+
+    ReflectDataFactory factory = Avros.getReflectDataFactory(conf);
+    final DataFileWriter<T> WRITER = new DataFileWriter<T>(factory.<T> getWriter(schema));
+
+    JobConf jc = new JobConf(conf);
+    /* copied from org.apache.avro.mapred.AvroOutputFormat */
+    
+    if (org.apache.hadoop.mapred.FileOutputFormat.getCompressOutput(jc)) {
+      int level = conf.getInt(org.apache.avro.mapred.AvroOutputFormat.DEFLATE_LEVEL_KEY,
+          org.apache.avro.mapred.AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
+      String codecName = conf.get(AvroJob.OUTPUT_CODEC, 
+          org.apache.avro.file.DataFileConstants.DEFLATE_CODEC);
+      CodecFactory codec = codecName.equals(org.apache.avro.file.DataFileConstants.DEFLATE_CODEC)
+          ? CodecFactory.deflateCodec(level)
+          : CodecFactory.fromString(codecName);
+      WRITER.setCodec(codec);
+    }
+
+    WRITER.setSyncInterval(jc.getInt(org.apache.avro.mapred.AvroOutputFormat.SYNC_INTERVAL_KEY, 
+        org.apache.avro.file.DataFileConstants.DEFAULT_SYNC_INTERVAL));
+
+    Path path = getDefaultWorkFile(context, org.apache.avro.mapred.AvroOutputFormat.EXT);
+    WRITER.create(schema, path.getFileSystem(context.getConfiguration()).create(path));
+    
+    return new RecordWriter<AvroWrapper<T>, NullWritable>() {
+      @Override
+      public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
+        WRITER.append(wrapper.datum());
+      }
+
+      @Override
+      public void close(TaskAttemptContext context) throws IOException, InterruptedException {
+        WRITER.close();
+      }
+    };
+  }
+
+}
\ No newline at end of file
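
The compression branch above is driven entirely by configuration. A sketch of the keys it
consults, assuming the Hadoop-1-era "mapred.output.compress" flag is what the old-API
getCompressOutput() call reads:

    import org.apache.avro.file.DataFileConstants;
    import org.apache.avro.mapred.AvroJob;
    import org.apache.hadoop.conf.Configuration;

    public class AvroCompressionSketch {
      public static Configuration deflateOutput() {
        Configuration conf = new Configuration();
        conf.setBoolean("mapred.output.compress", true); // enables the codec branch
        conf.setInt(org.apache.avro.mapred.AvroOutputFormat.DEFLATE_LEVEL_KEY, 6); // deflate level 1-9
        conf.set(AvroJob.OUTPUT_CODEC, DataFileConstants.DEFLATE_CODEC);
        return conf;
      }
    }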

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroPairConverter.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroPairConverter.java b/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroPairConverter.java
new file mode 100644
index 0000000..d1d2627
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroPairConverter.java
@@ -0,0 +1,108 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.avro;
+
+import java.util.Iterator;
+
+import org.apache.avro.mapred.AvroKey;
+import org.apache.avro.mapred.AvroValue;
+import org.apache.crunch.Pair;
+import org.apache.crunch.types.Converter;
+
+class AvroPairConverter<K, V> implements Converter<AvroKey<K>, AvroValue<V>, Pair<K, V>, Pair<K, Iterable<V>>> {
+
+  private transient AvroKey<K> keyWrapper = null;
+  private transient AvroValue<V> valueWrapper = null;
+
+  @Override
+  public Pair<K, V> convertInput(AvroKey<K> key, AvroValue<V> value) {
+    return Pair.of(key.datum(), value.datum());
+  }
+
+  public Pair<K, Iterable<V>> convertIterableInput(AvroKey<K> key, Iterable<AvroValue<V>> iter) {
+    Iterable<V> it = new AvroWrappedIterable<V>(iter);
+    return Pair.of(key.datum(), it);
+  }
+
+  @Override
+  public AvroKey<K> outputKey(Pair<K, V> value) {
+    getKeyWrapper().datum(value.first());
+    return keyWrapper;
+  }
+
+  @Override
+  public AvroValue<V> outputValue(Pair<K, V> value) {
+    getValueWrapper().datum(value.second());
+    return valueWrapper;
+  }
+
+  @Override
+  public Class<AvroKey<K>> getKeyClass() {
+    return (Class<AvroKey<K>>) getKeyWrapper().getClass();
+  }
+
+  @Override
+  public Class<AvroValue<V>> getValueClass() {
+    return (Class<AvroValue<V>>) getValueWrapper().getClass();
+  }
+
+  private AvroKey<K> getKeyWrapper() {
+    if (keyWrapper == null) {
+      keyWrapper = new AvroKey<K>();
+    }
+    return keyWrapper;
+  }
+
+  private AvroValue<V> getValueWrapper() {
+    if (valueWrapper == null) {
+      valueWrapper = new AvroValue<V>();
+    }
+    return valueWrapper;
+  }
+
+  private static class AvroWrappedIterable<V> implements Iterable<V> {
+
+    private final Iterable<AvroValue<V>> iters;
+
+    public AvroWrappedIterable(Iterable<AvroValue<V>> iters) {
+      this.iters = iters;
+    }
+
+    @Override
+    public Iterator<V> iterator() {
+      return new Iterator<V>() {
+        private final Iterator<AvroValue<V>> it = iters.iterator();
+
+        @Override
+        public boolean hasNext() {
+          return it.hasNext();
+        }
+
+        @Override
+        public V next() {
+          return it.next().datum();
+        }
+
+        @Override
+        public void remove() {
+          it.remove();
+        }
+      };
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroRecordReader.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroRecordReader.java b/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroRecordReader.java
new file mode 100644
index 0000000..9c7578c
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroRecordReader.java
@@ -0,0 +1,114 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.avro;
+
+import java.io.IOException;
+
+import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileReader;
+import org.apache.avro.file.FileReader;
+import org.apache.avro.file.SeekableInput;
+import org.apache.avro.io.DatumReader;
+import org.apache.avro.mapred.AvroJob;
+import org.apache.avro.mapred.AvroWrapper;
+import org.apache.avro.mapred.FsInput;
+import org.apache.avro.specific.SpecificDatumReader;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+
+/** A {@link RecordReader} for Avro data files. */
+class AvroRecordReader<T> extends RecordReader<AvroWrapper<T>, NullWritable> {
+
+  private FileReader<T> reader;
+  private long start;
+  private long end;
+  private AvroWrapper<T> key;
+  private NullWritable value;
+  private Schema schema;
+
+  public AvroRecordReader(Schema schema) {
+    this.schema = schema;
+  }
+
+  @Override
+  public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException, InterruptedException {
+    FileSplit split = (FileSplit) genericSplit;
+    Configuration conf = context.getConfiguration();
+    SeekableInput in = new FsInput(split.getPath(), conf);
+    DatumReader<T> datumReader = null;
+    if (context.getConfiguration().getBoolean(AvroJob.INPUT_IS_REFLECT, true)) {
+      ReflectDataFactory factory = Avros.getReflectDataFactory(conf);
+      datumReader = factory.getReader(schema);
+    } else {
+      datumReader = new SpecificDatumReader<T>(schema);
+    }
+    this.reader = DataFileReader.openReader(in, datumReader);
+    reader.sync(split.getStart()); // sync to start
+    this.start = reader.tell();
+    this.end = split.getStart() + split.getLength();
+  }
+
+  @Override
+  public boolean nextKeyValue() throws IOException, InterruptedException {
+    if (!reader.hasNext() || reader.pastSync(end)) {
+      key = null;
+      value = null;
+      return false;
+    }
+    if (key == null) {
+      key = new AvroWrapper<T>();
+    }
+    if (value == null) {
+      value = NullWritable.get();
+    }
+    key.datum(reader.next(key.datum()));
+    return true;
+  }
+
+  @Override
+  public AvroWrapper<T> getCurrentKey() throws IOException, InterruptedException {
+    return key;
+  }
+
+  @Override
+  public NullWritable getCurrentValue() throws IOException, InterruptedException {
+    return value;
+  }
+
+  @Override
+  public float getProgress() throws IOException {
+    if (end == start) {
+      return 0.0f;
+    } else {
+      return Math.min(1.0f, (getPos() - start) / (float) (end - start));
+    }
+  }
+
+  public long getPos() throws IOException {
+    return reader.tell();
+  }
+
+  @Override
+  public void close() throws IOException {
+    reader.close();
+  }
+}
\ No newline at end of file
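
A rough sketch of the read loop that the enclosing input format and the MapReduce
framework drive against this reader; schema, split, context, and MyRecord are
placeholders, not part of this patch:

    AvroRecordReader<MyRecord> reader = new AvroRecordReader<MyRecord>(schema);
    reader.initialize(split, context);   // syncs to the first record in the split
    while (reader.nextKeyValue()) {
      // The value side is always NullWritable; the datum object may be reused
      // between calls, so copy it if it needs to outlive the iteration.
      MyRecord record = reader.getCurrentKey().datum();
    }
    reader.close();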

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroTableType.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroTableType.java b/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroTableType.java
new file mode 100644
index 0000000..86613df
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroTableType.java
@@ -0,0 +1,151 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.avro;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.IndexedRecord;
+import org.apache.crunch.MapFn;
+import org.apache.crunch.Pair;
+import org.apache.crunch.lib.PTables;
+import org.apache.crunch.types.PGroupedTableType;
+import org.apache.crunch.types.PTableType;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.TupleDeepCopier;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.TaskInputOutputContext;
+
+/**
+ * The implementation of the PTableType interface for Avro-based serialization.
+ * 
+ */
+class AvroTableType<K, V> extends AvroType<Pair<K, V>> implements PTableType<K, V> {
+
+  private static class PairToAvroPair extends MapFn<Pair, org.apache.avro.mapred.Pair> {
+    private final MapFn keyMapFn;
+    private final MapFn valueMapFn;
+    private final String firstJson;
+    private final String secondJson;
+
+    private String pairSchemaJson;
+    private transient Schema pairSchema;
+
+    public PairToAvroPair(AvroType keyType, AvroType valueType) {
+      this.keyMapFn = keyType.getOutputMapFn();
+      this.firstJson = keyType.getSchema().toString();
+      this.valueMapFn = valueType.getOutputMapFn();
+      this.secondJson = valueType.getSchema().toString();
+    }
+
+    @Override
+    public void configure(Configuration conf) {
+      keyMapFn.configure(conf);
+      valueMapFn.configure(conf);
+    }
+
+    @Override
+    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
+      keyMapFn.setContext(context);
+      valueMapFn.setContext(context);
+    }
+    
+    @Override
+    public void initialize() {
+      keyMapFn.initialize();
+      valueMapFn.initialize();
+      pairSchemaJson = org.apache.avro.mapred.Pair.getPairSchema(
+          new Schema.Parser().parse(firstJson), new Schema.Parser().parse(secondJson)).toString();
+    }
+
+    @Override
+    public org.apache.avro.mapred.Pair map(Pair input) {
+      if (pairSchema == null) {
+        pairSchema = new Schema.Parser().parse(pairSchemaJson);
+      }
+      org.apache.avro.mapred.Pair avroPair = new org.apache.avro.mapred.Pair(pairSchema);
+      avroPair.key(keyMapFn.map(input.first()));
+      avroPair.value(valueMapFn.map(input.second()));
+      return avroPair;
+    }
+  }
+
+  private static class IndexedRecordToPair extends MapFn<IndexedRecord, Pair> {
+
+    private final MapFn firstMapFn;
+    private final MapFn secondMapFn;
+
+    public IndexedRecordToPair(MapFn firstMapFn, MapFn secondMapFn) {
+      this.firstMapFn = firstMapFn;
+      this.secondMapFn = secondMapFn;
+    }
+
+    @Override
+    public void configure(Configuration conf) {
+      firstMapFn.configure(conf);
+      secondMapFn.configure(conf);
+    }
+
+    @Override
+    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
+      firstMapFn.setContext(context);
+      secondMapFn.setContext(context);
+    }
+    
+    @Override
+    public void initialize() {
+      firstMapFn.initialize();
+      secondMapFn.initialize();
+    }
+
+    @Override
+    public Pair map(IndexedRecord input) {
+      return Pair.of(firstMapFn.map(input.get(0)), secondMapFn.map(input.get(1)));
+    }
+  }
+
+  private final AvroType<K> keyType;
+  private final AvroType<V> valueType;
+
+  public AvroTableType(AvroType<K> keyType, AvroType<V> valueType, Class<Pair<K, V>> pairClass) {
+    super(pairClass, org.apache.avro.mapred.Pair.getPairSchema(keyType.getSchema(),
+        valueType.getSchema()), new IndexedRecordToPair(keyType.getInputMapFn(),
+        valueType.getInputMapFn()), new PairToAvroPair(keyType, valueType), new TupleDeepCopier(
+        Pair.class, keyType, valueType), keyType, valueType);
+    this.keyType = keyType;
+    this.valueType = valueType;
+  }
+
+  @Override
+  public PType<K> getKeyType() {
+    return keyType;
+  }
+
+  @Override
+  public PType<V> getValueType() {
+    return valueType;
+  }
+
+  @Override
+  public PGroupedTableType<K, V> getGroupedTableType() {
+    return new AvroGroupedTableType<K, V>(this);
+  }
+
+  @Override
+  public Pair<K, V> getDetachedValue(Pair<K, V> value) {
+    return PTables.getDetachedValue(this, value);
+  }
+}
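
In user code this type is normally reached through the Avros factory rather than
constructed directly (the class is package-private); a small hedged example:

    PTableType<String, Long> tableType = Avros.tableOf(Avros.strings(), Avros.longs());
    PType<String> keyType = tableType.getKeyType();     // backed by an AvroType
    PType<Long> valueType = tableType.getValueType();   // backed by an AvroType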

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroTextOutputFormat.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroTextOutputFormat.java b/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroTextOutputFormat.java
new file mode 100644
index 0000000..4930235
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroTextOutputFormat.java
@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.avro;
+
+import java.io.IOException;
+
+import org.apache.avro.mapred.AvroWrapper;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+
+public class AvroTextOutputFormat<K, V> extends TextOutputFormat<K, V> {
+  class DatumRecordTextWriter extends RecordWriter<K, V> {
+    private RecordWriter lineRecordWriter;
+
+    public DatumRecordTextWriter(RecordWriter recordWriter) {
+      this.lineRecordWriter = recordWriter;
+    }
+
+    @Override
+    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
+      lineRecordWriter.close(context);
+    }
+
+    @Override
+    public void write(K arg0, V arg1) throws IOException, InterruptedException {
+      lineRecordWriter.write(getData(arg0), getData(arg1));
+    }
+
+    private Object getData(Object o) {
+      Object data = o;
+      if (o instanceof AvroWrapper) {
+        data = ((AvroWrapper) o).datum();
+      }
+      return data;
+    }
+  }
+
+  @Override
+  public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
+    RecordWriter<K, V> recordWriter = super.getRecordWriter(context);
+    return new DatumRecordTextWriter(recordWriter);
+  }
+
+}
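
The wrapper's only job is to unwrap AvroWrapper datums before handing them to the
plain text writer; a hedged sketch of wiring it up (the job variable is assumed):

    // Keys/values emitted as AvroWrapper<T> are written as the toString() of their datum.
    job.setOutputFormatClass(AvroTextOutputFormat.class);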

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroType.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroType.java b/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroType.java
new file mode 100644
index 0000000..a92b0d0
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroType.java
@@ -0,0 +1,199 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.avro;
+
+import java.util.List;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.specific.SpecificRecord;
+import org.apache.commons.lang.builder.HashCodeBuilder;
+import org.apache.crunch.MapFn;
+import org.apache.crunch.fn.IdentityFn;
+import org.apache.crunch.io.ReadableSourceTarget;
+import org.apache.crunch.io.avro.AvroFileSourceTarget;
+import org.apache.crunch.types.Converter;
+import org.apache.crunch.types.DeepCopier;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.PTypeFamily;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+
+/**
+ * The implementation of the PType interface for Avro-based serialization.
+ * 
+ */
+public class AvroType<T> implements PType<T> {
+
+  private static final Converter AVRO_CONVERTER = new AvroKeyConverter();
+
+  private final Class<T> typeClass;
+  private final String schemaString;
+  private transient Schema schema;
+  private final MapFn baseInputMapFn;
+  private final MapFn baseOutputMapFn;
+  private final List<PType> subTypes;
+  private DeepCopier<T> deepCopier;
+  private boolean initialized = false;
+
+  public AvroType(Class<T> typeClass, Schema schema, DeepCopier<T> deepCopier, PType... ptypes) {
+    this(typeClass, schema, IdentityFn.getInstance(), IdentityFn.getInstance(), deepCopier, ptypes);
+  }
+
+  public AvroType(Class<T> typeClass, Schema schema, MapFn inputMapFn, MapFn outputMapFn,
+      DeepCopier<T> deepCopier, PType... ptypes) {
+    this.typeClass = typeClass;
+    this.schema = Preconditions.checkNotNull(schema);
+    this.schemaString = schema.toString();
+    this.baseInputMapFn = inputMapFn;
+    this.baseOutputMapFn = outputMapFn;
+    this.deepCopier = deepCopier;
+    this.subTypes = ImmutableList.<PType> builder().add(ptypes).build();
+  }
+
+  @Override
+  public Class<T> getTypeClass() {
+    return typeClass;
+  }
+
+  @Override
+  public PTypeFamily getFamily() {
+    return AvroTypeFamily.getInstance();
+  }
+
+  @Override
+  public List<PType> getSubTypes() {
+    return Lists.<PType> newArrayList(subTypes);
+  }
+
+  public Schema getSchema() {
+    if (schema == null) {
+      schema = new Schema.Parser().parse(schemaString);
+    }
+    return schema;
+  }
+
+  /**
+   * Determine if the wrapped type is a specific data Avro type or wraps one.
+   * 
+   * @return true if the wrapped type is a specific data type or wraps one
+   */
+  public boolean hasSpecific() {
+    if (Avros.isPrimitive(this)) {
+      return false;
+    }
+
+    if (!this.subTypes.isEmpty()) {
+      for (PType<?> subType : this.subTypes) {
+        AvroType<?> atype = (AvroType<?>) subType;
+        if (atype.hasSpecific()) {
+          return true;
+        }
+      }
+      return false;
+    }
+
+    return SpecificRecord.class.isAssignableFrom(typeClass);
+  }
+
+  /**
+   * Determine if the wrapped type is a generic data Avro type.
+   * 
+   * @return true if the wrapped type is a generic type
+   */
+  public boolean isGeneric() {
+    return GenericData.Record.class.equals(typeClass);
+  }
+
+  /**
+   * Determine if the wrapped type is a reflection-based Avro type or wraps one.
+   * 
+   * @return true if the wrapped type is a reflection-based type or wraps one.
+   */
+  public boolean hasReflect() {
+    if (Avros.isPrimitive(this)) {
+      return false;
+    }
+
+    if (!this.subTypes.isEmpty()) {
+      for (PType<?> subType : this.subTypes) {
+        if (((AvroType<?>) subType).hasReflect()) {
+          return true;
+        }
+      }
+      return false;
+    }
+
+    return !(typeClass.equals(GenericData.Record.class) || SpecificRecord.class
+        .isAssignableFrom(typeClass));
+  }
+
+  public MapFn<Object, T> getInputMapFn() {
+    return baseInputMapFn;
+  }
+
+  public MapFn<T, Object> getOutputMapFn() {
+    return baseOutputMapFn;
+  }
+
+  @Override
+  public Converter getConverter() {
+    return AVRO_CONVERTER;
+  }
+
+  @Override
+  public ReadableSourceTarget<T> getDefaultFileSource(Path path) {
+    return new AvroFileSourceTarget<T>(path, this);
+  }
+
+  @Override
+  public void initialize(Configuration conf) {
+    deepCopier.initialize(conf);
+    initialized = true;
+  }
+
+  @Override
+  public T getDetachedValue(T value) {
+    if (!initialized) {
+      throw new IllegalStateException("Cannot call getDetachedValue on an uninitialized PType");
+    }
+    return deepCopier.deepCopy(value);
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    if (other == null || !(other instanceof AvroType)) {
+      return false;
+    }
+    AvroType at = (AvroType) other;
+    return (typeClass.equals(at.typeClass) && subTypes.equals(at.subTypes));
+
+  }
+
+  @Override
+  public int hashCode() {
+    HashCodeBuilder hcb = new HashCodeBuilder();
+    hcb.append(typeClass).append(subTypes);
+    return hcb.toHashCode();
+  }
+
+}
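
The hasSpecific()/isGeneric()/hasReflect() checks above decide which Avro datum
reader and writer are used downstream. A hedged illustration using types from the
Avros factory (the schema variable is assumed):

    AvroType<String> strings = (AvroType<String>) Avros.strings();
    strings.hasSpecific();   // false: primitives are neither specific nor reflect-based
    strings.hasReflect();    // false

    AvroType<GenericData.Record> generic =
        (AvroType<GenericData.Record>) Avros.generics(schema);
    generic.isGeneric();     // true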

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroTypeFamily.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroTypeFamily.java b/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroTypeFamily.java
new file mode 100644
index 0000000..e09e173
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroTypeFamily.java
@@ -0,0 +1,164 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.avro;
+
+import java.nio.ByteBuffer;
+import java.util.Collection;
+import java.util.Map;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
+import org.apache.crunch.MapFn;
+import org.apache.crunch.Pair;
+import org.apache.crunch.Tuple;
+import org.apache.crunch.Tuple3;
+import org.apache.crunch.Tuple4;
+import org.apache.crunch.TupleN;
+import org.apache.crunch.types.PGroupedTableType;
+import org.apache.crunch.types.PTableType;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.PTypeFamily;
+import org.apache.crunch.types.PTypeUtils;
+
+public class AvroTypeFamily implements PTypeFamily {
+
+  private static final AvroTypeFamily INSTANCE = new AvroTypeFamily();
+
+  public static AvroTypeFamily getInstance() {
+    return INSTANCE;
+  }
+
+  // There can only be one instance.
+  private AvroTypeFamily() {
+  }
+
+  @Override
+  public PType<Void> nulls() {
+    return Avros.nulls();
+  }
+
+  @Override
+  public PType<String> strings() {
+    return Avros.strings();
+  }
+
+  @Override
+  public PType<Long> longs() {
+    return Avros.longs();
+  }
+
+  @Override
+  public PType<Integer> ints() {
+    return Avros.ints();
+  }
+
+  @Override
+  public PType<Float> floats() {
+    return Avros.floats();
+  }
+
+  @Override
+  public PType<Double> doubles() {
+    return Avros.doubles();
+  }
+
+  @Override
+  public PType<Boolean> booleans() {
+    return Avros.booleans();
+  }
+
+  @Override
+  public PType<ByteBuffer> bytes() {
+    return Avros.bytes();
+  }
+
+  @Override
+  public <T> PType<T> records(Class<T> clazz) {
+    return Avros.records(clazz);
+  }
+
+  public PType<GenericData.Record> generics(Schema schema) {
+    return Avros.generics(schema);
+  }
+
+  public <T> PType<T> containers(Class<T> clazz) {
+    return Avros.containers(clazz);
+  }
+
+  @Override
+  public <T> PType<Collection<T>> collections(PType<T> ptype) {
+    return Avros.collections(ptype);
+  }
+
+  @Override
+  public <T> PType<Map<String, T>> maps(PType<T> ptype) {
+    return Avros.maps(ptype);
+  }
+
+  @Override
+  public <V1, V2> PType<Pair<V1, V2>> pairs(PType<V1> p1, PType<V2> p2) {
+    return Avros.pairs(p1, p2);
+  }
+
+  @Override
+  public <V1, V2, V3> PType<Tuple3<V1, V2, V3>> triples(PType<V1> p1, PType<V2> p2, PType<V3> p3) {
+    return Avros.triples(p1, p2, p3);
+  }
+
+  @Override
+  public <V1, V2, V3, V4> PType<Tuple4<V1, V2, V3, V4>> quads(PType<V1> p1, PType<V2> p2, PType<V3> p3, PType<V4> p4) {
+    return Avros.quads(p1, p2, p3, p4);
+  }
+
+  @Override
+  public PType<TupleN> tuples(PType<?>... ptypes) {
+    return Avros.tuples(ptypes);
+  }
+
+  @Override
+  public <K, V> PTableType<K, V> tableOf(PType<K> key, PType<V> value) {
+    return Avros.tableOf(key, value);
+  }
+
+  @Override
+  public <T> PType<T> as(PType<T> ptype) {
+    if (ptype instanceof AvroType || ptype instanceof AvroGroupedTableType) {
+      return ptype;
+    }
+    if (ptype instanceof PGroupedTableType) {
+      PTableType ptt = ((PGroupedTableType) ptype).getTableType();
+      return new AvroGroupedTableType((AvroTableType) as(ptt));
+    }
+    Class<T> typeClass = ptype.getTypeClass();
+    PType<T> prim = Avros.getPrimitiveType(typeClass);
+    if (prim != null) {
+      return prim;
+    }
+    return PTypeUtils.convert(ptype, this);
+  }
+
+  @Override
+  public <T extends Tuple> PType<T> tuples(Class<T> clazz, PType<?>... ptypes) {
+    return Avros.tuples(clazz, ptypes);
+  }
+
+  @Override
+  public <S, T> PType<T> derived(Class<T> clazz, MapFn<S, T> inputFn, MapFn<T, S> outputFn, PType<S> base) {
+    return Avros.derived(clazz, inputFn, outputFn, base);
+  }
+}
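
Most user code goes through the static Avros factory methods directly; the family
object mainly matters when a PType from another serialization family has to be
converted. A hedged example (Writables is the writable-family counterpart):

    PTypeFamily avro = AvroTypeFamily.getInstance();
    PTableType<String, Long> counts = avro.tableOf(avro.strings(), avro.longs());

    // as() returns the Avro equivalent of a PType defined in another family.
    PType<String> converted = avro.as(Writables.strings());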

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroUtf8InputFormat.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroUtf8InputFormat.java b/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroUtf8InputFormat.java
new file mode 100644
index 0000000..9460fa5
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/avro/AvroUtf8InputFormat.java
@@ -0,0 +1,98 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.avro;
+
+import java.io.IOException;
+
+import org.apache.avro.mapred.AvroWrapper;
+import org.apache.avro.util.Utf8;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.compress.CompressionCodecFactory;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;
+
+/**
+ * An {@link org.apache.hadoop.mapreduce.InputFormat} for text files. Each line is
+ * a {@link Utf8} key; values are null.
+ */
+public class AvroUtf8InputFormat extends FileInputFormat<AvroWrapper<Utf8>, NullWritable> {
+
+  static class Utf8LineRecordReader extends RecordReader<AvroWrapper<Utf8>, NullWritable> {
+
+    private LineRecordReader lineRecordReader;
+
+    private AvroWrapper<Utf8> currentKey = new AvroWrapper<Utf8>();
+
+    public Utf8LineRecordReader() throws IOException {
+      this.lineRecordReader = new LineRecordReader();
+    }
+
+    public void close() throws IOException {
+      lineRecordReader.close();
+    }
+
+    public float getProgress() throws IOException {
+      return lineRecordReader.getProgress();
+    }
+
+    @Override
+    public AvroWrapper<Utf8> getCurrentKey() throws IOException, InterruptedException {
+      Text txt = lineRecordReader.getCurrentValue();
+      currentKey.datum(new Utf8(txt.toString()));
+      return currentKey;
+    }
+
+    @Override
+    public NullWritable getCurrentValue() throws IOException, InterruptedException {
+      return NullWritable.get();
+    }
+
+    @Override
+    public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
+      lineRecordReader.initialize(split, context);
+    }
+
+    @Override
+    public boolean nextKeyValue() throws IOException, InterruptedException {
+      return lineRecordReader.nextKeyValue();
+    }
+  }
+
+  private CompressionCodecFactory compressionCodecs = null;
+
+  public void configure(Configuration conf) {
+    compressionCodecs = new CompressionCodecFactory(conf);
+  }
+
+  protected boolean isSplitable(FileSystem fs, Path file) {
+    return compressionCodecs.getCodec(file) == null;
+  }
+
+  @Override
+  public RecordReader<AvroWrapper<Utf8>, NullWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
+      throws IOException, InterruptedException {
+    return new Utf8LineRecordReader();
+  }
+}
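
A hedged sketch of configuring a job with this format (the job variable is assumed);
each input line then reaches the mapper as an AvroWrapper<Utf8> key with a
NullWritable value:

    job.setInputFormatClass(AvroUtf8InputFormat.class);
    // In the mapper: Utf8 line = key.datum();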


[13/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/hadoop/mapreduce/lib/jobcontrol/CrunchControlledJob.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/hadoop/mapreduce/lib/jobcontrol/CrunchControlledJob.java b/crunch/src/main/java/org/apache/crunch/hadoop/mapreduce/lib/jobcontrol/CrunchControlledJob.java
deleted file mode 100644
index 93926c1..0000000
--- a/crunch/src/main/java/org/apache/crunch/hadoop/mapreduce/lib/jobcontrol/CrunchControlledJob.java
+++ /dev/null
@@ -1,325 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.hadoop.mapreduce.lib.jobcontrol;
-
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.crunch.impl.mr.run.RuntimeParameters;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.JobID;
-import org.apache.hadoop.util.StringUtils;
-
-import com.google.common.base.Objects;
-import com.google.common.collect.Lists;
-
-/**
- * This class encapsulates a MapReduce job and its dependency. It monitors the
- * states of the depending jobs and updates the state of this job. A job starts
- * in the WAITING state. If it does not have any depending jobs, or all of the
- * depending jobs are in SUCCEEDED state, then the job state will become READY. If
- * any depending jobs fail, the job will fail too. When in READY state, the job
- * can be submitted to Hadoop for execution, with the state changing into
- * RUNNING state. From RUNNING state, the job can get into SUCCEEDED or FAILED
- * state, depending the status of the job execution.
- */
-public class CrunchControlledJob {
-
-  // A job will be in one of the following states
-  public static enum State {
-    SUCCESS, WAITING, RUNNING, READY, FAILED, DEPENDENT_FAILED
-  };
-
-  public static interface Hook {
-    public void run() throws IOException;
-  }
-
-  private static final Log LOG = LogFactory.getLog(CrunchControlledJob.class);
-
-  private final int jobID;
-  private final Job job; // mapreduce job to be executed.
-  // the jobs the current job depends on
-  private final List<CrunchControlledJob> dependingJobs;
-  private final Hook prepareHook;
-  private final Hook completionHook;
-  private State state;
-  // some info for human consumption, e.g. the reason why the job failed
-  private String message;
-  private String lastKnownProgress;
-
-  /**
-   * Construct a job.
-   *
-   * @param jobID
-   *          an ID used to match with its {@link org.apache.crunch.impl.mr.plan.JobPrototype}.
-   * @param job
-   *          a mapreduce job to be executed.
-   * @param prepareHook
-   *          a piece of code that will run before this job is submitted.
-   * @param completionHook
-   *          a piece of code that will run after this job gets completed.
-   */
-  public CrunchControlledJob(int jobID, Job job, Hook prepareHook, Hook completionHook) {
-    this.jobID = jobID;
-    this.job = job;
-    this.dependingJobs = Lists.newArrayList();
-    this.prepareHook = prepareHook;
-    this.completionHook = completionHook;
-    this.state = State.WAITING;
-    this.message = "just initialized";
-  }
-
-  @Override
-  public String toString() {
-    StringBuffer sb = new StringBuffer();
-    sb.append("job name:\t").append(this.job.getJobName()).append("\n");
-    sb.append("job id:\t").append(this.jobID).append("\n");
-    sb.append("job state:\t").append(this.state).append("\n");
-    sb.append("job mapred id:\t").append(this.job.getJobID()).append("\n");
-    sb.append("job message:\t").append(this.message).append("\n");
-
-    if (this.dependingJobs == null || this.dependingJobs.size() == 0) {
-      sb.append("job has no depending job:\t").append("\n");
-    } else {
-      sb.append("job has ").append(this.dependingJobs.size())
-          .append(" depending jobs:\n");
-      for (int i = 0; i < this.dependingJobs.size(); i++) {
-        sb.append("\t depending job ").append(i).append(":\t");
-        sb.append((this.dependingJobs.get(i)).getJobName()).append("\n");
-      }
-    }
-    return sb.toString();
-  }
-
-  /**
-   * @return the job name of this job
-   */
-  public String getJobName() {
-    return job.getJobName();
-  }
-
-  /**
-   * Set the job name for this job.
-   *
-   * @param jobName
-   *          the job name
-   */
-  public void setJobName(String jobName) {
-    job.setJobName(jobName);
-  }
-
-  /**
-   * @return the job ID of this job
-   */
-  public int getJobID() {
-    return this.jobID;
-  }
-
-  /**
-   * @return the mapred ID of this job as assigned by the mapred framework.
-   */
-  public JobID getMapredJobID() {
-    return this.job.getJobID();
-  }
-
-  /**
-   * @return the mapreduce job
-   */
-  public synchronized Job getJob() {
-    return this.job;
-  }
-
-  /**
-   * @return the state of this job
-   */
-  public synchronized State getJobState() {
-    return this.state;
-  }
-
-  /**
-   * Set the state for this job.
-   * 
-   * @param state
-   *          the new state for this job.
-   */
-  protected synchronized void setJobState(State state) {
-    this.state = state;
-  }
-
-  /**
-   * @return the message of this job
-   */
-  public synchronized String getMessage() {
-    return this.message;
-  }
-
-  /**
-   * Set the message for this job.
-   * 
-   * @param message
-   *          the message for this job.
-   */
-  public synchronized void setMessage(String message) {
-    this.message = message;
-  }
-
-  /**
-   * @return the depending jobs of this job
-   */
-  public List<CrunchControlledJob> getDependentJobs() {
-    return this.dependingJobs;
-  }
-
-  /**
-   * Add a job to this job's dependency list. Dependent jobs can only be added
-   * while a Job is waiting to run, not during or afterwards.
-   * 
-   * @param dependingJob
-   *          Job that this Job depends on.
-   * @return <tt>true</tt> if the Job was added.
-   */
-  public synchronized boolean addDependingJob(CrunchControlledJob dependingJob) {
-    if (this.state == State.WAITING) { // only allowed to add jobs when waiting
-      return this.dependingJobs.add(dependingJob);
-    } else {
-      return false;
-    }
-  }
-
-  /**
-   * @return true if this job is in a complete state
-   */
-  public synchronized boolean isCompleted() {
-    return this.state == State.FAILED || this.state == State.DEPENDENT_FAILED
-        || this.state == State.SUCCESS;
-  }
-
-  /**
-   * @return true if this job is in READY state
-   */
-  public synchronized boolean isReady() {
-    return this.state == State.READY;
-  }
-
-  public void killJob() throws IOException, InterruptedException {
-    job.killJob();
-  }
-
-  /**
-   * Check the state of this running job. The state may remain the same, become
-   * SUCCEEDED or FAILED.
-   */
-  private void checkRunningState() throws IOException, InterruptedException {
-    try {
-      if (job.isComplete()) {
-        if (job.isSuccessful()) {
-          this.state = State.SUCCESS;
-        } else {
-          this.state = State.FAILED;
-          this.message = "Job failed!";
-        }
-      } else {
-        // still running
-        if (job.getConfiguration().getBoolean(RuntimeParameters.LOG_JOB_PROGRESS, false)) {
-          logJobProgress();
-        }
-      }
-    } catch (IOException ioe) {
-      this.state = State.FAILED;
-      this.message = StringUtils.stringifyException(ioe);
-      try {
-        if (job != null) {
-          job.killJob();
-        }
-      } catch (IOException e) {
-      }
-    }
-    if (isCompleted()) {
-      completionHook.run();
-    }
-  }
-
-  /**
-   * Check and update the state of this job. The state changes depending on its
-   * current state and the states of the depending jobs.
-   */
-  synchronized State checkState() throws IOException, InterruptedException {
-    if (this.state == State.RUNNING) {
-      checkRunningState();
-    }
-    if (this.state != State.WAITING) {
-      return this.state;
-    }
-    if (this.dependingJobs == null || this.dependingJobs.size() == 0) {
-      this.state = State.READY;
-      return this.state;
-    }
-    CrunchControlledJob pred = null;
-    int n = this.dependingJobs.size();
-    for (int i = 0; i < n; i++) {
-      pred = this.dependingJobs.get(i);
-      State s = pred.checkState();
-      if (s == State.WAITING || s == State.READY || s == State.RUNNING) {
-        break; // a pred is still not completed, continue in WAITING
-        // state
-      }
-      if (s == State.FAILED || s == State.DEPENDENT_FAILED) {
-        this.state = State.DEPENDENT_FAILED;
-        this.message = "depending job " + i + " with jobID " + pred.getJobID()
-            + " failed. " + pred.getMessage();
-        break;
-      }
-      // pred must be in success state
-      if (i == n - 1) {
-        this.state = State.READY;
-      }
-    }
-
-    return this.state;
-  }
-
-  /**
-   * Submit this job to mapred. The state becomes RUNNING if submission is
-   * successful, FAILED otherwise.
-   */
-  protected synchronized void submit() {
-    try {
-      prepareHook.run();
-      job.submit();
-      this.state = State.RUNNING;
-      LOG.info("Running job \"" + getJobName() + "\"");
-      LOG.info("Job status available at: " + job.getTrackingURL());
-    } catch (Exception ioe) {
-      this.state = State.FAILED;
-      this.message = StringUtils.stringifyException(ioe);
-      LOG.info("Error occurred starting job \"" + getJobName() + "\":");
-      LOG.info(getMessage());
-    }
-  }
-
-  private void logJobProgress() throws IOException, InterruptedException {
-    String progress = String.format("map %.0f%% reduce %.0f%%",
-        100.0 * job.mapProgress(), 100.0 * job.reduceProgress());
-    if (!Objects.equal(lastKnownProgress, progress)) {
-      LOG.info(job.getJobName() + " progress: " + progress);
-      lastKnownProgress = progress;
-    }
-  }
-}
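
The state machine described in the class comment is driven entirely by checkState().
A hedged sketch of wiring two dependent jobs together; jobA, jobB and the hook
instances are placeholders:

    CrunchControlledJob first = new CrunchControlledJob(1, jobA, prepareHook, completionHook);
    CrunchControlledJob second = new CrunchControlledJob(2, jobB, prepareHook, completionHook);

    // 'second' stays in WAITING until 'first' reaches SUCCESS, then becomes READY.
    second.addDependingJob(first);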

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/hadoop/mapreduce/lib/jobcontrol/CrunchJobControl.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/hadoop/mapreduce/lib/jobcontrol/CrunchJobControl.java b/crunch/src/main/java/org/apache/crunch/hadoop/mapreduce/lib/jobcontrol/CrunchJobControl.java
deleted file mode 100644
index 727ab6f..0000000
--- a/crunch/src/main/java/org/apache/crunch/hadoop/mapreduce/lib/jobcontrol/CrunchJobControl.java
+++ /dev/null
@@ -1,211 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.hadoop.mapreduce.lib.jobcontrol;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Hashtable;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.crunch.hadoop.mapreduce.lib.jobcontrol.CrunchControlledJob.State;
-
-/**
- * This class encapsulates a set of MapReduce jobs and their dependencies.
- * 
- * It tracks the states of the jobs by placing them into different tables
- * according to their states.
- * 
- * This class provides APIs for the client app to add a job to the group and to
- * get the jobs in the group in different states. When a job is added, an ID
- * unique to the group is assigned to the job.
- */
-public class CrunchJobControl {
-
-  private Map<Integer, CrunchControlledJob> waitingJobs;
-  private Map<Integer, CrunchControlledJob> readyJobs;
-  private Map<Integer, CrunchControlledJob> runningJobs;
-  private Map<Integer, CrunchControlledJob> successfulJobs;
-  private Map<Integer, CrunchControlledJob> failedJobs;
-
-  private Log log = LogFactory.getLog(CrunchJobControl.class);
-
-  private final String groupName;
-
-  /**
-   * Construct a job control for a group of jobs.
-   * 
-   * @param groupName
-   *          a name identifying this group
-   */
-  public CrunchJobControl(String groupName) {
-    this.waitingJobs = new Hashtable<Integer, CrunchControlledJob>();
-    this.readyJobs = new Hashtable<Integer, CrunchControlledJob>();
-    this.runningJobs = new Hashtable<Integer, CrunchControlledJob>();
-    this.successfulJobs = new Hashtable<Integer, CrunchControlledJob>();
-    this.failedJobs = new Hashtable<Integer, CrunchControlledJob>();
-    this.groupName = groupName;
-  }
-
-  private static List<CrunchControlledJob> toList(Map<Integer, CrunchControlledJob> jobs) {
-    ArrayList<CrunchControlledJob> retv = new ArrayList<CrunchControlledJob>();
-    synchronized (jobs) {
-      for (CrunchControlledJob job : jobs.values()) {
-        retv.add(job);
-      }
-    }
-    return retv;
-  }
-
-  /**
-   * @return the jobs in the waiting state
-   */
-  public List<CrunchControlledJob> getWaitingJobList() {
-    return toList(this.waitingJobs);
-  }
-
-  /**
-   * @return the jobs in the running state
-   */
-  public List<CrunchControlledJob> getRunningJobList() {
-    return toList(this.runningJobs);
-  }
-
-  /**
-   * @return the jobs in the ready state
-   */
-  public List<CrunchControlledJob> getReadyJobsList() {
-    return toList(this.readyJobs);
-  }
-
-  /**
-   * @return the jobs in the success state
-   */
-  public List<CrunchControlledJob> getSuccessfulJobList() {
-    return toList(this.successfulJobs);
-  }
-
-  public List<CrunchControlledJob> getFailedJobList() {
-    return toList(this.failedJobs);
-  }
-
-  private static void addToQueue(CrunchControlledJob aJob,
-      Map<Integer, CrunchControlledJob> queue) {
-    synchronized (queue) {
-      queue.put(aJob.getJobID(), aJob);
-    }
-  }
-
-  private void addToQueue(CrunchControlledJob aJob) {
-    Map<Integer, CrunchControlledJob> queue = getQueue(aJob.getJobState());
-    addToQueue(aJob, queue);
-  }
-
-  private Map<Integer, CrunchControlledJob> getQueue(State state) {
-    Map<Integer, CrunchControlledJob> retv = null;
-    if (state == State.WAITING) {
-      retv = this.waitingJobs;
-    } else if (state == State.READY) {
-      retv = this.readyJobs;
-    } else if (state == State.RUNNING) {
-      retv = this.runningJobs;
-    } else if (state == State.SUCCESS) {
-      retv = this.successfulJobs;
-    } else if (state == State.FAILED || state == State.DEPENDENT_FAILED) {
-      retv = this.failedJobs;
-    }
-    return retv;
-  }
-
-  /**
-   * Add a new job.
-   * 
-   * @param aJob
-   *          the new job
-   */
-  synchronized public void addJob(CrunchControlledJob aJob) {
-    aJob.setJobState(State.WAITING);
-    this.addToQueue(aJob);
-  }
-
-  synchronized private void checkRunningJobs() throws IOException,
-      InterruptedException {
-
-    Map<Integer, CrunchControlledJob> oldJobs = null;
-    oldJobs = this.runningJobs;
-    this.runningJobs = new Hashtable<Integer, CrunchControlledJob>();
-
-    for (CrunchControlledJob nextJob : oldJobs.values()) {
-      nextJob.checkState();
-      this.addToQueue(nextJob);
-    }
-  }
-
-  synchronized private void checkWaitingJobs() throws IOException,
-      InterruptedException {
-    Map<Integer, CrunchControlledJob> oldJobs = null;
-    oldJobs = this.waitingJobs;
-    this.waitingJobs = new Hashtable<Integer, CrunchControlledJob>();
-
-    for (CrunchControlledJob nextJob : oldJobs.values()) {
-      nextJob.checkState();
-      this.addToQueue(nextJob);
-    }
-  }
-
-  synchronized private void startReadyJobs() {
-    Map<Integer, CrunchControlledJob> oldJobs = null;
-    oldJobs = this.readyJobs;
-    this.readyJobs = new Hashtable<Integer, CrunchControlledJob>();
-
-    for (CrunchControlledJob nextJob : oldJobs.values()) {
-      // Submitting Job to Hadoop
-      nextJob.submit();
-      this.addToQueue(nextJob);
-    }
-  }
-
-  synchronized public void killAllRunningJobs() {
-    for (CrunchControlledJob job : runningJobs.values()) {
-      if (!job.isCompleted()) {
-        try {
-          job.killJob();
-        } catch (Exception e) {
-          log.error("Exception killing job: " + job.getJobName(), e);
-        }
-      }
-    }
-  }
-
-  synchronized public boolean allFinished() {
-    return this.waitingJobs.size() == 0 && this.readyJobs.size() == 0
-        && this.runningJobs.size() == 0;
-  }
-
-  /**
-   * Checks the states of the running jobs, updates the states of the waiting jobs, and submits the
-   * jobs in the READY state (i.e. those whose dependencies have all finished successfully).
-   */
-  public void pollJobStatusAndStartNewOnes() throws IOException, InterruptedException {
-    checkRunningJobs();
-    checkWaitingJobs();
-    startReadyJobs();
-  }
-}
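
A hedged sketch of the polling loop a caller runs against this control object; the
CrunchControlledJob instances are assumed to exist, and IOException/InterruptedException
handling is omitted:

    CrunchJobControl control = new CrunchJobControl("my-pipeline");
    control.addJob(first);
    control.addJob(second);

    while (!control.allFinished()) {
      control.pollJobStatusAndStartNewOnes();   // WAITING -> READY -> RUNNING -> done
      Thread.sleep(500);
    }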

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/SingleUseIterable.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/SingleUseIterable.java b/crunch/src/main/java/org/apache/crunch/impl/SingleUseIterable.java
deleted file mode 100644
index 98f982f..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/SingleUseIterable.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl;
-
-import java.util.Iterator;
-
-/**
- * Wrapper around a Reducer's input Iterable. Ensures that the
- * {@link #iterator()} method is not called more than once.
- */
-public class SingleUseIterable<T> implements Iterable<T> {
-
-  private boolean used = false;
-  private Iterable<T> wrappedIterable;
-
-  /**
-   * Instantiate around an Iterable that may only be used once.
-   * 
-   * @param toWrap iterable to wrap
-   */
-  public SingleUseIterable(Iterable<T> toWrap) {
-    this.wrappedIterable = toWrap;
-  }
-
-  @Override
-  public Iterator<T> iterator() {
-    if (used) {
-      throw new IllegalStateException("iterator() can only be called once on this Iterable");
-    }
-    used = true;
-    return wrappedIterable.iterator();
-  }
-
-}
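
The contract is simply that a second call to iterator() fails fast; a small
illustration (the Arrays.asList input is only an example):

    Iterable<Integer> once = new SingleUseIterable<Integer>(Arrays.asList(1, 2, 3));
    for (Integer i : once) {
      // the first traversal works as usual
    }
    once.iterator();   // throws IllegalStateException: iterator() may only be called once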

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mem/MemPipeline.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mem/MemPipeline.java b/crunch/src/main/java/org/apache/crunch/impl/mem/MemPipeline.java
deleted file mode 100644
index 272b2af..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mem/MemPipeline.java
+++ /dev/null
@@ -1,275 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mem;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.Set;
-import java.util.concurrent.TimeUnit;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.crunch.CrunchRuntimeException;
-import org.apache.crunch.PCollection;
-import org.apache.crunch.PTable;
-import org.apache.crunch.Pair;
-import org.apache.crunch.Pipeline;
-import org.apache.crunch.PipelineExecution;
-import org.apache.crunch.PipelineResult;
-import org.apache.crunch.Source;
-import org.apache.crunch.TableSource;
-import org.apache.crunch.Target;
-import org.apache.crunch.Target.WriteMode;
-import org.apache.crunch.impl.mem.collect.MemCollection;
-import org.apache.crunch.impl.mem.collect.MemTable;
-import org.apache.crunch.io.At;
-import org.apache.crunch.io.PathTarget;
-import org.apache.crunch.io.ReadableSource;
-import org.apache.crunch.types.PTableType;
-import org.apache.crunch.types.PType;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.Counters;
-
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
-
-public class MemPipeline implements Pipeline {
-
-  private static final Log LOG = LogFactory.getLog(MemPipeline.class);
-  private static Counters COUNTERS = new Counters();
-  private static final MemPipeline INSTANCE = new MemPipeline();
-
-  private int outputIndex = 0;
-  
-  public static Counters getCounters() {
-    return COUNTERS;
-  }
-  
-  public static void clearCounters() {
-    COUNTERS = new Counters();
-  }
-
-  public static Pipeline getInstance() {
-    return INSTANCE;
-  }
-
-  public static <T> PCollection<T> collectionOf(T... ts) {
-    return new MemCollection<T>(ImmutableList.copyOf(ts));
-  }
-
-  public static <T> PCollection<T> collectionOf(Iterable<T> collect) {
-    return new MemCollection<T>(collect);
-  }
-
-  public static <T> PCollection<T> typedCollectionOf(PType<T> ptype, T... ts) {
-    return new MemCollection<T>(ImmutableList.copyOf(ts), ptype, null);
-  }
-
-  public static <T> PCollection<T> typedCollectionOf(PType<T> ptype, Iterable<T> collect) {
-    return new MemCollection<T>(collect, ptype, null);
-  }
-
-  public static <S, T> PTable<S, T> tableOf(S s, T t, Object... more) {
-    List<Pair<S, T>> pairs = Lists.newArrayList();
-    pairs.add(Pair.of(s, t));
-    for (int i = 0; i < more.length; i += 2) {
-      pairs.add(Pair.of((S) more[i], (T) more[i + 1]));
-    }
-    return new MemTable<S, T>(pairs);
-  }
-
-  public static <S, T> PTable<S, T> typedTableOf(PTableType<S, T> ptype, S s, T t, Object... more) {
-    List<Pair<S, T>> pairs = Lists.newArrayList();
-    pairs.add(Pair.of(s, t));
-    for (int i = 0; i < more.length; i += 2) {
-      pairs.add(Pair.of((S) more[i], (T) more[i + 1]));
-    }
-    return new MemTable<S, T>(pairs, ptype, null);
-  }
-
-  public static <S, T> PTable<S, T> tableOf(Iterable<Pair<S, T>> pairs) {
-    return new MemTable<S, T>(pairs);
-  }
-
-  public static <S, T> PTable<S, T> typedTableOf(PTableType<S, T> ptype, Iterable<Pair<S, T>> pairs) {
-    return new MemTable<S, T>(pairs, ptype, null);
-  }
-
-  private Configuration conf = new Configuration();
-  private Set<Target> activeTargets = Sets.newHashSet();
-  
-  private MemPipeline() {
-  }
-
-  @Override
-  public void setConfiguration(Configuration conf) {
-    this.conf = conf;
-  }
-
-  @Override
-  public Configuration getConfiguration() {
-    return conf;
-  }
-
-  @Override
-  public <T> PCollection<T> read(Source<T> source) {
-    if (source instanceof ReadableSource) {
-      try {
-        Iterable<T> iterable = ((ReadableSource<T>) source).read(conf);
-        return new MemCollection<T>(iterable, source.getType(), source.toString());
-      } catch (IOException e) {
-        LOG.error("Exception reading source: " + source.toString(), e);
-        throw new IllegalStateException(e);
-      }
-    }
-    LOG.error("Source " + source + " is not readable");
-    throw new IllegalStateException("Source " + source + " is not readable");
-  }
-
-  @Override
-  public <K, V> PTable<K, V> read(TableSource<K, V> source) {
-    if (source instanceof ReadableSource) {
-      try {
-        Iterable<Pair<K, V>> iterable = ((ReadableSource<Pair<K, V>>) source).read(conf);
-        return new MemTable<K, V>(iterable, source.getTableType(), source.toString());
-      } catch (IOException e) {
-        LOG.error("Exception reading source: " + source.toString(), e);
-        throw new IllegalStateException(e);
-      }
-    }
-    LOG.error("Source " + source + " is not readable");
-    throw new IllegalStateException("Source " + source + " is not readable");
-  }
-
-  @Override
-  public void write(PCollection<?> collection, Target target) {
-    write(collection, target, Target.WriteMode.DEFAULT);
-  }
-  
-  @Override
-  public void write(PCollection<?> collection, Target target,
-      Target.WriteMode writeMode) {
-    target.handleExisting(writeMode, getConfiguration());
-    if (writeMode != WriteMode.APPEND && activeTargets.contains(target)) {
-      throw new CrunchRuntimeException("Target " + target + " is already written in the current run." +
-          " Use WriteMode.APPEND in order to write additional data to it.");
-    }
-    activeTargets.add(target);
-    if (target instanceof PathTarget) {
-      Path path = ((PathTarget) target).getPath();
-      try {
-        FileSystem fs = path.getFileSystem(conf);
-        FSDataOutputStream os = fs.create(new Path(path, "out" + outputIndex));
-        outputIndex++;
-        if (collection instanceof PTable) {
-          for (Object o : collection.materialize()) {
-            Pair p = (Pair) o;
-            os.writeBytes(p.first().toString());
-            os.writeBytes("\t");
-            os.writeBytes(p.second().toString());
-            os.writeBytes("\r\n");
-          }
-        } else {
-          for (Object o : collection.materialize()) {
-            os.writeBytes(o.toString() + "\r\n");
-          }
-        }
-        os.close();
-      } catch (IOException e) {
-        LOG.error("Exception writing target: " + target, e);
-      }
-    } else {
-      LOG.error("Target " + target + " is not a PathTarget instance");
-    }
-  }
-
-  @Override
-  public PCollection<String> readTextFile(String pathName) {
-    return read(At.textFile(pathName));
-  }
-
-  @Override
-  public <T> void writeTextFile(PCollection<T> collection, String pathName) {
-    write(collection, At.textFile(pathName));
-  }
-
-  @Override
-  public <T> Iterable<T> materialize(PCollection<T> pcollection) {
-    return pcollection.materialize();
-  }
-
-  @Override
-  public PipelineExecution runAsync() {
-    activeTargets.clear();
-    return new PipelineExecution() {
-      @Override
-      public String getPlanDotFile() {
-        return "";
-      }
-
-      @Override
-      public void waitFor(long timeout, TimeUnit timeUnit) throws InterruptedException {
-        // no-op
-      }
-
-      @Override
-      public void waitUntilDone() throws InterruptedException {
-        // no-op
-      }
-
-      @Override
-      public Status getStatus() {
-        return Status.SUCCEEDED;
-      }
-
-      @Override
-      public PipelineResult getResult() {
-        return new PipelineResult(ImmutableList.of(new PipelineResult.StageResult("MemPipelineStage", COUNTERS)));
-      }
-
-      @Override
-      public void kill() {
-      }
-    };
-  }
-  
-  @Override
-  public PipelineResult run() {
-    activeTargets.clear();
-    return new PipelineResult(ImmutableList.of(new PipelineResult.StageResult("MemPipelineStage", COUNTERS)));
-  }
-
-  @Override
-  public PipelineResult done() {
-    return run();
-  }
-
-  @Override
-  public void enableDebug() {
-    LOG.info("Note: in-memory pipelines do not have debug logging");
-  }
-
-  @Override
-  public String getName() {
-    return "Memory Pipeline";
-  }
-}
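
For context, a minimal sketch of how this in-memory pipeline is typically used; the paths
and class name are placeholders rather than anything from the original source. Because
MemPipeline reads and writes eagerly, run(), runAsync() and done() only report success
after the fact.

    import org.apache.crunch.PCollection;
    import org.apache.crunch.Pipeline;
    import org.apache.crunch.impl.mem.MemPipeline;

    public class MemPipelineSketch {
      public static void main(String[] args) {
        Pipeline pipeline = MemPipeline.getInstance();

        // Reading is eager: the source is materialized as soon as it is read.
        PCollection<String> lines = pipeline.readTextFile("/tmp/mem-pipeline-input.txt");

        // Writing is eager too: the records land under the target path as out<n> files.
        pipeline.writeTextFile(lines, "/tmp/mem-pipeline-output");

        // done() simply reports a successful PipelineResult; nothing is deferred.
        pipeline.done();
      }
    }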

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mem/collect/MemCollection.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mem/collect/MemCollection.java b/crunch/src/main/java/org/apache/crunch/impl/mem/collect/MemCollection.java
deleted file mode 100644
index c97fac6..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mem/collect/MemCollection.java
+++ /dev/null
@@ -1,295 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mem.collect;
-
-import java.lang.reflect.Method;
-import java.util.Collection;
-
-import javassist.util.proxy.MethodFilter;
-import javassist.util.proxy.MethodHandler;
-import javassist.util.proxy.ProxyFactory;
-
-import org.apache.crunch.DoFn;
-import org.apache.crunch.FilterFn;
-import org.apache.crunch.MapFn;
-import org.apache.crunch.PCollection;
-import org.apache.crunch.PObject;
-import org.apache.crunch.PTable;
-import org.apache.crunch.Pair;
-import org.apache.crunch.ParallelDoOptions;
-import org.apache.crunch.Pipeline;
-import org.apache.crunch.Target;
-import org.apache.crunch.fn.ExtractKeyFn;
-import org.apache.crunch.impl.mem.MemPipeline;
-import org.apache.crunch.impl.mem.emit.InMemoryEmitter;
-import org.apache.crunch.lib.Aggregate;
-import org.apache.crunch.materialize.pobject.CollectionPObject;
-import org.apache.crunch.types.PTableType;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.PTypeFamily;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.OutputCommitter;
-import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.hadoop.mapreduce.StatusReporter;
-import org.apache.hadoop.mapreduce.TaskAttemptID;
-import org.apache.hadoop.mapreduce.TaskInputOutputContext;
-
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Lists;
-
-public class MemCollection<S> implements PCollection<S> {
-
-  private final Collection<S> collect;
-  private final PType<S> ptype;
-  private String name;
-
-  public MemCollection(Iterable<S> collect) {
-    this(collect, null, null);
-  }
-
-  public MemCollection(Iterable<S> collect, PType<S> ptype) {
-    this(collect, ptype, null);
-  }
-
-  public MemCollection(Iterable<S> collect, PType<S> ptype, String name) {
-    this.collect = ImmutableList.copyOf(collect);
-    this.ptype = ptype;
-    this.name = name;
-  }
-
-  @Override
-  public Pipeline getPipeline() {
-    return MemPipeline.getInstance();
-  }
-
-  @Override
-  public PCollection<S> union(PCollection<S> other) {
-    return union(new PCollection[] { other });
-  }
-  
-  @Override
-  public PCollection<S> union(PCollection<S>... collections) {
-    Collection<S> output = Lists.newArrayList();
-    for (PCollection<S> pcollect : collections) {
-      for (S s : pcollect.materialize()) {
-        output.add(s);
-      }
-    }
-    output.addAll(collect);
-    return new MemCollection<S>(output, collections[0].getPType());
-  }
-
-  @Override
-  public <T> PCollection<T> parallelDo(DoFn<S, T> doFn, PType<T> type) {
-    return parallelDo(null, doFn, type);
-  }
-
-  @Override
-  public <T> PCollection<T> parallelDo(String name, DoFn<S, T> doFn, PType<T> type) {
-    return parallelDo(name, doFn, type, ParallelDoOptions.builder().build());
-  }
-  
-  @Override
-  public <T> PCollection<T> parallelDo(String name, DoFn<S, T> doFn, PType<T> type,
-      ParallelDoOptions options) {
-    InMemoryEmitter<T> emitter = new InMemoryEmitter<T>();
-    doFn.setContext(getInMemoryContext(getPipeline().getConfiguration()));
-    doFn.initialize();
-    for (S s : collect) {
-      doFn.process(s, emitter);
-    }
-    doFn.cleanup(emitter);
-    return new MemCollection<T>(emitter.getOutput(), type, name);
-  }
-
-  @Override
-  public <K, V> PTable<K, V> parallelDo(DoFn<S, Pair<K, V>> doFn, PTableType<K, V> type) {
-    return parallelDo(null, doFn, type);
-  }
-
-  @Override
-  public <K, V> PTable<K, V> parallelDo(String name, DoFn<S, Pair<K, V>> doFn, PTableType<K, V> type) {
-    return parallelDo(name, doFn, type, ParallelDoOptions.builder().build());
-  }
-  
-  @Override
-  public <K, V> PTable<K, V> parallelDo(String name, DoFn<S, Pair<K, V>> doFn, PTableType<K, V> type,
-      ParallelDoOptions options) {
-    InMemoryEmitter<Pair<K, V>> emitter = new InMemoryEmitter<Pair<K, V>>();
-    doFn.setContext(getInMemoryContext(getPipeline().getConfiguration()));
-    doFn.initialize();
-    for (S s : collect) {
-      doFn.process(s, emitter);
-    }
-    doFn.cleanup(emitter);
-    return new MemTable<K, V>(emitter.getOutput(), type, name);
-  }
-
-  @Override
-  public PCollection<S> write(Target target) {
-    getPipeline().write(this, target);
-    return this;
-  }
-
-  @Override
-  public PCollection<S> write(Target target, Target.WriteMode writeMode) {
-    getPipeline().write(this, target, writeMode);
-    return this;
-  }
-
-  @Override
-  public Iterable<S> materialize() {
-    return collect;
-  }
-
-  /** {@inheritDoc} */
-  @Override
-  public PObject<Collection<S>> asCollection() {
-    return new CollectionPObject<S>(this);
-  }
-
-  public Collection<S> getCollection() {
-    return collect;
-  }
-
-  @Override
-  public PType<S> getPType() {
-    return ptype;
-  }
-
-  @Override
-  public PTypeFamily getTypeFamily() {
-    if (ptype != null) {
-      return ptype.getFamily();
-    }
-    return null;
-  }
-
-  @Override
-  public long getSize() {
-    return collect.isEmpty() ? 0 : 1; // getSize is only used for pipeline optimization in MR
-  }
-
-  @Override
-  public String getName() {
-    return name;
-  }
-
-  @Override
-  public String toString() {
-    return collect.toString();
-  }
-
-  @Override
-  public PTable<S, Long> count() {
-    return Aggregate.count(this);
-  }
-
-  @Override
-  public PObject<Long> length() {
-    return Aggregate.length(this);
-  }
-
-  @Override
-  public PObject<S> max() {
-    return Aggregate.max(this);
-  }
-
-  @Override
-  public PObject<S> min() {
-    return Aggregate.min(this);
-  }
-
-  @Override
-  public PCollection<S> filter(FilterFn<S> filterFn) {
-    return parallelDo(filterFn, getPType());
-  }
-
-  @Override
-  public PCollection<S> filter(String name, FilterFn<S> filterFn) {
-    return parallelDo(name, filterFn, getPType());
-  }
-
-  @Override
-  public <K> PTable<K, S> by(MapFn<S, K> mapFn, PType<K> keyType) {
-    return parallelDo(new ExtractKeyFn<K, S>(mapFn), getTypeFamily().tableOf(keyType, getPType()));
-  }
-
-  @Override
-  public <K> PTable<K, S> by(String name, MapFn<S, K> mapFn, PType<K> keyType) {
-    return parallelDo(name, new ExtractKeyFn<K, S>(mapFn), getTypeFamily().tableOf(keyType, getPType()));
-  }
-
-  /**
-   * Creates a {@link TaskInputOutputContext} that just provides a
-   * {@linkplain Configuration}. It is implemented with javassist because the
-   * API differs between Hadoop versions: in Hadoop 1.0.3,
-   * {@linkplain TaskInputOutputContext} is an abstract class, while in Hadoop 2
-   * it is an interface.
-   * <p>
-   * Note: the intention here is only to provide the bare essentials required to
-   * make the {@linkplain MemPipeline} work. It lacks even the basic features
-   * that would provide proper support for unit testing pipelines.
-   */
-  private static TaskInputOutputContext<?, ?, ?, ?> getInMemoryContext(final Configuration conf) {
-    ProxyFactory factory = new ProxyFactory();
-    Class<TaskInputOutputContext> superType = TaskInputOutputContext.class;
-    Class[] types = new Class[0];
-    Object[] args = new Object[0];
-    if (superType.isInterface()) {
-      factory.setInterfaces(new Class[] { superType });
-    } else {
-      types = new Class[] { Configuration.class, TaskAttemptID.class, RecordWriter.class, OutputCommitter.class,
-          StatusReporter.class };
-      args = new Object[] { conf, new TaskAttemptID(), null, null, null };
-      factory.setSuperclass(superType);
-    }
-    factory.setFilter(new MethodFilter() {
-      @Override
-      public boolean isHandled(Method m) {
-        String name = m.getName();
-        return "getConfiguration".equals(name) || "getCounter".equals(name) || "progress".equals(name);
-      }
-    });
-    MethodHandler handler = new MethodHandler() {
-      @Override
-      public Object invoke(Object arg0, Method m, Method arg2, Object[] args) throws Throwable {
-        String name = m.getName();
-        if ("getConfiguration".equals(name)) {
-          return conf;
-        } else if ("progress".equals(name)) {
-          // no-op
-          return null;
-        } else { // getCounter
-          if (args.length == 1) {
-            return MemPipeline.getCounters().findCounter((Enum<?>) args[0]);
-          } else {
-            return MemPipeline.getCounters().findCounter((String) args[0], (String) args[1]);
-          }
-        }
-      }
-    };
-    try {
-      Object newInstance = factory.create(types, args, handler);
-      return (TaskInputOutputContext<?, ?, ?, ?>) newInstance;
-    } catch (Exception e) {
-      e.printStackTrace();
-      throw new RuntimeException(e);
-    }
-  }
-}
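
A hedged sketch of how MemCollection executes a DoFn eagerly (the uppercase MapFn and the
sample data are illustrative, not part of the original source): parallelDo sets an
in-memory context on the function, runs it over every element, and wraps the emitted
values in a new MemCollection.

    import org.apache.crunch.MapFn;
    import org.apache.crunch.PCollection;
    import org.apache.crunch.impl.mem.collect.MemCollection;
    import org.apache.crunch.types.writable.Writables;

    import com.google.common.collect.ImmutableList;

    public class MemCollectionSketch {
      public static void main(String[] args) {
        PCollection<String> words = new MemCollection<String>(
            ImmutableList.of("a", "b", "c"), Writables.strings(), "words");

        // The MapFn runs immediately; there is no job planning or deferred execution.
        PCollection<String> upper = words.parallelDo(new MapFn<String, String>() {
          @Override
          public String map(String input) {
            return input.toUpperCase();
          }
        }, Writables.strings());

        System.out.println(upper.materialize()); // [A, B, C]
      }
    }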

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mem/collect/MemGroupedTable.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mem/collect/MemGroupedTable.java b/crunch/src/main/java/org/apache/crunch/impl/mem/collect/MemGroupedTable.java
deleted file mode 100644
index d105bb4..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mem/collect/MemGroupedTable.java
+++ /dev/null
@@ -1,113 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mem.collect;
-
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
-
-import org.apache.crunch.Aggregator;
-import org.apache.crunch.CombineFn;
-import org.apache.crunch.GroupingOptions;
-import org.apache.crunch.PCollection;
-import org.apache.crunch.PGroupedTable;
-import org.apache.crunch.PTable;
-import org.apache.crunch.Pair;
-import org.apache.crunch.Pipeline;
-import org.apache.crunch.Target;
-import org.apache.crunch.fn.Aggregators;
-import org.apache.crunch.types.PTableType;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.PTypeFamily;
-import org.apache.hadoop.io.RawComparator;
-import org.apache.hadoop.util.ReflectionUtils;
-
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-
-class MemGroupedTable<K, V> extends MemCollection<Pair<K, Iterable<V>>> implements PGroupedTable<K, V> {
-
-  private final MemTable<K, V> parent;
-
-  private static <S, T> Iterable<Pair<S, Iterable<T>>> buildMap(MemTable<S, T> parent, GroupingOptions options) {
-    PType<S> keyType = parent.getKeyType();
-    Shuffler<S, T> shuffler = Shuffler.create(keyType, options, parent.getPipeline());
-
-    for (Pair<S, T> pair : parent.materialize()) {
-      shuffler.add(pair);
-    }
-
-    return shuffler;
-  }
-
-  public MemGroupedTable(MemTable<K, V> parent, GroupingOptions options) {
-    super(buildMap(parent, options));
-    this.parent = parent;
-  }
-
-  @Override
-  public PCollection<Pair<K, Iterable<V>>> union(PCollection<Pair<K, Iterable<V>>>... collections) {
-    throw new UnsupportedOperationException();
-  }
-
-  @Override
-  public PCollection<Pair<K, Iterable<V>>> write(Target target) {
-    getPipeline().write(this.ungroup(), target);
-    return this;
-  }
-
-  @Override
-  public PType<Pair<K, Iterable<V>>> getPType() {
-    PTableType<K, V> parentType = parent.getPTableType();
-    if (parentType != null) {
-      return parentType.getGroupedTableType();
-    }
-    return null;
-  }
-
-  @Override
-  public PTypeFamily getTypeFamily() {
-    return parent.getTypeFamily();
-  }
-
-  @Override
-  public long getSize() {
-    return 1; // getSize is only used for pipeline optimization in MR
-  }
-
-  @Override
-  public String getName() {
-    return "MemGrouped(" + parent.getName() + ")";
-  }
-
-  @Override
-  public PTable<K, V> combineValues(CombineFn<K, V> combineFn) {
-    return parallelDo(combineFn, parent.getPTableType());
-  }
-
-  @Override
-  public PTable<K, V> combineValues(Aggregator<V> agg) {
-    return combineValues(Aggregators.<K, V>toCombineFn(agg));
-  }
-
-  @Override
-  public PTable<K, V> ungroup() {
-    return parent;
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mem/collect/MemTable.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mem/collect/MemTable.java b/crunch/src/main/java/org/apache/crunch/impl/mem/collect/MemTable.java
deleted file mode 100644
index f8a5960..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mem/collect/MemTable.java
+++ /dev/null
@@ -1,177 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mem.collect;
-
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.crunch.FilterFn;
-import org.apache.crunch.GroupingOptions;
-import org.apache.crunch.PCollection;
-import org.apache.crunch.PGroupedTable;
-import org.apache.crunch.PObject;
-import org.apache.crunch.PTable;
-import org.apache.crunch.Pair;
-import org.apache.crunch.Target;
-import org.apache.crunch.lib.Aggregate;
-import org.apache.crunch.lib.Cogroup;
-import org.apache.crunch.lib.Join;
-import org.apache.crunch.lib.PTables;
-import org.apache.crunch.materialize.MaterializableMap;
-import org.apache.crunch.materialize.pobject.MapPObject;
-import org.apache.crunch.types.PTableType;
-import org.apache.crunch.types.PType;
-
-import com.google.common.collect.Lists;
-
-public class MemTable<K, V> extends MemCollection<Pair<K, V>> implements PTable<K, V> {
-
-  private PTableType<K, V> ptype;
-
-  public MemTable(Iterable<Pair<K, V>> collect) {
-    this(collect, null, null);
-  }
-
-  public MemTable(Iterable<Pair<K, V>> collect, PTableType<K, V> ptype, String name) {
-    super(collect, ptype, name);
-    this.ptype = ptype;
-  }
-
-  @Override
-  public PTable<K, V> union(PTable<K, V> other) {
-    return union(new PTable[] { other });
-  }
-  
-  @Override
-  public PTable<K, V> union(PTable<K, V>... others) {
-    List<Pair<K, V>> values = Lists.newArrayList();
-    values.addAll(getCollection());
-    for (PTable<K, V> ptable : others) {
-      for (Pair<K, V> p : ptable.materialize()) {
-        values.add(p);
-      }
-    }
-    return new MemTable<K, V>(values, others[0].getPTableType(), null);
-  }
-
-  @Override
-  public PGroupedTable<K, V> groupByKey() {
-    return groupByKey(null);
-  }
-
-  @Override
-  public PGroupedTable<K, V> groupByKey(int numPartitions) {
-    return groupByKey(null);
-  }
-
-  @Override
-  public PGroupedTable<K, V> groupByKey(GroupingOptions options) {
-    return new MemGroupedTable<K, V>(this, options);
-  }
-
-  @Override
-  public PTable<K, V> write(Target target) {
-    super.write(target);
-    return this;
-  }
-
-  @Override
-  public PTable<K, V> write(Target target, Target.WriteMode writeMode) {
-    getPipeline().write(this, target, writeMode);
-    return this;
-  }
-  
-  @Override
-  public PTableType<K, V> getPTableType() {
-    return ptype;
-  }
-
-  @Override
-  public PType<K> getKeyType() {
-    if (ptype != null) {
-      return ptype.getKeyType();
-    }
-    return null;
-  }
-
-  @Override
-  public PType<V> getValueType() {
-    if (ptype != null) {
-      return ptype.getValueType();
-    }
-    return null;
-  }
-
-  @Override
-  public PTable<K, V> filter(FilterFn<Pair<K, V>> filterFn) {
-    return parallelDo(filterFn, getPTableType());
-  }
-  
-  @Override
-  public PTable<K, V> filter(String name, FilterFn<Pair<K, V>> filterFn) {
-    return parallelDo(name, filterFn, getPTableType());
-  }
-
-  @Override
-  public PTable<K, V> top(int count) {
-    return Aggregate.top(this, count, true);
-  }
-
-  @Override
-  public PTable<K, V> bottom(int count) {
-    return Aggregate.top(this, count, false);
-  }
-
-  @Override
-  public PTable<K, Collection<V>> collectValues() {
-    return Aggregate.collectValues(this);
-  }
-
-  @Override
-  public <U> PTable<K, Pair<V, U>> join(PTable<K, U> other) {
-    return Join.join(this, other);
-  }
-
-  @Override
-  public <U> PTable<K, Pair<Collection<V>, Collection<U>>> cogroup(PTable<K, U> other) {
-    return Cogroup.cogroup(this, other);
-  }
-
-  @Override
-  public PCollection<K> keys() {
-    return PTables.keys(this);
-  }
-
-  @Override
-  public PCollection<V> values() {
-    return PTables.values(this);
-  }
-
-  @Override
-  public Map<K, V> materializeToMap() {
-    return new MaterializableMap<K, V>(this.materialize());
-  }
-
-  /** {@inheritDoc} */
-  @Override
-  public PObject<Map<K, V>> asMap() {
-    return new MapPObject<K, V>(this);
-  }
-
-}
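
To make the grouping behaviour above concrete, a hedged sketch of grouping and combining
an in-memory table. It assumes the standard Writables.tableOf(...) and
Aggregators.SUM_LONGS() factories, which are not shown in this diff.

    import org.apache.crunch.PTable;
    import org.apache.crunch.Pair;
    import org.apache.crunch.fn.Aggregators;
    import org.apache.crunch.impl.mem.collect.MemTable;
    import org.apache.crunch.types.writable.Writables;

    import com.google.common.collect.ImmutableList;

    public class MemTableSketch {
      public static void main(String[] args) {
        PTable<String, Long> counts = new MemTable<String, Long>(
            ImmutableList.of(Pair.of("a", 1L), Pair.of("a", 2L), Pair.of("b", 5L)),
            Writables.tableOf(Writables.strings(), Writables.longs()), "counts");

        // groupByKey builds a MemGroupedTable (backed by a TreeMap, since String is
        // Comparable) and combineValues folds each group's values in memory.
        PTable<String, Long> summed = counts.groupByKey()
            .combineValues(Aggregators.SUM_LONGS());

        for (Pair<String, Long> p : summed.materialize()) {
          System.out.println(p.first() + " -> " + p.second());
        }
      }
    }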

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mem/collect/Shuffler.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mem/collect/Shuffler.java b/crunch/src/main/java/org/apache/crunch/impl/mem/collect/Shuffler.java
deleted file mode 100644
index 2e8f9eb..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mem/collect/Shuffler.java
+++ /dev/null
@@ -1,149 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mem.collect;
-
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.TreeMap;
-
-import org.apache.crunch.GroupingOptions;
-import org.apache.crunch.Pair;
-import org.apache.crunch.Pipeline;
-import org.apache.crunch.impl.SingleUseIterable;
-import org.apache.crunch.types.PType;
-import org.apache.hadoop.io.RawComparator;
-import org.apache.hadoop.util.ReflectionUtils;
-
-import com.google.common.base.Function;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Iterators;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-
-/**
- * In-memory versions of common MapReduce patterns for aggregating key-value data.
- */
-abstract class Shuffler<K, V> implements Iterable<Pair<K, Iterable<V>>> {
-
-  public abstract void add(Pair<K, V> record);
-  
-  private static <K, V> Map<K, V> getMapForKeyType(PType<?> ptype) {
-    if (ptype != null && Comparable.class.isAssignableFrom(ptype.getTypeClass())) {
-      return new TreeMap<K, V>();
-    } else {
-      return Maps.newHashMap();
-    }
-  }
-  
-  public static <S, T> Shuffler<S, T> create(PType<S> keyType, GroupingOptions options,
-      Pipeline pipeline) {
-    Map<S, Collection<T>> map = getMapForKeyType(keyType);
-    
-    if (options != null) {
-      if (Pair.class.equals(keyType.getTypeClass()) && options.getGroupingComparatorClass() != null) {
-        PType<?> pairKey = keyType.getSubTypes().get(0);
-        return new SecondarySortShuffler(getMapForKeyType(pairKey));
-      } else if (options.getSortComparatorClass() != null) {
-        RawComparator<S> rc = ReflectionUtils.newInstance(options.getSortComparatorClass(),
-            pipeline.getConfiguration());
-        map = new TreeMap<S, Collection<T>>(rc);
-      }
-    }
-    
-    return new MapShuffler<S, T>(map);
-  }
-  
-  private static class HFunction<K, V> implements Function<Map.Entry<K, Collection<V>>, Pair<K, Iterable<V>>> {
-    @Override
-    public Pair<K, Iterable<V>> apply(Map.Entry<K, Collection<V>> input) {
-      return Pair.<K, Iterable<V>>of(input.getKey(), new SingleUseIterable<V>(input.getValue()));
-    }
-  }
-  
-  private static class MapShuffler<K, V> extends Shuffler<K, V> {
-    private final Map<K, Collection<V>> map;
-    
-    public MapShuffler(Map<K, Collection<V>> map) {
-      this.map = map;
-    }
-    
-    @Override
-    public Iterator<Pair<K, Iterable<V>>> iterator() {
-      return Iterators.transform(map.entrySet().iterator(),
-          new HFunction<K, V>());
-    }
-
-    @Override
-    public void add(Pair<K, V> record) {
-      if (!map.containsKey(record.first())) {
-        Collection<V> values = Lists.newArrayList();
-        map.put(record.first(), values);
-      }
-      map.get(record.first()).add(record.second());
-    }
-  }
-
-  private static class SSFunction<K, SK, V> implements
-      Function<Map.Entry<K, List<Pair<SK, V>>>, Pair<Pair<K, SK>, Iterable<V>>> {
-    @Override
-    public Pair<Pair<K, SK>, Iterable<V>> apply(Entry<K, List<Pair<SK, V>>> input) {
-      List<Pair<SK, V>> values = input.getValue();
-      Collections.sort(values, new Comparator<Pair<SK, V>>() {
-        @Override
-        public int compare(Pair<SK, V> o1, Pair<SK, V> o2) {
-          return ((Comparable) o1.first()).compareTo(o2.first());
-        }
-      });
-      Pair<K, SK> key = Pair.of(input.getKey(), values.get(0).first());
-      return Pair.of(key, Iterables.transform(values, new Function<Pair<SK, V>, V>() {
-        @Override
-        public V apply(Pair<SK, V> input) {
-          return input.second();
-        }
-      }));
-    }
-  }
-
-  private static class SecondarySortShuffler<K, SK, V> extends Shuffler<Pair<K, SK>, V> {
-
-    private Map<K, List<Pair<SK, V>>> map;
-    
-    public SecondarySortShuffler(Map<K, List<Pair<SK, V>>> map) {
-      this.map = map;
-    }
-    
-    @Override
-    public Iterator<Pair<Pair<K, SK>, Iterable<V>>> iterator() {
-      return Iterators.transform(map.entrySet().iterator(), new SSFunction<K, SK, V>());
-    }
-
-    @Override
-    public void add(Pair<Pair<K, SK>, V> record) {
-      K primary = record.first().first();
-      if (!map.containsKey(primary)) {
-        map.put(primary, Lists.<Pair<SK, V>>newArrayList());
-      }
-      map.get(primary).add(Pair.of(record.first().second(), record.second()));
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mem/emit/InMemoryEmitter.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mem/emit/InMemoryEmitter.java b/crunch/src/main/java/org/apache/crunch/impl/mem/emit/InMemoryEmitter.java
deleted file mode 100644
index 6976615..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mem/emit/InMemoryEmitter.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mem.emit;
-
-import java.util.List;
-
-import org.apache.crunch.Emitter;
-
-import com.google.common.collect.Lists;
-
-/**
- * An {@code Emitter} instance that writes emitted records to a backing
- * {@code List}.
- * 
- * @param <T> the type of the emitted records
- */
-public class InMemoryEmitter<T> implements Emitter<T> {
-
-  private final List<T> output;
-
-  public InMemoryEmitter() {
-    this(Lists.<T> newArrayList());
-  }
-
-  public InMemoryEmitter(List<T> output) {
-    this.output = output;
-  }
-
-  @Override
-  public void emit(T emitted) {
-    output.add(emitted);
-  }
-
-  @Override
-  public void flush() {
-
-  }
-
-  public List<T> getOutput() {
-    return output;
-  }
-}
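
This is also a convenient seam for unit-testing a DoFn in isolation: push inputs through
it with an InMemoryEmitter and inspect the captured output. A hedged sketch (the
length-counting DoFn is illustrative):

    import org.apache.crunch.DoFn;
    import org.apache.crunch.Emitter;
    import org.apache.crunch.impl.mem.emit.InMemoryEmitter;

    public class InMemoryEmitterSketch {
      public static void main(String[] args) {
        DoFn<String, Integer> lengths = new DoFn<String, Integer>() {
          @Override
          public void process(String input, Emitter<Integer> emitter) {
            emitter.emit(input.length());
          }
        };

        InMemoryEmitter<Integer> emitter = new InMemoryEmitter<Integer>();
        lengths.initialize();
        lengths.process("foo", emitter);
        lengths.process("quux", emitter);
        lengths.cleanup(emitter);

        // getOutput() exposes the backing List of everything that was emitted.
        System.out.println(emitter.getOutput()); // [3, 4]
      }
    }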

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mem/package-info.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mem/package-info.java b/crunch/src/main/java/org/apache/crunch/impl/mem/package-info.java
deleted file mode 100644
index a55b673..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mem/package-info.java
+++ /dev/null
@@ -1,22 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * In-memory Pipeline implementation for rapid prototyping and testing.
- */
-package org.apache.crunch.impl.mem;

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/MRPipeline.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/MRPipeline.java b/crunch/src/main/java/org/apache/crunch/impl/mr/MRPipeline.java
deleted file mode 100644
index 00cf486..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/MRPipeline.java
+++ /dev/null
@@ -1,396 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr;
-
-import java.io.IOException;
-import java.util.Map;
-import java.util.Random;
-import java.util.Set;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.crunch.CrunchRuntimeException;
-import org.apache.crunch.MapFn;
-import org.apache.crunch.PCollection;
-import org.apache.crunch.PTable;
-import org.apache.crunch.Pipeline;
-import org.apache.crunch.PipelineExecution;
-import org.apache.crunch.PipelineResult;
-import org.apache.crunch.Source;
-import org.apache.crunch.SourceTarget;
-import org.apache.crunch.TableSource;
-import org.apache.crunch.Target;
-import org.apache.crunch.Target.WriteMode;
-import org.apache.crunch.fn.IdentityFn;
-import org.apache.crunch.impl.mr.collect.InputCollection;
-import org.apache.crunch.impl.mr.collect.InputTable;
-import org.apache.crunch.impl.mr.collect.PCollectionImpl;
-import org.apache.crunch.impl.mr.collect.PGroupedTableImpl;
-import org.apache.crunch.impl.mr.collect.UnionCollection;
-import org.apache.crunch.impl.mr.collect.UnionTable;
-import org.apache.crunch.impl.mr.exec.MRExecutor;
-import org.apache.crunch.impl.mr.plan.MSCRPlanner;
-import org.apache.crunch.impl.mr.run.RuntimeParameters;
-import org.apache.crunch.io.From;
-import org.apache.crunch.io.ReadableSource;
-import org.apache.crunch.io.ReadableSourceTarget;
-import org.apache.crunch.io.To;
-import org.apache.crunch.materialize.MaterializableIterable;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.writable.Writables;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-
-import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
-
-/**
- * Pipeline implementation that is executed within Hadoop MapReduce.
- */
-public class MRPipeline implements Pipeline {
-
-  private static final Log LOG = LogFactory.getLog(MRPipeline.class);
-
-  private static final Random RANDOM = new Random();
-
-  private final Class<?> jarClass;
-  private final String name;
-  private final Map<PCollectionImpl<?>, Set<Target>> outputTargets;
-  private final Map<PCollectionImpl<?>, MaterializableIterable<?>> outputTargetsToMaterialize;
-  private Path tempDirectory;
-  private int tempFileIndex;
-  private int nextAnonymousStageId;
-
-  private Configuration conf;
-
-  /**
-   * Instantiate with a default Configuration and name.
-   * 
-   * @param jarClass Class containing the main driver method for running the pipeline
-   */
-  public MRPipeline(Class<?> jarClass) {
-    this(jarClass, new Configuration());
-  }
-
-  /**
-   * Instantiate with a custom pipeline name. The name will be displayed in the Hadoop JobTracker.
-   * 
-   * @param jarClass Class containing the main driver method for running the pipeline
-   * @param name Display name of the pipeline
-   */
-  public MRPipeline(Class<?> jarClass, String name) {
-    this(jarClass, name, new Configuration());
-  }
-
-  /**
-   * Instantiate with a custom configuration and default naming.
-   * 
-   * @param jarClass Class containing the main driver method for running the pipeline
-   * @param conf Configuration to be used within all MapReduce jobs run in the pipeline
-   */
-  public MRPipeline(Class<?> jarClass, Configuration conf) {
-    this(jarClass, jarClass.getName(), conf);
-  }
-
-  /**
-   * Instantiate with a custom name and configuration. The name will be displayed in the Hadoop
-   * JobTracker.
-   * 
-   * @param jarClass Class containing the main driver method for running the pipeline
-   * @param name Display name of the pipeline
-   * @param conf Configuration to be used within all MapReduce jobs run in the pipeline
-   */
-  public MRPipeline(Class<?> jarClass, String name, Configuration conf) {
-    this.jarClass = jarClass;
-    this.name = name;
-    this.outputTargets = Maps.newHashMap();
-    this.outputTargetsToMaterialize = Maps.newHashMap();
-    this.conf = conf;
-    this.tempDirectory = createTempDirectory(conf);
-    this.tempFileIndex = 0;
-    this.nextAnonymousStageId = 0;
-  }
-
-  @Override
-  public Configuration getConfiguration() {
-    return conf;
-  }
-
-  @Override
-  public void setConfiguration(Configuration conf) {
-    this.conf = conf;
-    this.tempDirectory = createTempDirectory(conf);
-  }
-
-  public MRExecutor plan() {
-    Map<PCollectionImpl<?>, MaterializableIterable> toMaterialize = Maps.newHashMap();
-    for (PCollectionImpl<?> c : outputTargets.keySet()) {
-      if (outputTargetsToMaterialize.containsKey(c)) {
-        toMaterialize.put(c, outputTargetsToMaterialize.get(c));
-        outputTargetsToMaterialize.remove(c);
-      }
-    }
-    MSCRPlanner planner = new MSCRPlanner(this, outputTargets, toMaterialize);
-    try {
-      return planner.plan(jarClass, conf);
-    } catch (IOException e) {
-      throw new CrunchRuntimeException(e);
-    }
-  }
-
-  @Override
-  public PipelineResult run() {
-    try {
-      PipelineExecution pipelineExecution = runAsync();
-      pipelineExecution.waitUntilDone();
-      return pipelineExecution.getResult();
-    } catch (InterruptedException e) {
-      // TODO: How to handle this without changing signature?
-      LOG.error("Exception running pipeline", e);
-      return PipelineResult.EMPTY;
-    }
-  }
-  
-  @Override
-  public PipelineExecution runAsync() {
-    PipelineExecution res = plan().execute();
-    outputTargets.clear();
-    return res;
-  }
-
-  @Override
-  public PipelineResult done() {
-    PipelineResult res = null;
-    if (!outputTargets.isEmpty()) {
-      res = run();
-    }
-    cleanup();
-    return res;
-  }
-
-  public <S> PCollection<S> read(Source<S> source) {
-    return new InputCollection<S>(source, this);
-  }
-
-  public <K, V> PTable<K, V> read(TableSource<K, V> source) {
-    return new InputTable<K, V>(source, this);
-  }
-
-  public PCollection<String> readTextFile(String pathName) {
-    return read(From.textFile(pathName));
-  }
-
-  public void write(PCollection<?> pcollection, Target target) {
-    write(pcollection, target, Target.WriteMode.DEFAULT);
-  }
-  
-  @SuppressWarnings("unchecked")
-  public void write(PCollection<?> pcollection, Target target,
-      Target.WriteMode writeMode) {
-    if (pcollection instanceof PGroupedTableImpl) {
-      pcollection = ((PGroupedTableImpl<?, ?>) pcollection).ungroup();
-    } else if (pcollection instanceof UnionCollection || pcollection instanceof UnionTable) {
-      pcollection = pcollection.parallelDo("UnionCollectionWrapper",
-          (MapFn) IdentityFn.<Object> getInstance(), pcollection.getPType());
-    }
-    target.handleExisting(writeMode, getConfiguration());
-    if (writeMode != WriteMode.APPEND && targetInCurrentRun(target)) {
-      throw new CrunchRuntimeException("Target " + target + " is already written in the current run." +
-          " Use WriteMode.APPEND in order to write additional data to it.");
-    }
-    addOutput((PCollectionImpl<?>) pcollection, target);
-  }
-
-  private boolean targetInCurrentRun(Target target) {
-    for (Set<Target> targets : outputTargets.values()) {
-      if (targets.contains(target)) {
-        return true;
-      }
-    }
-    return false;
-  }
-  
-  private void addOutput(PCollectionImpl<?> impl, Target target) {
-    if (!outputTargets.containsKey(impl)) {
-      outputTargets.put(impl, Sets.<Target> newHashSet());
-    }
-    outputTargets.get(impl).add(target);
-  }
-
-  @Override
-  public <T> Iterable<T> materialize(PCollection<T> pcollection) {
-
-    PCollectionImpl<T> pcollectionImpl = toPcollectionImpl(pcollection);
-    ReadableSource<T> readableSrc = getMaterializeSourceTarget(pcollectionImpl);
-
-    MaterializableIterable<T> c = new MaterializableIterable<T>(this, readableSrc);
-    if (!outputTargetsToMaterialize.containsKey(pcollectionImpl)) {
-      outputTargetsToMaterialize.put(pcollectionImpl, c);
-    }
-    return c;
-  }
-
-  /**
-   * Retrieve a ReadableSourceTarget that provides access to the contents of a {@link PCollection}.
-   * This is primarily intended as a helper method to {@link #materialize(PCollection)}. The
-   * underlying data of the ReadableSourceTarget may not actually be present until the pipeline is
-   * run.
-   * 
-   * @param pcollection The collection for which the ReadableSourceTarget is to be retrieved
-   * @return The ReadableSourceTarget
-   * @throws IllegalArgumentException If no ReadableSourceTarget can be retrieved for the given
-   *           PCollection
-   */
-  public <T> ReadableSource<T> getMaterializeSourceTarget(PCollection<T> pcollection) {
-    PCollectionImpl<T> impl = toPcollectionImpl(pcollection);
-
-    // First, check to see if this is a readable input collection.
-    if (impl instanceof InputCollection) {
-      InputCollection<T> ic = (InputCollection<T>) impl;
-      if (ic.getSource() instanceof ReadableSource) {
-        return (ReadableSource) ic.getSource();
-      } else {
-        throw new IllegalArgumentException(
-            "Cannot materialize non-readable input collection: " + ic);
-      }
-    } else if (impl instanceof InputTable) {
-      InputTable it = (InputTable) impl;
-      if (it.getSource() instanceof ReadableSource) {
-        return (ReadableSource) it.getSource();
-      } else {
-        throw new IllegalArgumentException(
-            "Cannot materialize non-readable input table: " + it);
-      }
-    }
-
-    // Next, check to see if this pcollection has already been materialized.
-    SourceTarget<T> matTarget = impl.getMaterializedAt();
-    if (matTarget != null && matTarget instanceof ReadableSourceTarget) {
-      return (ReadableSourceTarget<T>) matTarget;
-    }
-    
-    // Check to see if we plan on materializing this collection on the
-    // next run.
-    ReadableSourceTarget<T> srcTarget = null;
-    if (outputTargets.containsKey(impl)) {
-      for (Target target : outputTargets.get(impl)) {
-        if (target instanceof ReadableSourceTarget) {
-          return (ReadableSourceTarget<T>) target;
-        }
-      }
-    }
-
-    // If we're not planning on materializing it already, create a temporary
-    // output to hold the materialized records and return that.
-    SourceTarget<T> st = createIntermediateOutput(pcollection.getPType());
-    if (!(st instanceof ReadableSourceTarget)) {
-      throw new IllegalArgumentException("The PType for the given PCollection is not readable"
-          + " and cannot be materialized");
-    } else {
-      srcTarget = (ReadableSourceTarget<T>) st;
-      addOutput(impl, srcTarget);
-      return srcTarget;
-    }
-  }
-
-  /**
-   * Safely cast a PCollection into a PCollectionImpl, including handling the case of
-   * UnionCollections.
-   * 
-   * @param pcollection The PCollection to be cast/transformed
-   * @return The PCollectionImpl representation
-   */
-  private <T> PCollectionImpl<T> toPcollectionImpl(PCollection<T> pcollection) {
-    PCollectionImpl<T> pcollectionImpl = null;
-    if (pcollection instanceof UnionCollection || pcollection instanceof UnionTable) {
-      pcollectionImpl = (PCollectionImpl<T>) pcollection.parallelDo("UnionCollectionWrapper",
-          (MapFn) IdentityFn.<Object> getInstance(), pcollection.getPType());
-    } else {
-      pcollectionImpl = (PCollectionImpl<T>) pcollection;
-    }
-    return pcollectionImpl;
-  }
-
-  public <T> SourceTarget<T> createIntermediateOutput(PType<T> ptype) {
-    return ptype.getDefaultFileSource(createTempPath());
-  }
-
-  public Path createTempPath() {
-    tempFileIndex++;
-    return new Path(tempDirectory, "p" + tempFileIndex);
-  }
-
-  private static Path createTempDirectory(Configuration conf) {
-    Path dir = createTemporaryPath(conf);
-    try {
-      dir.getFileSystem(conf).mkdirs(dir);
-    } catch (IOException e) {
-      throw new RuntimeException("Cannot create job output directory " + dir, e);
-    }
-    return dir;
-  }
-
-  private static Path createTemporaryPath(Configuration conf) {
-    String baseDir = conf.get(RuntimeParameters.TMP_DIR, "/tmp");
-    return new Path(baseDir, "crunch-" + (RANDOM.nextInt() & Integer.MAX_VALUE));
-  }
-
-  @Override
-  public <T> void writeTextFile(PCollection<T> pcollection, String pathName) {
-    pcollection.parallelDo("asText", new StringifyFn<T>(), Writables.strings())
-        .write(To.textFile(pathName));
-  }
-
-  private static class StringifyFn<T> extends MapFn<T, String> {
-    @Override
-    public String map(T input) {
-      return input.toString();
-    }
-  }
-  
-  private void cleanup() {
-    if (!outputTargets.isEmpty()) {
-      LOG.warn("Not running cleanup while output targets remain");
-      return;
-    }
-    try {
-      FileSystem fs = tempDirectory.getFileSystem(conf);
-      if (fs.exists(tempDirectory)) {
-        fs.delete(tempDirectory, true);
-      }
-    } catch (IOException e) {
-      LOG.info("Exception during cleanup", e);
-    }
-  }
-
-  public int getNextAnonymousStageId() {
-    return nextAnonymousStageId++;
-  }
-
-  @Override
-  public void enableDebug() {
-    // Turn on Crunch runtime error catching.
-    getConfiguration().setBoolean(RuntimeParameters.DEBUG, true);
-  }
-
-  @Override
-  public String getName() {
-    return name;
-  }
-}
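
For contrast with the in-memory implementation, a hedged sketch of a typical MRPipeline
driver; the class name, paths and filter logic are illustrative and not taken from the
original source.

    import org.apache.crunch.FilterFn;
    import org.apache.crunch.PCollection;
    import org.apache.crunch.Pipeline;
    import org.apache.crunch.PipelineResult;
    import org.apache.crunch.impl.mr.MRPipeline;
    import org.apache.hadoop.conf.Configuration;

    public class WordFilterDriver {
      public static void main(String[] args) {
        // The jar containing this class is shipped with the jobs, the name shows up in
        // the JobTracker, and the Configuration is shared by every job in the plan.
        Pipeline pipeline = new MRPipeline(WordFilterDriver.class, "word-filter", new Configuration());

        PCollection<String> lines = pipeline.readTextFile(args[0]);
        PCollection<String> nonEmpty = lines.filter(new FilterFn<String>() {
          @Override
          public boolean accept(String input) {
            return !input.isEmpty();
          }
        });
        pipeline.writeTextFile(nonEmpty, args[1]);

        // done() runs whatever is still pending and then deletes the temp directory.
        PipelineResult result = pipeline.done();
        System.exit(result != null && result.succeeded() ? 0 : 1);
      }
    }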

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/collect/DoCollectionImpl.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/collect/DoCollectionImpl.java b/crunch/src/main/java/org/apache/crunch/impl/mr/collect/DoCollectionImpl.java
deleted file mode 100644
index 7b8f2ea..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/collect/DoCollectionImpl.java
+++ /dev/null
@@ -1,74 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.collect;
-
-import java.util.List;
-import java.util.Set;
-
-import org.apache.crunch.DoFn;
-import org.apache.crunch.ParallelDoOptions;
-import org.apache.crunch.SourceTarget;
-import org.apache.crunch.impl.mr.plan.DoNode;
-import org.apache.crunch.types.PType;
-
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableSet;
-
-public class DoCollectionImpl<S> extends PCollectionImpl<S> {
-
-  private final PCollectionImpl<Object> parent;
-  private final DoFn<Object, S> fn;
-  private final PType<S> ntype;
-
-  <T> DoCollectionImpl(String name, PCollectionImpl<T> parent, DoFn<T, S> fn, PType<S> ntype) {
-    this(name, parent, fn, ntype, ParallelDoOptions.builder().build());
-  }
-  
-  <T> DoCollectionImpl(String name, PCollectionImpl<T> parent, DoFn<T, S> fn, PType<S> ntype,
-      ParallelDoOptions options) {
-    super(name, options);
-    this.parent = (PCollectionImpl<Object>) parent;
-    this.fn = (DoFn<Object, S>) fn;
-    this.ntype = ntype;
-  }
-
-  @Override
-  protected long getSizeInternal() {
-    return (long) (fn.scaleFactor() * parent.getSize());
-  }
-
-  @Override
-  public PType<S> getPType() {
-    return ntype;
-  }
-
-  @Override
-  protected void acceptInternal(PCollectionImpl.Visitor visitor) {
-    visitor.visitDoFnCollection(this);
-  }
-
-  @Override
-  public List<PCollectionImpl<?>> getParents() {
-    return ImmutableList.<PCollectionImpl<?>> of(parent);
-  }
-
-  @Override
-  public DoNode createDoNode() {
-    return DoNode.createFnNode(getName(), fn, ntype);
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/collect/DoTableImpl.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/collect/DoTableImpl.java b/crunch/src/main/java/org/apache/crunch/impl/mr/collect/DoTableImpl.java
deleted file mode 100644
index 176643b..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/collect/DoTableImpl.java
+++ /dev/null
@@ -1,84 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.collect;
-
-import java.util.List;
-
-import org.apache.crunch.CombineFn;
-import org.apache.crunch.DoFn;
-import org.apache.crunch.PTable;
-import org.apache.crunch.Pair;
-import org.apache.crunch.ParallelDoOptions;
-import org.apache.crunch.impl.mr.plan.DoNode;
-import org.apache.crunch.types.PTableType;
-import org.apache.crunch.types.PType;
-
-import com.google.common.collect.ImmutableList;
-
-public class DoTableImpl<K, V> extends PTableBase<K, V> implements PTable<K, V> {
-
-  private final PCollectionImpl<?> parent;
-  private final DoFn<?, Pair<K, V>> fn;
-  private final PTableType<K, V> type;
-
-  <S> DoTableImpl(String name, PCollectionImpl<S> parent, DoFn<S, Pair<K, V>> fn, PTableType<K, V> ntype) {
-    this(name, parent, fn, ntype, ParallelDoOptions.builder().build());
-  }
-  
-  <S> DoTableImpl(String name, PCollectionImpl<S> parent, DoFn<S, Pair<K, V>> fn, PTableType<K, V> ntype,
-      ParallelDoOptions options) {
-    super(name, options);
-    this.parent = parent;
-    this.fn = fn;
-    this.type = ntype;
-  }
-
-  @Override
-  protected long getSizeInternal() {
-    return (long) (fn.scaleFactor() * parent.getSize());
-  }
-
-  @Override
-  public PTableType<K, V> getPTableType() {
-    return type;
-  }
-
-  @Override
-  protected void acceptInternal(PCollectionImpl.Visitor visitor) {
-    visitor.visitDoTable(this);
-  }
-
-  @Override
-  public PType<Pair<K, V>> getPType() {
-    return type;
-  }
-
-  @Override
-  public List<PCollectionImpl<?>> getParents() {
-    return ImmutableList.<PCollectionImpl<?>> of(parent);
-  }
-
-  @Override
-  public DoNode createDoNode() {
-    return DoNode.createFnNode(getName(), fn, type);
-  }
-
-  public boolean hasCombineFn() {
-    return fn instanceof CombineFn;
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/collect/InputCollection.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/collect/InputCollection.java b/crunch/src/main/java/org/apache/crunch/impl/mr/collect/InputCollection.java
deleted file mode 100644
index ace5cc1..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/collect/InputCollection.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.collect;
-
-import java.util.List;
-
-import org.apache.commons.lang.builder.HashCodeBuilder;
-import org.apache.crunch.Source;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.impl.mr.plan.DoNode;
-import org.apache.crunch.types.PType;
-
-import com.google.common.collect.ImmutableList;
-
-public class InputCollection<S> extends PCollectionImpl<S> {
-
-  private final Source<S> source;
-
-  public InputCollection(Source<S> source, MRPipeline pipeline) {
-    super(source.toString());
-    this.source = source;
-    this.pipeline = pipeline;
-  }
-
-  @Override
-  public PType<S> getPType() {
-    return source.getType();
-  }
-
-  public Source<S> getSource() {
-    return source;
-  }
-
-  @Override
-  protected long getSizeInternal() {
-    long sz = source.getSize(pipeline.getConfiguration());
-    if (sz < 0) {
-      throw new IllegalStateException("Input source " + source + " does not exist!");
-    }
-    return sz;
-  }
-
-  @Override
-  protected void acceptInternal(PCollectionImpl.Visitor visitor) {
-    visitor.visitInputCollection(this);
-  }
-
-  @Override
-  public List<PCollectionImpl<?>> getParents() {
-    return ImmutableList.of();
-  }
-
-  @Override
-  public DoNode createDoNode() {
-    return DoNode.createInputNode(source);
-  }
-
-  @Override
-  public boolean equals(Object obj) {
-    if (obj == null || !(obj instanceof InputCollection)) {
-      return false;
-    }
-    return source.equals(((InputCollection) obj).source);
-  }
-
-  @Override
-  public int hashCode() {
-    return new HashCodeBuilder().append(source).toHashCode();
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/impl/mr/collect/InputTable.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/impl/mr/collect/InputTable.java b/crunch/src/main/java/org/apache/crunch/impl/mr/collect/InputTable.java
deleted file mode 100644
index 71f11c5..0000000
--- a/crunch/src/main/java/org/apache/crunch/impl/mr/collect/InputTable.java
+++ /dev/null
@@ -1,86 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.impl.mr.collect;
-
-import java.util.List;
-
-import org.apache.crunch.Pair;
-import org.apache.crunch.TableSource;
-import org.apache.crunch.impl.mr.MRPipeline;
-import org.apache.crunch.impl.mr.plan.DoNode;
-import org.apache.crunch.types.PTableType;
-import org.apache.crunch.types.PType;
-
-import com.google.common.collect.ImmutableList;
-
-public class InputTable<K, V> extends PTableBase<K, V> {
-
-  private final TableSource<K, V> source;
-  private final InputCollection<Pair<K, V>> asCollection;
-
-  public InputTable(TableSource<K, V> source, MRPipeline pipeline) {
-    super(source.toString());
-    this.source = source;
-    this.pipeline = pipeline;
-    this.asCollection = new InputCollection<Pair<K, V>>(source, pipeline);
-  }
-
-  public TableSource<K, V> getSource() {
-    return source;
-  }
-  
-  @Override
-  protected long getSizeInternal() {
-    return asCollection.getSizeInternal();
-  }
-
-  @Override
-  public PTableType<K, V> getPTableType() {
-    return source.getTableType();
-  }
-
-  @Override
-  public PType<Pair<K, V>> getPType() {
-    return source.getType();
-  }
-
-  @Override
-  public List<PCollectionImpl<?>> getParents() {
-    return ImmutableList.of();
-  }
-
-  @Override
-  protected void acceptInternal(PCollectionImpl.Visitor visitor) {
-    visitor.visitInputCollection(asCollection);
-  }
-
-  @Override
-  public DoNode createDoNode() {
-    return DoNode.createInputNode(source);
-  }
-
-  @Override
-  public int hashCode() {
-    return asCollection.hashCode();
-  }
-
-  @Override
-  public boolean equals(Object other) {
-    return asCollection.equals(other);
-  }
-}


http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/lib/join/InnerJoinFn.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/lib/join/InnerJoinFn.java b/crunch/src/main/java/org/apache/crunch/lib/join/InnerJoinFn.java
deleted file mode 100644
index a3d30d2..0000000
--- a/crunch/src/main/java/org/apache/crunch/lib/join/InnerJoinFn.java
+++ /dev/null
@@ -1,78 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib.join;
-
-import java.util.List;
-
-import org.apache.crunch.Emitter;
-import org.apache.crunch.Pair;
-import org.apache.crunch.types.PType;
-
-import com.google.common.collect.Lists;
-
-/**
- * Used to perform the last step of an inner join.
- * 
- * @param <K> Type of the keys.
- * @param <U> Type of the first {@link org.apache.crunch.PTable}'s values
- * @param <V> Type of the second {@link org.apache.crunch.PTable}'s values
- */
-public class InnerJoinFn<K, U, V> extends JoinFn<K, U, V> {
-
-  private transient K lastKey;
-  private transient List<U> leftValues;
-
-  public InnerJoinFn(PType<K> keyType, PType<U> leftValueType) {
-    super(keyType, leftValueType);
-  }
-
-  /** {@inheritDoc} */
-  @Override
-  public void initialize() {
-    super.initialize();
-    lastKey = null;
-    this.leftValues = Lists.newArrayList();
-  }
-
-  /** {@inheritDoc} */
-  @Override
-  public void join(K key, int id, Iterable<Pair<U, V>> pairs, Emitter<Pair<K, Pair<U, V>>> emitter) {
-    if (!key.equals(lastKey)) {
-      lastKey = keyType.getDetachedValue(key);
-      leftValues.clear();
-    }
-    if (id == 0) { // from left
-      for (Pair<U, V> pair : pairs) {
-        if (pair.first() != null)
-          leftValues.add(leftValueType.getDetachedValue(pair.first()));
-      }
-    } else { // from right
-      for (Pair<U, V> pair : pairs) {
-        for (U u : leftValues) {
-          emitter.emit(Pair.of(lastKey, Pair.of(u, pair.second())));
-        }
-      }
-    }
-  }
-
-  /** {@inheritDoc} */
-  @Override
-  public String getJoinType() {
-    return "innerJoin";
-  }
-}

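[Editor's note, not part of the commit: InnerJoinFn is normally driven through the Join library rather than constructed directly. A minimal, hedged sketch follows; `left` and `right` are illustrative PTables assumed to exist elsewhere, and Join.leftJoin/Join.rightJoin parallel this call for the outer-join DoFns further down.]

    // Inner join on the common key; yields a table of (key, (leftValue, rightValue)).
    PTable<String, Pair<Long, String>> joined = Join.innerJoin(left, right);
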
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/lib/join/JoinFn.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/lib/join/JoinFn.java b/crunch/src/main/java/org/apache/crunch/lib/join/JoinFn.java
deleted file mode 100644
index 99aea5a..0000000
--- a/crunch/src/main/java/org/apache/crunch/lib/join/JoinFn.java
+++ /dev/null
@@ -1,81 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib.join;
-
-import org.apache.crunch.DoFn;
-import org.apache.crunch.Emitter;
-import org.apache.crunch.Pair;
-import org.apache.crunch.types.PType;
-
-/**
- * Represents a {@link org.apache.crunch.DoFn} for performing joins.
- * 
- * @param <K> Type of the keys.
- * @param <U> Type of the first {@link org.apache.crunch.PTable}'s values
- * @param <V> Type of the second {@link org.apache.crunch.PTable}'s values
- */
-public abstract class JoinFn<K, U, V> extends
-    DoFn<Pair<Pair<K, Integer>, Iterable<Pair<U, V>>>, Pair<K, Pair<U, V>>> {
-
-  protected PType<K> keyType;
-  protected PType<U> leftValueType;
-
-  /**
-   * Instantiate with the PType of the value of the left side of the join (used for creating deep
-   * copies of values).
-   * 
-   * @param keyType The PType of the value used as the key of the join
-   * @param leftValueType The PType of the value type of the left side of the join
-   */
-  public JoinFn(PType<K> keyType, PType<U> leftValueType) {
-    this.keyType = keyType;
-    this.leftValueType = leftValueType;
-  }
-
-  @Override
-  public void initialize() {
-    this.keyType.initialize(getConfiguration());
-    this.leftValueType.initialize(getConfiguration());
-  }
-
-  /** @return The name of this join type (e.g. innerJoin, leftOuterJoin). */
-  public abstract String getJoinType();
-
-  /**
-   * Performs the actual joining.
-   * 
-   * @param key The key for this grouping of values.
-   * @param id The side that this group of values is from (0 -> left, 1 -> right).
-   * @param pairs The group of values associated with this key and id pair.
-   * @param emitter The emitter to send the output to.
-   */
-  public abstract void join(K key, int id, Iterable<Pair<U, V>> pairs,
-      Emitter<Pair<K, Pair<U, V>>> emitter);
-
-  /**
-   * Split up the input record to make coding a bit more manageable.
-   * 
-   * @param input The input record.
-   * @param emitter The emitter to send the output to.
-   */
-  @Override
-  public void process(Pair<Pair<K, Integer>, Iterable<Pair<U, V>>> input,
-      Emitter<Pair<K, Pair<U, V>>> emitter) {
-    join(input.first().first(), input.first().second(), input.second(), emitter);
-  }
-}

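[Editor's note, not part of the commit: to make the JoinFn contract above concrete, here is a hypothetical subclass. The id argument identifies the side of the grouped values: 0 for the left table, 1 for the right; the unused side of each Pair is typically null.]

    import org.apache.crunch.Emitter;
    import org.apache.crunch.Pair;
    import org.apache.crunch.lib.join.JoinFn;
    import org.apache.crunch.types.PType;

    // Illustrative only: emits every grouped value unchanged, regardless of side.
    public class PassThroughJoinFn<K, U, V> extends JoinFn<K, U, V> {

      public PassThroughJoinFn(PType<K> keyType, PType<U> leftValueType) {
        super(keyType, leftValueType);
      }

      @Override
      public String getJoinType() {
        return "passThroughJoin";
      }

      @Override
      public void join(K key, int id, Iterable<Pair<U, V>> pairs, Emitter<Pair<K, Pair<U, V>>> emitter) {
        // id == 0: values came from the left table; id == 1: from the right table.
        for (Pair<U, V> pair : pairs) {
          emitter.emit(Pair.of(key, pair));
        }
      }
    }
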
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/lib/join/JoinUtils.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/lib/join/JoinUtils.java b/crunch/src/main/java/org/apache/crunch/lib/join/JoinUtils.java
deleted file mode 100644
index 6efeccb..0000000
--- a/crunch/src/main/java/org/apache/crunch/lib/join/JoinUtils.java
+++ /dev/null
@@ -1,126 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib.join;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.IndexedRecord;
-import org.apache.avro.io.BinaryData;
-import org.apache.avro.mapred.AvroJob;
-import org.apache.avro.mapred.AvroKey;
-import org.apache.avro.mapred.AvroValue;
-import org.apache.avro.mapred.AvroWrapper;
-import org.apache.avro.reflect.ReflectData;
-import org.apache.crunch.types.PTypeFamily;
-import org.apache.crunch.types.writable.TupleWritable;
-import org.apache.crunch.types.writable.WritableTypeFamily;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.io.DataInputBuffer;
-import org.apache.hadoop.io.RawComparator;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.mapreduce.Partitioner;
-
-/**
- * Utilities that are useful in joining multiple data sets via a MapReduce.
- * 
- */
-public class JoinUtils {
-
-  public static Class<? extends Partitioner> getPartitionerClass(PTypeFamily typeFamily) {
-    if (typeFamily == WritableTypeFamily.getInstance()) {
-      return TupleWritablePartitioner.class;
-    } else {
-      return AvroIndexedRecordPartitioner.class;
-    }
-  }
-
-  public static Class<? extends RawComparator> getGroupingComparator(PTypeFamily typeFamily) {
-    if (typeFamily == WritableTypeFamily.getInstance()) {
-      return TupleWritableComparator.class;
-    } else {
-      return AvroPairGroupingComparator.class;
-    }
-  }
-
-  public static class TupleWritablePartitioner extends Partitioner<TupleWritable, Writable> {
-    @Override
-    public int getPartition(TupleWritable key, Writable value, int numPartitions) {
-      return (Math.abs(key.get(0).hashCode()) & Integer.MAX_VALUE) % numPartitions;
-    }
-  }
-
-  public static class TupleWritableComparator implements RawComparator<TupleWritable> {
-
-    private DataInputBuffer buffer = new DataInputBuffer();
-    private TupleWritable key1 = new TupleWritable();
-    private TupleWritable key2 = new TupleWritable();
-
-    @Override
-    public int compare(TupleWritable o1, TupleWritable o2) {
-      return ((WritableComparable) o1.get(0)).compareTo((WritableComparable) o2.get(0));
-    }
-
-    @Override
-    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
-      try {
-        buffer.reset(b1, s1, l1);
-        key1.readFields(buffer);
-
-        buffer.reset(b2, s2, l2);
-        key2.readFields(buffer);
-      } catch (Exception e) {
-        throw new RuntimeException(e);
-      }
-
-      return compare(key1, key2);
-    }
-  }
-
-  public static class AvroIndexedRecordPartitioner<K, V> extends Partitioner<AvroKey<K>, AvroValue<V>> {
-    @Override
-    public int getPartition(AvroKey<K> key, AvroValue<V> value, int numPartitions) {
-      IndexedRecord record = (IndexedRecord) key.datum();
-      return (Math.abs(record.get(0).hashCode()) & Integer.MAX_VALUE) % numPartitions;
-    }
-  }
-
-  public static class AvroPairGroupingComparator<T> extends Configured implements RawComparator<AvroWrapper<T>> {
-    private Schema schema;
-
-    @Override
-    public void setConf(Configuration conf) {
-      super.setConf(conf);
-      if (conf != null) {
-        Schema mapOutputSchema = AvroJob.getMapOutputSchema(conf);
-        Schema keySchema = org.apache.avro.mapred.Pair.getKeySchema(mapOutputSchema);
-        schema = keySchema.getFields().get(0).schema();
-      }
-    }
-
-    @Override
-    public int compare(AvroWrapper<T> x, AvroWrapper<T> y) {
-      return ReflectData.get().compare(x.datum(), y.datum(), schema);
-    }
-
-    @Override
-    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
-      return BinaryData.compare(b1, s1, l1, b2, s2, l2, schema);
-    }
-  }
-}

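[Editor's note, not part of the commit: a small sketch of how the helpers above select classes by type family; job wiring is omitted.]

    PTypeFamily family = WritableTypeFamily.getInstance();
    Class<? extends Partitioner> partitioner = JoinUtils.getPartitionerClass(family);    // TupleWritablePartitioner
    Class<? extends RawComparator> grouping = JoinUtils.getGroupingComparator(family);   // TupleWritableComparator
    // Passing AvroTypeFamily.getInstance() selects the Avro-based classes instead.
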
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/lib/join/LeftOuterJoinFn.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/lib/join/LeftOuterJoinFn.java b/crunch/src/main/java/org/apache/crunch/lib/join/LeftOuterJoinFn.java
deleted file mode 100644
index 731c496..0000000
--- a/crunch/src/main/java/org/apache/crunch/lib/join/LeftOuterJoinFn.java
+++ /dev/null
@@ -1,98 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib.join;
-
-import java.util.List;
-
-import org.apache.crunch.Emitter;
-import org.apache.crunch.Pair;
-import org.apache.crunch.types.PType;
-
-import com.google.common.collect.Lists;
-
-/**
- * Used to perform the last step of a left outer join.
- * 
- * @param <K> Type of the keys.
- * @param <U> Type of the first {@link org.apache.crunch.PTable}'s values
- * @param <V> Type of the second {@link org.apache.crunch.PTable}'s values
- */
-public class LeftOuterJoinFn<K, U, V> extends JoinFn<K, U, V> {
-
-  private transient int lastId;
-  private transient K lastKey;
-  private transient List<U> leftValues;
-
-  public LeftOuterJoinFn(PType<K> keyType, PType<U> leftValueType) {
-    super(keyType, leftValueType);
-  }
-
-  /** {@inheritDoc} */
-  @Override
-  public void initialize() {
-    super.initialize();
-    lastId = 1;
-    lastKey = null;
-    this.leftValues = Lists.newArrayList();
-  }
-
-  /** {@inheritDoc} */
-  @Override
-  public void join(K key, int id, Iterable<Pair<U, V>> pairs, Emitter<Pair<K, Pair<U, V>>> emitter) {
-    if (!key.equals(lastKey)) {
-      // Make sure that left side always gets emitted.
-      if (0 == lastId) {
-        for (U u : leftValues) {
-          emitter.emit(Pair.of(lastKey, Pair.of(u, (V) null)));
-        }
-      }
-      lastKey = keyType.getDetachedValue(key);
-      leftValues.clear();
-    }
-    if (id == 0) {
-      for (Pair<U, V> pair : pairs) {
-        if (pair.first() != null)
-          leftValues.add(leftValueType.getDetachedValue(pair.first()));
-      }
-    } else {
-      for (Pair<U, V> pair : pairs) {
-        for (U u : leftValues) {
-          emitter.emit(Pair.of(lastKey, Pair.of(u, pair.second())));
-        }
-      }
-    }
-
-    lastId = id;
-  }
-
-  /** {@inheritDoc} */
-  @Override
-  public void cleanup(Emitter<Pair<K, Pair<U, V>>> emitter) {
-    if (0 == lastId) {
-      for (U u : leftValues) {
-        emitter.emit(Pair.of(lastKey, Pair.of(u, (V) null)));
-      }
-    }
-  }
-
-  /** {@inheritDoc} */
-  @Override
-  public String getJoinType() {
-    return "leftOuterJoin";
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/lib/join/MapsideJoin.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/lib/join/MapsideJoin.java b/crunch/src/main/java/org/apache/crunch/lib/join/MapsideJoin.java
deleted file mode 100644
index 56476c1..0000000
--- a/crunch/src/main/java/org/apache/crunch/lib/join/MapsideJoin.java
+++ /dev/null
@@ -1,164 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib.join;
-
-import java.io.IOException;
-
-import org.apache.crunch.CrunchRuntimeException;
-import org.apache.crunch.DoFn;
-import org.apache.crunch.Emitter;
-import org.apache.crunch.PTable;
-import org.apache.crunch.Pair;
-import org.apache.crunch.ParallelDoOptions;
-import org.apache.crunch.SourceTarget;
-import org.apache.crunch.io.ReadableSourceTarget;
-import org.apache.crunch.materialize.MaterializableIterable;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.PTypeFamily;
-import org.apache.crunch.util.DistCache;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-
-import com.google.common.collect.ArrayListMultimap;
-import com.google.common.collect.HashMultimap;
-import com.google.common.collect.Multimap;
-
-/**
- * Utility for doing map side joins on a common key between two {@link PTable}s.
- * <p>
- * A map side join is an optimized join which doesn't use a reducer; instead,
- * the right side of the join is loaded into memory and the join is performed in
- * a mapper. This style of join has the important implication that the output of
- * the join is not sorted, as it would be with a conventional (reducer-based)
- * join.
- * <p>
- * <b>Note:</b> This utility is only supported when running with a
- * {@link MRPipeline} as the pipeline.
- */
-public class MapsideJoin {
-
-  /**
-   * Join two tables using a map side join. The right-side table will be loaded
-   * fully in memory, so this method should only be used if the right side
-   * table's contents can fit in the memory allocated to mappers. The join
-   * performed by this method is an inner join.
-   * 
-   * @param left
-   *          The left-side table of the join
-   * @param right
-   *          The right-side table of the join, whose contents will be fully
-   *          read into memory
-   * @return A table keyed on the join key, containing pairs of joined values
-   */
-  public static <K, U, V> PTable<K, Pair<U, V>> join(PTable<K, U> left, PTable<K, V> right) {
-    PTypeFamily tf = left.getTypeFamily();
-    Iterable<Pair<K, V>> iterable = right.materialize();
-
-    if (iterable instanceof MaterializableIterable) {
-      MaterializableIterable<Pair<K, V>> mi = (MaterializableIterable<Pair<K, V>>) iterable;
-      MapsideJoinDoFn<K, U, V> mapJoinDoFn = new MapsideJoinDoFn<K, U, V>(mi.getPath().toString(),
-          right.getPType());
-      ParallelDoOptions.Builder optionsBuilder = ParallelDoOptions.builder();
-      if (mi.isSourceTarget()) {
-        optionsBuilder.sourceTargets((SourceTarget) mi.getSource());
-      }
-      return left.parallelDo("mapjoin", mapJoinDoFn,
-          tf.tableOf(left.getKeyType(), tf.pairs(left.getValueType(), right.getValueType())),
-          optionsBuilder.build());
-    } else { // in-memory pipeline
-      return left.parallelDo(new InMemoryJoinFn<K, U, V>(iterable),
-          tf.tableOf(left.getKeyType(), tf.pairs(left.getValueType(), right.getValueType())));
-    }
-  }
-
-  static class InMemoryJoinFn<K, U, V> extends DoFn<Pair<K, U>, Pair<K, Pair<U, V>>> {
-
-    private Multimap<K, V> joinMap;
-    
-    public InMemoryJoinFn(Iterable<Pair<K, V>> iterable) {
-      joinMap = HashMultimap.create();
-      for (Pair<K, V> joinPair : iterable) {
-        joinMap.put(joinPair.first(), joinPair.second());
-      }
-    }
-    
-    @Override
-    public void process(Pair<K, U> input, Emitter<Pair<K, Pair<U, V>>> emitter) {
-      K key = input.first();
-      U value = input.second();
-      for (V joinValue : joinMap.get(key)) {
-        Pair<U, V> valuePair = Pair.of(value, joinValue);
-        emitter.emit(Pair.of(key, valuePair));
-      }
-    }
-  }
-  
-  static class MapsideJoinDoFn<K, U, V> extends DoFn<Pair<K, U>, Pair<K, Pair<U, V>>> {
-
-    private String inputPath;
-    private PType<Pair<K, V>> ptype;
-    private Multimap<K, V> joinMap;
-
-    public MapsideJoinDoFn(String inputPath, PType<Pair<K, V>> ptype) {
-      this.inputPath = inputPath;
-      this.ptype = ptype;
-    }
-
-    private Path getCacheFilePath() {
-      Path local = DistCache.getPathToCacheFile(new Path(inputPath), getConfiguration());
-      if (local == null) {
-        throw new CrunchRuntimeException("Can't find local cache file for '" + inputPath + "'");
-      }
-      return local;
-    }
-
-    @Override
-    public void configure(Configuration conf) {
-      DistCache.addCacheFile(new Path(inputPath), conf);
-    }
-    
-    @Override
-    public void initialize() {
-      super.initialize();
-
-      ReadableSourceTarget<Pair<K, V>> sourceTarget = ptype.getDefaultFileSource(
-          getCacheFilePath());
-      Iterable<Pair<K, V>> iterable = null;
-      try {
-        iterable = sourceTarget.read(getConfiguration());
-      } catch (IOException e) {
-        throw new CrunchRuntimeException("Error reading right-side of map side join: ", e);
-      }
-
-      joinMap = ArrayListMultimap.create();
-      for (Pair<K, V> joinPair : iterable) {
-        joinMap.put(joinPair.first(), joinPair.second());
-      }
-    }
-
-    @Override
-    public void process(Pair<K, U> input, Emitter<Pair<K, Pair<U, V>>> emitter) {
-      K key = input.first();
-      U value = input.second();
-      for (V joinValue : joinMap.get(key)) {
-        Pair<U, V> valuePair = Pair.of(value, joinValue);
-        emitter.emit(Pair.of(key, valuePair));
-      }
-    }
-  }
-}

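[Editor's note, not part of the commit: a minimal usage sketch for the utility above. Table names and value types are illustrative, and the right-side table must be small enough to fit in mapper memory.]

    // Assumes `customers` (left) and `orders` (right) are PTable<String, String>
    // instances created elsewhere in the pipeline.
    PTable<String, Pair<String, String>> joined = MapsideJoin.join(customers, orders);
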
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/lib/join/RightOuterJoinFn.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/lib/join/RightOuterJoinFn.java b/crunch/src/main/java/org/apache/crunch/lib/join/RightOuterJoinFn.java
deleted file mode 100644
index 2789d40..0000000
--- a/crunch/src/main/java/org/apache/crunch/lib/join/RightOuterJoinFn.java
+++ /dev/null
@@ -1,83 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib.join;
-
-import java.util.List;
-
-import org.apache.crunch.Emitter;
-import org.apache.crunch.Pair;
-import org.apache.crunch.types.PType;
-
-import com.google.common.collect.Lists;
-
-/**
- * Used to perform the last step of a right outer join.
- * 
- * @param <K> Type of the keys.
- * @param <U> Type of the first {@link org.apache.crunch.PTable}'s values
- * @param <V> Type of the second {@link org.apache.crunch.PTable}'s values
- */
-public class RightOuterJoinFn<K, U, V> extends JoinFn<K, U, V> {
-
-  private transient K lastKey;
-  private transient List<U> leftValues;
-
-  public RightOuterJoinFn(PType<K> keyType, PType<U> leftValueType) {
-    super(keyType, leftValueType);
-  }
-
-  /** {@inheritDoc} */
-  @Override
-  public void initialize() {
-    super.initialize();
-    lastKey = null;
-    this.leftValues = Lists.newArrayList();
-  }
-
-  /** {@inheritDoc} */
-  @Override
-  public void join(K key, int id, Iterable<Pair<U, V>> pairs, Emitter<Pair<K, Pair<U, V>>> emitter) {
-    if (!key.equals(lastKey)) {
-      lastKey = keyType.getDetachedValue(key);
-      leftValues.clear();
-    }
-    if (id == 0) {
-      for (Pair<U, V> pair : pairs) {
-        if (pair.first() != null)
-          leftValues.add(leftValueType.getDetachedValue(pair.first()));
-      }
-    } else {
-      for (Pair<U, V> pair : pairs) {
-        // Make sure that right side gets emitted.
-        if (leftValues.isEmpty()) {
-          leftValues.add(null);
-        }
-
-        for (U u : leftValues) {
-          emitter.emit(Pair.of(lastKey, Pair.of(u, pair.second())));
-        }
-      }
-    }
-  }
-
-  /** {@inheritDoc} */
-  @Override
-  public String getJoinType() {
-    return "rightOuterJoin";
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/lib/join/package-info.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/lib/join/package-info.java b/crunch/src/main/java/org/apache/crunch/lib/join/package-info.java
deleted file mode 100644
index f1ad9f1..0000000
--- a/crunch/src/main/java/org/apache/crunch/lib/join/package-info.java
+++ /dev/null
@@ -1,22 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Inner and outer joins on collections.
- */
-package org.apache.crunch.lib.join;

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/lib/package-info.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/lib/package-info.java b/crunch/src/main/java/org/apache/crunch/lib/package-info.java
deleted file mode 100644
index 2695787..0000000
--- a/crunch/src/main/java/org/apache/crunch/lib/package-info.java
+++ /dev/null
@@ -1,22 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Joining, sorting, aggregating, and other commonly used functionality.
- */
-package org.apache.crunch.lib;

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/lib/sort/Comparators.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/lib/sort/Comparators.java b/crunch/src/main/java/org/apache/crunch/lib/sort/Comparators.java
deleted file mode 100644
index ae7f49a..0000000
--- a/crunch/src/main/java/org/apache/crunch/lib/sort/Comparators.java
+++ /dev/null
@@ -1,187 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib.sort;
-
-import java.util.Arrays;
-
-import org.apache.avro.Schema;
-import org.apache.avro.io.BinaryData;
-import org.apache.avro.mapred.AvroKey;
-import org.apache.avro.reflect.ReflectData;
-import org.apache.crunch.lib.Sort.ColumnOrder;
-import org.apache.crunch.lib.Sort.Order;
-import org.apache.crunch.types.writable.TupleWritable;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.io.RawComparator;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.io.WritableComparator;
-import org.apache.hadoop.mapred.JobConf;
-
-import com.google.common.base.Function;
-import com.google.common.base.Joiner;
-import com.google.common.collect.Iterables;
-
-/**
- * A collection of {@code RawComparator<T>} implementations that are used by Crunch's {@code Sort} library.
- */
-public class Comparators {
-  
-  public static class ReverseWritableComparator<T> extends Configured implements RawComparator<T> {
-
-    private RawComparator<T> comparator;
-
-    @SuppressWarnings("unchecked")
-    @Override
-    public void setConf(Configuration conf) {
-      super.setConf(conf);
-      if (conf != null) {
-        JobConf jobConf = new JobConf(conf);
-        comparator = WritableComparator.get(jobConf.getMapOutputKeyClass().asSubclass(WritableComparable.class));
-      }
-    }
-
-    @Override
-    public int compare(byte[] arg0, int arg1, int arg2, byte[] arg3, int arg4, int arg5) {
-      return -comparator.compare(arg0, arg1, arg2, arg3, arg4, arg5);
-    }
-
-    @Override
-    public int compare(T o1, T o2) {
-      return -comparator.compare(o1, o2);
-    }
-  }
-
-  public static class ReverseAvroComparator<T> extends Configured implements RawComparator<AvroKey<T>> {
-
-    private Schema schema;
-
-    @Override
-    public void setConf(Configuration conf) {
-      super.setConf(conf);
-      if (conf != null) {
-        schema = (new Schema.Parser()).parse(conf.get("crunch.schema"));
-      }
-    }
-
-    @Override
-    public int compare(AvroKey<T> o1, AvroKey<T> o2) {
-      return -ReflectData.get().compare(o1.datum(), o2.datum(), schema);
-    }
-
-    @Override
-    public int compare(byte[] arg0, int arg1, int arg2, byte[] arg3, int arg4, int arg5) {
-      return -BinaryData.compare(arg0, arg1, arg2, arg3, arg4, arg5, schema);
-    }
-  }
-
-  public static class TupleWritableComparator extends WritableComparator implements Configurable {
-
-    private static final String CRUNCH_ORDERING_PROPERTY = "crunch.ordering";
-
-    private Configuration conf;
-    private ColumnOrder[] columnOrders;
-
-    public TupleWritableComparator() {
-      super(TupleWritable.class, true);
-    }
-
-    public static void configureOrdering(Configuration conf, Order... orders) {
-      conf.set(CRUNCH_ORDERING_PROPERTY,
-          Joiner.on(",").join(Iterables.transform(Arrays.asList(orders), new Function<Order, String>() {
-            @Override
-            public String apply(Order o) {
-              return o.name();
-            }
-          })));
-    }
-
-    public static void configureOrdering(Configuration conf, ColumnOrder... columnOrders) {
-      conf.set(CRUNCH_ORDERING_PROPERTY,
-          Joiner.on(",").join(Iterables.transform(Arrays.asList(columnOrders), new Function<ColumnOrder, String>() {
-            @Override
-            public String apply(ColumnOrder o) {
-              return o.column() + ";" + o.order().name();
-            }
-          })));
-    }
-
-    @Override
-    public int compare(WritableComparable a, WritableComparable b) {
-      TupleWritable ta = (TupleWritable) a;
-      TupleWritable tb = (TupleWritable) b;
-      for (int index = 0; index < columnOrders.length; index++) {
-        int order = 1;
-        if (columnOrders[index].order() == Order.ASCENDING) {
-          order = 1;
-        } else if (columnOrders[index].order() == Order.DESCENDING) {
-          order = -1;
-        } else { // ignore
-          continue;
-        }
-        if (!ta.has(index) && !tb.has(index)) {
-          continue;
-        } else if (ta.has(index) && !tb.has(index)) {
-          return order;
-        } else if (!ta.has(index) && tb.has(index)) {
-          return -order;
-        } else {
-          Writable v1 = ta.get(index);
-          Writable v2 = tb.get(index);
-          if (v1 != v2 && (v1 != null && !v1.equals(v2))) {
-            if (v1 instanceof WritableComparable && v2 instanceof WritableComparable) {
-              int cmp = ((WritableComparable) v1).compareTo((WritableComparable) v2);
-              if (cmp != 0) {
-                return order * cmp;
-              }
-            } else {
-              int cmp = v1.hashCode() - v2.hashCode();
-              if (cmp != 0) {
-                return order * cmp;
-              }
-            }
-          }
-        }
-      }
-      return 0; // ordering using specified cols found no differences
-    }
-
-    @Override
-    public Configuration getConf() {
-      return conf;
-    }
-
-    @Override
-    public void setConf(Configuration conf) {
-      this.conf = conf;
-      if (conf != null) {
-        String ordering = conf.get(CRUNCH_ORDERING_PROPERTY);
-        String[] columnOrderNames = ordering.split(",");
-        columnOrders = new ColumnOrder[columnOrderNames.length];
-        for (int i = 0; i < columnOrders.length; i++) {
-          String[] split = columnOrderNames[i].split(";");
-          int column = Integer.parseInt(split[0]);
-          Order order = Order.valueOf(split[1]);
-          columnOrders[i] = ColumnOrder.by(column, order);
-        }
-      }
-    }
-  }
-}

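[Editor's note, not part of the commit: a hedged sketch of configuring the TupleWritableComparator above before a sort job runs; column numbers follow the Sort library's ColumnOrder.by convention.]

    Configuration conf = new Configuration();
    TupleWritableComparator.configureOrdering(conf,
        ColumnOrder.by(1, Order.ASCENDING),
        ColumnOrder.by(2, Order.DESCENDING));
    // setConf() later parses the "crunch.ordering" property back into a ColumnOrder[].
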
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/lib/sort/SortFns.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/lib/sort/SortFns.java b/crunch/src/main/java/org/apache/crunch/lib/sort/SortFns.java
deleted file mode 100644
index be218f6..0000000
--- a/crunch/src/main/java/org/apache/crunch/lib/sort/SortFns.java
+++ /dev/null
@@ -1,210 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib.sort;
-
-import java.util.List;
-import java.util.UUID;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericData;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.crunch.MapFn;
-import org.apache.crunch.Tuple;
-import org.apache.crunch.lib.Sort.ColumnOrder;
-import org.apache.crunch.lib.Sort.Order;
-import org.apache.crunch.types.PType;
-import org.apache.crunch.types.PTypeFamily;
-import org.apache.crunch.types.TupleFactory;
-import org.apache.crunch.types.avro.AvroType;
-import org.apache.crunch.types.avro.AvroTypeFamily;
-import org.apache.crunch.types.avro.Avros;
-
-import com.google.common.collect.Lists;
-
-/**
- * A set of {@code DoFn}s that are used by Crunch's {@code Sort} library.
- */
-public class SortFns {
-
-  /**
-   * Extracts a single indexed key from a {@code Tuple} instance.
-   */
-  public static class SingleKeyFn<V extends Tuple, K> extends MapFn<V, K> {
-    private final int index;
-    
-    public SingleKeyFn(int index) {
-      this.index = index;
-    }
-
-    @Override
-    public K map(V input) {
-      return (K) input.get(index);
-    }
-  }
-
-  /**
-   * Extracts a composite key from a {@code Tuple} instance.
-   */
-  public static class TupleKeyFn<V extends Tuple, K extends Tuple> extends MapFn<V, K> {
-    private final int[] indices;
-    private final TupleFactory tupleFactory;
-    
-    public TupleKeyFn(int[] indices, TupleFactory tupleFactory) {
-      this.indices = indices;
-      this.tupleFactory = tupleFactory;
-    }
-    
-    @Override
-    public K map(V input) {
-      Object[] values = new Object[indices.length];
-      for (int i = 0; i < indices.length; i++) {
-        values[i] = input.get(indices[i]);
-      }
-      return (K) tupleFactory.makeTuple(values);
-    }
-  }
-  
-  /**
-   * Pulls a composite set of keys from an Avro {@code GenericRecord} instance.
-   */
-  public static class AvroGenericFn<V extends Tuple> extends MapFn<V, GenericRecord> {
-
-    private final int[] indices;
-    private final String schemaJson;
-    private transient Schema schema;
-    
-    public AvroGenericFn(int[] indices, Schema schema) {
-      this.indices = indices;
-      this.schemaJson = schema.toString();
-    }
-    
-    @Override
-    public void initialize() {
-      this.schema = (new Schema.Parser()).parse(schemaJson);
-    }
-    
-    @Override
-    public GenericRecord map(V input) {
-      GenericRecord rec = new GenericData.Record(schema);
-      for (int i = 0; i < indices.length; i++) {
-        rec.put(i, input.get(indices[i]));
-      }
-      return rec;
-    }
-  }
-  
-  /**
-   * Constructs an Avro schema for the given {@code PType<S>} that respects the given column
-   * orderings.
-   */
-  public static <S> Schema createOrderedTupleSchema(PType<S> ptype, ColumnOrder[] orders) {
-    // Guarantee each tuple schema has a globally unique name
-    String tupleName = "tuple" + UUID.randomUUID().toString().replace('-', 'x');
-    Schema schema = Schema.createRecord(tupleName, "", "crunch", false);
-    List<Schema.Field> fields = Lists.newArrayList();
-    AvroType<S> parentAvroType = (AvroType<S>) ptype;
-    Schema parentAvroSchema = parentAvroType.getSchema();
-
-    for (int index = 0; index < orders.length; index++) {
-      ColumnOrder columnOrder = orders[index];
-      AvroType<?> atype = (AvroType<?>) ptype.getSubTypes().get(index);
-      Schema fieldSchema = atype.getSchema();
-      String fieldName = parentAvroSchema.getFields().get(index).name();
-      // Note: avro sorting of strings is inverted relative to how sorting works for WritableComparable
-      // Text instances: making this consistent
-      Schema.Field.Order order = columnOrder.order() == Order.DESCENDING ? Schema.Field.Order.DESCENDING :
-        Schema.Field.Order.ASCENDING;
-      fields.add(new Schema.Field(fieldName, fieldSchema, "", null, order));
-    }
-    schema.setFields(fields);
-    return schema;
-  }
-
-  /**
-   * Utility class for encapsulating key extraction logic and serialization information about
-   * key extraction.
-   */
-  public static class KeyExtraction<V extends Tuple> {
-
-    private PType<V> ptype;
-    private final ColumnOrder[] columnOrder;
-    private final int[] cols;
-    
-    private MapFn<V, Object> byFn;
-    private PType<Object> keyPType;
-    
-    public KeyExtraction(PType<V> ptype, ColumnOrder[] columnOrder) {
-      this.ptype = ptype;
-      this.columnOrder = columnOrder;
-      this.cols = new int[columnOrder.length];
-      for (int i = 0; i < columnOrder.length; i++) {
-        cols[i] = columnOrder[i].column() - 1;
-      }
-      init();
-    }
-    
-    private void init() {
-      List<PType> pt = ptype.getSubTypes();
-      PTypeFamily ptf = ptype.getFamily();
-      if (cols.length == 1) {
-        byFn = new SingleKeyFn(cols[0]);
-        keyPType = pt.get(cols[0]);
-      } else {
-        TupleFactory tf = null;
-        switch (cols.length) {
-        case 2:
-          tf = TupleFactory.PAIR;
-          keyPType = ptf.pairs(pt.get(cols[0]), pt.get(cols[1]));
-          break;
-        case 3:
-          tf = TupleFactory.TUPLE3;
-          keyPType = ptf.triples(pt.get(cols[0]), pt.get(cols[1]), pt.get(cols[2]));
-          break;
-        case 4:
-          tf = TupleFactory.TUPLE4;
-          keyPType = ptf.quads(pt.get(cols[0]), pt.get(cols[1]), pt.get(cols[2]), pt.get(cols[3]));
-          break;
-        default:
-          PType[] pts = new PType[cols.length];
-          for (int i = 0; i < pts.length; i++) {
-            pts[i] = pt.get(cols[i]);
-          }
-          tf = TupleFactory.TUPLEN;
-          keyPType = (PType<Object>) (PType<?>) ptf.tuples(pts);
-        }
-        
-        if (ptf == AvroTypeFamily.getInstance()) {
-          Schema s = createOrderedTupleSchema(keyPType, columnOrder);
-          keyPType = (PType<Object>) (PType<?>) Avros.generics(s);
-          byFn = new AvroGenericFn(cols, s);
-        } else {
-          byFn = new TupleKeyFn(cols, tf);
-        }
-      }
-      
-    }
-
-    public MapFn<V, Object> getByFn() {
-      return byFn;
-    }
-    
-    public PType<Object> getKeyType() {
-      return keyPType;
-    }
-  }
-}

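[Editor's note, not part of the commit: a short sketch of KeyExtraction in use. The types are illustrative; Avros.pairs/strings/longs come from the Avro type family.]

    PType<Pair<String, Long>> ptype = Avros.pairs(Avros.strings(), Avros.longs());
    SortFns.KeyExtraction<Pair<String, Long>> ke = new SortFns.KeyExtraction<Pair<String, Long>>(
        ptype, new ColumnOrder[] { ColumnOrder.by(2, Order.DESCENDING) });
    MapFn<Pair<String, Long>, Object> byFn = ke.getByFn();   // extracts the second column as the sort key
    PType<Object> keyType = ke.getKeyType();                 // PType describing that key
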
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/lib/sort/TotalOrderPartitioner.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/lib/sort/TotalOrderPartitioner.java b/crunch/src/main/java/org/apache/crunch/lib/sort/TotalOrderPartitioner.java
deleted file mode 100644
index 94fbdbe..0000000
--- a/crunch/src/main/java/org/apache/crunch/lib/sort/TotalOrderPartitioner.java
+++ /dev/null
@@ -1,145 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.lib.sort;
-
-import java.io.IOException;
-import java.lang.reflect.Array;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Iterator;
-
-import org.apache.avro.Schema;
-import org.apache.avro.mapred.AvroKey;
-import org.apache.crunch.io.CompositePathIterable;
-import org.apache.crunch.io.avro.AvroFileReaderFactory;
-import org.apache.crunch.io.seq.SeqFileReaderFactory;
-import org.apache.crunch.types.writable.WritableDeepCopier;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.RawComparator;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Partitioner;
-
-/**
- * A partition-aware {@code Partitioner} instance that can work with either Avro or Writable-formatted
- * keys.
- */
-public class TotalOrderPartitioner<K, V> extends Partitioner<K, V> implements Configurable {
-
-  public static final String DEFAULT_PATH = "_partition.lst";
-  public static final String PARTITIONER_PATH = 
-    "crunch.totalorderpartitioner.path";
-  
-  private Configuration conf;
-  private Node<K> partitions;
-  
-  @Override
-  public Configuration getConf() {
-    return conf;
-  }
-
-  @Override
-  public void setConf(Configuration conf) {
-    try {
-      this.conf = conf;
-      String parts = getPartitionFile(conf);
-      final Path partFile = new Path(parts);
-      final FileSystem fs = (DEFAULT_PATH.equals(parts))
-        ? FileSystem.getLocal(conf)     // assume in DistributedCache
-        : partFile.getFileSystem(conf);
-
-      Job job = new Job(conf);
-      Class<K> keyClass = (Class<K>)job.getMapOutputKeyClass();
-      RawComparator<K> comparator =
-          (RawComparator<K>) job.getSortComparator();
-      K[] splitPoints = readPartitions(fs, partFile, keyClass, conf, comparator);
-      int numReduceTasks = job.getNumReduceTasks();
-      if (splitPoints.length != numReduceTasks - 1) {
-        throw new IOException("Wrong number of partitions in keyset");
-      }
-      partitions = new BinarySearchNode(splitPoints, comparator);
-    } catch (IOException e) {
-      throw new IllegalArgumentException("Can't read partitions file", e);
-    }
-  }
-
-  @Override
-  public int getPartition(K key, V value, int modulo) {
-    return partitions.findPartition(key);
-  }
-
-  public static void setPartitionFile(Configuration conf, Path p) {
-    conf.set(PARTITIONER_PATH, p.toString());
-  }
-
-  public static String getPartitionFile(Configuration conf) {
-    return conf.get(PARTITIONER_PATH, DEFAULT_PATH);
-  }
-  
-  @SuppressWarnings("unchecked") // map output key class
-  private K[] readPartitions(FileSystem fs, Path p, Class<K> keyClass,
-      Configuration conf, final RawComparator<K> comparator) throws IOException {
-    ArrayList<K> parts = new ArrayList<K>();
-    String schema = conf.get("crunch.schema");
-    if (schema != null) {
-      Schema s = (new Schema.Parser()).parse(schema);
-      AvroFileReaderFactory<K> a = new AvroFileReaderFactory<K>(s);
-      Iterator<K> iter = CompositePathIterable.create(fs, p, a).iterator();
-      while (iter.hasNext()) {
-        parts.add((K) new AvroKey<K>(iter.next()));
-      }
-    } else {
-      WritableDeepCopier wdc = new WritableDeepCopier(keyClass);
-      SeqFileReaderFactory<K> s = new SeqFileReaderFactory<K>(keyClass);
-      Iterator<K> iter = CompositePathIterable.create(fs, p, s).iterator();
-      while (iter.hasNext()) {
-        parts.add((K) wdc.deepCopy((Writable) iter.next()));
-      }
-    }
-    Collections.sort(parts, comparator);
-    return parts.toArray((K[])Array.newInstance(keyClass, parts.size()));
-  }
-  
-  /**
-   * Interface to the partitioner to locate a key in the partition keyset.
-   */
-  interface Node<T> {
-    /**
-     * Locate partition in keyset K, st [Ki..Ki+1) defines a partition,
-     * with implicit K0 = -inf, Kn = +inf, and |K| = #partitions - 1.
-     */
-    int findPartition(T key);
-  }
-  
-  class BinarySearchNode implements Node<K> {
-    private final K[] splitPoints;
-    private final RawComparator<K> comparator;
-    BinarySearchNode(K[] splitPoints, RawComparator<K> comparator) {
-      this.splitPoints = splitPoints;
-      this.comparator = comparator;
-    }
-    public int findPartition(K key) {
-      final int pos = Arrays.binarySearch(splitPoints, key, comparator) + 1;
-      return (pos < 0) ? -pos : pos;
-    }
-  }
-}

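[Editor's note, not part of the commit: a hedged sketch of the configuration contract above; the path is illustrative.]

    Configuration conf = new Configuration();
    TotalOrderPartitioner.setPartitionFile(conf, new Path("/tmp/sort-splits/_partition.lst"));
    String partitionFile = TotalOrderPartitioner.getPartitionFile(conf);
    // Falls back to DEFAULT_PATH ("_partition.lst", assumed to live in the DistributedCache)
    // when the property has not been set.
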
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/materialize/MaterializableIterable.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/materialize/MaterializableIterable.java b/crunch/src/main/java/org/apache/crunch/materialize/MaterializableIterable.java
deleted file mode 100644
index 2dcc64f..0000000
--- a/crunch/src/main/java/org/apache/crunch/materialize/MaterializableIterable.java
+++ /dev/null
@@ -1,81 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.materialize;
-
-import java.io.IOException;
-import java.util.Iterator;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.crunch.CrunchRuntimeException;
-import org.apache.crunch.Pipeline;
-import org.apache.crunch.SourceTarget;
-import org.apache.crunch.io.PathTarget;
-import org.apache.crunch.io.ReadableSource;
-import org.apache.crunch.io.impl.FileSourceImpl;
-import org.apache.hadoop.fs.Path;
-
-public class MaterializableIterable<E> implements Iterable<E> {
-
-  private static final Log LOG = LogFactory.getLog(MaterializableIterable.class);
-
-  private final Pipeline pipeline;
-  private final ReadableSource<E> source;
-  private Iterable<E> materialized;
-
-  public MaterializableIterable(Pipeline pipeline, ReadableSource<E> source) {
-    this.pipeline = pipeline;
-    this.source = source;
-    this.materialized = null;
-  }
-
-  public ReadableSource<E> getSource() {
-    return source;
-  }
-
-  public boolean isSourceTarget() {
-    return (source instanceof SourceTarget);
-  }
-  
-  public Path getPath() {
-    if (source instanceof FileSourceImpl) {
-      return ((FileSourceImpl) source).getPath();
-    } else if (source instanceof PathTarget) {
-      return ((PathTarget) source).getPath();
-    }
-    return null;
-  }
-  
-  @Override
-  public Iterator<E> iterator() {
-    if (materialized == null) {
-      pipeline.run();
-      materialize();
-    }
-    return materialized.iterator();
-  }
-
-  public void materialize() {
-    try {
-      materialized = source.read(pipeline.getConfiguration());
-    } catch (IOException e) {
-      LOG.error("Could not materialize: " + source, e);
-      throw new CrunchRuntimeException(e);
-    }
-  }
-}

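[Editor's note, not part of the commit: in an MRPipeline, PCollection#materialize() hands back an iterable of this kind, and the pipeline only runs when it is first consumed. A minimal sketch, assuming `lines` is a PCollection<String> defined elsewhere.]

    Iterable<String> contents = lines.materialize();
    for (String line : contents) {   // first call to iterator() triggers pipeline.run()
      System.out.println(line);
    }
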
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/materialize/MaterializableMap.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/materialize/MaterializableMap.java b/crunch/src/main/java/org/apache/crunch/materialize/MaterializableMap.java
deleted file mode 100644
index 69082e2..0000000
--- a/crunch/src/main/java/org/apache/crunch/materialize/MaterializableMap.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.materialize;
-
-import java.util.AbstractMap;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Set;
-
-import org.apache.crunch.Pair;
-
-public class MaterializableMap<K, V> extends AbstractMap<K, V> {
-
-  private Iterable<Pair<K, V>> iterable;
-  private Set<Map.Entry<K, V>> entrySet;
-
-  public MaterializableMap(Iterable<Pair<K, V>> iterable) {
-    this.iterable = iterable;
-  }
-
-  private Set<Map.Entry<K, V>> toMapEntries(Iterable<Pair<K, V>> xs) {
-    HashMap<K, V> m = new HashMap<K, V>();
-    for (Pair<K, V> x : xs)
-      m.put(x.first(), x.second());
-    return m.entrySet();
-  }
-
-  @Override
-  public Set<Map.Entry<K, V>> entrySet() {
-    if (entrySet == null)
-      entrySet = toMapEntries(iterable);
-    return entrySet;
-  }
-
-}
\ No newline at end of file

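[Editor's note, not part of the commit: a small sketch of wrapping a materialized PTable in the Map view above; `counts` is an illustrative PTable<String, Long>.]

    Map<String, Long> asMap = new MaterializableMap<String, Long>(counts.materialize());
    Long occurrences = asMap.get("apache");   // entry set is built on first access
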
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/materialize/pobject/CollectionPObject.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/materialize/pobject/CollectionPObject.java b/crunch/src/main/java/org/apache/crunch/materialize/pobject/CollectionPObject.java
deleted file mode 100644
index 60e64b1..0000000
--- a/crunch/src/main/java/org/apache/crunch/materialize/pobject/CollectionPObject.java
+++ /dev/null
@@ -1,55 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.materialize.pobject;
-
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Iterator;
-
-import org.apache.crunch.PCollection;
-
-/**
- * A concrete implementation of {@link org.apache.crunch.materialize.pobject.PObjectImpl} whose
- * value is a Java {@link java.util.Collection} containing the elements of the underlying {@link
- * PCollection} for this {@link org.apache.crunch.PObject}.
- *
- * @param <S> The value type for elements contained in the {@code Collection} value encapsulated
- * by this {@code PObject}.
- */
-public class CollectionPObject<S> extends PObjectImpl<S, Collection<S>> {
-
-  /**
-   * Constructs a new instance of this {@code PObject} implementation.
-   *
-   * @param collect The backing {@code PCollection} for this {@code PObject}.
-   */
-  public CollectionPObject(PCollection<S> collect) {
-    super(collect);
-  }
-
-  /** {@inheritDoc} */
-  @Override
-  public Collection<S> process(Iterable<S> input) {
-    Collection<S> target = new ArrayList<S>();
-    Iterator<S> itr = input.iterator();
-    while (itr.hasNext()) {
-      target.add(itr.next());
-    }
-    return target;
-  }
-}

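[Editor's note, not part of the commit: a brief usage sketch for the PObject above; `words` is an illustrative PCollection<String>, and getValue() comes from the PObject interface.]

    PObject<Collection<String>> asCollection = new CollectionPObject<String>(words);
    Collection<String> values = asCollection.getValue();   // materializes the PCollection
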
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/materialize/pobject/FirstElementPObject.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/materialize/pobject/FirstElementPObject.java b/crunch/src/main/java/org/apache/crunch/materialize/pobject/FirstElementPObject.java
deleted file mode 100644
index aa5fd9e..0000000
--- a/crunch/src/main/java/org/apache/crunch/materialize/pobject/FirstElementPObject.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.materialize.pobject;
-
-import java.util.Iterator;
-
-import org.apache.crunch.PCollection;
-
-/**
- * A concrete implementation of {@link PObjectImpl} that uses the first element in the backing
- * {@link PCollection} as the {@link org.apache.crunch.PObject} value.
- *
- * @param <T> The value type of this {@code PObject}.
- */
-public class FirstElementPObject<T> extends PObjectImpl<T, T> {
-
-  /**
-   * Constructs a new instance of this {@code PObject} implementation.
-   *
-   * @param collect The backing {@code PCollection} for this {@code PObject}.
-   */
-  public FirstElementPObject(PCollection<T> collect) {
-    super(collect);
-  }
-
-  /** {@inheritDoc} */
-  @Override
-  public T process(Iterable<T> input) {
-    Iterator<T> itr = input.iterator();
-    if (itr.hasNext()) {
-      return itr.next();
-    }
-    return null;
-  }
-}
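
FirstElementPObject follows the same pattern; a short sketch (the backing PCollection is assumed to exist already):

    // Pull a single element out of a PCollection without collecting all of it locally.
    PObject<String> first = new FirstElementPObject<String>(lines);
    String head = first.getValue();   // null if the PCollection is empty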

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/materialize/pobject/MapPObject.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/materialize/pobject/MapPObject.java b/crunch/src/main/java/org/apache/crunch/materialize/pobject/MapPObject.java
deleted file mode 100644
index 243997f..0000000
--- a/crunch/src/main/java/org/apache/crunch/materialize/pobject/MapPObject.java
+++ /dev/null
@@ -1,62 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.materialize.pobject;
-
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map;
-
-import org.apache.crunch.PCollection;
-import org.apache.crunch.Pair;
-
-/**
- * A concrete implementation of {@link PObjectImpl} whose
- * value is a Java {@link Map}. The underlying {@link PCollection} for this
- * {@link org.apache.crunch.PObject} must contain {@link Pair}s of values. The
- * first element of the pair will be used as the map key, while the second element will be used
- * as the map value.  Note that the contents of the underlying {@code PCollection} may not be
- * reflected in the returned {@code Map}, since a single key may be mapped to several values in
- * the underlying {@code PCollection}, and only one of those values will appear in the {@code
- * Map} encapsulated by this {@code PObject}.
- *
- * @param <K> The type of keys for the Map.
- * @param <V> The type of values for the Map.
- */
-public class MapPObject<K, V> extends PObjectImpl<Pair<K, V>, Map<K, V>> {
-
-  /**
-   * Constructs a new instance of this {@code PObject} implementation.
-   *
-   * @param collect The backing {@code PCollection} for this {@code PObject}.
-   */
-  public MapPObject(PCollection<Pair<K, V>> collect) {
-    super(collect);
-  }
-
-  /** {@inheritDoc} */
-  @Override
-  public Map<K, V> process(Iterable<Pair<K, V>> input) {
-    Map<K, V> target = new HashMap<K, V>();
-    Iterator<Pair<K, V>> itr = input.iterator();
-    while (itr.hasNext()) {
-      Pair<K, V> pair = itr.next();
-      target.put(pair.first(), pair.second());
-    }
-    return target;
-  }
-}
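
A sketch of MapPObject usage (the counts table is hypothetical); note the caveat from the javadoc that a key mapped to several values keeps only one of them:

    PCollection<Pair<String, Long>> counts = ...;               // e.g. the output of a count
    PObject<Map<String, Long>> asMap = new MapPObject<String, Long>(counts);
    Map<String, Long> local = asMap.getValue();                 // one value per key survives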

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/materialize/pobject/PObjectImpl.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/materialize/pobject/PObjectImpl.java b/crunch/src/main/java/org/apache/crunch/materialize/pobject/PObjectImpl.java
deleted file mode 100644
index 59c2ba2..0000000
--- a/crunch/src/main/java/org/apache/crunch/materialize/pobject/PObjectImpl.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.materialize.pobject;
-
-import org.apache.crunch.PCollection;
-import org.apache.crunch.PObject;
-import org.apache.crunch.Pipeline;
-import org.apache.crunch.Target;
-
-/**
- * An abstract implementation of {@link PObject} that is backed by a {@link PCollection}.
- * Clients creating a concrete implementation should override the method
- * {@link PObjectImpl#process(Iterable)}, which transforms the backing PCollection into the
- * singleton value encapsulated by the PObject. Once this {@code PObject}'s value has been
- * calculated, the value is cached to prevent subsequent materializations of the backing
- * {@code PCollection}.
- *
- * @param <S> The type contained in the underlying PCollection.
- * @param <T> The type encapsulated by this PObject.
- */
-public abstract class PObjectImpl<S, T> implements PObject<T> {
-
-  // The underlying PCollection whose contents will be used to generate the value for this
-  // PObject.
-  private PCollection<S> collection;
-
-  // A variable to hold a cached copy of the value of this {@code PObject},
-  // to prevent unnecessary materializations of the backing {@code PCollection}.
-  private T cachedValue;
-
-  // A flag indicating if a value for this {@code PObject} has been cached.
-  private boolean isCached;
-
-  /**
-   * Constructs a new instance of this {@code PObject} implementation.
-   *
-   * @param collect The backing {@code PCollection} for this {@code PObject}.
-   */
-  public PObjectImpl(PCollection<S> collect) {
-    this.collection = collect;
-    this.cachedValue = null;
-    this.isCached = false;
-  }
-
-  /** {@inheritDoc} */
-  @Override
-  public String toString() {
-    return collection.toString();
-  }
-
-  /** {@inheritDoc} */
-  @Override
-  public final T getValue() {
-    if (!isCached) {
-      cachedValue = process(collection.materialize());
-      isCached = true;
-    }
-    return cachedValue;
-  }
-
-  /**
-   * Transforms the provided Iterable, obtained from the backing {@link PCollection},
-   * into the value encapsulated by this {@code PObject}.
-   *
-   * @param input An Iterable whose elements correspond to those of the backing {@code
-   * PCollection}.
-   * @return The value of this {@code PObject}.
-   */
-  protected abstract T process(Iterable<S> input);
-}
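
Following the pattern described in the PObjectImpl javadoc, a concrete subclass only needs to override process(Iterable). A hypothetical example that reduces the backing PCollection to its element count:

    import org.apache.crunch.PCollection;
    import org.apache.crunch.materialize.pobject.PObjectImpl;

    // Hypothetical PObject whose value is the number of elements in the backing PCollection.
    public class CountPObject<S> extends PObjectImpl<S, Long> {

      public CountPObject(PCollection<S> collect) {
        super(collect);
      }

      @Override
      protected Long process(Iterable<S> input) {
        long count = 0L;
        for (S ignored : input) {
          count++;
        }
        return count;
      }
    }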

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/package-info.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/package-info.java b/crunch/src/main/java/org/apache/crunch/package-info.java
deleted file mode 100644
index 38f11bc..0000000
--- a/crunch/src/main/java/org/apache/crunch/package-info.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Client-facing API and core abstractions.
- *
- * @see <a href="http://crunch.apache.org/intro.html">Introduction to
- *      Apache Crunch</a>
- */
-package org.apache.crunch;

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/CollectionDeepCopier.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/CollectionDeepCopier.java b/crunch/src/main/java/org/apache/crunch/types/CollectionDeepCopier.java
deleted file mode 100644
index 151ab82..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/CollectionDeepCopier.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types;
-
-import java.util.Collection;
-import java.util.List;
-
-import org.apache.hadoop.conf.Configuration;
-
-import com.google.common.collect.Lists;
-
-/**
- * Performs deep copies (based on underlying PType deep copying) of Collections.
- * 
- * @param <T> The element type of the Collections being copied
- */
-public class CollectionDeepCopier<T> implements DeepCopier<Collection<T>> {
-
-  private PType<T> elementType;
-
-  public CollectionDeepCopier(PType<T> elementType) {
-    this.elementType = elementType;
-  }
-
-  @Override
-  public void initialize(Configuration conf) {
-    this.elementType.initialize(conf);
-  }
-
-  @Override
-  public Collection<T> deepCopy(Collection<T> source) {
-    if (source == null) {
-      return null;
-    }
-    List<T> copiedCollection = Lists.newArrayListWithCapacity(source.size());
-    for (T value : source) {
-      copiedCollection.add(elementType.getDetachedValue(value));
-    }
-    return copiedCollection;
-  }
-
-}
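
A brief sketch of how the CollectionDeepCopier above might be exercised, assuming the Writables.strings() PType introduced elsewhere in this patch:

    CollectionDeepCopier<String> copier = new CollectionDeepCopier<String>(Writables.strings());
    copier.initialize(new Configuration());                        // initializes the element PType
    Collection<String> detached = copier.deepCopy(someStrings);    // element-wise getDetachedValue copies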

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/Converter.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/Converter.java b/crunch/src/main/java/org/apache/crunch/types/Converter.java
deleted file mode 100644
index a0dbb16..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/Converter.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types;
-
-import java.io.Serializable;
-
-import org.apache.crunch.DoFn;
-
-/**
- * Converts the input key/value from a MapReduce task into the input to a
- * {@link DoFn}, or takes the output of a {@code DoFn} and writes it to the
- * output key/values.
- */
-public interface Converter<K, V, S, T> extends Serializable {
-  S convertInput(K key, V value);
-
-  T convertIterableInput(K key, Iterable<V> value);
-
-  K outputKey(S value);
-
-  V outputValue(S value);
-
-  Class<K> getKeyClass();
-
-  Class<V> getValueClass();
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/DeepCopier.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/DeepCopier.java b/crunch/src/main/java/org/apache/crunch/types/DeepCopier.java
deleted file mode 100644
index f146e86..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/DeepCopier.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types;
-
-import java.io.Serializable;
-
-import org.apache.hadoop.conf.Configuration;
-
-/**
- * Performs deep copies of values.
- * 
- * @param <T> The type of value that will be copied
- */
-public interface DeepCopier<T> extends Serializable {
-
-  /**
-   * Initialize the deep copier with a job-specific configuration
-   * 
-   * @param conf Job-specific configuration
-   */
-  void initialize(Configuration conf);
-
-  /**
-   * Create a deep copy of a value.
-   * 
-   * @param source The value to be copied
-   * @return The deep copy of the value
-   */
-  T deepCopy(T source);
-
-  static class NoOpDeepCopier<V> implements DeepCopier<V> {
-
-    @Override
-    public V deepCopy(V source) {
-      return source;
-    }
-
-    @Override
-    public void initialize(Configuration conf) {
-      // No initialization needed
-    }
-
-  }
-
-}
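
Since immutable values never need copying, a custom DeepCopier for such a type can simply return its input, mirroring the NoOpDeepCopier above. A hypothetical example:

    import org.apache.crunch.types.DeepCopier;
    import org.apache.hadoop.conf.Configuration;

    // Hypothetical copier for an immutable value type: deepCopy is the identity.
    public class StringDeepCopier implements DeepCopier<String> {

      @Override
      public void initialize(Configuration conf) {
        // Nothing to configure for plain Strings.
      }

      @Override
      public String deepCopy(String source) {
        return source;   // Strings are immutable, so returning the same reference is safe
      }
    }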

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/MapDeepCopier.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/MapDeepCopier.java b/crunch/src/main/java/org/apache/crunch/types/MapDeepCopier.java
deleted file mode 100644
index de8903b..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/MapDeepCopier.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types;
-
-import java.util.Map;
-import java.util.Map.Entry;
-
-import org.apache.hadoop.conf.Configuration;
-
-import com.google.common.collect.Maps;
-
-public class MapDeepCopier<T> implements DeepCopier<Map<String, T>> {
-
-  private final PType<T> ptype;
-
-  public MapDeepCopier(PType<T> ptype) {
-    this.ptype = ptype;
-  }
-
-  @Override
-  public void initialize(Configuration conf) {
-    this.ptype.initialize(conf);
-  }
-
-  @Override
-  public Map<String, T> deepCopy(Map<String, T> source) {
-    if (source == null) {
-      return null;
-    }
-    
-    Map<String, T> deepCopyMap = Maps.newHashMap();
-    for (Entry<String, T> entry : source.entrySet()) {
-      deepCopyMap.put(entry.getKey(), ptype.getDetachedValue(entry.getValue()));
-    }
-    return deepCopyMap;
-
-  }
-
-}
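
MapDeepCopier works the same way for String-keyed maps; a short sketch (again assuming the Writables factory methods from this patch):

    MapDeepCopier<Long> copier = new MapDeepCopier<Long>(Writables.longs());
    copier.initialize(new Configuration());
    Map<String, Long> detached = copier.deepCopy(countsByWord);   // values detached via the element PType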

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/PGroupedTableType.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/PGroupedTableType.java b/crunch/src/main/java/org/apache/crunch/types/PGroupedTableType.java
deleted file mode 100644
index d276cd6..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/PGroupedTableType.java
+++ /dev/null
@@ -1,141 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types;
-
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.crunch.GroupingOptions;
-import org.apache.crunch.MapFn;
-import org.apache.crunch.PGroupedTable;
-import org.apache.crunch.Pair;
-import org.apache.crunch.io.ReadableSourceTarget;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.TaskInputOutputContext;
-
-import com.google.common.collect.Iterables;
-
-/**
- * The {@code PType} instance for {@link PGroupedTable} instances. Its settings
- * are derived from the {@code PTableType} that was grouped to create the
- * {@code PGroupedTable} instance.
- * 
- */
-public abstract class PGroupedTableType<K, V> implements PType<Pair<K, Iterable<V>>> {
-
-  protected static class PTypeIterable<V> implements Iterable<V> {
-    private final Iterable<Object> iterable;
-    private final MapFn<Object, V> mapFn;
-
-    public PTypeIterable(MapFn<Object, V> mapFn, Iterable<Object> iterable) {
-      this.mapFn = mapFn;
-      this.iterable = iterable;
-    }
-
-    public Iterator<V> iterator() {
-      return new Iterator<V>() {
-        Iterator<Object> iter = iterable.iterator();
-
-        public boolean hasNext() {
-          return iter.hasNext();
-        }
-
-        public V next() {
-          return mapFn.map(iter.next());
-        }
-
-        public void remove() {
-          iter.remove();
-        }
-      };
-    }
-    
-    @Override
-    public String toString() {
-      return Iterables.toString(this);
-    }
-  }
-
-  public static class PairIterableMapFn<K, V> extends MapFn<Pair<Object, Iterable<Object>>, Pair<K, Iterable<V>>> {
-    private final MapFn<Object, K> keys;
-    private final MapFn<Object, V> values;
-
-    public PairIterableMapFn(MapFn<Object, K> keys, MapFn<Object, V> values) {
-      this.keys = keys;
-      this.values = values;
-    }
-
-    @Override
-    public void configure(Configuration conf) {
-      keys.configure(conf);
-      values.configure(conf);
-    }
-    
-    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
-      keys.setContext(context);
-      values.setContext(context);
-    }
-    
-    @Override
-    public void initialize() {
-      keys.initialize();
-      values.initialize();
-    }
-
-    @Override
-    public Pair<K, Iterable<V>> map(Pair<Object, Iterable<Object>> input) {
-      return Pair.<K, Iterable<V>> of(keys.map(input.first()), new PTypeIterable(values, input.second()));
-    }
-  }
-
-  protected final PTableType<K, V> tableType;
-
-  public PGroupedTableType(PTableType<K, V> tableType) {
-    this.tableType = tableType;
-  }
-
-  public PTableType<K, V> getTableType() {
-    return tableType;
-  }
-
-  @Override
-  public PTypeFamily getFamily() {
-    return tableType.getFamily();
-  }
-
-  @Override
-  public List<PType> getSubTypes() {
-    return tableType.getSubTypes();
-  }
-
-  @Override
-  public Converter getConverter() {
-    return tableType.getConverter();
-  }
-
-  public abstract Converter getGroupingConverter();
-
-  public abstract void configureShuffle(Job job, GroupingOptions options);
-
-  @Override
-  public ReadableSourceTarget<Pair<K, Iterable<V>>> getDefaultFileSource(Path path) {
-    throw new UnsupportedOperationException("Grouped tables cannot be written out directly");
-  }
-}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/PTableType.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/PTableType.java b/crunch/src/main/java/org/apache/crunch/types/PTableType.java
deleted file mode 100644
index 3d06f8b..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/PTableType.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types;
-
-import org.apache.crunch.PTable;
-import org.apache.crunch.Pair;
-
-/**
- * An extension of {@code PType} specifically for {@link PTable} objects. It
- * allows separate access to the {@code PType}s of the key and value for the
- * {@code PTable}.
- * 
- */
-public interface PTableType<K, V> extends PType<Pair<K, V>> {
-  /**
-   * Returns the key type for the table.
-   */
-  PType<K> getKeyType();
-
-  /**
-   * Returns the value type for the table.
-   */
-  PType<V> getValueType();
-
-  /**
-   * Returns the grouped table version of this type.
-   */
-  PGroupedTableType<K, V> getGroupedTableType();
-}
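
A PTableType is normally obtained from a type family rather than implemented directly. A hedged sketch using the PTypeFamily interface below (this assumes WritableTypeFamily.getInstance() implements PTypeFamily, as its use in Writables.jsons() elsewhere in this patch suggests):

    PTypeFamily tf = WritableTypeFamily.getInstance();
    PTableType<String, Long> wordCounts = tf.tableOf(tf.strings(), tf.longs());
    PType<String> keyType = wordCounts.getKeyType();
    PType<Long> valueType = wordCounts.getValueType();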

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/PType.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/PType.java b/crunch/src/main/java/org/apache/crunch/types/PType.java
deleted file mode 100644
index ebddf84..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/PType.java
+++ /dev/null
@@ -1,86 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types;
-
-import java.io.Serializable;
-import java.util.List;
-
-import org.apache.crunch.DoFn;
-import org.apache.crunch.MapFn;
-import org.apache.crunch.PCollection;
-import org.apache.crunch.io.ReadableSourceTarget;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-
-/**
- * A {@code PType} defines a mapping between a data type that is used in a Crunch pipeline and a
- * serialization and storage format that is used to read/write data from/to HDFS. Every
- * {@link PCollection} has an associated {@code PType} that tells Crunch how to read/write data from
- * that {@code PCollection}.
- * 
- */
-public interface PType<T> extends Serializable {
-  /**
-   * Returns the Java type represented by this {@code PType}.
-   */
-  Class<T> getTypeClass();
-
-  /**
-   * Returns the {@code PTypeFamily} that this {@code PType} belongs to.
-   */
-  PTypeFamily getFamily();
-
-  MapFn<Object, T> getInputMapFn();
-
-  MapFn<T, Object> getOutputMapFn();
-
-  Converter getConverter();
-
-  /**
-   * Initialize this PType for use within a DoFn. This generally only needs to be called when using
-   * a PType for {@link #getDetachedValue(Object)}.
-   * 
-   * @param conf Configuration object
-   * @see PType#getDetachedValue(Object)
-   */
-  void initialize(Configuration conf);
-
-  /**
-   * Returns a copy of a value (or the value itself) that can safely be retained.
-   * <p>
-   * This is useful when iterable values being processed in a DoFn (via a reducer) need to be held
-   * on to for more than the scope of a single iteration, as a reducer (and therefore also a DoFn
-   * that has an Iterable as input) re-use deserialized values. More information on object reuse is
-   * available in the {@link DoFn} class documentation.
-   * 
-   * @param value The value to be deep-copied
-   * @return A deep copy of the input value
-   */
-  T getDetachedValue(T value);
-
-  /**
-   * Returns a {@code SourceTarget} that is able to read/write data using the serialization format
-   * specified by this {@code PType}.
-   */
-  ReadableSourceTarget<T> getDefaultFileSource(Path path);
-
-  /**
-   * Returns the sub-types that make up this PType if it is a composite instance, such as a tuple.
-   */
-  List<PType> getSubTypes();
-}
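
The getDetachedValue contract above matters most when holding on to reduce-side values. A sketch of the intended usage inside a DoFn (ptype, conf, and the values Iterable are assumed to be in scope; Lists is Guava, as used throughout this codebase):

    // Detach each element before retaining it past the current iteration,
    // because Hadoop reuses the deserialized object across iterations.
    ptype.initialize(conf);
    List<T> retained = Lists.newArrayList();
    for (T value : values) {
      retained.add(ptype.getDetachedValue(value));
    }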

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/main/java/org/apache/crunch/types/PTypeFamily.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/types/PTypeFamily.java b/crunch/src/main/java/org/apache/crunch/types/PTypeFamily.java
deleted file mode 100644
index 9458f14..0000000
--- a/crunch/src/main/java/org/apache/crunch/types/PTypeFamily.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.crunch.types;
-
-import java.nio.ByteBuffer;
-import java.util.Collection;
-import java.util.Map;
-
-import org.apache.crunch.MapFn;
-import org.apache.crunch.Pair;
-import org.apache.crunch.Tuple;
-import org.apache.crunch.Tuple3;
-import org.apache.crunch.Tuple4;
-import org.apache.crunch.TupleN;
-
-/**
- * An abstract factory for creating {@code PType} instances that have the same
- * serialization/storage backing format.
- * 
- */
-public interface PTypeFamily {
-  PType<Void> nulls();
-
-  PType<String> strings();
-
-  PType<Long> longs();
-
-  PType<Integer> ints();
-
-  PType<Float> floats();
-
-  PType<Double> doubles();
-
-  PType<Boolean> booleans();
-
-  PType<ByteBuffer> bytes();
-
-  <T> PType<T> records(Class<T> clazz);
-
-  <T> PType<Collection<T>> collections(PType<T> ptype);
-
-  <T> PType<Map<String, T>> maps(PType<T> ptype);
-
-  <V1, V2> PType<Pair<V1, V2>> pairs(PType<V1> p1, PType<V2> p2);
-
-  <V1, V2, V3> PType<Tuple3<V1, V2, V3>> triples(PType<V1> p1, PType<V2> p2, PType<V3> p3);
-
-  <V1, V2, V3, V4> PType<Tuple4<V1, V2, V3, V4>> quads(PType<V1> p1, PType<V2> p2, PType<V3> p3, PType<V4> p4);
-
-  PType<TupleN> tuples(PType<?>... ptypes);
-
-  <T extends Tuple> PType<T> tuples(Class<T> clazz, PType<?>... ptypes);
-
-  <S, T> PType<T> derived(Class<T> clazz, MapFn<S, T> inputFn, MapFn<T, S> outputFn, PType<S> base);
-
-  <K, V> PTableType<K, V> tableOf(PType<K> key, PType<V> value);
-
-  /**
-   * Returns the equivalent of the given ptype for this family, if it exists.
-   */
-  <T> PType<T> as(PType<T> ptype);
-}
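
The derived() method above is the extension point for wrapping an existing PType. A hypothetical example that stores java.util.UUID values in their String form (again assuming WritableTypeFamily implements this interface):

    PTypeFamily tf = WritableTypeFamily.getInstance();
    PType<UUID> uuids = tf.derived(UUID.class,
        new MapFn<String, UUID>() {
          @Override
          public UUID map(String input) {
            return UUID.fromString(input);
          }
        },
        new MapFn<UUID, String>() {
          @Override
          public String map(UUID input) {
            return input.toString();
          }
        },
        tf.strings());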


[25/43] CRUNCH-196: crunch -> crunch-core rename to fix build issues

Posted by jw...@apache.org.
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/writable/Writables.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/writable/Writables.java b/crunch-core/src/main/java/org/apache/crunch/types/writable/Writables.java
new file mode 100644
index 0000000..78cf3ae
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/writable/Writables.java
@@ -0,0 +1,588 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.types.writable;
+
+import java.nio.ByteBuffer;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.crunch.MapFn;
+import org.apache.crunch.Pair;
+import org.apache.crunch.Tuple;
+import org.apache.crunch.Tuple3;
+import org.apache.crunch.Tuple4;
+import org.apache.crunch.TupleN;
+import org.apache.crunch.fn.CompositeMapFn;
+import org.apache.crunch.fn.IdentityFn;
+import org.apache.crunch.types.PType;
+import org.apache.crunch.types.PTypes;
+import org.apache.crunch.types.TupleFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.MapWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.TaskInputOutputContext;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+
+/**
+ * Defines static methods that are analogous to the methods defined in
+ * {@link WritableTypeFamily} for convenient static importing.
+ * 
+ */
+public class Writables {
+  private static final MapFn<NullWritable, Void> NULL_WRITABLE_TO_VOID = new MapFn<NullWritable, Void>() {
+    @Override
+    public Void map(NullWritable input) {
+      return null;
+    }
+  };
+
+  private static final MapFn<Void, NullWritable> VOID_TO_NULL_WRITABLE = new MapFn<Void, NullWritable>() {
+    @Override
+    public NullWritable map(Void input) {
+      return NullWritable.get();
+    }
+  };
+
+  private static final MapFn<Text, String> TEXT_TO_STRING = new MapFn<Text, String>() {
+    @Override
+    public String map(Text input) {
+      return input.toString();
+    }
+  };
+
+  private static final MapFn<String, Text> STRING_TO_TEXT = new MapFn<String, Text>() {
+    @Override
+    public Text map(String input) {
+      return new Text(input);
+    }
+  };
+
+  private static final MapFn<IntWritable, Integer> IW_TO_INT = new MapFn<IntWritable, Integer>() {
+    @Override
+    public Integer map(IntWritable input) {
+      return input.get();
+    }
+  };
+
+  private static final MapFn<Integer, IntWritable> INT_TO_IW = new MapFn<Integer, IntWritable>() {
+    @Override
+    public IntWritable map(Integer input) {
+      return new IntWritable(input);
+    }
+  };
+
+  private static final MapFn<LongWritable, Long> LW_TO_LONG = new MapFn<LongWritable, Long>() {
+    @Override
+    public Long map(LongWritable input) {
+      return input.get();
+    }
+  };
+
+  private static final MapFn<Long, LongWritable> LONG_TO_LW = new MapFn<Long, LongWritable>() {
+    @Override
+    public LongWritable map(Long input) {
+      return new LongWritable(input);
+    }
+  };
+
+  private static final MapFn<FloatWritable, Float> FW_TO_FLOAT = new MapFn<FloatWritable, Float>() {
+    @Override
+    public Float map(FloatWritable input) {
+      return input.get();
+    }
+  };
+
+  private static final MapFn<Float, FloatWritable> FLOAT_TO_FW = new MapFn<Float, FloatWritable>() {
+    @Override
+    public FloatWritable map(Float input) {
+      return new FloatWritable(input);
+    }
+  };
+
+  private static final MapFn<DoubleWritable, Double> DW_TO_DOUBLE = new MapFn<DoubleWritable, Double>() {
+    @Override
+    public Double map(DoubleWritable input) {
+      return input.get();
+    }
+  };
+
+  private static final MapFn<Double, DoubleWritable> DOUBLE_TO_DW = new MapFn<Double, DoubleWritable>() {
+    @Override
+    public DoubleWritable map(Double input) {
+      return new DoubleWritable(input);
+    }
+  };
+
+  private static final MapFn<BooleanWritable, Boolean> BW_TO_BOOLEAN = new MapFn<BooleanWritable, Boolean>() {
+    @Override
+    public Boolean map(BooleanWritable input) {
+      return input.get();
+    }
+  };
+
+  private static final BooleanWritable TRUE = new BooleanWritable(true);
+  private static final BooleanWritable FALSE = new BooleanWritable(false);
+  private static final MapFn<Boolean, BooleanWritable> BOOLEAN_TO_BW = new MapFn<Boolean, BooleanWritable>() {
+    @Override
+    public BooleanWritable map(Boolean input) {
+      return input == Boolean.TRUE ? TRUE : FALSE;
+    }
+  };
+
+  private static final MapFn<BytesWritable, ByteBuffer> BW_TO_BB = new MapFn<BytesWritable, ByteBuffer>() {
+    @Override
+    public ByteBuffer map(BytesWritable input) {
+      return ByteBuffer.wrap(input.getBytes(), 0, input.getLength());
+    }
+  };
+
+  private static final MapFn<ByteBuffer, BytesWritable> BB_TO_BW = new MapFn<ByteBuffer, BytesWritable>() {
+    @Override
+    public BytesWritable map(ByteBuffer input) {
+      BytesWritable bw = new BytesWritable();
+      bw.set(input.array(), input.arrayOffset(), input.limit());
+      return bw;
+    }
+  };
+
+  private static <S, W extends Writable> WritableType<S, W> create(Class<S> typeClass, Class<W> writableClass,
+      MapFn<W, S> inputDoFn, MapFn<S, W> outputDoFn) {
+    return new WritableType<S, W>(typeClass, writableClass, inputDoFn, outputDoFn);
+  }
+
+  private static final WritableType<Void, NullWritable> nulls = create(Void.class, NullWritable.class,
+      NULL_WRITABLE_TO_VOID, VOID_TO_NULL_WRITABLE);
+  private static final WritableType<String, Text> strings = create(String.class, Text.class, TEXT_TO_STRING,
+      STRING_TO_TEXT);
+  private static final WritableType<Long, LongWritable> longs = create(Long.class, LongWritable.class, LW_TO_LONG,
+      LONG_TO_LW);
+  private static final WritableType<Integer, IntWritable> ints = create(Integer.class, IntWritable.class, IW_TO_INT,
+      INT_TO_IW);
+  private static final WritableType<Float, FloatWritable> floats = create(Float.class, FloatWritable.class,
+      FW_TO_FLOAT, FLOAT_TO_FW);
+  private static final WritableType<Double, DoubleWritable> doubles = create(Double.class, DoubleWritable.class,
+      DW_TO_DOUBLE, DOUBLE_TO_DW);
+  private static final WritableType<Boolean, BooleanWritable> booleans = create(Boolean.class, BooleanWritable.class,
+      BW_TO_BOOLEAN, BOOLEAN_TO_BW);
+  private static final WritableType<ByteBuffer, BytesWritable> bytes = create(ByteBuffer.class, BytesWritable.class,
+      BW_TO_BB, BB_TO_BW);
+
+  private static final Map<Class<?>, PType<?>> PRIMITIVES = ImmutableMap.<Class<?>, PType<?>> builder()
+      .put(String.class, strings).put(Long.class, longs).put(Integer.class, ints).put(Float.class, floats)
+      .put(Double.class, doubles).put(Boolean.class, booleans).put(ByteBuffer.class, bytes).build();
+
+  private static final Map<Class<?>, WritableType<?, ?>> EXTENSIONS = Maps.newHashMap();
+
+  public static <T> PType<T> getPrimitiveType(Class<T> clazz) {
+    return (PType<T>) PRIMITIVES.get(clazz);
+  }
+
+  public static <T> void register(Class<T> clazz, WritableType<T, ? extends Writable> ptype) {
+    EXTENSIONS.put(clazz, ptype);
+  }
+
+  public static final WritableType<Void, NullWritable> nulls() {
+    return nulls;
+  }
+
+  public static final WritableType<String, Text> strings() {
+    return strings;
+  }
+
+  public static final WritableType<Long, LongWritable> longs() {
+    return longs;
+  }
+
+  public static final WritableType<Integer, IntWritable> ints() {
+    return ints;
+  }
+
+  public static final WritableType<Float, FloatWritable> floats() {
+    return floats;
+  }
+
+  public static final WritableType<Double, DoubleWritable> doubles() {
+    return doubles;
+  }
+
+  public static final WritableType<Boolean, BooleanWritable> booleans() {
+    return booleans;
+  }
+
+  public static final WritableType<ByteBuffer, BytesWritable> bytes() {
+    return bytes;
+  }
+
+  public static final <T, W extends Writable> WritableType<T, W> records(Class<T> clazz) {
+    if (EXTENSIONS.containsKey(clazz)) {
+      return (WritableType<T, W>) EXTENSIONS.get(clazz);
+    }
+    if (Writable.class.isAssignableFrom(clazz)) {
+      return (WritableType<T, W>) writables(clazz.asSubclass(Writable.class));
+    } else {
+      throw new IllegalArgumentException(
+          "Cannot create Writable records from non-Writable class " + clazz.getCanonicalName());
+    }
+  }
+
+  public static <W extends Writable> WritableType<W, W> writables(Class<W> clazz) {
+    MapFn wIdentity = IdentityFn.getInstance();
+    return new WritableType<W, W>(clazz, clazz, wIdentity, wIdentity);
+  }
+
+  public static <K, V> WritableTableType<K, V> tableOf(PType<K> key, PType<V> value) {
+    if (key instanceof WritableTableType) {
+      WritableTableType wtt = (WritableTableType) key;
+      key = pairs(wtt.getKeyType(), wtt.getValueType());
+    } else if (!(key instanceof WritableType)) {
+      throw new IllegalArgumentException("Key type must be of class WritableType");
+    }
+    if (value instanceof WritableTableType) {
+      WritableTableType wtt = (WritableTableType) value;
+      value = pairs(wtt.getKeyType(), wtt.getValueType());
+    } else if (!(value instanceof WritableType)) {
+      throw new IllegalArgumentException("Value type must be of class WritableType");
+    }
+    return new WritableTableType((WritableType) key, (WritableType) value);
+  }
+
+  /**
+   * For mapping from {@link TupleWritable} instances to {@link Tuple}s.
+   * 
+   */
+  private static class TWTupleMapFn extends MapFn<TupleWritable, Tuple> {
+    private final TupleFactory<?> tupleFactory;
+    private final List<MapFn> fns;
+
+    private transient Object[] values;
+
+    public TWTupleMapFn(TupleFactory<?> tupleFactory, PType<?>... ptypes) {
+      this.tupleFactory = tupleFactory;
+      this.fns = Lists.newArrayList();
+      for (PType ptype : ptypes) {
+        fns.add(ptype.getInputMapFn());
+      }
+    }
+
+    @Override
+    public void configure(Configuration conf) {
+      for (MapFn fn : fns) {
+        fn.configure(conf);
+      }
+    }
+
+    @Override
+    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
+      for (MapFn fn : fns) {
+        fn.setContext(context);
+      }
+    }
+    
+    @Override
+    public void initialize() {
+      for (MapFn fn : fns) {
+        fn.initialize();
+      }
+      // The rest of the methods allocate new
+      // objects each time. However this one
+      // uses Tuple.tuplify which does a copy
+      this.values = new Object[fns.size()];
+      tupleFactory.initialize();
+    }
+
+    @Override
+    public Tuple map(TupleWritable in) {
+      for (int i = 0; i < values.length; i++) {
+        if (in.has(i)) {
+          values[i] = fns.get(i).map(in.get(i));
+        } else {
+          values[i] = null;
+        }
+      }
+      return tupleFactory.makeTuple(values);
+    }
+  }
+
+  /**
+   * For mapping from {@code Tuple}s to {@code TupleWritable}s.
+   * 
+   */
+  private static class TupleTWMapFn extends MapFn<Tuple, TupleWritable> {
+
+    private transient TupleWritable writable;
+    private transient Writable[] values;
+
+    private final List<MapFn> fns;
+
+    public TupleTWMapFn(PType<?>... ptypes) {
+      this.fns = Lists.newArrayList();
+      for (PType<?> ptype : ptypes) {
+        fns.add(ptype.getOutputMapFn());
+      }
+    }
+
+    @Override
+    public void configure(Configuration conf) {
+      for (MapFn fn : fns) {
+        fn.configure(conf);
+      }
+    }
+
+    @Override
+    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
+      for (MapFn fn : fns) {
+        fn.setContext(context);
+      }
+    }
+    
+    @Override
+    public void initialize() {
+      this.values = new Writable[fns.size()];
+      this.writable = new TupleWritable(values);
+      for (MapFn fn : fns) {
+        fn.initialize();
+      }
+    }
+
+    @Override
+    public TupleWritable map(Tuple input) {
+      writable.clearWritten();
+      for (int i = 0; i < input.size(); i++) {
+        Object value = input.get(i);
+        if (value != null) {
+          writable.setWritten(i);
+          values[i] = (Writable) fns.get(i).map(value);
+        }
+      }
+      return writable;
+    }
+  }
+
+  public static <V1, V2> WritableType<Pair<V1, V2>, TupleWritable> pairs(PType<V1> p1, PType<V2> p2) {
+    TWTupleMapFn input = new TWTupleMapFn(TupleFactory.PAIR, p1, p2);
+    TupleTWMapFn output = new TupleTWMapFn(p1, p2);
+    return new WritableType(Pair.class, TupleWritable.class, input, output, p1, p2);
+  }
+
+  public static <V1, V2, V3> WritableType<Tuple3<V1, V2, V3>, TupleWritable> triples(PType<V1> p1, PType<V2> p2,
+      PType<V3> p3) {
+    TWTupleMapFn input = new TWTupleMapFn(TupleFactory.TUPLE3, p1, p2, p3);
+    TupleTWMapFn output = new TupleTWMapFn(p1, p2, p3);
+    return new WritableType(Tuple3.class, TupleWritable.class, input, output, p1, p2, p3);
+  }
+
+  public static <V1, V2, V3, V4> WritableType<Tuple4<V1, V2, V3, V4>, TupleWritable> quads(PType<V1> p1, PType<V2> p2,
+      PType<V3> p3, PType<V4> p4) {
+    TWTupleMapFn input = new TWTupleMapFn(TupleFactory.TUPLE4, p1, p2, p3, p4);
+    TupleTWMapFn output = new TupleTWMapFn(p1, p2, p3, p4);
+    return new WritableType(Tuple4.class, TupleWritable.class, input, output, p1, p2, p3, p4);
+  }
+
+  public static WritableType<TupleN, TupleWritable> tuples(PType... ptypes) {
+    TWTupleMapFn input = new TWTupleMapFn(TupleFactory.TUPLEN, ptypes);
+    TupleTWMapFn output = new TupleTWMapFn(ptypes);
+    return new WritableType(TupleN.class, TupleWritable.class, input, output, ptypes);
+  }
+
+  public static <T extends Tuple> PType<T> tuples(Class<T> clazz, PType... ptypes) {
+    Class[] typeArgs = new Class[ptypes.length];
+    for (int i = 0; i < typeArgs.length; i++) {
+      typeArgs[i] = ptypes[i].getTypeClass();
+    }
+    TupleFactory<T> factory = TupleFactory.create(clazz, typeArgs);
+    TWTupleMapFn input = new TWTupleMapFn(factory, ptypes);
+    TupleTWMapFn output = new TupleTWMapFn(ptypes);
+    return new WritableType(clazz, TupleWritable.class, input, output, ptypes);
+  }
+
+  public static <S, T> PType<T> derived(Class<T> clazz, MapFn<S, T> inputFn, MapFn<T, S> outputFn, PType<S> base) {
+    WritableType<S, ?> wt = (WritableType<S, ?>) base;
+    MapFn input = new CompositeMapFn(wt.getInputMapFn(), inputFn);
+    MapFn output = new CompositeMapFn(outputFn, wt.getOutputMapFn());
+    return new WritableType(clazz, wt.getSerializationClass(), input, output, base.getSubTypes().toArray(new PType[0]));
+  }
+
+  private static class ArrayCollectionMapFn<T> extends MapFn<GenericArrayWritable, Collection<T>> {
+    private final MapFn<Object, T> mapFn;
+
+    public ArrayCollectionMapFn(MapFn<Object, T> mapFn) {
+      this.mapFn = mapFn;
+    }
+
+    @Override
+    public void configure(Configuration conf) {
+      mapFn.configure(conf);
+    }
+
+    @Override
+    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
+      mapFn.setContext(context);
+    }
+    
+    @Override
+    public void initialize() {
+      mapFn.initialize();
+    }
+
+    @Override
+    public Collection<T> map(GenericArrayWritable input) {
+      Collection<T> collection = Lists.newArrayList();
+      for (Writable writable : input.get()) {
+        collection.add(mapFn.map(writable));
+      }
+      return collection;
+    }
+  }
+
+  private static class CollectionArrayMapFn<T> extends MapFn<Collection<T>, GenericArrayWritable> {
+
+    private final Class<? extends Writable> clazz;
+    private final MapFn<T, Object> mapFn;
+
+    public CollectionArrayMapFn(Class<? extends Writable> clazz, MapFn<T, Object> mapFn) {
+      this.clazz = clazz;
+      this.mapFn = mapFn;
+    }
+
+    @Override
+    public void configure(Configuration conf) {
+      mapFn.configure(conf);
+    }
+
+    @Override
+    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
+      mapFn.setContext(context);
+    }
+    
+    @Override
+    public void initialize() {
+      mapFn.initialize();
+    }
+
+    @Override
+    public GenericArrayWritable map(Collection<T> input) {
+      GenericArrayWritable arrayWritable = new GenericArrayWritable(clazz);
+      Writable[] w = new Writable[input.size()];
+      int index = 0;
+      for (T in : input) {
+        w[index++] = ((Writable) mapFn.map(in));
+      }
+      arrayWritable.set(w);
+      return arrayWritable;
+    }
+  }
+
+  public static <T> WritableType<Collection<T>, GenericArrayWritable<T>> collections(PType<T> ptype) {
+    WritableType<T, ?> wt = (WritableType<T, ?>) ptype;
+    return new WritableType(Collection.class, GenericArrayWritable.class, new ArrayCollectionMapFn(wt.getInputMapFn()),
+        new CollectionArrayMapFn(wt.getSerializationClass(), wt.getOutputMapFn()), ptype);
+  }
+
+  private static class MapInputMapFn<T> extends MapFn<TextMapWritable<Writable>, Map<String, T>> {
+    private final MapFn<Writable, T> mapFn;
+
+    public MapInputMapFn(MapFn<Writable, T> mapFn) {
+      this.mapFn = mapFn;
+    }
+
+    @Override
+    public void configure(Configuration conf) {
+      mapFn.configure(conf);
+    }
+
+    @Override
+    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
+      mapFn.setContext(context);
+    }
+    
+    @Override
+    public void initialize() {
+      mapFn.initialize();
+    }
+
+    @Override
+    public Map<String, T> map(TextMapWritable<Writable> input) {
+      Map<String, T> out = Maps.newHashMap();
+      for (Map.Entry<Text, Writable> e : input.entrySet()) {
+        out.put(e.getKey().toString(), mapFn.map(e.getValue()));
+      }
+      return out;
+    }
+  }
+
+  private static class MapOutputMapFn<T> extends MapFn<Map<String, T>, TextMapWritable<Writable>> {
+
+    private final Class<Writable> clazz;
+    private final MapFn<T, Writable> mapFn;
+
+    public MapOutputMapFn(Class<Writable> clazz, MapFn<T, Writable> mapFn) {
+      this.clazz = clazz;
+      this.mapFn = mapFn;
+    }
+
+    @Override
+    public void configure(Configuration conf) {
+      mapFn.configure(conf);
+    }
+
+    @Override
+    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
+      mapFn.setContext(context);
+    }
+    
+    @Override
+    public void initialize() {
+      mapFn.initialize();
+    }
+
+    @Override
+    public TextMapWritable<Writable> map(Map<String, T> input) {
+      TextMapWritable<Writable> tmw = new TextMapWritable<Writable>(clazz);
+      for (Map.Entry<String, T> e : input.entrySet()) {
+        tmw.put(new Text(e.getKey()), mapFn.map(e.getValue()));
+      }
+      return tmw;
+    }
+  }
+
+  public static <T> WritableType<Map<String, T>, MapWritable> maps(PType<T> ptype) {
+    WritableType<T, ?> wt = (WritableType<T, ?>) ptype;
+    return new WritableType(Map.class, TextMapWritable.class, new MapInputMapFn(wt.getInputMapFn()),
+        new MapOutputMapFn(wt.getSerializationClass(), wt.getOutputMapFn()), ptype);
+  }
+
+  public static <T> PType<T> jsons(Class<T> clazz) {
+    return PTypes.jsonString(clazz, WritableTypeFamily.getInstance());
+  }
+
+  // Not instantiable
+  private Writables() {
+  }
+}
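
For reference, the factory methods in the Writables class above compose as follows (the PTableType assignment assumes WritableTableType implements PTableType, as the tableOf contract in PTypeFamily suggests):

    PType<String> strings = Writables.strings();
    PType<Pair<String, Long>> pairs = Writables.pairs(Writables.strings(), Writables.longs());
    PType<Collection<Long>> lists = Writables.collections(Writables.longs());
    PTableType<String, Long> table = Writables.tableOf(Writables.strings(), Writables.longs());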

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/types/writable/package-info.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/writable/package-info.java b/crunch-core/src/main/java/org/apache/crunch/types/writable/package-info.java
new file mode 100644
index 0000000..7d54743
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/types/writable/package-info.java
@@ -0,0 +1,22 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Business object serialization using Hadoop's Writables framework.
+ */
+package org.apache.crunch.types.writable;

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/util/CrunchTool.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/util/CrunchTool.java b/crunch-core/src/main/java/org/apache/crunch/util/CrunchTool.java
new file mode 100644
index 0000000..ea66291
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/util/CrunchTool.java
@@ -0,0 +1,118 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.util;
+
+import java.io.Serializable;
+
+import org.apache.crunch.PCollection;
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pipeline;
+import org.apache.crunch.PipelineExecution;
+import org.apache.crunch.PipelineResult;
+import org.apache.crunch.Source;
+import org.apache.crunch.TableSource;
+import org.apache.crunch.Target;
+import org.apache.crunch.impl.mem.MemPipeline;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.io.At;
+import org.apache.crunch.io.From;
+import org.apache.crunch.io.To;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.util.Tool;
+
+/**
+ * An extension of the {@code Tool} interface that creates a {@code Pipeline}
+ * instance and provides methods for working with the Pipeline from inside of
+ * the Tool's run method.
+ * 
+ */
+public abstract class CrunchTool extends Configured implements Tool, Serializable {
+
+  protected static final From from = new From();
+  protected static final To to = new To();
+  protected static final At at = new At();
+
+  // Pipeline object itself isn't necessarily serializable.
+  private transient Pipeline pipeline;
+
+  public CrunchTool() {
+    this(false);
+  }
+
+  public CrunchTool(boolean inMemory) {
+    this.pipeline = inMemory ? MemPipeline.getInstance() : new MRPipeline(getClass());
+  }
+
+  @Override
+  public void setConf(Configuration conf) {
+    super.setConf(conf);
+    if (conf != null && pipeline != null) {
+      pipeline.setConfiguration(conf);
+    }
+  }
+
+  @Override
+  public Configuration getConf() {
+    return pipeline.getConfiguration();
+  }
+
+  public void enableDebug() {
+    pipeline.enableDebug();
+  }
+
+  public <T> PCollection<T> read(Source<T> source) {
+    return pipeline.read(source);
+  }
+
+  public <K, V> PTable<K, V> read(TableSource<K, V> tableSource) {
+    return pipeline.read(tableSource);
+  }
+
+  public PCollection<String> readTextFile(String pathName) {
+    return pipeline.readTextFile(pathName);
+  }
+
+  public void write(PCollection<?> pcollection, Target target) {
+    pipeline.write(pcollection, target);
+  }
+
+  public void writeTextFile(PCollection<?> pcollection, String pathName) {
+    pipeline.writeTextFile(pcollection, pathName);
+  }
+  
+  public <T> Iterable<T> materialize(PCollection<T> pcollection) {
+    return pipeline.materialize(pcollection);
+  }
+
+  public PipelineResult run() {
+    return pipeline.run();
+  }
+
+  public PipelineExecution runAsync() {
+    return pipeline.runAsync();
+  }
+
+  public PipelineResult done() {
+    return pipeline.done();
+  }
+
+  protected Pipeline getPipeline() {
+    return pipeline;
+  }
+}
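
To make the intended usage concrete, here is a minimal sketch of a driver built on CrunchTool. The class name, the CLI argument contract, and the FilterFn body are hypothetical; only the CrunchTool methods shown above (readTextFile, writeTextFile, done), Crunch's PCollection.filter/FilterFn API, and Hadoop's ToolRunner are assumed.

import org.apache.crunch.FilterFn;
import org.apache.crunch.PCollection;
import org.apache.crunch.PipelineResult;
import org.apache.crunch.util.CrunchTool;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

// Hypothetical driver: copies the non-empty lines of a text file.
public class NonEmptyLinesTool extends CrunchTool {

  @Override
  public int run(String[] args) throws Exception {
    // Assumed CLI contract: args[0] = input path, args[1] = output path.
    PCollection<String> lines = readTextFile(args[0]);
    PCollection<String> nonEmpty = lines.filter(new FilterFn<String>() {
      @Override
      public boolean accept(String input) {
        return input != null && !input.isEmpty();
      }
    });
    writeTextFile(nonEmpty, args[1]);
    PipelineResult result = done();
    return result.succeeded() ? 0 : 1;
  }

  public static void main(String[] args) throws Exception {
    System.exit(ToolRunner.run(new Configuration(), new NonEmptyLinesTool(), args));
  }
}

Because ToolRunner calls setConf before run, the override above pushes the resolved Hadoop Configuration into the underlying Pipeline before any work is planned.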

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/util/DistCache.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/util/DistCache.java b/crunch-core/src/main/java/org/apache/crunch/util/DistCache.java
new file mode 100644
index 0000000..3e49930
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/util/DistCache.java
@@ -0,0 +1,231 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.util;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.net.URI;
+import java.net.URL;
+import java.net.URLDecoder;
+import java.util.Enumeration;
+
+import org.apache.crunch.CrunchRuntimeException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.filecache.DistributedCache;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+/**
+ * Provides functions for working with Hadoop's distributed cache. These
+ * include:
+ * <ul>
+ * <li>
+ * Functions for working with a job-specific distributed cache of objects, such
+ * as the serialized runtime nodes in a MapReduce job.</li>
+ * <li>
+ * Functions for adding library jars to the distributed cache, which will be
+ * added to the classpath of MapReduce tasks.</li>
+ * </ul>
+ */
+public class DistCache {
+
+  // Configuration key holding the paths of jars to export to the distributed
+  // cache.
+  private static final String TMPJARS_KEY = "tmpjars";
+
+  public static void write(Configuration conf, Path path, Object value) throws IOException {
+    ObjectOutputStream oos = new ObjectOutputStream(path.getFileSystem(conf).create(path));
+    oos.writeObject(value);
+    oos.close();
+
+    DistributedCache.addCacheFile(path.toUri(), conf);
+  }
+
+  public static Object read(Configuration conf, Path path) throws IOException {
+    URI target = null;
+    for (URI uri : DistributedCache.getCacheFiles(conf)) {
+      if (uri.toString().equals(path.toString())) {
+        target = uri;
+        break;
+      }
+    }
+    Object value = null;
+    if (target != null) {
+      Path targetPath = new Path(target.toString());
+      ObjectInputStream ois = new ObjectInputStream(targetPath.getFileSystem(conf).open(targetPath));
+      try {
+        value = ois.readObject();
+      } catch (ClassNotFoundException e) {
+        throw new CrunchRuntimeException(e);
+      }
+      ois.close();
+    }
+    return value;
+  }
+
+  public static void addCacheFile(Path path, Configuration conf) {
+    DistributedCache.addCacheFile(path.toUri(), conf);
+  }
+  
+  public static Path getPathToCacheFile(Path path, Configuration conf) {
+    try {
+      for (Path localPath : DistributedCache.getLocalCacheFiles(conf)) {
+        if (localPath.toString().endsWith(path.getName())) {
+          return localPath.makeQualified(FileSystem.getLocal(conf));
+        }
+      }
+    } catch (IOException e) {
+      throw new CrunchRuntimeException(e);
+    }
+    return null;
+  }
+  
+  /**
+   * Adds the specified jar to the distributed cache of jobs using the provided
+   * configuration. The jar will be placed on the classpath of tasks run by the
+   * job.
+   * 
+   * @param conf
+   *          The configuration used to add the jar to the distributed cache.
+   * @param jarFile
+   *          The jar file to add to the distributed cache.
+   * @throws IOException
+   *           If the jar file does not exist or there is a problem accessing
+   *           the file.
+   */
+  public static void addJarToDistributedCache(Configuration conf, File jarFile) throws IOException {
+    if (!jarFile.exists()) {
+      throw new IOException("Jar file: " + jarFile.getCanonicalPath() + " does not exist.");
+    }
+    if (!jarFile.getName().endsWith(".jar")) {
+      throw new IllegalArgumentException("File: " + jarFile.getCanonicalPath() + " is not a .jar file.");
+    }
+    // Get a qualified path for the jar.
+    FileSystem fileSystem = FileSystem.getLocal(conf);
+    Path jarPath = new Path(jarFile.getCanonicalPath());
+    String qualifiedPath = jarPath.makeQualified(fileSystem).toString();
+    // Add the jar to the configuration variable.
+    String jarConfiguration = conf.get(TMPJARS_KEY, "");
+    if (!jarConfiguration.isEmpty()) {
+      jarConfiguration += ",";
+    }
+    jarConfiguration += qualifiedPath;
+    conf.set(TMPJARS_KEY, jarConfiguration);
+  }
+
+  /**
+   * Adds the jar at the specified path to the distributed cache of jobs using
+   * the provided configuration. The jar will be placed on the classpath of
+   * tasks run by the job.
+   * 
+   * @param conf
+   *          The configuration used to add the jar to the distributed cache.
+   * @param jarFile
+   *          The path to the jar file to add to the distributed cache.
+   * @throws IOException
+   *           If the jar file does not exist or there is a problem accessing
+   *           the file.
+   */
+  public static void addJarToDistributedCache(Configuration conf, String jarFile) throws IOException {
+    addJarToDistributedCache(conf, new File(jarFile));
+  }
+
+  /**
+   * Finds the path to a jar that contains the class provided, if any. There is
+   * no guarantee that the jar returned will be the first on the classpath to
+   * contain the file. This method is basically lifted out of Hadoop's
+   * {@link org.apache.hadoop.mapred.JobConf} class.
+   * 
+   * @param jarClass
+   *          The class the jar file should contain.
+   * @return The path to a jar file that contains the class, or
+   *         <code>null</code> if no such jar exists.
+   * @throws IOException
+   *           If there is a problem searching for the jar file.
+   */
+  public static String findContainingJar(Class<?> jarClass) throws IOException {
+    ClassLoader loader = jarClass.getClassLoader();
+    String classFile = jarClass.getName().replaceAll("\\.", "/") + ".class";
+    for (Enumeration<URL> itr = loader.getResources(classFile); itr.hasMoreElements();) {
+      URL url = itr.nextElement();
+      if ("jar".equals(url.getProtocol())) {
+        String toReturn = url.getPath();
+        if (toReturn.startsWith("file:")) {
+          toReturn = toReturn.substring("file:".length());
+        }
+        // URLDecoder is a misnamed class, since it actually decodes
+        // x-www-form-urlencoded MIME type rather than actual
+        // URL encoding (which the file path has). Therefore it would
+        // decode +s to ' 's which is incorrect (spaces are actually
+        // either unencoded or encoded as "%20"). Replace +s first, so
+        // that they are kept sacred during the decoding process.
+        toReturn = toReturn.replaceAll("\\+", "%2B");
+        toReturn = URLDecoder.decode(toReturn, "UTF-8");
+        return toReturn.replaceAll("!.*$", "");
+      }
+    }
+    return null;
+  }
+
+  /**
+   * Adds all jars under the specified directory to the distributed cache of
+   * jobs using the provided configuration. The jars will be placed on the
+   * classpath of tasks run by the job. This method does not descend into
+   * subdirectories when adding jars.
+   * 
+   * @param conf
+   *          The configuration used to add jars to the distributed cache.
+   * @param jarDirectory
+   *          A directory containing jar files to add to the distributed cache.
+   * @throws IOException
+   *           If the directory does not exist or there is a problem accessing
+   *           the directory.
+   */
+  public static void addJarDirToDistributedCache(Configuration conf, File jarDirectory) throws IOException {
+    if (!jarDirectory.exists() || !jarDirectory.isDirectory()) {
+      throw new IOException("Jar directory: " + jarDirectory.getCanonicalPath() + " does not "
+          + "exist or is not a directory.");
+    }
+    for (File file : jarDirectory.listFiles()) {
+      if (!file.isDirectory() && file.getName().endsWith(".jar")) {
+        addJarToDistributedCache(conf, file);
+      }
+    }
+  }
+
+  /**
+   * Adds all jars under the directory at the specified path to the distributed
+   * cache of jobs using the provided configuration. The jars will be placed on
+   * the classpath of the tasks run by the job. This method does not descend
+   * into subdirectories when adding jars.
+   * 
+   * @param conf
+   *          The configuration used to add jars to the distributed cache.
+   * @param jarDirectory
+   *          The path to a directory containing jar files to add to the
+   *          distributed cache.
+   * @throws IOException
+   *           If the directory does not exist or there is a problem accessing
+   *           the directory.
+   */
+  public static void addJarDirToDistributedCache(Configuration conf, String jarDirectory) throws IOException {
+    addJarDirToDistributedCache(conf, new File(jarDirectory));
+  }
+}
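
A short usage sketch of the two halves of DistCache: shipping dependency jars and passing a small serialized object to tasks. The lib directory, the cache path, and the Params class are hypothetical; only the DistCache methods defined above are assumed.

import java.io.Serializable;

import org.apache.crunch.util.DistCache;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class DistCacheUsage {

  // Hypothetical job parameters to ship to tasks.
  static class Params implements Serializable {
    final int threshold;
    Params(int threshold) { this.threshold = threshold; }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    // Add every jar in a (hypothetical) lib directory to the task classpath
    // via the "tmpjars" configuration key.
    DistCache.addJarDirToDistributedCache(conf, "/opt/myapp/lib");

    // Serialize a small object into the job-specific distributed cache.
    Path paramsPath = new Path("/tmp/myapp/params.ser"); // hypothetical path
    DistCache.write(conf, paramsPath, new Params(42));

    // Inside a task, the same object can be recovered with:
    //   Params p = (Params) DistCache.read(conf, paramsPath);
  }
}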

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/util/PartitionUtils.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/util/PartitionUtils.java b/crunch-core/src/main/java/org/apache/crunch/util/PartitionUtils.java
new file mode 100644
index 0000000..da8db6b
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/util/PartitionUtils.java
@@ -0,0 +1,34 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.util;
+
+import org.apache.crunch.PCollection;
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * Helpers for estimating the number of reduce partitions to use for a job from the size of a {@code PCollection}.
+ */
+public class PartitionUtils {
+  public static final String BYTES_PER_REDUCE_TASK = "crunch.bytes.per.reduce.task";
+  public static final long DEFAULT_BYTES_PER_REDUCE_TASK = 1000L * 1000L * 1000L;
+  
+  public static <T> int getRecommendedPartitions(PCollection<T> pcollection, Configuration conf) {
+    long bytesPerTask = conf.getLong(BYTES_PER_REDUCE_TASK, DEFAULT_BYTES_PER_REDUCE_TASK);
+    return 1 + (int) (pcollection.getSize() / bytesPerTask);
+  }
+}
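
The partition estimate scales linearly with the collection size: with the default of 1,000,000,000 bytes per reduce task, a PCollection whose getSize() reports 2.5 GB yields 1 + (2,500,000,000 / 1,000,000,000) = 3 partitions. A minimal sketch of tuning the knob, assuming a caller-supplied PCollection:

import org.apache.crunch.PCollection;
import org.apache.crunch.util.PartitionUtils;
import org.apache.hadoop.conf.Configuration;

public class PartitionSizing {

  // Returns a reducer count for the given collection, with roughly half the
  // data per reducer compared to the 1 GB default.
  static <T> int reducersFor(PCollection<T> pcollection) {
    Configuration conf = new Configuration();
    conf.setLong(PartitionUtils.BYTES_PER_REDUCE_TASK, 500L * 1000L * 1000L);
    return PartitionUtils.getRecommendedPartitions(pcollection, conf);
  }
}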

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/util/Tuples.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/util/Tuples.java b/crunch-core/src/main/java/org/apache/crunch/util/Tuples.java
new file mode 100644
index 0000000..9c8d7bd
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/util/Tuples.java
@@ -0,0 +1,150 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch.util;
+
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.crunch.Pair;
+import org.apache.crunch.Tuple3;
+import org.apache.crunch.Tuple4;
+import org.apache.crunch.TupleN;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.UnmodifiableIterator;
+
+/**
+ * Utilities for working with subclasses of the {@code Tuple} interface.
+ * 
+ */
+public class Tuples {
+
+  private static abstract class TuplifyIterator<T> extends UnmodifiableIterator<T> {
+    protected List<Iterator<?>> iterators;
+
+    public TuplifyIterator(Iterator<?>... iterators) {
+      this.iterators = Lists.newArrayList(iterators);
+    }
+
+    @Override
+    public boolean hasNext() {
+      for (Iterator<?> iter : iterators) {
+        if (!iter.hasNext()) {
+          return false;
+        }
+      }
+      return true;
+    }
+
+    protected Object next(int index) {
+      return iterators.get(index).next();
+    }
+  }
+
+  public static class PairIterable<S, T> implements Iterable<Pair<S, T>> {
+    private final Iterable<S> first;
+    private final Iterable<T> second;
+
+    public PairIterable(Iterable<S> first, Iterable<T> second) {
+      this.first = first;
+      this.second = second;
+    }
+
+    @Override
+    public Iterator<Pair<S, T>> iterator() {
+      return new TuplifyIterator<Pair<S, T>>(first.iterator(), second.iterator()) {
+        @Override
+        public Pair<S, T> next() {
+          return Pair.of((S) next(0), (T) next(1));
+        }
+      };
+    }
+  }
+
+  public static class TripIterable<A, B, C> implements Iterable<Tuple3<A, B, C>> {
+    private final Iterable<A> first;
+    private final Iterable<B> second;
+    private final Iterable<C> third;
+
+    public TripIterable(Iterable<A> first, Iterable<B> second, Iterable<C> third) {
+      this.first = first;
+      this.second = second;
+      this.third = third;
+    }
+
+    @Override
+    public Iterator<Tuple3<A, B, C>> iterator() {
+      return new TuplifyIterator<Tuple3<A, B, C>>(first.iterator(), second.iterator(), third.iterator()) {
+        @Override
+        public Tuple3<A, B, C> next() {
+          return new Tuple3<A, B, C>((A) next(0), (B) next(1), (C) next(2));
+        }
+      };
+    }
+  }
+
+  public static class QuadIterable<A, B, C, D> implements Iterable<Tuple4<A, B, C, D>> {
+    private final Iterable<A> first;
+    private final Iterable<B> second;
+    private final Iterable<C> third;
+    private final Iterable<D> fourth;
+
+    public QuadIterable(Iterable<A> first, Iterable<B> second, Iterable<C> third, Iterable<D> fourth) {
+      this.first = first;
+      this.second = second;
+      this.third = third;
+      this.fourth = fourth;
+    }
+
+    @Override
+    public Iterator<Tuple4<A, B, C, D>> iterator() {
+      return new TuplifyIterator<Tuple4<A, B, C, D>>(first.iterator(), second.iterator(), third.iterator(),
+          fourth.iterator()) {
+        @Override
+        public Tuple4<A, B, C, D> next() {
+          return new Tuple4<A, B, C, D>((A) next(0), (B) next(1), (C) next(2), (D) next(3));
+        }
+      };
+    }
+  }
+
+  public static class TupleNIterable implements Iterable<TupleN> {
+    private final Iterator<?>[] iters;
+
+    public TupleNIterable(Iterable<?>... iterables) {
+      this.iters = new Iterator[iterables.length];
+      for (int i = 0; i < iters.length; i++) {
+        iters[i] = iterables[i].iterator();
+      }
+    }
+
+    @Override
+    public Iterator<TupleN> iterator() {
+      return new TuplifyIterator<TupleN>(iters) {
+        @Override
+        public TupleN next() {
+          Object[] values = new Object[iters.length];
+          for (int i = 0; i < values.length; i++) {
+            values[i] = next(i);
+          }
+          return new TupleN(values);
+        }
+      };
+    }
+  }
+}
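
Note that TuplifyIterator.hasNext() requires every underlying iterator to have another element, so iteration stops at the shortest input. A small sketch of zipping two in-memory lists with PairIterable (the lists are illustrative):

import java.util.List;

import org.apache.crunch.Pair;
import org.apache.crunch.util.Tuples;

import com.google.common.collect.ImmutableList;

public class TuplesExample {
  public static void main(String[] args) {
    List<String> names = ImmutableList.of("alice", "bob", "carol");
    List<Integer> ages = ImmutableList.of(29, 17, 1729);

    // Yields (alice,29), (bob,17), (carol,1729) in order.
    for (Pair<String, Integer> p : new Tuples.PairIterable<String, Integer>(names, ages)) {
      System.out.println(p.first() + " -> " + p.second());
    }
  }
}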

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/java/org/apache/crunch/util/package-info.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/util/package-info.java b/crunch-core/src/main/java/org/apache/crunch/util/package-info.java
new file mode 100644
index 0000000..94d79a1
--- /dev/null
+++ b/crunch-core/src/main/java/org/apache/crunch/util/package-info.java
@@ -0,0 +1,22 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * An assorted set of utilities.
+ */
+package org.apache.crunch.util;

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/main/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/resources/log4j.properties b/crunch-core/src/main/resources/log4j.properties
new file mode 100644
index 0000000..506b527
--- /dev/null
+++ b/crunch-core/src/main/resources/log4j.properties
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# ***** Set the org.apache.crunch logger level to INFO and its only appender to A.
+log4j.logger.org.apache.crunch=info, A
+
+# ***** A is set to be a ConsoleAppender.
+log4j.appender.A=org.apache.log4j.ConsoleAppender
+# ***** A uses PatternLayout.
+log4j.appender.A.layout=org.apache.log4j.PatternLayout
+log4j.appender.A.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/site/site.xml
----------------------------------------------------------------------
diff --git a/crunch-core/src/site/site.xml b/crunch-core/src/site/site.xml
new file mode 100644
index 0000000..73fbd17
--- /dev/null
+++ b/crunch-core/src/site/site.xml
@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<project name="${project.name}"
+  xmlns="http://maven.apache.org/DECORATION/1.3.0"
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/DECORATION/1.3.0
+                      http://maven.apache.org/xsd/decoration-1.3.0.xsd">
+
+  <body>
+    <!-- Note: Breadcrumbs for Doxia's Markdown parser are currently broken,
+               see https://jira.codehaus.org/browse/DOXIA-472 -->
+    <breadcrumbs>
+      <item name="Apache" href="http://www.apache.org/index.html" />
+      <item name="Crunch" href="../index.html"/>
+    </breadcrumbs>
+
+  </body>
+
+</project>

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/avro/employee.avsc
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/avro/employee.avsc b/crunch-core/src/test/avro/employee.avsc
new file mode 100644
index 0000000..35726e1
--- /dev/null
+++ b/crunch-core/src/test/avro/employee.avsc
@@ -0,0 +1,26 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+{
+"namespace": "org.apache.crunch.test",
+"name": "Employee",
+"type": "record",
+"fields": [
+  {"name": "name", "type": ["string", "null"] },
+  {"name": "salary", "type": "int"},
+  {"name": "department", "type": ["string", "null"] } ]
+} 

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/avro/person.avsc
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/avro/person.avsc b/crunch-core/src/test/avro/person.avsc
new file mode 100644
index 0000000..babd808
--- /dev/null
+++ b/crunch-core/src/test/avro/person.avsc
@@ -0,0 +1,26 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+{
+"namespace": "org.apache.crunch.test",
+"name": "Person",
+"type": "record",
+"fields": [
+  {"name": "name", "type": ["string", "null"] },
+  {"name": "age", "type": "int"},
+  {"name": "siblingnames", "type": {"type": "array", "items": "string"}} ]
+} 
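
In the tests, schemas like this one are compiled into Avro specific-record classes (here org.apache.crunch.test.Person) and wired into pipelines through an Avro PType. A minimal sketch, assuming the generated Person class is on the classpath:

import org.apache.crunch.test.Person;
import org.apache.crunch.types.PType;
import org.apache.crunch.types.avro.Avros;

public class PersonTypeExample {
  public static void main(String[] args) {
    // PType derived from the generated specific-record class.
    PType<Person> personType = Avros.records(Person.class);
    System.out.println(personType.getTypeClass()); // class org.apache.crunch.test.Person
  }
}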

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/AndFnTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/AndFnTest.java b/crunch-core/src/test/java/org/apache/crunch/AndFnTest.java
new file mode 100644
index 0000000..4b00874
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/AndFnTest.java
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+import org.apache.crunch.FilterFn.AndFn;
+import org.apache.hadoop.mapreduce.TaskInputOutputContext;
+import org.junit.Before;
+import org.junit.Test;
+
+public class AndFnTest {
+
+  private FilterFn<Integer> fnA;
+  private FilterFn<Integer> fnB;
+  private AndFn<Integer> andFn;
+
+  @Before
+  public void setUp() {
+    fnA = mock(FilterFn.class);
+    fnB = mock(FilterFn.class);
+    andFn = new AndFn(fnA, fnB);
+  }
+
+  @Test
+  public void testSetContext() {
+    TaskInputOutputContext<?, ?, ?, ?> context = mock(TaskInputOutputContext.class);
+    andFn.setContext(context);
+
+    verify(fnA).setContext(context);
+    verify(fnB).setContext(context);
+  }
+
+  @Test
+  public void testAccept_False() {
+    when(fnA.accept(1)).thenReturn(true);
+    when(fnB.accept(1)).thenReturn(false);
+
+    assertFalse(andFn.accept(1));
+  }
+
+  @Test
+  public void testAccept_True() {
+    when(fnA.accept(1)).thenReturn(true);
+    when(fnB.accept(1)).thenReturn(true);
+
+    assertTrue(andFn.accept(1));
+  }
+
+  @Test
+  public void testCleanup() {
+    andFn.cleanup(mock(Emitter.class));
+
+    verify(fnA).cleanup();
+    verify(fnB).cleanup();
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/CombineFnTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/CombineFnTest.java b/crunch-core/src/test/java/org/apache/crunch/CombineFnTest.java
new file mode 100644
index 0000000..39548e2
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/CombineFnTest.java
@@ -0,0 +1,222 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import static org.apache.crunch.CombineFn.MAX_BIGINTS;
+import static org.apache.crunch.CombineFn.MAX_DOUBLES;
+import static org.apache.crunch.CombineFn.MAX_FLOATS;
+import static org.apache.crunch.CombineFn.MAX_INTS;
+import static org.apache.crunch.CombineFn.MAX_LONGS;
+import static org.apache.crunch.CombineFn.MIN_BIGINTS;
+import static org.apache.crunch.CombineFn.MIN_DOUBLES;
+import static org.apache.crunch.CombineFn.MIN_FLOATS;
+import static org.apache.crunch.CombineFn.MIN_INTS;
+import static org.apache.crunch.CombineFn.MIN_LONGS;
+import static org.apache.crunch.CombineFn.SUM_BIGINTS;
+import static org.apache.crunch.CombineFn.SUM_DOUBLES;
+import static org.apache.crunch.CombineFn.SUM_FLOATS;
+import static org.apache.crunch.CombineFn.SUM_INTS;
+import static org.apache.crunch.CombineFn.SUM_LONGS;
+import static org.junit.Assert.assertEquals;
+
+import java.math.BigInteger;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.crunch.CombineFn.Aggregator;
+import org.apache.crunch.CombineFn.AggregatorFactory;
+import org.apache.crunch.CombineFn.FirstNAggregator;
+import org.apache.crunch.CombineFn.LastNAggregator;
+import org.apache.crunch.CombineFn.MaxNAggregator;
+import org.apache.crunch.CombineFn.MinNAggregator;
+import org.apache.crunch.CombineFn.PairAggregator;
+import org.apache.crunch.CombineFn.QuadAggregator;
+import org.apache.crunch.CombineFn.StringConcatAggregator;
+import org.apache.crunch.CombineFn.TripAggregator;
+import org.apache.crunch.CombineFn.TupleNAggregator;
+import org.junit.Test;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Iterables;
+
+public class CombineFnTest {
+
+  private <T> Iterable<T> applyAggregator(AggregatorFactory<T> a, Iterable<T> values) {
+    return applyAggregator(a.create(), values);
+  }
+
+  private <T> Iterable<T> applyAggregator(Aggregator<T> a, Iterable<T> values) {
+    a.reset();
+    for (T value : values) {
+      a.update(value);
+    }
+    return a.results();
+  }
+
+  @Test
+  public void testSums() {
+    assertEquals(ImmutableList.of(1775L), applyAggregator(SUM_LONGS, ImmutableList.of(29L, 17L, 1729L)));
+
+    assertEquals(ImmutableList.of(1765L), applyAggregator(SUM_LONGS, ImmutableList.of(29L, 7L, 1729L)));
+
+    assertEquals(ImmutableList.of(1775), applyAggregator(SUM_INTS, ImmutableList.of(29, 17, 1729)));
+
+    assertEquals(ImmutableList.of(1775.0f), applyAggregator(SUM_FLOATS, ImmutableList.of(29f, 17f, 1729f)));
+
+    assertEquals(ImmutableList.of(1775.0), applyAggregator(SUM_DOUBLES, ImmutableList.of(29.0, 17.0, 1729.0)));
+
+    assertEquals(
+        ImmutableList.of(new BigInteger("1775")),
+        applyAggregator(SUM_BIGINTS,
+            ImmutableList.of(new BigInteger("29"), new BigInteger("17"), new BigInteger("1729"))));
+  }
+
+  @Test
+  public void testMax() {
+    assertEquals(ImmutableList.of(1729L), applyAggregator(MAX_LONGS, ImmutableList.of(29L, 17L, 1729L)));
+
+    assertEquals(ImmutableList.of(1729), applyAggregator(MAX_INTS, ImmutableList.of(29, 17, 1729)));
+
+    assertEquals(ImmutableList.of(1729.0f), applyAggregator(MAX_FLOATS, ImmutableList.of(29f, 17f, 1729f)));
+
+    assertEquals(ImmutableList.of(1729.0), applyAggregator(MAX_DOUBLES, ImmutableList.of(29.0, 17.0, 1729.0)));
+
+    assertEquals(ImmutableList.of(1745.0f), applyAggregator(MAX_FLOATS, ImmutableList.of(29f, 1745f, 17f, 1729f)));
+
+    assertEquals(
+        ImmutableList.of(new BigInteger("1729")),
+        applyAggregator(MAX_BIGINTS,
+            ImmutableList.of(new BigInteger("29"), new BigInteger("17"), new BigInteger("1729"))));
+  }
+
+  @Test
+  public void testMin() {
+    assertEquals(ImmutableList.of(17L), applyAggregator(MIN_LONGS, ImmutableList.of(29L, 17L, 1729L)));
+
+    assertEquals(ImmutableList.of(17), applyAggregator(MIN_INTS, ImmutableList.of(29, 17, 1729)));
+
+    assertEquals(ImmutableList.of(17.0f), applyAggregator(MIN_FLOATS, ImmutableList.of(29f, 17f, 1729f)));
+
+    assertEquals(ImmutableList.of(17.0), applyAggregator(MIN_DOUBLES, ImmutableList.of(29.0, 17.0, 1729.0)));
+
+    assertEquals(ImmutableList.of(29), applyAggregator(MIN_INTS, ImmutableList.of(29, 170, 1729)));
+
+    assertEquals(
+        ImmutableList.of(new BigInteger("17")),
+        applyAggregator(MIN_BIGINTS,
+            ImmutableList.of(new BigInteger("29"), new BigInteger("17"), new BigInteger("1729"))));
+  }
+
+  @Test
+  public void testMaxN() {
+    assertEquals(ImmutableList.of(98, 1009),
+        applyAggregator(new MaxNAggregator<Integer>(2), ImmutableList.of(17, 34, 98, 29, 1009)));
+  }
+
+  @Test
+  public void testMinN() {
+    assertEquals(ImmutableList.of(17, 29),
+        applyAggregator(new MinNAggregator<Integer>(2), ImmutableList.of(17, 34, 98, 29, 1009)));
+  }
+
+  @Test
+  public void testFirstN() {
+    assertEquals(ImmutableList.of(17, 34),
+        applyAggregator(new FirstNAggregator<Integer>(2), ImmutableList.of(17, 34, 98, 29, 1009)));
+  }
+
+  @Test
+  public void testLastN() {
+    assertEquals(ImmutableList.of(29, 1009),
+        applyAggregator(new LastNAggregator<Integer>(2), ImmutableList.of(17, 34, 98, 29, 1009)));
+  }
+
+  @Test
+  public void testPairs() {
+    List<Pair<Long, Double>> input = ImmutableList.of(Pair.of(1720L, 17.29), Pair.of(9L, -3.14));
+    Aggregator<Pair<Long, Double>> a = new PairAggregator<Long, Double>(SUM_LONGS.create(), MIN_DOUBLES.create());
+    assertEquals(Pair.of(1729L, -3.14), Iterables.getOnlyElement(applyAggregator(a, input)));
+  }
+
+  @Test
+  public void testPairsTwoLongs() {
+    List<Pair<Long, Long>> input = ImmutableList.of(Pair.of(1720L, 1L), Pair.of(9L, 19L));
+    Aggregator<Pair<Long, Long>> a = new PairAggregator<Long, Long>(SUM_LONGS.create(), SUM_LONGS.create());
+    assertEquals(Pair.of(1729L, 20L), Iterables.getOnlyElement(applyAggregator(a, input)));
+  }
+
+  @Test
+  public void testTrips() {
+    List<Tuple3<Float, Double, Double>> input = ImmutableList.of(Tuple3.of(17.29f, 12.2, 0.1),
+        Tuple3.of(3.0f, 1.2, 3.14), Tuple3.of(-1.0f, 14.5, -0.98));
+    Aggregator<Tuple3<Float, Double, Double>> a = new TripAggregator<Float, Double, Double>(MAX_FLOATS.create(),
+        MAX_DOUBLES.create(), MIN_DOUBLES.create());
+    assertEquals(Tuple3.of(17.29f, 14.5, -0.98), Iterables.getOnlyElement(applyAggregator(a, input)));
+  }
+
+  @Test
+  public void testQuads() {
+    List<Tuple4<Float, Double, Double, Integer>> input = ImmutableList.of(Tuple4.of(17.29f, 12.2, 0.1, 1),
+        Tuple4.of(3.0f, 1.2, 3.14, 2), Tuple4.of(-1.0f, 14.5, -0.98, 3));
+    Aggregator<Tuple4<Float, Double, Double, Integer>> a = new QuadAggregator<Float, Double, Double, Integer>(
+        MAX_FLOATS.create(), MAX_DOUBLES.create(), MIN_DOUBLES.create(), SUM_INTS.create());
+    assertEquals(Tuple4.of(17.29f, 14.5, -0.98, 6), Iterables.getOnlyElement(applyAggregator(a, input)));
+  }
+
+  @Test
+  public void testTupleN() {
+    List<TupleN> input = ImmutableList.of(new TupleN(1, 3.0, 1, 2.0, 4L), new TupleN(4, 17.0, 1, 9.7, 12L));
+    Aggregator<TupleN> a = new TupleNAggregator(MIN_INTS.create(), SUM_DOUBLES.create(), MAX_INTS.create(),
+        MIN_DOUBLES.create(), MAX_LONGS.create());
+    assertEquals(new TupleN(1, 20.0, 1, 2.0, 12L), Iterables.getOnlyElement(applyAggregator(a, input)));
+  }
+
+  @Test
+  public void testConcatenation() {
+    String[] arrayNull = new String[] { null, "" };
+    assertEquals(ImmutableList.of("foofoobarbar"), applyAggregator(
+        new StringConcatAggregator("", true), ImmutableList.of("foo", "foobar", "bar")));
+    assertEquals(ImmutableList.of("foo/foobar/bar"), applyAggregator(
+        new StringConcatAggregator("/", false), ImmutableList.of("foo", "foobar", "bar")));
+    assertEquals(ImmutableList.of("  "), applyAggregator(
+        new StringConcatAggregator(" ", true), ImmutableList.of(" ", "")));
+    assertEquals(ImmutableList.of(""), applyAggregator(
+        new StringConcatAggregator(" ", true), Arrays.asList(arrayNull)));
+    assertEquals(ImmutableList.of("foo bar"), applyAggregator(
+        new StringConcatAggregator(" ", true, 20, 3), ImmutableList.of("foo", "foobar", "bar")));
+    assertEquals(ImmutableList.of("foo foobar"), applyAggregator(
+        new StringConcatAggregator(" ", true, 10, 6), ImmutableList.of("foo", "foobar", "bar")));
+    assertEquals(ImmutableList.of("foo bar"), applyAggregator(
+        new StringConcatAggregator(" ", true, 9, 6), ImmutableList.of("foo", "foobar", "bar")));
+  }
+
+  @Test
+  public void testConcatenationReset() {
+    StringConcatAggregator a = new StringConcatAggregator(" ", true, 10, 6);
+
+    assertEquals(ImmutableList.of("foo foobar"), applyAggregator(a, ImmutableList.of("foo", "foobar", "bar")));
+    assertEquals(ImmutableList.of("foo foobar"), applyAggregator(a, ImmutableList.of("foo", "foobar", "bar")));
+  }
+
+  @Test(expected = NullPointerException.class)
+  public void testConcatenationNullException() {
+    String[] arrayNull = new String[] { null, "" };
+    assertEquals(ImmutableList.of(""), applyAggregator(
+        new StringConcatAggregator(" ", false), Arrays.asList(arrayNull)));
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/NotFnTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/NotFnTest.java b/crunch-core/src/test/java/org/apache/crunch/NotFnTest.java
new file mode 100644
index 0000000..8af17a2
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/NotFnTest.java
@@ -0,0 +1,72 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+import org.apache.crunch.FilterFn.NotFn;
+import org.apache.hadoop.mapreduce.TaskInputOutputContext;
+import org.junit.Before;
+import org.junit.Test;
+
+public class NotFnTest {
+  
+  private FilterFn<Integer> base;
+  private NotFn<Integer> notFn;
+  
+  @Before
+  public void setUp() {
+    base = mock(FilterFn.class);
+    notFn = new NotFn(base);
+  }
+
+  @Test
+  public void testSetContext() {
+    TaskInputOutputContext<?, ?, ?, ?> context = mock(TaskInputOutputContext.class);
+    
+    notFn.setContext(context);
+    
+    verify(base).setContext(context);
+  }
+
+  @Test
+  public void testAccept_True() {
+    when(base.accept(1)).thenReturn(true);
+    
+    assertFalse(notFn.accept(1));
+  }
+  
+  @Test
+  public void testAccept_False() {
+    when(base.accept(1)).thenReturn(false);
+    
+    assertTrue(notFn.accept(1));
+  }
+
+  @Test
+  public void testCleanupEmitterOfT() {
+    notFn.cleanup(mock(Emitter.class));
+    
+    verify(base).cleanup();
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/OrFnTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/OrFnTest.java b/crunch-core/src/test/java/org/apache/crunch/OrFnTest.java
new file mode 100644
index 0000000..fde2376
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/OrFnTest.java
@@ -0,0 +1,78 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+import org.apache.crunch.FilterFn.OrFn;
+import org.apache.hadoop.mapreduce.TaskInputOutputContext;
+import org.junit.Before;
+import org.junit.Test;
+
+public class OrFnTest {
+
+  private FilterFn<Integer> fnA;
+  private FilterFn<Integer> fnB;
+  private OrFn<Integer> orFn;
+
+  @Before
+  public void setUp() {
+    fnA = mock(FilterFn.class);
+    fnB = mock(FilterFn.class);
+    orFn = new OrFn(fnA, fnB);
+  }
+
+  @Test
+  public void testSetContext() {
+    TaskInputOutputContext<?, ?, ?, ?> context = mock(TaskInputOutputContext.class);
+
+    orFn.setContext(context);
+
+    verify(fnA).setContext(context);
+    verify(fnB).setContext(context);
+  }
+
+  @Test
+  public void testAccept_True() {
+    when(fnA.accept(1)).thenReturn(false);
+    when(fnB.accept(1)).thenReturn(true);
+
+    assertTrue(orFn.accept(1));
+  }
+
+  @Test
+  public void testAccept_False() {
+    when(fnA.accept(1)).thenReturn(false);
+    when(fnB.accept(1)).thenReturn(false);
+
+    assertFalse(orFn.accept(1));
+  }
+
+  @Test
+  public void testCleanupEmitterOfT() {
+    orFn.cleanup(mock(Emitter.class));
+
+    verify(fnA).cleanup();
+    verify(fnB).cleanup();
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/PairTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/PairTest.java b/crunch-core/src/test/java/org/apache/crunch/PairTest.java
new file mode 100644
index 0000000..106413c
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/PairTest.java
@@ -0,0 +1,66 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import org.junit.Test;
+
+public class PairTest {
+
+  @Test
+  public void testPairConstructor() {
+    Pair<String, Integer> pair = new Pair<String, Integer>("brock", 45);
+    test(pair);
+  }
+
+  @Test
+  public void testPairOf() {
+    Pair<String, Integer> pair = Pair.of("brock", 45);
+    test(pair);
+  }
+
+  protected void test(Pair<String, Integer> pair) {
+    assertTrue(pair.size() == 2);
+
+    assertEquals("brock", pair.first());
+    assertEquals(new Integer(45), pair.second());
+    assertEquals(Pair.of("brock", 45), pair);
+
+    assertEquals("brock", pair.get(0));
+    assertEquals(new Integer(45), pair.get(1));
+
+    try {
+      pair.get(-1);
+      fail();
+    } catch (IndexOutOfBoundsException e) {
+      // expected
+    }
+  }
+
+  @Test
+  public void testPairComparisons() {
+    assertEquals(0, Pair.of(null, null).compareTo(Pair.of(null, null)));
+    assertEquals(0, Pair.of(1, 2).compareTo(Pair.of(1, 2)));
+    assertTrue(Pair.of(2, "a").compareTo(Pair.of(1, "a")) > 0);
+    assertTrue(Pair.of("a", 2).compareTo(Pair.of("a", 1)) > 0);
+    assertTrue(Pair.of(null, 17).compareTo(Pair.of(null, 29)) < 0);
+  }
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/TupleTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/TupleTest.java b/crunch-core/src/test/java/org/apache/crunch/TupleTest.java
new file mode 100644
index 0000000..b07ec3f
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/TupleTest.java
@@ -0,0 +1,139 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import org.apache.crunch.types.TupleFactory;
+import org.junit.Test;
+
+public class TupleTest {
+  private String first = "foo";
+  private Integer second = 1729;
+  private Double third = 64.2;
+  private Boolean fourth = false;
+  private Float fifth = 17.29f;
+
+  @Test
+  public void testTuple3() {
+    Tuple3<String, Integer, Double> t = new Tuple3<String, Integer, Double>(first, second, third);
+    assertEquals(3, t.size());
+    assertEquals(first, t.first());
+    assertEquals(second, t.second());
+    assertEquals(third, t.third());
+    assertEquals(first, t.get(0));
+    assertEquals(second, t.get(1));
+    assertEquals(third, t.get(2));
+    try {
+      t.get(-1);
+      fail();
+    } catch (IndexOutOfBoundsException e) {
+      // expected
+    }
+  }
+
+  @Test
+  public void testTuple3Equality() {
+    Tuple3<String, Integer, Double> t = new Tuple3<String, Integer, Double>(first, second, third);
+    assertTrue(t.equals(new Tuple3(first, second, third)));
+    assertFalse(t.equals(new Tuple3(first, null, third)));
+    assertFalse((new Tuple3(null, null, null)).equals(t));
+    assertTrue((new Tuple3(first, null, null)).equals(new Tuple3(first, null, null)));
+  }
+
+  @Test
+  public void testTuple4() {
+    Tuple4<String, Integer, Double, Boolean> t = new Tuple4<String, Integer, Double, Boolean>(first, second, third,
+        fourth);
+    assertEquals(4, t.size());
+    assertEquals(first, t.first());
+    assertEquals(second, t.second());
+    assertEquals(third, t.third());
+    assertEquals(fourth, t.fourth());
+    assertEquals(first, t.get(0));
+    assertEquals(second, t.get(1));
+    assertEquals(third, t.get(2));
+    assertEquals(fourth, t.get(3));
+    try {
+      t.get(-1);
+      fail();
+    } catch (IndexOutOfBoundsException e) {
+      // expected
+    }
+  }
+
+  @Test
+  public void testTuple4Equality() {
+    Tuple4<String, Integer, Double, Boolean> t = new Tuple4<String, Integer, Double, Boolean>(first, second, third,
+        fourth);
+    assertFalse(t.equals(new Tuple3(first, second, third)));
+    assertFalse(t.equals(new Tuple4(first, null, third, null)));
+    assertFalse((new Tuple4(null, null, null, null)).equals(t));
+    assertTrue((new Tuple4(first, null, third, null)).equals(new Tuple4(first, null, third, null)));
+  }
+
+  @Test
+  public void testTupleN() {
+    TupleN t = new TupleN(first, second, third, fourth, fifth);
+    assertEquals(5, t.size());
+    assertEquals(first, t.get(0));
+    assertEquals(second, t.get(1));
+    assertEquals(third, t.get(2));
+    assertEquals(fourth, t.get(3));
+    assertEquals(fifth, t.get(4));
+    try {
+      t.get(-1);
+      fail();
+    } catch (IndexOutOfBoundsException e) {
+      // expected
+    }
+  }
+
+  @Test
+  public void testTupleNEquality() {
+    TupleN t = new TupleN(first, second, third, fourth, fifth);
+    assertTrue(t.equals(new TupleN(first, second, third, fourth, fifth)));
+    assertFalse(t.equals(new TupleN(first, null, third, null)));
+    assertFalse((new TupleN(null, null, null, null, null)).equals(t));
+    assertTrue((new TupleN(first, second, third, null, null)).equals(new TupleN(first, second, third, null, null)));
+  }
+
+  @Test
+  public void testTupleFactory() {
+    checkTuple(TupleFactory.PAIR.makeTuple("a", "b"), Pair.class, "a", "b");
+    checkTuple(TupleFactory.TUPLE3.makeTuple("a", "b", "c"), Tuple3.class, "a", "b", "c");
+    checkTuple(TupleFactory.TUPLE4.makeTuple("a", "b", "c", "d"), Tuple4.class, "a", "b", "c", "d");
+    checkTuple(TupleFactory.TUPLEN.makeTuple("a", "b", "c", "d", "e"), TupleN.class, "a", "b", "c", "d", "e");
+
+    checkTuple(TupleFactory.TUPLEN.makeTuple("a", "b"), TupleN.class, "a", "b");
+    checkTuple(TupleFactory.TUPLEN.makeTuple("a", "b", "c"), TupleN.class, "a", "b", "c");
+    checkTuple(TupleFactory.TUPLEN.makeTuple("a", "b", "c", "d"), TupleN.class, "a", "b", "c", "d");
+    checkTuple(TupleFactory.TUPLEN.makeTuple("a", "b", "c", "d", "e"), TupleN.class, "a", "b", "c", "d", "e");
+  }
+
+  private void checkTuple(Tuple t, Class<? extends Tuple> type, Object... values) {
+    assertEquals(type, t.getClass());
+    assertEquals(values.length, t.size());
+    for (int i = 0; i < values.length; i++)
+      assertEquals(values[i], t.get(i));
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch-core/src/test/java/org/apache/crunch/WriteModeTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/WriteModeTest.java b/crunch-core/src/test/java/org/apache/crunch/WriteModeTest.java
new file mode 100644
index 0000000..e99ac7b
--- /dev/null
+++ b/crunch-core/src/test/java/org/apache/crunch/WriteModeTest.java
@@ -0,0 +1,103 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.crunch;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.crunch.Target.WriteMode;
+import org.apache.crunch.impl.mem.MemPipeline;
+import org.apache.crunch.io.To;
+import org.apache.crunch.test.TemporaryPath;
+import org.apache.crunch.test.TemporaryPaths;
+import org.apache.crunch.types.avro.Avros;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.junit.Rule;
+import org.junit.Test;
+
+import com.google.common.collect.ImmutableList;
+
+public class WriteModeTest {
+
+  @Rule
+  public TemporaryPath tmpDir = TemporaryPaths.create();
+
+  @Test(expected=CrunchRuntimeException.class)
+  public void testDefault() throws Exception {
+    run(null, true);
+  }
+
+  @Test(expected=CrunchRuntimeException.class)
+  public void testDefaultNoRun() throws Exception {
+    run(null, false);
+  }
+  
+  @Test
+  public void testOverwrite() throws Exception {
+    Path p = run(WriteMode.OVERWRITE, true);
+    PCollection<String> lines = MemPipeline.getInstance().readTextFile(p.toString());
+    assertEquals(ImmutableList.of("some", "string", "values"), lines.materialize());
+  }
+  
+  @Test(expected=CrunchRuntimeException.class)
+  public void testOverwriteNoRun() throws Exception {
+    run(WriteMode.OVERWRITE, false);
+  }
+  
+  @Test
+  public void testAppend() throws Exception {
+    Path p = run(WriteMode.APPEND, true);
+    PCollection<String> lines = MemPipeline.getInstance().readTextFile(p.toString());
+    assertEquals(ImmutableList.of("some", "string", "values", "some", "string", "values"),
+        lines.materialize());
+  }
+  
+  @Test
+  public void testAppendNoRun() throws Exception {
+    Path p = run(WriteMode.APPEND, false);
+    PCollection<String> lines = MemPipeline.getInstance().readTextFile(p.toString());
+    assertEquals(ImmutableList.of("some", "string", "values", "some", "string", "values"),
+        lines.materialize());
+  }
+  
+  Path run(WriteMode writeMode, boolean doRun) throws Exception {
+    Path output = tmpDir.getPath("existing");
+    FileSystem fs = FileSystem.get(tmpDir.getDefaultConfiguration());
+    if (fs.exists(output)) {
+      fs.delete(output, true);
+    }
+    Pipeline p = MemPipeline.getInstance();
+    PCollection<String> data = MemPipeline.typedCollectionOf(Avros.strings(),
+        ImmutableList.of("some", "string", "values"));
+    data.write(To.textFile(output));
+
+    if (doRun) {
+      p.run();
+    }
+    
+    if (writeMode == null) {
+      data.write(To.textFile(output));
+    } else {
+      data.write(To.textFile(output), writeMode);
+    }
+    
+    p.run();
+    
+    return output;
+  }
+}