Posted to commits@jena.apache.org by rv...@apache.org on 2015/01/27 18:28:05 UTC

[12/59] [abbrv] jena git commit: Rebrand to Jena Elephas per community vote

http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trix/TriXOutputTest.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trix/TriXOutputTest.java b/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trix/TriXOutputTest.java
deleted file mode 100644
index 9b6e307..0000000
--- a/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trix/TriXOutputTest.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- *     
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.io.output.trix;
-
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.OutputFormat;
-import org.apache.jena.hadoop.rdf.io.output.AbstractQuadOutputFormatTests;
-import org.apache.jena.hadoop.rdf.types.QuadWritable;
-import org.apache.jena.riot.Lang;
-
-/**
- * Tests for TriX output format
- */
-public class TriXOutputTest extends AbstractQuadOutputFormatTests {
-
-    @Override
-    protected String getFileExtension() {
-        return ".trix";
-    }
-
-    @Override
-    protected Lang getRdfLanguage() {
-        return Lang.TRIX;
-    }
-
-    @Override
-    protected OutputFormat<NullWritable, QuadWritable> getOutputFormat() {
-        return new TriXOutputFormat<NullWritable>();
-    }
-
-}
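
An output format such as the TriXOutputFormat exercised by this test is wired into a job through the standard Hadoop MapReduce API. A minimal driver sketch follows, assuming an NQuads-to-TriX conversion; the driver class, mapper, and paths are illustrative assumptions, not part of this commit:

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.jena.hadoop.rdf.io.input.nquads.NQuadsInputFormat;
    import org.apache.jena.hadoop.rdf.io.output.trix.TriXOutputFormat;
    import org.apache.jena.hadoop.rdf.types.QuadWritable;

    public class TriXConversionDriver { // hypothetical driver, for illustration only

        // Re-key each quad from its byte offset to NullWritable for the writer
        public static class QuadMapper extends Mapper<LongWritable, QuadWritable, NullWritable, QuadWritable> {
            @Override
            protected void map(LongWritable key, QuadWritable value, Context context)
                    throws IOException, InterruptedException {
                context.write(NullWritable.get(), value);
            }
        }

        public static void main(String[] args) throws Exception {
            Job job = Job.getInstance(new Configuration());
            job.setJarByClass(TriXConversionDriver.class);
            job.setJobName("NQuads to TriX");

            job.setMapperClass(QuadMapper.class);
            job.setNumReduceTasks(0); // map-only: mapper output goes straight to the output format

            job.setInputFormatClass(NQuadsInputFormat.class);
            job.setOutputFormatClass(TriXOutputFormat.class);
            job.setOutputKeyClass(NullWritable.class);
            job.setOutputValueClass(QuadWritable.class);

            FileInputFormat.setInputPaths(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    }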

http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/BatchedTurtleOutputTest.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/BatchedTurtleOutputTest.java b/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/BatchedTurtleOutputTest.java
deleted file mode 100644
index a6c4d70..0000000
--- a/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/BatchedTurtleOutputTest.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- *     
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.io.output.turtle;
-
-import java.util.Arrays;
-import java.util.Collection;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.OutputFormat;
-import org.apache.jena.hadoop.rdf.io.RdfIOConstants;
-import org.apache.jena.hadoop.rdf.io.output.AbstractTripleOutputFormatTests;
-import org.apache.jena.hadoop.rdf.types.TripleWritable;
-import org.apache.jena.riot.Lang;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-import org.junit.runners.Parameterized.Parameters;
-
-
-/**
- * Tests for Turtle output
- * 
- * 
- * 
- */
-@RunWith(Parameterized.class)
-public class BatchedTurtleOutputTest extends AbstractTripleOutputFormatTests {
-
-    static long $bs1 = RdfIOConstants.DEFAULT_OUTPUT_BATCH_SIZE;
-    static long $bs2 = 1000;
-    static long $bs3 = 100;
-    static long $bs4 = 1;
-
-    /**
-     * @return Test parameters
-     */
-    @Parameters
-    public static Collection<Object[]> data() {
-        return Arrays.asList(new Object[][] { { $bs1 }, { $bs2 }, { $bs3 }, { $bs4 } });
-    }
-
-    private final long batchSize;
-
-    /**
-     * Creates new tests
-     * 
-     * @param batchSize
-     *            Batch size
-     */
-    public BatchedTurtleOutputTest(long batchSize) {
-        this.batchSize = batchSize;
-    }
-
-    @Override
-    protected String getFileExtension() {
-        return ".ttl";
-    }
-
-    @Override
-    protected Lang getRdfLanguage() {
-        return Lang.TURTLE;
-    }
-    
-    @Override
-    protected Configuration prepareConfiguration() {
-        Configuration config = super.prepareConfiguration();
-        config.setLong(RdfIOConstants.OUTPUT_BATCH_SIZE, this.batchSize);
-        return config;
-    }
-
-    @Override
-    protected OutputFormat<NullWritable, TripleWritable> getOutputFormat() {
-        return new BatchedTurtleOutputFormat<NullWritable>();
-    }
-
-}
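
The batch size these parameterised tests sweep is an ordinary Hadoop configuration property, so a job using the batched writer tunes it the same way the test's prepareConfiguration() does. A short sketch; the helper class is illustrative:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.jena.hadoop.rdf.io.RdfIOConstants;

    public class BatchSizeConfig { // hypothetical helper, for illustration only
        public static Configuration withBatchSize(long batchSize) {
            Configuration config = new Configuration();
            // Same property the test sets above; controls how many tuples the
            // batched writer accumulates before writing them out as a block
            config.setLong(RdfIOConstants.OUTPUT_BATCH_SIZE, batchSize);
            return config;
        }
    }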

http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/StreamedTurtleOutputTest.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/StreamedTurtleOutputTest.java b/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/StreamedTurtleOutputTest.java
deleted file mode 100644
index d8843d3..0000000
--- a/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/StreamedTurtleOutputTest.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- *     
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.io.output.turtle;
-
-import java.util.Arrays;
-import java.util.Collection;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.OutputFormat;
-import org.apache.jena.hadoop.rdf.io.RdfIOConstants;
-import org.apache.jena.hadoop.rdf.io.output.AbstractTripleOutputFormatTests;
-import org.apache.jena.hadoop.rdf.types.TripleWritable;
-import org.apache.jena.riot.Lang;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-import org.junit.runners.Parameterized.Parameters;
-
-
-/**
- * Tests for Turtle output
- * 
- * 
- * 
- */
-@RunWith(Parameterized.class)
-public class StreamedTurtleOutputTest extends AbstractTripleOutputFormatTests {
-
-    static long $bs1 = RdfIOConstants.DEFAULT_OUTPUT_BATCH_SIZE;
-    static long $bs2 = 1000;
-    static long $bs3 = 100;
-    static long $bs4 = 1;
-
-    /**
-     * @return Test parameters
-     */
-    @Parameters
-    public static Collection<Object[]> data() {
-        return Arrays.asList(new Object[][] { { $bs1 }, { $bs2 }, { $bs3 }, { $bs4 } });
-    }
-
-    private final long batchSize;
-
-    /**
-     * Creates new tests
-     * 
-     * @param batchSize
-     *            Batch size
-     */
-    public StreamedTurtleOutputTest(long batchSize) {
-        this.batchSize = batchSize;
-    }
-
-    @Override
-    protected String getFileExtension() {
-        return ".ttl";
-    }
-
-    @Override
-    protected Lang getRdfLanguage() {
-        return Lang.TURTLE;
-    }
-    
-    @Override
-    protected Configuration prepareConfiguration() {
-        Configuration config = super.prepareConfiguration();
-        config.setLong(RdfIOConstants.OUTPUT_BATCH_SIZE, this.batchSize);
-        return config;
-    }
-
-    @Override
-    protected OutputFormat<NullWritable, TripleWritable> getOutputFormat() {
-        return new TurtleOutputFormat<NullWritable>();
-    }
-
-}
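
Note that this streamed variant returns the plain TurtleOutputFormat, whereas the batched test above returns BatchedTurtleOutputFormat; a job picks between the two writers simply by choosing the output format class. A sketch, with an illustrative helper class:

    import org.apache.hadoop.mapreduce.Job;
    import org.apache.jena.hadoop.rdf.io.output.turtle.BatchedTurtleOutputFormat;
    import org.apache.jena.hadoop.rdf.io.output.turtle.TurtleOutputFormat;

    public class TurtleWriterChoice { // hypothetical helper, for illustration only
        public static void configure(Job job, boolean batched) {
            if (batched) {
                // Accumulates tuples up to the configured batch size before writing
                job.setOutputFormatClass(BatchedTurtleOutputFormat.class);
            } else {
                // Writes tuples as they arrive
                job.setOutputFormatClass(TurtleOutputFormat.class);
            }
        }
    }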

http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/TurtleBlankNodeOutputTests.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/TurtleBlankNodeOutputTests.java b/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/TurtleBlankNodeOutputTests.java
deleted file mode 100644
index 8dcae4e..0000000
--- a/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/TurtleBlankNodeOutputTests.java
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- *     
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.io.output.turtle;
-
-import java.io.File;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.OutputFormat;
-import org.apache.jena.hadoop.rdf.io.RdfIOConstants;
-import org.apache.jena.hadoop.rdf.types.TripleWritable;
-import org.apache.jena.riot.RDFDataMgr;
-import org.junit.Assert;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-import org.junit.runners.Parameterized.Parameters;
-
-import com.hp.hpl.jena.datatypes.xsd.XSDDatatype;
-import com.hp.hpl.jena.graph.Node;
-import com.hp.hpl.jena.graph.NodeFactory;
-import com.hp.hpl.jena.graph.Triple;
-import com.hp.hpl.jena.rdf.model.Model;
-import com.hp.hpl.jena.rdf.model.ResIterator;
-import com.hp.hpl.jena.rdf.model.Resource;
-
-/**
- * Tests for Turtle output with blank nodes
- * 
- * 
- * 
- */
-@RunWith(Parameterized.class)
-public class TurtleBlankNodeOutputTests extends StreamedTurtleOutputTest {
-
-	static long $bs1 = RdfIOConstants.DEFAULT_OUTPUT_BATCH_SIZE;
-	static long $bs2 = 1000;
-	static long $bs3 = 100;
-	static long $bs4 = 1;
-
-	/**
-	 * @return Test parameters
-	 */
-	@Parameters
-	public static Collection<Object[]> data() {
-		return Arrays.asList(new Object[][] { { $bs1 }, { $bs2 }, { $bs3 },
-				{ $bs4 } });
-	}
-
-	/**
-	 * Creates new tests
-	 * 
-	 * @param batchSize
-	 *            Batch size
-	 */
-	public TurtleBlankNodeOutputTests(long batchSize) {
-		super(batchSize);
-	}
-
-	@Override
-	protected Iterator<TripleWritable> generateTuples(int num) {
-		List<TripleWritable> ts = new ArrayList<TripleWritable>();
-		Node subject = NodeFactory.createAnon();
-		for (int i = 0; i < num; i++) {
-			Triple t = new Triple(subject,
-					NodeFactory.createURI("http://example.org/predicate"),
-					NodeFactory.createLiteral(Integer.toString(i),
-							XSDDatatype.XSDinteger));
-			ts.add(new TripleWritable(t));
-		}
-		return ts.iterator();
-	}
-
-	@Override
-	protected void checkTuples(File f, long expected) {
-		super.checkTuples(f, expected);
-
-		Model m = RDFDataMgr.loadModel("file://" + f.getAbsolutePath(),
-				this.getRdfLanguage());
-		ResIterator iter = m.listSubjects();
-		Set<Node> subjects = new HashSet<Node>();
-		while (iter.hasNext()) {
-			Resource res = iter.next();
-			Assert.assertTrue(res.isAnon());
-			subjects.add(res.asNode());
-		}
-		// Should only be one subject unless the data was empty in which case
-		// there will be zero subjects
-		Assert.assertEquals(expected == 0 ? 0 : 1, subjects.size());
-	}
-
-	@Override
-	protected OutputFormat<NullWritable, TripleWritable> getOutputFormat() {
-		return new TurtleOutputFormat<NullWritable>();
-	}
-
-}
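
The invariant this test pins down is blank node identity: a Node created once and reused must come back out of the writer as a single subject. With the Jena API of this era (com.hp.hpl.jena packages) the distinction looks like the sketch below; the class name is illustrative:

    import com.hp.hpl.jena.graph.Node;
    import com.hp.hpl.jena.graph.NodeFactory;
    import com.hp.hpl.jena.graph.Triple;

    public class BlankNodeIdentity { // hypothetical example class
        public static void main(String[] args) {
            // One blank node shared across triples: the output should contain
            // exactly one subject, as checkTuples() above asserts
            Node subject = NodeFactory.createAnon();
            Triple t1 = new Triple(subject, NodeFactory.createURI("http://example.org/predicate"),
                    NodeFactory.createLiteral("1"));
            Triple t2 = new Triple(subject, NodeFactory.createURI("http://example.org/predicate"),
                    NodeFactory.createLiteral("2"));

            // Each createAnon() call mints a fresh, distinct blank node
            Node other = NodeFactory.createAnon();
            System.out.println(subject.equals(other));                    // false
            System.out.println(t1.getSubject().equals(t2.getSubject())); // true
        }
    }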

http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/registry/TestHadoopRdfIORegistry.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/registry/TestHadoopRdfIORegistry.java b/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/registry/TestHadoopRdfIORegistry.java
deleted file mode 100644
index 2eae232..0000000
--- a/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/registry/TestHadoopRdfIORegistry.java
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- *     
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.jena.hadoop.rdf.io.registry;
-
-import java.io.IOException;
-import java.io.StringWriter;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.jena.hadoop.rdf.types.QuadWritable;
-import org.apache.jena.hadoop.rdf.types.TripleWritable;
-import org.apache.jena.riot.Lang;
-import org.apache.jena.riot.RDFLanguages;
-import org.junit.Assert;
-import org.junit.Test;
-
-/**
- * Tests for the {@link HadoopRdfIORegistry}
- */
-public class TestHadoopRdfIORegistry {
-
-    private void testLang(Lang lang, boolean triples, boolean quads, boolean writesSupported) {
-        Assert.assertEquals(triples, HadoopRdfIORegistry.hasTriplesReader(lang));
-        Assert.assertEquals(quads, HadoopRdfIORegistry.hasQuadReader(lang));
-
-        // Some formats may be asymmetric
-        if (writesSupported) {
-            Assert.assertEquals(triples, HadoopRdfIORegistry.hasTriplesWriter(lang));
-            Assert.assertEquals(quads, HadoopRdfIORegistry.hasQuadWriter(lang));
-        } else {
-            Assert.assertFalse(HadoopRdfIORegistry.hasTriplesWriter(lang));
-            Assert.assertFalse(HadoopRdfIORegistry.hasQuadWriter(lang));
-        }
-
-        if (triples) {
-            // Check that triples are supported
-            RecordReader<LongWritable, TripleWritable> tripleReader;
-            try {
-                tripleReader = HadoopRdfIORegistry.createTripleReader(lang);
-                Assert.assertNotNull(tripleReader);
-            } catch (IOException e) {
-                Assert.fail("Registry indicates that " + lang.getName()
-                        + " can read triples but fails to produce a triple reader when asked: " + e.getMessage());
-            }
-
-            if (writesSupported) {
-                RecordWriter<NullWritable, TripleWritable> tripleWriter;
-                try {
-                    tripleWriter = HadoopRdfIORegistry.createTripleWriter(lang, new StringWriter(), new Configuration(
-                            false));
-                    Assert.assertNotNull(tripleWriter);
-                } catch (IOException e) {
-                    Assert.fail("Registry indicates that " + lang.getName()
-                            + " can write triples but fails to produce a triple writer when asked: " + e.getMessage());
-                }
-            }
-        } else {
-            // Check that triples are not supported
-            try {
-                HadoopRdfIORegistry.createTripleReader(lang);
-                Assert.fail("Registry indicates that " + lang.getName()
-                        + " cannot read triples but produced a triple reader when asked (error was expected)");
-            } catch (IOException e) {
-                // This is expected
-            }
-            try {
-                HadoopRdfIORegistry.createTripleWriter(lang, new StringWriter(), new Configuration(false));
-                Assert.fail("Registry indicates that " + lang.getName()
-                        + " cannot write triples but produced a triple write when asked (error was expected)");
-            } catch (IOException e) {
-                // This is expected
-            }
-        }
-
-        if (quads) {
-            // Check that quads are supported
-            RecordReader<LongWritable, QuadWritable> quadReader;
-            try {
-                quadReader = HadoopRdfIORegistry.createQuadReader(lang);
-                Assert.assertNotNull(quadReader);
-            } catch (IOException e) {
-                Assert.fail("Registry indicates that " + lang.getName()
-                        + " can read quads but fails to produce a quad reader when asked: " + e.getMessage());
-            }
-
-            if (writesSupported) {
-                RecordWriter<NullWritable, QuadWritable> quadWriter;
-                try {
-                    quadWriter = HadoopRdfIORegistry.createQuadWriter(lang, new StringWriter(),
-                            new Configuration(false));
-                    Assert.assertNotNull(quadWriter);
-                } catch (IOException e) {
-                    Assert.fail("Registry indicates that " + lang.getName()
-                            + " can write quads but fails to produce a triple writer when asked: " + e.getMessage());
-                }
-            }
-        } else {
-            try {
-                HadoopRdfIORegistry.createQuadReader(lang);
-                Assert.fail("Registry indicates that " + lang.getName()
-                        + " cannot read quads but produced a quad reader when asked (error was expected)");
-            } catch (IOException e) {
-                // This is expected
-            }
-            try {
-                HadoopRdfIORegistry.createQuadWriter(lang, new StringWriter(), new Configuration(false));
-                Assert.fail("Registry indicates that " + lang.getName()
-                        + " cannot write quads but produced a quad writer when asked (error was expected)");
-            } catch (IOException e) {
-                // This is expected
-            }
-        }
-    }
-
-    @Test
-    public void json_ld_registered() {
-        testLang(Lang.JSONLD, true, true, true);
-    }
-
-    @Test
-    public void nquads_registered() {
-        testLang(Lang.NQUADS, false, true, true);
-        testLang(Lang.NQ, false, true, true);
-    }
-
-    @Test
-    public void ntriples_registered() {
-        testLang(Lang.NTRIPLES, true, false, true);
-        testLang(Lang.NT, true, false, true);
-    }
-
-    @Test
-    public void rdf_json_registered() {
-        testLang(Lang.RDFJSON, true, false, true);
-    }
-
-    @Test
-    public void rdf_xml_registered() {
-        testLang(Lang.RDFXML, true, false, true);
-    }
-
-    @Test
-    public void rdf_thrift_registered() {
-        testLang(RDFLanguages.THRIFT, true, true, true);
-    }
-
-    @Test
-    public void trig_registered() {
-        testLang(Lang.TRIG, false, true, true);
-    }
-
-    @Test
-    public void trix_registered() {
-        testLang(Lang.TRIX, false, true, true);
-    }
-
-    @Test
-    public void turtle_registered() {
-        testLang(Lang.TURTLE, true, false, true);
-        testLang(Lang.TTL, true, false, true);
-        testLang(Lang.N3, true, false, true);
-    }
-
-    @Test
-    public void unregistered() {
-        testLang(Lang.RDFNULL, false, false, true);
-    }
-}
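
Outside the tests, the registry is consulted the same way: the create* methods return a reader or writer for a registered language and throw IOException otherwise, which is exactly the contract testLang() asserts. A brief sketch with an illustrative class name:

    import java.io.IOException;

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.mapreduce.RecordReader;
    import org.apache.jena.hadoop.rdf.io.registry.HadoopRdfIORegistry;
    import org.apache.jena.hadoop.rdf.types.QuadWritable;
    import org.apache.jena.hadoop.rdf.types.TripleWritable;
    import org.apache.jena.riot.Lang;

    public class RegistryLookup { // hypothetical example class
        public static void main(String[] args) {
            try {
                // TriG is registered for quads, so this succeeds
                RecordReader<LongWritable, QuadWritable> quadReader =
                        HadoopRdfIORegistry.createQuadReader(Lang.TRIG);
                System.out.println("Quad reader: " + quadReader);
            } catch (IOException e) {
                System.err.println("No quad reader registered for TriG");
            }
            try {
                // ...but it is not registered for triples, so this throws
                RecordReader<LongWritable, TripleWritable> tripleReader =
                        HadoopRdfIORegistry.createTripleReader(Lang.TRIG);
                System.out.println("Triple reader: " + tripleReader);
            } catch (IOException e) {
                System.err.println("No triple reader registered for TriG");
            }
        }
    }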

http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/hadoop-rdf-stats/hadoop-job.xml
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-stats/hadoop-job.xml b/jena-hadoop-rdf/hadoop-rdf-stats/hadoop-job.xml
deleted file mode 100644
index de72645..0000000
--- a/jena-hadoop-rdf/hadoop-rdf-stats/hadoop-job.xml
+++ /dev/null
@@ -1,46 +0,0 @@
-<!--
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
--->
-
-<assembly>
-  <id>hadoop-job</id>
-  <formats>
-    <format>jar</format>
-  </formats>
-  <includeBaseDirectory>false</includeBaseDirectory>
-  <dependencySets>
-    <dependencySet>
-      <unpack>false</unpack>
-      <scope>runtime</scope>
-      <outputDirectory>lib</outputDirectory>
-      <excludes>
-        <exclude>${groupId}:${artifactId}</exclude>
-      </excludes>
-    </dependencySet>
-    <dependencySet>
-      <unpack>true</unpack>
-      <includes>
-        <include>${groupId}:${artifactId}</include>
-      </includes>
-    </dependencySet>
-  </dependencySets>
-  <fileSets>
-    <fileSet>
-      <directory>${basedir}/target/test-classes</directory>
-      <outputDirectory>/</outputDirectory>
-    </fileSet>
- </fileSets>
-</assembly>

http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/hadoop-rdf-stats/pom.xml
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-stats/pom.xml b/jena-hadoop-rdf/hadoop-rdf-stats/pom.xml
deleted file mode 100644
index bf69fa6..0000000
--- a/jena-hadoop-rdf/hadoop-rdf-stats/pom.xml
+++ /dev/null
@@ -1,103 +0,0 @@
-<!--
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
--->
-
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-	<modelVersion>4.0.0</modelVersion>
-	<parent>
-		<groupId>org.apache.jena</groupId>
-		<artifactId>jena-hadoop-rdf</artifactId>
-		<version>0.9.0-SNAPSHOT</version>
-	</parent>
-	<artifactId>jena-hadoop-rdf-stats</artifactId>
-	<name>Apache Jena - RDF Tools for Hadoop - Statistics Demo App</name>
-	<description>A demo application that can be run on Hadoop to produce a statistical analysis on arbitrary RDF inputs</description>
-
-	<dependencies>
-		<!-- Internal Project Dependencies -->
-		<dependency>
-			<groupId>org.apache.jena</groupId>
-			<artifactId>jena-hadoop-rdf-io</artifactId>
-			<version>${project.version}</version>
-		</dependency>
-		<dependency>
-			<groupId>org.apache.jena</groupId>
-			<artifactId>jena-hadoop-rdf-mapreduce</artifactId>
-			<version>${project.version}</version>
-		</dependency>
-
-		<!-- CLI related Dependencies -->
-		<dependency>
-			<groupId>io.airlift</groupId>
-			<artifactId>airline</artifactId>
-			<version>0.6</version>
-		</dependency>
-
-		<!-- Hadoop Dependencies -->
-		<!-- Note these will be provided on the Hadoop cluster hence the provided 
-			scope -->
-		<dependency>
-			<groupId>org.apache.hadoop</groupId>
-			<artifactId>hadoop-common</artifactId>
-			<scope>provided</scope>
-		</dependency>
-		<dependency>
-			<groupId>org.apache.hadoop</groupId>
-			<artifactId>hadoop-mapreduce-client-common</artifactId>
-			<scope>provided</scope>
-		</dependency>
-
-		<!-- Test Dependencies -->
-		<dependency>
-			<groupId>org.apache.jena</groupId>
-			<artifactId>jena-hadoop-rdf-mapreduce</artifactId>
-			<version>${project.version}</version>
-			<classifier>tests</classifier>
-			<scope>test</scope>
-		</dependency>
-		<dependency>
-			<groupId>org.apache.mrunit</groupId>
-			<artifactId>mrunit</artifactId>
-			<scope>test</scope>
-			<classifier>hadoop2</classifier>
-		</dependency>
-	</dependencies>
-
-	<build>
-		<plugins>
-			<!-- Assembly plugin is used to produce the runnable Hadoop JAR with all 
-				dependencies contained therein -->
-			<plugin>
-				<artifactId>maven-assembly-plugin</artifactId>
-				<configuration>
-					<descriptors>
-						<descriptor>hadoop-job.xml</descriptor>
-					</descriptors>
-				</configuration>
-				<executions>
-					<execution>
-						<id>make-assembly</id>
-						<phase>package</phase>
-						<goals>
-							<goal>single</goal>
-						</goals>
-					</execution>
-				</executions>
-			</plugin>
-		</plugins>
-	</build>
-</project>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/hadoop-rdf-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/RdfStats.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/RdfStats.java b/jena-hadoop-rdf/hadoop-rdf-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/RdfStats.java
deleted file mode 100644
index 5f870ee..0000000
--- a/jena-hadoop-rdf/hadoop-rdf-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/RdfStats.java
+++ /dev/null
@@ -1,405 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- *     
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.stats;
-
-import io.airlift.command.Arguments;
-import io.airlift.command.Command;
-import io.airlift.command.Help;
-import io.airlift.command.HelpOption;
-import io.airlift.command.Option;
-import io.airlift.command.OptionType;
-import io.airlift.command.ParseArgumentsMissingException;
-import io.airlift.command.ParseArgumentsUnexpectedException;
-import io.airlift.command.ParseException;
-import io.airlift.command.ParseOptionMissingException;
-import io.airlift.command.ParseOptionMissingValueException;
-import io.airlift.command.SingleCommand;
-import io.airlift.command.model.CommandMetadata;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.concurrent.TimeUnit;
-
-import javax.inject.Inject;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.apache.jena.hadoop.rdf.stats.jobs.JobFactory;
-
-
-/**
- * Entry point for the Hadoop job, handles launching all the relevant Hadoop
- * jobs
- */
-@Command(name = "bin/hadoop jar PATH_TO_JAR com.yarcdata.urika.hadoop.rdf.stats.RdfStats", description = "A command which computes statistics on RDF data using Hadoop")
-public class RdfStats implements Tool {
-
-    static final String ANSI_RED = "\u001B[31m";
-    static final String ANSI_RESET = "\u001B[0m";
-
-    private static final String DATA_TYPE_TRIPLES = "triples", DATA_TYPE_QUADS = "quads", DATA_TYPE_MIXED = "mixed";
-
-    /**
-     * Help option
-     */
-    @Inject
-    public HelpOption helpOption;
-
-    /**
-     * Gets/Sets whether all available statistics will be calculated
-     */
-    @Option(name = { "-a", "--all" }, description = "Requests that all available statistics be calculated", type = OptionType.COMMAND)
-    public boolean all = false;
-
-    /**
-     * Gets/Sets whether node usage counts will be calculated
-     */
-    @Option(name = { "-n", "--node-count" }, description = "Requests that node usage counts be calculated", type = OptionType.COMMAND)
-    public boolean nodeCount = false;
-
-    /**
-     * Gets/Sets whether characteristic sets will be calculated
-     */
-    @Option(name = { "-c", "--characteristic-sets" }, description = "Requests that characteristic sets be calculated", type = OptionType.COMMAND)
-    public boolean characteristicSets = false;
-
-    /**
-     * Gets/Sets whether type counts will be calculated
-     */
-    @Option(name = { "-t", "--type-counts" }, description = "Requests that rdf:type usage counts be calculated", type = OptionType.COMMAND)
-    public boolean typeCount = false;
-
-    /**
-     * Gets/Sets whether data type counts will be calculated
-     */
-    @Option(name = { "-d", "--data-types" }, description = "Requests that literal data type usage counts be calculated", type = OptionType.COMMAND)
-    public boolean dataTypeCount = false;
-
-    /**
-     * Gets/Sets whether namespace counts will be calculated
-     */
-    @Option(name = { "--namespaces" }, description = "Requests that namespace usage counts be calculated", type = OptionType.COMMAND)
-    public boolean namespaceCount = false;
-
-    /**
-     * Gets/Sets the input data type used
-     */
-    @Option(name = { "--input-type" }, allowedValues = { DATA_TYPE_MIXED, DATA_TYPE_QUADS, DATA_TYPE_TRIPLES }, description = "Specifies whether the input data is a mixture of quads and triples, just quads or just triples.  Using the most specific data type will yield the most accurrate statistics")
-    public String inputType = DATA_TYPE_MIXED;
-
-    /**
-     * Gets/Sets the output path
-     */
-    @Option(name = { "-o", "--output" }, title = "OutputPath", description = "Sets the output path", arity = 1, required = true)
-    public String outputPath = null;
-
-    /**
-     * Gets/Sets the input path(s)
-     */
-    @Arguments(description = "Sets the input path(s)", title = "InputPath", required = true)
-    public List<String> inputPaths = new ArrayList<String>();
-
-    private Configuration config;
-
-    /**
-     * Entry point method
-     * 
-     * @param args
-     *            Arguments
-     * @throws Exception
-     */
-    public static void main(String[] args) throws Exception {
-        try {
-            // Run and exit with result code if no errors bubble up
-            // Note that the exit code may still be an error code
-            int res = ToolRunner.run(new Configuration(true), new RdfStats(), args);
-            System.exit(res);
-        } catch (Exception e) {
-            System.err.println(ANSI_RED + e.getMessage());
-            e.printStackTrace(System.err);
-        } finally {
-            System.err.print(ANSI_RESET);
-        }
-        // If any errors bubble up exit with non-zero code
-        System.exit(1);
-    }
-
-    private static void showUsage() {
-        CommandMetadata metadata = SingleCommand.singleCommand(RdfStats.class).getCommandMetadata();
-        StringBuilder builder = new StringBuilder();
-        Help.help(metadata, builder);
-        System.err.print(ANSI_RESET);
-        System.err.println(builder.toString());
-        System.exit(1);
-    }
-
-    @Override
-    public void setConf(Configuration conf) {
-        this.config = conf;
-    }
-
-    @Override
-    public Configuration getConf() {
-        return this.config;
-    }
-
-    @Override
-    public int run(String[] args) throws Exception {
-        try {
-            // Parse custom arguments
-            RdfStats cmd = SingleCommand.singleCommand(RdfStats.class).parse(args);
-
-            // Copy Hadoop configuration across
-            cmd.setConf(this.getConf());
-
-            // Show help if requested and exit with success
-            if (cmd.helpOption.showHelpIfRequested()) {
-                return 0;
-            }
-
-            // Run the command and exit with success
-            cmd.run();
-            return 0;
-
-        } catch (ParseOptionMissingException e) {
-            System.err.println(ANSI_RED + e.getMessage());
-            System.err.println();
-            showUsage();
-        } catch (ParseOptionMissingValueException e) {
-            System.err.println(ANSI_RED + e.getMessage());
-            System.err.println();
-            showUsage();
-        } catch (ParseArgumentsMissingException e) {
-            System.err.println(ANSI_RED + e.getMessage());
-            System.err.println();
-            showUsage();
-        } catch (ParseArgumentsUnexpectedException e) {
-            System.err.println(ANSI_RED + e.getMessage());
-            System.err.println();
-            showUsage();
-            // TODO Re-enable as and when we upgrade Airline
-            // } catch (ParseOptionIllegalValueException e) {
-            // System.err.println(ANSI_RED + e.getMessage());
-            // System.err.println();
-            // showUsage();
-        } catch (ParseException e) {
-            System.err.println(ANSI_RED + e.getMessage());
-            System.err.println();
-            showUsage();
-        } catch (UnsupportedOperationException e) {
-            System.err.println(ANSI_RED + e.getMessage());
-        } catch (Throwable e) {
-            System.err.println(ANSI_RED + e.getMessage());
-            e.printStackTrace(System.err);
-        } finally {
-            System.err.print(ANSI_RESET);
-        }
-        return 1;
-    }
-
-    private void run() throws Throwable {
-        if (!this.outputPath.endsWith("/")) {
-            this.outputPath += "/";
-        }
-
-        // If all statistics requested turn on all statistics
-        if (this.all) {
-            this.nodeCount = true;
-            this.characteristicSets = true;
-            this.typeCount = true;
-            this.dataTypeCount = true;
-            this.namespaceCount = true;
-        }
-
-        // How many statistics were requested?
-        int statsRequested = 0;
-        if (this.nodeCount)
-            statsRequested++;
-        if (this.characteristicSets)
-            statsRequested++;
-        if (this.typeCount)
-            statsRequested++;
-        if (this.dataTypeCount)
-            statsRequested++;
-        if (this.namespaceCount)
-            statsRequested++;
-
-        // Error if no statistics requested
-        if (statsRequested == 0) {
-            System.err
-                    .println("You did not request any statistics to be calculated, please use one/more of the relevant options to select the statistics to be computed");
-            return;
-        }
-        int statsComputed = 1;
-
-        // Compute statistics
-        if (this.nodeCount) {
-            Job job = this.selectNodeCountJob();
-            statsComputed = this.computeStatistic(job, statsComputed, statsRequested);
-        }
-        if (this.typeCount) {
-            Job[] jobs = this.selectTypeCountJobs();
-            statsComputed = this.computeStatistic(jobs, false, false, statsComputed, statsRequested);
-        }
-        if (this.dataTypeCount) {
-            Job job = this.selectDataTypeCountJob();
-            statsComputed = this.computeStatistic(job, statsComputed, statsRequested);
-        }
-        if (this.namespaceCount) {
-            Job job = this.selectNamespaceCountJob();
-            statsComputed = this.computeStatistic(job, statsComputed, statsRequested);
-        }
-        if (this.characteristicSets) {
-            Job[] jobs = this.selectCharacteristicSetJobs();
-            statsComputed = this.computeStatistic(jobs, false, false, statsComputed, statsRequested);
-        }
-    }
-
-    private int computeStatistic(Job job, int statsComputed, int statsRequested) throws Throwable {
-        System.out.println(String.format("Computing Statistic %d of %d requested", statsComputed, statsRequested));
-        this.runJob(job);
-        System.out.println(String.format("Computed Statistic %d of %d requested", statsComputed, statsRequested));
-        System.out.println();
-        return ++statsComputed;
-    }
-
-    private int computeStatistic(Job[] jobs, boolean continueOnFailure, boolean continueOnError, int statsComputed,
-            int statsRequested) {
-        System.out.println(String.format("Computing Statistic %d of %d requested", statsComputed, statsRequested));
-        this.runJobSequence(jobs, continueOnFailure, continueOnError);
-        System.out.println(String.format("Computed Statistic %d of %d requested", statsComputed, statsRequested));
-        System.out.println();
-        return ++statsComputed;
-    }
-
-    private boolean runJob(Job job) throws Throwable {
-        System.out.println("Submitting Job " + job.getJobName());
-        long start = System.nanoTime();
-        try {
-            job.submit();
-            if (job.monitorAndPrintJob()) {
-                System.out.println("Job " + job.getJobName() + " succeeded");
-                return true;
-            } else {
-                System.out.println("Job " + job.getJobName() + " failed");
-                return false;
-            }
-        } catch (Throwable e) {
-            System.out.println("Unexpected failure in Job " + job.getJobName());
-            throw e;
-        } finally {
-            long end = System.nanoTime();
-            System.out.println("Job " + job.getJobName() + " finished after "
-                    + String.format("%,d milliseconds", TimeUnit.NANOSECONDS.toMillis(end - start)));
-            System.out.println();
-        }
-    }
-
-    private void runJobSequence(Job[] jobs, boolean continueOnFailure, boolean continueOnError) {
-        for (int i = 0; i < jobs.length; i++) {
-            Job job = jobs[i];
-            try {
-                boolean success = this.runJob(job);
-                if (!success && !continueOnFailure)
-                    throw new IllegalStateException("Unable to complete job sequence because Job " + job.getJobName() + " failed");
-            } catch (IllegalStateException e) {
-                throw e;
-            } catch (Throwable e) {
-                if (!continueOnError)
-                    throw new IllegalStateException("Unable to complete job sequence because job " + job.getJobName()
-                            + " errorred", e);
-            }
-        }
-    }
-
-    private Job selectNodeCountJob() throws IOException {
-        String realOutputPath = outputPath + "node-counts/";
-        String[] inputs = new String[this.inputPaths.size()];
-        this.inputPaths.toArray(inputs);
-
-        if (DATA_TYPE_QUADS.equals(this.inputType)) {
-            return JobFactory.getQuadNodeCountJob(this.config, inputs, realOutputPath);
-        } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) {
-            return JobFactory.getTripleNodeCountJob(this.config, inputs, realOutputPath);
-        } else {
-            return JobFactory.getNodeCountJob(this.config, inputs, realOutputPath);
-        }
-    }
-
-    private Job selectDataTypeCountJob() throws IOException {
-        String realOutputPath = outputPath + "data-type-counts/";
-        String[] inputs = new String[this.inputPaths.size()];
-        this.inputPaths.toArray(inputs);
-
-        if (DATA_TYPE_QUADS.equals(this.inputType)) {
-            return JobFactory.getQuadDataTypeCountJob(this.config, inputs, realOutputPath);
-        } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) {
-            return JobFactory.getTripleDataTypeCountJob(this.config, inputs, realOutputPath);
-        } else {
-            return JobFactory.getDataTypeCountJob(this.config, inputs, realOutputPath);
-        }
-    }
-
-    private Job selectNamespaceCountJob() throws IOException {
-        String realOutputPath = outputPath + "namespace-counts/";
-        String[] inputs = new String[this.inputPaths.size()];
-        this.inputPaths.toArray(inputs);
-
-        if (DATA_TYPE_QUADS.equals(this.inputType)) {
-            return JobFactory.getQuadNamespaceCountJob(this.config, inputs, realOutputPath);
-        } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) {
-            return JobFactory.getTripleNamespaceCountJob(this.config, inputs, realOutputPath);
-        } else {
-            return JobFactory.getNamespaceCountJob(this.config, inputs, realOutputPath);
-        }
-    }
-
-    private Job[] selectCharacteristicSetJobs() throws IOException {
-        String intermediateOutputPath = outputPath + "characteristics/intermediate/";
-        String finalOutputPath = outputPath + "characteristics/final/";
-        String[] inputs = new String[this.inputPaths.size()];
-        this.inputPaths.toArray(inputs);
-
-        if (DATA_TYPE_QUADS.equals(this.inputType)) {
-            return JobFactory.getQuadCharacteristicSetJobs(this.config, inputs, intermediateOutputPath, finalOutputPath);
-        } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) {
-            return JobFactory.getTripleCharacteristicSetJobs(this.config, inputs, intermediateOutputPath, finalOutputPath);
-        } else {
-            return JobFactory.getCharacteristicSetJobs(this.config, inputs, intermediateOutputPath, finalOutputPath);
-        }
-    }
-
-    private Job[] selectTypeCountJobs() throws IOException {
-        String intermediateOutputPath = outputPath + "type-declarations/";
-        String finalOutputPath = outputPath + "type-counts/";
-        String[] inputs = new String[this.inputPaths.size()];
-        this.inputPaths.toArray(inputs);
-
-        if (DATA_TYPE_QUADS.equals(this.inputType)) {
-            return JobFactory.getQuadTypeCountJobs(this.config, inputs, intermediateOutputPath, finalOutputPath);
-        } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) {
-            return JobFactory.getTripleTypeCountJobs(this.config, inputs, intermediateOutputPath, finalOutputPath);
-        } else {
-            return JobFactory.getTypeCountJobs(this.config, inputs, intermediateOutputPath, finalOutputPath);
-        }
-    }
-}
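
Because RdfStats implements Tool, it can also be launched programmatically through ToolRunner rather than via the bin/hadoop invocation named in its @Command annotation. A sketch; the launcher class and paths are illustrative assumptions:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.util.ToolRunner;
    import org.apache.jena.hadoop.rdf.stats.RdfStats;

    public class LaunchRdfStats { // hypothetical launcher, for illustration only
        public static void main(String[] args) throws Exception {
            // Equivalent to passing these options on the bin/hadoop command line:
            // node counts over triple data, with results written under the -o path
            // (selectNodeCountJob() appends "node-counts/" to it)
            int exitCode = ToolRunner.run(new Configuration(true), new RdfStats(),
                    new String[] { "--node-count", "--input-type", "triples",
                            "-o", "/stats/", "/data/triples" });
            System.exit(exitCode);
        }
    }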

http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/hadoop-rdf-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/jobs/JobFactory.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/jobs/JobFactory.java b/jena-hadoop-rdf/hadoop-rdf-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/jobs/JobFactory.java
deleted file mode 100644
index 55bb8af..0000000
--- a/jena-hadoop-rdf/hadoop-rdf-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/jobs/JobFactory.java
+++ /dev/null
@@ -1,757 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- *     
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.stats.jobs;
-
-import java.io.IOException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.SequenceFile.CompressionType;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.compress.BZip2Codec;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
-import org.apache.hadoop.util.StringUtils;
-import org.apache.jena.hadoop.rdf.io.input.QuadsInputFormat;
-import org.apache.jena.hadoop.rdf.io.input.TriplesInputFormat;
-import org.apache.jena.hadoop.rdf.io.input.TriplesOrQuadsInputFormat;
-import org.apache.jena.hadoop.rdf.io.input.nquads.NQuadsInputFormat;
-import org.apache.jena.hadoop.rdf.io.input.ntriples.NTriplesInputFormat;
-import org.apache.jena.hadoop.rdf.io.output.nquads.NQuadsOutputFormat;
-import org.apache.jena.hadoop.rdf.io.output.ntriples.NTriplesNodeOutputFormat;
-import org.apache.jena.hadoop.rdf.io.output.ntriples.NTriplesOutputFormat;
-import org.apache.jena.hadoop.rdf.mapreduce.KeyMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.RdfMapReduceConstants;
-import org.apache.jena.hadoop.rdf.mapreduce.TextCountReducer;
-import org.apache.jena.hadoop.rdf.mapreduce.characteristics.CharacteristicSetReducer;
-import org.apache.jena.hadoop.rdf.mapreduce.characteristics.QuadCharacteristicSetGeneratingReducer;
-import org.apache.jena.hadoop.rdf.mapreduce.characteristics.TripleCharacteristicSetGeneratingReducer;
-import org.apache.jena.hadoop.rdf.mapreduce.count.NodeCountReducer;
-import org.apache.jena.hadoop.rdf.mapreduce.count.QuadNodeCountMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.count.TripleNodeCountMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.count.datatypes.QuadDataTypeCountMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.count.datatypes.TripleDataTypeCountMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.count.namespaces.QuadNamespaceCountMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.count.namespaces.TripleNamespaceCountMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.count.positional.QuadObjectCountMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.count.positional.TripleObjectCountMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.filter.positional.QuadFilterByPredicateMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.filter.positional.TripleFilterByPredicateUriMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.group.QuadGroupBySubjectMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.group.TripleGroupBySubjectMapper;
-import org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable;
-import org.apache.jena.hadoop.rdf.types.NodeWritable;
-import org.apache.jena.hadoop.rdf.types.QuadWritable;
-import org.apache.jena.hadoop.rdf.types.TripleWritable;
-
-import com.hp.hpl.jena.vocabulary.RDF;
-
-/**
- * Factory that can produce {@link Job} instances for computing various RDF
- * statistics
- * 
- * 
- * 
- */
-public class JobFactory {
-
-    /**
-     * Private constructor prevents instantiation
-     */
-    private JobFactory() {
-    }
-
-    /**
-     * Gets a job for computing node counts on RDF triple inputs
-     * 
-     * @param config
-     *            Configuration
-     * @param inputPaths
-     *            Input paths
-     * @param outputPath
-     *            Output path
-     * @return Job
-     * @throws IOException
-     */
-    public static Job getTripleNodeCountJob(Configuration config, String[] inputPaths, String outputPath) throws IOException {
-        Job job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Triples Node Usage Count");
-
-        // Map/Reduce classes
-        job.setMapperClass(TripleNodeCountMapper.class);
-        job.setMapOutputKeyClass(NodeWritable.class);
-        job.setMapOutputValueClass(LongWritable.class);
-        job.setReducerClass(NodeCountReducer.class);
-
-        // Input and Output
-        job.setInputFormatClass(TriplesInputFormat.class);
-        job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
-        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
-        FileOutputFormat.setOutputPath(job, new Path(outputPath));
-
-        return job;
-    }
-
-    /**
-     * Gets a job for computing node counts on RDF quad inputs
-     * 
-     * @param config
-     *            Configuration
-     * @param inputPaths
-     *            Input paths
-     * @param outputPath
-     *            Output path
-     * @return Job
-     * @throws IOException
-     */
-    public static Job getQuadNodeCountJob(Configuration config, String[] inputPaths, String outputPath) throws IOException {
-        Job job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Quads Node Usage Count");
-
-        // Map/Reduce classes
-        job.setMapperClass(QuadNodeCountMapper.class);
-        job.setMapOutputKeyClass(NodeWritable.class);
-        job.setMapOutputValueClass(LongWritable.class);
-        job.setReducerClass(NodeCountReducer.class);
-
-        // Input and Output
-        job.setInputFormatClass(QuadsInputFormat.class);
-        job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
-        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
-        FileOutputFormat.setOutputPath(job, new Path(outputPath));
-
-        return job;
-    }
-
-    /**
-     * Gets a job for computing node counts on RDF triple and/or quad inputs
-     * 
-     * @param config
-     *            Configuration
-     * @param inputPaths
-     *            Input paths
-     * @param outputPath
-     *            Output path
-     * @return Job
-     * @throws IOException
-     */
-    public static Job getNodeCountJob(Configuration config, String[] inputPaths, String outputPath) throws IOException {
-        Job job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Node Usage Count");
-
-        // Map/Reduce classes
-        job.setMapperClass(QuadNodeCountMapper.class);
-        job.setMapOutputKeyClass(NodeWritable.class);
-        job.setMapOutputValueClass(LongWritable.class);
-        job.setReducerClass(NodeCountReducer.class);
-
-        // Input and Output
-        job.setInputFormatClass(TriplesOrQuadsInputFormat.class);
-        job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
-        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
-        FileOutputFormat.setOutputPath(job, new Path(outputPath));
-
-        return job;
-    }
-
-    /**
-     * Gets a sequence of jobs that can be used to compute characteristic sets
-     * for RDF triples
-     * 
-     * @param config
-     *            Configuration
-     * @param inputPaths
-     *            Input paths
-     * @param intermediateOutputPath
-     *            Intermediate output path
-     * @param outputPath
-     *            Final output path
-     * @return Sequence of jobs
-     * @throws IOException
-     */
-    public static Job[] getTripleCharacteristicSetJobs(Configuration config, String[] inputPaths, String intermediateOutputPath,
-            String outputPath) throws IOException {
-        Job[] jobs = new Job[2];
-
-        Job job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Triples Characteristic Set (Generation)");
-
-        // Map/Reduce classes
-        job.setMapperClass(TripleGroupBySubjectMapper.class);
-        job.setMapOutputKeyClass(NodeWritable.class);
-        job.setMapOutputValueClass(TripleWritable.class);
-        job.setReducerClass(TripleCharacteristicSetGeneratingReducer.class);
-        job.setOutputKeyClass(CharacteristicSetWritable.class);
-        job.setOutputValueClass(NullWritable.class);
-
-        // Input and Output
-        job.setInputFormatClass(TriplesInputFormat.class);
-        job.setOutputFormatClass(SequenceFileOutputFormat.class);
-        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
-        FileOutputFormat.setOutputPath(job, new Path(intermediateOutputPath));
-        SequenceFileOutputFormat.setCompressOutput(job, true);
-        FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
-        SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
-
-        jobs[0] = job;
-
-        job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Triples Characteristic Set (Reduction)");
-
-        // Map/Reduce classes
-        job.setMapperClass(KeyMapper.class);
-        job.setMapOutputKeyClass(CharacteristicSetWritable.class);
-        job.setMapOutputValueClass(CharacteristicSetWritable.class);
-        job.setReducerClass(CharacteristicSetReducer.class);
-        job.setOutputKeyClass(CharacteristicSetWritable.class);
-        job.setOutputValueClass(CharacteristicSetWritable.class);
-
-        // Input and Output
-        job.setInputFormatClass(SequenceFileInputFormat.class);
-        job.setOutputFormatClass(TextOutputFormat.class);
-        FileInputFormat.setInputPaths(job, intermediateOutputPath);
-        FileOutputFormat.setOutputPath(job, new Path(outputPath));
-
-        jobs[1] = job;
-        return jobs;
-    }
-
-    /**
-     * Gets a sequence of jobs that can be used to compute characteristic sets
-     * for RDF quads
-     * 
-     * @param config
-     *            Configuration
-     * @param inputPaths
-     *            Input paths
-     * @param intermediateOutputPath
-     *            Intermediate output path
-     * @param outputPath
-     *            Final output path
-     * @return Sequence of jobs
-     * @throws IOException
-     */
-    public static Job[] getQuadCharacteristicSetJobs(Configuration config, String[] inputPaths, String intermediateOutputPath,
-            String outputPath) throws IOException {
-        Job[] jobs = new Job[2];
-
-        Job job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Quads Characteristic Set (Generation)");
-
-        // Map/Reduce classes
-        job.setMapperClass(QuadGroupBySubjectMapper.class);
-        job.setMapOutputKeyClass(NodeWritable.class);
-        job.setMapOutputValueClass(QuadWritable.class);
-        job.setReducerClass(QuadCharacteristicSetGeneratingReducer.class);
-        job.setOutputKeyClass(CharacteristicSetWritable.class);
-        job.setOutputValueClass(NullWritable.class);
-
-        // Input and Output
-        job.setInputFormatClass(QuadsInputFormat.class);
-        job.setOutputFormatClass(SequenceFileOutputFormat.class);
-        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
-        FileOutputFormat.setOutputPath(job, new Path(intermediateOutputPath));
-        SequenceFileOutputFormat.setCompressOutput(job, true);
-        FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
-        SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
-
-        jobs[0] = job;
-
-        job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Quads Characteristic Set (Reduction)");
-
-        // Map/Reduce classes
-        job.setMapperClass(KeyMapper.class);
-        job.setMapOutputKeyClass(CharacteristicSetWritable.class);
-        job.setMapOutputValueClass(CharacteristicSetWritable.class);
-        job.setReducerClass(CharacteristicSetReducer.class);
-        job.setOutputKeyClass(CharacteristicSetWritable.class);
-        job.setOutputValueClass(CharacteristicSetWritable.class);
-
-        // Input and Output
-        job.setInputFormatClass(SequenceFileInputFormat.class);
-        job.setOutputFormatClass(TextOutputFormat.class);
-        FileInputFormat.setInputPaths(job, intermediateOutputPath);
-        FileOutputFormat.setOutputPath(job, new Path(outputPath));
-
-        jobs[1] = job;
-        return jobs;
-    }
-
-    /**
-     * Gets a sequence of jobs that can be used to compute characteristic sets
-     * for RDF triple and/or quad inputs
-     * 
-     * @param config
-     *            Configuration
-     * @param inputPaths
-     *            Input paths
-     * @param intermediateOutputPath
-     *            Intermediate output path
-     * @param outputPath
-     *            Final output path
-     * @return Sequence of jobs
-     * @throws IOException
-     */
-    public static Job[] getCharacteristicSetJobs(Configuration config, String[] inputPaths, String intermediateOutputPath,
-            String outputPath) throws IOException {
-        Job[] jobs = new Job[2];
-
-        Job job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Characteristic Set (Generation)");
-
-        // Map/Reduce classes
-        job.setMapperClass(QuadGroupBySubjectMapper.class);
-        job.setMapOutputKeyClass(NodeWritable.class);
-        job.setMapOutputValueClass(QuadWritable.class);
-        job.setReducerClass(QuadCharacteristicSetGeneratingReducer.class);
-        job.setOutputKeyClass(CharacteristicSetWritable.class);
-        job.setOutputValueClass(NullWritable.class);
-
-        // Input and Output
-        job.setInputFormatClass(TriplesOrQuadsInputFormat.class);
-        job.setOutputFormatClass(SequenceFileOutputFormat.class);
-        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
-        FileOutputFormat.setOutputPath(job, new Path(intermediateOutputPath));
-        SequenceFileOutputFormat.setCompressOutput(job, true);
-        FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
-        SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
-
-        jobs[0] = job;
-
-        job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Characteristic Set (Reduction)");
-
-        // Map/Reduce classes
-        job.setMapperClass(KeyMapper.class);
-        job.setMapOutputKeyClass(CharacteristicSetWritable.class);
-        job.setMapOutputValueClass(CharacteristicSetWritable.class);
-        job.setReducerClass(CharacteristicSetReducer.class);
-        job.setOutputKeyClass(CharacteristicSetWritable.class);
-        job.setOutputValueClass(CharacteristicSetWritable.class);
-
-        // Input and Output
-        job.setInputFormatClass(SequenceFileInputFormat.class);
-        job.setOutputFormatClass(TextOutputFormat.class);
-        FileInputFormat.setInputPaths(job, intermediateOutputPath);
-        FileOutputFormat.setOutputPath(job, new Path(outputPath));
-
-        jobs[1] = job;
-        return jobs;
-    }
-
-    /**
-     * Gets a sequence of jobs that can be used to compute type counts on RDF
-     * triple inputs
-     * 
-     * @param config
-     *            Configuration
-     * @param inputPaths
-     *            Input paths
-     * @param intermediateOutputPath
-     *            Path for intermediate output which will contain all the type
-     *            declaration triples present in the inputs
-     * @param outputPath
-     *            Output path
-     * @return Sequence of jobs
-     * @throws IOException
-     */
-    public static Job[] getTripleTypeCountJobs(Configuration config, String[] inputPaths, String intermediateOutputPath,
-            String outputPath) throws IOException {
-        Job[] jobs = new Job[2];
-
-        Job job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Type Triples Extraction");
-
-        // Map/Reduce classes
-        job.getConfiguration().setStrings(RdfMapReduceConstants.FILTER_PREDICATE_URIS, RDF.type.getURI());
-        job.setMapperClass(TripleFilterByPredicateUriMapper.class);
-        job.setMapOutputKeyClass(LongWritable.class);
-        job.setMapOutputValueClass(TripleWritable.class);
-
-        // Input and Output Format
-        job.setInputFormatClass(TriplesInputFormat.class);
-        job.setOutputFormatClass(NTriplesOutputFormat.class);
-        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
-        FileOutputFormat.setOutputPath(job, new Path(intermediateOutputPath));
-
-        jobs[0] = job;
-
-        // Object Node Usage count job
-        job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Triples Type Usage Count");
-
-        // Map/Reduce classes
-        job.setMapperClass(TripleObjectCountMapper.class);
-        job.setMapOutputKeyClass(NodeWritable.class);
-        job.setMapOutputValueClass(LongWritable.class);
-        job.setReducerClass(NodeCountReducer.class);
-
-        // Input and Output
-        job.setInputFormatClass(NTriplesInputFormat.class);
-        // TODO Would be better if this was intelligently configured
-        NLineInputFormat.setNumLinesPerSplit(job, 10000);
-        job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
-        FileInputFormat.setInputPaths(job, intermediateOutputPath);
-        FileOutputFormat.setOutputPath(job, new Path(outputPath));
-
-        jobs[1] = job;
-
-        return jobs;
-    }
-
-    /**
-     * Gets a sequence of jobs that can be used to compute type counts on RDF
-     * quad inputs
-     * 
-     * @param config
-     *            Configuration
-     * @param inputPaths
-     *            Input paths
-     * @param intermediateOutputPath
-     *            Path for intermediate output which will contain all the type
-     *            declaration quads present in the inputs
-     * @param outputPath
-     *            Output path
-     * @return Sequence of jobs
-     * @throws IOException
-     */
-    public static Job[] getQuadTypeCountJobs(Configuration config, String[] inputPaths, String intermediateOutputPath,
-            String outputPath) throws IOException {
-        Job[] jobs = new Job[2];
-
-        Job job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Type Quads Extraction");
-
-        // Map/Reduce classes
-        job.getConfiguration().setStrings(RdfMapReduceConstants.FILTER_PREDICATE_URIS, RDF.type.getURI());
-        job.setMapperClass(QuadFilterByPredicateMapper.class);
-        job.setMapOutputKeyClass(LongWritable.class);
-        job.setMapOutputValueClass(QuadWritable.class);
-
-        // Input and Output Format
-        job.setInputFormatClass(QuadsInputFormat.class);
-        job.setOutputFormatClass(NQuadsOutputFormat.class);
-        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
-        FileOutputFormat.setOutputPath(job, new Path(intermediateOutputPath));
-
-        jobs[0] = job;
-
-        // Object Node Usage count job
-        job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Quads Type Usage Count");
-
-        // Map/Reduce classes
-        job.setMapperClass(QuadObjectCountMapper.class);
-        job.setMapOutputKeyClass(NodeWritable.class);
-        job.setMapOutputValueClass(LongWritable.class);
-        job.setReducerClass(NodeCountReducer.class);
-
-        // Input and Output
-        job.setInputFormatClass(NQuadsInputFormat.class);
-        // TODO Would be better if this was intelligently configured
-        NLineInputFormat.setNumLinesPerSplit(job, 10000);
-        job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
-        FileInputFormat.setInputPaths(job, intermediateOutputPath);
-        FileOutputFormat.setOutputPath(job, new Path(outputPath));
-
-        jobs[1] = job;
-
-        return jobs;
-    }
-
-    /**
-     * Gets a sequence of jobs that can be used to compute type counts on RDF
-     * triple and/or quad inputs
-     * 
-     * @param config
-     *            Configuration
-     * @param inputPaths
-     *            Input paths
-     * @param intermediateOutputPath
-     *            Path for intermediate output which will contain all the type
-     *            declaration quads present in the inputs
-     * @param outputPath
-     *            Output path
-     * @return Sequence of jobs
-     * @throws IOException
-     */
-    public static Job[] getTypeCountJobs(Configuration config, String[] inputPaths, String intermediateOutputPath,
-            String outputPath) throws IOException {
-        Job[] jobs = new Job[2];
-
-        Job job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Type Extraction");
-
-        // Map/Reduce classes
-        job.getConfiguration().setStrings(RdfMapReduceConstants.FILTER_PREDICATE_URIS, RDF.type.getURI());
-        job.setMapperClass(QuadFilterByPredicateMapper.class);
-        job.setMapOutputKeyClass(LongWritable.class);
-        job.setMapOutputValueClass(QuadWritable.class);
-
-        // Input and Output Format
-        job.setInputFormatClass(TriplesOrQuadsInputFormat.class);
-        job.setOutputFormatClass(NQuadsOutputFormat.class);
-        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
-        FileOutputFormat.setOutputPath(job, new Path(intermediateOutputPath));
-
-        jobs[0] = job;
-
-        // Object Node Usage count job
-        job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Type Usage Count");
-
-        // Map/Reduce classes
-        job.setMapperClass(QuadObjectCountMapper.class);
-        job.setMapOutputKeyClass(NodeWritable.class);
-        job.setMapOutputValueClass(LongWritable.class);
-        job.setReducerClass(NodeCountReducer.class);
-
-        // Input and Output
-        job.setInputFormatClass(NQuadsInputFormat.class);
-        // TODO Would be better if this was intelligently configured
-        NLineInputFormat.setNumLinesPerSplit(job, 10000);
-        job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
-        FileInputFormat.setInputPaths(job, intermediateOutputPath);
-        FileOutputFormat.setOutputPath(job, new Path(outputPath));
-
-        jobs[1] = job;
-
-        return jobs;
-    }
-
-    /**
-     * Gets a job for computing literal data type counts on RDF triple inputs
-     * 
-     * @param config
-     *            Configuration
-     * @param inputPaths
-     *            Input paths
-     * @param outputPath
-     *            Output path
-     * @return Job
-     * @throws IOException
-     */
-    public static Job getTripleDataTypeCountJob(Configuration config, String[] inputPaths, String outputPath) throws IOException {
-        Job job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Triples Literal Data Type Usage Count");
-
-        // Map/Reduce classes
-        job.setMapperClass(TripleDataTypeCountMapper.class);
-        job.setMapOutputKeyClass(NodeWritable.class);
-        job.setMapOutputValueClass(LongWritable.class);
-        job.setReducerClass(NodeCountReducer.class);
-
-        // Input and Output
-        job.setInputFormatClass(TriplesInputFormat.class);
-        job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
-        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
-        FileOutputFormat.setOutputPath(job, new Path(outputPath));
-
-        return job;
-    }
-
-    /**
-     * Gets a job for computing literal data type counts on RDF quad inputs
-     * 
-     * @param config
-     *            Configuration
-     * @param inputPaths
-     *            Input paths
-     * @param outputPath
-     *            Output path
-     * @return Job
-     * @throws IOException
-     */
-    public static Job getQuadDataTypeCountJob(Configuration config, String[] inputPaths, String outputPath) throws IOException {
-        Job job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Quads Literal Data Type Usage Count");
-
-        // Map/Reduce classes
-        job.setMapperClass(QuadDataTypeCountMapper.class);
-        job.setMapOutputKeyClass(NodeWritable.class);
-        job.setMapOutputValueClass(LongWritable.class);
-        job.setReducerClass(NodeCountReducer.class);
-
-        // Input and Output
-        job.setInputFormatClass(QuadsInputFormat.class);
-        job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
-        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
-        FileOutputFormat.setOutputPath(job, new Path(outputPath));
-
-        return job;
-    }
-
-    /**
-     * Gets a job for computing literal data type counts on RDF triple and/or
-     * quad inputs
-     * 
-     * @param config
-     *            Configuration
-     * @param inputPaths
-     *            Input paths
-     * @param outputPath
-     *            Output path
-     * @return Job
-     * @throws IOException
-     */
-    public static Job getDataTypeCountJob(Configuration config, String[] inputPaths, String outputPath) throws IOException {
-        Job job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Literal Data Type Usage Count");
-
-        // Map/Reduce classes
-        job.setMapperClass(QuadDataTypeCountMapper.class);
-        job.setMapOutputKeyClass(NodeWritable.class);
-        job.setMapOutputValueClass(LongWritable.class);
-        job.setReducerClass(NodeCountReducer.class);
-
-        // Input and Output
-        job.setInputFormatClass(TriplesOrQuadsInputFormat.class);
-        job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
-        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
-        FileOutputFormat.setOutputPath(job, new Path(outputPath));
-
-        return job;
-    }
-
-    /**
-     * Gets a job for computing namespace counts on RDF triple inputs
-     * 
-     * @param config
-     *            Configuration
-     * @param inputPaths
-     *            Input paths
-     * @param outputPath
-     *            Output path
-     * @return Job
-     * @throws IOException
-     */
-    public static Job getTripleNamespaceCountJob(Configuration config, String[] inputPaths, String outputPath) throws IOException {
-        Job job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Triples Namespace Usage Count");
-
-        // Map/Reduce classes
-        job.setMapperClass(TripleNamespaceCountMapper.class);
-        job.setMapOutputKeyClass(Text.class);
-        job.setMapOutputValueClass(LongWritable.class);
-        job.setReducerClass(TextCountReducer.class);
-
-        // Input and Output
-        job.setInputFormatClass(TriplesInputFormat.class);
-        job.setOutputFormatClass(TextOutputFormat.class);
-        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
-        FileOutputFormat.setOutputPath(job, new Path(outputPath));
-
-        return job;
-    }
-
-    /**
-     * Gets a job for computing namespace counts on RDF quad inputs
-     * 
-     * @param config
-     *            Configuration
-     * @param inputPaths
-     *            Input paths
-     * @param outputPath
-     *            Output path
-     * @return Job
-     * @throws IOException
-     */
-    public static Job getQuadNamespaceCountJob(Configuration config, String[] inputPaths, String outputPath) throws IOException {
-        Job job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Quads Namespace Usage Count");
-
-        // Map/Reduce classes
-        job.setMapperClass(QuadNamespaceCountMapper.class);
-        job.setMapOutputKeyClass(Text.class);
-        job.setMapOutputValueClass(LongWritable.class);
-        job.setReducerClass(TextCountReducer.class);
-
-        // Input and Output
-        job.setInputFormatClass(QuadsInputFormat.class);
-        job.setOutputFormatClass(TextOutputFormat.class);
-        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
-        FileOutputFormat.setOutputPath(job, new Path(outputPath));
-
-        return job;
-    }
-
-    /**
-     * Gets a job for computing namespace counts on RDF triple and/or quad
-     * inputs
-     * 
-     * @param config
-     *            Configuration
-     * @param inputPaths
-     *            Input paths
-     * @param outputPath
-     *            Output path
-     * @return Job
-     * @throws IOException
-     */
-    public static Job getNamespaceCountJob(Configuration config, String[] inputPaths, String outputPath) throws IOException {
-        Job job = Job.getInstance(config);
-        job.setJarByClass(JobFactory.class);
-        job.setJobName("RDF Namespace Usage Count");
-
-        // Map/Reduce classes
-        job.setMapperClass(QuadNamespaceCountMapper.class);
-        job.setMapOutputKeyClass(Text.class);
-        job.setMapOutputValueClass(LongWritable.class);
-        job.setReducerClass(TextCountReducer.class);
-
-        // Input and Output
-        job.setInputFormatClass(TriplesOrQuadsInputFormat.class);
-        job.setOutputFormatClass(TextOutputFormat.class);
-        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
-        FileOutputFormat.setOutputPath(job, new Path(outputPath));
-
-        return job;
-    }
-}
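
For context, these factory methods produce standard Hadoop jobs: the single-job variants are submitted directly, while the variants returning Job[] yield a two-stage sequence whose second job consumes the intermediate output of the first. A minimal driver sketch, assuming it lives alongside JobFactory; the class name and paths here are illustrative, not part of this commit:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.Job;

    public class StatsDriver {
        public static void main(String[] args) throws Exception {
            Configuration config = new Configuration();

            // Single-stage job: submit and block until completion
            Job nsJob = JobFactory.getNamespaceCountJob(config,
                    new String[] { "/data/rdf" }, "/data/ns-counts");
            if (!nsJob.waitForCompletion(true))
                System.exit(1);

            // Two-stage sequence: each job must finish before the next
            // starts because it reads the previous job's output
            Job[] csJobs = JobFactory.getCharacteristicSetJobs(config,
                    new String[] { "/data/rdf" }, "/tmp/cs-intermediate",
                    "/data/cs-counts");
            for (Job job : csJobs) {
                if (!job.waitForCompletion(true))
                    System.exit(1);
            }
        }
    }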

http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-common/pom.xml
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/jena-elephas-common/pom.xml b/jena-hadoop-rdf/jena-elephas-common/pom.xml
new file mode 100644
index 0000000..7dd68a0
--- /dev/null
+++ b/jena-hadoop-rdf/jena-elephas-common/pom.xml
@@ -0,0 +1,54 @@
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+	<modelVersion>4.0.0</modelVersion>
+	<parent>
+		<groupId>org.apache.jena</groupId>
+		<artifactId>jena-elephas</artifactId>
+		<version>0.9.0-SNAPSHOT</version>
+	</parent>
+	<artifactId>jena-elephas-common</artifactId>
+	<name>Apache Jena - Elephas - Common API</name>
+	<description>Common code for RDF on Hadoop such as writable types for RDF primitives</description>
+
+	<!-- Note that versions are managed by parent POMs -->
+	<dependencies>
+		<!-- Hadoop Dependencies -->
+		<!-- Note these will be provided on the Hadoop cluster, hence the provided
+			scope -->
+		<dependency>
+			<groupId>org.apache.hadoop</groupId>
+			<artifactId>hadoop-common</artifactId>
+			<scope>provided</scope>
+		</dependency>
+
+		<!-- Jena dependencies -->
+		<dependency>
+			<groupId>org.apache.jena</groupId>
+			<artifactId>jena-arq</artifactId>
+		</dependency>
+
+		<!-- Test Dependencies -->
+		<dependency>
+			<groupId>junit</groupId>
+			<artifactId>junit</artifactId>
+			<scope>test</scope>
+		</dependency>
+	</dependencies>
+</project>
\ No newline at end of file
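
A downstream project would then consume this module as an ordinary Maven dependency; the version shown is the snapshot declared above, and released builds will differ:

    <dependency>
        <groupId>org.apache.jena</groupId>
        <artifactId>jena-elephas-common</artifactId>
        <version>0.9.0-SNAPSHOT</version>
    </dependency>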

http://git-wip-us.apache.org/repos/asf/jena/blob/a6c0fefc/jena-hadoop-rdf/jena-elephas-common/src/main/java/org/apache/jena/hadoop/rdf/types/AbstractNodeTupleWritable.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/jena-elephas-common/src/main/java/org/apache/jena/hadoop/rdf/types/AbstractNodeTupleWritable.java b/jena-hadoop-rdf/jena-elephas-common/src/main/java/org/apache/jena/hadoop/rdf/types/AbstractNodeTupleWritable.java
new file mode 100644
index 0000000..f0acc09
--- /dev/null
+++ b/jena-hadoop-rdf/jena-elephas-common/src/main/java/org/apache/jena/hadoop/rdf/types/AbstractNodeTupleWritable.java
@@ -0,0 +1,193 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.types;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.WritableUtils;
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.sparql.util.NodeUtils;
+
+/**
+ * An abstract general-purpose writable where the actual class represented is
+ * composed of a number of {@link Node} instances
+ * <p>
+ * The binary encoding of this base implementation is just a variable integer
+ * indicating the number of nodes present followed by the binary encodings of
+ * the {@link NodeWritable} instances. Derived implementations may wish to
+ * override the {@link #readFields(DataInput)} and {@link #write(DataOutput)}
+ * methods in order to use more specialised encodings.
+ * </p>
+ * 
+ * @param <T>
+ *            Tuple type
+ */
+public abstract class AbstractNodeTupleWritable<T> implements WritableComparable<AbstractNodeTupleWritable<T>> {
+
+    private T tuple;
+
+    /**
+     * Creates a new empty instance
+     */
+    protected AbstractNodeTupleWritable() {
+        this(null);
+    }
+
+    /**
+     * Creates a new instance with the given value
+     * 
+     * @param tuple
+     *            Tuple value
+     */
+    protected AbstractNodeTupleWritable(T tuple) {
+        this.tuple = tuple;
+    }
+
+    /**
+     * Gets the tuple
+     * 
+     * @return Tuple
+     */
+    public T get() {
+        return this.tuple;
+    }
+
+    /**
+     * Sets the tuple
+     * 
+     * @param tuple
+     *            Tuple
+     */
+    public void set(T tuple) {
+        this.tuple = tuple;
+    }
+
+    @Override
+    public void readFields(DataInput input) throws IOException {
+        // Determine how many nodes
+        int size = WritableUtils.readVInt(input);
+        Node[] ns = new Node[size];
+
+        NodeWritable nw = new NodeWritable();
+        for (int i = 0; i < ns.length; i++) {
+            nw.readFields(input);
+            ns[i] = nw.get();
+        }
+
+        // Load the tuple
+        this.tuple = this.createTuple(ns);
+    }
+
+    /**
+     * Creates the actual tuple type from an array of nodes
+     * 
+     * @param ns
+     *            Nodes
+     * @return Tuple
+     */
+    protected abstract T createTuple(Node[] ns);
+
+    @Override
+    public void write(DataOutput output) throws IOException {
+        // Determine how many nodes
+        Node[] ns = this.createNodes(this.tuple);
+        WritableUtils.writeVInt(output, ns.length);
+
+        // Write out nodes
+        NodeWritable nw = new NodeWritable();
+        for (int i = 0; i < ns.length; i++) {
+            nw.set(ns[i]);
+            nw.write(output);
+        }
+    }
+
+    /**
+     * Sets the tuple value
+     * <p>
+     * Intended only for internal use, i.e. when a derived implementation
+     * overrides {@link #readFields(DataInput)} and needs to set the tuple value
+     * directly because it uses a custom encoding scheme
+     * </p>
+     * 
+     * @param tuple
+     *            Tuple
+     */
+    protected final void setInternal(T tuple) {
+        this.tuple = tuple;
+    }
+
+    /**
+     * Converts the actual tuple type into an array of nodes
+     * 
+     * @param tuple
+     *            Tuple
+     * @return Nodes
+     */
+    protected abstract Node[] createNodes(T tuple);
+
+    /**
+     * Compares instances node by node
+     * <p>
+     * Derived implementations may wish to override this and substitute native
+     * tuple-based comparisons
+     * </p>
+     * 
+     * @param other
+     *            Instance to compare with
+     */
+    @Override
+    public int compareTo(AbstractNodeTupleWritable<T> other) {
+        Node[] ns = this.createNodes(this.tuple);
+        Node[] otherNs = this.createNodes(other.tuple);
+
+        if (ns.length < otherNs.length) {
+            return -1;
+        } else if (ns.length > otherNs.length) {
+            return 1;
+        }
+        // Compare node by node
+        for (int i = 0; i < ns.length; i++) {
+            int c = NodeUtils.compareRDFTerms(ns[i], otherNs[i]);
+            if (c != 0)
+                return c;
+        }
+        return 0;
+    }
+
+    @Override
+    public String toString() {
+        return this.get().toString();
+    }
+
+    @Override
+    public int hashCode() {
+        return this.get().hashCode();
+    }
+
+    @SuppressWarnings("unchecked")
+    @Override
+    public boolean equals(Object other) {
+        if (!(other instanceof AbstractNodeTupleWritable))
+            return false;
+        return this.compareTo((AbstractNodeTupleWritable<T>) other) == 0;
+    }
+}
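
To make the contract above concrete, a subclass only needs to map its tuple type to and from a Node[]; the module's TripleWritable and QuadWritable play this role for Jena's Triple and Quad types. A minimal hypothetical example, not part of this commit:

    import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable;
    import com.hp.hpl.jena.graph.Node;

    /** Illustrative two-node tuple writable */
    public class NodePairWritable extends AbstractNodeTupleWritable<Node[]> {

        public NodePairWritable() {
            // Hadoop requires a no-argument constructor for deserialisation
            super();
        }

        public NodePairWritable(Node first, Node second) {
            super(new Node[] { first, second });
        }

        @Override
        protected Node[] createTuple(Node[] ns) {
            // For this type the deserialised node array is the tuple itself
            return ns;
        }

        @Override
        protected Node[] createNodes(Node[] tuple) {
            // Nothing to unpack; hand the base class the nodes to serialise
            return tuple;
        }
    }

The base class's default round-trip then handles serialisation: write() emits a variable-length integer followed by each node via NodeWritable, and readFields() reverses this before calling createTuple().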