You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by rv...@apache.org on 2015/01/05 16:07:50 UTC
[42/52] [abbrv] jena git commit: Further rebranding to Elephas
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trig/BatchedTriGOutputTest.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trig/BatchedTriGOutputTest.java b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trig/BatchedTriGOutputTest.java
new file mode 100644
index 0000000..fd886a3
--- /dev/null
+++ b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trig/BatchedTriGOutputTest.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.output.trig;
+
+import java.util.Arrays;
+import java.util.Collection;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.OutputFormat;
+import org.apache.jena.hadoop.rdf.io.RdfIOConstants;
+import org.apache.jena.hadoop.rdf.io.output.AbstractQuadOutputFormatTests;
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
+import org.apache.jena.riot.Lang;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+
+/**
+ * Tests for batched TriG output.
+ *
+ * Parameterised over the output batch size: runs the standard quad output
+ * format tests at the default batch size plus 1000, 100 and the degenerate
+ * batch size of 1.
+ */
+@RunWith(Parameterized.class)
+public class BatchedTriGOutputTest extends AbstractQuadOutputFormatTests {
+
+    // Batch sizes under test: default, large, small and single-quad batches
+    static long $bs1 = RdfIOConstants.DEFAULT_OUTPUT_BATCH_SIZE;
+    static long $bs2 = 1000;
+    static long $bs3 = 100;
+    static long $bs4 = 1;
+
+    /**
+     * @return Test parameters
+     */
+    @Parameters
+    public static Collection<Object[]> data() {
+        return Arrays.asList(new Object[][] { { $bs1 }, { $bs2 }, { $bs3 }, { $bs4 } });
+    }
+
+    // Batch size for this parameterised test run
+    private final long batchSize;
+
+    /**
+     * Creates new tests
+     * 
+     * @param batchSize
+     *            Batch size
+     */
+    public BatchedTriGOutputTest(long batchSize) {
+        this.batchSize = batchSize;
+    }
+
+    @Override
+    protected String getFileExtension() {
+        return ".trig";
+    }
+
+    @Override
+    protected Lang getRdfLanguage() {
+        return Lang.TRIG;
+    }
+
+    @Override
+    protected Configuration prepareConfiguration() {
+        Configuration config = super.prepareConfiguration();
+        // Propagate the batch size under test to the output format
+        config.setLong(RdfIOConstants.OUTPUT_BATCH_SIZE, this.batchSize);
+        return config;
+    }
+
+    @Override
+    protected OutputFormat<NullWritable, QuadWritable> getOutputFormat() {
+        return new BatchedTriGOutputFormat<NullWritable>();
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trig/StreamedTriGOutputTest.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trig/StreamedTriGOutputTest.java b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trig/StreamedTriGOutputTest.java
new file mode 100644
index 0000000..9b2b669
--- /dev/null
+++ b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trig/StreamedTriGOutputTest.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.output.trig;
+
+import java.util.Arrays;
+import java.util.Collection;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.OutputFormat;
+import org.apache.jena.hadoop.rdf.io.RdfIOConstants;
+import org.apache.jena.hadoop.rdf.io.output.AbstractQuadOutputFormatTests;
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
+import org.apache.jena.riot.Lang;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+
+/**
+ * Tests for TriG output using the streamed writer.
+ *
+ * Parameterised over the output batch size: runs the standard quad output
+ * format tests at the default batch size plus 1000, 100 and the degenerate
+ * batch size of 1.
+ */
+@RunWith(Parameterized.class)
+public class StreamedTriGOutputTest extends AbstractQuadOutputFormatTests {
+
+    // Batch sizes under test: default, large, small and single-quad batches
+    static long $bs1 = RdfIOConstants.DEFAULT_OUTPUT_BATCH_SIZE;
+    static long $bs2 = 1000;
+    static long $bs3 = 100;
+    static long $bs4 = 1;
+
+    /**
+     * @return Test parameters
+     */
+    @Parameters
+    public static Collection<Object[]> data() {
+        return Arrays.asList(new Object[][] { { $bs1 }, { $bs2 }, { $bs3 }, { $bs4 } });
+    }
+
+    // Batch size for this parameterised test run
+    private final long batchSize;
+
+    /**
+     * Creates new tests
+     * 
+     * @param batchSize
+     *            Batch size
+     */
+    public StreamedTriGOutputTest(long batchSize) {
+        this.batchSize = batchSize;
+    }
+
+    @Override
+    protected String getFileExtension() {
+        return ".trig";
+    }
+
+    @Override
+    protected Lang getRdfLanguage() {
+        return Lang.TRIG;
+    }
+
+    @Override
+    protected Configuration prepareConfiguration() {
+        Configuration config = super.prepareConfiguration();
+        // Batch size is still set here even though this variant uses the
+        // streamed format; presumably it is ignored by the streamed writer -
+        // carried over from the batched test template (TODO confirm)
+        config.setLong(RdfIOConstants.OUTPUT_BATCH_SIZE, this.batchSize);
+        return config;
+    }
+
+    @Override
+    protected OutputFormat<NullWritable, QuadWritable> getOutputFormat() {
+        return new TriGOutputFormat<NullWritable>();
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trig/TriGBlankNodeOutputTests.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trig/TriGBlankNodeOutputTests.java b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trig/TriGBlankNodeOutputTests.java
new file mode 100644
index 0000000..c9b3a26
--- /dev/null
+++ b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trig/TriGBlankNodeOutputTests.java
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.output.trig;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.OutputFormat;
+import org.apache.jena.hadoop.rdf.io.RdfIOConstants;
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
+import org.apache.jena.riot.RDFDataMgr;
+import org.junit.Assert;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+import com.hp.hpl.jena.datatypes.xsd.XSDDatatype;
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.graph.NodeFactory;
+import com.hp.hpl.jena.rdf.model.Model;
+import com.hp.hpl.jena.rdf.model.ResIterator;
+import com.hp.hpl.jena.rdf.model.Resource;
+import com.hp.hpl.jena.sparql.core.Quad;
+
+/**
+ * Tests for TriG output with blank nodes.
+ *
+ * Extends the streamed TriG tests but generates quads that share a single
+ * blank node subject across multiple graphs, then verifies that blank node
+ * identity is preserved in the written output.
+ */
+@RunWith(Parameterized.class)
+public class TriGBlankNodeOutputTests extends StreamedTriGOutputTest {
+
+    // Batch sizes under test: default, large, small and single-quad batches
+    static long $bs1 = RdfIOConstants.DEFAULT_OUTPUT_BATCH_SIZE;
+    static long $bs2 = 1000;
+    static long $bs3 = 100;
+    static long $bs4 = 1;
+
+    /**
+     * @return Test parameters
+     */
+    @Parameters
+    public static Collection<Object[]> data() {
+        return Arrays.asList(new Object[][] { { $bs1 }, { $bs2 }, { $bs3 },
+                { $bs4 } });
+    }
+
+    /**
+     * Creates new tests
+     * 
+     * @param batchSize
+     *            Batch size
+     */
+    public TriGBlankNodeOutputTests(long batchSize) {
+        super(batchSize);
+    }
+
+    // Generates num quads, each in a distinct graph, all sharing one blank
+    // node subject so the output can be checked for blank node preservation
+    @Override
+    protected Iterator<QuadWritable> generateTuples(int num) {
+        List<QuadWritable> qs = new ArrayList<QuadWritable>();
+        Node subject = NodeFactory.createAnon();
+        for (int i = 0; i < num; i++) {
+            Quad t = new Quad(
+                    NodeFactory.createURI("http://example.org/graphs/" + i),
+                    subject,
+                    NodeFactory.createURI("http://example.org/predicate"),
+                    NodeFactory.createLiteral(Integer.toString(i),
+                            XSDDatatype.XSDinteger));
+            qs.add(new QuadWritable(t));
+        }
+        return qs.iterator();
+    }
+
+    // In addition to the base checks, reloads the output and verifies every
+    // subject is a blank node and that they all collapsed to a single node
+    @Override
+    protected void checkTuples(File f, long expected) {
+        super.checkTuples(f, expected);
+
+        Model m = RDFDataMgr.loadModel("file://" + f.getAbsolutePath(),
+                this.getRdfLanguage());
+        ResIterator iter = m.listSubjects();
+        Set<Node> subjects = new HashSet<Node>();
+        while (iter.hasNext()) {
+            Resource res = iter.next();
+            Assert.assertTrue(res.isAnon());
+            subjects.add(res.asNode());
+        }
+        // Should only be one subject unless the data was empty in which case
+        // there will be zero subjects
+        Assert.assertEquals(expected == 0 ? 0 : 1, subjects.size());
+    }
+
+    @Override
+    protected OutputFormat<NullWritable, QuadWritable> getOutputFormat() {
+        return new TriGOutputFormat<NullWritable>();
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trix/TriXOutputTest.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trix/TriXOutputTest.java b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trix/TriXOutputTest.java
new file mode 100644
index 0000000..9b6e307
--- /dev/null
+++ b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/trix/TriXOutputTest.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.output.trix;
+
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.OutputFormat;
+import org.apache.jena.hadoop.rdf.io.output.AbstractQuadOutputFormatTests;
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
+import org.apache.jena.riot.Lang;
+
+/**
+ * Tests for TriX output format.
+ *
+ * Runs the standard quad output format tests against the TriX output format.
+ */
+public class TriXOutputTest extends AbstractQuadOutputFormatTests {
+
+    @Override
+    protected String getFileExtension() {
+        return ".trix";
+    }
+
+    @Override
+    protected Lang getRdfLanguage() {
+        return Lang.TRIX;
+    }
+
+    @Override
+    protected OutputFormat<NullWritable, QuadWritable> getOutputFormat() {
+        return new TriXOutputFormat<NullWritable>();
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/BatchedTurtleOutputTest.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/BatchedTurtleOutputTest.java b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/BatchedTurtleOutputTest.java
new file mode 100644
index 0000000..a6c4d70
--- /dev/null
+++ b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/BatchedTurtleOutputTest.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.output.turtle;
+
+import java.util.Arrays;
+import java.util.Collection;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.OutputFormat;
+import org.apache.jena.hadoop.rdf.io.RdfIOConstants;
+import org.apache.jena.hadoop.rdf.io.output.AbstractTripleOutputFormatTests;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+import org.apache.jena.riot.Lang;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+
+/**
+ * Tests for batched Turtle output.
+ *
+ * Parameterised over the output batch size: runs the standard triple output
+ * format tests at the default batch size plus 1000, 100 and the degenerate
+ * batch size of 1.
+ */
+@RunWith(Parameterized.class)
+public class BatchedTurtleOutputTest extends AbstractTripleOutputFormatTests {
+
+    // Batch sizes under test: default, large, small and single-triple batches
+    static long $bs1 = RdfIOConstants.DEFAULT_OUTPUT_BATCH_SIZE;
+    static long $bs2 = 1000;
+    static long $bs3 = 100;
+    static long $bs4 = 1;
+
+    /**
+     * @return Test parameters
+     */
+    @Parameters
+    public static Collection<Object[]> data() {
+        return Arrays.asList(new Object[][] { { $bs1 }, { $bs2 }, { $bs3 }, { $bs4 } });
+    }
+
+    // Batch size for this parameterised test run
+    private final long batchSize;
+
+    /**
+     * Creates new tests
+     * 
+     * @param batchSize
+     *            Batch size
+     */
+    public BatchedTurtleOutputTest(long batchSize) {
+        this.batchSize = batchSize;
+    }
+
+    @Override
+    protected String getFileExtension() {
+        return ".ttl";
+    }
+
+    @Override
+    protected Lang getRdfLanguage() {
+        return Lang.TURTLE;
+    }
+
+    @Override
+    protected Configuration prepareConfiguration() {
+        Configuration config = super.prepareConfiguration();
+        // Propagate the batch size under test to the output format
+        config.setLong(RdfIOConstants.OUTPUT_BATCH_SIZE, this.batchSize);
+        return config;
+    }
+
+    @Override
+    protected OutputFormat<NullWritable, TripleWritable> getOutputFormat() {
+        return new BatchedTurtleOutputFormat<NullWritable>();
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/StreamedTurtleOutputTest.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/StreamedTurtleOutputTest.java b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/StreamedTurtleOutputTest.java
new file mode 100644
index 0000000..d8843d3
--- /dev/null
+++ b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/StreamedTurtleOutputTest.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.output.turtle;
+
+import java.util.Arrays;
+import java.util.Collection;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.OutputFormat;
+import org.apache.jena.hadoop.rdf.io.RdfIOConstants;
+import org.apache.jena.hadoop.rdf.io.output.AbstractTripleOutputFormatTests;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+import org.apache.jena.riot.Lang;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+
+/**
+ * Tests for Turtle output using the streamed writer.
+ *
+ * Parameterised over the output batch size: runs the standard triple output
+ * format tests at the default batch size plus 1000, 100 and the degenerate
+ * batch size of 1.
+ */
+@RunWith(Parameterized.class)
+public class StreamedTurtleOutputTest extends AbstractTripleOutputFormatTests {
+
+    // Batch sizes under test: default, large, small and single-triple batches
+    static long $bs1 = RdfIOConstants.DEFAULT_OUTPUT_BATCH_SIZE;
+    static long $bs2 = 1000;
+    static long $bs3 = 100;
+    static long $bs4 = 1;
+
+    /**
+     * @return Test parameters
+     */
+    @Parameters
+    public static Collection<Object[]> data() {
+        return Arrays.asList(new Object[][] { { $bs1 }, { $bs2 }, { $bs3 }, { $bs4 } });
+    }
+
+    // Batch size for this parameterised test run
+    private final long batchSize;
+
+    /**
+     * Creates new tests
+     * 
+     * @param batchSize
+     *            Batch size
+     */
+    public StreamedTurtleOutputTest(long batchSize) {
+        this.batchSize = batchSize;
+    }
+
+    @Override
+    protected String getFileExtension() {
+        return ".ttl";
+    }
+
+    @Override
+    protected Lang getRdfLanguage() {
+        return Lang.TURTLE;
+    }
+
+    @Override
+    protected Configuration prepareConfiguration() {
+        Configuration config = super.prepareConfiguration();
+        // Batch size is still set here even though this variant uses the
+        // streamed format; presumably it is ignored by the streamed writer -
+        // carried over from the batched test template (TODO confirm)
+        config.setLong(RdfIOConstants.OUTPUT_BATCH_SIZE, this.batchSize);
+        return config;
+    }
+
+    @Override
+    protected OutputFormat<NullWritable, TripleWritable> getOutputFormat() {
+        return new TurtleOutputFormat<NullWritable>();
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/TurtleBlankNodeOutputTests.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/TurtleBlankNodeOutputTests.java b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/TurtleBlankNodeOutputTests.java
new file mode 100644
index 0000000..8dcae4e
--- /dev/null
+++ b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/output/turtle/TurtleBlankNodeOutputTests.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.output.turtle;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.OutputFormat;
+import org.apache.jena.hadoop.rdf.io.RdfIOConstants;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+import org.apache.jena.riot.RDFDataMgr;
+import org.junit.Assert;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+import com.hp.hpl.jena.datatypes.xsd.XSDDatatype;
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.graph.NodeFactory;
+import com.hp.hpl.jena.graph.Triple;
+import com.hp.hpl.jena.rdf.model.Model;
+import com.hp.hpl.jena.rdf.model.ResIterator;
+import com.hp.hpl.jena.rdf.model.Resource;
+
+/**
+ * Tests for Turtle output with blank nodes.
+ *
+ * Extends the streamed Turtle tests but generates triples that share a single
+ * blank node subject, then verifies that blank node identity is preserved in
+ * the written output.
+ */
+@RunWith(Parameterized.class)
+public class TurtleBlankNodeOutputTests extends StreamedTurtleOutputTest {
+
+    // Batch sizes under test: default, large, small and single-triple batches
+    static long $bs1 = RdfIOConstants.DEFAULT_OUTPUT_BATCH_SIZE;
+    static long $bs2 = 1000;
+    static long $bs3 = 100;
+    static long $bs4 = 1;
+
+    /**
+     * @return Test parameters
+     */
+    @Parameters
+    public static Collection<Object[]> data() {
+        return Arrays.asList(new Object[][] { { $bs1 }, { $bs2 }, { $bs3 },
+                { $bs4 } });
+    }
+
+    /**
+     * Creates new tests
+     * 
+     * @param batchSize
+     *            Batch size
+     */
+    public TurtleBlankNodeOutputTests(long batchSize) {
+        super(batchSize);
+    }
+
+    // Generates num triples all sharing one blank node subject so the output
+    // can be checked for blank node preservation
+    @Override
+    protected Iterator<TripleWritable> generateTuples(int num) {
+        List<TripleWritable> ts = new ArrayList<TripleWritable>();
+        Node subject = NodeFactory.createAnon();
+        for (int i = 0; i < num; i++) {
+            Triple t = new Triple(subject,
+                    NodeFactory.createURI("http://example.org/predicate"),
+                    NodeFactory.createLiteral(Integer.toString(i),
+                            XSDDatatype.XSDinteger));
+            ts.add(new TripleWritable(t));
+        }
+        return ts.iterator();
+    }
+
+    // In addition to the base checks, reloads the output and verifies every
+    // subject is a blank node and that they all collapsed to a single node
+    @Override
+    protected void checkTuples(File f, long expected) {
+        super.checkTuples(f, expected);
+
+        Model m = RDFDataMgr.loadModel("file://" + f.getAbsolutePath(),
+                this.getRdfLanguage());
+        ResIterator iter = m.listSubjects();
+        Set<Node> subjects = new HashSet<Node>();
+        while (iter.hasNext()) {
+            Resource res = iter.next();
+            Assert.assertTrue(res.isAnon());
+            subjects.add(res.asNode());
+        }
+        // Should only be one subject unless the data was empty in which case
+        // there will be zero subjects
+        Assert.assertEquals(expected == 0 ? 0 : 1, subjects.size());
+    }
+
+    @Override
+    protected OutputFormat<NullWritable, TripleWritable> getOutputFormat() {
+        return new TurtleOutputFormat<NullWritable>();
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/registry/TestHadoopRdfIORegistry.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/registry/TestHadoopRdfIORegistry.java b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/registry/TestHadoopRdfIORegistry.java
new file mode 100644
index 0000000..2eae232
--- /dev/null
+++ b/jena-elephas/jena-elephas-io/src/test/java/org/apache/jena/hadoop/rdf/io/registry/TestHadoopRdfIORegistry.java
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jena.hadoop.rdf.io.registry;
+
+import java.io.IOException;
+import java.io.StringWriter;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+import org.apache.jena.riot.Lang;
+import org.apache.jena.riot.RDFLanguages;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Tests for the {@link HadoopRdfIORegistry}
+ */
+public class TestHadoopRdfIORegistry {
+
+    /**
+     * Checks that the registry's declared support for a language matches its
+     * actual ability to produce readers and writers for that language.
+     *
+     * @param lang
+     *            RDF language to check
+     * @param triples
+     *            Whether triple reading is expected to be supported
+     * @param quads
+     *            Whether quad reading is expected to be supported
+     * @param writesSupported
+     *            Whether writing is expected to be supported for whatever the
+     *            language can read (some formats may be asymmetric)
+     */
+    private void testLang(Lang lang, boolean triples, boolean quads, boolean writesSupported) {
+        Assert.assertEquals(triples, HadoopRdfIORegistry.hasTriplesReader(lang));
+        Assert.assertEquals(quads, HadoopRdfIORegistry.hasQuadReader(lang));
+
+        // Some formats may be asymmetric
+        if (writesSupported) {
+            Assert.assertEquals(triples, HadoopRdfIORegistry.hasTriplesWriter(lang));
+            Assert.assertEquals(quads, HadoopRdfIORegistry.hasQuadWriter(lang));
+        } else {
+            Assert.assertFalse(HadoopRdfIORegistry.hasTriplesWriter(lang));
+            Assert.assertFalse(HadoopRdfIORegistry.hasQuadWriter(lang));
+        }
+
+        if (triples) {
+            // Check that triples are supported
+            RecordReader<LongWritable, TripleWritable> tripleReader;
+            try {
+                tripleReader = HadoopRdfIORegistry.createTripleReader(lang);
+                Assert.assertNotNull(tripleReader);
+            } catch (IOException e) {
+                Assert.fail("Registry indicates that " + lang.getName()
+                        + " can read triples but fails to produce a triple reader when asked: " + e.getMessage());
+            }
+
+            if (writesSupported) {
+                RecordWriter<NullWritable, TripleWritable> tripleWriter;
+                try {
+                    tripleWriter = HadoopRdfIORegistry.createTripleWriter(lang, new StringWriter(), new Configuration(
+                            false));
+                    Assert.assertNotNull(tripleWriter);
+                } catch (IOException e) {
+                    Assert.fail("Registry indicates that " + lang.getName()
+                            + " can write triples but fails to produce a triple writer when asked: " + e.getMessage());
+                }
+            }
+        } else {
+            // Check that triples are not supported
+            try {
+                HadoopRdfIORegistry.createTripleReader(lang);
+                Assert.fail("Registry indicates that " + lang.getName()
+                        + " cannot read triples but produced a triple reader when asked (error was expected)");
+            } catch (IOException e) {
+                // This is expected
+            }
+            try {
+                HadoopRdfIORegistry.createTripleWriter(lang, new StringWriter(), new Configuration(false));
+                // NOTE(review): message typo below - "triple write" should
+                // read "triple writer" (left unchanged; message is a runtime string)
+                Assert.fail("Registry indicates that " + lang.getName()
+                        + " cannot write triples but produced a triple write when asked (error was expected)");
+            } catch (IOException e) {
+                // This is expected
+            }
+        }
+
+        if (quads) {
+            // Check that quads are supported
+            RecordReader<LongWritable, QuadWritable> quadReader;
+            try {
+                quadReader = HadoopRdfIORegistry.createQuadReader(lang);
+                Assert.assertNotNull(quadReader);
+            } catch (IOException e) {
+                Assert.fail("Registry indicates that " + lang.getName()
+                        + " can read quads but fails to produce a quad reader when asked: " + e.getMessage());
+            }
+
+            if (writesSupported) {
+                RecordWriter<NullWritable, QuadWritable> quadWriter;
+                try {
+                    quadWriter = HadoopRdfIORegistry.createQuadWriter(lang, new StringWriter(),
+                            new Configuration(false));
+                    Assert.assertNotNull(quadWriter);
+                } catch (IOException e) {
+                    // NOTE(review): message typo below - "triple writer" should
+                    // read "quad writer" (left unchanged; message is a runtime string)
+                    Assert.fail("Registry indicates that " + lang.getName()
+                            + " can write quads but fails to produce a triple writer when asked: " + e.getMessage());
+                }
+            }
+        } else {
+            try {
+                HadoopRdfIORegistry.createQuadReader(lang);
+                Assert.fail("Registry indicates that " + lang.getName()
+                        + " cannot read quads but produced a quad reader when asked (error was expected)");
+            } catch (IOException e) {
+                // This is expected
+            }
+            try {
+                HadoopRdfIORegistry.createQuadWriter(lang, new StringWriter(), new Configuration(false));
+                Assert.fail("Registry indicates that " + lang.getName()
+                        + " cannot write quads but produced a quad writer when asked (error was expected)");
+            } catch (IOException e) {
+                // This is expected
+            }
+        }
+    }
+
+    @Test
+    public void json_ld_registered() {
+        testLang(Lang.JSONLD, true, true, true);
+    }
+
+    @Test
+    public void nquads_registered() {
+        testLang(Lang.NQUADS, false, true, true);
+        testLang(Lang.NQ, false, true, true);
+    }
+
+    @Test
+    public void ntriples_registered() {
+        testLang(Lang.NTRIPLES, true, false, true);
+        testLang(Lang.NT, true, false, true);
+    }
+
+    @Test
+    public void rdf_json_registered() {
+        testLang(Lang.RDFJSON, true, false, true);
+    }
+
+    @Test
+    public void rdf_xml_registered() {
+        testLang(Lang.RDFXML, true, false, true);
+    }
+
+    @Test
+    public void rdf_thrift_registered() {
+        testLang(RDFLanguages.THRIFT, true, true, true);
+    }
+
+    @Test
+    public void trig_registered() {
+        testLang(Lang.TRIG, false, true, true);
+    }
+
+    @Test
+    public void trix_registered() {
+        testLang(Lang.TRIX, false, true, true);
+    }
+
+    @Test
+    public void turtle_registered() {
+        testLang(Lang.TURTLE, true, false, true);
+        testLang(Lang.TTL, true, false, true);
+        testLang(Lang.N3, true, false, true);
+    }
+
+    // RDFNULL should have no reader/writer support registered at all
+    @Test
+    public void unregistered() {
+        testLang(Lang.RDFNULL, false, false, true);
+    }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/pom.xml
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/pom.xml b/jena-elephas/jena-elephas-mapreduce/pom.xml
new file mode 100644
index 0000000..aed59be
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/pom.xml
@@ -0,0 +1,87 @@
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements. See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at

 http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <parent>
    <groupId>org.apache.jena</groupId>
    <artifactId>jena-elephas</artifactId>
    <version>0.9.0-SNAPSHOT</version>
  </parent>
  <artifactId>jena-elephas-mapreduce</artifactId>
  <name>Apache Jena - Elephas - Map/Reduce</name>
  <description>Contains some basic Map/Reduce implementations for working with RDF on Hadoop</description>

  <dependencies>
    <!-- Internal Project Dependencies -->
    <!-- NOTE(review): updated from the pre-rebranding jena-hadoop-rdf-common
         artifact to match the Elephas module naming used by the parent -->
    <dependency>
      <groupId>org.apache.jena</groupId>
      <artifactId>jena-elephas-common</artifactId>
      <version>${project.version}</version>
    </dependency>

    <!-- Hadoop Dependencies -->
    <!-- Note these will be provided on the Hadoop cluster hence the provided
      scope -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-mapreduce-client-common</artifactId>
      <scope>provided</scope>
    </dependency>

    <!-- Jena dependencies -->
    <dependency>
      <groupId>org.apache.jena</groupId>
      <artifactId>jena-arq</artifactId>
    </dependency>

    <!-- Test Dependencies -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.mrunit</groupId>
      <artifactId>mrunit</artifactId>
      <scope>test</scope>
      <classifier>hadoop2</classifier>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <!-- JAR plugin to ensure tests jar is built -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-jar-plugin</artifactId>
        <executions>
          <execution>
            <goals>
              <goal>test-jar</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/KeyMapper.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/KeyMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/KeyMapper.java
new file mode 100644
index 0000000..306a697
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/KeyMapper.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.hadoop.mapreduce.Mapper;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A mapper which discards the value replacing it with the key
+ *
+ *
+ * @param <TKey> Key type
+ * @param <TValue> Value type
+ */
+public class KeyMapper<TKey, TValue> extends Mapper<TKey, TValue, TKey, TKey> {
+ private static final Logger LOG = LoggerFactory.getLogger(KeyMapper.class);
+
+ private boolean tracing = false;
+
+ @Override
+ protected void setup(Context context) throws IOException, InterruptedException {
+ super.setup(context);
+ this.tracing = LOG.isTraceEnabled();
+ }
+
+ @Override
+ protected void map(TKey key, TValue value, Context context) throws IOException,
+ InterruptedException {
+ if (this.tracing) {
+ LOG.trace("Key = {}", key);
+ }
+ context.write(key, key);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/KeyPlusNullMapper.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/KeyPlusNullMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/KeyPlusNullMapper.java
new file mode 100644
index 0000000..a6e9a6a
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/KeyPlusNullMapper.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A mapper which discards the value replacing it with a null
+ *
+ *
+ * @param <TKey> Key type
+ * @param <TValue> Value type
+ */
+public class KeyPlusNullMapper<TKey, TValue> extends Mapper<TKey, TValue, TKey, NullWritable> {
+ private static final Logger LOG = LoggerFactory.getLogger(KeyPlusNullMapper.class);
+
+ private boolean tracing = false;
+
+ @Override
+ protected void setup(Context context) throws IOException, InterruptedException {
+ super.setup(context);
+ this.tracing = LOG.isTraceEnabled();
+ }
+
+ @Override
+ protected void map(TKey key, TValue value, Context context) throws IOException,
+ InterruptedException {
+ if (this.tracing) {
+ LOG.trace("Key = {}", key);
+ }
+ context.write(key, NullWritable.get());
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/KeyReducer.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/KeyReducer.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/KeyReducer.java
new file mode 100644
index 0000000..7805f16
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/KeyReducer.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.hadoop.mapreduce.Reducer;
+
/**
 * A reducer that, for each key group, outputs a single pair consisting of the
 * key in both the key and value positions; the incoming values are ignored.
 *
 * @author rvesse
 *
 * @param <TKey> Key
 * @param <TValue> Value
 */
public class KeyReducer<TKey, TValue> extends Reducer<TKey, TValue, TKey, TKey> {

    @Override
    protected void reduce(TKey key, Iterable<TValue> values, Context context)
            throws IOException, InterruptedException {
        // Values are deliberately discarded; only the key is propagated
        context.write(key, key);
    }
}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusKeyMapper.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusKeyMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusKeyMapper.java
new file mode 100644
index 0000000..7a48c1d
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusKeyMapper.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A mapper which discards the value, moves the key to the value position and uses a null key
+ *
+ *
+ * @param <TKey> Key type
+ * @param <TValue> Value type
+ */
+public class NullPlusKeyMapper<TKey, TValue> extends Mapper<TKey, TValue, NullWritable, TKey> {
+ private static final Logger LOG = LoggerFactory.getLogger(NullPlusKeyMapper.class);
+
+ private boolean tracing = false;
+
+ @Override
+ protected void setup(Context context) throws IOException, InterruptedException {
+ super.setup(context);
+ this.tracing = LOG.isTraceEnabled();
+ }
+
+ @Override
+ protected void map(TKey key, TValue value, Context context) throws IOException,
+ InterruptedException {
+ if (this.tracing) {
+ LOG.trace("Key = {}", key);
+ }
+ context.write(NullWritable.get(), key);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusKeyReducer.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusKeyReducer.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusKeyReducer.java
new file mode 100644
index 0000000..dfc6ec1
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusKeyReducer.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A reducer that outputs a single pair consists of a null as the key and the key as the value
+ * @author rvesse
+ *
+ * @param <TKey> Key
+ * @param <TValue> Value
+ */
+public class NullPlusKeyReducer<TKey, TValue> extends Reducer<TKey, TValue, NullWritable, TKey> {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(NullPlusKeyReducer.class);
+ private boolean tracing = false;
+
+ @Override
+ protected void setup(Context context) throws IOException, InterruptedException {
+ super.setup(context);
+ this.tracing = LOGGER.isTraceEnabled();
+ }
+
+ @Override
+ protected void reduce(TKey key, Iterable<TValue> values, Context context)
+ throws IOException, InterruptedException {
+ if (this.tracing) {
+ LOGGER.trace("Input Key = {}", key);
+ Iterator<TValue> iter = values.iterator();
+ while (iter.hasNext()) {
+ LOGGER.trace("Input Value = {}", iter.next());
+ }
+ }
+ context.write(NullWritable.get(), key);
+ }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusValueMapper.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusValueMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusValueMapper.java
new file mode 100644
index 0000000..a5ac199
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusValueMapper.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A mapper which discards the key replacing it with a null leaving the value as is
+ *
+ *
+ * @param <TKey> Key type
+ * @param <TValue> Value type
+ */
+public class NullPlusValueMapper<TKey, TValue> extends Mapper<TKey, TValue, NullWritable, TValue> {
+ private static final Logger LOG = LoggerFactory.getLogger(NullPlusValueMapper.class);
+
+ private boolean tracing = false;
+
+ @Override
+ protected void setup(Context context) throws IOException, InterruptedException {
+ super.setup(context);
+ this.tracing = LOG.isTraceEnabled();
+ }
+
+ @Override
+ protected void map(TKey key, TValue value, Context context) throws IOException,
+ InterruptedException {
+ if (this.tracing) {
+ LOG.trace("Value = {}", value);
+ }
+ context.write(NullWritable.get(), value);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusValueReducer.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusValueReducer.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusValueReducer.java
new file mode 100644
index 0000000..c6b270f
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusValueReducer.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A reducer that outputs a pair for each value consisting of a null key and the
+ * value
+ *
+ * @author rvesse
+ *
+ * @param <TKey>
+ * Key
+ * @param <TValue>
+ * Value
+ */
+public class NullPlusValueReducer<TKey, TValue> extends Reducer<TKey, TValue, NullWritable, TValue> {
+ private static final Logger LOGGER = LoggerFactory.getLogger(NullPlusValueReducer.class);
+ private boolean tracing = false;
+
+ @Override
+ protected void setup(Context context) throws IOException, InterruptedException {
+ super.setup(context);
+ this.tracing = LOGGER.isTraceEnabled();
+ }
+
+ @Override
+ protected void reduce(TKey key, Iterable<TValue> values, Context context) throws IOException, InterruptedException {
+ if (this.tracing) {
+ LOGGER.trace("Input Key = {}", key);
+ }
+ Iterator<TValue> iter = values.iterator();
+ while (iter.hasNext()) {
+ TValue value = iter.next();
+ if (tracing) {
+ LOGGER.trace("Input Value = {}", value);
+ }
+ context.write(NullWritable.get(), value);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/RdfMapReduceConstants.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/RdfMapReduceConstants.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/RdfMapReduceConstants.java
new file mode 100644
index 0000000..6a8cf18
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/RdfMapReduceConstants.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce;
+
/**
 * RDF Map/Reduce related configuration key constants.
 * <p>
 * This is a pure constants holder and cannot be instantiated or extended.
 */
public final class RdfMapReduceConstants {

    /**
     * Private constructor prevents instantiation
     */
    private RdfMapReduceConstants() {

    }

    /**
     * Configuration key used to set whether the behaviour of the filter mappers
     * is inverted. When enabled the filter mappers will invert their selection
     * i.e. tuples that would normally be accepted will be rejected and vice
     * versa.
     */
    public static final String FILTER_INVERT = "rdf.mapreduce.filter.invert";

    /**
     * Configuration key used to set a comma separated list of predicate URIs
     * to filter upon
     */
    public static final String FILTER_PREDICATE_URIS = "rdf.mapreduce.filter.predicate.uris";

    /**
     * Configuration key used to set a comma separated list of subject URIs to
     * filter upon
     */
    public static final String FILTER_SUBJECT_URIS = "rdf.mapreduce.filter.subject.uris";

    /**
     * Configuration key used to set a comma separated list of object URIs to
     * filter upon
     */
    public static final String FILTER_OBJECT_URIS = "rdf.mapreduce.filter.object.uris";

    /**
     * Configuration key used to set a comma separated list of graph URIs to
     * filter upon
     */
    public static final String FILTER_GRAPH_URIS = "rdf.mapreduce.filter.graph.uris";
}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/SwapMapper.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/SwapMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/SwapMapper.java
new file mode 100644
index 0000000..ef518a9
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/SwapMapper.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.hadoop.mapreduce.Mapper;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A mapper which swaps the key and value around
+ *
+ *
+ * @param <TKey> Key type
+ * @param <TValue> Value type
+ */
+public class SwapMapper<TKey, TValue> extends Mapper<TKey, TValue, TValue, TKey> {
+ private static final Logger LOG = LoggerFactory.getLogger(SwapMapper.class);
+
+ private boolean tracing = false;
+
+ @Override
+ protected void setup(Context context) throws IOException, InterruptedException {
+ super.setup(context);
+ this.tracing = LOG.isTraceEnabled();
+ }
+
+ @Override
+ protected void map(TKey key, TValue value, Context context) throws IOException,
+ InterruptedException {
+ if (this.tracing) {
+ LOG.trace("Key = {}", key);
+ LOG.trace("Value = {}", value);
+ }
+ context.write(value, key);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/SwapReducer.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/SwapReducer.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/SwapReducer.java
new file mode 100644
index 0000000..e7e42a0
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/SwapReducer.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.mapreduce.Reducer;
+
+/**
+ * A reducer that swaps the key and value
+ * @author rvesse
+ *
+ * @param <TKey> Key
+ * @param <TValue> Value
+ */
+public class SwapReducer<TKey, TValue> extends Reducer<TKey, TValue, TValue, TKey> {
+
+ @Override
+ protected void reduce(TKey key, Iterable<TValue> values, Context context)
+ throws IOException, InterruptedException {
+ Iterator<TValue> iter = values.iterator();
+ while (iter.hasNext()) {
+ context.write(iter.next(), key);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/TextCountReducer.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/TextCountReducer.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/TextCountReducer.java
new file mode 100644
index 0000000..04b9283
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/TextCountReducer.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Reducer;
+
+/**
+ * A reducer which takes text keys with a sequence of longs representing counts
+ * as the values and sums the counts together into pairs consisting of a node
+ * key and a count value.
+ *
+ *
+ *
+ */
+public class TextCountReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
+
+ @Override
+ protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException,
+ InterruptedException {
+ long count = 0;
+ Iterator<LongWritable> iter = values.iterator();
+ while (iter.hasNext()) {
+ count += iter.next().get();
+ }
+ context.write(key, new LongWritable(count));
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/ValueMapper.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/ValueMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/ValueMapper.java
new file mode 100644
index 0000000..23ae5f0
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/ValueMapper.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.hadoop.mapreduce.Mapper;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A mapper which discards the key replacing it with the value
+ *
+ *
+ * @param <TKey> Key type
+ * @param <TValue> Value type
+ */
+public class ValueMapper<TKey, TValue> extends Mapper<TKey, TValue, TValue, TValue> {
+ private static final Logger LOG = LoggerFactory.getLogger(ValueMapper.class);
+
+ private boolean tracing = false;
+
+ @Override
+ protected void setup(Context context) throws IOException, InterruptedException {
+ super.setup(context);
+ this.tracing = LOG.isTraceEnabled();
+ }
+
+ @Override
+ protected void map(TKey key, TValue value, Context context) throws IOException,
+ InterruptedException {
+ if (this.tracing) {
+ LOG.trace("Value = {}", value);
+ }
+ context.write(value, value);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/ValuePlusNullMapper.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/ValuePlusNullMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/ValuePlusNullMapper.java
new file mode 100644
index 0000000..094fb2d
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/ValuePlusNullMapper.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A mapper which discards the key replacing it with the value and nulls out the value
+ *
+ *
+ * @param <TKey> Key type
+ * @param <TValue> Value type
+ */
+public class ValuePlusNullMapper<TKey, TValue> extends Mapper<TKey, TValue, TValue, NullWritable> {
+ private static final Logger LOG = LoggerFactory.getLogger(ValuePlusNullMapper.class);
+
+ private boolean tracing = false;
+
+ @Override
+ protected void setup(Context context) throws IOException, InterruptedException {
+ super.setup(context);
+ this.tracing = LOG.isTraceEnabled();
+ }
+
+ @Override
+ protected void map(TKey key, TValue value, Context context) throws IOException,
+ InterruptedException {
+ if (this.tracing) {
+ LOG.trace("Value = {}", value);
+ }
+ context.write(value, NullWritable.get());
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/ValueReducer.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/ValueReducer.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/ValueReducer.java
new file mode 100644
index 0000000..7d25799
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/ValueReducer.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.mapreduce.Reducer;
+
+/**
+ * A reducer that outputs a pair for each value consisting of the value as both the key and value
+ * @author rvesse
+ *
+ * @param <TKey> Key
+ * @param <TValue> Value
+ */
+public class ValueReducer<TKey, TValue> extends Reducer<TKey, TValue, TValue, TValue> {
+
+ @Override
+ protected void reduce(TKey key, Iterable<TValue> values, Context context)
+ throws IOException, InterruptedException {
+ Iterator<TValue> iter = values.iterator();
+ while (iter.hasNext()) {
+ TValue value = iter.next();
+ context.write(value, value);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/AbstractCharacteristicSetGeneratingReducer.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/AbstractCharacteristicSetGeneratingReducer.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/AbstractCharacteristicSetGeneratingReducer.java
new file mode 100644
index 0000000..daf61d4
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/AbstractCharacteristicSetGeneratingReducer.java
@@ -0,0 +1,179 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.characteristics;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable;
+import org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable;
+import org.apache.jena.hadoop.rdf.types.CharacteristicWritable;
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Abstract reducer which takes in tuples grouped by some node and generating
+ * initial characteristic sets.
+ * <p>
+ * This produces the characteristic sets as both the key and value so that in a
+ * subsequent job the characteristic steps may be further combined together to
+ * total up the usage counts appropriately.
+ * </p>
+ * <p>
+ * It is important to note that the output from this mapper can be very large
+ * and since it typically needs to be written to HDFS before being processed by
+ * further jobs it is strongly recommended that you use appropriate output
+ * compression
+ * </p>
+ *
+ *
+ *
+ * @param <TValue>
+ * Tuple type
+ * @param <T>
+ * Writable tuple type
+ */
+public abstract class AbstractCharacteristicSetGeneratingReducer<TValue, T extends AbstractNodeTupleWritable<TValue>> extends
+ Reducer<NodeWritable, T, CharacteristicSetWritable, NullWritable> {
+
+ private static final Logger LOG = LoggerFactory.getLogger(AbstractCharacteristicSetGeneratingReducer.class);
+
+ private boolean tracing = false;
+
+ @Override
+ protected void setup(Context context) throws IOException, InterruptedException {
+ super.setup(context);
+ this.tracing = LOG.isTraceEnabled();
+ }
+
+ @Override
+ protected void reduce(NodeWritable key, Iterable<T> values, Context context) throws IOException, InterruptedException {
+ Map<NodeWritable, CharacteristicWritable> characteristics = new TreeMap<NodeWritable, CharacteristicWritable>();
+
+ // Firstly need to find individual characteristics
+ Iterator<T> iter = values.iterator();
+ while (iter.hasNext()) {
+ T tuple = iter.next();
+ NodeWritable predicate = this.getPredicate(tuple);
+
+ if (characteristics.containsKey(predicate)) {
+ characteristics.get(predicate).increment();
+ } else {
+ characteristics.put(predicate, new CharacteristicWritable(predicate.get()));
+ }
+ }
+
+ // Then we need to produce all the possible characteristic sets based on
+ // this information
+ List<CharacteristicWritable> cs = new ArrayList<CharacteristicWritable>(characteristics.values());
+ if (cs.size() == 0)
+ return;
+ for (int i = 1; i <= cs.size(); i++) {
+ this.outputSets(cs, i, context);
+ }
+ }
+
+ /**
+ * Output all sets of a given size
+ *
+ * @param cs
+ * Characteristics
+ * @param perSet
+ * Set size
+ * @param context
+ * Context to output sets to
+ * @throws IOException
+ * @throws InterruptedException
+ */
+ protected void outputSets(List<CharacteristicWritable> cs, int perSet, Context context) throws IOException,
+ InterruptedException {
+ if (perSet == 1) {
+ for (CharacteristicWritable c : cs) {
+ CharacteristicSetWritable set = new CharacteristicSetWritable(c);
+ context.write(set, NullWritable.get());
+ if (this.tracing) {
+ LOG.trace("Key = {}", set);
+ }
+ }
+ } else if (perSet == cs.size()) {
+ CharacteristicSetWritable set = new CharacteristicSetWritable();
+ for (CharacteristicWritable c : cs) {
+ set.add(c);
+ }
+ context.write(set, NullWritable.get());
+ if (this.tracing) {
+ LOG.trace("Key = {}", set);
+ }
+ } else {
+ CharacteristicWritable[] members = new CharacteristicWritable[perSet];
+ this.combinations(cs, perSet, 0, members, context);
+ }
+ }
+
+ /**
+ * Calculate all available combinations of N elements from the given
+ * characteristics
+ *
+ * @param cs
+ * Characteristics
+ * @param len
+ * Desired number of elements
+ * @param startPosition
+ * Start position
+ * @param result
+ * Result array to fill
+ * @param context
+ * Context to write completed combinations to
+ * @throws IOException
+ * @throws InterruptedException
+ */
+ protected final void combinations(List<CharacteristicWritable> cs, int len, int startPosition,
+ CharacteristicWritable[] result, Context context) throws IOException, InterruptedException {
+ if (len == 0) {
+ CharacteristicSetWritable set = new CharacteristicSetWritable(result);
+ context.write(set, NullWritable.get());
+ if (this.tracing) {
+ LOG.trace("Key = {}", set);
+ }
+ return;
+ }
+ for (int i = startPosition; i <= cs.size() - len; i++) {
+ result[result.length - len] = cs.get(i);
+ combinations(cs, len - 1, i + 1, result, context);
+ }
+ }
+
+ /**
+ * Gets the predicate for the tuple
+ *
+ * @param tuple
+ * Tuple
+ * @return
+ */
+ protected abstract NodeWritable getPredicate(T tuple);
+
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/CharacteristicSetReducer.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/CharacteristicSetReducer.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/CharacteristicSetReducer.java
new file mode 100644
index 0000000..e70698a
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/CharacteristicSetReducer.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.characteristics;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Reducer which takes in characteristic sets and sums up all their usage counts
+ *
+ *
+ */
+public class CharacteristicSetReducer extends
+ Reducer<CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, NullWritable> {
+
+ private static final Logger LOG = LoggerFactory.getLogger(CharacteristicSetReducer.class);
+ private boolean tracing = false;
+
+ @Override
+ protected void setup(Context context) throws IOException, InterruptedException {
+ super.setup(context);
+ this.tracing = LOG.isTraceEnabled();
+ }
+
+ @Override
+ protected void reduce(CharacteristicSetWritable key, Iterable<CharacteristicSetWritable> values, Context context)
+ throws IOException, InterruptedException {
+ Iterator<CharacteristicSetWritable> iter = values.iterator();
+ CharacteristicSetWritable output = new CharacteristicSetWritable(0);
+
+ if (this.tracing) {
+ LOG.trace("Key = {}", key);
+ }
+
+ while (iter.hasNext()) {
+ CharacteristicSetWritable set = iter.next();
+ if (this.tracing) {
+ LOG.trace("Value = {}", set);
+ }
+ output.add(set);
+ }
+
+ context.write(output, NullWritable.get());
+ }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/QuadCharacteristicSetGeneratingReducer.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/QuadCharacteristicSetGeneratingReducer.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/QuadCharacteristicSetGeneratingReducer.java
new file mode 100644
index 0000000..d11cd56
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/QuadCharacteristicSetGeneratingReducer.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.characteristics;
+
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
+
+import com.hp.hpl.jena.sparql.core.Quad;
+
+/**
+ * A reducer which converts quads grouped by some node into characteristic sets
+ *
+ *
+ *
+ */
+public class QuadCharacteristicSetGeneratingReducer extends AbstractCharacteristicSetGeneratingReducer<Quad, QuadWritable> {
+
+ @Override
+ protected NodeWritable getPredicate(QuadWritable tuple) {
+ return new NodeWritable(tuple.get().getPredicate());
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/TripleCharacteristicSetGeneratingReducer.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/TripleCharacteristicSetGeneratingReducer.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/TripleCharacteristicSetGeneratingReducer.java
new file mode 100644
index 0000000..6515c91
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/TripleCharacteristicSetGeneratingReducer.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.characteristics;
+
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+
+import com.hp.hpl.jena.graph.Triple;
+
+/**
+ * A reducer which converts triples grouped by some node into characteristic
+ * sets
+ *
+ *
+ *
+ */
+public class TripleCharacteristicSetGeneratingReducer extends AbstractCharacteristicSetGeneratingReducer<Triple, TripleWritable> {
+
+ @Override
+ protected NodeWritable getPredicate(TripleWritable tuple) {
+ return new NodeWritable(tuple.get().getPredicate());
+ }
+
+}