You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by rv...@apache.org on 2015/01/27 18:28:33 UTC
[40/59] [abbrv] jena git commit: Further rebranding to Elephas
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/positional/TripleFilterBySubjectUriMapper.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/positional/TripleFilterBySubjectUriMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/positional/TripleFilterBySubjectUriMapper.java
new file mode 100644
index 0000000..d6ac375
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/positional/TripleFilterBySubjectUriMapper.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.filter.positional;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.jena.hadoop.rdf.mapreduce.RdfMapReduceConstants;
+
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.graph.NodeFactory;
+
+/**
+ * A triple filter which selects triples whose subject is one of a configured
+ * list of URIs.
+ * <p>
+ * The URIs are read from the
+ * {@link RdfMapReduceConstants#FILTER_SUBJECT_URIS} configuration setting
+ * during {@link #setup(Context)}. When that setting yields no values no
+ * subjects are registered and {@link #acceptsSubject(Node)} rejects every
+ * subject. This filter reports all predicates and all objects as acceptable.
+ * </p>
+ *
+ * @param <TKey>
+ *            Key type
+ */
+public class TripleFilterBySubjectUriMapper<TKey> extends AbstractTripleFilterByPositionMapper<TKey> {
+
+    /** Subject nodes to accept; filled in by setup and never reassigned */
+    private final List<Node> subjects = new ArrayList<Node>();
+
+    /**
+     * Performs the parent setup and then registers a URI node for each subject
+     * URI found in the job configuration
+     *
+     * @param context
+     *            Mapper context providing the configuration
+     * @throws IOException
+     *             Declared by the parent setup
+     * @throws InterruptedException
+     *             Declared by the parent setup
+     */
+    @Override
+    protected void setup(Context context) throws IOException, InterruptedException {
+        super.setup(context);
+
+        // Get the subject URIs we are filtering on, a null result means there
+        // is nothing to register
+        String[] subjectUris = context.getConfiguration().getStrings(RdfMapReduceConstants.FILTER_SUBJECT_URIS);
+        if (subjectUris != null) {
+            for (String subjectUri : subjectUris) {
+                this.subjects.add(NodeFactory.createURI(subjectUri));
+            }
+        }
+    }
+
+    /**
+     * Checks whether the subject is one of the registered subject nodes
+     *
+     * @param subject
+     *            Subject node to test
+     * @return True if the subject was registered, false otherwise
+     */
+    @Override
+    protected boolean acceptsSubject(Node subject) {
+        // An empty list contains nothing, so this also rejects every subject
+        // when no subject URIs were registered
+        return this.subjects.contains(subject);
+    }
+
+    /**
+     * Predicates are not restricted by this filter
+     *
+     * @return Always true
+     */
+    @Override
+    protected boolean acceptsAllPredicates() {
+        return true;
+    }
+
+    /**
+     * Objects are not restricted by this filter
+     *
+     * @return Always true
+     */
+    @Override
+    protected boolean acceptsAllObjects() {
+        return true;
+    }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/AbstractNodeTupleGroupingMapper.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/AbstractNodeTupleGroupingMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/AbstractNodeTupleGroupingMapper.java
new file mode 100644
index 0000000..e3d51e4
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/AbstractNodeTupleGroupingMapper.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.group;
+
+import java.io.IOException;
+
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable;
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+
+
+/**
+ * Base class for mappers that regroup tuples: each incoming tuple is written
+ * out unchanged under a {@link NodeWritable} key which replaces the key it
+ * arrived with. Subclasses decide which key to use, for example a component
+ * of the tuple or the result of some other custom logic.
+ *
+ * @param <TKey>
+ *            Key type
+ * @param <TValue>
+ *            Tuple type
+ * @param <T>
+ *            Writable tuple type
+ */
+public abstract class AbstractNodeTupleGroupingMapper<TKey, TValue, T extends AbstractNodeTupleWritable<TValue>> extends
+        Mapper<TKey, T, NodeWritable, T> {
+
+    /**
+     * Writes the tuple to the context under the key chosen by
+     * {@link #selectKey(AbstractNodeTupleWritable)}; the incoming key is not
+     * used
+     */
+    @Override
+    protected final void map(TKey key, T value, Context context) throws IOException, InterruptedException {
+        context.write(this.selectKey(value), value);
+    }
+
+    /**
+     * Gets the key to associate with the tuple
+     *
+     * @param tuple
+     *            Tuple
+     * @return Node to use as key
+     */
+    protected abstract NodeWritable selectKey(T tuple);
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/AbstractQuadGroupingMapper.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/AbstractQuadGroupingMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/AbstractQuadGroupingMapper.java
new file mode 100644
index 0000000..2b96110
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/AbstractQuadGroupingMapper.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.group;
+
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
+
+import com.hp.hpl.jena.sparql.core.Quad;
+
+/**
+ * Abstract mapper implementation which helps in grouping quads by assigning
+ * them a {@link NodeWritable} key in place of their existing key. Derived
+ * implementations of this may select the key based on some component of the
+ * quad or by other custom logic.
+ *
+ * @param <TKey>
+ *            Key type
+ */
+public abstract class AbstractQuadGroupingMapper<TKey> extends AbstractNodeTupleGroupingMapper<TKey, Quad, QuadWritable> {
+
+    /**
+     * Unwraps the quad from its writable and delegates the choice of key to
+     * {@link #selectKey(Quad)}
+     *
+     * @param tuple
+     *            Writable quad
+     * @return Key to use
+     */
+    @Override
+    protected final NodeWritable selectKey(QuadWritable tuple) {
+        return this.selectKey(tuple.get());
+    }
+
+    /**
+     * Selects the key to use
+     *
+     * @param quad
+     *            Quad
+     * @return Key to use
+     */
+    protected abstract NodeWritable selectKey(Quad quad);
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/AbstractTripleGroupingMapper.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/AbstractTripleGroupingMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/AbstractTripleGroupingMapper.java
new file mode 100644
index 0000000..3f44eb0
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/AbstractTripleGroupingMapper.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.group;
+
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+
+import com.hp.hpl.jena.graph.Triple;
+
+/**
+ * Base class for mappers that regroup triples: each triple is given a
+ * {@link NodeWritable} key which replaces the key it arrived with. Subclasses
+ * choose that key, for example from one component of the triple or by some
+ * other custom logic.
+ *
+ * @param <TKey>
+ *            Key type
+ */
+public abstract class AbstractTripleGroupingMapper<TKey> extends AbstractNodeTupleGroupingMapper<TKey, Triple, TripleWritable> {
+
+    /**
+     * Unwraps the triple from its writable and delegates the choice of key to
+     * {@link #selectKey(Triple)}
+     *
+     * @param tuple
+     *            Writable triple
+     * @return Key to use
+     */
+    @Override
+    protected final NodeWritable selectKey(TripleWritable tuple) {
+        final Triple triple = tuple.get();
+        return this.selectKey(triple);
+    }
+
+    /**
+     * Selects the key to use
+     *
+     * @param triple
+     *            Triple
+     * @return Key to use
+     */
+    protected abstract NodeWritable selectKey(Triple triple);
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupByGraphMapper.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupByGraphMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupByGraphMapper.java
new file mode 100644
index 0000000..3b9fd8d
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupByGraphMapper.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.group;
+
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+
+import com.hp.hpl.jena.sparql.core.Quad;
+
+/**
+ * A mapper which groups quads by graph: every quad is re-keyed with its own
+ * graph node
+ *
+ * @param <TKey>
+ *            Key type
+ */
+public class QuadGroupByGraphMapper<TKey> extends AbstractQuadGroupingMapper<TKey> {
+
+    /**
+     * Uses the graph of the quad as the key
+     *
+     * @param quad
+     *            Quad
+     * @return Graph node of the quad wrapped as a writable
+     */
+    @Override
+    protected NodeWritable selectKey(Quad quad) {
+        final NodeWritable graphKey = new NodeWritable(quad.getGraph());
+        return graphKey;
+    }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupByObjectMapper.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupByObjectMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupByObjectMapper.java
new file mode 100644
index 0000000..eb26e0b
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupByObjectMapper.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.group;
+
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+
+import com.hp.hpl.jena.sparql.core.Quad;
+
+/**
+ * A mapper which groups quads by object: every quad is re-keyed with its own
+ * object node
+ *
+ * @param <TKey>
+ *            Key type
+ */
+public class QuadGroupByObjectMapper<TKey> extends AbstractQuadGroupingMapper<TKey> {
+
+    /**
+     * Uses the object of the quad as the key
+     *
+     * @param quad
+     *            Quad
+     * @return Object node of the quad wrapped as a writable
+     */
+    @Override
+    protected NodeWritable selectKey(Quad quad) {
+        final NodeWritable objectKey = new NodeWritable(quad.getObject());
+        return objectKey;
+    }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupByPredicateMapper.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupByPredicateMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupByPredicateMapper.java
new file mode 100644
index 0000000..2670cf4
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupByPredicateMapper.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.group;
+
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+
+import com.hp.hpl.jena.sparql.core.Quad;
+
+/**
+ * A mapper which groups quads by predicate: every quad is re-keyed with its
+ * own predicate node
+ *
+ * @param <TKey>
+ *            Key type
+ */
+public class QuadGroupByPredicateMapper<TKey> extends AbstractQuadGroupingMapper<TKey> {
+
+    /**
+     * Uses the predicate of the quad as the key
+     *
+     * @param quad
+     *            Quad
+     * @return Predicate node of the quad wrapped as a writable
+     */
+    @Override
+    protected NodeWritable selectKey(Quad quad) {
+        final NodeWritable predicateKey = new NodeWritable(quad.getPredicate());
+        return predicateKey;
+    }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupBySubjectMapper.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupBySubjectMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupBySubjectMapper.java
new file mode 100644
index 0000000..73809e8
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupBySubjectMapper.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.group;
+
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+
+import com.hp.hpl.jena.sparql.core.Quad;
+
+/**
+ * A mapper which groups quads by subject: every quad is re-keyed with its own
+ * subject node
+ *
+ * @param <TKey>
+ *            Key type
+ */
+public class QuadGroupBySubjectMapper<TKey> extends AbstractQuadGroupingMapper<TKey> {
+
+    /**
+     * Uses the subject of the quad as the key
+     *
+     * @param quad
+     *            Quad
+     * @return Subject node of the quad wrapped as a writable
+     */
+    @Override
+    protected NodeWritable selectKey(Quad quad) {
+        final NodeWritable subjectKey = new NodeWritable(quad.getSubject());
+        return subjectKey;
+    }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupByObjectMapper.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupByObjectMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupByObjectMapper.java
new file mode 100644
index 0000000..9fde939
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupByObjectMapper.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.group;
+
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+
+import com.hp.hpl.jena.graph.Triple;
+
+/**
+ * A mapper which groups triples by object: every triple is re-keyed with its
+ * own object node
+ *
+ * @param <TKey>
+ *            Key type
+ */
+public class TripleGroupByObjectMapper<TKey> extends AbstractTripleGroupingMapper<TKey> {
+
+    /**
+     * Uses the object of the triple as the key
+     *
+     * @param triple
+     *            Triple
+     * @return Object node of the triple wrapped as a writable
+     */
+    @Override
+    protected NodeWritable selectKey(Triple triple) {
+        final NodeWritable objectKey = new NodeWritable(triple.getObject());
+        return objectKey;
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupByPredicateMapper.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupByPredicateMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupByPredicateMapper.java
new file mode 100644
index 0000000..dd15ef5
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupByPredicateMapper.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.group;
+
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+
+import com.hp.hpl.jena.graph.Triple;
+
+/**
+ * A mapper which groups triples by predicate: every triple is re-keyed with
+ * its own predicate node
+ *
+ * @param <TKey>
+ *            Key type
+ */
+public class TripleGroupByPredicateMapper<TKey> extends AbstractTripleGroupingMapper<TKey> {
+
+    /**
+     * Uses the predicate of the triple as the key
+     *
+     * @param triple
+     *            Triple
+     * @return Predicate node of the triple wrapped as a writable
+     */
+    @Override
+    protected NodeWritable selectKey(Triple triple) {
+        final NodeWritable predicateKey = new NodeWritable(triple.getPredicate());
+        return predicateKey;
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupBySubjectMapper.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupBySubjectMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupBySubjectMapper.java
new file mode 100644
index 0000000..f1116c1
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupBySubjectMapper.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.group;
+
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+
+import com.hp.hpl.jena.graph.Triple;
+
+/**
+ * A mapper which groups triples by subject: every triple is re-keyed with its
+ * own subject node
+ *
+ * @param <TKey>
+ *            Key type
+ */
+public class TripleGroupBySubjectMapper<TKey> extends AbstractTripleGroupingMapper<TKey> {
+
+    /**
+     * Uses the subject of the triple as the key
+     *
+     * @param triple
+     *            Triple
+     * @return Subject node of the triple wrapped as a writable
+     */
+    @Override
+    protected NodeWritable selectKey(Triple triple) {
+        final NodeWritable subjectKey = new NodeWritable(triple.getSubject());
+        return subjectKey;
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractNodeTupleSplitToNodesMapper.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractNodeTupleSplitToNodesMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractNodeTupleSplitToNodesMapper.java
new file mode 100644
index 0000000..840d78c
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractNodeTupleSplitToNodesMapper.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.split;
+
+import java.io.IOException;
+
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable;
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+
+
+/**
+ * Base class for mappers that break a tuple apart into its individual nodes.
+ * Every node is written out under the key the tuple arrived with, so the
+ * existing keys are preserved as-is.
+ *
+ * @param <TKey>
+ *            Key type
+ * @param <TValue>
+ *            Tuple type
+ * @param <T>
+ *            Writable tuple type
+ */
+public abstract class AbstractNodeTupleSplitToNodesMapper<TKey, TValue, T extends AbstractNodeTupleWritable<TValue>> extends
+        Mapper<TKey, T, TKey, NodeWritable> {
+
+    /**
+     * Writes one record per node returned by
+     * {@link #split(AbstractNodeTupleWritable)}, each keyed by the incoming
+     * key and written in the order the nodes were returned
+     */
+    @Override
+    protected final void map(TKey key, T value, Context context) throws IOException, InterruptedException {
+        final NodeWritable[] nodes = this.split(value);
+        for (int i = 0; i < nodes.length; i++) {
+            context.write(key, nodes[i]);
+        }
+    }
+
+    /**
+     * Breaks the tuple apart into the nodes it is made of
+     *
+     * @param tuple
+     *            Tuple
+     * @return Nodes
+     */
+    protected abstract NodeWritable[] split(T tuple);
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractNodeTupleSplitWithNodesMapper.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractNodeTupleSplitWithNodesMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractNodeTupleSplitWithNodesMapper.java
new file mode 100644
index 0000000..7dc85fd
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractNodeTupleSplitWithNodesMapper.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.split;
+
+import java.io.IOException;
+
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable;
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+
+
+/**
+ * Base class for mappers that break a tuple apart into its individual nodes
+ * and pair each node with the tuple it came from: the tuple becomes the key
+ * and the node becomes the value.
+ *
+ * @param <TKey>
+ *            Key type
+ * @param <TValue>
+ *            Tuple type
+ * @param <T>
+ *            Writable tuple type
+ */
+public abstract class AbstractNodeTupleSplitWithNodesMapper<TKey, TValue, T extends AbstractNodeTupleWritable<TValue>> extends
+        Mapper<TKey, T, T, NodeWritable> {
+
+    /**
+     * Writes one record per node returned by
+     * {@link #split(AbstractNodeTupleWritable)}, each keyed by the tuple
+     * itself; the incoming key is not used
+     */
+    @Override
+    protected void map(TKey key, T value, Context context) throws IOException, InterruptedException {
+        final NodeWritable[] parts = this.split(value);
+        for (int idx = 0; idx < parts.length; idx++) {
+            context.write(value, parts[idx]);
+        }
+    }
+
+    /**
+     * Breaks the tuple apart into the nodes it is made of
+     *
+     * @param tuple
+     *            Tuple
+     * @return Nodes
+     */
+    protected abstract NodeWritable[] split(T tuple);
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitToNodesMapper.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitToNodesMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitToNodesMapper.java
new file mode 100644
index 0000000..c993810
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitToNodesMapper.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.split;
+
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
+
+import com.hp.hpl.jena.sparql.core.Quad;
+
+/**
+ * A mapper which splits quads into their constituent nodes preserving the
+ * existing keys as-is
+ * <p>
+ * Nodes are produced in the order graph, subject, predicate, object.
+ * </p>
+ * @param <TKey>
+ * Key type
+ */
+public class QuadSplitToNodesMapper<TKey> extends AbstractNodeTupleSplitToNodesMapper<TKey, Quad, QuadWritable> {
+ /** Wraps the graph, subject, predicate and object of the quad, in that order, as node writables */
+ @Override
+ protected NodeWritable[] split(QuadWritable tuple) {
+ Quad q = tuple.get();
+ return new NodeWritable[] { new NodeWritable(q.getGraph()), new NodeWritable(q.getSubject()),
+ new NodeWritable(q.getPredicate()), new NodeWritable(q.getObject()) };
+ }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitWithNodesMapper.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitWithNodesMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitWithNodesMapper.java
new file mode 100644
index 0000000..09caef6
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitWithNodesMapper.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.split;
+
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
+
+import com.hp.hpl.jena.sparql.core.Quad;
+
+/**
+ * A mapper which splits quads into their constituent nodes using the quad as
+ * the key and the nodes as the values
+ * <p>
+ * Nodes are produced in the order graph, subject, predicate, object.
+ * </p>
+ * @param <TKey>
+ * Key type
+ */
+public class QuadSplitWithNodesMapper<TKey> extends AbstractNodeTupleSplitWithNodesMapper<TKey, Quad, QuadWritable> {
+ /** Wraps the graph, subject, predicate and object of the quad, in that order, as node writables */
+ @Override
+ protected NodeWritable[] split(QuadWritable tuple) {
+ Quad q = tuple.get();
+ return new NodeWritable[] { new NodeWritable(q.getGraph()), new NodeWritable(q.getSubject()),
+ new NodeWritable(q.getPredicate()), new NodeWritable(q.getObject()) };
+ }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitToNodesMapper.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitToNodesMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitToNodesMapper.java
new file mode 100644
index 0000000..0ef02d9
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitToNodesMapper.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.split;
+
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+
+import com.hp.hpl.jena.graph.Triple;
+
+/**
+ * A mapper which splits triples into their constituent nodes
+ * <p>
+ * Nodes are produced in the order subject, predicate, object.
+ * </p>
+ * @param <TKey> Key type
+ */
+public class TripleSplitToNodesMapper<TKey> extends AbstractNodeTupleSplitToNodesMapper<TKey, Triple, TripleWritable> {
+ /** Wraps the subject, predicate and object of the triple, in that order, as node writables */
+ @Override
+ protected NodeWritable[] split(TripleWritable tuple) {
+ Triple t = tuple.get();
+ return new NodeWritable[] { new NodeWritable(t.getSubject()), new NodeWritable(t.getPredicate()),
+ new NodeWritable(t.getObject()) };
+ }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitWithNodesMapper.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitWithNodesMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitWithNodesMapper.java
new file mode 100644
index 0000000..7b18f55
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitWithNodesMapper.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.split;
+
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+
+import com.hp.hpl.jena.graph.Triple;
+
+/**
+ * A mapper which splits triples into their constituent nodes using the triple
+ * as the key and the nodes as the values
+ * <p>Nodes are produced in the order subject, predicate, object.</p>
+ *
+ * @param <TKey> Key type
+ */
+public class TripleSplitWithNodesMapper<TKey> extends AbstractNodeTupleSplitWithNodesMapper<TKey, Triple, TripleWritable> {
+ /** Wraps the subject, predicate and object of the triple, in that order, as node writables */
+ @Override
+ protected NodeWritable[] split(TripleWritable tuple) {
+ Triple t = tuple.get();
+ return new NodeWritable[] { new NodeWritable(t.getSubject()), new NodeWritable(t.getPredicate()),
+ new NodeWritable(t.getObject()) };
+ }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/AbstractTriplesToQuadsMapper.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/AbstractTriplesToQuadsMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/AbstractTriplesToQuadsMapper.java
new file mode 100644
index 0000000..76137fe
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/AbstractTriplesToQuadsMapper.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.transform;
+
+import java.io.IOException;
+
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.graph.Triple;
+import com.hp.hpl.jena.sparql.core.Quad;
+
+/**
+ * An abstract mapper which transforms triples into quads. Derived
+ * implementations may choose how the graph to which triples are assigned is
+ * decided.
+ * <p>
+ * Keys are left as is by this mapper.
+ * </p>
+ * <p>
+ * The {@code map} method is final, derived implementations only control the graph via {@code selectGraph}.
+ * </p>
+ * @param <TKey> Key type
+ */
+public abstract class AbstractTriplesToQuadsMapper<TKey> extends Mapper<TKey, TripleWritable, TKey, QuadWritable> {
+ /** Writes the triple as a quad in the graph chosen by {@code selectGraph}, using the input key unchanged */
+ @Override
+ protected final void map(TKey key, TripleWritable value, Context context) throws IOException, InterruptedException {
+ Triple triple = value.get();
+ Node graphNode = this.selectGraph(triple);
+ context.write(key, new QuadWritable(new Quad(graphNode, triple)));
+ }
+
+ /**
+ * Selects the graph name to use for converting the given triple into a quad
+ *
+ * @param triple
+ * Triple
+ * @return Graph node to use as the graph of the generated quad
+ */
+ protected abstract Node selectGraph(Triple triple);
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/QuadsToTriplesMapper.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/QuadsToTriplesMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/QuadsToTriplesMapper.java
new file mode 100644
index 0000000..048e669
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/QuadsToTriplesMapper.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.transform;
+
+import java.io.IOException;
+
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+
+
+/**
+ * A mapper which transforms quads into triples
+ * <p>
+ * Keys are left as is by this mapper.
+ * </p>
+ * <p>
+ * Each quad is converted using its {@code asTriple()} method.
+ * </p>
+ * @param <TKey>
+ * Key type
+ */
+public class QuadsToTriplesMapper<TKey> extends Mapper<TKey, QuadWritable, TKey, TripleWritable> {
+ /** Writes the triple form of the given quad using the input key unchanged */
+ @Override
+ protected void map(TKey key, QuadWritable value, Context context) throws IOException, InterruptedException {
+ context.write(key, new TripleWritable(value.get().asTriple()));
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsBySubjectMapper.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsBySubjectMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsBySubjectMapper.java
new file mode 100644
index 0000000..394d5fd
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsBySubjectMapper.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.transform;
+
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.graph.Triple;
+
+/**
+ * A mapper which converts triples into quads using the subjects of the triples
+ * as the graph nodes
+ * <p>The {@code selectGraph} method is final in this implementation.</p>
+ *
+ * @param <TKey>
+ * Key type
+ *
+ */
+public class TriplesToQuadsBySubjectMapper<TKey> extends AbstractTriplesToQuadsMapper<TKey> {
+ /** Returns the subject of the given triple as the graph node */
+ @Override
+ protected final Node selectGraph(Triple triple) {
+ return triple.getSubject();
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsConstantGraphMapper.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsConstantGraphMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsConstantGraphMapper.java
new file mode 100644
index 0000000..ef19edf
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsConstantGraphMapper.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.transform;
+
+import java.io.IOException;
+
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.graph.Triple;
+import com.hp.hpl.jena.sparql.core.Quad;
+
+/**
+ * A mapper which converts triples to quads where all triples are placed in the
+ * same graph
+ * <p>
+ * The graph node is obtained from {@link #getGraphNode()} during setup and then reused for every triple.
+ * </p>
+ * @param <TKey>
+ * Key type
+ */
+public class TriplesToQuadsConstantGraphMapper<TKey> extends AbstractTriplesToQuadsMapper<TKey> {
+
+ private Node graphNode;
+ /** Caches the graph node returned by {@link #getGraphNode()} after the superclass setup has run */
+ @Override
+ protected void setup(Context context) throws IOException, InterruptedException {
+ super.setup(context);
+ this.graphNode = this.getGraphNode();
+ }
+
+ /**
+ * Gets the graph node that will be used for all quads, this will be called
+ * once and only once during the
+ * {@link #setup(org.apache.hadoop.mapreduce.Mapper.Context)} method and the
+ * value returned cached for use throughout the lifetime of this mapper.
+ * <p>
+ * This implementation always uses the default graph as the graph for
+ * generated quads. You can override this method in your own derived
+ * implementation to put triples into a different graph than the default
+ * graph.
+ * </p>
+ * <p>
+ * If instead you wanted to select different graphs for each triple you
+ * should extend {@link AbstractTriplesToQuadsMapper} instead and override
+ * the {@link #selectGraph(Triple)} method which is sealed in this
+ * implementation.
+ * </p>
+ *
+ * @return Graph node to use for all generated quads
+ */
+ protected Node getGraphNode() {
+ return Quad.defaultGraphNodeGenerated;
+ }
+ /** Returns the graph node cached during setup regardless of the triple given */
+ @Override
+ protected final Node selectGraph(Triple triple) {
+ return this.graphNode;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/AbstractMapReduceTests.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/AbstractMapReduceTests.java b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/AbstractMapReduceTests.java
new file mode 100644
index 0000000..32c40f7
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/AbstractMapReduceTests.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce;
+
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;
+
+/**
+ * Abstract tests for map reduce jobs i.e. a mapper and a reducer which are
+ * tested together using a {@link MapReduceDriver}
+ *
+ * @param <TKey>
+ * Mapper input key type
+ * @param <TValue>
+ * Mapper input value type
+ * @param <TIntermediateKey>
+ * Mapper output/Reducer input key type
+ * @param <TIntermediateValue>
+ * Mapper output/Reducer input value type
+ * @param <TReducedKey>
+ * Reducer output key type
+ * @param <TReducedValue>
+ * Reducer output value type
+ *
+ *
+ */
+public abstract class AbstractMapReduceTests<TKey, TValue, TIntermediateKey, TIntermediateValue, TReducedKey, TReducedValue> {
+
+ /**
+ * Gets the mapper instance to test
+ *
+ * @return Mapper instance
+ */
+ protected abstract Mapper<TKey, TValue, TIntermediateKey, TIntermediateValue> getMapperInstance();
+
+ /**
+ * Gets the reducer instance to test
+ *
+ * @return Reducer instance
+ */
+ protected abstract Reducer<TIntermediateKey, TIntermediateValue, TReducedKey, TReducedValue> getReducerInstance();
+
+ /**
+ * Gets a map reduce driver that can be used to create a test case, a new
+ * driver is built from the mapper and reducer instances on every call
+ * @return Map reduce driver
+ */
+ protected MapReduceDriver<TKey, TValue, TIntermediateKey, TIntermediateValue, TReducedKey, TReducedValue> getMapReduceDriver() {
+ return new MapReduceDriver<TKey, TValue, TIntermediateKey, TIntermediateValue, TReducedKey, TReducedValue>(
+ this.getMapperInstance(), this.getReducerInstance());
+ }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/AbstractMapperTests.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/AbstractMapperTests.java b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/AbstractMapperTests.java
new file mode 100644
index 0000000..ce6ab9d
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/AbstractMapperTests.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce;
+
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mrunit.mapreduce.MapDriver;
+
+/**
+ * Abstract tests for mappers
+ * <p>Provides a {@link MapDriver} wrapping the mapper instance under test.</p>
+ *
+ * @param <TKeyIn>
+ * Input key type
+ * @param <TValueIn>
+ * Input value type
+ * @param <TKeyOut>
+ * Output key type
+ * @param <TValueOut>
+ * Output value type
+ *
+ */
+public abstract class AbstractMapperTests<TKeyIn, TValueIn, TKeyOut, TValueOut> {
+
+ /**
+ * Gets the mapper instance to test
+ *
+ * @return Mapper instance
+ */
+ protected abstract Mapper<TKeyIn, TValueIn, TKeyOut, TValueOut> getInstance();
+
+ /**
+ * Gets a map driver that can be used to create a test case, the new driver
+ * is passed to {@link #configureDriver(MapDriver)} before it is returned
+ * @return Map driver
+ */
+ protected MapDriver<TKeyIn, TValueIn, TKeyOut, TValueOut> getMapDriver() {
+ MapDriver<TKeyIn, TValueIn, TKeyOut, TValueOut> driver = new MapDriver<TKeyIn, TValueIn, TKeyOut, TValueOut>(
+ this.getInstance());
+ this.configureDriver(driver);
+ return driver;
+ }
+
+ /**
+ * Method that may be overridden by test harnesses which need to configure
+ * the driver in more detail e.g. add configuration keys
+ *
+ * @param driver
+ * Driver
+ */
+ protected void configureDriver(MapDriver<TKeyIn, TValueIn, TKeyOut, TValueOut> driver) {
+ // Does nothing by default
+ }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/TestDistinctTriples.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/TestDistinctTriples.java b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/TestDistinctTriples.java
new file mode 100644
index 0000000..af32dac
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/TestDistinctTriples.java
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+import org.junit.Assert;
+import org.junit.Test;
+
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.graph.NodeFactory;
+import com.hp.hpl.jena.graph.Triple;
+/** Tests for producing distinct triples with a {@code ValuePlusNullMapper} and {@code NullPlusKeyReducer} map reduce job */
+public class TestDistinctTriples
+ extends
+ AbstractMapReduceTests<LongWritable, TripleWritable, TripleWritable, NullWritable, NullWritable, TripleWritable> {
+
+ @Override
+ protected Mapper<LongWritable, TripleWritable, TripleWritable, NullWritable> getMapperInstance() {
+ return new ValuePlusNullMapper<LongWritable, TripleWritable>();
+ }
+
+ @Override
+ protected Reducer<TripleWritable, NullWritable, NullWritable, TripleWritable> getReducerInstance() {
+ return new NullPlusKeyReducer<TripleWritable, NullWritable>();
+ }
+ /** A single input triple is expected to produce that same triple as the only output */
+ @Test
+ public void distinct_triples_01() throws IOException {
+ MapReduceDriver<LongWritable, TripleWritable, TripleWritable, NullWritable, NullWritable, TripleWritable> driver = this
+ .getMapReduceDriver();
+
+ Triple t = new Triple(NodeFactory.createURI("urn:s"), NodeFactory.createURI("urn:p"),
+ NodeFactory.createLiteral("1"));
+ TripleWritable tw = new TripleWritable(t);
+ driver.addInput(new LongWritable(1), tw);
+ driver.addOutput(NullWritable.get(), tw);
+
+ driver.runTest();
+ }
+ /** The same triple given 100 times under different keys is expected to produce a single output */
+ @Test
+ public void distinct_triples_02() throws IOException {
+ MapReduceDriver<LongWritable, TripleWritable, TripleWritable, NullWritable, NullWritable, TripleWritable> driver = this
+ .getMapReduceDriver();
+
+ Triple t = new Triple(NodeFactory.createURI("urn:s"), NodeFactory.createURI("urn:p"),
+ NodeFactory.createLiteral("1"));
+ TripleWritable tw = new TripleWritable(t);
+ for (int i = 0; i < 100; i++) {
+ driver.addInput(new LongWritable(i), tw);
+ }
+ driver.addOutput(NullWritable.get(), tw);
+
+ driver.runTest();
+ }
+ /** Two triples differing only in their object literal are both expected in the output, output order is not checked */
+ @Test
+ public void distinct_triples_03() throws IOException {
+ MapReduceDriver<LongWritable, TripleWritable, TripleWritable, NullWritable, NullWritable, TripleWritable> driver = this
+ .getMapReduceDriver();
+
+ Triple t = new Triple(NodeFactory.createURI("urn:s"), NodeFactory.createURI("urn:p"),
+ NodeFactory.createLiteral("1"));
+ Triple t2 = new Triple(t.getSubject(), t.getPredicate(), NodeFactory.createLiteral("2"));
+ Assert.assertNotEquals(t, t2);
+
+ TripleWritable tw = new TripleWritable(t);
+ TripleWritable tw2 = new TripleWritable(t2);
+ Assert.assertNotEquals(tw, tw2);
+
+ driver.addInput(new LongWritable(1), tw);
+ driver.addInput(new LongWritable(2), tw2);
+ driver.addOutput(NullWritable.get(), tw);
+ driver.addOutput(NullWritable.get(), tw2);
+
+ driver.runTest(false);
+ }
+ /** Two triples differing only in their subject URI must be unequal, compare as non-zero and both be output, output order is not checked */
+ @Test
+ public void distinct_triples_04() throws IOException {
+ MapReduceDriver<LongWritable, TripleWritable, TripleWritable, NullWritable, NullWritable, TripleWritable> driver = this
+ .getMapReduceDriver();
+
+ Node s1 = NodeFactory.createURI("urn:nf#cbf2b2c7-109e-4097-bbea-f67f272c7fcc");
+ Node s2 = NodeFactory.createURI("urn:nf#bb08b75c-1ad2-47ef-acd2-eb2d92b94b89");
+ Node p = NodeFactory.createURI("urn:p");
+ Node o = NodeFactory.createURI("urn:66.230.159.118");
+ Assert.assertNotEquals(s1, s2);
+
+ Triple t1 = new Triple(s1, p, o);
+ Triple t2 = new Triple(s2, p, o);
+ Assert.assertNotEquals(t1, t2);
+
+ TripleWritable tw1 = new TripleWritable(t1);
+ TripleWritable tw2 = new TripleWritable(t2);
+ Assert.assertNotEquals(tw1, tw2);
+ Assert.assertNotEquals(0, tw1.compareTo(tw2));
+
+ driver.addInput(new LongWritable(1), tw1);
+ driver.addInput(new LongWritable(2), tw2);
+ driver.addOutput(NullWritable.get(), tw1);
+ driver.addOutput(NullWritable.get(), tw2);
+
+ driver.runTest(false);
+ }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/AbstractCharacteristicSetGeneratingReducerTests.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/AbstractCharacteristicSetGeneratingReducerTests.java b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/AbstractCharacteristicSetGeneratingReducerTests.java
new file mode 100644
index 0000000..b2d0b92
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/AbstractCharacteristicSetGeneratingReducerTests.java
@@ -0,0 +1,185 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.characteristics;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;
+import org.apache.jena.hadoop.rdf.mapreduce.AbstractMapReduceTests;
+import org.apache.jena.hadoop.rdf.mapreduce.characteristics.AbstractCharacteristicSetGeneratingReducer;
+import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable;
+import org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable;
+import org.apache.jena.hadoop.rdf.types.CharacteristicWritable;
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.junit.Test;
+
+import com.hp.hpl.jena.graph.NodeFactory;
+
+/**
+ * Abstract tests for reducers derived from
+ * {@link AbstractCharacteristicSetGeneratingReducer}
+ * <p>
+ * Each test feeds tuples into a map/reduce driver and registers the
+ * characteristic sets that are expected as output.
+ * </p>
+ *
+ * @param <TValue>
+ *            Tuple type
+ * @param <T>
+ *            Writable tuple type
+ */
+public abstract class AbstractCharacteristicSetGeneratingReducerTests<TValue, T extends AbstractNodeTupleWritable<TValue>>
+        extends AbstractMapReduceTests<LongWritable, T, NodeWritable, T, CharacteristicSetWritable, NullWritable> {
+
+    /**
+     * Creates a tuple whose subject is derived from the given key and which
+     * uses the given predicate
+     *
+     * @param i
+     *            Key to use in creating the subject
+     * @param predicateUri
+     *            Predicate URI string
+     * @return Tuple
+     */
+    protected abstract T createTuple(int i, String predicateUri);
+
+    /**
+     * Builds a characteristic set from the given predicates and registers it
+     * with the driver as an expected output the requested number of times
+     *
+     * @param driver
+     *            Driver on which the expected outputs are registered
+     * @param occurrences
+     *            Number of times the set is expected in the output
+     * @param predicates
+     *            Predicate URI strings making up the set
+     * @return The characteristic set that was registered
+     */
+    protected CharacteristicSetWritable createSet(MapReduceDriver<LongWritable, T, NodeWritable, T, CharacteristicSetWritable, NullWritable> driver, int occurrences, String... predicates) {
+        CharacteristicSetWritable expected = new CharacteristicSetWritable();
+        for (int idx = 0; idx < predicates.length; idx++) {
+            expected.add(new CharacteristicWritable(NodeFactory.createURI(predicates[idx])));
+        }
+
+        // Register one expected output per requested occurrence
+        for (int remaining = occurrences; remaining > 0; remaining--) {
+            driver.addOutput(expected, NullWritable.get());
+        }
+        return expected;
+    }
+
+    /**
+     * Tests that a single tuple yields a single one-predicate set
+     *
+     * @throws IOException
+     */
+    @Test
+    public void characteristic_set_generating_reducer_01() throws IOException {
+        MapReduceDriver<LongWritable, T, NodeWritable, T, CharacteristicSetWritable, NullWritable> driver = getMapReduceDriver();
+        driver.addInput(new LongWritable(1), createTuple(1, "http://predicate"));
+
+        createSet(driver, 1, "http://predicate");
+
+        driver.runTest(false);
+    }
+
+    /**
+     * Tests that supplying the very same tuple twice still yields only a
+     * single one-predicate set
+     *
+     * @throws IOException
+     */
+    @Test
+    public void characteristic_set_generating_reducer_02() throws IOException {
+        MapReduceDriver<LongWritable, T, NodeWritable, T, CharacteristicSetWritable, NullWritable> driver = getMapReduceDriver();
+        // Deliberately the same tuple instance for both inputs
+        T duplicated = createTuple(1, "http://predicate");
+        driver.addInput(new LongWritable(1), duplicated);
+        driver.addInput(new LongWritable(1), duplicated);
+
+        createSet(driver, 1, "http://predicate");
+
+        driver.runTest(false);
+    }
+
+    /**
+     * Tests that two tuples created from different keys but sharing a
+     * predicate yield the one-predicate set twice
+     *
+     * @throws IOException
+     */
+    @Test
+    public void characteristic_set_generating_reducer_03() throws IOException {
+        MapReduceDriver<LongWritable, T, NodeWritable, T, CharacteristicSetWritable, NullWritable> driver = getMapReduceDriver();
+        driver.addInput(new LongWritable(1), createTuple(1, "http://predicate"));
+        driver.addInput(new LongWritable(2), createTuple(2, "http://predicate"));
+
+        createSet(driver, 2, "http://predicate");
+
+        driver.runTest(false);
+    }
+
+    /**
+     * Tests that two tuples created from the same key with different
+     * predicates yield both single entry sets plus the two entry set
+     *
+     * @throws IOException
+     */
+    @Test
+    public void characteristic_set_generating_reducer_04() throws IOException {
+        MapReduceDriver<LongWritable, T, NodeWritable, T, CharacteristicSetWritable, NullWritable> driver = getMapReduceDriver();
+        driver.addInput(new LongWritable(1), createTuple(1, "http://predicate"));
+        driver.addInput(new LongWritable(1), createTuple(1, "http://other"));
+
+        // Sets with one entry
+        createSet(driver, 1, "http://predicate");
+        createSet(driver, 1, "http://other");
+
+        // Set with two entries
+        createSet(driver, 1, "http://predicate", "http://other");
+
+        driver.runTest(false);
+    }
+
+    /**
+     * Tests that three tuples created from the same key with different
+     * predicates yield every single, two and three entry combination of those
+     * predicates
+     *
+     * @throws IOException
+     */
+    @Test
+    public void characteristic_set_generating_reducer_05() throws IOException {
+        MapReduceDriver<LongWritable, T, NodeWritable, T, CharacteristicSetWritable, NullWritable> driver = getMapReduceDriver();
+        driver.addInput(new LongWritable(1), createTuple(1, "http://predicate"));
+        driver.addInput(new LongWritable(2), createTuple(1, "http://other"));
+        driver.addInput(new LongWritable(3), createTuple(1, "http://third"));
+
+        // Sets with one entry
+        createSet(driver, 1, "http://predicate");
+        createSet(driver, 1, "http://other");
+        createSet(driver, 1, "http://third");
+
+        // Sets with two entries
+        createSet(driver, 1, "http://predicate", "http://other");
+        createSet(driver, 1, "http://predicate", "http://third");
+        createSet(driver, 1, "http://other", "http://third");
+
+        // Set with three entries
+        createSet(driver, 1, "http://predicate", "http://other", "http://third");
+
+        driver.runTest(false);
+    }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/CharacteristicSetReducerTest.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/CharacteristicSetReducerTest.java b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/CharacteristicSetReducerTest.java
new file mode 100644
index 0000000..30da730
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/CharacteristicSetReducerTest.java
@@ -0,0 +1,192 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.characteristics;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;
+import org.apache.hadoop.mrunit.types.Pair;
+import org.apache.jena.hadoop.rdf.mapreduce.AbstractMapReduceTests;
+import org.apache.jena.hadoop.rdf.mapreduce.characteristics.CharacteristicSetReducer;
+import org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable;
+import org.apache.jena.hadoop.rdf.types.CharacteristicWritable;
+import org.junit.Assert;
+import org.junit.Test;
+
+import com.hp.hpl.jena.graph.NodeFactory;
+
+/**
+ * Tests for the {@link CharacteristicSetReducer}
+ * <p>
+ * An identity mapper is used so that the characteristic sets supplied as input
+ * are passed straight to the reducer under test.
+ * </p>
+ */
+public class CharacteristicSetReducerTest
+        extends
+        AbstractMapReduceTests<CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, NullWritable> {
+
+    @Override
+    protected final Mapper<CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable> getMapperInstance() {
+        // Identity mapper
+        return new Mapper<CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable>();
+    }
+
+    @Override
+    protected final Reducer<CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, NullWritable> getReducerInstance() {
+        return new CharacteristicSetReducer();
+    }
+
+    /**
+     * Creates a set consisting of the given predicates and registers it with
+     * the driver, both as an input (keyed by itself) and as an expected output
+     *
+     * @param driver
+     *            Driver on which the inputs and expected outputs are registered
+     * @param inputOccurrences
+     *            Number of times the set is added as input
+     * @param outputOccurrences
+     *            Number of times the set is expected in the output
+     * @param predicates
+     *            Predicates
+     * @return Set
+     */
+    protected CharacteristicSetWritable createSet(
+            MapReduceDriver<CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, NullWritable> driver,
+            int inputOccurrences, int outputOccurrences, String... predicates) {
+        CharacteristicSetWritable set = new CharacteristicSetWritable();
+        for (String predicateUri : predicates) {
+            set.add(new CharacteristicWritable(NodeFactory.createURI(predicateUri)));
+        }
+        for (int i = 1; i <= inputOccurrences; i++) {
+            driver.addInput(set, set);
+        }
+        for (int i = 1; i <= outputOccurrences; i++) {
+            driver.addOutput(set, NullWritable.get());
+        }
+        return set;
+    }
+
+    /**
+     * Tests that a single input set is reduced to a single output set
+     *
+     * @throws IOException
+     */
+    @Test
+    public void characteristic_set_reducer_01() throws IOException {
+        MapReduceDriver<CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, NullWritable> driver = this
+                .getMapReduceDriver();
+
+        this.createSet(driver, 1, 1, "http://predicate");
+
+        driver.runTest(false);
+    }
+
+    /**
+     * Tests that two occurrences of the same set are reduced to a single
+     * output set whose count is two
+     *
+     * @throws IOException
+     */
+    @Test
+    public void characteristic_set_reducer_02() throws IOException {
+        MapReduceDriver<CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, NullWritable> driver = this
+                .getMapReduceDriver();
+
+        this.createSet(driver, 2, 1, "http://predicate");
+
+        driver.runTest(false);
+
+        // A driver is only meant to be run once, so the counts are inspected
+        // by running a second driver that is given the same input
+        driver = this.getMapReduceDriver();
+        this.createSet(driver, 2, 1, "http://predicate");
+        List<Pair<CharacteristicSetWritable, NullWritable>> results = driver.run();
+        Assert.assertEquals(1, results.size());
+        CharacteristicSetWritable cw = results.get(0).getFirst();
+        Assert.assertEquals(2, cw.getCount().get());
+    }
+
+    /**
+     * Tests that two different single entry sets are both output
+     *
+     * @throws IOException
+     */
+    @Test
+    public void characteristic_set_reducer_03() throws IOException {
+        MapReduceDriver<CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, NullWritable> driver = this
+                .getMapReduceDriver();
+
+        this.createSet(driver, 1, 1, "http://predicate");
+        this.createSet(driver, 1, 1, "http://other");
+
+        driver.runTest(false);
+    }
+
+    /**
+     * Tests that counts are tracked per set when one single entry set occurs
+     * twice and another occurs once
+     *
+     * @throws IOException
+     */
+    @Test
+    public void characteristic_set_reducer_04() throws IOException {
+        MapReduceDriver<CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, NullWritable> driver = this
+                .getMapReduceDriver();
+
+        this.createSet(driver, 2, 1, "http://predicate");
+        this.createSet(driver, 1, 1, "http://other");
+
+        driver.runTest(false);
+
+        // A driver is only meant to be run once, so the counts are inspected
+        // by running a second driver that is given the same input
+        driver = this.getMapReduceDriver();
+        this.createSet(driver, 2, 1, "http://predicate");
+        this.createSet(driver, 1, 1, "http://other");
+        List<Pair<CharacteristicSetWritable, NullWritable>> results = driver.run();
+        for (Pair<CharacteristicSetWritable, NullWritable> pair : results) {
+            CharacteristicSetWritable cw = pair.getFirst();
+            // Only the set containing http://predicate was input twice
+            boolean expectTwo = cw.hasCharacteristic("http://predicate");
+            Assert.assertEquals(expectTwo ? 2 : 1, cw.getCount().get());
+        }
+    }
+
+    /**
+     * Tests that a two entry set and a single entry set that share a predicate
+     * are both output
+     *
+     * @throws IOException
+     */
+    @Test
+    public void characteristic_set_reducer_05() throws IOException {
+        MapReduceDriver<CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, NullWritable> driver = this
+                .getMapReduceDriver();
+
+        this.createSet(driver, 1, 1, "http://predicate", "http://other");
+        this.createSet(driver, 1, 1, "http://other");
+
+        driver.runTest(false);
+    }
+
+    /**
+     * Tests that counts are tracked per set when a two entry set occurs twice
+     * and a single entry set sharing one of its predicates occurs once
+     *
+     * @throws IOException
+     */
+    @Test
+    public void characteristic_set_reducer_06() throws IOException {
+        MapReduceDriver<CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, NullWritable> driver = this
+                .getMapReduceDriver();
+
+        this.createSet(driver, 2, 1, "http://predicate", "http://other");
+        this.createSet(driver, 1, 1, "http://other");
+
+        driver.runTest(false);
+
+        // A driver is only meant to be run once, so the counts are inspected
+        // by running a second driver that is given the same input
+        driver = this.getMapReduceDriver();
+        this.createSet(driver, 2, 1, "http://predicate", "http://other");
+        this.createSet(driver, 1, 1, "http://other");
+        List<Pair<CharacteristicSetWritable, NullWritable>> results = driver.run();
+        for (Pair<CharacteristicSetWritable, NullWritable> pair : results) {
+            CharacteristicSetWritable cw = pair.getFirst();
+            // Only the set containing http://predicate was input twice
+            boolean expectTwo = cw.hasCharacteristic("http://predicate");
+            Assert.assertEquals(expectTwo ? 2 : 1, cw.getCount().get());
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/TripleCharacteristicSetGeneratingReducerTest.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/TripleCharacteristicSetGeneratingReducerTest.java b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/TripleCharacteristicSetGeneratingReducerTest.java
new file mode 100644
index 0000000..e647b68
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/TripleCharacteristicSetGeneratingReducerTest.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.characteristics;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.jena.hadoop.rdf.mapreduce.characteristics.TripleCharacteristicSetGeneratingReducer;
+import org.apache.jena.hadoop.rdf.mapreduce.group.TripleGroupBySubjectMapper;
+import org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable;
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+
+import com.hp.hpl.jena.datatypes.xsd.XSDDatatype;
+import com.hp.hpl.jena.graph.NodeFactory;
+import com.hp.hpl.jena.graph.Triple;
+
+/**
+ * Tests for the {@link TripleCharacteristicSetGeneratingReducer}
+ *
+ *
+ *
+ */
+public class TripleCharacteristicSetGeneratingReducerTest extends AbstractCharacteristicSetGeneratingReducerTests<Triple, TripleWritable> {
+
+ @Override
+ protected Mapper<LongWritable, TripleWritable, NodeWritable, TripleWritable> getMapperInstance() {
+ return new TripleGroupBySubjectMapper<LongWritable>();
+ }
+
+ @Override
+ protected Reducer<NodeWritable, TripleWritable, CharacteristicSetWritable, NullWritable> getReducerInstance() {
+ return new TripleCharacteristicSetGeneratingReducer();
+ }
+
+ @Override
+ protected TripleWritable createTuple(int i, String predicateUri) {
+ return new TripleWritable(new Triple(NodeFactory.createURI("http://subjects/" + i), NodeFactory.createURI(predicateUri),
+ NodeFactory.createLiteral(Integer.toString(i), XSDDatatype.XSDinteger)));
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/count/AbstractNodeTupleNodeCountReducedTests.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/count/AbstractNodeTupleNodeCountReducedTests.java b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/count/AbstractNodeTupleNodeCountReducedTests.java
new file mode 100644
index 0000000..ebdbcde
--- /dev/null
+++ b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/count/AbstractNodeTupleNodeCountReducedTests.java
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.count;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;
+import org.apache.jena.hadoop.rdf.mapreduce.AbstractMapReduceTests;
+import org.apache.jena.hadoop.rdf.mapreduce.count.AbstractNodeTupleNodeCountMapper;
+import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable;
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.junit.Test;
+
+
+/**
+ * Abstract tests for node counting where mappers derived from
+ * {@link AbstractNodeTupleNodeCountMapper} are exercised as part of a full
+ * map/reduce pipeline, so the expected output is one total count per distinct
+ * node
+ *
+ * @param <TValue>
+ *            Tuple type
+ * @param <T>
+ *            Writable tuple type
+ */
+public abstract class AbstractNodeTupleNodeCountReducedTests<TValue, T extends AbstractNodeTupleWritable<TValue>> extends
+        AbstractMapReduceTests<LongWritable, T, NodeWritable, LongWritable, NodeWritable, LongWritable> {
+
+    /**
+     * Generates tuples for the tests, adding each one to the driver as input,
+     * and registers the total number of occurrences of every node seen across
+     * those tuples as the expected output
+     *
+     * @param driver
+     *            Driver
+     * @param num
+     *            Number of tuples to generate
+     */
+    protected void generateData(MapReduceDriver<LongWritable, T, NodeWritable, LongWritable, NodeWritable, LongWritable> driver, int num) {
+        Map<NodeWritable, Long> counts = new HashMap<NodeWritable, Long>();
+        for (int i = 0; i < num; i++) {
+            LongWritable key = new LongWritable(i);
+            T value = this.createValue(i);
+            NodeWritable[] nodes = this.getNodes(value);
+
+            driver.addInput(key, value);
+            for (NodeWritable n : nodes) {
+                // Single lookup; a missing entry means this is the first
+                // occurrence of the node
+                Long current = counts.get(n);
+                counts.put(n, current == null ? 1L : current + 1);
+            }
+        }
+
+        // Expect exactly one output record per distinct node
+        for (Entry<NodeWritable, Long> kvp : counts.entrySet()) {
+            driver.addOutput(kvp.getKey(), new LongWritable(kvp.getValue()));
+        }
+    }
+
+    /**
+     * Creates a tuple value
+     *
+     * @param i
+     *            Index
+     * @return Tuple value
+     */
+    protected abstract T createValue(int i);
+
+    /**
+     * Splits the tuple value into its constituent nodes
+     *
+     * @param tuple
+     *            Tuple value
+     * @return Nodes
+     */
+    protected abstract NodeWritable[] getNodes(T tuple);
+
+    /**
+     * Runs a node count test
+     *
+     * @param num
+     *            Number of tuples to generate
+     * @throws IOException
+     */
+    protected void testNodeCount(int num) throws IOException {
+        MapReduceDriver<LongWritable, T, NodeWritable, LongWritable, NodeWritable, LongWritable> driver = this.getMapReduceDriver();
+        this.generateData(driver, num);
+        driver.runTest(false);
+    }
+
+    /**
+     * Tests node counting with a single tuple
+     *
+     * @throws IOException
+     */
+    @Test
+    public void node_count_01() throws IOException {
+        this.testNodeCount(1);
+    }
+
+    /**
+     * Tests node counting with 100 tuples
+     *
+     * @throws IOException
+     */
+    @Test
+    public void node_count_02() throws IOException {
+        this.testNodeCount(100);
+    }
+
+    /**
+     * Tests node counting with 1,000 tuples
+     *
+     * @throws IOException
+     */
+    @Test
+    public void node_count_03() throws IOException {
+        this.testNodeCount(1000);
+    }
+
+    /**
+     * Tests node counting with 2,500 tuples
+     *
+     * @throws IOException
+     */
+    @Test
+    public void node_count_04() throws IOException {
+        this.testNodeCount(2500);
+    }
+}