You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by rv...@apache.org on 2014/07/09 15:29:24 UTC
svn commit: r1609146 - in /jena/Experimental/hadoop-rdf: ./
hadoop-rdf-common/src/main/java/org/apache/jena/hadoop/rdf/types/
hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/
hadoop-rdf-mapreduce/src/test/java/org/apache/jena/ha...
Author: rvesse
Date: Wed Jul 9 13:29:23 2014
New Revision: 1609146
URL: http://svn.apache.org/r1609146
Log:
Fix comparison bug in TripleWritable and QuadWritable, add extra logging to some reducer implementations
Added:
jena/Experimental/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/TestDistinctTriples.java
Modified:
jena/Experimental/hadoop-rdf/hadoop-rdf-common/src/main/java/org/apache/jena/hadoop/rdf/types/QuadWritable.java
jena/Experimental/hadoop-rdf/hadoop-rdf-common/src/main/java/org/apache/jena/hadoop/rdf/types/TripleWritable.java
jena/Experimental/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusKeyReducer.java
jena/Experimental/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusValueReducer.java
jena/Experimental/hadoop-rdf/pom.xml
Modified: jena/Experimental/hadoop-rdf/hadoop-rdf-common/src/main/java/org/apache/jena/hadoop/rdf/types/QuadWritable.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-common/src/main/java/org/apache/jena/hadoop/rdf/types/QuadWritable.java?rev=1609146&r1=1609145&r2=1609146&view=diff
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-common/src/main/java/org/apache/jena/hadoop/rdf/types/QuadWritable.java (original)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-common/src/main/java/org/apache/jena/hadoop/rdf/types/QuadWritable.java Wed Jul 9 13:29:23 2014
@@ -73,8 +73,7 @@ public class QuadWritable extends Abstra
@Override
protected Node[] createNodes(Quad tuple) {
- Quad q = this.get();
- return new Node[] { q.getGraph(), q.getSubject(), q.getPredicate(), q.getObject() };
+ return new Node[] { tuple.getGraph(), tuple.getSubject(), tuple.getPredicate(), tuple.getObject() };
}
}
Modified: jena/Experimental/hadoop-rdf/hadoop-rdf-common/src/main/java/org/apache/jena/hadoop/rdf/types/TripleWritable.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-common/src/main/java/org/apache/jena/hadoop/rdf/types/TripleWritable.java?rev=1609146&r1=1609145&r2=1609146&view=diff
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-common/src/main/java/org/apache/jena/hadoop/rdf/types/TripleWritable.java (original)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-common/src/main/java/org/apache/jena/hadoop/rdf/types/TripleWritable.java Wed Jul 9 13:29:23 2014
@@ -74,7 +74,6 @@ public class TripleWritable extends Abst
@Override
protected Node[] createNodes(Triple tuple) {
- Triple t = this.get();
- return new Node[] { t.getSubject(), t.getPredicate(), t.getObject() };
+ return new Node[] { tuple.getSubject(), tuple.getPredicate(), tuple.getObject() };
}
}
Modified: jena/Experimental/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusKeyReducer.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusKeyReducer.java?rev=1609146&r1=1609145&r2=1609146&view=diff
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusKeyReducer.java (original)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusKeyReducer.java Wed Jul 9 13:29:23 2014
@@ -19,9 +19,12 @@
package org.apache.jena.hadoop.rdf.mapreduce;
import java.io.IOException;
+import java.util.Iterator;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* A reducer that outputs a single pair consisting of a null as the key and the key as the value
@@ -32,9 +35,25 @@ import org.apache.hadoop.mapreduce.Reduc
*/
public class NullPlusKeyReducer<TKey, TValue> extends Reducer<TKey, TValue, NullWritable, TKey> {
+ private static final Logger LOGGER = LoggerFactory.getLogger(NullPlusKeyReducer.class);
+ private boolean tracing = false;
+
+ @Override
+ protected void setup(Context context) throws IOException, InterruptedException {
+ super.setup(context);
+ this.tracing = LOGGER.isTraceEnabled();
+ }
+
@Override
protected void reduce(TKey key, Iterable<TValue> values, Context context)
throws IOException, InterruptedException {
+ if (this.tracing) {
+ LOGGER.trace("Input Key = {}", key);
+ Iterator<TValue> iter = values.iterator();
+ while (iter.hasNext()) {
+ LOGGER.trace("Input Value = {}", iter.next());
+ }
+ }
context.write(NullWritable.get(), key);
}
}
Modified: jena/Experimental/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusValueReducer.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusValueReducer.java?rev=1609146&r1=1609145&r2=1609146&view=diff
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusValueReducer.java (original)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusValueReducer.java Wed Jul 9 13:29:23 2014
@@ -23,22 +23,41 @@ import java.util.Iterator;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
- * A reducer that outputs a pair for each value consisting of a null key and the value
+ * A reducer that outputs a pair for each value consisting of a null key and the
+ * value
+ *
* @author rvesse
- *
- * @param <TKey> Key
- * @param <TValue> Value
+ *
+ * @param <TKey>
+ * Key
+ * @param <TValue>
+ * Value
*/
public class NullPlusValueReducer<TKey, TValue> extends Reducer<TKey, TValue, NullWritable, TValue> {
+ private static final Logger LOGGER = LoggerFactory.getLogger(NullPlusValueReducer.class);
+ private boolean tracing = false;
@Override
- protected void reduce(TKey key, Iterable<TValue> values, Context context)
- throws IOException, InterruptedException {
+ protected void setup(Context context) throws IOException, InterruptedException {
+ super.setup(context);
+ this.tracing = LOGGER.isTraceEnabled();
+ }
+
+ @Override
+ protected void reduce(TKey key, Iterable<TValue> values, Context context) throws IOException, InterruptedException {
+ if (this.tracing) {
+ LOGGER.trace("Input Key = {}", key);
+ }
Iterator<TValue> iter = values.iterator();
while (iter.hasNext()) {
TValue value = iter.next();
+ if (tracing) {
+ LOGGER.trace("Input Value = {}", value);
+ }
context.write(NullWritable.get(), value);
}
}
Added: jena/Experimental/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/TestDistinctTriples.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/TestDistinctTriples.java?rev=1609146&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/TestDistinctTriples.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/TestDistinctTriples.java Wed Jul 9 13:29:23 2014
@@ -0,0 +1,88 @@
+package org.apache.jena.hadoop.rdf.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+import org.apache.log4j.BasicConfigurator;
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+import org.junit.Assert;
+import org.junit.Test;
+
+import com.hp.hpl.jena.graph.NodeFactory;
+import com.hp.hpl.jena.graph.Triple;
+
+public class TestDistinctTriples
+ extends
+ AbstractMapReduceTests<LongWritable, TripleWritable, TripleWritable, NullWritable, NullWritable, TripleWritable> {
+
+ @Override
+ protected Mapper<LongWritable, TripleWritable, TripleWritable, NullWritable> getMapperInstance() {
+ return new ValuePlusNullMapper<LongWritable, TripleWritable>();
+ }
+
+ @Override
+ protected Reducer<TripleWritable, NullWritable, NullWritable, TripleWritable> getReducerInstance() {
+ return new NullPlusKeyReducer<TripleWritable, NullWritable>();
+ }
+
+ @Test
+ public void distinct_triples_01() throws IOException {
+ MapReduceDriver<LongWritable, TripleWritable, TripleWritable, NullWritable, NullWritable, TripleWritable> driver = this
+ .getMapReduceDriver();
+
+ Triple t = new Triple(NodeFactory.createURI("urn:s"), NodeFactory.createURI("urn:p"),
+ NodeFactory.createLiteral("1"));
+ TripleWritable tw = new TripleWritable(t);
+ driver.addInput(new LongWritable(1), tw);
+ driver.addOutput(NullWritable.get(), tw);
+
+ driver.runTest();
+ }
+
+ @Test
+ public void distinct_triples_02() throws IOException {
+ MapReduceDriver<LongWritable, TripleWritable, TripleWritable, NullWritable, NullWritable, TripleWritable> driver = this
+ .getMapReduceDriver();
+
+ Triple t = new Triple(NodeFactory.createURI("urn:s"), NodeFactory.createURI("urn:p"),
+ NodeFactory.createLiteral("1"));
+ TripleWritable tw = new TripleWritable(t);
+ for (int i = 0; i < 100; i++) {
+ driver.addInput(new LongWritable(i), tw);
+ }
+ driver.addOutput(NullWritable.get(), tw);
+
+ driver.runTest();
+ }
+
+ @Test
+ public void distinct_triples_03() throws IOException {
+ MapReduceDriver<LongWritable, TripleWritable, TripleWritable, NullWritable, NullWritable, TripleWritable> driver = this
+ .getMapReduceDriver();
+
+ Triple t = new Triple(NodeFactory.createURI("urn:s"), NodeFactory.createURI("urn:p"),
+ NodeFactory.createLiteral("1"));
+ Triple t2 = new Triple(t.getSubject(), t.getPredicate(), NodeFactory.createLiteral("2"));
+ Assert.assertNotEquals(t, t2);
+
+ TripleWritable tw = new TripleWritable(t);
+ TripleWritable tw2 = new TripleWritable(t2);
+ Assert.assertNotEquals(tw, tw2);
+
+ driver.addInput(new LongWritable(1), tw);
+ driver.addInput(new LongWritable(2), tw2);
+ driver.addOutput(NullWritable.get(), tw);
+ driver.addOutput(NullWritable.get(), tw2);
+
+ BasicConfigurator.configure();
+ Logger.getRootLogger().setLevel(Level.TRACE);
+
+ driver.runTest(false);
+ }
+}
Modified: jena/Experimental/hadoop-rdf/pom.xml
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/pom.xml?rev=1609146&r1=1609145&r2=1609146&view=diff
==============================================================================
--- jena/Experimental/hadoop-rdf/pom.xml (original)
+++ jena/Experimental/hadoop-rdf/pom.xml Wed Jul 9 13:29:23 2014
@@ -1,19 +1,13 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
@@ -22,12 +16,8 @@
<version>0.9.0-SNAPSHOT</version>
<packaging>pom</packaging>
-<!-- <parent>
- <groupId>org.apache.jena</groupId>
- <artifactId>jena-parent</artifactId>
- <version>10-SNAPSHOT</version>
- <relativePath />
- </parent> -->
+ <!-- <parent> <groupId>org.apache.jena</groupId> <artifactId>jena-parent</artifactId>
+ <version>10-SNAPSHOT</version> <relativePath /> </parent> -->
<name>Apache Jena - RDF Tools for Hadoop</name>
<description>A collection of tools for working with RDF on the Hadoop platform</description>
@@ -116,6 +106,14 @@
<build>
<plugins>
<plugin>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>${plugin.compiler.version}</version>
+ <configuration>
+ <source>1.7</source>
+ <target>1.7</target>
+ </configuration>
+ </plugin>
+ <plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-enforcer-plugin</artifactId>
<executions>