You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by rv...@apache.org on 2014/07/09 15:29:24 UTC

svn commit: r1609146 - in /jena/Experimental/hadoop-rdf: ./ hadoop-rdf-common/src/main/java/org/apache/jena/hadoop/rdf/types/ hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/ hadoop-rdf-mapreduce/src/test/java/org/apache/jena/ha...

Author: rvesse
Date: Wed Jul  9 13:29:23 2014
New Revision: 1609146

URL: http://svn.apache.org/r1609146
Log:
Fix comparison bug in TripleWritable and QuadWritable, add extra logging to some reducer implementations

Added:
    jena/Experimental/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/TestDistinctTriples.java
Modified:
    jena/Experimental/hadoop-rdf/hadoop-rdf-common/src/main/java/org/apache/jena/hadoop/rdf/types/QuadWritable.java
    jena/Experimental/hadoop-rdf/hadoop-rdf-common/src/main/java/org/apache/jena/hadoop/rdf/types/TripleWritable.java
    jena/Experimental/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusKeyReducer.java
    jena/Experimental/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusValueReducer.java
    jena/Experimental/hadoop-rdf/pom.xml

Modified: jena/Experimental/hadoop-rdf/hadoop-rdf-common/src/main/java/org/apache/jena/hadoop/rdf/types/QuadWritable.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-common/src/main/java/org/apache/jena/hadoop/rdf/types/QuadWritable.java?rev=1609146&r1=1609145&r2=1609146&view=diff
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-common/src/main/java/org/apache/jena/hadoop/rdf/types/QuadWritable.java (original)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-common/src/main/java/org/apache/jena/hadoop/rdf/types/QuadWritable.java Wed Jul  9 13:29:23 2014
@@ -73,8 +73,7 @@ public class QuadWritable extends Abstra
 
     @Override
     protected Node[] createNodes(Quad tuple) {
-        Quad q = this.get();
-        return new Node[] { q.getGraph(), q.getSubject(), q.getPredicate(), q.getObject() };
+        return new Node[] { tuple.getGraph(), tuple.getSubject(), tuple.getPredicate(), tuple.getObject() };
     }
 
 }

Modified: jena/Experimental/hadoop-rdf/hadoop-rdf-common/src/main/java/org/apache/jena/hadoop/rdf/types/TripleWritable.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-common/src/main/java/org/apache/jena/hadoop/rdf/types/TripleWritable.java?rev=1609146&r1=1609145&r2=1609146&view=diff
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-common/src/main/java/org/apache/jena/hadoop/rdf/types/TripleWritable.java (original)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-common/src/main/java/org/apache/jena/hadoop/rdf/types/TripleWritable.java Wed Jul  9 13:29:23 2014
@@ -74,7 +74,6 @@ public class TripleWritable extends Abst
 
     @Override
     protected Node[] createNodes(Triple tuple) {
-        Triple t = this.get();
-        return new Node[] { t.getSubject(), t.getPredicate(), t.getObject() };
+        return new Node[] { tuple.getSubject(), tuple.getPredicate(), tuple.getObject() };
     }
 }

Modified: jena/Experimental/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusKeyReducer.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusKeyReducer.java?rev=1609146&r1=1609145&r2=1609146&view=diff
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusKeyReducer.java (original)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusKeyReducer.java Wed Jul  9 13:29:23 2014
@@ -19,9 +19,12 @@
 package org.apache.jena.hadoop.rdf.mapreduce;
 
 import java.io.IOException;
+import java.util.Iterator;
 
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapreduce.Reducer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * A reducer that outputs a single pair consisting of a null as the key and the key as the value
@@ -32,9 +35,25 @@ import org.apache.hadoop.mapreduce.Reduc
  */
 public class NullPlusKeyReducer<TKey, TValue> extends Reducer<TKey, TValue, NullWritable, TKey> {
 
+    private static final Logger LOGGER = LoggerFactory.getLogger(NullPlusKeyReducer.class);
+    private boolean tracing = false;
+    
+    @Override
+    protected void setup(Context context) throws IOException, InterruptedException {
+        super.setup(context);
+        this.tracing = LOGGER.isTraceEnabled();
+    }
+
     @Override
     protected void reduce(TKey key, Iterable<TValue> values, Context context)
             throws IOException, InterruptedException {
+        if (this.tracing) {
+            LOGGER.trace("Input Key = {}", key);
+            Iterator<TValue> iter = values.iterator();
+            while (iter.hasNext()) {
+                LOGGER.trace("Input Value = {}", iter.next());
+            }
+        }
         context.write(NullWritable.get(), key);
     }
 }

Modified: jena/Experimental/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusValueReducer.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusValueReducer.java?rev=1609146&r1=1609145&r2=1609146&view=diff
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusValueReducer.java (original)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/NullPlusValueReducer.java Wed Jul  9 13:29:23 2014
@@ -23,22 +23,41 @@ import java.util.Iterator;
 
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapreduce.Reducer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
- * A reducer that outputs a pair for each value consisting of a null key and the value
+ * A reducer that outputs a pair for each value consisting of a null key and the
+ * value
+ * 
  * @author rvesse
- *
- * @param <TKey> Key
- * @param <TValue> Value
+ * 
+ * @param <TKey>
+ *            Key
+ * @param <TValue>
+ *            Value
  */
 public class NullPlusValueReducer<TKey, TValue> extends Reducer<TKey, TValue, NullWritable, TValue> {
+    private static final Logger LOGGER = LoggerFactory.getLogger(NullPlusValueReducer.class);
+    private boolean tracing = false;
 
     @Override
-    protected void reduce(TKey key, Iterable<TValue> values, Context context)
-            throws IOException, InterruptedException {
+    protected void setup(Context context) throws IOException, InterruptedException {
+        super.setup(context);
+        this.tracing = LOGGER.isTraceEnabled();
+    }
+
+    @Override
+    protected void reduce(TKey key, Iterable<TValue> values, Context context) throws IOException, InterruptedException {
+        if (this.tracing) {
+            LOGGER.trace("Input Key = {}", key);
+        }
         Iterator<TValue> iter = values.iterator();
         while (iter.hasNext()) {
             TValue value = iter.next();
+            if (tracing) {
+                LOGGER.trace("Input Value = {}", value);
+            }
             context.write(NullWritable.get(), value);
         }
     }

Added: jena/Experimental/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/TestDistinctTriples.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/TestDistinctTriples.java?rev=1609146&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/TestDistinctTriples.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/TestDistinctTriples.java Wed Jul  9 13:29:23 2014
@@ -0,0 +1,88 @@
+package org.apache.jena.hadoop.rdf.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+import org.apache.log4j.BasicConfigurator;
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+import org.junit.Assert;
+import org.junit.Test;
+
+import com.hp.hpl.jena.graph.NodeFactory;
+import com.hp.hpl.jena.graph.Triple;
+
+public class TestDistinctTriples
+        extends
+        AbstractMapReduceTests<LongWritable, TripleWritable, TripleWritable, NullWritable, NullWritable, TripleWritable> {
+
+    @Override
+    protected Mapper<LongWritable, TripleWritable, TripleWritable, NullWritable> getMapperInstance() {
+        return new ValuePlusNullMapper<LongWritable, TripleWritable>();
+    }
+
+    @Override
+    protected Reducer<TripleWritable, NullWritable, NullWritable, TripleWritable> getReducerInstance() {
+        return new NullPlusKeyReducer<TripleWritable, NullWritable>();
+    }
+
+    @Test
+    public void distinct_triples_01() throws IOException {
+        MapReduceDriver<LongWritable, TripleWritable, TripleWritable, NullWritable, NullWritable, TripleWritable> driver = this
+                .getMapReduceDriver();
+
+        Triple t = new Triple(NodeFactory.createURI("urn:s"), NodeFactory.createURI("urn:p"),
+                NodeFactory.createLiteral("1"));
+        TripleWritable tw = new TripleWritable(t);
+        driver.addInput(new LongWritable(1), tw);
+        driver.addOutput(NullWritable.get(), tw);
+
+        driver.runTest();
+    }
+
+    @Test
+    public void distinct_triples_02() throws IOException {
+        MapReduceDriver<LongWritable, TripleWritable, TripleWritable, NullWritable, NullWritable, TripleWritable> driver = this
+                .getMapReduceDriver();
+
+        Triple t = new Triple(NodeFactory.createURI("urn:s"), NodeFactory.createURI("urn:p"),
+                NodeFactory.createLiteral("1"));
+        TripleWritable tw = new TripleWritable(t);
+        for (int i = 0; i < 100; i++) {
+            driver.addInput(new LongWritable(i), tw);
+        }
+        driver.addOutput(NullWritable.get(), tw);
+
+        driver.runTest();
+    }
+    
+    @Test
+    public void distinct_triples_03() throws IOException {
+        MapReduceDriver<LongWritable, TripleWritable, TripleWritable, NullWritable, NullWritable, TripleWritable> driver = this
+                .getMapReduceDriver();
+
+        Triple t = new Triple(NodeFactory.createURI("urn:s"), NodeFactory.createURI("urn:p"),
+                NodeFactory.createLiteral("1"));
+        Triple t2 = new Triple(t.getSubject(), t.getPredicate(), NodeFactory.createLiteral("2"));
+        Assert.assertNotEquals(t, t2);
+        
+        TripleWritable tw = new TripleWritable(t);
+        TripleWritable tw2 = new TripleWritable(t2);
+        Assert.assertNotEquals(tw, tw2);
+        
+        driver.addInput(new LongWritable(1), tw);
+        driver.addInput(new LongWritable(2), tw2);
+        driver.addOutput(NullWritable.get(), tw);
+        driver.addOutput(NullWritable.get(), tw2);
+
+        BasicConfigurator.configure();
+        Logger.getRootLogger().setLevel(Level.TRACE);
+        
+        driver.runTest(false);
+    }
+}

Modified: jena/Experimental/hadoop-rdf/pom.xml
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/pom.xml?rev=1609146&r1=1609145&r2=1609146&view=diff
==============================================================================
--- jena/Experimental/hadoop-rdf/pom.xml (original)
+++ jena/Experimental/hadoop-rdf/pom.xml Wed Jul  9 13:29:23 2014
@@ -1,19 +1,13 @@
-<!--
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
--->
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor 
+	license agreements. See the NOTICE file distributed with this work for additional 
+	information regarding copyright ownership. The ASF licenses this file to 
+	You under the Apache License, Version 2.0 (the "License"); you may not use 
+	this file except in compliance with the License. You may obtain a copy of 
+	the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required 
+	by applicable law or agreed to in writing, software distributed under the 
+	License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
+	OF ANY KIND, either express or implied. See the License for the specific 
+	language governing permissions and limitations under the License. -->
 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 	<modelVersion>4.0.0</modelVersion>
@@ -22,12 +16,8 @@
 	<version>0.9.0-SNAPSHOT</version>
 	<packaging>pom</packaging>
 
-<!-- 	<parent>
-		<groupId>org.apache.jena</groupId>
-		<artifactId>jena-parent</artifactId>
-		<version>10-SNAPSHOT</version>
-		<relativePath />
-	</parent> -->
+	<!-- <parent> <groupId>org.apache.jena</groupId> <artifactId>jena-parent</artifactId> 
+		<version>10-SNAPSHOT</version> <relativePath /> </parent> -->
 
 	<name>Apache Jena - RDF Tools for Hadoop</name>
 	<description>A collection of tools for working with RDF on the Hadoop platform</description>
@@ -116,6 +106,14 @@
 	<build>
 		<plugins>
 			<plugin>
+				<artifactId>maven-compiler-plugin</artifactId>
+				<version>${plugin.compiler.version}</version>
+				<configuration>
+					<source>1.7</source>
+					<target>1.7</target>
+				</configuration>
+			</plugin>
+			<plugin>
 				<groupId>org.apache.maven.plugins</groupId>
 				<artifactId>maven-enforcer-plugin</artifactId>
 				<executions>