You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by rv...@apache.org on 2015/01/27 18:28:25 UTC
[32/59] [abbrv] jena git commit: Further rebranding to Elephas
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-common/src/test/java/org/apache/jena/hadoop/rdf/io/types/RdfTypesTest.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/jena-elephas-common/src/test/java/org/apache/jena/hadoop/rdf/io/types/RdfTypesTest.java b/jena-hadoop-rdf/jena-elephas-common/src/test/java/org/apache/jena/hadoop/rdf/io/types/RdfTypesTest.java
deleted file mode 100644
index a70dfb0..0000000
--- a/jena-hadoop-rdf/jena-elephas-common/src/test/java/org/apache/jena/hadoop/rdf/io/types/RdfTypesTest.java
+++ /dev/null
@@ -1,406 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.io.types;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.DataInput;
-import java.io.DataInputStream;
-import java.io.DataOutput;
-import java.io.DataOutputStream;
-import java.io.IOException;
-
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.jena.atlas.lib.Tuple;
-import org.apache.jena.hadoop.rdf.types.NodeTupleWritable;
-import org.apache.jena.hadoop.rdf.types.NodeWritable;
-import org.apache.jena.hadoop.rdf.types.QuadWritable;
-import org.apache.jena.hadoop.rdf.types.TripleWritable;
-import org.junit.Assert;
-import org.junit.Ignore;
-import org.junit.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.hp.hpl.jena.datatypes.xsd.XSDDatatype;
-import com.hp.hpl.jena.graph.Node;
-import com.hp.hpl.jena.graph.NodeFactory;
-import com.hp.hpl.jena.graph.Triple;
-import com.hp.hpl.jena.sparql.core.Quad;
-
-/**
- * Tests for the various RDF types defined by the
- * {@link org.apache.jena.hadoop.rdf.types} package
- *
- *
- *
- */
-public class RdfTypesTest {
-
- private static final Logger LOG = LoggerFactory.getLogger(RdfTypesTest.class);
-
- private ByteArrayOutputStream outputStream;
- private ByteArrayInputStream inputStream;
-
- /**
- * Prepare for output
- *
- * @return Data output
- */
- private DataOutput prepareOutput() {
- this.outputStream = new ByteArrayOutputStream();
- return new DataOutputStream(this.outputStream);
- }
-
- /**
- * Prepare for input from the previously written output
- *
- * @return Data Input
- */
- private DataInput prepareInput() {
- this.inputStream = new ByteArrayInputStream(this.outputStream.toByteArray());
- return new DataInputStream(this.inputStream);
- }
-
- /**
- * Prepare for input from the given data
- *
- * @param data
- * Data
- * @return Data Input
- */
- @SuppressWarnings("unused")
- private DataInput prepareInput(byte[] data) {
- this.inputStream = new ByteArrayInputStream(data);
- return new DataInputStream(this.inputStream);
- }
-
- @SuppressWarnings({ "unchecked", "rawtypes" })
- private <T extends WritableComparable> void testWriteRead(T writable, T expected) throws IOException, InstantiationException, IllegalAccessException,
- ClassNotFoundException {
- // Write out data
- DataOutput output = this.prepareOutput();
- writable.write(output);
-
- // Read back in data
- DataInput input = this.prepareInput();
- T actual = (T) Class.forName(writable.getClass().getName()).newInstance();
- actual.readFields(input);
-
- LOG.info("Original = " + writable.toString());
- LOG.info("Round Tripped = " + actual.toString());
-
- // Check equivalent
- Assert.assertEquals(0, expected.compareTo(actual));
- }
-
- /**
- * Basic node writable round tripping test
- *
- * @throws IOException
- * @throws InstantiationException
- * @throws IllegalAccessException
- * @throws ClassNotFoundException
- */
- @Test
- public void node_writable_null() throws IOException, InstantiationException, IllegalAccessException, ClassNotFoundException {
- Node n = null;
- NodeWritable nw = new NodeWritable(n);
- testWriteRead(nw, nw);
- }
-
- /**
- * Basic node writable round tripping test
- *
- * @throws IOException
- * @throws InstantiationException
- * @throws IllegalAccessException
- * @throws ClassNotFoundException
- */
- @Test
- @Ignore
- public void node_writable_variable_01() throws IOException, InstantiationException, IllegalAccessException, ClassNotFoundException {
- Node n = NodeFactory.createVariable("x");
- NodeWritable nw = new NodeWritable(n);
- testWriteRead(nw, nw);
- }
-
- /**
- * Basic node writable round tripping test
- *
- * @throws IOException
- * @throws InstantiationException
- * @throws IllegalAccessException
- * @throws ClassNotFoundException
- */
- @Test
- @Ignore
- public void node_writable_variable_02() throws IOException, InstantiationException, IllegalAccessException, ClassNotFoundException {
- Node n = NodeFactory.createVariable("really-log-variable-name-asddsfr4545egfdgdfgfdgdtgvdg-dfgfdgdfgdfgdfg4-dfvdfgdfgdfgfdgfdgdfgdfgfdg");
- NodeWritable nw = new NodeWritable(n);
- testWriteRead(nw, nw);
- }
-
- /**
- * Basic node writable round tripping test
- *
- * @throws IOException
- * @throws InstantiationException
- * @throws IllegalAccessException
- * @throws ClassNotFoundException
- */
- @Test
- public void node_writable_uri_01() throws IOException, InstantiationException, IllegalAccessException, ClassNotFoundException {
- Node n = NodeFactory.createURI("http://example.org");
- NodeWritable nw = new NodeWritable(n);
- testWriteRead(nw, nw);
- }
-
- /**
- * Basic node writable round tripping test
- *
- * @throws IOException
- * @throws InstantiationException
- * @throws IllegalAccessException
- * @throws ClassNotFoundException
- */
- @Test
- public void node_writable_uri_02() throws IOException, InstantiationException, IllegalAccessException, ClassNotFoundException {
- Node n = NodeFactory.createURI("http://user:password@example.org/some/path?key=value#id");
- NodeWritable nw = new NodeWritable(n);
- testWriteRead(nw, nw);
- }
-
- /**
- * Basic node writable round tripping test
- *
- * @throws IOException
- * @throws InstantiationException
- * @throws IllegalAccessException
- * @throws ClassNotFoundException
- */
- @Test
- public void node_writable_literal_01() throws IOException, InstantiationException, IllegalAccessException, ClassNotFoundException {
- Node n = NodeFactory.createLiteral("simple");
- NodeWritable nw = new NodeWritable(n);
- testWriteRead(nw, nw);
- }
-
- /**
- * Basic node writable round tripping test
- *
- * @throws IOException
- * @throws InstantiationException
- * @throws IllegalAccessException
- * @throws ClassNotFoundException
- */
- @Test
- public void node_writable_literal_02() throws IOException, InstantiationException, IllegalAccessException, ClassNotFoundException {
- Node n = NodeFactory.createLiteral("language", "en", null);
- NodeWritable nw = new NodeWritable(n);
- testWriteRead(nw, nw);
- }
-
- /**
- * Basic node writable round tripping test
- *
- * @throws IOException
- * @throws InstantiationException
- * @throws IllegalAccessException
- * @throws ClassNotFoundException
- */
- @Test
- public void node_writable_literal_03() throws IOException, InstantiationException, IllegalAccessException, ClassNotFoundException {
- Node n = NodeFactory.createLiteral("string", XSDDatatype.XSDstring);
- NodeWritable nw = new NodeWritable(n);
- testWriteRead(nw, nw);
- }
-
- /**
- * Basic node writable round tripping test
- *
- * @throws IOException
- * @throws InstantiationException
- * @throws IllegalAccessException
- * @throws ClassNotFoundException
- */
- @Test
- public void node_writable_literal_04() throws IOException, InstantiationException, IllegalAccessException, ClassNotFoundException {
- Node n = NodeFactory.createLiteral("1234", XSDDatatype.XSDinteger);
- NodeWritable nw = new NodeWritable(n);
- testWriteRead(nw, nw);
- }
-
- /**
- * Basic node writable round tripping test
- *
- * @throws IOException
- * @throws InstantiationException
- * @throws IllegalAccessException
- * @throws ClassNotFoundException
- */
- @Test
- public void node_writable_literal_05() throws IOException, InstantiationException, IllegalAccessException, ClassNotFoundException {
- Node n = NodeFactory.createLiteral("123.4", XSDDatatype.XSDdecimal);
- NodeWritable nw = new NodeWritable(n);
- testWriteRead(nw, nw);
- }
-
- /**
- * Basic node writable round tripping test
- *
- * @throws IOException
- * @throws InstantiationException
- * @throws IllegalAccessException
- * @throws ClassNotFoundException
- */
- @Test
- public void node_writable_literal_06() throws IOException, InstantiationException, IllegalAccessException, ClassNotFoundException {
- Node n = NodeFactory.createLiteral("12.3e4", XSDDatatype.XSDdouble);
- NodeWritable nw = new NodeWritable(n);
- testWriteRead(nw, nw);
- }
-
- /**
- * Basic node writable round tripping test
- *
- * @throws IOException
- * @throws InstantiationException
- * @throws IllegalAccessException
- * @throws ClassNotFoundException
- */
- @Test
- public void node_writable_literal_07() throws IOException, InstantiationException, IllegalAccessException, ClassNotFoundException {
- Node n = NodeFactory.createLiteral("true", XSDDatatype.XSDboolean);
- NodeWritable nw = new NodeWritable(n);
- testWriteRead(nw, nw);
- }
-
- /**
- * Basic node writable round tripping test
- *
- * @throws IOException
- * @throws InstantiationException
- * @throws IllegalAccessException
- * @throws ClassNotFoundException
- */
- @Test
- public void node_writable_bnode_01() throws IOException, InstantiationException, IllegalAccessException, ClassNotFoundException {
- Node n = NodeFactory.createAnon();
- NodeWritable nw = new NodeWritable(n);
- testWriteRead(nw, nw);
- }
-
- /**
- * Basic node writable round tripping test
- *
- * @throws IOException
- * @throws InstantiationException
- * @throws IllegalAccessException
- * @throws ClassNotFoundException
- */
- @Test
- public void node_writable_bnode_02() throws IOException, InstantiationException, IllegalAccessException, ClassNotFoundException {
- Node n = NodeFactory.createAnon();
- NodeWritable nw = new NodeWritable(n);
- testWriteRead(nw, nw);
- NodeWritable nw2 = new NodeWritable(n);
- testWriteRead(nw2, nw2);
-
- Assert.assertEquals(0, nw.compareTo(nw2));
- }
-
- /**
- * Basic triple writable round tripping test
- *
- * @throws IOException
- * @throws InstantiationException
- * @throws IllegalAccessException
- * @throws ClassNotFoundException
- */
- @Test
- public void triple_writable_01() throws IOException, InstantiationException, IllegalAccessException, ClassNotFoundException {
- Triple t = new Triple(NodeFactory.createURI("http://example"), NodeFactory.createURI("http://predicate"), NodeFactory.createLiteral("value"));
- TripleWritable tw = new TripleWritable(t);
- testWriteRead(tw, tw);
- }
-
- /**
- * Basic triple writable round tripping test
- *
- * @throws IOException
- * @throws InstantiationException
- * @throws IllegalAccessException
- * @throws ClassNotFoundException
- */
- @Test
- public void triple_writable_02() throws IOException, InstantiationException, IllegalAccessException, ClassNotFoundException {
- Triple t = new Triple(NodeFactory.createAnon(), NodeFactory.createURI("http://predicate"), NodeFactory.createLiteral("value"));
- TripleWritable tw = new TripleWritable(t);
- testWriteRead(tw, tw);
- }
-
- /**
- * Basic quad writable round tripping test
- *
- * @throws IOException
- * @throws InstantiationException
- * @throws IllegalAccessException
- * @throws ClassNotFoundException
- */
- @Test
- public void quad_writable_01() throws IOException, InstantiationException, IllegalAccessException, ClassNotFoundException {
- Quad q = new Quad(Quad.defaultGraphNodeGenerated, NodeFactory.createURI("http://example"), NodeFactory.createURI("http://predicate"),
- NodeFactory.createLiteral("value"));
- QuadWritable qw = new QuadWritable(q);
- testWriteRead(qw, qw);
- }
-
- /**
- * Basic quad writable round tripping test
- *
- * @throws IOException
- * @throws InstantiationException
- * @throws IllegalAccessException
- * @throws ClassNotFoundException
- */
- @Test
- public void quad_writable_02() throws IOException, InstantiationException, IllegalAccessException, ClassNotFoundException {
- Quad q = new Quad(Quad.defaultGraphNodeGenerated, NodeFactory.createAnon(), NodeFactory.createURI("http://predicate"),
- NodeFactory.createLiteral("value"));
- QuadWritable qw = new QuadWritable(q);
- testWriteRead(qw, qw);
- }
-
- /**
- * Basic tuple writable round tripping test
- *
- * @throws IOException
- * @throws InstantiationException
- * @throws IllegalAccessException
- * @throws ClassNotFoundException
- */
- @Test
- public void tuple_writable_01() throws IOException, InstantiationException, IllegalAccessException, ClassNotFoundException {
- Tuple<Node> t = Tuple.createTuple(NodeFactory.createURI("http://one"), NodeFactory.createURI("http://two"), NodeFactory.createLiteral("value"),
- NodeFactory.createLiteral("foo"), NodeFactory.createURI("http://three"));
- NodeTupleWritable tw = new NodeTupleWritable(t);
- testWriteRead(tw, tw);
- }
-}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/pom.xml
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/jena-elephas-io/pom.xml b/jena-hadoop-rdf/jena-elephas-io/pom.xml
deleted file mode 100644
index 2be37f9..0000000
--- a/jena-hadoop-rdf/jena-elephas-io/pom.xml
+++ /dev/null
@@ -1,67 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
- <modelVersion>4.0.0</modelVersion>
- <parent>
- <groupId>org.apache.jena</groupId>
- <artifactId>jena-elephas</artifactId>
- <version>0.9.0-SNAPSHOT</version>
- </parent>
- <artifactId>jena-elephas-io</artifactId>
- <name>Apache Jena - Elephas - I/O</name>
- <description>RDF Input/Output formats library for Hadoop</description>
-
- <!-- Note that versions are managed by parent POMs -->
- <dependencies>
- <!-- Internal Project Dependencies -->
- <dependency>
- <groupId>org.apache.jena</groupId>
- <artifactId>jena-hadoop-rdf-common</artifactId>
- <version>${project.version}</version>
- </dependency>
-
- <!-- Hadoop Dependencies -->
- <!-- Note these will be provided on the Hadoop cluster hence the provided
- scope -->
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-common</artifactId>
- <scope>provided</scope>
- </dependency>
-
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-mapreduce-client-common</artifactId>
- <scope>provided</scope>
- </dependency>
-
- <!-- Jena dependencies -->
- <dependency>
- <groupId>org.apache.jena</groupId>
- <artifactId>jena-arq</artifactId>
- </dependency>
-
- <!-- Test Dependencies -->
- <dependency>
- <groupId>junit</groupId>
- <artifactId>junit</artifactId>
- <scope>test</scope>
- </dependency>
- </dependencies>
-</project>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/HadoopIOConstants.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/HadoopIOConstants.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/HadoopIOConstants.java
deleted file mode 100644
index 5c1b41c..0000000
--- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/HadoopIOConstants.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.io;
-
-/**
- * Hadoop IO related constants
- *
- *
- *
- */
-public class HadoopIOConstants {
-
- /**
- * Private constructor prevents instantiation
- */
- private HadoopIOConstants() {
- }
-
- /**
- * Map Reduce configuration setting for max line length
- */
- public static final String MAX_LINE_LENGTH = "mapreduce.input.linerecordreader.line.maxlength";
-
- /**
- * Run ID
- */
- public static final String RUN_ID = "runId";
-
- /**
- * Compression codecs to use
- */
- public static final String IO_COMPRESSION_CODECS = "io.compression.codecs";
-}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/RdfIOConstants.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/RdfIOConstants.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/RdfIOConstants.java
deleted file mode 100644
index 27c2bb2..0000000
--- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/RdfIOConstants.java
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.io;
-
-import java.io.IOException;
-
-/**
- * RDF IO related constants
- *
- *
- *
- */
-public class RdfIOConstants {
-
- /**
- * Private constructor prevents instantiation
- */
- private RdfIOConstants() {
- }
-
- /**
- * Configuration key used to set whether bad tuples are ignored. This is the
- * default behaviour, when explicitly set to {@code false} bad tuples will
- * result in {@link IOException} being thrown by the relevant record
- * readers.
- */
- public static final String INPUT_IGNORE_BAD_TUPLES = "rdf.io.input.ignore-bad-tuples";
-
- /**
- * Configuration key used to set the batch size used for RDF output formats
- * that take a batched writing approach. Default value is given by the
- * constant {@link #DEFAULT_OUTPUT_BATCH_SIZE}.
- */
- public static final String OUTPUT_BATCH_SIZE = "rdf.io.output.batch-size";
-
- /**
- * Default batch size for batched output formats
- */
- public static final long DEFAULT_OUTPUT_BATCH_SIZE = 10000;
-
- /**
- * Configuration key used to control behaviour with regards to how blank
- * nodes are handled.
- * <p>
- * The default behaviour is that blank nodes are file scoped which is what
- * the RDF specifications require.
- * </p>
- * <p>
- * However in the case of a multi-stage pipeline this behaviour can cause
- * blank nodes to diverge over several jobs and introduce spurious blank
- * nodes over time. This is described in <a
- * href="https://issues.apache.org/jira/browse/JENA-820">JENA-820</a> and
- * enabling this flag for jobs in your pipeline allow you to work around
- * this problem.
- * </p>
- * <h3>Warning</h3> You should only enable this flag for jobs that take in
- * RDF output originating from previous jobs since our normal blank node
- * allocation policy ensures that blank nodes will be file scoped and unique
- * over all files (barring unfortunate hasing collisions). If you enable
- * this for jobs that take in RDF originating from other sources you may
- * incorrectly conflate blank nodes that are supposed to distinct and
- * separate nodes.
- */
- public static final String GLOBAL_BNODE_IDENTITY = "rdf.io.input.bnodes.global-identity";
-}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/AbstractNLineFileInputFormat.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/AbstractNLineFileInputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/AbstractNLineFileInputFormat.java
deleted file mode 100644
index 1fcb030..0000000
--- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/AbstractNLineFileInputFormat.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.io.input;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Abstract line based input format that reuses the machinery from
- * {@link NLineInputFormat} to calculate the splits
- *
- *
- *
- * @param <TKey>
- * Key type
- * @param <TValue>
- * Value type
- */
-public abstract class AbstractNLineFileInputFormat<TKey, TValue> extends FileInputFormat<TKey, TValue> {
-
- private static final Logger LOGGER = LoggerFactory.getLogger(AbstractNLineFileInputFormat.class);
-
- /**
- * Logically splits the set of input files for the job, splits N lines of
- * the input as one split.
- *
- * @see FileInputFormat#getSplits(JobContext)
- */
- public final List<InputSplit> getSplits(JobContext job) throws IOException {
- boolean debug = LOGGER.isDebugEnabled();
- if (debug && FileInputFormat.getInputDirRecursive(job)) {
- LOGGER.debug("Recursive searching for input data is enabled");
- }
-
- List<InputSplit> splits = new ArrayList<InputSplit>();
- int numLinesPerSplit = NLineInputFormat.getNumLinesPerSplit(job);
- for (FileStatus status : listStatus(job)) {
- if (debug) {
- LOGGER.debug("Determining how to split input file/directory {}", status.getPath());
- }
- splits.addAll(NLineInputFormat.getSplitsForFile(status, job.getConfiguration(), numLinesPerSplit));
- }
- return splits;
- }
-}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/AbstractWholeFileInputFormat.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/AbstractWholeFileInputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/AbstractWholeFileInputFormat.java
deleted file mode 100644
index e561cdb..0000000
--- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/AbstractWholeFileInputFormat.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.io.input;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-
-/**
- * Abstract implementation of a while file input format where each file is a
- * single split
- *
- *
- *
- * @param <TKey>
- * Key type
- * @param <TValue>
- * Value type
- */
-public abstract class AbstractWholeFileInputFormat<TKey, TValue> extends FileInputFormat<TKey, TValue> {
-
- @Override
- protected final boolean isSplitable(JobContext context, Path filename) {
- return false;
- }
-}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/QuadsInputFormat.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/QuadsInputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/QuadsInputFormat.java
deleted file mode 100644
index b8fdbd5..0000000
--- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/QuadsInputFormat.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.io.input;
-
-import java.io.IOException;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.jena.hadoop.rdf.io.input.readers.QuadsReader;
-import org.apache.jena.hadoop.rdf.types.QuadWritable;
-
-
-/**
- * RDF input format that can handle any RDF quads format that ARQ supports
- * selecting the format to use for each file based upon the file extension
- *
- *
- *
- */
-public class QuadsInputFormat extends AbstractWholeFileInputFormat<LongWritable, QuadWritable> {
-
- @Override
- public RecordReader<LongWritable, QuadWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
- throws IOException, InterruptedException {
- return new QuadsReader();
- }
-
-}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/TriplesInputFormat.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/TriplesInputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/TriplesInputFormat.java
deleted file mode 100644
index 03f394a..0000000
--- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/TriplesInputFormat.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.io.input;
-
-import java.io.IOException;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.jena.hadoop.rdf.io.input.readers.TriplesReader;
-import org.apache.jena.hadoop.rdf.types.TripleWritable;
-
-/**
- * RDF input format that can handle any RDF triples format that ARQ supports
- * selecting the format to use for each file based upon the file extension
- */
-public class TriplesInputFormat extends AbstractWholeFileInputFormat<LongWritable, TripleWritable> {
-
- @Override
- public RecordReader<LongWritable, TripleWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
- throws IOException, InterruptedException {
- return new TriplesReader();
- }
-
-}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/TriplesOrQuadsInputFormat.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/TriplesOrQuadsInputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/TriplesOrQuadsInputFormat.java
deleted file mode 100644
index bfd643e..0000000
--- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/TriplesOrQuadsInputFormat.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.io.input;
-
-import java.io.IOException;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.jena.hadoop.rdf.io.input.readers.TriplesOrQuadsReader;
-import org.apache.jena.hadoop.rdf.types.QuadWritable;
-
-
-/**
- * RDF input format that can handle any RDF triple/quads format that ARQ
- * supports selecting the format to use for each file based upon the file
- * extension. Triples are converted into quads in the default graph.
- *
- *
- *
- */
-public class TriplesOrQuadsInputFormat extends AbstractWholeFileInputFormat<LongWritable, QuadWritable> {
-
- @Override
- public RecordReader<LongWritable, QuadWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
- throws IOException, InterruptedException {
- return new TriplesOrQuadsReader();
- }
-
-}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/jsonld/JsonLDQuadInputFormat.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/jsonld/JsonLDQuadInputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/jsonld/JsonLDQuadInputFormat.java
deleted file mode 100644
index 2464946..0000000
--- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/jsonld/JsonLDQuadInputFormat.java
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.io.input.jsonld;
-
-import java.io.IOException;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.jena.hadoop.rdf.io.input.AbstractWholeFileInputFormat;
-import org.apache.jena.hadoop.rdf.io.input.readers.jsonld.JsonLDQuadReader;
-import org.apache.jena.hadoop.rdf.types.QuadWritable;
-
-public class JsonLDQuadInputFormat extends AbstractWholeFileInputFormat<LongWritable, QuadWritable> {
-
- @Override
- public RecordReader<LongWritable, QuadWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
- throws IOException, InterruptedException {
- return new JsonLDQuadReader();
- }
-
-}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/jsonld/JsonLDTripleInputFormat.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/jsonld/JsonLDTripleInputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/jsonld/JsonLDTripleInputFormat.java
deleted file mode 100644
index 0e08a4b..0000000
--- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/jsonld/JsonLDTripleInputFormat.java
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.io.input.jsonld;
-
-import java.io.IOException;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.jena.hadoop.rdf.io.input.AbstractWholeFileInputFormat;
-import org.apache.jena.hadoop.rdf.io.input.readers.jsonld.JsonLDTripleReader;
-import org.apache.jena.hadoop.rdf.types.TripleWritable;
-
-public class JsonLDTripleInputFormat extends AbstractWholeFileInputFormat<LongWritable, TripleWritable> {
-
- @Override
- public RecordReader<LongWritable, TripleWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
- throws IOException, InterruptedException {
- return new JsonLDTripleReader();
- }
-
-}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/nquads/BlockedNQuadsInputFormat.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/nquads/BlockedNQuadsInputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/nquads/BlockedNQuadsInputFormat.java
deleted file mode 100644
index 6829c4d..0000000
--- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/nquads/BlockedNQuadsInputFormat.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.io.input.nquads;
-
-import java.io.IOException;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.jena.hadoop.rdf.io.input.AbstractNLineFileInputFormat;
-import org.apache.jena.hadoop.rdf.io.input.readers.nquads.BlockedNQuadsReader;
-import org.apache.jena.hadoop.rdf.types.QuadWritable;
-
-
-/**
- * NTriples input format where files are processed as blocks of lines rather
- * than in a line based manner as with the {@link NQuadsInputFormat} or as
- * whole files with the {@link WholeFileNQuadsInputFormat}
- * <p>
- * This provides a compromise between the higher parser setup of creating more
- * parsers and the benefit of being able to split input files over multiple
- * mappers.
- * </p>
- *
- *
- *
- */
-public class BlockedNQuadsInputFormat extends AbstractNLineFileInputFormat<LongWritable, QuadWritable> {
-
- @Override
- public RecordReader<LongWritable, QuadWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
- throws IOException, InterruptedException {
- return new BlockedNQuadsReader();
- }
-
-}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/nquads/NQuadsInputFormat.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/nquads/NQuadsInputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/nquads/NQuadsInputFormat.java
deleted file mode 100644
index 802fbea..0000000
--- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/nquads/NQuadsInputFormat.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.io.input.nquads;
-
-import java.io.IOException;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.jena.hadoop.rdf.io.input.AbstractNLineFileInputFormat;
-import org.apache.jena.hadoop.rdf.io.input.readers.nquads.NQuadsReader;
-import org.apache.jena.hadoop.rdf.types.QuadWritable;
-
-
-/**
- * NQuads input format
- *
- *
- *
- */
-public class NQuadsInputFormat extends AbstractNLineFileInputFormat<LongWritable, QuadWritable> {
-
- @Override
- public RecordReader<LongWritable, QuadWritable> createRecordReader(InputSplit arg0, TaskAttemptContext arg1)
- throws IOException, InterruptedException {
- return new NQuadsReader();
- }
-
-}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/nquads/WholeFileNQuadsInputFormat.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/nquads/WholeFileNQuadsInputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/nquads/WholeFileNQuadsInputFormat.java
deleted file mode 100644
index 128d079..0000000
--- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/nquads/WholeFileNQuadsInputFormat.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.io.input.nquads;
-
-import java.io.IOException;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.jena.hadoop.rdf.io.input.AbstractWholeFileInputFormat;
-import org.apache.jena.hadoop.rdf.io.input.readers.nquads.WholeFileNQuadsReader;
-import org.apache.jena.hadoop.rdf.types.QuadWritable;
-
-
-/**
- * NQuads input format where files are processed as complete files rather than
- * in a line based manner as with the {@link NQuadsInputFormat}
- * <p>
- * This has the advantage of less parser setup overhead but the disadvantage
- * that the input cannot be split over multiple mappers.
- * </p>
- *
- *
- *
- */
-public class WholeFileNQuadsInputFormat extends AbstractWholeFileInputFormat<LongWritable, QuadWritable> {
-
- @Override
- public RecordReader<LongWritable, QuadWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
- throws IOException, InterruptedException {
- return new WholeFileNQuadsReader();
- }
-
-}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/ntriples/BlockedNTriplesInputFormat.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/ntriples/BlockedNTriplesInputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/ntriples/BlockedNTriplesInputFormat.java
deleted file mode 100644
index 292167b..0000000
--- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/ntriples/BlockedNTriplesInputFormat.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.io.input.ntriples;
-
-import java.io.IOException;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.jena.hadoop.rdf.io.input.AbstractNLineFileInputFormat;
-import org.apache.jena.hadoop.rdf.io.input.readers.ntriples.BlockedNTriplesReader;
-import org.apache.jena.hadoop.rdf.types.TripleWritable;
-
-
-/**
- * NTriples input format where files are processed as blocks of lines rather
- * than in a line based manner as with the {@link NTriplesInputFormat} or as
- * whole files with the {@link WholeFileNTriplesInputFormat}
- * <p>
- * This provides a compromise between the higher parser setup of creating more
- * parsers and the benefit of being able to split input files over multiple
- * mappers.
- * </p>
- *
- *
- *
- */
-public class BlockedNTriplesInputFormat extends AbstractNLineFileInputFormat<LongWritable, TripleWritable> {
-
- @Override
- public RecordReader<LongWritable, TripleWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
- throws IOException, InterruptedException {
- return new BlockedNTriplesReader();
- }
-
-}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/ntriples/NTriplesInputFormat.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/ntriples/NTriplesInputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/ntriples/NTriplesInputFormat.java
deleted file mode 100644
index 1694c87..0000000
--- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/ntriples/NTriplesInputFormat.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.io.input.ntriples;
-
-import java.io.IOException;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.jena.hadoop.rdf.io.input.AbstractNLineFileInputFormat;
-import org.apache.jena.hadoop.rdf.io.input.readers.ntriples.NTriplesReader;
-import org.apache.jena.hadoop.rdf.types.TripleWritable;
-
-
-/**
- * NTriples input format
- *
- *
- *
- */
-public class NTriplesInputFormat extends AbstractNLineFileInputFormat<LongWritable, TripleWritable> {
-
- @Override
- public RecordReader<LongWritable, TripleWritable> createRecordReader(InputSplit inputSplit, TaskAttemptContext context)
- throws IOException, InterruptedException {
- return new NTriplesReader();
- }
-
-}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/ntriples/WholeFileNTriplesInputFormat.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/ntriples/WholeFileNTriplesInputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/ntriples/WholeFileNTriplesInputFormat.java
deleted file mode 100644
index 31c1252..0000000
--- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/ntriples/WholeFileNTriplesInputFormat.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.io.input.ntriples;
-
-import java.io.IOException;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.jena.hadoop.rdf.io.input.AbstractWholeFileInputFormat;
-import org.apache.jena.hadoop.rdf.io.input.readers.ntriples.WholeFileNTriplesReader;
-import org.apache.jena.hadoop.rdf.types.TripleWritable;
-
-
-/**
- * NTriples input format where files are processed as complete files rather than
- * in a line based manner as with the {@link NTriplesInputFormat}
- * <p>
- * This has the advantage of less parser setup overhead but the disadvantage
- * that the input cannot be split over multiple mappers.
- * </p>
- *
- *
- *
- */
-public class WholeFileNTriplesInputFormat extends AbstractWholeFileInputFormat<LongWritable, TripleWritable> {
-
- @Override
- public RecordReader<LongWritable, TripleWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
- throws IOException, InterruptedException {
- return new WholeFileNTriplesReader();
- }
-
-}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/rdfjson/RdfJsonInputFormat.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/rdfjson/RdfJsonInputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/rdfjson/RdfJsonInputFormat.java
deleted file mode 100644
index e5a7940..0000000
--- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/rdfjson/RdfJsonInputFormat.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.io.input.rdfjson;
-
-import java.io.IOException;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.jena.hadoop.rdf.io.input.AbstractWholeFileInputFormat;
-import org.apache.jena.hadoop.rdf.io.input.readers.rdfjson.RdfJsonReader;
-import org.apache.jena.hadoop.rdf.types.TripleWritable;
-
-
-/**
- * RDF/JSON input format
- *
- *
- *
- */
-public class RdfJsonInputFormat extends AbstractWholeFileInputFormat<LongWritable, TripleWritable> {
-
- @Override
- public RecordReader<LongWritable, TripleWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
- throws IOException, InterruptedException {
- return new RdfJsonReader();
- }
-
-}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/rdfxml/RdfXmlInputFormat.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/rdfxml/RdfXmlInputFormat.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/rdfxml/RdfXmlInputFormat.java
deleted file mode 100644
index 4deb925..0000000
--- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/rdfxml/RdfXmlInputFormat.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.io.input.rdfxml;
-
-import java.io.IOException;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.jena.hadoop.rdf.io.input.AbstractWholeFileInputFormat;
-import org.apache.jena.hadoop.rdf.io.input.readers.rdfxml.RdfXmlReader;
-import org.apache.jena.hadoop.rdf.types.TripleWritable;
-
-
-/**
- * RDF/XML input format
- *
- *
- *
- */
-public class RdfXmlInputFormat extends AbstractWholeFileInputFormat<LongWritable, TripleWritable> {
-
- @Override
- public RecordReader<LongWritable, TripleWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
- throws IOException, InterruptedException {
- return new RdfXmlReader();
- }
-
-}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/AbstractBlockBasedNodeTupleReader.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/AbstractBlockBasedNodeTupleReader.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/AbstractBlockBasedNodeTupleReader.java
deleted file mode 100644
index 56d031e..0000000
--- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/AbstractBlockBasedNodeTupleReader.java
+++ /dev/null
@@ -1,344 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.io.input.readers;
-
-import java.io.IOException;
-import java.io.InputStream;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.compress.CompressionCodec;
-import org.apache.hadoop.io.compress.CompressionCodecFactory;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.input.FileSplit;
-import org.apache.jena.hadoop.rdf.io.RdfIOConstants;
-import org.apache.jena.hadoop.rdf.io.input.util.BlockInputStream;
-import org.apache.jena.hadoop.rdf.io.input.util.RdfIOUtils;
-import org.apache.jena.hadoop.rdf.io.input.util.TrackableInputStream;
-import org.apache.jena.hadoop.rdf.io.input.util.TrackedInputStream;
-import org.apache.jena.hadoop.rdf.io.input.util.TrackedPipedRDFStream;
-import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable;
-import org.apache.jena.riot.Lang;
-import org.apache.jena.riot.RDFDataMgr;
-import org.apache.jena.riot.ReaderRIOT;
-import org.apache.jena.riot.lang.PipedRDFIterator;
-import org.apache.jena.riot.lang.PipedRDFStream;
-import org.apache.jena.riot.system.ParserProfile;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * An abstract implementation for a record reader that reads records from blocks
- * of files, this is a hybrid between {@link AbstractLineBasedNodeTupleReader}
- * and {@link AbstractWholeFileNodeTupleReader} in that it can only be used by
- * formats which can be split by lines but reduces the overhead by parsing the
- * split as a whole rather than as individual lines.
- * <p>
- * The keys produced are the approximate position in the file at which a tuple
- * was found and the values will be node tuples. Positions are approximate
- * because they are recorded after the point at which the most recent tuple was
- * parsed from the input thus they reflect the approximate position in the
- * stream immediately after which the triple was found.
- * </p>
- *
- *
- *
- * @param <TValue>
- * Value type
- * @param <T>
- * Tuple type
- */
-public abstract class AbstractBlockBasedNodeTupleReader<TValue, T extends AbstractNodeTupleWritable<TValue>> extends RecordReader<LongWritable, T> {
-
- private static final Logger LOG = LoggerFactory.getLogger(AbstractBlockBasedNodeTupleReader.class);
- private CompressionCodec compressionCodecs;
- private TrackableInputStream input;
- private LongWritable key;
- private long start, length;
- private T tuple;
- private TrackedPipedRDFStream<TValue> stream;
- private PipedRDFIterator<TValue> iter;
- private Thread parserThread;
- private boolean finished = false;
- private boolean ignoreBadTuples = true;
- private boolean parserFinished = false;
- private Throwable parserError = null;
-
- @Override
- public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException, InterruptedException {
- LOG.debug("initialize({}, {})", genericSplit, context);
-
- // Assuming file split
- if (!(genericSplit instanceof FileSplit))
- throw new IOException("This record reader only supports FileSplit inputs");
- FileSplit split = (FileSplit) genericSplit;
-
- // Configuration
- Configuration config = context.getConfiguration();
- this.ignoreBadTuples = config.getBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, true);
- if (this.ignoreBadTuples)
- LOG.warn(
- "Configured to ignore bad tuples, parsing errors will be logged and further parsing aborted but no user visible errors will be thrown. Consider setting {} to false to disable this behaviour",
- RdfIOConstants.INPUT_IGNORE_BAD_TUPLES);
-
- // Figure out what portion of the file to read
- start = split.getStart();
- long end = start + split.getLength();
- final Path file = split.getPath();
- long totalLength = file.getFileSystem(context.getConfiguration()).getFileStatus(file).getLen();
- boolean readToEnd = end == totalLength;
- CompressionCodecFactory factory = new CompressionCodecFactory(config);
- this.compressionCodecs = factory.getCodec(file);
-
- LOG.info(String.format("Got split with start %d and length %d for file with total length of %d", new Object[] { start, split.getLength(), totalLength }));
-
- // Open the file and prepare the input stream
- FileSystem fs = file.getFileSystem(config);
- FSDataInputStream fileIn = fs.open(file);
- this.length = split.getLength();
- if (start > 0)
- fileIn.seek(start);
-
- if (this.compressionCodecs != null) {
- // Compressed input
- // For compressed input NLineInputFormat will have failed to find
- // any line breaks and will give us a split from 0 -> (length - 1)
- // Add 1 and re-verify readToEnd so we can abort correctly if ever
- // given a partial split of a compressed file
- end++;
- readToEnd = end == totalLength;
- if (start > 0 || !readToEnd)
- throw new IOException("This record reader can only be used with compressed input where the split is a whole file");
- input = new TrackedInputStream(this.compressionCodecs.createInputStream(fileIn));
- } else {
- // Uncompressed input
-
- if (readToEnd) {
- input = new TrackedInputStream(fileIn);
- } else {
- // Need to limit the portion of the file we are reading
- input = new BlockInputStream(fileIn, split.getLength());
- }
- }
-
- // Set up background thread for parser
- iter = this.getPipedIterator();
- this.stream = this.getPipedStream(iter, this.input);
- ParserProfile profile = RdfIOUtils.createParserProfile(context, file);
- Runnable parserRunnable = this.createRunnable(this, this.input, stream, this.getRdfLanguage(), profile);
- this.parserThread = new Thread(parserRunnable);
- this.parserThread.setDaemon(true);
- this.parserThread.start();
- }
-
- /**
- * Gets the RDF iterator to use
- *
- * @return Iterator
- */
- protected abstract PipedRDFIterator<TValue> getPipedIterator();
-
- /**
- * Gets the RDF stream to parse to
- *
- * @param iterator
- * Iterator
- * @return RDF stream
- */
- protected abstract TrackedPipedRDFStream<TValue> getPipedStream(PipedRDFIterator<TValue> iterator, TrackableInputStream input);
-
- /**
- * Gets the RDF language to use for parsing
- *
- * @return
- */
- protected abstract Lang getRdfLanguage();
-
- /**
- * Creates the runnable upon which the parsing will run
- *
- * @param input
- * Input
- * @param stream
- * Stream
- * @param lang
- * Language to use for parsing
- * @return Parser runnable
- */
- private Runnable createRunnable(@SuppressWarnings("rawtypes") final AbstractBlockBasedNodeTupleReader reader, final InputStream input,
- final PipedRDFStream<TValue> stream, final Lang lang, final ParserProfile profile) {
- return new Runnable() {
- @Override
- public void run() {
- try {
- ReaderRIOT riotReader = RDFDataMgr.createReader(lang);
- riotReader.setParserProfile(profile);
- riotReader.read(input, null, lang.getContentType(), stream, null);
- //RDFDataMgr.parse(stream, input, null, lang);
- reader.setParserFinished(null);
- } catch (Throwable e) {
- reader.setParserFinished(e);
- }
- }
- };
- }
-
- /**
- * Sets the parser thread finished state
- *
- * @param e
- * Error (if any)
- */
- private void setParserFinished(Throwable e) {
- synchronized (this.parserThread) {
- this.parserError = e;
- this.parserFinished = true;
- }
- }
-
- /**
- * Waits for the parser thread to have reported as finished
- *
- * @throws InterruptedException
- */
- private void waitForParserFinished() throws InterruptedException {
- do {
- synchronized (this.parserThread) {
- if (this.parserFinished)
- return;
- }
- Thread.sleep(50);
- } while (true);
- }
-
- /**
- * Creates an instance of a writable tuple from the given tuple value
- *
- * @param tuple
- * Tuple value
- * @return Writable tuple
- */
- protected abstract T createInstance(TValue tuple);
-
- @Override
- public boolean nextKeyValue() throws IOException, InterruptedException {
- // Reuse key for efficiency
- if (key == null) {
- key = new LongWritable();
- }
-
- if (this.finished)
- return false;
-
- try {
- if (this.iter.hasNext()) {
- // Position will be relative to the start for the split we're
- // processing
- Long l = this.start + this.stream.getPosition();
- if (l != null) {
- this.key.set(l);
- // For compressed input the actual length from which we
- // calculate progress is likely less than the actual
- // uncompressed length so we need to increment the
- // length as we go along
- // We always add 1 more than the current length because we
- // don't want to report 100% progress until we really have
- // finished
- if (this.compressionCodecs != null && l > this.length)
- this.length = l + 1;
- }
- this.tuple = this.createInstance(this.iter.next());
- return true;
- } else {
- // Need to ensure that the parser thread has finished in order
- // to determine whether we finished without error
- this.waitForParserFinished();
- if (this.parserError != null) {
- LOG.error("Error parsing block, aborting further parsing", this.parserError);
- if (!this.ignoreBadTuples)
- throw new IOException("Error parsing block at position " + (this.start + this.input.getBytesRead()) + ", aborting further parsing",
- this.parserError);
- }
-
- this.key = null;
- this.tuple = null;
- this.finished = true;
- // This is necessary so that when compressed input is used we
- // report 100% progress once we've reached the genuine end of
- // the stream
- if (this.compressionCodecs != null)
- this.length--;
- return false;
- }
- } catch (IOException e) {
- throw e;
- } catch (Throwable e) {
- // Failed to read the tuple on this line
- LOG.error("Error parsing block, aborting further parsing", e);
- if (!this.ignoreBadTuples) {
- this.iter.close();
- throw new IOException("Error parsing block at position " + (this.start + this.input.getBytesRead()) + ", aborting further parsing", e);
- }
- this.key = null;
- this.tuple = null;
- this.finished = true;
- return false;
- }
- }
-
- @Override
- public LongWritable getCurrentKey() throws IOException, InterruptedException {
- return this.key;
- }
-
- @Override
- public T getCurrentValue() throws IOException, InterruptedException {
- return this.tuple;
- }
-
- @Override
- public float getProgress() throws IOException, InterruptedException {
- float progress = 0.0f;
- if (this.key == null) {
- // We've either not started or we've finished
- progress = (this.finished ? 1.0f : 0.0f);
- } else if (this.key.get() == Long.MIN_VALUE) {
- // We don't have a position so we've either in-progress or finished
- progress = (this.finished ? 1.0f : 0.5f);
- } else {
- // We're some way through the file
- progress = (this.key.get() - this.start) / (float) this.length;
- }
- LOG.debug("getProgress() --> {}", progress);
- return progress;
- }
-
- @Override
- public void close() throws IOException {
- this.iter.close();
- this.input.close();
- this.finished = true;
- }
-
-}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/AbstractBlockBasedQuadReader.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/AbstractBlockBasedQuadReader.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/AbstractBlockBasedQuadReader.java
deleted file mode 100644
index 2279444..0000000
--- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/AbstractBlockBasedQuadReader.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.io.input.readers;
-
-import org.apache.jena.hadoop.rdf.io.input.util.TrackableInputStream;
-import org.apache.jena.hadoop.rdf.io.input.util.TrackedPipedQuadsStream;
-import org.apache.jena.hadoop.rdf.io.input.util.TrackedPipedRDFStream;
-import org.apache.jena.hadoop.rdf.types.QuadWritable;
-import org.apache.jena.riot.lang.PipedRDFIterator;
-
-import com.hp.hpl.jena.sparql.core.Quad;
-
-/**
- * An abstract record reader for whole file triple formats
- *
- *
- *
- */
-public abstract class AbstractBlockBasedQuadReader extends AbstractBlockBasedNodeTupleReader<Quad, QuadWritable> {
-
- @Override
- protected PipedRDFIterator<Quad> getPipedIterator() {
- return new PipedRDFIterator<Quad>();
- }
-
- @Override
- protected TrackedPipedRDFStream<Quad> getPipedStream(PipedRDFIterator<Quad> iterator, TrackableInputStream input) {
- return new TrackedPipedQuadsStream(iterator, input);
- }
-
- @Override
- protected QuadWritable createInstance(Quad tuple) {
- return new QuadWritable(tuple);
- }
-}
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/AbstractBlockBasedTripleReader.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/AbstractBlockBasedTripleReader.java b/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/AbstractBlockBasedTripleReader.java
deleted file mode 100644
index 2afd329..0000000
--- a/jena-hadoop-rdf/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/AbstractBlockBasedTripleReader.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.io.input.readers;
-
-import org.apache.jena.hadoop.rdf.io.input.util.TrackableInputStream;
-import org.apache.jena.hadoop.rdf.io.input.util.TrackedPipedRDFStream;
-import org.apache.jena.hadoop.rdf.io.input.util.TrackedPipedTriplesStream;
-import org.apache.jena.hadoop.rdf.types.TripleWritable;
-import org.apache.jena.riot.lang.PipedRDFIterator;
-
-import com.hp.hpl.jena.graph.Triple;
-
-/**
- * An abstract record reader for whole file triple formats
- *
- *
- *
- */
-public abstract class AbstractBlockBasedTripleReader extends AbstractBlockBasedNodeTupleReader<Triple, TripleWritable> {
-
- @Override
- protected PipedRDFIterator<Triple> getPipedIterator() {
- return new PipedRDFIterator<Triple>();
- }
-
- @Override
- protected TrackedPipedRDFStream<Triple> getPipedStream(PipedRDFIterator<Triple> iterator, TrackableInputStream input) {
- return new TrackedPipedTriplesStream(iterator, input);
- }
-
- @Override
- protected TripleWritable createInstance(Triple tuple) {
- return new TripleWritable(tuple);
- }
-}