You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by ha...@apache.org on 2018/07/11 21:17:53 UTC
any23 git commit: ANY23-362 resolved rdf4j deprecation warnings
Repository: any23
Updated Branches:
refs/heads/master 3f87cf3a8 -> 29bee074f
ANY23-362 resolved rdf4j deprecation warnings
Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/29bee074
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/29bee074
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/29bee074
Branch: refs/heads/master
Commit: 29bee074f7454dacfd8a565624da84cadfa6d989
Parents: 3f87cf3
Author: Hans <fi...@gmail.com>
Authored: Wed Jul 11 11:13:30 2018 -0500
Committer: Hans <fi...@gmail.com>
Committed: Wed Jul 11 16:07:36 2018 -0500
----------------------------------------------------------------------
.../any23/extractor/rdf/BaseRDFExtractor.java | 15 -------------
.../any23/extractor/rdf/RDFParserFactory.java | 22 +++++++++++---------
.../any23/extractor/rdf/RDFXMLExtractor.java | 2 +-
.../any23/extractor/rdf/TriXExtractor.java | 2 +-
.../java/org/apache/any23/rdf/RDFUtils.java | 13 ++++++------
.../apache/any23/mime/TikaMIMETypeDetector.java | 5 ++---
6 files changed, 23 insertions(+), 36 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/any23/blob/29bee074/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
index a1eab72..f390f04 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
@@ -27,8 +27,6 @@ import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.RDFParseException;
import org.eclipse.rdf4j.rio.RDFParser;
import org.eclipse.rdf4j.rio.RDFHandlerException;
-import org.eclipse.rdf4j.rio.RioSetting;
-import org.eclipse.rdf4j.rio.helpers.BasicParserSettings;
import org.jsoup.nodes.Attribute;
import org.jsoup.nodes.Comment;
import org.jsoup.nodes.DataNode;
@@ -48,7 +46,6 @@ import java.io.InputStream;
import java.io.PushbackInputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
-import java.util.HashSet;
import java.util.Iterator;
/**
@@ -111,18 +108,6 @@ public abstract class BaseRDFExtractor implements Extractor.ContentExtractor {
) throws IOException, ExtractionException {
try {
final RDFParser parser = getParser(extractionContext, extractionResult);
- parser.getParserConfig().setNonFatalErrors(new HashSet<RioSetting<?>>());
-
- // Disable verification to ensure that DBPedia is accessible, given it uses so many custom datatypes
- parser.getParserConfig().set(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, true);
- parser.getParserConfig().addNonFatalError(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES);
- parser.getParserConfig().set(BasicParserSettings.VERIFY_DATATYPE_VALUES, true);
- parser.getParserConfig().addNonFatalError(BasicParserSettings.VERIFY_DATATYPE_VALUES);
- parser.getParserConfig().set(BasicParserSettings.NORMALIZE_DATATYPE_VALUES, false);
- parser.getParserConfig().addNonFatalError(BasicParserSettings.NORMALIZE_DATATYPE_VALUES);
- parser.getParserConfig().set(BasicParserSettings.VERIFY_RELATIVE_URIS, true);
- parser.getParserConfig().addNonFatalError(BasicParserSettings.VERIFY_RELATIVE_URIS);
-
RDFFormat format = parser.getRDFFormat();
String iri = extractionContext.getDocumentIRI().stringValue();
http://git-wip-us.apache.org/repos/asf/any23/blob/29bee074/core/src/main/java/org/apache/any23/extractor/rdf/RDFParserFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/RDFParserFactory.java b/core/src/main/java/org/apache/any23/extractor/rdf/RDFParserFactory.java
index b9d9c9b..2778621 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdf/RDFParserFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/RDFParserFactory.java
@@ -28,6 +28,7 @@ import org.eclipse.rdf4j.rio.RDFHandlerException;
import org.eclipse.rdf4j.rio.RDFParseException;
import org.eclipse.rdf4j.rio.RDFParser;
import org.eclipse.rdf4j.rio.Rio;
+import org.eclipse.rdf4j.rio.helpers.BasicParserSettings;
import org.eclipse.rdf4j.rio.helpers.RDFaParserSettings;
import org.eclipse.rdf4j.rio.helpers.RDFaVersion;
import org.eclipse.rdf4j.rio.turtle.TurtleParser;
@@ -38,6 +39,8 @@ import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
+import java.util.Collections;
+import java.util.HashSet;
/**
* This factory provides a common logic for creating and configuring correctly
@@ -49,13 +52,12 @@ public class RDFParserFactory {
private static final Logger logger = LoggerFactory.getLogger(RDFParserFactory.class);
- private static RDFParserFactory instance;
+ private static class InstanceHolder {
+ private static final RDFParserFactory instance = new RDFParserFactory();
+ }
public static RDFParserFactory getInstance() {
- if(instance == null) {
- instance = new RDFParserFactory();
- }
- return instance;
+ return InstanceHolder.instance;
}
/**
@@ -280,11 +282,11 @@ public class RDFParserFactory {
final ExtractionContext extractionContext,
final ExtractionResult extractionResult
) {
- parser.setDatatypeHandling(
- verifyDataType ? RDFParser.DatatypeHandling.VERIFY : RDFParser.DatatypeHandling.IGNORE
- );
- parser.setStopAtFirstError(stopAtFirstError);
- parser.setParseErrorListener( new InternalParseErrorListener(extractionResult) );
+ parser.getParserConfig().setNonFatalErrors(stopAtFirstError ? Collections.emptySet() : new HashSet<>(parser.getSupportedSettings()));
+ parser.set(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, verifyDataType);
+ parser.set(BasicParserSettings.VERIFY_DATATYPE_VALUES, true);
+
+ parser.setParseErrorListener(new InternalParseErrorListener(extractionResult));
parser.setValueFactory(
new Any23ValueFactoryWrapper(
SimpleValueFactory.getInstance(),
http://git-wip-us.apache.org/repos/asf/any23/blob/29bee074/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractor.java
index 67fac7a..1500723 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractor.java
@@ -45,7 +45,7 @@ public class RDFXMLExtractor extends BaseRDFExtractor {
* Default constructor, with no verification of data types and not stop at first error.
*/
public RDFXMLExtractor() {
- this(true, true);
+ this(false, false);
}
@Override
http://git-wip-us.apache.org/repos/asf/any23/blob/29bee074/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractor.java
index a3b9681..3a8b0d7 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractor.java
@@ -46,7 +46,7 @@ public class TriXExtractor extends BaseRDFExtractor {
* Default constructor, with no verification of data types and not stop at first error.
*/
public TriXExtractor() {
- this(true, true);
+ this(false, false);
}
@Override
http://git-wip-us.apache.org/repos/asf/any23/blob/29bee074/core/src/main/java/org/apache/any23/rdf/RDFUtils.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/rdf/RDFUtils.java b/core/src/main/java/org/apache/any23/rdf/RDFUtils.java
index 242984b..44a98e0 100644
--- a/core/src/main/java/org/apache/any23/rdf/RDFUtils.java
+++ b/core/src/main/java/org/apache/any23/rdf/RDFUtils.java
@@ -69,6 +69,8 @@ public class RDFUtils {
private static final Logger LOG = LoggerFactory.getLogger(RDFUtils.class);
+ private static final Statement[] EMPTY_STATEMENTS = new Statement[0];
+
private RDFUtils() {}
/**
@@ -443,7 +445,7 @@ public class RDFUtils {
* @throws IllegalArgumentException if no extension matches.
*/
public static Optional<RDFFormat> getFormatByExtension(String ext) {
- if( ! ext.startsWith(".") )
+ if (!ext.startsWith("."))
ext = "." + ext;
return Rio.getParserFormatForFileName(ext);
}
@@ -463,11 +465,10 @@ public class RDFUtils {
final StatementCollector handler = new StatementCollector();
final RDFParser parser = getParser(format);
parser.getParserConfig().set(BasicParserSettings.VERIFY_DATATYPE_VALUES, true);
- parser.setStopAtFirstError(true);
parser.setPreserveBNodeIDs(true);
parser.setRDFHandler(handler);
parser.parse(is, baseIRI);
- return handler.getStatements().toArray( new Statement[handler.getStatements().size()] );
+ return handler.getStatements().toArray(EMPTY_STATEMENTS);
}
/**
@@ -508,11 +509,11 @@ public class RDFUtils {
*/
public static Statement[] parseRDF(String resource) throws IOException {
final int extIndex = resource.lastIndexOf('.');
- if(extIndex == -1)
+ if (extIndex == -1)
throw new IllegalArgumentException("Error while detecting the extension in resource name " + resource);
final String extension = resource.substring(extIndex + 1);
- return parseRDF( getFormatByExtension(extension).orElseThrow(Rio.unsupportedFormat(extension))
- , RDFUtils.class.getResourceAsStream(resource) );
+ return parseRDF(getFormatByExtension(extension).orElseThrow(Rio.unsupportedFormat(extension)),
+ RDFUtils.class.getResourceAsStream(resource));
}
/**
http://git-wip-us.apache.org/repos/asf/any23/blob/29bee074/mime/src/main/java/org/apache/any23/mime/TikaMIMETypeDetector.java
----------------------------------------------------------------------
diff --git a/mime/src/main/java/org/apache/any23/mime/TikaMIMETypeDetector.java b/mime/src/main/java/org/apache/any23/mime/TikaMIMETypeDetector.java
index c5601b1..3347895 100644
--- a/mime/src/main/java/org/apache/any23/mime/TikaMIMETypeDetector.java
+++ b/mime/src/main/java/org/apache/any23/mime/TikaMIMETypeDetector.java
@@ -29,6 +29,7 @@ import org.apache.tika.mime.MimeTypes;
import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.RDFParser;
import org.eclipse.rdf4j.rio.Rio;
+import org.eclipse.rdf4j.rio.helpers.BasicParserSettings;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
@@ -110,9 +111,7 @@ public class TikaMIMETypeDetector implements MIMETypeDetector {
public static boolean checkTurtleFormat(InputStream is) throws IOException {
String sample = extractDataSample(is, '.');
RDFParser turtleParser = Rio.createParser(RDFFormat.TURTLE);
- turtleParser.setDatatypeHandling(RDFParser.DatatypeHandling.VERIFY);
- turtleParser.setStopAtFirstError(true);
- turtleParser.setVerifyData(true);
+ turtleParser.set(BasicParserSettings.VERIFY_DATATYPE_VALUES, true);
ByteArrayInputStream bais = new ByteArrayInputStream(sample.getBytes());
try {
turtleParser.parse(bais, "");