You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by ha...@apache.org on 2018/07/11 21:17:53 UTC

any23 git commit: ANY23-362 resolved rdf4j deprecation warnings

Repository: any23
Updated Branches:
  refs/heads/master 3f87cf3a8 -> 29bee074f


ANY23-362 resolved rdf4j deprecation warnings


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/29bee074
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/29bee074
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/29bee074

Branch: refs/heads/master
Commit: 29bee074f7454dacfd8a565624da84cadfa6d989
Parents: 3f87cf3
Author: Hans <fi...@gmail.com>
Authored: Wed Jul 11 11:13:30 2018 -0500
Committer: Hans <fi...@gmail.com>
Committed: Wed Jul 11 16:07:36 2018 -0500

----------------------------------------------------------------------
 .../any23/extractor/rdf/BaseRDFExtractor.java   | 15 -------------
 .../any23/extractor/rdf/RDFParserFactory.java   | 22 +++++++++++---------
 .../any23/extractor/rdf/RDFXMLExtractor.java    |  2 +-
 .../any23/extractor/rdf/TriXExtractor.java      |  2 +-
 .../java/org/apache/any23/rdf/RDFUtils.java     | 13 ++++++------
 .../apache/any23/mime/TikaMIMETypeDetector.java |  5 ++---
 6 files changed, 23 insertions(+), 36 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/29bee074/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
index a1eab72..f390f04 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
@@ -27,8 +27,6 @@ import org.eclipse.rdf4j.rio.RDFFormat;
 import org.eclipse.rdf4j.rio.RDFParseException;
 import org.eclipse.rdf4j.rio.RDFParser;
 import org.eclipse.rdf4j.rio.RDFHandlerException;
-import org.eclipse.rdf4j.rio.RioSetting;
-import org.eclipse.rdf4j.rio.helpers.BasicParserSettings;
 import org.jsoup.nodes.Attribute;
 import org.jsoup.nodes.Comment;
 import org.jsoup.nodes.DataNode;
@@ -48,7 +46,6 @@ import java.io.InputStream;
 import java.io.PushbackInputStream;
 import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
-import java.util.HashSet;
 import java.util.Iterator;
 
 /**
@@ -111,18 +108,6 @@ public abstract class BaseRDFExtractor implements Extractor.ContentExtractor {
     ) throws IOException, ExtractionException {
         try {
             final RDFParser parser = getParser(extractionContext, extractionResult);
-            parser.getParserConfig().setNonFatalErrors(new HashSet<RioSetting<?>>());
-
-            // Disable verification to ensure that DBPedia is accessible, given it uses so many custom datatypes
-            parser.getParserConfig().set(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, true);
-            parser.getParserConfig().addNonFatalError(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES);
-            parser.getParserConfig().set(BasicParserSettings.VERIFY_DATATYPE_VALUES, true);
-            parser.getParserConfig().addNonFatalError(BasicParserSettings.VERIFY_DATATYPE_VALUES);
-            parser.getParserConfig().set(BasicParserSettings.NORMALIZE_DATATYPE_VALUES, false);
-            parser.getParserConfig().addNonFatalError(BasicParserSettings.NORMALIZE_DATATYPE_VALUES);
-            parser.getParserConfig().set(BasicParserSettings.VERIFY_RELATIVE_URIS, true);
-            parser.getParserConfig().addNonFatalError(BasicParserSettings.VERIFY_RELATIVE_URIS);
-
 
             RDFFormat format = parser.getRDFFormat();
             String iri = extractionContext.getDocumentIRI().stringValue();

http://git-wip-us.apache.org/repos/asf/any23/blob/29bee074/core/src/main/java/org/apache/any23/extractor/rdf/RDFParserFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/RDFParserFactory.java b/core/src/main/java/org/apache/any23/extractor/rdf/RDFParserFactory.java
index b9d9c9b..2778621 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdf/RDFParserFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/RDFParserFactory.java
@@ -28,6 +28,7 @@ import org.eclipse.rdf4j.rio.RDFHandlerException;
 import org.eclipse.rdf4j.rio.RDFParseException;
 import org.eclipse.rdf4j.rio.RDFParser;
 import org.eclipse.rdf4j.rio.Rio;
+import org.eclipse.rdf4j.rio.helpers.BasicParserSettings;
 import org.eclipse.rdf4j.rio.helpers.RDFaParserSettings;
 import org.eclipse.rdf4j.rio.helpers.RDFaVersion;
 import org.eclipse.rdf4j.rio.turtle.TurtleParser;
@@ -38,6 +39,8 @@ import org.slf4j.LoggerFactory;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.Reader;
+import java.util.Collections;
+import java.util.HashSet;
 
 /**
  * This factory provides a common logic for creating and configuring correctly
@@ -49,13 +52,12 @@ public class RDFParserFactory {
 
     private static final Logger logger = LoggerFactory.getLogger(RDFParserFactory.class);
 
-    private static RDFParserFactory instance;
+    private static class InstanceHolder {
+        private static final RDFParserFactory instance = new RDFParserFactory();
+    }
 
     public static RDFParserFactory getInstance() {
-        if(instance == null) {
-            instance = new RDFParserFactory();
-        }
-        return instance;
+        return InstanceHolder.instance;
     }
 
     /**
@@ -280,11 +282,11 @@ public class RDFParserFactory {
             final ExtractionContext extractionContext,
             final ExtractionResult extractionResult
     ) {
-        parser.setDatatypeHandling(
-            verifyDataType ? RDFParser.DatatypeHandling.VERIFY : RDFParser.DatatypeHandling.IGNORE
-        );
-        parser.setStopAtFirstError(stopAtFirstError);
-        parser.setParseErrorListener( new InternalParseErrorListener(extractionResult) );
+        parser.getParserConfig().setNonFatalErrors(stopAtFirstError ? Collections.emptySet() : new HashSet<>(parser.getSupportedSettings()));
+        parser.set(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, verifyDataType);
+        parser.set(BasicParserSettings.VERIFY_DATATYPE_VALUES, true);
+
+        parser.setParseErrorListener(new InternalParseErrorListener(extractionResult));
         parser.setValueFactory(
                 new Any23ValueFactoryWrapper(
                         SimpleValueFactory.getInstance(),

http://git-wip-us.apache.org/repos/asf/any23/blob/29bee074/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractor.java
index 67fac7a..1500723 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractor.java
@@ -45,7 +45,7 @@ public class RDFXMLExtractor extends BaseRDFExtractor {
      * Default constructor, with no verification of data types and not stop at first error.
      */
     public RDFXMLExtractor() {
-        this(true, true);
+        this(false, false);
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/any23/blob/29bee074/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractor.java
index a3b9681..3a8b0d7 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractor.java
@@ -46,7 +46,7 @@ public class TriXExtractor extends BaseRDFExtractor {
      * Default constructor, with no verification of data types and not stop at first error.
      */
     public TriXExtractor() {
-        this(true, true);
+        this(false, false);
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/any23/blob/29bee074/core/src/main/java/org/apache/any23/rdf/RDFUtils.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/rdf/RDFUtils.java b/core/src/main/java/org/apache/any23/rdf/RDFUtils.java
index 242984b..44a98e0 100644
--- a/core/src/main/java/org/apache/any23/rdf/RDFUtils.java
+++ b/core/src/main/java/org/apache/any23/rdf/RDFUtils.java
@@ -69,6 +69,8 @@ public class RDFUtils {
 
     private static final Logger LOG = LoggerFactory.getLogger(RDFUtils.class);
 
+    private static final Statement[] EMPTY_STATEMENTS = new Statement[0];
+
     private RDFUtils() {}
 
     /**
@@ -443,7 +445,7 @@ public class RDFUtils {
      * @throws IllegalArgumentException if no extension matches.
      */
     public static Optional<RDFFormat> getFormatByExtension(String ext) {
-        if( ! ext.startsWith(".") )
+        if (!ext.startsWith("."))
             ext = "." + ext;
         return Rio.getParserFormatForFileName(ext);
     }
@@ -463,11 +465,10 @@ public class RDFUtils {
         final StatementCollector handler = new StatementCollector();
         final RDFParser parser = getParser(format);
         parser.getParserConfig().set(BasicParserSettings.VERIFY_DATATYPE_VALUES, true);
-        parser.setStopAtFirstError(true);
         parser.setPreserveBNodeIDs(true);
         parser.setRDFHandler(handler);
         parser.parse(is, baseIRI);
-        return handler.getStatements().toArray( new Statement[handler.getStatements().size()] );
+        return handler.getStatements().toArray(EMPTY_STATEMENTS);
     }
 
     /**
@@ -508,11 +509,11 @@ public class RDFUtils {
      */
     public static Statement[] parseRDF(String resource) throws IOException {
         final int extIndex = resource.lastIndexOf('.');
-        if(extIndex == -1)
+        if (extIndex == -1)
             throw new IllegalArgumentException("Error while detecting the extension in resource name " + resource);
         final String extension = resource.substring(extIndex + 1);
-        return parseRDF( getFormatByExtension(extension).orElseThrow(Rio.unsupportedFormat(extension))
-        		        , RDFUtils.class.getResourceAsStream(resource) );
+        return parseRDF(getFormatByExtension(extension).orElseThrow(Rio.unsupportedFormat(extension)),
+                RDFUtils.class.getResourceAsStream(resource));
     }
 
     /**

http://git-wip-us.apache.org/repos/asf/any23/blob/29bee074/mime/src/main/java/org/apache/any23/mime/TikaMIMETypeDetector.java
----------------------------------------------------------------------
diff --git a/mime/src/main/java/org/apache/any23/mime/TikaMIMETypeDetector.java b/mime/src/main/java/org/apache/any23/mime/TikaMIMETypeDetector.java
index c5601b1..3347895 100644
--- a/mime/src/main/java/org/apache/any23/mime/TikaMIMETypeDetector.java
+++ b/mime/src/main/java/org/apache/any23/mime/TikaMIMETypeDetector.java
@@ -29,6 +29,7 @@ import org.apache.tika.mime.MimeTypes;
 import org.eclipse.rdf4j.rio.RDFFormat;
 import org.eclipse.rdf4j.rio.RDFParser;
 import org.eclipse.rdf4j.rio.Rio;
+import org.eclipse.rdf4j.rio.helpers.BasicParserSettings;
 
 import java.io.BufferedReader;
 import java.io.ByteArrayInputStream;
@@ -110,9 +111,7 @@ public class TikaMIMETypeDetector implements MIMETypeDetector {
     public static boolean checkTurtleFormat(InputStream is) throws IOException {
         String sample = extractDataSample(is, '.');
         RDFParser turtleParser = Rio.createParser(RDFFormat.TURTLE);
-        turtleParser.setDatatypeHandling(RDFParser.DatatypeHandling.VERIFY);
-        turtleParser.setStopAtFirstError(true);
-        turtleParser.setVerifyData(true);
+        turtleParser.set(BasicParserSettings.VERIFY_DATATYPE_VALUES, true);
         ByteArrayInputStream bais = new ByteArrayInputStream(sample.getBytes());
         try {
             turtleParser.parse(bais, "");