You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by an...@apache.org on 2013/06/26 02:19:58 UTC

[11/24] git commit: Switch off verification for now as otherwise DBPedia will be inaccessible

Switch off RDF datatype verification for now, as otherwise DBPedia
will be inaccessible


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/acf3afe9
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/acf3afe9
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/acf3afe9

Branch: refs/heads/master
Commit: acf3afe962e7d72ded6c91434562e870a386d9a7
Parents: f8cfa0e
Author: Peter Ansell <p_...@yahoo.com>
Authored: Fri Apr 19 14:07:25 2013 +1000
Committer: Peter Ansell <p_...@yahoo.com>
Committed: Wed Jun 26 10:08:07 2013 +1000

----------------------------------------------------------------------
 .../apache/any23/extractor/rdf/BaseRDFExtractor.java   | 13 +++++++++++++
 1 file changed, 13 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/acf3afe9/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
index d0797dd..c079841 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
@@ -26,10 +26,13 @@ import org.apache.any23.extractor.ExtractorDescription;
 import org.openrdf.rio.RDFHandlerException;
 import org.openrdf.rio.RDFParseException;
 import org.openrdf.rio.RDFParser;
+import org.openrdf.rio.RioSetting;
+import org.openrdf.rio.helpers.BasicParserSettings;
 import org.openrdf.rio.helpers.RDFParserBase;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.HashSet;
 
 /**
  * Base class for a generic <i>RDF</i>
@@ -90,6 +93,16 @@ public abstract class BaseRDFExtractor implements Extractor.ContentExtractor {
     ) throws IOException, ExtractionException {
         try {
             final RDFParser parser = getParser(extractionContext, extractionResult);
+            parser.getParserConfig().setNonFatalErrors(new HashSet<RioSetting<?>>());
+
+            // Disable verification to ensure that DBPedia is accessible, given it uses so many custom datatypes
+            parser.getParserConfig().set(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, false);                
+            parser.getParserConfig().addNonFatalError(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES);
+            parser.getParserConfig().set(BasicParserSettings.VERIFY_DATATYPE_VALUES, false);                
+            parser.getParserConfig().addNonFatalError(BasicParserSettings.VERIFY_DATATYPE_VALUES);
+            parser.getParserConfig().set(BasicParserSettings.NORMALIZE_DATATYPE_VALUES, false);                
+            parser.getParserConfig().addNonFatalError(BasicParserSettings.NORMALIZE_DATATYPE_VALUES);
+            
             parser.parse(in, extractionContext.getDocumentURI().stringValue());
         } catch (RDFHandlerException ex) {
             throw new IllegalStateException("Unexpected exception.", ex);