You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by an...@apache.org on 2013/04/19 06:11:04 UTC

[19/19] git commit: Switch off verification for now as otherwise DBPedia will be inaccessible

Switch off datatype verification for now, as otherwise DBPedia will be
inaccessible.

Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/ced48a39
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/ced48a39
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/ced48a39

Branch: refs/heads/ANY23-141
Commit: ced48a39f2a3f149541366958f2c3de93da21b25
Parents: d5cedee
Author: Peter Ansell <p_...@yahoo.com>
Authored: Fri Apr 19 14:07:25 2013 +1000
Committer: Peter Ansell <p_...@yahoo.com>
Committed: Fri Apr 19 14:07:25 2013 +1000

----------------------------------------------------------------------
 .../any23/extractor/rdf/BaseRDFExtractor.java      |   13 +++++++++++++
 1 files changed, 13 insertions(+), 0 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/ced48a39/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
index d0797dd..c079841 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
@@ -26,10 +26,13 @@ import org.apache.any23.extractor.ExtractorDescription;
 import org.openrdf.rio.RDFHandlerException;
 import org.openrdf.rio.RDFParseException;
 import org.openrdf.rio.RDFParser;
+import org.openrdf.rio.RioSetting;
+import org.openrdf.rio.helpers.BasicParserSettings;
 import org.openrdf.rio.helpers.RDFParserBase;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.HashSet;
 
 /**
  * Base class for a generic <i>RDF</i>
@@ -90,6 +93,16 @@ public abstract class BaseRDFExtractor implements Extractor.ContentExtractor {
     ) throws IOException, ExtractionException {
         try {
             final RDFParser parser = getParser(extractionContext, extractionResult);
+            parser.getParserConfig().setNonFatalErrors(new HashSet<RioSetting<?>>());
+
+            // Disable verification to ensure that DBPedia is accessible, given it uses so many custom datatypes
+            parser.getParserConfig().set(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, false);                
+            parser.getParserConfig().addNonFatalError(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES);
+            parser.getParserConfig().set(BasicParserSettings.VERIFY_DATATYPE_VALUES, false);                
+            parser.getParserConfig().addNonFatalError(BasicParserSettings.VERIFY_DATATYPE_VALUES);
+            parser.getParserConfig().set(BasicParserSettings.NORMALIZE_DATATYPE_VALUES, false);                
+            parser.getParserConfig().addNonFatalError(BasicParserSettings.NORMALIZE_DATATYPE_VALUES);
+            
             parser.parse(in, extractionContext.getDocumentURI().stringValue());
         } catch (RDFHandlerException ex) {
             throw new IllegalStateException("Unexpected exception.", ex);