You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by an...@apache.org on 2013/04/19 06:11:04 UTC
[19/19] git commit: Switch off verification for now as otherwise
DBPedia will be inaccessible
Switch off RDF datatype verification for now, as otherwise DBPedia
(which uses many custom datatypes) will be inaccessible.
Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/ced48a39
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/ced48a39
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/ced48a39
Branch: refs/heads/ANY23-141
Commit: ced48a39f2a3f149541366958f2c3de93da21b25
Parents: d5cedee
Author: Peter Ansell <p_...@yahoo.com>
Authored: Fri Apr 19 14:07:25 2013 +1000
Committer: Peter Ansell <p_...@yahoo.com>
Committed: Fri Apr 19 14:07:25 2013 +1000
----------------------------------------------------------------------
.../any23/extractor/rdf/BaseRDFExtractor.java | 13 +++++++++++++
1 files changed, 13 insertions(+), 0 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/any23/blob/ced48a39/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
index d0797dd..c079841 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
@@ -26,10 +26,13 @@ import org.apache.any23.extractor.ExtractorDescription;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.RDFParseException;
import org.openrdf.rio.RDFParser;
+import org.openrdf.rio.RioSetting;
+import org.openrdf.rio.helpers.BasicParserSettings;
import org.openrdf.rio.helpers.RDFParserBase;
import java.io.IOException;
import java.io.InputStream;
+import java.util.HashSet;
/**
* Base class for a generic <i>RDF</i>
@@ -90,6 +93,16 @@ public abstract class BaseRDFExtractor implements Extractor.ContentExtractor {
) throws IOException, ExtractionException {
try {
final RDFParser parser = getParser(extractionContext, extractionResult);
+ parser.getParserConfig().setNonFatalErrors(new HashSet<RioSetting<?>>());
+
+ // Disable verification to ensure that DBPedia is accessible, given it uses so many custom datatypes
+ parser.getParserConfig().set(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, false);
+ parser.getParserConfig().addNonFatalError(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES);
+ parser.getParserConfig().set(BasicParserSettings.VERIFY_DATATYPE_VALUES, false);
+ parser.getParserConfig().addNonFatalError(BasicParserSettings.VERIFY_DATATYPE_VALUES);
+ parser.getParserConfig().set(BasicParserSettings.NORMALIZE_DATATYPE_VALUES, false);
+ parser.getParserConfig().addNonFatalError(BasicParserSettings.NORMALIZE_DATATYPE_VALUES);
+
parser.parse(in, extractionContext.getDocumentURI().stringValue());
} catch (RDFHandlerException ex) {
throw new IllegalStateException("Unexpected exception.", ex);