You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by an...@apache.org on 2017/01/13 22:25:47 UTC
[08/25] any23 git commit: ANY23-276 : Convert from Sesame to RDF4J
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java b/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java
index 5dea08f..7536304 100644
--- a/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java
@@ -28,13 +28,12 @@ import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.rdf.RDFUtils;
import org.apache.any23.vocab.CSV;
import org.apache.commons.csv.CSVParser;
-import org.openrdf.model.URI;
-import org.openrdf.model.Value;
-import org.openrdf.model.impl.LiteralImpl;
-import org.openrdf.model.impl.URIImpl;
-import org.openrdf.model.vocabulary.RDF;
-import org.openrdf.model.vocabulary.RDFS;
-import org.openrdf.model.vocabulary.XMLSchema;
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.Value;
+import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
+import org.eclipse.rdf4j.model.vocabulary.RDF;
+import org.eclipse.rdf4j.model.vocabulary.RDFS;
+import org.eclipse.rdf4j.model.vocabulary.XMLSchema;
import java.io.IOException;
import java.io.InputStream;
@@ -52,7 +51,7 @@ public class CSVExtractor implements Extractor.ContentExtractor {
private CSVParser csvParser;
- private URI[] headerURIs;
+ private IRI[] headerIRIs;
private CSV csv = CSV.getInstance();
@@ -71,14 +70,14 @@ public class CSVExtractor implements Extractor.ContentExtractor {
InputStream in
, ExtractionResult out
) throws IOException, ExtractionException {
- final URI documentURI = extractionContext.getDocumentURI();
+ final IRI documentIRI = extractionContext.getDocumentIRI();
// build the parser
csvParser = CSVReaderBuilder.build(in);
- // get the header and generate the URIs for column names
+ // get the header and generate the IRIs for column names
String[] header = csvParser.getLine();
- headerURIs = processHeader(header, documentURI);
+ headerIRIs = processHeader(header, documentIRI);
// write triples to describe properties
writeHeaderPropertiesMetadata(header, out);
@@ -86,8 +85,8 @@ public class CSVExtractor implements Extractor.ContentExtractor {
String[] nextLine;
int index = 0;
while ((nextLine = csvParser.getLine()) != null) {
- URI rowSubject = RDFUtils.uri(
- documentURI.toString(),
+ IRI rowSubject = RDFUtils.uri(
+ documentIRI.toString(),
"row/" + index
);
// add a row type
@@ -95,21 +94,21 @@ public class CSVExtractor implements Extractor.ContentExtractor {
// for each row produce its statements
produceRowStatements(rowSubject, nextLine, out);
// link the row to the document
- out.writeTriple(documentURI, csv.row, rowSubject);
+ out.writeTriple(documentIRI, csv.row, rowSubject);
// the progressive row number
out.writeTriple(
rowSubject,
csv.rowPosition,
- new LiteralImpl(String.valueOf(index))
+ SimpleValueFactory.getInstance().createLiteral(String.valueOf(index))
);
index++;
}
// add some CSV metadata such as the number of rows and columns
addTableMetadataStatements(
- documentURI,
+ documentIRI,
out,
index,
- headerURIs.length
+ headerIRIs.length
);
}
@@ -151,53 +150,53 @@ public class CSVExtractor implements Extractor.ContentExtractor {
*/
private void writeHeaderPropertiesMetadata(String[] header, ExtractionResult out) {
int index = 0;
- for (URI singleHeader : headerURIs) {
- if (index > headerURIs.length) {
+ for (IRI singleHeader : headerIRIs) {
+ if (index > headerIRIs.length) {
break;
}
- if (!RDFUtils.isAbsoluteURI(header[index])) {
+ if (!RDFUtils.isAbsoluteIRI(header[index])) {
out.writeTriple(
singleHeader,
RDFS.LABEL,
- new LiteralImpl(header[index])
+ SimpleValueFactory.getInstance().createLiteral(header[index])
);
}
out.writeTriple(
singleHeader,
csv.columnPosition,
- new LiteralImpl(String.valueOf(index), XMLSchema.INTEGER)
+ SimpleValueFactory.getInstance().createLiteral(String.valueOf(index), XMLSchema.INTEGER)
);
index++;
}
}
/**
- * It process the first row of the file, returning a list of {@link URI}s representing
- * the properties for each column. If a value of the header is an absolute <i>URI</i>
+ * It process the first row of the file, returning a list of {@link IRI}s representing
+ * the properties for each column. If a value of the header is an absolute <i>IRI</i>
* then it leave it as is. Otherwise the {@link org.apache.any23.vocab.CSV} vocabulary is used.
*
* @param header
- * @return an array of {@link URI}s identifying the column names.
+ * @return an array of {@link IRI}s identifying the column names.
*/
- private URI[] processHeader(String[] header, URI documentURI) {
- URI[] result = new URI[header.length];
+ private IRI[] processHeader(String[] header, IRI documentIRI) {
+ IRI[] result = new IRI[header.length];
int index = 0;
for (String h : header) {
String candidate = h.trim();
- if (RDFUtils.isAbsoluteURI(candidate)) {
- result[index] = new URIImpl(candidate);
+ if (RDFUtils.isAbsoluteIRI(candidate)) {
+ result[index] = SimpleValueFactory.getInstance().createIRI(candidate);
} else {
- result[index] = normalize(candidate, documentURI);
+ result[index] = normalize(candidate, documentIRI);
}
index++;
}
return result;
}
- private URI normalize(String toBeNormalized, URI documentURI) {
+ private IRI normalize(String toBeNormalized, IRI documentIRI) {
toBeNormalized = toBeNormalized.trim().toLowerCase().replace("?", "").replace("&", "");
- StringBuilder result = new StringBuilder(documentURI.toString());
+ StringBuilder result = new StringBuilder(documentIRI.toString());
StringTokenizer tokenizer = new StringTokenizer(toBeNormalized, " ");
while (tokenizer.hasMoreTokens()) {
@@ -206,12 +205,12 @@ public class CSVExtractor implements Extractor.ContentExtractor {
result.append(toUpperCase(current.charAt(0))).append(current.substring(1));
}
- return new URIImpl(result.toString());
+ return SimpleValueFactory.getInstance().createIRI(result.toString());
}
/**
* It writes on the provided {@link ExtractionResult}, the </>RDF statements</>
- * representing the row <i>cell</i>. If a row <i>cell</i> is an absolute <i>URI</i>
+ * representing the row <i>cell</i>. If a row <i>cell</i> is an absolute <i>IRI</i>
* then an object property is written, literal otherwise.
*
* @param rowSubject
@@ -219,13 +218,13 @@ public class CSVExtractor implements Extractor.ContentExtractor {
* @param out
*/
private void produceRowStatements(
- URI rowSubject,
+ IRI rowSubject,
String[] values,
ExtractionResult out
) {
int index = 0;
for (String cell : values) {
- if (index >= headerURIs.length) {
+ if (index >= headerIRIs.length) {
// there are some row cells that don't have an associated column name
break;
}
@@ -233,7 +232,7 @@ public class CSVExtractor implements Extractor.ContentExtractor {
index++;
continue;
}
- URI predicate = headerURIs[index];
+ IRI predicate = headerIRIs[index];
Value object = getObjectFromCell(cell);
out.writeTriple(rowSubject, predicate, object);
index++;
@@ -243,16 +242,16 @@ public class CSVExtractor implements Extractor.ContentExtractor {
private Value getObjectFromCell(String cell) {
Value object;
cell = cell.trim();
- if (RDFUtils.isAbsoluteURI(cell)) {
- object = new URIImpl(cell);
+ if (RDFUtils.isAbsoluteIRI(cell)) {
+ object = SimpleValueFactory.getInstance().createIRI(cell);
} else {
- URI datatype = XMLSchema.STRING;
+ IRI datatype = XMLSchema.STRING;
if (isInteger(cell)) {
datatype = XMLSchema.INTEGER;
} else if(isFloat(cell)) {
datatype = XMLSchema.FLOAT;
}
- object = new LiteralImpl(cell, datatype);
+ object = SimpleValueFactory.getInstance().createLiteral(cell, datatype);
}
return object;
}
@@ -261,25 +260,25 @@ public class CSVExtractor implements Extractor.ContentExtractor {
* It writes on the provided {@link ExtractionResult} some <i>RDF Statements</i>
* on generic properties of the <i>CSV</i> file, such as number of rows and columns.
*
- * @param documentURI
+ * @param documentIRI
* @param out
* @param numberOfRows
* @param numberOfColumns
*/
private void addTableMetadataStatements(
- URI documentURI,
+ IRI documentIRI,
ExtractionResult out,
int numberOfRows,
int numberOfColumns) {
out.writeTriple(
- documentURI,
+ documentIRI,
csv.numberOfRows,
- new LiteralImpl(String.valueOf(numberOfRows), XMLSchema.INTEGER)
+ SimpleValueFactory.getInstance().createLiteral(String.valueOf(numberOfRows), XMLSchema.INTEGER)
);
out.writeTriple(
- documentURI,
+ documentIRI,
csv.numberOfColumns,
- new LiteralImpl(String.valueOf(numberOfColumns), XMLSchema.INTEGER)
+ SimpleValueFactory.getInstance().createLiteral(String.valueOf(numberOfColumns), XMLSchema.INTEGER)
);
}
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/AdrExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/AdrExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/AdrExtractor.java
index d57239d..21ed595 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/AdrExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/AdrExtractor.java
@@ -21,8 +21,8 @@ import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.extractor.TagSoupExtractionResult;
import org.apache.any23.vocab.VCard;
-import org.openrdf.model.BNode;
-import org.openrdf.model.vocabulary.RDF;
+import org.eclipse.rdf4j.model.BNode;
+import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.w3c.dom.Node;
/**
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
index ad2b29f..818fc98 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
@@ -27,9 +27,9 @@ import org.apache.any23.extractor.rdf.JSONLDExtractor;
import org.apache.any23.extractor.rdf.JSONLDExtractorFactory;
import org.apache.any23.rdf.RDFUtils;
import org.apache.any23.vocab.SINDICE;
-import org.openrdf.model.URI;
-import org.openrdf.model.impl.LiteralImpl;
-import org.openrdf.model.impl.URIImpl;
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.impl.LiteralImpl;
+import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
@@ -54,9 +54,9 @@ public class EmbeddedJSONLDExtractor implements Extractor.TagSoupDOMExtractor {
private static final SINDICE vSINDICE = SINDICE.getInstance();
- private URI profile;
+ private IRI profile;
- private Map<String, URI> prefixes = new HashMap<String, URI>();
+ private Map<String, IRI> prefixes = new HashMap<String, IRI>();
private String documentLang;
@@ -78,7 +78,7 @@ public class EmbeddedJSONLDExtractor implements Extractor.TagSoupDOMExtractor {
baseProfile = profile.toString();
}
- final URI documentURI = extractionContext.getDocumentURI();
+ final IRI documentIRI = extractionContext.getDocumentIRI();
Set<JSONLDScript> jsonldScripts = extractJSONLDScript(in, baseProfile,
extractionParameters, extractionContext, out);
for (JSONLDScript jsonldScript : jsonldScripts) {
@@ -86,8 +86,8 @@ public class EmbeddedJSONLDExtractor implements Extractor.TagSoupDOMExtractor {
//if (jsonldScript.getLang() != null) {
// lang = jsonldScript.getLang();
//}
- //out.writeTriple(documentURI, jsonldScript.getName(),
- // new LiteralImpl(jsonldScript.getContent(), lang));
+ //out.writeTriple(documentIRI, jsonldScript.getName(),
+ // SimpleValueFactory.getInstance().createLiteral(jsonldScript.getContent(), lang));
}
}
@@ -107,12 +107,12 @@ public class EmbeddedJSONLDExtractor implements Extractor.TagSoupDOMExtractor {
return lang;
}
- private URI extractProfile(Document in) {
+ private IRI extractProfile(Document in) {
String profile = DomUtils.find(in, "string(/HTML/@profile)");
if (profile.equals("")) {
return null;
}
- return new URIImpl(profile);
+ return SimpleValueFactory.getInstance().createIRI(profile);
}
/**
@@ -126,8 +126,8 @@ public class EmbeddedJSONLDExtractor implements Extractor.TagSoupDOMExtractor {
NamedNodeMap attributes = linkNode.getAttributes();
String rel = attributes.getNamedItem("rel").getTextContent();
String href = attributes.getNamedItem("href").getTextContent();
- if (rel != null && href != null && RDFUtils.isAbsoluteURI(href)) {
- prefixes.put(rel, new URIImpl(href));
+ if (rel != null && href != null && RDFUtils.isAbsoluteIRI(href)) {
+ prefixes.put(rel, SimpleValueFactory.getInstance().createIRI(href));
}
}
}
@@ -157,21 +157,21 @@ public class EmbeddedJSONLDExtractor implements Extractor.TagSoupDOMExtractor {
String name = nameAttribute.getTextContent();
String content = contentAttribute.getTextContent();
String xpath = DomUtils.getXPathForNode(jsonldNode);
- URI nameAsURI = getPrefixIfExists(name);
- if (nameAsURI == null) {
- nameAsURI = new URIImpl(baseProfile + name);
+ IRI nameAsIRI = getPrefixIfExists(name);
+ if (nameAsIRI == null) {
+ nameAsIRI = SimpleValueFactory.getInstance().createIRI(baseProfile + name);
}
- JSONLDScript jsonldScript = new JSONLDScript(xpath, nameAsURI,
+ JSONLDScript jsonldScript = new JSONLDScript(xpath, nameAsIRI,
content);
result.add(jsonldScript);
}
return result;
}
- private URI getPrefixIfExists(String name) {
+ private IRI getPrefixIfExists(String name) {
String[] split = name.split("\\.");
if (split.length == 2 && prefixes.containsKey(split[0])) {
- return new URIImpl(prefixes.get(split[0]) + split[1]);
+ return SimpleValueFactory.getInstance().createIRI(prefixes.get(split[0]) + split[1]);
}
return null;
}
@@ -185,28 +185,28 @@ public class EmbeddedJSONLDExtractor implements Extractor.TagSoupDOMExtractor {
private String xpath;
- private URI name;
+ private IRI name;
private String lang;
private String content;
- public JSONLDScript(String xpath, URI name, String content) {
+ public JSONLDScript(String xpath, IRI name, String content) {
this.xpath = xpath;
this.name = name;
this.content = content;
}
- public JSONLDScript(String xpath, URI name, String content, String lang) {
+ public JSONLDScript(String xpath, IRI name, String content, String lang) {
this(xpath, name, content);
this.lang = lang;
}
- public URI getName() {
+ public IRI getName() {
return name;
}
- public void setName(URI name) {
+ public void setName(IRI name) {
this.name = name;
}
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/EntityBasedMicroformatExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/EntityBasedMicroformatExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/EntityBasedMicroformatExtractor.java
index 10e6872..677581d 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/EntityBasedMicroformatExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/EntityBasedMicroformatExtractor.java
@@ -20,7 +20,7 @@ package org.apache.any23.extractor.html;
import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.rdf.RDFUtils;
-import org.openrdf.model.BNode;
+import org.eclipse.rdf4j.model.BNode;
import org.w3c.dom.Node;
import java.util.List;
@@ -76,7 +76,7 @@ public abstract class EntityBasedMicroformatExtractor extends MicroformatExtract
* blank node ID like "MD5 of http://doc-uri/#xpath/to/node"
*/
protected BNode getBlankNodeFor(Node node) {
- return RDFUtils.getBNode(getDocumentURI() + "#" + DomUtils.getXPathForNode(node));
+ return RDFUtils.getBNode(getDocumentIRI() + "#" + DomUtils.getXPathForNode(node));
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/GeoExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/GeoExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/GeoExtractor.java
index f818ccd..d85af79 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/GeoExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/GeoExtractor.java
@@ -21,8 +21,8 @@ import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.extractor.TagSoupExtractionResult;
import org.apache.any23.vocab.VCard;
-import org.openrdf.model.BNode;
-import org.openrdf.model.vocabulary.RDF;
+import org.eclipse.rdf4j.model.BNode;
+import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.w3c.dom.Node;
/**
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/HCalendarExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/HCalendarExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/HCalendarExtractor.java
index e41ce72..3a14fca 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/HCalendarExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/HCalendarExtractor.java
@@ -22,10 +22,10 @@ import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.extractor.TagSoupExtractionResult;
import org.apache.any23.rdf.RDFUtils;
import org.apache.any23.vocab.ICAL;
-import org.openrdf.model.BNode;
-import org.openrdf.model.Resource;
-import org.openrdf.model.URI;
-import org.openrdf.model.vocabulary.RDF;
+import org.eclipse.rdf4j.model.BNode;
+import org.eclipse.rdf4j.model.Resource;
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.w3c.dom.Node;
import javax.xml.datatype.DatatypeConfigurationException;
@@ -86,8 +86,8 @@ public class HCalendarExtractor extends MicroformatExtractor {
}
private boolean extractCalendar(Node node) throws ExtractionException {
- URI cal = getDocumentURI();
- addURIProperty(cal, RDF.TYPE, vICAL.Vcalendar);
+ IRI cal = getDocumentIRI();
+ addIRIProperty(cal, RDF.TYPE, vICAL.Vcalendar);
return addComponents(node, cal);
}
@@ -106,7 +106,7 @@ public class HCalendarExtractor extends MicroformatExtractor {
private boolean extractComponent(Node node, Resource cal, String component) throws ExtractionException {
HTMLDocument compoNode = new HTMLDocument(node);
BNode evt = valueFactory.createBNode();
- addURIProperty(evt, RDF.TYPE, vICAL.getClass(component));
+ addIRIProperty(evt, RDF.TYPE, vICAL.getClass(component));
addTextProps(compoNode, evt);
addUrl(compoNode, evt);
addRRule(compoNode, evt);
@@ -131,13 +131,13 @@ public class HCalendarExtractor extends MicroformatExtractor {
private void addUrl(HTMLDocument compoNode, Resource evt) throws ExtractionException {
TextField url = compoNode.getSingularUrlField("url");
if ("".equals(url.value())) return;
- addURIProperty(evt, vICAL.url, getHTMLDocument().resolveURI(url.value()));
+ addIRIProperty(evt, vICAL.url, getHTMLDocument().resolveIRI(url.value()));
}
private void addRRule(HTMLDocument compoNode, Resource evt) {
for (Node rule : compoNode.findAllByClassName("rrule")) {
BNode rrule = valueFactory.createBNode();
- addURIProperty(rrule, RDF.TYPE, vICAL.DomainOf_rrule);
+ addIRIProperty(rrule, RDF.TYPE, vICAL.DomainOf_rrule);
TextField freq = new HTMLDocument(rule).getSingularTextField("freq");
conditionallyAddStringProperty(
freq.source(),
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/HCardExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/HCardExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/HCardExtractor.java
index cb65e9c..c1160fa 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/HCardExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/HCardExtractor.java
@@ -25,10 +25,10 @@ import org.apache.any23.extractor.TagSoupExtractionResult;
import org.apache.any23.extractor.html.annotations.Includes;
import org.apache.any23.vocab.VCard;
import org.apache.commons.lang.StringUtils;
-import org.openrdf.model.BNode;
-import org.openrdf.model.Resource;
-import org.openrdf.model.URI;
-import org.openrdf.model.vocabulary.RDF;
+import org.eclipse.rdf4j.model.BNode;
+import org.eclipse.rdf4j.model.Resource;
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
@@ -99,7 +99,7 @@ public class HCardExtractor extends EntityBasedMicroformatExtractor {
if( DomUtils.isAncestorOf(included, current) ) {
final int[] nodeLocation = DomUtils.getNodeLocation(current);
report.notifyIssue(
- IssueReport.IssueLevel.Warning,
+ IssueReport.IssueLevel.WARNING,
"Current node tries to include an ancestor node.",
nodeLocation[0], nodeLocation[1]
);
@@ -182,10 +182,10 @@ public class HCardExtractor extends EntityBasedMicroformatExtractor {
}
private boolean addTel(Resource card, String type, String value) {
- URI tel = super.fixLink(value, "tel");
- URI composed = vCARD.getProperty(type + "Tel", null);
+ IRI tel = super.fixLink(value, "tel");
+ IRI composed = vCARD.getProperty(type + "Tel", null);
if (composed == null) {
- URI simple = vCARD.getProperty(type, null);
+ IRI simple = vCARD.getProperty(type, null);
if (simple == null) {
return conditionallyAddResourceProperty(card, vCARD.tel, tel);
}
@@ -194,7 +194,7 @@ public class HCardExtractor extends EntityBasedMicroformatExtractor {
return conditionallyAddResourceProperty(card, composed, tel);
}
- private boolean addSubMicroformat(String className, Resource resource, URI property) {
+ private boolean addSubMicroformat(String className, Resource resource, IRI property) {
List<Node> nodes = fragment.findAllByClassName(className);
if (nodes.isEmpty()) return false;
for (Node node : nodes) {
@@ -206,7 +206,7 @@ public class HCardExtractor extends EntityBasedMicroformatExtractor {
return true;
}
- private boolean addStringProperty(String className, Resource resource, URI property) {
+ private boolean addStringProperty(String className, Resource resource, IRI property) {
final HTMLDocument.TextField textField = fragment.getSingularTextField(className);
return conditionallyAddStringProperty(
textField.source(),
@@ -222,7 +222,7 @@ public class HCardExtractor extends EntityBasedMicroformatExtractor {
* @param property
* @return <code>true</code> if the multi property has been added, <code>false</code> otherwise.
*/
- private boolean addStringMultiProperty(String className, Resource resource, URI property) {
+ private boolean addStringMultiProperty(String className, Resource resource, IRI property) {
HTMLDocument.TextField[] fields = fragment.getPluralTextField(className);
boolean found = false;
for(HTMLDocument.TextField field : fields) {
@@ -267,7 +267,7 @@ public class HCardExtractor extends EntityBasedMicroformatExtractor {
boolean found = false;
for (TextField link : links) {
found |= conditionallyAddResourceProperty(
- card, vCARD.logo, getHTMLDocument().resolveURI(link.value())
+ card, vCARD.logo, getHTMLDocument().resolveIRI(link.value())
);
}
return found;
@@ -278,7 +278,7 @@ public class HCardExtractor extends EntityBasedMicroformatExtractor {
boolean found = false;
for (TextField link : links) {
found |= conditionallyAddResourceProperty(
- card, vCARD.photo, getHTMLDocument().resolveURI(link.value())
+ card, vCARD.photo, getHTMLDocument().resolveIRI(link.value())
);
}
return found;
@@ -320,7 +320,7 @@ public class HCardExtractor extends EntityBasedMicroformatExtractor {
this.fragment.getDocument(),
card, vCARD.n, n
);
- addURIProperty(n, RDF.TYPE, vCARD.Name);
+ addIRIProperty(n, RDF.TYPE, vCARD.Name);
for (String fieldName : HCardName.FIELDS) {
if (!name.containsField(fieldName)) {
@@ -385,7 +385,7 @@ public class HCardExtractor extends EntityBasedMicroformatExtractor {
this.fragment.getDocument(),
card, vCARD.org, org
);
- addURIProperty(org, RDF.TYPE, vCARD.Organization);
+ addIRIProperty(org, RDF.TYPE, vCARD.Organization);
final TextField organizationTextField = name.getOrganization();
conditionallyAddLiteralProperty(
organizationTextField.source(),
@@ -405,7 +405,7 @@ public class HCardExtractor extends EntityBasedMicroformatExtractor {
TextField[] links = fragment.getPluralUrlField("url");
boolean found = false;
for (TextField link : links) {
- found |= conditionallyAddResourceProperty(card, vCARD.url, getHTMLDocument().resolveURI(link.value()));
+ found |= conditionallyAddResourceProperty(card, vCARD.url, getHTMLDocument().resolveIRI(link.value()));
}
return found;
}
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/HListingExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/HListingExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/HListingExtractor.java
index 123c03a..308c3e2 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/HListingExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/HListingExtractor.java
@@ -23,10 +23,10 @@ import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.extractor.TagSoupExtractionResult;
import org.apache.any23.vocab.FOAF;
import org.apache.any23.vocab.HListing;
-import org.openrdf.model.BNode;
-import org.openrdf.model.Resource;
-import org.openrdf.model.URI;
-import org.openrdf.model.vocabulary.RDF;
+import org.eclipse.rdf4j.model.BNode;
+import org.eclipse.rdf4j.model.Resource;
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.w3c.dom.Node;
import java.util.ArrayList;
@@ -123,7 +123,7 @@ public class HListingExtractor extends EntityBasedMicroformatExtractor {
node,
listing, hLISTING.item, blankItem
);
- addURIProperty(blankItem, RDF.TYPE, hLISTING.Item);
+ addIRIProperty(blankItem, RDF.TYPE, hLISTING.Item);
HTMLDocument item = new HTMLDocument(node);
@@ -143,7 +143,7 @@ public class HListingExtractor extends EntityBasedMicroformatExtractor {
String value = node.getNodeValue();
// do not use conditionallyAdd, it won't work cause of evaluation rules
if (!(null == value || "".equals(value))) {
- URI property = hLISTING.getPropertyCamelCase(klass);
+ IRI property = hLISTING.getPropertyCamelCase(klass);
conditionallyAddLiteralProperty(
node,
blankItem, property, valueFactory.createLiteral(value)
@@ -200,7 +200,7 @@ public class HListingExtractor extends EntityBasedMicroformatExtractor {
private Resource addLister() throws ExtractionException {
Resource blankLister = valueFactory.createBNode();
- addURIProperty(blankLister, RDF.TYPE, hLISTING.Lister);
+ addIRIProperty(blankLister, RDF.TYPE, hLISTING.Lister);
Node node = fragment.findMicroformattedObjectNode("*", "lister");
if (null == node)
return blankLister;
@@ -224,7 +224,7 @@ public class HListingExtractor extends EntityBasedMicroformatExtractor {
private void addListerUrl(HTMLDocument doc, Resource blankLister) throws ExtractionException {
TextField url = doc.getSingularUrlField("url");
- conditionallyAddResourceProperty(blankLister, hLISTING.listerUrl, getHTMLDocument().resolveURI(url.value()));
+ conditionallyAddResourceProperty(blankLister, hLISTING.listerUrl, getHTMLDocument().resolveIRI(url.value()));
}
private void addListerEmail(HTMLDocument doc, Resource blankLister) {
@@ -242,7 +242,7 @@ public class HListingExtractor extends EntityBasedMicroformatExtractor {
private void addListerLogo(HTMLDocument doc, Resource blankLister) throws ExtractionException {
TextField logo = doc.getSingularUrlField("logo");
- conditionallyAddResourceProperty(blankLister, hLISTING.listerLogo, getHTMLDocument().resolveURI(logo.value()));
+ conditionallyAddResourceProperty(blankLister, hLISTING.listerLogo, getHTMLDocument().resolveIRI(logo.value()));
}
private void addListerOrg(HTMLDocument doc, Resource blankLister) {
@@ -263,18 +263,18 @@ public class HListingExtractor extends EntityBasedMicroformatExtractor {
private void addItemUrl(HTMLDocument item, Resource blankItem) throws ExtractionException {
TextField url = item.getSingularUrlField("url");
- conditionallyAddResourceProperty(blankItem, hLISTING.itemUrl, getHTMLDocument().resolveURI(url.value()));
+ conditionallyAddResourceProperty(blankItem, hLISTING.itemUrl, getHTMLDocument().resolveIRI(url.value()));
}
private void addItemPhoto(HTMLDocument doc, Resource blankLister) throws ExtractionException {
// as per spec
String url = doc.findMicroformattedValue("*", "item", "A", "photo", "@href");
- conditionallyAddResourceProperty(blankLister, hLISTING.itemPhoto, getHTMLDocument().resolveURI(url));
+ conditionallyAddResourceProperty(blankLister, hLISTING.itemPhoto, getHTMLDocument().resolveIRI(url));
url = doc.findMicroformattedValue("*", "item", "IMG", "photo", "@src");
- conditionallyAddResourceProperty(blankLister, hLISTING.itemPhoto, getHTMLDocument().resolveURI(url));
+ conditionallyAddResourceProperty(blankLister, hLISTING.itemPhoto, getHTMLDocument().resolveIRI(url));
// as per kelkoo. Remember that contains(foo,'') is true in xpath
url = doc.findMicroformattedValue("*", "photo", "IMG", "", "@src");
- conditionallyAddResourceProperty(blankLister, hLISTING.itemPhoto, getHTMLDocument().resolveURI(url));
+ conditionallyAddResourceProperty(blankLister, hLISTING.itemPhoto, getHTMLDocument().resolveIRI(url));
}
private List<String> findActions(HTMLDocument doc) {
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/HRecipeExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/HRecipeExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/HRecipeExtractor.java
index 51735ae..1d6bdb1 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/HRecipeExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/HRecipeExtractor.java
@@ -21,9 +21,9 @@ import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.vocab.HRecipe;
-import org.openrdf.model.BNode;
-import org.openrdf.model.URI;
-import org.openrdf.model.vocabulary.RDF;
+import org.eclipse.rdf4j.model.BNode;
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.w3c.dom.Node;
/**
@@ -78,7 +78,7 @@ public class HRecipeExtractor extends EntityBasedMicroformatExtractor {
* @param fieldClass
* @param property
*/
- private void mapFieldWithProperty(HTMLDocument fragment, BNode recipe, String fieldClass, URI property) {
+ private void mapFieldWithProperty(HTMLDocument fragment, BNode recipe, String fieldClass, IRI property) {
HTMLDocument.TextField title = fragment.getSingularTextField(fieldClass);
conditionallyAddStringProperty(
title.source(), recipe, property, title.value()
@@ -104,7 +104,7 @@ public class HRecipeExtractor extends EntityBasedMicroformatExtractor {
*/
private BNode addIngredient(HTMLDocument fragment, HTMLDocument.TextField ingredient) {
final BNode ingredientBnode = getBlankNodeFor(ingredient.source());
- addURIProperty(ingredientBnode, RDF.TYPE, vHRECIPE.Ingredient);
+ addIRIProperty(ingredientBnode, RDF.TYPE, vHRECIPE.Ingredient);
conditionallyAddStringProperty(
ingredient.source(),
ingredientBnode,
@@ -161,7 +161,7 @@ public class HRecipeExtractor extends EntityBasedMicroformatExtractor {
//TODO: USE http://microformats.org/wiki/value-class-pattern to read correct date format.
private BNode addDuration(HTMLDocument fragment, HTMLDocument.TextField duration) {
final BNode durationBnode = getBlankNodeFor(duration.source());
- addURIProperty(durationBnode, RDF.TYPE, vHRECIPE.Duration);
+ addIRIProperty(durationBnode, RDF.TYPE, vHRECIPE.Duration);
conditionallyAddStringProperty(
duration.source(),
durationBnode, vHRECIPE.durationTime, duration.value()
@@ -193,7 +193,7 @@ public class HRecipeExtractor extends EntityBasedMicroformatExtractor {
private void addPhoto(HTMLDocument fragment, BNode recipe) throws ExtractionException {
final HTMLDocument.TextField[] photos = fragment.getPluralUrlField("photo");
for(HTMLDocument.TextField photo : photos) {
- addURIProperty(recipe, vHRECIPE.photo, fragment.resolveURI(photo.value()));
+ addIRIProperty(recipe, vHRECIPE.photo, fragment.resolveIRI(photo.value()));
}
}
@@ -243,7 +243,7 @@ public class HRecipeExtractor extends EntityBasedMicroformatExtractor {
*/
private BNode addNutrition(HTMLDocument fragment, HTMLDocument.TextField nutrition) {
final BNode nutritionBnode = getBlankNodeFor(nutrition.source());
- addURIProperty(nutritionBnode, RDF.TYPE, vHRECIPE.Nutrition);
+ addIRIProperty(nutritionBnode, RDF.TYPE, vHRECIPE.Nutrition);
conditionallyAddStringProperty(
nutrition.source(),
nutritionBnode, vHRECIPE.nutritionValue, nutrition.value()
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/HResumeExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/HResumeExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/HResumeExtractor.java
index 1b04d13..a4b19af 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/HResumeExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/HResumeExtractor.java
@@ -22,9 +22,9 @@ import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.extractor.TagSoupExtractionResult;
import org.apache.any23.vocab.DOAC;
import org.apache.any23.vocab.FOAF;
-import org.openrdf.model.BNode;
-import org.openrdf.model.Resource;
-import org.openrdf.model.vocabulary.RDF;
+import org.eclipse.rdf4j.model.BNode;
+import org.eclipse.rdf4j.model.Resource;
+import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.w3c.dom.Node;
import java.util.List;
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractor.java
index 249162a..7652b04 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractor.java
@@ -27,9 +27,9 @@ import org.apache.any23.extractor.html.HTMLDocument.TextField;
import org.apache.any23.vocab.Review;
import org.apache.any23.vocab.ReviewAggregate;
import org.apache.any23.vocab.VCard;
-import org.openrdf.model.BNode;
-import org.openrdf.model.Resource;
-import org.openrdf.model.vocabulary.RDF;
+import org.eclipse.rdf4j.model.BNode;
+import org.eclipse.rdf4j.model.Resource;
+import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.w3c.dom.Node;
/**
@@ -105,11 +105,11 @@ public class HReviewAggregateExtractor extends EntityBasedMicroformatExtractor {
val.value());
final TextField url = item.getSingularUrlField("url");
conditionallyAddResourceProperty(blank, vVCARD.url, getHTMLDocument()
- .resolveURI(url.value()));
+ .resolveIRI(url.value()));
TextField pics[] = item.getPluralUrlField("photo");
for (TextField pic : pics) {
- addURIProperty(blank, vVCARD.photo,
- getHTMLDocument().resolveURI(pic.value()));
+ addIRIProperty(blank, vVCARD.photo,
+ getHTMLDocument().resolveIRI(pic.value()));
}
return blank;
}
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/HReviewExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/HReviewExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/HReviewExtractor.java
index 91d07fc..d0699a1 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/HReviewExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/HReviewExtractor.java
@@ -24,9 +24,9 @@ import org.apache.any23.extractor.TagSoupExtractionResult;
import org.apache.any23.vocab.DCTerms;
import org.apache.any23.vocab.Review;
import org.apache.any23.vocab.VCard;
-import org.openrdf.model.BNode;
-import org.openrdf.model.Resource;
-import org.openrdf.model.vocabulary.RDF;
+import org.eclipse.rdf4j.model.BNode;
+import org.eclipse.rdf4j.model.Resource;
+import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.w3c.dom.Node;
import java.util.List;
@@ -121,10 +121,10 @@ public class HReviewExtractor extends EntityBasedMicroformatExtractor {
blank, vVCARD.fn, val.value()
);
final TextField url = item.getSingularUrlField("url");
- conditionallyAddResourceProperty(blank, vVCARD.url, getHTMLDocument().resolveURI(url.value()));
+ conditionallyAddResourceProperty(blank, vVCARD.url, getHTMLDocument().resolveIRI(url.value()));
TextField pics[] = item.getPluralUrlField("photo");
for (TextField pic : pics) {
- addURIProperty(blank, vVCARD.photo, getHTMLDocument().resolveURI(pic.value()));
+ addIRIProperty(blank, vVCARD.photo, getHTMLDocument().resolveIRI(pic.value()));
}
return blank;
}
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/HTMLDocument.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/HTMLDocument.java b/core/src/main/java/org/apache/any23/extractor/html/HTMLDocument.java
index bdb9cdf..bb958c7 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/HTMLDocument.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/HTMLDocument.java
@@ -20,8 +20,8 @@ package org.apache.any23.extractor.html;
import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.rdf.Any23ValueFactoryWrapper;
import org.apache.any23.rdf.RDFUtils;
-import org.openrdf.model.URI;
-import org.openrdf.model.impl.ValueFactoryImpl;
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.NamedNodeMap;
@@ -50,10 +50,10 @@ public class HTMLDocument {
private final static Logger log = LoggerFactory.getLogger(HTMLDocument.class);
private Node document;
- private java.net.URI baseURI;
+ private java.net.URI baseIRI;
private final Any23ValueFactoryWrapper valueFactory =
- new Any23ValueFactoryWrapper(ValueFactoryImpl.getInstance());
+ new Any23ValueFactoryWrapper(SimpleValueFactory.getInstance());
/**
* Reads a text field from the given node adding the content to the given <i>res</i> list.
@@ -179,12 +179,12 @@ public class HTMLDocument {
}
/**
- * @param uri string to resolve to {@link org.openrdf.model.URI}
- * @return An absolute URI, or null if the URI is not fixable
- * @throws org.apache.any23.extractor.ExtractionException If the base URI is invalid
+ * @param uri string to resolve to {@link org.eclipse.rdf4j.model.IRI}
+ * @return An absolute IRI, or null if the IRI is not fixable
+ * @throws org.apache.any23.extractor.ExtractionException If the base IRI is invalid
*/
- public URI resolveURI(String uri) throws ExtractionException {
- return valueFactory.resolveURI(uri, getBaseURI());
+ public IRI resolveIRI(String uri) throws ExtractionException {
+ return valueFactory.resolveIRI(uri, getBaseIRI());
}
public String find(String xpath) {
@@ -373,20 +373,20 @@ public class HTMLDocument {
return result.toArray( new TextField[result.size()] );
}
- private java.net.URI getBaseURI() throws ExtractionException {
- if (baseURI == null) {
+ private java.net.URI getBaseIRI() throws ExtractionException {
+ if (baseIRI == null) {
try {
if (document.getBaseURI() == null) {
log.warn("document.getBaseURI() is null, this should not happen");
}
- baseURI = new java.net.URI(RDFUtils.fixAbsoluteURI(document.getBaseURI()));
+ baseIRI = new java.net.URI(RDFUtils.fixAbsoluteIRI(document.getBaseURI()));
} catch (IllegalArgumentException ex) {
- throw new ExtractionException("Error in base URI: " + document.getBaseURI(), ex);
+ throw new ExtractionException("Error in base IRI: " + document.getBaseURI(), ex);
} catch (URISyntaxException ex) {
- throw new ExtractionException("Error in base URI: " + document.getBaseURI(), ex);
+ throw new ExtractionException("Error in base IRI: " + document.getBaseURI(), ex);
}
}
- return baseURI;
+ return baseIRI;
}
/**
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractor.java
index 3e0c84e..e67ec42 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractor.java
@@ -25,9 +25,9 @@ import org.apache.any23.extractor.Extractor;
import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.rdf.RDFUtils;
import org.apache.any23.vocab.SINDICE;
-import org.openrdf.model.URI;
-import org.openrdf.model.impl.LiteralImpl;
-import org.openrdf.model.impl.URIImpl;
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.impl.LiteralImpl;
+import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
@@ -49,9 +49,9 @@ public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor {
private static final SINDICE vSINDICE = SINDICE.getInstance();
- private URI profile;
+ private IRI profile;
- private Map<String, URI> prefixes = new HashMap<String, URI>();
+ private Map<String, IRI> prefixes = new HashMap<String, IRI>();
private String documentLang;
@@ -74,7 +74,7 @@ public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor {
baseProfile = profile.toString();
}
- final URI documentURI = extractionContext.getDocumentURI();
+ final IRI documentIRI = extractionContext.getDocumentIRI();
Set<Meta> metas = extractMetaElement(in, baseProfile);
for(Meta meta : metas) {
String lang = documentLang;
@@ -82,17 +82,29 @@ public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor {
lang = meta.getLang();
}
if(meta.isPragmaDirective){
- out.writeTriple(
- documentURI,
+ if(lang != null) {
+ out.writeTriple(
+ documentIRI,
meta.getHttpEquiv(),
- new LiteralImpl(meta.getContent(), lang)
- );
+ SimpleValueFactory.getInstance().createLiteral(meta.getContent(), lang));
+ } else {
+ out.writeTriple(
+ documentIRI,
+ meta.getHttpEquiv(),
+ SimpleValueFactory.getInstance().createLiteral(meta.getContent()));
+ }
}else {
- out.writeTriple(
- documentURI,
+ if(lang != null) {
+ out.writeTriple(
+ documentIRI,
meta.getName(),
- new LiteralImpl(meta.getContent(), lang)
- );
+ SimpleValueFactory.getInstance().createLiteral(meta.getContent(), lang));
+ } else {
+ out.writeTriple(
+ documentIRI,
+ meta.getName(),
+ SimpleValueFactory.getInstance().createLiteral(meta.getContent()));
+ }
}
}
}
@@ -111,12 +123,12 @@ public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor {
return lang;
}
- private URI extractProfile(Document in) {
+ private IRI extractProfile(Document in) {
String profile = DomUtils.find(in, "string(/HTML/@profile)");
if (profile.equals("")) {
return null;
}
- return new URIImpl(profile);
+ return SimpleValueFactory.getInstance().createIRI(profile);
}
/**
@@ -130,8 +142,8 @@ public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor {
NamedNodeMap attributes = linkNode.getAttributes();
String rel = attributes.getNamedItem("rel").getTextContent();
String href = attributes.getNamedItem("href").getTextContent();
- if(rel != null && href !=null && RDFUtils.isAbsoluteURI(href)) {
- prefixes.put(rel, new URIImpl(href));
+ if(rel != null && href !=null && RDFUtils.isAbsoluteIRI(href)) {
+ prefixes.put(rel, SimpleValueFactory.getInstance().createIRI(href));
}
}
}
@@ -156,31 +168,31 @@ public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor {
String httpEquiv = httpEquivAttribute.getTextContent();
String content = contentAttribute.getTextContent();
String xpath = DomUtils.getXPathForNode(metaNode);
- URI httpEquivAsURI = getPrefixIfExists(httpEquiv);
- if (httpEquivAsURI == null) {
- httpEquivAsURI = new URIImpl(baseProfile + httpEquiv);
+ IRI httpEquivAsIRI = getPrefixIfExists(httpEquiv);
+ if (httpEquivAsIRI == null) {
+ httpEquivAsIRI = SimpleValueFactory.getInstance().createIRI(baseProfile + httpEquiv);
}
- Meta meta = new Meta(xpath, content, httpEquivAsURI);
+ Meta meta = new Meta(xpath, content, httpEquivAsIRI);
result.add(meta);
} else {
String name = nameAttribute.getTextContent();
String content = contentAttribute.getTextContent();
String xpath = DomUtils.getXPathForNode(metaNode);
- URI nameAsURI = getPrefixIfExists(name);
- if (nameAsURI == null) {
- nameAsURI = new URIImpl(baseProfile + name);
+ IRI nameAsIRI = getPrefixIfExists(name);
+ if (nameAsIRI == null) {
+ nameAsIRI = SimpleValueFactory.getInstance().createIRI(baseProfile + name);
}
- Meta meta = new Meta(xpath, nameAsURI, content);
+ Meta meta = new Meta(xpath, nameAsIRI, content);
result.add(meta);
}
}
return result;
}
- private URI getPrefixIfExists(String name) {
+ private IRI getPrefixIfExists(String name) {
String[] split = name.split("\\.");
if(split.length == 2 && prefixes.containsKey(split[0])) {
- return new URIImpl(prefixes.get(split[0]) + split[1]);
+ return SimpleValueFactory.getInstance().createIRI(prefixes.get(split[0]) + split[1]);
}
return null;
}
@@ -194,9 +206,9 @@ public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor {
private String xpath;
- private URI name;
+ private IRI name;
- private URI httpEquiv;
+ private IRI httpEquiv;
private String lang;
@@ -204,25 +216,25 @@ public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor {
private boolean isPragmaDirective;
- public Meta(String xpath, String content, URI httpEquiv) {
+ public Meta(String xpath, String content, IRI httpEquiv) {
this.xpath = xpath;
this.content = content;
this.httpEquiv = httpEquiv;
this.setPragmaDirective(true);
}
- public Meta(String xpath, String content, URI httpEquiv, String lang) {
+ public Meta(String xpath, String content, IRI httpEquiv, String lang) {
this(xpath,content,httpEquiv);
this.lang = lang;
}
- public Meta(String xpath, URI name, String content) {
+ public Meta(String xpath, IRI name, String content) {
this.xpath = xpath;
this.name = name;
this.content = content;
}
- public Meta(String xpath, URI name, String content, String lang) {
+ public Meta(String xpath, IRI name, String content, String lang) {
this(xpath, name, content);
this.lang = lang;
}
@@ -235,19 +247,19 @@ public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor {
this.isPragmaDirective=value;
}
- public URI getHttpEquiv(){
+ public IRI getHttpEquiv(){
return httpEquiv;
}
- public void setHttpEquiv(URI httpEquiv){
+ public void setHttpEquiv(IRI httpEquiv){
this.httpEquiv=httpEquiv;
}
- public URI getName() {
+ public IRI getName() {
return name;
}
- public void setName(URI name) {
+ public void setName(IRI name) {
this.name = name;
}
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/HeadLinkExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/HeadLinkExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/HeadLinkExtractor.java
index 279ec3a..c987fa3 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/HeadLinkExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/HeadLinkExtractor.java
@@ -24,9 +24,9 @@ import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.vocab.XHTML;
import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor;
-import org.openrdf.model.URI;
-import org.openrdf.model.ValueFactory;
-import org.openrdf.model.impl.ValueFactoryImpl;
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.ValueFactory;
+import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
@@ -46,7 +46,7 @@ public class HeadLinkExtractor implements TagSoupDOMExtractor {
ExtractionResult out
) throws IOException, ExtractionException {
HTMLDocument html = new HTMLDocument(in);
- ValueFactory vf = ValueFactoryImpl.getInstance();
+ ValueFactory vf = SimpleValueFactory.getInstance();
final List<Node> headLinkNodes = DomUtils.findAll(
in,
@@ -60,11 +60,11 @@ public class HeadLinkExtractor implements TagSoupDOMExtractor {
") and @href and @rel]"
);
for (Node node : headLinkNodes) {
- final URI href = html.resolveURI(DomUtils.find(node, "@href"));
+ final IRI href = html.resolveIRI(DomUtils.find(node, "@href"));
final String rel = DomUtils.find(node, "@rel");
out.writeTriple(
- extractionContext.getDocumentURI(),
- vf.createURI(XHTML.NS + rel),
+ extractionContext.getDocumentIRI(),
+ vf.createIRI(XHTML.NS + rel),
href
);
final String title = DomUtils.find(node, "@title");
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/ICBMExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/ICBMExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/ICBMExtractor.java
index 30c2362..eb2524a 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/ICBMExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/ICBMExtractor.java
@@ -24,10 +24,10 @@ import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.rdf.Any23ValueFactoryWrapper;
import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor;
-import org.openrdf.model.BNode;
-import org.openrdf.model.URI;
-import org.openrdf.model.ValueFactory;
-import org.openrdf.model.impl.ValueFactoryImpl;
+import org.eclipse.rdf4j.model.BNode;
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.ValueFactory;
+import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.w3c.dom.Document;
import java.io.IOException;
@@ -62,15 +62,15 @@ public class ICBMExtractor implements TagSoupDOMExtractor {
return;
}
- final ValueFactory factory = new Any23ValueFactoryWrapper(ValueFactoryImpl.getInstance(), out);
+ final ValueFactory factory = new Any23ValueFactoryWrapper(SimpleValueFactory.getInstance(), out);
BNode point = factory.createBNode();
- out.writeTriple(extractionContext.getDocumentURI(), expand("dcterms:related"), point);
+ out.writeTriple(extractionContext.getDocumentIRI(), expand("dcterms:related"), point);
out.writeTriple(point, expand("rdf:type"), expand("geo:Point"));
out.writeTriple(point, expand("geo:lat"), factory.createLiteral(Float.toString(lat)));
out.writeTriple(point, expand("geo:long"), factory.createLiteral(Float.toString(lon)));
}
- private URI expand(String curie) {
+ private IRI expand(String curie) {
return getDescription().getPrefixes().expand(curie);
}
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/LicenseExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/LicenseExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/LicenseExtractor.java
index 9e0dfa7..6f666a9 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/LicenseExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/LicenseExtractor.java
@@ -25,7 +25,7 @@ import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.extractor.IssueReport;
import org.apache.any23.vocab.XHTML;
import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor;
-import org.openrdf.model.URI;
+import org.eclipse.rdf4j.model.IRI;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
@@ -50,21 +50,21 @@ public class LicenseExtractor implements TagSoupDOMExtractor {
ExtractionResult out
) throws IOException, ExtractionException {
HTMLDocument document = new HTMLDocument(in);
- final URI documentURI = extractionContext.getDocumentURI();
+ final IRI documentIRI = extractionContext.getDocumentIRI();
for (Node node : DomUtils.findAll(in, "//A[@rel='license']/@href")) {
String link = node.getNodeValue();
if ("".equals(link)) {
out.notifyIssue(
- IssueReport.IssueLevel.Warning,
+ IssueReport.IssueLevel.WARNING,
String.format(
"Invalid license link detected within document %s.",
- documentURI.toString()
+ documentIRI.toString()
),
0, 0
);
continue;
}
- out.writeTriple(documentURI, vXHTML.license, document.resolveURI(link));
+ out.writeTriple(documentIRI, vXHTML.license, document.resolveIRI(link));
}
}
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/MicroformatExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/MicroformatExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/MicroformatExtractor.java
index 31cbeb6..ad6f901 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/MicroformatExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/MicroformatExtractor.java
@@ -27,11 +27,11 @@ import org.apache.any23.extractor.TagSoupExtractionResult;
import org.apache.any23.extractor.html.annotations.Includes;
import org.apache.any23.rdf.Any23ValueFactoryWrapper;
import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor;
-import org.openrdf.model.BNode;
-import org.openrdf.model.Literal;
-import org.openrdf.model.Resource;
-import org.openrdf.model.URI;
-import org.openrdf.model.impl.ValueFactoryImpl;
+import org.eclipse.rdf4j.model.BNode;
+import org.eclipse.rdf4j.model.Literal;
+import org.eclipse.rdf4j.model.Resource;
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
@@ -50,12 +50,12 @@ public abstract class MicroformatExtractor implements TagSoupDOMExtractor {
private ExtractionContext context;
- private URI documentURI;
+ private IRI documentIRI;
private ExtractionResult out;
protected final Any23ValueFactoryWrapper valueFactory =
- new Any23ValueFactoryWrapper(ValueFactoryImpl.getInstance());
+ new Any23ValueFactoryWrapper(SimpleValueFactory.getInstance());
/**
* Returns the description of this extractor.
@@ -83,8 +83,8 @@ public abstract class MicroformatExtractor implements TagSoupDOMExtractor {
return context;
}
- public URI getDocumentURI() {
- return documentURI;
+ public IRI getDocumentIRI() {
+ return documentIRI;
}
public final void run(
@@ -95,7 +95,7 @@ public abstract class MicroformatExtractor implements TagSoupDOMExtractor {
) throws IOException, ExtractionException {
this.htmlDocument = new HTMLDocument(in);
this.context = extractionContext;
- this.documentURI = extractionContext.getDocumentURI();
+ this.documentIRI = extractionContext.getDocumentIRI();
this.out = out;
valueFactory.setIssueReport(out);
try {
@@ -129,13 +129,13 @@ public abstract class MicroformatExtractor implements TagSoupDOMExtractor {
*
* @param n the <i>HTML</i> node from which the property value has been extracted.
* @param subject the property subject.
- * @param p the property URI.
+ * @param p the property IRI.
* @param value the property value.
* @return returns <code>true</code> if the value has been accepted and added, <code>false</code> otherwise.
*/
protected boolean conditionallyAddStringProperty(
Node n,
- Resource subject, URI p, String value
+ Resource subject, IRI p, String value
) {
if (value == null) return false;
value = value.trim();
@@ -153,20 +153,20 @@ public abstract class MicroformatExtractor implements TagSoupDOMExtractor {
*
* @param n the <i>HTML</i> node from which the property value has been extracted.
* @param subject the property subject.
- * @param property the property URI.
+ * @param property the property IRI.
* @param literal the property value.
* @return returns <code>true</code> if the literal has been accepted and added, <code>false</code> otherwise.
*/
protected boolean conditionallyAddLiteralProperty(
Node n,
Resource subject,
- URI property,
+ IRI property,
Literal literal
) {
final String literalStr = literal.stringValue();
if( containsScriptBlock(literalStr) ) {
out.notifyIssue(
- IssueReport.IssueLevel.Warning,
+ IssueReport.IssueLevel.WARNING,
String.format("Detected script in literal: [%s]", literalStr)
, -1
, -1
@@ -180,13 +180,13 @@ public abstract class MicroformatExtractor implements TagSoupDOMExtractor {
}
/**
- * Helper method that adds a URI property to a node.
+ * Helper method that adds an IRI property to a node.
* @param subject the property subject.
- * @param property the property URI.
+ * @param property the property IRI.
* @param uri the property object.
* @return <code>true</code> if the resource has been added, <code>false</code> otherwise.
*/
- protected boolean conditionallyAddResourceProperty(Resource subject, URI property, URI uri) {
+ protected boolean conditionallyAddResourceProperty(Resource subject, IRI property, IRI uri) {
if (uri == null) return false;
out.writeTriple(subject, property, uri);
return true;
@@ -197,10 +197,10 @@ public abstract class MicroformatExtractor implements TagSoupDOMExtractor {
*
* @param n the <i>HTML</i> node used for extracting such property.
* @param subject the property subject.
- * @param property the property URI.
+ * @param property the property IRI.
* @param bnode the property value.
*/
- protected void addBNodeProperty(Node n, Resource subject, URI property, BNode bnode) {
+ protected void addBNodeProperty(Node n, Resource subject, IRI property, BNode bnode) {
out.writeTriple(subject, property, bnode);
TagSoupExtractionResult tser = (TagSoupExtractionResult) out;
tser.addPropertyPath(this.getClass(), subject, property, bnode, DomUtils.getXPathListForNode(n) );
@@ -210,29 +210,29 @@ public abstract class MicroformatExtractor implements TagSoupDOMExtractor {
* Helper method that adds a BNode property to a node.
*
* @param subject the property subject.
- * @param property the property URI.
+ * @param property the property IRI.
* @param bnode the property value.
*/
- protected void addBNodeProperty( Resource subject, URI property, BNode bnode) {
+ protected void addBNodeProperty( Resource subject, IRI property, BNode bnode) {
out.writeTriple(subject, property, bnode);
}
/**
- * Helper method that adds a URI property to a node.
+ * Helper method that adds an IRI property to a node.
*
* @param subject subject to add
* @param property predicate to add
* @param object object to add
*/
- protected void addURIProperty(Resource subject, URI property, URI object) {
+ protected void addIRIProperty(Resource subject, IRI property, IRI object) {
out.writeTriple(subject, property, object);
}
- protected URI fixLink(String link) {
+ protected IRI fixLink(String link) {
return valueFactory.fixLink(link, null);
}
- protected URI fixLink(String link, String defaultSchema) {
+ protected IRI fixLink(String link, String defaultSchema) {
return valueFactory.fixLink(link, defaultSchema);
}
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/SpeciesExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/SpeciesExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/SpeciesExtractor.java
index 0e9f51f..11a6223 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/SpeciesExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/SpeciesExtractor.java
@@ -22,10 +22,10 @@ import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.extractor.TagSoupExtractionResult;
import org.apache.any23.vocab.WO;
-import org.openrdf.model.BNode;
-import org.openrdf.model.Resource;
-import org.openrdf.model.URI;
-import org.openrdf.model.vocabulary.RDF;
+import org.eclipse.rdf4j.model.BNode;
+import org.eclipse.rdf4j.model.Resource;
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.w3c.dom.Node;
/**
@@ -138,7 +138,7 @@ public class SpeciesExtractor extends EntityBasedMicroformatExtractor {
}
}
- private URI resolvePropertyName(String clazz) {
+ private IRI resolvePropertyName(String clazz) {
return vWO.getProperty(
String.format(
"%sName",
@@ -147,7 +147,7 @@ public class SpeciesExtractor extends EntityBasedMicroformatExtractor {
);
}
- private URI resolveClassName(String clazz) {
+ private IRI resolveClassName(String clazz) {
String upperCaseClass = clazz.substring(0, 1);
return vWO.getClass(
String.format("%s%s",
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/TagSoupParser.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/TagSoupParser.java b/core/src/main/java/org/apache/any23/extractor/html/TagSoupParser.java
index 50311bd..e6eb9cd 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/TagSoupParser.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/TagSoupParser.java
@@ -25,6 +25,8 @@ import org.apache.xerces.xni.QName;
import org.apache.xerces.xni.XMLAttributes;
import org.apache.xerces.xni.XNIException;
import org.cyberneko.html.parsers.DOMParser;
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
@@ -66,29 +68,29 @@ public class TagSoupParser {
private final InputStream input;
- private final String documentURI;
+ private final String documentIRI;
private final String encoding;
private Document result = null;
- public TagSoupParser(InputStream input, String documentURI) {
+ public TagSoupParser(InputStream input, String documentIRI) {
this.input = input;
- this.documentURI = documentURI;
+ this.documentIRI = documentIRI;
this.encoding = null;
}
- public TagSoupParser(InputStream input, String documentURI, String encoding) {
+ public TagSoupParser(InputStream input, String documentIRI, String encoding) {
if(encoding != null && !Charset.isSupported(encoding))
throw new UnsupportedCharsetException(String.format("Charset %s is not supported", encoding));
this.input = input;
- this.documentURI = documentURI;
+ this.documentIRI = documentIRI;
this.encoding = encoding;
}
/**
- * Returns the DOM of the given document URI.
+ * Returns the DOM of the given document IRI.
*
* @return the <i>HTML</i> DOM.
* @throws IOException if there is an error whilst accessing the DOM
@@ -112,10 +114,10 @@ public class TagSoupParser {
}
} finally {
long elapsed = System.currentTimeMillis() - startTime;
- logger.debug("Parsed " + documentURI + " with NekoHTML, " + elapsed + "ms");
+ logger.debug("Parsed " + documentIRI + " with NekoHTML, " + elapsed + "ms");
}
}
- result.setDocumentURI(documentURI);
+ result.setDocumentURI(documentIRI);
return result;
}
@@ -131,15 +133,15 @@ public class TagSoupParser {
* @throws org.apache.any23.validator.ValidatorException if there is an error validating the DOM
*/
public DocumentReport getValidatedDOM(boolean applyFix) throws IOException, ValidatorException {
- final URI dURI;
+ final URI dIRI;
try {
- dURI = new URI(documentURI);
- } catch (URISyntaxException urise) {
- throw new ValidatorException("Error while performing validation, invalid document URI.", urise);
+ dIRI = new URI(documentIRI);
+ } catch (IllegalArgumentException | URISyntaxException urise) {
+ throw new ValidatorException("Error while performing validation, invalid document IRI.", urise);
}
Validator validator = new DefaultValidator();
Document document = getDOM();
- return new DocumentReport( validator.validate(dURI, document, applyFix), document );
+ return new DocumentReport( validator.validate(dIRI, document, applyFix), document );
}
private Document parse() throws IOException, SAXException, TransformerException {
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/TitleExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/TitleExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/TitleExtractor.java
index 8651ed4..3788af9 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/TitleExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/TitleExtractor.java
@@ -25,7 +25,7 @@ import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.rdf.Any23ValueFactoryWrapper;
import org.apache.any23.vocab.DCTerms;
import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor;
-import org.openrdf.model.impl.ValueFactoryImpl;
+import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.w3c.dom.Document;
import java.io.IOException;
@@ -48,13 +48,13 @@ public class TitleExtractor implements TagSoupDOMExtractor {
ExtractionResult out
) throws IOException, ExtractionException {
final Any23ValueFactoryWrapper valueFactory = new Any23ValueFactoryWrapper(
- ValueFactoryImpl.getInstance(), out, extractionContext.getDefaultLanguage()
+ SimpleValueFactory.getInstance(), out, extractionContext.getDefaultLanguage()
);
try {
String title = DomUtils.find(in, "/HTML/HEAD/TITLE/text()").trim();
if (title != null && (title.length() != 0)) {
- out.writeTriple(extractionContext.getDocumentURI(), vDCTERMS.title, valueFactory.createLiteral(title));
+ out.writeTriple(extractionContext.getDocumentIRI(), vDCTERMS.title, valueFactory.createLiteral(title));
}
} finally {
valueFactory.setIssueReport(null);
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/TurtleHTMLExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/TurtleHTMLExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/TurtleHTMLExtractor.java
index 9ede50a..17b54e6 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/TurtleHTMLExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/TurtleHTMLExtractor.java
@@ -25,9 +25,9 @@ import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.Extractor;
import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.extractor.rdf.RDFParserFactory;
-import org.openrdf.model.URI;
-import org.openrdf.rio.RDFParseException;
-import org.openrdf.rio.RDFParser;
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.rio.RDFParseException;
+import org.eclipse.rdf4j.rio.RDFParser;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
@@ -57,16 +57,16 @@ public class TurtleHTMLExtractor implements Extractor.TagSoupDOMExtractor {
) throws IOException, ExtractionException {
List<Node> scriptNodes;
HTMLDocument htmlDocument = new HTMLDocument(in);
- final URI documentURI = extractionContext.getDocumentURI();
+ final IRI documentIRI = extractionContext.getDocumentIRI();
scriptNodes = htmlDocument.findAll(".//SCRIPT[contains(@type,'text/turtle')]");
- processScriptNodes(documentURI, extractionContext, out, scriptNodes);
+ processScriptNodes(documentIRI, extractionContext, out, scriptNodes);
scriptNodes = htmlDocument.findAll(".//SCRIPT[contains(@type,'text/n3')]");
- processScriptNodes(documentURI, extractionContext, out, scriptNodes);
+ processScriptNodes(documentIRI, extractionContext, out, scriptNodes);
scriptNodes = htmlDocument.findAll(".//SCRIPT[contains(@type,'text/plain')]");
- processScriptNodes(documentURI, extractionContext,out, scriptNodes);
+ processScriptNodes(documentIRI, extractionContext,out, scriptNodes);
}
@Override
@@ -77,16 +77,16 @@ public class TurtleHTMLExtractor implements Extractor.TagSoupDOMExtractor {
/**
* Processes a list of <i>html script</i> nodes retrieving the N3 / Turtle content.
*
- * @param documentURI the URI of the original HTML document.
+ * @param documentIRI the IRI of the original HTML document.
* @param er the extraction result used to store triples.
* @param ns the list of script nodes.
*/
- private void processScriptNodes(URI documentURI, ExtractionContext ec, ExtractionResult er, List<Node> ns) {
+ private void processScriptNodes(IRI documentIRI, ExtractionContext ec, ExtractionResult er, List<Node> ns) {
if(ns.size() > 0 && turtleParser == null) {
turtleParser = RDFParserFactory.getInstance().getTurtleParserInstance(true, false, ec, er);
}
for(Node n : ns) {
- processScriptNode(turtleParser, documentURI, n, er);
+ processScriptNode(turtleParser, documentIRI, n, er);
}
}
@@ -94,20 +94,20 @@ public class TurtleHTMLExtractor implements Extractor.TagSoupDOMExtractor {
* Processes a single <i>html script</i> node.
*
* @param turtleParser the parser used to digest node content.
- * @param documentURI the URI of the original HTML document.
+ * @param documentIRI the IRI of the original HTML document.
* @param n the script node.
* @param er the extraction result used to store triples.
*/
- private void processScriptNode(RDFParser turtleParser, URI documentURI, Node n, ExtractionResult er) {
+ private void processScriptNode(RDFParser turtleParser, IRI documentIRI, Node n, ExtractionResult er) {
final Node idAttribute = n.getAttributes().getNamedItem("id");
final String graphName =
- documentURI.stringValue() +
+ documentIRI.stringValue() +
( idAttribute == null ? "" : "#" + idAttribute.getTextContent() );
try {
turtleParser.parse( new StringReader(n.getTextContent()), graphName );
} catch (RDFParseException rdfpe) {
er.notifyIssue(
- IssueReport.IssueLevel.Error,
+ IssueReport.IssueLevel.ERROR,
String.format(
"An error occurred while parsing turtle content within script node: %s",
Arrays.toString(DomUtils.getXPathListForNode(n))
@@ -115,7 +115,7 @@ public class TurtleHTMLExtractor implements Extractor.TagSoupDOMExtractor {
rdfpe.getLineNumber(), rdfpe.getColumnNumber()
);
} catch (Exception e) {
- er.notifyIssue(IssueReport.IssueLevel.Error, "An error occurred while processing RDF data.", -1, -1);
+ er.notifyIssue(IssueReport.IssueLevel.ERROR, "An error occurred while processing RDF data.", -1, -1);
}
}
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/XFNExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/XFNExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/XFNExtractor.java
index ab16fe5..af971fa 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/XFNExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/XFNExtractor.java
@@ -26,10 +26,10 @@ import org.apache.any23.rdf.Any23ValueFactoryWrapper;
import org.apache.any23.vocab.FOAF;
import org.apache.any23.vocab.XFN;
import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor;
-import org.openrdf.model.BNode;
-import org.openrdf.model.URI;
-import org.openrdf.model.impl.ValueFactoryImpl;
-import org.openrdf.model.vocabulary.RDF;
+import org.eclipse.rdf4j.model.BNode;
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
+import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
@@ -47,7 +47,7 @@ public class XFNExtractor implements TagSoupDOMExtractor {
private static final XFN vXFN = XFN.getInstance();
private final static Any23ValueFactoryWrapper factoryWrapper =
- new Any23ValueFactoryWrapper(ValueFactoryImpl.getInstance());
+ new Any23ValueFactoryWrapper(SimpleValueFactory.getInstance());
private HTMLDocument document;
private ExtractionResult out;
@@ -71,36 +71,36 @@ public class XFNExtractor implements TagSoupDOMExtractor {
BNode subject = factoryWrapper.createBNode();
boolean foundAnyXFN = false;
- final URI documentURI = extractionContext.getDocumentURI();
+ final IRI documentIRI = extractionContext.getDocumentIRI();
for (Node link : document.findAll("//A[@rel][@href]")) {
- foundAnyXFN |= extractLink(link, subject, documentURI);
+ foundAnyXFN |= extractLink(link, subject, documentIRI);
}
if (!foundAnyXFN) return;
out.writeTriple(subject, RDF.TYPE, vFOAF.Person);
- out.writeTriple(subject, vXFN.mePage, documentURI);
+ out.writeTriple(subject, vXFN.mePage, documentIRI);
} finally {
factoryWrapper.setIssueReport(null);
}
}
- private boolean extractLink(Node firstLink, BNode subject, URI documentURI)
+ private boolean extractLink(Node firstLink, BNode subject, IRI documentIRI)
throws ExtractionException {
String href = firstLink.getAttributes().getNamedItem("href").getNodeValue();
String rel = firstLink.getAttributes().getNamedItem("rel").getNodeValue();
String[] rels = rel.split("\\s+");
- URI link = document.resolveURI(href);
+ IRI link = document.resolveIRI(href);
if (containsRelMe(rels)) {
if (containsXFNRelExceptMe(rels)) {
return false; // "me" cannot be combined with any other XFN values
}
out.writeTriple(subject, vXFN.mePage, link);
- out.writeTriple(documentURI, vXFN.getExtendedProperty("me"), link);
+ out.writeTriple(documentIRI, vXFN.getExtendedProperty("me"), link);
} else {
BNode person2 = factoryWrapper.createBNode();
boolean foundAnyXFNRel = false;
for (String aRel : rels) {
- foundAnyXFNRel |= extractRel(aRel, subject, documentURI, person2, link);
+ foundAnyXFNRel |= extractRel(aRel, subject, documentIRI, person2, link);
}
if (!foundAnyXFNRel) {
return false;
@@ -129,9 +129,9 @@ public class XFNExtractor implements TagSoupDOMExtractor {
return false;
}
- private boolean extractRel(String rel, BNode person1, URI uri1, BNode person2, URI uri2) {
- URI peopleProp = vXFN.getPropertyByLocalName(rel);
- URI hyperlinkProp = vXFN.getExtendedProperty(rel);
+ private boolean extractRel(String rel, BNode person1, IRI uri1, BNode person2, IRI uri2) {
+ IRI peopleProp = vXFN.getPropertyByLocalName(rel);
+ IRI hyperlinkProp = vXFN.getExtendedProperty(rel);
if (peopleProp == null) {
return false;
}
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractor.java
index d0d9257..0e21b06 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractor.java
@@ -23,9 +23,9 @@ import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.extractor.TagSoupExtractionResult;
import org.apache.any23.extractor.html.microformats2.annotations.Includes;
import org.apache.any23.vocab.VCard;
-import org.openrdf.model.BNode;
-import org.openrdf.model.Resource;
-import org.openrdf.model.vocabulary.RDF;
+import org.eclipse.rdf4j.model.BNode;
+import org.eclipse.rdf4j.model.Resource;
+import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.w3c.dom.Node;
import org.apache.any23.extractor.html.EntityBasedMicroformatExtractor;
import org.apache.any23.extractor.html.HTMLDocument;
@@ -101,7 +101,7 @@ public class HAdrExtractor extends EntityBasedMicroformatExtractor {
private void addGeoAsUrlResource(Resource card,HTMLDocument document) throws ExtractionException {
HTMLDocument.TextField[] links = document.getPluralUrlField(Microformats2Prefixes.URL_PROPERTY_PREFIX+"geo");
for (HTMLDocument.TextField link : links) {
- conditionallyAddResourceProperty(card, vVCARD.geo, getHTMLDocument().resolveURI(link.value()));
+ conditionallyAddResourceProperty(card, vVCARD.geo, getHTMLDocument().resolveIRI(link.value()));
}
}