You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2015/07/03 19:02:17 UTC
[1/3] jena git commit: jena-text stored literals: initial
functionality and tests for Lucene
Repository: jena
Updated Branches:
refs/heads/master 01bc520ed -> b7eac624c
jena-text stored literals: initial functionality and tests for Lucene
Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/1592c33f
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/1592c33f
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/1592c33f
Branch: refs/heads/master
Commit: 1592c33f21e5337ecfa74706f5a675e6c57f9967
Parents: 6a08429
Author: Osma Suominen <os...@aalto.fi>
Authored: Fri Jun 26 09:53:10 2015 +0300
Committer: Osma Suominen <os...@aalto.fi>
Committed: Fri Jun 26 16:32:00 2015 +0300
----------------------------------------------------------------------
.../jena/query/text/DatasetGraphText.java | 2 +-
.../java/org/apache/jena/query/text/Entity.java | 10 +-
.../org/apache/jena/query/text/TextHit.java | 10 +-
.../jena/query/text/TextHitConverter.java | 11 +-
.../org/apache/jena/query/text/TextIndex.java | 4 +-
.../apache/jena/query/text/TextIndexConfig.java | 9 +
.../apache/jena/query/text/TextIndexLucene.java | 52 +++-
.../apache/jena/query/text/TextIndexSolr.java | 13 +-
.../apache/jena/query/text/TextQueryFuncs.java | 4 +-
.../org/apache/jena/query/text/TextQueryPF.java | 50 +++-
.../assembler/TextIndexLuceneAssembler.java | 11 +
.../jena/query/text/assembler/TextVocab.java | 1 +
.../org/apache/jena/query/text/TS_Text.java | 1 +
.../TestDatasetWithLuceneStoredLiterals.java | 248 +++++++++++++++++++
14 files changed, 390 insertions(+), 36 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/jena/blob/1592c33f/jena-text/src/main/java/org/apache/jena/query/text/DatasetGraphText.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/DatasetGraphText.java b/jena-text/src/main/java/org/apache/jena/query/text/DatasetGraphText.java
index e1d7306..f63e12a 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/DatasetGraphText.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/DatasetGraphText.java
@@ -99,7 +99,7 @@ public class DatasetGraphText extends DatasetGraphMonitor implements Transaction
String f = textIndex.getDocDef().getField(predicate) ;
queryString = f + ":" + queryString ;
}
- List<TextHit> results = textIndex.query(queryString, limit) ;
+ List<TextHit> results = textIndex.query(predicate, queryString, limit) ;
return results.iterator() ;
}
http://git-wip-us.apache.org/repos/asf/jena/blob/1592c33f/jena-text/src/main/java/org/apache/jena/query/text/Entity.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/Entity.java b/jena-text/src/main/java/org/apache/jena/query/text/Entity.java
index 4ca5782..83e34c6 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/Entity.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/Entity.java
@@ -19,6 +19,7 @@
package org.apache.jena.query.text;
import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.jena.datatypes.RDFDatatype;
import java.util.HashMap ;
import java.util.Map ;
@@ -28,17 +29,20 @@ public class Entity
private final String id ;
private final String graph ;
private final String language ;
+ private final RDFDatatype datatype ;
private final Map<String, Object> map = new HashMap<>() ;
public Entity(String entityId, String entityGraph) {
- this(entityId, entityGraph, null);
+ this(entityId, entityGraph, null, null);
}
- public Entity(String entityId, String entityGraph, String lang) {
+ public Entity(String entityId, String entityGraph, String lang, RDFDatatype datatype) {
this.id = entityId ;
this.graph = entityGraph;
this.language = lang;
+ this.datatype = datatype;
}
+
/** @deprecated Use {@linkplain #Entity(String, String)} */
@Deprecated
public Entity(String entityId) { this(entityId, null) ; }
@@ -49,6 +53,8 @@ public class Entity
public String getLanguage() { return language ; }
+ public RDFDatatype getDatatype() { return datatype ; }
+
public void put(String key, Object value)
{ map.put(key, value) ; }
http://git-wip-us.apache.org/repos/asf/jena/blob/1592c33f/jena-text/src/main/java/org/apache/jena/query/text/TextHit.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextHit.java b/jena-text/src/main/java/org/apache/jena/query/text/TextHit.java
index a60f78e..75ffce1 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextHit.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextHit.java
@@ -25,12 +25,14 @@ public class TextHit
{
private Node node;
private float score;
+ private Node literal;
- public TextHit(Node node, float score) {
+ public TextHit(Node node, float score, Node literal) {
this.node = node;
this.score = score;
+ this.literal = literal;
}
-
+
public Node getNode() {
return this.node;
}
@@ -38,4 +40,8 @@ public class TextHit
public float getScore() {
return this.score;
}
+
+ public Node getLiteral() {
+ return this.literal;
+ }
}
http://git-wip-us.apache.org/repos/asf/jena/blob/1592c33f/jena-text/src/main/java/org/apache/jena/query/text/TextHitConverter.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextHitConverter.java b/jena-text/src/main/java/org/apache/jena/query/text/TextHitConverter.java
index 2894680..c15a449 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextHitConverter.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextHitConverter.java
@@ -32,20 +32,25 @@ public class TextHitConverter implements Function<TextHit, Binding>
private Binding binding;
private Var match;
private Var score;
+ private Var literal;
- public TextHitConverter(Binding binding, Var match, Var score) {
+ public TextHitConverter(Binding binding, Var match, Var score, Var literal) {
this.binding = binding;
this.match = match;
this.score = score;
+ this.literal = literal;
}
@Override
public Binding apply(TextHit hit) {
- if (score == null)
+ if (score == null && literal == null)
return BindingFactory.binding(binding, match, hit.getNode());
BindingMap bmap = BindingFactory.create(binding);
bmap.add(match, hit.getNode());
- bmap.add(score, NodeFactoryExtra.floatToNode(hit.getScore()));
+ if (score != null)
+ bmap.add(score, NodeFactoryExtra.floatToNode(hit.getScore()));
+ if (literal != null)
+ bmap.add(literal, hit.getLiteral());
return bmap;
}
}
http://git-wip-us.apache.org/repos/asf/jena/blob/1592c33f/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java
index e07f8e5..999eb46 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java
@@ -46,9 +46,9 @@ public interface TextIndex extends Closeable //, Transactional
/** Access the index - limit if -1 for as many as possible
* Throw QueryParseException for syntax errors in the query string.
*/
- List<TextHit> query(String qs, int limit) ;
+ List<TextHit> query(Node property, String qs, int limit) ;
- List<TextHit> query(String qs) ;
+ List<TextHit> query(Node property, String qs) ;
EntityDefinition getDocDef() ;
}
http://git-wip-us.apache.org/repos/asf/jena/blob/1592c33f/jena-text/src/main/java/org/apache/jena/query/text/TextIndexConfig.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexConfig.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexConfig.java
index feeb324..98e82f6 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexConfig.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexConfig.java
@@ -26,6 +26,7 @@ public class TextIndexConfig {
Analyzer analyzer;
Analyzer queryAnalyzer;
boolean multilingualSupport;
+ boolean valueStored;
public TextIndexConfig(EntityDefinition entDef) {
this.entDef = entDef;
@@ -58,4 +59,12 @@ public class TextIndexConfig {
public void setMultilingualSupport(boolean multilingualSupport) {
this.multilingualSupport = multilingualSupport;
}
+
+ public boolean isValueStored() {
+ return valueStored;
+ }
+
+ public void setValueStored(boolean valueStored) {
+ this.valueStored = valueStored;
+ }
}
http://git-wip-us.apache.org/repos/asf/jena/blob/1592c33f/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
index c677184..c02037c 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
@@ -22,6 +22,9 @@ import java.io.IOException ;
import java.util.* ;
import java.util.Map.Entry ;
+import org.apache.jena.datatypes.RDFDatatype ;
+import org.apache.jena.datatypes.TypeMapper ;
+import org.apache.jena.datatypes.xsd.XSDDatatype ;
import org.apache.jena.graph.Node ;
import org.apache.jena.graph.NodeFactory ;
import org.apache.jena.sparql.util.NodeFactoryExtra ;
@@ -47,6 +50,8 @@ public class TextIndexLucene implements TextIndex {
private static int MAX_N = 10000 ;
public static final Version VER = Version.LUCENE_46 ;
+ // prefix for storing datatype URIs in the index, to distinguish them from language tags
+ private static final String DATATYPE_PREFIX = "^^";
public static final FieldType ftIRI ;
static {
@@ -57,14 +62,12 @@ public class TextIndexLucene implements TextIndex {
ftIRI.freeze() ;
}
public static final FieldType ftString = StringField.TYPE_NOT_STORED ;
- public static final FieldType ftText = TextField.TYPE_NOT_STORED ;
- // Bigger index, easier to debug!
- // public static final FieldType ftText = TextField.TYPE_STORED ;
private final EntityDefinition docDef ;
private final Directory directory ;
private final Analyzer analyzer ;
private final Analyzer queryAnalyzer ;
+ private final FieldType ftText ;
// The IndexWriter can't be final because we may have to recreate it if rollback() is called.
// However, it needs to be volatile in case the next write transaction is on a different thread,
@@ -99,6 +102,9 @@ public class TextIndexLucene implements TextIndex {
this.analyzer = new PerFieldAnalyzerWrapper(
(null != config.getAnalyzer()) ? config.getAnalyzer() : new StandardAnalyzer(VER), analyzerPerField) ;
this.queryAnalyzer = (null != config.getQueryAnalyzer()) ? config.getQueryAnalyzer() : this.analyzer ;
+ this.ftText = config.isValueStored() ? TextField.TYPE_STORED : TextField.TYPE_NOT_STORED ;
+ if (config.isValueStored() && docDef.getLangField() == null)
+ log.warn("Values stored but langField not set. Returned values will not have language tag or datatype.");
openIndexWriter();
}
@@ -249,8 +255,13 @@ public class TextIndexLucene implements TextIndex {
doc.add( new Field(e.getKey(), (String) e.getValue(), ftText) );
if (langField != null) {
String lang = entity.getLanguage();
- if (lang != null && !"".equals(lang))
+ RDFDatatype datatype = entity.getDatatype();
+ if (lang != null && !"".equals(lang)) {
doc.add(new Field(langField, lang, StringField.TYPE_STORED));
+ } else if (datatype != null && datatype != XSDDatatype.XSDstring) {
+ // for non-string and non-langString datatypes, store the datatype in langField
+ doc.add(new Field(langField, DATATYPE_PREFIX + datatype.getURI(), StringField.TYPE_STORED));
+ }
}
if (uidField != null) {
String hash = entity.getChecksum(e.getKey(), (String) e.getValue());
@@ -319,14 +330,14 @@ public class TextIndexLucene implements TextIndex {
}
@Override
- public List<TextHit> query(String qs) {
- return query(qs, MAX_N) ;
+ public List<TextHit> query(Node property, String qs) {
+ return query(property, qs, MAX_N) ;
}
@Override
- public List<TextHit> query(String qs, int limit) {
+ public List<TextHit> query(Node property, String qs, int limit) {
try (IndexReader indexReader = DirectoryReader.open(directory)) {
- return query$(indexReader, qs, limit) ;
+ return query$(indexReader, property, qs, limit) ;
}
catch (ParseException ex) {
throw new TextIndexParseException(qs, ex.getMessage()) ;
@@ -336,7 +347,7 @@ public class TextIndexLucene implements TextIndex {
}
}
- private List<TextHit> query$(IndexReader indexReader, String qs, int limit) throws ParseException, IOException {
+ private List<TextHit> query$(IndexReader indexReader, Node property, String qs, int limit) throws ParseException, IOException {
IndexSearcher indexSearcher = new IndexSearcher(indexReader) ;
Query query = preParseQuery(qs, docDef.getPrimaryField(), queryAnalyzer) ;
if ( limit <= 0 )
@@ -349,9 +360,30 @@ public class TextIndexLucene implements TextIndex {
for ( ScoreDoc sd : sDocs ) {
Document doc = indexSearcher.doc(sd.doc) ;
String[] values = doc.getValues(docDef.getEntityField()) ;
+
+ Node literal = null;
+ String field = (property != null) ? docDef.getField(property) : docDef.getPrimaryField();
+ String[] lexicals = doc.getValues(field) ;
+ if (lexicals.length > 0) {
+ String lexical = lexicals[0];
+ String[] langs = doc.getValues(docDef.getLangField()) ;
+ if (langs.length > 0) {
+ String lang = langs[0];
+ if (lang.startsWith(DATATYPE_PREFIX)) {
+ String datatype = lang.substring(DATATYPE_PREFIX.length());
+ TypeMapper tmap = TypeMapper.getInstance();
+ literal = NodeFactory.createLiteral(lexical, tmap.getSafeTypeByName(datatype));
+ } else {
+ literal = NodeFactory.createLiteral(lexical, lang);
+ }
+ } else {
+ literal = NodeFactory.createLiteral(lexical);
+ }
+ }
+
for ( String v : values ) {
Node n = TextQueryFuncs.stringToNode(v) ;
- TextHit hit = new TextHit(n, sd.score);
+ TextHit hit = new TextHit(n, sd.score, literal);
results.add(hit) ;
}
}
http://git-wip-us.apache.org/repos/asf/jena/blob/1592c33f/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java
index d806932..5d645c6 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java
@@ -184,10 +184,10 @@ public class TextIndexSolr implements TextIndex
}
@Override
- public List<TextHit> query(String qs) { return query(qs, 0) ; }
+ public List<TextHit> query(Node property, String qs) { return query(property, qs, 0) ; }
@Override
- public List<TextHit> query(String qs, int limit) {
+ public List<TextHit> query(Node property, String qs, int limit) {
SolrDocumentList solrResults = solrQuery(qs, limit) ;
List<TextHit> results = new ArrayList<>() ;
@@ -196,7 +196,14 @@ public class TextIndexSolr implements TextIndex
// log.info("Entity: "+uriStr) ;
Node n = TextQueryFuncs.stringToNode(str) ;
Float score = (Float) sd.getFirstValue("score");
- TextHit hit = new TextHit(n, score.floatValue());
+ // capture literal value, if stored
+ Node literal = null;
+ String field = (property != null) ? docDef.getField(property) : docDef.getPrimaryField();
+ String value = (String) sd.getFirstValue(field);
+ if (value != null) {
+ literal = NodeFactory.createLiteral(value); // FIXME: language and datatype
+ }
+ TextHit hit = new TextHit(n, score.floatValue(), literal);
results.add(hit) ;
}
http://git-wip-us.apache.org/repos/asf/jena/blob/1592c33f/jena-text/src/main/java/org/apache/jena/query/text/TextQueryFuncs.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextQueryFuncs.java b/jena-text/src/main/java/org/apache/jena/query/text/TextQueryFuncs.java
index b41d36b..7854726 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextQueryFuncs.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextQueryFuncs.java
@@ -19,6 +19,7 @@
package org.apache.jena.query.text;
import org.apache.jena.atlas.logging.Log ;
+import org.apache.jena.datatypes.RDFDatatype;
import org.apache.jena.graph.Node ;
import org.apache.jena.graph.NodeFactory ;
import org.apache.jena.rdf.model.AnonId ;
@@ -78,7 +79,8 @@ public class TextQueryFuncs {
String x = TextQueryFuncs.subjectToString(s) ;
String graphText = TextQueryFuncs.graphNodeToString(g) ;
String language = o.getLiteral().language() ;
- Entity entity = new Entity(x, graphText, language) ;
+ RDFDatatype datatype = o.getLiteral().getDatatype() ;
+ Entity entity = new Entity(x, graphText, language, datatype) ;
String graphField = defn.getGraphField() ;
if ( defn.getGraphField() != null )
entity.put(graphField, graphText) ;
http://git-wip-us.apache.org/repos/asf/jena/blob/1592c33f/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java b/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
index 0032be8..3a0b214 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
@@ -33,6 +33,9 @@ import org.apache.jena.sparql.core.* ;
import org.apache.jena.sparql.engine.ExecutionContext ;
import org.apache.jena.sparql.engine.QueryIterator ;
import org.apache.jena.sparql.engine.binding.Binding ;
+import org.apache.jena.sparql.engine.binding.BindingFactory ;
+import org.apache.jena.sparql.engine.binding.BindingMap ;
+import org.apache.jena.sparql.engine.iterator.QueryIterExtendByVar ;
import org.apache.jena.sparql.engine.iterator.QueryIterPlainWrapper ;
import org.apache.jena.sparql.engine.iterator.QueryIterSlice ;
import org.apache.jena.sparql.mgt.Explain ;
@@ -65,8 +68,12 @@ public class TextQueryPF extends PropertyFunctionBase {
DatasetGraph dsg = execCxt.getDataset() ;
textIndex = chooseTextIndex(dsg) ;
- if (argSubject.isList() && argSubject.getArgListSize() != 2)
- throw new QueryBuildException("Subject has "+argSubject.getArgList().size()+" elements, not 2: "+argSubject);
+ if (argSubject.isList()) {
+ int size = argSubject.getArgListSize();
+ if (size != 2 && size != 3) {
+ throw new QueryBuildException("Subject has "+argSubject.getArgList().size()+" elements, not 2 or 3: "+argSubject);
+ }
+ }
if (argObject.isList()) {
List<Node> list = argObject.getArgList() ;
@@ -148,6 +155,7 @@ public class TextQueryPF extends PropertyFunctionBase {
Node s = null;
Node score = null;
+ Node literal = null;
if (argSubject.isList()) {
// Length checked in build()
@@ -156,6 +164,12 @@ public class TextQueryPF extends PropertyFunctionBase {
if (!score.isVariable())
throw new QueryExecException("Hit score is not a variable: "+argSubject) ;
+
+ if (argSubject.getArgListSize() > 2) {
+ literal = argSubject.getArg(2);
+ if (!literal.isVariable())
+ throw new QueryExecException("Hit literal is not a variable: "+argSubject) ;
+ }
} else {
s = argSubject.getArg() ;
}
@@ -173,32 +187,34 @@ public class TextQueryPF extends PropertyFunctionBase {
// ----
QueryIterator qIter = (Var.isVar(s))
- ? variableSubject(binding, s, score, match, execCxt)
- : concreteSubject(binding, s, score, match, execCxt) ;
+ ? variableSubject(binding, s, score, literal, match, execCxt)
+ : concreteSubject(binding, s, score, literal, match, execCxt) ;
if (match.getLimit() >= 0)
qIter = new QueryIterSlice(qIter, 0, match.getLimit(), execCxt) ;
return qIter ;
}
- private QueryIterator variableSubject(Binding binding, Node s, Node score, StrMatch match, ExecutionContext execCxt) {
+ private QueryIterator variableSubject(Binding binding, Node s, Node score, Node literal, StrMatch match, ExecutionContext execCxt) {
Var sVar = Var.alloc(s) ;
Var scoreVar = (score==null) ? null : Var.alloc(score) ;
- List<TextHit> r = query(match.getQueryString(), match.getLimit(), execCxt) ;
- Function<TextHit,Binding> converter = new TextHitConverter(binding, sVar, scoreVar);
+ Var literalVar = (literal==null) ? null : Var.alloc(literal) ;
+ List<TextHit> r = query(match.getProperty(), match.getQueryString(), match.getLimit(), execCxt) ;
+ Function<TextHit,Binding> converter = new TextHitConverter(binding, sVar, scoreVar, literalVar);
Iterator<Binding> bIter = Iter.map(r.iterator(), converter);
QueryIterator qIter = new QueryIterPlainWrapper(bIter, execCxt);
return qIter ;
}
- private QueryIterator concreteSubject(Binding binding, Node s, Node score, StrMatch match, ExecutionContext execCxt) {
+ private QueryIterator concreteSubject(Binding binding, Node s, Node score, Node literal, StrMatch match, ExecutionContext execCxt) {
if (!s.isURI()) {
log.warn("Subject not a URI: " + s) ;
return IterLib.noResults(execCxt) ;
}
Var scoreVar = (score==null) ? null : Var.alloc(score) ;
+ Var literalVar = (literal==null) ? null : Var.alloc(literal) ;
String qs = match.getQueryString() ;
- List<TextHit> x = query(match.getQueryString(), -1, execCxt) ;
+ List<TextHit> x = query(match.getProperty(), match.getQueryString(), -1, execCxt) ;
if ( x == null ) // null return value - empty result
return IterLib.noResults(execCxt) ;
@@ -206,7 +222,17 @@ public class TextQueryPF extends PropertyFunctionBase {
for (TextHit hit : x ) {
if (hit.getNode().equals(s)) {
// found the node among the hits
- return IterLib.oneResult(binding, scoreVar, NodeFactoryExtra.floatToNode(hit.getScore()), execCxt) ;
+ if (literalVar == null) {
+ return IterLib.oneResult(binding, scoreVar, NodeFactoryExtra.floatToNode(hit.getScore()), execCxt);
+ }
+ BindingMap bmap = BindingFactory.create(binding);
+ if (scoreVar != null) {
+ bmap.add(scoreVar, NodeFactoryExtra.floatToNode(hit.getScore()));
+ }
+ if (literalVar != null) {
+ bmap.add(literalVar, hit.getLiteral());
+ }
+ return IterLib.result(bmap, execCxt) ;
}
}
@@ -214,7 +240,7 @@ public class TextQueryPF extends PropertyFunctionBase {
return IterLib.noResults(execCxt) ;
}
- private List<TextHit> query(String queryString, int limit, ExecutionContext execCxt) {
+ private List<TextHit> query(Node property, String queryString, int limit, ExecutionContext execCxt) {
// use the graph information in the text index if possible
if (textIndex.getDocDef().getGraphField() != null
&& execCxt.getActiveGraph() instanceof GraphView) {
@@ -243,7 +269,7 @@ public class TextQueryPF extends PropertyFunctionBase {
Explain.explain(execCxt.getContext(), "Text query: "+queryString) ;
if ( log.isDebugEnabled())
log.debug("Text query: {} ({})", queryString,limit) ;
- return textIndex.query(queryString, limit) ;
+ return textIndex.query(property, queryString, limit) ;
}
/** Deconstruct the node or list object argument and make a StrMatch
http://git-wip-us.apache.org/repos/asf/jena/blob/1592c33f/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
index abc6c97..98ffec7 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
@@ -105,12 +105,23 @@ public class TextIndexLuceneAssembler extends AssemblerBase {
isMultilingualSupport = mlsNode.asLiteral().getBoolean();
}
+ boolean storeValues = false;
+ Statement storeValuesStatement = root.getProperty(pStoreValues);
+ if (null != storeValuesStatement) {
+ RDFNode svNode = storeValuesStatement.getObject();
+ if (! svNode.isLiteral()) {
+ throw new TextIndexException("text:storeValues property must be a string : " + svNode);
+ }
+ storeValues = svNode.asLiteral().getBoolean();
+ }
+
Resource r = GraphUtils.getResourceValue(root, pEntityMap) ;
EntityDefinition docDef = (EntityDefinition)a.open(r) ;
TextIndexConfig config = new TextIndexConfig(docDef);
config.setAnalyzer(analyzer);
config.setQueryAnalyzer(queryAnalyzer);
config.setMultilingualSupport(isMultilingualSupport);
+ config.setValueStored(storeValues);
return TextDatasetFactory.createLuceneIndex(directory, config) ;
} catch (IOException e) {
http://git-wip-us.apache.org/repos/asf/jena/blob/1592c33f/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
index 743d773..fb14505 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
@@ -40,6 +40,7 @@ public class TextVocab
public static final Property pServer = Vocab.property(NS, "server") ; // Solr
public static final Property pDirectory = Vocab.property(NS, "directory") ; // Lucene
public static final Property pMultilingualSupport = Vocab.property(NS, "multilingualSupport") ;
+ public static final Property pStoreValues = Vocab.property(NS, "storeValues") ;
public static final Property pQueryAnalyzer = Vocab.property(NS, "queryAnalyzer") ;
public static final Property pEntityMap = Vocab.property(NS, "entityMap") ;
http://git-wip-us.apache.org/repos/asf/jena/blob/1592c33f/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java b/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
index 07f141a..3459e43 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
@@ -34,6 +34,7 @@ import org.junit.runners.Suite.SuiteClasses ;
, TestDatasetWithLuceneTextIndexWithLangField.class
, TestDatasetWithLuceneGraphTextIndex.class
, TestDatasetWithLuceneTextIndexDeletionSupport.class
+ , TestDatasetWithLuceneStoredLiterals.class
// Embedded solr not supported
//, TestDatasetWithEmbeddedSolrTextIndex.class
http://git-wip-us.apache.org/repos/asf/jena/blob/1592c33f/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneStoredLiterals.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneStoredLiterals.java b/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneStoredLiterals.java
new file mode 100644
index 0000000..b55bbd7
--- /dev/null
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneStoredLiterals.java
@@ -0,0 +1,248 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.Reader ;
+import java.io.StringReader ;
+import java.util.Arrays ;
+import java.util.HashMap ;
+import java.util.HashSet ;
+import java.util.Map ;
+import java.util.Set ;
+
+import org.apache.jena.assembler.Assembler ;
+import org.apache.jena.atlas.lib.StrUtils ;
+import org.apache.jena.datatypes.xsd.XSDDatatype ;
+import org.apache.jena.graph.NodeFactory ;
+import org.apache.jena.query.Dataset ;
+import org.apache.jena.query.Query ;
+import org.apache.jena.query.QueryExecution ;
+import org.apache.jena.query.QueryExecutionFactory ;
+import org.apache.jena.query.QueryFactory ;
+import org.apache.jena.query.QuerySolution ;
+import org.apache.jena.query.ReadWrite ;
+import org.apache.jena.query.ResultSet ;
+import org.apache.jena.query.text.assembler.TextAssembler ;
+import org.apache.jena.rdf.model.Literal ;
+import org.apache.jena.rdf.model.Model ;
+import org.apache.jena.rdf.model.ModelFactory ;
+import org.apache.jena.rdf.model.Resource ;
+import org.junit.After ;
+import org.junit.Before ;
+import org.junit.Test ;
+
+public class TestDatasetWithLuceneStoredLiterals extends AbstractTestDatasetWithTextIndex {
+
+ private static final String SPEC_BASE = "http://example.org/spec#"; // namespace bound to the empty prefix inside SPEC
+ private static final String SPEC_ROOT_LOCAL = "lucene_text_dataset"; // local name of the text:TextDataset resource described in SPEC
+ private static final String SPEC_ROOT_URI = SPEC_BASE + SPEC_ROOT_LOCAL; // full URI of that resource; before() looks it up and opens it
+ private static final String SPEC; // assembler description; before() parses it as Turtle
+ static {
+ SPEC = StrUtils.strjoinNL(
+ "prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> ",
+ "prefix ja: <http://jena.hpl.hp.com/2005/11/Assembler#> ",
+ "prefix tdb: <http://jena.hpl.hp.com/2008/tdb#>",
+ "prefix text: <http://jena.apache.org/text#>",
+ "prefix : <" + SPEC_BASE + ">",
+ "",
+ "[] ja:loadClass \"org.apache.jena.query.text.TextQuery\" .",
+ "text:TextDataset rdfs:subClassOf ja:RDFDataset .",
+ "text:TextIndexLucene rdfs:subClassOf text:TextIndex .",
+
+ ":" + SPEC_ROOT_LOCAL, // root resource: a text dataset combining :dataset with :indexLucene
+ " a text:TextDataset ;",
+ " text:dataset :dataset ;",
+ " text:index :indexLucene ;",
+ " .",
+ "",
+ ":dataset",
+ " a ja:RDFDataset ;",
+ " ja:defaultGraph :graph ;",
+ ".",
+ ":graph",
+ " a ja:MemoryModel ;",
+ ".",
+ "",
+ ":indexLucene",
+ " a text:TextIndexLucene ;",
+ " text:directory \"mem\" ;", // NOTE(review): mem presumably selects an in-memory index - confirm in TextIndexLuceneAssembler
+ " text:storeValues true ;", // the option this test class exercises (text:storeValues)
+ " text:entityMap :entMap ;",
+ " .",
+ "",
+ ":entMap",
+ " a text:EntityMap ;",
+ " text:entityField \"uri\" ;",
+ " text:defaultField \"label\" ;", // rdfs:label is the default field; rdfs:comment is the non-default one
+ " text:langField \"lang\" ;",
+ " text:map (",
+ " [ text:field \"label\" ; text:predicate rdfs:label ]",
+ " [ text:field \"comment\" ; text:predicate rdfs:comment ]",
+ " ) ."
+ );
+ }
+
+ @Before
+ public void before() { // assembles the text dataset described by SPEC before each test
+ Reader reader = new StringReader(SPEC);
+ Model specModel = ModelFactory.createDefaultModel();
+ specModel.read(reader, "", "TURTLE"); // parse the assembler description into a scratch model
+ TextAssembler.init(); // NOTE(review): presumably registers the text: assembler types needed by open() - confirm
+ Resource root = specModel.getResource(SPEC_ROOT_URI);
+ dataset = (Dataset) Assembler.general.open(root); // dataset is not declared in this class; presumably a field of AbstractTestDatasetWithTextIndex
+ }
+
+ @After
+ public void after() { // closes the dataset that before() opened
+ dataset.close();
+ }
+
+ protected Map<String,Literal> doTestSearchWithLiterals(String turtle, String queryString, Set<String> expectedEntityURIs) { // loads turtle, runs queryString, returns each ?literal keyed by its ?s URI
+ Model model = dataset.getDefaultModel();
+ Reader reader = new StringReader(turtle);
+ dataset.begin(ReadWrite.WRITE); // NOTE(review): no try/finally here - if read() throws, this write transaction is never ended
+ model.read(reader, "", "TURTLE");
+ dataset.commit();
+
+ Map<String,Literal> literals = new HashMap<>();
+ Query query = QueryFactory.create(queryString) ;
+ dataset.begin(ReadWrite.READ);
+ try(QueryExecution qexec = QueryExecutionFactory.create(query, dataset)) {
+ ResultSet results = qexec.execSelect() ;
+ assertEquals(expectedEntityURIs.size() > 0, results.hasNext()); // rows must exist exactly when matches are expected
+ int count;
+ for (count=0; results.hasNext(); count++) {
+ QuerySolution soln = results.nextSolution();
+ String entityUri = soln.getResource("s").getURI(); // the query must project ?s ...
+ assertTrue(expectedEntityURIs.contains(entityUri)); // every hit must be one of the expected subjects
+ Literal literal = soln.getLiteral("literal"); // ... and ?literal
+ assertNotNull(literal);
+ literals.put(entityUri, literal); // a later row for the same subject replaces the earlier literal
+ }
+ assertEquals(expectedEntityURIs.size(), count); // count is the number of rows, not of distinct subjects
+ }
+ finally {
+ dataset.end() ; // always release the read transaction
+ }
+ return literals;
+ }
+
+ @Test
+ public void testLiteralValue() {
+ // test basic capturing of the literal value in a variable
+ final String turtle = PF_DATA ; // PF_DATA, QUERY_PROLOG and R_S1 are not defined in this class; presumably inherited fixtures
+ String queryString = StrUtils.strjoinNL(
+ QUERY_PROLOG,
+ "SELECT ?s ?literal",
+ "WHERE {",
+ " (?s ?score ?literal) text:query ('text') .", // third slot of the subject list receives the stored literal
+ "}"
+ );
+
+ Set<String> expectedURIs = new HashSet<>();
+ expectedURIs.addAll( Arrays.asList( R_S1 ) ) ;
+ Map<String,Literal> literals = doTestSearchWithLiterals(turtle, queryString, expectedURIs);
+ Literal value = literals.get(R_S1);
+ assertNotNull(value);
+ assertEquals(NodeFactory.createLiteral("text"), value.asNode()); // expected node is built without a language tag or datatype argument
+ }
+
+ @Test
+ public void testLiteralValueNonDefaultField() {
+ // test capturing of the literal value from a non-default field (rdfs:comment)
+ final String testName = "testLiteralValueNonDefaultField";
+ final String turtle = StrUtils.strjoinNL(
+ TURTLE_PROLOG,
+ "<" + RESOURCE_BASE + testName + ">",
+ " rdfs:comment 'a text comment'",
+ "."
+ );
+ String queryString = StrUtils.strjoinNL(
+ QUERY_PROLOG,
+ "SELECT ?s ?literal",
+ "WHERE {",
+ " (?s ?score ?literal) text:query (rdfs:comment 'text') .", // the predicate is passed before the search string to pick the field
+ "}"
+ );
+
+ Set<String> expectedURIs = new HashSet<>();
+ expectedURIs.addAll( Arrays.asList( RESOURCE_BASE + testName ) ) ;
+ Map<String,Literal> literals = doTestSearchWithLiterals(turtle, queryString, expectedURIs);
+ Literal value = literals.get(RESOURCE_BASE + testName);
+ assertNotNull(value);
+ assertEquals(NodeFactory.createLiteral("a text comment"), value.asNode()); // the whole literal is expected back, not just the matched word
+ }
+
+ @Test
+ public void testLiteralValueWithLanguage() {
+ // test capturing of the literal value in a variable, with language tag
+ final String testName = "testLiteralValueWithLanguage";
+ final String turtle = StrUtils.strjoinNL(
+ TURTLE_PROLOG,
+ "<" + RESOURCE_BASE + testName + ">",
+ " rdfs:label 'English language text'@en",
+ "."
+ );
+ String queryString = StrUtils.strjoinNL(
+ QUERY_PROLOG,
+ "SELECT ?s ?literal",
+ "WHERE {",
+ " (?s ?score ?literal) text:query ('text') .", // no predicate given, so the default field (label) is searched
+ "}"
+ );
+
+ Set<String> expectedURIs = new HashSet<>();
+ expectedURIs.addAll( Arrays.asList( RESOURCE_BASE + testName ) ) ;
+ Map<String,Literal> literals = doTestSearchWithLiterals(turtle, queryString, expectedURIs);
+ Literal value = literals.get( RESOURCE_BASE + testName );
+ assertNotNull(value);
+ assertEquals(NodeFactory.createLiteral("English language text", "en"), value.asNode()); // lexical form and the en tag must both come back
+ }
+
+ @Test
+ public void testLiteralValueWithDatatype() {
+ // test capturing of the literal value in a variable, with datatype
+ final String testName = "testLiteralValueWithDatatype";
+ final String turtle = StrUtils.strjoinNL(
+ TURTLE_PROLOG,
+ "<" + RESOURCE_BASE + testName + ">",
+ " rdfs:comment true", // a bare true in Turtle is an xsd:boolean literal
+ "."
+ );
+ String queryString = StrUtils.strjoinNL(
+ QUERY_PROLOG,
+ "SELECT ?s ?literal",
+ "WHERE {",
+ " (?s ?score ?literal) text:query (rdfs:comment 'true') .", // searches the comment field for the lexical form
+ "}"
+ );
+
+ Set<String> expectedURIs = new HashSet<>();
+ expectedURIs.addAll( Arrays.asList( RESOURCE_BASE + testName ) ) ;
+ Map<String,Literal> literals = doTestSearchWithLiterals(turtle, queryString, expectedURIs);
+ Literal value = literals.get( RESOURCE_BASE + testName );
+ assertNotNull(value);
+ assertEquals(NodeFactory.createLiteral("true", XSDDatatype.XSDboolean), value.asNode()); // the datatype must come back too, not a plain string
+ }
+
+}
[2/3] jena git commit: safer to use equals() for datatype comparison
Posted by an...@apache.org.
safer to use equals() for datatype comparison
Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/0dd62a8b
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/0dd62a8b
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/0dd62a8b
Branch: refs/heads/master
Commit: 0dd62a8beac7484e9fd174f91c63eb12732e8147
Parents: 1592c33
Author: Osma Suominen <os...@aalto.fi>
Authored: Sat Jun 27 00:20:16 2015 +0300
Committer: Osma Suominen <os...@aalto.fi>
Committed: Sat Jun 27 00:20:16 2015 +0300
----------------------------------------------------------------------
.../src/main/java/org/apache/jena/query/text/TextIndexLucene.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/jena/blob/0dd62a8b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
index c02037c..e209326 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
@@ -258,7 +258,7 @@ public class TextIndexLucene implements TextIndex {
RDFDatatype datatype = entity.getDatatype();
if (lang != null && !"".equals(lang)) {
doc.add(new Field(langField, lang, StringField.TYPE_STORED));
- } else if (datatype != null && datatype != XSDDatatype.XSDstring) {
+ } else if (datatype != null && !datatype.equals(XSDDatatype.XSDstring)) {
// for non-string and non-langString datatypes, store the datatype in langField
doc.add(new Field(langField, DATATYPE_PREFIX + datatype.getURI(), StringField.TYPE_STORED));
}
[3/3] jena git commit: JENA-978: jena-text stored literals: initial
functionality and tests for Lucene
Posted by an...@apache.org.
JENA-978: jena-text stored literals: initial functionality and tests for Lucene
Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/b7eac624
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/b7eac624
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/b7eac624
Branch: refs/heads/master
Commit: b7eac624cfe5c95b4a7f6ecddbdfc27bd361da0a
Parents: 01bc520 0dd62a8
Author: Andy Seaborne <an...@apache.org>
Authored: Fri Jul 3 17:59:22 2015 +0100
Committer: Andy Seaborne <an...@apache.org>
Committed: Fri Jul 3 18:01:20 2015 +0100
----------------------------------------------------------------------
.../jena/query/text/DatasetGraphText.java | 2 +-
.../java/org/apache/jena/query/text/Entity.java | 10 +-
.../org/apache/jena/query/text/TextHit.java | 10 +-
.../jena/query/text/TextHitConverter.java | 11 +-
.../org/apache/jena/query/text/TextIndex.java | 4 +-
.../apache/jena/query/text/TextIndexConfig.java | 9 +
.../apache/jena/query/text/TextIndexLucene.java | 52 +++-
.../apache/jena/query/text/TextIndexSolr.java | 13 +-
.../apache/jena/query/text/TextQueryFuncs.java | 4 +-
.../org/apache/jena/query/text/TextQueryPF.java | 50 +++-
.../assembler/TextIndexLuceneAssembler.java | 11 +
.../jena/query/text/assembler/TextVocab.java | 1 +
.../org/apache/jena/query/text/TS_Text.java | 1 +
.../TestDatasetWithLuceneStoredLiterals.java | 248 +++++++++++++++++++
14 files changed, 390 insertions(+), 36 deletions(-)
----------------------------------------------------------------------