You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2015/07/03 19:02:17 UTC

[1/3] jena git commit: jena-text stored literals: initial functionality and tests for Lucene

Repository: jena
Updated Branches:
  refs/heads/master 01bc520ed -> b7eac624c


jena-text stored literals: initial functionality and tests for Lucene


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/1592c33f
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/1592c33f
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/1592c33f

Branch: refs/heads/master
Commit: 1592c33f21e5337ecfa74706f5a675e6c57f9967
Parents: 6a08429
Author: Osma Suominen <os...@aalto.fi>
Authored: Fri Jun 26 09:53:10 2015 +0300
Committer: Osma Suominen <os...@aalto.fi>
Committed: Fri Jun 26 16:32:00 2015 +0300

----------------------------------------------------------------------
 .../jena/query/text/DatasetGraphText.java       |   2 +-
 .../java/org/apache/jena/query/text/Entity.java |  10 +-
 .../org/apache/jena/query/text/TextHit.java     |  10 +-
 .../jena/query/text/TextHitConverter.java       |  11 +-
 .../org/apache/jena/query/text/TextIndex.java   |   4 +-
 .../apache/jena/query/text/TextIndexConfig.java |   9 +
 .../apache/jena/query/text/TextIndexLucene.java |  52 +++-
 .../apache/jena/query/text/TextIndexSolr.java   |  13 +-
 .../apache/jena/query/text/TextQueryFuncs.java  |   4 +-
 .../org/apache/jena/query/text/TextQueryPF.java |  50 +++-
 .../assembler/TextIndexLuceneAssembler.java     |  11 +
 .../jena/query/text/assembler/TextVocab.java    |   1 +
 .../org/apache/jena/query/text/TS_Text.java     |   1 +
 .../TestDatasetWithLuceneStoredLiterals.java    | 248 +++++++++++++++++++
 14 files changed, 390 insertions(+), 36 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/1592c33f/jena-text/src/main/java/org/apache/jena/query/text/DatasetGraphText.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/DatasetGraphText.java b/jena-text/src/main/java/org/apache/jena/query/text/DatasetGraphText.java
index e1d7306..f63e12a 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/DatasetGraphText.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/DatasetGraphText.java
@@ -99,7 +99,7 @@ public class DatasetGraphText extends DatasetGraphMonitor implements Transaction
             String f = textIndex.getDocDef().getField(predicate) ;
             queryString = f + ":" + queryString ;
         }
-        List<TextHit> results = textIndex.query(queryString, limit) ;
+        List<TextHit> results = textIndex.query(predicate, queryString, limit) ;
         return results.iterator() ;
     }
 

http://git-wip-us.apache.org/repos/asf/jena/blob/1592c33f/jena-text/src/main/java/org/apache/jena/query/text/Entity.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/Entity.java b/jena-text/src/main/java/org/apache/jena/query/text/Entity.java
index 4ca5782..83e34c6 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/Entity.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/Entity.java
@@ -19,6 +19,7 @@
 package org.apache.jena.query.text;
 
 import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.jena.datatypes.RDFDatatype;
 
 import java.util.HashMap ;
 import java.util.Map ;
@@ -28,17 +29,20 @@ public class Entity
     private final String id ;
     private final String graph ;
     private final String language ;
+    private final RDFDatatype datatype ;
     private final Map<String, Object> map = new HashMap<>() ;
 
     public Entity(String entityId, String entityGraph) {
-        this(entityId, entityGraph, null);
+        this(entityId, entityGraph, null, null);
     }
 
-    public Entity(String entityId, String entityGraph, String lang) {
+    public Entity(String entityId, String entityGraph, String lang, RDFDatatype datatype) {
         this.id = entityId ;
         this.graph = entityGraph;
         this.language = lang;
+        this.datatype = datatype;
     }
+
     /** @deprecated Use {@linkplain #Entity(String, String)} */
     @Deprecated
     public Entity(String entityId)          { this(entityId, null) ; }
@@ -49,6 +53,8 @@ public class Entity
 
     public String getLanguage()                { return language ; }
 
+    public RDFDatatype getDatatype()        { return datatype ; }
+
     public void put(String key, Object value)
     { map.put(key, value) ; }
     

http://git-wip-us.apache.org/repos/asf/jena/blob/1592c33f/jena-text/src/main/java/org/apache/jena/query/text/TextHit.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextHit.java b/jena-text/src/main/java/org/apache/jena/query/text/TextHit.java
index a60f78e..75ffce1 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextHit.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextHit.java
@@ -25,12 +25,14 @@ public class TextHit
 {
     private Node node;
     private float score;
+    private Node literal;
 
-    public TextHit(Node node, float score) {
+    public TextHit(Node node, float score, Node literal) {
         this.node = node;
         this.score = score;
+        this.literal = literal;
     }
-    
+
     public Node getNode() {
         return this.node;
     }
@@ -38,4 +40,8 @@ public class TextHit
     public float getScore() {
         return this.score;
     }
+
+    public Node getLiteral() {
+        return this.literal;
+    }
 }

http://git-wip-us.apache.org/repos/asf/jena/blob/1592c33f/jena-text/src/main/java/org/apache/jena/query/text/TextHitConverter.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextHitConverter.java b/jena-text/src/main/java/org/apache/jena/query/text/TextHitConverter.java
index 2894680..c15a449 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextHitConverter.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextHitConverter.java
@@ -32,20 +32,25 @@ public class TextHitConverter implements Function<TextHit, Binding>
     private Binding binding;
     private Var match;
     private Var score;
+    private Var literal;
 
-    public TextHitConverter(Binding binding, Var match, Var score) {
+    public TextHitConverter(Binding binding, Var match, Var score, Var literal) {
         this.binding = binding;
         this.match = match;
         this.score = score;
+        this.literal = literal;
     }
     
     @Override
     public Binding apply(TextHit hit) {
-        if (score == null)
+        if (score == null && literal == null)
             return BindingFactory.binding(binding, match, hit.getNode());
         BindingMap bmap = BindingFactory.create(binding);
         bmap.add(match, hit.getNode());
-        bmap.add(score, NodeFactoryExtra.floatToNode(hit.getScore()));
+        if (score != null)
+            bmap.add(score, NodeFactoryExtra.floatToNode(hit.getScore()));
+        if (literal != null)
+            bmap.add(literal, hit.getLiteral());
         return bmap;
     }
 }

http://git-wip-us.apache.org/repos/asf/jena/blob/1592c33f/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java
index e07f8e5..999eb46 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java
@@ -46,9 +46,9 @@ public interface TextIndex extends Closeable //, Transactional
     /** Access the index - limit if -1 for as many as possible 
      * Throw QueryParseException for syntax errors in the query string.
      */ 
-    List<TextHit> query(String qs, int limit) ;
+    List<TextHit> query(Node property, String qs, int limit) ;
     
-    List<TextHit> query(String qs) ;
+    List<TextHit> query(Node property, String qs) ;
 
     EntityDefinition getDocDef() ;
 }

http://git-wip-us.apache.org/repos/asf/jena/blob/1592c33f/jena-text/src/main/java/org/apache/jena/query/text/TextIndexConfig.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexConfig.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexConfig.java
index feeb324..98e82f6 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexConfig.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexConfig.java
@@ -26,6 +26,7 @@ public class TextIndexConfig {
     Analyzer analyzer;
     Analyzer queryAnalyzer;
     boolean multilingualSupport;
+    boolean valueStored;
 
     public TextIndexConfig(EntityDefinition entDef) {
         this.entDef = entDef;
@@ -58,4 +59,12 @@ public class TextIndexConfig {
     public void setMultilingualSupport(boolean multilingualSupport) {
         this.multilingualSupport = multilingualSupport;
     }
+
+    public boolean isValueStored() {
+        return valueStored;
+    }
+
+    public void setValueStored(boolean valueStored) {
+        this.valueStored = valueStored;
+    }
 }

http://git-wip-us.apache.org/repos/asf/jena/blob/1592c33f/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
index c677184..c02037c 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
@@ -22,6 +22,9 @@ import java.io.IOException ;
 import java.util.* ;
 import java.util.Map.Entry ;
 
+import org.apache.jena.datatypes.RDFDatatype ;
+import org.apache.jena.datatypes.TypeMapper ;
+import org.apache.jena.datatypes.xsd.XSDDatatype ;
 import org.apache.jena.graph.Node ;
 import org.apache.jena.graph.NodeFactory ;
 import org.apache.jena.sparql.util.NodeFactoryExtra ;
@@ -47,6 +50,8 @@ public class TextIndexLucene implements TextIndex {
 
     private static int             MAX_N    = 10000 ;
     public static final Version    VER      = Version.LUCENE_46 ;
+    // prefix for storing datatype URIs in the index, to distinguish them from language tags
+    private static final String    DATATYPE_PREFIX = "^^";
 
     public static final FieldType  ftIRI ;
     static {
@@ -57,14 +62,12 @@ public class TextIndexLucene implements TextIndex {
         ftIRI.freeze() ;
     }
     public static final FieldType  ftString = StringField.TYPE_NOT_STORED ;
-    public static final FieldType  ftText   = TextField.TYPE_NOT_STORED ;
-    // Bigger index, easier to debug!
-    // public static final FieldType ftText = TextField.TYPE_STORED ;
 
     private final EntityDefinition docDef ;
     private final Directory        directory ;
     private final Analyzer         analyzer ;
     private final Analyzer         queryAnalyzer ;
+    private final FieldType        ftText ;
 
     // The IndexWriter can't be final because we may have to recreate it if rollback() is called.
     // However, it needs to be volatile in case the next write transaction is on a different thread,
@@ -99,6 +102,9 @@ public class TextIndexLucene implements TextIndex {
         this.analyzer = new PerFieldAnalyzerWrapper(
                 (null != config.getAnalyzer()) ? config.getAnalyzer() : new StandardAnalyzer(VER), analyzerPerField) ;
         this.queryAnalyzer = (null != config.getQueryAnalyzer()) ? config.getQueryAnalyzer() : this.analyzer ;
+        this.ftText = config.isValueStored() ? TextField.TYPE_STORED : TextField.TYPE_NOT_STORED ;
+        if (config.isValueStored() && docDef.getLangField() == null)
+            log.warn("Values stored but langField not set. Returned values will not have language tag or datatype.");
 
         openIndexWriter();
     }
@@ -249,8 +255,13 @@ public class TextIndexLucene implements TextIndex {
             doc.add( new Field(e.getKey(), (String) e.getValue(), ftText) );
             if (langField != null) {
                 String lang = entity.getLanguage();
-                if (lang != null && !"".equals(lang))
+                RDFDatatype datatype = entity.getDatatype();
+                if (lang != null && !"".equals(lang)) {
                     doc.add(new Field(langField, lang, StringField.TYPE_STORED));
+                } else if (datatype != null && datatype != XSDDatatype.XSDstring) {
+                    // for non-string and non-langString datatypes, store the datatype in langField
+                    doc.add(new Field(langField, DATATYPE_PREFIX + datatype.getURI(), StringField.TYPE_STORED));
+                }
             }
             if (uidField != null) {
                 String hash = entity.getChecksum(e.getKey(), (String) e.getValue());
@@ -319,14 +330,14 @@ public class TextIndexLucene implements TextIndex {
     }
 
     @Override
-    public List<TextHit> query(String qs) {
-        return query(qs, MAX_N) ;
+    public List<TextHit> query(Node property, String qs) {
+        return query(property, qs, MAX_N) ;
     }
 
     @Override
-    public List<TextHit> query(String qs, int limit) {
+    public List<TextHit> query(Node property, String qs, int limit) {
         try (IndexReader indexReader = DirectoryReader.open(directory)) {
-            return query$(indexReader, qs, limit) ;
+            return query$(indexReader, property, qs, limit) ;
         }
         catch (ParseException ex) {
             throw new TextIndexParseException(qs, ex.getMessage()) ;
@@ -336,7 +347,7 @@ public class TextIndexLucene implements TextIndex {
         }
     }
 
-    private List<TextHit> query$(IndexReader indexReader, String qs, int limit) throws ParseException, IOException {
+    private List<TextHit> query$(IndexReader indexReader, Node property, String qs, int limit) throws ParseException, IOException {
         IndexSearcher indexSearcher = new IndexSearcher(indexReader) ;
         Query query = preParseQuery(qs, docDef.getPrimaryField(), queryAnalyzer) ;
         if ( limit <= 0 )
@@ -349,9 +360,30 @@ public class TextIndexLucene implements TextIndex {
         for ( ScoreDoc sd : sDocs ) {
             Document doc = indexSearcher.doc(sd.doc) ;
             String[] values = doc.getValues(docDef.getEntityField()) ;
+
+            Node literal = null;
+            String field = (property != null) ? docDef.getField(property) : docDef.getPrimaryField();
+            String[] lexicals = doc.getValues(field) ;
+            if (lexicals.length > 0) {
+                String lexical = lexicals[0];
+                String[] langs = doc.getValues(docDef.getLangField()) ;
+                if (langs.length > 0) {
+                    String lang = langs[0];
+                    if (lang.startsWith(DATATYPE_PREFIX)) {
+                        String datatype = lang.substring(DATATYPE_PREFIX.length());
+                        TypeMapper tmap = TypeMapper.getInstance();
+                        literal = NodeFactory.createLiteral(lexical, tmap.getSafeTypeByName(datatype));
+                    } else {
+                        literal = NodeFactory.createLiteral(lexical, lang);
+                    }
+                } else {
+                    literal = NodeFactory.createLiteral(lexical);
+                }
+            }
+
             for ( String v : values ) {
                 Node n = TextQueryFuncs.stringToNode(v) ;
-                TextHit hit = new TextHit(n, sd.score);
+                TextHit hit = new TextHit(n, sd.score, literal);
                 results.add(hit) ;
             }
         }

http://git-wip-us.apache.org/repos/asf/jena/blob/1592c33f/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java
index d806932..5d645c6 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java
@@ -184,10 +184,10 @@ public class TextIndexSolr implements TextIndex
     }
 
     @Override
-    public List<TextHit> query(String qs) { return query(qs, 0) ; } 
+    public List<TextHit> query(Node property, String qs) { return query(property, qs, 0) ; }
 
     @Override
-    public List<TextHit> query(String qs, int limit) {
+    public List<TextHit> query(Node property, String qs, int limit) {
         SolrDocumentList solrResults = solrQuery(qs, limit) ;
         List<TextHit> results = new ArrayList<>() ;
 
@@ -196,7 +196,14 @@ public class TextIndexSolr implements TextIndex
             // log.info("Entity: "+uriStr) ;
             Node n = TextQueryFuncs.stringToNode(str) ;
             Float score = (Float) sd.getFirstValue("score");
-            TextHit hit = new TextHit(n, score.floatValue());
+            // capture literal value, if stored
+            Node literal = null;
+            String field = (property != null) ? docDef.getField(property) : docDef.getPrimaryField();
+            String value = (String) sd.getFirstValue(field);
+            if (value != null) {
+                literal = NodeFactory.createLiteral(value); // FIXME: language and datatype
+            }
+            TextHit hit = new TextHit(n, score.floatValue(), literal);
             results.add(hit) ;
         }
 

http://git-wip-us.apache.org/repos/asf/jena/blob/1592c33f/jena-text/src/main/java/org/apache/jena/query/text/TextQueryFuncs.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextQueryFuncs.java b/jena-text/src/main/java/org/apache/jena/query/text/TextQueryFuncs.java
index b41d36b..7854726 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextQueryFuncs.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextQueryFuncs.java
@@ -19,6 +19,7 @@
 package org.apache.jena.query.text;
 
 import org.apache.jena.atlas.logging.Log ;
+import org.apache.jena.datatypes.RDFDatatype;
 import org.apache.jena.graph.Node ;
 import org.apache.jena.graph.NodeFactory ;
 import org.apache.jena.rdf.model.AnonId ;
@@ -78,7 +79,8 @@ public class TextQueryFuncs {
         String x = TextQueryFuncs.subjectToString(s) ;
         String graphText = TextQueryFuncs.graphNodeToString(g) ;
         String language = o.getLiteral().language() ;
-        Entity entity = new Entity(x, graphText, language) ;
+        RDFDatatype datatype = o.getLiteral().getDatatype() ;
+        Entity entity = new Entity(x, graphText, language, datatype) ;
         String graphField = defn.getGraphField() ;
         if ( defn.getGraphField() != null )
             entity.put(graphField, graphText) ;

http://git-wip-us.apache.org/repos/asf/jena/blob/1592c33f/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java b/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
index 0032be8..3a0b214 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
@@ -33,6 +33,9 @@ import org.apache.jena.sparql.core.* ;
 import org.apache.jena.sparql.engine.ExecutionContext ;
 import org.apache.jena.sparql.engine.QueryIterator ;
 import org.apache.jena.sparql.engine.binding.Binding ;
+import org.apache.jena.sparql.engine.binding.BindingFactory ;
+import org.apache.jena.sparql.engine.binding.BindingMap ;
+import org.apache.jena.sparql.engine.iterator.QueryIterExtendByVar ;
 import org.apache.jena.sparql.engine.iterator.QueryIterPlainWrapper ;
 import org.apache.jena.sparql.engine.iterator.QueryIterSlice ;
 import org.apache.jena.sparql.mgt.Explain ;
@@ -65,8 +68,12 @@ public class TextQueryPF extends PropertyFunctionBase {
         DatasetGraph dsg = execCxt.getDataset() ;
         textIndex = chooseTextIndex(dsg) ;
 
-        if (argSubject.isList() && argSubject.getArgListSize() != 2)
-            throw new QueryBuildException("Subject has "+argSubject.getArgList().size()+" elements, not 2: "+argSubject);
+        if (argSubject.isList()) {
+            int size = argSubject.getArgListSize();
+            if (size != 2 && size != 3) {
+                throw new QueryBuildException("Subject has "+argSubject.getArgList().size()+" elements, not 2 or 3: "+argSubject);
+            }
+        }
 
         if (argObject.isList()) {
             List<Node> list = argObject.getArgList() ;
@@ -148,6 +155,7 @@ public class TextQueryPF extends PropertyFunctionBase {
         
         Node s = null;
         Node score = null;
+        Node literal = null;
 
         if (argSubject.isList()) {
             // Length checked in build()
@@ -156,6 +164,12 @@ public class TextQueryPF extends PropertyFunctionBase {
             
             if (!score.isVariable())
                 throw new QueryExecException("Hit score is not a variable: "+argSubject) ;
+
+            if (argSubject.getArgListSize() > 2) {
+                literal = argSubject.getArg(2);
+                if (!literal.isVariable())
+                    throw new QueryExecException("Hit literal is not a variable: "+argSubject) ;
+            }
         } else {
             s = argSubject.getArg() ;
         }
@@ -173,32 +187,34 @@ public class TextQueryPF extends PropertyFunctionBase {
         // ----
 
         QueryIterator qIter = (Var.isVar(s)) 
-            ? variableSubject(binding, s, score, match, execCxt) 
-            : concreteSubject(binding, s, score, match, execCxt) ;
+            ? variableSubject(binding, s, score, literal, match, execCxt)
+            : concreteSubject(binding, s, score, literal, match, execCxt) ;
         if (match.getLimit() >= 0)
             qIter = new QueryIterSlice(qIter, 0, match.getLimit(), execCxt) ;
         return qIter ;
     }
 
-    private QueryIterator variableSubject(Binding binding, Node s, Node score, StrMatch match, ExecutionContext execCxt) {
+    private QueryIterator variableSubject(Binding binding, Node s, Node score, Node literal, StrMatch match, ExecutionContext execCxt) {
         Var sVar = Var.alloc(s) ;
         Var scoreVar = (score==null) ? null : Var.alloc(score) ;
-        List<TextHit> r = query(match.getQueryString(), match.getLimit(), execCxt) ;
-        Function<TextHit,Binding> converter = new TextHitConverter(binding, sVar, scoreVar);
+        Var literalVar = (literal==null) ? null : Var.alloc(literal) ;
+        List<TextHit> r = query(match.getProperty(), match.getQueryString(), match.getLimit(), execCxt) ;
+        Function<TextHit,Binding> converter = new TextHitConverter(binding, sVar, scoreVar, literalVar);
         Iterator<Binding> bIter = Iter.map(r.iterator(), converter);
         QueryIterator qIter = new QueryIterPlainWrapper(bIter, execCxt);
         return qIter ;
     }
 
-    private QueryIterator concreteSubject(Binding binding, Node s, Node score, StrMatch match, ExecutionContext execCxt) {
+    private QueryIterator concreteSubject(Binding binding, Node s, Node score, Node literal, StrMatch match, ExecutionContext execCxt) {
         if (!s.isURI()) {
             log.warn("Subject not a URI: " + s) ;
             return IterLib.noResults(execCxt) ;
         }
 
         Var scoreVar = (score==null) ? null : Var.alloc(score) ;
+        Var literalVar = (literal==null) ? null : Var.alloc(literal) ;
         String qs = match.getQueryString() ;
-        List<TextHit> x = query(match.getQueryString(), -1, execCxt) ;
+        List<TextHit> x = query(match.getProperty(), match.getQueryString(), -1, execCxt) ;
         
         if ( x == null ) // null return value - empty result
             return IterLib.noResults(execCxt) ;
@@ -206,7 +222,17 @@ public class TextQueryPF extends PropertyFunctionBase {
         for (TextHit hit : x ) {
             if (hit.getNode().equals(s)) {
                 // found the node among the hits
-                return IterLib.oneResult(binding, scoreVar, NodeFactoryExtra.floatToNode(hit.getScore()), execCxt) ;
+                if (literalVar == null) {
+                    return IterLib.oneResult(binding, scoreVar, NodeFactoryExtra.floatToNode(hit.getScore()), execCxt);
+                }
+                BindingMap bmap = BindingFactory.create(binding);
+                if (scoreVar != null) {
+                    bmap.add(scoreVar, NodeFactoryExtra.floatToNode(hit.getScore()));
+                }
+                if (literalVar != null) {
+                    bmap.add(literalVar, hit.getLiteral());
+                }
+                return IterLib.result(bmap, execCxt) ;
             }
         }
 
@@ -214,7 +240,7 @@ public class TextQueryPF extends PropertyFunctionBase {
         return IterLib.noResults(execCxt) ;
     }
 
-    private List<TextHit> query(String queryString, int limit, ExecutionContext execCxt) {
+    private List<TextHit> query(Node property, String queryString, int limit, ExecutionContext execCxt) {
         // use the graph information in the text index if possible
         if (textIndex.getDocDef().getGraphField() != null
             && execCxt.getActiveGraph() instanceof GraphView) {
@@ -243,7 +269,7 @@ public class TextQueryPF extends PropertyFunctionBase {
         Explain.explain(execCxt.getContext(), "Text query: "+queryString) ;
         if ( log.isDebugEnabled())
             log.debug("Text query: {} ({})", queryString,limit) ;
-        return textIndex.query(queryString, limit) ;
+        return textIndex.query(property, queryString, limit) ;
     }
     
     /** Deconstruct the node or list object argument and make a StrMatch 

http://git-wip-us.apache.org/repos/asf/jena/blob/1592c33f/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
index abc6c97..98ffec7 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
@@ -105,12 +105,23 @@ public class TextIndexLuceneAssembler extends AssemblerBase {
                 isMultilingualSupport = mlsNode.asLiteral().getBoolean();
             }
 
+            boolean storeValues = false;
+            Statement storeValuesStatement = root.getProperty(pStoreValues);
+            if (null != storeValuesStatement) {
+                RDFNode svNode = storeValuesStatement.getObject();
+                if (! svNode.isLiteral()) {
+                    throw new TextIndexException("text:storeValues property must be a string : " + svNode);
+                }
+                storeValues = svNode.asLiteral().getBoolean();
+            }
+
             Resource r = GraphUtils.getResourceValue(root, pEntityMap) ;
             EntityDefinition docDef = (EntityDefinition)a.open(r) ;
             TextIndexConfig config = new TextIndexConfig(docDef);
             config.setAnalyzer(analyzer);
             config.setQueryAnalyzer(queryAnalyzer);
             config.setMultilingualSupport(isMultilingualSupport);
+            config.setValueStored(storeValues);
 
             return TextDatasetFactory.createLuceneIndex(directory, config) ;
         } catch (IOException e) {

http://git-wip-us.apache.org/repos/asf/jena/blob/1592c33f/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
index 743d773..fb14505 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
@@ -40,6 +40,7 @@ public class TextVocab
     public static final Property pServer            = Vocab.property(NS, "server") ;            // Solr
     public static final Property pDirectory         = Vocab.property(NS, "directory") ;         // Lucene
     public static final Property pMultilingualSupport   = Vocab.property(NS, "multilingualSupport") ;
+    public static final Property pStoreValues       = Vocab.property(NS, "storeValues") ;
     public static final Property pQueryAnalyzer     = Vocab.property(NS, "queryAnalyzer") ;
     public static final Property pEntityMap         = Vocab.property(NS, "entityMap") ;
     

http://git-wip-us.apache.org/repos/asf/jena/blob/1592c33f/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java b/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
index 07f141a..3459e43 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
@@ -34,6 +34,7 @@ import org.junit.runners.Suite.SuiteClasses ;
     , TestDatasetWithLuceneTextIndexWithLangField.class
     , TestDatasetWithLuceneGraphTextIndex.class
     , TestDatasetWithLuceneTextIndexDeletionSupport.class
+    , TestDatasetWithLuceneStoredLiterals.class
 
     // Embedded solr not supported 
     //, TestDatasetWithEmbeddedSolrTextIndex.class

http://git-wip-us.apache.org/repos/asf/jena/blob/1592c33f/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneStoredLiterals.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneStoredLiterals.java b/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneStoredLiterals.java
new file mode 100644
index 0000000..b55bbd7
--- /dev/null
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneStoredLiterals.java
@@ -0,0 +1,248 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.Reader ;
+import java.io.StringReader ;
+import java.util.Arrays ;
+import java.util.HashMap ;
+import java.util.HashSet ;
+import java.util.Map ;
+import java.util.Set ;
+
+import org.apache.jena.assembler.Assembler ;
+import org.apache.jena.atlas.lib.StrUtils ;
+import org.apache.jena.datatypes.xsd.XSDDatatype ;
+import org.apache.jena.graph.NodeFactory ;
+import org.apache.jena.query.Dataset ;
+import org.apache.jena.query.Query ;
+import org.apache.jena.query.QueryExecution ;
+import org.apache.jena.query.QueryExecutionFactory ;
+import org.apache.jena.query.QueryFactory ;
+import org.apache.jena.query.QuerySolution ;
+import org.apache.jena.query.ReadWrite ;
+import org.apache.jena.query.ResultSet ;
+import org.apache.jena.query.text.assembler.TextAssembler ;
+import org.apache.jena.rdf.model.Literal ;
+import org.apache.jena.rdf.model.Model ;
+import org.apache.jena.rdf.model.ModelFactory ;
+import org.apache.jena.rdf.model.Resource ;
+import org.junit.After ;
+import org.junit.Before ;
+import org.junit.Test ;
+
+public class TestDatasetWithLuceneStoredLiterals extends AbstractTestDatasetWithTextIndex {
+
+    private static final String SPEC_BASE = "http://example.org/spec#";
+    private static final String SPEC_ROOT_LOCAL = "lucene_text_dataset";
+    private static final String SPEC_ROOT_URI = SPEC_BASE + SPEC_ROOT_LOCAL;
+    private static final String SPEC;
+    static {
+        SPEC = StrUtils.strjoinNL(
+                    "prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> ",
+                    "prefix ja:   <http://jena.hpl.hp.com/2005/11/Assembler#> ",
+                    "prefix tdb:  <http://jena.hpl.hp.com/2008/tdb#>",
+                    "prefix text: <http://jena.apache.org/text#>",
+                    "prefix :     <" + SPEC_BASE + ">",
+                    "",
+                    "[] ja:loadClass    \"org.apache.jena.query.text.TextQuery\" .",
+                    "text:TextDataset      rdfs:subClassOf   ja:RDFDataset .",
+                    "text:TextIndexLucene  rdfs:subClassOf   text:TextIndex .",
+
+                    ":" + SPEC_ROOT_LOCAL,
+                    "    a              text:TextDataset ;",
+                    "    text:dataset   :dataset ;",
+                    "    text:index     :indexLucene ;",
+                    "    .",
+                    "",
+                    ":dataset",
+                    "    a               ja:RDFDataset ;",
+                    "    ja:defaultGraph :graph ;",
+                    ".",
+                    ":graph",
+                    "    a               ja:MemoryModel ;",
+                    ".",
+                    "",
+                    ":indexLucene",
+                    "    a text:TextIndexLucene ;",
+                    "    text:directory \"mem\" ;",
+                    "    text:storeValues true ;",
+                    "    text:entityMap :entMap ;",
+                    "    .",
+                    "",
+                    ":entMap",
+                    "    a text:EntityMap ;",
+                    "    text:entityField      \"uri\" ;",
+                    "    text:defaultField     \"label\" ;",
+                    "    text:langField     \"lang\" ;",
+                    "    text:map (",
+                    "         [ text:field \"label\" ; text:predicate rdfs:label ]",
+                    "         [ text:field \"comment\" ; text:predicate rdfs:comment ]",
+                    "         ) ."
+                    );
+    }
+
+    @Before
+    public void before() {
+        Reader reader = new StringReader(SPEC);
+        Model specModel = ModelFactory.createDefaultModel();
+        specModel.read(reader, "", "TURTLE");
+        TextAssembler.init();
+        Resource root = specModel.getResource(SPEC_ROOT_URI);
+        dataset = (Dataset) Assembler.general.open(root);
+    }
+
+    @After
+    public void after() {
+        dataset.close();
+    }
+
+    protected Map<String,Literal> doTestSearchWithLiterals(String turtle, String queryString, Set<String> expectedEntityURIs) {
+        Model model = dataset.getDefaultModel();
+        Reader reader = new StringReader(turtle);
+        dataset.begin(ReadWrite.WRITE);
+        model.read(reader, "", "TURTLE");
+        dataset.commit();
+
+        Map<String,Literal> literals = new HashMap<>();
+        Query query = QueryFactory.create(queryString) ;
+        dataset.begin(ReadWrite.READ);
+        try(QueryExecution qexec = QueryExecutionFactory.create(query, dataset)) {
+            ResultSet results = qexec.execSelect() ;
+            assertEquals(expectedEntityURIs.size() > 0, results.hasNext());
+            int count;
+            for (count=0; results.hasNext(); count++) {
+                QuerySolution soln = results.nextSolution();
+                String entityUri = soln.getResource("s").getURI();
+                assertTrue(expectedEntityURIs.contains(entityUri));
+                Literal literal = soln.getLiteral("literal");
+                assertNotNull(literal);
+                literals.put(entityUri, literal);
+            }
+            assertEquals(expectedEntityURIs.size(), count);
+        }
+        finally {
+            dataset.end() ;
+        }
+        return literals;
+    }
+
+    @Test
+    public void testLiteralValue() {
+        // test basic capturing of the literal value in a variable
+        final String turtle = PF_DATA ;
+        String queryString = StrUtils.strjoinNL(
+                QUERY_PROLOG,
+                "SELECT ?s ?literal",
+                "WHERE {",
+                "    (?s ?score ?literal) text:query ('text') .",
+                "}"
+                );
+
+        Set<String> expectedURIs = new HashSet<>();
+        expectedURIs.addAll( Arrays.asList( R_S1 ) ) ;
+        Map<String,Literal> literals = doTestSearchWithLiterals(turtle, queryString, expectedURIs);
+        Literal value = literals.get(R_S1);
+        assertNotNull(value);
+        assertEquals(NodeFactory.createLiteral("text"), value.asNode());
+    }
+
+    @Test
+    public void testLiteralValueNonDefaultField() {
+        // test capturing of the literal value from a non-default field (rdfs:comment)
+        final String testName = "testLiteralValueNonDefaultField";
+        final String turtle = StrUtils.strjoinNL(
+                TURTLE_PROLOG,
+                "<" + RESOURCE_BASE + testName + ">",
+                "  rdfs:comment 'a text comment'",
+                "."
+                );
+        String queryString = StrUtils.strjoinNL(
+                QUERY_PROLOG,
+                "SELECT ?s ?literal",
+                "WHERE {",
+                "    (?s ?score ?literal) text:query (rdfs:comment 'text') .",
+                "}"
+                );
+
+        Set<String> expectedURIs = new HashSet<>();
+        expectedURIs.addAll( Arrays.asList( RESOURCE_BASE + testName ) ) ;
+        Map<String,Literal> literals = doTestSearchWithLiterals(turtle, queryString, expectedURIs);
+        Literal value = literals.get(RESOURCE_BASE + testName);
+        assertNotNull(value);
+        assertEquals(NodeFactory.createLiteral("a text comment"), value.asNode());
+    }
+
+    @Test
+    public void testLiteralValueWithLanguage() {
+        // test capturing of the literal value in a variable, with language tag
+        final String testName = "testLiteralValueWithLanguage";
+        final String turtle = StrUtils.strjoinNL(
+                TURTLE_PROLOG,
+                "<" + RESOURCE_BASE + testName + ">",
+                "  rdfs:label 'English language text'@en",
+                "."
+                );
+        String queryString = StrUtils.strjoinNL(
+                QUERY_PROLOG,
+                "SELECT ?s ?literal",
+                "WHERE {",
+                "    (?s ?score ?literal) text:query ('text') .",
+                "}"
+                );
+
+        Set<String> expectedURIs = new HashSet<>();
+        expectedURIs.addAll( Arrays.asList( RESOURCE_BASE + testName ) ) ;
+        Map<String,Literal> literals = doTestSearchWithLiterals(turtle, queryString, expectedURIs);
+        Literal value = literals.get( RESOURCE_BASE + testName );
+        assertNotNull(value);
+        assertEquals(NodeFactory.createLiteral("English language text", "en"), value.asNode());
+    }
+
+    @Test
+    public void testLiteralValueWithDatatype() {
+        // test capturing of the literal value in a variable, with datatype
+        final String testName = "testLiteralValueWithDatatype";
+        final String turtle = StrUtils.strjoinNL(
+                TURTLE_PROLOG,
+                "<" + RESOURCE_BASE + testName + ">",
+                "  rdfs:comment true",
+                "."
+                );
+        String queryString = StrUtils.strjoinNL(
+                QUERY_PROLOG,
+                "SELECT ?s ?literal",
+                "WHERE {",
+                "    (?s ?score ?literal) text:query (rdfs:comment 'true') .",
+                "}"
+                );
+
+        Set<String> expectedURIs = new HashSet<>();
+        expectedURIs.addAll( Arrays.asList( RESOURCE_BASE + testName ) ) ;
+        Map<String,Literal> literals = doTestSearchWithLiterals(turtle, queryString, expectedURIs);
+        Literal value = literals.get( RESOURCE_BASE + testName );
+        assertNotNull(value);
+        assertEquals(NodeFactory.createLiteral("true", XSDDatatype.XSDboolean), value.asNode());
+    }
+
+}


[2/3] jena git commit: safer to use equals() for datatype comparison

Posted by an...@apache.org.
safer to use equals() for datatype comparison


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/0dd62a8b
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/0dd62a8b
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/0dd62a8b

Branch: refs/heads/master
Commit: 0dd62a8beac7484e9fd174f91c63eb12732e8147
Parents: 1592c33
Author: Osma Suominen <os...@aalto.fi>
Authored: Sat Jun 27 00:20:16 2015 +0300
Committer: Osma Suominen <os...@aalto.fi>
Committed: Sat Jun 27 00:20:16 2015 +0300

----------------------------------------------------------------------
 .../src/main/java/org/apache/jena/query/text/TextIndexLucene.java  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/0dd62a8b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
index c02037c..e209326 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
@@ -258,7 +258,7 @@ public class TextIndexLucene implements TextIndex {
                 RDFDatatype datatype = entity.getDatatype();
                 if (lang != null && !"".equals(lang)) {
                     doc.add(new Field(langField, lang, StringField.TYPE_STORED));
-                } else if (datatype != null && datatype != XSDDatatype.XSDstring) {
+                } else if (datatype != null && !datatype.equals(XSDDatatype.XSDstring)) {
                     // for non-string and non-langString datatypes, store the datatype in langField
                     doc.add(new Field(langField, DATATYPE_PREFIX + datatype.getURI(), StringField.TYPE_STORED));
                 }


[3/3] jena git commit: JENA-978: jena-text stored literals: initial functionality and tests for Lucene

Posted by an...@apache.org.
JENA-978: jena-text stored literals: initial functionality and tests for Lucene


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/b7eac624
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/b7eac624
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/b7eac624

Branch: refs/heads/master
Commit: b7eac624cfe5c95b4a7f6ecddbdfc27bd361da0a
Parents: 01bc520 0dd62a8
Author: Andy Seaborne <an...@apache.org>
Authored: Fri Jul 3 17:59:22 2015 +0100
Committer: Andy Seaborne <an...@apache.org>
Committed: Fri Jul 3 18:01:20 2015 +0100

----------------------------------------------------------------------
 .../jena/query/text/DatasetGraphText.java       |   2 +-
 .../java/org/apache/jena/query/text/Entity.java |  10 +-
 .../org/apache/jena/query/text/TextHit.java     |  10 +-
 .../jena/query/text/TextHitConverter.java       |  11 +-
 .../org/apache/jena/query/text/TextIndex.java   |   4 +-
 .../apache/jena/query/text/TextIndexConfig.java |   9 +
 .../apache/jena/query/text/TextIndexLucene.java |  52 +++-
 .../apache/jena/query/text/TextIndexSolr.java   |  13 +-
 .../apache/jena/query/text/TextQueryFuncs.java  |   4 +-
 .../org/apache/jena/query/text/TextQueryPF.java |  50 +++-
 .../assembler/TextIndexLuceneAssembler.java     |  11 +
 .../jena/query/text/assembler/TextVocab.java    |   1 +
 .../org/apache/jena/query/text/TS_Text.java     |   1 +
 .../TestDatasetWithLuceneStoredLiterals.java    | 248 +++++++++++++++++++
 14 files changed, 390 insertions(+), 36 deletions(-)
----------------------------------------------------------------------