You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2015/06/17 22:55:19 UTC

[05/15] jena git commit: Storing a unique id with each Lucene document for future retrieval and deletion.

Storing a unique id with each Lucene document for future retrieval and deletion.


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/0d91e01c
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/0d91e01c
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/0d91e01c

Branch: refs/heads/master
Commit: 0d91e01cd5597e335850713e6acbe507b12e153e
Parents: b357ddf
Author: Alexis Miara <al...@hotmail.com>
Authored: Mon May 4 11:35:38 2015 -0400
Committer: Alexis Miara <al...@hotmail.com>
Committed: Wed May 13 14:48:10 2015 -0400

----------------------------------------------------------------------
 .../java/org/apache/jena/query/text/Entity.java |  8 ++++++
 .../apache/jena/query/text/TextIndexLucene.java | 26 ++++++--------------
 .../apache/jena/query/text/TextQueryFuncs.java  |  9 ++++++-
 3 files changed, 24 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/0d91e01c/jena-text/src/main/java/org/apache/jena/query/text/Entity.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/Entity.java b/jena-text/src/main/java/org/apache/jena/query/text/Entity.java
index d770c5a..c48a0eb 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/Entity.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/Entity.java
@@ -25,11 +25,17 @@ public class Entity
 {
     private final String id ;
     private final String graph ;
+    private final String language ;
     private final Map<String, Object> map = new HashMap<>() ;
 
     public Entity(String entityId, String entityGraph) {
+        this(entityId, entityGraph, null);
+    }
+
+    public Entity(String entityId, String entityGraph, String lang) {
         this.id = entityId ;
         this.graph = entityGraph;
+        this.language = lang;
     }
 
     /** @deprecated Use {@linkplain #Entity(String, String)} */
@@ -40,6 +46,8 @@ public class Entity
 
     public String getGraph()                { return graph ; }
 
+    public String getLanguage()                { return language ; }
+
     public void put(String key, Object value)
     { map.put(key, value) ; }
     

http://git-wip-us.apache.org/repos/asf/jena/blob/0d91e01c/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
index b11f000..1f873a1 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
@@ -25,6 +25,7 @@ import java.util.List ;
 import java.util.Map ;
 import java.util.Map.Entry ;
 
+import org.apache.commons.codec.digest.DigestUtils;
 import org.apache.jena.graph.Node ;
 import org.apache.jena.graph.NodeFactory ;
 import org.apache.jena.sparql.util.NodeFactoryExtra ;
@@ -75,9 +76,6 @@ public class TextIndexLucene implements TextIndex {
     private final Analyzer         analyzer ;
     private final Analyzer         queryAnalyzer ;
 
-    //the BORDER_DELIMITER constant is required for...
-    private static final String BORDER_DELIMITER = "borderdelimiter";
-
     // The IndexWriter can't be final because we may have to recreate it if rollback() is called.
     // However, it needs to be volatile in case the next write transaction is on a different thread,
     // but we do not need locking because we are assuming that there can only be one writer
@@ -220,24 +218,13 @@ public class TextIndexLucene implements TextIndex {
         if ( log.isDebugEnabled() )
             log.debug("Delete entity: "+entity) ;
         try {
-            TermQuery qUri = new TermQuery(new Term("uri", entity.getId()));
             Map<String, Object> map = entity.getMap();
             String property = map.keySet().iterator().next();
             String value = (String)map.get(property);
+            String key = entity.getGraph() + "-" + entity.getId() + "-" + value + "-" + entity.getLanguage();
+            Term uid = new Term("uid", DigestUtils.shaHex(key));
 
-            //escaping special characters to avoid problem in WildcardQuery
-            value = value.replace( "?", "\\?" );
-            value = value.replace( "*", "\\*" );
-            value = value.replace( "\"", "\\\"" );
-
-            QueryParser qp = new QueryParser(VER, property, analyzer);
-            Query qPropValue = qp.parse("\"" + BORDER_DELIMITER + " " + value + " " + BORDER_DELIMITER + "\"");
-
-            BooleanQuery q = new BooleanQuery();
-            q.add(qUri, BooleanClause.Occur.MUST);
-            q.add(qPropValue, BooleanClause.Occur.MUST);
-
-            indexWriter.deleteDocuments(q);
+            indexWriter.deleteDocuments(uid);
 
         } catch (Exception e) {
             throw new TextIndexException(e) ;
@@ -256,7 +243,10 @@ public class TextIndexLucene implements TextIndex {
         }
 
         for ( Entry<String, Object> e : entity.getMap().entrySet() ) {
-            Field field = new Field(e.getKey(), BORDER_DELIMITER + " " + e.getValue() + " " + BORDER_DELIMITER, ftText) ;
+            Field field = new Field(e.getKey(), (String)e.getValue(), ftText) ;
+            doc.add(field) ;
+            String key = entity.getGraph() + "-" + entity.getId() + "-" + e.getValue() + "-" + entity.getLanguage();
+            field = new Field("uid", DigestUtils.shaHex(key), StringField.TYPE_STORED ) ;
             doc.add(field) ;
         }
         return doc ;

http://git-wip-us.apache.org/repos/asf/jena/blob/0d91e01c/jena-text/src/main/java/org/apache/jena/query/text/TextQueryFuncs.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextQueryFuncs.java b/jena-text/src/main/java/org/apache/jena/query/text/TextQueryFuncs.java
index 512297e..d628c4a 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextQueryFuncs.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextQueryFuncs.java
@@ -46,6 +46,12 @@ public class TextQueryFuncs {
         return nodeToString(g) ;
     }
 
+    /** Retrieve the language (if one exists) of the object when it is a literal. */
+    public static String getLiteralLanguage(Node o) {
+        String lang = o.getLiteral().language();
+        return lang;
+    }
+
     private static String nodeToString(Node n) {
         return (n.isURI() ) ? n.getURI() : "_:" + n.getBlankNodeLabel() ;
     }
@@ -77,7 +83,8 @@ public class TextQueryFuncs {
     
         String x = TextQueryFuncs.subjectToString(s) ;
         String graphText = TextQueryFuncs.graphNodeToString(g) ;
-        Entity entity = new Entity(x, graphText) ;
+        String language = TextQueryFuncs.getLiteralLanguage(o) ;
+        Entity entity = new Entity(x, graphText, language) ;
         String graphField = defn.getGraphField() ;
         if ( defn.getGraphField() != null )
             entity.put(graphField, graphText) ;