You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2013/06/22 20:44:07 UTC

svn commit: r1495768 - in /jena/trunk/jena-text/src: main/java/jena/ main/java/org/apache/jena/query/text/ main/java/org/apache/jena/query/text/assembler/ test/java/org/apache/jena/query/text/ test/java/org/apache/jena/query/text/assembler/

Author: andy
Date: Sat Jun 22 18:44:06 2013
New Revision: 1495768

URL: http://svn.apache.org/r1495768
Log:
Allow for multiple properties to map to one indexed field.

Modified:
    jena/trunk/jena-text/src/main/java/jena/textindexer.java
    jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java
    jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
    jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/EntityMapAssembler.java
    jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java
    jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestEntityMapAssembler.java

Modified: jena/trunk/jena-text/src/main/java/jena/textindexer.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-text/src/main/java/jena/textindexer.java?rev=1495768&r1=1495767&r2=1495768&view=diff
==============================================================================
--- jena/trunk/jena-text/src/main/java/jena/textindexer.java (original)
+++ jena/trunk/jena-text/src/main/java/jena/textindexer.java Sat Jun 22 18:44:06 2013
@@ -132,8 +132,12 @@ public class textindexer extends CmdARQ 
 
     private Set<Node> getIndexedProperties() {
         Set<Node> result = new HashSet<Node>() ;
-        for (Iterator<String> iter = entityDefinition.fields().iterator(); iter.hasNext();) {
-            result.add(entityDefinition.getPredicate(iter.next())) ;
+        for (String f : entityDefinition.fields()) {
+            System.out.println("-- "+f) ;
+            for ( Node p : entityDefinition.getPredicates(f) ) {
+                System.out.println("---- "+p) ;
+                result.add(p) ;
+            }
         }
         return result ;
     }
@@ -216,7 +220,7 @@ public class textindexer extends CmdARQ 
         void close() {
             long overallDuration = System.currentTimeMillis() - startTime ;
             String message = progressCount + " (" + progressCount / Math.max(overallDuration / 1000, 1)
-                             + " per second)" + progressMessage ;
+                             + " per second) " + progressMessage ;
             log.info(message) ;
         }
     }

Modified: jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java?rev=1495768&r1=1495767&r2=1495768&view=diff
==============================================================================
--- jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java (original)
+++ jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java Sat Jun 22 18:44:06 2013
@@ -16,56 +16,79 @@
  * limitations under the License.
  */
 
-package org.apache.jena.query.text;
+package org.apache.jena.query.text ;
 
 import java.util.Collection ;
 import java.util.Collections ;
 import java.util.HashMap ;
 import java.util.Map ;
 
+import org.apache.jena.atlas.lib.MultiMap ;
+
 import com.hp.hpl.jena.graph.Node ;
 
-/** Definition of a "document"
+/**
+ * Definition of a "document"
  */
-public class EntityDefinition
-{
-    private final Map<Node, String> predicateToField = new HashMap<Node, String>() ;
-    private final Map<String, Node> fieldToPredicate = new HashMap<String, Node>() ;
-    private final Collection<String> fields = Collections.unmodifiableCollection(fieldToPredicate.keySet()) ;
-    private final String entityField ;
-    private final String primaryField ;
-    
-    /** 
-     * @param entityField       The entity being indexed (e.g. it's URI). 
-     * @param primaryField      The primary/default field to search
-     * @param primaryProperty   The property associated with the primary/default field
+public class EntityDefinition {
+    private final Map<Node, String>      predicateToField = new HashMap<Node, String>() ;
+    private final MultiMap<String, Node> fieldToPredicate = MultiMap.createMapList() ;
+    private final Collection<String>     fields           = Collections.unmodifiableCollection(fieldToPredicate.keys()) ;
+    // private final Collection<String> fields =
+    // Collections.unmodifiableCollection(fieldToPredicate.keySet()) ;
+    private final String                 entityField ;
+    private final String                 primaryField ;
+    private final Node                   primaryPredicate ;
+
+    /**
+     * @param entityField
+     *            The entity being indexed (e.g. its URI).
+     * @param primaryField
+     *            The primary/default field to search
+     * @param primaryPredicate
+     *            The property associated with the primary/default field
      */
-    public EntityDefinition(String entityField, String primaryField, Node primaryProperty)
-    { 
+    public EntityDefinition(String entityField, String primaryField, Node primaryPredicate) {
         this.entityField = entityField ;
         this.primaryField = primaryField ;
-        set(primaryField, primaryProperty) ;
+        this.primaryPredicate = primaryPredicate ;
+        if (primaryField == null && primaryPredicate != null)
+            throw new IllegalArgumentException("primaryField null but primaryPredicate not null") ;
+        if (primaryField != null && primaryPredicate == null)
+            throw new IllegalArgumentException("primaryField not null but primaryPredicate null") ;
+        if (primaryField != null && primaryPredicate != null)
+            set(primaryField, primaryPredicate) ;
+    }
+
+    public String getEntityField() {
+        return entityField ;
     }
-    
-    public String getEntityField() { return entityField ; }
-    
+
     public void set(String field, Node predicate) {
         predicateToField.put(predicate, field) ;
-        fieldToPredicate.put(field, predicate) ;
+        // Add uniquely.
+        Collection<Node> c = fieldToPredicate.get(field) ;
+        if (c == null || !c.contains(predicate))
+            fieldToPredicate.put(field, predicate) ;
     }
-    
-    public Node getPredicate(String field) {
+
+    public Collection<Node> getPredicates(String field) {
         return fieldToPredicate.get(field) ;
     }
-    
+
     public String getField(Node predicate) {
         return predicateToField.get(predicate) ;
     }
 
-    public Node getPrimaryPredicate()   { return fieldToPredicate.get(primaryField) ; }
-    
-    public String getPrimaryField()     { return primaryField ; }  
-    
-    public Collection<String> fields()  { return fields ; }
-}
+    public Node getPrimaryPredicate() {
+        return primaryPredicate ;
+    }
+
+    public String getPrimaryField() {
+        return primaryField ;
+    }
 
+    public Collection<String> fields() {
+        return fields ;
+    }
+}

Modified: jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java?rev=1495768&r1=1495767&r2=1495768&view=diff
==============================================================================
--- jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java (original)
+++ jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java Sat Jun 22 18:44:06 2013
@@ -108,7 +108,8 @@ public class TextIndexLucene implements 
     @Override
     public void addEntity(Entity entity)
     {
-        log.info("Add entity: "+entity) ;
+        if ( log.isDebugEnabled() )
+            log.debug("Add entity: "+entity) ;
         try {
             Document doc = doc(entity) ;
             indexWriter.addDocument(doc) ;

Modified: jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/EntityMapAssembler.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/EntityMapAssembler.java?rev=1495768&r1=1495767&r2=1495768&view=diff
==============================================================================
--- jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/EntityMapAssembler.java (original)
+++ jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/EntityMapAssembler.java Sat Jun 22 18:44:06 2013
@@ -20,8 +20,10 @@ package org.apache.jena.query.text.assem
 
 import static org.apache.jena.query.text.assembler.TextVocab.NS ;
 
+import java.util.Collection ;
 import java.util.List ;
 
+import org.apache.jena.atlas.lib.MultiMap ;
 import org.apache.jena.atlas.lib.StrUtils ;
 import org.apache.jena.query.text.EntityDefinition ;
 import org.apache.jena.query.text.TextIndexException ;
@@ -67,8 +69,11 @@ public class EntityMapAssembler extends 
         String qs1 = StrUtils.strjoinNL(prologue,
                                         "SELECT * {" ,
                                         "  ?eMap  :entityField  ?entityField ;" ,
-                                        "         :defaultField ?dftField ;" , 
                                         "         :map ?map" ,
+                                        "  OPTIONAL {" ,
+                                        "     ?eMap :defaultField ?dftField" , 
+                                        "     OPTIONAL { ?eMap :defaultPredicate ?dftPredicate } " ,
+                                        "  }",
                                          "}") ;
         ParameterizedSparqlString pss = new ParameterizedSparqlString(qs1) ;
         pss.setIri("eMap", root.getURI()) ;
@@ -82,7 +87,9 @@ public class EntityMapAssembler extends 
         
         QuerySolution qsol1 = results.get(0) ;
         String entityField = qsol1.getLiteral("entityField").getLexicalForm() ;
+        
         String defaultField = qsol1.contains("dftField") ? qsol1.getLiteral("dftField").getLexicalForm() : null ;
+        Node defaultPredicate = qsol1.contains("dftPredicate") ? qsol1.get("dftPredicate").asNode() : null ;
         
         String qs2 = StrUtils.strjoinNL("SELECT * {",
                                         "  ?map list:member [ :field ?field ; :predicate ?predicate ]" ,
@@ -103,15 +110,34 @@ public class EntityMapAssembler extends 
             }
         }
         
-        if ( primaryProperty == null )
-            throw new TextIndexException("No definition of primary field '"+defaultField+"'") ; 
-        
-        EntityDefinition docDef = new EntityDefinition(entityField, defaultField, null) ;
-        for ( QuerySolution qsol : mapEntries )
-        {
+        MultiMap<String, Node> mapDefs = MultiMap.createMapList() ; 
+        for ( QuerySolution qsol : mapEntries ) {
             String field =  qsol.getLiteral("field").getLexicalForm() ;
             Resource p = qsol.getResource("predicate") ;
-            docDef.set(field, p.asNode()) ;
+            mapDefs.put(field, p.asNode()) ;
+        }
+        
+        // Primary field/predicate
+        if ( defaultField != null ) {
+            Collection<Node> c = mapDefs.get(defaultField) ;
+            if ( c == null )
+                throw new TextIndexException("No definition of primary field '"+defaultField+"'") ;
+            if ( defaultPredicate == null ) {
+                if ( c.size() != 1 )
+                    throw new TextIndexException("No single definition of primary predicate for primary field '"+defaultField+"'") ;
+                // Set default predicate
+                defaultPredicate = c.iterator().next() ;
+            } else {
+                if ( ! c.contains(defaultPredicate) )
+                    throw new TextIndexException("Primary field '"+defaultField+"' not asscoiated with property <"+defaultPredicate.getURI()+">" ) ;
+            }
+        }
+        
+        
+        EntityDefinition docDef = new EntityDefinition(entityField, defaultField, defaultPredicate) ;
+        for ( String f : mapDefs.keys() ) {
+            for ( Node p : mapDefs.get(f)) 
+                docDef.set(f, p) ;
         }
         return docDef ;
     }

Modified: jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java?rev=1495768&r1=1495767&r2=1495768&view=diff
==============================================================================
--- jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java (original)
+++ jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java Sat Jun 22 18:44:06 2013
@@ -34,7 +34,6 @@ import com.hp.hpl.jena.vocabulary.RDFS ;
 public class TestBuildTextDataset extends BaseTest
 {
     static final String DIR = "testing/TextQuery" ;
-    
 
     // Ensure assembler initialized. 
     @BeforeClass public static void setupClass() { TextQuery.init() ; } 

Modified: jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestEntityMapAssembler.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestEntityMapAssembler.java?rev=1495768&r1=1495767&r2=1495768&view=diff
==============================================================================
--- jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestEntityMapAssembler.java (original)
+++ jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestEntityMapAssembler.java Sat Jun 22 18:44:06 2013
@@ -18,20 +18,20 @@
 
 package org.apache.jena.query.text.assembler;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import org.apache.jena.query.text.EntityDefinition;
-import org.apache.jena.query.text.TextIndexException;
-import org.junit.Test;
-
-import com.hp.hpl.jena.rdf.model.Model;
-import com.hp.hpl.jena.rdf.model.ModelFactory;
-import com.hp.hpl.jena.rdf.model.Property;
-import com.hp.hpl.jena.rdf.model.RDFNode;
-import com.hp.hpl.jena.rdf.model.Resource;
-import com.hp.hpl.jena.vocabulary.RDFS;
+import static org.junit.Assert.assertEquals ;
+import static org.junit.Assert.assertTrue ;
+import static org.junit.Assert.fail ;
+
+import java.util.Collection ;
+
+import org.apache.jena.atlas.lib.InternalErrorException ;
+import org.apache.jena.query.text.EntityDefinition ;
+import org.apache.jena.query.text.TextIndexException ;
+import org.junit.Test ;
+
+import com.hp.hpl.jena.graph.Node ;
+import com.hp.hpl.jena.rdf.model.* ;
+import com.hp.hpl.jena.vocabulary.RDFS ;
 
 /**
  * Test assembler for EntityMap
@@ -61,14 +61,23 @@ public class TestEntityMapAssembler {
 	@Test public void EntityHasMapEntries() {
 		EntityMapAssembler emAssembler = new EntityMapAssembler();
 		EntityDefinition entityDef = emAssembler.open(null, spec1, null);
-		assertEquals(entityDef.getPredicate(SPEC1_DEFAULT_FIELD), SPEC1_PREDICATE.asNode());
+		assertEquals(SPEC1_PREDICATE.asNode(), getOne(entityDef,SPEC1_DEFAULT_FIELD));
 	}
 	
-	@Test public void EntityHasMultipleMapEntries() {
+	private Object getOne(EntityDefinition entityDef, String field) {
+	    Collection<Node> x = entityDef.getPredicates(field) ;
+	    if ( x == null || x.size() == 0 )
+	        return null ;
+	    if ( x.size() != 1 )
+	        throw new InternalErrorException("Not unique: "+field) ;
+        return x.iterator().next() ; 
+    }
+
+    @Test public void EntityHasMultipleMapEntries() {
 		EntityMapAssembler emAssembler = new EntityMapAssembler();
 		EntityDefinition entityDef = emAssembler.open(null, spec2, null);
-		assertEquals(entityDef.getPredicate(SPEC2_DEFAULT_FIELD), SPEC2_PREDICATE1.asNode());
-		assertEquals(entityDef.getPredicate(SPEC2_FIELD2), SPEC2_PREDICATE2.asNode());
+		assertEquals(SPEC2_PREDICATE1.asNode(), getOne(entityDef,SPEC2_DEFAULT_FIELD));
+		assertEquals(SPEC2_PREDICATE2.asNode(), getOne(entityDef, SPEC2_FIELD2));
 	}
 	
 	@Test public void errorOnNoEntityField() {