You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/12/01 15:30:27 UTC

svn commit: r1416004 - in /stanbol/trunk: contrib/enhancer/stores/jcrstore/src/main/java/org/apache/stanbol/enhancer/store/ contrib/enhancer/stores/jcrstore/src/main/java/org/apache/stanbol/enhancer/store/jcr/ enhancer/engines/entityhublinking/src/main...

Author: rwesten
Date: Sat Dec  1 14:30:25 2012
New Revision: 1416004

URL: http://svn.apache.org/viewvc?rev=1416004&view=rev
Log:
STANBOL-823: Forgot to include new classes with the last commit

Added:
    stanbol/trunk/enhancer/engines/entityhublinking/src/main/java/org/apache/stanbol/enhancer/engines/entityhublinking/EntityhubEntity.java
    stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/Entity.java
    stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/EntitySearcherException.java
Modified:
    stanbol/trunk/contrib/enhancer/stores/jcrstore/src/main/java/org/apache/stanbol/enhancer/store/JCRStore.java
    stanbol/trunk/contrib/enhancer/stores/jcrstore/src/main/java/org/apache/stanbol/enhancer/store/jcr/JCRContentItem.java
    stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/SolrYard.java

Modified: stanbol/trunk/contrib/enhancer/stores/jcrstore/src/main/java/org/apache/stanbol/enhancer/store/JCRStore.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/contrib/enhancer/stores/jcrstore/src/main/java/org/apache/stanbol/enhancer/store/JCRStore.java?rev=1416004&r1=1416003&r2=1416004&view=diff
==============================================================================
--- stanbol/trunk/contrib/enhancer/stores/jcrstore/src/main/java/org/apache/stanbol/enhancer/store/JCRStore.java (original)
+++ stanbol/trunk/contrib/enhancer/stores/jcrstore/src/main/java/org/apache/stanbol/enhancer/store/JCRStore.java Sat Dec  1 14:30:25 2012
@@ -26,8 +26,8 @@ import org.apache.clerezza.rdf.core.acce
 import org.apache.felix.scr.annotations.Component;
 import org.apache.felix.scr.annotations.Reference;
 import org.apache.felix.scr.annotations.Service;
+import org.apache.stanbol.contenthub.servicesapi.store.Store;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
-import org.apache.stanbol.enhancer.servicesapi.Store;
 import org.apache.stanbol.enhancer.store.jcr.JCRContentItem;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

Modified: stanbol/trunk/contrib/enhancer/stores/jcrstore/src/main/java/org/apache/stanbol/enhancer/store/jcr/JCRContentItem.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/contrib/enhancer/stores/jcrstore/src/main/java/org/apache/stanbol/enhancer/store/jcr/JCRContentItem.java?rev=1416004&r1=1416003&r2=1416004&view=diff
==============================================================================
--- stanbol/trunk/contrib/enhancer/stores/jcrstore/src/main/java/org/apache/stanbol/enhancer/store/jcr/JCRContentItem.java (original)
+++ stanbol/trunk/contrib/enhancer/stores/jcrstore/src/main/java/org/apache/stanbol/enhancer/store/jcr/JCRContentItem.java Sat Dec  1 14:30:25 2012
@@ -12,7 +12,9 @@ import javax.jcr.ItemExistsException;
 import javax.jcr.Node;
 import javax.jcr.NodeIterator;
 import javax.jcr.PathNotFoundException;
+import javax.jcr.PropertyType;
 import javax.jcr.RepositoryException;
+import javax.jcr.Value;
 import javax.jcr.ValueFormatException;
 import javax.jcr.lock.LockException;
 import javax.jcr.nodetype.ConstraintViolationException;
@@ -23,12 +25,16 @@ import javax.jcr.version.VersionExceptio
 import org.apache.clerezza.rdf.core.MGraph;
 import org.apache.clerezza.rdf.core.Triple;
 import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.access.LockableMGraph;
+import org.apache.clerezza.rdf.core.access.LockableMGraphWrapper;
 import org.apache.clerezza.rdf.core.event.FilterTriple;
 import org.apache.clerezza.rdf.core.event.GraphEvent;
 import org.apache.clerezza.rdf.core.event.GraphListener;
 import org.apache.clerezza.rdf.core.impl.SimpleGraph;
 import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
 import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.stanbol.commons.indexedgraph.IndexedMGraph;
+import org.apache.stanbol.enhancer.core.contentitem.ContentItemImpl;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
 import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
 import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
@@ -37,7 +43,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 
-public class JCRContentItem implements ContentItem, GraphListener {
+public class JCRContentItem  implements ContentItem, GraphListener {
 
     public static final String ENHANCER_ID_PROP = "stanbolEnhancerId";
 
@@ -46,14 +52,19 @@ public class JCRContentItem implements C
     private static final String SUBJECT = "subject";
     private static final String JCR_DATA = "jcr:data";
     private static final String JCR_MIME_TYPE = "jcr:mimeType";
+    private static final String CONTENT_PART = "contentPart";
+    private static final String CONTENT_PART_ID = "contentPartId";
+    private static final String RDF_TRIPLE = "triple";
 
     private static final Logger log = LoggerFactory.getLogger(JCRContentItem.class);
 
     private Node jcrNode;
 
+    private String jcrNodePath;
+
     // private static byte[] data;
 
-    public JCRContentItem(String id, Node parent) throws InvalidQueryException,
+    public JCRContentItem(UriRef id, Node parent) throws InvalidQueryException,
             RepositoryException {
         jcrNode = JCRStore.findNodeById(id, parent);
         log.info("constructor with id: " + id);
@@ -74,6 +85,7 @@ public class JCRContentItem implements C
             log.info("found no node for id " + id + " creating new one");
             createNode(id, content, mimeType, metadata, parent);
         }
+        jcrNodePath = jcrNode.getPath();
     }
 
     private void createNode(String id, byte[] content, String mimeType,
@@ -115,8 +127,14 @@ public class JCRContentItem implements C
         jcrNode.getSession().save();
 
     }
-
-    private void persistTriple(String nameHint, Triple triple)
+    private void persistContentPart(UriRef id, Object contentPart) 
+            throws ItemExistsException, PathNotFoundException, VersionException, 
+            ConstraintViolationException, LockException, RepositoryException {
+        Node cpNode = jcrNode.addNode(CONTENT_PART);
+        cpNode.setProperty(CONTENT_PART_ID,id.getUnicodeString());
+        
+    }
+    private void persistTriple(Node graphNode, String nameHint, Triple triple)
             throws ItemExistsException, PathNotFoundException,
             VersionException, ConstraintViolationException, LockException,
             RepositoryException, ValueFormatException {
@@ -127,8 +145,12 @@ public class JCRContentItem implements C
                 name = nameHint;
             }
         }
-
-        Node tripleNode = jcrNode.addNode(name);
+        //TODO:
+        // 1) handle BNodes (create Nodes and use WEAKREFERENCE in triples?)
+        // 2) handle typed literals (use JCR PropertyTypes?)
+        //    Maybe we need our own LiteralFactory
+        // 3) handle Plain literals (use String and additional lang property?)
+        Node tripleNode = graphNode.addNode(RDF_TRIPLE);
         /*
          * TODO: Rupert Westenthaler 25.01.2011
          * Using the toString method of the subject, predicate and object is
@@ -143,6 +165,9 @@ public class JCRContentItem implements C
          */
         tripleNode.setProperty(SUBJECT, triple.getSubject().toString());
         tripleNode.setProperty(PREDICATE, triple.getPredicate().toString());
+        Value val;
+        
+        PropertyType
         tripleNode.setProperty(OBJECT, triple.getObject().toString());
         log.info("persisted triple " + triple.getSubject().toString() + " "
                 + triple.getPredicate().toString() + " "
@@ -181,13 +206,12 @@ public class JCRContentItem implements C
         return null;
     }
 
-    public MGraph getMetadata() {
+    public LockableMGraph getMetadata() {
         try {
             if (jcrNode == null) {
                 log.warn("entering getMetadata, but no node initialized");
             }
-            MGraph graph = new SimpleMGraph();
-
+            LockableMGraph graph = new LockableMGraphWrapper(new IndexedMGraph());
             // loop over children
             NodeIterator children = jcrNode.getNodes();
             while (children.hasNext()) {
@@ -218,20 +242,18 @@ public class JCRContentItem implements C
             return graph;
 
         } catch (ValueFormatException e) {
-            // TODO Auto-generated catch block
-            e.printStackTrace();
+            throw new IllegalStateException("Unable to parse RDF data from Node '" +
+                    jcrNodePath,e);
         } catch (IllegalStateException e) {
-            // TODO Auto-generated catch block
-            e.printStackTrace();
+            throw new IllegalStateException("Unable to parse RDF data from Node '" +
+                    jcrNodePath,e);
         } catch (PathNotFoundException e) {
-            // TODO Auto-generated catch block
-            e.printStackTrace();
+            throw new IllegalStateException("Unable to parse RDF data from Node '" +
+                    jcrNodePath,e);
         } catch (RepositoryException e) {
-            // TODO Auto-generated catch block
-            e.printStackTrace();
+            throw new IllegalStateException("Unable to parse RDF data from Node '" +
+                    jcrNodePath,e);
         }
-        return null;
-
     }
 
     public String getMimeType() {

Added: stanbol/trunk/enhancer/engines/entityhublinking/src/main/java/org/apache/stanbol/enhancer/engines/entityhublinking/EntityhubEntity.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/entityhublinking/src/main/java/org/apache/stanbol/enhancer/engines/entityhublinking/EntityhubEntity.java?rev=1416004&view=auto
==============================================================================
--- stanbol/trunk/enhancer/engines/entityhublinking/src/main/java/org/apache/stanbol/enhancer/engines/entityhublinking/EntityhubEntity.java (added)
+++ stanbol/trunk/enhancer/engines/entityhublinking/src/main/java/org/apache/stanbol/enhancer/engines/entityhublinking/EntityhubEntity.java Sat Dec  1 14:30:25 2012
@@ -0,0 +1,24 @@
+package org.apache.stanbol.enhancer.engines.entityhublinking;
+
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.stanbol.enhancer.engines.entitylinking.Entity;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.entityhub.model.clerezza.RdfValueFactory;
+import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum;
+
+public class EntityhubEntity extends Entity {
+    
+    private static RdfValueFactory vf = RdfValueFactory.getInstance();
+    private static UriRef entityRanking = new UriRef(RdfResourceEnum.entityRank.getUri());
+    
+    public EntityhubEntity(Representation rep) {
+        super(new UriRef(rep.getId()), 
+            (MGraph)vf.toRdfRepresentation(rep).getRdfGraph());
+    }
+    @Override
+    public Float getEntityRanking() {
+        return EnhancementEngineHelper.get(data, uri, entityRanking, Float.class, lf);
+    }
+}
\ No newline at end of file

Added: stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/Entity.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/Entity.java?rev=1416004&view=auto
==============================================================================
--- stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/Entity.java (added)
+++ stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/Entity.java Sat Dec  1 14:30:25 2012
@@ -0,0 +1,83 @@
+package org.apache.stanbol.enhancer.engines.entitylinking;
+
+import java.util.Iterator;
+
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.PlainLiteral;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.TypedLiteral;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.commons.collections.Predicate;
+import org.apache.commons.collections.PredicateUtils;
+import org.apache.commons.collections.Transformer;
+import org.apache.commons.collections.iterators.FilterIterator;
+import org.apache.commons.collections.iterators.TransformIterator;
+
+/**
+ * An Entity as returned by the {@link EntitySearcher} interface.
+ * {@link EntitySearcher} implementations that do support rankings for
+ * entities SHOULD override the {@link #getEntityRanking()} method.
+ */
+public class Entity {
+
+    protected static final LiteralFactory lf = LiteralFactory.getInstance();
+    
+    protected static final Transformer TRIPLE2OBJECT = new Transformer() {
+        @Override
+        public Object transform(Object input) {
+            return ((Triple)input).getObject();
+        }
+    };
+    protected static final Predicate PLAIN_LITERALS = PredicateUtils.instanceofPredicate(PlainLiteral.class);
+    protected static final Predicate TYPED_LITERALS = PredicateUtils.instanceofPredicate(TypedLiteral.class);
+    protected static final Predicate REFERENCES = PredicateUtils.instanceofPredicate(UriRef.class);
+    /**
+     * The URI of the Entity
+     */
+     protected final UriRef uri;
+    /**
+     * The data of the Entity. The graph is expected to contain all information
+     * of the entity by containing {@link Triple}s that use the {@link #uri} as
+     * {@link Triple#getSubject() subject}
+     */
+    protected final MGraph data;
+    
+    /**
+     * Constructs a new Entity
+     * @param uri
+     * @param data
+     */
+    public Entity(UriRef uri, MGraph data) {
+        this.uri = uri;
+        this.data = data;
+    }
+    public UriRef getUri() {
+        return uri;
+    }
+    public String getId(){
+        return uri.getUnicodeString();
+    }
+    public MGraph getData() {
+        return data;
+    }
+    @SuppressWarnings("unchecked")
+    public Iterator<PlainLiteral> getText(UriRef field) {
+        return new FilterIterator(new TransformIterator(data.filter(uri, field, null), TRIPLE2OBJECT), PLAIN_LITERALS);
+    }
+    @SuppressWarnings("unchecked")
+    public Iterator<UriRef> getReferences(UriRef field){
+        return new FilterIterator(new TransformIterator(data.filter(uri, field, null), TRIPLE2OBJECT), REFERENCES);
+    }
+    
+    /**
+     * The ranking for the entity in the range [0..1] or <code>null</code>
+     * if not supported.<p>
+     * This default implementation always returns <code>null</code>
+     * @return returns <code>null</code> as this default implementation
+     * does not support entity rankings
+     */
+    public Float getEntityRanking(){
+        return null;
+    }
+}
\ No newline at end of file

Added: stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/EntitySearcherException.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/EntitySearcherException.java?rev=1416004&view=auto
==============================================================================
--- stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/EntitySearcherException.java (added)
+++ stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/EntitySearcherException.java Sat Dec  1 14:30:25 2012
@@ -0,0 +1,20 @@
+package org.apache.stanbol.enhancer.engines.entitylinking;
+
+public class EntitySearcherException extends Exception {
+
+    /** default serial version UID */
+    private static final long serialVersionUID = 1L;
+
+    public EntitySearcherException(String message) {
+        super(message);
+    }
+
+    public EntitySearcherException(Throwable cause) {
+        super(cause);
+    }
+
+    public EntitySearcherException(String message, Throwable cause) {
+        super(message, cause);
+    }
+
+}

Modified: stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/SolrYard.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/SolrYard.java?rev=1416004&r1=1416003&r2=1416004&view=diff
==============================================================================
--- stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/SolrYard.java (original)
+++ stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/SolrYard.java Sat Dec  1 14:30:25 2012
@@ -75,6 +75,7 @@ import org.apache.stanbol.entityhub.serv
 import org.apache.stanbol.entityhub.yard.solr.defaults.IndexDataTypeEnum;
 import org.apache.stanbol.entityhub.yard.solr.impl.SolrQueryFactory.SELECT;
 import org.apache.stanbol.entityhub.yard.solr.model.FieldMapper;
+import org.apache.stanbol.entityhub.yard.solr.model.IndexDataType;
 import org.apache.stanbol.entityhub.yard.solr.model.IndexField;
 import org.apache.stanbol.entityhub.yard.solr.model.IndexValue;
 import org.apache.stanbol.entityhub.yard.solr.model.IndexValueFactory;
@@ -1244,16 +1245,26 @@ public class SolrYard extends AbstractYa
             String field = fields.next();
             float boost;
             Float fieldBoost = fieldBoostMap == null ? null : fieldBoostMap.get(field);
-            if(documentBoost != null){
-                boost = documentBoost;
-                if(fieldBoost != null){
-                    boost = boost*fieldBoost;
-                }
-            } else if(fieldBoost != null){
-                boost = fieldBoost;
-            } else {
+            //With Solr 3.6 one cannot set index-time boosts on fields that omitNorms.
+            //Because of that we need to restrict the usage of boosts to those manually
+            //configured in the fieldBoostMap. Previously, boosts were dropped for fields
+            //that do not support them.
+            if(fieldBoost != null){
+                boost = documentBoost != null ? fieldBoost * documentBoost : fieldBoost;
+            } else { 
                 boost = -1;
             }
+            //the old code that no longer works with Solr 3.6 :(
+//            if(documentBoost != null){
+//                boost = documentBoost;
+//                if(fieldBoost != null){
+//                    boost = boost*fieldBoost;
+//                }
+//            } else if(fieldBoost != null){
+//                boost = fieldBoost;
+//            } else {
+//                boost = -1;
+//            }
             for (Iterator<Object> values = representation.get(field); values.hasNext();) {
                 // now we need to get the indexField for the value
                 Object next = values.next();
@@ -1261,6 +1272,7 @@ public class SolrYard extends AbstractYa
                 try {
                     value = indexValueFactory.createIndexValue(next);
                     for (String fieldName : fieldMapper.getFieldNames(Arrays.asList(field), value)) {
+                        //Set Boosts only for text data types
                         if(boost > 0){
                             inputDocument.addField(fieldName, value.getValue(), boost);
                         } else {