You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/12/01 15:30:27 UTC
svn commit: r1416004 - in /stanbol/trunk:
contrib/enhancer/stores/jcrstore/src/main/java/org/apache/stanbol/enhancer/store/
contrib/enhancer/stores/jcrstore/src/main/java/org/apache/stanbol/enhancer/store/jcr/
enhancer/engines/entityhublinking/src/main...
Author: rwesten
Date: Sat Dec 1 14:30:25 2012
New Revision: 1416004
URL: http://svn.apache.org/viewvc?rev=1416004&view=rev
Log:
STANBOL-823: Forgot to include new classes with the last commit
Added:
stanbol/trunk/enhancer/engines/entityhublinking/src/main/java/org/apache/stanbol/enhancer/engines/entityhublinking/EntityhubEntity.java
stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/Entity.java
stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/EntitySearcherException.java
Modified:
stanbol/trunk/contrib/enhancer/stores/jcrstore/src/main/java/org/apache/stanbol/enhancer/store/JCRStore.java
stanbol/trunk/contrib/enhancer/stores/jcrstore/src/main/java/org/apache/stanbol/enhancer/store/jcr/JCRContentItem.java
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/SolrYard.java
Modified: stanbol/trunk/contrib/enhancer/stores/jcrstore/src/main/java/org/apache/stanbol/enhancer/store/JCRStore.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/contrib/enhancer/stores/jcrstore/src/main/java/org/apache/stanbol/enhancer/store/JCRStore.java?rev=1416004&r1=1416003&r2=1416004&view=diff
==============================================================================
--- stanbol/trunk/contrib/enhancer/stores/jcrstore/src/main/java/org/apache/stanbol/enhancer/store/JCRStore.java (original)
+++ stanbol/trunk/contrib/enhancer/stores/jcrstore/src/main/java/org/apache/stanbol/enhancer/store/JCRStore.java Sat Dec 1 14:30:25 2012
@@ -26,8 +26,8 @@ import org.apache.clerezza.rdf.core.acce
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.Reference;
import org.apache.felix.scr.annotations.Service;
+import org.apache.stanbol.contenthub.servicesapi.store.Store;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
-import org.apache.stanbol.enhancer.servicesapi.Store;
import org.apache.stanbol.enhancer.store.jcr.JCRContentItem;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Modified: stanbol/trunk/contrib/enhancer/stores/jcrstore/src/main/java/org/apache/stanbol/enhancer/store/jcr/JCRContentItem.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/contrib/enhancer/stores/jcrstore/src/main/java/org/apache/stanbol/enhancer/store/jcr/JCRContentItem.java?rev=1416004&r1=1416003&r2=1416004&view=diff
==============================================================================
--- stanbol/trunk/contrib/enhancer/stores/jcrstore/src/main/java/org/apache/stanbol/enhancer/store/jcr/JCRContentItem.java (original)
+++ stanbol/trunk/contrib/enhancer/stores/jcrstore/src/main/java/org/apache/stanbol/enhancer/store/jcr/JCRContentItem.java Sat Dec 1 14:30:25 2012
@@ -12,7 +12,9 @@ import javax.jcr.ItemExistsException;
import javax.jcr.Node;
import javax.jcr.NodeIterator;
import javax.jcr.PathNotFoundException;
+import javax.jcr.PropertyType;
import javax.jcr.RepositoryException;
+import javax.jcr.Value;
import javax.jcr.ValueFormatException;
import javax.jcr.lock.LockException;
import javax.jcr.nodetype.ConstraintViolationException;
@@ -23,12 +25,16 @@ import javax.jcr.version.VersionExceptio
import org.apache.clerezza.rdf.core.MGraph;
import org.apache.clerezza.rdf.core.Triple;
import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.access.LockableMGraph;
+import org.apache.clerezza.rdf.core.access.LockableMGraphWrapper;
import org.apache.clerezza.rdf.core.event.FilterTriple;
import org.apache.clerezza.rdf.core.event.GraphEvent;
import org.apache.clerezza.rdf.core.event.GraphListener;
import org.apache.clerezza.rdf.core.impl.SimpleGraph;
import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.stanbol.commons.indexedgraph.IndexedMGraph;
+import org.apache.stanbol.enhancer.core.contentitem.ContentItemImpl;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
@@ -37,7 +43,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-public class JCRContentItem implements ContentItem, GraphListener {
+public class JCRContentItem implements ContentItem, GraphListener {
public static final String ENHANCER_ID_PROP = "stanbolEnhancerId";
@@ -46,14 +52,19 @@ public class JCRContentItem implements C
private static final String SUBJECT = "subject";
private static final String JCR_DATA = "jcr:data";
private static final String JCR_MIME_TYPE = "jcr:mimeType";
+ private static final String CONTENT_PART = "contentPart";
+ private static final String CONTENT_PART_ID = "contentPartId";
+ private static final String RDF_TRIPLE = "triple";
private static final Logger log = LoggerFactory.getLogger(JCRContentItem.class);
private Node jcrNode;
+ private String jcrNodePath;
+
// private static byte[] data;
- public JCRContentItem(String id, Node parent) throws InvalidQueryException,
+ public JCRContentItem(UriRef id, Node parent) throws InvalidQueryException,
RepositoryException {
jcrNode = JCRStore.findNodeById(id, parent);
log.info("constructor with id: " + id);
@@ -74,6 +85,7 @@ public class JCRContentItem implements C
log.info("found no node for id " + id + " creating new one");
createNode(id, content, mimeType, metadata, parent);
}
+ jcrNodePath = jcrNode.getPath();
}
private void createNode(String id, byte[] content, String mimeType,
@@ -115,8 +127,14 @@ public class JCRContentItem implements C
jcrNode.getSession().save();
}
-
- private void persistTriple(String nameHint, Triple triple)
+ private void persistContentPart(UriRef id, Object contentPart)
+ throws ItemExistsException, PathNotFoundException, VersionException,
+ ConstraintViolationException, LockException, RepositoryException {
+ Node cpNode = jcrNode.addNode(CONTENT_PART);
+ cpNode.setProperty(CONTENT_PART_ID,id.getUnicodeString());
+
+ }
+ private void persistTriple(Node graphNode, String nameHint, Triple triple)
throws ItemExistsException, PathNotFoundException,
VersionException, ConstraintViolationException, LockException,
RepositoryException, ValueFormatException {
@@ -127,8 +145,12 @@ public class JCRContentItem implements C
name = nameHint;
}
}
-
- Node tripleNode = jcrNode.addNode(name);
+ //TODO:
+ // 1) handle BNodes (create Nodes and use WEAKREFERENCE in triples?)
+ // 2) handle typed literals (use JCR PropertyTypes?)
+ // Maybe we need our own LiteralFactory
+ // 3) handle Plain literals (use String and additional lang property?)
+ Node tripleNode = graphNode.addNode(RDF_TRIPLE);
/*
* TODO: Rupert Westenthaler 25.01.2011
* Using the toString method of the subject, predicate and object is
@@ -143,6 +165,9 @@ public class JCRContentItem implements C
*/
tripleNode.setProperty(SUBJECT, triple.getSubject().toString());
tripleNode.setProperty(PREDICATE, triple.getPredicate().toString());
+ Value val;
+
+ PropertyType
tripleNode.setProperty(OBJECT, triple.getObject().toString());
log.info("persisted triple " + triple.getSubject().toString() + " "
+ triple.getPredicate().toString() + " "
@@ -181,13 +206,12 @@ public class JCRContentItem implements C
return null;
}
- public MGraph getMetadata() {
+ public LockableMGraph getMetadata() {
try {
if (jcrNode == null) {
log.warn("entering getMetadata, but no node initialized");
}
- MGraph graph = new SimpleMGraph();
-
+ LockableMGraph graph = new LockableMGraphWrapper(new IndexedMGraph());
// loop over children
NodeIterator children = jcrNode.getNodes();
while (children.hasNext()) {
@@ -218,20 +242,18 @@ public class JCRContentItem implements C
return graph;
} catch (ValueFormatException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
+ throw new IllegalStateException("Unable to parse RDF data from Node '" +
+ jcrNodePath,e);
} catch (IllegalStateException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
+ throw new IllegalStateException("Unable to parse RDF data from Node '" +
+ jcrNodePath,e);
} catch (PathNotFoundException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
+ throw new IllegalStateException("Unable to parse RDF data from Node '" +
+ jcrNodePath,e);
} catch (RepositoryException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
+ throw new IllegalStateException("Unable to parse RDF data from Node '" +
+ jcrNodePath,e);
}
- return null;
-
}
public String getMimeType() {
Added: stanbol/trunk/enhancer/engines/entityhublinking/src/main/java/org/apache/stanbol/enhancer/engines/entityhublinking/EntityhubEntity.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/entityhublinking/src/main/java/org/apache/stanbol/enhancer/engines/entityhublinking/EntityhubEntity.java?rev=1416004&view=auto
==============================================================================
--- stanbol/trunk/enhancer/engines/entityhublinking/src/main/java/org/apache/stanbol/enhancer/engines/entityhublinking/EntityhubEntity.java (added)
+++ stanbol/trunk/enhancer/engines/entityhublinking/src/main/java/org/apache/stanbol/enhancer/engines/entityhublinking/EntityhubEntity.java Sat Dec 1 14:30:25 2012
@@ -0,0 +1,24 @@
+package org.apache.stanbol.enhancer.engines.entityhublinking;
+
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.stanbol.enhancer.engines.entitylinking.Entity;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.entityhub.model.clerezza.RdfValueFactory;
+import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum;
+
+public class EntityhubEntity extends Entity {
+
+ private static RdfValueFactory vf = RdfValueFactory.getInstance();
+ private static UriRef entityRanking = new UriRef(RdfResourceEnum.entityRank.getUri());
+
+ public EntityhubEntity(Representation rep) {
+ super(new UriRef(rep.getId()),
+ (MGraph)vf.toRdfRepresentation(rep).getRdfGraph());
+ }
+ @Override
+ public Float getEntityRanking() {
+ return EnhancementEngineHelper.get(data, uri, entityRanking, Float.class, lf);
+ }
+}
\ No newline at end of file
Added: stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/Entity.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/Entity.java?rev=1416004&view=auto
==============================================================================
--- stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/Entity.java (added)
+++ stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/Entity.java Sat Dec 1 14:30:25 2012
@@ -0,0 +1,83 @@
+package org.apache.stanbol.enhancer.engines.entitylinking;
+
+import java.util.Iterator;
+
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.PlainLiteral;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.TypedLiteral;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.commons.collections.Predicate;
+import org.apache.commons.collections.PredicateUtils;
+import org.apache.commons.collections.Transformer;
+import org.apache.commons.collections.iterators.FilterIterator;
+import org.apache.commons.collections.iterators.TransformIterator;
+
+/**
+ * An Entity as returned by the {@link EntitySearcher} interface.
+ * {@link EntitySearcher} implementations that do support rankings for
+ * entities SHOULD override the {@link #getEntityRanking()} method.
+ */
+public class Entity {
+
+ protected static final LiteralFactory lf = LiteralFactory.getInstance();
+
+ protected static final Transformer TRIPLE2OBJECT = new Transformer() {
+ @Override
+ public Object transform(Object input) {
+ return ((Triple)input).getObject();
+ }
+ };
+ protected static final Predicate PLAIN_LITERALS = PredicateUtils.instanceofPredicate(PlainLiteral.class);
+ protected static final Predicate TYPED_LITERALS = PredicateUtils.instanceofPredicate(TypedLiteral.class);
+ protected static final Predicate REFERENCES = PredicateUtils.instanceofPredicate(UriRef.class);
+ /**
+ * The URI of the Entity
+ */
+ protected final UriRef uri;
+ /**
+ * The data of the Entity. The graph is expected to contain all information
+ * of the entity by containing {@link Triple}s that use the {@link #uri} as
+ * {@link Triple#getSubject() subject}
+ */
+ protected final MGraph data;
+
+ /**
+ * Constructs a new Entity
+ * @param uri the URI of the Entity
+ * @param data the graph holding the data of the Entity
+ */
+ public Entity(UriRef uri, MGraph data) {
+ this.uri = uri;
+ this.data = data;
+ }
+ public UriRef getUri() {
+ return uri;
+ }
+ public String getId(){
+ return uri.getUnicodeString();
+ }
+ public MGraph getData() {
+ return data;
+ }
+ @SuppressWarnings("unchecked")
+ public Iterator<PlainLiteral> getText(UriRef field) {
+ return new FilterIterator(new TransformIterator(data.filter(uri, field, null), TRIPLE2OBJECT), PLAIN_LITERALS);
+ }
+ @SuppressWarnings("unchecked")
+ public Iterator<UriRef> getReferences(UriRef field){
+ return new FilterIterator(new TransformIterator(data.filter(uri, field, null), TRIPLE2OBJECT), REFERENCES);
+ }
+
+ /**
+ * The ranking for the entity in the range [0..1] or <code>null</code>
+ * if not supported.<p>
+ * This default implementation returns <code>null</code>
+ * @return returns <code>null</code> as this default implementation
+ * does not support entity rankings
+ */
+ public Float getEntityRanking(){
+ return null;
+ }
+}
\ No newline at end of file
Added: stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/EntitySearcherException.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/EntitySearcherException.java?rev=1416004&view=auto
==============================================================================
--- stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/EntitySearcherException.java (added)
+++ stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/EntitySearcherException.java Sat Dec 1 14:30:25 2012
@@ -0,0 +1,20 @@
+package org.apache.stanbol.enhancer.engines.entitylinking;
+
+public class EntitySearcherException extends Exception {
+
+ /** default serial version UID */
+ private static final long serialVersionUID = 1L;
+
+ public EntitySearcherException(String message) {
+ super(message);
+ }
+
+ public EntitySearcherException(Throwable cause) {
+ super(cause);
+ }
+
+ public EntitySearcherException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+}
Modified: stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/SolrYard.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/SolrYard.java?rev=1416004&r1=1416003&r2=1416004&view=diff
==============================================================================
--- stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/SolrYard.java (original)
+++ stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/SolrYard.java Sat Dec 1 14:30:25 2012
@@ -75,6 +75,7 @@ import org.apache.stanbol.entityhub.serv
import org.apache.stanbol.entityhub.yard.solr.defaults.IndexDataTypeEnum;
import org.apache.stanbol.entityhub.yard.solr.impl.SolrQueryFactory.SELECT;
import org.apache.stanbol.entityhub.yard.solr.model.FieldMapper;
+import org.apache.stanbol.entityhub.yard.solr.model.IndexDataType;
import org.apache.stanbol.entityhub.yard.solr.model.IndexField;
import org.apache.stanbol.entityhub.yard.solr.model.IndexValue;
import org.apache.stanbol.entityhub.yard.solr.model.IndexValueFactory;
@@ -1244,16 +1245,26 @@ public class SolrYard extends AbstractYa
String field = fields.next();
float boost;
Float fieldBoost = fieldBoostMap == null ? null : fieldBoostMap.get(field);
- if(documentBoost != null){
- boost = documentBoost;
- if(fieldBoost != null){
- boost = boost*fieldBoost;
- }
- } else if(fieldBoost != null){
- boost = fieldBoost;
- } else {
+ //With Solr 3.6 one cannot set index-time boosts on fields that omitNorms.
+ //Because of that we need to restrict the usage of boosts to those manually
+ //configured in the fieldBoostMap. Before, boosts were dropped for fields that
+ //do not support them
+ if(fieldBoost != null){
+ boost = documentBoost != null ? fieldBoost * documentBoost : fieldBoost;
+ } else {
boost = -1;
}
+ //the old code that no longer works with Solr 3.6 :(
+// if(documentBoost != null){
+// boost = documentBoost;
+// if(fieldBoost != null){
+// boost = boost*fieldBoost;
+// }
+// } else if(fieldBoost != null){
+// boost = fieldBoost;
+// } else {
+// boost = -1;
+// }
for (Iterator<Object> values = representation.get(field); values.hasNext();) {
// now we need to get the indexField for the value
Object next = values.next();
@@ -1261,6 +1272,7 @@ public class SolrYard extends AbstractYa
try {
value = indexValueFactory.createIndexValue(next);
for (String fieldName : fieldMapper.getFieldNames(Arrays.asList(field), value)) {
+ //Set Boosts only for text data types
if(boost > 0){
inputDocument.addField(fieldName, value.getValue(), boost);
} else {