You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2011/09/22 08:51:36 UTC

svn commit: r1173968 [4/5] - in /incubator/stanbol/trunk: commons/installer/bundleprovider/src/main/java/org/apache/stanbol/commons/installer/provider/bundle/impl/ commons/jsonld/ commons/opennlp/ commons/opennlp/src/main/java/org/apache/stanbol/common...

Added: incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/LinkedEntity.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/LinkedEntity.java?rev=1173968&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/LinkedEntity.java (added)
+++ incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/LinkedEntity.java Thu Sep 22 06:51:30 2011
@@ -0,0 +1,222 @@
+package org.apache.stanbol.enhancer.engines.keywordextraction.linking;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Set;
+
+import opennlp.tools.util.Span;
+
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.stanbol.commons.opennlp.TextAnalyzer.AnalysedText;
+
+/**
+ * The occurrence of a detected Entity within the content. <p>
+ * Note that this class already stores the information in a structure as needed
+ * to write Enhancements as defined by the upcoming 2nd version of the
+ * Apache Stanbol Enhancement Structure (EntityAnnotation, TextOccurrence and
+ * EntitySuggestion). However it can also be used to write
+ * TextAnnotations and EntityAnnotations as defined by the 1st version.
+ * @author Rupert Westenthaler
+ *
+ */
+public class LinkedEntity {
+    /**
+     * A mention of a linked entity within the text
+     * @author Rupert Westenthaler
+     *
+     */
+    public class Occurrence {
+        /**
+         * The maximum number of chars up to which the current sentence is used
+         * as context for TextOccurrences. If the sentence is longer an area of
+         * {@link #CONTEXT_TOKEN_COUNT} tokens before and after the current selected
+         * text is used as context.<p>
+         * This is especially important in case no sentence detector is available
+         * for the current content. Because in this case the whole text is
+         * parsed as a single Sentence.
+         * TODO: Maybe find a more clever way to determine the context
+         */
+        public static final int MAX_CONTEXT_LENGTH = 200;
+        /**
+         * The number of tokens surrounding the current selected text used to
+         * calculate the context if the current sentence is longer than
+         * {@link #MAX_CONTEXT_LENGTH} chars.<p>
+         * This is especially important in case no sentence detector is available
+         * for the current content. Because in this case the whole text is
+         * parsed as a single Sentence.
+         * TODO: Maybe find a more clever way to determine the context
+         */
+        public static final int CONTEXT_TOKEN_COUNT = 5;
+        private final int start;
+        private final int end;
+        private final String context;
+
+        private Occurrence(AnalysedText sentence,int token) {
+            this(sentence,token,1);
+        }
+        private Occurrence(AnalysedText sentence,int startToken,int tokenSpan){
+            this.start = sentence.getOffset()+sentence.getTokens().get(startToken).getStart();
+            this.end = sentence.getOffset()+sentence.getTokens().get(startToken+tokenSpan-1).getEnd();
+            String context = sentence.getText();
+            if(context.length() > MAX_CONTEXT_LENGTH){ //too long: only use the surrounding tokens
+                Span contextTokenSpan = new Span( //start and end are both used as token indexes
+                    Math.max(0, startToken-CONTEXT_TOKEN_COUNT),
+                    Math.min(startToken+tokenSpan+CONTEXT_TOKEN_COUNT, sentence.getTokens().size())-1);
+                context = context.substring(sentence.getTokens().get(contextTokenSpan.getStart()).getStart(),
+                    sentence.getTokens().get(contextTokenSpan.getEnd()).getEnd());
+            }
+            this.context = context;
+        }
+        /**
+         * The context (surrounding text) of the occurrence.
+         * @return the context
+         */
+        public String getContext() {
+            return context;
+        }
+        /**
+         * The start index of the occurrence
+         * @return the start index relative to the start of the text 
+         */
+        public int getStart() {
+            return start;
+        }
+        /**
+         * the end index of the occurrence
+         * @return the end index relative to the start of the text
+         */
+        public int getEnd() {
+            return end;
+        }
+        /**
+         * The selected text of this occurrence. Actually returns the value
+         * of {@link LinkedEntity#getSelectedText()}, because all occurrences share it
+         * @return the selected text
+         */
+        public String getSelectedText(){
+            return LinkedEntity.this.getSelectedText();
+        }
+        @Override
+        public String toString() {
+            return start+","+end;
+        }
+        @Override
+        public int hashCode() {
+            return context.hashCode()+start+end;
+        }
+        @Override
+        public boolean equals(Object arg0) {
+            return arg0 instanceof Occurrence && 
+                ((Occurrence)arg0).start == start &&
+                ((Occurrence)arg0).end == end &&
+                ((Occurrence)arg0).context.equals(context);
+        }
+    }
+    private final String selectedText;
+    private final Set<UriRef> types;
+    private final List<Suggestion> suggestions;
+    private final Collection<Occurrence> occurrences = new ArrayList<Occurrence>();
+    private final Collection<Occurrence> unmodOccurrences = Collections.unmodifiableCollection(occurrences);
+    /**
+     * Creates a new LinkedEntity for the parsed parameters
+     * @param selectedText the selected text
+     * @param suggestions the entity suggestions
+     * @param types the types of the linked entity. 
+     */
+    protected LinkedEntity(String selectedText, List<Suggestion> suggestions, Set<UriRef> types) {
+        this.suggestions = Collections.unmodifiableList(suggestions);
+        this.selectedText = selectedText;
+        this.types = Collections.unmodifiableSet(types);
+    }
+    /**
+     * Creates a new Linked Entity including the first {@link Occurrence}
+     * @param sentence the sentence (context) for the occurrence.
+     * @param startToken the index of the start token
+     * @param tokenSpan the number of tokens included in this span
+     * @param suggestions the entity suggestions
+     * @param types the types of the linked entity. 
+     */
+    protected LinkedEntity(AnalysedText sentence,int startToken,int tokenSpan, 
+                           List<Suggestion> suggestions, Set<UriRef> types) {
+        //NOTE: tokenSpan is a count -> the last selected token is startToken+tokenSpan-1
+        this(sentence.getText().substring(sentence.getTokens().get(startToken).getStart(),
+            sentence.getTokens().get(startToken+tokenSpan-1).getEnd()),suggestions,types);
+        addOccurrence(sentence, startToken,tokenSpan);
+    }
+    /**
+     * Getter for the selected text
+     * @return the selected text
+     */
+    public String getSelectedText() {
+        return selectedText;
+    }
+    
+    /**
+     * Getter for read only list of types
+     * @return the types
+     */
+    public Set<UriRef> getTypes() {
+        return types;
+    }
+    /**
+     * Adds a new Occurrence
+     * @param sentence the analysed sentence
+     * @param startToken the start token
+     * @param tokenSpan the number of tokens included in this span
+     * @return the new Occurrence also added to {@link #getOccurrences()}
+     */
+    protected Occurrence addOccurrence(AnalysedText sentence,int startToken,int tokenSpan){
+        Occurrence o = new Occurrence(sentence, startToken, tokenSpan);
+        occurrences.add(o);
+        return o;
+    }
+    /**
+     * Getter for the read only list of Occurrences
+     * @return the occurrences
+     */
+    public Collection<Occurrence> getOccurrences(){
+        return unmodOccurrences;
+    }
+    /**
+     * Getter for the read only list of Suggestions
+     * @return the suggestions
+     */
+    public List<Suggestion> getSuggestions(){
+        return suggestions;
+    }
+    
+    /**
+     * Getter for the Score
+     * @return The score of the first element in {@link #getSuggestions()} or 
+     * <code>0</code> if there are no suggestions
+     */
+    public double getScore(){
+        return suggestions.isEmpty() ? 0f : suggestions.get(0).getScore();
+    }
+    
+    /**
+     * Only considers the {@link #getSelectedText()}, because it is assumed that
+     * for the same selected text there MUST BE always the same suggestions with
+     * the same types and occurrences.
+     */
+    @Override
+    public int hashCode() {
+        return selectedText.hashCode();
+    }
+    /**
+     * Only considers the {@link #getSelectedText()}, because it is assumed that
+     * for the same selected text there MUST BE always the same suggestions with
+     * the same types and occurrences.
+     */
+    @Override
+    public boolean equals(Object arg0) {
+        return arg0 instanceof LinkedEntity && 
+        ((LinkedEntity)arg0).selectedText.equals(selectedText);
+    }
+    @Override
+    public String toString() {
+        return selectedText+'@'+occurrences+"->"+suggestions;
+    }
+}

Propchange: incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/LinkedEntity.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/Suggestion.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/Suggestion.java?rev=1173968&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/Suggestion.java (added)
+++ incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/Suggestion.java Thu Sep 22 06:51:30 2011
@@ -0,0 +1,337 @@
+/**
+ * 
+ */
+package org.apache.stanbol.enhancer.engines.keywordextraction.linking;
+
+import java.util.Comparator;
+import java.util.Iterator;
+
+import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+import org.apache.stanbol.entityhub.servicesapi.model.Text;
+import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum;
+
+/**
+ * A suggestion of an entity in the {@link EntitySearcher} for a part of the
+ * text. This class does not include the actual position within the Text, 
+ * because it is intended to be used in combination with {@link LinkedEntity}.<p>
+ * This class also manages redirected entities and a state if redirects were
+ * already processed for this suggestion.<p>
+ * In addition this class also defines a set of {@link Comparator}s that are 
+ * used to sort suggestions based on how well they fit the text.
+ * @author Rupert Westenthaler
+ *
+ */
+public class Suggestion implements Comparable<Suggestion>{
+    private MATCH match = MATCH.NONE;
+    private int span = 0;
+    private int matchCount = 0;
+    private Text label;
+    private int labelTokenCount = 0;
+    private final Representation result;
+    private Representation redirectsTo;
+    private boolean redirectProcessed;
+    
+    private double score;
+    public static enum MATCH {
+        /**
+         * No match (too few tokens, wrong order ...)
+         */
+        NONE,
+        /**
+         * Not all tokens but sufficient to suggest (with lower score)
+         */
+        PARTIAL,
+        /**
+         * All requested Tokens match, but it is no exact match e.g. because
+         * the label defines some additional tokens
+         */
+        FULL,
+        /**
+         * The label of the suggested Entity is exactly the requested string
+         */
+        EXACT,
+    }
+    protected Suggestion(Representation result){
+        if(result == null){
+            throw new IllegalArgumentException("The parsed Result MUST NOT be NULL!");
+        }
+        this.result = result;
+        //TODO Do no longer use the resultScore as the score. We need to provide an
+        //own algorithm to calculate scores!
+//        this.resultScore = result.getFirst(RdfResourceEnum.resultScore.getUri(), Float.class);
+    }
+    /**
+     * Updates this suggestion. For {@link MATCH#NONE} span, count and label are reset.
+     * @param match the match type
+     * @param span the number of token this suggestion spans
+     * @param count the number of token that match with the suggestion within the span
+     * @param label the label that matches the tokens
+     * @param labelTokenCount the number of tokens of the label
+     * @throws IllegalArgumentException if span and count are not valid for the match type
+     */
+    protected void updateMatch(MATCH match,int span,int count,Text label,int labelTokenCount){
+        //validate before changing any state to avoid later errors that are hard to debug
+        if(match == MATCH.NONE){ //reset: the parsed span, count and label are ignored
+            this.match = match;
+            this.span = 0;
+            this.matchCount = 0;
+            this.label = null;
+            this.labelTokenCount = 0;
+            return;
+        }
+        if(span < 1 || count < 1){
+            throw new IllegalArgumentException("For "+match+" matches the token span and count MUST BE > 0");
+        }
+        if(match == MATCH.PARTIAL){
+            if(span <= count){
+                throw new IllegalArgumentException("For "+match+" matches the token span MUST BE > than the token count!");
+            }
+        } else if(span != count){
+            throw new IllegalArgumentException("For "+match+" matches the token span MUST BE equals to the token count!");
+        }
+        this.match = match;
+        this.span = span;
+        this.matchCount = count;
+        this.label = label;
+        this.labelTokenCount = labelTokenCount;
+    }
+    /**
+     * Getter for the number of Tokens of the label. Usually needed to calculate
+     * the score (how good the label matches)
+     * @return the labelTokenCount
+     */
+    public final int getLabelTokenCount() {
+        return labelTokenCount;
+    }
+    /**
+     * Setter for the {@link MATCH} type of this suggestion
+     * @param match the match type
+     */
+    protected void setMatch(MATCH match) {
+        this.match = match;
+    }
+
+    /**
+     * Getter for the type of the match
+     * @return The type of the match
+     */
+    public final MATCH getMatch() {
+        return match;
+    }
+    /**
+     * Getter for the number of the token matched by this suggestion
+     * @return The number of the token matched by this suggestion
+     */
+    public final int getSpan() {
+        return span;
+    }
+    /**
+     * Getter for the number of matching tokens.
+     * @return The number of matching tokens.
+     */
+    public final int getMatchCount(){
+        return matchCount;
+    }
+    /**
+     * The actual label of the {@link #getResult() result} that produced the
+     * best match for the given search tokens.
+     * @return the label
+     */
+    public final Text getMatchedLabel() {
+        return label;
+    }
+    protected final void setMatchedLabel(Text label){
+        this.label = label;
+    }
+    /**
+     * Getter for the best label in the given language. Prefers a label in that
+     * language that is equal to the {@link #getMatchedLabel() matched label}.
+     * @param nameField the field used to search for labels
+     * @param language the language (MUST NOT be <code>null</code>)
+     * @return the best match or {@link Suggestion#getMatchedLabel()} if non is found
+     */
+    public Text getBestLabel(String nameField, String language){
+        Representation rep = getRepresentation();
+        // take any label at first, prefer the first label in the requested language
+        // and stop as soon as a label in that language equals the matched label
+        Text label = null;
+        Text matchedLabel = getMatchedLabel(); //may be null (e.g. MATCH.NONE)
+        Iterator<Text> labels = rep.getText(nameField);
+        boolean langFound = false;
+        boolean matchFound = false;
+        while (labels.hasNext() && !matchFound) {
+            Text actLabel = labels.next();
+            if (label == null) { //take any label at first
+                label = actLabel;
+            }
+            //now we have already a label check the language
+            String actLang = actLabel.getLanguage();
+            //use startWith to match also en-GB and en-US ...
+            if (actLang != null && actLang.startsWith(language)) {
+                //compare with the label used for the match (and NOT with the label itself)
+                matchFound = matchedLabel != null && actLabel.getText().equalsIgnoreCase(matchedLabel.getText());
+                if (matchFound || !langFound) { //otherwise keep the first label of the language
+                    label = actLabel;
+                    langFound = true;
+                }
+            }
+        }
+        if (label == null) { //if no label was found ... return the one used for the match
+            label = matchedLabel;
+        }
+        return label;
+    }
+    public final Representation getResult(){
+        return result;
+    }
+    @Override
+    public String toString() {
+        return label+"[m="+match+(match != MATCH.NONE ? ",c="+matchCount+",s="+span+']':"]");
+    }
+    /**
+     * The {@link RdfResourceEnum#entityRank entity rank} of the {@link #getResult() result}.
+     * The entity rank is the relative importance of an entity within an
+     * Collection of Entities (ReferencedSite, Thesaurus, Taxonomy ...).<p>
+     * This method returns the rank of the entity returned by
+     * {@link #getRepresentation()}. Therefore if an redirect is active it will
+     * be the rank of the redirected entity and not of the suggested result.
+     * @return the rank of the entity or <code>null</code> if not available
+     */
+    public Float getEntityRank() {
+        return getRepresentation().getFirst(RdfResourceEnum.entityRank.getUri(), Float.class);
+    }
+    /**
+     * @param score the score to set
+     */
+    public void setScore(double score) {
+        this.score = score;
+    }
+    /**
+     * @return the score
+     */
+    public double getScore() {
+        return score;
+    }
+    /**
+     * Returns <code>true</code> if the result has a registered redirect
+     * @return <code>true</code> if a redirect is present. Otherwise <code>false</code>
+     */
+    public boolean isRedirect(){
+        return redirectsTo != null;
+    }
+    /**
+     * Setter for Entity the {@link #getResult() result} of this match redirects
+     * to. Also sets {@link #setRedirectProcessed(boolean)} to <code>true</code>
+     * @param redirect the redirected entity or <code>null</code> if no redirect
+     * is present
+     */
+    protected void setRedirect(Representation redirect){
+        this.redirectsTo = redirect;
+        setRedirectProcessed(true);
+    }
+    /**
+     * Setter for the state if the redirects for this resultMatch were already
+     * processed. Calling {@link #setRedirect(Representation)} will set this
+     * automatically to <code>true</code>
+     * @param state the state.
+     */
+    protected void setRedirectProcessed(boolean state){
+        this.redirectProcessed = state;
+    }
+    /**
+     * Getter for the state if the redirect was processed for this ResultMatch
+     * @return the state
+     */
+    protected boolean isRedirectedProcessed(){
+        return redirectProcessed;
+    }
+    /**
+     * Getter for the Entity the {@link #getResult()} of this Entity redirects
+     * to. Returns <code>null</code> if there is no redirect. 
+     * @return the entity the {@link #getResult()} redirects to or <code>null</code>
+     * if there is no redirect
+     */
+    public Representation getRedirect(){
+        return redirectsTo;
+    }
+    
+    /**
+     * getter for the Representation of this result. In case of 
+     * <code>{@link #isRedirect()} == true</code> it returns the 
+     * {@link #getRedirect()} otherwise it returns the {@link #getResult()}.<p>
+     * To check explicitly for the result or the redirect one needs to use
+     * {@link #getResult()} and {@link #getRedirect()} instead.
+     * @return The representation for this match. might be directly the 
+     * {@link #getResult() result} or if present the 
+     * {@link #getRedirect() redirected} resource. 
+     */
+    public final Representation getRepresentation(){
+        return redirectsTo == null ? result : redirectsTo;
+    }
+    
+    /**
+     * Compares {@link Suggestion} first based on the {@link Suggestion#getMatch()} value
+     * and secondly based on the {@link RdfResourceEnum#entityRank}.
+     */
+    public static final Comparator<Suggestion> MATCH_TYPE_SUGGESTION_COMPARATOR = new Comparator<Suggestion>() {
+        @Override
+        public int compare(Suggestion arg0, Suggestion arg1) {
+            if(arg0.match != arg1.match){
+                return arg1.match.ordinal() - arg0.match.ordinal(); //higher ordinal first
+            } else if(arg0.match == MATCH.NONE){
+                return 0; //do not further sort entries that do not match
+            } else {
+                Float arg0Rank = arg0.getEntityRank();
+                if(arg0Rank == null){
+                    arg0Rank = Float.valueOf(0);
+                }
+                Float arg1Rank = arg1.getEntityRank();
+                if(arg1Rank == null){
+                    arg1Rank = Float.valueOf(0);
+                }
+                return arg1Rank.compareTo(arg0Rank); //higher ranks first
+            }
+        }
+    };
+    /**
+     * Compares {@link Suggestion}s based on the {@link Suggestion#getScore()}.
+     * In case the scores are equals the call is forwarded to the
+     * {@link Suggestion#DEFAULT_SUGGESTION_COMPARATOR}.<p>
+     * This is NOT the default {@link Comparator} because score values are
+     * usually only calculated relative to the best matching suggestions and
+     * therefore only available later.
+     */
+    public static final Comparator<Suggestion> SCORE_COMPARATOR = new Comparator<Suggestion>() {
+        @Override
+        public int compare(Suggestion arg0, Suggestion arg1) {
+            return arg0.getScore() > arg1.getScore() ? -1 : //bigger score first
+                arg0.getScore() < arg1.getScore() ? 1 : 
+                    DEFAULT_SUGGESTION_COMPARATOR.compare(arg0, arg1);
+        }
+    };
+    /**
+     * Compares {@link Suggestion} first based on the {@link Suggestion#getMatchCount()} 
+     * number of matched tokens. If the number of the matched tokens is equals or
+     * any of the parsed {@link Suggestion} instances has {@link MATCH#NONE} it
+     * forwards the request to the {@link #MATCH_TYPE_SUGGESTION_COMPARATOR}.
+     */
+    public static final Comparator<Suggestion> DEFAULT_SUGGESTION_COMPARATOR = new Comparator<Suggestion>() {
+        @Override
+        public int compare(Suggestion arg0, Suggestion arg1) {
+            if(arg0.match == MATCH.NONE || arg1.match == MATCH.NONE ||
+                    arg0.matchCount == arg1.matchCount){
+                return MATCH_TYPE_SUGGESTION_COMPARATOR.compare(arg0, arg1);
+            } else {
+                return arg1.matchCount - arg0.matchCount; //bigger should be first
+            }
+        }
+    };
+    /**
+     * Implementation of the {@link Comparable} interface using
+     * {@link #DEFAULT_SUGGESTION_COMPARATOR}.
+     */
+    @Override
+    public int compareTo(Suggestion other) {
+        return DEFAULT_SUGGESTION_COMPARATOR.compare(this, other);
+    }
+}
\ No newline at end of file

Propchange: incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/Suggestion.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/impl/EntitySearcherUtils.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/impl/EntitySearcherUtils.java?rev=1173968&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/impl/EntitySearcherUtils.java (added)
+++ incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/impl/EntitySearcherUtils.java Thu Sep 22 06:51:30 2011
@@ -0,0 +1,51 @@
+package org.apache.stanbol.enhancer.engines.keywordextraction.linking.impl;
+
+import java.util.List;
+import java.util.Set;
+
+import org.apache.stanbol.enhancer.engines.keywordextraction.linking.EntitySearcher;
+import org.apache.stanbol.entityhub.servicesapi.query.FieldQuery;
+import org.apache.stanbol.entityhub.servicesapi.query.FieldQueryFactory;
+import org.apache.stanbol.entityhub.servicesapi.query.TextConstraint;
+
+public class EntitySearcherUtils {
+    /**
+     * Checks the parameters as parsed to
+     * {@link EntitySearcher#lookup(String, Set, List, String...)}
+     * and builds the {@link FieldQuery} representing this lookup.
+     * @param factory the factory used to create the query
+     * @param field the searched field; always part of the selected fields
+     * @param includeFields additional fields to select or <code>null</code>
+     * @param search the search strings used for the {@link TextConstraint}
+     * @param languages the languages used for the {@link TextConstraint}
+     * @return the query (limited to 20 results)
+     * @throws IllegalArgumentException if field or search is NULL or empty
+     */
+    public final static FieldQuery createFieldQuery(FieldQueryFactory factory,
+                                        String field,
+                                        Set<String> includeFields,
+                                        List<String> search,
+                                        String... languages) {
+        final boolean hasField = field != null && !field.isEmpty();
+        if(!hasField){
+            throw new IllegalArgumentException("The parsed search field MUST NOT be NULL nor empty");
+        }
+        final boolean hasSearch = search != null && !search.isEmpty();
+        if(!hasSearch){
+            throw new IllegalArgumentException("The parsed list of search strings MUST NOT be NULL nor empty");
+        }
+        final FieldQuery fieldQuery = factory.createFieldQuery();
+        //select the search field first unless it is already listed in the includeFields
+        if(includeFields == null || !includeFields.contains(field)){
+            fieldQuery.addSelectedField(field);
+        }
+        if(includeFields != null){
+            for(String included : includeFields){
+                fieldQuery.addSelectedField(included);
+            }
+        }
+        fieldQuery.setLimit(20);//TODO make configurable
+        fieldQuery.setConstraint(field, new TextConstraint(search, languages));
+        return fieldQuery;
+    }
+}

Propchange: incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/impl/EntitySearcherUtils.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/impl/EntityhubSearcher.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/impl/EntityhubSearcher.java?rev=1173968&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/impl/EntityhubSearcher.java (added)
+++ incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/impl/EntityhubSearcher.java Thu Sep 22 06:51:30 2011
@@ -0,0 +1,68 @@
+package org.apache.stanbol.enhancer.engines.keywordextraction.linking.impl;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.stanbol.enhancer.engines.keywordextraction.linking.EntitySearcher;
+import org.apache.stanbol.entityhub.servicesapi.Entityhub;
+import org.apache.stanbol.entityhub.servicesapi.EntityhubException;
+import org.apache.stanbol.entityhub.servicesapi.model.Entity;
+import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+import org.apache.stanbol.entityhub.servicesapi.query.FieldQuery;
+import org.apache.stanbol.entityhub.servicesapi.query.QueryResultList;
+import org.osgi.framework.BundleContext;
+
+/** {@link EntitySearcher} that gets and searches entities by using the {@link Entityhub} service. */
+public final class EntityhubSearcher extends TrackingEntitySearcher<Entityhub> implements EntitySearcher {
+    public EntityhubSearcher(BundleContext context) {
+        super(context,Entityhub.class,null);
+    }
+    
+    @Override
+    public Representation get(String id,Set<String> includeFields) {
+        //NOTE: includeFields is not used; the Representation is returned as provided by the Entityhub
+        if(id == null || id.isEmpty()){
+            return null;
+        }
+        Entityhub entityhub = getSearchService();
+        if(entityhub == null){
+            throw new IllegalStateException("The Entityhub is currently not active");
+        }
+        Entity entity;
+        try {
+            entity = entityhub.getEntity(id);
+        }  catch (EntityhubException e) {
+            throw new IllegalStateException("Exception while getting "+id+
+                " from the Entityhub",e);
+        }
+        return entity == null ? null : entity.getRepresentation();
+    }
+    @Override
+    public Collection<? extends Representation> lookup(String field,
+                                           Set<String> includeFields,
+                                           List<String> search,
+                                           String... languages) throws IllegalStateException {
+        Entityhub entityhub = getSearchService();
+        if(entityhub == null){
+            throw new IllegalStateException("The Entityhub is currently not active");
+        }
+        FieldQuery query = EntitySearcherUtils.createFieldQuery(entityhub.getQueryFactory(),
+            field, includeFields, search, languages);
+        QueryResultList<Representation> results;
+        try {
+            results = entityhub.find(query);
+        } catch (EntityhubException e) {
+            throw new IllegalStateException("Exception while searching for "+
+                search+'@'+Arrays.toString(languages)+" in the Entityhub", e);
+        }
+        return results.results();
+    }
+
+    @Override
+    public boolean supportsOfflineMode() {
+        return true; //the entityhub is always offline
+    }
+}

Propchange: incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/impl/EntityhubSearcher.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/impl/OpenNlpAnalysedContentFactory.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/impl/OpenNlpAnalysedContentFactory.java?rev=1173968&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/impl/OpenNlpAnalysedContentFactory.java (added)
+++ incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/impl/OpenNlpAnalysedContentFactory.java Thu Sep 22 06:51:30 2011
@@ -0,0 +1,137 @@
+package org.apache.stanbol.enhancer.engines.keywordextraction.linking.impl;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+
+import opennlp.tools.util.Span;
+
+import org.apache.stanbol.commons.opennlp.PosTagsCollectionEnum;
+import org.apache.stanbol.commons.opennlp.PosTypeChunker;
+import org.apache.stanbol.commons.opennlp.PosTypeCollectionType;
+import org.apache.stanbol.commons.opennlp.TextAnalyzer;
+import org.apache.stanbol.commons.opennlp.TextAnalyzer.AnalysedText;
+import org.apache.stanbol.commons.opennlp.TextAnalyzer.AnalysedText.Token;
+import org.apache.stanbol.enhancer.engines.keywordextraction.linking.AnalysedContent;
+/**
+ * Factory to {@link #create(String, String)} {@link AnalysedContent} instances
+ * based on OpenNLP and the {@link TextAnalyzer} utility.<p>
+ * The created instances use a set of POS tags to determine if {@link Token}s
+ * are processed (used to search for terms) or not. This set is looked up from
+ * the {@link PosTagsCollectionEnum} by using the language of the content and
+ * {@link PosTypeCollectionType#NOUN}.<p>
+ * POS tags can also be registered per language by calling
+ * {@link #setLanguagePosTags(String, Set)}. The {@link PosTagsCollectionEnum#EN_NOUN}
+ * set is registered for the <code>null</code> language by the constructor.
+ *
+ * @author Rupert Westenthaler
+ *
+ */
+public class OpenNlpAnalysedContentFactory {
+    
+
+    private final TextAnalyzer textAnalyzer;
+    
+    private final Map<String,Set<String>> languagePosTags = new HashMap<String,Set<String>>();
+    /**
+     * The set of POS (Part-of-Speech) tags also used by the 
+     * {@link PosTypeChunker#DEFAULT_BUILD_CHUNK_POS_TYPES} as defaults.
+     * This will select Nouns and foreign words as defined in the 
+     * <a href="http://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html">
+     * Penn Treebank</a> tag set. Note that this is an instance field (not static).
+     */
+    public final Set<String> DEFAULT_POS_TAGS = PosTagsCollectionEnum.EN_NOUN.getTags();
+        
+    
+    public static OpenNlpAnalysedContentFactory getInstance(TextAnalyzer textAnalyzer){
+        return new OpenNlpAnalysedContentFactory(textAnalyzer);
+    }
+    /**
+     * Setter for the POS tags used to process Words in the given language.
+     * The <code>null</code> language is intended as the fallback used if no
+     * configuration is available for a given language. Setting the posTags to
+     * <code>null</code> removes the language from the configuration. The
+     * parsed set is wrapped as unmodifiable, but not copied.<p>
+     * NOTE(review): within this class the configuration is only written; the
+     * {@link AnalysedContent} instances created by this factory take their
+     * POS tags from {@link PosTagsCollectionEnum#getPosTagCollection} instead.
+     * TODO confirm whether the configured tags should be used by {@link #create}.
+     * @param language the language
+     * @param posTags the pos tags
+     */
+    public void setLanguagePosTags(String language, Set<String> posTags){
+        if(posTags != null){
+            languagePosTags.put(language, Collections.unmodifiableSet(posTags));
+        } else {
+            languagePosTags.remove(language);
+        }
+    }
+    
+    protected OpenNlpAnalysedContentFactory(TextAnalyzer textAnalyzer){
+        if(textAnalyzer == null){
+            throw new IllegalArgumentException("The parsed TextAnalyzer MUST NOT be NULL!");
+        }
+        this.textAnalyzer = textAnalyzer;
+        setLanguagePosTags(null, DEFAULT_POS_TAGS);
+    }
+    
+    public AnalysedContent create(String text,String language){
+        return new OpenNlpAnalysedContent(text, language);
+    }
+
+    /**
+     * Implementation of the {@link AnalysedContent} based on OpenNLP and the
+     * {@link TextAnalyzer} component. The constructor requests the sentence
+     * iterator and the POS tags for the language of the content.
+     * @author Rupert Westenthaler
+     */
+    private class OpenNlpAnalysedContent implements AnalysedContent{
+        private final String language;
+        private final Iterator<AnalysedText> sentences;
+        private final Set<String> posTags;
+
+        private OpenNlpAnalysedContent(String text, String lang){
+            this.language = lang;
+            this.sentences = textAnalyzer.analyse(text, lang);
+            this.posTags = PosTagsCollectionEnum.getPosTagCollection(lang, PosTypeCollectionType.NOUN);
+        }
+        
+        /**
+         * Getter for the Iterator over the analysed sentences. This method
+         * always returns the Iterator instance created by the constructor.
+         * @return the iterator over the analysed sentences
+         */
+        public Iterator<AnalysedText> getAnalysedText() {
+            return sentences;
+        }
+        /**
+         * Called to check if a {@link Token} should be used to search for
+         * Concepts within the Taxonomy based on the POS tag of the Token.
+         * @param posTag the POS tag to check
+         * @return <code>true</code> if Tokens with this POS tag should be
+         * included in searches. Otherwise <code>false</code>. Returns
+         * <code>null</code> if no POS tag set is available for the
+         * language parsed in the constructor
+         */
+        @Override
+        public Boolean processPOS(String posTag) {
+            return posTags != null ? Boolean.valueOf(posTags.contains(posTag)) : null;
+        }
+        /**
+         * Not yet implemented.
+         * @param chunkTag the type of the chunk
+         * @return always <code>null</code> as long as this is not implemented
+         */
+        @Override
+        public Boolean processChunk(String chunkTag) {
+            // TODO implement
+            return null;
+        }
+        @Override
+        public String[] tokenize(String label) {
+            return textAnalyzer.getTokenizer(language).tokenize(label);
+        }
+    }
+}

Propchange: incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/impl/OpenNlpAnalysedContentFactory.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/impl/ReferencedSiteSearcher.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/impl/ReferencedSiteSearcher.java?rev=1173968&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/impl/ReferencedSiteSearcher.java (added)
+++ incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/impl/ReferencedSiteSearcher.java Thu Sep 22 06:51:30 2011
@@ -0,0 +1,78 @@
+package org.apache.stanbol.enhancer.engines.keywordextraction.linking.impl;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.stanbol.enhancer.engines.keywordextraction.linking.EntitySearcher;
+import org.apache.stanbol.entityhub.servicesapi.model.Entity;
+import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+import org.apache.stanbol.entityhub.servicesapi.query.FieldQuery;
+import org.apache.stanbol.entityhub.servicesapi.query.QueryResultList;
+import org.apache.stanbol.entityhub.servicesapi.site.ReferencedSite;
+import org.apache.stanbol.entityhub.servicesapi.site.ReferencedSiteException;
+import org.apache.stanbol.entityhub.servicesapi.site.SiteConfiguration;
+import org.osgi.framework.BundleContext;
+
+public final class ReferencedSiteSearcher extends TrackingEntitySearcher<ReferencedSite> implements EntitySearcher {
+    
+    private final String siteId;
+
+    public ReferencedSiteSearcher(BundleContext context,String siteId) {
+        super(context, ReferencedSite.class, 
+            Collections.singletonMap(SiteConfiguration.ID,siteId));
+        this.siteId = siteId;
+    }
+    
+    @Override
+    public Representation get(String id,Set<String> includeFields) {
+        if(id == null || id.isEmpty()){
+            return null;
+        }
+        Entity entity;
+        ReferencedSite site = getSearchService();
+        if(site == null){
+            throw new IllegalStateException("ReferencedSite "+siteId+" is currently not available");
+        }
+        try {
+            entity = site.getEntity(id);
+        }  catch (ReferencedSiteException e) {
+            throw new IllegalStateException("Exception while getting "+id+
+                " from the ReferencedSite "+site.getId(),e);
+        }
+        return entity == null ? null : entity.getRepresentation();
+    }
+
+    @Override
+    public Collection<? extends Representation> lookup(String field,
+                                           Set<String> includeFields,
+                                           List<String> search,
+                                           String... languages) throws IllegalStateException {
+        //build the query and than return the result
+        ReferencedSite site = getSearchService();
+        if(site == null){
+            throw new IllegalStateException("ReferencedSite "+siteId+" is currently not available");
+        }
+        FieldQuery query = EntitySearcherUtils.createFieldQuery(site.getQueryFactory(), 
+            field, includeFields, search, languages);
+        QueryResultList<Representation> results;
+        try {
+            results = site.find(query);
+        } catch (ReferencedSiteException e) {
+            throw new IllegalStateException("Exception while searchign for "+
+                search+'@'+Arrays.toString(languages)+"in the ReferencedSite "+
+                site.getId(), e);
+        }
+        return results.results();
+    }
+
+    @Override
+    public boolean supportsOfflineMode() {
+        ReferencedSite site = getSearchService();
+        //Do not throw an exception here if the site is not available. Just return false
+        return site == null ? false : site.supportsLocalMode();
+    }
+}

Propchange: incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/impl/ReferencedSiteSearcher.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/impl/TrackingEntitySearcher.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/impl/TrackingEntitySearcher.java?rev=1173968&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/impl/TrackingEntitySearcher.java (added)
+++ incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/impl/TrackingEntitySearcher.java Thu Sep 22 06:51:30 2011
@@ -0,0 +1,85 @@
+package org.apache.stanbol.enhancer.engines.keywordextraction.linking.impl;
+
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.stanbol.enhancer.engines.keywordextraction.linking.EntitySearcher;
+import org.apache.stanbol.entityhub.servicesapi.site.ReferencedSite;
+import org.apache.stanbol.entityhub.servicesapi.site.SiteConfiguration;
+import org.osgi.framework.BundleContext;
+import org.osgi.framework.Filter;
+import org.osgi.framework.InvalidSyntaxException;
+import org.osgi.service.cm.ConfigurationException;
+import org.osgi.util.tracker.ServiceTracker;
+/**
+ * Abstract super class for EntitySearchers that need to track the OSGI service
+ * used to lookup Entities. Used by the {@link EntityhubSearcher} and the 
+ * {@link ReferencedSiteSearcher} implementation. The {@link ServiceTracker} is
+ * opened by the constructor and released by calling {@link #close()}.
+ * @author Rupert Westenthaler
+ */
+public abstract class TrackingEntitySearcher<T> implements EntitySearcher {
+    
+    private ServiceTracker searchServiceTracker;
+    
+    /**
+     * Creates a new instance for the parsed parameter and opens the tracker.
+     * @param context the BundleContext used to create the {@link ServiceTracker}
+     * @param serviceClass the class of the service to track
+     * @param filterEntries key/value pairs added as equality constraints to the
+     * filter. If <code>null</code> or empty the service is tracked by class only
+     * @throws IllegalArgumentException on an empty key/value or if no Filter can be built
+     */
+    protected TrackingEntitySearcher(BundleContext context, Class<T> serviceClass,Map<String,String> filterEntries){
+        if(filterEntries == null || filterEntries.isEmpty()){
+            searchServiceTracker = new ServiceTracker(context, serviceClass.getName(), null);
+        } else {
+            StringBuilder filterString = new StringBuilder();
+            filterString.append(String.format("(&(objectclass=%s)",serviceClass.getName()));
+            for(Entry<String,String> filterEntry : filterEntries.entrySet()){
+                String key = filterEntry.getKey();
+                String value = filterEntry.getValue();
+                if(key == null || key.isEmpty() || value == null || value.isEmpty()){
+                    throw new IllegalArgumentException("Illegal filterEntry "+filterEntry+". Both key and value MUST NOT be NULL nor empty!");
+                }
+                filterString.append(String.format("(%s=%s)", key, value)); //NOTE: values are not escaped
+            }
+            filterString.append(')');
+            try {
+                searchServiceTracker = new ServiceTracker(context, 
+                    context.createFilter(filterString.toString()), null);
+            } catch (InvalidSyntaxException e) {
+                throw new IllegalArgumentException(String.format(
+                    "Unable to build Filter for '%s' (class=%s,filter=%s)", 
+                    filterString,serviceClass,filterEntries),e);
+            }
+        }
+        searchServiceTracker.open();
+    }
+    /**
+     * Getter for the Service used to search for Entities. If the service is
+     * currently not available, than this method will return <code>null</code>
+     * @return The service or <code>null</code> if not available
+     * @throws IllegalStateException if this instance was already closed
+     */
+    @SuppressWarnings("unchecked") //type is ensured by OSGI
+    protected T getSearchService(){
+        if(searchServiceTracker == null){
+            throw new IllegalStateException("This TrackingEntitySearcher is already closed!");
+        }
+        return (T) searchServiceTracker.getService();
+    }
+    /**
+     * Closes the {@link ServiceTracker}. Further calls to this method are ignored.
+     */
+    public void close(){
+        if(searchServiceTracker != null){ //null if already closed (e.g. finalize after close)
+            searchServiceTracker.close();
+            searchServiceTracker = null;
+        }
+    }
+    @Override
+    protected void finalize() throws Throwable {
+        try { close(); } finally { super.finalize(); }
+    }
+}

Propchange: incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/impl/TrackingEntitySearcher.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/resources/OSGI-INF/metatype/metatype.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1173968&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/resources/OSGI-INF/metatype/metatype.properties (added)
+++ incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/resources/OSGI-INF/metatype/metatype.properties Thu Sep 22 06:51:30 2011
@@ -0,0 +1,56 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#===============================================================================
+#Properties and Options used to configure the KeywordLinkingEngine
+#===============================================================================
+org.apache.stanbol.enhancer.engines.keywordextraction.engine.KeywordLinkingEngine.name=Apache Stanbol Enhancement Engine for linking Keywords
+org.apache.stanbol.enhancer.engines.keywordextraction.engine.KeywordLinkingEngine.description=An engine that extracts keywords present within a Controlled Vocabulary mentioned within parsed ContentItem
+
+org.apache.stanbol.enhancer.engines.keywordextraction.referencedSiteId.name=Referenced Site
+org.apache.stanbol.enhancer.engines.keywordextraction.referencedSiteId.description=The ID of the Entityhub Referenced Site holding the Controlled Vocabulary (e.g. a taxonomy or just a set of named entities)
+
+org.apache.stanbol.enhancer.engines.keywordextraction.simpleTokenizer.name=Use Simple Tokenizer
+org.apache.stanbol.enhancer.engines.keywordextraction.simpleTokenizer.description=This allows to deactivate the use of Language specific Tokenizers. For most European languages the Simple Tokenizer is sufficient.
+
+org.apache.stanbol.enhancer.engines.keywordextraction.minSearchTokenLength.name=Min Token Length
+org.apache.stanbol.enhancer.engines.keywordextraction.minSearchTokenLength.description=The minimum length of Tokens used to lookup Entities within the Controlled Vocabulary. This parameter is ignored in case a POS (Part of Speech) tagger is available for the language of the parsed content.
+
+#org.apache.stanbol.enhancer.engines.keywordextraction.enableChunker.name=Use Chunker
+#org.apache.stanbol.enhancer.engines.keywordextraction.enableChunker.description=This allows to enable/disable the use of a Chunker. Even if enabled it will only be used if one is present for the language of the content.
+
+org.apache.stanbol.enhancer.engines.keywordextraction.nameField.name=Label Field 
+org.apache.stanbol.enhancer.engines.keywordextraction.nameField.description=The field used to match Entities with mentions within the parsed text.
+
+org.apache.stanbol.enhancer.engines.keywordextraction.typeField.name=Type Field
+org.apache.stanbol.enhancer.engines.keywordextraction.typeField.description=The field used to retrieve the types of matched Entities. Values of that field are expected to be URIs
+
+org.apache.stanbol.enhancer.engines.keywordextraction.redirectField.name=Redirect Field
+org.apache.stanbol.enhancer.engines.keywordextraction.redirectField.description=Entities may define redirects to other Entities (e.g. "USA"(http://dbpedia.org/resource/USA) -> "United States"(http://dbpedia.org/resource/United_States)). Values of this field are expected to link to other entities that are part of the controlled vocabulary
+
+org.apache.stanbol.enhancer.engines.keywordextraction.maxSuggestions.name=Suggestions
+org.apache.stanbol.enhancer.engines.keywordextraction.maxSuggestions.description=The maximal number of suggestions returned for a single mention. 
+
+org.apache.stanbol.enhancer.engines.keywordextraction.minFoundTokens.name=Number of Required Tokens
+org.apache.stanbol.enhancer.engines.keywordextraction.minFoundTokens.description=For lookups with several words (e.g. Dr Patrick Marshall) this is the minimum number of Tokens the label of an entity must match to be suggested. This is only used if the label does not exactly match a part of the text.
+
+org.apache.stanbol.enhancer.engines.keywordextraction.redirectMode.name=Redirect Mode
+org.apache.stanbol.enhancer.engines.keywordextraction.redirectMode.description=Defines how to process redirects of Entities mentioned in the parsed content. Three modes to deal with such links are supported: Ignore redirects; Add values from redirected Entities to extracted; Follow Redirects and suggest the redirected Entity instead of the extracted.
+org.apache.stanbol.enhancer.engines.keywordextraction.redirectMode.option.follow=Follow Redirects
+org.apache.stanbol.enhancer.engines.keywordextraction.redirectMode.option.addValues=Keep extracted Entity, but add information of the redirected
+org.apache.stanbol.enhancer.engines.keywordextraction.redirectMode.option.ignore=Ignore Redirects
+
+org.apache.stanbol.enhancer.engines.keywordextraction.processedLanguages.name=Languages
+org.apache.stanbol.enhancer.engines.keywordextraction.processedLanguages.description=Languages to process. An empty text indicates that all languages are processed. Use ',' as separator for languages (e.g. 'en,de' to enhance only English and German texts).

Propchange: incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/resources/OSGI-INF/metatype/metatype.properties
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/test/java/org/apache/stanbol/enhancer/engines/keywordextraction/TestTaxonomyLinker.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/test/java/org/apache/stanbol/enhancer/engines/keywordextraction/TestTaxonomyLinker.java?rev=1173968&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/test/java/org/apache/stanbol/enhancer/engines/keywordextraction/TestTaxonomyLinker.java (added)
+++ incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/test/java/org/apache/stanbol/enhancer/engines/keywordextraction/TestTaxonomyLinker.java Thu Sep 22 06:51:30 2011
@@ -0,0 +1,197 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.keywordextraction;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import opennlp.tools.tokenize.SimpleTokenizer;
+
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
+import org.apache.stanbol.commons.opennlp.OpenNLP;
+import org.apache.stanbol.commons.opennlp.TextAnalyzer;
+import org.apache.stanbol.enhancer.engines.keywordextraction.impl.ClasspathDataFileProvider;
+import org.apache.stanbol.enhancer.engines.keywordextraction.impl.TestSearcherImpl;
+import org.apache.stanbol.enhancer.engines.keywordextraction.linking.EntityLinker;
+import org.apache.stanbol.enhancer.engines.keywordextraction.linking.EntityLinkerConfig;
+import org.apache.stanbol.enhancer.engines.keywordextraction.linking.LinkedEntity;
+import org.apache.stanbol.enhancer.engines.keywordextraction.linking.Suggestion;
+import org.apache.stanbol.enhancer.engines.keywordextraction.linking.EntityLinkerConfig.RedirectProcessingMode;
+import org.apache.stanbol.enhancer.engines.keywordextraction.linking.impl.OpenNlpAnalysedContentFactory;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
+import org.apache.stanbol.entityhub.core.model.InMemoryValueFactory;
+import org.apache.stanbol.entityhub.servicesapi.defaults.NamespaceEnum;
+import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+import org.apache.stanbol.entityhub.servicesapi.model.ValueFactory;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Tests the {@link EntityLinker} with a {@link TestSearcherImpl}. TODO: convert this to an integration test!
+ * @author Rupert Westenthaler
+ */
+public class TestTaxonomyLinker {
+
+    /**
+     * The context for the tests (same as in TestOpenNLPEnhancementEngine)
+     */
+    public static final String TEST_TEXT = "Dr. Patrick Marshall (1869 - November 1950) was a"
+        + " geologist who lived in New Zealand and worked at the University of Otago.";
+    public static final String TEST_TEXT2 = "A CBS televised debate between Australia's " +
+    		"candidates for Prime Minister in the upcoming US election has been rescheduled " +
+    		"and shortend, to avoid a clash with popular cookery sow MasterChef.";
+    
+    static TestSearcherImpl searcher;
+    static ValueFactory factory = InMemoryValueFactory.getInstance();
+    private static OpenNLP openNLP;
+    
+    public static final String NAME = NamespaceEnum.rdfs+"label";
+    public static final String TYPE = NamespaceEnum.rdf+"type";
+    public static final String REDIRECT = NamespaceEnum.rdfs+"seeAlso";
+
+    @BeforeClass
+    public static void setUpServices() throws IOException {
+        openNLP = new OpenNLP(new ClasspathDataFileProvider("DUMMY_SYMBOLIC_NAME"));
+        searcher = new TestSearcherImpl(NAME,SimpleTokenizer.INSTANCE);
+        //add some terms to the searcher
+        Representation rep = factory.createRepresentation("urn:test:PatrickMarshall");
+        rep.addNaturalText(NAME, "Patrick Marshall");
+        rep.addReference(TYPE, OntologicalClasses.DBPEDIA_PERSON.getUnicodeString());
+        searcher.addEntity(rep);
+        rep = factory.createRepresentation("urn:test:Geologist");
+        rep.addNaturalText(NAME, "Geologist");
+        rep.addReference(TYPE, NamespaceEnum.skos+"Concept");
+        rep.addReference(REDIRECT, "urn:test:redirect:Geologist");
+        searcher.addEntity(rep);
+        //a redirect
+        rep = factory.createRepresentation("urn:test:redirect:Geologist");
+        rep.addNaturalText(NAME, "Geologe (redirect)");
+        rep.addReference(TYPE, NamespaceEnum.skos+"Concept");
+        searcher.addEntity(rep);
+        rep = factory.createRepresentation("urn:test:NewZealand");
+        rep.addNaturalText(NAME, "New Zealand");
+        rep.addReference(TYPE, OntologicalClasses.DBPEDIA_PLACE.getUnicodeString());
+        searcher.addEntity(rep);
+        rep = factory.createRepresentation("urn:test:UniversityOfOtago");
+        rep.addNaturalText(NAME, "University of Otago");
+        rep.addReference(TYPE, OntologicalClasses.DBPEDIA_ORGANISATION.getUnicodeString());
+        searcher.addEntity(rep);
+        rep = factory.createRepresentation("urn:test:University");
+        rep.addNaturalText(NAME, "University");
+        rep.addReference(TYPE, NamespaceEnum.skos+"Concept");
+        searcher.addEntity(rep);
+        rep = factory.createRepresentation("urn:test:Otago");
+        rep.addNaturalText(NAME, "Otago");
+        rep.addReference(TYPE, OntologicalClasses.DBPEDIA_PLACE.getUnicodeString());
+        searcher.addEntity(rep);
+    }
+
+    @Before
+    public void bindServices() throws IOException {
+    }
+
+    @After
+    public void unbindServices() {
+    }
+
+    @AfterClass
+    public static void shutdownServices() {
+    }
+
+    public static ContentItem getContentItem(final String id, final String text) {
+        return new ContentItem() {
+
+            SimpleMGraph metadata = new SimpleMGraph();
+
+            public InputStream getStream() {
+                return new ByteArrayInputStream(text.getBytes());
+            }
+
+            public String getMimeType() {
+                return "text/plain";
+            }
+
+            public MGraph getMetadata() {
+                return metadata;
+            }
+
+            public String getId() {
+                return id;
+            }
+        };
+    }
+
+    @Test
+    public void testTaxonomyLinker() throws Exception{
+        TextAnalyzer textAnalyzer = new TextAnalyzer(openNLP);
+        OpenNlpAnalysedContentFactory acf = OpenNlpAnalysedContentFactory.getInstance(textAnalyzer);
+        EntityLinkerConfig config = new EntityLinkerConfig();
+        config.setRedirectProcessingMode(RedirectProcessingMode.FOLLOW);
+        EntityLinker linker = new EntityLinker(
+            acf.create(TEST_TEXT,"en"), searcher, config);
+        linker.process();
+        Map<String,List<String>> expectedResults = new HashMap<String,List<String>>();
+        expectedResults.put("Patrick Marshall", new ArrayList<String>(
+                Arrays.asList("urn:test:PatrickMarshall")));
+        expectedResults.put("geologist", new ArrayList<String>(
+                Arrays.asList("urn:test:redirect:Geologist"))); //the redirected entity
+        expectedResults.put("New Zealand", new ArrayList<String>(
+                Arrays.asList("urn:test:NewZealand")));
+        expectedResults.put("University of Otago", new ArrayList<String>(
+                Arrays.asList("urn:test:UniversityOfOtago")));
+        //NOTE(review): entries still left in expectedResults after this loop are not reported - TODO confirm
+        for(LinkedEntity linkedEntity : linker.getLinkedEntities().values()){
+            List<String> expectedSuggestions = expectedResults.remove(linkedEntity.getSelectedText());
+            assertNotNull("LinkedEntity "+linkedEntity.getSelectedText()+
+                " is not an expected Result (or was found twice)", expectedSuggestions);
+            assertEquals("Number of suggestions "+linkedEntity.getSuggestions().size()+
+                " != number of expected suggestions "+expectedSuggestions.size()+
+                " for selection "+linkedEntity.getSelectedText(), 
+                expectedSuggestions.size(),
+                linkedEntity.getSuggestions().size());
+            double score = linkedEntity.getScore();
+            for(int i=0;i<expectedSuggestions.size();i++){
+                Suggestion suggestion = linkedEntity.getSuggestions().get(i);
+                assertEquals("Expected Suggestion at Rank "+i+" expected: "+
+                    expectedSuggestions.get(i)+" suggestion: "+
+                    suggestion.getRepresentation().getId(),
+                    expectedSuggestions.get(i), 
+                    suggestion.getRepresentation().getId());
+                assertTrue("Score of suggestion "+i+" ("+suggestion.getScore()+
+                    ") > as of the previous one ("+score+")",
+                    score >= suggestion.getScore());
+                score = suggestion.getScore();
+            }
+        }
+    }
+
+}

Propchange: incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/test/java/org/apache/stanbol/enhancer/engines/keywordextraction/TestTaxonomyLinker.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: incubator/stanbol/trunk/enhancer/engines/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/pom.xml?rev=1173968&r1=1173967&r2=1173968&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/pom.xml (original)
+++ incubator/stanbol/trunk/enhancer/engines/pom.xml Thu Sep 22 06:51:30 2011
@@ -50,6 +50,7 @@
     <module>metaxa</module>
     <module>geonames</module>
     <module>entitytagging</module>
+    <module>keywordextraction</module>
     <module>taxonomylinking</module>
     <!-- RICK based enhancement engine(s) -->
     <module>opencalais</module>

Modified: incubator/stanbol/trunk/enhancer/engines/refactor/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/refactor/pom.xml?rev=1173968&r1=1173967&r2=1173968&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/refactor/pom.xml (original)
+++ incubator/stanbol/trunk/enhancer/engines/refactor/pom.xml Thu Sep 22 06:51:30 2011
@@ -62,7 +62,7 @@
         <configuration>
           <instructions>
             <Export-Package>
-              org.apache.stanbol.enhancer.engines.refactor;version=${pom.version}
+              org.apache.stanbol.enhancer.engines.refactor;version=${project.version}
             </Export-Package>
             <Import-Package>
               !javax.xml.stream.*,

Modified: incubator/stanbol/trunk/enhancer/engines/taxonomylinking/src/main/java/org/apache/stanbol/enhancer/engines/taxonomy/impl/Suggestion.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/taxonomylinking/src/main/java/org/apache/stanbol/enhancer/engines/taxonomy/impl/Suggestion.java?rev=1173968&r1=1173967&r2=1173968&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/taxonomylinking/src/main/java/org/apache/stanbol/enhancer/engines/taxonomy/impl/Suggestion.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/taxonomylinking/src/main/java/org/apache/stanbol/enhancer/engines/taxonomy/impl/Suggestion.java Thu Sep 22 06:51:30 2011
@@ -23,7 +23,6 @@ import java.util.List;
 import java.util.Set;
 
 import org.apache.clerezza.rdf.core.UriRef;
-import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
 import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
 import org.apache.stanbol.entityhub.servicesapi.model.Representation;
 import org.apache.stanbol.entityhub.servicesapi.site.ReferencedSite;

Modified: incubator/stanbol/trunk/enhancer/engines/taxonomylinking/src/main/java/org/apache/stanbol/enhancer/engines/taxonomy/impl/TaxonomyLinkingEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/taxonomylinking/src/main/java/org/apache/stanbol/enhancer/engines/taxonomy/impl/TaxonomyLinkingEngine.java?rev=1173968&r1=1173967&r2=1173968&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/taxonomylinking/src/main/java/org/apache/stanbol/enhancer/engines/taxonomy/impl/TaxonomyLinkingEngine.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/taxonomylinking/src/main/java/org/apache/stanbol/enhancer/engines/taxonomy/impl/TaxonomyLinkingEngine.java Thu Sep 22 06:51:30 2011
@@ -44,13 +44,10 @@ import opennlp.tools.sentdetect.Sentence
 import opennlp.tools.sentdetect.SentenceModel;
 import opennlp.tools.tokenize.SimpleTokenizer;
 import opennlp.tools.tokenize.Tokenizer;
-import opennlp.tools.tokenize.TokenizerME;
-import opennlp.tools.util.InvalidFormatException;
 import opennlp.tools.util.Span;
 
 import org.apache.clerezza.rdf.core.LiteralFactory;
 import org.apache.clerezza.rdf.core.MGraph;
-import org.apache.clerezza.rdf.core.NonLiteral;
 import org.apache.clerezza.rdf.core.Triple;
 import org.apache.clerezza.rdf.core.UriRef;
 import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
@@ -87,9 +84,7 @@ import org.apache.stanbol.entityhub.serv
 import org.apache.stanbol.entityhub.servicesapi.query.QueryResultList;
 import org.apache.stanbol.entityhub.servicesapi.query.TextConstraint;
 import org.apache.stanbol.entityhub.servicesapi.site.ReferencedSite;
-import org.apache.stanbol.entityhub.servicesapi.site.ReferencedSiteException;
 import org.apache.stanbol.entityhub.servicesapi.site.ReferencedSiteManager;
-import org.apache.stanbol.entityhub.servicesapi.util.ModelUtils;
 //removed annotations until engine actually does something
 //@Component(configurationFactory = true, policy = ConfigurationPolicy.REQUIRE, // the baseUri is required!
 //    specVersion = "1.1", metatype = true, immediate = true)
@@ -98,11 +93,22 @@ import org.osgi.service.cm.Configuration
 import org.osgi.service.component.ComponentContext;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-import org.w3c.dom.NameList;
 
+/**
+ * This is the first try of an EnhancementEngine that finds concepts present within 
+ * a taxonomy (controlled vocabulary) within content.<p>
+ * Currently users should not use this engine but use the KeywordLinkingEngine
+ * (org.apache.stanbol.enhancer.engine.keywordextraction bundle) instead.<p>
+ * It is planned to re-introduce this engine with additional features specific to
+ * taxonomies (such as support for concept hierarchies).
+ * @deprecated use the KeywordLinkingEngine (org.apache.stanbol.enhancer.engine.keywordextraction bundle) instead
+ * @author Rupert Westenthaler
+ *
+ */
 @Component(configurationFactory = true, policy = ConfigurationPolicy.REQUIRE, // the baseUri is required!
     specVersion = "1.1", metatype = true, immediate = true)
 @Service
+@Deprecated
 public class TaxonomyLinkingEngine implements EnhancementEngine, ServiceProperties {
 
     private static Logger log = LoggerFactory.getLogger(TaxonomyLinkingEngine.class);

Modified: incubator/stanbol/trunk/enhancer/engines/taxonomylinking/src/main/resources/OSGI-INF/metatype/metatype.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/taxonomylinking/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1173968&r1=1173967&r2=1173968&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/taxonomylinking/src/main/resources/OSGI-INF/metatype/metatype.properties (original)
+++ incubator/stanbol/trunk/enhancer/engines/taxonomylinking/src/main/resources/OSGI-INF/metatype/metatype.properties Thu Sep 22 06:51:30 2011
@@ -16,8 +16,8 @@
 #===============================================================================
 #Properties and Options used to configure 
 #===============================================================================
-org.apache.stanbol.enhancer.engines.taxonomy.impl.TaxonomyLinkingEngine.name=Apache Stanbol Enhancement Engine for Taxonomy linking
-org.apache.stanbol.enhancer.engines.taxonomy.impl.TaxonomyLinkingEngine.description=An engine that extracts occurrencies of Taxonomy Terms within a parsed ContentItem
+org.apache.stanbol.enhancer.engines.taxonomy.impl.TaxonomyLinkingEngine.name=Apache Stanbol Enhancement Engine for Taxonomy linking (Deprecated)
+org.apache.stanbol.enhancer.engines.taxonomy.impl.TaxonomyLinkingEngine.description=NOTE that this Engine was replaced by the "Apache Stanbol Enhancement Engine for linking Keywords" instead. Description: An engine that extracts occurrencies of Taxonomy Terms within a parsed ContentItem
 
 org.apache.stanbol.enhancer.engines.taxonomy.referencedSiteId.name=Referenced Site
 org.apache.stanbol.enhancer.engines.taxonomy.referencedSiteId.description=The ID of the Entityhub Referenced Site holding the Taxonomy

Modified: incubator/stanbol/trunk/enhancer/generic/jobmanager/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/generic/jobmanager/pom.xml?rev=1173968&r1=1173967&r2=1173968&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/generic/jobmanager/pom.xml (original)
+++ incubator/stanbol/trunk/enhancer/generic/jobmanager/pom.xml Thu Sep 22 06:51:30 2011
@@ -55,7 +55,7 @@
 				<configuration>
 					<instructions>
 						<Private-Package>
-							org.apache.stanbol.enhancer.jobmanager.impl.*
+							org.apache.stanbol.enhancer.jobmanager.impl.*;version=${project.version}
             			</Private-Package>
 					</instructions>
 				</configuration>

Modified: incubator/stanbol/trunk/enhancer/generic/servicesapi/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/generic/servicesapi/pom.xml?rev=1173968&r1=1173967&r2=1173968&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/generic/servicesapi/pom.xml (original)
+++ incubator/stanbol/trunk/enhancer/generic/servicesapi/pom.xml Thu Sep 22 06:51:30 2011
@@ -55,12 +55,12 @@
 				<configuration>
 					<instructions>
 						<Export-Package>
-							org.apache.stanbol.enhancer.servicesapi;version=${pom.version},
-							org.apache.stanbol.enhancer.servicesapi.helper;version=${pom.version},
-							org.apache.stanbol.enhancer.servicesapi.rdf;version=${pom.version}
+							org.apache.stanbol.enhancer.servicesapi;version=${project.version},
+							org.apache.stanbol.enhancer.servicesapi.helper;version=${project.version},
+							org.apache.stanbol.enhancer.servicesapi.rdf;version=${project.version}
 			            </Export-Package>
 						<Private-Package>
-							org.apache.stanbol.enhancer.servicesapi.helper.impl;version=${pom.version}
+							org.apache.stanbol.enhancer.servicesapi.helper.impl;version=${project.version}
             			</Private-Package>
 					</instructions>
 				</configuration>

Modified: incubator/stanbol/trunk/enhancer/generic/standalone/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/generic/standalone/pom.xml?rev=1173968&r1=1173967&r2=1173968&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/generic/standalone/pom.xml (original)
+++ incubator/stanbol/trunk/enhancer/generic/standalone/pom.xml Thu Sep 22 06:51:30 2011
@@ -55,7 +55,7 @@
 				<configuration>
 					<instructions>
 						<Private-Package>
-							org.apache.stanbol.enhancer.standalone.*;version=${pom.version}
+							org.apache.stanbol.enhancer.standalone.*;version=${project.version}
             			</Private-Package>
 					</instructions>
 				</configuration>

Modified: incubator/stanbol/trunk/enhancer/stores/jcrstore/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/stores/jcrstore/pom.xml?rev=1173968&r1=1173967&r2=1173968&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/stores/jcrstore/pom.xml (original)
+++ incubator/stanbol/trunk/enhancer/stores/jcrstore/pom.xml Thu Sep 22 06:51:30 2011
@@ -35,7 +35,7 @@
         <extensions>true</extensions>
         <configuration>
           <instructions>
-            <Export-Package>org.apache.stanbol.enhancer.store.*;version=${pom.version}</Export-Package>
+            <Export-Package>org.apache.stanbol.enhancer.store.*;version=${project.version}</Export-Package>
             <Import-Package>*</Import-Package>
             <!-- Embed-Dependency>
                jcr, org.apache.sling.jcr.jackrabbit.server, org.apache.sling.jcr.api, org.apache.sling.jcr.base,

Modified: incubator/stanbol/trunk/entityhub/generic/core/src/main/java/org/apache/stanbol/entityhub/core/impl/ReferencedSiteImpl.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/generic/core/src/main/java/org/apache/stanbol/entityhub/core/impl/ReferencedSiteImpl.java?rev=1173968&r1=1173967&r2=1173968&view=diff
==============================================================================
--- incubator/stanbol/trunk/entityhub/generic/core/src/main/java/org/apache/stanbol/entityhub/core/impl/ReferencedSiteImpl.java (original)
+++ incubator/stanbol/trunk/entityhub/generic/core/src/main/java/org/apache/stanbol/entityhub/core/impl/ReferencedSiteImpl.java Thu Sep 22 06:51:30 2011
@@ -462,6 +462,8 @@ public class ReferencedSiteImpl implemen
                 if(rep != null){
                    entity = new EntityImpl(getId(), rep, null);
                    entity.getMetadata().set(RdfResourceEnum.isChached.getUri(), Boolean.TRUE);
+                } else if(siteConfiguration.getCacheStrategy() == CacheStrategy.all){
+                    return null; //do not perform remote lookups on CacheStrategy.all!!
                 }
             } catch (YardException e) {
                 if (siteConfiguration.getEntityDereferencerType() == null || isOfflineMode()) {

Modified: incubator/stanbol/trunk/entityhub/generic/core/src/main/java/org/apache/stanbol/entityhub/core/query/QueryResultListImpl.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/generic/core/src/main/java/org/apache/stanbol/entityhub/core/query/QueryResultListImpl.java?rev=1173968&r1=1173967&r2=1173968&view=diff
==============================================================================
--- incubator/stanbol/trunk/entityhub/generic/core/src/main/java/org/apache/stanbol/entityhub/core/query/QueryResultListImpl.java (original)
+++ incubator/stanbol/trunk/entityhub/generic/core/src/main/java/org/apache/stanbol/entityhub/core/query/QueryResultListImpl.java Thu Sep 22 06:51:30 2011
@@ -106,6 +106,10 @@ public class QueryResultListImpl<T> impl
     public final Iterator<T> iterator() {
         return results.iterator();
     }
+    @Override
+    public Collection<T> results() {
+        return results;
+    }
     /* (non-Javadoc)
      * @see org.apache.stanbol.entityhub.core.query.ResultList#isEmpty()
      */

Modified: incubator/stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/query/QueryResultList.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/query/QueryResultList.java?rev=1173968&r1=1173967&r2=1173968&view=diff
==============================================================================
--- incubator/stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/query/QueryResultList.java (original)
+++ incubator/stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/query/QueryResultList.java Thu Sep 22 06:51:30 2011
@@ -16,6 +16,7 @@
  */
 package org.apache.stanbol.entityhub.servicesapi.query;
 
+import java.util.Collection;
 import java.util.Iterator;
 import java.util.Set;
 
@@ -39,6 +40,11 @@ public interface QueryResultList<T> exte
      */
     Iterator<T> iterator();
     /**
+     * Unmodifiable collection of the results
+     * @return the results
+     */
+    Collection<? extends T> results();
+    /**
      * <code>true</code> if the result set is empty
      * @return <code>true</code> if the result set is empty. Otherwise <code>false</code>
      */

Modified: incubator/stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/util/ModelUtils.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/util/ModelUtils.java?rev=1173968&r1=1173967&r2=1173968&view=diff
==============================================================================
--- incubator/stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/util/ModelUtils.java (original)
+++ incubator/stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/util/ModelUtils.java Thu Sep 22 06:51:30 2011
@@ -141,7 +141,7 @@ public final class ModelUtils {
         }
         return info.toString();
     }
-    public static <T> Collection<T> asCollection(Iterator<T> it){
+    public static <T> Collection<T> asCollection(Iterator<? extends T> it){
         Collection<T> c = new ArrayList<T>();
         while(it.hasNext()){
             c.add(it.next());

Modified: incubator/stanbol/trunk/entityhub/generic/test/src/main/java/org/apache/stanbol/entityhub/test/it/AssertEntityhubJson.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/generic/test/src/main/java/org/apache/stanbol/entityhub/test/it/AssertEntityhubJson.java?rev=1173968&r1=1173967&r2=1173968&view=diff
==============================================================================
--- incubator/stanbol/trunk/entityhub/generic/test/src/main/java/org/apache/stanbol/entityhub/test/it/AssertEntityhubJson.java (original)
+++ incubator/stanbol/trunk/entityhub/generic/test/src/main/java/org/apache/stanbol/entityhub/test/it/AssertEntityhubJson.java Thu Sep 22 06:51:30 2011
@@ -50,10 +50,12 @@ public class AssertEntityhubJson {
         JSONObject jso = new JSONObject(re.getContent());
         JSONArray results = jso.getJSONArray("results");
         if(test.expectesResults()){
-            assertTrue("Missing Results for Query: \n "+test,
+            assertTrue("Missing Results for Query: \n "+test+
+                "\n Result:\n "+results.toString(4),
                 results.length() > 0);
         } else {
-            assertTrue("Unexpected Results for Query:\n "+test,
+            assertTrue("Unexpected Results for Query:\n "+test+
+                "\n Result:\n "+results.toString(4),
                 results.length() == 0);
         }
         Set<String> expectedIds;

Modified: incubator/stanbol/trunk/entityhub/generic/test/src/main/java/org/apache/stanbol/entityhub/test/it/EntityhubTestBase.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/generic/test/src/main/java/org/apache/stanbol/entityhub/test/it/EntityhubTestBase.java?rev=1173968&r1=1173967&r2=1173968&view=diff
==============================================================================
--- incubator/stanbol/trunk/entityhub/generic/test/src/main/java/org/apache/stanbol/entityhub/test/it/EntityhubTestBase.java (original)
+++ incubator/stanbol/trunk/entityhub/generic/test/src/main/java/org/apache/stanbol/entityhub/test/it/EntityhubTestBase.java Thu Sep 22 06:51:30 2011
@@ -18,16 +18,11 @@ import org.slf4j.LoggerFactory;
  */
 public abstract class EntityhubTestBase extends StanbolTestBase{
     
-    protected final Logger log;
+    private final Logger log = LoggerFactory.getLogger(getClass());
     
     private final Collection<String> referencedSites;
     
-    public EntityhubTestBase(Collection<String> referencedSites,Logger log) {
-        if(log != null){
-            this.log = log;
-        } else {
-            this.log = LoggerFactory.getLogger(getClass());
-        }
+    public EntityhubTestBase(Collection<String> referencedSites) {
         if(referencedSites == null){
             this.referencedSites = Collections.emptyList();
         } else {
@@ -86,7 +81,7 @@ public abstract class EntityhubTestBase 
                             referencedSite));
                     }
                 }
-                log.info("Enhancement engines checked, all present");
+                log.info("Entityhub services checked, all present");
                 return true;
             }
             

Modified: incubator/stanbol/trunk/entityhub/generic/test/src/main/java/org/apache/stanbol/entityhub/test/query/QueryTestBase.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/generic/test/src/main/java/org/apache/stanbol/entityhub/test/query/QueryTestBase.java?rev=1173968&r1=1173967&r2=1173968&view=diff
==============================================================================
--- incubator/stanbol/trunk/entityhub/generic/test/src/main/java/org/apache/stanbol/entityhub/test/query/QueryTestBase.java (original)
+++ incubator/stanbol/trunk/entityhub/generic/test/src/main/java/org/apache/stanbol/entityhub/test/query/QueryTestBase.java Thu Sep 22 06:51:30 2011
@@ -20,6 +20,7 @@ import org.codehaus.jettison.json.JSONEx
 import org.codehaus.jettison.json.JSONObject;
 import org.junit.Test;
 import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  *  <p>
@@ -44,6 +45,8 @@ import org.slf4j.Logger;
  */
 public abstract class QueryTestBase extends EntityhubTestBase {
     
+    private final Logger log = LoggerFactory.getLogger(getClass());
+    
     public static final String RDFS_LABEL = "http://www.w3.org/2000/01/rdf-schema#label";
     protected final String endpointPath;
     /**
@@ -52,8 +55,8 @@ public abstract class QueryTestBase exte
      * @param referencedSiteId if the 
      * @param log
      */
-    public QueryTestBase(String servicePath, String referencedSiteId,Logger log){
-        super(referencedSiteId == null ? null : Collections.singleton(referencedSiteId),log);
+    public QueryTestBase(String servicePath, String referencedSiteId){
+        super(referencedSiteId == null ? null : Collections.singleton(referencedSiteId));
         if(servicePath == null){
             throw new IllegalArgumentException("The path to the FieldQuery endpoint MUST NOT be NULL!");
         }
@@ -138,7 +141,7 @@ public abstract class QueryTestBase exte
         assertTrue("Result Query does not contain offset property",jQuery.has("offset"));
         assertTrue("Returned offset is != 0",jQuery.getInt("offset") == 0);
         
-        assertSelectedField(jQuery,RDFS_LABEL);
+        assertSelectedField(jQuery,getDefaultFindQueryField());
         
         JSONArray jConstraints = jQuery.optJSONArray("constraints");
         assertNotNull("Result Query is missing the 'constraints' property",jConstraints);
@@ -154,9 +157,17 @@ public abstract class QueryTestBase exte
         assertEquals("The 'patternType' of the Constraint is not 'wildcard' but "+constraint.opt("patternType"), 
             "wildcard",constraint.optString("patternType"));
         
-        assertEquals("The 'field' of the Constraint is not rdfs:label but "+constraint.opt("field"), 
-            RDFS_LABEL,constraint.optString("field"));
+        assertEquals("The 'field' of the Constraint is not "+getDefaultFindQueryField()+" but "+constraint.opt("field"), 
+            getDefaultFindQueryField(),constraint.optString("field"));
     }
+    /**
+     * Getter for the default field used for find queries if the 'field' parameter
+     * is not defined.<p>
+     * This default is different for the '/entityhub' and the other service
+     * endpoints that support find queries.
+     * @return the default field
+     */
+    protected abstract String getDefaultFindQueryField();
     
     @Test
     public void testCustomFieldParameter() throws IOException, JSONException {

Modified: incubator/stanbol/trunk/entityhub/indexing/core/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/indexing/core/pom.xml?rev=1173968&r1=1173967&r2=1173968&view=diff
==============================================================================
--- incubator/stanbol/trunk/entityhub/indexing/core/pom.xml (original)
+++ incubator/stanbol/trunk/entityhub/indexing/core/pom.xml Thu Sep 22 06:51:30 2011
@@ -52,13 +52,13 @@
         <configuration>
           <instructions>
             <Export-Package>
-              org.apache.stanbol.entityhub.indexing.core;version=${pom.version},
-              org.apache.stanbol.entityhub.indexing.core.normaliser;version=${pom.version},
-              org.apache.stanbol.entityhub.indexing.core.processor;version=${pom.version}
+              org.apache.stanbol.entityhub.indexing.core;version=${project.version},
+              org.apache.stanbol.entityhub.indexing.core.normaliser;version=${project.version},
+              org.apache.stanbol.entityhub.indexing.core.processor;version=${project.version}
             </Export-Package>
             <!--
             <Private-Package>
-              org.apache.stanbol.entityhub.indexing.core.impl;version=${pom.version}
+              org.apache.stanbol.entityhub.indexing.core.impl;version=${project.version}
             </Private-Package>
             -->
           </instructions>