You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/07/11 18:59:09 UTC
svn commit: r1360296 - in
/incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main:
java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/
java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/
java/org/apache/stanb...
Author: rwesten
Date: Wed Jul 11 16:59:08 2012
New Revision: 1360296
URL: http://svn.apache.org/viewvc?rev=1360296&view=rev
Log:
fixes for STANBOL-685 and STANBOL-686 as stated in the issue description.
Also added debug level loggings about metadata of processed Tokens
Modified:
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/EntityLinker.java
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/EntityLinkerConfig.java
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/impl/OpenNlpAnalysedContentFactory.java
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/resources/OSGI-INF/metatype/metatype.properties
Modified: incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java?rev=1360296&r1=1360295&r2=1360296&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java Wed Jul 11 16:59:08 2012
@@ -117,6 +117,8 @@ import org.slf4j.LoggerFactory;
},value="IGNORE"),
@Property(name=KeywordLinkingEngine.MIN_SEARCH_TOKEN_LENGTH,
intValue=EntityLinkerConfig.DEFAULT_MIN_SEARCH_TOKEN_LENGTH),
+ @Property(name=KeywordLinkingEngine.MIN_TOKEN_MATCH_FACTOR,floatValue=
+ EntityLinkerConfig.DEFAULT_MIN_TOKEN_MATCH_FACTOR),
@Property(name=KeywordLinkingEngine.KEYWORD_TOKENIZER,boolValue=false),
@Property(name=KeywordLinkingEngine.MAX_SUGGESTIONS,
intValue=EntityLinkerConfig.DEFAULT_SUGGESTIONS),
@@ -164,6 +166,7 @@ public class KeywordLinkingEngine
public static final String MIN_POS_TAG_PROBABILITY = "org.apache.stanbol.enhancer.engines.keywordextraction.minPosTagProbability";
public static final String TYPE_MAPPINGS = "org.apache.stanbol.enhancer.engines.keywordextraction.typeMappings";
public static final String KEYWORD_TOKENIZER = "org.apache.stanbol.enhancer.engines.keywordextraction.keywordTokenizer";
+ public static final String MIN_TOKEN_MATCH_FACTOR = "org.apache.stanbol.enhancer.engines.keywordextraction.minTokenMatchFactor";
// public static final String ENABLE_CHUNKER = "org.apache.stanbol.enhancer.engines.keywordextraction.enableChunker";
/**
* Adds the dereference feature (STANBOL-333) also to this engine.
@@ -192,7 +195,7 @@ public class KeywordLinkingEngine
* language are processed.
*/
public static final Set<String> DEFAULT_LANGUAGES = Collections.emptySet();
- public static final double DEFAULT_MIN_POS_TAG_PROBABILITY = 0.8;
+ public static final double DEFAULT_MIN_POS_TAG_PROBABILITY = 0.6667;
/**
* The languages this engine is configured to enhance. An empty List is
* considered as active for any language
@@ -611,6 +614,7 @@ public class KeywordLinkingEngine
"The configured min POS tag probability MUST BE in the range [0..1] " +
"or < 0 to deactivate this feature (parsed value "+value+")!");
}
+ nlpConfig.setMinPosTagProbability(minPosTagProb);
value = configuration.get(KEYWORD_TOKENIZER);
//the keyword tokenizer config
if(value instanceof Boolean){
@@ -618,7 +622,8 @@ public class KeywordLinkingEngine
} else if(value != null && !value.toString().isEmpty()){
nlpConfig.forceKeywordTokenizer(Boolean.valueOf(value.toString()));
}
- nlpConfig.setMinPosTagProbability(minPosTagProb);
+ //nlpConfig.enablePosTypeChunker(false);
+ //nlpConfig.enableChunker(false);
analysedContentFactory = OpenNlpAnalysedContentFactory.getInstance(openNLP,nlpConfig);
}
@@ -632,6 +637,7 @@ public class KeywordLinkingEngine
* <li>{@link #MAX_SUGGESTIONS}
* <li>{@link #MIN_SEARCH_TOKEN_LENGTH}
* <li>{@link #MIN_FOUND_TOKENS}
+ * <li> {@link #MIN_TOKEN_MATCH_FACTOR}
* </ul>
* This Method create an new {@link EntityLinkerConfig} instance only if
* <code>{@link #linkerConfig} == null</code>. If the instance is already initialised
@@ -760,6 +766,30 @@ public class KeywordLinkingEngine
linkerConfig.setDefaultLanguage(defaultLang);
}
}
+ // init MIN_TOKEN_MATCH_FACTOR
+ value=configuration.get(MIN_TOKEN_MATCH_FACTOR);
+ float minTokenMatchFactor;
+ if(value instanceof Number){
+ minTokenMatchFactor = ((Number)value).floatValue();
+ } else if(value != null){
+ try {
+ minTokenMatchFactor = Float.valueOf(value.toString());
+ } catch (NumberFormatException e) {
+ throw new ConfigurationException(MIN_TOKEN_MATCH_FACTOR,
+ "Unable to parse the minimum token match factor from the parsed value "+value,e);
+ }
+ if(minTokenMatchFactor < 0){
+ minTokenMatchFactor = EntityLinkerConfig.DEFAULT_MIN_TOKEN_MATCH_FACTOR;
+ }
+ } else {
+ minTokenMatchFactor = EntityLinkerConfig.DEFAULT_MIN_TOKEN_MATCH_FACTOR;
+ }
+ if(minTokenMatchFactor == 0 || minTokenMatchFactor > 1){
+ throw new ConfigurationException(MIN_TOKEN_MATCH_FACTOR,
+ "The minimum token match factor MUST be > 0 and <= 1 (negative values for the default)");
+ }
+ linkerConfig.setMinTokenMatchFactor(minTokenMatchFactor);
+
//init type mappings
value = configuration.get(TYPE_MAPPINGS);
if(value instanceof String[]){ //support array
Modified: incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/EntityLinker.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/EntityLinker.java?rev=1360296&r1=1360295&r2=1360296&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/EntityLinker.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/EntityLinker.java Wed Jul 11 16:59:08 2012
@@ -84,7 +84,16 @@ public class EntityLinker {
* Steps over the sentences, chunks, tokens of the {@link #sentences}
*/
public void process() throws EngineException {
+ int debugedIndex = 0;
while(state.next()) {
+ if(log.isDebugEnabled() && (state.getTokenIndex() > debugedIndex || state.getTokenIndex() == 0)){
+ debugedIndex = state.getTokenIndex();
+ Token token = state.getToken();
+ log.debug(" {} {} (pos:{}|prop:{})",new Object[]{
+ isProcessableToken(token)? '+':'-',
+ token.getText(),token.getPosTags(),token.getPosProbabilities()
+ });
+ }
if(isProcessableToken(state.getToken())){
List<String> searchStrings = new ArrayList<String>(config.getMaxSearchTokens());
searchStrings.add(state.getToken().getText());
@@ -96,6 +105,13 @@ public class EntityLinker {
state.getChunk().getEnd() : //the chunk
state.getSentence().getTokens().size()-1))){ //or sentence
Token included = state.getSentence().getTokens().get(includeTokenIndex);
+ if(log.isDebugEnabled() && includeTokenIndex > debugedIndex){
+ debugedIndex = includeTokenIndex;
+ log.debug(" {} {} (pos:{}|prop:{})",new Object[]{
+ isProcessableToken(included)? '+':'-',
+ included.getText(),included.getPosTags(),included.getPosProbabilities()
+ });
+ }
includeTokenIndex++;
if(isProcessableToken(included)){
searchStrings.add(included.getText());
@@ -355,20 +371,7 @@ public class EntityLinker {
}
return match;
}
-
- /**
- * The default value for the maximum number or non-processable tokens
- * allowed to be not matching with a label of an entity before the matching
- * is stopped.
- */
- private static int DEFAULT_MAX_NOT_FOUND = 1;
- /**
- * The value for the maximum number or non-processable tokens
- * allowed to be not matching with a label of an entity before the matching
- * is stopped.
- * TODO: make configurable!
- */
- private int maxNotFound = DEFAULT_MAX_NOT_FOUND;
+
/**
* @param match
* @param label
@@ -414,6 +417,7 @@ public class EntityLinker {
String currentTokenText;
int currentTokenLength;
int notFound = 0;
+ float minTokenMatchFactor = config.getMinTokenMatchFactor();
//search for matches within the correct order
for(int currentIndex = state.getTokenIndex();
currentIndex < state.getSentence().getTokens().size()
@@ -435,9 +439,9 @@ public class EntityLinker {
int labelTokenLength = labelTokenText.length();
float maxLength = currentTokenLength > labelTokenLength ? currentTokenLength : labelTokenLength;
float lengthDif = Math.abs(currentTokenLength - labelTokenLength);
- if((lengthDif/maxLength)<=0.3f){ //this prevents unnecessary string comparison
- int matchCount = compairTokens(currentTokenText, labelTokenText);
- if(matchCount/maxLength >= 0.7f){
+ if((lengthDif/maxLength)<=(1-minTokenMatchFactor)){ //this prevents unnecessary string comparison
+ int matchCount = compareTokens(currentTokenText, labelTokenText);
+ if(matchCount/maxLength >= minTokenMatchFactor){
lastfoundLabelIndex = i; //set the last found index to the current position
found = true; //set found to true -> stops iteration
matchFactor = matchCount/maxLength; //how good is the match
@@ -468,7 +472,7 @@ public class EntityLinker {
lastFoundIndex = currentIndex;
} else { //not found
notFound++;
- if(isProcessable || notFound > maxNotFound){
+ if(isProcessable || notFound > config.getMaxNotFound()){
//stop as soon as a token that needs to be processed is
//not found in the label or the maximum number of tokens
//that are not processable are not found
@@ -498,9 +502,9 @@ public class EntityLinker {
int labelTokenLength = labelTokenText.length();
float maxLength = currentTokenLength > labelTokenLength ? currentTokenLength : labelTokenLength;
float lengthDif = Math.abs(currentTokenLength - labelTokenLength);
- if((lengthDif/maxLength)<=0.3f){ //this prevents unnecessary string comparison
- int matchCount = compairTokens(currentTokenText, labelTokenText);
- if(matchCount/maxLength >= 0.7f){
+ if((lengthDif/maxLength)<=(1-minTokenMatchFactor)){ //this prevents unnecessary string comparison
+ int matchCount = compareTokens(currentTokenText, labelTokenText);
+ if(matchCount/maxLength >= minTokenMatchFactor){
found = true; //set found to true -> stops iteration
matchFactor = matchCount/maxLength; //how good is the match
}
@@ -515,7 +519,7 @@ public class EntityLinker {
currentIndex --;
} else {
notFound++;
- if(isProcessable || notFound > maxNotFound){
+ if(isProcessable || notFound > config.getMaxNotFound()){
//stop as soon as a token that needs to be processed is
//not found in the label or the maximum number of tokens
//that are not processable are not found
@@ -577,7 +581,7 @@ public class EntityLinker {
* @param token2 the second token
* @return the number of matching chars
*/
- private int compairTokens(String token1,String token2){
+ private int compareTokens(String token1,String token2){
int l1 = token1.length(); //length of the first token
int l2 = token2.length(); //length of the second token
//in case of same length check for equals first
@@ -626,7 +630,7 @@ public class EntityLinker {
do {
processToken = content.processPOS(posTags[i],posProb[i]);
i++;
- } while(processToken != null && processToken.equals(Boolean.FALSE) && i<posTags.length);
+ } while(processToken == null && i<posTags.length);
}
if(processToken == null) {
processToken = token.getText().length() >= config.getMinSearchTokenLength();
Modified: incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/EntityLinkerConfig.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/EntityLinkerConfig.java?rev=1360296&r1=1360295&r2=1360296&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/EntityLinkerConfig.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/EntityLinkerConfig.java Wed Jul 11 16:59:08 2012
@@ -211,6 +211,45 @@ public class EntityLinkerConfig {
* detected for the text.
*/
private String defaultLanguage = DEFAULT_LANGUAGE;
+
+ /**
+ * Default for the maximum number of non-processable tokens that are
+ * allowed to not match before no further tokens are matched against a label
+ * of an Entity. <p>
+ * This allows e.g. to match "Dr. Richard Dogles" with "Dr Richard Dogles"
+ * as '.' is a non-processable token in the text that is missing in the
+ * label.<p>
+ * The default is set to <code>1</code>
+ */
+ public final static int DEFAULT_MAX_NOT_FOUND = 1;
+ /**
+ * Value of the maximum number of non-processable tokens that are
+ * allowed to not match before no further tokens are matched against a label
+ * of an Entity. <p>
+ * This allows e.g. to match "Dr. Richard Dogles" with "Dr Richard Dogles"
+ * as '.' is a non-processable token in the text that is missing in the
+ * label.
+ */
+ private int maxNotFound;
+ /**
+ * Default value for the minimum token match factor.
+ * Whether Tokens match is determined by comparing them using some algorithm.
+ * Results need to be in the range [0..1]. This factor defines the minimum
+ * similarity value so that a match is assumed. Note that this factor is only
+ * used for filtering out non-matching tokens. The similarity value will
+ * still be used for calculating the confidence.<p>
+ * The default is set to <code>0.7</code>.
+ */
+ public final static float DEFAULT_MIN_TOKEN_MATCH_FACTOR = 0.7f;
+ /**
+ * Whether Tokens match is determined by comparing them using some algorithm.
+ * Results need to be in the range [0..1]. This factor defines the minimum
+ * similarity value so that a match is assumed. Note that this factor is only
+ * used for filtering out non-matching tokens. The similarity value will
+ * still be used for calculating the confidence
+ */
+ private float minTokenMatchFactor;
+
/**
* Default constructor the initialises the configuration with the
* default values
@@ -226,6 +265,8 @@ public class EntityLinkerConfig {
setNameField(DEFAULT_NAME_FIELD);
setRedirectField(DEFAULT_REDIRECT_FIELD);
setTypeField(DEFAULT_TYPE_FIELD);
+ setMaxNotFound(DEFAULT_MAX_NOT_FOUND);
+ setMinTokenMatchFactor(DEFAULT_MIN_TOKEN_MATCH_FACTOR);
}
/**
* Getter for the uri of the field used for the names in the taxonomy
@@ -483,4 +524,62 @@ public class EntityLinkerConfig {
public String getDefaultLanguage() {
return defaultLanguage;
}
+ /**
+ * Getter for the maximum number of non-processable tokens that are
+ * allowed to not match before no further tokens are matched against a label
+ * of an Entity. <p>
+ * This allows e.g. to match "Dr. Richard Dogles" with "Dr Richard Dogles"
+ * as '.' is a non-processable token in the text that is missing in the
+ * label.
+ * @return the maxNotFound
+ */
+ public int getMaxNotFound() {
+ return maxNotFound;
+ }
+ /**
+ * Setter for the maximum number of non-processable tokens that are
+ * allowed to not match before no further tokens are matched against a label
+ * of an Entity. <p>
+ * This allows e.g. to match "Dr. Richard Dogles" with "Dr Richard Dogles"
+ * as '.' is a non-processable token in the text that is missing in the
+ * label.
+ * @param maxNotFound the maxNotFound to set
+ */
+ public void setMaxNotFound(int maxNotFound) {
+ if(maxNotFound < 0){
+ this.maxNotFound = DEFAULT_MAX_NOT_FOUND;
+ } else {
+ this.maxNotFound = maxNotFound;
+ }
+ }
+ /**
+ * Getter for the minimum token match Factor.
+ * Whether Tokens match is determined by comparing them using some algorithm.
+ * Results need to be in the range [0..1]. This factor defines the minimum
+ * similarity value so that a match is assumed. Note that this factor is only
+ * used for filtering out non-matching tokens. The similarity value will
+ * still be used for calculating the confidence
+ * @return the minTokenMatchFactor
+ */
+ public float getMinTokenMatchFactor() {
+ return minTokenMatchFactor;
+ }
+ /**
+ * Setter for the minimum token match Factor.
+ * Whether Tokens match is determined by comparing them using some algorithm.
+ * Results need to be in the range [0..1]. This factor defines the minimum
+ * similarity value so that a match is assumed. Note that this factor is only
+ * used for filtering out non-matching tokens. The similarity value will
+ * still be used for calculating the confidence
+ * @param minTokenMatchFactor the minTokenMatchFactor to set
+ */
+ public void setMinTokenMatchFactor(float minTokenMatchFactor) {
+ if(minTokenMatchFactor < 0 ){
+ this.minTokenMatchFactor = DEFAULT_MIN_TOKEN_MATCH_FACTOR;
+ } else if(minTokenMatchFactor == 0 || minTokenMatchFactor > 1){
+ throw new IllegalArgumentException("minimum Token Match Facter MUST be > 0 <= 1 (parsed: "+minTokenMatchFactor+")!");
+ } else {
+ this.minTokenMatchFactor = minTokenMatchFactor;
+ }
+ }
}
\ No newline at end of file
Modified: incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/impl/OpenNlpAnalysedContentFactory.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/impl/OpenNlpAnalysedContentFactory.java?rev=1360296&r1=1360295&r2=1360296&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/impl/OpenNlpAnalysedContentFactory.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/impl/OpenNlpAnalysedContentFactory.java Wed Jul 11 16:59:08 2012
@@ -113,6 +113,7 @@ public class OpenNlpAnalysedContentFacto
private class OpenNlpAnalysedContent implements AnalysedContent{
private final TextAnalyzer analyzer;
private final double minPosTagProbability;
+ private final double minExcludePosTagProbability;
private final Iterator<AnalysedText> sentences;
private final Set<String> posTags;
private final Tokenizer tokenizer;
@@ -124,6 +125,7 @@ public class OpenNlpAnalysedContentFacto
analyzer.getLanguage(), PosTypeCollectionType.NOUN);
this.tokenizer = analyzer.getTokenizer();
this.minPosTagProbability = analyzer.getConfig().getMinPosTypeProbability();
+ this.minExcludePosTagProbability = minPosTagProbability/2;
}
/**
@@ -135,19 +137,45 @@ public class OpenNlpAnalysedContentFacto
return sentences;
}
/**
- * Called to check if a {@link Token} should be used to search for
- * Concepts within the Taxonomy based on the POS tag of the Token.
- * @param posTag the POS tag to check
- * @param posProb the probability of the parsed POS tag
- * @return <code>true</code> if Tokens with this POS tag should be
- * included in searches. Otherwise <code>false</code>. Also returns
- * <code>true</code> if no POS type configuration is available for the
- * language parsed in the constructor
+ * This now uses two tag probabilities<ul>
+ * <li> {@link TextAnalyzerConfig#getMinPosTypeProbability()} for
+ * accepting POS tags that represent Nouns and
+ * <li> <code>minPosTypeProb/2</code> for rejecting POS tags that
+ * are not nouns
+ * </ul>
+ * Assuming <code>minPosTypeProb=0.667</code>, a<ul>
+ * <li> noun with the prob 0.8 would result in returning <code>true</code>
+ * <li> noun with prob 0.5 would return <code>null</code>
+ * <li> verb with prob 0.4 would return <code>false</code>
+ * <li> verb with prob 0.3 would return <code>null</code>
+ * </ul>
+ * This new algorithm makes it less likely that non-nouns are processed
+ * by the KeywordLinkingEngine, because <code>null</code> is returned for them
+ * less often: the minimum probability required to reject is now much lower.<p>
+ * <i>NOTE:</i> Returning <code>null</code> usually results in using
+ * the fall-back (typically minTokenLength = 3), so most of those tokens
+ * were processed by the KeywordLinkingEngine.
+ * (see also STANBOL-685)
*/
@Override
public Boolean processPOS(String posTag, double posProb) {
- return posTags != null && posProb > minPosTagProbability ?
- Boolean.valueOf(posTags.contains(posTag)) : null;
+ if(posTags != null){
+ if(posTags.contains(posTag)){
+ if(posProb >= minPosTagProbability){
+ return Boolean.TRUE;
+ } else {
+ return null; //probability too low
+ }
+ } else {
+ if(posProb >= minExcludePosTagProbability){
+ return Boolean.FALSE;
+ } else {
+ return null; //probability too low
+ }
+ }
+ } else {
+ return null;
+ }
}
/**
* Not yet implemented.
Modified: incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/resources/OSGI-INF/metatype/metatype.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1360296&r1=1360295&r2=1360296&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/resources/OSGI-INF/metatype/metatype.properties (original)
+++ incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/resources/OSGI-INF/metatype/metatype.properties Wed Jul 11 16:59:08 2012
@@ -115,3 +115,11 @@ org.apache.stanbol.enhancer.engines.keyw
to use a special Tokenizer for matching keywords and alpha numeric IDs. Typical language \
specific Tokenizers tned to split such IDs in several tokens and therefore might prevent \
a correct matching.
+
+org.apache.stanbol.enhancer.engines.keywordextraction.minTokenMatchFactor.name=Minimum Token Match Factor
+org.apache.stanbol.enhancer.engines.keywordextraction.minTokenMatchFactor.description=If a Token \
+of the text is compared with a Token in the Label of an Entity the similarity of those is \
+expressed in the range [0..1]. This factor specifies the minimum similarity of two Tokens \
+so that they are considered to match. Lower values will allow more Tokens to match (e.g. \
+inflected forms of words) but may also result in false positives. Regardless of the \
+configured value the similarity will influence the confidence of suggestions.