You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/12/21 21:16:33 UTC

svn commit: r1425126 - /stanbol/trunk/enhancement-engines/entitylinking/labeltokenizer-lucene/src/main/resources/OSGI-INF/metatype/metatype.properties

Author: rwesten
Date: Fri Dec 21 20:16:33 2012
New Revision: 1425126

URL: http://svn.apache.org/viewvc?rev=1425126&view=rev
Log:
STANBOL-849: added metatype.properties for the LabelTokenizer

Modified:
    stanbol/trunk/enhancement-engines/entitylinking/labeltokenizer-lucene/src/main/resources/OSGI-INF/metatype/metatype.properties

Modified: stanbol/trunk/enhancement-engines/entitylinking/labeltokenizer-lucene/src/main/resources/OSGI-INF/metatype/metatype.properties
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/entitylinking/labeltokenizer-lucene/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1425126&r1=1425125&r2=1425126&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/entitylinking/labeltokenizer-lucene/src/main/resources/OSGI-INF/metatype/metatype.properties (original)
+++ stanbol/trunk/enhancement-engines/entitylinking/labeltokenizer-lucene/src/main/resources/OSGI-INF/metatype/metatype.properties Fri Dec 21 20:16:33 2012
@@ -13,116 +13,29 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-stanbol.enhancer.engine.name.name=Name
-stanbol.enhancer.engine.name.description=The name of the enhancement engine as \
-used in the RESTful interface '/engine/<name>'
-
 service.ranking.name=Ranking
-service.ranking.description=If two enhancement engines with the same name are active the \
-one with the higher ranking will be used to process parsed content items.
+service.ranking.description=LabelTokenizers with a higher ranking are requested first \
+to tokenize labels. Manually configured Tokenizers should use rankings >= 0
 
 
 #===============================================================================
 #Properties and Options used to configure 
 #===============================================================================
-org.apache.stanbol.enhancer.engines.entityhublinking.EntityhubLinkingEngine.name=Apache \
-Stanbol Enhancer Engine: Entityhub Linking
-org.apache.stanbol.enhancer.engines.entityhublinking.EntityhubLinkingEngine.description=An engine \
-that links NLP processed text with Entities of the Entityhub Sites.
-
-enhancer.engines.linking.entityhub.siteId.name=Referenced Site
-enhancer.engines.linking.entityhub.siteId.description=The ID of the \
-Entityhub Referenced Site holding the Controlled Vocabulary (e.g. a taxonomy or just a set of \
-named entities). To match against the whole Entityhub use "entityhub" as value.
-
-enhancer.engines.linking.minSearchTokenLength.name=Min Token Length
-enhancer.engines.linking.minSearchTokenLength.description=The minimum \
-length of Tokens used to lookup Entities within the Controlled Vocabulary. This parameter is ignored \
-in case a POS (Part of Speech) tagger is available for the language of the parsed content.
-
-enhancer.engines.linking.labelField.name=Label Field 
-enhancer.engines.linking.labelField.description=The field used to match \
-Entities with a mentions within the parsed text.
-
-enhancer.engines.linking.typeField.name=Type Field
-enhancer.engines.linking.typeField.description=The field used to \
-retrieve the types of matched Entities. Values of that field are expected to be URIs
-
-enhancer.engines.linking.caseSensitive.name=Case Sensitivity
-enhancer.engines.linking.caseSensitive.description=Allows to enable/disable \
-case sensitive matching
-
-enhancer.engines.linking.redirectField.name=Redirect Field
-enhancer.engines.linking.redirectField.description=Entities may \
-define redirects to other Entities (e.g. "USA"(http://dbpedia.org/resource/USA) -> \
-"United States"(http://dbpedia.org/resource/United_States). Values of this field are \
-expected to link to other entities part of the controlled vocabulary
-
-enhancer.engines.linking.suggestions.name=Suggestions
-enhancer.engines.linking.suggestions.description=The maximal \
-number of suggestions returned for a single mention. 
-
-enhancer.engines.linking.minFoundTokens.name=Number of Required Tokens
-enhancer.engines.linking.minFoundTokens.description=For lookups with \
-several words (e.g. Dr Patrick Marshall) this is the minimum number of Tokens the label of an \
-entity must match to be suggested. This is only used of the label does not exactly match a part \
-of the text.
-
-enhancer.engines.linking.redirectMode.name=Redirect Mode
-enhancer.engines.linking.redirectMode.description=Defines how to \
-process redirects of Entities mentioned in the parsed content.. Three modes to deal with such \
-links are supported: Ignore redirects; Add values from redirected Entities to extracted; Follow \
-Redirects and suggest the redirected Entity instead of the extracted.
-enhancer.engines.linking.redirectMode.option.follow=Follow Redirects
-enhancer.engines.linking.redirectMode.option.addValues=Keep extracted \
-Entity, but add information of the redirected
-enhancer.engines.linking.redirectMode.option.ignore=Ignore Redirects
-
-enhancer.engines.linking.properNounsState.name=Link ProperNouns only
-enhancer.engines.linking.properNounsState.description=If activated \
-only ProperNouns will be matched against the Vocabulary. If deactivated any Noun will be matched. \
-NOTE that this parameter requires a tag of the POS TagSet to be mapped against 'olia:PorperNoun'. \
-Otherwise mapping will not work as expected.
-
-enhancer.engines.linking.processedLanguages.name=Processed Languages
-enhancer.engines.linking.processedLanguages.description=Languages to \
-process and optionally language specific configurations. Syntax "{lang};{param-name}={param-value};\
-{param-name}={param-value};...". Supported {param-name}s: "lc" - processed Lexical Categories (see \
-LexicalCategory enumeration for possible values); "pos" - processed Pos types (see Pos enumeration \
-for possible values); "tag" - processed string pos tags; "prob" - minumum probability of pos annotations.
-
-enhancer.engines.linking.defaultMatchingLanguage.name=Default Matching Language
-enhancer.engines.linking.defaultMatchingLanguage.description=The language \
-used in addition to the language detected for the analysed text to search for Entities. Typically this \
-configuration is an empty string to search for labels without any language defined, but for some data \
-sets (such as DBpedia.org) that add languages to any labels it might improve resuls to change this \
-configuration (e.g. to 'en' in the case of DBpedia.org).
-
-enhancer.engines.linking.dereference.name=Dereference Entities
-enhancer.engines.linking.dereference.description=If enabled additional \
-data for suggested Entities are included.
-
-enhancer.engines.linking.dereferenceFields.name=Fields used for dereferencing
-enhancer.engines.linking.dereferenceFields.description=If 'Dereference Entities' is enabled \
-data from this fields will be included for suggested Entities.
-
-enhancer.engines.linking.typeMappings.name=Type Mappings
-enhancer.engines.linking.typeMappings.description=This allows to add \
-additional entity-type > text-annotation-type mappings. Such mappings are used to determine the \
-'dc:type' value of the 'fise:TextAnnotation' created for extracted entities. Usage: \
-variant (a) '{uri}' short for {uri} > {uri} or (b) '{source1};{source2};..;{sourceN} > {target}'. \
-Note that a {source} may be only mapped to a single {target}. Multiple {source} types \
-can be mapped to the same {target}.
-
-enhancer.engines.linking.keywordTokenizer.name=Keyword Tokenizer
-enhancer.engines.linking.keywordTokenizer.description=This allows \
-to use a special Tokenizer for matching keywords and alpha numeric IDs. Typical language \
-specific Tokenizers tned to split such IDs in several tokens and therefore might prevent \
-a correct matching.
-
-enhancer.engines.linking.minTokenScore.name=Minimum Token Score
-enhancer.engines.linking.minTokenScore.description=The minimum score a single Token \
-of the text must match a Token of the Label so that it is considered to match. [0..1].\
-The score is calculated by comparing the matching characters from the beginning of the two \
-tokens compared to the overal size of the token. So it allows derivations at the end of the \
-of the tokens (e.g because of inflected forms of words).
+org.apache.stanbol.enhancer.engines.entityhublinking.labeltokenizer.lucene.LuceneLabelTokenizer.name=Apache \
+Stanbol Enhancer EntityLinking: Solr/Lucene LabelTokenizer
+org.apache.stanbol.enhancer.engines.entityhublinking.labeltokenizer.lucene.LuceneLabelTokenizer.description=This \
+implementation allows configuring Lucene Analyzers that are used to tokenize labels of Tokens processed by \
+the EntityLinkingEngine.
+
+enhancer.engine.linking.labeltokenizer.lucene.tokenizerFactory.name=TokenizerFactory
+enhancer.engine.linking.labeltokenizer.lucene.tokenizerFactory.description=The \
+Solr TokenizerFactory (required). Users need to use the fully qualified name; the \
+'solr.{simple-name}' pattern does NOT work here!
+enhancer.engine.linking.labeltokenizer.lucene.tokenFilterFactory.name=TokenFilterFactories
+enhancer.engine.linking.labeltokenizer.lucene.tokenFilterFactory.description=The \
+Solr TokenFilterFactory (optional). Users need to use the fully qualified name; the \
+'solr.{simple-name}' pattern does NOT work here!
+enhancer.engines.entitylinking.labeltokenizer.languages.name=Tokenized Languages
+enhancer.engines.entitylinking.labeltokenizer.languages.description=The Languages \
+that are supported by this configuration. The syntax supports '{lang1},{lang2},\
+!{lang3},*'
\ No newline at end of file