You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2014/01/07 09:00:35 UTC
svn commit: r1556137 - in
/stanbol/trunk/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking:
IndexConfiguration.java TaggingSession.java
Author: rwesten
Date: Tue Jan 7 08:00:35 2014
New Revision: 1556137
URL: http://svn.apache.org/r1556137
Log:
merged implementation for STANBOL-1249 from 0.12 to trunk
Modified:
stanbol/trunk/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/IndexConfiguration.java
stanbol/trunk/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/TaggingSession.java
Modified: stanbol/trunk/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/IndexConfiguration.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/IndexConfiguration.java?rev=1556137&r1=1556136&r2=1556137&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/IndexConfiguration.java (original)
+++ stanbol/trunk/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/IndexConfiguration.java Tue Jan 7 08:00:35 2014
@@ -290,9 +290,22 @@ public class IndexConfiguration {
this.rankingField = rankingField == null ? null :
FieldEncodingEnum.encodeFloat(rankingField, fieldEncoding);
}
-
+ /**
+ * Returns the CorpusInfo for the parsed language. If the language has an
+ * extension (e.g. en-US) it first tries to load the corpus for the exact
+ * match and falls back to the main language (en) if such a corpus does not
+ * exist.
+ * @param language the language
+ * @return the corpus information or <code>null</code> if not present
+ */
public CorpusInfo getCorpus(String language) {
- return corpusInfos.get(language);
+ CorpusInfo langCorpusInfo = corpusInfos.get(language);
+ if(langCorpusInfo == null && language.indexOf('-') > 0){
+ String rootLang = language.substring(0,language.indexOf('-'));
+ log.debug(" - no FST corpus for {}. Fallback to {}", language,rootLang);
+ langCorpusInfo = corpusInfos.get(rootLang);
+ }
+ return langCorpusInfo;
}
/**
* Getter for the languages of all configured FST corpora
Modified: stanbol/trunk/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/TaggingSession.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/TaggingSession.java?rev=1556137&r1=1556136&r2=1556137&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/TaggingSession.java (original)
+++ stanbol/trunk/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/TaggingSession.java Tue Jan 7 08:00:35 2014
@@ -131,7 +131,9 @@ public class TaggingSession implements C
//get the corpusInfo
CorpusInfo langCorpusInfo = config.getCorpus(language);
+ log.debug("> language Corpus: {}", langCorpusInfo);
CorpusInfo defaultCorpusInfo = config.getDefaultCorpus();
+ log.debug("> default Corpus: {}", defaultCorpusInfo);
//obtain the Solr Document Id field
SchemaField idSchemaField = config.getIndex().getLatestSchema().getUniqueKeyField();