You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2014/01/07 09:00:35 UTC

svn commit: r1556137 - in /stanbol/trunk/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking: IndexConfiguration.java TaggingSession.java

Author: rwesten
Date: Tue Jan  7 08:00:35 2014
New Revision: 1556137

URL: http://svn.apache.org/r1556137
Log:
merged implementation for STANBOL-1249 from 0.12 to trunk

Modified:
    stanbol/trunk/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/IndexConfiguration.java
    stanbol/trunk/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/TaggingSession.java

Modified: stanbol/trunk/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/IndexConfiguration.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/IndexConfiguration.java?rev=1556137&r1=1556136&r2=1556137&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/IndexConfiguration.java (original)
+++ stanbol/trunk/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/IndexConfiguration.java Tue Jan  7 08:00:35 2014
@@ -290,9 +290,22 @@ public class IndexConfiguration {
         this.rankingField = rankingField == null ? null :
             FieldEncodingEnum.encodeFloat(rankingField, fieldEncoding);
     }
-
+    /**
+     * Returns the CorpusInfo for the parsed language. If the language has an
+     * extension (e.g. en-US) it first tries to load the corpus for the exact
+     * match and falls back to the main language (en) if such a corpus does not
+     * exist.
+     * @param language the language
+     * @return the corpus information or <code>null</code> if not present
+     */
     public CorpusInfo getCorpus(String language) {
-        return corpusInfos.get(language);
+        CorpusInfo langCorpusInfo =  corpusInfos.get(language);
+        if(langCorpusInfo == null && language.indexOf('-') > 0){
+        	String rootLang = language.substring(0,language.indexOf('-'));
+        	log.debug(" - no FST corpus for {}. Fallback to {}", language,rootLang);
+        	langCorpusInfo =  corpusInfos.get(rootLang);
+        }
+        return langCorpusInfo;
     }
     /**
      * Getter for the languages of all configured FST corpora

Modified: stanbol/trunk/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/TaggingSession.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/TaggingSession.java?rev=1556137&r1=1556136&r2=1556137&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/TaggingSession.java (original)
+++ stanbol/trunk/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/TaggingSession.java Tue Jan  7 08:00:35 2014
@@ -131,7 +131,9 @@ public class TaggingSession implements C
         
         //get the corpusInfo
         CorpusInfo langCorpusInfo = config.getCorpus(language);
+        log.debug("> language Corpus: {}", langCorpusInfo);
         CorpusInfo defaultCorpusInfo = config.getDefaultCorpus();
+        log.debug("> default Corpus: {}", defaultCorpusInfo);
         
         //obtain the Solr Document Id field
         SchemaField idSchemaField = config.getIndex().getLatestSchema().getUniqueKeyField();